# LSTM for Generating Shakespeare Text

By Udaikaran Singh

In [1]:
import torch as torch
import numpy as np
import matplotlib.pyplot as plt
import string
import torch.nn as nn
import random

In [2]:
from models.lstm import LSTM
from models.gru import GRU
from models.vanilla_rnn import Vanilla_RNN

## Preparing Dataset

In [3]:
# Vocabulary: every character in string.printable; a character's position in
# this string is its class index.
all_chars = string.printable
num_chars = len(all_chars)

# Read the whole corpus into memory. The context manager closes the file handle
# as soon as the text has been read instead of leaving it to the garbage collector.
with open('./shakespeare/hamlet.txt') as _corpus_handle:
    file = _corpus_handle.read()
file_len = len(file)

In [4]:
def get_random_seq(seq_len=128):
    """Return a random window of ``seq_len + 1`` consecutive corpus characters.

    The extra character lets the caller split the window into an input
    (all but the last character) and a target (all but the first character),
    each ``seq_len`` characters long.

    seq_len: Number of input/target steps the window must provide.

    Raises ValueError if the corpus holds fewer than ``seq_len + 1`` characters.
    """
    window = seq_len + 1
    if file_len < window:
        raise ValueError(
            'corpus has {} characters but a window of {} is required'.format(
                file_len, window))

    # random.randint is inclusive on both ends, so the largest start index
    # must still leave a full window before the end of the corpus.
    start_index = random.randint(0, file_len - window)
    return file[start_index:start_index + window]

def seq_to_onehot(seq):
    """Encode a string as a one-hot float tensor of shape [len(seq), 1, num_chars].

    Each character is looked up in ``all_chars``; its position there is the
    index that is set to 1 in the corresponding row. A character that is not
    in ``all_chars`` raises ValueError (from ``str.index``).
    """
    positions = torch.tensor([all_chars.index(ch) for ch in seq], dtype=torch.long)
    encoded = torch.nn.functional.one_hot(positions, num_classes=num_chars)
    # one_hot yields integers with shape [len, num_chars]; convert to float
    # and insert the singleton middle axis.
    return encoded.float().unsqueeze(1)

def seq_to_index(seq):
    """Encode a string as a float tensor of shape [len(seq), 1] of character indices.

    Every entry is the position of the character inside ``all_chars``. The
    values are stored as floats; callers cast to an integer type if needed.
    A character that is not in ``all_chars`` raises ValueError.
    """
    positions = [float(all_chars.index(ch)) for ch in seq]
    # Build a flat vector first, then add the trailing singleton axis.
    return torch.tensor(positions).unsqueeze(1)

def get_input_and_target():
    """Draw one random training pair from the corpus.

    Returns a tuple ``(input, target)``: the one-hot encoding of the sampled
    window without its last character, and the integer (long) indices of the
    same window without its first character, so target[t] is the character
    that follows input[t].
    """
    window = get_random_seq()
    prefix, shifted = window[:-1], window[1:]
    return seq_to_onehot(prefix), seq_to_index(shifted).long()

## Training

In [5]:
# Training step function.
def train_step(net, opt, input, target):
    """ Run one optimisation step over a single sequence.

        net:    The network instance. Must provide init_hidden() and be
                callable as net(step_input, hidden) -> (output, hidden).
        opt:    The optimizer instance.
        input:  Input tensor.  Shape: [seq_len, 1, n_chars].
        target: Target tensor. Shape: [seq_len, 1].

        Returns the loss averaged over the sequence length as a 0-dim tensor
        detached from the autograd graph, so callers can accumulate, log or
        plot it without keeping graph references alive.

        Raises ValueError if the input sequence is empty.
    """
    seq_len = input.shape[0]    # Get the sequence length of current input.
    if seq_len == 0:
        # Without any step there is no loss tensor to back-propagate.
        raise ValueError('train_step requires a non-empty input sequence')

    hidden = net.init_hidden()  # Initial hidden state.
    net.zero_grad()             # Clear the gradient.
    loss = 0                    # Initial loss.

    for t in range(seq_len):    # For each one in the input sequence.
        output, hidden = net(input[t], hidden)
        loss = loss + loss_func(output, target[t])

    loss.backward()             # Backward.
    opt.step()                  # Update the weights.

    # Detach before returning: the value is only used for reporting.
    return loss.detach() / seq_len

In [6]:
# Evaluation step function.
def eval_step(net, init_seq='W', predicted_len=100):
    """ Generate text by sampling from the network one character at a time.

        net:           The network instance. Must provide init_hidden() and be
                       callable as net(step_input, hidden) -> (output, hidden).
        init_seq:      Non-empty seed string; it primes the hidden state and
                       is the prefix of the returned text.
        predicted_len: Number of characters to generate after the seed.

        Returns init_seq followed by predicted_len sampled characters.

        Raises ValueError if init_seq is empty.
    """
    if not init_seq:
        raise ValueError('init_seq must contain at least one character')

    # Sampling never back-propagates, so skip autograd bookkeeping entirely.
    with torch.no_grad():
        # Initialize the hidden state and encode the seed.
        hidden     = net.init_hidden()
        init_input = seq_to_onehot(init_seq).to(device)

        # Use all but the last seed character to "build up" the hidden state.
        for step_input in init_input[:-1]:
            _, hidden = net(step_input, hidden)

        # The last seed character is the first input of the generation loop.
        input = init_input[-1]
        pieces = [init_seq]

        for _ in range(predicted_len):
            # Get the current output and hidden state.
            output, hidden = net(input, hidden)

            # Sample from the output as a multinomial distribution. softmax
            # gives the same distribution as normalising exp(output) but
            # does not overflow for large scores.
            probs = torch.softmax(output.view(-1), dim=0)
            predicted_index = torch.multinomial(probs, 1).item()

            # Record the sampled character and feed it back as the next input.
            predicted_char = all_chars[predicted_index]
            pieces.append(predicted_char)
            input = seq_to_onehot(predicted_char)[0].to(device)

    return ''.join(pieces)

In [7]:
# Run on the first CUDA device when one is available; fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cpu


In [8]:
# Build the character-level model. The arguments appear to be
# (input size, hidden size, output size, device) — TODO confirm against models/lstm.py.
net = LSTM(num_chars, 100, num_chars, device)

In [9]:
# Number of iterations.
iters       = 200000  # Number of training iterations.
print_iters = 100     # Number of iterations for each log printing.

# The loss variables.
all_losses = []       # One mean loss (a plain float) per print_iters iterations.
loss_sum   = 0.0      # Running sum of losses since the last log line.

# Initialize the optimizer and the loss function.
opt       = torch.optim.Adam(net.parameters(), lr=0.005)
loss_func = nn.CrossEntropyLoss()

# Training procedure.
for i in range(iters):
    input, target = get_input_and_target()              # Fetch input and target.
    input, target = input.to(device), target.to(device) # Move to the compute device.
    loss = train_step(net, opt, input, target)          # Calculate the loss.

    # Accumulate a Python float rather than the tensor: summing tensors would
    # leave all_losses full of tensors (possibly still tracking gradients or
    # living on the GPU), which cannot be handed to Matplotlib.
    loss_sum += loss.item()

    # Print the log.
    if i % print_iters == print_iters - 1:
        mean_loss = loss_sum / print_iters
        print('iter:{}/{} loss:{}'.format(i, iters, mean_loss))
        print('generated sequence: {}\n'.format(eval_step(net)))

        # Track the loss and start a fresh window.
        all_losses.append(mean_loss)
        loss_sum = 0.0

iter:99/20000 loss:3.3709065914154053
generated sequence: Whso
	: ut  osorg ithobomo ;. w he snthies nenono cimyite c nl'heo tresoe hire-oe Keel: s  samelins
	

iter:199/20000 loss:2.7001307010650635
generated sequence: Wof be ltur Isandor m
	S



LENlilereter t ber'muN
	EO	AB; f ithaetond s s w bon
u 'toshacouavemeisav



KeyboardInterrupt: 

## Evaluation

In [None]:
# all_losses holds one averaged loss per print_iters training iterations.
# Convert every entry to a plain float first: entries may be 0-dim tensors
# (possibly still tracking gradients), which Matplotlib cannot convert.
loss_values = [float(entry) for entry in all_losses]

# Scale the x axis to real iteration counts so it matches the 'iters' label.
iter_marks = [(k + 1) * print_iters for k in range(len(loss_values))]

plt.xlabel('iters')
plt.ylabel('loss')
plt.plot(iter_marks, loss_values)
plt.show()

In [None]:
# Print a longer sample: the default 'W' seed followed by 600 generated characters.
print(eval_step(net, predicted_len=600))