In [1]:
import os
import time
from datetime import datetime
import argparse

import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import time

from dataset import TextDataset
from model import TextGenerationModel
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()


In [2]:
# Initialize the device which to run the model on
device = torch.device("cpu") if not torch.cuda.is_available() else torch.device("cuda:0")
print("[INFO]: Using device", device)

[INFO]: Using device cpu


In [50]:
## Parameters 
batch_size = 64
seq_length = 30
txt_file = "assets/book_EN_grimms_fairy_tails.txt"

lstm_num_hidden = 128 #128
lstm_num_layers = 2 #2
learning_rate = 2e-3
max_norm = 10

print_every = 100
train_steps = 3000
embedding_dim = 87

dropout_keep_prob = 0
num_examples = 5 # how many examples to sample

sample_every = 100 #int(train_steps/3) # 3 times during training

# Initialize the dataset and data loader (note the +1)
dataset = TextDataset(filename=txt_file, seq_length=seq_length)  # fixme
data_loader = DataLoader(dataset, batch_size)
voc_size = dataset._vocab_size

Initialize dataset with 540241 characters, 87 unique.


In [6]:
# Some experiments to know the dimensions

x,y = next(iter(data_loader))
#batch_inputs = torch.stack(x).view(seq_length,batch_size, 1).float().to(device) # [batch_size, seq_length, 1], 1 is the input_size
batch_inputs = torch.stack(x)
batch_targets = torch.stack(y).to(device)

print("Batch targets shape:", batch_targets.shape)
print("Batch inputs shape:", batch_inputs.shape)

embeddings = nn.Embedding(voc_size, embedding_dim)
print("Batch inputs shape after putting through the embedding:",embeddings(batch_inputs).shape )

Batch targets shape: torch.Size([30, 64])
Batch inputs shape: torch.Size([30, 64])
Batch inputs shape after putting through the embedding: torch.Size([30, 64, 64])


In [51]:
class TextGenerationModel(nn.Module):

    def __init__(self, batch_size, seq_length, vocabulary_size,
                 lstm_num_hidden=256, lstm_num_layers=2, device='cuda:0', embedding_dim = 32, drop_prob = 0.5, bidirectional = False):

        super(TextGenerationModel, self).__init__()
        
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.device = device
      

        self.lstm = nn.LSTM(input_size = embedding_dim, hidden_size = lstm_num_hidden, num_layers = lstm_num_layers, dropout = dropout_keep_prob, bidirectional = bidirectional)
        self.dropout = nn.Dropout(drop_prob)
        self.fc = nn.Linear(in_features = lstm_num_hidden, out_features = vocabulary_size)
        
        h0 = torch.zeros((lstm_num_layers*1,batch_size,lstm_num_hidden))
        c0 = torch.zeros((lstm_num_layers*1,batch_size,lstm_num_hidden))
        
        self.logSoftmax = nn.LogSoftmax(dim=2)
        self.embeddings = nn.Embedding(vocabulary_size, embedding_dim)

    def forward(self, x, h = None):
        x = self.embeddings(x)
        
        if h == None:
            lstm_output, h = self.lstm(x) # randomly intialize the hidden state and cell state
        else:
            lstm_output, h = self.lstm(x, h) # used for sampling from the model
            
            
        drop_output = self.dropout(lstm_output)
        fc_output = self.fc(drop_output) # shape: [seq_length, batch_size, voc_size]
    
        return self.logSoftmax(fc_output), h # shape: [seq_length, batch_size, voc_size], a disritbution over the vocabulary
    
model = TextGenerationModel(batch_size, seq_length, vocabulary_size = dataset._vocab_size, lstm_num_hidden=lstm_num_hidden,
                            lstm_num_layers=lstm_num_layers, device = device, embedding_dim= embedding_dim, drop_prob = dropout_keep_prob, bidirectional=bidirectional)

In [None]:
def generate_text(model, dataset, num_examples, example_length):
    """Uses the model to randomly create batch_size sentences from characters in a dataset"""
    start_letters = [torch.tensor(np.random.randint(0, voc_size)) for i in range(0, num_examples)] # get num_examples random letters
    letters = torch.stack(start_letters).view(1,-1) # initial shape [1, num_examples], 0-th dim will increase up to example_lentgh
    
    h = (torch.zeros((lstm_num_layers*1,num_examples,lstm_num_hidden)), torch.zeros((lstm_num_layers*1,num_examples,lstm_num_hidden)) ) # initialize the hidden state
    
    next_letter_given_previous = letters # initalize next letter to the random letters
    
    for letter_num in range(example_length-1):
        log_probs, h = model(next_letter_given_previous, h) # store the log probs and the hidden state
        

        next_letter_given_previous = torch.argmax(log_probs, dim = 2) # get the next most probable letter
        letters = torch.cat((letters, next_letter_given_previous), dim=0) # add letters to the generated sequence
        
        #next_letter_given_previous = torch.argmax(model(letters)[[-1]], dim = 2)
        #letters = torch.cat((letters, next_letter_given_previous), dim=0) # add the consecutive letters to the sequence 
    
    return letters

In [59]:
model.train()

# Setup the loss and optimizer
criterion = nn.NLLLoss()  #
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay = 1e-4 )

#param = next(model.parameters()).data
#h = (param.new(lstm_num_layers, batch_size, lstm_num_hidden).zero_(), 
#         param.new(lstm_num_layers, batch_size, lstm_num_hidden).zero_())



for step, (batch_inputs, batch_targets) in enumerate(data_loader):

    # Only for time measurement of step through network
    t1 = time.time()

    # Move to GPU
    batch_inputs = torch.stack(batch_inputs).to(device) # [batch_size, seq_length]
    batch_targets = torch.stack(batch_targets).to(device)  # [batch_size, seq_length]

    # Reset for next iteration
    model.zero_grad()

    # Forward pass
    log_probs, _ = model(batch_inputs) # [seq_length, batch_size, voc_size]
    

    # Calculate loss, gradients
    """
    loss = 0   #
    for timestep in range(seq_length): # iterate over timesteps
        loss += criterion(log_probs[timestep, :, :], batch_targets[timestep])
    loss /= seq_length
    """
    
    loss = criterion(log_probs.view(-1, voc_size), batch_targets.view(-1))
    loss.backward()

    # Clip gradients to prevent explosion
    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                   max_norm=max_norm)

    # Update network parameters
    optimizer.step()

    predictions = torch.argmax(log_probs, dim=2)
    correct = (predictions == batch_targets).sum().item()
    accuracy = correct/(batch_size*seq_length)
    
    # Log plots to tensorboard
    writer.add_scalar("Loss", loss, step)
    writer.add_scalar("Accuracy", accuracy, step)
    
    # Just for time measurement
    t2 = time.time()
    examples_per_second = batch_size/float(t2-t1)

    
    if (step + 1) % print_every == 0:

        print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, \
                Examples/Sec = {:.2f}, "
              "Accuracy = {:.2f}, Loss = {:.3f}".format(
                datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                train_steps, batch_size, examples_per_second,
                accuracy, loss
                ))
        
    if (step + 1) % sample_every == 0:
        
    if step == train_steps:
        break
        
writer.flush()        

[2020-11-22 15:23] Train Step 0099/2000, Batch Size = 64,                 Examples/Sec = 995.38, Accuracy = 0.56, Loss = 1.450
[2020-11-22 15:23] Train Step 0199/2000, Batch Size = 64,                 Examples/Sec = 931.10, Accuracy = 0.51, Loss = 1.624


KeyboardInterrupt: 

In [53]:
model

TextGenerationModel(
  (lstm): LSTM(87, 128, num_layers=2)
  (dropout): Dropout(p=0, inplace=False)
  (fc): Linear(in_features=128, out_features=87, bias=True)
  (logSoftmax): LogSoftmax(dim=2)
  (embeddings): Embedding(87, 87)
)

In [54]:
def generate_text(model, dataset, num_examples, example_length):
    """Uses the model to randomly create batch_size sentences from characters in a dataset"""
    start_letters = [torch.tensor(np.random.randint(0, voc_size)) for i in range(0, num_examples)] # get num_examples random letters
    letters = torch.stack(start_letters).view(1,-1) # initial shape [1, num_examples], 0-th dim will increase up to example_lentgh
    
    h = (torch.zeros((lstm_num_layers*1,num_examples,lstm_num_hidden)), torch.zeros((lstm_num_layers*1,num_examples,lstm_num_hidden)) ) # initialize the hidden state
    
    next_letter_given_previous = letters # initalize next letter to the random letters
    
    for letter_num in range(example_length-1):
        log_probs, h = model(next_letter_given_previous, h) # store the log probs and the hidden state
        

        next_letter_given_previous = torch.argmax(log_probs, dim = 2) # get the next most probable letter
        letters = torch.cat((letters, next_letter_given_previous), dim=0) # add letters to the generated sequence
        
        #next_letter_given_previous = torch.argmax(model(letters)[[-1]], dim = 2)
        #letters = torch.cat((letters, next_letter_given_previous), dim=0) # add the consecutive letters to the sequence 
    
    return letters
    

In [55]:
example_sentences = generate_text(model, dataset, 3, 50)

In [58]:
dataset.convert_to_string([letter.item() for letter in example_sentences[:,2]])

'E ‘The wind the world to the world to the world to'

In [None]:
offset = np.random.randint(0, len(dataset._data)-dataset._seq_length-2)
inputs = [dataset._char_to_ix[ch] for ch in dataset._data[offset:offset+dataset._seq_length]]
offset

In [None]:
start_letters = [torch.tensor(np.random.randint(0, voc_size)) for i in range(0, 5)]
random_letters = torch.stack(start_letters).view(1,-1)

second_letters = torch.argmax(model(random_letters), dim = 2)
letters = torch.cat([random_letters, second_letters])

model(letters)[[-1]].shape

In [None]:
start_index = vectorizer.surname_vocab.start_index
batch_size_new = 2
# hidden_size = whatever hidden size the model is set to

initial_h = Variable(torch.ones(batch_size, hidden_size))
initial_x_index = Variable(torch.ones(batch_size).long()) * start_index

In [None]:
batch_inputs.shape

In [None]:
batch_inputs.shape

In [None]:
example = torch.tensor([[1],
                       [2], 
                       [3]]).T
example.shape

In [None]:
model(example).shape

In [None]:
x = torch.tensor(list(dataset._ix_to_char.keys()))
model.embeddings(x).shape

In [None]:
def pred_next_char(model, char)

In [None]:
import torch.nn as nn
import torch

rnn = nn.LSTM(10, 20, 2)
input = torch.randn(5, 3, 10)
h0 = torch.randn(2, 3, 20)
c0 = torch.randn(2, 3, 20)
output, (hn, cn) = rnn(input, h0)