In [None]:
#importing the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import string, os
import re
import random
import io
import sys
from PIL import Image
from PIL import Image, ImageDraw, ImageFont
import warnings
warnings.filterwarnings("ignore")


**MODEL BUILDING**


Recurrent neural networks (RNNs) are a popular choice for text generation. In this project, I will use an LSTM model, an improved variant of the standard recurrent neural network.

**The following steps are involved in building the model:**

* Initialising the Model
* Training the Model
* Checking the output

**Building the Model**

In [None]:
# Read the entire training corpus into memory as one string.
with open('/content/drive/MyDrive/LSML2/data.txt', 'r') as corpus_file:
    Corpus = corpus_file.read()

# Sanity check: preview the first 100 characters and report the total size.
print(Corpus[:100])
print('corpus length:', len(Corpus))

every man will ask the questions and every man will suffer blame and loss  every day you die a littl
corpus length: 44806010


In [47]:
# Vocabulary: every distinct character in the corpus, in sorted order.
chars = sorted(set(Corpus))


In [48]:
# Two-way lookup between each character and its position in `chars`.
char_to_int = {char: index for index, char in enumerate(chars)}
int_to_char = dict(enumerate(chars))


In [None]:
import numpy as np

seq_length = 50  # number of characters in each input window
step = 1  # stride between the starts of consecutive windows

# Slide a fixed-size window over the corpus: each window is one input sample
# and the character immediately following it is the prediction target.
window_starts = range(0, len(Corpus) - seq_length, step)
sentences = np.array([Corpus[start: start + seq_length] for start in window_starts])
next_chars = np.array([Corpus[start + seq_length] for start in window_starts])

# Show the first few windows with their targets, plus the sample count.
print('Sentence Window')
print(sentences[:5])
print('Target charaters')
print(next_chars[:5])
print('Number of sequences:', len(sentences))

Sentence Window
['every man will ask the questions and every man wil'
 'very man will ask the questions and every man will'
 'ery man will ask the questions and every man will '
 'ry man will ask the questions and every man will s'
 'y man will ask the questions and every man will su']
Target charaters
['l' ' ' 's' 'u' 'f']
Number of sequences: 44805960


In [None]:
def getdata(sentences, next_chars):
    """Encode character windows and their target characters as integer indices.

    Relies on the notebook-level ``seq_length`` (window width) and
    ``char_to_int`` (character -> index mapping).

    Args:
        sentences: sequence of strings, one input window per entry.
        next_chars: sequence of single characters; ``next_chars[i]`` is the
            target that follows ``sentences[i]``.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(len(sentences), seq_length)``
        and ``y`` has shape ``(len(sentences),)``. Both are int32 arrays of
        vocabulary indices. Windows shorter than ``seq_length`` are left
        zero-filled on the right.

    Raises:
        KeyError: if a character is missing from ``char_to_int``.
        IndexError: if a window is longer than ``seq_length``.
    """
    n_samples = len(sentences)
    # These are class indices, not measurements: an integer dtype avoids the
    # default float64 allocation (half the memory at int32) and converts to
    # torch.long without a float -> int cast.
    X = np.zeros((n_samples, seq_length), dtype=np.int32)
    y = np.zeros(n_samples, dtype=np.int32)
    for i, (sentence, target) in enumerate(zip(sentences, next_chars)):
        for t, char in enumerate(sentence):
            X[i, t] = char_to_int[char]
        y[i] = char_to_int[target]
    return X, y

In [None]:
# Encode every window/target pair, then confirm the resulting array shapes.
train_x, train_y = getdata(sentences, next_chars)
for label, encoded in (('Shape of training_x:', train_x),
                       ('Shape of training_y:', train_y)):
    print(label, encoded.shape)

Shape of training_x: (44805960, 50)
Shape of training_y: (44805960,)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class SimpleLSTM(nn.Module):
    """Character-level model: embedding -> two-layer LSTM -> linear head.

    Args:
        n_vocab: number of distinct characters (embedding rows / output logits).
        hidden_dim: size of the LSTM hidden state.
        embedding_dim: size of each character embedding.
        dropout: dropout applied between the stacked LSTM layers.
        **kwargs: arbitrary extras, stored unchanged on ``self.kwargs``.
    """

    def __init__(self, n_vocab, hidden_dim, embedding_dim, dropout=0.2, **kwargs):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.embedding_dim = embedding_dim
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=2,
            dropout=dropout,
        )
        self.embeddings = nn.Embedding(num_embeddings=n_vocab, embedding_dim=embedding_dim)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=n_vocab)
        self.kwargs = kwargs

    def forward(self, seq_in):
        """Return next-character logits for a batch of index sequences.

        ``seq_in`` is a 2-D integer tensor laid out (batch, sequence). It is
        transposed because the LSTM is built without ``batch_first`` and so
        consumes (sequence, batch, feature) input.
        """
        time_major = seq_in.t()
        step_outputs, _ = self.lstm(self.embeddings(time_major))
        # Only the output at the final time step feeds the classifier.
        return self.fc(step_outputs[-1])

In [None]:
# Copy the encoded windows and targets to the GPU as int64 tensors, the index
# dtype expected by nn.Embedding and CrossEntropyLoss.
X_train_tensor = torch.tensor(train_x, dtype=torch.long).to('cuda')
Y_train_tensor = torch.tensor(train_y, dtype=torch.long).to('cuda')

In [None]:
from torch.utils.data import Dataset, DataLoader

# Pair every encoded window with its target character index.
train = torch.utils.data.TensorDataset(X_train_tensor, Y_train_tensor)
# shuffle=True: the windows were cut with a stride of one character, so
# neighbouring samples are almost identical. Without shuffling, every batch
# would hold 128 consecutive, heavily overlapping windows from one passage.
train_loader = torch.utils.data.DataLoader(train, batch_size=128, shuffle=True)

In [None]:
# Build the network; the lookup tables ride along as extra keyword arguments
# so they stay attached to the model object.
model = SimpleLSTM(
    n_vocab=len(chars),
    hidden_dim=256,
    embedding_dim=256,
    char_to_int=char_to_int,
    int_to_char=int_to_char,
)
model.cuda()

# Adam optimizer over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)

**Training part**


In [49]:
import time  # wall-clock timing of each epoch

n_epochs = 20
avg_losses_f = []  # mean training loss of every completed epoch
loss_fn = torch.nn.CrossEntropyLoss()

for epoch in range(n_epochs):
    epoch_start = time.time()
    model.train()
    n_batches = len(train_loader)
    avg_loss = 0.

    for x_batch, y_batch in train_loader:
        batch_loss = loss_fn(model(x_batch), y_batch)

        # Clear stale gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Running mean: each batch contributes 1/n_batches of its loss.
        avg_loss += batch_loss.item() / n_batches

    elapsed_time = time.time() - epoch_start
    avg_losses_f.append(avg_loss)
    print('Epoch {}/{} \t loss={:.4f} \t time={:.2f}s'.format(
        epoch + 1, n_epochs, avg_loss, elapsed_time))

print('All \t loss={:.4f} \t '.format(np.average(avg_losses_f)))

**Checking the output**

In [None]:
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature scaling.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: divisor applied to the log-probabilities. Values below 1
            sharpen the distribution; values above 1 flatten it.

    Returns:
        Index of the sampled entry (result of ``np.argmax`` on a one-hot draw).
    """
    preds = np.asarray(preds).astype('float64')
    # Zero probabilities map to -inf, which is the intended "never pick"
    # value, so the divide-by-zero warning from log(0) is silenced here.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0 before exponentiating. The distribution
    # is unchanged, but this stops every term underflowing to 0 (and the
    # normalisation becoming 0/0 = NaN) when the temperature is very small.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [59]:
sentence = 'Wait until the reaper takes my life Never gonna get me out alive'
# Seed text: lower-case the prompt, drop characters outside the training
# vocabulary, and truncate to the model's window size.
start_text = ''.join(sym.lower() for sym in sentence if sym.lower() in char_to_int)[:seq_length]
variance = 0.3  # sampling temperature handed to sample()
generated = ''
original = start_text
window = start_text

# Build inputs on whichever device holds the model weights, so the cell works
# whether the model is on the GPU or the CPU.
device = next(model.parameters()).device
# Inference mode: switch dropout off (the training loop leaves the model in
# train mode) and skip gradient bookkeeping.
model.eval()

with torch.no_grad():
    for _ in range(400):
        # Encode the current window as an index vector of shape (1, seq_length).
        x = np.zeros((1, seq_length), dtype=np.int64)
        for t, char in enumerate(window):
            x[0, t] = char_to_int[char]

        x_in = torch.tensor(x, dtype=torch.long, device=device)
        pred = model(x_in)
        pred = F.softmax(pred, dim=1)[0].cpu().numpy()
        next_index = sample(pred, variance)
        next_char = int_to_char[next_index]  # index back to character

        generated += next_char
        window = window[1:] + next_char  # slide the window forward by one

print(original + generated)

wait until the reaper takes my life never gonna get you
as think for yourself
'cause i won't be the day when you make me cry
you know it's a lie
'cause that'll be the day
when you tree bompa bom
sail that weight and far away, yeah young blood
i can't get you out of my mind
i tried to be ready to love, babe
well what'll be the day
when you treat me so unkind
what's your name
what's should i love you
and she done me some on comesonifed to lost me
l


In [58]:
# Persist only the learned weights (state_dict), not the whole module object.
torch.save(model.state_dict(), '/content/trained_model.model')