### RNN Implmentation for NLP

In [1]:
# Importing the libraries
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import matplotlib.pyplot as plt

In [2]:
with open('Data/shakespeare.txt', 'r', encoding = 'utf8') as t:
    text = t.read()

In [3]:
print(text[:500])


                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
  His tender heir might bear his memory:
  But thou contracted to thine own bright eyes,
  Feed'st thy light's flame with self-substantial fuel,
  Making a famine where abundance lies,
  Thy self thy foe, to thy sweet self too cruel:
  Thou that art now the world's fresh ornament,
  And only herald to the gaudy spring,
  Within thine own bu


In [4]:
len(text)

5445609

In [5]:
all_char = set(text)

In [6]:
len(all_char)

84

In [7]:
# decoder
decoder = dict(enumerate(all_char))

In [8]:
# encoder
encoder = {char: i for i,char in decoder.items()}

In [9]:
encoded_text = np.array([encoder[char] for char in text])

In [10]:
encoded_text[:100]

array([81, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
       35, 35, 35, 35, 35, 16, 81, 35, 35, 73, 59, 31,  0, 35, 33,  1, 54,
       59, 28, 21, 58, 35, 71, 59, 28,  1, 58, 72, 59, 28, 21, 35, 17, 28,
       35, 27, 28, 21, 54, 59, 28, 35, 54, 44, 71, 59, 28,  1, 21, 28, 47,
       81, 35, 35, 11,  2,  1, 58, 35, 58,  2, 28, 59, 28, 74, 60, 35, 74,
       28,  1, 72, 58, 60, 83, 21, 35, 59, 31, 21, 28, 35,  0, 54])

In [11]:
# one hot encoding
def one_hot_enc(batch_text, uni_chars):
    one_hot = np.zeros((batch_text.size, uni_chars))
    one_hot = one_hot.astype(np.float32)
    
    one_hot[np.arange(one_hot.shape[0]), batch_text.flatten()] = 1.0
    
    one_hot = one_hot.reshape((*batch_text.shape, uni_chars))
    
    return one_hot

In [12]:
# Example
x = np.array([1,2,0])

In [13]:
one_hot_enc(x, 3)

array([[0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.]], dtype=float32)

In [14]:
# generate batches for training
def gen_batch(en_text, sample_size = 10, seq_len = 50):
    
    char_len = sample_size * seq_len
    num_batches = int(len(en_text) / char_len)
    
    en_text = en_text[: num_batches * char_len]
    en_text = en_text.reshape((sample_size, -1))
    
    for n in range(0,en_text.shape[-1], seq_len):
        x = en_text[:, n : n + seq_len]
        y = np.zeros_like(x)
        
        try:
            y[:, : -1] = x[:, 1:]
            y[:, -1] = en_text[:, n + seq_len]
        
        except:
            y[:, : -1] = x[:, 1:]
            y[:, -1] = en_text[:, 0]
        
        yield x,y

In [15]:
# Example 
sample_text = np.arange(20)

In [16]:
sample_text

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [17]:
batch_gen = gen_batch(sample_text, sample_size = 2, seq_len = 5)

In [18]:
x,y = next(batch_gen)

In [19]:
x

array([[ 0,  1,  2,  3,  4],
       [10, 11, 12, 13, 14]])

In [20]:
y

array([[ 1,  2,  3,  4,  5],
       [11, 12, 13, 14, 15]])

In [21]:
# RNN model
class CharRNN(nn.Module):
    def __init__(self, all_chars, num_hidden = 256, num_layers = 4, drop_prob = 0.5):
        
        super().__init__()
        
        self.drop_prob = drop_prob
        self.num_layers = num_layers
        self.num_hidden = num_hidden
        self.all_chars = all_chars
        
        self.decoder = dict(enumerate(all_chars))
        self.encoder = {char: i for i, char in decoder.items()}
        
        # Architecture
        self.lstm = nn.LSTM(len(all_chars), num_hidden, num_layers, dropout = drop_prob, batch_first = True)
        self.dropout = nn.Dropout(drop_prob)
        self.fc1 = nn.Linear(num_hidden, len(self.all_chars))
        
    def forward(self, x, hidden):
        
        lstm_out, hidden = self.lstm(x, hidden)
        drop_out = self.dropout(lstm_out)
        drop_out = drop_out.contiguous().view(-1, self.num_hidden)
        output = self.fc1(drop_out)
        
        return output, hidden
    
    def hidden_state(self, batch_size):
        
        hidden = (torch.zeros(self.num_layers, batch_size, self.num_hidden).cuda(),
                  torch.zeros(self.num_layers, batch_size, self.num_hidden).cuda())
        
        return hidden

In [22]:
model = CharRNN(all_chars = all_char, num_hidden = 512, num_layers = 3, drop_prob = 0.5)

In [23]:
model

CharRNN(
  (lstm): LSTM(84, 512, num_layers=3, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=512, out_features=84, bias=True)
)

In [24]:
params = []

for p in model.parameters():
    params.append(int(p.numel()))

In [25]:
sum(params)
# have some of params roughly equal to size of text data set to prevent over fitting

5470292

In [28]:
# hyperparams
lr = 0.001
train_per = 0.9
optimizer = torch.optim.Adam(model.parameters(), lr = lr)
criterion = nn.CrossEntropyLoss()
train_ind = int(len(encoded_text) * (train_per))

In [29]:
train_ind

4901048

In [30]:
train_data = encoded_text[:train_ind]
test_data = encoded_text[train_ind:]

In [31]:
train_data[:50]

array([81, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
       35, 35, 35, 35, 35, 16, 81, 35, 35, 73, 59, 31,  0, 35, 33,  1, 54,
       59, 28, 21, 58, 35, 71, 59, 28,  1, 58, 72, 59, 28, 21, 35, 17])

In [32]:
# training hyperparams
epochs = 20
batch_size = 100
seq_len = 100
t = 0
num_char = max(encoded_text) + 1

In [33]:
num_char

84

In [34]:
# Train the model
model.train()
model.cuda()

for i in range(epochs):
    
    hidden = model.hidden_state(batch_size)
    
    for x,y in gen_batch(train_data, batch_size, seq_len):
        
        t += 1
        x = one_hot_enc(x, num_char)
        
        inputs = torch.from_numpy(x)
        targets = torch.from_numpy(y)
        inputs = inputs.cuda()
        targets = targets.cuda()
            
        hidden = tuple([state.data for state in hidden])
        
        model.zero_grad()
        
        lstm_out, hidden = model.forward(inputs, hidden)
        loss = criterion(lstm_out, targets.view(batch_size * seq_len).long())
        
        loss.backward()
        
        # Avoid exploding gradient
        nn.utils.clip_grad_norm_(model.parameters(), max_norm = 5)
        
        optimizer.step()
        
        # validation step
        if t % 25 == 0:
            
            val_hidden = model.hidden_state(batch_size)
            val_losses = []
            model.eval()
            
            for x,y in gen_batch(test_data, batch_size, seq_len):
                
                x = one_hot_enc(x,num_char)

                inputs = torch.from_numpy(x)
                targets = torch.from_numpy(y)
                inputs = inputs.cuda()
                targets = targets.cuda()
                    
                val_hidden = tuple([state.data for state in val_hidden])
                
                lstm_out, val_hidden = model.forward(inputs, val_hidden)
                val_loss = criterion(lstm_out, targets.view(batch_size * seq_len).long())
        
                val_losses.append(val_loss.item())
            
            # Reset to training 
            model.train()
            
            print(f"epoch: {i} | step: {t} | val loss {val_loss.item()}")

epoch: 0 | step: 25 | val loss 3.2006781101226807
epoch: 0 | step: 50 | val loss 3.1942505836486816
epoch: 0 | step: 75 | val loss 3.196668863296509
epoch: 0 | step: 100 | val loss 3.1933629512786865
epoch: 0 | step: 125 | val loss 3.163191318511963
epoch: 0 | step: 150 | val loss 3.0347700119018555
epoch: 0 | step: 175 | val loss 2.920569896697998
epoch: 0 | step: 200 | val loss 2.7438316345214844
epoch: 0 | step: 225 | val loss 2.5890860557556152
epoch: 0 | step: 250 | val loss 2.4908289909362793
epoch: 0 | step: 275 | val loss 2.401627779006958
epoch: 0 | step: 300 | val loss 2.3215043544769287
epoch: 0 | step: 325 | val loss 2.2386534214019775
epoch: 0 | step: 350 | val loss 2.181492567062378
epoch: 0 | step: 375 | val loss 2.1380929946899414
epoch: 0 | step: 400 | val loss 2.0923383235931396
epoch: 0 | step: 425 | val loss 2.055506467819214
epoch: 0 | step: 450 | val loss 2.0319364070892334
epoch: 0 | step: 475 | val loss 1.994659185409546
epoch: 1 | step: 500 | val loss 1.9593356

epoch: 8 | step: 4025 | val loss 1.3442476987838745
epoch: 8 | step: 4050 | val loss 1.342037320137024
epoch: 8 | step: 4075 | val loss 1.3453288078308105
epoch: 8 | step: 4100 | val loss 1.3479368686676025
epoch: 8 | step: 4125 | val loss 1.3404797315597534
epoch: 8 | step: 4150 | val loss 1.3338595628738403
epoch: 8 | step: 4175 | val loss 1.3303786516189575
epoch: 8 | step: 4200 | val loss 1.3415168523788452
epoch: 8 | step: 4225 | val loss 1.333423376083374
epoch: 8 | step: 4250 | val loss 1.3387411832809448
epoch: 8 | step: 4275 | val loss 1.3405542373657227
epoch: 8 | step: 4300 | val loss 1.339881181716919
epoch: 8 | step: 4325 | val loss 1.3433884382247925
epoch: 8 | step: 4350 | val loss 1.3436346054077148
epoch: 8 | step: 4375 | val loss 1.3380862474441528
epoch: 8 | step: 4400 | val loss 1.336854100227356
epoch: 9 | step: 4425 | val loss 1.3353625535964966
epoch: 9 | step: 4450 | val loss 1.3311353921890259
epoch: 9 | step: 4475 | val loss 1.3346662521362305
epoch: 9 | step:

epoch: 16 | step: 7950 | val loss 1.3134551048278809
epoch: 16 | step: 7975 | val loss 1.3046443462371826
epoch: 16 | step: 8000 | val loss 1.3109959363937378
epoch: 16 | step: 8025 | val loss 1.3143054246902466
epoch: 16 | step: 8050 | val loss 1.315353512763977
epoch: 16 | step: 8075 | val loss 1.3089423179626465
epoch: 16 | step: 8100 | val loss 1.3067079782485962
epoch: 16 | step: 8125 | val loss 1.3147329092025757
epoch: 16 | step: 8150 | val loss 1.3084266185760498
epoch: 16 | step: 8175 | val loss 1.3107188940048218
epoch: 16 | step: 8200 | val loss 1.314167857170105
epoch: 16 | step: 8225 | val loss 1.3164421319961548
epoch: 16 | step: 8250 | val loss 1.3191379308700562
epoch: 16 | step: 8275 | val loss 1.3137906789779663
epoch: 16 | step: 8300 | val loss 1.3177214860916138
epoch: 16 | step: 8325 | val loss 1.3197646141052246
epoch: 17 | step: 8350 | val loss 1.315169095993042
epoch: 17 | step: 8375 | val loss 1.3153284788131714
epoch: 17 | step: 8400 | val loss 1.3186155557632

In [35]:
# converges around a 1.30 loss
# premise: loss does not reduce after a whole epoch
# save model
name = 'CharRNN_hidden512_layers3_shakes.net'

In [36]:
torch.save(model.state_dict(), name)

In [37]:
# Prediction 
def predict_next(model, char, hidden = None, k = 1):
    
    encoded_text = model.encoder[char]
    encoded_text = np.array([[encoded_text]])
    encoded_text = one_hot_enc(encoded_text, len(model.all_chars))
    
    inputs = torch.from_numpy(encoded_text)
    inputs = inputs.cuda()
    
    hidden = tuple([state.data for state in hidden])
    
    lstm_out, hidden = model(inputs, hidden)
    
    probs = F.softmax(lstm_out, dim = 1).data
    probs = probs.cpu()
    
    probs, index_pos = probs.topk(k)
    index_pos = index_pos.numpy().squeeze()
    probs = probs.numpy().flatten()
    probs = probs/probs.sum()
    
    char = np.random.choice(index_pos, p = probs)
    
    return model.decoder[char], hidden

In [38]:
# Generate text
def generate_text(model, size, seed = 'The', k = 1):
    
    model.cuda()
    model.eval()
    
    output_chars = [c for c in seed]
    hidden = model.hidden_state(1)
    
    for char in seed:
        char, hidden = predict_next(model, char, hidden, k = k)
    
    output_chars.append(char)
    
    for i in range(size):
        char, hidden = predict_next(model, output_chars[-1], hidden, k = k)
        output_chars.append(char)
    
    return ''.join(output_chars)

In [32]:
# Load model
# model.load_state_dict(torch.load(name))
# model.eval()

CharRNN(
  (lstm): LSTM(84, 512, num_layers=3, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=512, out_features=84, bias=True)
)

In [39]:
# Test
print(generate_text(model, 1000, seed = 'The ', k = 3))

The PRINCE HERRY and MARCIUS and SOMERSET
  SHYLOCK. It is no more to strike me, and the warding
    To this, I shall see, that I have set him,
    And to the world was stay with him on this,
    With sorrow that I would have seen thy state,
    With the service of merry trial and
    That the subject of this body were
    When the securest thing of his tongue does.
    The sun are strange and too and this doth shake
    The way and blessed with a field of heaven
    As that the world will stay again.
                                             Exit, worse than her horse.  
  CORIOLANUS. The way, a service, thanks, and see how she hath.
    I have sent to me.
  CLEOPATRA. Welcome, my lady,
    And that thou wast not so made and set thee.  
    I am an enemy, and we say, they say,
    That whom thy sons, while he shall strong and strange
    To stand to seek the company and strong,
    The market or a false of the conscience,
    Whose truth and hand of mine are striking thine.
    Tho