# RNN Examplecode

In [101]:
import torch
from torch import nn
import numpy as np


In [102]:
texts_list = ['hey how are you','good, i am fine','have a nice day']

# Join all the sentences together and extract the unique characters from the combined sentences.
chars = set(''.join(texts_list))

# Creating a dictionary that maps integers to the characters.
int2char = dict(enumerate(chars))

# Creating another dictionary that maps characters to integers.
char2int = {char: ind for ind, char in int2char.items()}

char2int


{'h': 0,
 'e': 1,
 'g': 2,
 ',': 3,
 'w': 4,
 'y': 5,
 'd': 6,
 'r': 7,
 'u': 8,
 'i': 9,
 'a': 10,
 'c': 11,
 'f': 12,
 'n': 13,
 ' ': 14,
 'o': 15,
 'm': 16,
 'v': 17}

In [103]:
maxlen = len(max(texts_list, key=len))
print(f"Longest string: {maxlen} characters")

Longest string: 15 characters


# Padding
Feeding training data in batches to speed up the training process, so need to ensure that each input sequence has equal size.

In [104]:
# Add whitespaces to sequences until the length matches the longest sentence.
for i in range(len(texts_list)):
    while len(texts_list[i])<maxlen:
        texts_list[i] += ' '

Make target is one time-step ahead of the Input data:

In [105]:
# Lists that will hold our input and target sequences.
input_seq = []
target_seq = []

for i in range(len(texts_list)):
    # Remove last character for input sequence.
    input_seq.append(texts_list[i][:-1])
    
    # Remove firsts character for target sequence
    target_seq.append(texts_list[i][1:])
    print("Input Sequence: {}\nTarget Sequence: {}".format(input_seq[i], target_seq[i]))

Input Sequence: hey how are yo
Target Sequence: ey how are you
Input Sequence: good, i am fin
Target Sequence: ood, i am fine
Input Sequence: have a nice da
Target Sequence: ave a nice day


Convert our input and target sequences to integers instead of characters for one-hot-encoding:

In [106]:
print(input_seq)
print(target_seq)

for i in range(len(texts_list)):
    input_seq[i] = [char2int[character] for character in input_seq[i]]
    target_seq[i] = [char2int[character] for character in target_seq[i]]

print(input_seq)
print(target_seq)

['hey how are yo', 'good, i am fin', 'have a nice da']
['ey how are you', 'ood, i am fine', 'ave a nice day']
[[0, 1, 5, 14, 0, 15, 4, 14, 10, 7, 1, 14, 5, 15], [2, 15, 15, 6, 3, 14, 9, 14, 10, 16, 14, 12, 9, 13], [0, 10, 17, 1, 14, 10, 14, 13, 9, 11, 1, 14, 6, 10]]
[[1, 5, 14, 0, 15, 4, 14, 10, 7, 1, 14, 5, 15, 8], [15, 15, 6, 3, 14, 9, 14, 10, 16, 14, 12, 9, 13, 1], [10, 17, 1, 14, 10, 14, 13, 9, 11, 1, 14, 6, 10, 5]]


In [107]:
# Number of unique characters our texts.
dict_size = len(char2int)
# Length of the sequences feeded into the model.
seq_len = maxlen - 1
# Number of sentences to feed into the model as a batch.
batch_size = len(texts_list)

def one_hot_encode(sequence, dict_size, seq_len, batch_size):
    # Creating a multi-dimensional array of zeros with the desired output shape
    features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)
    
    # Replacing the 0 at the relevant character index with a 1 to represent that character
    for i in range(batch_size):
        for u in range(seq_len):
            features[i, u, sequence[i][u]] = 1
    return features

In [108]:
input_seq = one_hot_encode(input_seq, dict_size, seq_len, batch_size)
print(f"{input_seq.shape} (Batch Size, Sequence Length, One-Hot Encoding Size)")
print("One encoded sequence example:")
print(input_seq[0])

(3, 14, 18) (Batch Size, Sequence Length, One-Hot Encoding Size)
One encoded sequence example:
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]]


In [109]:
# Cast to torch tensor.
input_seq = torch.from_numpy(input_seq)
target_seq = torch.Tensor(target_seq)

In [110]:
# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")

GPU not available, CPU used


In [111]:
class RNNModel(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(RNNModel, self).__init__()

        # Defining model parameters.
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        #Defining layer achitecture.
        # RNN Layer.
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)   
        # Fully connected layer that converts RNN output to desired output shape.
        self.fc = nn.Linear(hidden_dim, output_size)
    
    def forward(self, x):
        batch_size = x.size(0)
        # zero-initialized hidden state.
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device)
        # Passing in the input and hidden state into the RNN layer and obtaining outputs.
        out, hidden = self.rnn(x, hidden)
        # Reshaping the outputs such that it can be fit into the fully connected layer.
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        return out, hidden


In [112]:
# Instantiate the model with hyperparameters.
model = RNNModel(input_size=dict_size, output_size=dict_size, hidden_dim=12, n_layers=1)
# Set the model to the device that we defined earlier (default is CPU).
model = model.to(device)

# Define some other hyperparameters.
n_epochs = 500
lr=0.01
cross_loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [113]:
# Training Run.
input_seq = input_seq.to(device)
for epoch in range(1, n_epochs + 1):
    # Clears existing gradients from previous epoch
    optimizer.zero_grad()
    # Feed data to model.
    output, hidden = model(input_seq)
    # Compute loss.
    loss = cross_loss(output, target_seq.view(-1).long())
    # Backpropagation and calculate gradients.
    loss.backward() 
    # Updates the weights accordingly.
    optimizer.step() 
    
    if epoch%10 == 0:
        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))

Epoch: 10/500............. Loss: 2.5097
Epoch: 20/500............. Loss: 2.2044
Epoch: 30/500............. Loss: 1.7965
Epoch: 40/500............. Loss: 1.3074
Epoch: 50/500............. Loss: 0.8871
Epoch: 60/500............. Loss: 0.5864
Epoch: 70/500............. Loss: 0.3921
Epoch: 80/500............. Loss: 0.2719
Epoch: 90/500............. Loss: 0.1980
Epoch: 100/500............. Loss: 0.1508
Epoch: 110/500............. Loss: 0.1207
Epoch: 120/500............. Loss: 0.1013
Epoch: 130/500............. Loss: 0.0882
Epoch: 140/500............. Loss: 0.0789
Epoch: 150/500............. Loss: 0.0720
Epoch: 160/500............. Loss: 0.0668
Epoch: 170/500............. Loss: 0.0626
Epoch: 180/500............. Loss: 0.0593
Epoch: 190/500............. Loss: 0.0566
Epoch: 200/500............. Loss: 0.0543
Epoch: 210/500............. Loss: 0.0524
Epoch: 220/500............. Loss: 0.0508
Epoch: 230/500............. Loss: 0.0494
Epoch: 240/500............. Loss: 0.0481
Epoch: 250/500...........

In [114]:
def predict(model, character):
    # One-hot encoding our input to fit into the model.
    character = np.array([[char2int[c] for c in character]])
    character = one_hot_encode(character, dict_size, character.shape[1], 1)
    character = torch.from_numpy(character)
    # Feed to model.
    out, hidden = model(character)
    # Softmax activation to get propabilities.
    prob = nn.functional.softmax(out[-1], dim=0).data
    # Taking the class with the highest probability score from the output.
    char_ind = torch.max(prob, dim=0)[1].item()

    return int2char[char_ind], hidden

In [115]:
def sample(model, out_len, start='hey'):
    # eval mode (turn off optimization and gradient updates).
    model.eval() 
    # All small caps.
    start = start.lower()
    # First off, separate the starting characters that were given.
    chars = [ch for ch in start]
    size = out_len - len(chars)
    # Now pass in the previous characters and get a new one.
    for ii in range(size):
        char, h = predict(model, chars)
        chars.append(char)

    return ''.join(chars)

In [119]:
sample(model, 15, 'nice')

'nice a nice day'