##### What are Recurrent Neural Networks (RNN)?

* Trad NN's take fixed amount of data all in at once
* RNNs consume inputs one at a time in sequence
* At each step the RNN does a calculation before producing an output called the hidden state
* The hidden state is then combined with the next input in the sequence that produces another output
* The calculations from the prvious step form the hidden state aka context
* RNNs can be adapted to take in many or one inputs and output many or one or both

In [1]:
import torch
from torch import nn
import numpy as np

In [2]:
text = ['hey how are you','good i am fine','have a nice day']

# join the sentences together and extract the unique characters from the combined sentence
chars = set(''.join(text))
print('unique characters')
print(chars)
print('')

# create a dicitonary that maps integers to characters
int2char = dict(enumerate(chars))
print('int2char')
print(int2char)
print('')

# create another dictionary that maps characters to integers
char2int = {c: i for i, c in int2char.items()}
print('characters to integers')
print(char2int)

unique characters
{'o', 'a', 'e', 'y', 'f', 'n', 'h', 'm', 'v', 'g', 'r', 'i', 'w', 'd', 'c', ' ', 'u'}

int2char
{0: 'o', 1: 'a', 2: 'e', 3: 'y', 4: 'f', 5: 'n', 6: 'h', 7: 'm', 8: 'v', 9: 'g', 10: 'r', 11: 'i', 12: 'w', 13: 'd', 14: 'c', 15: ' ', 16: 'u'}

characters to integers
{'o': 0, 'a': 1, 'e': 2, 'y': 3, 'f': 4, 'n': 5, 'h': 6, 'm': 7, 'v': 8, 'g': 9, 'r': 10, 'i': 11, 'w': 12, 'd': 13, 'c': 14, ' ': 15, 'u': 16}


In [3]:
# find length of the longest string in our data
maxlen = len(max(text,key=len))
print('longest string length in dataset')
print(maxlen)

# hey how goes it is our longest sentence so we pad the other two sentences with whitespace equiv to that sent
for i in range(len(text)):
    while len(text[i]) < maxlen:
        text[i] += ' '
        
print(text)

longest string length in dataset
15
['hey how are you', 'good i am fine ', 'have a nice day']


In [4]:
# create lists that will hold our input and target seq
# remove last character of input seq and first character of target seq
input_seq = []
target_seq = []

for i in range(len(text)):
    # remove last character from input sequence
    input_seq.append(text[i][:-1])
    
    # remove first character from target sequence
    target_seq.append(text[i][1:])
    
print(input_seq)
print(target_seq)


['hey how are yo', 'good i am fine', 'have a nice da']
['ey how are you', 'ood i am fine ', 'ave a nice day']


In [5]:
# convert to seq of ints by mapping to char2int so we can one-hot encode

for i in range(len(text)):
    input_seq[i] = [char2int[char] for char in input_seq[i]]
    target_seq[i] = [char2int[char] for char in target_seq[i]]
    
print(input_seq)
print(target_seq)

[[6, 2, 3, 15, 6, 0, 12, 15, 1, 10, 2, 15, 3, 0], [9, 0, 0, 13, 15, 11, 15, 1, 7, 15, 4, 11, 5, 2], [6, 1, 8, 2, 15, 1, 15, 5, 11, 14, 2, 15, 13, 1]]
[[2, 3, 15, 6, 0, 12, 15, 1, 10, 2, 15, 3, 0, 16], [0, 0, 13, 15, 11, 15, 1, 7, 15, 4, 11, 5, 2, 15], [1, 8, 2, 15, 1, 15, 5, 11, 14, 2, 15, 13, 1, 3]]


In [7]:
dict_size = len(char2int)
seq_len = maxlen - 1
batch_size = len(text)

# one hot encode
def one_hot_encode(sequence, dict_size, seq_len, batch_size):
    # create a multi-dim array of zeros with desired output shape
    features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)
    
    # replace the 0 at the relevant character index with a 1 to rep that char
    for i in range(batch_size):
        for u in range(seq_len):
            features[i, u, sequence[i][u]] = 1
    return features

# input shape --> (batchsize, sequence length, one-hot encoding size)
input_seq = one_hot_encode(input_seq, dict_size, seq_len, batch_size)
print("Input shape: {} --> (Batch Size, Sequence Length, One-Hot Encoding Size)".format(input_seq.shape))

Input shape: (3, 14, 17) --> (Batch Size, Sequence Length, One-Hot Encoding Size)


In [8]:
input_seq = torch.from_numpy(input_seq)
target_seq = torch.Tensor(target_seq)

In [9]:
# torch.cuda.is_available() checks and returns a Boolean true if a GPU is available
is_cuda = torch.cuda.is_available()

# if GPU available we'll set deviceto GPU
if is_cuda:
    device = torch.device('cuda')
    print('GPU is avaiable')
else:
    device = torch.device('cpu')
    print('Using CPU')

Using CPU


In [89]:
class Model(nn.Module):  
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(Model, self).__init__()
        
        # define some params
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        # define the layers
        # RNN layer
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        # fully connected layer
        self.fc = nn.Linear(hidden_dim, output_size)
        
        
    def forward(self, x):
        batch_size = x.size(0)
        
        # initialize the hidden state for first input using method defined below
        hidden = self.init_hidden(batch_size)
        
        # passing in the input and hidden state into the model and obtaining outputs
        out, hidden = self.rnn(x, hidden)
        
        # Reshaping the outputs such that it can be fit into the fully connected layer
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        # this method generates the first hidden state of zeros which we'll use in th eforward pass
        # We'll send the tensor hoding the hidden state to the device we specified earlier as well
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device)
        return hidden



In [90]:
# instantiate the model with hyperparams
model = Model(input_size=dict_size, output_size=dict_size, hidden_dim=12, n_layers=1)
# We'll also se thte model to the device that we defined earlier (default is CPU)
model.to(device)

# Define hyper params
n_epochs = 100
lr = 0.01

# Define loss, optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [91]:
# training
for epoch in range(1, n_epochs + 1):
    optimizer.zero_grad() # clear existing grads from previous epoch
    input_seq.to(device)
    output, hidden = model(input_seq)
    loss = criterion(output, target_seq.view(-1).long())
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print(f'Epoch: {epoch}/{n_epochs}......')
        print(f'loss: {loss.item()}')

Epoch: 10/100......
loss: 2.331434726715088
Epoch: 20/100......
loss: 2.006199359893799
Epoch: 30/100......
loss: 1.6283578872680664
Epoch: 40/100......
loss: 1.246556282043457
Epoch: 50/100......
loss: 0.910328209400177
Epoch: 60/100......
loss: 0.6482764482498169
Epoch: 70/100......
loss: 0.4624045193195343
Epoch: 80/100......
loss: 0.3413064777851105
Epoch: 90/100......
loss: 0.26413577795028687
Epoch: 100/100......
loss: 0.2127576768398285


In [92]:
def predict(mode, character):
    # one-hot encode our input to fit into the model
    char = np.array([[char2int[c] for c in character]])
    char = one_hot_encode(char, dict_size, char.shape[1], 1)
    char = torch.from_numpy(char)
    char.to(device)
    
    out, hidden = model(char)
    
    prob = nn.functional.softmax(out[-1], dim=0).data
    # take class with highest probability
    char_ind = torch.max(prob, dim=0)[1].item()
    
    return int2char[char_ind], hidden

    
def sample(model, out_len, start='hey'):
    model.eval()
    start = start.lower()
    chars = [ch for ch in start]
    size = out_len - len(chars)
    for ii in range(size):
        char, h = predict(model, chars)
        chars.append(char)
        
    return ''.join(chars)
    
      

In [93]:
sample(model, 15, 'good')

'good i am fine '