This notebook demonstrates learning several sentences using Elman RNN.

Original source code: https://github.com/gabrielloye/RNN-walkthrough/blob/master/main.ipynb

In [63]:
import torch
from torch import nn

import numpy as np
import os

In [3]:
text = ['hey how are you', 'good i am fine', 'have a nice day']

In [4]:
chars = set(''.join(text))
int2char = dict(enumerate(chars))
char2int = {char: ind for ind, char in int2char.items()}

In [13]:
print(len(chars), char2int)

17 {'o': 0, 'v': 1, 'd': 2, 'i': 3, 'f': 4, 'e': 5, ' ': 6, 'h': 7, 'u': 8, 'w': 9, 'n': 10, 'r': 11, 'm': 12, 'a': 13, 'g': 14, 'y': 15, 'c': 16}


In [6]:
maxlen = len(max(text, key=len))
maxlen

15

In [7]:
for i in range(len(text)):
    while len(text[i]) < maxlen:
        text[i] += ' '
text

['hey how are you', 'good i am fine ', 'have a nice day']

In [8]:
input_seq = []
target_seq = []

for i in range(len(text)):
    input_seq.append(text[i][:-1])
    
    target_seq.append(text[i][1:])
    print("{} -> {}".format(input_seq[i], target_seq[i]))

hey how are yo -> ey how are you
good i am fine -> ood i am fine 
have a nice da -> ave a nice day


In [9]:
for i in range(len(text)):
    input_seq[i] = [char2int[character] for character in input_seq[i]]
    target_seq[i] = [char2int[character] for character in target_seq[i]]

In [11]:
dict_size = len(char2int)
seq_len = maxlen - 1
batch_size = len(text)

def one_hot_encode(sequence, dict_size, seq_len, batch_size):
    features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)
    
    for i in range(batch_size):
        for u in range(seq_len):
            features[i, u, sequence[i][u]] = 1
    return features

input_seq = one_hot_encode(input_seq, dict_size, seq_len, batch_size)
input_seq.shape

(3, 14, 17)

In [14]:
input_seq = torch.from_numpy(input_seq)
target_seq = torch.Tensor(target_seq)

In [15]:
input_seq

tensor([[[0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

In [16]:
target_seq

tensor([[ 5., 15.,  6.,  7.,  0.,  9.,  6., 13., 11.,  5.,  6., 15.,  0.,  8.],
        [ 0.,  0.,  2.,  6.,  3.,  6., 13., 12.,  6.,  4.,  3., 10.,  5.,  6.],
        [13.,  1.,  5.,  6., 13.,  6., 10.,  3., 16.,  5.,  6.,  2., 13., 15.]])

In [17]:
is_cuda = torch.cuda.is_available()

if is_cuda:
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")

GPU not available, CPU used


In [22]:
input_seq = input_seq.to(device)

In [85]:
class ModelWithRNN(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # FIXME use standard order (seq_len, batch, input_size)
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)   
        self.fc = nn.Linear(hidden_dim, output_size)
    
    def forward(self, x):
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size)

        out, hidden = self.rnn(x) #, hidden)
        
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device)
        return hidden

In [86]:
model = ModelWithRNN(input_size=dict_size, output_size=dict_size, hidden_dim=12, n_layers=1)
model.to(device)

ModelWithRNN(
  (rnn): RNN(17, 12, batch_first=True)
  (fc): Linear(in_features=12, out_features=17, bias=True)
)

In [42]:
n_epochs = 100
lr = 0.01

In [43]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [54]:
def train(model, optimizer, criterion, n_epochs):
    for epoch in range(1, n_epochs + 1):
        optimizer.zero_grad()
        output, hidden = model(input_seq)
        loss = criterion(output, target_seq.view(-1).long())
        loss.backward()
        optimizer.step()

        if epoch % 10 == 0:
            print('Epoch: {:3d}/{}, train loss: {:.4f}'.format(epoch, n_epochs, loss.item()))

In [55]:
train(model, optimizer, criterion, n_epochs)

Epoch:  10/100, loss: 0.0450
Epoch:  20/100, loss: 0.0443
Epoch:  30/100, loss: 0.0437
Epoch:  40/100, loss: 0.0432
Epoch:  50/100, loss: 0.0427
Epoch:  60/100, loss: 0.0422
Epoch:  70/100, loss: 0.0418
Epoch:  80/100, loss: 0.0413
Epoch:  90/100, loss: 0.0410
Epoch: 100/100, loss: 0.0406


In [45]:
def predict(model, character):
    character = np.array([[char2int[c] for c in character]])
    character = one_hot_encode(character, dict_size, character.shape[1], 1)
    character = torch.from_numpy(character)
    character.to(device)
    
    out, hidden = model(character)

    prob = nn.functional.softmax(out[-1], dim=0).data
    char_ind = torch.max(prob, dim=0)[1].item()

    return int2char[char_ind], hidden

In [46]:
def sample(model, out_len, start):
    model.eval()
    start = start.lower()
    chars = [ch for ch in start]
    size = out_len - len(chars)
    for ii in range(size):
        char, _ = predict(model, chars)
        chars.append(char)
    return ''.join(chars)

In [47]:
sample(model, 15, 'hey')

'hey how are you'

In [48]:
sample(model, 15, 'good')

'good i am fine '

In [49]:
sample(model, 15, 'have')

'have a nice day'

In [50]:
sample(model, 15, 'hi')

'hiy how are you'