# Learning the sequence "hello" one letter at a time

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [2]:
# Training data. At every step the network is shown one letter of "hell" and has to
# produce the letter that follows it in "hello". The inputs always come from the
# ground truth below; the network's own prediction is never fed back in as the next input.
idx2char = ['h', 'e', 'l', 'o']
x_data = [0, 1, 2, 2]  # h, e, l, l
y_data = [1, 2, 2, 3]  # e, l, l, o
# Row i of the lookup table is the one-hot vector for character i
# (a hand-rolled stepping stone towards Embedding layers).
one_hot_lookup = [[int(row == col) for col in range(len(idx2char))]
                  for row in range(len(idx2char))]
# Encode every input index as its one-hot row.
x_data_ohe = [one_hot_lookup[index] for index in x_data]

In [3]:
# Build the input and target tensors. Plain tensors already take part in autograd,
# so the deprecated Variable wrapper is not needed; torch.tensor with an explicit
# dtype replaces the legacy torch.Tensor / torch.LongTensor constructors.
inputs = torch.tensor(x_data_ohe, dtype=torch.float)  # one one-hot row per input letter
labels = torch.tensor(y_data, dtype=torch.long)  # class index of the expected next letter

In [4]:
# setting hyperparams
num_classes = 4  # number of distinct output classes
input_size = num_classes  # width of each input vector fed to the RNN
# The hidden state is used directly as the class scores: the task is easy enough
# that no linear layer is added to scale it up/down, so it has to be this wide.
hidden_size = num_classes
num_layers = 1
batch_size = 1  # a single sequence at a time
seq_len = 1  # the sequence is fed in letter by letter, one step per forward call

In [5]:
#Creating our model
class RecurrentModel(nn.Module):
    """Recurrent network whose RNN output doubles as the class scores.

    There is no output projection: the RNN output is viewed straight into
    ``num_classes`` columns, so the model relies on ``hidden_size`` being
    equal to ``num_classes``. All sizes are read from the module-level
    hyperparameters (``input_size``, ``hidden_size``, ``num_layers``,
    ``batch_size``, ``seq_len``, ``num_classes``).
    """

    def __init__(self):
        super(RecurrentModel, self).__init__()
        # num_layers is passed explicitly so the depth of the network always
        # matches the hidden state produced by init_hidden().
        self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True)

    def forward(self, hidden, x):
        """Advance the RNN by one chunk of input.

        Args:
            hidden: recurrent state, e.g. from init_hidden() or a previous call.
            x: input values; viewed as (batch_size, seq_len, input_size).

        Returns:
            Tuple ``(hidden, scores)``: the updated recurrent state and the
            RNN output viewed as rows of ``num_classes`` scores.
        """
        # reshape input to be batch first.
        x = x.view(batch_size, seq_len, input_size)
        out, hidden = self.rnn(x, hidden)
        return hidden, out.view(-1, num_classes)

    def init_hidden(self):
        """Return an all-zero hidden state of shape (num_layers, batch_size, hidden_size)."""
        return torch.zeros(num_layers, batch_size, hidden_size)

In [6]:
# instantiate model, loss, and optimizer
loss_criterion = nn.CrossEntropyLoss()

recurrent_model = RecurrentModel()
print(recurrent_model)  # show the layer layout

# Adam updates every parameter registered on the model.
optimizer = torch.optim.Adam(params=recurrent_model.parameters(), lr=0.1)

RecurrentModel(
  (rnn): RNN(4, 4, batch_first=True)
)


In [7]:
# train model
# Every epoch replays the whole sequence one letter at a time, carrying the hidden
# state from step to step, and sums the per-step losses before one backward pass.
for epoch in range(100):
    optimizer.zero_grad()
    loss = 0
    hidden = recurrent_model.init_hidden()  # start each pass from a fresh hidden state

    print("predicted string: ", end="")
    for inp, label in zip(inputs, labels):
        hidden, output = recurrent_model(hidden, inp)
        # The highest-scoring class is the predicted next letter; .item() turns the
        # single index into a plain Python int for the list lookup.
        _, idx = output.max(1)
        print(idx2char[idx.item()], end="")
        # View the single label as a 1-D target to pair with the one row of scores,
        # without building a new tensor from it.
        loss += loss_criterion(output, label.view(1))
    print("")
    print("Epoch: {}, loss: {}\n\n".format(epoch + 1, loss))
    loss.backward()
    optimizer.step()

print("Learning finished")

predicted string: oloo
Epoch: 1, loss: 5.427931785583496


predicted string: lloo
Epoch: 2, loss: 4.51144552230835


predicted string: llll
Epoch: 3, loss: 3.9218273162841797


predicted string: llll
Epoch: 4, loss: 3.5491323471069336


predicted string: elll
Epoch: 5, loss: 3.2913217544555664


predicted string: elll
Epoch: 6, loss: 3.0916199684143066


predicted string: elll
Epoch: 7, loss: 2.918445110321045


predicted string: elll
Epoch: 8, loss: 2.745919704437256


predicted string: elll
Epoch: 9, loss: 2.5968942642211914


predicted string: elll
Epoch: 10, loss: 2.4741508960723877


predicted string: elll
Epoch: 11, loss: 2.37528395652771


predicted string: elll
Epoch: 12, loss: 2.2958881855010986


predicted string: elll
Epoch: 13, loss: 2.2356719970703125


predicted string: elll
Epoch: 14, loss: 2.1860783100128174


predicted string: elll
Epoch: 15, loss: 2.14987850189209


predicted string: elll
Epoch: 16, loss: 2.120878219604492


predicted string: elll
Epoch: 17, loss: 2.0