## Lecture 12: Recurrent Neural Networks (RNN)

* In this session we will work with recurrent neural networks (RNNs).
* This is a very powerful concept: the hidden state produced by one cell is passed as an input to the next cell, in addition to that cell's own input.


The main arguments passed to `torch.nn.RNN` are:

* `input_size`
* `hidden_size`

In [1]:
# Importing stock libraries

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset

### Experiment 1: 

* Feed the letters `H E L L O` to an RNN.
* We will use one-hot vector encoding for the letters:


* $h = [1,0,0,0]$
* $e = [0,1,0,0]$
* $l = [0,0,1,0]$
* $o = [0,0,0,1]$

These vectors are fed to the network/cell one at a time. Hence,
* `input size = 4`
* `hidden size = 2`


In [14]:
# One-hot vectors for the four distinct letters of "hello":
# row i carries its single 1 at position i (order: h, e, l, o).
h, e, l, o = (
    [int(col == row) for col in range(4)]
    for row in range(4)
)

In [25]:
# Single-step demo: push one one-hot letter ("h") through a 4 -> 2 RNN.
# batch_first=True means inputs are laid out as (batch, seq_len, input_size).
cell = torch.nn.RNN(batch_first=True, input_size=4, hidden_size=2)

# A batch of one sequence holding one letter -> shape (1, 1, 4).
h = [1, 0, 0, 0]
inputs = torch.tensor([[h]], dtype=torch.float32)
print('Input size', inputs.size())

# Random initial hidden state, shaped
# (num_layers * num_directions, batch, hidden_size).
hidden = torch.rand(1, 1, 2)

# Run the RNN: `out` holds the hidden state of every time step and `hidden`
# is rebound to the state after the last step (identical here, one step only).
out, hidden = cell(inputs, hidden)
print('Out', out.detach())
print('Type', out.dtype)
print('Size', out.shape)
print('Hidden', hidden.detach())

Input size torch.Size([1, 1, 4])
Out tensor([[[0.6362, 0.4687]]])
Type torch.float32
Size torch.Size([1, 1, 2])
Hidden tensor([[[0.6362, 0.4687]]])


In [24]:
# Sequence demo: feed all five letters of "hello" through the RNN in one call.
#   input_size = 4, hidden_size = 2, batch_size = 1, sequence_length = 5
cell = torch.nn.RNN(batch_first=True, input_size=4, hidden_size=2)

# One batch entry containing the five one-hot letters -> shape (1, 5, 4).
inputs = torch.tensor([[h, e, l, l, o]], dtype=torch.float32)
print('Input size', inputs.size())

# Random initial hidden state, shaped
# (num_layers * num_directions, batch, hidden_size).
hidden = torch.rand(1, 1, 2)

# The whole sequence is processed in this single call: `out` collects the
# hidden state of every time step, `hidden` is rebound to the final state.
out, hidden = cell(inputs, hidden)
print('Out', out.detach())
print('Type', out.dtype)
print('Size', out.shape)
print('Hidden', hidden.detach())

Input size torch.Size([1, 5, 4])
Out tensor([[[ 0.4185, -0.8764],
         [ 0.5675,  0.2853],
         [ 0.3233, -0.7229],
         [ 0.2798, -0.4977],
         [ 0.2200, -0.6902]]])
Type torch.float32
Size torch.Size([1, 5, 2])
Hidden tensor([[[ 0.2200, -0.6902]]])


In [23]:
# Batch demo: feed several five-letter sequences through the RNN at once.
#   input_size = 4, hidden_size = 2, batch_size = 3, sequence_length = 5
cell = torch.nn.RNN(batch_first=True, input_size=4, hidden_size=2)

# Three sequences of five one-hot letters each -> shape (3, 5, 4).
letter_batch = [
    [h, e, l, l, o],
    [e, l, l, o, l],
    [l, l, e, e, l],
]
inputs = torch.tensor(letter_batch, dtype=torch.float32)
print('Input size', inputs.size())

# Random initial hidden state, one row per batch entry:
# (num_layers * num_directions, batch, hidden_size).
hidden = torch.rand(1, 3, 2)

# All sequences are processed in this single call: `out` collects the hidden
# state of every time step, `hidden` is rebound to each sequence's final state.
out, hidden = cell(inputs, hidden)
print('Out', out.detach())
print('Type', out.dtype)
print('Size', out.shape)
print('Hidden', hidden.detach())

Input size torch.Size([3, 5, 4])
Out tensor([[[-0.5312, -0.0883],
         [ 0.6064, -0.6325],
         [ 0.3647, -0.7075],
         [ 0.3591, -0.7897],
         [-0.5652, -0.8731]],

        [[ 0.7213,  0.2784],
         [ 0.6125, -0.4499],
         [ 0.4220, -0.6717],
         [-0.5384, -0.8517],
         [ 0.3825, -0.9386]],

        [[ 0.7266, -0.2211],
         [ 0.4819, -0.5763],
         [ 0.4050, -0.2343],
         [ 0.5114, -0.1682],
         [ 0.5114, -0.6541]]])
Type torch.float32
Size torch.Size([3, 5, 2])
Hidden tensor([[[-0.5652, -0.8731],
         [ 0.3825, -0.9386],
         [ 0.5114, -0.6541]]])


### Experiment 2:

* We will feed the string `hihell` to the network so that it outputs `ihello`, i.e. at every step it predicts the next character.

* This is a sequence classification task.

In [100]:
# Experiment 2 data: teach the RNN to map "hihell" -> "ihello"
# (predict the next character at every time step).

# Vocabulary: a character's position in this list is its class index.
idx2char = ['h', 'i', 'e', 'l', 'o']

# Input indices, one row per sequence (a batch holding a single sequence).
x_data = [[0, 1, 0, 2, 3, 3]]   # hihell

# One-hot encode every index. Derived from x_data (rather than typed by hand)
# so the encoded input can never drift out of sync with the indices.
x_one_hot = [
    [[int(pos == char_idx) for pos in range(len(idx2char))] for char_idx in seq]
    for seq in x_data
]

# Target class index per time step: the input shifted left by one character.
y_data = [1, 0, 2, 3, 3, 4]    # ihello

inputs = torch.Tensor(x_one_hot)     # float tensor, shape (1, 6, 5)
labels = torch.LongTensor(y_data)    # int64 tensor, shape (6,)

In [96]:
# Hyper-parameters for the one-hot model
num_classes = 5       # distinct characters the model can predict
input_size = 5        # width of each one-hot input vector
hidden_size = 5       # width of the RNN cell's output
batch_size = 1        # a single sentence per batch
sequence_length = 6   # characters per sequence ("hihell")
num_layers = 1        # one RNN layer

In [97]:
class Model(torch.nn.Module):
    """Character-level RNN whose per-step hidden state is used directly as class scores.

    No projection layer follows the RNN, so ``hidden_size`` must equal
    ``num_classes`` for the flattened output to line up with the class labels.

    Args:
        num_classes: Number of output classes (vocabulary size).
        input_size: Width of each input vector (the one-hot size).
        hidden_size: Width of the RNN hidden state.
        num_layers: Number of stacked RNN layers.
        sequence_length: Number of time steps in every input sequence.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers, sequence_length):
        super(Model, self).__init__()
        self.num_classes = num_classes
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.sequence_length = sequence_length

        # Build the RNN from the constructor arguments (previously hard-coded to
        # 5/5 and a single layer, which silently ignored what the caller asked for).
        self.rnn = torch.nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )

    def forward(self, x):
        """Run the RNN over ``x`` and return one row of class scores per time step.

        Args:
            x: Float tensor reshapeable to ``(batch, sequence_length, input_size)``.

        Returns:
            Tensor of shape ``(batch * sequence_length, num_classes)``.
        """
        batch = x.size(0)
        # Zero initial hidden state, created alongside the input (same dtype/device).
        h_0 = torch.zeros(
            self.num_layers, batch, self.hidden_size, dtype=x.dtype, device=x.device
        )
        x = x.reshape(batch, self.sequence_length, self.input_size)
        outputs, _ = self.rnn(x, h_0)
        # Flatten (batch, seq, hidden) so each time step is one sample for the loss.
        # reshape (not view) also copes with an RNN output that is not contiguous.
        return outputs.reshape(-1, self.num_classes)


# Instantiate the network from the hyper-parameters defined earlier and show its layers.
model = Model(
    num_classes=num_classes,
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    sequence_length=sequence_length,
)
print(model)

Model(
  (rnn): RNN(5, 5, batch_first=True)
)


In [98]:
# Training setup: cross-entropy loss over the per-step class scores,
# minimised with plain SGD.
criterion = torch.nn.CrossEntropyLoss()
optimus = torch.optim.SGD(params=model.parameters(), lr=0.1)

In [101]:
# Train for 100 epochs on the single sequence, reporting every 10th epoch.
for epoch in range(100):
    optimus.zero_grad()                  # drop gradients from the previous step
    outputs = model(inputs)              # one row of class scores per time step
    loss = criterion(outputs, labels)
    loss.backward()
    optimus.step()

    # Decode the highest-scoring class of each time step back into characters
    # (based on the scores computed before this epoch's weight update).
    idx = outputs.max(dim=1)[1].numpy()
    result_str = [idx2char[c] for c in idx]

    report_now = (epoch % 10 == 0)
    if report_now:
        print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.item()))
        print("Predicted string: ", ''.join(result_str))

print("Learning finished!")
  
    


epoch: 1, loss: 1.689
Predicted string:  oeoooo
epoch: 11, loss: 1.448
Predicted string:  llllll
epoch: 21, loss: 1.314
Predicted string:  llllll
epoch: 31, loss: 1.202
Predicted string:  illlll
epoch: 41, loss: 1.115
Predicted string:  ilelll
epoch: 51, loss: 1.047
Predicted string:  ilelll
epoch: 61, loss: 0.993
Predicted string:  ihelll
epoch: 71, loss: 0.948
Predicted string:  ihelll
epoch: 81, loss: 0.909
Predicted string:  ihelll
epoch: 91, loss: 0.874
Predicted string:  ihelll
Learning finished!


### Experiment 3:

* We repeat the experiment above, but instead of one-hot encoding the inputs we use an embedding layer (`torch.nn.Embedding`).

In [133]:
# Experiment 3 data: the same "hihell" -> "ihello" task, but the inputs stay
# as integer character indices instead of being one-hot encoded.
x_data = [[0, 1, 0, 2, 3, 3]]   # hihell (one row per sequence)
y_data = [1, 0, 2, 3, 3, 4]     # ihello

inputs = torch.tensor(x_data, dtype=torch.long)
labels = torch.tensor(y_data, dtype=torch.long)

In [134]:
# Number of target time steps (length of the first dimension of `labels`).
len(labels)

6

In [135]:
# Hyper-parameters for the embedding model
embedding_size = 10   # width of each learned character embedding
num_classes = 5       # distinct characters the model can predict
input_size = 5        # number of entries in the embedding table
hidden_size = 5       # width of the RNN cell's output
batch_size = 1        # a single sentence per batch
sequence_length = 6   # characters per sequence ("hihell")
num_layers = 1        # one RNN layer

In [136]:
class Model(torch.nn.Module):
    """Embedding -> RNN -> Linear model for next-character prediction.

    The constructor takes no arguments. Layer sizes are read from the
    notebook-level hyper-parameters ``input_size``, ``embedding_size``,
    ``hidden_size``, ``num_classes`` and ``num_layers`` when the model is
    built, and are stored on the instance so ``forward`` does not depend on
    global state.
    """

    def __init__(self):
        super(Model, self).__init__()
        # Snapshot the notebook-level settings at construction time.
        self.num_classes = num_classes
        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = torch.nn.Embedding(self.input_size, self.embedding_size)
        # hidden_size and num_layers now follow the configuration (hidden_size was
        # hard-coded to 5 while h_0 and the linear layer used the configured value).
        self.rnn = torch.nn.RNN(
            input_size=self.embedding_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
        )
        self.fc = torch.nn.Linear(self.hidden_size, self.num_classes)

    def forward(self, x):
        """Map character indices to one row of class scores per time step.

        Args:
            x: Integer tensor of character indices, shape ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch * seq_len, num_classes)``.
        """
        # The embedding lookup already yields (batch, seq_len, embedding_size), so
        # no reshape tied to a fixed batch size / sequence length is needed.
        emb = self.embedding(x)
        h_0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            dtype=emb.dtype, device=emb.device,
        )
        outputs, _ = self.rnn(emb, h_0)
        # Flatten to (batch * seq_len, hidden_size): the linear layer consumes
        # hidden_size features (the old code flattened by num_classes, which is
        # only correct when the two sizes happen to be equal).
        return self.fc(outputs.reshape(-1, self.hidden_size))

# Build the model (Model takes no constructor arguments) and print its layer summary.
model = Model()
print(model)

Model(
  (embedding): Embedding(5, 10)
  (rnn): RNN(10, 5, batch_first=True)
  (fc): Linear(in_features=5, out_features=5, bias=True)
)


In [137]:
# Training setup: cross-entropy loss over the per-step class scores,
# minimised with the Adam optimizer.
criterion = torch.nn.CrossEntropyLoss()
optimus = torch.optim.Adam(params=model.parameters(), lr=0.1)

In [138]:
# Train for 100 epochs on the single sequence, reporting every 5th epoch.
# Note: `idx2char` is the vocabulary list defined in the Experiment 2 data cell.
for epoch in range(100):
    optimus.zero_grad()                  # drop gradients from the previous step
    outputs = model(inputs)              # one row of class scores per time step
    loss = criterion(outputs, labels)
    loss.backward()
    optimus.step()

    # Decode the highest-scoring class of each time step back into characters
    # (based on the scores computed before this epoch's weight update).
    idx = outputs.max(dim=1)[1].numpy()
    result_str = [idx2char[c] for c in idx]

    report_now = (epoch % 5 == 0)
    if report_now:
        print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.item()))
        print("Predicted string: ", ''.join(result_str))

print("Learning finished!")

epoch: 1, loss: 1.808
Predicted string:  oooooo
epoch: 6, loss: 0.765
Predicted string:  ehello
epoch: 11, loss: 0.362
Predicted string:  ihello
epoch: 16, loss: 0.166
Predicted string:  ihello
epoch: 21, loss: 0.077
Predicted string:  ihello
epoch: 26, loss: 0.040
Predicted string:  ihello
epoch: 31, loss: 0.025
Predicted string:  ihello
epoch: 36, loss: 0.017
Predicted string:  ihello
epoch: 41, loss: 0.013
Predicted string:  ihello
epoch: 46, loss: 0.010
Predicted string:  ihello
epoch: 51, loss: 0.009
Predicted string:  ihello
epoch: 56, loss: 0.008
Predicted string:  ihello
epoch: 61, loss: 0.007
Predicted string:  ihello
epoch: 66, loss: 0.006
Predicted string:  ihello
epoch: 71, loss: 0.006
Predicted string:  ihello
epoch: 76, loss: 0.005
Predicted string:  ihello
epoch: 81, loss: 0.005
Predicted string:  ihello
epoch: 86, loss: 0.005
Predicted string:  ihello
epoch: 91, loss: 0.005
Predicted string:  ihello
epoch: 96, loss: 0.004
Predicted string:  ihello
Learning finished!
