<a href="https://colab.research.google.com/github/akshatjain2k/Data-Science-NLP/blob/Amey/SimpleRNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn
import numpy as np

In [2]:
text = ['hey how are you','good i am fine','have a nice day']

# All Caharcters
chars = set(''.join(text))

int2char = dict(enumerate(chars))

char2int = {char: i for i, char in int2char.items()}
char2int

{'i': 0,
 'm': 1,
 'a': 2,
 'e': 3,
 'h': 4,
 'y': 5,
 'w': 6,
 'c': 7,
 'n': 8,
 ' ': 9,
 'v': 10,
 'o': 11,
 'r': 12,
 'u': 13,
 'g': 14,
 'd': 15,
 'f': 16}

## Padding

In [3]:
# Finding Longest sequence
maxlen = len(max(text,key =len))
maxlen

15

In [4]:
# Adding ' ' until it matches the maxlen
for i in range(len(text)):
  while len(text[i])<maxlen:
    text[i] = text[i] + ' '

In [5]:
text

['hey how are you', 'good i am fine ', 'have a nice day']

In [6]:
# Input & Target split

input_seq = []
target_seq = []

for i in range(len(text)):
  input_seq.append(text[i][:-1])

for i in range(len(text)):
  target_seq.append(text[i][1:])

In [7]:
input_seq , target_seq

(['hey how are yo', 'good i am fine', 'have a nice da'],
 ['ey how are you', 'ood i am fine ', 'ave a nice day'])

## One hot Encoding

In [8]:
for i in range(len(text)):
  input_seq[i] = [char2int[char] for char in input_seq[i]]
  target_seq[i] = [char2int[char] for char in target_seq[i]]

In [9]:
input_seq , target_seq

([[4, 3, 5, 9, 4, 11, 6, 9, 2, 12, 3, 9, 5, 11],
  [14, 11, 11, 15, 9, 0, 9, 2, 1, 9, 16, 0, 8, 3],
  [4, 2, 10, 3, 9, 2, 9, 8, 0, 7, 3, 9, 15, 2]],
 [[3, 5, 9, 4, 11, 6, 9, 2, 12, 3, 9, 5, 11, 13],
  [11, 11, 15, 9, 0, 9, 2, 1, 9, 16, 0, 8, 3, 9],
  [2, 10, 3, 9, 2, 9, 8, 0, 7, 3, 9, 15, 2, 5]])

In [10]:
# Vocabulary
vocab = len(char2int)

# Same for all
seq_len = maxlen-1

# number of sentences
batch_size = len(text)

In [11]:
def one_hot_encode(sequence, vocab, seq_len, batch_size):
    # Creating a multi-dimensional array of zeros
    features = np.zeros((batch_size, seq_len, vocab), dtype=np.float32)

    # Replacing 0 at character index to 1
    for i in range(batch_size):
      for u in range(seq_len):
        features[i, u, sequence[i][u]] = 1
    return features


In [12]:
input_seq = one_hot_encode(input_seq,vocab,seq_len,batch_size)
input_seq

array([[[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
         0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
         0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0.

In [13]:
input_seq = torch.from_numpy(input_seq)
target_seq = torch.Tensor(target_seq)

In [14]:
is_cuda = torch.cuda.is_available()
device = torch.device("cuda")

In [15]:
class Model(nn.Module):
  def __init__(self, input_size, output_size, hidden_dim, n_layers):
    super(Model, self).__init__()

    self.hidden_dim = hidden_dim
    self.n_layers = n_layers

    # RNN layer
    self.rnn = nn.RNN(input_size,hidden_dim,n_layers,batch_first=True)

    # Fully Connected Layer
    self.fc = nn.Linear(hidden_dim,output_size)

  def forward(self,x):
    batch_size = x.size(0)

    hidden = self.init_hidden(batch_size)

    out, hidden = self.rnn(x,hidden)

    out = out.contiguous().view(-1,self.hidden_dim)
    out = self.fc(out)

    return out,hidden

  def init_hidden(self, batch_size):
    hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
    return hidden

      

In [16]:
# Instantiate the model
model = Model(input_size=vocab, output_size=vocab, hidden_dim=12, n_layers=1)

# Define hyperparameters
n_epochs = 200
lr=0.01

# Define Loss, Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [17]:
# Training
for epoch in range(1, n_epochs + 1):
    # Clears existing gradients from previous epoch
    optimizer.zero_grad()
    input_seq.to(device)
    output, hidden = model(input_seq)
    loss = criterion(output, target_seq.view(-1).long())
    # Does backpropagation and calculates gradients
    loss.backward()
    # Updates the weights 
    optimizer.step() 
    
    if epoch%10 == 0:
        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {}".format(loss.item()))

Epoch: 10/200............. Loss: 2.442443370819092
Epoch: 20/200............. Loss: 2.0461201667785645
Epoch: 30/200............. Loss: 1.6283056735992432
Epoch: 40/200............. Loss: 1.2083864212036133
Epoch: 50/200............. Loss: 0.8466749787330627
Epoch: 60/200............. Loss: 0.5810109972953796
Epoch: 70/200............. Loss: 0.40184617042541504
Epoch: 80/200............. Loss: 0.2845492660999298
Epoch: 90/200............. Loss: 0.2101481556892395
Epoch: 100/200............. Loss: 0.16252335906028748
Epoch: 110/200............. Loss: 0.13149338960647583
Epoch: 120/200............. Loss: 0.11070019751787186
Epoch: 130/200............. Loss: 0.09623147547245026
Epoch: 140/200............. Loss: 0.08580297976732254
Epoch: 150/200............. Loss: 0.07802615314722061
Epoch: 160/200............. Loss: 0.07204810529947281
Epoch: 170/200............. Loss: 0.06733028590679169
Epoch: 180/200............. Loss: 0.06357487291097641
Epoch: 190/200............. Loss: 0.0608303844