In [46]:
import torch
from torch import nn

taken from : https://blog.floydhub.com/a-beginners-guide-on-recurrent-neural-networks-with-pytorch/

In [47]:
import numpy as np

In [48]:
texts = ['hey how are you', 'good I am fine', 'have a nice day']
chars = set(''.join(texts))
chars

{' ',
 'I',
 'a',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'm',
 'n',
 'o',
 'r',
 'u',
 'v',
 'w',
 'y'}

In [49]:
int2char = dict(enumerate(chars))

In [50]:
char2int = {char : ind for ind,char in int2char.items()}

In [51]:
max([len(t) for t in texts])

15

looking at padding

In [52]:

maxlen = max([len(t) for t in texts])

In [53]:
maxlen

15

In [54]:
pad_text = []
for text in texts:
    length_text = len(text)
    if length_text < maxlen:
        text += (maxlen - length_text)* ' '
    pad_text.append(text)


In [55]:
pad_text

['hey how are you', 'good I am fine ', 'have a nice day']

In [56]:
len(pad_text)

3

In [57]:
input_seq = []
output_seq = []

for i in pad_text:
    print(i)
    input_seq.append(i[:-1])
    output_seq.append(i[1:])


hey how are you
good I am fine 
have a nice day


In [58]:
input_seq[2]

'have a nice da'

In [59]:
output_seq

['ey how are you', 'ood I am fine ', 'ave a nice day']

In [60]:
for i in range(len(pad_text)):
    input_seq[i] = [char2int[character] for character in input_seq[i]]
    output_seq[i] = [char2int[character] for character in output_seq[i]]
    #input_seq[i] = [char2int[character] for character in input_seq[i]]
    #target_seq[i] = [char2int[character] for character in target_seq[i]]

In [61]:
input_seq, output_seq

([[10, 15, 1, 11, 10, 17, 5, 11, 0, 12, 15, 11, 1, 17],
  [3, 17, 17, 6, 11, 16, 11, 0, 2, 11, 13, 8, 7, 15],
  [10, 0, 14, 15, 11, 0, 11, 7, 8, 4, 15, 11, 6, 0]],
 [[15, 1, 11, 10, 17, 5, 11, 0, 12, 15, 11, 1, 17, 9],
  [17, 17, 6, 11, 16, 11, 0, 2, 11, 13, 8, 7, 15, 11],
  [0, 14, 15, 11, 0, 11, 7, 8, 4, 15, 11, 6, 0, 1]])

In [62]:
dict_size = len(char2int)
batch_size = len(pad_text)
seq_len = maxlen -1

def one_hot_encode(sequence, dict_size, seq_len, batch_size):
    features = np.zeros((batch_size, seq_len, dict_size),dtype=np.float32)
    
    for i in range(batch_size):
        #print(f"{i}\n")
        for u in range(seq_len):
                #print(f"{u}")
                features[i,u,sequence[i][u]] = 1
    return features

In [63]:
input_seq = one_hot_encode(input_seq, dict_size, seq_len, batch_size)
output_seq = one_hot_encode(output_seq, dict_size, seq_len, batch_size)

In [64]:
input_seq

array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 1.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0.],

In [65]:
input_seq = torch.from_numpy(input_seq)
output_seq = torch.tensor(output_seq)

In [67]:
input_seq.shape, output_seq.shape

(torch.Size([3, 14, 18]), torch.Size([3, 14, 18]))

In [68]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [81]:
from torch import nn
class RNN(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(RNN, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True )
        self.fc = nn.Linear(hidden_dim, output_size)
        
    def forward(self, x):
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size)
        out, hidden = self.rnn(x, hidden)
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
        return hidden.to(device)

In [82]:
model = RNN(input_size=dict_size, output_size=dict_size, hidden_dim = 12, n_layers=1)
model = model.to(device)

In [83]:
n_epochs = 100
lr = 0.01

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [101]:
## testing
input_seq = input_seq.to(device)
batch_size = input_seq.size(0)
hidden = model.init_hidden(batch_size)
out, hidden = model.rnn(input_seq, hidden)
print(f"output shape after running rnn: {out.shape}")
out = model.fc(out)
print(f"output shape after running fc : {out.shape}")

out, hidden = model(input_seq)
print(f"output shape after model forward function: {out.shape}")

print(f"output contiguous view: {output_seq.view(-1).shape}")

print(f"shape of output : {output_seq.shape}")

output shape after running rnn: torch.Size([3, 14, 12])
output shape after running fc : torch.Size([3, 14, 18])
output shape after model forward function: torch.Size([42, 18])
output contiguous view: torch.Size([756])
shape of output : torch.Size([3, 14, 18])


In [100]:
for epoch in range(1, n_epochs+1):
    optimizer.zero_grad()
    input_seq = input_seq.to(device)
    output_seq = output_seq.to(device)
    output, hidden = model(input_seq)
    loss = criterion(output, output_seq(-1, ).long())
    loss.backward()
    optimizer.step()
    
    if epoch%10 == 0:
        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))

ValueError: Expected input batch_size (42) to match target batch_size (3).