Todo:
- Add training 

In [136]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torch.nn.utils.rnn import pad_sequence
from torch.autograd import Variable
import torch.optim as optim
import numpy as np
import random
import math
import pickle
import numpy as np
import copy

In [137]:
class RNN(nn.Module):
    
    def __init__(self, n_inputs, n_hiddens, n_outputs):
        super().__init__()
        self.n_inputs = n_inputs
        self.n_hiddens = n_hiddens
        self.n_outputs = n_outputs
        
        self.input_to_hidden = nn.Linear(n_inputs, n_hiddens, bias=False)
        self.hidden_to_hidden = nn.Linear(n_hiddens, n_hiddens)
        self.hidden_to_output = nn.Linear(n_hiddens, n_outputs)
    
    def forward(self, X, hidden_state):
        X = self.input_to_hidden(X)
        hidden_state = self.hidden_to_hidden(hidden_state)
        hidden_state = torch.tanh(X + hidden_state)
        output = self.hidden_to_output(hidden_state)
        
        return output, hidden_state
    
    def init_zero_hidden(self, batch_size=1):
        return torch.zeros(batch_size, self.n_hiddens, requires_grad=False)

In [138]:
def train(model: RNN, 
          dataloader: DataLoader, 
          epochs: int, 
          optimizer: optim.Optimizer, 
          loss_fn: nn.Module,
          batch_size: int):
    train_losses = {}
    model.train()
    for epoch in range(epochs):
        epoch_losses = []
        for X, y in dataloader:
            
            hidden = model.init_zero_hidden(batch_size=batch_size)
            model.zero_grad()
            loss = 0
            
            for token_index in range(X.shape[0]):
                output, hidden = model(token, hidden)
                loss += loss_fn(output, y[token_index])
                
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=3)
            optimizer.step()

In [139]:
training_words = ["yo what's up homie", 'how are you doing man', 'i enjoy listening to beethoven']

In [140]:
chars = set(''.join(training_words))

In [141]:
int2char = dict(enumerate(chars))

In [142]:
char2int = {character : value for value, character in int2char.items()}

In [143]:
maxlen = len(max(training_words, key=len))

In [144]:
for i in range(len(training_words)):
    for _ in range(maxlen - len(training_words[i])):
        training_words[i] += ' '

In [145]:
training_words

["yo what's up homie            ",
 'how are you doing man         ',
 'i enjoy listening to beethoven']

In [146]:
input_sequences = []
target_sequences = []

In [147]:
for i in range(len(training_words)):
    input_sequences.append(training_words[i][:-1])
    target_sequences.append(training_words[i][1:])

In [148]:
for i in range(len(training_words)):
    input_sequences[i] = [char2int[character] for character in input_sequences[i]]
    target_sequences[i] = [char2int[character] for character in target_sequences[i]]

In [149]:
dict_size = len(char2int)
sequence_length = maxlen - 1
batch_size = len(training_words)

In [182]:
def one_hot_encode(sequences, dict_size, seq_len, batch_size):
    features = np.zeros(shape=(batch_size, seq_len, dict_size), dtype=np.float32)
    
    for i in range(len(sequences)):
        for token_index in range(len(sequences[i])):
            #print(i, token_index, sequences[i][token_index])
            features[i, token_index, sequences[i][token_index]] = 1
            
    return features

In [183]:
input_sequences_onehot = one_hot_encode(input_sequences, dict_size, sequence_length, batch_size)

In [184]:
input_sequences_onehot = torch.from_numpy(input_sequences_onehot)
target_sequences = torch.Tensor(target_sequences)

In [185]:
input_sequences_onehot

tensor([[[0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 1., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]])

In [192]:
model(input_sequences_onehot, torch.zeros(12))

(tensor([[[-0.0429,  0.2822,  0.3401,  ...,  0.1586, -0.0085,  0.4177],
          [-0.0966,  0.1920,  0.3414,  ..., -0.0144,  0.0038,  0.3242],
          [-0.0758,  0.2001,  0.1977,  ...,  0.2723, -0.1006,  0.4388],
          ...,
          [-0.0758,  0.2001,  0.1977,  ...,  0.2723, -0.1006,  0.4388],
          [-0.0758,  0.2001,  0.1977,  ...,  0.2723, -0.1006,  0.4388],
          [-0.0758,  0.2001,  0.1977,  ...,  0.2723, -0.1006,  0.4388]],
 
         [[-0.1909,  0.1320,  0.4030,  ...,  0.0346,  0.0614,  0.2956],
          [-0.0966,  0.1920,  0.3414,  ..., -0.0144,  0.0038,  0.3242],
          [-0.0255,  0.3182,  0.2620,  ...,  0.1954, -0.0039,  0.3855],
          ...,
          [-0.0758,  0.2001,  0.1977,  ...,  0.2723, -0.1006,  0.4388],
          [-0.0758,  0.2001,  0.1977,  ...,  0.2723, -0.1006,  0.4388],
          [-0.0758,  0.2001,  0.1977,  ...,  0.2723, -0.1006,  0.4388]],
 
         [[-0.0584,  0.3065,  0.1580,  ...,  0.2317, -0.0934,  0.4788],
          [-0.0758,  0.2001,

In [193]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

In [200]:
target_sequences.shape

torch.Size([3, 29])

In [None]:
for epoch in range(100):
    optimizer.zero_grad() # Clears existing gradients from previous epoch
    output, hidden = model(input_sequences_onehot, torch.zeros(12))
    loss = loss_fn(output, target_sequences)
    loss.backward() # Does backpropagation and calculates gradients
    optimizer.step() # Updates the weights accordingly
    
    if epoch%10 == 0:
        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))