In [1]:
import torch
import math
import time
import json

import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torchinfo import summary

In [2]:
# Read the whole corpus. The context manager closes the file handle as soon as
# the read finishes instead of leaving it open until garbage collection.
with open("nietzsche.txt") as corpus_file:
    text = corpus_file.read()

# Vocabulary: every distinct character of the corpus in sorted order, with the
# NUL character '\0' placed at index 0 ahead of the real characters.
chars = ['\0'] + sorted(set(text))

In [3]:
# Two-way lookup between characters and their integer position in `chars`.
char_to_index = {ch: idx for idx, ch in enumerate(chars)}
index_to_char = dict(enumerate(chars))

In [4]:
# Encode the whole corpus as a flat list of vocabulary indices.
total_index = [char_to_index[ch] for ch in text]
pred_num = 25  # max character length to input in one go

# Start offsets of consecutive, non-overlapping windows of `pred_num` characters.
# The upper bound guarantees that the target position (start + pred_num) exists.
window_starts = range(0, len(total_index) - 1 - pred_num, pred_num)

# xin[i] holds the i-th character of every window (one list per position in the window).
xin = [[total_index[start + offset] for start in window_starts] for offset in range(pred_num)]
# y holds the character that immediately follows each window.
y = [total_index[start + pred_num] for start in window_starts]

In [17]:
# Number of target characters (one per window).
np.shape(y)

(24035,)

In [18]:
# Assemble the inputs as a (num_windows, pred_num) array: each per-position list
# becomes one column. The last two windows are dropped from both X and Y
# (NOTE(review): the reason for trimming exactly two is not evident here — confirm).
X = np.stack([np.stack(position_column[:-2]) for position_column in xin], axis=1)
Y = np.array(y[:-2])

In [24]:
# Sanity check: decode the first input window back into characters.
[index_to_char[char_id] for char_id in X[0]]

['P',
 'R',
 'E',
 'F',
 'A',
 'C',
 'E',
 '\n',
 '\n',
 '\n',
 'S',
 'U',
 'P',
 'P',
 'O',
 'S',
 'I',
 'N',
 'G',
 ' ',
 't',
 'h',
 'a',
 't',
 ' ']

In [14]:
# Copy the numpy arrays into int64 tensors (indices for nn.Embedding / class
# targets for CrossEntropyLoss).
X_tensor, Y_tensor = (torch.tensor(array, dtype=torch.long) for array in (X, Y))

In [16]:
class CustomRNN(nn.Module):
    """Single-layer, batch-first Elman RNN built from explicit parameters.

    For every time step ``t`` the hidden state is updated as::

        h_t = act(x_t @ W_ih.T + b_ih + h_{t-1} @ W_hh.T + b_hh)

    Args:
        input_size: number of features in each input step.
        hidden_size: number of features in the hidden state.
        nonlinearity: activation applied at every step, ``'tanh'`` (default)
            or ``'relu'``.

    Raises:
        ValueError: if ``nonlinearity`` is not one of the supported names.
    """

    # Supported activations, keyed by the name accepted in the constructor.
    _ACTIVATIONS = {'tanh': torch.tanh, 'relu': torch.relu}

    def __init__(self, input_size, hidden_size, nonlinearity='tanh'):
        super().__init__()
        if nonlinearity not in self._ACTIVATIONS:
            raise ValueError(
                f"Unknown nonlinearity '{nonlinearity}'; expected one of {sorted(self._ACTIVATIONS)}"
            )
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.nonlinearity = nonlinearity

        # Allocated uninitialised here; reset_parameters() fills every tensor.
        self.W_ih = nn.Parameter(torch.empty(hidden_size, input_size))
        self.W_hh = nn.Parameter(torch.empty(hidden_size, hidden_size))
        self.b_ih = nn.Parameter(torch.empty(hidden_size))
        self.b_hh = nn.Parameter(torch.empty(hidden_size))

        self.reset_parameters()

    def reset_parameters(self):
        """Initialise weights with Kaiming-uniform and biases uniformly in +-1/sqrt(fan_in)."""
        nn.init.kaiming_uniform_(self.W_ih, a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.W_hh, a=math.sqrt(5))
        # Both biases share the bound derived from the fan-in of W_ih.
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.W_ih)
        bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
        nn.init.uniform_(self.b_ih, -bound, bound)
        nn.init.uniform_(self.b_hh, -bound, bound)

    def forward(self, x, h_0=None):
        """Run the recurrence over a batch of sequences.

        Args:
            x: tensor of shape ``(batch_size, seq_len, input_size)``.
            h_0: optional initial hidden state of shape
                ``(batch_size, hidden_size)``; zeros are used when omitted.

        Returns:
            Tuple ``(output, h_n)``: ``output`` has shape
            ``(batch_size, seq_len, hidden_size)`` and holds the hidden state
            of every step; ``h_n`` is the hidden state after the last step
            (equal to the initial state when ``seq_len`` is 0).
        """
        batch_size, seq_len, _ = x.size()
        activation = self._ACTIVATIONS[self.nonlinearity]

        if h_0 is None:
            # Follow the input's dtype/device so non-float32 models work too.
            h_t = torch.zeros(batch_size, self.hidden_size, device=x.device, dtype=x.dtype)
        else:
            h_t = h_0

        step_states = []
        for t in range(seq_len):
            x_t = x[:, t, :]  # input at time t: (batch_size, input_size)
            h_t = activation(
                F.linear(x_t, self.W_ih, self.b_ih) + F.linear(h_t, self.W_hh, self.b_hh)
            )
            step_states.append(h_t)

        if step_states:
            # Stack along the time axis instead of writing in place into a
            # pre-allocated buffer; this keeps the dtype of the hidden states.
            output = torch.stack(step_states, dim=1)
        else:
            # torch.stack rejects an empty list; return an empty time axis.
            output = x.new_zeros(batch_size, 0, self.hidden_size)

        h_n = h_t  # last hidden state
        return output, h_n

In [77]:
class SimpleRNN(nn.Module):
    """Next-character classifier: embedding -> CustomRNN -> linear head.

    Args:
        vocab_size: number of distinct token ids (embedding rows and output logits).
        embedding_dim: width of each token embedding.
        hidden_layers: width of the recurrent hidden state (despite the name,
            this is passed to CustomRNN as its hidden size).
    """

    def __init__(self, vocab_size, embedding_dim, hidden_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # Hand-written recurrent layer; the built-in counterpart would be
        # nn.RNN(embedding_dim, hidden_layers, nonlinearity='tanh', batch_first=True).
        self.rnn = CustomRNN(embedding_dim, hidden_layers, nonlinearity='tanh')
        self.dense = nn.Linear(hidden_layers, vocab_size)

        # Xavier-normal re-initialisation of the embedding first, then the output head.
        for layer in (self.embedding, self.dense):
            nn.init.xavier_normal_(layer.weight)

    def forward(self, x):
        """Return one row of vocabulary logits per input sequence.

        ``x`` holds token ids; only the hidden state of the final time step is
        fed to the output layer.
        """
        hidden_states, _ = self.rnn(self.embedding(x))
        final_state = hidden_states[:, -1, :]
        return self.dense(final_state)

In [None]:
# Hyperparameters (same as Keras)
hidden_layers = 128  # width of the RNN hidden state (SimpleRNN names this argument `hidden_layers`)
# Derived from the vocabulary instead of a hard-coded 86 so the embedding and
# output layer always match the corpus actually loaded.
vocab_size = len(chars)
embedding_dim = 42
batch_size = 64  # NOTE(review): not referenced by the training loop visible in this notebook
epochs = 150 #150 works well for data with 24033 rows, adjust accordingly for data with fewer

In [None]:
# NOTE: relies on `model`, which is instantiated in a later cell of this
# notebook; run that cell first on a fresh kernel.
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
print("Trainable Parameters:", trainable_params, "Total:", total_params)

(24033, 25)

In [79]:
# Build the network from the hyperparameters defined above.
model = SimpleRNN(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_layers=hidden_layers,
)

In [80]:
# Display torchinfo's per-layer parameter counts for the model.
summary(model)

Layer (type:depth-idx)                   Param #
SimpleRNN                                --
├─Embedding: 1-1                         3,612
├─CustomRNN: 1-2                         22,016
├─Linear: 1-3                            11,094
Total params: 36,722
Trainable params: 36,722
Non-trainable params: 0

In [81]:
# Multi-class objective: CrossEntropyLoss is applied to the unnormalised
# scores returned by the model's final linear layer.
criterion = nn.CrossEntropyLoss()
# Adam with its default settings over every model parameter.
optimizer = optim.Adam(params=model.parameters())

In [82]:
# Full-batch training: every epoch is a single optimizer step computed on the
# entire X_tensor / Y_tensor (no mini-batching happens in this loop).
for epoch in range(epochs):
    epoch_start = time.time()
    model.train()

    optimizer.zero_grad()  # clear gradients left over from the previous step
    outputs = model(X_tensor)
    loss = criterion(outputs, Y_tensor)
    loss.backward()
    optimizer.step()

    loss_value = loss.item()
    elapsed = time.time() - epoch_start
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss_value:.4f}, Time: {elapsed:.2f}s')

Epoch [1/150], Loss: 4.4605, Time: 1.27s
Epoch [2/150], Loss: 4.3909, Time: 1.16s
Epoch [3/150], Loss: 4.3233, Time: 1.14s
Epoch [4/150], Loss: 4.2529, Time: 1.27s
Epoch [5/150], Loss: 4.1746, Time: 1.23s
Epoch [6/150], Loss: 4.0829, Time: 1.14s
Epoch [7/150], Loss: 3.9721, Time: 1.17s
Epoch [8/150], Loss: 3.8412, Time: 1.18s
Epoch [9/150], Loss: 3.7024, Time: 1.14s
Epoch [10/150], Loss: 3.5789, Time: 1.13s
Epoch [11/150], Loss: 3.4857, Time: 1.12s
Epoch [12/150], Loss: 3.4183, Time: 1.12s
Epoch [13/150], Loss: 3.3627, Time: 1.13s
Epoch [14/150], Loss: 3.3117, Time: 1.13s
Epoch [15/150], Loss: 3.2660, Time: 1.13s
Epoch [16/150], Loss: 3.2276, Time: 1.13s
Epoch [17/150], Loss: 3.1972, Time: 1.13s
Epoch [18/150], Loss: 3.1744, Time: 1.12s
Epoch [19/150], Loss: 3.1587, Time: 1.12s
Epoch [20/150], Loss: 3.1486, Time: 1.11s
Epoch [21/150], Loss: 3.1422, Time: 1.13s
Epoch [22/150], Loss: 3.1373, Time: 1.13s
Epoch [23/150], Loss: 3.1325, Time: 1.13s
Epoch [24/150], Loss: 3.1269, Time: 1.13s
E

In [85]:
def predict_next_char(inp):
    """Predict the character that follows ``inp`` using the global ``model``.

    Args:
        inp: non-empty string; every character must be a key of ``char_to_index``.

    Returns:
        Tuple ``(next_char, inp + next_char)``.

    Raises:
        ValueError: if ``inp`` is empty (the model needs at least one time step).
        KeyError: if ``inp`` contains a character missing from ``char_to_index``.
    """
    if not inp:
        raise ValueError("inp must contain at least one character")

    model.eval()  # switch the model to evaluation mode

    index = [char_to_index[ch] for ch in inp]
    # Build a batch of one sequence on the same device as the model's weights.
    device = next(model.parameters()).device
    input_tensor = torch.tensor([index], dtype=torch.long, device=device)
    with torch.no_grad():  # no gradients needed for inference
        prediction = model(input_tensor)
    # The model returns unnormalised scores; take the best class along the vocabulary axis.
    predicted_index = torch.argmax(prediction, dim=-1).item()
    next_char = index_to_char[predicted_index]
    return next_char, inp + next_char

In [86]:
# Spot-check the model on a few short prefixes.
for prompt in ('those w', ' th', ' an', 'does th', 'woma', 'philosoph'):
    print(predict_next_char(prompt))

('h', 'those wh')
('e', ' the')
('d', ' and')
('e', 'does the')
('n', 'woman')
('e', 'philosophe')


In [87]:
# Persist only the learned parameters (state dict), not the module object itself.
trained_state = model.state_dict()
torch.save(trained_state, 'simpleRNN_3pred.pth')

#### Next, load the movies data and prepare the dataset and character-mapping functions to predict the next letter of a movie name

In [14]:
"""
Look in the notebook Prediction.ipynb for model and training code
"""

'\nLook in the notebook Prediction.ipynb for model and training code\n'