In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss
import random
import numpy as np

In [31]:
# Read dataset
dataset_raw = open("data/shakespear.txt").read()
# Create x & y raw (non-encoded)
x_raw = list(dataset_raw)
y_raw = [x for x in x_raw[1:]]
y_raw.append(" ") # to make it the same size as x_raw

In [32]:
# Create 
idx_to_char = {idx: char for idx, char in enumerate(set(x_raw))}
char_to_idx = {char: idx for idx, char in idx_to_char.items()}

vocab_size = len(idx_to_char)

In [7]:
# Encode X & Y
x = torch.zeros(len(x_raw), vocab_size, dtype=torch.float32)
y = torch.zeros(len(y_raw), vocab_size)

for i in range(len(x_raw)):
    x[i][char_to_idx[x_raw[i]]] = 1

for i in range(len(y_raw)):
    y[i][char_to_idx[y_raw[i]]] = 1

In [56]:
class RNNLayer:
    def __init__(self, input_dim: int, hidden_state_size: int) -> None:
        self.input_dim = input_dim
        self.hidden_state_size = hidden_state_size

        # Definine H0 (initial hidden state)
        self.h = torch.randn(100, dtype=torch.float32)

        # Initialize Weights
        self.w_xh = torch.randn(hidden_state_size, input_dim, dtype=torch.float32, requires_grad=True)
        self.w_hh = torch.randn(hidden_state_size, hidden_state_size, dtype=torch.float32, requires_grad=True)
        self.w_hy = torch.randn(input_dim, hidden_state_size, dtype=torch.float32, requires_grad=True)
        
        # Initialize Biases
        # self.b_h = torch.randn(hidden_state_size, dtype=torch.float32, requires_grad=True)
        # self.b_y = torch.randn(input_dim, dtype=torch.float32, requires_grad=True)
        self.model_params = [self.w_xh, self.w_hy, self.w_hh]

    def train(self, x: torch.tensor, y: torch.tensor, timesteps: int, epochs: int, lr: float) -> torch.tensor:
        """Forward Function

        Args:
            x (torch.tensor): Input Vector
            timesteps (int): Timestep to do the RNN

        Returns:
            torch.tensor: Output of the forward pass
        """
        smooth_loss = torch.tensor(-np.log(1.0/67)*100)
        for epoch in range(epochs):
            # start_index = random.randint(0, len(x))
            start_index = 0
            
            loss_func = CrossEntropyLoss()
            loss = torch.zeros(1, dtype=torch.float32)
            
            for timestep in range(start_index, timesteps):
                # Update the hidden state
                self.h = torch.tanh(self.w_xh @ x[timestep] + self.w_hh @ self.h)
                y_hat = (self.w_hy @ self.h).softmax(0)
                loss += loss_func(y_hat, y[timestep])
            
            complete_loss = 0.99 * smooth_loss + 0.01 * loss
            print(f"Epoch: {epoch}; Loss: {loss.item()}")

            for param in self.model_params:
                param.grad = None
            
            complete_loss.backward(retain_graph=True)

            for param in self.model_params:
                # param.data -= lr * np.clip(param.grad, -10, 10)
                param.data -= lr * param.grad

        

In [57]:
rnn = RNNLayer(67, 100)

In [58]:
y_hat = rnn.train(x, y, 100, 10, 0.5)

Epoch: 0; Loss: 420.3501892089844
Epoch: 1; Loss: 422.21624755859375
Epoch: 2; Loss: 421.87255859375
Epoch: 3; Loss: 420.741455078125
Epoch: 4; Loss: 421.73828125
Epoch: 5; Loss: 421.8819580078125
Epoch: 6; Loss: 421.7661437988281
Epoch: 7; Loss: 422.5824279785156
Epoch: 8; Loss: 420.39697265625
Epoch: 9; Loss: 420.6890869140625
