<a href="https://colab.research.google.com/github/hissain/mlworks/blob/main/codes/Simple_RNN_Implementation_O2O.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# %pip install torchtext torchdata "portalocker>=2.0.0"

In [None]:
# Import
%matplotlib inline
import math
import torch
from torch import nn
from torch.nn import functional as F

In [None]:
# Seed PyTorch's global RNG so the randomly initialised weights created in the
# cells below are the same on every run of the notebook.
torch.manual_seed(0)

<torch._C.Generator at 0x782e0c3ad8d0>

In [None]:
class RNNLayer(torch.nn.Module):
    """A single-step vanilla (tanh) RNN cell followed by a linear readout.

    One call to ``forward`` consumes the input for ONE time step and the
    previous hidden state, and produces the new hidden state together with
    the readout of that state:

        h_t = tanh(x_t @ W_xh + h_{t-1} @ W_hh + b_h)
        y_t = fc(h_t)

    Args:
        input_size: Number of features in each per-step input vector.
        hidden_size: Number of units in the hidden state.
        output_size: Number of features produced by the readout layer.
            Optional for backward compatibility: when omitted, a module-level
            variable named ``output_size`` is used if one exists (the
            behaviour older callers relied on), otherwise ``hidden_size``.
    """

    def __init__(self, input_size, hidden_size, output_size=None):
        super().__init__()

        if output_size is None:
            # Legacy fallback: this constructor used to read a global called
            # `output_size` implicitly. Keep honouring it so two-argument
            # callers get the same readout width as before, but never fail
            # with a NameError when no such global is defined.
            output_size = globals().get("output_size", hidden_size)

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Weight matrices for input->hidden and hidden->hidden connections
        self.W_xh = torch.nn.Parameter(torch.randn(input_size, hidden_size))
        self.W_hh = torch.nn.Parameter(torch.randn(hidden_size, hidden_size))
        # Bias term for hidden layer
        self.b_h = torch.nn.Parameter(torch.zeros(hidden_size))

        # Fully connected layer for output generation
        self.fc = torch.nn.Linear(hidden_size, output_size)

    def forward(self, input_data, hidden_state=None):
        """
        Performs a forward pass through the RNN layer for one time step.

        Args:
            input_data: A tensor of shape (batch_size, input_size) holding the input for the current time step.
            hidden_state: A tensor of shape (batch_size, hidden_size) holding the previous hidden state (optional;
                defaults to all zeros).

        Returns:
            output: A tensor of shape (batch_size, output_size) produced by the readout layer.
            hidden_state: A tensor of shape (batch_size, hidden_size) holding the updated hidden state.
        """
        batch_size, _ = input_data.size()

        # Initialize hidden state if not provided. `new_zeros` creates it with
        # the input's dtype and device, so the layer also works after
        # `.double()` / `.to(device)` instead of failing on a CPU/float32 tensor.
        if hidden_state is None:
            hidden_state = input_data.new_zeros(batch_size, self.hidden_size)

        # Calculate current hidden state
        pre_activation = (
            # (batch_size, input_size) x (input_size, hidden_size)
            # = (batch_size, hidden_size)
            torch.mm(input_data, self.W_xh)
            # (batch_size, hidden_size) x (hidden_size, hidden_size)
            # = (batch_size, hidden_size)
            + torch.mm(hidden_state, self.W_hh)
            # (hidden_size,) broadcast over the batch dimension
            + self.b_h
        )
        hidden_state = torch.tanh(pre_activation)

        # Output generation
        # (batch_size, hidden_size) x (hidden_size, output_size)
        # = (batch_size, output_size)
        output = self.fc(hidden_state)

        return output, hidden_state

In [None]:
class RNNModel(torch.nn.Module):
    """Unrolls an ``RNNLayer`` over a sequence and predicts a class per step.

    Args:
        input_size: Number of features in each per-step input vector.
        hidden_size: Number of units in the recurrent hidden state.
        output_size: Number of classes scored at every time step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.rnn = RNNLayer(input_size, hidden_size)

        # RNNLayer is built without an explicit output width here, so make
        # sure its readout head really produces `output_size` scores; without
        # this check the `output_size` argument would be silently ignored.
        if self.rnn.fc.out_features != output_size:
            self.rnn.fc = torch.nn.Linear(hidden_size, output_size)

    def forward(self, inputs):
        """
        Performs a forward pass through the RNN model.

        Args:
            inputs: A tensor of shape (batch_size, seq_len, input_size) representing the input sequence.

        Returns:
            predictions: An integer tensor of shape (batch_size, seq_len) holding, for every time step, the index
                of the highest-scoring class (argmax over the layer output). These are hard decisions, not scores.
            hidden_states: A tensor of shape (batch_size, seq_len, hidden_size) holding the hidden state after
                every time step.

        Raises:
            ValueError: If ``inputs`` is not a 3-D tensor.
        """
        if inputs.dim() != 3:
            raise ValueError(
                "inputs must have shape (batch_size, seq_len, input_size), "
                f"got {tuple(inputs.size())}"
            )
        seq_len = inputs.size(1)

        predictions, hidden_states = [], []

        # The layer creates a zero hidden state itself when given None
        hidden_state = None
        # loop through the sequence, carrying the hidden state forward
        for step in range(seq_len):
            # Pass the current time step through the RNN layer
            scores, hidden_state = self.rnn(inputs[:, step, :], hidden_state)

            # Storing the results
            predictions.append(torch.argmax(scores, dim=1))
            hidden_states.append(hidden_state)

        # Re-assemble the per-step results along the time dimension
        predictions = torch.stack(predictions, dim=1)
        hidden_states = torch.stack(hidden_states, dim=1)

        return predictions, hidden_states



In [None]:
# Hyperparameters shared by the cells below
input_size = 3    # vocabulary size (inputs are one-hot encoded tokens)
hidden_size = 20  # width of the recurrent hidden state
output_size = 3   # number of classes scored at each time step

In [None]:
# Build the model from the hyperparameters defined above
model = RNNModel(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)

In [None]:
# Example batch: two sequences of four one-hot encoded tokens each,
# laid out as (batch, seq_len, vocabulary).
inputs = torch.tensor(
    [
        [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0]],
        [[0, 1, 0], [1, 0, 0], [0, 0, 1], [1, 0, 0]],
    ],
    dtype=torch.float32,
)
print(inputs.shape)

torch.Size([2, 4, 3])


In [None]:
# Expected class index for every time step of every sequence, laid out as
# (batch, seq_len); compared against the model's predictions further down.
outputs = torch.tensor(
    [
        [1, 2, 0, 1],
        [2, 1, 0, 1],
    ],
    dtype=torch.long,
)

In [None]:
# Pass the sequence through the model.
# Only the predicted class indices are needed here, so the per-step hidden
# states are dropped by indexing instead of being bound to `_`: at notebook
# top level, assigning `_` shadows IPython's "last output" variable.
# This is an evaluation-only pass, so autograd bookkeeping is switched off.
with torch.no_grad():
    predictions = model(inputs)[0]
print(predictions.size())

torch.Size([2, 4])


In [None]:
# Show the target class indices as nested Python lists (one inner list per sequence)
outputs.tolist()

[[1, 2, 0, 1], [2, 1, 0, 1]]

In [None]:
# Show the predicted class indices in the same nested-list layout for comparison
predictions.tolist()

[[2, 2, 1, 2], [0, 2, 2, 2]]

In [None]:
# Accuracy: share of (sequence, time step) positions where the predicted
# class index equals the target class index.
acc = predictions.eq(outputs).sum().div(predictions.numel()).item()
print("Accuracy: ", acc)

Accuracy:  0.125
