# RNN implementation

In [None]:
import numpy as np

class RNN:
    """Minimal many-to-one vanilla RNN trained with plain gradient descent.

    The network consumes ``seq_len`` input vectors, updates a tanh hidden
    state at every time step, and produces one linear output from the hidden
    state of the final step::

        h_t = tanh(Wxh @ x_t + Whh @ h_{t-1} + bh)
        y   = Why @ h_{seq_len-1} + by

    Attributes
    ----------
    Wxh : ndarray, shape (hidden_size, input_size)
        Input-to-hidden weights.
    Whh : ndarray, shape (hidden_size, hidden_size)
        Hidden-to-hidden (recurrent) weights.
    Why : ndarray, shape (output_size, hidden_size)
        Hidden-to-output weights.
    bh : ndarray, shape (hidden_size, 1)
        Hidden bias.
    by : ndarray, shape (output_size, 1)
        Output bias.
    """

    def __init__(self, input_size, hidden_size, output_size, seq_len):
        """Store the layer sizes and initialise the parameters.

        Weights are drawn from a standard normal scaled by 0.01; biases start
        at zero.

        Parameters
        ----------
        input_size : int
            Length of each per-step input vector.
        hidden_size : int
            Number of hidden units.
        output_size : int
            Length of the output vector.
        seq_len : int
            Number of time steps processed by ``forward``/``backward``.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.seq_len = seq_len

        # NOTE: the draw order (Wxh, Whh, Why) is kept stable so that seeding
        # NumPy's global RNG reproduces the same initial weights.
        self.Wxh = np.random.randn(hidden_size, input_size) * 0.01
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01
        self.Why = np.random.randn(output_size, hidden_size) * 0.01
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))

    def forward(self, inputs):
        """Run the sequence through the network and return the final output.

        Parameters
        ----------
        inputs : sequence
            Indexable container whose first ``seq_len`` items are the per-step
            inputs. Each item may be an ndarray or any array-like (e.g. a
            plain list) with ``input_size`` elements.

        Returns
        -------
        ndarray, shape (output_size, 1)
            Linear read-out of the last hidden state.

        Notes
        -----
        The inputs and every hidden state are cached on the instance
        (``self.inputs``, ``self.hs``) because ``backward`` needs them.
        """
        self.inputs = inputs
        # hs maps time step -> hidden state; key -1 holds the initial state.
        self.hs = {-1: np.zeros((self.hidden_size, 1))}

        for t in range(self.seq_len):
            # np.asarray lets list inputs work; reshape makes a column vector.
            x_t = np.asarray(inputs[t]).reshape(-1, 1)
            pre_activation = self.Wxh @ x_t + self.Whh @ self.hs[t - 1] + self.bh
            self.hs[t] = np.tanh(pre_activation)

        # Many-to-one: only the last hidden state feeds the output layer.
        return self.Why @ self.hs[self.seq_len - 1] + self.by

    def backward(self, dL_dy, lr=0.01):
        """Back-propagate through time and apply one gradient-descent step.

        Must be called after ``forward``, which caches the inputs and hidden
        states used here.

        Parameters
        ----------
        dL_dy : array-like
            Gradient of the loss with respect to the output of ``forward``.
            Any array-like with ``output_size`` elements is accepted (column
            vector, 1-D array, or list); it is normalised to a column vector.
        lr : float, optional
            Learning rate for the parameter update (default 0.01).
        """
        # Normalise so a 1-D gradient does not trip matmul's shape rules.
        dL_dy = np.asarray(dL_dy, dtype=float).reshape(-1, 1)

        # Output layer gradients.
        dWhy = dL_dy @ self.hs[self.seq_len - 1].T
        dby = dL_dy

        dWxh = np.zeros_like(self.Wxh)
        dWhh = np.zeros_like(self.Whh)
        dbh = np.zeros_like(self.bh)

        # The loss reaches the hidden states only through the last step, so
        # the gradient flowing into h_{seq_len-1} comes from the output layer;
        # earlier steps receive it purely through the recurrent connection.
        dh = self.Why.T @ dL_dy

        for t in reversed(range(self.seq_len)):
            dh_raw = (1 - self.hs[t] ** 2) * dh  # tanh'(z) = 1 - tanh(z)^2
            dbh += dh_raw
            dWxh += dh_raw @ np.asarray(self.inputs[t]).reshape(1, -1)
            dWhh += dh_raw @ self.hs[t - 1].T
            dh = self.Whh.T @ dh_raw  # gradient w.r.t. h_{t-1}

        # Gradient step (all gradients were computed with the old weights).
        self.Wxh -= lr * dWxh
        self.Whh -= lr * dWhh
        self.Why -= lr * dWhy
        self.bh -= lr * dbh
        self.by -= lr * dby
