In [44]:
import torch
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

from base import BaseRnnFamily

In [None]:
class RNN(BaseRnnFamily):
    """Vanilla recurrent network implemented with NumPy.

    Per time step the model computes::

        h_t = tanh(x_t @ W_xh + h_prev @ W_hh + b_h)
        y_t = h_t @ W_hy + b_y

    ``self.input_dim``, ``self.hidden_dim``, ``self.output_dim`` and
    ``self.grad`` are used here but not defined in this class; presumably
    they come from ``BaseRnnFamily`` -- verify against base.py.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, loss_type="mse", **kwargs):
        """Forward all arguments to the base class, then create weights and state."""
        super().__init__(input_dim, hidden_dim, output_dim, loss_type=loss_type, **kwargs)

        self.init_weights()
        # Initial hidden state; every call to forward_sequence starts from it.
        self.h_t = np.zeros((self.hidden_dim,))
        # Cache written by forward_sequence and read by backward_sequence.
        self.states = {}

    def init_weights(self, **kwargs):
        """(Re)draw every weight matrix and bias from U[0, 1).

        ``**kwargs`` is accepted but not used.
        """
        # NOTE(review): all-positive weights may push tanh into saturation for
        # larger hidden_dim; a zero-centred, scaled init could be worth trying.
        self.W_xh = np.random.uniform(0, 1, (self.input_dim, self.hidden_dim))
        self.W_hh = np.random.uniform(0, 1, (self.hidden_dim, self.hidden_dim))
        self.b_h = np.random.uniform(0, 1, self.hidden_dim)
        self.W_hy = np.random.uniform(0, 1, (self.hidden_dim, self.output_dim))
        self.b_y = np.random.uniform(0, 1, self.output_dim)

    def tanh(self, x):
        """Element-wise hyperbolic tangent (the hidden activation)."""
        return np.tanh(x)

    def forward_step(self, x_t, h_prev):
        """Run one time step.

        Args:
            x_t: input at this step, (input_dim,).
            h_prev: hidden state from the previous step, (hidden_dim,).

        Returns:
            Tuple ``(h_t, y_t)``: the new hidden state and the output.
        """
        x_latent = x_t @ self.W_xh
        h_relative = h_prev @ self.W_hh
        x_interm = x_latent + h_relative + self.b_h
        h_t = self.tanh(x_interm)
        y_t = h_t @ self.W_hy + self.b_y
        return h_t, y_t

    def forward_sequence(self, x_seq):
        """Run the network over a whole sequence, starting from ``self.h_t``.

        The per-step hidden states and outputs are cached in ``self.states``
        under ``'hidden_states'`` and ``'y_states'``; ``backward_sequence``
        reads the hidden states from there.

        Args:
            x_seq: iterable of per-step inputs, (seq_len, input_dim).

        Returns:
            Array of per-step outputs, (seq_len, output_dim).
        """
        hidden_states = []
        y_states = []
        h_prev = self.h_t

        for x_t in x_seq:
            h_t, y_t = self.forward_step(x_t, h_prev)
            h_prev = h_t
            hidden_states.append(h_t.copy())
            y_states.append(y_t.copy())

        self.states['hidden_states'] = np.array(hidden_states)
        self.states['y_states'] = np.array(y_states)
        return np.array(y_states)

    def backward_step(self, grad_output, x_t, h_t, h_prev, grad_h_next=None):
        """Backpropagate through a single time step.

        Args:
            grad_output: gradient of the loss w.r.t. this step's output y_t,
                (output_dim,).
            x_t: input at this step, (input_dim,).
            h_t: hidden state produced at this step, (hidden_dim,).
            h_prev: hidden state fed into this step, (hidden_dim,).
            grad_h_next: gradient w.r.t. ``h_t`` arriving from the following
                time step (the ``grad_h_prev`` returned for step t+1). ``None``
                means no later step contributes, e.g. at the last step.

        Returns:
            Dict with ``dW_hy``, ``db_y``, ``dW_xh``, ``dW_hh``, ``db_h`` for
            this step, plus ``grad_h_prev``: the gradient w.r.t. ``h_prev`` to
            hand to the preceding step.
        """
        dW_hy = np.outer(h_t, grad_output)  # (hidden_dim, output_dim)
        db_y = grad_output  # (output_dim,)

        # h_t influences the loss through y_t and, via the recurrence, through
        # every later step; both contributions have to be summed.
        grad_h = grad_output @ self.W_hy.T  # (hidden_dim,)
        if grad_h_next is not None:
            grad_h = grad_h + grad_h_next

        # d tanh(z)/dz = 1 - tanh(z)^2, and h_t is already tanh(z).
        dtanh = 1 - h_t ** 2
        grad_h_raw = grad_h * dtanh  # (hidden_dim,)

        dW_xh = np.outer(x_t, grad_h_raw)  # (input_dim, hidden_dim)
        dW_hh = np.outer(h_prev, grad_h_raw)  # (hidden_dim, hidden_dim)
        db_h = grad_h_raw  # (hidden_dim,)

        grad_h_prev = grad_h_raw @ self.W_hh.T  # (hidden_dim,)

        return {
            "dW_hy": dW_hy,
            "db_y": db_y,
            "dW_xh": dW_xh,
            "dW_hh": dW_hh,
            "db_h": db_h,
            "grad_h_prev": grad_h_prev
        }

    def backward_sequence(self, x_seq, y_seq, pred_seq, debug=False):
        """Backpropagation through time over a whole sequence.

        ``forward_sequence`` must have been called beforehand: the hidden
        states are read from ``self.states['hidden_states']``.

        Args:
            x_seq: (seq_len, input_dim)
            y_seq: (seq_len, output_dim)
            pred_seq: (seq_len, output_dim)
            debug: when true, print per-step gradient norms.

        Returns:
            Dict with the gradients summed over all time steps: ``dW_xh``,
            ``dW_hh``, ``db_h``, ``dW_hy``, ``db_y``.
        """

        seq_len = len(x_seq)

        dW_xh = np.zeros_like(self.W_xh)
        dW_hh = np.zeros_like(self.W_hh)
        db_h = np.zeros_like(self.b_h)
        dW_hy = np.zeros_like(self.W_hy)
        db_y = np.zeros_like(self.b_y)

        # Gradient w.r.t. the hidden state coming from the step after the
        # current one; nothing follows the last step, so it starts at zero.
        grad_h_next = np.zeros((self.hidden_dim,))

        for t in reversed(range(seq_len)):
            x_t = x_seq[t]
            y_t_true = y_seq[t]
            y_t_pred = pred_seq[t]
            h_t = self.states['hidden_states'][t]
            h_prev = self.states['hidden_states'][t - 1] if t > 0 else self.h_t

            # Presumably dL/dy_pred for the configured loss -- defined outside
            # this class, verify against the base class.
            grad_output = self.grad(y_t_true, y_t_pred)

            grads = self.backward_step(
                grad_output=grad_output,
                x_t=x_t,
                h_t=h_t,
                h_prev=h_prev,
                grad_h_next=grad_h_next
            )
            if debug:
                print(f"\n=== Backward step t={t} ===")
                print(f"grad_output: {grad_output}")
                print(f"dW_xh norm: {np.linalg.norm(grads['dW_xh'])}")
                print(f"dW_hh norm: {np.linalg.norm(grads['dW_hh'])}")
                print(f"dW_hy norm: {np.linalg.norm(grads['dW_hy'])}")
                print(f"db_h norm: {np.linalg.norm(grads['db_h'])}")
                print(f"db_y norm: {np.linalg.norm(grads['db_y'])}")
                print(f"grad_h_prev norm: {np.linalg.norm(grads['grad_h_prev'])}")

            dW_xh += grads["dW_xh"]
            dW_hh += grads["dW_hh"]
            db_h += grads["db_h"]
            dW_hy += grads["dW_hy"]
            db_y += grads["db_y"]

            # Carry the hidden-state gradient back to step t-1.
            grad_h_next = grads["grad_h_prev"]

        return {
            "dW_xh": dW_xh,
            "dW_hh": dW_hh,
            "db_h": db_h,
            "dW_hy": dW_hy,
            "db_y": db_y
        }



In [38]:
# Scratch check: draw 10 samples from U[0, 1), the same distribution
# RNN.init_weights uses for every weight and bias.
np.random.uniform(0, 1, 10)

array([0.67414925, 0.94640503, 0.62870795, 0.96582873, 0.32993353,
       0.22289821, 0.94950267, 0.48081432, 0.83340873, 0.09800429])

In [43]:
# Scratch check: np.zeros with a single length gives a 1-D array of shape (n,),
# the same layout as the initial hidden state created in RNN.__init__.
np.zeros(2).shape

(2,)