<a href="https://colab.research.google.com/github/Undasnr/DL-ML/blob/main/Ronny_Recurrent_Neural_Network_Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**1. Simple forward propagation implementation of an RNN**

In [5]:
import numpy as np

class SimpleRNN:
    """
    A minimal RNN layer implementing only the forward pass.

    The hidden state follows
    h_t = activation(x_t . W_x + h_{t-1} . W_h + b), starting from h_0 = 0.

    Args:
      n_features  (int): dimensionality of each time-step input x_t
      n_nodes     (int): number of hidden units in the RNN
      activation  (callable): nonlinearity applied to pre-activations (default: np.tanh)
      seed        (int, optional): random seed for reproducibility. The seed
                  drives a private generator, so building a layer does not
                  reset NumPy's global random state.
    """
    def __init__(self, n_features, n_nodes, activation=np.tanh, seed=None):
        # np.random.RandomState(seed) yields the same numbers that
        # np.random.seed(seed) followed by np.random.randn would, but leaves
        # the global random stream untouched. Without a seed, keep drawing
        # from the global generator.
        rng = np.random if seed is None else np.random.RandomState(seed)

        self.n_features = n_features
        self.n_nodes    = n_nodes
        self.activation = activation

        # Weight: input → hidden
        self.W_x = rng.randn(n_features, n_nodes) * 0.01
        # Weight: hidden(prev) → hidden
        self.W_h = rng.randn(n_nodes, n_nodes) * 0.01
        # Bias for hidden units
        self.b   = np.zeros((n_nodes,))

        # Hidden state after the most recent time step (None until forward runs)
        self.h   = None

    def forward(self, x):
        """
        Run the RNN forward over an input sequence.

        Args:
          x (ndarray): shape (batch_size, n_sequences, n_features)

        Returns:
          outputs (ndarray): shape (batch_size, n_sequences, n_nodes);
          an empty (batch_size, 0, n_nodes) array when n_sequences is 0.
        """
        batch_size, n_sequences, _ = x.shape

        # Initialize h₀ = 0
        self.h = np.zeros((batch_size, self.n_nodes))

        if n_sequences == 0:
            # np.stack raises on an empty list, so answer the empty
            # sequence explicitly.
            return np.zeros((batch_size, 0, self.n_nodes))

        # Collect hidden states at each time step
        hidden_seq = []

        for t in range(n_sequences):
            x_t = x[:, t, :]                               # (batch_size, n_features)
            a_t = x_t.dot(self.W_x) + self.h.dot(self.W_h) + self.b

            # Activation (default: tanh; swap in ReLU or others by passing activation=...)
            self.h = self.activation(a_t)                  # (batch_size, n_nodes)
            hidden_seq.append(self.h)

        # Stack along time axis → (batch_size, n_sequences, n_nodes)
        return np.stack(hidden_seq, axis=1)

# Example Usage
import numpy as np

if __name__ == "__main__":
    # Demo: push one random batch through the layer and inspect the result.
    n_nodes = 6
    batch_size, n_sequences, n_features = (4, 5, 3)

    # Fixed seed so the random input batch is the same on every run.
    np.random.seed(0)
    x = np.random.standard_normal(size=(batch_size, n_sequences, n_features))

    # The layer gets its own seed for the weight initialisation.
    rnn = SimpleRNN(n_features=n_features, n_nodes=n_nodes, seed=42)
    y = rnn.forward(x)

    first_step = y[:, 0, :]
    print("Output shape:", y.shape)
    print("First time-step hidden state:\n", first_step)

Output shape: (4, 5, 6)
First time-step hidden state:
 [[ 0.01744804 -0.01809214 -0.00733537  0.02353047 -0.01589661 -0.00291828]
 [ 0.02475063  0.01492887 -0.00131434  0.01434083 -0.00562717 -0.00838413]
 [ 0.0045935   0.0196712   0.01454075  0.00940329  0.00687638 -0.00491381]
 [-0.02007635 -0.02387972 -0.0103665  -0.01783901 -0.00104322  0.00930331]]


**2. Forward propagation experiment with a small sequence**

In [6]:
import numpy as np

# Fixed inputs and parameters for the hand-checkable experiment
x = np.array([[[1, 2],
               [2, 3],
               [3, 4]]]) / 100                       # (batch=1, steps=3, features=2)
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]]) / 100   # (features=2, nodes=4)
w_h = np.array([[1, 3, 5, 7],
                [2, 4, 6, 8],
                [3, 5, 7, 8],
                [4, 6, 8, 10]]) / 100                # (nodes=4, nodes=4)
b = np.array([1, 1, 1, 1])                           # (nodes=4,)

batch_size, n_sequences = x.shape[:2]
n_nodes = w_x.shape[1]

# The recurrence starts from an all-zero hidden state.
h = np.zeros((batch_size, n_nodes))

# Walk the time axis; each slice x_t has shape (1, 2).
for t, x_t in enumerate(np.moveaxis(x, 1, 0)):
    a_t = x_t @ w_x + h @ w_h + b    # pre-activation, (1, 4)
    h = np.tanh(a_t)                 # new hidden state, (1, 4)

print("Final hidden state h₃:\n", h)

Final hidden state h₃:
 [[0.79494228 0.81839002 0.83939649 0.85584174]]


The computed final hidden state matches the expected output.

**3. (Advanced assignment) Implementation of backpropagation**

In [9]:
import numpy as np

class SimpleRNN:
    """
    Minimal RNN layer with forward and backward (BPTT) implementations.

    Recurrence: a_t = x_t . W_x + h_{t-1} . W_h + b, h_t = activation(a_t),
    with h_0 = 0.

    Args:
      n_features       (int): dimensionality of each time-step input x_t
      n_nodes          (int): number of hidden units
      activation       (callable): element-wise nonlinearity (default: np.tanh)
      seed             (int, optional): if given, passed to np.random.seed
                       before the weights are drawn (this reseeds NumPy's
                       global generator)
      activation_deriv (callable, optional): derivative of `activation` with
                       respect to the pre-activation a_t. When omitted, the
                       analytic tanh derivative is used for np.tanh and a
                       central finite difference of `activation` otherwise.
    """
    # Step used by the finite-difference derivative fallback.
    _DERIV_EPS = 1e-6

    def __init__(self, n_features, n_nodes, activation=np.tanh, seed=None,
                 activation_deriv=None):
        if seed is not None:
            # Global seeding is kept deliberately: code that draws from
            # np.random right after building the layer stays reproducible.
            np.random.seed(seed)
        self.n_features = n_features
        self.n_nodes    = n_nodes
        self.activation = activation

        # The derivative must match the chosen activation; otherwise the
        # gradients returned by backward() are wrong for any non-tanh layer.
        if activation_deriv is not None:
            self.activation_deriv = activation_deriv
        elif activation is np.tanh:
            self.activation_deriv = lambda a: 1 - np.tanh(a)**2
        else:
            self.activation_deriv = self._numeric_activation_deriv

        # Parameters
        self.W_x = np.random.randn(n_features, n_nodes) * 0.01
        self.W_h = np.random.randn(n_nodes, n_nodes)    * 0.01
        self.b   = np.zeros((n_nodes,))

        # For storing intermediate states during forward pass
        self.h_states = []
        self.a_states = []

    def _numeric_activation_deriv(self, a):
        """Central-difference derivative of an element-wise activation."""
        eps = self._DERIV_EPS
        return (self.activation(a + eps) - self.activation(a - eps)) / (2 * eps)

    def forward(self, x):
        """
        Run the recurrence and cache the states needed by backward().

        Args:
          x: (batch, T, n_features)
        Returns:
          h_seq: (batch, T, n_nodes); an empty (batch, 0, n_nodes) array
          when T is 0.
        """
        batch, T, _ = x.shape
        h_t = np.zeros((batch, self.n_nodes))
        # h_states[t] is the state entering step t, so h_states[0] is h_0.
        self.h_states = [h_t]
        self.a_states = []

        if T == 0:
            # np.stack raises on an empty list.
            return np.zeros((batch, 0, self.n_nodes))

        outputs = []
        for t in range(T):
            x_t = x[:, t, :]
            a_t = x_t.dot(self.W_x) + h_t.dot(self.W_h) + self.b
            h_t = self.activation(a_t)

            self.a_states.append(a_t)
            self.h_states.append(h_t)
            outputs.append(h_t)

        return np.stack(outputs, axis=1)

    def backward(self, x, dh_out):
        """
        Backprop through time, using the states cached by the last forward().

        Args:
          x:      (batch, T, n_features), the input given to forward()
          dh_out: (batch, T, n_nodes) upstream grad on h_t. The results are
                  divided by batch here, so pass per-sample gradients that
                  have not already been averaged over the batch.
        Returns:
          dW_x, dW_h, db  (normalized by batch size)
        Raises:
          ValueError: if the cached forward pass does not cover T steps.
        """
        batch, T, _ = x.shape
        if len(self.a_states) != T:
            raise ValueError(
                "backward() needs a preceding forward() on the same input: "
                f"{len(self.a_states)} cached steps, expected {T}"
            )

        dW_x   = np.zeros_like(self.W_x)
        dW_h   = np.zeros_like(self.W_h)
        db     = np.zeros_like(self.b)
        # Gradient flowing back from step t+1 into h_t; zero past the end.
        dh_next = np.zeros((batch, self.n_nodes))

        for t in reversed(range(T)):
            dh_total = dh_out[:, t, :] + dh_next      # include next-time gradient
            a_t      = self.a_states[t]
            da_t     = dh_total * self.activation_deriv(a_t)

            x_t    = x[:, t, :]
            h_prev = self.h_states[t]                 # state entering step t

            dW_x += x_t.T.dot(da_t)
            dW_h += h_prev.T.dot(da_t)
            db   += da_t.sum(axis=0)

            dh_next = da_t.dot(self.W_h.T)

        return dW_x / batch, dW_h / batch, db / batch


class ScratchSimpleRNNClassifier:
    """
    Wraps SimpleRNN + a softmax head for sequence classification.

    The head reads only the hidden state of the last time step. Training is
    plain gradient descent on the mean cross-entropy loss: call forward(),
    then backward(), then update_params().

    Args:
      n_features (int): dimensionality of each time-step input
      n_nodes    (int): number of hidden units in the RNN
      n_classes  (int): number of output classes
      lr         (float): learning rate used by update_params()
      seed       (int, optional): forwarded to SimpleRNN
    """
    def __init__(self, n_features, n_nodes, n_classes, lr=1e-2, seed=None):
        self.rnn   = SimpleRNN(n_features, n_nodes, seed=seed)
        # Drawn from NumPy's global generator, after the RNN weights.
        self.W_out = np.random.randn(n_nodes, n_classes) * 0.01
        self.b_out = np.zeros((n_classes,))
        self.lr    = lr
        self.cache = {}   # filled by forward(), consumed by backward()
        self.grads = {}   # filled by backward(), consumed by update_params()

    def forward(self, x):
        """Return class logits (batch, n_classes) for x of shape (batch, T, n_features)."""
        h_seq      = self.rnn.forward(x)         # (batch, T, n_nodes)
        h_last     = h_seq[:, -1, :]             # (batch, n_nodes)
        logits     = h_last.dot(self.W_out) + self.b_out
        self.cache = {'x': x, 'h_last': h_last, 'logits': logits}
        return logits

    def compute_loss_and_grad(self, logits, y_true):
        """
        Mean softmax cross-entropy and its gradient with respect to the logits.

        Args:
          logits: (batch, n_classes)
          y_true: integer class index per sample, length batch
        Returns:
          loss (float), dlogits (batch, n_classes) already divided by batch
        """
        batch_size = logits.shape[0]
        # stable softmax
        exp_s   = np.exp(logits - logits.max(axis=1, keepdims=True))
        probs   = exp_s / exp_s.sum(axis=1, keepdims=True)
        correct = probs[np.arange(batch_size), y_true]
        loss    = -np.log(correct + 1e-12).mean()   # 1e-12 guards log(0)

        dlogits = probs.copy()
        dlogits[np.arange(batch_size), y_true] -= 1
        dlogits /= batch_size
        return loss, dlogits

    def backward(self, y_true):
        """
        Compute the loss for the cached forward pass and store all gradients
        in self.grads.

        Args:
          y_true: integer class index per sample
        Returns:
          loss (float)
        Raises:
          RuntimeError: if forward() has not been called yet.
        """
        if not self.cache:
            raise RuntimeError("forward() must be called before backward()")

        x, h_last, logits = (self.cache[k] for k in ('x', 'h_last', 'logits'))
        loss, dlogits     = self.compute_loss_and_grad(logits, y_true)

        # head grads (dlogits already carries the 1/batch factor)
        dW_out = h_last.T.dot(dlogits)
        db_out = dlogits.sum(axis=0)

        # grad into last hidden state
        batch, T, _ = x.shape
        # self.rnn.backward divides its results by batch itself. dlogits is
        # already averaged, so undo that factor here; otherwise the recurrent
        # gradients end up scaled by 1/batch**2 instead of 1/batch.
        dh_last = dlogits.dot(self.W_out.T) * batch
        dh_out = np.zeros((batch, T, self.rnn.n_nodes))
        dh_out[:, -1, :] = dh_last   # only the last step feeds the head

        # BPTT
        dW_x, dW_h, db = self.rnn.backward(x, dh_out)

        self.grads = {
            'W_x':   dW_x,
            'W_h':   dW_h,
            'b':     db,
            'W_out': dW_out,
            'b_out': db_out
        }
        return loss

    def update_params(self):
        """
        Apply one gradient-descent step using the gradients from backward().

        Raises:
          RuntimeError: if backward() has not been called yet.
        """
        if not getattr(self, 'grads', None):
            raise RuntimeError("backward() must be called before update_params()")

        # RNN params
        self.rnn.W_x -= self.lr * self.grads['W_x']
        self.rnn.W_h -= self.lr * self.grads['W_h']
        self.rnn.b   -= self.lr * self.grads['b']
        # head params
        self.W_out   -= self.lr * self.grads['W_out']
        self.b_out   -= self.lr * self.grads['b_out']


if __name__ == "__main__":
    # Smoke test: one forward/backward/update cycle on random data.
    n_nodes = 6
    batch, T, f, C = (2, 5, 4, 3)

    # Seed first so the inputs and labels are the same on every run.
    np.random.seed(0)
    x = np.random.standard_normal(size=(batch, T, f))
    y = np.random.randint(low=0, high=C, size=(batch,))

    model = ScratchSimpleRNNClassifier(
        n_features=f, n_nodes=n_nodes, n_classes=C, lr=1e-2, seed=0
    )

    logits = model.forward(x)     # caches what backward() needs
    loss = model.backward(y)      # loss is measured before the update
    model.update_params()

    print("Loss:", loss)

Loss: 1.0985512704761449
