##### Problem 1: Simple Forward Propagation Implementation of an RNN

In [1]:
import numpy as np

class SimpleRNN:
    """Single-layer recurrent network with a tanh activation (forward pass only).

    The parameters ``Wx`` (n_features, n_nodes), ``Wh`` (n_nodes, n_nodes) and
    ``B`` (n_nodes,) start out as ``None``. Any that are still ``None`` when
    ``forward`` is called are created there, so a caller may assign its own
    values beforehand.
    """

    def __init__(self, n_features, n_nodes):
        """Record the layer sizes; the parameters themselves stay unset."""
        self.n_features = n_features
        self.n_nodes = n_nodes
        # Placeholders, filled in lazily by forward() unless assigned earlier.
        self.Wx = None  # Shape: (n_features, n_nodes)
        self.Wh = None  # Shape: (n_nodes, n_nodes)
        self.B = None   # Shape: (n_nodes,)

    def forward(self, x, h0=None):
        """Run the recurrence over every time step and return the last state.

        Parameters
        ----------
        x : ndarray
            Input of shape (batch_size, n_sequences, n_features).
        h0 : ndarray, optional
            Starting hidden state. When omitted, a (batch_size, n_nodes)
            array of zeros is used.

        Returns
        -------
        ndarray
            Hidden state after the final time step (``h0`` or zeros when the
            sequence is empty). Every intermediate state is also kept, in
            order, in ``self.h_list``.
        """
        n_batch, n_steps, _ = x.shape

        # Create whichever parameters are still missing: random-normal
        # weights (Wx drawn before Wh) and a zero bias.
        if self.Wx is None:
            self.Wx = np.random.randn(self.n_features, self.n_nodes)
        if self.Wh is None:
            self.Wh = np.random.randn(self.n_nodes, self.n_nodes)
        if self.B is None:
            self.B = np.zeros(self.n_nodes)

        state = np.zeros((n_batch, self.n_nodes)) if h0 is None else h0

        history = []
        self.h_list = history
        for step in range(n_steps):
            # Pre-activation: input term + recurrent term + bias.
            pre_activation = x[:, step, :] @ self.Wx + state @ self.Wh + self.B
            state = np.tanh(pre_activation)
            history.append(state)
        return state


##### Problem 2: Experiment of Forward Propagation with Small Sequence

In [2]:
# Input and parameter setup: a single sample with 3 time steps and 2 features,
# fed through 4 hidden nodes. Inputs and weights are scaled by 1/100; the bias is not.
x = np.array([[[1, 2], [2, 3], [3, 4]]]) / 100  # (batch_size, n_sequences, n_features) = (1, 3, 2)
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]]) / 100  # (n_features, n_nodes) = (2, 4)
w_h = np.array([[1, 3, 5, 7],
                [2, 4, 6, 8],
                [3, 5, 7, 8],
                [4, 6, 8, 10]]) / 100  # (n_nodes, n_nodes) = (4, 4)
b = np.array([1, 1, 1, 1])  # (n_nodes,) = (4,)
batch_size = x.shape[0]
n_sequences = x.shape[1]
n_features = x.shape[2]
n_nodes = w_x.shape[1]
h = np.zeros((batch_size, n_nodes))  # initial hidden state

# Forward propagation step-by-step: h_t = tanh(x_t @ w_x + h_{t-1} @ w_h + b)
for t in range(n_sequences):
    xt = x[:, t, :]  # (1, 2)
    at = xt @ w_x + h @ w_h + b  # (1, 4)
    h = np.tanh(at)

print("Final hidden state:\n", h)

# Verify against the reference result so a regression fails loudly instead of
# relying on a visual comparison with the printed output.
expected_h = np.array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])
np.testing.assert_allclose(h, expected_h, rtol=1e-6)


Final hidden state:
 [[0.79494228 0.81839002 0.83939649 0.85584174]]


##### Problem 3 (Advanced): Implementation of Backpropagation


In [3]:
class SimpleRNNWithGrad(SimpleRNN):
    """SimpleRNN extended with backpropagation through time and a gradient-descent update."""

    def backward(self, x, h_list, dh_next, learning_rate=0.01, h0=None):
        """Backpropagate a gradient on the final hidden state and update the parameters.

        Parameters
        ----------
        x : ndarray
            Input of shape (batch_size, n_sequences, n_features), the same
            array that produced ``h_list``.
        h_list : sequence of ndarray
            Hidden states per time step; ``h_list[t]`` is the state after
            step ``t``, each of shape (batch_size, n_nodes).
        dh_next : ndarray
            Gradient with respect to the final hidden state,
            shape (batch_size, n_nodes).
        learning_rate : float, optional
            Step size of the parameter update.
        h0 : ndarray, optional
            Hidden state that preceded the first time step. Pass the same
            value that was given to the forward pass; when omitted, zeros
            are assumed.

        Returns
        -------
        tuple of ndarray
            ``(dWx, dWh, dB)`` summed over all time steps. The update
            ``param -= learning_rate * grad`` has already been applied when
            this returns.

        Raises
        ------
        ValueError
            If ``Wx``, ``Wh`` or ``B`` has not been set yet.
        """
        if self.Wx is None or self.Wh is None or self.B is None:
            raise ValueError(
                "Parameters are not initialized; run forward() or assign "
                "Wx, Wh and B before calling backward()."
            )

        # Unpacking enforces a 3-D input; only the sequence length is needed.
        _, n_sequences, _ = x.shape

        # Integer-typed parameters (e.g. a bias given as np.array([1, 1, 1, 1]))
        # cannot receive the in-place float updates below, so promote them once.
        # Arrays that are already floating point are left as the same object.
        for name in ("Wx", "Wh", "B"):
            param = np.asarray(getattr(self, name))
            if not np.issubdtype(param.dtype, np.inexact):
                param = param.astype(float)
            setattr(self, name, param)

        # Initialize gradients
        dWx = np.zeros_like(self.Wx)
        dWh = np.zeros_like(self.Wh)
        dB = np.zeros_like(self.B)

        dh_t = dh_next  # (batch_size, n_nodes)

        for t in reversed(range(n_sequences)):
            xt = x[:, t, :]  # (batch_size, n_features)
            h_t = h_list[t]
            # State that fed step t: the previous step's output, or the
            # initial state (zeros unless the caller supplied h0) for t == 0.
            if t > 0:
                h_prev = h_list[t - 1]
            elif h0 is not None:
                h_prev = h0
            else:
                h_prev = np.zeros_like(h_t)

            # Derivative of tanh, expressed through its output: 1 - tanh(a)^2
            da = dh_t * (1 - h_t ** 2)  # (batch_size, n_nodes)

            # Gradients
            dWx += xt.T @ da  # (n_features, n_nodes)
            dWh += h_prev.T @ da  # (n_nodes, n_nodes)
            dB += np.sum(da, axis=0)  # (n_nodes,)

            # Propagate error to previous step (uses Wh before it is updated)
            dh_t = da @ self.Wh.T

        # Parameter updates
        self.Wx -= learning_rate * dWx
        self.Wh -= learning_rate * dWh
        self.B -= learning_rate * dB

        return dWx, dWh, dB
