In [None]:
import numpy as np

class CustomRNN:
    """Forward-only recurrent network with a leaky-ReLU hidden activation.

    Processes one sequence at a time; every vector is handled as a column
    of shape (size, 1).
    """

    def __init__(self, input_size, hidden_size, output_size, alpha=0.01):
        """Create randomly initialised weights and zero biases.

        Args:
            input_size: Length of each input vector.
            hidden_size: Number of hidden units.
            output_size: Length of each output vector.
            alpha: Slope applied to negative pre-activations by leaky ReLU.
        """
        self.hidden_size = hidden_size
        self.alpha = alpha  # Leaky ReLU parameter

        # Fan-in scaled Gaussian initialisation (std = sqrt(1 / fan_in)),
        # often described as Xavier-style, to keep activations stable.
        self.W_x = np.random.randn(hidden_size, input_size) * np.sqrt(1 / input_size)
        self.W_h = np.random.randn(hidden_size, hidden_size) * np.sqrt(1 / hidden_size)
        self.W_y = np.random.randn(output_size, hidden_size) * np.sqrt(1 / hidden_size)

        self.b_h = np.zeros((hidden_size, 1))
        self.b_y = np.zeros((output_size, 1))

    def leaky_relu(self, x):
        """Return x where x > 0 and alpha * x elsewhere (element-wise).

        Selecting by sign keeps the result correct for any alpha; the
        max(alpha * x, x) shortcut is only equivalent when alpha <= 1.
        """
        return np.where(x > 0, x, self.alpha * x)

    def forward(self, inputs):
        """Run the network over a sequence, starting from a zero hidden state.

        Args:
            inputs: Sequence of arrays, each holding input_size values
                (reshaped internally to a column vector).

        Returns:
            A tuple (outputs, h): outputs is a list with one
            (output_size, 1) array per time step, and h is the final
            (hidden_size, 1) hidden state. For an empty sequence the list
            is empty and h is the zero initial state.
        """
        T = len(inputs)
        h = np.zeros((self.hidden_size, 1))
        outputs = []

        for t in range(T):
            x = inputs[t].reshape(-1, 1)
            # Recurrent update: mix the current input with the previous state.
            h = self.leaky_relu(np.dot(self.W_x, x) + np.dot(self.W_h, h) + self.b_h)
            y = np.dot(self.W_y, h) + self.b_y
            outputs.append(y)

        return outputs, h

# Example usage: run one random sequence through an untrained CustomRNN.
input_size, hidden_size, output_size = 3, 5, 2

rnn = CustomRNN(input_size, hidden_size, output_size)

# A single input sequence: T=4 time steps, each a vector of length input_size.
inputs = [np.random.randn(input_size) for _ in range(4)]

# forward() returns the per-step outputs and the last hidden state.
outputs, final_hidden_state = rnn.forward(inputs)

print("Outputs:", outputs)
print("Final hidden state:", final_hidden_state)


Outputs: [array([[-0.13247714],
       [ 0.14764116]]), array([[0.217034  ],
       [0.38420155]]), array([[ 0.07121305],
       [-0.01310297]]), array([[0.10845563],
       [0.31901473]])]
Final hidden state: [[ 0.61189645]
 [-0.00710323]
 [ 0.38153264]
 [ 0.78761046]
 [ 0.87324151]]


In [None]:
import numpy as np

class VanillaRNN:
    """Single-layer tanh RNN trained with backpropagation through time.

    Hidden states and outputs are kept in dicts keyed by time step; key -1
    of the hidden-state dict holds the all-zero initial state.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        """Create randomly initialised weights and zero biases.

        Args:
            input_size: Length of each input vector.
            hidden_size: Number of hidden units.
            output_size: Length of each output/target vector.
            learning_rate: Step size used for the gradient-descent update.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate

        # Fan-in scaled Gaussian initialisation (std = sqrt(1 / fan_in)).
        self.W_x = np.random.randn(hidden_size, input_size) * np.sqrt(1. / input_size)
        self.W_h = np.random.randn(hidden_size, hidden_size) * np.sqrt(1. / hidden_size)
        self.W_y = np.random.randn(output_size, hidden_size) * np.sqrt(1. / hidden_size)

        self.b_h = np.zeros((hidden_size, 1))
        self.b_y = np.zeros((output_size, 1))

    def forward(self, inputs):
        """Run the network over a sequence.

        Args:
            inputs: Sequence of arrays with input_size values each.

        Returns:
            A tuple (ys, hs) of dicts keyed by time step. ys[t] is the
            (output_size, 1) output and hs[t] the (hidden_size, 1) hidden
            state at step t; hs[-1] is the zero initial state.
        """
        T = len(inputs)
        h_t = np.zeros((self.hidden_size, 1))  # Initial hidden state
        hs, ys = {}, {}
        hs[-1] = h_t  # Store initial hidden state

        for t in range(T):
            x_t = inputs[t].reshape(-1, 1)
            h_t = np.tanh(np.dot(self.W_x, x_t) + np.dot(self.W_h, hs[t-1]) + self.b_h)
            y_t = np.dot(self.W_y, h_t) + self.b_y
            hs[t] = h_t
            ys[t] = y_t

        return ys, hs

    def backward(self, inputs, targets, hs, ys):
        """Backpropagate through time and apply one gradient-descent step.

        The output error ys[t] - targets[t] is the gradient of a
        0.5 * squared-error loss summed over time steps.

        Args:
            inputs: The sequence that was passed to forward().
            targets: One target per time step, output_size values each.
            hs: Hidden-state dict returned by forward().
            ys: Output dict returned by forward().
        """
        T = len(inputs)

        # Initialize gradients
        dW_x = np.zeros_like(self.W_x)
        dW_h = np.zeros_like(self.W_h)
        dW_y = np.zeros_like(self.W_y)
        db_h = np.zeros_like(self.b_h)
        db_y = np.zeros_like(self.b_y)
        # hs[-1] is always present, whereas hs[0] is missing for an empty
        # sequence, so use it as the shape template.
        dh_next = np.zeros_like(hs[-1])

        for t in reversed(range(T)):
            dy = ys[t] - targets[t].reshape(-1, 1)  # Error derivative
            dW_y += np.dot(dy, hs[t].T)
            db_y += dy

            # Gradient reaching h_t from the output and from step t + 1.
            dh = np.dot(self.W_y.T, dy) + dh_next
            dtanh = (1 - hs[t] ** 2) * dh  # Derivative of tanh

            dW_x += np.dot(dtanh, inputs[t].reshape(1, -1))
            dW_h += np.dot(dtanh, hs[t-1].T)  # hs[-1] is used when t == 0
            db_h += dtanh

            dh_next = np.dot(self.W_h.T, dtanh)

        # Element-wise gradient clipping (to prevent exploding gradients)
        for dparam in [dW_x, dW_h, dW_y, db_h, db_y]:
            np.clip(dparam, -1, 1, out=dparam)

        # Update weights
        self.W_x -= self.learning_rate * dW_x
        self.W_h -= self.learning_rate * dW_h
        self.W_y -= self.learning_rate * dW_y
        self.b_h -= self.learning_rate * db_h
        self.b_y -= self.learning_rate * db_y

    def train(self, inputs, targets):
        """Run one forward/backward pass and return the outputs.

        The returned dict holds the predictions computed before the
        weights were updated.
        """
        ys, hs = self.forward(inputs)
        self.backward(inputs, targets, hs, ys)
        return ys

# Example usage: one training step on a random 4-step sequence.
input_size = 3
hidden_size = 5
output_size = 2
rnn = VanillaRNN(input_size, hidden_size, output_size)

# Random inputs and matching random regression targets, one per time step.
inputs = [np.random.randn(input_size) for _ in range(4)]
targets = [np.random.randn(output_size) for _ in range(4)]

# train() returns the per-step outputs of the forward pass.
outputs = rnn.train(inputs, targets)
print("Outputs:", outputs)

Outputs: {0: array([[-0.38853524],
       [ 0.25306185]]), 1: array([[0.62221773],
       [0.20157802]]), 2: array([[-0.77206261],
       [-0.50142082]]), 3: array([[-0.04477417],
       [-0.94373344]])}


In [None]:
import numpy as np

class VanillaRNN:
    """Single-layer tanh RNN that caches its forward pass for BPTT.

    forward() stores the inputs and hidden states on the instance so that
    backward() only needs the per-step output gradients.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        """Create small random weights and zero biases.

        Args:
            input_size: Length of each input vector.
            hidden_size: Number of hidden units.
            output_size: Length of each output/target vector.
            learning_rate: Step size used for the gradient-descent update.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate

        # Weight matrices (shared across time steps)
        self.W_x = np.random.randn(hidden_size, input_size) * 0.01
        self.W_h = np.random.randn(hidden_size, hidden_size) * 0.01
        self.W_y = np.random.randn(output_size, hidden_size) * 0.01

        # Bias vectors
        self.b_h = np.zeros((hidden_size, 1))
        self.b_y = np.zeros((output_size, 1))

    def forward(self, inputs):
        """Run the network over a sequence and cache state for backward().

        Args:
            inputs: Sequence of arrays with input_size values each.

        Returns:
            A tuple (outputs, h): a list of (output_size, 1) outputs, one
            per time step, and the final (hidden_size, 1) hidden state.
        """
        T = len(inputs)
        h = np.zeros((self.hidden_size, 1))  # Initialize hidden state
        # h_states[0] is the initial state, h_states[t + 1] the state after step t.
        self.h_states = [h]  # Store hidden states for backpropagation
        self.inputs = inputs  # Store inputs for backpropagation
        outputs = []

        for t in range(T):
            x = inputs[t].reshape(-1, 1)  # Ensure input is column vector
            h = np.tanh(np.dot(self.W_x, x) + np.dot(self.W_h, h) + self.b_h)  # Hidden state update
            y = np.dot(self.W_y, h) + self.b_y  # Output computation
            outputs.append(y)
            self.h_states.append(h)

        return outputs, h

    def backward(self, doutputs):
        """Backpropagate through time and update the parameters in place.

        Must be called after forward(), whose cached inputs and hidden
        states it reads.

        Args:
            doutputs: Per-step loss gradients with respect to the outputs,
                each of shape (output_size, 1).
        """
        T = len(self.inputs)
        dW_x, dW_h, dW_y = np.zeros_like(self.W_x), np.zeros_like(self.W_h), np.zeros_like(self.W_y)
        db_h, db_y = np.zeros_like(self.b_h), np.zeros_like(self.b_y)
        dh_next = np.zeros((self.hidden_size, 1))

        for t in reversed(range(T)):
            dy = doutputs[t]
            dW_y += np.dot(dy, self.h_states[t + 1].T)
            db_y += dy

            # Gradient reaching h_t from the output and from step t + 1.
            dh = np.dot(self.W_y.T, dy) + dh_next
            dh_raw = (1 - self.h_states[t + 1] ** 2) * dh  # Derivative of tanh
            dW_x += np.dot(dh_raw, self.inputs[t].reshape(1, -1))
            dW_h += np.dot(dh_raw, self.h_states[t].T)
            db_h += dh_raw

            dh_next = np.dot(self.W_h.T, dh_raw)

        for param, dparam in zip([self.W_x, self.W_h, self.W_y, self.b_h, self.b_y],
                                 [dW_x, dW_h, dW_y, db_h, db_y]):
            np.clip(dparam, -1, 1, out=dparam)  # Element-wise gradient clipping
            param -= self.learning_rate * dparam  # In-place update of the array

    def train(self, inputs, targets):
        """Run one forward/backward pass on a single sequence.

        Args:
            inputs: Sequence of arrays with input_size values each.
            targets: One target per time step; each may be 1-D or a
                column vector with output_size values.

        Returns:
            The list of outputs from the forward pass (computed before
            the weights were updated).
        """
        outputs, _ = self.forward(inputs)
        # Coerce targets to column vectors: subtracting a 1-D target from an
        # (n, 1) output would broadcast to an (n, n) matrix.
        doutputs = [
            y_pred - np.asarray(y_true).reshape(-1, 1)
            for y_pred, y_true in zip(outputs, targets)
        ]
        self.backward(doutputs)
        return outputs

# Example usage: one training step with column-vector targets.
input_size, hidden_size, output_size = 3, 5, 2  # features in, hidden units, features out

# Build the network
rnn = VanillaRNN(input_size, hidden_size, output_size)

# One sequence of T=4 steps; targets are (output_size, 1) column vectors.
inputs = [np.random.randn(input_size) for _ in range(4)]
targets = [np.random.randn(output_size, 1) for _ in range(4)]

# Single forward/backward pass with a parameter update
rnn.train(inputs, targets)

print("Training completed.")


Training completed.


In [None]:
import numpy as np

class VanillaRNN:
    """Single-layer tanh RNN with explicit forward and backward passes.

    forward() returns everything backward() needs, so no state from the
    forward pass is cached on the instance. Gradients are not clipped.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        """Store the layer sizes and create small random weights, zero biases."""
        self.input_size, self.hidden_size, self.output_size = (
            input_size, hidden_size, output_size)
        self.learning_rate = learning_rate

        # Weights are drawn in the order W_x, W_h, W_y and scaled down.
        scale = 0.01
        self.W_x = scale * np.random.randn(hidden_size, input_size)
        self.W_h = scale * np.random.randn(hidden_size, hidden_size)
        self.W_y = scale * np.random.randn(output_size, hidden_size)

        self.b_h = np.zeros((hidden_size, 1))
        self.b_y = np.zeros((output_size, 1))

    def forward(self, inputs):
        """Run the network over a sequence.

        Returns:
            A tuple (outputs, hidden_states). outputs holds one
            (output_size, 1) array per step; hidden_states has one more
            entry than outputs because index 0 is the zero initial state.
        """
        state = np.zeros((self.hidden_size, 1))
        hidden_states = [state]
        outputs = []

        for step_input in inputs:
            column = step_input.reshape(-1, 1)
            pre_activation = self.W_x.dot(column) + self.W_h.dot(state) + self.b_h
            state = np.tanh(pre_activation)
            hidden_states.append(state)
            outputs.append(self.W_y.dot(state) + self.b_y)

        return outputs, hidden_states

    def backward(self, inputs, targets, outputs, hidden_states):
        """Backpropagate through time and take one gradient-descent step.

        The per-step error is outputs[t] - targets[t], with each target
        reshaped to a column vector.
        """
        names = ("W_x", "W_h", "W_y", "b_h", "b_y")
        grads = {name: np.zeros_like(getattr(self, name)) for name in names}

        # Gradient flowing back into the hidden state from the later step.
        carried = np.zeros((self.hidden_size, 1))

        for t in range(len(inputs) - 1, -1, -1):
            column = inputs[t].reshape(-1, 1)
            h_now = hidden_states[t + 1]
            h_before = hidden_states[t]

            error = outputs[t] - targets[t].reshape(-1, 1)

            grads["W_y"] += error.dot(h_now.T)
            grads["b_y"] += error

            # Combine output and recurrent gradients, then pass through tanh'.
            pre_act_grad = (1 - h_now * h_now) * (self.W_y.T.dot(error) + carried)
            grads["W_x"] += pre_act_grad.dot(column.T)
            grads["W_h"] += pre_act_grad.dot(h_before.T)
            grads["b_h"] += pre_act_grad

            carried = self.W_h.T.dot(pre_act_grad)

        # Plain gradient-descent update, applied in place to each array.
        for name in names:
            parameter = getattr(self, name)
            parameter -= self.learning_rate * grads[name]

# Example usage: explicit forward pass followed by one backward pass.
input_size, hidden_size, output_size = 3, 5, 2  # features in, hidden units, features out

# Build the network
rnn = VanillaRNN(input_size, hidden_size, output_size, learning_rate=0.01)

# One sequence of T=4 steps with a random target per step.
inputs = [np.random.randn(input_size) for _ in range(4)]
targets = [np.random.randn(output_size) for _ in range(4)]

# forward() hands back what backward() needs to compute the update.
outputs, hidden_states = rnn.forward(inputs)
rnn.backward(inputs, targets, outputs, hidden_states)

print("Outputs:", outputs)
print("Final hidden state:", hidden_states[-1])


Outputs: [array([[0.00027748],
       [0.00016316]]), array([[0.00024286],
       [0.00024122]]), array([[3.85212746e-05],
       [9.64918783e-05]]), array([[ 3.26208925e-06],
       [-2.08332242e-04]])]
Final hidden state: [[-0.05627065]
 [-0.01653509]
 [-0.01294208]
 [-0.00424558]
 [-0.05891212]]
