<a href="https://colab.research.google.com/github/developerabhi14/ML-Notebooks/blob/main/RNN_implementation_using_NumPy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RNN Implementation using Numpy


In [1]:
import numpy as np

In [2]:
# Sample data: "hello"
char_to_index = {'h': 0, 'e': 1, 'l': 2, 'o': 3}
index_to_char = {0: 'h', 1: 'e', 2: 'l', 3: 'o'}
text = "hello"

In [3]:
input_size = len(char_to_index)  # 4 (one-hot encoded)
hidden_size = 8 # You can change it!
output_size = len(char_to_index) # 4

In [5]:
U = np.random.randn(hidden_size, input_size) * 0.01  # Input to hidden
W = np.random.randn(hidden_size, hidden_size) * 0.01 # Hidden to hidden
V = np.random.randn(output_size, hidden_size) * 0.01 # Hidden to output

# Bias vectors
b = np.zeros((hidden_size, 1)) # Hidden bias
c = np.zeros((output_size, 1)) # Output bias

In [6]:
def forward_pass(inputs, h_prev):
    x = np.zeros((input_size, 1)) # Create vector for input
    x[inputs] = 1 # One hot encode current input
    h = np.tanh(np.dot(U, x) + np.dot(W, h_prev) + b) # Get the current hidden state
    o = np.dot(V, h) + c # Predict the output
    y_hat = np.exp(o) / np.sum(np.exp(o)) # Use softmax function to get probabilities for all output classes

    return y_hat, h


In [7]:
def cross_entropy_loss(y_hat, target_index):
    return -np.log(y_hat[target_index, 0])

In [8]:
def backward_pass(y_hat, target_index, h, h_prev, x):
    # Gradients of output layer
    do = np.copy(y_hat) # copy y_hat to do
    do[target_index] -= 1 # Subtract 1 on the output vector
    dV = np.dot(do, h.T) # Gradient of V
    dc = do # Gradient of c

    dh = np.dot(V.T, do) # Gradient of hidden layer
    dhraw = (1 - h * h) * dh # Gradient of raw hidden layer

    db = dhraw # Gradient of b
    dU = np.dot(dhraw, x.T) # Gradient of U
    dW = np.dot(dhraw, h_prev.T) # Gradient of W

    return dU, dW, dV, db, dc

In [9]:
# Training loop
learning_rate = 0.1
n_epochs = 100

h_prev = np.zeros((hidden_size, 1)) # Initiate the hidden layer

for epoch in range(n_epochs):
    loss = 0
    dU, dW, dV, db, dc = np.zeros_like(U), np.zeros_like(W), np.zeros_like(V), np.zeros_like(b), np.zeros_like(c) # Initialize the derivatives

    for t in range(len(text) - 1):
        inputs = char_to_index[text[t]] # Get the input from text data
        target_index = char_to_index[text[t+1]] # Get the target
        x = np.zeros((input_size, 1)) # Create vector for input
        x[inputs] = 1 # One hot encode current input

        # Forward pass
        y_hat, h_prev = forward_pass(inputs, h_prev) # One forward pass

        # Calculate loss
        loss += cross_entropy_loss(y_hat, target_index) # Calculate cross entropy loss

        # Backpropagation
        current_dU, current_dW, current_dV, current_db, current_dc = backward_pass(y_hat, target_index, h_prev, h_prev, x)

        # Sum the gradients
        dU += current_dU
        dW += current_dW
        dV += current_dV
        db += current_db
        dc += current_dc

    # Update parameters
    U -= learning_rate * dU
    W -= learning_rate * dW
    V -= learning_rate * dV
    b -= learning_rate * db
    c -= learning_rate * dc

    # Print loss every epoch
    print('Epoch:', epoch+1, 'Loss =', loss)


Epoch: 1 Loss = 5.545382814249162
Epoch: 2 Loss = 5.354954398014552
Epoch: 3 Loss = 5.2005847192089645
Epoch: 4 Loss = 5.075195278970911
Epoch: 5 Loss = 4.972962568681064
Epoch: 6 Loss = 4.889139287775643
Epoch: 7 Loss = 4.819895676638067
Epoch: 8 Loss = 4.762171158293414
Epoch: 9 Loss = 4.7135372886093485
Epoch: 10 Loss = 4.67207520644026
Epoch: 11 Loss = 4.636269706707246
Epoch: 12 Loss = 4.604920264816385
Epoch: 13 Loss = 4.57706789246887
Epoch: 14 Loss = 4.551935862777016
Epoch: 15 Loss = 4.528882023779998
Epoch: 16 Loss = 4.50736045275451
Epoch: 17 Loss = 4.486890436967573
Epoch: 18 Loss = 4.467031096704478
Epoch: 19 Loss = 4.447360337722577
Epoch: 20 Loss = 4.427457212910774
Epoch: 21 Loss = 4.406887191488078
Epoch: 22 Loss = 4.385190297627041
Epoch: 23 Loss = 4.361872612571785
Epoch: 24 Loss = 4.336402250198583
Epoch: 25 Loss = 4.308211601321883
Epoch: 26 Loss = 4.276708320640965
Epoch: 27 Loss = 4.241298017269559
Epoch: 28 Loss = 4.201421559133063
Epoch: 29 Loss = 4.15660877542

In [16]:
# Prediction
h_prev = np.zeros((hidden_size, 1))
predicted_text = 'e'
for t in range(len(text) - 1):
    inputs = char_to_index[text[t]]
    y_hat, h_prev = forward_pass(inputs, h_prev)
    predicted_index = np.argmax(y_hat)
    predicted_text += index_to_char[predicted_index]
print(predicted_text)

eello
