<a href="https://colab.research.google.com/github/grvnair/rnn-using-numpy/blob/main/RNN_implementation_from_scratch_using_NumPy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

In [None]:
class RNN:
    """Single-layer tanh RNN that reads a sequence and emits one output.

    The hidden state is updated once per time step and a linear read-out of
    the final hidden state produces the prediction (many-to-one). Training is
    plain gradient descent, with gradients obtained by backpropagation
    through time (BPTT).

    Shapes used throughout (row-vector convention):
        wx: (input_size, hidden_size)    bh: (1, hidden_size)
        wh: (hidden_size, hidden_size)   by: (1, output_size)
        wy: (hidden_size, output_size)
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        """Allocate the parameters.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the hidden state.
            output_size: Number of output values.
            learning_rate: Step size used by ``backprop`` when updating the
                parameters.
        """
        self.lr = learning_rate
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Weights: standard-normal draws scaled down by 1000.
        self.wx = np.random.randn(input_size, hidden_size) / 1000
        self.wh = np.random.randn(hidden_size, hidden_size) / 1000
        self.wy = np.random.randn(hidden_size, output_size) / 1000

        # Biases start at zero.
        self.bh = np.zeros((1, hidden_size))
        self.by = np.zeros((1, output_size))

    # Forward Propagation
    def forwardprop(self, x):
        """Run the network over one sequence.

        Args:
            x: Array-like with ``x.shape[0]`` time steps and ``input_size``
                values per step.

        Returns:
            A tuple ``(y, h)``. ``y`` has shape ``(1, output_size)`` and is
            the linear read-out of the final hidden state. ``h`` has shape
            ``(steps, hidden_size)`` and holds the hidden state after every
            time step; pass it unchanged to ``backprop``. For an empty
            sequence ``h`` has zero rows and ``y`` is computed from the
            all-zero initial state.

        Raises:
            ValueError: If ``x`` cannot be viewed as
                ``(steps, input_size)``.
        """
        x = np.asarray(x, dtype=float)
        steps = x.shape[0]
        x = x.reshape(steps, self.input_size)

        h = np.zeros((steps, self.hidden_size))
        # The initial hidden state is a zero row vector so that
        # ``prev_h @ wh`` has the same shape as ``x[i] @ wx``.
        prev_h = np.zeros((1, self.hidden_size))

        for i in range(steps):
            # Only the current time step feeds the hidden state.
            prev_h = np.tanh(
                np.dot(x[i:i + 1], self.wx) + np.dot(prev_h, self.wh) + self.bh
            )
            h[i] = prev_h[0]

        # Output layer: matrix product with the final hidden state.
        y = np.dot(prev_h, self.wy) + self.by
        return y, h

    # Backpropagation through time
    def backprop(self, x, h, y, y_true):
        """Take one gradient-descent step on a single sequence.

        ``y - y_true`` is used as the gradient of the loss with respect to
        the output, i.e. the loss is ``0.5 * sum((y - y_true) ** 2)``.

        Args:
            x: The input sequence that was given to ``forwardprop``.
            h: The per-step hidden states returned by ``forwardprop``.
            y: The prediction returned by ``forwardprop``.
            y_true: Target with ``output_size`` values.

        Returns:
            None. ``wx``, ``wh``, ``wy``, ``bh`` and ``by`` are updated in
            place.

        Raises:
            ValueError: If the arguments do not match the configured sizes.
        """
        x = np.asarray(x, dtype=float)
        t = x.shape[0]
        x = x.reshape(t, self.input_size)
        h = np.asarray(h, dtype=float).reshape(t, self.hidden_size)
        dy = (
            np.asarray(y, dtype=float).reshape(1, self.output_size)
            - np.asarray(y_true, dtype=float).reshape(1, self.output_size)
        )

        # State that precedes step 0 (and the "final" state of an empty
        # sequence) is the all-zero initial state used by forwardprop.
        initial_h = np.zeros(self.hidden_size)
        last_h = h[-1] if t > 0 else initial_h

        d_wx = np.zeros_like(self.wx)
        d_wh = np.zeros_like(self.wh)
        d_bh = np.zeros_like(self.bh)

        # Gradient calculation in the Outer Layer. The output is produced
        # once, from the final hidden state, so it contributes once.
        d_wy = np.dot(last_h.reshape(-1, 1), dy)
        d_by = dy

        # Gradient flowing into the final hidden state.
        d_h = np.dot(dy, self.wy.T)

        # Looping in reverse for backpropagation
        for i in range(t - 1, -1, -1):
            # Through tanh: d/dz tanh(z) = 1 - tanh(z)^2 = 1 - h[i]^2.
            d_pre = d_h * (1 - np.power(h[i], 2))
            prev_h = h[i - 1] if i > 0 else initial_h

            # Gradient calculation in the Hidden Layer
            d_wh += np.dot(prev_h.reshape(-1, 1), d_pre)
            d_bh += d_pre

            # Gradient calculation in the Input Layer
            d_wx += np.dot(x[i].reshape(-1, 1), d_pre)

            # Hand the gradient to the previous time step through wh.
            d_h = np.dot(d_pre, self.wh.T)

        # Updating the Weights and Biases (all gradients were computed with
        # the pre-update parameters).
        self.wx -= self.lr * d_wx
        self.wh -= self.lr * d_wh
        self.wy -= self.lr * d_wy
        self.bh -= self.lr * d_bh
        self.by -= self.lr * d_by