<a href="https://colab.research.google.com/github/gaixen/Codes-from-scratch/blob/main/RNN%20and%20LSTM/RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [175]:
import numpy as np
import pandas as pd
import string
from scipy.special import softmax
# Training inputs: five sequences (one per row), each containing all 26
# uppercase ASCII letters exactly once, in a different order.
inputs = np.array([
    ["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z"],
    ["Z","Y","X","W","V","U","T","S","R","Q","P","O","N","M","L","K","J","I","H","G","F","E","D","C","B","A"],
    ["B","D","F","H","J","L","N","P","R","T","V","X","Z","A","C","E","G","I","K","M","O","Q","S","U","W","Y"],
    ["M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","A","B","C","D","E","F","G","H","I","J","K","L"],
    ["H","G","F","E","D","C","B","A","L","K","J","I","P","O","N","M","U","T","S","R","Q","X","W","V","Z","Y"]
])

# Target sequences: row i / column t is the target for inputs[i][t].
# NOTE(review): in rows 0, 2 and 3 every target is the alphabetical successor
# of the corresponding input letter (Z wraps to A). Rows 1 and 4 do NOT follow
# that per-position rule: they simply list the alphabet in order, starting
# from the successor of the row's first input letter. Confirm whether that is
# intentional or whether those two rows should also be per-letter successors.
expected = np.array([
    ["B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","A"],
    ["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z"],
    ["C","E","G","I","K","M","O","Q","S","U","W","Y","A","B","D","F","H","J","L","N","P","R","T","V","X","Z"],
    ["N","O","P","Q","R","S","T","U","V","W","X","Y","Z","A","B","C","D","E","F","G","H","I","J","K","L","M"],
    ["I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","A","B","C","D","E","F","G","H"]
])

In [176]:
def string_to_one_hot_encoding(inputs: np.ndarray) -> list[list[np.ndarray]]:
  """Encode sequences of letters as one-hot column vectors.

  Args:
    inputs: An iterable of rows, where each row is an iterable of
      single-character strings (a 2-D array of letters, a list of lists, or
      a list of plain strings all work).

  Returns:
    A list with one entry per row. Each entry is a list of ``(26, 1)`` float
    arrays with a single 1 at the letter's position in
    ``string.ascii_uppercase``. Matching is case-insensitive. Characters
    that are not ASCII letters are skipped, so an encoded row can be shorter
    than its source row.
  """
  alphabet = string.ascii_uppercase
  char_to_index = {char: i for i, char in enumerate(alphabet)}

  one_hot_inputs = []
  for row in inputs:
    one_hot_row = []
    for char in row:
      index = char_to_index.get(char.upper())
      if index is None:
        # Not a letter of the alphabet: skipped on purpose (best effort).
        continue
      one_hot_vector = np.zeros((len(alphabet), 1))
      one_hot_vector[index] = 1
      one_hot_row.append(one_hot_vector)
    one_hot_inputs.append(one_hot_row)
  return one_hot_inputs

# Layer classes

In [177]:
class inputlayer():
  """Input-to-hidden projection of the RNN.

  Holds the input sequence, the weight matrix ``U`` of shape
  ``(hidden_size, input_size)`` and ``delta_U``, the gradient of the loss
  with respect to ``U`` accumulated over the time steps of one backward pass.
  """
  inputs: np.ndarray
  U: np.ndarray = None
  delta_U: np.ndarray = None

  def __init__(self, inputs: np.ndarray, hidden_size: int) -> None:
    """Store the sequence and initialise ``U`` uniformly in [0, 1).

    Args:
      inputs: Sequence of input vectors, one per time step. The length of
        the first vector determines the number of columns of ``U``.
      hidden_size: Number of hidden units (rows of ``U``).
    """
    self.inputs = inputs
    input_size = inputs[0].shape[0]
    self.U = np.random.uniform(low=0, high=1, size=(hidden_size, input_size))
    self.delta_U = np.zeros_like(self.U)

  def get_inputs(self, time_step: int) -> np.ndarray:
    """Return the input at ``time_step`` as a column vector."""
    # -1 lets NumPy infer the length, so the layer is not tied to a
    # 26-symbol vocabulary.
    return self.inputs[time_step].reshape((-1, 1))

  def weighted_sum(self, time_step: int) -> np.ndarray:
    """Return ``U @ x_t``, the input contribution to the hidden pre-activation."""
    return self.U @ self.get_inputs(time_step)

  def calculate_deltas_per_step(self, time_step: int,
                                delta_weighted_sum: np.ndarray) -> None:
    """Accumulate the gradient of ``U`` contributed by one time step.

    Args:
      time_step: Index of the time step being back-propagated.
      delta_weighted_sum: Gradient of the loss with respect to the hidden
        pre-activation at ``time_step``, shape ``(hidden_size, 1)``.
    """
    self.delta_U += delta_weighted_sum @ self.get_inputs(time_step).T

  def update_weights(self, learning_rate: float) -> None:
    """Apply one gradient-descent step to ``U`` and clear the accumulator."""
    # The step must be applied to the weights themselves, not to the
    # gradient buffer.
    self.U -= learning_rate * self.delta_U
    # Start the next backward pass from a zero gradient.
    self.delta_U.fill(0)

In [178]:
class hiddenlayer():
  """Recurrent tanh layer of the RNN.

  Attributes:
    W: Hidden-to-hidden weights, shape ``(size, size)``.
    bias: Hidden bias, shape ``(size, 1)``.
    delta_W, delta_bias: Gradients accumulated over one backward pass.
    states: Hidden activations per time step, shape ``(steps, size, 1)``.
      It is ``None`` until the owner allocates it for a sequence.
    current_activation: Despite its name, this holds the gradient flowing
      back from the *next* time step (``W.T @ delta_weighted_sum``) during
      back-propagation through time.
  """
  W: np.ndarray = None
  delta_W: np.ndarray = None
  bias: np.ndarray = None
  delta_bias: np.ndarray = None
  states: np.ndarray = None
  current_activation: np.ndarray = None

  def __init__(self, vocab_size: int, size: int):
    """Initialise weights and bias uniformly in [0, 1).

    Args:
      vocab_size: Unused; kept so existing callers keep working.
      size: Number of hidden units.
    """
    # NOTE(review): all-positive weights in [0, 1) may saturate tanh for
    # large hidden sizes - consider a zero-centred, scaled initialisation.
    self.W = np.random.uniform(low=0, high=1, size=(size, size))
    self.delta_W = np.zeros_like(self.W)
    self.bias = np.random.uniform(low=0, high=1, size=(size, 1))
    self.delta_bias = np.zeros_like(self.bias)
    self.states = None
    self.current_activation = np.zeros(shape=(size, 1))

  def get_hidden_state(self, time_step: int) -> np.ndarray:
    """Return the hidden state at ``time_step``.

    A zero column vector is returned for any step without a stored state:
    a negative step (the state "before" the sequence), a step past the end,
    or when no states have been allocated yet.
    """
    if self.states is None or time_step < 0 or time_step >= self.states.shape[0]:
      return np.zeros_like(self.bias)
    return self.states[time_step]

  def set_hidden_state(self, time_step: int, hidden_state: np.ndarray) -> None:
    """Store ``hidden_state`` at ``time_step``; steps past the end are ignored."""
    if time_step < self.states.shape[0]:
      self.states[time_step] = hidden_state

  def calculate_activation(self, weighted_input: np.ndarray, time_step: int) -> np.ndarray:
    """Compute and store ``h_t = tanh(weighted_input + W @ h_{t-1} + bias)``.

    Args:
      weighted_input: Input contribution to the pre-activation, shape
        ``(size, 1)``.
      time_step: Index of the current time step.

    Returns:
      The new hidden state, shape ``(size, 1)``.
    """
    previous_hidden_state = self.get_hidden_state(time_step - 1)
    weighted_sum = weighted_input + self.W @ previous_hidden_state + self.bias
    activation = np.tanh(weighted_sum)
    self.set_hidden_state(time_step, activation)
    return activation

  def calculate_deltas_per_step(self, time_step: int, delta_output: np.ndarray) -> np.ndarray:
    """Back-propagate one time step and accumulate ``delta_W`` / ``delta_bias``.

    Must be called from the last time step to the first, because the
    gradient carried over from step ``t + 1`` is kept in
    ``current_activation``.

    Args:
      time_step: Index of the time step being back-propagated.
      delta_output: Gradient arriving from the output layer at this step,
        shape ``(size, 1)``.

    Returns:
      Gradient of the loss with respect to the pre-activation at
      ``time_step``, shape ``(size, 1)``.
    """
    # Total gradient w.r.t. h_t: output path plus recurrent path from t + 1.
    delta_activation = delta_output + self.current_activation
    hidden_state = self.get_hidden_state(time_step)
    # d tanh(z) / dz = 1 - tanh(z)^2, and tanh(z) is the stored state.
    delta_weighted_sum = delta_activation * (1 - hidden_state ** 2)
    self.current_activation = self.W.T @ delta_weighted_sum
    self.delta_W += delta_weighted_sum @ self.get_hidden_state(time_step - 1).T
    self.delta_bias += delta_weighted_sum
    return delta_weighted_sum

  def update_weights_biases(self, learning_rate: float) -> None:
    """Apply one gradient-descent step and reset the backward-pass state."""
    self.W -= learning_rate * self.delta_W
    self.bias -= learning_rate * self.delta_bias
    # Clear the accumulators and the carried recurrent gradient so the next
    # sequence does not inherit gradients from this one.
    self.delta_W.fill(0)
    self.delta_bias.fill(0)
    self.current_activation = np.zeros_like(self.bias)

In [179]:
class outputlayer():
  """Softmax read-out layer of the RNN.

  Attributes:
    V: Hidden-to-output weights, shape ``(size, hidden_size)``.
    C: Output bias, shape ``(size, 1)``.
    delta_V, delta_C: Gradients accumulated over one backward pass.
    states: Predictions per time step, shape ``(steps, size, 1)``.
  """
  states: np.ndarray = None
  V: np.ndarray = None
  delta_V: np.ndarray = None
  C: np.ndarray = None
  delta_C: np.ndarray = None

  def __init__(self, size: int, hidden_size: int) -> None:
    """Initialise weights and bias uniformly in [0, 1).

    Args:
      size: Number of output classes.
      hidden_size: Number of hidden units feeding this layer.
    """
    # Room for `size` time steps by default; an owner may replace `states`
    # with a buffer sized for the actual sequence length.
    self.states = np.zeros(shape=(size, size, 1))
    self.V = np.random.uniform(low=0, high=1, size=(size, hidden_size))
    self.delta_V = np.zeros_like(self.V)
    self.C = np.random.uniform(low=0, high=1, size=(size, 1))
    self.delta_C = np.zeros_like(self.C)

  def output(self, hidden_state: np.ndarray, time_step: int) -> np.ndarray:
    """Compute, store and return ``softmax(V @ hidden_state + C)``.

    Args:
      hidden_state: Hidden activation, shape ``(hidden_size, 1)``.
      time_step: Index under which the prediction is stored in ``states``.

    Returns:
      Class probabilities, shape ``(size, 1)``.
    """
    logits = self.V @ hidden_state + self.C
    # Normalise over the class axis explicitly.
    prediction = softmax(logits, axis=0)
    self.set_state(time_step, prediction)
    return prediction

  def get_state(self, time_step: int) -> np.ndarray:
    """Return the stored prediction, or zeros if ``time_step`` is out of range."""
    if time_step < 0 or time_step >= self.states.shape[0]:
      return np.zeros_like(self.C)
    return self.states[time_step]

  def set_state(self, time_step: int, prediction: np.ndarray) -> None:
    """Store ``prediction`` at ``time_step``."""
    self.states[time_step] = prediction

  def calculate_deltas_per_step(self, hidden_state: np.ndarray, expected: np.ndarray, time_step: int) -> np.ndarray:
    """Accumulate ``delta_V`` / ``delta_C`` for one step and back-propagate.

    For a softmax output trained with cross-entropy, the gradient with
    respect to the logits is ``softmax(logits) - expected``.

    Args:
      hidden_state: Hidden activation at this step, shape
        ``(hidden_size, 1)``.
      expected: One-hot target, shape ``(size, 1)``.
      time_step: Index of the time step. Not needed for the computation,
        which recomputes the prediction from ``hidden_state``.

    Returns:
      Gradient of the loss with respect to ``hidden_state``, shape
      ``(hidden_size, 1)``.
    """
    prediction = softmax(self.V @ hidden_state + self.C, axis=0)
    delta_output = prediction - expected
    self.delta_V += delta_output @ hidden_state.T
    self.delta_C += delta_output
    return self.V.T @ delta_output

  def update_weights_biases(self, learning_rate: float) -> None:
    """Apply one gradient-descent step and clear the accumulators."""
    self.V -= learning_rate * self.delta_V
    self.C -= learning_rate * self.delta_C
    # Start the next backward pass from zero gradients.
    self.delta_V.fill(0)
    self.delta_C.fill(0)

In [180]:
class VanillaRNN():
  """Single-layer vanilla RNN trained one sequence at a time with BPTT.

  The network is composed of an ``inputlayer``, a ``hiddenlayer`` and an
  ``outputlayer``. The input layer is created lazily on the first forward
  pass, because its constructor infers the input size from the data, and is
  then reused so that its weights persist across sequences.
  """
  input_layer: "inputlayer" = None
  hidden_layer: "hiddenlayer" = None
  output_layer: "outputlayer" = None
  alpha: float = 0.01

  def __init__(self, vocab_size: int, hidden_size: int, alpha: float) -> None:
    """Build the hidden and output layers.

    Args:
      vocab_size: Number of input/output symbols.
      hidden_size: Number of hidden units.
      alpha: Learning rate used for every weight update.
    """
    self.vocab_size = vocab_size
    self.input_layer = None
    self.hidden_layer = hiddenlayer(vocab_size, hidden_size)
    self.output_layer = outputlayer(vocab_size, hidden_size)
    self.alpha = alpha
    self.hidden_size = hidden_size

  def feed_forward(self, inputs: np.ndarray) -> "outputlayer":
    """Run the network over one sequence.

    Args:
      inputs: Sequence of one-hot column vectors, one per time step.

    Returns:
      The output layer; its ``states`` hold the prediction for every step.
    """
    sequence_length = len(inputs)
    # Fresh per-sequence buffers sized for this sequence.
    self.hidden_layer.states = np.zeros(shape=(sequence_length, self.hidden_size, 1))
    self.output_layer.states = np.zeros(shape=(sequence_length, self.vocab_size, 1))
    if self.input_layer is None:
      self.input_layer = inputlayer(inputs, self.hidden_size)
    else:
      # Reuse the layer so its weights are not re-randomised on every call.
      self.input_layer.inputs = inputs
    for step in range(sequence_length):
      weighted_input = self.input_layer.weighted_sum(step)
      activation = self.hidden_layer.calculate_activation(weighted_input, step)
      self.output_layer.output(activation, step)
    return self.output_layer

  def _reset_gradients(self) -> None:
    """Zero every gradient accumulator before a new backward pass."""
    self.output_layer.delta_V.fill(0)
    self.output_layer.delta_C.fill(0)
    self.hidden_layer.delta_W.fill(0)
    self.hidden_layer.delta_bias.fill(0)
    # Gradient carried from step t + 1; nothing follows the last step.
    self.hidden_layer.current_activation = np.zeros_like(self.hidden_layer.bias)
    self.input_layer.delta_U.fill(0)

  def backpropagation(self, expected: np.ndarray) -> None:
    """Back-propagate through time for the last forward pass and update weights.

    Args:
      expected: Sequence of one-hot target column vectors, aligned with the
        inputs given to the preceding ``feed_forward`` call.
    """
    self._reset_gradients()
    for step_number in reversed(range(len(expected))):
      hidden_state = self.hidden_layer.get_hidden_state(step_number)
      # Argument order matters: the output layer takes the hidden state
      # first, then the target.
      delta_output = self.output_layer.calculate_deltas_per_step(
          hidden_state, expected[step_number], step_number)
      delta_weighted_sum = self.hidden_layer.calculate_deltas_per_step(step_number, delta_output)
      self.input_layer.calculate_deltas_per_step(step_number, delta_weighted_sum)

    self.output_layer.update_weights_biases(self.alpha)
    self.hidden_layer.update_weights_biases(self.alpha)
    self.input_layer.update_weights(self.alpha)

  def loss(self, y_hat: list[np.ndarray], y: list[np.ndarray]) -> float:
    """Return the cross-entropy summed over all time steps.

    Args:
      y_hat: Predicted probability vectors, one per time step.
      y: One-hot target vectors, one per time step.

    Returns:
      ``-sum_t sum_k y[t][k] * log(y_hat[t][k])`` as a Python float.
    """
    total = 0.0
    for predicted, target in zip(y_hat, y):
      # Clip so a probability of exactly 0 cannot produce 0 * -inf = nan.
      total -= np.sum(target * np.log(np.clip(predicted, 1e-12, None)))
    return float(total)

  def training(self, inputs: np.ndarray, expected: np.ndarray, epochs: int) -> None:
    """Train for ``epochs`` passes, updating the weights after every sequence.

    Prints the epoch number and, for each sequence, the loss of the forward
    pass that preceded the weight update.

    Args:
      inputs: Encoded input sequences.
      expected: Encoded target sequences, aligned with ``inputs``.
      epochs: Number of passes over the data.
    """
    for epoch in range(epochs):
      print(f"epoch : {epoch}")
      for idx, sequence in enumerate(inputs):
        y_hats = self.feed_forward(sequence)
        self.backpropagation(expected[idx])
        print(f"Loss: {self.loss(list(y_hats.states), expected[idx])}")

# Main file

In [None]:
if __name__ == "__main__":
  # Encode the training letters and their targets as one-hot sequences.
  one_hot_encodes = string_to_one_hot_encoding(inputs)
  one_hot_encodes_expected = string_to_one_hot_encoding(expected)
  # Each validation entry is encoded as its own one-step sequence.
  validation_inputs = ['B','M','N']
  rnn = VanillaRNN(len(string.ascii_uppercase),hidden_size=128,alpha=0.01)

  rnn.training(one_hot_encodes,one_hot_encodes_expected,10)

  # The loop variable is deliberately not called `input`: at module or
  # notebook scope that would rebind the builtin for all later code.
  for validation_sequence in string_to_one_hot_encoding(validation_inputs):
    predictions_validation = rnn.feed_forward(validation_sequence)
    # The prediction for the last time step is the model's answer.
    output_validation = np.argmax(predictions_validation.states[-1])
    print(output_validation)
    print(string.ascii_uppercase[output_validation])