# Recurrent Neural Network
This notebook is an implementation of a simple RNN using Python and numpy. This program is inspired by:

Original code (Karpathy): https://gist.github.com/karpathy/d4dee566867f8291f086

Converted to Python 3 by (Yoon): https://github.com/JY-Yoon

Farhad Kamangar  2023

In [None]:
!pip install -Uqq ipdb
import numpy as np
from tabulate import tabulate
import ipdb


def display_array(input, title='', headers=None, table_format='fancy', num_format='.2f'):
    """Print a 2-D array-like as a text table using ``tabulate``.

    Args:
        input: Rows to display (anything ``tabulate`` accepts as tabular data).
            The name shadows the ``input`` builtin but is kept so that keyword
            callers keep working.
        title: Text used as the single column header when ``headers`` is not
            given.
        headers: Optional sequence of column headers. When omitted or empty the
            table is headed by ``[title]``.
        table_format: Value forwarded to ``tabulate`` as ``tablefmt``.
        num_format: Value forwarded to ``tabulate`` as ``floatfmt``.
    """
    # NOTE(review): 'fancy' does not look like one of tabulate's documented
    # format names (e.g. 'fancy_grid'); confirm which default was intended.
    # ``None`` replaces the former mutable ``[]`` default, and an explicit
    # length check keeps array-like headers from hitting ambiguous truthiness.
    if headers is not None and len(headers) > 0:
        column_headers = list(headers)
    else:
        column_headers = [title]
    print(tabulate(input, column_headers, tablefmt=table_format, floatfmt=num_format))

class RNN():
    """Simple vanilla RNN over one-hot encoded features, written with numpy.

    The constructor builds a character-level vocabulary from ``input_data``
    and randomly initialises the input-to-hidden (``W_xh``), hidden-to-hidden
    (``W_hh``) and hidden-to-output (``W_hy``) weights plus zero biases.
    ``train`` runs truncated back-propagation through time with Adagrad
    updates; ``predict`` samples a sequence from the trained model.
    """

    def __init__(self, hidden_size, time_steps, input_data):
        """Store hyper-parameters, build the vocabulary and create parameters.

        Args:
            hidden_size: Number of hidden units.
            time_steps: Length of each training window (number of features fed
                per forward/backward pass).
            input_data: Training text; each item is treated as one feature.
        """
        self.hidden_size = hidden_size
        self.time_steps = time_steps
        self.input_data = input_data
        self.character_level_analysis(self.input_data)
        # Draw order (W_xh, W_hh, W_hy) is kept so seeded runs stay comparable.
        self.W_xh = np.random.randn(self.hidden_size, self.num_of_unique_features)
        self.W_hh = np.random.randn(self.hidden_size, self.hidden_size)
        self.W_hy = np.random.randn(self.num_of_unique_features, self.hidden_size)
        self.b_h = np.zeros((hidden_size, 1))  # hidden bias
        self.b_y = np.zeros((self.num_of_unique_features, 1))  # output bias
        self.h_prev = np.zeros((hidden_size, 1))

    def character_level_analysis(self, txt_data):
        """Build the feature vocabulary from the individual items of ``txt_data``.

        Sets ``unique_features``, ``num_of_unique_features``,
        ``num_of_features`` and the two lookup dictionaries.
        """
        # sorted() makes the feature -> index mapping reproducible; plain
        # list(set(...)) order changes between interpreter runs for strings.
        self.unique_features = sorted(set(txt_data))
        self.num_of_unique_features = len(self.unique_features)
        self.num_of_features = len(txt_data)
        self.features_to_int_dictionary = {f: k for k, f in enumerate(self.unique_features)}
        self.int_to_feature_dictionary = dict(enumerate(self.unique_features))

    def word_level_analysis(self, txt_data):
        """Build the feature vocabulary from lower-cased, whitespace-split words.

        Leading/trailing ``.,!;()[]`` characters are stripped from each word.
        This method is not called by ``__init__``; ``train`` still iterates
        over the items of ``input_data`` directly.
        """
        words = [word.strip('.,!;()[]') for word in txt_data.lower().split()]
        self.unique_features = sorted(set(words))
        self.num_of_unique_features = len(self.unique_features)
        self.num_of_features = len(words)
        self.features_to_int_dictionary = {f: k for k, f in enumerate(self.unique_features)}
        self.int_to_feature_dictionary = dict(enumerate(self.unique_features))

    @staticmethod
    def _softmax(logits):
        """Return softmax of ``logits``, shifted by the max to avoid overflow."""
        # exp() of a large logit overflows to inf and inf/inf is NaN;
        # subtracting the max leaves the result mathematically unchanged.
        exps = np.exp(logits - np.max(logits))
        return exps / np.sum(exps)

    def forwardprop(self, input_sequence, h_prev):
        """Run the network forward over one window.

        Args:
            input_sequence: Sequence of integer feature indices.
            h_prev: Hidden state to start from, shape ``(hidden_size, 1)``.

        Returns:
            Tuple ``(ps, hs, xs)`` of dictionaries keyed by time step: output
            probabilities, hidden states (``hs[-1]`` is a copy of ``h_prev``)
            and one-hot inputs.
        """
        xs, hs, ps = {}, {}, {}
        hs[-1] = np.copy(h_prev)

        for t, feature_index in enumerate(input_sequence):
            xs[t] = np.zeros((self.num_of_unique_features, 1))
            xs[t][feature_index] = 1  # one-hot encoding of the current input
            hs[t] = np.tanh(np.dot(self.W_xh, xs[t]) + np.dot(self.W_hh, hs[t - 1]) + self.b_h)
            ps[t] = self._softmax(np.dot(self.W_hy, hs[t]) + self.b_y)

        return ps, hs, xs

    def backprop(self, ps, targets, hs, xs):
        """Compute the cross-entropy loss and parameter gradients for one window.

        Args:
            ps: Output probabilities per time step, as returned by ``forwardprop``.
            targets: Integer index of the expected feature at each time step.
            hs: Hidden states per time step (including ``hs[-1]``).
            xs: One-hot inputs per time step.

        Returns:
            ``(dWxh, dWhh, dWhy, dbh, dby, loss)`` with every gradient clipped
            element-wise to ``[-5, 5]``.
        """
        dWxh = np.zeros_like(self.W_xh)
        dWhh = np.zeros_like(self.W_hh)
        dWhy = np.zeros_like(self.W_hy)
        dbh, dby = np.zeros_like(self.b_h), np.zeros_like(self.b_y)
        # Same (hidden_size, 1) shape as a hidden state; taken from b_h so an
        # empty window does not fail on a missing hs[0].
        dhnext = np.zeros_like(self.b_h)

        loss = 0
        for t, target in enumerate(targets):
            loss += -np.log(ps[t][target, 0])

        # Walk backwards so the gradient from later steps flows into earlier ones.
        for t in reversed(range(len(targets))):
            dy = np.copy(ps[t])
            dy[targets[t]] -= 1  # gradient of softmax + cross-entropy w.r.t. logits
            dWhy += np.dot(dy, hs[t].T)
            dby += dy

            dh = np.dot(self.W_hy.T, dy) + dhnext
            dhraw = (1 - hs[t] * hs[t]) * dh  # tanh'(x) = 1 - tanh^2(x)
            dbh += dhraw
            dWxh += np.dot(dhraw, xs[t].T)
            dWhh += np.dot(dhraw, hs[t - 1].T)
            dhnext = np.dot(self.W_hh.T, dhraw)

        # Clip in place to limit exploding gradients.
        for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
            np.clip(dparam, -5, 5, out=dparam)
        return dWxh, dWhh, dWhy, dbh, dby, loss

    def predict(self, test_feature, length):
        """Sample ``length`` features from the model, seeded with ``test_feature``.

        The hidden state starts at zero. The sampled features are joined
        without a separator, printed between ``----`` markers and returned.

        Args:
            test_feature: Seed feature; must be in the vocabulary.
            length: Number of features to sample.

        Returns:
            The generated text as a string.

        Raises:
            KeyError: If ``test_feature`` is not in the vocabulary.
        """
        x = np.zeros((self.num_of_unique_features, 1))
        x[self.features_to_int_dictionary[test_feature]] = 1
        ixes = []
        h = np.zeros((self.hidden_size, 1))

        for _ in range(length):
            h = np.tanh(np.dot(self.W_xh, x) + np.dot(self.W_hh, h) + self.b_h)
            p = self._softmax(np.dot(self.W_hy, h) + self.b_y)
            # "ix" is a single index drawn according to the softmax
            # probabilities; ravel() flattens p to the 1-D shape choice() needs.
            ix = np.random.choice(range(self.num_of_unique_features), p=p.ravel())
            # Feed the sampled feature back in as the next one-hot input.
            x = np.zeros((self.num_of_unique_features, 1))
            x[ix] = 1
            ixes.append(ix)
        txt = ''.join(self.int_to_feature_dictionary[i] for i in ixes)
        print('----\n', txt, '\n----')
        return txt

    def train(self, number_of_epochs, learning_rate):
        """Train with truncated BPTT over consecutive windows using Adagrad.

        The data is cut into ``round(len(input_data) / time_steps)`` windows of
        ``time_steps`` features; the target of each input is the feature that
        follows it. Every 100 epochs the loss of the last window is printed.

        Args:
            number_of_epochs: Number of full passes over ``input_data``.
            learning_rate: Adagrad step size.
        """
        number_of_sequences = round(len(self.input_data) / self.time_steps)
        encode = self.features_to_int_dictionary

        # memory variables for Adagrad
        mWxh, mWhh, mWhy = np.zeros_like(self.W_xh), np.zeros_like(self.W_hh), np.zeros_like(self.W_hy)
        mbh, mby = np.zeros_like(self.b_h), np.zeros_like(self.b_y)

        for epoch_index in range(number_of_epochs):
            h_prev = np.zeros((self.hidden_size, 1))  # reset RNN memory
            data_pointer = 0  # go from start of data
            loss = 0.0  # stays defined even when there are no windows

            for _ in range(number_of_sequences):
                window_end = data_pointer + self.time_steps
                inputs = [encode[ch] for ch in self.input_data[data_pointer:window_end]]
                targets = [encode[ch] for ch in self.input_data[data_pointer + 1:window_end + 1]]  # t+1

                # Only the final window can run out of "next" features. Pad
                # exactly the missing targets: with " " when the vocabulary has
                # it, otherwise by wrapping around to the first feature.
                missing = len(inputs) - len(targets)
                if missing > 0:
                    pad = encode.get(" ", encode[self.input_data[0]])
                    targets.extend([pad] * missing)

                ps, hs, xs = self.forwardprop(inputs, h_prev)
                dWxh, dWhh, dWhy, dbh, dby, loss = self.backprop(ps, targets, hs, xs)
                for param, dparam, mem in zip([self.W_xh, self.W_hh, self.W_hy, self.b_h, self.b_y],
                                              [dWxh, dWhh, dWhy, dbh, dby],
                                              [mWxh, mWhh, mWhy, mbh, mby]):
                    mem += dparam * dparam  # elementwise
                    param += -learning_rate * dparam / np.sqrt(mem + 1e-8)

                # Carry the last hidden state into the next window so memory
                # persists within an epoch (it is reset at each epoch start).
                h_prev = hs[len(inputs) - 1]
                data_pointer += self.time_steps

            if epoch_index % 100 == 0:
                print('Iteration:  %d, Loss: %f' % (epoch_index, loss))


# input_data = """I want to die as the day declines,
# at high sea and facing the sky,
# while agony seems like a dream
# and my soul like a bird that can fly.

# To hear not, at this last moment,
# once alone with sky and sea,
# any more voices nor weeping prayers
# than the majestic beating of the waves.

# To die when the sad light retires
# its golden network from the green waves
# to be like the sun that slowly expires;
# something very luminous that fades.

# To die, and die young, before
# fleeting time removes the gentle crown,
# while life still says: "I'm yours"
# though we know with our hearts that she lies. """
# with open("byron.txt") as f:
#             input_data = f.read().lower()

# Small demo: fit the network on a short phrase, then sample 10 features
# starting from 't'.
input_data = "this is his tie"
rnn = RNN(input_data=input_data, hidden_size=4, time_steps=3)
rnn.train(number_of_epochs=200, learning_rate=0.01)
rnn.predict(test_feature='t', length=10)


Iteration:  0, Loss: 9.032788
Iteration:  100, Loss: 6.423048
----
  isi iisit 
----
