# Introduction Recurrent Neural Network
Let's start with a new version of neural network for the most common system such that: Google translate,Generate text,... <br>
In this tutorial, we'll build a small application to predict a sentence is positive or negative.We'll use model many to one following in below image


<img src="image/modelBackpro.png" width=500 height=300/>

In [10]:
import numpy as np
from data import train_data,test_data
from numpy.random import rand

# Pre-processing Data

# 1.Construct vocabulary 
We'll have to do some pre-processing data to get the data into the usable format.To start, we'll build a construct vocabulary of all worlds exist in the data 

In [11]:
vocab=list(set(w for text in train_data.keys() for w in text.split(' ')))
vocab_size=len(vocab)
print(vocab_size)
print(vocab)

18
['very', 'at', 'happy', 'bad', 'and', 'all', 'not', 'good', 'right', 'is', 'was', 'earlier', 'this', 'or', 'i', 'now', 'sad', 'am']


# 1.1 Assign an integer index to represent for each word

In [12]:
word_to_index={word:i for i,word in enumerate(vocab)}
index_to_word={i:word for i,word in enumerate(vocab)}
print(word_to_index)
print(index_to_word)


{'very': 0, 'at': 1, 'happy': 2, 'bad': 3, 'and': 4, 'all': 5, 'not': 6, 'good': 7, 'right': 8, 'is': 9, 'was': 10, 'earlier': 11, 'this': 12, 'or': 13, 'i': 14, 'now': 15, 'sad': 16, 'am': 17}
{0: 'very', 1: 'at', 2: 'happy', 3: 'bad', 4: 'and', 5: 'all', 6: 'not', 7: 'good', 8: 'right', 9: 'is', 10: 'was', 11: 'earlier', 12: 'this', 13: 'or', 14: 'i', 15: 'now', 16: 'sad', 17: 'am'}


# 1.2 Build one-hot coding for each sentence

In [13]:
def build_one_hot_coding(text):
    inputs=[]
    for word in text.split(' '):
        v=np.zeros((vocab_size,1))
        v[word_to_index[word]][0]=1
        inputs.append(v)
    return inputs

# 2 Build RNN Model

<img src="image/rnn.jpg" width=500 height=200/>

# 2.1 A few equations for derivation
* $W_{xh}$ used for all $x_t$ -> $h_t$ links
* $W_{hh}$ used for all $h_{t-1}$ -> $h_t$ links
* $W_{hy}$ used for all $h_t$ -> $y_t$ links
* $b_{h}$ is bias,it is added when calculating $h_t$
* $b_{y}$ is bias,it is added when calculating $y_t$
<br><br>

* Definition for the state $h(t)$<br><br>
    <font size="6">$h_t$=tanh($W_{xh}x_t$+$W_{hh}h_{t-1}+b_h$)</font></n></n>

    <font size="6">$y_t$=$W_{hy}h_t+b_y$</font>

<br><br><nr>
* <font size="7">Derivation for Loss Function cross entropy</font> <br><br>
<img src="image/dL_dy.png"/>


<br><br>
<br><br>
* <font size="7"></font> <br><br>
<img src="image/dL_dWhy.png"/>
<br><br>
<br><br>
<img src="image/1.png"/>
<br><br>
<br><br>
<img src="image/2.png"/>
<br><br>
<br><br>
<img src="image/3.png"/>

In [14]:
class RNN:
    
    def __init__(self,input_size,output_size,hiden_size=64):
        self.input_size=input_size
        self.hiden_size=hiden_size
        self.output_size=output_size
        
        self.Wxh=rand(self.hiden_size,self.input_size)/1000
        self.Whh=rand(self.hiden_size,self.hiden_size)/1000
        self.Why=rand(self.output_size,self.hiden_size)/1000
        
        self.bh=np.zeros((self.hiden_size,1))
        self.by=np.zeros((self.output_size,1))
    
    def forward(self,inputs):
        h=np.zeros((self.Whh.shape[0],1))      
        self.inputs=inputs
        self.list_h={0:h}

        '''
        if a and b are matrix, a@b <==> a.dot(b)
        if a is matrix and b is list, a@b <==> a.dot(np.array(b).T)
        '''
        for i,x in enumerate(inputs):
            h=np.tanh(self.Wxh.dot(x)+self.Whh.dot(h)+self.bh)
            self.list_h[i+1]=h
        y=self.Why.dot(h)+self.by
        return y,h
    
    def backpropagation(self,dL_dy,learn_rate=1e-2):
        n=len(self.inputs)
        dL_dWhy=dL_dy.dot(self.list_h[n].T) #(2,64)
        dL_dby=dL_dy #(2,1)
        
        dL_dWhh=np.zeros(self.Whh.shape)#(64,64)
        dL_dWxh=np.zeros(self.Wxh.shape)#(64,18)
        dL_dbh=np.zeros(self.bh.shape)#(18,1)
        
        dL_dh=self.Why.T.dot(dL_dy)#last state(64,1)
        temp_value=self.Why
        # for t in reversed(range(n)):
        #     temp=(1-self.list_h[t+1]**2)*dL_dh
        #     dL_dbh+=temp
        #     dL_dWhh+=temp.dot(self.list_h[t].T)
        #     dL_dWxh+=temp.dot(self.inputs[t].T)
        #     dL_dh=self.Whh.dot(temp)
        for t i reversed(range(n)):
            temp_value=
            
        #np.clip() function for prevent exploding gradient
        for d in [dL_dWxh,dL_dWhh,dL_dbh,dL_dby]:
            np.clip(d,-1,1,out=d)
        
        self.Whh-=learn_rate*dL_dWhh
        self.Wxh-=learn_rate*dL_dWxh
        self.bh-=learn_rate*dL_dbh
        self.by-=learn_rate*dL_dby     
        

In [15]:
def softmax(x):
    return np.exp(x)/sum(np.exp(x))

In [16]:
rnn=RNN(vocab_size,2)

In [17]:
import random
def processingData(data):
    items=list(data.items())
    random.shuffle(items)
    loss=0
    number_correct=0

    for x,y in items:
        inputs=build_one_hot_coding(x)
        target=int(y)
        output,_=rnn.forward(inputs)
        probs=softmax(output)
        loss-=np.log(probs[target])
        number_correct+=int(np.argmax(probs)==target)

        dL_dy=probs
        dL_dy[target]-=1
        rnn.backpropagation(dL_dy)

    return loss/len(data),number_correct/len(data)

In [18]:
for epoch in range(1000):
  train_loss, train_acc = processingData(train_data)

  if epoch % 100 == 99:
    print('     --- Epoch %d' % (epoch + 1))
    print('Train:\tLoss %.3f | Accuracy: %.3f' % (train_loss, train_acc))

    test_loss, test_acc = processingData(test_data)
    print('Test:\tLoss %.3f | Accuracy: %.3f' % (test_loss, test_acc))

     --- Epoch 100
Train:	Loss 0.690 | Accuracy: 0.552
Test:	Loss 0.701 | Accuracy: 0.500
     --- Epoch 200
Train:	Loss 0.690 | Accuracy: 0.552
Test:	Loss 0.701 | Accuracy: 0.500
     --- Epoch 300
Train:	Loss 0.690 | Accuracy: 0.552
Test:	Loss 0.701 | Accuracy: 0.500
     --- Epoch 400
Train:	Loss 0.690 | Accuracy: 0.552
Test:	Loss 0.700 | Accuracy: 0.500
     --- Epoch 500
Train:	Loss 0.690 | Accuracy: 0.552
Test:	Loss 0.701 | Accuracy: 0.500
     --- Epoch 600
Train:	Loss 0.690 | Accuracy: 0.552
Test:	Loss 0.700 | Accuracy: 0.500
     --- Epoch 700
Train:	Loss 0.690 | Accuracy: 0.552
Test:	Loss 0.701 | Accuracy: 0.500
     --- Epoch 800
Train:	Loss 0.690 | Accuracy: 0.552
Test:	Loss 0.701 | Accuracy: 0.500
     --- Epoch 900
Train:	Loss 0.690 | Accuracy: 0.552
Test:	Loss 0.700 | Accuracy: 0.500
     --- Epoch 1000
Train:	Loss 0.690 | Accuracy: 0.552
Test:	Loss 0.701 | Accuracy: 0.500
