# Recurrent neural network

- A recurrent neural network (RNN)is a class of artificial intelligence neural networks where connections between nodes form a directed graph along a temporal sequence.
- This allows it to exhibits(publicly display) temporal dynamic behaviour.
- Derived from feedforward neural networks, RNNs can use their internal state (memory) to process variable length sequence of inputs.
- This makes them applicable to tasks such as unsegmented, connected handwriting recognition or speech recognition

In [1]:
from numpy.random import randn
import numpy as np

In [2]:
train_data = {
  'good': True,
  'bad': False,
  'happy': True,
  'sad': False,
  'not good': False,
  'not bad': True,
  'not happy': False,
  'not sad': True,
  'very good': True,
  'very bad': False,
  'very happy': True,
  'very sad': False,
  'i am happy': True,
  'this is good': True,
  'i am bad': False,
  'this is bad': False,
  'i am sad': False,
  'this is sad': False,
  'i am not happy': False,
  'this is not good': False,
  'i am not bad': True,
  'this is not sad': True,
  'i am very happy': True,
  'this is very good': True,
  'i am very bad': False,
  'this is very sad': False,
  'this is very happy': True,
  'i am good not bad': True,
  'this is good not bad': True,
  'i am bad not good': False,
  'i am good and happy': True,
  'this is not good and not happy': False,
  'i am not at all good': False,
  'i am not at all bad': True,
  'i am not at all happy': False,
  'this is not at all sad': True,
  'this is not at all happy': False,
  'i am good right now': True,
  'i am bad right now': False,
  'this is bad right now': False,
  'i am sad right now': False,
  'i was good earlier': True,
  'i was happy earlier': True,
  'i was bad earlier': False,
  'i was sad earlier': False,
  'i am very bad right now': False,
  'this is very good right now': True,
  'this is very sad right now': False,
  'this was bad earlier': False,
  'this was very good earlier': True,
  'this was very bad earlier': False,
  'this was very happy earlier': True,
  'this was very sad earlier': False,
  'i was good and not bad earlier': True,
  'i was not good and not happy earlier': False,
  'i am not at all bad or sad right now': True,
  'i am not at all good or happy right now': False,
  'this was not happy and not good earlier': False,
}

test_data = {
  'this is happy': True,
  'i am good': True,
  'this is not happy': False,
  'i am not good': False,
  'this is not bad': True,
  'i am not sad': True,
  'i am very good': True,
  'this is very bad': False,
  'i am very sad': False,
  'this is bad not good': False,
  'this is good and happy': True,
  'i am not good and not happy': False,
  'i am not at all sad': True,
  'this is not at all good': False,
  'this is not at all bad': True,
  'this is good right now': True,
  'this is sad right now': False,
  'this is very bad right now': False,
  'this was good earlier': True,
  'i was not happy and not good earlier': False,
}

In [3]:
class RNN:
    # A many-to-one Vanilla Recurrent Neural network.

    def __init__(self, input_size, output_size, hidden_size=64):
        # weights
        self.Whh = randn(hidden_size, hidden_size) / 1000
        self.Wxh = randn(hidden_size, input_size) / 1000
        self.Why = randn(output_size, hidden_size) / 1000

        # Biases
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))

    def forward(self, inputs):
        '''
        Perform a forward pass of the RNN using the given inputs.
        Return the final output and hidden state.
        - inputs is an arrays of one hot vector with shape (input_size, 1).
        '''

        h = np.zeros((self.Whh.shape[0], 1))

        self.last_inputs = inputs
        self.last_hs = {0:h}

        # Perform each step of the RNN
        for i, x in  enumerate(inputs):
            h = np.tanh(self.Wxh @ x + self.Whh @ h + self.bh)
            self.last_hs[i + 1] = h

        #compute the output
        y = self.Why @ h + self.by

        return y, h

    def backprop(self, d_y, learn_rate=2e-2):
        '''
        Perform a backward pass of the RNN.
        - d_y (dL/dy) has shape (output_size, 1).
        '''
        n = len(self.last_inputs)

        # Calculate dL/dWhy and dL/dby.
        d_Why = d_y @ self.last_hs[n].T
        d_by = d_y

        # Initialize dL/dWhh, dL/dWxh, and dL/dbh to zero.
        d_Whh = np.zeros(self.Whh.shape)
        d_Wxh = np.zeros(self.Wxh.shape)
        d_bh = np.zeros(self.bh.shape)

        # Calculate dL/dh for the last h.
        # dL/dh = dL/dy * dy/dh
        d_h = self.Why.T @ d_y

        # Backpropagate through time.
        for t in reversed(range(n)):
            # An intermediate value: dL/dh * (1 - h^2)
            temp = ((1 - self.last_hs[t + 1] ** 2) * d_h)

            # dL/db = dL/dh * (1 - h^2)
            d_bh += temp

            # dL/dWhh = dL/dh * (1 - h^2) * h_{t-1}
            d_Whh += temp @ self.last_hs[t].T

            # dL/dWxh = dL/dh * (1 - h^2) * x
            d_Wxh += temp @ self.last_inputs[t].T

            # Next dL/dh = dL/dh * (1 - h^2) * Whh
            d_h = self.Whh @ temp

        # Clip to prevent exploding gradients.
        for d in [d_Wxh, d_Whh, d_Why, d_bh, d_by]:
            np.clip(d, -1, 1, out=d)

        # Update weights and biases using gradient descent.
        self.Whh -= learn_rate * d_Whh
        self.Wxh -= learn_rate * d_Wxh
        self.Why -= learn_rate * d_Why
        self.bh -= learn_rate * d_bh
        self.by -= learn_rate * d_by

In [4]:
# Create the vocabulary.
vocab = list(set([w for text in train_data.keys() for w in text.split(' ')]))
vocab_size =  len(vocab)
print(f'unique words found: {vocab_size}')

# Assign indices to each word
word_to_idx = { w: i for i , w in enumerate(vocab)}
idx_to_word = { i: w for i, w in enumerate(vocab)}

unique words found: 18


In [5]:
import random


def createInputs(text):
    '''
    Returns an array of one-hot vectors representing the words in the input text string.
    - text is a string
    - Each one-hot vector has shape (vocab_size, 1)
    '''
    inputs = []
    for w in text.split(' '):
        v = np.zeros((vocab_size, 1))
        v[word_to_idx[w]] = 1
        inputs.append(v)
    return inputs

def softmax(xs):
    # Applies the softmax function to the input array.
    return np.exp(xs) / sum(np.exp(xs))

# Initialize our RNN
rnn = RNN(vocab_size, 2)

def processData(data, backprop=True):
    '''
    Returns the RNN's loss and accuracy for the given data.
    - data is a dictionary mapping text to True or False.
    - backprop determines if the backward phase should be run.
    '''

    items = list(data.items())
    random.shuffle(items)

    loss = 0 
    num_correct = 0 

    for x, y in items:
        inputs = createInputs(x)
        target = int(y)

        # Forward
        out, _ = rnn.forward(inputs)
        probs = softmax(out)


        # Calculate loss / accuracy
        loss -= np.log(probs[target])
        num_correct += int(np.argmax(probs) == target)

        if backprop:
            # Build DL/dy
            d_L_d_y = probs
            d_L_d_y[target] -= 1

            # Backward
            rnn.backprop(d_L_d_y)

    return loss / len(data),  num_correct/ len(data)

# Training loop
for epoch in range(1000):
    train_loss, train_acc = processData(train_data)

    if epoch % 100 == 99:
        print('---Epoch %d' % (epoch + 1))
        print('Train:/tLoss %.3f | Accuracy: %.3f' % (train_loss, train_acc))

        test_loss, test_acc = processData(test_data, backprop=False)
        print('Test:/tLoss %.3f | Accuracy: %.3f' % (test_loss, test_acc))

---Epoch 100
Train:/tLoss 0.688 | Accuracy: 0.552
Test:/tLoss 0.698 | Accuracy: 0.500


  print('Train:/tLoss %.3f | Accuracy: %.3f' % (train_loss, train_acc))
  print('Test:/tLoss %.3f | Accuracy: %.3f' % (test_loss, test_acc))


---Epoch 200
Train:/tLoss 0.663 | Accuracy: 0.586
Test:/tLoss 0.714 | Accuracy: 0.500
---Epoch 300
Train:/tLoss 0.568 | Accuracy: 0.655
Test:/tLoss 0.626 | Accuracy: 0.700
---Epoch 400
Train:/tLoss 0.407 | Accuracy: 0.845
Test:/tLoss 0.755 | Accuracy: 0.500
---Epoch 500
Train:/tLoss 0.316 | Accuracy: 0.897
Test:/tLoss 0.723 | Accuracy: 0.600
---Epoch 600
Train:/tLoss 0.198 | Accuracy: 0.897
Test:/tLoss 0.582 | Accuracy: 0.700
---Epoch 700
Train:/tLoss 0.542 | Accuracy: 0.724
Test:/tLoss 0.887 | Accuracy: 0.450
---Epoch 800
Train:/tLoss 0.064 | Accuracy: 0.983
Test:/tLoss 0.476 | Accuracy: 0.900
---Epoch 900
Train:/tLoss 0.015 | Accuracy: 1.000
Test:/tLoss 0.725 | Accuracy: 0.900
---Epoch 1000
Train:/tLoss 0.007 | Accuracy: 1.000
Test:/tLoss 0.765 | Accuracy: 0.900
