In [59]:
from data import train_data, test_data

# Create the vocabulary.
vocab = list(set([w for text in train_data.keys() for w in text.split(' ')]))
vocab_size = len(vocab)
print('%d unique words found' % vocab_size) # 18 unique words found

18 unique words found


In [60]:
# Assign indices to each word.
word_to_idx = { w: i for i, w in enumerate(vocab) }
idx_to_word = { i: w for i, w in enumerate(vocab) }
print(word_to_idx['good']) # 16 (this may change)
print(idx_to_word[0]) # sad (this may change)

12
and


In [61]:
import numpy as np

def createInputs(text):
  '''
  Returns an array of one-hot vectors representing the words
  in the input text string.
  - text is a string
  - Each one-hot vector has shape (vocab_size, 1)
  '''
  inputs = []
  for w in text.split(' '):
    v = np.zeros((vocab_size, 1))
    v[word_to_idx[w]] = 1
    inputs.append(v)
  return inputs

In [62]:
## Forward Prop

import numpy as np

class RNN:
    def __init__(self, input_size, output_size, hidden_size = 64):

        self.Whh = np.random.randn(hidden_size, hidden_size)/1000
        self.Whx = np.random.randn(hidden_size, input_size)/1000        
        self.Why = np.random.randn(output_size, hidden_size)/1000

        self.bh = np.zeros((hidden_size,1))
        self.by = np.zeros((output_size,1))

    def forward(self, inputs):

        h = np.zeros((self.Whh.shape[0], 1))

        self.last_inputs = inputs
        self.last_hs = {0 : h}

        for i, x in enumerate(inputs):
            h = np.tanh(self.Whx @ x + self.Whh @ h + self.bh)      
            self.last_hs[i + 1] = h
        
        y = self.Why @ h + self.by

        return y, h

    def backward(self, dl_y, learning_rate = 2e-2):
        ''' Performs derivatives of parameters -> Whx, Whh, Why, bhy, bhh 
        - Inputs d_y : derivative of softmax'''
        n = len(self.last_inputs)

        dl_Why = dl_y @ self.last_hs[n].T
        dl_by = dl_y

        # Initialize values
        dl_Whh = np.zeros(self.Whh.shape)
        dl_Whx = np.zeros(self.Whx.shape)
        dl_bh = np.zeros(self.bh.shape)

        # Last state
        dl_h = self.Why.T @ dl_y

        # Backprop through time
        for t in reversed(range(n)):
            # An intermediate value: dL/dh * (1 - h^2)
            temp = ((1 - self.last_hs[t + 1] ** 2) * dl_h)

            dl_bh += temp

            dl_Whh += temp @ self.last_hs[t].T

            dl_Whx += temp @ self.last_inputs[t].T

            dl_h += self.Whh @ temp
        
        # Clip grad
        for d in [dl_Whx, dl_Why, dl_Whh, dl_bh,
         dl_by]:
            np.clip(d, -1, 1, out = d)

        # Update Weights
        self.Whh -= learning_rate * dl_Whh
        self.Whx -= learning_rate * dl_Whx
        self.Why -= learning_rate * dl_Why
        self.bh -= learning_rate * dl_bh
        self.by -= learning_rate * dl_by

# Training loop
rnn = RNN(vocab_size, 2)

In [63]:
import random

def processData(data, backprop=True):
  '''
  Returns the RNN's loss and accuracy for the given data.
  - data is a dictionary mapping text to True or False.
  - backprop determines if the backward phase should be run.
  '''
  items = list(data.items())
  random.shuffle(items)

  loss = 0
  num_correct = 0

  for x, y in items:
    inputs = createInputs(x)
    target = int(y)

    # Forward
    out, _ = rnn.forward(inputs)
    probs = softmax(out)

    # Calculate loss / accuracy
    loss -= np.log(probs[target])
    num_correct += int(np.argmax(probs) == target)

    if backprop:
      # Build dL/dy
      dl_dy = probs
      dl_dy[target] -= 1
      # Backward
      rnn.backward(dl_dy)

  return loss / len(data), num_correct / len(data)

In [64]:
def softmax(xs):
    return np.exp(xs)/np.sum(np.exp(xs))

In [65]:
for epoch in range(1000):
    # print(epoch)
    train_loss, train_acc = processData(train_data)

    if epoch % 100 == 99:
        print('--- Epoch %d' % (epoch + 1))
        print('Train:\tLoss %.3f | Accuracy: %.3f' % (train_loss, train_acc))

        test_loss, test_acc = processData(test_data, backprop=False)
        print('Test:\tLoss %.3f | Accuracy: %.3f' % (test_loss, test_acc))

--- Epoch 100
Train:	Loss 0.692 | Accuracy: 0.552
Test:	Loss 0.705 | Accuracy: 0.500
--- Epoch 200
Train:	Loss 0.518 | Accuracy: 0.776
Test:	Loss 0.604 | Accuracy: 0.650
--- Epoch 300
Train:	Loss 0.745 | Accuracy: 0.672
Test:	Loss 0.646 | Accuracy: 0.700
--- Epoch 400
Train:	Loss 0.819 | Accuracy: 0.690
Test:	Loss 0.927 | Accuracy: 0.500
--- Epoch 500
Train:	Loss 0.372 | Accuracy: 0.793
Test:	Loss 0.375 | Accuracy: 0.850
--- Epoch 600
Train:	Loss 0.417 | Accuracy: 0.828
Test:	Loss 0.612 | Accuracy: 0.850
--- Epoch 700
Train:	Loss 0.005 | Accuracy: 1.000
Test:	Loss 0.011 | Accuracy: 1.000
--- Epoch 800
Train:	Loss 0.002 | Accuracy: 1.000
Test:	Loss 0.006 | Accuracy: 1.000
--- Epoch 900
Train:	Loss 0.001 | Accuracy: 1.000
Test:	Loss 0.004 | Accuracy: 1.000
--- Epoch 1000
Train:	Loss 0.001 | Accuracy: 1.000
Test:	Loss 0.003 | Accuracy: 1.000


## Example 2 : Num of times "1" in sequence

In [66]:
# Create dataset
nb_of_samples = 20
sequence_len = 10
# Create the sequences
X = np.zeros((nb_of_samples, sequence_len))
for row_idx in range(nb_of

_samples):
    X[row_idx,:] = np.around(np.random.rand(sequence_len)).astype(int)
# Create the targets for each sequence
t = np.sum(X, axis=1)

In [67]:
## Forward Prop

import numpy as np

class RNN:
    def __init__(self, input_size, output_size, hidden_size = 64):

        self.Whh = np.random.randn(hidden_size, hidden_size)/1000
        self.Whx = np.random.randn(hidden_size, input_size)/1000        
        self.Why = np.random.randn(output_size, hidden_size)/1000

        self.bh = np.zeros((hidden_size,1))
        self.by = np.zeros((output_size,1))

        self.input_size = input_size

    def forward(self, inputs):

        h = np.zeros((self.Whh.shape[0], 1))
        self.last_inputs = inputs
        self.last_hs = {0 : h}

        for i, x in enumerate(inputs):
            h = np.tanh(self.Whx @ x.reshape(-1, 1) + self.Whh @ h + self.bh)    
            self.last_hs[i + 1] = h
        
        y = self.Why @ h + self.by
        print(y.shape)
        return y, h

    def backward(self, dl_y, learning_rate = 2e-2):
        ''' Performs derivatives of parameters -> Whx, Whh, Why, bhy, bhh 
        - Inputs d_y : derivative of softmax'''
        n = len(self.last_inputs)
        print(self.last_hs[n].T.shape)
        dl_Why = dl_y @ self.last_hs[n].T
        dl_by = dl_y

        # Initialize values
        dl_Whh = np.zeros(self.Whh.shape)
        dl_Whx = np.zeros(self.Whx.shape)
        dl_bh = np.zeros(self.bh.shape)

        # Last state
        print(self.Why.T.shape, dl_y.shape)
        dl_h = self.Why.T @ dl_y

        # Backprop through time
        for t in reversed(range(n)):
            # An intermediate value: dL/dh * (1 - h^2)
            temp = ((1 - self.last_hs[t + 1] ** 2) * dl_h)

            dl_bh += temp

            dl_Whh += temp @ self.last_hs[t].T

            dl_Whx += temp @ self.last_inputs[t].T

            dl_h += self.Whh @ temp
        
        # Clip grad
        for d in [dl_Whx, dl_Why, dl_Whh, dl_bh,
         dl_by]:
            np.clip(d, -1, 1, out = d)

        # Update Weights
        self.Whh -= learning_rate * dl_Whh
        self.Whx -= learning_rate * dl_Whx
        self.Why -= learning_rate * dl_Why
        self.bh -= learning_rate * dl_bh
        self.by -= learning_rate * dl_by

In [48]:
rnn = RNN(input_size = 10, output_size = 1)
for i in range(5):
    for x 
    out, _ = rnn.forward(X)
    
    loss = (out - t)**2/2
    dl_dy = (out - t).reshape(-1, 1)

    rnn.backward(dl_dy)
    print(loss)

(1, 1)
(1, 64)


ValueError: matmul: Input operand 0 does not have enough dimensions (has 0, gufunc core with signature (n?,k),(k,m?)->(n?,m?) requires 1)

In [39]:
dl_dy.shape

(1, 20)