In [1]:
from data import train_data, test_data

# Create the vocabulary.
vocab = list(set([w for text in train_data.keys() for w in text.split(' ')]))
vocab_size = len(vocab)
print('%d unique words found' % vocab_size) # 18 unique words found

18 unique words found


In [2]:
# Assign indices to each word.
word_to_idx = { w: i for i, w in enumerate(vocab) }
idx_to_word = { i: w for i, w in enumerate(vocab) }
print(word_to_idx['good']) # 16 (this may change)
print(idx_to_word[0]) # sad (this may change)

11
this


In [3]:
import numpy as np

def createInputs(text):
  '''
  Returns an array of one-hot vectors representing the words
  in the input text string.
  - text is a string
  - Each one-hot vector has shape (vocab_size, 1)
  '''
  inputs = []
  for w in text.split(' '):
    v = np.zeros((vocab_size, 1))
    v[word_to_idx[w]] = 1
    inputs.append(v)
  return inputs

In [4]:
## Forward Prop

import numpy as np

class RNN:
    def __init__(self, input_size, output_size, hidden_size = 64):

        self.Whh = np.random.randn(hidden_size, hidden_size)/1000
        self.Whx = np.random.randn(hidden_size, input_size)/1000        
        self.Why = np.random.randn(output_size, hidden_size)/1000

        self.bh = np.zeros((hidden_size,1))
        self.by = np.zeros((output_size,1))

    def forward(self, inputs):

        h = np.zeros((self.Whh.shape[0], 1))

        self.last_inputs = inputs
        self.last_hs = {0 : h}

        for i, x in enumerate(inputs):
            h = np.tanh(self.Whx @ x + self.Whh @ h + self.bh)      
            self.last_hs[i + 1] = h
        
        y = self.Why @ h + self.by

        return y, h

    def backward(self, dl_y, learning_rate = 2e-2):
        ''' Performs derivatives of parameters -> Whx, Whh, Why, bhy, bhh 
        - Inputs d_y : derivative of softmax'''
        n = len(self.last_inputs)

        dl_Why = dl_y @ self.last_hs[n].T
        dl_by = dl_y

        # Initialize values
        dl_Whh = np.zeros(self.Whh.shape)
        dl_Whx = np.zeros(self.Whx.shape)
        dl_bh = np.zeros(self.bh.shape)

        # Last state
        dl_h = self.Why.T @ dl_y

        # Backprop through time
        for t in reversed(range(n)):
            # An intermediate value: dL/dh * (1 - h^2)
            temp = ((1 - self.last_hs[t + 1] ** 2) * dl_h)

            dl_bh += temp

            dl_Whh += temp @ self.last_hs[t].T

            dl_Whx += temp @ self.last_inputs[t].T

            dl_h += self.Whh @ temp
        
        # Clip grad
        for d in [dl_Whx, dl_Why, dl_Whh, dl_bh,
         dl_by]:
            np.clip(d, -1, 1, out = d)

        # Update Weights
        self.Whh -= learning_rate * dl_Whh
        self.Whx -= learning_rate * dl_Whx
        self.Why -= learning_rate * dl_Why
        self.bh -= learning_rate * dl_bh
        self.by -= learning_rate * dl_by

# Training loop
rnn = RNN(vocab_size, 2)

In [5]:
import random

def processData(data, backprop=True):
  '''
  Returns the RNN's loss and accuracy for the given data.
  - data is a dictionary mapping text to True or False.
  - backprop determines if the backward phase should be run.
  '''
  items = list(data.items())
  random.shuffle(items)

  loss = 0
  num_correct = 0

  for x, y in items:
    inputs = createInputs(x)
    target = int(y)

    # Forward
    out, _ = rnn.forward(inputs)
    probs = softmax(out)

    # Calculate loss / accuracy
    loss -= np.log(probs[target])
    num_correct += int(np.argmax(probs) == target)

    if backprop:
      # Build dL/dy
      dl_dy = probs
      dl_dy[target] -= 1

      # Backward
      rnn.backward(dl_dy)

  return loss / len(data), num_correct / len(data)

In [6]:
def softmax(xs):
    return np.exp(xs)/np.sum(np.exp(xs))

In [14]:

for epoch in range(1000):
    # print(epoch)
    train_loss, train_acc = processData(train_data)

    if epoch % 100 == 99:
        print('--- Epoch %d' % (epoch + 1))
        print('Train:\tLoss %.3f | Accuracy: %.3f' % (train_loss, train_acc))

        test_loss, test_acc = processData(test_data, backprop=False)
        print('Test:\tLoss %.3f | Accuracy: %.3f' % (test_loss, test_acc))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
--- Epoch 100
Train:	Loss 0.691 | Accuracy: 0.552
Test:	Loss 0.706 | Accuracy: 0.500
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187


KeyboardInterrupt: 

## Example 2 : Num of times "1" in sequence

In [None]:
# Create dataset
nb_of_samples = 20
sequence_len = 10
# Create the sequences
X = np.zeros((nb_of_samples, sequence_len))
for row_idx in range(nb_of_samples):
    X[row_idx,:] = np.around(np.random.rand(sequence_len)).astype(int)
# Create the targets for each sequence
t = np.sum(X, axis=1)