In [4]:
import itertools
import numpy as np
import sys
import csv
from datetime import datetime
from utils import *
import matplotlib.pyplot as plt
from data_preprocessing import data_preprocessing

vocabulary_size = 8000

In [30]:
class RNN:

    def __init__(self, vocab_size, hidden_units=100, bptt_truncate=4):
        self.vocab_size = vocab_size
        self.hidden_units = hidden_units
        self.bptt_truncate = bptt_truncate
        # U is the Matric from input to hidden layer
        self.U = np.random.uniform(-np.sqrt(1.0/vocab_size), np.sqrt(1.0 / vocab_size), (hidden_units ,vocab_size))
        # V is from hidden layer to output
        self.V = np.random.uniform(-np.sqrt(1.0/hidden_units), np.sqrt(1.0 / hidden_units), (vocab_size, hidden_units))
        # W is the transistion matrix from St-1 to St
        self.W = np.random.uniform(-np.sqrt(1.0/hidden_units), np.sqrt(1.0 / hidden_units), (hidden_units, hidden_units))

    def feed_forward(self, input):
        T = len(input)
        s = np.zeros((T + 1, self.hidden_units))
        s[-1] = np.zeros(self.hidden_units)
        o = np.zeros((T, self.vocab_size))
        for t in range(T):
            s[t] = np.tanh(self.U[:,input[t]] + self.W.dot(s[t - 1]))
            o[t] = softmax(self.V.dot(s[t]))
        return o, s

    def prediction(self, x):
        o, s = self.feed_forward(x)
        return np.argmax(o, axis=1)

    def calculate_total_loss(self, x, y):
        L = 0;
        for i in range(len(y)):
            o, s = self.feed_forward(x[i])
            correct_word_prediction = o[np.arange(len(y[i])), y[i]] 
            L += -1 * np.sum(np.log(correct_word_prediction))
        return L

    def calculate_loss(self, x, y):
        N = np.sum([len(yi) for yi in y])
        return self.calculate_total_loss(x, y) / N

    def bptt(self, x, y):
        T = len(y)
        o, s = self.feed_forward(x)
        dLdU = np.zeros(self.U.shape)
        dLdV = np.zeros(self.V.shape)
        dLdW = np.zeros(self.W.shape)
        
        delta_o = o
        delta_o[np.arange(len(y)), y] -= 1.0
        for t in np.arange(T)[::-1]:
            dLdV += np.outer(delta_o[t], s[t].T)
            delta_t = self.V.T.dot(delta_o[t]) * (1 - (s[t] ** 2))
            for bptt_step in np.arange(max(0, t - self.bptt_truncate), t + 1):
                dLdW += np.outer(delta_t, s[bptt_step - 1])
                dLdU[:, x[bptt_step]] += delta_t
                delta_t = self.W.T.dot(delta_t) * (1 - s[bptt_step - 1] ** 2)
        return dLdU, dLdV, dLdW

    def numpy_sgd_step(self, x, y, learning_rate=0.005):
        dLdU, dLdV, dLdW = self.bptt(x, y)
        self.U -= learning_rate * dLdU
        self.V -= learning_rate * dLdV
        self.W -= learning_rate * dLdW

    def SGD(self, X_train, y_train, learning_rate=0.005, num_epoches=100, evaluate_loss_after=5):
        losses = []
        num_examples_seen = 0
        for epoch in range(num_epoches):
            print (epoch)
            if (epoch % evaluate_loss_after == 0):
                loss = self.calculate_total_loss(X_train, y_train)
                losses.append(loss)
                time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                print "%s: loss %d after num_example_see=%d epoch=%d" %(time, loss,num_examples_seen, epoch)
                if (len(losses) > 1):
                    if(losses[-1] > losses[-2]):
                        learning_rate = learning_rate * 0.5
            for i in range(len(y_train)):
                self.numpy_sgd_step(X_train[i], y_train[i], learning_rate)
                num_examples_seen += 1

In [6]:
def softmax(arr):
    exp_sum = np.sum(np.exp(arr))
    return np.exp(arr) / exp_sum

In [7]:
X_data, y_data = data_preprocessing()

Start Proprocessing Input Data
Finishing Data Processing


In [None]:
model = RNN(vocab_size=vocabulary_size)
model.SGD(X_data[:100], y_data[:100])

0
2018-08-15 22:38:55: loss 17578 after num_example_see=0 epoch=0
1
2
3
4
5
2018-08-15 22:39:34: loss 13277 after num_example_see=500 epoch=5
6
7
8
9
10
2018-08-15 22:40:13: loss 11152 after num_example_see=1000 epoch=10
11
12
13
14
15
2018-08-15 22:40:51: loss 10722 after num_example_see=1500 epoch=15
16
17
18
19
20
2018-08-15 22:41:29: loss 10527 after num_example_see=2000 epoch=20
21
22
23
24
25
2018-08-15 22:42:10: loss 10391 after num_example_see=2500 epoch=25
26
27
28
29
30
2018-08-15 22:42:49: loss 10292 after num_example_see=3000 epoch=30
31
32
33
34
35
2018-08-15 22:43:30: loss 10181 after num_example_see=3500 epoch=35
36
37
38
39
40
2018-08-15 22:44:10: loss 10093 after num_example_see=4000 epoch=40
41
42
43
44
45
2018-08-15 22:44:51: loss 10006 after num_example_see=4500 epoch=45
46
47
48
49
