In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import theano
import theano.tensor as T 
import numpy as np
import matplotlib.pyplot as plt 

In [16]:
import theano
import theano.tensor as T
import numpy as np
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from util import init_weight, all_parity_pairs_with_sequence_labels


class SimpleRNN:
    """Single-hidden-layer recurrent network with a softmax output at every time step.

    The Theano graph, the shared parameters and the compiled functions are all
    created inside ``fit``; until ``fit`` has run the instance only carries the
    hidden layer size ``M``.
    """

    def __init__(self, M):
        """Store the hidden layer size; no Theano objects are created here."""
        self.M = M # hidden layer size

    def fit(self, X, Y, learning_rate=10e-1, mu=0.99, reg=1.0, activation=T.tanh, epochs=100, show_fig=False):
        """Build the network and train it one sequence at a time with momentum SGD.

        Parameters
        ----------
        X : indexable collection of N 2-D arrays, each of shape T(n) x D
            Input sequences. They are fed to an ``fmatrix`` input; float64
            arrays are down-cast rather than rejected.
        Y : 2-D numpy array with one row of integer labels per sequence
            One label per time step. Labels index the softmax output columns,
            so they must lie in ``0 .. K-1`` where K is the number of distinct
            values found in ``Y``.
        learning_rate : float
            Step size. Note that the default ``10e-1`` equals 1.0.
        mu : float
            Momentum coefficient.
        reg : float
            Currently unused; kept so existing callers that pass it keep working.
        activation : callable
            Hidden-layer nonlinearity applied to Theano expressions.
        epochs : int
            Maximum number of passes over the data. Training stops earlier once
            the final-step prediction is correct for every sequence in an epoch.
        show_fig : bool
            If true, plot the per-epoch summed cost when training ends.

        Side effects
        ------------
        Sets ``self.f``, the shared parameters ``self.Wx``, ``self.Wh``,
        ``self.bh``, ``self.h0``, ``self.Wo``, ``self.bo``, the list
        ``self.params`` and the compiled functions ``self.predict_op`` and
        ``self.train_op``. Prints one progress line per epoch. Returns None.
        """
        D = X[0].shape[1] # X is of size N x T(n) x D
        K = len(set(Y.flatten()))
        N = len(Y)
        M = self.M
        self.f = activation

        # initial weights
        Wx = init_weight(D, M)
        Wh = init_weight(M, M)
        bh = np.zeros(M)
        h0 = np.zeros(M)
        Wo = init_weight(M, K)
        bo = np.zeros(K)

        # make them theano shared
        self.Wx = theano.shared(Wx)
        self.Wh = theano.shared(Wh)
        self.bh = theano.shared(bh)
        self.h0 = theano.shared(h0)
        self.Wo = theano.shared(Wo)
        self.bo = theano.shared(bo)
        self.params = [self.Wx, self.Wh, self.bh, self.h0, self.Wo, self.bo]

        thX = T.fmatrix('X')
        thY = T.ivector('Y')

        def recurrence(x_t, h_t1):
            # One time step: new hidden state from the current input and the
            # previous hidden state, then class probabilities for this step.
            h_t = self.f(x_t.dot(self.Wx) + h_t1.dot(self.Wh) + self.bh)
            y_t = T.nnet.softmax(h_t.dot(self.Wo) + self.bo)
            return h_t, y_t

        # h0 seeds the hidden-state recurrence; the softmax output is not fed back.
        [h, y], _ = theano.scan(
            fn=recurrence,
            outputs_info=[self.h0, None],
            sequences=thX,
            n_steps=thX.shape[0],
        )

        # Drop the middle axis of the stacked per-step softmax outputs so that
        # each row holds the class probabilities of one time step.
        py_x = y[:, 0, :]
        prediction = T.argmax(py_x, axis=1)

        # Mean negative log-likelihood of the target label at every time step.
        cost = -T.mean(T.log(py_x[T.arange(thY.shape[0]), thY]))
        grads = T.grad(cost, self.params)
        # One zero-initialised velocity per parameter for the momentum term.
        dparams = [theano.shared(p.get_value()*0) for p in self.params]

        updates = [
            (p, p + mu*dp - learning_rate*g) for p, dp, g in zip(self.params, dparams, grads)
        ] + [
            (dp, mu*dp - learning_rate*g) for dp, g in zip(dparams, grads)
        ]

        # allow_input_downcast lets float64 / int64 numpy inputs be cast to the
        # float32 / int32 graph inputs instead of raising a TypeError.
        self.predict_op = theano.function(
            inputs=[thX],
            outputs=prediction,
            allow_input_downcast=True,
        )
        self.train_op = theano.function(
            inputs=[thX, thY],
            outputs=[cost, prediction, y],
            updates=updates,
            allow_input_downcast=True,
        )

        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            n_correct = 0
            # Separate name so the symbolic `cost` above is not rebound.
            epoch_cost = 0
            for j in range(N):
                c, p, _rout = self.train_op(X[j], Y[j])
                epoch_cost += c
                # Only the final time step counts towards the classification rate.
                if p[-1] == Y[j,-1]:
                    n_correct += 1
            # Single pre-formatted argument: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("i: %s cost: %s classification rate: %s" % (i, epoch_cost, float(n_correct)/N))
            costs.append(epoch_cost)
            if n_correct == N:
                break

        if show_fig:
            plt.plot(costs)
            plt.show()


def parity(B=12, learning_rate=10e-5, epochs=200):
    """Train a SimpleRNN on the parity data set and return the fitted model.

    B is forwarded to ``all_parity_pairs_with_sequence_labels`` to produce the
    inputs and per-step labels. The network always uses 4 hidden units with a
    sigmoid activation and never shows the cost plot. ``learning_rate`` (the
    default ``10e-5`` equals 1e-4) and ``epochs`` are passed through to
    ``SimpleRNN.fit``.
    """
    sequences, labels = all_parity_pairs_with_sequence_labels(B)

    model = SimpleRNN(4)
    model.fit(
        sequences,
        labels,
        learning_rate=learning_rate,
        epochs=epochs,
        activation=T.nnet.sigmoid,
        show_fig=False
    )
    return model

In [None]:
# Train the parity network with parity()'s default arguments and keep the
# fitted model; fit() prints one progress line per epoch.
rnn = parity()

i: 0 cost: 2842.6934151 classification rate: 0.504146341463
i: 1 cost: 2839.21896554 classification rate: 0.498536585366
i: 2 cost: 2836.27609881 classification rate: 0.5
i: 3 cost: 2833.38878307 classification rate: 0.499268292683
i: 4 cost: 2831.77397132 classification rate: 0.5
i: 5 cost: 2830.10101779 classification rate: 0.5
i: 6 cost: 2829.53522473 classification rate: 0.5
i: 7 cost: 2828.8779769 classification rate: 0.5
i: 8 cost: 2829.06817215 classification rate: 0.5
i: 9 cost: 2829.01570062 classification rate: 0.5
i: 10 cost: 2828.84086757 classification rate: 0.5
i: 11 cost: 2828.48639747 classification rate: 0.5
i: 12 cost: 2828.02780118 classification rate: 0.5
i: 13 cost: 2828.38075468 classification rate: 0.5
i: 14 cost: 2828.65679666 classification rate: 0.5
i: 15 cost: 2828.18672178 classification rate: 0.5
i: 16 cost: 2827.95030588 classification rate: 0.5
i: 17 cost: 2828.07416181 classification rate: 0.5
i: 18 cost: 2827.85356883 classification rate: 0.5
i: 19 cost

In [15]:
# Feed the 5-step bit sequence 1,0,1,0,1 as a (5, 1) float32 column and show
# the predicted label for each time step.
rnn.predict_op(np.array([1, 0, 1, 0, 1], dtype=np.float32).reshape(-1, 1))

array([1, 1, 0, 0, 1])