In [49]:
import theano.tensor as T
import matplotlib.pyplot as plt
import numpy as np
import theano

from util import get_normalized_data, y2indicator

In [50]:
def error_rate(p, t):
    """Return the fraction of positions where predictions and targets differ.

    Args:
        p: predicted labels.
        t: target labels, compared element-wise against ``p``.

    Returns:
        The mean of the element-wise ``p != t`` comparison.
    """
    mismatches = p != t
    return np.mean(mismatches)

In [51]:
def relu(a):
    """Rectified linear unit: keep positive entries of ``a``, zero the rest.

    Implemented as a multiplication by the ``a > 0`` mask, so it works on
    anything that supports ``>`` and ``*`` (it is applied to a Theano
    expression in ``main``).
    """
    positive_mask = a > 0
    return a * positive_mask

In [52]:
def main():
    """Train a one-hidden-layer ReLU/softmax network with mini-batch gradient descent.

    Loads the data through ``get_normalized_data``, holds out the last 1000
    rows for evaluation, builds the model and its update rules in Theano, and
    makes ``max_iter`` passes over the training rows in batches of
    ``batch_sz``.

    Returns:
        list: cost values computed on the held-out rows, one entry every
        ``print_period`` batches, in the order they were computed.
    """
    X, Y = get_normalized_data()

    # Hyperparameters
    max_iter = 20
    print_period = 10
    lr = 0.00004
    reg = 0.01

    # Split the data: everything but the last 1000 rows is used for training
    Xtrain = X[:-1000,]
    Ytrain = Y[:-1000,]
    Xtest = X[-1000:,]
    Ytest = Y[-1000:,]
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    N, D = Xtrain.shape
    batch_sz = 500
    # Floor division keeps the batch count an integer on Python 2 and 3 alike;
    # a trailing partial batch (if any) is skipped.
    n_batches = N // batch_sz

    # Initial weights and biases
    M = 300
    K = 10
    # NOTE(review): 28 presumably equals sqrt(D) for 784-feature inputs - confirm
    W1_init = np.random.randn(D, M) / 28
    b1_init = np.zeros(M)
    W2_init = np.random.randn(M, K) / np.sqrt(M)
    b2_init = np.zeros(K)

    # Theano variables and model.
    # Plain variables are used for the inputs; shared variables are used for
    # the weights and biases because they need to be updated during training.
    thX = T.matrix('X')
    thT = T.matrix('T')
    W1 = theano.shared(W1_init, 'W1')
    b1 = theano.shared(b1_init, 'b1')
    W2 = theano.shared(W2_init, 'W2')
    b2 = theano.shared(b2_init, 'b2')
    thZ = relu(thX.dot(W1) + b1)
    thY = T.nnet.softmax(thZ.dot(W2) + b2)

    # Cost (cross-entropy plus squared-magnitude penalty on all parameters)
    # and prediction (index of the largest output per row)
    cost = -(thT * T.log(thY)).sum() + reg*((W1*W1).sum() + (b1*b1).sum() + (W2*W2).sum() + (b2*b2).sum())
    prediction = T.argmax(thY, axis=1)

    # Gradient-descent update expressions
    update_W1 = W1 - lr*T.grad(cost, W1)
    update_b1 = b1 - lr*T.grad(cost, b1)
    update_W2 = W2 - lr*T.grad(cost, W2)
    update_b2 = b2 - lr*T.grad(cost, b2)

    # train has no outputs; calling it only applies the updates to the
    # shared weights and biases
    train = theano.function(
        inputs=[thX, thT],
        updates=[(W1, update_W1), (b1, update_b1), (W2, update_W2), (b2, update_b2)]
    )

    # get_prediction evaluates cost and predicted labels without updating anything
    get_prediction = theano.function(
        inputs=[thX, thT],
        outputs=[cost, prediction]
    )

    costs = []
    for i in range(max_iter):
        for j in range(n_batches):
            X_batch = Xtrain[j*batch_sz:(j+1)*batch_sz,]
            Y_batch = Ytrain_ind[j*batch_sz:(j+1)*batch_sz,]
            train(X_batch, Y_batch)

            if j % print_period == 0:
                # Progress is measured on the held-out rows
                cost_val, prediction_val = get_prediction(Xtest, Ytest_ind)
                err = error_rate(prediction_val, Ytest)
                # Single parenthesised argument: prints identically on Python 2 and 3
                print("Cost/err at iteration i=%d, j=%d: %.3f / %.3f" % (i, j, cost_val, err))
                costs.append(cost_val)
    return costs

In [None]:
# Run training when this cell/script is the entry point; `costs` is kept at
# module level so the plotting cell below can use it.
if __name__ == "__main__":
    costs = main()

Reading in and transforming data...
Cost/err at iteration i=0, j=0: 2528.421 / 0.929
Cost/err at iteration i=0, j=10: 1847.631 / 0.537
Cost/err at iteration i=0, j=20: 1472.523 / 0.358
Cost/err at iteration i=0, j=30: 1232.594 / 0.280
Cost/err at iteration i=0, j=40: 1065.867 / 0.227
Cost/err at iteration i=0, j=50: 944.591 / 0.206
Cost/err at iteration i=0, j=60: 853.558 / 0.185
Cost/err at iteration i=0, j=70: 779.955 / 0.170
Cost/err at iteration i=0, j=80: 721.758 / 0.159
Cost/err at iteration i=1, j=0: 710.788 / 0.157
Cost/err at iteration i=1, j=10: 666.205 / 0.151
Cost/err at iteration i=1, j=20: 628.936 / 0.146
Cost/err at iteration i=1, j=30: 596.179 / 0.142
Cost/err at iteration i=1, j=40: 568.228 / 0.137
Cost/err at iteration i=1, j=50: 544.723 / 0.129
Cost/err at iteration i=1, j=60: 524.889 / 0.128
Cost/err at iteration i=1, j=70: 506.065 / 0.124
Cost/err at iteration i=1, j=80: 489.277 / 0.119
Cost/err at iteration i=2, j=0: 485.891 / 0.117
Cost/err at iteration i=2, j=10

In [None]:
# Plot the cost history returned by main(); both statements sit at top level
# (an indented plt.show() here is an IndentationError).
plt.plot(costs)
plt.xlabel("evaluation step")
plt.ylabel("cost")
plt.show()