In [3]:
import numpy as np
import random
from mnist import MNIST
import matplotlib.pyplot as plt

In [4]:
# Load MNIST from disk. load_training() / load_testing() each parse the raw
# files and return an (images, labels) pair, so call each of them once and
# unpack the pair instead of re-reading the files for every array.
mndata = MNIST('../HW1/Dataset')

trainImages, trainLabels = mndata.load_training()
trainX = np.array(trainImages[:50000])
trainY = np.array(trainLabels[:50000])
# Drop the raw Python containers; only the numpy arrays are used afterwards.
del trainImages, trainLabels

testImages, testLabels = mndata.load_testing()
testX = np.array(testImages[:1000])
testY = np.array(testLabels[:1000])
del testImages, testLabels

In [5]:
# Hold out 2000 training samples for validation.
# replace=False is required: np.random.choice samples WITH replacement by
# default, which would put duplicate rows in the validation set and make the
# size of the remaining training set vary from run to run.
# NOTE: this cell overwrites trainX/trainY, so re-running it without re-running
# the loading cell shrinks the training set again.
valIndices = np.random.choice(len(trainX), 2000, replace=False)
# Every index not chosen for validation, in ascending order. setdiff1d avoids
# the linear scan of valIndices that a per-element "not in" test performs.
nonValIndices = np.setdiff1d(np.arange(len(trainX)), valIndices)

valX = trainX[valIndices]
valY = trainY[valIndices]

trainX = trainX[nonValIndices]
trainY = trainY[nonValIndices]

# Test split: first 2000 test samples. Load the files once and unpack.
testImages, testLabels = mndata.load_testing()
testX = np.array(testImages[:2000])
testY = np.array(testLabels[:2000])
del testImages, testLabels

def feat(data,i):
    """Return row ``i`` of ``data`` converted to a plain Python list."""
    row = data[i]
    return row.tolist()

def oneHot(clas, noOfClasses):
    """Return a float vector of length ``noOfClasses`` with a 1 at index ``clas``.

    All other entries are 0.
    """
    encoded = np.zeros(noOfClasses)
    encoded[clas] = 1
    return encoded

# Build the network inputs and targets for each split:
#   *X -> pixel rows divided by 256.0
#   *Y -> one-hot rows over the 10 digit classes
nTrain = trainX.shape[0]
trnX = np.array([feat(trainX, row) for row in range(nTrain)]) / 256.0
trnY = np.array([oneHot(trainY[row], 10) for row in range(nTrain)])

nTest = testX.shape[0]
tstX = np.array([feat(testX, row) for row in range(nTest)]) / 256.0
tstY = np.array([oneHot(testY[row], 10) for row in range(nTest)])

# valX / valY are replaced in place by their scaled / one-hot versions.
nVal = valX.shape[0]
valX = np.array([feat(valX, row) for row in range(nVal)]) / 256.0
valY = np.array([oneHot(valY[row], 10) for row in range(nVal)])

In [None]:
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)).

    ``x`` may be a scalar or any array-like; it is converted with np.array.
    """
    z = np.array(x)
    denominator = 1 + np.exp(-1 * z)
    return 1.0 / denominator

def lecun(x):
    """Scaled hyperbolic tangent activation: 1.7159 * tanh(2x/3)."""
    scaled = 2.0 * x / 3
    return 1.7159 * np.tanh(scaled)

def gradSigmoid(x):
    """Return x * (1 - x).

    This is the derivative of the logistic function when ``x`` is the
    sigmoid *output* (not its pre-activation input).
    """
    complement = 1 - x
    return x * complement

def gradLecun(x):
    """Derivative of ``1.7159 * tanh(2x/3)`` with respect to ``x``.

    ``x`` is the pre-activation input (the same argument the activation
    receives), scalar or numpy array.

    By the chain rule:
        d/dx [1.7159 * tanh(2x/3)] = 1.7159 * (2/3) * (1 - tanh(2x/3)**2)

    The previous version returned 1.7159 * (1 - (2x/3)**2), which drops both
    the tanh and the inner 2/3 factor and is therefore not the derivative.
    """
    t = np.tanh(2.0*x/3)
    return 1.7159*(2.0/3)*(1-t**2)

def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Each row is exponentiated and divided by its own sum, so every row of the
    result sums to 1.

    The row maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps np.exp from overflowing to inf
    (and the division from producing nan) when scores are large.
    """
    x = np.asarray(x)
    shifted = x - x.max(axis=1)[:, None]
    exps = np.exp(shifted)
    return exps/exps.sum(axis=1)[:, None]
    
# Best validation accuracy seen so far, and the test accuracy recorded at
# that same epoch (model selection by validation accuracy).
maxValAcc = 0
# The training loop stores its result in maxTestAcc; define it up front so it
# exists even if validation accuracy never rises above 0.
maxTestAcc = 0
testAcc = 0  # not written by the training loop; kept so existing references still resolve

# Learning rate and momentum coefficient.
lr = 0.00001
mom = 0.8

# Per-epoch accuracy histories (percent), appended to by the training loop.
trnAcc = []
valAcc = []
tstAcc = []

# randomly initialize our weights with mean 0
# (uniform in [-1, 1); layer sizes are 784 -> n_hid_1 -> n_hid_2 -> 10)
n_hid_1 = 80
n_hid_2 = 140
W1 = 2*np.random.random((784,n_hid_1)) - 1
bias1 = 2*np.random.random((n_hid_1)) - 1
W2 = 2*np.random.random((n_hid_1,n_hid_2)) - 1
bias2 = 2*np.random.random((n_hid_2)) - 1
W3 = 2*np.random.random((n_hid_2,10)) - 1
bias3 = 2*np.random.random((10)) - 1

# Previous update (velocity) for every parameter; starts at zero. The scalar
# 0 broadcasts against the arrays until the first real update replaces it.
prevW1=0
prevW2=0
prevW3=0
prevB1=0
prevB2=0
prevB3=0

def _accuracy(X, Y):
    """Return the percentage of rows of X that the network classifies correctly.

    Runs a plain forward pass with the current global W1..W3 / bias1..bias3
    (no momentum look-ahead) and compares the arg-max of the softmax output
    with the arg-max of the corresponding one-hot row of Y.
    """
    hidden1 = sigmoid(np.dot(X, W1) + bias1)
    hidden2 = sigmoid(np.dot(hidden1, W2) + bias2)
    probs = softmax(np.dot(hidden2, W3) + bias3)
    hits = np.argmax(probs, axis=1) == np.argmax(Y, axis=1)
    return np.sum(hits)*100.0/len(X)


# Full-batch training for 100 epochs. Gradients are evaluated at the
# "look-ahead" parameters (current value + mom * previous update), and each
# update is  curr = mom*prev - lr*grad  followed by  param += curr.
# range() is used instead of xrange() so the cell also runs on Python 3.
for j in range(100):
    # Look-ahead weights, shared by the forward and the backward pass.
    W1ahead = W1+mom*prevW1
    W2ahead = W2+mom*prevW2
    W3ahead = W3+mom*prevW3

    # Forward pass
    A1 = np.dot(trnX, W1ahead) + bias1 + mom*prevB1
    l1 = sigmoid(A1)

    A2 = np.dot(l1, W2ahead) + bias2 + mom*prevB2
    l2 = sigmoid(A2)

    A3 = np.dot(l2, W3ahead) + bias3 + mom*prevB3
    l3 = softmax(A3)

    # Errors in output layer: softmax output minus one-hot target
    # (this is the gradient w.r.t. A3 if the loss is cross-entropy).
    d3 = (l3 - trnY)
    dbias3 = np.sum(d3, axis = 0)

    # Delta of W3
    dW3 = np.dot(l2.T, d3)

    # Errors in 2nd hidden layer
    d2 = np.dot(d3, W3ahead.T)*gradSigmoid(l2)
    dbias2 = np.sum(d2, axis = 0)

    # Delta W2
    dW2 = np.dot(l1.T, d2)

    # Errors in 1st hidden layer
    d1 = np.dot(d2, W2ahead.T)*gradSigmoid(l1)
    dbias1 = np.sum(d1, axis = 0)

    # Delta W1
    dW1 = np.dot(trnX.T, d1)

    # Momentum updates
    currW3=mom*prevW3-lr*dW3
    currB3=mom*prevB3-lr*dbias3
    currW2=mom*prevW2-lr*dW2
    currB2=mom*prevB2-lr*dbias2
    currW1=mom*prevW1-lr*dW1
    currB1=mom*prevB1-lr*dbias1

    # Remember this step's updates for the next epoch. Each velocity must be
    # stored under its own name: assigning currB3 to prevW3 would overwrite
    # the W3 velocity with the bias-3 velocity and leave prevB3 stuck at 0.
    prevW3=currW3
    prevW2=currW2
    prevW1=currW1
    prevB3=currB3
    prevB2=currB2
    prevB1=currB1

    # Apply the updates in place
    W3 +=currW3
    bias3 +=currB3
    W2 +=currW2
    bias2 +=currB2
    W1 +=currW1
    bias1 +=currB1

    # Track accuracy (percent) on all three splits with the updated weights
    valAcc.append(_accuracy(valX, valY))
    trnAcc.append(_accuracy(trnX, trnY))
    tstAcc.append(_accuracy(tstX, tstY))

    # Model selection: keep the test accuracy from the epoch with the best
    # validation accuracy so far.
    if(valAcc[-1] > maxValAcc):
        maxValAcc = valAcc[-1]
        maxTestAcc = tstAcc[-1]

In [None]:
# Plot per-epoch accuracy for the three splits; the x axis is the 1-based
# epoch number.
accuracyCurves = (
    (trnAcc, 'Training Accuracy'),
    (valAcc, 'Validation Accuracy'),
    (tstAcc, 'Testing Accuracy'),
)
for history, curveLabel in accuracyCurves:
    epochs = list(range(1, len(history) + 1))
    plt.plot(epochs, history, label = curveLabel)

plt.xlabel("Iterations")
plt.ylabel("Accuracy")
plt.legend(loc='lower right', shadow=True)
plt.show()