In [2]:
### I have adapted the example code given in the lecture notes for IE 498 / CS 547

import numpy as np
import h5py
import time
import copy
from random import randint
# Open the MNIST HDF5 archive read-only and print the names of the datasets it holds
MNIST_data = h5py.File('MNISTdata_1.hdf5', mode='r')
print(list(MNIST_data.keys()))

['x_test', 'x_train', 'y_test', 'y_train']


In [3]:
# Pull every split into memory as a NumPy array: float32 images, int32 labels.
# Only column 0 of the label datasets is used.
x_train = MNIST_data['x_train'][:].astype(np.float32)
y_train = MNIST_data['y_train'][:, 0].astype(np.int32)
x_test = MNIST_data['x_test'][:].astype(np.float32)
y_test = MNIST_data['y_test'][:, 0].astype(np.int32)
# Everything needed has been copied out, so the file handle can be released
MNIST_data.close()

In [11]:
####################################################################################
#Implementation of stochastic gradient descent algorithm


#number of inputs (a flattened 28x28 image)
d = 28*28
#number of outputs
k = 10
#dim of hiddenlayer
dh = 100

#Randomly create the parameter arrays.
#Each weight matrix is standard-normal noise scaled by 1/sqrt(fan-in), i.e. by
#the number of inputs that feed the layer: d for the hidden layer W and dh for
#the output layer CT (the output layer was previously scaled by d as well).
model = {}
model['W'] = np.random.randn(dh, d) / np.sqrt(d)
model['CT'] = np.random.randn(k, dh) / np.sqrt(dh)
model['b1'] = np.zeros((dh, 1))
model['b2'] = np.zeros((k, 1))

#Container for the gradients with the same keys and shapes as the parameters;
#its entries are overwritten by backward()
model_grads = copy.deepcopy(model)

#define functions for SGD
def softmax_function(z):
    """Return the softmax of z, normalised over ALL elements of z.

    The maximum of z is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps np.exp from overflowing to inf
    (which would turn the ratio into NaN) when the logits are large.

    Parameters
    ----------
    z : array_like
        Logits; any shape.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as z whose entries are positive and sum to 1.
    """
    shifted = z - np.max(z)
    exps = np.exp(shifted)
    return exps / np.sum(exps)

def sigmoid(z):
    """Element-wise logistic sigmoid 1 / (1 + exp(-z)).

    Only exp(-|z|) is ever evaluated, so the exponential stays in (0, 1] and
    cannot overflow. The naive form exp(z) / (1 + exp(z)) evaluates to
    inf / inf = NaN for large positive z.

    Parameters
    ----------
    z : array_like
        Pre-activations; any shape.

    Returns
    -------
    numpy.ndarray
        Values in [0, 1] with the same shape as z.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- same function, safe form
    numerator = np.where(z >= 0, 1.0, decay)
    return numerator / (1.0 + decay)

def forward(x,y, model):
    """Run one sample through the network and cache the intermediate values.

    Computes Z = W x + b1, H = sigmoid(Z), U = CT H + b2 and p = softmax(U).
    Z, H, U and p are stored in `model` under the keys 'Z', 'H', 'U' and 'p'
    so that backward() can reuse them.

    Parameters
    ----------
    x : numpy.ndarray
        One input sample; it is flattened to a column vector, so its number
        of elements must match the second dimension of model['W'].
    y : int
        Label of the sample. Not used here; kept for interface compatibility.
    model : dict
        Holds the parameters 'W', 'b1', 'CT', 'b2'; mutated in place with the
        cached activations.

    Returns
    -------
    numpy.ndarray
        Column vector of softmax outputs.
    """
    # Flatten to a column without hard-coding the input size
    x = x.reshape((-1, 1))
    Z = np.dot(model['W'], x) + model['b1']
    H = sigmoid(Z)
    U = np.dot(model['CT'], H) + model['b2']
    f = softmax_function(U)
    # Cache intermediates for the backward pass
    model['H'] = H
    model['Z'] = Z
    model['U'] = U
    model['p'] = f
    return f

def sigmoidprime(z):
    """Element-wise derivative of the logistic sigmoid, s(z) * (1 - s(z))."""
    activation = 1/(1+np.exp(-z))
    complement = 1-activation
    return activation * complement

def backward(x,y,p, model, model_grads):
    """Back-propagate one sample and store the gradients in `model_grads`.

    The quantities computed here are gradients of log p[y] (the log-likelihood
    of the true label), not of a loss: for a softmax output the gradient with
    respect to U is e_y - p. The training loop therefore ADDS LR * gradient.

    Parameters
    ----------
    x : numpy.ndarray
        The input sample that was passed to forward(); flattened to a row.
    y : int
        Index of the true class.
    p : numpy.ndarray
        Softmax output returned by forward() for this sample.
    model : dict
        Parameters plus the cached 'H' and 'Z' from the forward pass.
    model_grads : dict
        Updated in place under the keys 'W', 'CT', 'b1', 'b2'.

    Returns
    -------
    dict
        The same `model_grads` object.
    """
    # e_y - p : -1.0*p makes a new array, so p itself is not modified
    dpdU = -1.0*p
    dpdU[y] = dpdU[y] + 1.0
    dpdb2 = dpdU
    dpdC = np.dot(dpdU,model['H'].T)
    # Push the signal back through the output layer, then through the sigmoid
    delta = np.dot(model['CT'].T,dpdU)
    sigprime = sigmoidprime(model['Z'])
    dpdb1 = np.multiply(delta, sigprime)
    # Outer product with the flattened input; size is inferred, not hard-coded
    dpdW = np.dot(dpdb1,x.reshape(1,-1))
    model_grads['W'] = dpdW
    model_grads['CT'] = dpdC
    model_grads['b1'] = dpdb1
    model_grads['b2'] = dpdb2
    return model_grads

#### train the model

import time
time1 = time.time()
LR = .01
num_epochs = 20
num_train = len(x_train)
for epochs in range(num_epochs):
    #Learning rate schedule: drop by a factor of 10 once the (0-based) epoch
    #index exceeds 5, 10 and 15
    if (epochs > 5):
        LR = 0.001
    if (epochs > 10):
        LR = 0.0001
    if (epochs > 15):
        LR = 0.00001

    #Correct predictions are counted while the parameters are still being
    #updated, so this is a running accuracy over the epoch
    total_correct = 0
    for step in range(num_train):
        #Draw one training sample uniformly at random (with replacement)
        n_random = randint(0, num_train-1)
        y = y_train[n_random]
        x = x_train[n_random][:]
        p = forward(x, y, model)
        prediction = np.argmax(p)
        if (prediction == y):
            total_correct += 1
        model_grads = backward(x,y,p, model, model_grads)
        #backward() returns gradients of the log-likelihood, hence "+"
        model['W'] = model['W'] + LR*model_grads['W']
        model['CT'] = model['CT'] + LR*model_grads['CT']
        model['b1'] = model['b1'] + LR*model_grads['b1']
        model['b2'] = model['b2'] + LR*model_grads['b2']
    print("training in process..", "(",epochs+1,"/",num_epochs,")")
    #np.float was removed from NumPy; the builtin float is the same type
    print("Accuracy is",(total_correct/float(num_train))*100,"%")

time2 = time.time()
print("Processing time was", time2-time1)
###################################################### #test data
#Evaluate the trained model on the held-out test set
total_correct = 0
for n in range(len(x_test)):
    y = y_test[n]
    x = x_test[n][:]
    p = forward(x, y, model)
    prediction = np.argmax(p)
    if (prediction == y):
        total_correct += 1

#Scale to a percentage so the value matches the "%" label (and the training
#printout); np.float was removed from NumPy, so the builtin float is used
print("Test Result: ",(total_correct/float(len(x_test)))*100, "%")


training in process.. ( 1 / 20 )
Accuracy is 87.62333333333333 %
training in process.. ( 2 / 20 )
Accuracy is 94.10833333333333 %
training in process.. ( 3 / 20 )
Accuracy is 95.45333333333333 %
training in process.. ( 4 / 20 )
Accuracy is 96.35000000000001 %
training in process.. ( 5 / 20 )
Accuracy is 97.02333333333333 %
training in process.. ( 6 / 20 )
Accuracy is 97.555 %
training in process.. ( 7 / 20 )
Accuracy is 97.84333333333333 %
training in process.. ( 8 / 20 )
Accuracy is 98.04 %
training in process.. ( 9 / 20 )
Accuracy is 97.94166666666668 %
training in process.. ( 10 / 20 )
Accuracy is 98.1 %
training in process.. ( 11 / 20 )
Accuracy is 98.11833333333333 %
training in process.. ( 12 / 20 )
Accuracy is 98.165 %
training in process.. ( 13 / 20 )
Accuracy is 98.08666666666667 %
training in process.. ( 14 / 20 )
Accuracy is 98.03666666666668 %
training in process.. ( 15 / 20 )
Accuracy is 98.055 %
training in process.. ( 16 / 20 )
Accuracy is 98.13499999999999 %
training in