In [1]:
# Import modules
from __future__ import print_function

import os
import sys
import gzip
import shutil
import struct
import numpy as np
from random import shuffle
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

# Config matplotlib for inline plotting
%matplotlib inline

In [2]:
## Dataloader Function Definitions
def loadData(url, nimage):
    """Download a gzipped IDX3 (MNIST-style) image file and return its pixels.

    The file is fetched to the scratch path './delete.me', parsed, and the
    scratch file is removed again whether or not parsing succeeds.

    Parameters
    ----------
    url : str
        Location of the gzipped image file.
    nimage : int
        Number of images to read from the start of the file.

    Returns
    -------
    numpy.ndarray
        uint8 array of shape (nimage, nrow * ncol); each row is one image
        flattened in the order it is stored in the file.

    Raises
    ------
    ValueError
        If the header magic is not 2051 (unsigned-byte, 3-dimensional IDX),
        if the file declares fewer than `nimage` images, or if the pixel
        payload is shorter than the header promises.
    """
    gzfile_name = './delete.me'
    print('Downloading ' + url)
    try:
        # The download happens inside the try block so that a partially
        # written scratch file is cleaned up when the transfer fails.
        urlretrieve(url, gzfile_name)
        print('Done.')
        with gzip.open(gzfile_name) as gz:
            # Header: four big-endian uint32 values.
            magic, navailable, nrow, ncol = struct.unpack('>IIII', gz.read(16))
            if magic != 2051:
                raise ValueError('Not an IDX3 unsigned-byte image file '
                                 '(magic number %d)' % magic)
            if nimage > navailable:
                raise ValueError('Requested %d images but the file only '
                                 'contains %d' % (nimage, navailable))
            nbytes = nimage * nrow * ncol
            raw = gz.read(nbytes)
            if len(raw) != nbytes:
                raise ValueError('Image payload is truncated: expected %d '
                                 'bytes, got %d' % (nbytes, len(raw)))
    finally:
        # urlretrieve may have failed before creating the file at all.
        if os.path.exists(gzfile_name):
            os.remove(gzfile_name)
    res = np.frombuffer(raw, dtype = np.uint8)
    return res.reshape((nimage, nrow * ncol))

def loadLabels(url, nimage):
    """Download a gzipped IDX1 (MNIST-style) label file and return its labels.

    The file is fetched to the scratch path './delete.me', parsed, and the
    scratch file is removed again whether or not parsing succeeds.

    Parameters
    ----------
    url : str
        Location of the gzipped label file.
    nimage : int
        Number of labels to read from the start of the file.

    Returns
    -------
    numpy.ndarray
        uint8 column vector of shape (nimage, 1).

    Raises
    ------
    ValueError
        If the header magic is not 2049 (unsigned-byte, 1-dimensional IDX),
        if the file declares fewer than `nimage` labels, or if the label
        payload is shorter than the header promises.
    """
    gzfile_name = './delete.me'
    print('Downloading ' + url)
    try:
        # The download happens inside the try block so that a partially
        # written scratch file is cleaned up when the transfer fails.
        urlretrieve(url, gzfile_name)
        print('Done.')
        with gzip.open(gzfile_name) as gz:
            # Header: two big-endian uint32 values.
            magic, navailable = struct.unpack('>II', gz.read(8))
            if magic != 2049:
                raise ValueError('Not an IDX1 unsigned-byte label file '
                                 '(magic number %d)' % magic)
            if nimage > navailable:
                raise ValueError('Requested %d labels but the file only '
                                 'contains %d' % (nimage, navailable))
            raw = gz.read(nimage)
            if len(raw) != nimage:
                raise ValueError('Label payload is truncated: expected %d '
                                 'bytes, got %d' % (nimage, len(raw)))
    finally:
        # urlretrieve may have failed before creating the file at all.
        if os.path.exists(gzfile_name):
            os.remove(gzfile_name)
    res = np.frombuffer(raw, dtype = np.uint8)
    return res.reshape((nimage, 1))

def download(dataurl, labelsurl, nimage):
    """Fetch images and labels and glue them into one table.

    The images are loaded first via loadData, then the labels via
    loadLabels; the label column is appended to the right of the pixel
    columns, so the last column of the returned array holds the labels.
    """
    pixel_rows = loadData(dataurl, nimage)
    label_column = loadLabels(labelsurl, nimage)
    combined = np.hstack((pixel_rows, label_column))
    return combined

In [3]:
## Activation Function Definitions
# Relu Function
def relu(M):
    """Element-wise rectified linear unit: the larger of each entry and 0."""
    floor = 0
    return np.maximum(floor, M)
# Derivative of Relu Function
def relu_p(M):
    """Element-wise derivative of ReLU: 1 where M > 0 and 0 where M <= 0.

    Parameters
    ----------
    M : array_like
        Pre-activation values.

    Returns
    -------
    numpy.ndarray
        A new array with the shape and dtype of `M`. `M` itself is not
        modified, so the caller can keep using its pre-activation values.
    """
    source = np.asarray(M)
    # Work on a copy: writing the 0/1 mask into `M` would clobber the
    # caller's array.
    grad = source.copy()
    grad[source <= 0] = 0
    grad[source > 0] = 1
    return grad
# Sigmoid Function
def sigmoid(M):
    """Element-wise logistic function 1 / (1 + exp(-M)).

    Computed as exp(-log(1 + exp(-M))) through np.logaddexp, which does not
    overflow: large positive inputs give exactly 1.0 and large negative
    inputs underflow to 0.0 instead of producing inf/inf = NaN.

    Parameters
    ----------
    M : array_like
        Input values; non-floating inputs are converted to float64.

    Returns
    -------
    numpy.ndarray or numpy floating scalar
        Values in [0, 1] with the same shape as `M`.
    """
    z = np.asarray(M)
    if not np.issubdtype(z.dtype, np.floating):
        # Negating unsigned integers would wrap around, so promote first.
        z = z.astype(np.float64)
    return np.exp(-np.logaddexp(0, -z))
# Derivative of Sigmoid Function
def sigmoid_p(M):
    """Element-wise derivative of the sigmoid: sigmoid(M) * (1 - sigmoid(M)).

    The sigmoid is evaluated once and reused for both factors.
    """
    s = sigmoid(M)
    return s*(1-s)
# Softmax Function
def softmax(z):
    """Row-wise softmax of a 2-D array of scores.

    Each row's maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps exp() from overflowing on
    large scores.

    Parameters
    ----------
    z : numpy.ndarray
        Scores of shape (n_rows, n_classes).

    Returns
    -------
    numpy.ndarray
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    shifted = z - z.max(axis=1, keepdims=True)
    e_z = np.exp(shifted)
    return e_z/e_z.sum(axis=1, keepdims=True)

In [4]:
## Feed Forward And Back Propagation Functions
def feed_forward(X,Y):
    """Run the two-layer network forward and compute the mean cross-entropy.

    Reads the weights from the module-level `parameters` dict
    ("W", "b1", "C", "b2").

    Parameters
    ----------
    X : numpy.ndarray
        Input batch with one sample per row; its column count must match
        the rows of parameters["W"].
    Y : array_like
        Targets with the same shape as the network output (used as
        per-class weights on the log-probabilities, e.g. one-hot rows).

    Returns
    -------
    tuple
        (Z, H, U, f, r): hidden pre-activation, hidden activation (ReLU),
        output scores, softmax probabilities and the loss averaged over the
        rows of `f`.
    """
    # Hidden layer, then output layer
    Z = np.matmul(X, parameters["W"])+parameters["b1"] # N x hidden
    H = relu(Z) # N x hidden
    U = np.matmul(H, parameters["C"])+parameters["b2"] # N x classes
    f = softmax(U) # N x classes

    # Compute the value of rho (loss).
    # Entries with a zero target contribute nothing, but 0 * log(0) would
    # evaluate to NaN; feed log() a harmless 1 at those positions instead.
    N = f.shape[0]
    Y = np.asarray(Y)
    safe_f = np.where(Y != 0, f, 1.0)
    r_sum = -np.sum(np.multiply(Y, np.log(safe_f)))
    r = r_sum/N
    return Z,H,U,f,r

def back_propagate(Z,H,U,f,X,Y):
    """Gradients of the mean cross-entropy loss w.r.t. all four parameters.

    Reads parameters["C"] from the module-level `parameters` dict.

    Parameters
    ----------
    Z, H, U, f : numpy.ndarray
        Intermediate values as returned by feed_forward for this batch
        (`U` is accepted for symmetry but not used).
    X : numpy.ndarray
        Input batch, one sample per row.
    Y : numpy.ndarray
        Targets with the same shape as `f`.

    Returns
    -------
    tuple
        (drdW, drdb1, drdC, drdb2), each averaged over the rows of `f`.
    """
    N = f.shape[0]
    # Float literal: a bare 1/N truncates to 0 under Python 2 integer
    # division whenever N > 1.
    inv_N = 1.0/N
    drdU = f - Y # N x classes
    drdb2 = inv_N*np.sum(drdU, axis=0) # classes
    drdC = inv_N*np.matmul(H.T,drdU) # hidden x classes
    d = np.matmul(drdU,parameters["C"].T) # N x hidden
    # Error signal at the hidden pre-activation; evaluated once and shared
    # by both first-layer gradients.
    delta = np.multiply(d,relu_p(Z)) # N x hidden
    drdb1 = inv_N*np.sum(delta,axis=0) # hidden
    drdW = inv_N*np.matmul(X.T,delta) # inputs x hidden
    return drdW,drdb1,drdC,drdb2

def predict(probability, labels):
    """Fraction of rows whose highest-probability class equals the label.

    Parameters
    ----------
    probability : numpy.ndarray
        Array of shape (n_samples, n_classes).
    labels : array_like
        Integer class labels, one per sample. Any shape holding exactly
        n_samples values is accepted (e.g. (n,) or (n, 1)); it is flattened
        before comparison so that it cannot broadcast against the
        predictions into an n x n matrix.

    Returns
    -------
    numpy.float64
        Accuracy in [0, 1].

    Raises
    ------
    ValueError
        If the number of labels differs from the number of rows.
    """
    guesses = np.argmax(probability,axis=1)
    truth = np.asarray(labels).reshape(-1)
    if truth.shape[0] != guesses.shape[0]:
        raise ValueError('Got %d labels for %d predictions'
                         % (truth.shape[0], guesses.shape[0]))
    # The mean of a boolean array is a float, so the result is a true
    # fraction on Python 2 as well.
    accuracy = np.mean(guesses == truth)
    return accuracy

In [5]:
## Download Data From Remote Source
# Remote locations and sample counts for the train split
train_image_url = 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz'
train_labels_url = 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz'
num_train_samples = 60000

# Remote locations and sample counts for the test split
test_image_url = 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz'
test_labels_url = 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz'
num_test_samples = 10000

# Fetch the train split; the last column of the result holds the labels
print("Downloading train data")
train = download(train_image_url, train_labels_url, num_train_samples)

# Fetch the test split in the same layout
print("Downloading test data")
test = download(test_image_url, test_labels_url, num_test_samples)

## Prepare Train and Test data
# Separate X and Y from data: every column but the last is a pixel,
# the last column is the class label
Xtrain = train[:,0:-1]
Xtest = test[:,0:-1]
train_size = Xtrain.shape[0]
test_size = Xtest.shape[0]

Ytrain_labels = train[:,-1]
Ytest_labels = test[:,-1]

# One hot encode Y data: row i gets a 1 in the column given by label i.
# Each split is encoded from its OWN labels (the test targets must come
# from Ytest_labels, not from the train labels).
Ytrain = np.zeros((train_size,10))
Ytest = np.zeros((test_size,10))

Ytrain[np.arange(train_size), Ytrain_labels] = 1
Ytest[np.arange(test_size), Ytest_labels] = 1

Downloading train data
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Done.
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Done.
Downloading test data
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Done.
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Done.


In [6]:
## Set trainable parameters for building the neural network
# Width of the hidden layer
num_elements = 100
# Weights are standard-normal draws scaled by sqrt(1/784); biases start at 0.
# NOTE(review): "C" has num_elements inputs yet reuses the 784-based scale
# of "W" - confirm this is intended.
parameters = dict(
    W=np.random.randn(784,num_elements)*np.sqrt(1./784.),
    b1=np.zeros(num_elements),
    C=np.random.randn(num_elements,10)*np.sqrt(1./784.),
    b2=np.zeros(10)
)

In [7]:
## Training
lr= 1e-4
num_epochs = 21
data_size = Xtrain.shape[0]
batch_size = 1
num_iterations = data_size//batch_size
idx_list=list(range(data_size))

# range(1, num_epochs) performs num_epochs - 1 passes over the data, so that
# is the total reported in the progress message.
total_epochs = num_epochs - 1

for epoch in range(1,num_epochs):
    # Draw a fresh random visiting order for this epoch ...
    shuffle(idx_list)
    curr_idx = 0
    for i in range(num_iterations):
        # Prepare Data for Training
        # ... and actually use it: pick the samples through the shuffled
        # indices instead of slicing the arrays in file order.
        batch_idx = idx_list[curr_idx:curr_idx+batch_size]
        Xinput = Xtrain[batch_idx,:]
        Yinput = Ytrain[batch_idx,:]
        # Feed forward And Back Propagate
        Z_,H_,U_,f_,r_ = feed_forward(Xinput, Yinput)
        drdW_,drdb1_,drdC_,drdb2_ = back_propagate(Z_,H_,U_,f_,Xinput,Yinput)
        # Update Parameters (plain gradient descent step)
        parameters["W"] -= lr*drdW_
        parameters["b1"] -= lr*drdb1_
        parameters["C"] -= lr*drdC_
        parameters["b2"] -= lr*drdb2_
        curr_idx += batch_size
    # Print Loss Message
    if (epoch % 1 == 0):
        # r_ is the loss of the last mini-batch of the epoch only.
        print("Epoch ",epoch,"/",total_epochs," is complete with a loss of ",r_)
        # Evaluate on the complete splits rather than hard-coded row counts.
        _,_,_,train_prob,train_loss = feed_forward(Xtrain,Ytrain)
        print("    Accuracy on the training set is ", predict(train_prob,Ytrain_labels))
        _,_,_,test_prob,test_loss = feed_forward(Xtest,Ytest)
        print("    Accuracy on the test set is ", predict(test_prob,Ytest_labels))
        print("    Current learning rate is ", lr)
    # Drop the learning rate once epoch 8 has finished (re-applied at 16).
    if (epoch % 8 == 0):
        lr = 1e-5

Epoch  1 / 21  is complete with a loss of  5.421095825220706e-06
    Accuracy on the training set is  0.9240166666666667
    Accuracy on the test set is  0.9214
    Current learning rate is  0.0001
Epoch  2 / 21  is complete with a loss of  1.8044077224836576e-06
    Accuracy on the training set is  0.9467833333333333
    Accuracy on the test set is  0.9414
    Current learning rate is  0.0001
Epoch  3 / 21  is complete with a loss of  2.8969534326487525e-07
    Accuracy on the training set is  0.9511333333333334
    Accuracy on the test set is  0.9412
    Current learning rate is  0.0001
Epoch  4 / 21  is complete with a loss of  1.9576114201149124e-05
    Accuracy on the training set is  0.954
    Accuracy on the test set is  0.9426
    Current learning rate is  0.0001
Epoch  5 / 21  is complete with a loss of  2.2403207092351586e-09
    Accuracy on the training set is  0.9648833333333333
    Accuracy on the test set is  0.9529
    Current learning rate is  0.0001
Epoch  6 / 21  is c