#### The MLP algorithm is implemented in the mlp.py file and is run from this notebook.
#### This notebook uses the MNIST Handwritten Digit Classification Dataset.


In [1]:
from mlp import MLP 
import numpy as np

# This is to reload all changed modules every time before executing a new line.
# https://stackoverflow.com/questions/5364050/reloading-submodules-in-ipython
%load_ext autoreload
%autoreload 2

In [12]:
# Load the MNIST dataset: three splits (training, validation, test).
# Each split is indexed as split[0] (images) and split[1] (labels) in the cells below.
import gzip
import pickle

# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only run this cell on a trusted copy of mnist.pkl.gz.
# The `with` block guarantees the archive is closed even if unpickling fails.
# encoding='latin1' is presumably required because the file was pickled
# under Python 2 -- TODO confirm.
with gzip.open('mnist.pkl.gz', 'rb') as f:
    tset, vset, teset = pickle.load(f, encoding='latin1')

print(tset[0].shape, vset[0].shape, teset[0].shape)

(50000, 784) (10000, 784) (10000, 784)


In [3]:
import matplotlib.pyplot as plt # To install: pip install matplotlib

# Visualise one example of each digit class (0-9) from the test set,
# laid out on a 2 x 5 grid of panels.
fig, axes = plt.subplots(2, 5)
for digit, ax in enumerate(axes.flatten()):
    # Scalar index of the first test image whose label equals `digit`.
    # np.argwhere returns a (k, 1) array for a 1-D mask, hence the [0, 0].
    im_idx = np.argwhere(teset[1] == digit)[0, 0]
    # Each image is stored as a flat vector of 784 values; restore the 28 x 28 grid.
    plottable_image = np.reshape(teset[0][im_idx], (28, 28))
    ax.imshow(plottable_image, cmap='gray_r')
    # Label every panel so the figure can be read on its own.
    ax.set_title(str(digit))
    ax.axis('off')
fig.tight_layout()

In [4]:
# Work on a small subset of MNIST: 9000 images for training and 1000 for testing.

def one_hot(labels, n_classes=10):
    """Return the 1-of-N encoding of integer class labels.

    Parameters
    ----------
    labels : sequence of int
        Class index of each sample (expected to lie in 0 .. n_classes - 1).
    n_classes : int, optional
        Number of classes, i.e. the number of columns of the result.

    Returns
    -------
    numpy.ndarray
        Float array of shape (len(labels), n_classes) with a 1 in the column
        of each sample's class and 0 everywhere else.
    """
    labels = np.asarray(labels, dtype=int)
    n_samples = labels.shape[0]
    encoded = np.zeros((n_samples, n_classes))
    # Set one entry per row in a single vectorised assignment.
    encoded[np.arange(n_samples), labels] = 1
    return encoded

# Just use the first 9000 images for training
tread = 9000
train_in = tset[0][:tread,:]
train_tgt = one_hot(tset[1][:tread])

# and use the first 1000 test images for testing
teread = 1000
test_in = teset[0][:teread,:]
test_tgt = one_hot(teset[1][:teread])

#### Initialise the MLP classifier 

In [5]:
# Network layout: 784 inputs (one per image pixel), two hidden layers of
# 5 neurons each, and 10 outputs (one per class).
sizes = [784] + [5] * 2 + [10]
classifier = MLP(sizes)

In [6]:
# TODO: open the mlp.py file and implement the self.forwardPass and self.train methods.
# For now, keep the learning rate and the number of iterations at these values.
learning_rate = 0.1
n_iterations = 1000
classifier.train(train_in, train_tgt, learning_rate, n_iterations)

Iteration:  0  Error:  4117.6080419244745
Iteration:  100  Error:  4046.2617172938217
Iteration:  200  Error:  4042.0029740608347
Iteration:  300  Error:  4037.3965165083214
Iteration:  400  Error:  4030.1819396617757
Iteration:  500  Error:  4017.571654349916
Iteration:  600  Error:  3993.1614139404605
Iteration:  700  Error:  3940.0879531330797
Iteration:  800  Error:  3825.087084636112
Iteration:  900  Error:  3692.816363797018


In [7]:
# Evaluate the baseline model on the 1000-image test subset.
# The call reports the confusion matrix and the accuracy (see the output below).
classifier.evaluate(test_in, test_tgt)

The confusion matrix is:
[[ 85.   0.  37.  78. 103.  72.  71.  60.  64.  83.]
 [  0. 125.  41.   7.   2.   4.   4.  16.  11.   3.]
 [  0.   1.  26.  10.   0.   2.   6.   0.   9.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   1.   1.   0.   4.   0.   3.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.  12.  12.   4.   8.   6.  19.   5.   5.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]]
The accuracy is  25.6


In [17]:
# Hyper-parameters of the configuration kept as the "best" one.
best_sizes = [784] + [30] * 2 + [10]  # two hidden layers of 30 neurons each
best_beta, best_momentum = 1, 0.9
best_lr, best_niterations = 6, 600  # learning rate and number of training iterations

# Build, train and evaluate a network with these settings on the same
# training / test subsets as the baseline classifier.
best_classifier = MLP(sizes=best_sizes, beta=best_beta, momentum=best_momentum)
best_classifier.train(train_in, train_tgt, best_lr, best_niterations)
best_classifier.evaluate(test_in, test_tgt)

Iteration:  0  Error:  4102.675834023828
Iteration:  100  Error:  481.0678474791383
Iteration:  200  Error:  209.30731419077895
Iteration:  300  Error:  120.97961676182538
Iteration:  400  Error:  87.06166611461765
Iteration:  500  Error:  70.34299707496079
The confusion matrix is:
[[ 78.   0.   1.   0.   0.   0.   4.   0.   0.   0.]
 [  0. 125.   0.   0.   0.   0.   0.   1.   1.   0.]
 [  0.   1. 105.   3.   1.   0.   4.   3.   0.   0.]
 [  0.   0.   1.  97.   0.   4.   0.   2.   2.   0.]
 [  0.   0.   0.   0.  99.   1.   3.   0.   1.   3.]
 [  4.   0.   0.   5.   0.  78.   1.   0.   4.   2.]
 [  2.   0.   1.   0.   2.   0.  75.   0.   1.   0.]
 [  0.   0.   5.   1.   0.   0.   0.  89.   0.   3.]
 [  0.   0.   2.   1.   1.   3.   0.   0.  77.   2.]
 [  1.   0.   1.   0.   7.   1.   0.   4.   3.  84.]]
The accuracy is  90.7


In [18]:
# TODO: run this cell to save the best hyper-parameters together with the
# weights of the network that achieves the desired accuracy.

# Settings used to build and train best_classifier.
hyper_parameters = {
    'sizes': best_sizes,
    'beta': best_beta,
    'momentum': best_momentum,
    'lr': best_lr,
    'niterations': best_niterations,
}

# The three weight attributes of the trained network.
layer_weights = {
    'weights_1': best_classifier.weights1,
    'weights_2': best_classifier.weights2,
    'weights_3': best_classifier.weights3,
}

# Merge into a single dictionary (hyper-parameters first, then weights).
best_parameters = {**hyper_parameters, **layer_weights}

# Serialise everything to disk with the highest available pickle protocol.
with open('best_classifier.pkl', 'wb') as handle:
    pickle.dump(best_parameters, handle, pickle.HIGHEST_PROTOCOL)