# <center>MNIST Handwritten Digit Recognizer</center>

In [1]:
# Initial Imports
from NeuralNetwork import NeuralNetwork
import numpy as np
import gzip
import pickle
from pathlib import Path

Define a function to load the data:

In [2]:
def load_mnist(path='mnist.pkl.gz'):
    """Load the pickled MNIST splits from a gzip-compressed file.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the gzip-compressed pickle. Defaults to ``'mnist.pkl.gz'``
        in the current working directory, so existing zero-argument calls
        behave exactly as before.

    Returns
    -------
    tuple
        ``(training_data, validation_data, test_data)``, exactly as stored
        in the pickle.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    ValueError
        If the pickled object does not unpack into exactly three items.
    """
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file -- only point this at a dataset file you trust.
    with gzip.open(path, 'rb') as f:
        # encoding='latin1' is what the pickle docs prescribe for reading
        # NumPy arrays pickled under Python 2 (presumably how this file was
        # produced -- confirm against the dataset's origin).
        training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
    return training_data, validation_data, test_data

Let's see what the data looks like

In [3]:
# Pull the three MNIST splits into the notebook namespace. The printed output
# shows the training and test splits as (images, labels) pairs of arrays.
training_data, valid_data, test_data = load_mnist()

# Headline numbers: how many images per split and the shape of one image.
print('num training data points:', training_data[0].shape[0])
print('num test data points:    ', test_data[0].shape[0])
print('data shape:              ', training_data[0].shape[1:])

# Raw dumps of both splits, each preceded by a blank line.
print()
print('training data\n', training_data)
print()
print('test data\n', test_data)

num training data points: 50000
num test data points:     10000
data shape:               (784,)

training data
 (array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), array([5, 0, 4, ..., 8, 4, 8]))

test data
 (array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), array([7, 2, 1, ..., 4, 5, 6]))


Put the data into a form usable by the learner

In [4]:
def label_to_col_vector(y, num_classes=10):
    """Return a one-hot column vector encoding the class label ``y``.

    Parameters
    ----------
    y : int
        Index of the class to mark; used directly as a row index into the
        result.
    num_classes : int, optional
        Number of rows in the vector. Defaults to 10 (one per digit), the
        value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_classes, 1)`` that is all zeros except for a
        1.0 in row ``y``.

    Raises
    ------
    IndexError
        If ``y`` is out of bounds for ``num_classes`` rows.
    """
    v = np.zeros((num_classes, 1))
    v[y] = 1.
    return v

In [5]:
# Training pairs: every image is reshaped into a 784x1 column and its label
# is turned into a column vector by label_to_col_vector.
tr_data = [
    (image.reshape((784, 1)), label_to_col_vector(label))
    for image, label in zip(*training_data[:2])
]

# Test pairs: same 784x1 image column, but the label is left as it is stored.
te_data = [
    (image.reshape((784, 1)), label)
    for image, label in zip(*test_data[:2])
]

Train neural network (or unpickle it if we have one saved) and evaluate it on test data after each epoch: 
  
  epochs = 30  
  batch-size = 10  
  eta = 3.0  

In [6]:
# Reuse the pickled network when 'nn.pkl' is already on disk; otherwise train
# a fresh 784-30-10 network and pickle it so later runs can skip training.
if Path('nn.pkl').exists():
    print('unpickling nn...')
    with Path('nn.pkl').open('rb') as f:
        nn = pickle.load(f)
else:
    nn = NeuralNetwork([784, 30, 10])
    print('training...')
    # Positional arguments after the data follow the settings listed in the
    # markdown above: 30 epochs, batch size 10, eta 3.0.
    nn.train(tr_data, 30, 10, 3.0, test_data=te_data)
    with Path('nn.pkl').open('wb') as f:
        pickle.dump(nn, f)

unpickling nn...


94% accuracy!  
Try tweaking the hyperparameters to see if we can get a better result.

In [8]:
# Second experiment: same 784-30-10 layout, but 50 epochs instead of 30.
# with_cost=True appears to add the per-epoch cost lines seen in the output.
nn2 = NeuralNetwork([784, 30, 10])
nn2.train(
    tr_data,
    50,   # epochs (raised from 30)
    10,   # batch size
    3.0,  # eta
    test_data=te_data,
    with_cost=True,
)

Epoch 1: 9085 / 10000
   cost: 0.8042
Epoch 2: 9272 / 10000
   cost: 0.6059
Epoch 3: 9295 / 10000
   cost: 0.62595
Epoch 4: 9393 / 10000
   cost: 0.57875
Epoch 5: 9349 / 10000
   cost: 0.58885
Epoch 6: 9408 / 10000
   cost: 0.54795
Epoch 7: 9435 / 10000
   cost: 0.50705
Epoch 8: 9436 / 10000
   cost: 0.5167
Epoch 9: 9459 / 10000
   cost: 0.4924
Epoch 10: 9462 / 10000
   cost: 0.51335
Epoch 11: 9450 / 10000
   cost: 0.52455
Epoch 12: 9483 / 10000
   cost: 0.4747
Epoch 13: 9466 / 10000
   cost: 0.492
Epoch 14: 9510 / 10000
   cost: 0.4529
Epoch 15: 9507 / 10000
   cost: 0.44825
Epoch 16: 9440 / 10000
   cost: 0.5033
Epoch 17: 9468 / 10000
   cost: 0.50845
Epoch 18: 9487 / 10000
   cost: 0.44165
Epoch 19: 9493 / 10000
   cost: 0.4666
Epoch 20: 9509 / 10000
   cost: 0.41415
Epoch 21: 9489 / 10000
   cost: 0.4593
Epoch 22: 9500 / 10000
   cost: 0.4377
Epoch 23: 9484 / 10000
   cost: 0.479
Epoch 24: 9496 / 10000
   cost: 0.47395
Epoch 25: 9495 / 10000
   cost: 0.4359
Epoch 26: 9504 / 10000
 

TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'

Increasing the number of training epochs yielded only a small improvement in accuracy on the test data.  

Try increasing the complexity of the model by adding another hidden layer as well as more neurons.

In [None]:
# Same load-or-train pattern for the larger 784-128-64-10 network, cached in
# 'nn_complex.pkl'. Note that this rebinds the name `nn`.
if Path('nn_complex.pkl').exists():
    print('unpickling nn...')
    with Path('nn_complex.pkl').open('rb') as f:
        nn = pickle.load(f)
else:
    nn = NeuralNetwork([784, 128, 64, 10])
    print('training...')
    # Same positional training arguments as the first training cell
    # (30, 10, 3.0).
    nn.train(tr_data, 30, 10, 3.0, test_data=te_data)
    with Path('nn_complex.pkl').open('wb') as f:
        pickle.dump(nn, f)