In [1]:
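# `pickle` is imported under the name `cPickle` so the same code runs on Python 2 and 3.
# On Python 2 you may replace the import below with `import cPickle` (the faster C implementation).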
import numpy as np
import pickle as cPickle
import os
import gzip

# Prepare MNIST data
- We first define a function for downloading and loading MNIST.
- Then, we split up the dataset into a train, validation, and test set.

In [2]:
def mnist(datasets_dir='./data'):
    """Download (if necessary) and load the MNIST dataset.

    The gzipped pickle ``mnist.pkl.gz`` is cached inside ``datasets_dir`` and
    is only fetched from the web when no cached copy exists.

    Parameters
    ----------
    datasets_dir : str
        Directory holding the cached file. It is created, including any
        missing parent directories, when it does not exist yet.

    Returns
    -------
    list
        ``[(train_x, train_y), (valid_x, valid_y), (test_x, test_y)]`` where
        every ``*_x`` is a float32 array reshaped to ``(n, 1, 28, 28)`` and
        every ``*_y`` is an int32 array.
    """
    if not os.path.exists(datasets_dir):
        # makedirs (not mkdir) so nested paths such as 'a/b/data' work too.
        os.makedirs(datasets_dir)
    data_file = os.path.join(datasets_dir, 'mnist.pkl.gz')
    if not os.path.exists(data_file):
        print('... downloading MNIST from the web')
        # Resolve urlretrieve for Python 3 / Python 2 without issuing a
        # throw-away network request just to probe for the attribute.
        try:
            from urllib.request import urlretrieve  # Python 3
        except ImportError:
            from urllib import urlretrieve  # Python 2
        url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        # Download to a side file first: an interrupted transfer must not
        # leave a truncated 'mnist.pkl.gz' that later runs mistake for a
        # complete cache.
        partial_file = data_file + '.part'
        urlretrieve(url, partial_file)
        os.rename(partial_file, data_file)

    print('... loading data')
    # NOTE(security): unpickling executes arbitrary code embedded in the file.
    # Only load archives obtained from a source you trust.
    with gzip.open(data_file, 'rb') as f:
        try:
            # Python 3: the archive was pickled under Python 2, hence latin1.
            splits = cPickle.load(f, encoding="latin1")
        except TypeError:
            # Python 2: load() does not accept the `encoding` keyword.
            splits = cPickle.load(f)
    train_set, valid_set, test_set = splits

    def _prepare(split):
        """Cast one (images, labels) pair and reshape images to (n, 1, 28, 28)."""
        images, labels = split
        images = images.astype('float32').reshape(images.shape[0], 1, 28, 28)
        return images, labels.astype('int32')

    rval = [_prepare(train_set), _prepare(valid_set), _prepare(test_set)]
    print('... done loading data')
    return rval

Now download the data and reshape it.

In [3]:
# Fetch the three MNIST splits, then unpack each into images and labels.
Dtrain, Dval, Dtest = mnist()
(X_train, y_train), (X_valid, y_valid), (X_test, y_test) = Dtrain, Dval, Dtest

... loading data
... done loading data


`Dtrain` contains 50k images which are of size 28 x 28 pixels. Hence:

In [4]:
# Report the array shapes of the training images and labels.
train_images_shape = np.shape(X_train)
train_labels_shape = np.shape(y_train)
print("X_train shape: {}".format(train_images_shape))
print("y_train shape: {}".format(train_labels_shape))

X_train shape: (50000, 1, 28, 28)
y_train shape: (50000,)


`y_train` is automatically converted to a one-hot encoding inside the `train()` function.
However, we need to reshape `X_train`, `X_valid`, and `X_test`, because our `Network` expects flat vectors of size 28*28 as input!

In [5]:
# Flatten every image to a single row vector; the sample axis is kept as-is.
X_train, X_valid, X_test = (
    images.reshape(images.shape[0], -1)
    for images in (X_train, X_valid, X_test)
)
print("Reshaped X_train size: {}".format(X_train.shape))
print("Reshaped X_valid size: {}".format(X_valid.shape))
print("Reshaped X_test size: {}".format(X_test.shape))

Reshaped X_train size: (50000, 784)
Reshaped X_valid size: (10000, 784)
Reshaped X_test size: (10000, 784)


# Define a Network
We want to design a network that gets less than 3% validation error.

In [6]:
from nnlib.layers import *
from nnlib.network import *

# Build a small MLP: input -> 100 tanh units -> 100 relu units -> 10 linear
# units -> softmax output.
# The first entry of the input shape is None to indicate that a variable
# number of inputs will be fed to the network.
input_shape = (None, 28*28)
input_layer = InputLayer(input_shape)

hidden_tanh = FullyConnectedLayer(
    input_layer,
    num_units=100,
    init_stddev=0.1,
    activation_fun=Activation('tanh'),
)

hidden_relu = FullyConnectedLayer(
    hidden_tanh,
    num_units=100,
    init_stddev=0.1,
    activation_fun=Activation('relu'),
)

# The last fully connected layer has no nonlinearity of its own;
# the softmax is applied by the output layer stacked on top of it.
logits = FullyConnectedLayer(
    hidden_relu,
    num_units=10,
    init_stddev=0.1,
    activation_fun=None,
)

layers = [input_layer, hidden_tanh, hidden_relu, logits, SoftmaxOutput(logits)]
nn = NeuralNetwork(layers)

Now train the network.

In [7]:
import time

# Hyperparameters for this training run, gathered in one place.
train_options = dict(
    learning_rate=0.1,
    momentum=0.8,
    max_epochs=15,
    batch_size=100,
    y_one_hot=True,
)

# Time the whole training run with wall-clock timestamps.
t0 = time.time()
nn.train(X_train, y_train, X_valid, y_valid, **train_options)
t1 = time.time()

elapsed_seconds = t1-t0
print('Duration: {:.1f}s'.format(elapsed_seconds))

... starting training
epoch 0.0000, loss 0.1737, train error 0.0536, validation error 0.0503
epoch 1.0000, loss 0.1011, train error 0.0320, validation error 0.0355
epoch 2.0000, loss 0.0727, train error 0.0230, validation error 0.0313
epoch 3.0000, loss 0.0551, train error 0.0175, validation error 0.0293
epoch 4.0000, loss 0.0494, train error 0.0157, validation error 0.0301
epoch 5.0000, loss 0.0436, train error 0.0142, validation error 0.0286
epoch 6.0000, loss 0.0380, train error 0.0132, validation error 0.0294
epoch 7.0000, loss 0.0282, train error 0.0097, validation error 0.0269
epoch 8.0000, loss 0.0190, train error 0.0055, validation error 0.0247
epoch 9.0000, loss 0.0140, train error 0.0039, validation error 0.0234
epoch 10.0000, loss 0.0113, train error 0.0032, validation error 0.0239
epoch 11.0000, loss 0.0086, train error 0.0020, validation error 0.0235
epoch 12.0000, loss 0.0067, train error 0.0015, validation error 0.0228
epoch 13.0000, loss 0.0049, train error 0.0010, vali

# Evaluate model
Now check the accuracy of the model on the test set. Error should be less than 3%.

In [8]:
# Classification error on the held-out test split, reported as a percentage.
test_error = nn.classification_error(X_test, y_test)
test_error_percent = test_error * 100
print('Test error: {:.2f}%'.format(test_error_percent))

Test error: 2.35%
