In [1]:
import time

import numpy as np

import nn

# CNNs on MNIST
In the third part of the exercise we will now apply CNNs to MNIST.

First, take a look at the neural network code I placed into the `nn` package in this repository. It should look familiar, as it is mainly the code you used in the last exercise. One thing I added is a prototype (skeleton) implementation of convolution and pooling. You will find it in `nn/conv/layers.py`.

After you have completed exercises 2 a) and 2 b), go into that file and implement the missing pieces. These are essentially the conv and pool functions you have already written, plus their backward passes (which might be a bit trickier).

Once you have implemented those, come back here and make sure the following example works.

First, let us do gradient checking using your conv and pooling layers.

In [2]:
# Small network used only for gradient checking:
# input -> conv(relu) -> pool -> flatten -> fully connected -> softmax output.
# input_shape[0] is the number of samples in the batch; the remaining axes are
# presumably (channels, height, width) -- TODO confirm against nn.InputLayer.
input_shape = (5, 1, 28, 28)
n_labels = 6
layers = [nn.InputLayer(input_shape)]

layers.append(nn.Conv(
    layers[-1],
    n_feats=2,
    filter_shape=(3, 3),
    init_stddev=0.01,
    activation_fun=nn.Activation('relu'),
))
layers.append(nn.Pool(layers[-1]))
layers.append(nn.Flatten(layers[-1]))
# The output width is tied to n_labels (instead of a second hard-coded 6) so
# it always matches the one-hot targets, which are also sized by n_labels.
layers.append(nn.FullyConnectedLayer(
    layers[-1],
    num_units=n_labels,
    init_stddev=0.1,
    activation_fun=None,
))
layers.append(nn.SoftmaxOutput(layers[-1]))
net = nn.NeuralNetwork(layers)

In [3]:
# Build a random batch for gradient checking: Gaussian inputs X and one-hot
# targets Y with exactly one randomly chosen class per sample.
X = np.random.normal(size=input_shape)
Y = np.zeros((input_shape[0], n_labels))
for target_row in Y:
    # Each row is a view into Y, so writing to it sets the entry in Y itself.
    target_row[np.random.randint(n_labels)] = 1.

In [4]:
# Perform gradient checking on the small network with the random batch (X, Y).
# This should go through if you implemented everything correctly!
net.check_gradients(X, Y)

checking gradient for layer 1
diff 1.01e-08
diff 2.13e-08
checking gradient for layer 4
diff 1.29e-08
diff 9.08e-09


# Train on MNIST
Finally, figure out a reasonable network architecture and train it on MNIST.

In [5]:
# You can load the MNIST data through the loader in the nn package.
# NOTE(review): `data` is not referenced again in the visible cells, which
# call nn.mnist() separately later on -- confirm whether both loads are needed.
data = nn.data.mnist()

... loading data
... done loading data


In [6]:
# define a larger conv net

In [7]:
# Larger network for MNIST:
# input -> 2 x [conv 5x5 with 32 feature maps (relu) -> pool] -> flatten
#       -> fully connected 128 (relu) -> fully connected n_labels -> softmax.
# input_shape[0] is the batch dimension; the remaining axes are presumably
# (channels, height, width) -- TODO confirm against nn.InputLayer.
input_shape = (100, 1, 28, 28)
n_labels = 10
layers = [nn.InputLayer(input_shape)]

# Two identical conv -> pool stages, built in the same order as before so the
# layer stack (and the order of weight initialisation) is unchanged.
for _stage in range(2):
    layers.append(nn.Conv(
        layers[-1],
        n_feats=32,
        filter_shape=(5, 5),
        init_stddev=0.1,
        activation_fun=nn.Activation('relu'),
    ))
    layers.append(nn.Pool(layers[-1]))

layers.append(nn.Flatten(layers[-1]))
layers.append(nn.FullyConnectedLayer(
    layers[-1],
    num_units=128,
    init_stddev=0.1,
    activation_fun=nn.Activation('relu'),
))
# The output width is tied to n_labels rather than a second hard-coded 10, so
# the classifier head cannot drift out of sync with the label count.
layers.append(nn.FullyConnectedLayer(
    layers[-1],
    num_units=n_labels,
    init_stddev=0.1,
    activation_fun=None,
))
layers.append(nn.SoftmaxOutput(layers[-1]))
net = nn.NeuralNetwork(layers)

In [8]:
# Load the three MNIST splits; the train and validation splits are unpacked
# below as (inputs, labels) pairs.
Dtrain, Dval, Dtest = nn.mnist()
X_train, y_train = Dtrain
# Downsample training data to make it a bit faster for testing this code:
# keep n_train_samples examples chosen by a random permutation of the indices.
n_train_samples = 10000
train_idxs = np.random.permutation(X_train.shape[0])[:n_train_samples]
# np.float was only an alias for the builtin float; it was deprecated in
# NumPy 1.20 and removed in 1.24, so name the dtype explicitly. The result is
# the same float64 array as before.
X_train = np.asarray(X_train[train_idxs], dtype=np.float64)
y_train = y_train[train_idxs]
# extract validation data
Xvalid, Yvalid = Dval
Xvalid = np.asarray(Xvalid, dtype=np.float64)

... loading data
... done loading data


In [9]:
# Sanity-check the subsampled training set: report both array shapes and the
# dtype of the inputs.
shape_report = (
    ("X_train shape: {}", X_train),
    ("y_train shape: {}", y_train),
)
for template, array in shape_report:
    print(template.format(np.shape(array)))
print(X_train.dtype)

X_train shape: (10000, 1, 28, 28)
y_train shape: (10000,)
float64


In [10]:
# Train the neural network and report how long training took.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring elapsed time; unlike time.time() it is unaffected by system clock
# adjustments during the (long) training run.
t0 = time.perf_counter()
net.train(X_train, y_train, Xvalid=Xvalid, Yvalid=Yvalid, learning_rate=0.1,
          max_epochs=20, batch_size=100, y_one_hot=True, log_every=5)
t1 = time.perf_counter()
print('Duration: {:.1f}s'.format(t1 - t0))

... starting training
epoch    0, loss 0.6551, train error 0.0603
			 valid error 0.0637
epoch    1, loss 0.1738
epoch    2, loss 0.1164
epoch    3, loss 0.0876
epoch    4, loss 0.0680
epoch    5, loss 0.0536, train error 0.0140
			 valid error 0.0276
epoch    6, loss 0.0421
epoch    7, loss 0.0327
epoch    8, loss 0.0250
epoch    9, loss 0.0193
epoch   10, loss 0.0148, train error 0.0023
			 valid error 0.0226
epoch   11, loss 0.0112
epoch   12, loss 0.0085
epoch   13, loss 0.0067
epoch   14, loss 0.0053
epoch   15, loss 0.0043, train error 0.0002
			 valid error 0.0215
epoch   16, loss 0.0036
epoch   17, loss 0.0030
epoch   18, loss 0.0026
epoch   19, loss 0.0023
epoch   20, loss 0.0020, train error 0.0000
			 valid error 0.0213
Duration: 1362.4s
