# MNIST with CNN using MXNet

Code mostly from http://mxnet.io/tutorials/python/mnist.html

**NOTES**  
Whereas Keras and TensorFlow take `y` one-hot encoded, MXNet takes the labels as plain integer class indices, e.g. [5, 0, 1, ...].

## Setup

In [6]:
# Load MNIST through Keras. load_data() returns a (train, test) pair, each of
# which is an (images, labels) tuple.
# Disabled alternative: TensorFlow's input_data.read_data_sets('./.data/', one_hot=True).
from keras.datasets import mnist

train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [7]:
# Runtime imports. Order matters here: the author observed that, for some
# reason, loading mxnet before the dataset from tensorflow crashes the kernel,
# so mxnet is imported only after the data-loading cell has run.
# NOTE(review): the dataset is now loaded through Keras rather than TensorFlow;
# presumably the same ordering constraint still applies -- confirm.
import time
import mxnet as mx # keep after the dataset load (see note above)
import logging
# Set the root logger's threshold to DEBUG so log records sent to it are not
# filtered out (the "INFO:root:" lines under the training cell go through it).
logging.getLogger().setLevel(logging.DEBUG)

In [8]:
def _as_scaled_nchw(images):
    """Return `images` reshaped to (num_images, 1, 28, 28), cast to float32 and divided by 255."""
    num_images = images.shape[0]
    nchw = images.reshape(num_images, 1, 28, 28)
    return nchw.astype('float32') / 255

# NOTE: X_train / X_test are overwritten with their converted versions, so
# running this cell a second time divides by 255 again. Re-run the data-loading
# cell first if this cell needs to be repeated.
X_train = _as_scaled_nchw(X_train)
X_test = _as_scaled_nchw(X_test)

# y_train / y_test are left exactly as loaded (integer labels, not one-hot).

## Define network

In [13]:
# LeNet-style CNN built with the symbolic API: two conv/relu/max-pool stages,
# then a 1024-unit hidden layer with dropout and a 10-way softmax output.
# `mx.sym` is used throughout for consistency.

# Input placeholder; the name 'data' is what the data iterator binds to
# (presumably the Module default -- confirm if the name is ever changed).
data = mx.sym.Variable('data')

# Stage 1: 5x5 convolution (32 filters) -> ReLU -> 2x2 max-pool, stride 2
conv1 = mx.sym.Convolution(data=data, kernel=(5, 5), num_filter=32)
relu1 = mx.sym.Activation(data=conv1, act_type="relu")
pool1 = mx.sym.Pooling(data=relu1, pool_type="max",
                       kernel=(2, 2), stride=(2, 2))

# Stage 2: 5x5 convolution (64 filters) -> ReLU -> 2x2 max-pool, stride 2
conv2 = mx.sym.Convolution(data=pool1, kernel=(5, 5), num_filter=64)
relu2 = mx.sym.Activation(data=conv2, act_type="relu")
pool2 = mx.sym.Pooling(data=relu2, pool_type="max",
                       kernel=(2, 2), stride=(2, 2))

# Classifier head: flatten -> fully connected (1024) -> ReLU -> dropout (p=0.5)
flatten = mx.sym.Flatten(data=pool2)
fc1 = mx.sym.FullyConnected(data=flatten, num_hidden=1024)
relu3 = mx.sym.Activation(data=fc1, act_type="relu")
drop = mx.sym.Dropout(data=relu3, p=0.5)

# Output layer: one unit per digit class, fed into the softmax loss
fc2 = mx.sym.FullyConnected(data=drop, num_hidden=10)
lenet = mx.sym.SoftmaxOutput(data=fc2, name='softmax')

## Train

In [14]:
# Train the LeNet symbol on the first GPU and report the wall-clock time.

# Single source of truth for the mini-batch size: it is passed both to the
# data iterator and to the Speedometer callback (its first argument), and the
# two values have to agree for the reported samples/sec to be meaningful.
BATCH_SIZE = 50

mod = mx.mod.Module(lenet, context=mx.gpu(0))

t0 = time.time()
mod.fit(mx.io.NDArrayIter(X_train, label=y_train,
                          batch_size=BATCH_SIZE, shuffle=True),
        num_epoch=10,
        optimizer='adadelta',
        # second argument 200: log speed and train accuracy every 200 batches
        batch_end_callback=mx.callback.Speedometer(BATCH_SIZE, 200)
       )

# print(...) with a single argument produces the same output on Python 2 and
# Python 3; the bare `print "..."` statement is a SyntaxError on Python 3.
print("training time = {}".format(time.time() - t0))

INFO:root:Epoch[0] Batch [200]	Speed: 2030.64 samples/sec	Train-accuracy=0.412139
INFO:root:Epoch[0] Batch [400]	Speed: 2094.86 samples/sec	Train-accuracy=0.942300
INFO:root:Epoch[0] Batch [600]	Speed: 2041.11 samples/sec	Train-accuracy=0.959500
INFO:root:Epoch[0] Batch [800]	Speed: 2057.81 samples/sec	Train-accuracy=0.970100
INFO:root:Epoch[0] Batch [1000]	Speed: 2045.58 samples/sec	Train-accuracy=0.974100
INFO:root:Epoch[0] Train-accuracy=0.975578
INFO:root:Epoch[0] Time cost=29.191
INFO:root:Epoch[1] Batch [200]	Speed: 2063.14 samples/sec	Train-accuracy=0.978905
INFO:root:Epoch[1] Batch [400]	Speed: 2035.12 samples/sec	Train-accuracy=0.982800
INFO:root:Epoch[1] Batch [600]	Speed: 2043.50 samples/sec	Train-accuracy=0.982200
INFO:root:Epoch[1] Batch [800]	Speed: 2052.04 samples/sec	Train-accuracy=0.984800
INFO:root:Epoch[1] Batch [1000]	Speed: 2062.39 samples/sec	Train-accuracy=0.982500
INFO:root:Epoch[1] Train-accuracy=0.984925
INFO:root:Epoch[1] Time cost=29.223
INFO:root:Epoch[2] B

training time = 293.20192194


## Evaluate

In [15]:
# Score the trained module on the held-out test set. The score call is kept as
# the cell's last expression so its (name, value) result is displayed.
metric = mx.metric.Accuracy()
test_iter = mx.io.NDArrayIter(X_test, label=y_test, batch_size=100)
mod.score(test_iter, metric)

[('accuracy', 0.9904)]