In [1]:
# Download the MNIST images and their labels and load them into memory.
# The returned object is indexed by the later cells with the keys
# 'train_data', 'train_label', 'test_data' and 'test_label'.
import mxnet as mx
mnist = mx.test_utils.get_mnist()

In [2]:
# Wrap the MNIST arrays in mini-batch iterators: one over the training split and
# one over the test split (the latter is passed as validation data when training).
batch_size = 100
train_iter = mx.io.NDArrayIter(
    data=mnist['train_data'],
    label=mnist['train_label'],
    batch_size=batch_size,
    shuffle=True,  # only the training iterator is shuffled
)
val_iter = mx.io.NDArrayIter(
    data=mnist['test_data'],
    label=mnist['test_label'],
    batch_size=batch_size,
)


In [3]:
# Approach 1: a Multilayer Perceptron.
# Declare the input placeholder and immediately flatten it from its 4-D shape
# into 2-D (batch_size, num_channel*width*height) so dense layers can consume it.
data = mx.sym.flatten(data=mx.sym.var('data'))

In [4]:
def _dense_relu(inputs, num_hidden):
    """Return a (fully-connected symbol, ReLU symbol) pair stacked on top of `inputs`."""
    dense = mx.sym.FullyConnected(data=inputs, num_hidden=num_hidden)
    return dense, mx.sym.Activation(data=dense, act_type="relu")


# Two hidden layers, each a fully-connected layer followed by a ReLU activation:
# the first has 128 neurons, the second has 64.
fc1, act1 = _dense_relu(data, 128)
fc2, act2 = _dense_relu(act1, 64)

In [5]:
# Output layer: one unit per MNIST class (10 digits)
fc3 = mx.sym.FullyConnected(
    data=act2,
    num_hidden=10,
)
# Softmax over the 10 outputs, trained with cross-entropy loss
mlp = mx.sym.SoftmaxOutput(
    data=fc3,
    name='softmax',
)

In [6]:
import logging

# Lower the root logger threshold to DEBUG so the INFO progress records
# emitted during training are displayed
logging.getLogger().setLevel(logging.DEBUG)

# Bind the MLP symbol to a trainable module that runs on the CPU
mlp_model = mx.mod.Module(symbol=mlp, context=mx.cpu())

# Training configuration, collected up front and unpacked into fit() below
mlp_fit_options = dict(
    eval_data=val_iter,                          # validation data
    optimizer='sgd',                             # plain stochastic gradient descent
    optimizer_params={'learning_rate': 0.1},     # fixed learning rate
    eval_metric='acc',                           # report accuracy during training
    # log progress every 100 batches
    batch_end_callback=mx.callback.Speedometer(batch_size, 100),
    num_epoch=10,                                # at most 10 passes over the training data
)
mlp_model.fit(train_iter, **mlp_fit_options)

INFO:root:Epoch[0] Batch [100]	Speed: 27720.29 samples/sec	accuracy=0.111980
INFO:root:Epoch[0] Batch [200]	Speed: 26618.65 samples/sec	accuracy=0.111600
INFO:root:Epoch[0] Batch [300]	Speed: 26824.49 samples/sec	accuracy=0.113500
INFO:root:Epoch[0] Batch [400]	Speed: 27890.74 samples/sec	accuracy=0.115100
INFO:root:Epoch[0] Batch [500]	Speed: 27234.01 samples/sec	accuracy=0.169900
INFO:root:Epoch[0] Train-accuracy=0.255354
INFO:root:Epoch[0] Time cost=2.274
INFO:root:Epoch[0] Validation-accuracy=0.332700
INFO:root:Epoch[1] Batch [100]	Speed: 27562.17 samples/sec	accuracy=0.501386
INFO:root:Epoch[1] Batch [200]	Speed: 27188.94 samples/sec	accuracy=0.699900
INFO:root:Epoch[1] Batch [300]	Speed: 27874.95 samples/sec	accuracy=0.792200
INFO:root:Epoch[1] Batch [400]	Speed: 27452.63 samples/sec	accuracy=0.823200
INFO:root:Epoch[1] Batch [500]	Speed: 27242.34 samples/sec	accuracy=0.841400
INFO:root:Epoch[1] Train-accuracy=0.868990
INFO:root:Epoch[1] Time cost=2.195
INFO:root:Epoch[1] Validat

In [7]:
# Evaluate the trained MLP by predicting on the test images.
# Prediction needs no labels, so the iterator is built with label=None.
test_iter = mx.io.NDArrayIter(
    data=mnist['test_data'],
    label=None,
    batch_size=batch_size,
)
prob = mlp_model.predict(eval_data=test_iter)
# expect one row of 10 class scores for each of the 10000 test images
assert prob.shape == (10000, 10)

In [8]:
# Approach 2: a Convolutional Neural Network
data = mx.sym.var('data')

# Stage 1: 5x5 convolution with 20 filters -> tanh -> 2x2 max-pooling, stride 2
conv1 = mx.sym.Convolution(data=data, num_filter=20, kernel=(5, 5))
tanh1 = mx.sym.Activation(data=conv1, act_type="tanh")
pool1 = mx.sym.Pooling(data=tanh1, kernel=(2, 2), stride=(2, 2), pool_type="max")

# Stage 2: 5x5 convolution with 50 filters -> tanh -> 2x2 max-pooling, stride 2
conv2 = mx.sym.Convolution(data=pool1, num_filter=50, kernel=(5, 5))
tanh2 = mx.sym.Activation(data=conv2, act_type="tanh")
pool2 = mx.sym.Pooling(data=tanh2, kernel=(2, 2), stride=(2, 2), pool_type="max")

# First fully-connected layer: flatten the feature maps, then 500 hidden units with tanh
flatten = mx.sym.flatten(data=pool2)
fc1 = mx.sym.FullyConnected(data=flatten, num_hidden=500)
tanh3 = mx.sym.Activation(data=fc1, act_type="tanh")

# Second fully-connected layer: one output per class
fc2 = mx.sym.FullyConnected(data=tanh3, num_hidden=10)

# Softmax output with cross-entropy loss
lenet = mx.sym.SoftmaxOutput(data=fc2, name='softmax')

In [9]:
# Bind the LeNet symbol to a trainable module. The context here is the CPU;
# pass mx.gpu(0) instead to train on the first GPU when one is available.
lenet_model = mx.mod.Module(symbol=lenet, context=mx.cpu())

# Train with the same iterators and hyper-parameters that were used for the MLP
lenet_fit_options = dict(
    eval_data=val_iter,
    optimizer='sgd',
    optimizer_params={'learning_rate': 0.1},
    eval_metric='acc',
    # log progress every 100 batches
    batch_end_callback=mx.callback.Speedometer(batch_size, 100),
    num_epoch=10,
)
lenet_model.fit(train_iter, **lenet_fit_options)

INFO:root:Epoch[0] Batch [100]	Speed: 836.49 samples/sec	accuracy=0.112277
INFO:root:Epoch[0] Batch [200]	Speed: 871.48 samples/sec	accuracy=0.111600
INFO:root:Epoch[0] Batch [300]	Speed: 886.76 samples/sec	accuracy=0.113500
INFO:root:Epoch[0] Batch [400]	Speed: 882.46 samples/sec	accuracy=0.115100
INFO:root:Epoch[0] Batch [500]	Speed: 891.08 samples/sec	accuracy=0.107300
INFO:root:Epoch[0] Train-accuracy=0.111616
INFO:root:Epoch[0] Time cost=68.797
INFO:root:Epoch[0] Validation-accuracy=0.113500
INFO:root:Epoch[1] Batch [100]	Speed: 881.15 samples/sec	accuracy=0.115050
INFO:root:Epoch[1] Batch [200]	Speed: 878.31 samples/sec	accuracy=0.409000
INFO:root:Epoch[1] Batch [300]	Speed: 855.11 samples/sec	accuracy=0.837400
INFO:root:Epoch[1] Batch [400]	Speed: 866.94 samples/sec	accuracy=0.886200
INFO:root:Epoch[1] Batch [500]	Speed: 854.77 samples/sec	accuracy=0.914500
INFO:root:Epoch[1] Train-accuracy=0.931111
INFO:root:Epoch[1] Time cost=69.741
INFO:root:Epoch[1] Validation-accuracy=0.937

In [10]:
# Generate class predictions for the test images with the trained LeNet
# (no labels are needed for prediction)
test_iter = mx.io.NDArrayIter(
    data=mnist['test_data'],
    label=None,
    batch_size=batch_size,
)
prob = lenet_model.predict(eval_data=test_iter)

# Rebuild the iterator with labels attached so the model can be scored
test_iter = mx.io.NDArrayIter(
    data=mnist['test_data'],
    label=mnist['test_label'],
    batch_size=batch_size,
)

# Measure LeNet's accuracy on the test set; score() accumulates into `acc`
acc = mx.metric.Accuracy()
lenet_model.score(eval_data=test_iter, eval_metric=acc)
print(acc)
# acc.get() returns a (name, value) pair; require better than 98% accuracy
assert acc.get()[1] > 0.98

EvalMetric: {'accuracy': 0.98760000000000003}
