In [1]:
import mxnet as mx

  from ._conv import register_converters as _register_converters


In [2]:
# Fetch the MNIST dataset through MXNet's test utilities. The cells below index
# the result with the keys 'train_data', 'train_label', 'test_data', 'test_label'.
mnist = mx.test_utils.get_mnist()

In [4]:
# Mini-batch size shared by every data iterator in this notebook.
batch_size = 100

# Training iterator: batches are drawn from the training split with shuffling on.
train_iter = mx.io.NDArrayIter(
    data=mnist['train_data'],
    label=mnist['train_label'],
    batch_size=batch_size,
    shuffle=True,
)

# Validation iterator: built from the test split, shuffling left at its default.
val_iter = mx.io.NDArrayIter(
    data=mnist['test_data'],
    label=mnist['test_label'],
    batch_size=batch_size,
)

In [6]:
# Approach 1: Multilayer Perceptron

In [7]:
# Symbolic placeholder for the network input, named 'data'.
data = mx.sym.var('data')

In [8]:
# Flatten the data from 4-D shape into 2-D (batch_size, num_channel*width*height).
# Note: this rebinds `data` from the input placeholder to its flattened version.
data = mx.sym.flatten(data=data)

In [9]:
# First fully-connected layer (128 hidden units) applied to the flattened input,
# followed by a ReLU activation.
fc1 = mx.sym.FullyConnected(data=data,num_hidden=128)
act1 = mx.sym.Activation(data=fc1,act_type="relu")

In [10]:
# Second fully-connected layer (64 hidden units) fed by the first layer's
# activation, followed by a ReLU activation.
fc2 = mx.sym.FullyConnected(data=act1,num_hidden=64)
act2 = mx.sym.Activation(data=fc2,act_type="relu")

In [11]:
# Output layer: MNIST has 10 classes, so the last fully-connected layer has 10 units.
fc3 = mx.sym.FullyConnected(data=act2,num_hidden=10)
# Softmax with cross-entropy loss; `mlp` is the complete network symbol that the
# training cell below wraps in a Module.
mlp = mx.sym.SoftmaxOutput(data=fc3,name='softmax')

In [12]:
# Hyper-parameters: mini-batch size = 100, learning rate = 0.1, epochs = 10

In [15]:
import logging

# Lower the root logger threshold to DEBUG so the per-batch and per-epoch
# training messages are emitted.
logging.getLogger().setLevel(logging.DEBUG)

# Optimizer settings: a fixed learning rate for SGD.
mlp_optimizer_params = {'learning_rate': 0.1}
# Progress callback: report speed and accuracy every 100 batches.
mlp_progress = mx.callback.Speedometer(batch_size, 100)

# Wrap the MLP symbol in a trainable module that runs on the CPU.
mlp_model = mx.mod.Module(symbol=mlp, context=mx.cpu())
mlp_model.fit(
    train_iter,                            # training data
    eval_data=val_iter,                    # validation data
    optimizer='sgd',                       # stochastic gradient descent
    optimizer_params=mlp_optimizer_params,
    eval_metric='acc',                     # report accuracy during training
    batch_end_callback=mlp_progress,
    num_epoch=10,                          # train for at most 10 dataset passes
)

INFO:root:Epoch[0] Batch [100]	Speed: 42129.17 samples/sec	accuracy=0.109307
INFO:root:Epoch[0] Batch [200]	Speed: 43909.39 samples/sec	accuracy=0.114900
INFO:root:Epoch[0] Batch [300]	Speed: 44366.24 samples/sec	accuracy=0.110800
INFO:root:Epoch[0] Batch [400]	Speed: 44366.61 samples/sec	accuracy=0.114400
INFO:root:Epoch[0] Batch [500]	Speed: 31728.78 samples/sec	accuracy=0.118300
INFO:root:Epoch[0] Train-accuracy=0.228081
INFO:root:Epoch[0] Time cost=1.488
INFO:root:Epoch[0] Validation-accuracy=0.383200
INFO:root:Epoch[1] Batch [100]	Speed: 30611.69 samples/sec	accuracy=0.475842
INFO:root:Epoch[1] Batch [200]	Speed: 37980.15 samples/sec	accuracy=0.672400
INFO:root:Epoch[1] Batch [300]	Speed: 51684.35 samples/sec	accuracy=0.777200
INFO:root:Epoch[1] Batch [400]	Speed: 52215.30 samples/sec	accuracy=0.803800
INFO:root:Epoch[1] Batch [500]	Speed: 51275.49 samples/sec	accuracy=0.823000
INFO:root:Epoch[1] Train-accuracy=0.837677
INFO:root:Epoch[1] Time cost=1.471
INFO:root:Epoch[1] Validat

In [19]:
# Prediction: run the trained MLP over the test images using an iterator that
# carries no labels.
test_iter = mx.io.NDArrayIter(
    data=mnist['test_data'],
    label=None,
    batch_size=batch_size,
)
prob = mlp_model.predict(test_iter)

# One row per test sample, one column per class.
expected_shape = (10000, 10)
assert prob.shape == expected_shape

In [20]:
# Score the MLP on the labeled test split.
test_iter = mx.io.NDArrayIter(
    data=mnist['test_data'],
    label=mnist['test_label'],
    batch_size=batch_size,
)
acc = mx.metric.Accuracy()
mlp_model.score(test_iter, acc)
print(acc)

# Element 1 of acc.get() is the metric value; the MLP must exceed 96% accuracy.
mlp_accuracy = acc.get()[1]
assert mlp_accuracy > 0.96

EvalMetric: {'accuracy': 0.9692}


In [21]:
# Approach 2: Convolutional Neural Network

In [23]:
# Fresh input placeholder for the convolutional network (rebinds `data`).
data = mx.sym.var('data')

# Convolution stage 1: 5x5 convolution with 20 filters -> tanh -> 2x2 max-pool, stride 2.
conv1 = mx.sym.Convolution(
    data=data,
    kernel=(5, 5),
    num_filter=20,
)
tanh1 = mx.sym.Activation(data=conv1, act_type="tanh")
pool1 = mx.sym.Pooling(
    data=tanh1,
    pool_type="max",
    kernel=(2, 2),
    stride=(2, 2),
)

# Convolution stage 2: 5x5 convolution with 50 filters -> tanh -> 2x2 max-pool, stride 2.
conv2 = mx.sym.Convolution(
    data=pool1,
    kernel=(5, 5),
    num_filter=50,
)
tanh2 = mx.sym.Activation(data=conv2, act_type="tanh")
pool2 = mx.sym.Pooling(
    data=tanh2,
    pool_type="max",
    kernel=(2, 2),
    stride=(2, 2),
)

# First fully-connected layer: flatten the pooled feature maps, then 500 units + tanh.
flatten = mx.sym.flatten(data=pool2)
fc1 = mx.sym.FullyConnected(data=flatten, num_hidden=500)
tanh3 = mx.sym.Activation(data=fc1, act_type="tanh")

# Second fully-connected layer: 10 output units.
fc2 = mx.sym.FullyConnected(data=tanh3, num_hidden=10)

# Softmax loss on top; `lenet` is the complete network symbol.
lenet = mx.sym.SoftmaxOutput(data=fc2, name='softmax')

In [25]:
# Optimizer settings and progress callback, matching the values used for the MLP run.
lenet_optimizer_params = {'learning_rate': 0.1}
lenet_progress = mx.callback.Speedometer(batch_size, 100)

# Wrap the LeNet symbol in a trainable module that runs on the CPU.
lenet_model = mx.mod.Module(symbol=lenet, context=mx.cpu())

# Train on the same iterators with the same hyper-parameters as the MLP.
lenet_model.fit(
    train_iter,
    eval_data=val_iter,
    optimizer='sgd',
    optimizer_params=lenet_optimizer_params,
    eval_metric='acc',
    batch_end_callback=lenet_progress,
    num_epoch=10,
)

INFO:root:Epoch[0] Batch [100]	Speed: 1002.50 samples/sec	accuracy=0.109802
INFO:root:Epoch[0] Batch [200]	Speed: 990.84 samples/sec	accuracy=0.114900
INFO:root:Epoch[0] Batch [300]	Speed: 995.11 samples/sec	accuracy=0.110800
INFO:root:Epoch[0] Batch [400]	Speed: 1008.46 samples/sec	accuracy=0.112200
INFO:root:Epoch[0] Batch [500]	Speed: 834.36 samples/sec	accuracy=0.115800
INFO:root:Epoch[0] Train-accuracy=0.109596
INFO:root:Epoch[0] Time cost=63.798
INFO:root:Epoch[0] Validation-accuracy=0.113500
INFO:root:Epoch[1] Batch [100]	Speed: 858.50 samples/sec	accuracy=0.116139
INFO:root:Epoch[1] Batch [200]	Speed: 981.73 samples/sec	accuracy=0.506300
INFO:root:Epoch[1] Batch [300]	Speed: 813.07 samples/sec	accuracy=0.850800
INFO:root:Epoch[1] Batch [400]	Speed: 836.76 samples/sec	accuracy=0.893200
INFO:root:Epoch[1] Batch [500]	Speed: 888.06 samples/sec	accuracy=0.921000
INFO:root:Epoch[1] Train-accuracy=0.937677
INFO:root:Epoch[1] Time cost=67.488
INFO:root:Epoch[1] Validation-accuracy=0.9

In [27]:
# Prediction: run the trained LeNet over the test images using an iterator that
# carries no labels.
test_iter = mx.io.NDArrayIter(mnist['test_data'], None, batch_size)
prob = lenet_model.predict(test_iter)

# Score LeNet on the labeled test split.
test_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)
acc = mx.metric.Accuracy()
lenet_model.score(test_iter, acc)
print(acc)

# Element 1 of acc.get() is the metric value; LeNet must exceed 98% accuracy.
# The threshold has to be written 0.98: the earlier `> 0,98` parsed as
# `assert (value > 0), 98`, which passes for any positive accuracy.
assert acc.get()[1] > 0.98

EvalMetric: {'accuracy': 0.988}
