# High-level MXNet Example

**In the interest of comparison, a common (custom) data generator, `yield_mb(X, y, batchsize=64, shuffle=False)`, was originally used for all other frameworks, but not for MXNet. I have reproduced the MXNet example using this same generator (wrapping the results in the `mx.io.DataBatch` class) to test whether MXNet is faster than the other frameworks only because it was using its own data iterator. This does not appear to be the case.**

In [1]:
import os
import sys
import numpy as np
import mxnet as mx
from common.params import *
from common.utils import *

In [2]:
# Record the environment this notebook was run in: platform, interpreter and
# library versions, and whatever get_gpu_name() reports.
print("OS: ", sys.platform)
print("Python: ", sys.version)
print("Numpy: ", np.__version__)
print("MXNet: ", mx.__version__)
# NOTE(review): get_gpu_name is not defined in this notebook; presumably it is
# provided by one of the `common.*` star imports (confirm).
print("GPU: ", get_gpu_name())

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Numpy:  1.13.3
MXNet:  0.12.0
GPU:  []


In [3]:
def create_symbol():
    """Assemble the symbolic CNN used in this example.

    Layout: two blocks of (conv3x3 -> relu -> conv3x3 -> max-pool -> relu ->
    dropout 0.25) with 50 and then 100 filters, followed by a 512-unit
    fully-connected layer with relu and dropout 0.5, and a final
    fully-connected layer with N_CLASSES outputs.

    Returns:
        The SoftmaxOutput symbol (named "softmax") whose inputs are the
        'data' and 'softmax_label' variables.
    """
    sym = mx.symbol

    def conv3x3(inputs, filters):
        # size = [(old-size - kernel + 2*padding)/stride]+1
        # so a 3x3 kernel padded by 1 on either side keeps the spatial size
        return sym.Convolution(data=inputs, num_filter=filters,
                               kernel=(3, 3), pad=(1, 1))

    def relu(inputs):
        return sym.Activation(data=inputs, act_type="relu")

    def max_pool_2x2(inputs):
        return sym.Pooling(data=inputs, pool_type="max",
                           kernel=(2, 2), stride=(2, 2))

    def dropout(inputs, rate):
        return sym.Dropout(data=inputs, p=rate)

    net = sym.Variable('data')

    # Convolutional block 1 (50 filters)
    net = relu(conv3x3(net, 50))
    net = max_pool_2x2(conv3x3(net, 50))
    net = dropout(relu(net), 0.25)

    # Convolutional block 2 (100 filters)
    net = relu(conv3x3(net, 100))
    net = max_pool_2x2(conv3x3(net, 100))
    net = dropout(relu(net), 0.25)

    # Classifier head
    net = sym.Flatten(data=net)
    net = relu(sym.FullyConnected(data=net, num_hidden=512))
    net = dropout(net, 0.5)
    logits = sym.FullyConnected(data=net, num_hidden=N_CLASSES)

    labels = sym.Variable('softmax_label')
    return sym.SoftmaxOutput(data=logits, label=labels, name="softmax")

In [4]:
def init_model(m):
    """Create a ready-to-train mx.mod.Module for the symbol ``m``.

    The module is bound to batches of BATCHSIZE 3x32x32 inputs with one label
    per sample, its parameters are initialised with a uniform Xavier (Glorot)
    initializer, and an SGD optimizer using LR and MOMENTUM is attached.

    Args:
        m: Network symbol, e.g. the output of create_symbol().

    Returns:
        The bound and initialised module.
    """
    # First GPU when the GPU flag is set, otherwise the CPU
    ctx = [mx.gpu(0)] if GPU else mx.cpu()

    data_shapes = [('data', (BATCHSIZE, 3, 32, 32))]
    label_shapes = [('softmax_label', (BATCHSIZE,))]
    sgd_params = (('learning_rate', LR), ('momentum', MOMENTUM))

    mod = mx.mod.Module(context=ctx, symbol=m)
    mod.bind(data_shapes=data_shapes, label_shapes=label_shapes)
    # Glorot-uniform initializer
    mod.init_params(initializer=mx.init.Xavier(rnd_type='uniform'))
    mod.init_optimizer(optimizer='sgd', optimizer_params=sgd_params)
    return mod

In [5]:
%%time
# Fetch the CIFAR arrays in channel-first layout
x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)

# The built-in iterator is deliberately not used, so that every framework is fed
# by the same custom generator:
#train_iter = mx.io.NDArrayIter(x_train, y_train, BATCHSIZE, shuffle=True)
def wrapper_db(args):
    """Wrap one (features, labels) mini-batch pair in an mx.io.DataBatch."""
    features, labels = args[0], args[1]
    return mx.io.DataBatch(data=[mx.nd.array(features)],
                           label=[mx.nd.array(labels)])

# Sanity check: shapes, then dtypes, of the four arrays
splits = (x_train, x_test, y_train, y_test)
print(*(arr.shape for arr in splits))
print(*(arr.dtype for arr in splits))

Preparing train set...
Preparing test set...
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)
float32 float32 int32 int32
CPU times: user 820 ms, sys: 575 ms, total: 1.39 s
Wall time: 1.39 s


In [None]:
# Ad-hoc look at the test labels (no execution count or output is recorded for this cell)
y_test

In [8]:
# Ad-hoc look at the training images.
# NOTE(review): this echoes the repr of the whole array; x_train[:1] or
# x_train.shape would keep the saved output small.
x_train

array([[[[ 0.23137255,  0.16862746,  0.19607843, ...,  0.61960787,
           0.59607846,  0.58039218],
         [ 0.0627451 ,  0.        ,  0.07058824, ...,  0.48235294,
           0.46666667,  0.47843137],
         [ 0.09803922,  0.0627451 ,  0.19215687, ...,  0.4627451 ,
           0.47058824,  0.42745098],
         ..., 
         [ 0.81568629,  0.78823531,  0.7764706 , ...,  0.627451  ,
           0.21960784,  0.20784314],
         [ 0.70588237,  0.67843139,  0.72941178, ...,  0.72156864,
           0.38039216,  0.32549021],
         [ 0.69411767,  0.65882355,  0.7019608 , ...,  0.84705883,
           0.59215689,  0.48235294]],

        [[ 0.24313726,  0.18039216,  0.1882353 , ...,  0.51764709,
           0.49019608,  0.48627451],
         [ 0.07843138,  0.        ,  0.03137255, ...,  0.34509805,
           0.32549021,  0.34117648],
         [ 0.09411765,  0.02745098,  0.10588235, ...,  0.32941177,
           0.32941177,  0.28627452],
         ..., 
         [ 0.66666669,  0.600000

In [6]:
%%time
# Build the network's symbolic graph with create_symbol()
sym = create_symbol()

CPU times: user 1.47 ms, sys: 687 µs, total: 2.16 ms
Wall time: 1.73 ms


In [7]:
%%time
# Initialise model: init_model() binds the symbol to its input shapes,
# initialises the parameters and attaches the SGD optimizer
model = init_model(sym)

CPU times: user 739 ms, sys: 778 ms, total: 1.52 s
Wall time: 1.96 s


In [8]:
%%time
# Wall time: ~145s for the recorded run
# Train for EPOCHS passes over the training set, logging training accuracy per epoch
metric = mx.metric.create('acc')
for epoch in range(EPOCHS):
    # The metric accumulates across batches, so clear it at the start of each epoch
    metric.reset()
    # Mini-batches come from the shared yield_mb generator (instead of an
    # mx.io.NDArrayIter) and are wrapped as DataBatch objects by wrapper_db
    minibatches = yield_mb(x_train, y_train, BATCHSIZE, shuffle=True)
    for batch in map(wrapper_db, minibatches):
        model.forward(batch, is_train=True)
        # Score the forward outputs against this batch's labels
        model.update_metric(metric, batch.label)
        model.backward()
        model.update()
    print('Epoch %d, Training %s' % (epoch, metric.get()))

Epoch 0, Training ('accuracy', 0.34126920614596673)
Epoch 1, Training ('accuracy', 0.49851952624839951)
Epoch 2, Training ('accuracy', 0.57626440460947503)
Epoch 3, Training ('accuracy', 0.63538332266325226)
Epoch 4, Training ('accuracy', 0.6732754481434059)
Epoch 5, Training ('accuracy', 0.71028729193341866)
Epoch 6, Training ('accuracy', 0.73801616517285529)
Epoch 7, Training ('accuracy', 0.75780249679897571)
Epoch 8, Training ('accuracy', 0.77704865556978231)
Epoch 9, Training ('accuracy', 0.79661491677336749)
CPU times: user 2min 16s, sys: 27.4 s, total: 2min 43s
Wall time: 2min 25s


In [9]:
%%time
# Run the test images through the model in order (no shuffling), then take the
# index of the largest output along the last axis as the predicted class
test_iter = mx.io.NDArrayIter(x_test, batch_size=BATCHSIZE, shuffle=False)
y_guess = model.predict(test_iter).asnumpy().argmax(axis=-1)

CPU times: user 1.21 s, sys: 269 ms, total: 1.48 s
Wall time: 1.09 s


In [10]:
# Test accuracy = fraction of predictions that match the true labels.
# Guard against a length/shape mismatch, which would otherwise make the
# element-wise comparison meaningless.
assert y_guess.shape == y_test.shape, "predictions and labels must have the same shape"
# np.mean over the boolean match array is vectorised, unlike the builtin sum(),
# which iterates over the numpy array one element at a time.
print("Accuracy: ", np.mean(y_guess == y_test))

Accuracy:  0.7742
