# High-level Gluon Example

In [1]:
import os
import sys
import numpy as np
import math
import mxnet as mx
from mxnet import nd, autograd
from mxnet import gluon
from common.params import *
from common.utils import *

In [2]:
# Force one-gpu: expose only CUDA device "0" to this process
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [3]:
# Report the software / hardware stack used for this run.
# Each entry pairs a label with a getter; getters are invoked lazily and in
# order, so every line is printed before the next value is looked up.
# A label of None means the value is printed on its own.
for label, fetch in (
    ("OS: ", lambda: sys.platform),
    ("Python: ", lambda: sys.version),
    ("MXNet: ", lambda: mx.__version__),
    ("Numpy: ", lambda: np.__version__),
    ("GPU: ", lambda: get_gpu_name()),
    (None, lambda: get_cuda_version()),
    ("CuDNN Version ", lambda: get_cudnn_version()),
):
    fields = (fetch(),) if label is None else (label, fetch())
    print(*fields)

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
MXNet:  0.12.0
Numpy:  1.14.1
GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']
CUDA Version 8.0.61
CuDNN Version  6.0.21


In [4]:
def SymbolModule(n_classes=N_CLASSES):
    """Assemble the Gluon CNN used in this example.

    The network is a ``gluon.nn.Sequential`` stack made of two convolutional
    stages (50 channels, then 100 channels). Each stage is two 3x3
    convolutions with padding 1, a 2x2 max-pool with stride 2, a ReLU and
    dropout. The stages are followed by a flatten, a 512-unit ReLU dense
    layer with dropout, and a final dense layer with ``n_classes`` units.

    Args:
        n_classes: Number of units in the output dense layer
            (defaults to ``N_CLASSES``).

    Returns:
        The assembled ``gluon.nn.Sequential`` block. Parameters are not
        initialised here; the caller is expected to do that.
    """
    nn = gluon.nn
    net = nn.Sequential()
    with net.name_scope():
        # Layers are created inside the name scope, in stacking order.
        layers = [
            # --- stage 1: 50 channels ---
            nn.Conv2D(channels=50, kernel_size=3, padding=1, activation='relu'),
            nn.Conv2D(channels=50, kernel_size=3, padding=1),
            nn.MaxPool2D(pool_size=2, strides=2),
            # Original author's note: equiv to gluon.nn.LeakyReLU(0)
            nn.Activation('relu'),
            nn.Dropout(0.25),
            # --- stage 2: 100 channels ---
            nn.Conv2D(channels=100, kernel_size=3, padding=1, activation='relu'),
            nn.Conv2D(channels=100, kernel_size=3, padding=1),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Activation('relu'),
            nn.Dropout(0.25),
            # --- classifier head ---
            nn.Flatten(),
            nn.Dense(512, activation='relu'),
            nn.Dropout(0.25),
            nn.Dense(n_classes),
        ]
        for layer in layers:
            net.add(layer)
    return net

In [5]:
def init_model(m, lr=LR, momentum=MOMENTUM):
    """Create the optimiser wrapper and loss function for a Gluon model.

    Args:
        m: Gluon block whose parameters (``m.collect_params()``) are trained.
        lr: Learning rate passed to the 'sgd' optimiser (defaults to ``LR``).
        momentum: Momentum passed to the 'sgd' optimiser
            (defaults to ``MOMENTUM``).

    Returns:
        A ``(trainer, criterion)`` tuple: a ``gluon.Trainer`` using 'sgd'
        over the model's parameters, and a
        ``gluon.loss.SoftmaxCrossEntropyLoss`` instance.
    """
    sgd_settings = {'learning_rate': lr, 'momentum': momentum}
    sgd_trainer = gluon.Trainer(m.collect_params(), 'sgd', sgd_settings)
    return sgd_trainer, gluon.loss.SoftmaxCrossEntropyLoss()

In [6]:
%%time
# Data into format for library: request the channel-first layout
x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)
# Sanity check: print the shapes of the four arrays, then their dtypes
for attribute in ("shape", "dtype"):
    print(*(getattr(arr, attribute) for arr in (x_train, x_test, y_train, y_test)))

Preparing train set...
Preparing test set...
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)
float32 float32 int32 int32
CPU times: user 630 ms, sys: 588 ms, total: 1.22 s
Wall time: 1.22 s


In [7]:
%%time
# Build the network, pick the GPU context, and initialise the parameters on
# that context with Xavier initialisation (magnitude 2.24).
sym = SymbolModule()
ctx = mx.gpu()
sym.collect_params().initialize(
    init=mx.init.Xavier(magnitude=2.24),
    ctx=ctx,
)

CPU times: user 321 ms, sys: 392 ms, total: 713 ms
Wall time: 876 ms


In [8]:
%%time
# Create the trainer and loss function for the network built above,
# using init_model's default learning rate and momentum
trainer, criterion = init_model(m=sym)

CPU times: user 203 µs, sys: 128 µs, total: 331 µs
Wall time: 337 µs


In [9]:
%%time
# Main training loop: 62s
# Runs EPOCHS passes over the training set and prints the mean per-sample
# training loss of each epoch.
for j in range(EPOCHS):
    train_loss = 0.0
    n_seen = 0  # samples actually fed through the network this epoch
    for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
        # Get samples: copy the mini-batch to the training context
        data = nd.array(data).as_in_context(ctx)
        target = nd.array(target).as_in_context(ctx)
        # Record the forward pass and loss so gradients can be derived
        with autograd.record():
            # Forwards
            output = sym(data)
            # Loss
            loss = criterion(output, target)
        # Back-prop
        loss.backward()
        # Pass the batch size to step() alongside the unreduced loss
        trainer.step(data.shape[0])
        train_loss += nd.sum(loss).asscalar()
        n_seen += data.shape[0]
    # Log: average over the samples that were really processed. Dividing by
    # len(x_train) under-reports the loss whenever the batch generator skips
    # a trailing partial batch. max() guards against an empty epoch.
    print('Epoch %3d: loss: %5.4f'%(j, train_loss/max(n_seen, 1)))

Epoch   0: loss: 1.8405
Epoch   1: loss: 1.3773
Epoch   2: loss: 1.1577
Epoch   3: loss: 0.9811
Epoch   4: loss: 0.8450
Epoch   5: loss: 0.7354
Epoch   6: loss: 0.6391
Epoch   7: loss: 0.5559
Epoch   8: loss: 0.4810
Epoch   9: loss: 0.4157
CPU times: user 1min 18s, sys: 15.3 s, total: 1min 34s
Wall time: 1min 2s


In [10]:
%%time
# Main evaluation loop: 453ms
# Predict a class for every test mini-batch and store the predictions in
# y_guess, aligned with the ground-truth labels in y_truth.
# Only whole mini-batches are scored, so both arrays are sized to the largest
# multiple of BATCHSIZE that fits in the test set.
n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE
# Builtin int instead of np.int: the alias was deprecated in NumPy 1.20 and
# removed in NumPy 1.24; the resulting dtype is the same.
y_guess = np.zeros(n_samples, dtype=int)
y_truth = y_test[:n_samples]
c = 0  # index of the mini-batch currently being scored
for data, target in yield_mb(x_test, y_test, BATCHSIZE):
    # Get samples
    data = nd.array(data).as_in_context(ctx)
    # Forwards
    output = sym(data)
    # Predicted class = index of the largest output along axis 1
    pred = nd.argmax(output, axis=1)
    # Collect results into the slot reserved for this mini-batch
    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred.asnumpy()
    c += 1

CPU times: user 627 ms, sys: 73.1 ms, total: 700 ms
Wall time: 453 ms


In [11]:
# Accuracy = fraction of scored test samples whose prediction equals the label
hits = y_guess == y_truth
print("Accuracy: ", 1. * sum(hits) / len(y_guess))

Accuracy:  0.7661258012820513
