In [1]:
import wget
import os

def download_file(url):
    """
    Downloads a file from a url if the file does not exist in the current folder

    The local file name is the last path component of the url. That name is
    handed to wget explicitly so the file written to disk is the one the
    existence check looks for on the next run.

    :param url: Url to the file
    """
    local_filename = url.split('/')[-1]
    if os.path.isfile(local_filename):
        print("The file %s already exist in the current directory" % local_filename)
        return

    print('downloading data: %s' % url)
    # Without `out`, wget chooses the output name itself, which is not
    # guaranteed to match `local_filename`.
    wget.download(url, out=local_filename)
    print('saved data')

In [2]:
import sys

# MNIST train / test sets in CSV form; download_file skips files already on disk
url_train = 'https://hoaphumanoidstorage2.blob.core.windows.net/public/mnist_train.csv'
url_test = 'https://hoaphumanoidstorage2.blob.core.windows.net/public/mnist_test.csv'

for url in (url_train, url_test):
    print("Downloading file %s" % url)
    download_file(url)

Downloading file https://hoaphumanoidstorage2.blob.core.windows.net/public/mnist_train.csv
The file mnist_train.csv already exist in the current directory
Downloading file https://hoaphumanoidstorage2.blob.core.windows.net/public/mnist_test.csv
The file mnist_test.csv already exist in the current directory


In [3]:
#http://yann.lecun.com/exdb/mnist/
import numpy as np
import pandas as pd
import mxnet as mx
import time
import math
import logging

In [4]:
# Hyper-parameters used by the training cells
BATCH_SIZE = 100                      # samples per mini-batch
DATA_SHAPE = (BATCH_SIZE, 1, 28, 28)  # shape of one input batch
EPOCHS = 10                           # passes over the training data
LR = 0.07                             # learning rate
MOM = 0.9                             # momentum
WD = 1e-5                             # weight decay

In [5]:
# logging: append everything from DEBUG upwards to lenet.log via the root logger.
# The root logger outlives this cell, so the file handler is attached only once;
# otherwise each re-run of the cell would add another handler and every
# message would be written to the file multiple times.
logger = logging.getLogger()
log_path = os.path.abspath('lenet.log')
handler_attached = any(
    isinstance(h, logging.FileHandler) and h.baseFilename == log_path
    for h in logger.handlers
)
if not handler_attached:
    fhandler = logging.FileHandler(filename='lenet.log', mode='a')
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fhandler.setFormatter(formatter)
    logger.addHandler(fhandler)
logger.setLevel(logging.DEBUG)

In [6]:
# gather data: column 0 holds the labels, the remaining columns the pixel values
train = pd.read_csv('mnist_train.csv', header=None)
train_y = train.iloc[:, 0].values
pixels = train.iloc[:, 1:].values

# modify data: float32 images reshaped to (N, 1, 28, 28) and
# normalised by dividing by 255 (between 0 and 1)
train_x = pixels.astype('float32').reshape((-1, 1, 28, 28)) / 255.0

# iterator to feed one shuffled mini-batch at a time;
# each item it returns is an mxnet.io.DataBatch
train_iter = mx.io.NDArrayIter(train_x, train_y, batch_size=BATCH_SIZE, shuffle=True)
    
def create_lenet():
    """
    Build the symbolic LeNet graph.

    Two (5x5 convolution -> tanh -> 2x2 max-pool) stages with 20 and 50
    filters are followed by a 500-unit tanh fully connected layer and a
    10-unit fully connected layer feeding a softmax output.

    :return: the SoftmaxOutput symbol named "softmax", with inputs
             'data' and 'softmax_label'
    """
    sym = mx.sym

    net = sym.Variable('data')
    target = sym.Variable('softmax_label')  # placeholder for output

    # Operators are created in the same order as a hand-unrolled version,
    # so their auto-generated names stay the same.
    for filters in (20, 50):
        net = sym.Convolution(data=net, kernel=(5, 5), num_filter=filters)
        net = sym.Activation(data=net, act_type="tanh")
        net = sym.Pooling(data=net, pool_type="max", kernel=(2, 2), stride=(2, 2))

    net = sym.Flatten(data=net)
    net = sym.FullyConnected(data=net, num_hidden=500)
    net = sym.Activation(data=net, act_type="tanh")
    net = sym.FullyConnected(data=net, num_hidden=10)

    return sym.SoftmaxOutput(data=net, label=target, name="softmax")

# build the LeNet symbol and select the CPU as compute device
# (no training happens in this cell)
cnn = create_lenet()
ctx = mx.cpu()

In [7]:
# Visualise the network symbol; render() is called with the base name 'lenet'
a = mx.viz.plot_network(symbol=cnn)
a.render(filename='lenet')

'lenet.pdf'

### Training Method A - Simple

In [8]:
# Wrap the symbol, device and training hyper-parameters in a FeedForward model
model = mx.model.FeedForward(
    symbol=cnn,
    ctx=ctx,
    num_epoch=EPOCHS,
    learning_rate=LR,
    momentum=MOM,
    wd=WD)

In [None]:
# Speedometer is the batch-end callback; no reporting frequency is passed, so
# it uses the library default
# (NOTE(review): confirm that default against the installed MXNet version).
# Parameters are checkpointed after every epoch under the "lenet_checkp_" prefix.
speed_callback = mx.callback.Speedometer(BATCH_SIZE)
checkpoint_callback = mx.callback.do_checkpoint("lenet_checkp_")

model.fit(
    X=train_iter,
    eval_metric=['accuracy'],
    batch_end_callback=speed_callback,
    epoch_end_callback=checkpoint_callback
)

In [None]:
# prediction of test set: column 0 holds the labels, the remaining columns the pixels
test = pd.read_csv('mnist_test.csv', header=None)
test_y = test[[0]].values.ravel()
test_x = test.iloc[:,1:].values

# same preprocessing as the training images: float32, (N, 1, 28, 28), divided by 255
test_x = np.array(test_x, dtype='float32').reshape((-1, 1, 28, 28))
test_x[:] /= 255.0

test_iter = mx.io.NDArrayIter(test_x, test_y, batch_size=BATCH_SIZE)

# predicted class = index of the largest score in each row
# (argmax states this directly and avoids sorting every row)
pred = np.argmax(model.predict(X = test_iter), axis=1)

# accuracy: the mean of the boolean hit mask is always a float, whereas
# sum(...)/len(...) on integers floor-divides to 0 under Python 2
print(np.mean(pred == test_y))

# save predicted and true labels side by side
np.savetxt('predicted_images.csv', np.c_[pred, test_y], delimiter=',', fmt='%d')

### Training Method B - Advanced

In [11]:
# create own iterator that yields fixed-size mini-batches
def manual_iterator(infile, y_split, batch_size=100):
    """
    Yield (X_batch, y_batch) pairs of exactly `batch_size` samples.

    Every complete batch is yielded as soon as it is filled, including the
    last one. A trailing group of fewer than `batch_size` samples is dropped,
    so every yielded batch has the same shape.

    :param infile: iterable of samples; each one is written into channel 0
                   of its slot in the batch buffer
    :param y_split: sliceable sequence of labels aligned with `infile`
    :param batch_size: number of samples per yielded batch
    :return: generator of (float32 array of shape
             (batch_size,) + DATA_SHAPE[1:], matching slice of `y_split`)
    """
    # Size the buffer from the requested batch size, not the global batch size
    batch_shape = (batch_size,) + tuple(DATA_SHAPE[1:])
    X_split = None
    filled = 0
    for ti, tx in enumerate(infile):
        if filled == 0:
            # fresh buffer per batch so already-yielded batches are never overwritten
            X_split = np.zeros(batch_shape, dtype='float32')
        X_split[filled][0] = tx
        filled += 1
        if filled == batch_size:
            yield X_split, y_split[ti + 1 - batch_size:ti + 1]
            filled = 0

In [37]:
# setup executor
from collections import namedtuple

CNNModel = namedtuple("CNNModel", ['cnn_exec', 'symbol', 'data', 'label', 'param_blocks'])
initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)

# arguments that are fed per batch rather than learned
io_names = ('softmax_label', 'data')

# Create arguments: infer every argument shape from the input batch shape
arg_names = cnn.list_arguments()
input_shapes = dict(data=DATA_SHAPE)
shapey = cnn.infer_shape(**input_shapes)
arg_shape, out_shape, aux_shape = shapey

# one zero-filled array per argument, plus a gradient buffer per learnable one
arg_arrays = [mx.nd.zeros(shape, ctx) for shape in arg_shape]
args_grad = {
    name: mx.nd.zeros(shape, ctx)
    for shape, name in zip(arg_shape, arg_names)
    if name not in io_names
}

# Group symbols and create executor; grad_req='add' accumulates gradients
# across backward passes
cnn_exec = cnn.bind(
    ctx=ctx,
    args=arg_arrays,
    args_grad=args_grad,
    grad_req='add')

# initialise each learnable parameter and record
# (argument index, weight array, gradient array, name)
arg_dict = dict(zip(arg_names, cnn_exec.arg_arrays))
param_blocks = []
for i, name in enumerate(arg_names):
    if name not in io_names:
        initializer(name, arg_dict[name])
        param_blocks.append((i, arg_dict[name], args_grad[name], name))

out_dict = dict(zip(cnn.list_outputs(), cnn_exec.outputs))

# handles to the executor's input arrays; batches are copied into these
data = cnn_exec.arg_dict['data']
label = cnn_exec.arg_dict['softmax_label']

m = CNNModel(cnn_exec, cnn, data, label, param_blocks)

In [None]:
# Stochastic gradient descent with global gradient-norm clipping
max_grad_norm=5.0
optimizer = mx.optimizer.create(
    'sgd',
    learning_rate=LR,
    momentum = MOM,
    wd=WD
)

updater = mx.optimizer.get_updater(optimizer)

for iteration in range(EPOCHS):

    tic = time.time()
    num_correct = 0
    num_total = 0

    for batchX, batchY in manual_iterator(train_x, train_y, batch_size=BATCH_SIZE):

        # copy the batch into the executor's input arrays
        m.data[:] = batchX
        m.label[:] = batchY

        # forward
        m.cnn_exec.forward(is_train=True)

        # backward
        m.cnn_exec.backward()

        # eval on training data: count rows whose arg-max output equals the label
        predicted = np.argmax(m.cnn_exec.outputs[0].asnumpy(), axis=1)
        num_correct += int(np.sum(batchY == predicted))
        num_total += len(batchY)

        if num_total % (BATCH_SIZE*100) == 0:
            print("Processed %d" % num_total)

        # update weights: average the gradients over the batch and measure
        # the global L2 norm across all parameter blocks
        norm = 0
        for idx, weight, grad, name in m.param_blocks:
            grad /= BATCH_SIZE
            l2_norm = mx.nd.norm(grad).asscalar()
            norm += l2_norm * l2_norm

        norm = math.sqrt(norm)
        for idx, weight, grad, name in m.param_blocks:
            # rescale so the global norm never exceeds max_grad_norm
            if norm > max_grad_norm:
                grad *= (max_grad_norm / norm)

            updater(idx, grad, weight)

            # reset gradient to zero (the executor is bound with grad_req='add',
            # so gradients would otherwise accumulate across batches)
            grad[:] = 0.0

    # decay learning rate: halve it every 50 epochs.
    # `optimizer` is the object created above; the former name `opt` was never defined.
    if iteration % 50 == 0 and iteration > 0:
        optimizer.lr *= 0.5
        print('reset learning rate to %g' % optimizer.lr)

    # end of training loop
    toc = time.time()
    train_time = toc - tic
    train_acc = num_correct * 100 / float(num_total)
    print('Iter [%d] Train: Time: %.3fs, Training Accuracy: %.3f' % (iteration, train_time, train_acc))

Processed 10000
Processed 20000
Processed 30000
Processed 40000
Processed 50000
