In [1]:
import mxnet as mx
from mxnet import gluon, autograd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import time

  from ._conv import register_converters as _register_converters
  import OpenSSL.SSL


In [2]:
def transform(data, label):
    """Convert one dataset sample into float32 network input.

    The image is cast to float32, its axes are permuted with (2, 0, 1) so the
    last axis becomes the first, and the result is divided by 255. The label
    is cast to float32.

    Parameters
    ----------
    data : array-like exposing ``astype``
        Image sample; presumably height x width x channel with values in
        0..255 -- TODO confirm against the dataset.
    label : array-like exposing ``astype``
        Class label of the sample.

    Returns
    -------
    tuple
        ``(scaled_image, float_label)``.
    """
    pixels = data.astype(np.float32)
    channels_first = mx.nd.transpose(pixels, (2,0,1))
    return channels_first/255, label.astype(np.float32)

# Fashion-MNIST loaders, 128 samples per batch, every sample passed through
# `transform`. Only the training loader shuffles; validation keeps dataset order.
train_data = gluon.data.DataLoader(
    gluon.data.vision.FashionMNIST(train=True, transform=transform), batch_size=128, shuffle=True)

# Held-out split (train=False) used for the per-epoch validation accuracy.
validation_data = gluon.data.DataLoader(
    gluon.data.vision.FashionMNIST(train=False, transform=transform), batch_size=128, shuffle=False)

  label = np.fromstring(fin.read(), dtype=np.uint8).astype(np.int32)
  data = np.fromstring(fin.read(), dtype=np.uint8)


In [8]:
def buildCNN(cnnlayers, fclayers=([256,''],[64,'']), outputs=10, activation='relu', alpha=0.01):
    """Assemble a sequential Gluon CNN from compact per-layer specifications.

    Parameters
    ----------
    cnnlayers : sequence of lists
        An entry whose first element is below 1.0 adds ``Dropout`` with that
        rate. Any other entry adds one convolution stage and is read as
        ``[channels, kernel_size, padding, <unused>, pool, bn_flag]``:
        ``pool`` is used as both the max-pool window and its stride, and a
        ``bn_flag`` equal to ``'BN'`` inserts BatchNorm between the
        convolution and its activation.
        NOTE(review): index 3 is never read -- possibly meant as the pool
        window with index 4 as the stride; confirm with the author.
    fclayers : sequence of lists
        ``[rate]`` (rate < 1.0) adds ``Dropout``; otherwise
        ``[units, bn_flag]`` adds a ``Dense`` stage, with BatchNorm when
        ``bn_flag`` equals ``'BN'``.
    outputs : int
        Units of the final ``Dense`` layer, which gets no activation.
    activation : str
        ``'leakyrelu'`` selects ``LeakyReLU``; any other value is passed to
        ``gluon.nn.Activation``.
    alpha : float
        Argument for ``LeakyReLU``; only used with ``'leakyrelu'``.

    Returns
    -------
    gluon.nn.Sequential
        The assembled network. No parameter initialisation happens here.
    """
    model = gluon.nn.Sequential()

    def nonlinearity():
        # Build a fresh activation layer for each stage that needs one.
        if activation == 'leakyrelu':
            return gluon.nn.LeakyReLU(alpha=alpha)
        return gluon.nn.Activation(activation=activation)

    with model.name_scope():
        # Convolutional part: dropout entries and conv stages, in the given order.
        for spec in cnnlayers:
            if spec[0] < 1.0:
                model.add(gluon.nn.Dropout(spec[0]))
                continue
            model.add(gluon.nn.Conv2D(channels=spec[0], kernel_size=spec[1],
                                      padding=spec[2], activation=None))
            if spec[5] == 'BN':
                model.add(gluon.nn.BatchNorm(axis=1, center=True, scale=True))
            model.add(nonlinearity())
            model.add(gluon.nn.MaxPool2D(pool_size=spec[4], strides=spec[4]))

        model.add(gluon.nn.Flatten())

        # Fully connected part: dropout entries and dense stages.
        for spec in fclayers:
            if spec[0] < 1.0:
                model.add(gluon.nn.Dropout(spec[0]))
                continue
            model.add(gluon.nn.Dense(spec[0], activation=None))
            if spec[1] == 'BN':
                model.add(gluon.nn.BatchNorm(axis=1, center=True, scale=True))
            model.add(nonlinearity())

        # Output layer.
        model.add(gluon.nn.Dense(outputs))
    return model

In [9]:
def init(net, optimizer='sgd', learning_rate=0.1, weight_decay=1e-6, ctx=None):
    """Initialise the network's parameters and create a trainer for them.

    Parameters
    ----------
    net : gluon block
        Network whose parameters are initialised with Xavier
        (magnitude 2.24).
    optimizer : str
        Optimizer name handed to ``gluon.Trainer``.
    learning_rate : float
        Passed to the optimizer as ``'learning_rate'``.
    weight_decay : float
        Passed to the optimizer as ``'wd'``.
    ctx : context or list of contexts, optional
        Where to place the parameters. ``None`` (the default) is forwarded
        unchanged, which matches calling ``initialize`` without ``ctx``. Pass
        the same context that is given to ``train`` / ``accuracy`` when not
        running on the default device.

    Returns
    -------
    gluon.Trainer
        Trainer bound to the parameters of ``net``.
    """
    params = net.collect_params()
    params.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
    trainer = gluon.Trainer(params,
                            optimizer,
                            {'learning_rate': learning_rate, 'wd': weight_decay})
    return trainer

In [10]:
def accuracy(data_iterator, net,ctx=mx.cpu()):
    """Compute the classification accuracy of ``net`` over ``data_iterator``.

    Parameters
    ----------
    data_iterator : iterable of (data, label) batches
        Each batch is moved to ``ctx`` before use.
    net : callable
        Model; the index of the largest output along axis 1 is taken as the
        predicted class.
    ctx : mxnet context
        Device on which the batches are evaluated.

    Returns
    -------
    The value component of ``mx.metric.Accuracy().get()`` accumulated over
    all batches.
    """
    metric = mx.metric.Accuracy()
    for batch, target in data_iterator:
        batch = batch.as_in_context(ctx)
        target = target.as_in_context(ctx)
        scores = net(batch)
        metric.update(preds=mx.nd.argmax(scores, axis=1), labels=target)
    # get() yields (name, value); only the value is of interest here.
    _, value = metric.get()
    return value

In [11]:
def train(net, trainer, train_data, validation_data, epochs, ctx=mx.cpu()):
    """Train ``net`` with softmax cross-entropy and report accuracy per epoch.

    Parameters
    ----------
    net : gluon block
        Model to train; its parameters must already be initialised.
    trainer : gluon.Trainer
        Optimizer wrapper stepped once per batch.
    train_data, validation_data : iterables of (data, label) batches
        Training and validation loaders. The training loader is iterated a
        second time each epoch to measure training accuracy.
    epochs : int
        Number of passes over ``train_data``.
    ctx : mxnet context
        Device used for the training batches and for both accuracy
        evaluations.

    Returns
    -------
    tuple of two lists
        ``(training_accuracies, validation_accuracies)``, one entry per epoch.
    """
    training_accuracies = []
    validation_accuracies = []
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    for e in range(epochs):
        tic = time.time()
        for (data, label) in train_data:
            data  = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            # Only the forward pass and the loss need to be recorded.
            with autograd.record():
                output = net(data)
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            # Normalise the update by the actual batch size (the last batch may be smaller).
            trainer.step(data.shape[0])
        # The reported time covers the training pass only, not the evaluations below.
        toc = time.time()
        # Evaluate on the same device used for training; without ctx the
        # batches would be placed on the default CPU context.
        train_accuracy = accuracy(train_data, net, ctx)
        training_accuracies.append(train_accuracy)
        validation_accuracy = accuracy(validation_data, net, ctx)
        validation_accuracies.append(validation_accuracy)
        print("Epoch#%d Time=%.2f Training=%.4f Validation=%.4f Diff=%.4f" 
              % (e, toc-tic, train_accuracy, validation_accuracy, train_accuracy-validation_accuracy))
    return training_accuracies, validation_accuracies

In [None]:
# Baseline: two conv stages (no BatchNorm, no Dropout) trained with init()'s defaults (SGD, lr=0.1).
epochs = 50
# Shapes for a 1x28x28 input (2x2 max-pool with stride 2; second conv has no padding):
# 1x28x28 -CONV-> 64x28x28 -POOL-> 64x14x14 -CONV-> 64x12x12 -POOL-> 64x6x6 --> 2304 -FC-> 256 -FC-> 64 -FC-> 10
net = buildCNN(([64,3,1,2,2,''],[64,3,0,2,2,'']))
print(net)
trainer = init(net)
training_accuracies, validation_accuracies = train(net, trainer, train_data, validation_data, epochs)
# NOTE(review): plot_accuracies is not defined in the visible cells -- presumably it comes from a cell not shown here; confirm.
plot_accuracies(training_accuracies, validation_accuracies)

Sequential(
  (0): Conv2D(None -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): Activation(relu)
  (2): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False)
  (3): Conv2D(None -> 64, kernel_size=(3, 3), stride=(1, 1))
  (4): Activation(relu)
  (5): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False)
  (6): Flatten
  (7): Dense(None -> 256, linear)
  (8): Activation(relu)
  (9): Dense(None -> 64, linear)
  (10): Activation(relu)
  (11): Dense(None -> 10, linear)
)


In [None]:
# Same architecture as the baseline, trained with Adam (lr=1e-3) instead of the default SGD.
epochs = 50
net = buildCNN(([64,3,1,2,2,''],[64,3,0,2,2,'']))
print(net)
trainer = init(net, optimizer='adam', learning_rate=1e-3)
training_accuracies, validation_accuracies = train(net, trainer, train_data, validation_data, epochs)
plot_accuracies(training_accuracies, validation_accuracies)

In [None]:
# Adds BatchNorm ('BN' flag) to both conv stages and both hidden dense layers; optimizer stays Adam.
epochs = 50
net = buildCNN(([64,3,1,2,2,'BN'],[64,3,0,2,2,'BN']), ([256,'BN'], [64,'BN']))
print(net)
trainer = init(net, optimizer='adam', learning_rate=1e-3)
training_accuracies, validation_accuracies = train(net, trainer, train_data, validation_data, epochs)
plot_accuracies(training_accuracies, validation_accuracies)

In [None]:
# BatchNorm network plus Dropout(0.3) after every conv stage and every hidden dense layer; 100 epochs.
epochs = 100
net = buildCNN(([64,3,1,2,2,'BN'],[0.3],[64,3,0,2,2,'BN'],[0.3]), ([256,'BN'],[0.3],[64,'BN'],[0.3]))
print(net)
trainer = init(net, optimizer='adam', learning_rate=1e-3)
training_accuracies, validation_accuracies = train(net, trainer, train_data, validation_data, epochs)
plot_accuracies(training_accuracies, validation_accuracies)

In [None]:
# Same as the Dropout(0.3) experiment, with the dropout rate raised to 0.5 everywhere.
epochs = 100
net = buildCNN(([64,3,1,2,2,'BN'],[0.5],[64,3,0,2,2,'BN'],[0.5]), ([256,'BN'],[0.5],[64,'BN'],[0.5]))
print(net)
trainer = init(net, optimizer='adam', learning_rate=1e-3)
training_accuracies, validation_accuracies = train(net, trainer, train_data, validation_data, epochs)
plot_accuracies(training_accuracies, validation_accuracies)

In [None]:
# Switch the dataset to MNIST, with the same transform, batch size and shuffling as before.
# NOTE: this rebinds train_data / validation_data, so any earlier experiment cell
# re-run after this one will use MNIST instead of Fashion-MNIST.
train_data = gluon.data.DataLoader(
    gluon.data.vision.MNIST(train=True, transform=transform), batch_size=128, shuffle=True)

validation_data = gluon.data.DataLoader(
    gluon.data.vision.MNIST(train=False, transform=transform), batch_size=128, shuffle=False)

In [None]:
# BatchNorm + Dropout(0.3) network trained for 20 epochs on whatever train_data /
# validation_data currently hold (MNIST, if the loader cell above was run last).
epochs = 20
net = buildCNN(([64,3,1,2,2,'BN'],[0.3],[64,3,0,2,2,'BN'],[0.3]), ([256,'BN'],[0.3],[64,'BN'],[0.3]))
print(net)
trainer = init(net, optimizer='adam', learning_rate=1e-3)
training_accuracies, validation_accuracies = train(net, trainer, train_data, validation_data, epochs)
plot_accuracies(training_accuracies, validation_accuracies)