In [1]:
import mxnet as mx
from mxnet import gluon, autograd
import numpy as np

In [19]:
ACTIV='tanh'
L1_UNITS = 60
L2_UNITS = 20
NUM_CLASSES = 10
BATCH_SIZE = 64
LR = 0.001
WD = 0
MOMENTUM = .9
CTX = mx.gpu()
NUM_EPOCHS = 5

In [3]:
def MLP(l1u=50, l2u=20, act='tanh', num_classes=10):
    net = gluon.nn.Sequential()
    with net.name_scope():
        net.add(gluon.nn.Dense(units=l1u, activation=act))
        net.add(gluon.nn.Dense(units=l2u, activation=act))
        net.add(gluon.nn.Dense(num_classes))
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    return net, loss

In [21]:
net, loss = MLP()
(net, loss)

(Sequential(
   (0): Dense(None -> 50, Activation(tanh))
   (1): Dense(None -> 20, Activation(tanh))
   (2): Dense(None -> 10, linear)
 ), SoftmaxCrossEntropyLoss(batch_axis=0, w=None))

In [5]:
def transform(data, label):
    return (data.astype('float32')/255, label.astype('float32'))
train_dataset = gluon.data.vision.MNIST(train=True, transform=transform)
test_dataset = gluon.data.vision.MNIST(train=False, transform=transform)

In [22]:
train_data = gluon.data.DataLoader(dataset=train_dataset, 
                                   batch_size=BATCH_SIZE, 
                                   shuffle=True)

test_data = gluon.data.DataLoader(dataset=test_dataset, 
                                  batch_size=BATCH_SIZE, 
                                  shuffle=False)

In [33]:
net.initialize(mx.init.Xavier(magnitude=2.24),
               ctx=CTX, 
               force_reinit=True)

In [29]:
trainer = gluon.Trainer(params=net.collect_params(), 
                        optimizer='sgd', 
                        optimizer_params={'learning_rate': LR, 'momentum':MOMENTUM, 'wd':WD})

In [30]:
def train(dataloader, network, loss_fn , trainer, ctx, epochs=10):
    for e in range(epochs):
        print("EPOCH#: {}".format(e))
        epoch_loss = 0
        for i, (d, label) in enumerate(dataloader):
            d = d.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with autograd.record():
                predictions = network(d)
                loss = loss_fn(predictions, label)
            loss.backward()
            trainer.step(d.shape[0])
            if ( i % 100 ) == 0:
                print("Minibactch#: {}: mean loss: {}".format(i, loss.mean().asscalar()))
                epoch_loss += loss.mean().asscalar()
        print("EPOCH LOSS: {}".format(epoch_loss/d.shape[0]))
                
    

In [34]:
train(train_data, net, loss, trainer, CTX, NUM_EPOCHS)

EPOCH#: 0
Minibactch#: 0: mean loss: 2.332568407058716
Minibactch#: 100: mean loss: 1.723408579826355
Minibactch#: 200: mean loss: 1.3712990283966064
Minibactch#: 300: mean loss: 1.1950805187225342
Minibactch#: 400: mean loss: 1.0124510526657104
Minibactch#: 500: mean loss: 0.9406935572624207
Minibactch#: 600: mean loss: 0.7341489195823669
Minibactch#: 700: mean loss: 0.9151694774627686
Minibactch#: 800: mean loss: 0.6902312636375427
Minibactch#: 900: mean loss: 0.7092996835708618
EPOCH LOSS: 0.36326095275580883
EPOCH#: 1
Minibactch#: 0: mean loss: 0.653210461139679
Minibactch#: 100: mean loss: 0.6352062821388245
Minibactch#: 200: mean loss: 0.6736405491828918
Minibactch#: 300: mean loss: 0.44345277547836304
Minibactch#: 400: mean loss: 0.5761818885803223
Minibactch#: 500: mean loss: 0.5809483528137207
Minibactch#: 600: mean loss: 0.6157048344612122
Minibactch#: 700: mean loss: 0.7161040306091309
Minibactch#: 800: mean loss: 0.4491504728794098
Minibactch#: 900: mean loss: 0.50660789012