In [1]:
import mxnet as mx
from mxnet import gluon, autograd
import numpy as np

In [19]:
ACTIV='tanh'
L1_UNITS = 60
L2_UNITS = 20
NUM_CLASSES = 10
BATCH_SIZE = 64
LR = 0.001
WD = 0
MOMENTUM = .9
CTX = mx.gpu()
NUM_EPOCHS = 5

In [3]:
def MLP(l1u=50, l2u=20, act='tanh', num_classes=10):
    net = gluon.nn.Sequential()
    with net.name_scope():
        net.add(gluon.nn.Dense(units=l1u, activation=act))
        net.add(gluon.nn.Dense(units=l2u, activation=act))
        net.add(gluon.nn.Dense(num_classes))
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    return net, loss

In [21]:
net, loss = MLP()
(net, loss)

(Sequential(
   (0): Dense(None -> 50, Activation(tanh))
   (1): Dense(None -> 20, Activation(tanh))
   (2): Dense(None -> 10, linear)
 ), SoftmaxCrossEntropyLoss(batch_axis=0, w=None))

In [5]:
def transform(data, label):
    return (data.astype('float32')/255, label.astype('float32'))
train_dataset = gluon.data.vision.MNIST(train=True, transform=transform)
test_dataset = gluon.data.vision.MNIST(train=False, transform=transform)

In [22]:
train_data = gluon.data.DataLoader(dataset=train_dataset, 
                                   batch_size=BATCH_SIZE, 
                                   shuffle=True)

test_data = gluon.data.DataLoader(dataset=test_dataset, 
                                  batch_size=BATCH_SIZE, 
                                  shuffle=False)

In [33]:
net.initialize(mx.init.Xavier(magnitude=2.24),
               ctx=CTX, 
               force_reinit=True)

In [29]:
trainer = gluon.Trainer(params=net.collect_params(), 
                        optimizer='sgd', 
                        optimizer_params={'learning_rate': LR, 'momentum':MOMENTUM, 'wd':WD})

In [46]:
def evaluate(dataloader, network, ctx, metrics=[mx.metric.Accuracy(), mx.metric.RMSE()]):
    metric = mx.metric.CompositeEvalMetric(metrics=metrics)
    metric.reset()
    for i, (d,l) in enumerate(dataloader):
        d  = d.as_in_context(ctx)
        l = l.as_in_context(ctx)
        predictions = network(d)
        metric.update(preds=predictions, labels=l)
    return metric
    

In [53]:
def train(dataloader, network, loss_fn , trainer, ctx, epochs=10):
    for e in range(epochs):
        print("EPOCH#: {}".format(e))
        epoch_loss = 0
        for i, (d, label) in enumerate(dataloader):
            d = d.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with autograd.record():
                predictions = network(d)
                loss = loss_fn(predictions, label)
            loss.backward()
            trainer.step(d.shape[0])
            test_val = evaluate(dataloader=test_data, network=network, ctx=ctx)
            train_val = evaluate(dataloader=train_data, network=network, ctx=ctx)
            if ( i % 100 ) == 0:
                print("Minibactch#: {}: mean loss: {}".format(i, loss.mean().asscalar()))
                epoch_loss += loss.mean().asscalar()
                print("test metrics: {}".format(test_val.get_name_value()))
                print("train metrics: {}".format(train_val.get_name_value()))
        
                
    

In [None]:
train(train_data, net, loss, trainer, CTX, epochs=2)

EPOCH#: 0
Minibactch#: 0: mean loss: 0.25654923915863037
test metrics: [('accuracy', 0.9180166666666667), ('rmse', 5.787024036907693)]
train metrics: [('accuracy', 0.9180166666666667), ('rmse', 5.787024036907693)]
