In [2]:
# http://gluon.mxnet.io/chapter03_deep-neural-networks/mlp-gluon.html#Multilayer-perceptrons-in-gluon
from __future__ import print_function
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon

  Optimizer.opt_registry[name].__name__))


In [3]:
# Device handles. model_ctx is where parameters are initialised and where each
# batch is moved before it is fed to the network.
# NOTE(review): data_ctx is not referenced by any cell visible in this notebook.
data_ctx = mx.cpu()
model_ctx = mx.cpu()
# model_ctx = mx.gpu(0)
# Mini-batch size handed to both DataLoaders.
batch_size = 64
# Flattened width of one input sample. The reshape calls in the evaluation and
# training cells hard-code the same 784 rather than reading this name.
num_inputs = 784
# Width of the final Dense layer of the Sequential model.
num_outputs = 10
# Divisor used to turn the summed epoch loss into a per-example average;
# presumably the size of the MNIST training split -- TODO confirm.
num_examples = 60000
def transform(data, label):
    """Convert one (data, label) sample to float32 and rescale the data.

    The data is cast to float32 and divided by 255; the label is only cast
    to float32. Returns the pair ``(scaled_data, float_label)``.
    """
    pixels = data.astype(np.float32) / 255
    target = label.astype(np.float32)
    return pixels, target
# Batched iterators over the MNIST train and test splits. `transform` is passed
# to each dataset; both loaders use batch_size. The training loader is created
# with shuffle=True, the test loader with shuffle=False.
train_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform),
                                      batch_size, shuffle=True)
test_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform),
                                     batch_size, shuffle=False)

In [7]:
# make a block to handle network
# http://gluon.mxnet.io/chapter03_deep-neural-networks/mlp-gluon.html#Define-the-model-with-gluon.Block
class MLP(gluon.Block):
    """Three-layer perceptron that prints every intermediate activation.

    Two 64-unit Dense layers with relu activation are followed by a 10-unit
    Dense layer with no activation. ``forward`` prints the result of each
    layer before passing it on, so every call produces three printed lines
    of output in addition to the returned value.
    """

    def __init__(self, **kwargs):
        """Create the three Dense layers inside this block's name scope."""
        super(MLP, self).__init__(**kwargs)
        with self.name_scope():
            # Layers are created in the order forward() applies them.
            self.dense0 = gluon.nn.Dense(64, activation="relu")
            self.dense1 = gluon.nn.Dense(64, activation="relu")
            self.dense2 = gluon.nn.Dense(10)

    def forward(self, x):
        """Apply the layers in sequence, printing each result; return the last."""
        stages = (
            (self.dense0, "Hidden Representation 1: %s"),
            (self.dense1, "Hidden Representation 2: %s"),
            (self.dense2, "Network output: %s"),
        )
        for layer, template in stages:
            x = layer(x)
            print(template % x)
        return x

In [8]:
# create network
# Instantiate the hand-written MLP block, then initialise all of its parameters
# on model_ctx using mx.init.Normal(sigma=.01).
net = MLP()
net.collect_params().initialize(mx.init.Normal(sigma=.01), ctx=model_ctx)

In [9]:
# send gibberish through network
# One all-ones row of width 784 is moved to model_ctx and pushed through the
# net. MLP.forward prints each intermediate activation; the bare call on the
# last line also leaves the returned array as the cell's displayed result.
data = nd.ones((1,784))
net(data.as_in_context(model_ctx))


Hidden Representation 1: 
[[ 0.          0.          0.0257028   0.41763657  0.          0.          0.
   0.          0.          0.03712554  0.16054311  0.35507885  0.
   0.12578693  0.          0.          0.          0.30374652  0.2925671
   0.35357702  0.          0.07809133  0.21969812  0.21779835  0.
   0.34579116  0.13206208  0.01624629  0.27534342  0.22952282  0.22022063
   0.          0.00258672  0.0639514   0.68015653  0.          0.          0.
   0.16524595  0.18695298  0.25243062  0.01728731  0.0647173   0.          0.
   0.25521508  0.          0.          0.03300405  0.33107036  0.64537466
   0.04547647  0.          0.          0.          0.1954248   0.02424751
   0.          0.          0.04300816  0.16542056  0.13203511  0.          0.        ]]
<NDArray 1x64 @cpu(0)>
Hidden Representation 2: 
[[  0.00000000e+00   0.00000000e+00   4.84575331e-03   0.00000000e+00
    2.49755625e-02   0.00000000e+00   9.23847593e-03   1.18465200e-02
    0.00000000e+00   1.50872627e-02 
[... remainder of this output was truncated in the exported notebook ...]

[[ -1.17857940e-03   1.90145045e-04   8.11181497e-04  -3.82558472e-04
    4.79567214e-04  -1.27192587e-04   3.38522077e-05  -2.32845996e-04
    7.18050636e-04   1.17539358e-03]]
<NDArray 1x10 @cpu(0)>

In [10]:
# Faster modeling with gluon.nn.Sequential
# http://gluon.mxnet.io/chapter03_deep-neural-networks/mlp-gluon.html#Faster-modeling-with-gluon.nn.Sequential

# Width shared by both hidden layers below.
num_hidden = 64
# Rebinds `net`: from here on it refers to this Sequential container, not to
# the MLP instance created in the earlier cell.
net = gluon.nn.Sequential()
with net.name_scope():
    # Two relu hidden layers followed by a num_outputs-wide layer without activation.
    net.add(gluon.nn.Dense(num_hidden, activation="relu"))
    net.add(gluon.nn.Dense(num_hidden, activation="relu"))
    net.add(gluon.nn.Dense(num_outputs))

In [11]:
# Register the parameter initializer for the Sequential net on model_ctx.
# NOTE(review): sigma=.1 here, whereas the MLP cell earlier used sigma=.01.
net.collect_params().initialize(mx.init.Normal(sigma=.1), ctx=model_ctx)

# Loss: called as softmax_cross_entropy(output, label) in the training loop.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
# Optimizer: 'sgd' with learning rate .01 over all parameters collected from net.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .01})

In [12]:
# Evaluator
def evaluate_accuracy(data_iterator, net):
    """Run ``net`` over every batch of ``data_iterator`` and score it.

    Each batch is moved to the module-level ``model_ctx`` and reshaped to
    rows of width 784 before being passed to ``net``. The argmax over axis 1
    of the network output is compared against the labels through an
    ``mx.metric.Accuracy`` instance.

    Returns the second element of that metric's ``get()`` result.
    """
    metric = mx.metric.Accuracy()
    for batch, target in data_iterator:
        batch = batch.as_in_context(model_ctx)
        flat = batch.reshape((-1, 784))
        target = target.as_in_context(model_ctx)
        scores = net(flat)
        predicted = nd.argmax(scores, axis=1)
        metric.update(preds=predicted, labels=target)
    name_and_value = metric.get()
    return name_and_value[1]

In [13]:
# Number of full passes over train_data.
epochs = 10
# NOTE(review): smoothing_constant is assigned but never read in this cell.
smoothing_constant = .01

for e in range(epochs):
    # Running total of nd.sum(loss) over all batches of this epoch.
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        # Move the batch to model_ctx and reshape it to rows of width 784.
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        # The forward pass and loss are computed under autograd.record() so
        # that loss.backward() can produce gradients.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # The step is given this batch's own row count, not the global batch_size.
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()


    # After each epoch, score the net on the full test and training loaders.
    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    # The reported loss is the epoch total divided by num_examples.
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))

Epoch 0. Loss: 1.29614894327, Train_acc 0.829833333333, Test_acc 0.8355
Epoch 1. Loss: 0.485520681953, Train_acc 0.879666666667, Test_acc 0.8867
Epoch 2. Loss: 0.382655344407, Train_acc 0.898066666667, Test_acc 0.9043
Epoch 3. Loss: 0.336357296848, Train_acc 0.90765, Test_acc 0.913
Epoch 4. Loss: 0.305891218535, Train_acc 0.914966666667, Test_acc 0.9198
Epoch 5. Loss: 0.282849863744, Train_acc 0.9218, Test_acc 0.9248
Epoch 6. Loss: 0.263587269719, Train_acc 0.92745, Test_acc 0.9305
Epoch 7. Loss: 0.247260904853, Train_acc 0.931666666667, Test_acc 0.935
Epoch 8. Loss: 0.233039602081, Train_acc 0.934766666667, Test_acc 0.9358
Epoch 9. Loss: 0.220281624333, Train_acc 0.939316666667, Test_acc 0.9406
