# Multilayer perceptrons in ``gluon``

Using gluon, we only need two additional lines of code to transform our logisitc regression model into a multilayer perceptron.

In [3]:
from __future__ import print_function
import mxnet as mx
from mxnet import nd, autograd
from mxnet import gluon

We'll also want to set the compute context for our modeling. Feel free to go ahead and change this to mx.gpu(0) if you're running on an appropriately endowed machine.

In [4]:
ctx = mx.gpu(3)

## The MNIST dataset

In [5]:
mnist = mx.test_utils.get_mnist()
batch_size = 64
num_inputs = 784
num_outputs = 10
train_data = mx.io.NDArrayIter(mnist["train_data"], mnist["train_label"],
                               batch_size, shuffle=True)
test_data = mx.io.NDArrayIter(mnist["test_data"], mnist["test_label"],
                              batch_size, shuffle=True)

## Define the model

*Here's the only real difference. We add two lines!*

In [10]:
num_hidden = 256
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(num_hidden, activation="relu"))
    net.add(gluon.nn.Dense(num_hidden, activation="relu"))
    net.add(gluon.nn.Dense(num_outputs))
net.params

sequential1_ (

)

## Parameter initialization


In [11]:
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

## Softmax cross-entropy loss

In [12]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

## Optimizer

In [13]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})

## Evaluation metric

In [14]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    data_iterator.reset()
    for i, batch in enumerate(data_iterator):
        data = batch.data[0].as_in_context(ctx).reshape((-1, 784))
        label = batch.label[0].as_in_context(ctx)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]

## Training loop

In [15]:
epochs = 10
smoothing_constant = .01

for e in range(epochs):
    train_data.reset()
    for i, batch in enumerate(train_data):
        data = batch.data[0].as_in_context(ctx).reshape((-1, 784))
        label = batch.label[0].as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
            loss.backward()
        trainer.step(data.shape[0])

        ##########################
        #  Keep a moving average of the losses
        ##########################
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0)) 
                       else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, moving_loss, train_accuracy, test_accuracy))

Epoch 0. Loss: 0.202877108467, Train_acc 0.948793976546, Test_acc 0.947750796178
Epoch 1. Loss: 0.129510437929, Train_acc 0.968067030917, Test_acc 0.963972929936
Epoch 2. Loss: 0.0960908631347, Train_acc 0.976412579957, Test_acc 0.970342356688
Epoch 3. Loss: 0.0748281296103, Train_acc 0.981226679104, Test_acc 0.973328025478
Epoch 4. Loss: 0.0600846850756, Train_acc 0.984524920043, Test_acc 0.974820859873
Epoch 5. Loss: 0.0486018002638, Train_acc 0.987573294243, Test_acc 0.976612261146
Epoch 6. Loss: 0.0398525504556, Train_acc 0.989922041578, Test_acc 0.977109872611
Epoch 7. Loss: 0.0322227592865, Train_acc 0.990921508529, Test_acc 0.977607484076
Epoch 8. Loss: 0.026615706459, Train_acc 0.991887659915, Test_acc 0.977806528662
Epoch 9. Loss: 0.021916624636, Train_acc 0.992887126866, Test_acc 0.977906050955


In [16]:
epochs = 10
smoothing_constant = .01

for e in range(epochs):
    train_data.reset()
    for i, batch in enumerate(train_data):
        data = batch.data[0].as_in_context(ctx).reshape((-1, 784))
        label = batch.label[0].as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])

        ##########################
        #  Keep a moving average of the losses
        ##########################
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0)) 
                       else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, moving_loss, train_accuracy, test_accuracy))

Epoch 0. Loss: 0.0177126685598, Train_acc 0.994069829424, Test_acc 0.978105095541
Epoch 1. Loss: 0.0142884760054, Train_acc 0.995185900853, Test_acc 0.97939888535
Epoch 2. Loss: 0.0114222812061, Train_acc 0.996585154584, Test_acc 0.979896496815
Epoch 3. Loss: 0.00930792787238, Train_acc 0.99775119936, Test_acc 0.980195063694
Epoch 4. Loss: 0.00744988269647, Train_acc 0.998500799574, Test_acc 0.980195063694
Epoch 5. Loss: 0.00598901187225, Train_acc 0.999000533049, Test_acc 0.980493630573
Epoch 6. Loss: 0.0049532218968, Train_acc 0.999350346482, Test_acc 0.980792197452
Epoch 7. Loss: 0.00407457374974, Train_acc 0.999516924307, Test_acc 0.980792197452
Epoch 8. Loss: 0.00344898528502, Train_acc 0.999700159915, Test_acc 0.980991242038
Epoch 9. Loss: 0.00296680185857, Train_acc 0.99973347548, Test_acc 0.980692675159


## Conclusion

Now let's take a look at how to build convolutional neural networks.

For whinges or inquiries, [open an issue on  GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)