

#MXNet (From Scratch)

In [0]:
import mxnet as mx
from mxnet import nd, autograd, gluon

data_ctx = mx.cpu()
model_ctx = mx.gpu()

##Getting dataset and builing data loader

In [0]:
mnist = mx.test_utils.get_mnist()
num_examples = 60000
batch_size = 64

train_data = mx.gluon.data.DataLoader(
    mx.gluon.data.ArrayDataset(mnist["train_data"][:num_examples],
                               mnist["train_label"][:num_examples].astype('float32')),
                               batch_size, shuffle=True)
test_data = mx.gluon.data.DataLoader(
    mx.gluon.data.ArrayDataset(mnist["test_data"][:num_examples],
                               mnist["test_label"][:num_examples].astype('float32')),
                               batch_size, shuffle=False)

##Building the model

In [0]:
weight = nd.random.normal(shape=(784,10), ctx = model_ctx)
bias = nd.random.normal(shape=10, ctx = model_ctx)

params = [weight, bias]

for param in params:
    param.attach_grad()

def model(inputs):
    return nd.softmax(nd.dot(inputs, weight) + bias, axis=1)

##Defining the objective function and optimizer

In [0]:
def categoricalcrossentropy(predictions, labels):
    return -nd.sum(labels * nd.log(predictions))

def sgd(params, lr):
    for param in params:
        param[:] = param - lr * param.grad

##Defining regularization function

In [0]:
def l1(params, lamda):
    penalty = nd.zeros(shape=1).as_in_context(model_ctx)
    for param in params:
        penalty = penalty + nd.sum(nd.abs(param))
    return lamda * penalty    

def l2(params, lamda):
    penalty = nd.zeros(shape=1).as_in_context(model_ctx)
    for param in params:
        penalty = penalty + nd.sum(param ** 2)
    return lamda * penalty

##Defining accuracy function

In [0]:
def accuracy(predictions, labels):
    return nd.mean(predictions.argmax(1) == labels.argmax(1))

##Training the model

In [0]:
epochs = 20
num_batches = 60000/batch_size
learning_rate = .01
lamda = 0.01
losses = []
accs = []

for epoch in range(epochs):
    cumulative_loss = 0
    cumulative_acc = 0
    for features, labels in train_data:
        features = features.as_in_context(model_ctx).reshape((-1, 784))
        labels = labels.as_in_context(model_ctx).one_hot(10)
        with autograd.record():
            output = model(features)
            loss = categoricalcrossentropy(output, labels) + l2(params, lamda)
        loss.backward()
        sgd(params, learning_rate)
        cumulative_loss += loss
        cumulative_acc += accuracy(output, labels)
    print(f'Epoch: {epoch} Loss: {cumulative_loss.asscalar()/num_batches} Accuracy: {cumulative_acc.asscalar()/num_batches}')
    losses.append(cumulative_loss.asscalar()/num_batches)
    accs.append(cumulative_acc.asscalar()/num_batches)

Epoch: 0 Loss: 120.46835833333333 Accuracy: 0.8213666666666667
Epoch: 1 Loss: 67.48980833333333 Accuracy: 0.8883333333333333
Epoch: 2 Loss: 49.50472083333333 Accuracy: 0.9019333333333334
Epoch: 3 Loss: 39.076366666666665 Accuracy: 0.9101333333333333
Epoch: 4 Loss: 32.68465416666667 Accuracy: 0.9134
Epoch: 5 Loss: 28.76230625 Accuracy: 0.9149833333333334
Epoch: 6 Loss: 26.101997916666665 Accuracy: 0.9155666666666666
Epoch: 7 Loss: 24.256166666666665 Accuracy: 0.91755
Epoch: 8 Loss: 23.204591666666666 Accuracy: 0.91745
Epoch: 9 Loss: 22.444966666666666 Accuracy: 0.9168166666666666
Epoch: 10 Loss: 21.89431875 Accuracy: 0.9178833333333334
Epoch: 11 Loss: 21.649058333333333 Accuracy: 0.9174
Epoch: 12 Loss: 21.354272916666666 Accuracy: 0.9171666666666667
Epoch: 13 Loss: 21.328358333333334 Accuracy: 0.91585
Epoch: 14 Loss: 21.146654166666668 Accuracy: 0.9164666666666667
Epoch: 15 Loss: 21.123816666666666 Accuracy: 0.9173
Epoch: 16 Loss: 20.968764583333332 Accuracy: 0.917
Epoch: 17 Loss: 20.95

In [0]:
plt.plot(losses)
plt.title('Training Loss')
plt.show()
plt.plot(accs)
plt.title('Training Accuracy')
plt.show()

In [0]:
acc = 0.0
batches = 0
for features, labels in test_data:
    features = features.as_in_context(model_ctx).reshape((-1, 784))
    labels = labels.as_in_context(model_ctx).one_hot(10)
    predictions = model(features)
    acc += accuracy(predictions, labels)
    batches += 1
print(f'Test Accuracy: {acc.asscalar()/batches}')

Test Accuracy: 0.9193869426751592


### Even by training out model for full 20 epochs we still managed to force it not to overfit on the training data. 