In [2]:
from zipfile import ZipFile
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon
import mxnet.ndarray as F
import random


In [3]:
mx.random.seed(1)
random.seed(1)

data_ctx = mx.cpu()
model_ctx = mx.cpu()

In [4]:
class DataLoader(object):
    def __init__(self, min_batch_size=32):
        DIR = 'data/'
        self.min_batch_size=min_batch_size
        
    
    # Returns images and labels corresponding for training and testing. Default mode is train. 
    # For retrieving test data pass mode as 'test' in function call.
    def load_data(self, mode = 'train'):
        label_filename = mode + '_labels'
        image_filename = mode + '_images'
        label_zip = 'data/' + label_filename + '.zip'
        image_zip = 'data/' + image_filename + '.zip'
        with ZipFile(label_zip, 'r') as lblzip:
            labels = np.frombuffer(lblzip.read(label_filename), dtype=np.uint8, offset=8)
        with ZipFile(image_zip, 'r') as imgzip:
            images = np.frombuffer(imgzip.read(image_filename), dtype=np.uint8, offset=16).reshape(len(labels), 784)
        return images, labels

In [31]:
data_obj = DataLoader()
X_train, y_train = data_obj.load_data(mode='train')
X_train = X_train.astype('float32')
y_train = y_train
X_test, y_test = data_obj.load_data(mode='test')
X_test = X_test.astype('float32')
num_inputs = 784
num_outputs = 10
num_examples = X_train.shape[0]

In [32]:
print(X_train.shape)
print(y_train.shape)

(60000, 784)
(60000,)


In [33]:
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(1024, activation="relu"))
    net.add(gluon.nn.Dense(512, activation="relu"))
    net.add(gluon.nn.Dense(256, activation="relu"))
    net.add(gluon.nn.Dense(10))

In [34]:
net.collect_params()

sequential1_ (
  Parameter sequential1_dense0_weight (shape=(1024, 0), dtype=float32)
  Parameter sequential1_dense0_bias (shape=(1024,), dtype=float32)
  Parameter sequential1_dense1_weight (shape=(512, 0), dtype=float32)
  Parameter sequential1_dense1_bias (shape=(512,), dtype=float32)
  Parameter sequential1_dense2_weight (shape=(256, 0), dtype=float32)
  Parameter sequential1_dense2_bias (shape=(256,), dtype=float32)
  Parameter sequential1_dense3_weight (shape=(10, 0), dtype=float32)
  Parameter sequential1_dense3_bias (shape=(10,), dtype=float32)
)

In [35]:
net.collect_params().initialize(mx.initializer.Uniform(0.01), ctx=model_ctx)


In [36]:
type(net.collect_params())

mxnet.gluon.parameter.ParameterDict

In [37]:
square_loss = gluon.loss.SoftmaxCrossEntropyLoss()

In [38]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.0001})


In [48]:
batch_size = 32
train_data = gluon.data.DataLoader(
    gluon.data.dataset.ArrayDataset(X_train, y_train), batch_size=batch_size, shuffle=True)

test_data = gluon.data.DataLoader(
    gluon.data.dataset.ArrayDataset(X_test, y_test), batch_size=batch_size, shuffle=True)

In [49]:
epochs = 10
num_batches = num_examples / batch_size
print(num_batches)

1875.0


In [50]:
def train_loop(epochs):
    for e in range(epochs):
        cumulative_loss = 0
        for i, (data, label) in enumerate(train_data):
            data = data.as_in_context(model_ctx).reshape(-1,784)
            label = label.as_in_context(model_ctx)
            with autograd.record():
                output = net(data)
                loss = square_loss(output, label)
            loss.backward()
            trainer.step(batch_size)
            cumulative_loss += nd.mean(loss).asscalar()
            
        test_accuracy = evaluate_accuracy(test_data, net)
        train_accuracy = evaluate_accuracy(train_data, net)
        print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))
        print("Epoch %s, loss: %.4f" % (e, cumulative_loss / num_examples))

In [51]:
train_loop(epochs)

Epoch 0. Loss: 0.05307473219037056, Train_acc 0.5870333333333333, Test_acc 0.584
Epoch 0, loss: 0.0531
Epoch 1. Loss: 0.03445269978940487, Train_acc 0.6465666666666666, Test_acc 0.6333
Epoch 1, loss: 0.0345
Epoch 2. Loss: 0.027462654035290083, Train_acc 0.6597333333333333, Test_acc 0.6544
Epoch 2, loss: 0.0275
Epoch 3. Loss: 0.024285169814527035, Train_acc 0.7073166666666667, Test_acc 0.7007
Epoch 3, loss: 0.0243
Epoch 4. Loss: 0.022532109563052655, Train_acc 0.7301166666666666, Test_acc 0.719
Epoch 4, loss: 0.0225
Epoch 5. Loss: 0.021219958369930587, Train_acc 0.7520833333333333, Test_acc 0.7415
Epoch 5, loss: 0.0212
Epoch 6. Loss: 0.020155543450514474, Train_acc 0.7731666666666667, Test_acc 0.7596
Epoch 6, loss: 0.0202
Epoch 7. Loss: 0.019263958705961703, Train_acc 0.7863, Test_acc 0.7743
Epoch 7, loss: 0.0193
Epoch 8. Loss: 0.018499721808483204, Train_acc 0.79065, Test_acc 0.7798
Epoch 8, loss: 0.0185
Epoch 9. Loss: 0.017854982663691045, Train_acc 0.80525, Test_acc 0.7938
Epoch 9, l

In [None]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]

In [52]:
file_name = "net.params"
net.save_parameters(file_name)