In [1]:
from zipfile import ZipFile
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon
import mxnet.ndarray as F
import random


In [2]:
mx.random.seed(1)
random.seed(1)

data_ctx = mx.cpu()
model_ctx = mx.cpu()

In [3]:
class DataLoader(object):
    def __init__(self, min_batch_size=32):
        DIR = 'data/'
        self.min_batch_size=min_batch_size
        
    
    # Returns images and labels corresponding for training and testing. Default mode is train. 
    # For retrieving test data pass mode as 'test' in function call.
    def load_data(self, mode = 'train'):
        label_filename = mode + '_labels'
        image_filename = mode + '_images'
        label_zip = 'data/' + label_filename + '.zip'
        image_zip = 'data/' + image_filename + '.zip'
        with ZipFile(label_zip, 'r') as lblzip:
            labels = np.frombuffer(lblzip.read(label_filename), dtype=np.uint8, offset=8)
        with ZipFile(image_zip, 'r') as imgzip:
            images = np.frombuffer(imgzip.read(image_filename), dtype=np.uint8, offset=16).reshape(len(labels), 784)
        return images, labels

In [4]:
data_obj = DataLoader()
X_train, y_train = data_obj.load_data(mode='train')
X_train = X_train.astype('float32')
y_train = y_train
X_test, y_test = data_obj.load_data(mode='test')
num_inputs = 784
num_outputs = 10
num_examples = X_train.shape[0]

In [5]:
print(X_train.shape)
print(y_train.shape)

(60000, 784)
(60000,)


In [6]:
num_hidden = 64
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(512, activation="relu"))
    net.add(gluon.nn.Dense(128, activation="relu"))
    net.add(gluon.nn.Dense(64, activation="relu"))
    net.add(gluon.nn.Dense(32, activation="relu"))
    net.add(gluon.nn.Dense(16, activation="relu"))
    net.add(gluon.nn.Dense(10))

In [7]:
net.collect_params()

sequential0_ (
  Parameter sequential0_dense0_weight (shape=(512, 0), dtype=float32)
  Parameter sequential0_dense0_bias (shape=(512,), dtype=float32)
  Parameter sequential0_dense1_weight (shape=(128, 0), dtype=float32)
  Parameter sequential0_dense1_bias (shape=(128,), dtype=float32)
  Parameter sequential0_dense2_weight (shape=(64, 0), dtype=float32)
  Parameter sequential0_dense2_bias (shape=(64,), dtype=float32)
  Parameter sequential0_dense3_weight (shape=(32, 0), dtype=float32)
  Parameter sequential0_dense3_bias (shape=(32,), dtype=float32)
  Parameter sequential0_dense4_weight (shape=(16, 0), dtype=float32)
  Parameter sequential0_dense4_bias (shape=(16,), dtype=float32)
  Parameter sequential0_dense5_weight (shape=(10, 0), dtype=float32)
  Parameter sequential0_dense5_bias (shape=(10,), dtype=float32)
)

In [8]:
net.collect_params().initialize(mx.initializer.Uniform(0.01), ctx=model_ctx)


In [9]:
type(net.collect_params())

mxnet.gluon.parameter.ParameterDict

In [10]:
square_loss = gluon.loss.SoftmaxCrossEntropyLoss()

In [11]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.0001})


In [12]:
batch_size = 32
train_data = gluon.data.DataLoader(
    gluon.data.dataset.ArrayDataset(X_train, y_train.reshape(-1,1)), batch_size=batch_size, shuffle=True)

In [13]:
epochs = 10
num_batches = num_examples / batch_size
print(num_batches)

1875.0


In [14]:
def train_loop(epochs):
    for e in range(epochs):
        cumulative_loss = 0
        for i, (data, label) in enumerate(train_data):
            data = data.as_in_context(model_ctx).reshape(-1,784)
            label = label.as_in_context(model_ctx)
            with autograd.record():
                output = net(data)
                loss = square_loss(output, label)
            loss.backward()
            trainer.step(batch_size)
            cumulative_loss += nd.mean(loss).asscalar()
        print("Epoch %s, loss: %.4f" % (e, cumulative_loss / num_examples))

In [None]:
train_loop(epochs)

Epoch 0, loss: 0.0720
Epoch 1, loss: 0.0720
Epoch 2, loss: 0.0720
Epoch 3, loss: 0.0720
Epoch 4, loss: 0.0720
