In [1]:
from zipfile import ZipFile
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon
import mxnet.ndarray as F
import random


In [2]:
mx.random.seed(5)
random.seed(5)

data_ctx = mx.cpu()
model_ctx = mx.cpu()

In [3]:
class DataLoader(object):
    def __init__(self, min_batch_size=32):
        DIR = 'data/'
        self.min_batch_size=min_batch_size
        
    
    # Returns images and labels corresponding for training and testing. Default mode is train. 
    # For retrieving test data pass mode as 'test' in function call.
    def load_data(self, mode = 'train'):
        label_filename = mode + '_labels'
        image_filename = mode + '_images'
        label_zip = 'data/' + label_filename + '.zip'
        image_zip = 'data/' + image_filename + '.zip'
        with ZipFile(label_zip, 'r') as lblzip:
            labels = np.frombuffer(lblzip.read(label_filename), dtype=np.uint8, offset=8)
        with ZipFile(image_zip, 'r') as imgzip:
            images = np.frombuffer(imgzip.read(image_filename), dtype=np.uint8, offset=16).reshape(len(labels), 784)
        return images, labels

In [4]:
data_obj = DataLoader()
X_train, y_train = data_obj.load_data(mode='train')
X_train = X_train.astype('float32')
y_train = y_train
X_test, y_test = data_obj.load_data(mode='test')
X_test = X_test.astype('float32')
num_inputs = 784
num_outputs = 10
num_examples = X_train.shape[0]

In [5]:
print(X_train.shape)
print(y_train.shape)

(60000, 784)
(60000,)


In [6]:
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(512, activation="relu"))
    net.add(gluon.nn.Dense(128, activation="relu"))
    net.add(gluon.nn.Dense(64, activation="relu"))
    net.add(gluon.nn.Dense(32, activation="relu"))
    net.add(gluon.nn.Dense(16, activation="relu"))
    net.add(gluon.nn.Dense(10))

In [7]:
net.collect_params()

sequential0_ (
  Parameter sequential0_dense0_weight (shape=(512, 0), dtype=float32)
  Parameter sequential0_dense0_bias (shape=(512,), dtype=float32)
  Parameter sequential0_dense1_weight (shape=(128, 0), dtype=float32)
  Parameter sequential0_dense1_bias (shape=(128,), dtype=float32)
  Parameter sequential0_dense2_weight (shape=(64, 0), dtype=float32)
  Parameter sequential0_dense2_bias (shape=(64,), dtype=float32)
  Parameter sequential0_dense3_weight (shape=(32, 0), dtype=float32)
  Parameter sequential0_dense3_bias (shape=(32,), dtype=float32)
  Parameter sequential0_dense4_weight (shape=(16, 0), dtype=float32)
  Parameter sequential0_dense4_bias (shape=(16,), dtype=float32)
  Parameter sequential0_dense5_weight (shape=(10, 0), dtype=float32)
  Parameter sequential0_dense5_bias (shape=(10,), dtype=float32)
)

In [8]:
net.collect_params().initialize(mx.initializer.Normal(), ctx=model_ctx)


In [9]:
type(net.collect_params())

mxnet.gluon.parameter.ParameterDict

In [10]:
square_loss = gluon.loss.SoftmaxCrossEntropyLoss()

In [21]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.0001})


In [22]:
batch_size = 128
train_data = gluon.data.DataLoader(
    gluon.data.dataset.ArrayDataset(X_train, y_train), batch_size=batch_size, shuffle=True)

test_data = gluon.data.DataLoader(
    gluon.data.dataset.ArrayDataset(X_test, y_test), batch_size=batch_size, shuffle=True)

In [23]:
epochs = 100
num_batches = num_examples / batch_size
print(num_batches)

468.75


In [24]:
def train_loop(epochs):
    test_accuracy1 = []
    train_accuracy1 = []
    for e in range(epochs):
        cumulative_loss = 0
        for i, (data, label) in enumerate(train_data):
            data = data.as_in_context(model_ctx)
            label = label.as_in_context(model_ctx)
            with autograd.record():
                output = net(data)
                loss = square_loss(output, label)
            loss.backward()
            trainer.step(batch_size)
            cumulative_loss += nd.mean(loss).asscalar()
            
        test_accuracy = evaluate_accuracy(test_data, net)
        train_accuracy = evaluate_accuracy(train_data, net)
        
        test_accuracy1.append(evaluate_accuracy(test_data, net))
        train_accuracy1.append(evaluate_accuracy(train_data, net))
        
        print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))
        #print("Epoch %s, loss: %.4f" % (e, cumulative_loss / num_examples))
        
    

In [25]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]

In [26]:
train_loop(epochs)

Epoch 0. Loss: 0.017997774839401244, Train_acc 0.23345, Test_acc 0.2289
Epoch 1. Loss: 0.017997772034009298, Train_acc 0.23361666666666667, Test_acc 0.2294
Epoch 2. Loss: 0.01799776908159256, Train_acc 0.23366666666666666, Test_acc 0.2294
Epoch 3. Loss: 0.017997766371568046, Train_acc 0.2338, Test_acc 0.2292
Epoch 4. Loss: 0.017997763272126516, Train_acc 0.23396666666666666, Test_acc 0.2293
Epoch 5. Loss: 0.017997760283946992, Train_acc 0.234, Test_acc 0.2291
Epoch 6. Loss: 0.01799775728782018, Train_acc 0.23425, Test_acc 0.2289
Epoch 7. Loss: 0.01799775442679723, Train_acc 0.2346, Test_acc 0.2289
Epoch 8. Loss: 0.0179977512439092, Train_acc 0.23495, Test_acc 0.2295
Epoch 9. Loss: 0.01799774824778239, Train_acc 0.23521666666666666, Test_acc 0.2289
Epoch 10. Loss: 0.017997745231787363, Train_acc 0.23573333333333332, Test_acc 0.2297
Epoch 11. Loss: 0.01799774218002955, Train_acc 0.23616666666666666, Test_acc 0.2301
Epoch 12. Loss: 0.017997739112377165, Train_acc 0.2364, Test_acc 0.2306
E

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(20, 9))
train_plot = fig.add_subplot(1, 2, 1)
test_plot = fig.add_subplot(1, 2, 2)


y_ticks = [e + 1 for e in range(epochs)]
x_ticks = np.arange(0.75, 1.0, 0.05)
def create_plot(line_plot, data, kind):
    line_plot.plot(data, y_ticks, c=color, label=key)
    line_plot.legend(loc='upper left')
    line_plot.set_title("Model's {} prediction accuracy trends".format(kind))
    line_plot.set_xlabel('Accuracy')
    line_plot.set_xticks(x_ticks)
    line_plot.set_ylabel('Epochs')
    line_plot.set_yticks(y_ticks)

create_plot(train_accuracy1, train_accuracy, 'train')
create_plot(test_accuracy1, test_accuracy, 'test')

final_accuracy[key] = (train_accuracy, test_accuracy)
plt.show()

In [None]:
file_name = "net.params_1"
net.save_parameters(file_name)