In [1]:
%matplotlib inline

import os
import numpy as np
import data_loader
import module
from data_loader import DataLoader

import matplotlib.pyplot as plt

import mxnet as mx
from mxnet import nd, autograd, gluon

ctx = mx.cpu()
data_ctx = ctx
model_ctx = ctx

  from ._conv import register_converters as _register_converters


# Custom Dataset class

In [2]:
dl = DataLoader()

class CustomDataset:
    
    def __init__(self, mode, dataset = 'all'):
        self.x, self.y = dl.load_data(mode, dataset)
    
    def __getitem__(self, i):
        return self.x[i], self.y[i]
    
    def __len__(self):
        return len(self.y)        

In [3]:
batch_size = 64

train_data = mx.gluon.data.DataLoader(CustomDataset('train', 'train'), batch_size, shuffle=True)
test_data = mx.gluon.data.DataLoader(CustomDataset('train', 'validation'), batch_size, shuffle=False)

epochs = 10
num_examples = len(train_data)

In [4]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        data = mx.ndarray.cast(data, dtype='float32')
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]

## Vanilla Network

In [5]:
layer = [1024, 512, 256]
lout = 10
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(layer[0], activation="relu"))
    net.add(gluon.nn.Dense(layer[1], activation="relu"))
    net.add(gluon.nn.Dense(layer[2], activation="relu"))
    net.add(gluon.nn.Dense(lout))

In [None]:
net.collect_params().initialize(mx.init.Uniform(.1), ctx=model_ctx, force_reinit=True)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'adam')

In [None]:
loss_arr = []
for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        
        with autograd.record():
            data = mx.ndarray.cast(data, dtype='float32')
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
    
    loss_arr.append(cumulative_loss/num_examples)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))    

loss_vanilla = loss_arr.copy()

filename = os.path.join('weights','b_vanilla.params')
net.save_parameters(filename)

# Initialization

## Normal Initialization

In [None]:
net.collect_params().initialize(mx.init.Normal(sigma=.05), ctx=model_ctx, force_reinit=True)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'adam')

In [None]:
loss_arr = []
for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        
        with autograd.record():
            data = mx.ndarray.cast(data, dtype='float32')
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
    
    loss_arr.append(cumulative_loss/num_examples)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))    

loss_norm_init = loss_arr.copy()

filename = os.path.join('weights','b_normal.params')
net.save_parameters(filename)

## Xavier Initialization

In [None]:
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=model_ctx, force_reinit=True)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'adam')

In [None]:
loss_arr = []
for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        
        with autograd.record():
            data = mx.ndarray.cast(data, dtype='float32')
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
    
    loss_arr.append(cumulative_loss/num_examples)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))    
    
loss_xavier_init = loss_arr.copy()

filename = os.path.join('weights','b_xavier.params')
net.save_parameters(filename)

## Orthogonal Initialization

In [None]:
net.collect_params().initialize(mx.init.Orthogonal(scale=1.414, rand_type='uniform'), ctx=model_ctx, force_reinit=True)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'adam')

In [None]:
loss_arr = []
for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        
        with autograd.record():
            data = mx.ndarray.cast(data, dtype='float32')
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
    
    loss_arr.append(cumulative_loss/num_examples)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))   
    
loss_ortho_init = loss_arr.copy()

filename = os.path.join('weights','b_ortho.params')
net.save_parameters(filename)

# Normalization

## Batch Normalization

In [None]:
layer = [1024, 512, 256]
lout = 10
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.BatchNorm())
    net.add(gluon.nn.Dense(layer[0], activation="relu"))
    net.add(gluon.nn.Dense(layer[1], activation="relu"))
    net.add(gluon.nn.Dense(layer[2], activation="relu"))
    net.add(gluon.nn.Dense(lout))

In [None]:
net.collect_params().initialize(mx.init.Normal(sigma=.05), ctx=model_ctx, force_reinit=True)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'adam')

In [None]:
loss_arr = []
for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        
        with autograd.record():
            data = mx.ndarray.cast(data, dtype='float32')
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
    
    loss_arr.append(cumulative_loss/num_examples)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))    

loss_batch_norm = loss_arr.copy()

filename = os.path.join('weights','b_batch.params')
net.save_parameters(filename)

# Dropout

## Dropout = 0.1

In [None]:
layer = [1024, 512, 256]
lout = 10
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(layer[0], activation="relu"))
    net.add(gluon.nn.Dropout(.1))
    net.add(gluon.nn.Dense(layer[1], activation="relu"))
    net.add(gluon.nn.Dropout(.1))
    net.add(gluon.nn.Dense(layer[2], activation="relu"))
    net.add(gluon.nn.Dropout(.1))
    net.add(gluon.nn.Dense(lout))

In [None]:
net.collect_params().initialize(mx.init.Normal(sigma=.05), ctx=model_ctx, force_reinit=True)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'adam')

In [None]:
loss_arr = []
for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        
        with autograd.record():
            data = mx.ndarray.cast(data, dtype='float32')
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
    
    loss_arr.append(cumulative_loss/num_examples)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))    

loss_dropout1 = loss_arr.copy()

filename = os.path.join('weights','b_dropout1.params')
net.save_parameters(filename)

## Dropout = 0.4

In [None]:
layer = [1024, 512, 256]
lout = 10
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(layer[0], activation="relu"))
    net.add(gluon.nn.Dropout(.4))
    net.add(gluon.nn.Dense(layer[1], activation="relu"))
    net.add(gluon.nn.Dropout(.4))
    net.add(gluon.nn.Dense(layer[2], activation="relu"))
    net.add(gluon.nn.Dropout(.4))
    net.add(gluon.nn.Dense(lout))

In [None]:
net.collect_params().initialize(mx.init.Normal(sigma=.05), ctx=model_ctx, force_reinit=True)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'adam')

In [None]:
loss_arr = []
for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        
        with autograd.record():
            data = mx.ndarray.cast(data, dtype='float32')
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
    
    loss_arr.append(cumulative_loss/num_examples)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))    

loss_dropout4 = loss_arr.copy()

filename = os.path.join('weights','b_dropout4.params')
net.save_parameters(filename)

## Dropout = 0.6

In [None]:
layer = [1024, 512, 256]
lout = 10
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(layer[0], activation="relu"))
    net.add(gluon.nn.Dropout(.6))
    net.add(gluon.nn.Dense(layer[1], activation="relu"))
    net.add(gluon.nn.Dropout(.6))
    net.add(gluon.nn.Dense(layer[2], activation="relu"))
    net.add(gluon.nn.Dropout(.6))
    net.add(gluon.nn.Dense(lout))

In [None]:
net.collect_params().initialize(mx.init.Normal(sigma=.05), ctx=model_ctx, force_reinit=True)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'adam')

In [None]:
loss_arr = []
for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        
        with autograd.record():
            data = mx.ndarray.cast(data, dtype='float32')
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
    
    loss_arr.append(cumulative_loss/num_examples)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))    

loss_dropout6 = loss_arr.copy()

filename = os.path.join('weights','b_dropout6.params')
net.save_parameters(filename)

# Optimizers

In [None]:
layer = [1024, 512, 256]
lout = 10
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(layer[0], activation="relu"))
    net.add(gluon.nn.Dense(layer[1], activation="relu"))
    net.add(gluon.nn.Dense(layer[2], activation="relu"))
    net.add(gluon.nn.Dense(lout))

## Stochastic Gradient Descent

In [None]:
net.collect_params().initialize(mx.init.Normal(sigma=.05), ctx=model_ctx, force_reinit=True)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

In [None]:
def sgd(params, lr, batch_size):
    for param in params:
        param[:] = param - lr * param.grad / batch_size

In [None]:
loss_arr = []
lr = .001
for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        
        with autograd.record():
            data = mx.ndarray.cast(data, dtype='float32')
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        w = []
        for i in range(3):
            w.append(net[i].weight.data())
            w.append(net[i].bias.data())
        sgd(w, lr, data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
    
    loss_arr.append(cumulative_loss/num_examples)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))    
    
loss_sgd = loss_arr.copy()

filename = os.path.join('weights','b_sgd.params')
net.save_parameters(filename)

## Nesterov’s accelerated momentum

In [6]:
net.collect_params().initialize(mx.init.Normal(sigma=.05), ctx=model_ctx, force_reinit=True)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(),'nag', {'momentum':.1, 'learning_rate':.001})

In [7]:
loss_arr = []
for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        
        with autograd.record():
            data = mx.ndarray.cast(data, dtype='float32')
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        w = []
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
   
    loss_arr.append(cumulative_loss/num_examples)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print(cumulative_loss)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))    
    
loss_nest_opt = loss_arr.copy()

filename = os.path.join('weights','b_nag.params')
net.save_parameters(filename)

152843.2119860649
Epoch 0. Loss: 232.63806999401052, Train_acc 0.8033809523809524, Test_acc 0.7902222222222223
21554.34310722351
Epoch 1. Loss: 32.80721934128388, Train_acc 0.8183095238095238, Test_acc 0.8040555555555555
18685.60315513611
Epoch 2. Loss: 28.440796278745978, Train_acc 0.7682380952380953, Test_acc 0.7493333333333333
17098.82873606682
Epoch 3. Loss: 26.02561451456137, Train_acc 0.8574285714285714, Test_acc 0.8347222222222223
15851.296346902847
Epoch 4. Loss: 24.126782871998245, Train_acc 0.8712619047619048, Test_acc 0.8421111111111111
14877.305012226105
Epoch 5. Loss: 22.644299866401987, Train_acc 0.8432142857142857, Test_acc 0.8123333333333334
14071.034298181534
Epoch 6. Loss: 21.41709938840416, Train_acc 0.8558571428571429, Test_acc 0.8219444444444445
13417.7857131958
Epoch 7. Loss: 20.422809304712025, Train_acc 0.8868809523809524, Test_acc 0.8516111111111111
12805.385511398315
Epoch 8. Loss: 19.49069332024097, Train_acc 0.898, Test_acc 0.8581666666666666
12321.379415750

## AdaDelta

In [None]:
net.collect_params().initialize(mx.init.Normal(sigma=.05), ctx=model_ctx, force_reinit=True)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(),'adadelta')

In [None]:
loss_arr = []
for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        
        with autograd.record():
            data = mx.ndarray.cast(data, dtype='float32')
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
    
    loss_arr.append(cumulative_loss/num_examples)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))    

loss_adadelta_opt = loss_arr.copy()

filename = os.path.join('weights','b_adadelta.params')
net.save_parameters(filename)

## AdaGrad

In [None]:
net.collect_params().initialize(mx.init.Normal(sigma=.05), ctx=model_ctx, force_reinit=True)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'adagrad')

In [None]:
loss_arr = []
for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        
        with autograd.record():
            data = mx.ndarray.cast(data, dtype='float32')
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
    
    loss_arr.append(cumulative_loss/num_examples)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))   
    
loss_adagrad_opt = loss_arr.copy()

filename = os.path.join('weights','b_adagrad.params')
net.save_parameters(filename)

## RMSProp

In [None]:
net.collect_params().initialize(mx.init.Normal(sigma=.05), ctx=model_ctx, force_reinit=True)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'rmsprop')

In [None]:
loss_arr = []
for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        
        with autograd.record():
            data = mx.ndarray.cast(data, dtype='float32')
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
    
    loss_arr.append(cumulative_loss/num_examples)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))    

loss_rmsprop_opt = loss_arr.copy()

filename = os.path.join('weights','b_rmsprop.params')
net.save_parameters(filename)

## Adam

In [None]:
net.collect_params().initialize(mx.init.Normal(sigma=.05), ctx=model_ctx, force_reinit=True)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'adam')

In [None]:
loss_arr = []
for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        
        with autograd.record():
            data = mx.ndarray.cast(data, dtype='float32')
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
    
    loss_arr.append(cumulative_loss/num_examples)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))    
    
loss_adam_opt = loss_arr.copy()

filename = os.path.join('weights','b_adam.params')
net.save_parameters(filename)

In [None]:
# plt.figure(figsize=(15,15))
plt.plot(loss_vanilla, linewidth=2, label='Vanilla')
plt.plot(loss_norm_init, linewidth=2, label='Normal Initialization')
plt.plot(loss_xavier_init, linewidth=2, label='Xavier Initialization')
plt.plot(loss_ortho_init, linewidth=2, label='Orthogonal Initialization')
plt.legend(fontsize=15)
plt.title('Initialization Methods Comparison')
plt.xlabel('Epochs', fontsize=15)
plt.ylabel('Loss', fontsize=15)
plt.show()

In [None]:
# plt.figure(figsize=(15,15))
plt.plot(loss_vanilla, linewidth=2, label='Vanilla')
plt.plot(loss_batch_norm, linewidth=2, label='Batch Normalization')
plt.legend(fontsize=15)
plt.title('Normalization Methods Comparison')
plt.xlabel('Epochs', fontsize=15)
plt.ylabel('Loss', fontsize=15)
plt.show()

In [None]:
# plt.figure(figsize=(15,15))
plt.plot(loss_vanilla, linewidth=2, label='Vanilla')
plt.plot(loss_dropout1, linewidth=2, label='Dropout(0.1)')
plt.plot(loss_dropout4, linewidth=2, label='Dropout(0.4)')
plt.plot(loss_dropout6, linewidth=2, label='Dropout(0.6)')
plt.legend(fontsize=15)
plt.title('Dropout Comparison')
plt.xlabel('Epochs', fontsize=15)
plt.ylabel('Loss', fontsize=15)
plt.show()

In [None]:
# plt.figure(figsize=(15,15))
plt.plot(loss_vanilla, linewidth=2, label='Vanilla')
plt.plot(loss_sgd, linewidth=2, label='SGD Optimization')
plt.plot(loss_nest_opt, linewidth=2, label='Nesterov\'s Optimization')
plt.plot(loss_adadelta_opt, linewidth=2, label='AdaDelta Optimization')
# plt.plot(loss_adagrad_opt, linewidth=2, label='AdaGrad Optimization')
plt.plot(loss_rmsprop_opt, linewidth=2, label='RMSProp Optimization')
plt.plot(loss_adam_opt, linewidth=2, label='Adam Optimization')
plt.legend(fontsize=15)
plt.title('Optimization Methods Comparison')
plt.xlabel('Epochs', fontsize=15)
plt.ylabel('Loss', fontsize=15)
plt.show()

# TESTING

In [None]:
layer = [1024, 512, 256]
lout = 10
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(layer[0], activation="relu"))
    net.add(gluon.nn.Dense(layer[1], activation="relu"))
    net.add(gluon.nn.Dense(layer[2], activation="relu"))
    net.add(gluon.nn.Dense(lout))
test_data = mx.gluon.data.DataLoader(CustomDataset('test', 'validation'), batch_size, last_batch='keep', shuffle=False)

In [None]:
filename = os.path.join('weights','b_vanilla.params')
if not (os.path.isfile(filename)):
    print('No data for Vanilla')
else:
    net.load_parameters(filename, ctx=ctx)
    print('Accuracy(Vanilla) = ' + str(100 * evaluate_accuracy(test_data, net)) + '%')

In [None]:
filename = os.path.join('weights','b_normal.params')
if not (os.path.isfile(filename)):
    print('No data for Normal Initialization')
else:
    net.load_parameters(filename, ctx=ctx)
    print('Accuracy(Normal Initialization) = ' + str(100 * evaluate_accuracy(test_data, net)) + '%')

In [None]:
filename = os.path.join('weights','b_xavier.params')
if not (os.path.isfile(filename)):
    print('No data for Xavier Initialization')
else:
    net.load_parameters(filename, ctx=ctx)
    print('Accuracy(Xavier Initialization) = ' + str(100 * evaluate_accuracy(test_data, net)) + '%')

In [None]:
filename = os.path.join('weights','b_ortho.params')
if not (os.path.isfile(filename)):
    print('No data for Orthogonal Initialization')
else:
    net.load_parameters(filename, ctx=ctx)
    print('Accuracy(Orthogonal Initialization) = ' + str(100 * evaluate_accuracy(test_data, net)) + '%')

In [None]:
layer = [1024, 512, 256]
lout = 10
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.BatchNorm())
    net.add(gluon.nn.Dense(layer[0], activation="relu"))
    net.add(gluon.nn.Dense(layer[1], activation="relu"))
    net.add(gluon.nn.Dense(layer[2], activation="relu"))
    net.add(gluon.nn.Dense(lout))

In [None]:
filename = os.path.join('weights','b_batch.params')
if not (os.path.isfile(filename)):
    print('No data for Batch Normalization')
else:
    net.load_parameters(filename, ctx=ctx, allow_missing=True, ignore_extra=True)
    print('Accuracy(Batch Normalization) = ' + str(100 * evaluate_accuracy(test_data, net)) + '%')

In [None]:
layer = [1024, 512, 256]
lout = 10
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(layer[0], activation="relu"))
    net.add(gluon.nn.Dropout(.1))
    net.add(gluon.nn.Dense(layer[1], activation="relu"))
    net.add(gluon.nn.Dropout(.1))
    net.add(gluon.nn.Dense(layer[2], activation="relu"))
    net.add(gluon.nn.Dropout(.1))
    net.add(gluon.nn.Dense(lout))

In [None]:
filename = os.path.join('weights','b_dropout1.params')
if not (os.path.isfile(filename)):
    print('No data for Dropout(0.1)')
else:
    net.load_parameters(filename, ctx=ctx)
    print('Accuracy(Dropout(0.1)) = ' + str(100 * evaluate_accuracy(test_data, net)) + '%')

In [None]:
layer = [1024, 512, 256]
lout = 10
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(layer[0], activation="relu"))
    net.add(gluon.nn.Dropout(.4))
    net.add(gluon.nn.Dense(layer[1], activation="relu"))
    net.add(gluon.nn.Dropout(.4))
    net.add(gluon.nn.Dense(layer[2], activation="relu"))
    net.add(gluon.nn.Dropout(.4))
    net.add(gluon.nn.Dense(lout))

In [None]:
filename = os.path.join('weights','b_dropout4.params')
if not (os.path.isfile(filename)):
    print('No data for Dropout(0.4)')
else:
    net.load_parameters(filename, ctx=ctx, allow_missing=True)
    print('Accuracy(Dropout(0.4)) = ' + str(100 * evaluate_accuracy(test_data, net)) + '%')

In [None]:
layer = [1024, 512, 256]
lout = 10
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(layer[0], activation="relu"))
    net.add(gluon.nn.Dropout(.6))
    net.add(gluon.nn.Dense(layer[1], activation="relu"))
    net.add(gluon.nn.Dropout(.6))
    net.add(gluon.nn.Dense(layer[2], activation="relu"))
    net.add(gluon.nn.Dropout(.6))
    net.add(gluon.nn.Dense(lout))

In [None]:
filename = os.path.join('weights','b_dropout6.params')
if not (os.path.isfile(filename)):
    print('No data for Dropout(0.6)')
else:
    net.load_parameters(filename, ctx=ctx)
    print('Accuracy(Dropout(0.6)) = ' + str(100 * evaluate_accuracy(test_data, net)) + '%')

In [9]:
layer = [1024, 512, 256]
lout = 10
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(layer[0], activation="relu"))
    net.add(gluon.nn.Dense(layer[1], activation="relu"))
    net.add(gluon.nn.Dense(layer[2], activation="relu"))
    net.add(gluon.nn.Dense(lout))

In [None]:
filename = os.path.join('weights','b_sgd.params')
if not (os.path.isfile(filename)):
    print('No data for SGD')
else:
    net.load_parameters(filename, ctx=ctx)
    print('Accuracy(SGD) = ' + str(100 * evaluate_accuracy(test_data, net)) + '%')

In [10]:
filename = os.path.join('weights','b_nag.params')
if not (os.path.isfile(filename)):
    print('No data for Nesterov\'s Accelerated Momentum')
else:
    net.load_parameters(filename, ctx=ctx)
    print('Accuracy(Nesterov\'s Accelerated Momentum) = ' + str(100 * evaluate_accuracy(test_data, net)) + '%')

Accuracy(Nesterov's Accelerated Momentum) = 85.13888888888889%


In [None]:
filename = os.path.join('weights','b_adadelta.params')
if not (os.path.isfile(filename)):
    print('No data for AdaDelta')
else:
    net.load_parameters(filename, ctx=ctx)
    print('Accuracy(AdaDelta) = ' + str(100 * evaluate_accuracy(test_data, net)) + '%')

In [None]:
filename = os.path.join('weights','b_adagrad.params')
if not (os.path.isfile(filename)):
    print('No data for AdaGrad')
else:
    net.load_parameters(filename, ctx=ctx)
    print('Accuracy(AdaGrad) = ' + str(100 * evaluate_accuracy(test_data, net)) + '%')

In [None]:
filename = os.path.join('weights','b_rmsprop.params')
if not (os.path.isfile(filename)):
    print('No data for RMSProp')
else:
    net.load_parameters(filename, ctx=ctx)
    print('Accuracy(RMSProp) = ' + str(100 * evaluate_accuracy(test_data, net)) + '%')

In [None]:
filename = os.path.join('weights','b_adam.params')
if not (os.path.isfile(filename)):
    print('No data for Adam')
else:
    net.load_parameters(filename, ctx=ctx)
    print('Accuracy(Adam) = ' + str(100 * evaluate_accuracy(test_data, net)) + '%')