### Import libraries

In [1]:
import numpy as np
from utils.activations import Linear, Sigmoid, Tanh, ReLU
from utils.layers import Dense
from utils.losses import MeanSquaredError, Softmax
from utils.optimizers import Optimizer, SGD, SGDMomentum
from utils.network import NeuralNetwork
from utils.train import Trainer
from utils.utility_functions import softmax

### Data import and preprocessing

In [2]:
# Import MNIST dataset - Credit: https://github.com/hsjeong5

from urllib import request
import gzip
import pickle

filename = [
["training_images","train-images-idx3-ubyte.gz"],
["test_images","t10k-images-idx3-ubyte.gz"],
["training_labels","train-labels-idx1-ubyte.gz"],
["test_labels","t10k-labels-idx1-ubyte.gz"]
]


def download_mnist():
  base_url = "http://yann.lecun.com/exdb/mnist/"
  for name in filename:
    print("Downloading "+name[1]+"...")
    request.urlretrieve(base_url+name[1], name[1])
  print("Download complete.")


def save_mnist():
  mnist = {}
  for name in filename[:2]:
    with gzip.open(name[1], 'rb') as f:
      mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1,28*28)
  for name in filename[-2:]:
    with gzip.open(name[1], 'rb') as f:
      mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=8)
  with open("mnist.pkl", 'wb') as f:
    pickle.dump(mnist,f)
  print("Save complete.")


def init():
  download_mnist()
  save_mnist()


def load():
  with open("mnist.pkl",'rb') as f:
    mnist = pickle.load(f)
  return mnist["training_images"], mnist["training_labels"], mnist["test_images"], mnist["test_labels"]

if __name__ == '__main__':
  init()

Downloading train-images-idx3-ubyte.gz...
Downloading t10k-images-idx3-ubyte.gz...
Downloading train-labels-idx1-ubyte.gz...
Downloading t10k-labels-idx1-ubyte.gz...
Download complete.
Save complete.


In [3]:
X_train, y_train, X_test, y_test = load()
num_labels = len(y_train)
num_labels

60000

In [4]:
# One-hot encoding labels (both train and test)
num_labels = len(y_train)
train_labels = np.zeros((num_labels, 10))
for i in range(num_labels):
  train_labels[i][y_train[i]] = 1

num_labels = len(y_test)
test_labels = np.zeros((num_labels, 10))
for i in range(num_labels):
  test_labels[i][y_test[i]] = 1

In [5]:
# Normalize data (mean 0, variance 1)
X_train, X_test = X_train - np.mean(X_train), X_test - np.mean(X_train)
X_train, X_test = X_train / np.std(X_train), X_test / np.std(X_train)

In [6]:
# Function for accuracy
def calc_accuracy_model(model, test_set):
        return print(f'''The model validation accuracy is: {np.equal(np.argmax(model.forward(test_set, inference=True), axis=1), y_test).sum() * 100.0 / test_set.shape[0]:.2f}%''')

### Model training with different settings

In [7]:
model = NeuralNetwork(
    layers = [Dense(neurons=89, activation=Tanh()),
              Dense(neurons=10, activation=Sigmoid())],
    loss = MeanSquaredError(normalize=True),
    seed = 42
)

trainer = Trainer(model, SGD(learning_rate = 0.1))
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs = 50, eval_every = 10, seed = 42, batch_size = 60)
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.936.
Validation loss after 20 epochs is 0.932.
Validation loss after 30 epochs is 0.931.

Loss increased after epoch 40, the final loss was 0.931, using the model from epoch 30

The model validation accuracy is: 44.10%


In [8]:
model = NeuralNetwork(
    layers = [Dense(neurons=89, activation=Sigmoid()),
              Dense(neurons=10, activation=Linear())],
    loss = Softmax(),
    seed = 42
)

trainer = Trainer(model, SGD(learning_rate = 0.1))
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs = 50, eval_every = 10, seed = 42, batch_size = 60)
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.603.
Validation loss after 20 epochs is 0.523.
Validation loss after 30 epochs is 0.491.
Validation loss after 40 epochs is 0.472.
Validation loss after 50 epochs is 0.465.

The model validation accuracy is: 92.11%


Softmax loss function works way better than MSE! Now we try to use ReLU or Tanh activations with Softmax:

In [9]:
model = NeuralNetwork(
    layers = [Dense(neurons=89, activation=ReLU()),
              Dense(neurons=10, activation=Sigmoid())],
    loss = Softmax(),
    seed = 42
)

trainer = Trainer(model, SGD(learning_rate = 0.1))
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs = 50, eval_every = 10, seed = 42, batch_size = 60)
print()
calc_accuracy_model(model, X_test)

  return 1.0 / (1.0 + np.exp(-1.0 * self.input_))


Validation loss after 10 epochs is 2.462.
Validation loss after 20 epochs is 2.426.
Validation loss after 30 epochs is 2.406.
Validation loss after 40 epochs is 2.396.
Validation loss after 50 epochs is 2.392.

The model validation accuracy is: 86.81%


In [10]:
model = NeuralNetwork(
    layers = [Dense(neurons=89, activation=Tanh()),
              Dense(neurons=10, activation=Linear())],
    loss = Softmax(),
    seed = 42
)

trainer = Trainer(model, SGD(learning_rate = 0.1))
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs = 50, eval_every = 10, seed = 42, batch_size = 60)
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.627.
Validation loss after 20 epochs is 0.564.
Validation loss after 30 epochs is 0.551.
Validation loss after 40 epochs is 0.543.

Loss increased after epoch 50, the final loss was 0.543, using the model from epoch 40

The model validation accuracy is: 90.99%


So far, using Softmax as loss, Sigmoid and Tanh are the best activations.

### SGD with Momentum

In [11]:
model = NeuralNetwork(
    layers = [Dense(neurons=89, activation=Sigmoid()),
              Dense(neurons=10, activation=Linear())],
    loss = Softmax(),
    seed = 42
)

trainer = Trainer(model, SGDMomentum(learning_rate = 0.1, momentum = 0.9))
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs = 50, eval_every = 10, seed = 42, batch_size = 60)
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.362.

Loss increased after epoch 20, the final loss was 0.362, using the model from epoch 10

The model validation accuracy is: 94.39%


As we can see, adding momentum in the parameters update rule make the accuracy better.

### Weight Decay

In [12]:
model = NeuralNetwork(
    layers = [Dense(neurons=89, activation=Sigmoid()),
              Dense(neurons=10, activation=Linear())],
    loss = Softmax(),
    seed = 42
)

trainer = Trainer(model, SGDMomentum(learning_rate = 0.1, momentum = 0.9, final_lr = 0.05, decay_type = 'linear'))
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs = 50, eval_every = 10, seed = 42, batch_size = 60)
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.364.

Loss increased after epoch 20, the final loss was 0.364, using the model from epoch 10

The model validation accuracy is: 94.35%


In [13]:
model = NeuralNetwork(
    layers = [Dense(neurons=89, activation=Sigmoid()),
              Dense(neurons=10, activation=Linear())],
    loss = Softmax(),
    seed = 42
)

trainer = Trainer(model, SGDMomentum(learning_rate = 0.1, momentum = 0.9, final_lr = 0.05, decay_type = 'exponential'))
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs = 50, eval_every = 10, seed = 42, batch_size = 60)
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.363.

Loss increased after epoch 20, the final loss was 0.363, using the model from epoch 10

The model validation accuracy is: 94.24%


It seems that in our case weight decay does not improve the model accuracy (but in general it is a good practice).

### Weight initialization

In [14]:
model = NeuralNetwork(
    layers = [Dense(neurons=89, activation=Sigmoid(), weight_init='glorot'),
              Dense(neurons=10, activation=Linear(), weight_init='glorot')],
    loss = Softmax(),
    seed = 42
)

trainer = Trainer(model, SGDMomentum(learning_rate = 0.1, momentum = 0.9, final_lr = 0.05, decay_type = 'linear'))
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs = 50, eval_every = 10, seed = 42, batch_size = 60)
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.144.
Validation loss after 20 epochs is 0.137.

Loss increased after epoch 30, the final loss was 0.137, using the model from epoch 20

The model validation accuracy is: 97.75%


Nice! Accuracy has increased!

### Dropout

In [15]:
model = NeuralNetwork(
    layers = [Dense(neurons=89, activation=Sigmoid(), weight_init='glorot', dropout=0.8),
              Dense(neurons=10, activation=Linear(), weight_init='glorot')],
    loss = Softmax(),
    seed = 42
)

trainer = Trainer(model, SGDMomentum(learning_rate = 0.1, momentum = 0.9, final_lr = 0.05, decay_type = 'linear'))
trainer.fit(X_train, train_labels, X_test, test_labels,
            epochs = 50, eval_every = 10, seed = 42, batch_size = 60)
print()
calc_accuracy_model(model, X_test)

Validation loss after 10 epochs is 0.147.
Validation loss after 20 epochs is 0.143.

Loss increased after epoch 30, the final loss was 0.143, using the model from epoch 20

The model validation accuracy is: 97.69%


Not too useful here. Let's test with another model, first without dropout and then with dropout:

In [17]:
model2 = NeuralNetwork(
    layers = [Dense(neurons=178, activation=Sigmoid(), weight_init='glorot'),
              Dense(neurons=46, activation=Sigmoid(), weight_init='glorot'),
              Dense(neurons=10, activation=Linear(), weight_init='glorot')],
    loss = Softmax(),
    seed = 42
)

trainer2 = Trainer(model2, SGDMomentum(learning_rate = 0.2, momentum = 0.9, final_lr = 0.05, decay_type = 'exponential'))
trainer2.fit(X_train, train_labels, X_test, test_labels,
            epochs = 100, eval_every = 10, seed = 42, batch_size = 60)
print()
calc_accuracy_model(model2, X_test)

Validation loss after 10 epochs is 0.164.
Validation loss after 20 epochs is 0.160.

Loss increased after epoch 30, the final loss was 0.160, using the model from epoch 20

The model validation accuracy is: 97.95%


In [18]:
model2 = NeuralNetwork(
    layers = [Dense(neurons=178, activation=Sigmoid(), weight_init='glorot', dropout=0.8),
              Dense(neurons=46, activation=Sigmoid(), weight_init='glorot', dropout=0.8),
              Dense(neurons=10, activation=Linear(), weight_init='glorot')],
    loss = Softmax(),
    seed = 42
)

trainer2 = Trainer(model2, SGDMomentum(learning_rate = 0.2, momentum = 0.9, final_lr = 0.05, decay_type = 'exponential'))
trainer2.fit(X_train, train_labels, X_test, test_labels,
            epochs = 100, eval_every = 10, seed = 42, batch_size = 60)
print()
calc_accuracy_model(model2, X_test)

Validation loss after 10 epochs is 0.154.
Validation loss after 20 epochs is 0.140.

Loss increased after epoch 30, the final loss was 0.140, using the model from epoch 20

The model validation accuracy is: 97.89%
