In [1]:
%load_ext autoreload
%autoreload 2

In [3]:
import gzip
import pickle
import os

import numpy as np
import pandas as pd
from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import MinMaxScaler

from neural_network.training_batch_generator import MiniBatchGenerator
from neural_network.optimization_algorithm import GradientDescent
from neural_network.activation_function import SigmoidActivationFunction
from neural_network.loss_function import MeanSquaredError, CrossEntropyLoss
from neural_network.network import VanillaNeuralNetwork
from neural_network.data_object import HoldoutData

# Load MNIST data

In [4]:
mnist = fetch_mldata('MNIST original', data_home='.')
mnist.target = np.array(pd.get_dummies(mnist.target))

# Separate and scale train and holdout data

In [5]:
holdout_set_mask = np.array([i % 7 == 0 for i in range(len(mnist.data))])
np.random.shuffle(holdout_set_mask)

X = mnist.data[~holdout_set_mask].astype(float)
y = mnist.target[~holdout_set_mask].astype(float)
X_holdout = mnist.data[holdout_set_mask].astype(float)
y_holdout = mnist.target[holdout_set_mask].astype(float)

In [6]:
X = MinMaxScaler().fit_transform(X)
X_holdout = MinMaxScaler().fit_transform(X_holdout)

# Initialize hyper-parameters

In [7]:
HIDDEN_LAYER_SIZE = 50
LEARNING_RATE = 3.
N_EPOCHS = 10
TRAINING_BATCH_SIZE = 10
RANDOM_STATE = 123

In [8]:
LAYER_SIZES = [X.shape[1], HIDDEN_LAYER_SIZE, HIDDEN_LAYER_SIZE, y.shape[1]]

# Train network

### with mean squared error

In [214]:
vanilla_neural_net = VanillaNeuralNetwork(
    layer_sizes=LAYER_SIZES,
    training_batch_generator_class=MiniBatchGenerator,
    loss_function_class=MeanSquaredError,
    activation_function_class=SigmoidActivationFunction,
    optimization_algorithm_class=GradientDescent,
    learning_rate=LEARNING_RATE,
    n_epochs=N_EPOCHS,
    training_batch_size=TRAINING_BATCH_SIZE,
    random_state=RANDOM_STATE,
    holdout_data=HoldoutData(X=X_holdout, y=y_holdout)
)

In [215]:
vanilla_neural_net.fit(X, y)

Epoch: 0 | Accuracy: 0.91
Epoch: 1 | Accuracy: 0.9279
Epoch: 2 | Accuracy: 0.9405
Epoch: 3 | Accuracy: 0.9474
Epoch: 4 | Accuracy: 0.9434
Epoch: 5 | Accuracy: 0.9499
Epoch: 6 | Accuracy: 0.9514
Epoch: 7 | Accuracy: 0.9508
Epoch: 8 | Accuracy: 0.9488
Epoch: 9 | Accuracy: 0.9532


### with cross entropy loss

In [216]:
LEARNING_RATE = .5

In [217]:
vanilla_neural_net = VanillaNeuralNetwork(
    layer_sizes=LAYER_SIZES,
    training_batch_generator_class=MiniBatchGenerator,
    loss_function_class=CrossEntropyLoss,
    activation_function_class=SigmoidActivationFunction,
    optimization_algorithm_class=GradientDescent,
    learning_rate=LEARNING_RATE,
    n_epochs=N_EPOCHS,
    training_batch_size=TRAINING_BATCH_SIZE,
    random_state=RANDOM_STATE,
    holdout_data=HoldoutData(X=X_holdout, y=y_holdout)
)

In [218]:
vanilla_neural_net.fit(X, y)

Epoch: 0 | Accuracy: 0.9185
Epoch: 1 | Accuracy: 0.9373
Epoch: 2 | Accuracy: 0.9387
Epoch: 3 | Accuracy: 0.9439
Epoch: 4 | Accuracy: 0.9519
Epoch: 5 | Accuracy: 0.9507
Epoch: 6 | Accuracy: 0.9559
Epoch: 7 | Accuracy: 0.9559
Epoch: 8 | Accuracy: 0.9567
Epoch: 9 | Accuracy: 0.9562
