In [2]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell runs, so edits to the local project modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [51]:
import gzip
import pickle
import os

import numpy as np
import pandas as pd
from sklearn.datasets import fetch_mldata

from training_batch_generator import MiniBatchGenerator
from optimization_algorithm import GradientDescent
from activation_function import SigmoidActivationFunction
from loss_function import MeanSquaredError
from neural_network.vanilla_neural_network import VanillaNeuralNetwork
from data_object import HoldoutData

# Load MNIST data

In [52]:
# Location of the gzipped pickle that holds the three MNIST splits as a
# (training, validation, test) tuple.
path = 'mldata/mnist.pkl.gz'

# Use a context manager so the file handle is closed even if unpickling raises.
# SECURITY: pickle.load can execute arbitrary code contained in the file, so
# only load an archive that comes from a trusted source.
# encoding='latin1' is presumably required because the archive was pickled
# under Python 2 -- confirm against the data provenance.
with gzip.open(path, 'rb') as f:
    training_data, validation_data, test_data = pickle.load(f, encoding='latin1')

In [53]:
# Unpack each (features, labels) pair and one-hot encode the labels.
#
# The encoded matrix is cast to uint8 explicitly: older pandas releases return
# uint8 columns from get_dummies, while pandas >= 2.0 returns booleans by
# default. Pinning the dtype keeps the targets numeric and identical across
# pandas versions.
X, y = training_data
y = pd.get_dummies(y).values.astype(np.uint8)

X_holdout, y_holdout = test_data
y_holdout = pd.get_dummies(y_holdout).values.astype(np.uint8)

# The two splits are encoded independently, so a class missing from either one
# would silently yield label matrices of different widths.
assert y.shape[1] == y_holdout.shape[1], (
    "training and holdout labels one-hot encode to a different number of classes"
)

# Initialize hyper-parameters

In [54]:
# Seed forwarded to the network as `random_state`.
RANDOM_STATE = 123

# Width of the single hidden layer (middle entry of LAYER_SIZES).
HIDDEN_LAYER_SIZE = 30

# Training schedule: passes over the data, examples per mini-batch, step size.
N_EPOCHS = 10
TRAINING_BATCH_SIZE = 10
LEARNING_RATE = 3.0

In [55]:
# Layer widths in order input -> hidden -> output: one input unit per column
# of X, HIDDEN_LAYER_SIZE hidden units, one output unit per one-hot column of y.
LAYER_SIZES = [
    X.shape[1],
    HIDDEN_LAYER_SIZE,
    y.shape[1],
]

# Train network

## With mean squared error loss

In [56]:
# Assemble the network from its pluggable components (batch generator, loss,
# activation, optimizer) and the hyper-parameters defined above. The class is
# referenced by the name it is imported under, VanillaNeuralNetwork, so the
# cell runs on a fresh kernel. The MNIST test split is handed over as holdout
# data.
vanilla_neural_net = VanillaNeuralNetwork(
    layer_sizes=LAYER_SIZES,
    training_batch_generator_class=MiniBatchGenerator,
    loss_function_class=MeanSquaredError,
    activation_function_class=SigmoidActivationFunction,
    optimization_algorithm_class=GradientDescent,
    learning_rate=LEARNING_RATE,
    n_epochs=N_EPOCHS,
    training_batch_size=TRAINING_BATCH_SIZE,
    random_state=RANDOM_STATE,
    holdout_data=HoldoutData(X=X_holdout, y=y_holdout)
)

In [57]:
# Train on the training split (features X, one-hot labels y).
# NOTE(review): the per-epoch accuracy printed below is presumably measured on
# the holdout data supplied at construction -- confirm in VanillaNeuralNetwork.
vanilla_neural_net.fit(X, y)

Epoch: 0 | Accuracy: 0.9049
Epoch: 1 | Accuracy: 0.9211
Epoch: 2 | Accuracy: 0.9268
Epoch: 3 | Accuracy: 0.9367
Epoch: 4 | Accuracy: 0.9356
Epoch: 5 | Accuracy: 0.9348
Epoch: 6 | Accuracy: 0.9399
Epoch: 7 | Accuracy: 0.9421
Epoch: 8 | Accuracy: 0.9402
Epoch: 9 | Accuracy: 0.941
