In [1]:
cd ..

/Users/willwolf/Documents/neural-nets/vanilla-neural-nets


In [2]:
%load_ext autoreload
%autoreload 2

In [23]:
import gzip
import pickle
import os

import numpy as np
import pandas as pd
from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import MinMaxScaler

from neural_network.training_batch_generator import MiniBatchGenerator
from neural_network.optimization_algorithm import GradientDescent
from neural_network.activation_function import SigmoidActivationFunction, ReLUActivationFunction, SoftmaxActivationFunction
from neural_network.loss_function import MeanSquaredError, CrossEntropyLoss, LogLikelihoodLoss
from neural_network.network import VanillaNeuralNetwork
from neural_network.data_object import HoldoutData

# Load MNIST data

In [4]:
mnist = fetch_mldata('MNIST original', data_home='.')
mnist.target = np.array(pd.get_dummies(mnist.target))

# Separate and scale train and holdout data

In [5]:
holdout_set_mask = np.array([i % 7 == 0 for i in range(len(mnist.data))])
np.random.shuffle(holdout_set_mask)

X = mnist.data[~holdout_set_mask].astype(float)
y = mnist.target[~holdout_set_mask].astype(float)
X_holdout = mnist.data[holdout_set_mask].astype(float)
y_holdout = mnist.target[holdout_set_mask].astype(float)

In [6]:
X = MinMaxScaler().fit_transform(X)
X_holdout = MinMaxScaler().fit_transform(X_holdout)

# Initialize hyper-parameters

In [44]:
HIDDEN_LAYER_SIZE = 50
LEARNING_RATE = 3.
N_EPOCHS = 10
TRAINING_BATCH_SIZE = 10
RANDOM_STATE = 123

In [45]:
LAYER_SIZES = [X.shape[1], HIDDEN_LAYER_SIZE, HIDDEN_LAYER_SIZE, y.shape[1]]

# Train network

### with mean squared error

In [9]:
vanilla_neural_net = VanillaNeuralNetwork(
    layer_sizes=LAYER_SIZES,
    training_batch_generator_class=MiniBatchGenerator,
    loss_function_class=MeanSquaredError,
    activation_function_class=SigmoidActivationFunction,
    optimization_algorithm_class=GradientDescent,
    learning_rate=LEARNING_RATE,
    n_epochs=N_EPOCHS,
    training_batch_size=TRAINING_BATCH_SIZE,
    random_state=RANDOM_STATE,
    holdout_data=HoldoutData(X=X_holdout, y=y_holdout)
)

In [10]:
vanilla_neural_net.fit(X, y)

Epoch: 0 | Accuracy: 0.9091
Epoch: 1 | Accuracy: 0.9347
Epoch: 2 | Accuracy: 0.9412
Epoch: 3 | Accuracy: 0.9407
Epoch: 4 | Accuracy: 0.9457
Epoch: 5 | Accuracy: 0.9516
Epoch: 6 | Accuracy: 0.9539
Epoch: 7 | Accuracy: 0.9518
Epoch: 8 | Accuracy: 0.9527
Epoch: 9 | Accuracy: 0.9568


### with cross entropy loss

In [46]:
LEARNING_RATE = .5

In [47]:
vanilla_neural_net = VanillaNeuralNetwork(
    layer_sizes=LAYER_SIZES,
    training_batch_generator_class=MiniBatchGenerator,
    loss_function_class=CrossEntropyLoss,
    activation_function_class=SigmoidActivationFunction,
    optimization_algorithm_class=GradientDescent,
    learning_rate=LEARNING_RATE,
    n_epochs=N_EPOCHS,
    training_batch_size=TRAINING_BATCH_SIZE,
    random_state=RANDOM_STATE,
    holdout_data=HoldoutData(X=X_holdout, y=y_holdout)
)

In [48]:
vanilla_neural_net.fit(X, y)

Epoch: 0 | Accuracy: 0.9204
Epoch: 1 | Accuracy: 0.9357
Epoch: 2 | Accuracy: 0.9435
Epoch: 3 | Accuracy: 0.9447
Epoch: 4 | Accuracy: 0.9509
Epoch: 5 | Accuracy: 0.9506
Epoch: 6 | Accuracy: 0.9539
Epoch: 7 | Accuracy: 0.9559
Epoch: 8 | Accuracy: 0.9516
Epoch: 9 | Accuracy: 0.9552


### with ReLU activations, softmax output and log likelihood loss

In [40]:
LEARNING_RATE = .0005
LAYER_SIZES = [X.shape[1], 256, y.shape[1]]
TRAINING_BATCH_SIZE = 256
N_EPOCHS = 25
WEIGHT_INITIALIZATION_STANDARD_DEVIATION = .1

In [41]:
vanilla_neural_net = VanillaNeuralNetwork(
    layer_sizes=LAYER_SIZES,
    training_batch_generator_class=MiniBatchGenerator,
    loss_function_class=LogLikelihoodLoss,
    activation_function_class=ReLUActivationFunction,
    output_layer_activation_function_class=SoftmaxActivationFunction,
    optimization_algorithm_class=GradientDescent,
    learning_rate=LEARNING_RATE,
    n_epochs=N_EPOCHS,
    training_batch_size=TRAINING_BATCH_SIZE,
    random_state=RANDOM_STATE,
    weight_initialization_standard_deviation=WEIGHT_INITIALIZATION_STANDARD_DEVIATION,
    holdout_data=HoldoutData(X=X_holdout, y=y_holdout)
)

In [42]:
vanilla_neural_net.fit(X, y)

Epoch: 0 | Accuracy: 0.1175
Epoch: 1 | Accuracy: 0.1445
Epoch: 2 | Accuracy: 0.1833
Epoch: 3 | Accuracy: 0.2354
Epoch: 4 | Accuracy: 0.3061
Epoch: 5 | Accuracy: 0.3704
Epoch: 6 | Accuracy: 0.4216
Epoch: 7 | Accuracy: 0.4706
Epoch: 8 | Accuracy: 0.516
Epoch: 9 | Accuracy: 0.5571
Epoch: 10 | Accuracy: 0.5832
Epoch: 11 | Accuracy: 0.61
Epoch: 12 | Accuracy: 0.6251
Epoch: 13 | Accuracy: 0.6453
Epoch: 14 | Accuracy: 0.6538
Epoch: 15 | Accuracy: 0.6654
Epoch: 16 | Accuracy: 0.6727
Epoch: 17 | Accuracy: 0.6889
Epoch: 18 | Accuracy: 0.6986
Epoch: 19 | Accuracy: 0.6976
Epoch: 20 | Accuracy: 0.6965
Epoch: 21 | Accuracy: 0.7126
Epoch: 22 | Accuracy: 0.7045
Epoch: 23 | Accuracy: 0.7258
Epoch: 24 | Accuracy: 0.7176
