In [1]:
cd ..

/Users/willwolf/Documents/neural-nets/vanilla-neural-nets


In [2]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell executes, so edits to the local .py files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [13]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_mldata

from training_batch_generators import MiniBatchGenerator
from optimization_algorithms import GradientDescent
from activation_functions import SigmoidActivationFunction
from loss_functions import MeanSquaredError, CrossEntropyLoss
from neural_nets.neural_net import VanillaNeuralNet
from data_objects import HoldoutData

# Load data

In [29]:
# Fetch the 'MNIST original' dataset, using the current directory as data_home.
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed in
# 0.22; on newer versions this cell (and its import) must move to fetch_openml.
mnist = fetch_mldata('MNIST original', data_home='.')
# One-hot encode the labels: get_dummies yields one indicator column per
# distinct target value, and the result replaces mnist.target in place.
mnist.target = np.array(pd.get_dummies(mnist.target))

In [49]:
# Build a boolean mask that holds out 1 row in every 70 for evaluation. The
# flags start evenly spaced (every 70th index) and are then shuffled so the
# held-out rows are scattered randomly over the dataset.
#
# The shuffle uses its own seeded generator so the same rows are held out on
# every run (holdout metrics stay comparable between runs and between the two
# loss functions) without touching NumPy's global random state.
HOLDOUT_SPLIT_SEED = 0
holdout_set_mask = np.arange(len(mnist.data)) % 70 == 0
np.random.RandomState(HOLDOUT_SPLIT_SEED).shuffle(holdout_set_mask)

# Boolean-mask indexing: ~mask selects the training rows, mask the holdout rows.
X = mnist.data[~holdout_set_mask]
y = mnist.target[~holdout_set_mask]
X_holdout = mnist.data[holdout_set_mask]
y_holdout = mnist.target[holdout_set_mask]

# Initialize hyper-parameters

In [55]:
# Hyper-parameters shared by both training runs below.
# NOTE(review): with LEARNING_RATE = 3 the cross-entropy run reports a nan
# holdout cost from epoch 0 -- the rate may be too large for that loss; confirm.
HIDDEN_LAYER_SIZE = 100  # middle entry of LAYER_SIZES
LEARNING_RATE = 3  # passed to VanillaNeuralNet as learning_rate
N_EPOCHS = 500  # passed to VanillaNeuralNet as n_epochs
N_BATCHES_PER_EPOCH = 20  # passed to VanillaNeuralNet as n_batches_per_epoch

In [56]:
# Network topology, listed from input to output.
LAYER_SIZES = [
    mnist.data.shape[1],    # input: one unit per column of mnist.data
    HIDDEN_LAYER_SIZE,      # single hidden layer
    mnist.target.shape[1],  # output: one unit per one-hot target column
]

# Train network

### with mean squared error

In [71]:
# Configure the network for the mean-squared-error experiment. Components are
# passed as classes (not instances); the holdout arrays are wrapped in
# HoldoutData and handed to the network at construction time.
vanilla_neural_net = VanillaNeuralNet(
    layer_sizes=LAYER_SIZES,
    training_batch_generator_class=MiniBatchGenerator,
    loss_function_class=MeanSquaredError,
    activation_function_class=SigmoidActivationFunction,
    optimization_algorithm_class=GradientDescent,
    learning_rate=LEARNING_RATE,
    n_epochs=N_EPOCHS,
    n_batches_per_epoch=N_BATCHES_PER_EPOCH,
    holdout_data=HoldoutData(X=X_holdout, y=y_holdout)
)

In [72]:
# Train on the non-holdout rows. The recorded output shows holdout cost and
# accuracy per epoch; this run was stopped manually (KeyboardInterrupt) during
# epoch 10, well short of N_EPOCHS.
vanilla_neural_net.fit(X, y)

Epoch 0
Holdout cost: 499.1091
Holdout accuracy: 0.163
Epoch 1
Holdout cost: 480.23137
Holdout accuracy: 0.211
Epoch 2
Holdout cost: 452.79873
Holdout accuracy: 0.274
Epoch 3
Holdout cost: 430.8758
Holdout accuracy: 0.31
Epoch 4
Holdout cost: 415.9144
Holdout accuracy: 0.328
Epoch 5
Holdout cost: 403.56064
Holdout accuracy: 0.343
Epoch 6
Holdout cost: 397.05421
Holdout accuracy: 0.353
Epoch 7
Holdout cost: 389.86553
Holdout accuracy: 0.357
Epoch 8
Holdout cost: 378.96514
Holdout accuracy: 0.379
Epoch 9
Holdout cost: 371.91402
Holdout accuracy: 0.387
Epoch 10


KeyboardInterrupt: 

### with cross entropy loss

In [59]:
# Configure the network for the cross-entropy experiment. Everything matches
# the mean-squared-error configuration except loss_function_class, so the two
# runs differ only in the loss being optimised.
#
# The holdout set reuses X_holdout / y_holdout (the arrays produced by the
# split cell) rather than re-indexing mnist with holdout_set_mask, so both
# experiments are guaranteed to evaluate on the same held-out rows.
vanilla_neural_net = VanillaNeuralNet(
    layer_sizes=LAYER_SIZES,
    training_batch_generator_class=MiniBatchGenerator,
    loss_function_class=CrossEntropyLoss,
    activation_function_class=SigmoidActivationFunction,
    optimization_algorithm_class=GradientDescent,
    learning_rate=LEARNING_RATE,
    n_epochs=N_EPOCHS,
    n_batches_per_epoch=N_BATCHES_PER_EPOCH,
    holdout_data=HoldoutData(X=X_holdout, y=y_holdout)
)

In [60]:
# Train the cross-entropy network on the same training rows.
# NOTE(review): the recorded output shows a nan holdout cost and a flat 0.105
# accuracy from epoch 0, i.e. this run did not learn. Possibly log(0) from
# saturated sigmoid outputs and/or too large a learning rate -- confirm in
# CrossEntropyLoss before trusting this experiment.
vanilla_neural_net.fit(X, y)

Epoch 0
Holdout cost: nan
Holdout accuracy: 0.105
Epoch 1
Holdout cost: nan
Holdout accuracy: 0.105
Epoch 2
Holdout cost: nan
Holdout accuracy: 0.105
Epoch 3
Holdout cost: nan
Holdout accuracy: 0.105
Epoch 4
Holdout cost: nan
Holdout accuracy: 0.105
Epoch 5
Holdout cost: nan
Holdout accuracy: 0.105
Epoch 6
Holdout cost: nan
Holdout accuracy: 0.105
Epoch 7


KeyboardInterrupt: 