Load libraries

In [None]:
import numpy as np
import pandas as pd
import neural_network_backbone as nnb
from sklearn.metrics import accuracy_score 
from keras.datasets import mnist

Define constants and seed the random number generator

In [2]:
# Copied onto nnb.SILENT in the training cell.
# NOTE(review): presumably toggles nnb's progress printing -- confirm in nnb.
SILENT = False
# Seeds NumPy's global RNG below and is also handed to nnb.train.
SEED = 10101
# Training hyper-parameters, passed positionally to nnb.train.
EPOCHS = 75
LEARNING_RATE = 0.1
BATCH_SIZE = 10
# Seed the global NumPy random state once, up front.
np.random.seed(SEED)

Define cost and accuracy helper functions

In [3]:
def get_progress(Y_hat, Y):
    """Build the one-line progress summary for a set of predictions.

    Both arguments are forwarded unchanged to get_cost_value and
    get_accuracy_value. The result is a string of the form
    "cost: <c> - accuracy: <a>" with each value printed to five decimals.
    """
    template = "cost: {:.5f} - accuracy: {:.5f}"
    # Cost is evaluated before accuracy, matching the order in the message.
    metrics = (get_cost_value(Y_hat, Y), get_accuracy_value(Y_hat, Y))
    return template.format(*metrics)

def one_hot(Y, num_classes):
    """One-hot encode integer class labels.

    Parameters
    ----------
    Y : numpy.ndarray of int
        Class indices; the array is flattened, so any shape is accepted.
    num_classes : int
        Number of columns in the encoded result.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(Y.size, num_classes)`` whose row ``i`` has a
        1.0 in column ``Y.flat[i]`` and 0.0 elsewhere.
    """
    # Row-indexing the identity matrix already yields (n_samples, num_classes).
    # The result is deliberately not squeezed: squeezing would drop the sample
    # axis for a single label (and the class axis when num_classes == 1), so
    # callers would receive a different rank depending on the batch size.
    return np.eye(num_classes)[Y.reshape(-1)]

def convert_prob_into_class(probs):
    """Turn per-sample class scores into 0/1 indicator rows.

    Parameters
    ----------
    probs : numpy.ndarray, shape (n_samples, n_classes)
        One row of scores per sample.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``probs`` holding 1.0 wherever an
        entry equals the maximum of its row and 0.0 elsewhere. When several
        entries tie for the row maximum, every one of them is marked. A row
        containing NaN has no entry equal to its maximum and so is all zeros.
    """
    # A zero-sized array has no row maximum to compare against; return an
    # all-float array of the same (empty) shape instead of reducing over it.
    if probs.size == 0:
        return probs.astype(float)
    # One vectorised max per row, kept as a column so it broadcasts across the
    # classes. This replaces nested Python loops that recomputed the row
    # maximum once per element.
    row_max = probs.max(axis=1, keepdims=True)
    return (probs == row_max).astype(float)

def get_accuracy_value(Y_hat, Y):
    """Score network outputs against integer class labels.

    Y_hat is transposed and hardened into 0/1 indicator rows, Y is one-hot
    encoded to n_outputs columns (n_outputs is a notebook-level global), and
    the two are compared with sklearn's accuracy_score.
    """
    predicted_rows = convert_prob_into_class(Y_hat.T)
    expected_rows = one_hot(Y, n_outputs)
    return accuracy_score(expected_rows, predicted_rows)

In [4]:
def get_cost_value(Y_hat, Y, derivative = False):
    """Return the cost of the predictions, or the error term if requested.

    Y is one-hot encoded to n_outputs columns (n_outputs is a notebook-level
    global) and compared against the transposed Y_hat.

    With ``derivative`` false the result is a scalar: the negated mean, over
    every (sample, class) entry, of ``y * log(p) + (1 - y) * log(1 - p)``,
    where the outputs are first clipped to [1e-15, 1 - 1e-15] so that neither
    logarithm receives 0.

    With ``derivative`` true the result is the array ``Y_hat.T - one_hot(Y)``,
    computed from the unclipped outputs.
    """
    targets = one_hot(Y, n_outputs)
    if derivative:
        return Y_hat.T - targets

    eps = 1e-15
    clipped = np.clip(Y_hat, eps, 1. - eps).T
    hit_term = targets * np.log(clipped)
    miss_term = (1. - targets) * np.log(1. - clipped)
    return -np.mean(hit_term + miss_term)

Load the MNIST dataset

In [5]:
# mnist.load_data() is unpacked as a (train, test) pair of (images, labels) tuples.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

Flatten the X values from (60000, 28, 28) to (60000, 784), so that each image becomes a single row vector that is easier for the neural network to operate on.

In [6]:
# Flatten every image into one row. The sample count is read from the array and
# -1 lets NumPy infer the per-image pixel count, so the cell no longer depends
# on the hard-coded 60000 x 784 dimensions.
X_train = X_train.reshape(X_train.shape[0], -1)
X_train.shape

(60000, 784)

Normalize: MNIST images are grayscale, with pixel values in the range [0, 255]. We normalize them by scaling the values to the range [0, 1].

In [7]:
# Scale gray levels from [0, 255] down to [0, 1].
# mnist.load_data() returns integer (uint8) images and the division produces
# floats, so the dtype check makes this cell idempotent: re-running it is a
# no-op instead of silently dividing by 255 a second time.
if X_train.dtype.kind in "ui":
    X_train = X_train / 255

Define Neural Network Architecture

In [8]:
# Layer widths are derived from the data: one input per column of X_train and
# one output per distinct label value found in y_train.
# n_outputs is also read as a global by get_cost_value and get_accuracy_value.
n_inputs = X_train.shape[1]
n_outputs = len(set(y_train))

# One dict per layer. The first entry only fixes the input width; every later
# entry also names the nnb activation function attached to that layer.
network_layers = [
    {"nodes": n_inputs},
    {"nodes": 32, "activation": nnb.relu},
    {"nodes": 64, "activation": nnb.relu},
    {"nodes": n_outputs, "activation": nnb.softmax}
]

Train model

In [9]:
# Configure nnb through its module-level attributes before training starts:
# the verbosity flag plus the cost and progress callbacks defined above.
nnb.SILENT = SILENT
nnb.COST_FUNC = get_cost_value
nnb.PROGRESS_FUNC = get_progress
# Labels are handed over as a column vector of shape (n_samples, 1).
# NOTE(review): the meaning of the positional arguments after network_layers
# is inferred from the constant names -- confirm against nnb.train's signature.
params_values = nnb.train(X_train, y_train.reshape((y_train.shape[0], 1)), 
                          network_layers, EPOCHS, LEARNING_RATE, SEED, BATCH_SIZE)

Iteration: 00000 - cost: 0.04573 - accuracy: 0.92297
Iteration: 00001 - cost: 0.03404 - accuracy: 0.94332
Iteration: 00002 - cost: 0.02968 - accuracy: 0.95180
Iteration: 00003 - cost: 0.02710 - accuracy: 0.95577
Iteration: 00004 - cost: 0.02325 - accuracy: 0.96403
Iteration: 00005 - cost: 0.02087 - accuracy: 0.96725
Iteration: 00006 - cost: 0.01753 - accuracy: 0.97355
Iteration: 00007 - cost: 0.01655 - accuracy: 0.97547
Iteration: 00008 - cost: 0.01578 - accuracy: 0.97640
Iteration: 00009 - cost: 0.01397 - accuracy: 0.98022
Iteration: 00010 - cost: 0.01359 - accuracy: 0.98093
Iteration: 00011 - cost: 0.01430 - accuracy: 0.98018
Iteration: 00012 - cost: 0.01264 - accuracy: 0.98258
Iteration: 00013 - cost: 0.01288 - accuracy: 0.98258
Iteration: 00014 - cost: 0.01167 - accuracy: 0.98532
Iteration: 00015 - cost: 0.01179 - accuracy: 0.98457
Iteration: 00016 - cost: 0.01232 - accuracy: 0.98320
Iteration: 00017 - cost: 0.01261 - accuracy: 0.98165
Iteration: 00018 - cost: 0.01136 - accuracy: 0

Test accuracy

In [10]:
# Apply the same preprocessing as for the training images: flatten each image
# into one row (sample count read from the array, pixel count inferred via -1
# rather than hard-coded), then scale gray levels to [0, 1].
X_test = X_test.reshape(X_test.shape[0], -1)
# Only integer-typed data is scaled. The division yields floats, so re-running
# this cell does not divide by 255 a second time.
if X_test.dtype.kind in "ui":
    X_test = X_test / 255

# The forward pass is given the inputs transposed, i.e. as (features, samples);
# the labels are passed as a single row of shape (1, n_samples).
Y_test_hat, _ = nnb.full_forward_propagation(np.transpose(X_test), params_values, network_layers)
print("Test set: " + get_progress(Y_test_hat, np.transpose(y_test.reshape((y_test.shape[0], 1)))))

Test set: cost: 0.03019 - accuracy: 0.96970


Save train result

In [11]:
# Hand the layer description and the trained parameters to nnb for saving.
# NOTE(review): how "output" is interpreted (file name, prefix or directory)
# is decided inside nnb.save_model -- confirm there.
nnb.save_model(network_layers, params_values, "output")