In [1]:
!pip install plotly



In [2]:
import numpy as np
from sklearn.metrics import confusion_matrix
from utils.charts import create_figure, plot_figure
from utils.data import load_dataset, one_hot, fold_data, normalization, explore_data
from utils.NeuralNetwork import model, predict

In [3]:
# Read features (x_data) and labels (y_data) from the dataset file, parsed as int64.
x_data, y_data = load_dataset(
    "./resources/datasets/handwritten.txt",
    dtype=np.int64,
)

In [4]:
# Network architecture, passed to model() in this order.
INPUT_LAYER_SIZE = 16
HIDDEN_LAYER_SIZE = 10
OUTPUT_LAYER_SIZE = 10  # also used as the class count by explore_data() and one_hot()

# Training settings.
NUMBER_OF_ITERATIONS = 1000
LEARNING_RATE = 0.1

# Cross-validation: number of folds requested from fold_data() and iterated
# over by kfold_cross_validation().
NUMBER_OF_FOLDS = 10

In [5]:
# Print the per-class instance counts of the raw (not yet one-hot) labels.
explore_data(y_data, OUTPUT_LAYER_SIZE)

class: 0 | 780 (10.41%) instances
class: 1 | 779 (10.39%) instances
class: 2 | 780 (10.41%) instances
class: 3 | 719 (9.59%) instances
class: 4 | 780 (10.41%) instances
class: 5 | 720 (9.61%) instances
class: 6 | 720 (9.61%) instances
class: 7 | 778 (10.38%) instances
class: 8 | 719 (9.59%) instances
class: 9 | 719 (9.59%) instances
Total number of instances: 7494


In [6]:
# Replace the raw features with the output of utils.data.normalization().
# The scaling scheme is defined in that helper and is not visible here.
# NOTE: this overwrites x_data, so re-running the cell feeds already-normalized
# data back into normalization(); re-run the load cell first if in doubt.
x_data = normalization(x_data)

In [7]:
# Encode the labels with one_hot() using OUTPUT_LAYER_SIZE classes.
# NOTE: this overwrites y_data, so re-running the cell passes the already
# encoded labels to one_hot() again; re-run the load cell first.
y_data = one_hot(y_data, OUTPUT_LAYER_SIZE)

In [8]:
# Pair every sample with its label and hand the pairs to fold_data() to be
# split into NUMBER_OF_FOLDS folds.
# NOTE(review): whether fold_data() shuffles or stratifies is not visible here.
folded_data = fold_data(
    list(zip(x_data, y_data)),
    k=NUMBER_OF_FOLDS,
)

In [9]:
def calc_metrics(y_test, y_predict, current_cost):
    """Print accuracy, mean cost and the confusion matrix; return the accuracy.

    Parameters
    ----------
    y_test : array-like, 2-D
        True targets, one row per sample. The column index of each row's
        maximum is taken as the true class (one-hot rows in this notebook).
    y_predict : array-like, 2-D
        Per-class model outputs, one row per sample. The column index of each
        row's maximum is taken as the predicted class.
    current_cost : array-like
        Cost values for the run; only their mean is reported.

    Returns
    -------
    float
        Percentage of samples whose predicted class equals the true class.

    Raises
    ------
    ValueError
        If ``y_test`` contains no samples.
    """
    y_test = np.asarray(y_test)
    y_predict = np.asarray(y_predict)
    if y_test.size == 0:
        # Fail with a clear message instead of a division by zero further down.
        raise ValueError("calc_metrics needs at least one test sample")

    true_labels = np.argmax(y_test, axis=1)
    predicted_labels = np.argmax(y_predict, axis=1)

    # Fix the label set explicitly so the matrix always has one row/column per
    # class, even when a class is absent from this particular test split.
    class_labels = np.arange(max(y_test.shape[1], y_predict.shape[1]))

    # Vectorized equivalent of sum(matches) / n * 100.
    current_acc = np.mean(true_labels == predicted_labels) * 100
    print(f" Accuracy: {current_acc:.2f}%\n Cost avg: {np.mean(current_cost):.5f}\n")
    matrix = confusion_matrix(true_labels, predicted_labels, labels=class_labels)
    print(f" Confusion Matrix:\n {matrix}")
    return current_acc

In [10]:
def metric_summary(historic_acc, historic_cost):
    """Print mean, standard deviation and median of the accuracies and costs.

    Parameters
    ----------
    historic_acc : array-like
        Accuracy values (percentages), e.g. one per fold.
    historic_cost : array-like
        Cost values; the statistics are taken over all elements because no
        axis is given to the NumPy reductions.

    Returns
    -------
    None
        The summary is written to stdout only.
    """
    acc_mean, acc_std, acc_median = (
        np.mean(historic_acc),
        np.std(historic_acc),
        np.median(historic_acc),
    )
    print(f"Summary: \n\navg_acc: {acc_mean:.2f}% std_acc: {acc_std:.2f}% median_acc: {acc_median:.2f}%")

    cost_mean, cost_std, cost_median = (
        np.mean(historic_cost),
        np.std(historic_cost),
        np.median(historic_cost),
    )
    print(f"avg_cost: {cost_mean:.5f} std_cost: {cost_std:.5f} median_cost: {cost_median:.5f}")

In [11]:
def kfold_cross_validation(folded_data):
    """Train and evaluate the network once per fold and collect the results.

    For every fold index in ``range(NUMBER_OF_FOLDS)`` that fold is held out
    as the test set, the remaining folds are concatenated into the training
    set, a model is trained with the notebook-level hyper-parameters, and the
    held-out fold is scored with ``calc_metrics``.

    Parameters
    ----------
    folded_data : sequence
        Indexable collection of folds; each fold is an iterable of
        ``(features, target)`` pairs.

    Returns
    -------
    tuple(list, list)
        ``(historic_acc, historic_cost)``: the accuracy returned by
        ``calc_metrics`` and the cost returned by ``model`` for each fold, in
        fold order.
    """
    print(f"Model parameters:\n\n input_layer_size: {INPUT_LAYER_SIZE} | hidden_layer_size: {HIDDEN_LAYER_SIZE} | output_layer_size: {OUTPUT_LAYER_SIZE}")
    print(f" number_of_iterations: {NUMBER_OF_ITERATIONS} | learning_rate: {LEARNING_RATE}")

    fold_accuracies, fold_costs = [], []
    for held_out in range(NUMBER_OF_FOLDS):
        print(f"\n################## EPOCH with K={held_out} ##################")

        # Every fold except the held-out one contributes to training.
        training_pairs = [
            pair
            for fold_index in range(NUMBER_OF_FOLDS)
            if fold_index != held_out
            for pair in folded_data[fold_index]
        ]
        testing_pairs = list(folded_data[held_out])

        features_train, targets_train = zip(*training_pairs)
        features_test, targets_test = zip(*testing_pairs)

        # model()/predict() receive the transposed arrays; predict()'s output
        # is transposed back before being scored against the targets.
        trained_params, fold_cost = model(
            np.transpose(features_train),
            np.transpose(targets_train),
            INPUT_LAYER_SIZE,
            HIDDEN_LAYER_SIZE,
            OUTPUT_LAYER_SIZE,
            NUMBER_OF_ITERATIONS,
            LEARNING_RATE,
        )
        fold_scores = predict(np.transpose(features_test), trained_params).transpose()

        fold_accuracies.append(calc_metrics(targets_test, fold_scores, fold_cost))
        fold_costs.append(fold_cost)

    return fold_accuracies, fold_costs

In [12]:
# Run the full cross-validation; keeps the per-fold accuracies and the per-fold
# costs returned by model() for the summary and the cost plot below.
historic_acc, historic_cost = kfold_cross_validation(folded_data)

Model parameters:

 input_layer_size: 16 | hidden_layer_size: 10 | output_layer_size: 10
 number_of_iterations: 1000 | learning_rate: 0.1

################## EPOCH with K=0 ##################
 Accuracy: 82.91%
 Cost avg: 0.00009

 Confusion Matrix:
 [[71  0  0  0  0  1  0  0  1  0]
 [ 0 51 15  1  0  0  0  1  0  1]
 [ 0 10 76  0  0  0  0  0  0  0]
 [ 0  1  0 70  0  0  0  0  0  0]
 [ 2  1  0  0 61  0  3  0  0  0]
 [ 0  0  0  4  0 39  0  0  1 25]
 [ 0  0  1  0  3  0 86  0  0  0]
 [ 0  5  1  3  0  0  3 67  1  0]
 [ 5  0  2  2  0  2  5 11 49  2]
 [ 3  1  0  5  4  2  0  0  0 51]]

################## EPOCH with K=1 ##################
 Accuracy: 85.85%
 Cost avg: 0.00009

 Confusion Matrix:
 [[77  0  0  0  0  0  1  0  3  0]
 [ 0 50 14  4  0  4  0  2  1  0]
 [ 0  4 69  0  0  0  0  0  0  0]
 [ 0  2  0 68  0  0  0  0  0  0]
 [ 0  0  0  0 89  0  1  0  0  0]
 [ 0  3  0 13  0 32 11  0  0  5]
 [ 0  0  0  0  2  1 69  0  0  0]
 [ 0  4  1  0  0  0  0 65  1  3]
 [ 3  1  0  0  0  1  0  1 73  1]
 [ 1  8  0

In [13]:
# Aggregate statistics (mean / std / median) over the cross-validation results.
metric_summary(historic_acc, historic_cost)

Summary: 

avg_acc: 83.86% std_acc: 3.33% median_acc: 84.85%
avg_cost: 0.00009 std_cost: 0.00010 median_cost: 0.00007


In [14]:
# Plot the cost averaged across folds: the mean over axis 0 collapses the
# per-fold entries of historic_cost into a single series.
f = create_figure()
plot_figure(f, np.mean(historic_cost, axis=0))