In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from toygrad import MLP, Layer, Sigmoid, SoftMax, ReLU, TanH, Linear, SquaredError, CategoricalCrossEntropy, Accuracy, get_metric_key
from plot import plot_network, plot_metric

np.random.seed(2137)
plt.rcParams["figure.figsize"] = (20,20)

TRAINING_DATASET = "data/mnist/train.csv"
TESTING_DATASET = "data/mnist/test.csv"

def one_hot(y):
    y = y.astype(int)
    encoded = np.zeros((y.size, y.max()+1))
    encoded[np.arange(y.size), y.reshape(1, -1)] = 1
    return encoded

def read_mnist(path):
    """Read and shuffle the dataset"""
    data = np.genfromtxt(path, delimiter=',')[1:]
    np.random.shuffle(data)
    X = data[:, 1:]
    Y = one_hot(data[:, 0].reshape(-1, 1))
    return X, Y

def plot_images(X):
    # Disregard elements which would not fit in square
    size = int(len(X)**(1/2))
    x = X[:size**2]
    plt.imshow(np.concatenate(x.reshape(size, size*28, 28), axis=1))
    plt.show()
    
X, Y = read_mnist(TRAINING_DATASET)
# Normalize the data
X = (X-np.mean(X))/np.std(X)

train_size = int(len(X)*0.8)

X_train, Y_train = X[:train_size], Y[:train_size]
X_test, Y_test = X[train_size:], Y[train_size:]

In [3]:
layers = [
    Layer(in_size=28*28, out_size=256, activ_function=ReLU),
    Layer(in_size=256, out_size=64, activ_function=ReLU),
    Layer(in_size=64, out_size=10, activ_function=SoftMax),
]
m = MLP(
    layers=layers,
    loss=CategoricalCrossEntropy,
    metrics=[Accuracy],
    bias=True,
    batch_size=4,
    epochs=20,
    momentum=0.1,
    learning_rate=0.4,
    verbosity=2,
)

In [None]:
%%time
stats = m.train(X_train, Y_train, X_test, Y_test)
plot_metric(m.loss, stats, title="Train/Test losses per epoch")
plot_metric(Accuracy(), stats, title="Train/Test accuracy per epoch")

Epoch    1
 Accuracy_train: 0.247
 Accuracy_test: 0.412
 CategoricalCrossEntropy_train: 9.511
 CategoricalCrossEntropy_train_std: 4.896
 CategoricalCrossEntropy_test: 6.615
 CategoricalCrossEntropy_test_std: 6.816
Epoch    2
 Accuracy_train: 0.581
 Accuracy_test: 0.676
 CategoricalCrossEntropy_train: 4.100
 CategoricalCrossEntropy_train_std: 4.345
 CategoricalCrossEntropy_test: 2.831
 CategoricalCrossEntropy_test_std: 5.071
Epoch    3
 Accuracy_train: 0.722
 Accuracy_test: 0.764
 CategoricalCrossEntropy_train: 2.555
 CategoricalCrossEntropy_train_std: 3.273
 CategoricalCrossEntropy_test: 2.266
 CategoricalCrossEntropy_test_std: 4.891
Epoch    4
 Accuracy_train: 0.769
 Accuracy_test: 0.773
 CategoricalCrossEntropy_train: 2.128
 CategoricalCrossEntropy_train_std: 2.903
 CategoricalCrossEntropy_test: 2.209
 CategoricalCrossEntropy_test_std: 4.845
Epoch    5
 Accuracy_train: 0.794
 Accuracy_test: 0.816
 CategoricalCrossEntropy_train: 1.985
 CategoricalCrossEntropy_train_std: 2.757
 Categor

## Automatic parameter optimization

In [None]:
activations = [Sigmoid, ReLU]
layer_sizes = [32, 64, 128, 256]
layer_numbers = [1, 2]
learning_rates = [0.5, 0.4, 0.3, 0.2]

In [None]:
%%time
def save_results_df(result):  
    results_df = pd.DataFrame(result)
    results_df = results_df.sort_values('test_accuracy', ascending=False)
    results_df.to_csv("hyperparamter_results.csv")
    return results_df
    
epochs = 20
result = {
    "activation": [],
    "layer_size": [],
    "hidden_layer_number": [],
    "learning_rate": [],
    "epochs": [],
    "test_accuracy": [],
    "train_loss": [],
    "train_loss_std": [],
}
for activation in activations:
    for size in layer_sizes:
        for layer_num in layer_numbers:
            for lr in learning_rates:
                print("===================================================================================")
                header = f"Layer number: {layer_num}, Layer size: {size}, Activation: {str(activation())}, Learning rate: {lr}"
                result["activation"].append(activation())
                result["layer_size"].append(size)
                result["learning_rate"].append(lr)
                result["hidden_layer_number"].append(layer_num)
                result["epochs"].append(epochs)
                print(header)
                print("===================================================================================")
                layers = [
                    Layer(in_size=28*28, out_size=size, activ_function=activation),
                    *[Layer(in_size=size, out_size=size, activ_function=activation) for _ in range(layer_num-1)],
                    Layer(in_size=size, out_size=10, activ_function=SoftMax),
                ]
                m = MLP(
                    layers=layers,
                    loss=CategoricalCrossEntropy,
                    metrics=[Accuracy],
                    bias=True,
                    batch_size=4,
                    epochs=20,
                    momentum=0,
                    learning_rate=lr,
                    verbosity=1,
                )
                stats = m.train(X_train, Y_train, X_test, Y_test)
                plot_metric(m.loss, stats, title=f"Train/Test losses per epoch - {header}")
                plot_metric(Accuracy(), stats, title=f"Train/Test accuracy per epoch - {header}")
                
                result["test_accuracy"].append(stats[get_metric_key(Accuracy(), "test")][-1])
                result["train_loss"].append(stats[get_metric_key(m.loss, "train")][-1])
                result["train_loss_std"].append(stats[get_metric_key(m.loss, "train_std")][-1])
                result_df = save_results_df(result)
                display(result_df)
result_df