In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from plot import plot_network, plot_metric
from toygrad import MLP, Layer, SoftMax, TanH, CategoricalCrossEntropy, Accuracy

# Path of the labelled MNIST CSV that read_mnist() loads further down.
TRAINING_DATASET = "data/mnist/train.csv"

# Seed NumPy's global RNG so every np.random call that follows is repeatable.
np.random.seed(2137)

# Default size (width, height in inches) for every matplotlib figure.
plt.rcParams["figure.figsize"] = (20, 20)

def one_hot(y, num_classes=None):
    """Encode integer class labels as one-hot rows.

    Parameters
    ----------
    y : array_like
        Class labels of any shape (e.g. ``(n,)`` or ``(n, 1)``). Values are
        cast to ``int`` and flattened.
    num_classes : int, optional
        Number of columns in the encoding. When omitted it is inferred as
        ``max(label) + 1`` (or ``0`` for empty input). Pass it explicitly
        when a subset of the data may not contain the highest class, so that
        all encodings share the same width.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(y.size, num_classes)`` with a single ``1``
        per row at the column given by the label.

    Raises
    ------
    ValueError
        If any label is negative or not smaller than ``num_classes``.
    """
    labels = np.asarray(y).astype(int).ravel()

    if num_classes is None:
        # max() is undefined on an empty array, so treat "no labels" as "no classes".
        num_classes = int(labels.max()) + 1 if labels.size else 0

    # Negative indices would silently wrap around to the last columns, so reject them.
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            f"labels must lie in [0, {num_classes}); "
            f"got range [{labels.min()}, {labels.max()}]"
        )

    encoded = np.zeros((labels.size, num_classes))
    encoded[np.arange(labels.size), labels] = 1
    return encoded

def read_mnist(path):
    """Load a labelled CSV, shuffle its rows and split features from labels.

    The file is parsed as comma-separated floats and the first parsed row is
    discarded (presumably the header line -- confirm against the data file).
    Rows are shuffled in place with NumPy's global RNG.

    Returns a tuple ``(X, Y)``: ``X`` holds every column but the first, and
    ``Y`` is the one-hot encoding of the first column.
    """
    parsed = np.genfromtxt(path, delimiter=',')
    samples = parsed[1:]
    np.random.shuffle(samples)

    features = samples[:, 1:]
    label_column = samples[:, 0].reshape(-1, 1)
    return features, one_hot(label_column)

def read_test(path):
    """Load a comma-separated file as floats, dropping its first parsed row.

    The dropped row is presumably the header line (it parses to NaN) --
    confirm against the data file. No shuffling is applied.
    """
    parsed = np.genfromtxt(path, delimiter=',')
    return parsed[1:]
    

def plot_images(X):
    """Show the leading rows of ``X`` as one square mosaic of 28x28 tiles.

    Only the largest perfect-square number of rows is used; the remainder is
    ignored. The reshape requires every row to hold 28*28 values. Images
    fill the mosaic column by column: each vertical strip stacks ``side``
    consecutive images, and strips are laid out left to right.
    """
    side = int(len(X) ** (1 / 2))
    usable = X[:side * side]

    # One (side*28, 28) vertical strip per mosaic column.
    strips = usable.reshape(side, side * 28, 28)
    mosaic = np.concatenate(strips, axis=1)

    plt.imshow(mosaic)
    plt.show()
    
def transform(X, mean=0.13101, std=0.3085):
    """Rescale pixel values by 1/255 and standardise them.

    Parameters
    ----------
    X : numpy.ndarray
        Raw pixel intensities (presumably in the 0..255 range -- the division
        by 255 suggests it, confirm against the data).
    mean, std : float, optional
        Statistics used for standardisation, expressed on the rescaled
        (``X / 255``) values. The defaults were calculated directly on the
        whole X set; pass different values to normalise another dataset.

    Returns
    -------
    numpy.ndarray
        ``(X / 255 - mean) / std``, same shape as ``X``.
    """
    scaled = X / 255
    return (scaled - mean) / std

def save_results_df(result, name="hyperparamter_results.csv"):
    """Tabulate results, rank them by test accuracy and write them to CSV.

    Relies on pandas being imported at module level as ``pd``.

    Parameters
    ----------
    result
        Anything ``pd.DataFrame`` accepts (e.g. a list of dicts); it must
        yield a ``'test_accuracy'`` column.
    name : str, optional
        Destination CSV path. The default keeps its historical spelling
        ("hyperparamter") so that existing readers of the file still find it.

    Returns
    -------
    pandas.DataFrame
        The results sorted by ``'test_accuracy'``, best first. The same frame
        (including its index) is what gets written to ``name``.

    Raises
    ------
    KeyError
        If the data has no ``'test_accuracy'`` column.
    """
    results_df = pd.DataFrame(result).sort_values('test_accuracy', ascending=False)
    results_df.to_csv(name)
    return results_df
    
# Load the shuffled dataset, then normalise the pixel features.
X, Y = read_mnist(TRAINING_DATASET)
X = transform(X)

# The first 80% of the (already shuffled) rows train the model;
# the remaining rows are held out for evaluation.
train_size = int(0.8 * len(X))

X_train, X_test = X[:train_size], X[train_size:]
Y_train, Y_test = Y[:train_size], Y[train_size:]

In [None]:
%%time
# Network topology: 28*28 = 784 inputs -> 256 units with TanH -> 10 outputs
# with SoftMax (one output per one-hot label column).
layers = [
    Layer(in_size=28*28, out_size=256, activ_function=TanH),
    Layer(in_size=256, out_size=10, activ_function=SoftMax),
]
# Training configuration. The exact meaning of each keyword is defined by
# toygrad's MLP and is not visible from this notebook.
# NOTE(review): the captured log prints every 10 epochs -- presumably driven
# by `verbosity`; confirm in toygrad.
m = MLP(
    layers=layers,
    loss=CategoricalCrossEntropy,
    metrics=[Accuracy],
    bias=True,
    batch_size=8,
    epochs=100,
    momentum=0.1,
    learning_rate=0.4,
    verbosity=1,
)
# Train on the 80% split; the held-out split is passed alongside it
# (presumably to produce the *_test metrics seen in the log -- confirm).
stats = m.train(X_train, Y_train, X_test, Y_test)
# Plot the collected per-epoch statistics for the loss and for accuracy.
# NOTE(review): `m.loss` is passed as-is while Accuracy is instantiated here;
# presumably MLP stores an instance of the loss class -- confirm.
plot_metric(m.loss, stats, title="Train/Test losses per epoch")
plot_metric(Accuracy(), stats, title="Train/Test accuracy per epoch")

Epoch   10
 Accuracy_train: 0.837
 Accuracy_test: 0.848
 CategoricalCrossEntropy_train: 1.224
 CategoricalCrossEntropy_train_std: 2.423
 CategoricalCrossEntropy_test: 1.130
 CategoricalCrossEntropy_test_std: 3.308
Epoch   20
 Accuracy_train: 0.873
 Accuracy_test: 0.881
 CategoricalCrossEntropy_train: 1.101
 CategoricalCrossEntropy_train_std: 2.309
 CategoricalCrossEntropy_test: 1.023
 CategoricalCrossEntropy_test_std: 3.338
Epoch   30
 Accuracy_train: 0.888
 Accuracy_test: 0.882
 CategoricalCrossEntropy_train: 0.995
 CategoricalCrossEntropy_train_std: 2.128
 CategoricalCrossEntropy_test: 1.032
 CategoricalCrossEntropy_test_std: 3.401
