In [6]:
import numpy as np
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch
import wandb

random.seed(27)
np.random.seed(27)
plt.style.use('ggplot')

# Section 1: Pilot study in subset, used to find feasible hyperparameters

### Dataset - A (1/10) subset of cifar

We will choose a subset of the cifar10, and make sure there is class balance

In [7]:
# Pick dataset
ds = "cifar10"

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

if ds == "cifar10":
    train_ds = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
    test_ds = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform)

    #Normalise
    X_train = train_ds.data.astype(np.float32) / 255.0 
    y_train = np.array(train_ds.targets, dtype=np.int64)

    X_test  = test_ds.data.astype(np.float32) / 255.0
    y_test  = np.array(test_ds.targets, dtype=np.int64)

    X_train = X_train.reshape(X_train.shape[0], -1)
    X_test  = X_test.reshape(X_test.shape[0], -1)

    # Flatten
    X_train = X_train.reshape(-1, np.prod(X_train.shape[1:]))
    X_test = X_test.reshape(-1, np.prod(X_test.shape[1:]))

elif ds == "mnist":
    mnist_train = datasets.MNIST(root="./data", train=True, download=True, transform=None)
    mnist_test = datasets.MNIST(root="./data", train=False, download=True, transform=None)

    X_train = mnist_train.data.to(torch.float32).numpy()
    y_train = mnist_train.targets.to(torch.int64).numpy()

    X_test  = mnist_test.data.to(torch.float32).numpy()
    y_test  = mnist_test.targets.to(torch.int64).numpy()

    # Normalize the pixel values (important for neural nets)
    X_train /= 255.0
    X_test /= 255.0

    # Flatten
    X_train = X_train.reshape(-1, np.prod(X_train.shape[1:]))
    X_test = X_test.reshape(-1, np.prod(X_test.shape[1:]))



subset_ratio = 1/10      # take 10% of data
num_classes = 3

def balanced_subset(X, y, ratio, num_classes=2):
    X = np.array(X)
    y = np.array(y)

    indices = []
    for c in range(num_classes):
        class_idx = np.where(y == c)[0]
        n_samples = int(len(class_idx) * ratio)

        # sample without replacement
        chosen = np.random.choice(class_idx, n_samples, replace=False)
        indices.append(chosen)

    indices = np.concatenate(indices)
    np.random.shuffle(indices)

    return X[indices], y[indices]

# Apply to train + test
X_train, y_train = balanced_subset(X_train, y_train, subset_ratio, num_classes)
X_test,  y_test  = balanced_subset(X_test,  y_test,  subset_ratio, num_classes)


def one_hot_encode(y, num_classes=10):
    y_encoded = np.zeros((num_classes, y.size))
    y_encoded[y, np.arange(y.size)] = 1
    return y_encoded

Y_train = one_hot_encode(y_train, 10)  # (10, 60000)
Y_test  = one_hot_encode(y_test, 10)   # (10, 10000)

# --------------------------------------------------
# Quick check
# --------------------------------------------------
print("X_train:", X_train.shape, "Y_train:", Y_train.shape)
print("X_test: ", X_test.shape,  "Y_test: ",  Y_test.shape)

X_train: (1500, 3072) Y_train: (10, 1500)
X_test:  (300, 3072) Y_test:  (10, 300)


### Pilot study - we search for a feasible range of hyperparameters using sweeps

In [8]:
from NN.denseLayer import *
from NN.loss import *
from NN.optimizer import *

> Setup WANDB tracker, where we will perform a simple grid search of a VERY broad search

In [9]:
import matplotlib.pyplot as plt
import os
import wandb
from getAPI import retrieveApi

os.environ['WANDB_API_KEY'] = retrieveApi()

> Do hyperparameter sweep, on a very small subset of cifar-10

The code is based on the tutorials given in the sweeps documentation:
- https://docs.wandb.ai/models/sweeps/define-sweep-configuration#python-script-or-notebook
- https://docs.wandb.ai/models/tutorials/sweeps

In [None]:

sweep_configuration = {
    "method": "random",
    "metric": {"name": "loss", "goal": "minimize"},
    "parameters": {
        "learning_rate": {"distribution": "uniform", "min": 1e-6, "max": 1.0},
        "batch_size": {"values": [32, 64, 128]},
        "epochs": {"value": 30},
        "num_hidden_layers": {"distribution": "int_uniform", "min": 1, "max": 5},
        "n_hidden_units": {"distribution": "int_uniform", "min": 32, "max": 128},
        "l2_coeff": {"distribution": "uniform", "min": 0.0, "max": 0.001},
        "optimizer": {"values": ["sgd"]},
        "activation": {"values": ["relu", "tanh"]},
        "loss_fn": {"values": ["cross", "mse"]},
    },
}

sweep_id = wandb.sweep(sweep=sweep_configuration, project="numpy_nn")


def accuracy(pred, y_true):
    return (pred.argmax(axis=0) == y_true.argmax(axis=0)).mean()


def get_activation(name):
    return ReLU() if name == "relu" else Tanh()


def get_optimizer(name, lr):
    return SGD(learning_rate=lr) if name == "sgd" else Adam(learning_rate=lr)


def get_loss(name):
    return CrossEntropyLoss() if name == "cross" else MSELoss()


def build_model(input_dim, cfg):
    layers = []
    act = get_activation(cfg.activation)
    h = cfg.n_hidden_units
    prev = input_dim

    for _ in range(cfg.num_hidden_layers):
        layers.append(DenseLayer(prev, h, activation=act, initializer=HeInitializer()))
        prev = h

    layers.append(DenseLayer(prev, 10, activation=Softmax(), initializer=NormalInitializer(mean=0, std=0.01)))
    return layers

def train_run():
    wandb.init()
    cfg = wandb.config
    wandb.log(dict(cfg))

    # Build layers with L2 regularization set per layer
    layers = []
    act = get_activation(cfg.activation)
    prev = X_train.shape[1]
    h = cfg.n_hidden_units

    for _ in range(cfg.num_hidden_layers):
        layers.append(DenseLayer(prev, h, activation=act, initializer=HeInitializer(), l2_coeff=cfg.l2_coeff))
        prev = h

    layers.append(DenseLayer(prev, 10, activation=Softmax(), initializer=NormalInitializer(mean=0, std=0.01)))

    optimizer = get_optimizer(cfg.optimizer, cfg.learning_rate)
    loss_fn = get_loss(cfg.loss_fn)
    m = X_train.shape[0]

    for epoch in range(cfg.epochs):
        perm = np.random.permutation(m)
        Xs = X_train[perm].T
        Ys = Y_train[:, perm]

        for i in range(0, m, cfg.batch_size):
            Xb = Xs[:, i:i+cfg.batch_size]
            Yb = Ys[:, i:i+cfg.batch_size]

            A = Xb
            for layer in layers:
                A = layer.forward(A)

            loss = loss_fn.forward(A, Yb)
            dA = loss_fn.backward(A, Yb)

            for layer in reversed(layers):
                dA, dW, db = layer.backward(dA)
                optimizer.update(layer, dW, db)

        acc = accuracy(A, Yb)
        wandb.log({"loss": loss, "accuracy": acc})
        print(f"Epoch {epoch+1}/{cfg.epochs}  Loss={loss:.4f}  Acc={acc:.4f}")

    wandb.finish()


wandb.agent(sweep_id, function=train_run, count=10)

Create sweep with ID: cr5oclrk
Sweep URL: https://wandb.ai/xanderbaatz-danmarks-tekniske-universitet-dtu/numpy_nn/sweeps/cr5oclrk


[34m[1mwandb[0m: Agent Starting Run: wxngzj1q with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	l2_coeff: 0.0009612309151634822
[34m[1mwandb[0m: 	learning_rate: 0.9150623076765048
[34m[1mwandb[0m: 	loss_fn: mse
[34m[1mwandb[0m: 	n_hidden_units: 55
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd


Epoch 1/30  Loss=0.3548  Acc=0.3214
Epoch 2/30  Loss=0.3368  Acc=0.2857
Epoch 3/30  Loss=0.3370  Acc=0.3571
Epoch 4/30  Loss=0.3305  Acc=0.6071
Epoch 5/30  Loss=0.3321  Acc=0.3571
Epoch 6/30  Loss=0.3317  Acc=0.3929
Epoch 7/30  Loss=0.3308  Acc=0.5000
Epoch 8/30  Loss=0.3272  Acc=0.4286
Epoch 9/30  Loss=0.3350  Acc=0.3929
Epoch 10/30  Loss=0.3247  Acc=0.4643
Epoch 11/30  Loss=0.3262  Acc=0.4286
Epoch 12/30  Loss=0.3228  Acc=0.4286
Epoch 13/30  Loss=0.3154  Acc=0.6071
Epoch 14/30  Loss=0.3164  Acc=0.5714
Epoch 15/30  Loss=0.3255  Acc=0.4286
Epoch 16/30  Loss=0.3111  Acc=0.6071
Epoch 17/30  Loss=0.3068  Acc=0.6429
Epoch 18/30  Loss=0.3240  Acc=0.4286
Epoch 19/30  Loss=0.3138  Acc=0.4643
Epoch 20/30  Loss=0.3088  Acc=0.5357
Epoch 21/30  Loss=0.3260  Acc=0.4286
Epoch 22/30  Loss=0.3251  Acc=0.4286
Epoch 23/30  Loss=0.3146  Acc=0.3929
Epoch 24/30  Loss=0.3071  Acc=0.4643
Epoch 25/30  Loss=0.3005  Acc=0.5357
Epoch 26/30  Loss=0.2846  Acc=0.6071
Epoch 27/30  Loss=0.2891  Acc=0.5714
Epoch 28/3

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Epoch 30/30  Loss=0.2931  Acc=0.5357


0,1
accuracy,▂▁▂▇▂▃▅▄▃▄▄▄▇▇▄▇█▄▄▆▄▄▃▄▆▇▇▇▃▆
batch_size,▁
epochs,▁
l2_coeff,▁
learning_rate,▁
loss,█▆▆▆▆▆▆▅▆▅▅▅▄▄▅▄▃▅▄▃▅▅▄▃▃▁▁▁▄▂
n_hidden_units,▁
num_hidden_layers,▁

0,1
accuracy,0.53571
activation,tanh
batch_size,64
epochs,30
l2_coeff,0.00096
learning_rate,0.91506
loss,0.2931
loss_fn,mse
n_hidden_units,55
num_hidden_layers,5


[34m[1mwandb[0m: Agent Starting Run: yzzi0kmi with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	l2_coeff: 0.0006441350926703784
[34m[1mwandb[0m: 	learning_rate: 0.9681327816564806
[34m[1mwandb[0m: 	loss_fn: cross
[34m[1mwandb[0m: 	n_hidden_units: 106
[34m[1mwandb[0m: 	num_hidden_layers: 2
[34m[1mwandb[0m: 	optimizer: sgd


Epoch 1/30  Loss=1.2923  Acc=0.3571
Epoch 2/30  Loss=1.1581  Acc=0.3929
Epoch 3/30  Loss=1.1475  Acc=0.3214
Epoch 4/30  Loss=1.1300  Acc=0.3571
Epoch 5/30  Loss=1.1355  Acc=0.1071
Epoch 6/30  Loss=1.1198  Acc=0.3571
Epoch 7/30  Loss=1.1064  Acc=0.4286
Epoch 8/30  Loss=1.1118  Acc=0.3214
Epoch 9/30  Loss=1.1093  Acc=0.2500
Epoch 10/30  Loss=1.0965  Acc=0.3929
Epoch 11/30  Loss=1.1102  Acc=0.2857
Epoch 12/30  Loss=1.0945  Acc=0.4643
Epoch 13/30  Loss=1.1067  Acc=0.3214
Epoch 14/30  Loss=1.0966  Acc=0.3929
Epoch 15/30  Loss=1.0922  Acc=0.4643
Epoch 16/30  Loss=1.1243  Acc=0.2857
Epoch 17/30  Loss=1.1108  Acc=0.2143
Epoch 18/30  Loss=1.1031  Acc=0.3214
Epoch 19/30  Loss=1.1043  Acc=0.3571
Epoch 20/30  Loss=1.1042  Acc=0.3214
Epoch 21/30  Loss=1.1004  Acc=0.3571
Epoch 22/30  Loss=1.1075  Acc=0.2857
Epoch 23/30  Loss=1.1228  Acc=0.1786
Epoch 24/30  Loss=1.0915  Acc=0.4643
Epoch 25/30  Loss=1.1051  Acc=0.2500
Epoch 26/30  Loss=1.0947  Acc=0.3929
Epoch 27/30  Loss=1.1074  Acc=0.2857
Epoch 28/3

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Epoch 30/30  Loss=1.0948  Acc=0.3214


0,1
accuracy,▆▇▅▆▁▆▇▅▄▇▅█▅▇█▅▃▅▆▅▆▅▂█▄▇▅▄▇▅
batch_size,▁
epochs,▁
l2_coeff,▁
learning_rate,▁
loss,█▃▃▂▃▂▂▂▂▁▂▁▂▁▁▂▂▁▁▁▁▂▂▁▁▁▂▂▁▁
n_hidden_units,▁
num_hidden_layers,▁

0,1
accuracy,0.32143
activation,relu
batch_size,64
epochs,30
l2_coeff,0.00064
learning_rate,0.96813
loss,1.09476
loss_fn,cross
n_hidden_units,106
num_hidden_layers,2


[34m[1mwandb[0m: Agent Starting Run: z4g7ed3l with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	l2_coeff: 0.0005365751631820697
[34m[1mwandb[0m: 	learning_rate: 0.30010208785908193
[34m[1mwandb[0m: 	loss_fn: cross
[34m[1mwandb[0m: 	n_hidden_units: 78
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd


  exp_Z = np.exp(Z - np.max(Z, axis=0, keepdims=True))


Epoch 1/30  Loss=nan  Acc=0.3043
Epoch 2/30  Loss=nan  Acc=0.3261
Epoch 3/30  Loss=nan  Acc=0.3696
Epoch 4/30  Loss=nan  Acc=0.3370
Epoch 5/30  Loss=nan  Acc=0.3370
Epoch 6/30  Loss=nan  Acc=0.3152
Epoch 7/30  Loss=nan  Acc=0.2500
Epoch 8/30  Loss=nan  Acc=0.3478
Epoch 9/30  Loss=nan  Acc=0.3478
Epoch 10/30  Loss=nan  Acc=0.3804
Epoch 11/30  Loss=nan  Acc=0.3913
Epoch 12/30  Loss=nan  Acc=0.3152
Epoch 13/30  Loss=nan  Acc=0.3261
Epoch 14/30  Loss=nan  Acc=0.3587
Epoch 15/30  Loss=nan  Acc=0.3152
Epoch 16/30  Loss=nan  Acc=0.2609
Epoch 17/30  Loss=nan  Acc=0.3152
Epoch 18/30  Loss=nan  Acc=0.3152
Epoch 19/30  Loss=nan  Acc=0.3478
Epoch 20/30  Loss=nan  Acc=0.2609
Epoch 21/30  Loss=nan  Acc=0.3478
Epoch 22/30  Loss=nan  Acc=0.3370
Epoch 23/30  Loss=nan  Acc=0.3370
Epoch 24/30  Loss=nan  Acc=0.3152
Epoch 25/30  Loss=nan  Acc=0.3043
Epoch 26/30  Loss=nan  Acc=0.3913
Epoch 27/30  Loss=nan  Acc=0.3043
Epoch 28/30  Loss=nan  Acc=0.3478


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Epoch 29/30  Loss=nan  Acc=0.2826
Epoch 30/30  Loss=nan  Acc=0.3696


0,1
accuracy,▄▅▇▅▅▄▁▆▆▇█▄▅▆▄▂▄▄▆▂▆▅▅▄▄█▄▆▃▇
batch_size,▁
epochs,▁
l2_coeff,▁
learning_rate,▁
n_hidden_units,▁
num_hidden_layers,▁
+1,...

0,1
accuracy,0.36957
activation,relu
batch_size,128
epochs,30
l2_coeff,0.00054
learning_rate,0.3001
loss,
loss_fn,cross
n_hidden_units,78
num_hidden_layers,4


[34m[1mwandb[0m: Agent Starting Run: odi8juml with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	l2_coeff: 0.0007703105926588215
[34m[1mwandb[0m: 	learning_rate: 0.582419387297521
[34m[1mwandb[0m: 	loss_fn: cross
[34m[1mwandb[0m: 	n_hidden_units: 45
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd


  exp_Z = np.exp(Z - np.max(Z, axis=0, keepdims=True))


Epoch 1/30  Loss=nan  Acc=0.3214
Epoch 2/30  Loss=nan  Acc=0.2857
Epoch 3/30  Loss=nan  Acc=0.3929
Epoch 4/30  Loss=nan  Acc=0.3214
Epoch 5/30  Loss=nan  Acc=0.2857
Epoch 6/30  Loss=nan  Acc=0.5000
Epoch 7/30  Loss=nan  Acc=0.4286
Epoch 8/30  Loss=nan  Acc=0.1786
Epoch 9/30  Loss=nan  Acc=0.2857
Epoch 10/30  Loss=nan  Acc=0.2500
Epoch 11/30  Loss=nan  Acc=0.2500
Epoch 12/30  Loss=nan  Acc=0.4286
Epoch 13/30  Loss=nan  Acc=0.3214
Epoch 14/30  Loss=nan  Acc=0.3571
Epoch 15/30  Loss=nan  Acc=0.2857
Epoch 16/30  Loss=nan  Acc=0.3214
Epoch 17/30  Loss=nan  Acc=0.2143
Epoch 18/30  Loss=nan  Acc=0.3571
Epoch 19/30  Loss=nan  Acc=0.2143
Epoch 20/30  Loss=nan  Acc=0.3571
Epoch 21/30  Loss=nan  Acc=0.3929
Epoch 22/30  Loss=nan  Acc=0.3929
Epoch 23/30  Loss=nan  Acc=0.2857
Epoch 24/30  Loss=nan  Acc=0.3214
Epoch 25/30  Loss=nan  Acc=0.1786
Epoch 26/30  Loss=nan  Acc=0.4286
Epoch 27/30  Loss=nan  Acc=0.3929


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Epoch 28/30  Loss=nan  Acc=0.3571
Epoch 29/30  Loss=nan  Acc=0.3571
Epoch 30/30  Loss=nan  Acc=0.3571


0,1
accuracy,▄▃▆▄▃█▆▁▃▃▃▆▄▅▃▄▂▅▂▅▆▆▃▄▁▆▆▅▅▅
batch_size,▁
epochs,▁
l2_coeff,▁
learning_rate,▁
n_hidden_units,▁
num_hidden_layers,▁
+1,...

0,1
accuracy,0.35714
activation,relu
batch_size,32
epochs,30
l2_coeff,0.00077
learning_rate,0.58242
loss,
loss_fn,cross
n_hidden_units,45
num_hidden_layers,3


[34m[1mwandb[0m: Agent Starting Run: ejt6jugu with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	l2_coeff: 0.0003054350522506906
[34m[1mwandb[0m: 	learning_rate: 0.2132057620430853
[34m[1mwandb[0m: 	loss_fn: cross
[34m[1mwandb[0m: 	n_hidden_units: 98
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd


Epoch 1/30  Loss=1.5477  Acc=0.3043
Epoch 2/30  Loss=1.1390  Acc=0.3152
Epoch 3/30  Loss=1.3167  Acc=0.4239
Epoch 4/30  Loss=1.3679  Acc=0.3261
Epoch 5/30  Loss=1.8564  Acc=0.3478
Epoch 6/30  Loss=1.4271  Acc=0.3587
Epoch 7/30  Loss=1.1645  Acc=0.3370
Epoch 8/30  Loss=1.3134  Acc=0.3370
Epoch 9/30  Loss=1.1801  Acc=0.2391
Epoch 10/30  Loss=1.4073  Acc=0.4239
Epoch 11/30  Loss=1.1293  Acc=0.4457
Epoch 12/30  Loss=1.3694  Acc=0.3043
Epoch 13/30  Loss=1.3509  Acc=0.4348
Epoch 14/30  Loss=1.1939  Acc=0.3696
Epoch 15/30  Loss=1.0722  Acc=0.3370
Epoch 16/30  Loss=1.1666  Acc=0.4130
Epoch 17/30  Loss=1.3189  Acc=0.4674
Epoch 18/30  Loss=1.6932  Acc=0.3152
Epoch 19/30  Loss=1.3358  Acc=0.5326
Epoch 20/30  Loss=1.1098  Acc=0.4783
Epoch 21/30  Loss=1.0010  Acc=0.4891
Epoch 22/30  Loss=1.1017  Acc=0.4674
Epoch 23/30  Loss=1.2203  Acc=0.5217
Epoch 24/30  Loss=1.0197  Acc=0.4457
Epoch 25/30  Loss=1.0201  Acc=0.5109
Epoch 26/30  Loss=1.1461  Acc=0.4130
Epoch 27/30  Loss=1.0263  Acc=0.5217
Epoch 28/3

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Epoch 30/30  Loss=0.8923  Acc=0.5978


0,1
accuracy,▂▂▅▃▃▃▃▃▁▅▅▂▅▄▃▄▅▂▇▆▆▅▇▅▆▄▇▇██
batch_size,▁
epochs,▁
l2_coeff,▁
learning_rate,▁
loss,▆▃▄▄█▅▃▄▃▅▃▅▄▃▂▃▄▇▄▃▂▃▃▂▂▃▂▁▁▁
n_hidden_units,▁
num_hidden_layers,▁

0,1
accuracy,0.59783
activation,tanh
batch_size,128
epochs,30
l2_coeff,0.00031
learning_rate,0.21321
loss,0.89228
loss_fn,cross
n_hidden_units,98
num_hidden_layers,4


[34m[1mwandb[0m: Agent Starting Run: 38lavwu0 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	l2_coeff: 0.0009435276065790288
[34m[1mwandb[0m: 	learning_rate: 0.6507824825970303
[34m[1mwandb[0m: 	loss_fn: cross
[34m[1mwandb[0m: 	n_hidden_units: 102
[34m[1mwandb[0m: 	num_hidden_layers: 1
[34m[1mwandb[0m: 	optimizer: sgd


Epoch 1/30  Loss=8.1739  Acc=0.3587
Epoch 2/30  Loss=7.4175  Acc=0.4239
Epoch 3/30  Loss=10.0106  Acc=0.3913
Epoch 4/30  Loss=13.6124  Acc=0.2609
Epoch 5/30  Loss=10.2875  Acc=0.3804
Epoch 6/30  Loss=14.2158  Acc=0.2283
Epoch 7/30  Loss=7.7113  Acc=0.3696
Epoch 8/30  Loss=10.2328  Acc=0.3370
Epoch 9/30  Loss=8.6454  Acc=0.3696
Epoch 10/30  Loss=10.3135  Acc=0.3370
Epoch 11/30  Loss=12.8144  Acc=0.3043
Epoch 12/30  Loss=9.8109  Acc=0.4674
Epoch 13/30  Loss=12.2137  Acc=0.3370
Epoch 14/30  Loss=11.8133  Acc=0.3587
Epoch 15/30  Loss=12.0135  Acc=0.3478
Epoch 16/30  Loss=8.1793  Acc=0.3043
Epoch 17/30  Loss=11.9627  Acc=0.3478
Epoch 18/30  Loss=9.0385  Acc=0.3478
Epoch 19/30  Loss=11.2885  Acc=0.2826
Epoch 20/30  Loss=10.1558  Acc=0.3261
Epoch 21/30  Loss=12.0135  Acc=0.3478
Epoch 22/30  Loss=14.2160  Acc=0.2283
Epoch 23/30  Loss=11.3369  Acc=0.3696
Epoch 24/30  Loss=10.7155  Acc=0.3913
Epoch 25/30  Loss=13.2148  Acc=0.2826
Epoch 26/30  Loss=10.0765  Acc=0.3913
Epoch 27/30  Loss=12.4043  A

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Epoch 30/30  Loss=6.7304  Acc=0.2717


0,1
accuracy,▅▇▆▂▅▁▅▄▅▄▃█▄▅▅▃▅▅▃▄▅▁▅▆▃▆▄▄▃▂
batch_size,▁
epochs,▁
l2_coeff,▁
learning_rate,▁
loss,▂▂▄▇▄█▂▄▃▄▇▄▆▆▆▂▆▃▅▄▆█▅▅▇▄▆▆▇▁
n_hidden_units,▁
num_hidden_layers,▁

0,1
accuracy,0.27174
activation,tanh
batch_size,128
epochs,30
l2_coeff,0.00094
learning_rate,0.65078
loss,6.73036
loss_fn,cross
n_hidden_units,102
num_hidden_layers,1


[34m[1mwandb[0m: Agent Starting Run: efay0ef7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	l2_coeff: 0.0008503490011638776
[34m[1mwandb[0m: 	learning_rate: 0.02819260386997762
[34m[1mwandb[0m: 	loss_fn: mse
[34m[1mwandb[0m: 	n_hidden_units: 118
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd


Epoch 1/30  Loss=0.4479  Acc=0.1087
Epoch 2/30  Loss=0.4473  Acc=0.1739
Epoch 3/30  Loss=0.4462  Acc=0.2391
Epoch 4/30  Loss=0.4449  Acc=0.4239
Epoch 5/30  Loss=0.4453  Acc=0.3043
Epoch 6/30  Loss=0.4438  Acc=0.3370
Epoch 7/30  Loss=0.4423  Acc=0.3587
Epoch 8/30  Loss=0.4412  Acc=0.4674
Epoch 9/30  Loss=0.4399  Acc=0.4348
Epoch 10/30  Loss=0.4399  Acc=0.3587
Epoch 11/30  Loss=0.4393  Acc=0.2609
Epoch 12/30  Loss=0.4369  Acc=0.4565
Epoch 13/30  Loss=0.4372  Acc=0.3587
Epoch 14/30  Loss=0.4361  Acc=0.3261
Epoch 15/30  Loss=0.4360  Acc=0.3478
Epoch 16/30  Loss=0.4344  Acc=0.2609
Epoch 17/30  Loss=0.4332  Acc=0.3152
Epoch 18/30  Loss=0.4319  Acc=0.3587
Epoch 19/30  Loss=0.4316  Acc=0.3261
Epoch 20/30  Loss=0.4298  Acc=0.4239
Epoch 21/30  Loss=0.4291  Acc=0.3478
Epoch 22/30  Loss=0.4265  Acc=0.3804
Epoch 23/30  Loss=0.4281  Acc=0.2717
Epoch 24/30  Loss=0.4252  Acc=0.3478
Epoch 25/30  Loss=0.4247  Acc=0.3478
Epoch 26/30  Loss=0.4218  Acc=0.3804
Epoch 27/30  Loss=0.4211  Acc=0.3587
Epoch 28/3

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Epoch 30/30  Loss=0.4183  Acc=0.3696


0,1
accuracy,▁▂▄▇▅▅▆█▇▆▄█▆▅▆▄▅▆▅▇▆▆▄▆▆▆▆▇▇▆
batch_size,▁
epochs,▁
l2_coeff,▁
learning_rate,▁
loss,███▇▇▇▇▆▆▆▆▅▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▁▁
n_hidden_units,▁
num_hidden_layers,▁

0,1
accuracy,0.36957
activation,relu
batch_size,128
epochs,30
l2_coeff,0.00085
learning_rate,0.02819
loss,0.41827
loss_fn,mse
n_hidden_units,118
num_hidden_layers,5


[34m[1mwandb[0m: Agent Starting Run: 410dvf6m with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	l2_coeff: 0.0004939282348011701
[34m[1mwandb[0m: 	learning_rate: 0.822300294896828
[34m[1mwandb[0m: 	loss_fn: cross
[34m[1mwandb[0m: 	n_hidden_units: 91
[34m[1mwandb[0m: 	num_hidden_layers: 2
[34m[1mwandb[0m: 	optimizer: sgd


Epoch 1/30  Loss=1.2051  Acc=0.2857
Epoch 2/30  Loss=1.1522  Acc=0.2500
Epoch 3/30  Loss=1.1364  Acc=0.3571
Epoch 4/30  Loss=1.1135  Acc=0.3214
Epoch 5/30  Loss=1.1293  Acc=0.2857
Epoch 6/30  Loss=1.0807  Acc=0.5000
Epoch 7/30  Loss=1.1010  Acc=0.3214
Epoch 8/30  Loss=1.1062  Acc=0.3571
Epoch 9/30  Loss=1.0549  Acc=0.4643
Epoch 10/30  Loss=1.1146  Acc=0.2857
Epoch 11/30  Loss=1.1023  Acc=0.3929
Epoch 12/30  Loss=1.0725  Acc=0.4286
Epoch 13/30  Loss=1.0927  Acc=0.4286
Epoch 14/30  Loss=1.1008  Acc=0.3929
Epoch 15/30  Loss=1.0925  Acc=0.4286
Epoch 16/30  Loss=1.1172  Acc=0.3214
Epoch 17/30  Loss=1.1250  Acc=0.2857
Epoch 18/30  Loss=1.1050  Acc=0.2500
Epoch 19/30  Loss=1.1039  Acc=0.3929
Epoch 20/30  Loss=1.1025  Acc=0.1786
Epoch 21/30  Loss=1.0987  Acc=0.3929
Epoch 22/30  Loss=1.0988  Acc=0.4286
Epoch 23/30  Loss=1.1066  Acc=0.3214
Epoch 24/30  Loss=1.1929  Acc=0.0714
Epoch 25/30  Loss=1.0974  Acc=0.2500
Epoch 26/30  Loss=1.0970  Acc=0.3929
Epoch 27/30  Loss=1.1269  Acc=0.2857
Epoch 28/3

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Epoch 29/30  Loss=1.1394  Acc=0.1786
Epoch 30/30  Loss=1.0975  Acc=0.3214


0,1
accuracy,▅▄▆▅▅█▅▆▇▅▆▇▇▆▇▅▅▄▆▃▆▇▅▁▄▆▅▇▃▅
batch_size,▁
epochs,▁
l2_coeff,▁
learning_rate,▁
loss,█▆▅▄▄▂▃▃▁▄▃▂▃▃▃▄▄▃▃▃▃▃▃▇▃▃▄▂▅▃
n_hidden_units,▁
num_hidden_layers,▁

0,1
accuracy,0.32143
activation,relu
batch_size,32
epochs,30
l2_coeff,0.00049
learning_rate,0.8223
loss,1.09746
loss_fn,cross
n_hidden_units,91
num_hidden_layers,2


[34m[1mwandb[0m: Agent Starting Run: h6d9rs3v with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	l2_coeff: 0.00040930048308229495
[34m[1mwandb[0m: 	learning_rate: 0.698183501079988
[34m[1mwandb[0m: 	loss_fn: mse
[34m[1mwandb[0m: 	n_hidden_units: 98
[34m[1mwandb[0m: 	num_hidden_layers: 2
[34m[1mwandb[0m: 	optimizer: sgd


Epoch 1/30  Loss=0.3434  Acc=0.3929
Epoch 2/30  Loss=0.3240  Acc=0.5714
Epoch 3/30  Loss=0.3341  Acc=0.2857
Epoch 4/30  Loss=0.3249  Acc=0.4286
Epoch 5/30  Loss=0.3244  Acc=0.4286
Epoch 6/30  Loss=0.3214  Acc=0.5000
Epoch 7/30  Loss=0.3171  Acc=0.4643
Epoch 8/30  Loss=0.3227  Acc=0.3571
Epoch 9/30  Loss=0.3125  Acc=0.4286
Epoch 10/30  Loss=0.2948  Acc=0.6071
Epoch 11/30  Loss=0.3153  Acc=0.5714
Epoch 12/30  Loss=0.3142  Acc=0.3214
Epoch 13/30  Loss=0.2894  Acc=0.5357
Epoch 14/30  Loss=0.2984  Acc=0.5357
Epoch 15/30  Loss=0.2941  Acc=0.6071
Epoch 16/30  Loss=0.2448  Acc=0.7143
Epoch 17/30  Loss=0.2790  Acc=0.5000
Epoch 18/30  Loss=0.3000  Acc=0.5357
Epoch 19/30  Loss=0.2887  Acc=0.5357
Epoch 20/30  Loss=0.2932  Acc=0.6071
Epoch 21/30  Loss=0.2833  Acc=0.6429
Epoch 22/30  Loss=0.2264  Acc=0.7500
Epoch 23/30  Loss=0.2252  Acc=0.7500
Epoch 24/30  Loss=0.2308  Acc=0.6786
Epoch 25/30  Loss=0.2525  Acc=0.6786
Epoch 26/30  Loss=0.2430  Acc=0.6786
Epoch 27/30  Loss=0.2928  Acc=0.4643
Epoch 28/3

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Epoch 29/30  Loss=0.2576  Acc=0.7143
Epoch 30/30  Loss=0.2230  Acc=0.7500


0,1
accuracy,▃▅▁▃▃▄▄▂▃▆▅▂▅▅▆▇▄▅▅▆▆██▇▇▇▄▇▇█
batch_size,▁
epochs,▁
l2_coeff,▁
learning_rate,▁
loss,█▇▇▇▇▇▆▇▆▅▆▆▅▅▅▂▄▅▅▅▅▁▁▁▃▂▅▂▃▁
n_hidden_units,▁
num_hidden_layers,▁

0,1
accuracy,0.75
activation,tanh
batch_size,64
epochs,30
l2_coeff,0.00041
learning_rate,0.69818
loss,0.22303
loss_fn,mse
n_hidden_units,98
num_hidden_layers,2


[34m[1mwandb[0m: Agent Starting Run: 6xqkl2u1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	l2_coeff: 4.4390556586005326e-05
[34m[1mwandb[0m: 	learning_rate: 0.0026995599295566663
[34m[1mwandb[0m: 	loss_fn: mse
[34m[1mwandb[0m: 	n_hidden_units: 99
[34m[1mwandb[0m: 	num_hidden_layers: 1
[34m[1mwandb[0m: 	optimizer: sgd


Epoch 1/30  Loss=0.4511  Acc=0.3214
Epoch 2/30  Loss=0.4504  Acc=0.3214
Epoch 3/30  Loss=0.4465  Acc=0.1786
Epoch 4/30  Loss=0.4452  Acc=0.3571
Epoch 5/30  Loss=0.4463  Acc=0.3571
Epoch 6/30  Loss=0.4438  Acc=0.3214
Epoch 7/30  Loss=0.4440  Acc=0.2500
Epoch 8/30  Loss=0.4416  Acc=0.3929
Epoch 9/30  Loss=0.4478  Acc=0.0357
Epoch 10/30  Loss=0.4418  Acc=0.3571
Epoch 11/30  Loss=0.4441  Acc=0.1786
Epoch 12/30  Loss=0.4416  Acc=0.3214
Epoch 13/30  Loss=0.4430  Acc=0.1429
Epoch 14/30  Loss=0.4392  Acc=0.2143
Epoch 15/30  Loss=0.4366  Acc=0.3571
Epoch 16/30  Loss=0.4375  Acc=0.3214
Epoch 17/30  Loss=0.4372  Acc=0.3929
Epoch 18/30  Loss=0.4327  Acc=0.4286
Epoch 19/30  Loss=0.4313  Acc=0.6071
Epoch 20/30  Loss=0.4293  Acc=0.4286
Epoch 21/30  Loss=0.4316  Acc=0.3571
Epoch 22/30  Loss=0.4360  Acc=0.2143
Epoch 23/30  Loss=0.4361  Acc=0.1786
Epoch 24/30  Loss=0.4294  Acc=0.3214
Epoch 25/30  Loss=0.4271  Acc=0.3571
Epoch 26/30  Loss=0.4292  Acc=0.3571
Epoch 27/30  Loss=0.4297  Acc=0.2143
Epoch 28/3

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Epoch 29/30  Loss=0.4271  Acc=0.2857
Epoch 30/30  Loss=0.4242  Acc=0.3929


0,1
accuracy,▄▄▃▅▅▄▄▅▁▅▃▄▂▃▅▄▅▆█▆▅▃▃▄▅▅▃▅▄▅
batch_size,▁
epochs,▁
l2_coeff,▁
learning_rate,▁
loss,██▇▆▇▆▆▆▇▆▆▆▆▅▄▄▄▃▃▂▃▄▄▂▂▂▂▂▂▁
n_hidden_units,▁
num_hidden_layers,▁

0,1
accuracy,0.39286
activation,relu
batch_size,64
epochs,30
l2_coeff,4e-05
learning_rate,0.0027
loss,0.42415
loss_fn,mse
n_hidden_units,99
num_hidden_layers,1


# Section 2: Hyperparameter study using grid search, random search and bayesian optimization

# Section 3: Validation of model