In [162]:
import numpy as np
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch
from sklearn.model_selection import KFold
import wandb
import os

# Seed Python's and NumPy's global random generators so shuffles and random draws repeat between runs.
random.seed(27)
np.random.seed(27)
# Apply matplotlib's built-in 'ggplot' style to subsequent figures.
plt.style.use('ggplot')

In [163]:
from NN.denseLayer import *
from NN.loss import *
from NN.optimizer import *
from NN.activations import *

## Dataset

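We use the FashionMNIST dataset, loaded through `torchvision`: 28×28 grayscale images of clothing items in 10 classes, split into 60,000 training and 10,000 test images.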

In [164]:
# Number of target classes; used to size the one-hot labels and passed to the sweeps as "num_classes".
num_classes = 10

In [165]:
def load_fashion_mnist(flatten=True):
    """Fetch FashionMNIST through torchvision and return it as NumPy arrays.

    The dataset is downloaded into the local "data" directory when needed.
    Pixel values are converted to float32 and divided by 255.

    Args:
        flatten: When True, every image is reshaped into a single row vector.

    Returns:
        Tuple (X_train, y_train, X_test, y_test): float32 image arrays and the
        label arrays taken from each dataset's `targets`.
    """
    to_tensor = transforms.ToTensor()

    # Build the train split first, then the test split.
    train_split, test_split = [
        datasets.FashionMNIST(
            root="data",
            train=is_train,
            download=True,
            transform=to_tensor
        )
        for is_train in (True, False)
    ]

    def _as_arrays(split):
        # Read the raw image tensor directly, cast to float32 and rescale in place.
        images = split.data.numpy().astype(np.float32)
        labels = split.targets.numpy()
        images /= 255.0
        if flatten:
            images = images.reshape(images.shape[0], -1)
        return images, labels

    X_train, y_train = _as_arrays(train_split)
    X_test, y_test = _as_arrays(test_split)

    return X_train, y_train, X_test, y_test

In [166]:
def one_hot(y, num_classes):
    """One-hot encode integer class labels.

    Args:
        y: Sequence of integer class indices.
        num_classes: Width of the encoding (number of columns).

    Returns:
        Float array of shape (len(y), num_classes) with a single 1 per row.
    """
    n_samples = len(y)
    encoded = np.zeros((n_samples, num_classes))
    row_ids = np.arange(n_samples)
    # Set, for each row, the column given by that row's label.
    encoded[row_ids, y] = 1
    return encoded

def accuracy(probs, Y_true):
    """Fraction of samples whose arg-max class matches the arg-max of the target.

    Both inputs are reduced with argmax along axis 0, i.e. one column per sample.

    Args:
        probs: Predicted class scores, classes along axis 0.
        Y_true: One-hot targets laid out like `probs`.

    Returns:
        Mean of the element-wise prediction/label matches.
    """
    hits = np.equal(np.argmax(probs, axis=0), np.argmax(Y_true, axis=0))
    return np.mean(hits)

In [167]:
# Load the train/test splits with every image flattened into one row vector.
X_train, y_train, X_test, y_test = load_fashion_mnist(flatten=True) 

In [168]:
# One-hot encode the integer labels and transpose, so classes lie along axis 0
# and samples along axis 1: (10, 60000) for train, (10, 10000) for test.
Y_train, Y_test = (one_hot(labels, num_classes).T for labels in (y_train, y_test))

# Sanity-check the array shapes.
print("X_train:", X_train.shape, "Y_train:", Y_train.shape)
print("X_test: ", X_test.shape, "Y_test: ", Y_test.shape)

X_train: (60000, 784) Y_train: (10, 60000)
X_test:  (10000, 784) Y_test:  (10, 10000)


## Sweep

Helper functions:

In [169]:
def get_activation_instance(name):
    """Return a new activation object for a sweep's "activation" value.

    Args:
        name: One of "relu", "tanh" or "sigmoid".

    Returns:
        A freshly constructed ReLU, Tanh or Sigmoid instance.

    Raises:
        ValueError: If `name` is not one of the supported activations. Without
            this check an unknown name would fall through and return None.
    """
    if name == "relu":
        return ReLU()
    if name == "tanh":
        return Tanh()
    if name == "sigmoid":
        return Sigmoid()
    raise ValueError(
        f"Unknown activation {name!r}; expected 'relu', 'tanh' or 'sigmoid'"
    )

def get_optimizer(name, lr):
    """Return a new optimizer object for a sweep's "optimizer" value.

    Args:
        name: Either "sgd" or "adam".
        lr: Learning rate forwarded to the optimizer as `learning_rate`.

    Returns:
        A freshly constructed SGD or Adam instance.

    Raises:
        ValueError: If `name` is not a supported optimizer. Without this check
            an unknown name would fall through and return None.
    """
    if name == "sgd":
        return SGD(learning_rate=lr)
    if name == "adam":
        return Adam(learning_rate=lr)
    raise ValueError(f"Unknown optimizer {name!r}; expected 'sgd' or 'adam'")

Sweep configurations: a random search and a Bayesian search over the same hyperparameter space.

In [170]:
# Random-search sweep over the network and optimiser hyperparameters.
# The target metric is "mean_cv_accuracy", the cross-validation mean that
# train_run reports once per run. "val_accuracy" is logged once per fold by
# train_on_split, so as a run-level value it would only reflect the last fold.
random_sweep = {
    "method": "random",
    "metric": {
        "name": "mean_cv_accuracy",
        "goal": "maximize"
    },
    "parameters": {
        "learning_rate": {"distribution": "log_uniform_values", "min": 0.0001, "max": 0.01},
        "batch_size": {"values": [32, 64, 128]},
        "epochs": {"value": 30},
        "num_hidden_layers": {"distribution": "int_uniform", "min": 1, "max": 5},
        "n_hidden_units": {"distribution": "int_uniform", "min": 32, "max": 128},
        "l2_coeff": {"distribution": "uniform", "min": 0.0, "max": 0.001},
        "optimizer": {"values": ["sgd", "adam"]},
        "activation": {"values": ["relu", "tanh", "sigmoid"]},
        "num_classes": {"value": num_classes},
    },
}

In [171]:
# Bayesian-search sweep: maximises "mean_cv_accuracy" over the network and
# optimiser hyperparameters; "epochs" and "num_classes" are held fixed.
bayes_sweep = dict(
    method="bayes",
    metric=dict(name="mean_cv_accuracy", goal="maximize"),
    parameters=dict(
        learning_rate=dict(distribution="log_uniform_values", min=0.0001, max=0.01),
        batch_size=dict(values=[32, 64, 128]),
        epochs=dict(value=30),
        num_hidden_layers=dict(distribution="int_uniform", min=1, max=5),
        n_hidden_units=dict(distribution="int_uniform", min=32, max=128),
        l2_coeff=dict(distribution="uniform", min=0.0, max=0.001),
        optimizer=dict(values=["sgd", "adam"]),
        activation=dict(values=["relu", "tanh", "sigmoid"]),
        num_classes=dict(value=num_classes),
    ),
)

In [172]:
# Register the random-search sweep with W&B and keep its ID for launching agents.
random_sweep_id = wandb.sweep(sweep=random_sweep, project="numpy-nn-random-search-cv")

Create sweep with ID: 3nc3r6my
Sweep URL: https://wandb.ai/xanderbaatz-danmarks-tekniske-universitet-dtu/numpy-nn-random-search-cv/sweeps/3nc3r6my


In [173]:
# Register the Bayesian sweep with W&B; the returned ID is what wandb.agent is started with.
bayesian_sweep_id = wandb.sweep(sweep=bayes_sweep, project="numpy-nn-bayes-cv")

Create sweep with ID: m688u22d
Sweep URL: https://wandb.ai/xanderbaatz-danmarks-tekniske-universitet-dtu/numpy-nn-bayes-cv/sweeps/m688u22d


## Train procedure

In [None]:
# Credentials: supply WANDB_API_KEY through the environment rather than hard-coding a key here.
#os.environ['WANDB_API_KEY'] = ...

# Authenticate this session with Weights & Biases.
# NOTE(review): the sweep-creation cells above call wandb before this cell runs;
# confirm credentials are already available on a fresh kernel.
wandb.login()

True

In [None]:
def stack_layers(input_dim, cfg):
    """Assemble the network as an ordered list of DenseLayer objects.

    Args:
        input_dim: Input size of the first layer.
        cfg: Mapping with 'num_hidden_layers', 'n_hidden_units', 'activation'
            and 'num_classes'; 'l2_coeff' is optional and defaults to 0.0.

    Returns:
        List holding the hidden layers followed by one Softmax output layer.
        The output layer is constructed without an `l2_coeff` argument.
    """
    depth = int(cfg['num_hidden_layers'])
    network = []
    fan_in = input_dim

    built = 0
    while built < depth:
        # Each hidden layer gets its own activation object.
        nonlinearity = get_activation_instance(cfg['activation'])
        # initializer=None: the activation is a hyperparameter, and (per the
        # original author's note) DenseLayer then picks the initialiser itself.
        hidden = DenseLayer(
            fan_in,
            int(cfg['n_hidden_units']),
            activation=nonlinearity,
            initializer=None,
            l2_coeff=float(cfg.get('l2_coeff', 0.0)),
        )
        network.append(hidden)
        fan_in = int(cfg['n_hidden_units'])
        built += 1

    # Output layer: one unit per class, Softmax activation.
    output = DenseLayer(fan_in, int(cfg['num_classes']), activation=Softmax(),
                        initializer=None)
    network.append(output)
    return network

#Get the accuracy metric
def accuracy_from_probs(A, Y_true_onehot):
    """Classification accuracy from column-wise class scores.

    Args:
        A: Array of predicted scores, classes along axis 0, one column per sample.
        Y_true_onehot: One-hot targets laid out like `A`.

    Returns:
        Python float: share of columns where both arg-max indices agree.
    """
    matches = A.argmax(axis=0) == Y_true_onehot.argmax(axis=0)
    return float(matches.mean())

def _forward_pass(layers, A):
    """Propagate A through every layer in order and return the final output."""
    for layer in layers:
        A = layer.forward(A)
    return A


def train_on_split(X_train_fold, Y_train_fold, X_val_fold, Y_val_fold, cfg):
    """Train a fresh network on one fold and evaluate it on the held-out part.

    Args:
        X_train_fold: Training inputs, one sample per row.
        Y_train_fold: One-hot training targets, one sample per column.
        X_val_fold: Validation inputs, one sample per row.
        Y_val_fold: One-hot validation targets, one sample per column.
        cfg: Hyperparameter mapping; reads 'optimizer', 'learning_rate',
            'epochs' and 'batch_size' here and is forwarded to stack_layers.

    Returns:
        Tuple (val_acc, val_loss) measured on the validation fold.

    Side effects:
        Logs train_loss, train_accuracy, val_loss and val_accuracy to W&B.
        train_loss is the mean mini-batch loss of the final epoch (NaN when
        no batch was processed).
    """
    layers = stack_layers(X_train_fold.shape[1], cfg)
    optimizer = get_optimizer(cfg['optimizer'], float(cfg['learning_rate']))
    loss_fn = CrossEntropyLoss()

    m = X_train_fold.shape[0]
    epochs = int(cfg["epochs"])
    batch_size = int(cfg["batch_size"])

    # Stays NaN if no batch is ever processed (epochs == 0 or an empty fold).
    train_loss = float("nan")

    for _ in range(epochs):
        # Reset the running totals every epoch so the reported loss describes
        # the final epoch instead of an average over the whole training run.
        epoch_loss = 0.0
        num_batches = 0

        # Reshuffle each epoch; samples are moved onto columns for the layers.
        perm = np.random.permutation(m)
        Xs = X_train_fold[perm].T
        Ys = Y_train_fold[:, perm]

        for i in range(0, m, batch_size):
            Xb = Xs[:, i:i+batch_size]
            Yb = Ys[:, i:i+batch_size]

            A = _forward_pass(layers, Xb)
            loss = loss_fn.forward(A, Yb)

            epoch_loss += loss
            num_batches += 1

            # Backpropagate from the output layer, updating each layer in turn.
            dA = loss_fn.backward(A, Yb)
            for layer in reversed(layers):
                dA, dW, db = layer.backward(dA)
                optimizer.update(layer, dW, db)

        if num_batches:
            train_loss = epoch_loss / num_batches

    # Evaluate on the unshuffled fold: accuracy does not depend on sample
    # order, and this also works when the epoch loop never ran.
    A_train = _forward_pass(layers, X_train_fold.T)
    train_acc = accuracy_from_probs(A_train, Y_train_fold)

    A_val = _forward_pass(layers, X_val_fold.T)
    val_loss = loss_fn.forward(A_val, Y_val_fold)
    val_acc = accuracy_from_probs(A_val, Y_val_fold)

    wandb.log({
            "train_loss": float(train_loss),
            "train_accuracy": float(train_acc),
            "val_loss": float(val_loss),
            "val_accuracy": float(val_acc)
        })

    return val_acc, val_loss

def train_run():
    """Sweep entry point: 3-fold cross-validation of one hyperparameter config.

    Starts a W&B run, reads the hyperparameters from `run.config`, trains on
    each fold of the global X_train / Y_train via train_on_split, and logs the
    per-fold accuracy plus the mean and standard deviation of the fold
    accuracies and the mean fold loss.
    """
    # The context manager finishes the run on exit, so no explicit
    # wandb.finish() call is needed.
    with wandb.init() as run:
        cfg = dict(run.config)
        # Mirror the hyperparameters into the run history.
        run.log(dict(cfg))

        kf = KFold(n_splits=3, shuffle=True, random_state=27)
        fold_accuracies = []
        fold_val_losses = []

        # Train and evaluate once per fold, collecting the validation metrics.
        for fold_idx, (train_idx, val_idx) in enumerate(kf.split(X_train)):

            # X is indexed by row (samples), Y by column (samples).
            X_train_fold = X_train[train_idx]
            Y_train_fold = Y_train[:, train_idx]

            X_val_fold = X_train[val_idx]
            Y_val_fold = Y_train[:, val_idx]

            # train_on_split returns a (accuracy, loss) pair; unpack both.
            val_acc, val_loss = train_on_split(
                X_train_fold, Y_train_fold,
                X_val_fold, Y_val_fold,
                cfg
            )

            fold_accuracies.append(float(val_acc))
            fold_val_losses.append(float(val_loss))
            run.log({f"fold_{fold_idx+1}_accuracy": float(val_acc)})

        # Cross-validation estimate of the generalization performance.
        mean_cv_acc = float(np.mean(fold_accuracies))
        cv_std = float(np.std(fold_accuracies))
        mean_cv_loss = float(np.mean(fold_val_losses))
        run.log({
            "mean_cv_accuracy": mean_cv_acc,
            "accuracy_cv_std": cv_std,
            "mean_cv_loss": mean_cv_loss,
        })

        print(f"Run {run.id} \nCV Accuracies={fold_accuracies} \nMean={mean_cv_acc:.4f}")

In [176]:
#wandb.init(
#    project="numpy-fashion-mnist",
#    config={
#        "learning_rate": 0.001,
#        "batch_size": 64,
#        "epochs": 30,
#        "num_hidden_layers": 2,
#        "n_hidden_units": 128,
#        "l2_coeff": 1e-4,
#        "optimizer": "adam",
#        "activation": "relu",
#        "num_classes": 10
#    }
#)
#
#train_run()

In [None]:
# Bayesian
# Start an agent on the Bayesian sweep; it calls train_run once per sampled
# configuration, for at most 30 runs (count=30).
wandb.agent(bayesian_sweep_id, function=train_run, count=30)

[34m[1mwandb[0m: Agent Starting Run: sa6my1pr with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	l2_coeff: 0.0007525049940501857
[34m[1mwandb[0m: 	learning_rate: 0.00011736155594737134
[34m[1mwandb[0m: 	n_hidden_units: 35
[34m[1mwandb[0m: 	num_classes: 10
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: adam


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


Traceback (most recent call last):
  File "/tmp/ipykernel_490/1879819409.py", line 102, in train_run
    val_acc = train_on_split(
  File "/tmp/ipykernel_490/1879819409.py", line 76, in train_on_split
    wandb.log({
  File "/home/vscode/.local/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 390, in wrapper
    return func(self, *args, **kwargs)
  File "/home/vscode/.local/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 448, in wrapper_fn
    return func(self, *args, **kwargs)
  File "/home/vscode/.local/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 435, in wrapper
    return func(self, *args, **kwargs)
  File "/home/vscode/.local/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 2023, in log
    self._log(data=data, step=step, commit=commit)
  File "/home/vscode/.local/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 1734, in _log
    self._partial_history_callback(data, step, commit)
  File "/home/vscode/.local/lib/python3.10/site