In [53]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as sk
from sklearn.model_selection import train_test_split

In [54]:
WANDB_PROJECT = "myprojectname"
WANDB_ENTITY = "myname"
DATASET = "fashion_mnist"
EPOCHS = 10
BATCH_SIZE = 128
LOSS = "cross_entropy"
OPTIMIZER = "sgd"
LEARNING_RATE = 0.01
MOMENTUM = 0.5
BETA = 0.5
BETA1 = 0.5
BETA2 = 0.5
EPSILON = 1e-6
WEIGHT_DECAY = 0.0
WEIGHT_INIT = "random"
NUM_LAYERS = 4
HIDDEN_SIZE = 128
ACTIVATION = "sigmoid"

In [55]:
from keras.datasets import fashion_mnist

# Load the data
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Normalize the data
x_train = x_train / 255.0
x_test = x_test / 255.0

In [56]:
class FFNeuralNetwork():
    def __init__(self, 
                neurons=HIDDEN_SIZE, 
                hid_layers=NUM_LAYERS, 
                input_size=784, 
                output_size=10, 
                act_func=ACTIVATION, 
                weight_init=WEIGHT_INIT, 
                out_act_func="softmax",
                init_toggle=True):
                
        self.neurons, self.hidden_layers = neurons, hid_layers
        self.weights, self.biases = [], []
        self.input_size, self.output_size = input_size, output_size
        self.activation_function, self.weight_init = act_func, weight_init
        self.output_activation_function = out_act_func

        if init_toggle:
            self.initialize_weights()
            self.initiate_biases()

    def initialize_weights(self):
        self.weights.append(np.random.randn(self.input_size, self.neurons))
        for _ in range(self.hidden_layers - 1):
            self.weights.append(np.random.randn(self.neurons, self.neurons))
        self.weights.append(np.random.randn(self.neurons, self.output_size))

        if self.weight_init == "xavier":
            for i in range(len(self.weights)):
                self.weights[i] = self.weights[i] / np.sqrt(self.weights[i].shape[0])

    def initiate_biases(self):
        for _ in range(self.hidden_layers):
            self.biases.append(np.random.randn(self.neurons))
        self.biases.append(np.random.randn(self.output_size))
    
    def activation(self, x):
        if self.activation_function == "sigmoid":
            return 1 / (1 + np.exp(-x))
        elif self.activation_function == "tanh":
            return np.tanh(x)
        elif self.activation_function == "ReLU":
            return np.maximum(0, x)
        else:
            raise Exception("Invalid activation function")
    
    def output_activation(self, x):
        if self.output_activation_function == "softmax":
            max_x = np.max(x, axis=1)
            max_x = max_x.reshape(max_x.shape[0], 1)
            exp_x = np.exp(x - max_x)
            softmax_mat = exp_x / np.sum(exp_x, axis=1).reshape(exp_x.shape[0], 1)
            # change 0s to 1e-10
            softmax_mat[softmax_mat == 0] = 1e-10
            return softmax_mat
        else:
            raise Exception("Invalid output activation function")
    
    def forward(self, x):
        self.pre_activation, self.post_activation = [x], [x]

        for i in range(self.hidden_layers):
            self.pre_activation.append(np.matmul(self.post_activation[-1], self.weights[i]) + self.biases[i])
            self.post_activation.append(self.activation(self.pre_activation[-1]))
            
        self.pre_activation.append(np.matmul(self.post_activation[-1], self.weights[-1]) + self.biases[-1])
        self.post_activation.append(self.output_activation(self.pre_activation[-1]))

        return self.post_activation[-1]

In [57]:
def loss(loss, y, y_pred):
    if loss == "cross_entropy":
        return -np.sum(y * np.log(y_pred))
    elif loss == "mean_squared":
        return np.sum((y - y_pred) ** 2) / 2
    else:
        raise Exception("Invalid loss function")

In [58]:
class Backpropagation():
    def __init__(self, 
                 nn: FFNeuralNetwork, 
                 loss=LOSS, 
                 act_func=ACTIVATION):
        
        self.nn, self.loss, self.activation_function = nn, loss, act_func
    
    def loss_derivative(self, y, y_pred):
        if self.loss == "cross_entropy":
            return -y / y_pred
        elif self.loss == "mse":
            return 2 * (y_pred - y)
        else:
            raise Exception("Invalid loss function")
        
    def activation_derivative(self, x):
        # x is the post-activation value
        if self.activation_function == "sigmoid":
            return x * (1 - x)
        elif self.activation_function == "tanh":
            return 1 - x ** 2
        elif self.activation_function == "ReLU":
            return 1 * (x > 0)
        else:
            raise Exception("Invalid activation function")
        
    def output_activation_derivative(self, y, y_pred):
        # x is the post-activation value
        if self.nn.output_activation_function == "softmax":
            return y_pred - y
        else:
            raise Exception("Invalid output activation function")
    
    def backward(self, y, y_pred):
        self.d_weights, self.d_biases = [], []
        self.d_h, self.d_a = [], []

        self.d_h.append(self.loss_derivative(y, y_pred))
        self.d_a.append(self.output_activation_derivative(y, y_pred))

        for i in range(self.nn.hidden_layers, 0, -1):
            self.d_weights.append(np.matmul(self.nn.post_activation[i].T, self.d_a[-1]))
            self.d_biases.append(np.sum(self.d_a[-1], axis=0))
            self.d_h.append(np.matmul(self.d_a[-1], self.nn.weights[i].T))
            self.d_a.append(self.d_h[-1] * self.activation_derivative(self.nn.post_activation[i]))

        self.d_weights.append(np.matmul(self.nn.post_activation[0].T, self.d_a[-1]))
        self.d_biases.append(np.sum(self.d_a[-1], axis=0))

        self.d_weights.reverse()
        self.d_biases.reverse()

        return self.d_weights, self.d_biases

In [59]:
class Optimiser():
    def __init__(self, 
                 nn: FFNeuralNetwork, 
                 bp:Backpropagation, 
                 lr=LEARNING_RATE, 
                 optimiser=OPTIMIZER, 
                 momentum=MOMENTUM,
                 epsilon=EPSILON,
                 beta1=BETA1,
                 beta2=BETA2, 
                 t=0):
        
        self.nn, self.bp, self.lr, self.optimiser = nn, bp, lr, optimiser
        self.momentum, self.epsilon, self.beta1, self.beta2 = momentum, epsilon, beta1, beta2
        self.h_weights = [np.zeros_like(w) for w in self.nn.weights]
        self.h_biases = [np.zeros_like(b) for b in self.nn.biases]
        self.hm_weights = [np.zeros_like(w) for w in self.nn.weights]
        self.hm_biases = [np.zeros_like(b) for b in self.nn.biases]
        self.t = t

    def run(self, d_weights, d_biases, y, x):
        if(self.optimiser == "sgd"):
            self.SGD(d_weights, d_biases)
        elif(self.optimiser == "momentum"):
            self.MomentumGD(d_weights, d_biases)
        elif(self.optimiser == "nag"):
            self.NAG(d_weights, d_biases)
        elif (self.optimiser == "nag2"):
            self.NAG2(y, x)
        elif(self.optimiser == "rmsprop"):
            self.RMSProp(d_weights, d_biases)
        elif(self.optimiser == "adam"):
            self.Adam(d_weights, d_biases)
        elif (self.optimiser == "nadam"):
            self.NAdam(d_weights, d_biases)
        else:
            raise Exception("Invalid optimiser")
    
    def SGD(self, d_weights, d_biases):
        for i in range(self.nn.hidden_layers + 1):
            self.nn.weights[i] -= self.lr * d_weights[i]
            self.nn.biases[i] -= self.lr * d_biases[i]

    def MomentumGD(self, d_weights, d_biases):
        for i in range(self.nn.hidden_layers + 1):
            self.h_weights[i] = self.momentum * self.h_weights[i] + d_weights[i]
            self.h_biases[i] = self.momentum * self.h_biases[i] + d_biases[i]

            self.nn.weights[i] -= self.h_weights[i] * self.lr
            self.nn.biases[i] -= self.h_biases[i] * self.lr

    def NAG(self, d_weights, d_biases):        
        for i in range(self.nn.hidden_layers + 1):
            self.h_weights[i] = self.momentum * self.h_weights[i] + d_weights[i]
            self.h_biases[i] = self.momentum * self.h_biases[i] + d_biases[i]

            self.nn.weights[i] -= self.lr * (self.momentum * self.h_weights[i] + d_weights[i])
            self.nn.biases[i] -= self.lr * (self.momentum * self.h_biases[i] + d_biases[i])

    def NAG2(self, y, x):
        nn_new = FFNeuralNetwork(neurons = self.nn.neurons,
                                 input_size = self.nn.input_size,
                                 output_size = self.nn.output_size,
                                 hid_layers = self.nn.hidden_layers,
                                 act_func = self.nn.activation_function,
                                 out_act_func = self.nn.output_activation_function,
                                 weight_init = self.nn.weight_init,
                                 init_toggle = False)
        
        bp_new = Backpropagation(nn = nn_new, 
                                 loss = self.bp.loss,
                                 act_func = self.bp.activation_function)
        
        nn_new.weights = [w - self.momentum * self.h_weights[i] for i, w in enumerate(self.nn.weights)]
        nn_new.biases = [b - self.momentum * self.h_biases[i] for i, b in enumerate(self.nn.biases)]

        y_pred_new = nn_new.forward(x)
        d_weights_new, d_biases_new = bp_new.backward(y, y_pred_new)

        for i in range(self.nn.hidden_layers + 1):
            self.h_weights[i] = self.momentum * self.h_weights[i] + d_weights_new[i]
            self.h_biases[i] = self.momentum * self.h_biases[i] + d_biases_new[i]

            self.nn.weights[i] -= self.h_weights[i] * self.lr
            self.nn.biases[i] -= self.h_biases[i] * self.lr

    def RMSProp(self, d_weights, d_biases):
        for i in range(self.nn.hidden_layers + 1):
            self.h_weights[i] = self.momentum * self.h_weights[i] + (1 - self.momentum) * d_weights[i]**2
            self.h_biases[i] = self.momentum * self.h_biases[i] + (1 - self.momentum) * d_biases[i]**2

            self.nn.weights[i] -= (self.lr / (np.sqrt(self.h_weights[i]) + self.epsilon)) * d_weights[i]
            self.nn.biases[i] -= (self.lr / (np.sqrt(self.h_biases[i]) + self.epsilon)) * d_biases[i]

    def Adam(self, d_weights, d_biases):
        for i in range(self.nn.hidden_layers + 1):
            self.hm_weights[i] = self.beta1 * self.hm_weights[i] + (1 - self.beta1) * d_weights[i]
            self.hm_biases[i] = self.beta1 * self.hm_biases[i] + (1 - self.beta1) * d_biases[i]

            self.h_weights[i] = self.beta2 * self.h_weights[i] + (1 - self.beta2) * d_weights[i]**2
            self.h_biases[i] = self.beta2 * self.h_biases[i] + (1 - self.beta2) * d_biases[i]**2

            self.hm_weights_hat = self.hm_weights[i] / (1 - self.beta1**(self.t + 1))
            self.hm_biases_hat = self.hm_biases[i] / (1 - self.beta1**(self.t + 1))

            self.h_weights_hat = self.h_weights[i] / (1 - self.beta2**(self.t + 1))
            self.h_biases_hat = self.h_biases[i] / (1 - self.beta2**(self.t + 1))

            self.nn.weights[i] -= self.lr * (self.hm_weights_hat / ((np.sqrt(self.h_weights_hat)) + self.epsilon))
            self.nn.biases[i] -= self.lr * (self.hm_biases_hat / ((np.sqrt(self.h_biases_hat)) + self.epsilon))

    def NAdam(self, d_weights, d_biases):
        for i in range(self.nn.hidden_layers + 1):
            self.hm_weights[i] = self.beta1 * self.hm_weights[i] + (1 - self.beta1) * d_weights[i]
            self.hm_biases[i] = self.beta1 * self.hm_biases[i] + (1 - self.beta1) * d_biases[i]

            self.h_weights[i] = self.beta2 * self.h_weights[i] + (1 - self.beta2) * d_weights[i]**2
            self.h_biases[i] = self.beta2 * self.h_biases[i] + (1 - self.beta2) * d_biases[i]**2

            self.hm_weights_hat = self.hm_weights[i] / (1 - self.beta1 ** (self.t + 1))
            self.hm_biases_hat = self.hm_biases[i] / (1 - self.beta1 ** (self.t + 1))

            self.h_weights_hat = self.h_weights[i] / (1 - self.beta2 ** (self.t + 1))
            self.h_biases_hat = self.h_biases[i] / (1 - self.beta2 ** (self.t + 1))

            temp_update_w = self.beta1 * self.hm_weights_hat + ((1 - self.beta1) / (1 - self.beta1 ** (self.t + 1))) * d_weights[i]
            temp_update_b = self.beta1 * self.hm_biases_hat + ((1 - self.beta1) / (1 - self.beta1 ** (self.t + 1))) * d_biases[i]

            self.nn.weights[i] -= self.lr * (temp_update_w / ((np.sqrt(self.h_weights_hat)) + self.epsilon))
            self.nn.biases[i] -= self.lr * (temp_update_b / ((np.sqrt(self.h_biases_hat)) + self.epsilon))

In [60]:
import wandb
wandb.login()
wandb.WANDB_NOTEBOOK_NAME = "Assignment1.ipynb"



In [61]:
sweep_configuration = {
    'method': 'random',
    'name': 'sweep',
    'metric': {
        'goal': 'maximize',
        'name': 'val_accuracy'
    },
    'parameters': {
        'batch_size': {
            'values': [16, 32, 64, 128, 256]
        },
        'learning_rate': {
            'values': [0.0001, 0.002, 0.005, 0.001, 0.005, 0.01, 0.05, 0.1]
        },
        'neurons': {
            'values': [32, 64, 128, 256, 512, 1024]
        },
        'hidden_layers': {
            'values': [1, 2, 3, 4, 5]
        },
        'activation': {
            'values': ['ReLU', 'tanh', 'sigmoid']
        },
        'weight_init': {
            'values': ['Xavier', 'random']
        },
        'optimiser': {
            'values': ['sgd', 'momentum', 'nag', 'rmsprop', 'adam', 'nadam']
        },
        'momentum': {
            'values': [0.4, 0.5, 0.6, 0.7]
        },
        'input_size': {
            'value': 784
        },
        'output_size': {
            'value': 10
        },
        'loss': {
            'value': 'cross_entropy'
        },
        'epochs': {
            'value': 10
        },
        'beta1': {
            'value': 0.9
        },
        'beta2': {
            'value': 0.999
        },
        'output_activation': {
            'value': 'softmax'
        },
        'epsilon': {
            'value': 1e-8
        }
    }
}

Exception in thread SystemMonitor:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bersi/.local/lib/python3.10/site-packages/wandb/sdk/internal/system/system_monitor.py", line 118, in _start
    asset.start()
  File "/home/bersi/.local/lib/python3.10/site-packages/wandb/sdk/internal/system/assets/cpu.py", line 166, in start
    self.metrics_monitor.start()
  File "/home/bersi/.local/lib/python3.10/site-packages/wandb/sdk/internal/system/assets/interfaces.py", line 168, in start
    logger.info(f"Started {self._process.name}")
AttributeError: 'NoneType' object has no attribute 'name'


In [62]:
def load_data(type):
    (x, y), (x_test, y_test) = fashion_mnist.load_data()

    if type == 'train':
        x = x.reshape(x.shape[0], 784)
        x = x.astype('float32')
        x /= 255
        # change y to one hot
        y = np.eye(10)[y]
        return x, y
    elif type == 'test':
        x_test = x_test.reshape(x_test.shape[0], 784)
        x_test = x_test.astype('float32')
        x_test /= 255
        # change y to one hot
        y_test = np.eye(10)[y_test]
        return x_test, y_test

In [63]:
def train():
    x_train, y_train = load_data('train')

    run = wandb.init(project="sweep-hyperparameters - 1")
    parameters = wandb.config
    run.name = f"{parameters['activation']}_neurons={parameters['neurons']}_layers={parameters['hidden_layers']}_lr={parameters['learning_rate']}_batch={parameters['batch_size']}_opt={parameters['optimiser']}_mom={parameters['momentum']}_init={parameters['weight_init']}"
    
    nn = FFNeuralNetwork(input_size=parameters['input_size'], 
                         hid_layers=parameters['hidden_layers'], 
                         neurons=parameters['neurons'], 
                         output_size=parameters['output_size'], 
                         act_func=parameters['activation'], 
                         out_act_func=parameters['output_activation'],
                         weight_init=parameters['weight_init'])
    bp = Backpropagation(nn=nn, 
                         loss=parameters['loss'],
                         act_func=parameters['activation'])
    opt = Optimiser(nn=nn,
                    bp=bp,
                    lr=parameters['learning_rate'],
                    optimiser=parameters['optimiser'],
                    momentum=parameters['momentum'],
                    epsilon=parameters['epsilon'],
                    beta1=parameters['beta1'],
                    beta2=parameters['beta2'])
    
    batch_size = parameters['batch_size']
    x_train_act, x_val, y_train_act, y_val = train_test_split(x_train, y_train, test_size=0.1, random_state=42)

    print("Initial Accuracy: {}".format(np.sum(np.argmax(nn.forward(x_train), axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]))

    for epoch in range(parameters['epochs']):
        for i in range(0, x_train_act.shape[0], batch_size):
            x_batch = x_train_act[i:i+batch_size]
            y_batch = y_train_act[i:i+batch_size]

            y_pred = nn.forward(x_batch)
            d_weights, d_biases = bp.backward(y_batch, y_pred)
            opt.run(d_weights, d_biases, y_batch, x_batch)
        
        opt.t += 1

        y_pred = nn.forward(x_train_act)
        print("Epoch: {}, Loss: {}".format(epoch + 1, loss("cross_entropy", y_train_act, y_pred)))
        print("Accuracy: {}".format(np.sum(np.argmax(y_pred, axis=1) == np.argmax(y_train_act, axis=1)) / y_train_act.shape[0]))

        train_loss = loss("cross_entropy", y_train_act, y_pred)
        train_accuracy = np.sum(np.argmax(y_pred, axis=1) == np.argmax(y_train_act, axis=1)) / y_train_act.shape[0]
        val_loss = loss("cross_entropy", y_val, nn.forward(x_val))
        val_accuracy = np.sum(np.argmax(nn.forward(x_val), axis=1) == np.argmax(y_val, axis=1)) / y_val.shape[0]

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_accuracy": train_accuracy,
            "val_loss": val_loss,
            "val_accuracy": val_accuracy
        })
    
    return nn

In [52]:
wandb_id = wandb.sweep(sweep_configuration, project="sweep-hyperparameters - 1")

wandb.agent(wandb_id, function=train, count=500)

Create sweep with ID: o2wtxrd2
Sweep URL: https://wandb.ai/cs20b013-bersilin/sweep-hyperparameters%20-%201/sweeps/o2wtxrd2


[34m[1mwandb[0m: Agent Starting Run: k99z1qpn with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	loss: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.7
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimiser: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_init: Xavier
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Initial Accuracy: 0.10838333333333333
Epoch: 1, Loss: 1006323.7629503986
Accuracy: 0.10018518518518518
Epoch: 2, Loss: 1397315.0346036996
Accuracy: 0.10094444444444445
Epoch: 3, Loss: 899062.7880095493
Accuracy: 0.0997962962962963
Epoch: 4, Loss: 754801.1308414192
Accuracy: 0.09961111111111111
Epoch: 5, Loss: 1030688.5053380391
Accuracy: 0.09985185185185186
Epoch: 6, Loss: 735432.755959667
Accuracy: 0.09985185185185186
Epoch: 7, Loss: 1316290.0463870189
Accuracy: 0.10092592592592593
Epoch: 8, Loss: 1061547.190921836
Accuracy: 0.09985185185185186
Epoch: 9, Loss: 366086.20413939696
Accuracy: 0.09985185185185186
Epoch: 10, Loss: 1138093.3718720556
Accuracy: 0.10092592592592593


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▄█▂▁▂▂█▂▂█
train_loss,▅█▅▄▆▄▇▆▁▆
val_accuracy,▅▁▇█▇▇▁▇▇▁
val_loss,▅█▅▄▆▃▇▆▁▆

0,1
epoch,10.0
train_accuracy,0.10093
train_loss,1138093.37187
val_accuracy,0.09167
val_loss,128049.82326


[34m[1mwandb[0m: Agent Starting Run: 5lm8sp93 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 1
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	loss: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.6
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimiser: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_init: Xavier
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Initial Accuracy: 0.07111666666666666
Epoch: 1, Loss: 125589.53715445801
Accuracy: 0.09987037037037037
Epoch: 2, Loss: 125588.34078895462
Accuracy: 0.09987037037037037
Epoch: 3, Loss: 125586.63007743994
Accuracy: 0.09988888888888889
Epoch: 4, Loss: 125586.33269203953
Accuracy: 0.09988888888888889
Epoch: 5, Loss: 125586.1734728788
Accuracy: 0.09988888888888889
Epoch: 6, Loss: 125586.11439394896
Accuracy: 0.09988888888888889
Epoch: 7, Loss: 125586.08524224398
Accuracy: 0.09988888888888889
Epoch: 8, Loss: 125586.06824682931
Accuracy: 0.09988888888888889
Epoch: 9, Loss: 125586.05724539822
Accuracy: 0.09988888888888889
Epoch: 10, Loss: 125586.049599206
Accuracy: 0.09988888888888889


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▁████████
train_loss,█▆▂▂▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.09989
train_loss,125586.0496
val_accuracy,0.10133
val_loss,13968.14871


[34m[1mwandb[0m: Agent Starting Run: 430giatj with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 2
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	loss: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.7
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	optimiser: sgd
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_init: random
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Initial Accuracy: 0.13125


  return -y / y_pred


Epoch: 1, Loss: 124415.12892803209
Accuracy: 0.09974074074074074
Epoch: 2, Loss: 124382.33622814258
Accuracy: 0.09981481481481481
Epoch: 3, Loss: 124358.82691691899
Accuracy: 0.09981481481481481
Epoch: 4, Loss: 124356.27393382888
Accuracy: 0.09981481481481481
Epoch: 5, Loss: 124354.81828409624
Accuracy: 0.09977777777777778
Epoch: 6, Loss: 124353.23786229374
Accuracy: 0.09985185185185186
Epoch: 7, Loss: 124350.68316994797
Accuracy: 0.0999074074074074
Epoch: 8, Loss: 124345.36735745463
Accuracy: 0.10001851851851852
Epoch: 9, Loss: 124338.25060931593
Accuracy: 0.1000925925925926
Epoch: 10, Loss: 124306.67257733662
Accuracy: 0.1005


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▂▂▁▂▃▄▄█
train_loss,█▆▄▄▄▄▄▃▃▁
val_accuracy,▁▁▁▂▂▂▂▂▄█
val_loss,████▇▇▇▇▆▁

0,1
epoch,10.0
train_accuracy,0.1005
train_loss,124306.67258
val_accuracy,0.10417
val_loss,13808.07979


[34m[1mwandb[0m: Agent Starting Run: zxdh7cya with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.5
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	optimiser: momentum
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_init: random
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Initial Accuracy: 0.1
Epoch: 1, Loss: 104684.14065574046
Accuracy: 0.2970740740740741
Epoch: 2, Loss: 85606.9136049866
Accuracy: 0.4370925925925926
Epoch: 3, Loss: 73893.4463091492
Accuracy: 0.5090370370370371
Epoch: 4, Loss: 66490.59481916556
Accuracy: 0.5554444444444444
Epoch: 5, Loss: 61390.108118303884
Accuracy: 0.5865
Epoch: 6, Loss: 57630.3382870175
Accuracy: 0.6100185185185185
Epoch: 7, Loss: 54660.29018951484
Accuracy: 0.6286481481481482
Epoch: 8, Loss: 52254.21177883548
Accuracy: 0.6447222222222222
Epoch: 9, Loss: 50282.482372996994
Accuracy: 0.6574444444444445
Epoch: 10, Loss: 48635.271004288436
Accuracy: 0.6691851851851852


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇███
train_loss,█▆▄▃▃▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇▇███
val_loss,█▆▄▃▃▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,0.66919
train_loss,48635.271
val_accuracy,0.661
val_loss,5481.76388


[34m[1mwandb[0m: Agent Starting Run: el0iz3cf with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 1
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	loss: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.6
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimiser: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_init: Xavier
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Initial Accuracy: 0.15551666666666666
Epoch: 1, Loss: 118633.11499990687
Accuracy: 0.15012962962962964
Epoch: 2, Loss: 119462.69803993183
Accuracy: 0.14196296296296296
Epoch: 3, Loss: 115655.48374838688
Accuracy: 0.16546296296296295
Epoch: 4, Loss: 121001.49547488557
Accuracy: 0.1308888888888889
Epoch: 5, Loss: 119177.44682337515
Accuracy: 0.14512962962962964
Epoch: 6, Loss: 121772.31395124517
Accuracy: 0.12816666666666668
Epoch: 7, Loss: 120278.40958583285
Accuracy: 0.13635185185185186
Epoch: 8, Loss: 120565.19947159119
Accuracy: 0.1332962962962963
Epoch: 9, Loss: 118674.66617227762
Accuracy: 0.14716666666666667
Epoch: 10, Loss: 116294.34493635216
Accuracy: 0.16092592592592592


VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.828335…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▅▄█▂▄▁▃▂▅▇
train_loss,▄▅▁▇▅█▆▇▄▂
val_accuracy,▅▃█▁▄▁▂▂▄▇
val_loss,▄▆▁█▅█▆▇▄▂

0,1
epoch,10.0
train_accuracy,0.16093
train_loss,116294.34494
val_accuracy,0.16317
val_loss,13027.56673


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: t5rc2civ with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	loss: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.7
[34m[1mwandb[0m: 	neurons: 256
[34m[1mwandb[0m: 	optimiser: nag
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_init: Xavier
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Initial Accuracy: 0.09385
Epoch: 1, Loss: 39883.77521918508
Accuracy: 0.7444814814814815
Epoch: 2, Loss: 45271.68879927592
Accuracy: 0.7533518518518518
Epoch: 3, Loss: 35518.96636547117
Accuracy: 0.7914074074074074
Epoch: 4, Loss: 31376.805529203288
Accuracy: 0.8218148148148148
Epoch: 5, Loss: 34342.54931972489
Accuracy: 0.8211481481481482
Epoch: 6, Loss: 28463.955658266772
Accuracy: 0.8390555555555556
Epoch: 7, Loss: 25296.362544049098
Accuracy: 0.8491111111111111
Epoch: 8, Loss: 28847.33168436358
Accuracy: 0.8402037037037037
Epoch: 9, Loss: 25743.495307781508
Accuracy: 0.8540185185185185
Epoch: 10, Loss: 25089.275484539845
Accuracy: 0.8541666666666666


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▄▆▆▇█▇██
train_loss,▆█▅▃▄▂▁▂▁▁
val_accuracy,▁▁▅▆▆▇▇▇██
val_loss,▆█▄▃▄▂▁▂▁▁

0,1
epoch,10.0
train_accuracy,0.85417
train_loss,25089.27548
val_accuracy,0.84117
val_loss,3132.3043


[34m[1mwandb[0m: Agent Starting Run: tymxfhbb with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.4
[34m[1mwandb[0m: 	neurons: 256
[34m[1mwandb[0m: 	optimiser: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_init: Xavier
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Initial Accuracy: 0.08375
Epoch: 1, Loss: 39120.19992129665
Accuracy: 0.7587222222222222
Epoch: 2, Loss: 33833.27837790008
Accuracy: 0.7861296296296296
Epoch: 3, Loss: 31233.482778888945
Accuracy: 0.7996481481481481
Epoch: 4, Loss: 29463.985385927775
Accuracy: 0.8088888888888889
Epoch: 5, Loss: 28097.131064159028
Accuracy: 0.8171666666666667
Epoch: 6, Loss: 26962.37299443844
Accuracy: 0.8242962962962963
Epoch: 7, Loss: 25987.49191890988
Accuracy: 0.8308148148148148
Epoch: 8, Loss: 25129.28685476584
Accuracy: 0.8363148148148148
Epoch: 9, Loss: 24354.262588866826
Accuracy: 0.8414444444444444
Epoch: 10, Loss: 23645.67462628403
Accuracy: 0.8453888888888889


VBox(children=(Label(value='0.003 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.834003…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▆▄▄▃▃▂▂▁▁
val_accuracy,▁▄▅▆▆▇▇███
val_loss,█▅▄▃▃▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,0.84539
train_loss,23645.67463
val_accuracy,0.79683
val_loss,3507.78884


[34m[1mwandb[0m: Agent Starting Run: qqh179o8 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	loss: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.5
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	optimiser: adam
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_init: random
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Initial Accuracy: 0.06463333333333333
Epoch: 1, Loss: 47640.824303726484
Accuracy: 0.6670555555555555
Epoch: 2, Loss: 40440.758432958646
Accuracy: 0.7194259259259259
Epoch: 3, Loss: 36885.921870632425
Accuracy: 0.7539074074074074
Epoch: 4, Loss: 34985.53262583888
Accuracy: 0.7658888888888888
Epoch: 5, Loss: 32455.60166690682
Accuracy: 0.7867037037037037
Epoch: 6, Loss: 31294.815173617983
Accuracy: 0.7915555555555556
Epoch: 7, Loss: 29485.178326707763
Accuracy: 0.808962962962963
Epoch: 8, Loss: 29135.8851079936
Accuracy: 0.8094814814814815
Epoch: 9, Loss: 28845.17371057467
Accuracy: 0.8154814814814815
Epoch: 10, Loss: 27688.13338324335
Accuracy: 0.8202777777777778


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▆▆▇▇███
train_loss,█▅▄▄▃▂▂▂▁▁
val_accuracy,▁▃▅▆▇▇▇▇██
val_loss,█▆▄▃▃▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,0.82028
train_loss,27688.13338
val_accuracy,0.80383
val_loss,3243.88723


[34m[1mwandb[0m: Agent Starting Run: thal4wtm with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beta1: 0.9
[34m[1mwandb[0m: 	beta2: 0.999
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	epsilon: 1e-08
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	input_size: 784
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	loss: cross_entropy
[34m[1mwandb[0m: 	momentum: 0.6
[34m[1mwandb[0m: 	neurons: 1024
[34m[1mwandb[0m: 	optimiser: rmsprop
[34m[1mwandb[0m: 	output_activation: softmax
[34m[1mwandb[0m: 	output_size: 10
[34m[1mwandb[0m: 	weight_init: Xavier
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Initial Accuracy: 0.12926666666666667
Epoch: 1, Loss: 3869309.682079762
Accuracy: 0.09985185185185186
Epoch: 2, Loss: 3372100.557202969
Accuracy: 0.09985185185185186
Epoch: 3, Loss: 4415781.467917189
Accuracy: 0.10018518518518518
Epoch: 4, Loss: 5114257.004295293
Accuracy: 0.0997962962962963
Epoch: 5, Loss: 3436993.3762061093
Accuracy: 0.10005555555555555
Epoch: 6, Loss: 3492008.405449928
Accuracy: 0.0997962962962963
Epoch: 7, Loss: 3066453.4460261664
Accuracy: 0.10024074074074074
Epoch: 8, Loss: 2461533.6617397605
Accuracy: 0.10018518518518518


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [None]:
# x_train_reshape = x_train.reshape(x_train.shape[0], -1)

# y_train_reshape = np.zeros((y_train.shape[0], 10))
# y_train_reshape[np.arange(y_train.shape[0]), y_train] = 1 # one-hot encoding

parameters = {
    'input_size': 784,
    'hidden_layers': 4,
    'neurons': 128,
    'output_size': 10,
    'activation': 'tanh',
    'output_activation': 'softmax',
    'loss': 'cross_entropy',
    'learning_rate': 0.0015,
    'optimiser': 'sgd',
    'momentum': 0.5,
    'batch_size': 128,
    'epsilon': 1e-8,
    'beta1': 0.9,
    'beta2': 0.999,
    'epochs': 10,
    'weight_init': 'Xavier'
}

# nn = train(x_train_reshape, y_train_reshape, parameters)

In [None]:
set(parameters.keys())  , set(sweep_configuration['parameters'].keys())

In [None]:
x_test_reshape = x_test.reshape(x_test.shape[0], -1)

y_pred = nn.forward(x_test_reshape)
print("Test Accuracy: {}".format(np.sum(np.argmax(y_pred, axis=1) == y_test) / y_test.shape[0]))