In [None]:
from keras.datasets import fashion_mnist
from matplotlib import pyplot as plt 
from sklearn.utils import shuffle
import numpy as np
import statistics
import math

In [None]:
#---------------------------------install abd import wandb -------------------------------------------------
%%capture
!pip install wandb -qqq
import wandb

In [None]:
#---------------------------------login to wandb -------------------------------
# Shell escape: runs the wandb command-line login for this machine.
!wandb login

[34m[1mwandb[0m: Currently logged in as: [33manshikag_2210[0m (use `wandb login --relogin` to force relogin)


In [None]:
#------------- import the project-local load_data module (aliased as ld), which supplies the dataset splits -------------
import load_data as ld

#------------- ld.load() returns six values: data and labels for the train, validation and test splits -------------
train_data, train_labels, validation_data, validation_labels, test_data, test_labels = ld.load()

In [None]:
class FeedForwardNeuralNetwork():

###################################################### Constructor ################################################################

  def __init__(self, input_dim, output_dim, hidden_dim, hidden_layers, activation = "sigmoid", weight_intialisation = "random"):
    np.random.seed(1234)
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.output_dim = output_dim
    self.hidden_layers = hidden_layers
    self.L = hidden_layers
    self.activation = activation
    self.weight_initialisation = weight_initialisation
    self.initialize()


##################################### Function for storing dimensions of all the layers ############################################

  def size(self):
    l_sizes = []
    l_sizes.append(self.input_dim)
    for m in range(self.hidden_layers):
      l_sizes.append(self.hidden_dim)
    l_sizes.append(self.output_dim)
    return l_sizes


######################################## Function for initialization ###########################################

  def initialize(self):
    size = self.size()    

#-------------------------- random weight-initialization ---------------------------------

    if self.weight_initialisation == "random":
      self.Weights=[np.random.randn(size[i + 1], size[i]) for i in range(len(size) - 1)]   

#--------------------------- xavier weight-initialization --------------------------------

    if self.weight_initialisation == "xavier":
      self.Weights=[np.random.randn(size[i + 1], size[i])*np.sqrt(2/(size[i]+size[i+1])) for i in range(len(size) - 1)]

#--------------Initialize Biases, Activation, Preactivation, update weights and update biases ndarray  ----------------------

    self.Update_Weights=[np.zeros((size[i + 1], size[i])) for i in range(len(size) - 1)]
    self.Biases= [np.zeros((size[i + 1], 1)) for i in range(len(size) - 1)]
    self.Update_Biases= [np.zeros((size[i + 1], 1)) for i in range(len(size) - 1)]
    self.A = [np.zeros((size[i + 1], 1)) for i in range(len(size) - 1)]
    self.H = [np.zeros((size[i + 1], 1)) for i in range(len(size) - 1)]
  
 ######################################### Activation Function ####################################################################
  
  def activation_function(self, x, activation = "sigmoid"):
    """Apply the named activation element-wise to x.

    Parameters
    ----------
    x : array-like
        Pre-activation values.
    activation : str
        One of "sigmoid", "tanh", "relu" or "lrelu".

    Returns
    -------
    numpy.ndarray
        Activated values with the same shape as x.

    Raises
    ------
    ValueError
        If the activation name is not recognised (previously returned None).
    """
    if activation == "sigmoid":
      # exp(-|x|) lies in (0, 1], so neither np.where branch can overflow.
      z = np.exp(-np.abs(x))
      return np.where(x >= 0, 1 / (1 + z), z / (1 + z))

    if activation == "tanh":
      # np.tanh saturates to +/-1; the explicit exp ratio gave inf/inf = NaN for large |x|.
      return np.tanh(x)

    if activation == "relu":
      # NOTE(review): this is a ReLU scaled by 0.12, not the standard max(0, x).
      # It is kept as-is because diff_activation_function uses the matching 0.12 slope.
      return np.where(x>=0,0.12*x,0)

    if activation == "lrelu":
      # Leaky ReLU with a negative-side slope of 0.2.
      return np.where(x>0,x,0.2*x)

    raise ValueError("Unknown activation: " + repr(activation))


 ######################################### Function for finding derivative of activation  ######################################

  def diff_activation_function(self, x, activation = "sigmoid"):
    """Return the element-wise derivative of the named activation at x.

    Parameters
    ----------
    x : array-like
        Pre-activation values.
    activation : str
        One of "sigmoid", "tanh", "relu" or "lrelu".

    Returns
    -------
    numpy.ndarray
        Derivative values with the same shape as x.

    Raises
    ------
    ValueError
        If the activation name is not recognised (previously returned None).
    """
    if activation == "sigmoid":
      # Overflow-free sigmoid: exp(-|x|) lies in (0, 1].
      z = np.exp(-np.abs(x))
      fx = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
      return fx * (1 - fx)

    if activation == "tanh":
      # np.tanh saturates cleanly; the explicit exp ratio produced NaN for large |x|.
      fx = np.tanh(x)
      return (1 - np.power((fx),2))

    if activation == "relu":
      # Slope 0.12 matches the scaled ReLU used by activation_function.
      return np.where(x>=0,0.12,0)

    if activation == "lrelu":
      return np.where(x>=0,1,0.2)

    raise ValueError("Unknown activation: " + repr(activation))


######################################### Preactivation Function ###################################################


  def preactivation(self, w, h_prev, b):
    """Return the affine pre-activation: np.dot(w, h_prev) plus the bias b."""
    weighted_input = np.dot(w, h_prev)
    return weighted_input + b

##################################### Output Function using stable Softmax #####################################################


  def output_function(self, x):
    """Numerically stable softmax taken along axis 0.

    Parameters
    ----------
    x : array-like
        Logits. For 2-D input each column is normalised independently.

    Returns
    -------
    numpy.ndarray
        Values of the same shape as x; each column (or the whole vector for
        1-D input) sums to 1.
    """
    # Shift by the per-column maximum. Shifting by the single global maximum
    # let columns far below it underflow to all zeros, giving 0/0 = NaN.
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)


######################################### function for Forward propagation  ###################################################


  def forwardPropagation(self, input_vector):
    activation = self.activation
    L = self.L   #total layers - input layer
    self.A[0] = self.Biases[0] + np.dot(self.Weights[0], input_vector)
    self.H[0] = self.activation_function(self.A[0], activation)

    for k in range(1,L):
      self.A[k] = np.array(self.preactivation(self.Weights[k], self.H[k-1], self.Biases[k]))
      self.H[k] = self.activation_function(self.A[k], activation)

    self.A[L] = np.array(self.preactivation(self.Weights[L], self.H[L-1], self.Biases[L]))
    self.H[L] = self.output_function(self.A[L])

#---------------------------------------------H[L] = y^---------------------------------------------------
    return self.H[L] 




######################################### function for back propagation  ###################################################


  def backPropagation(self, train_data,truelabel,prediction):
    L = self.L
    activation = self.activation
    e = np.zeros((10,1))
    e[truelabel] = 1
    grad_Weights = [0]*(L+1)
    grad_Biases = [0]*(L+1)
    # a = -(e - self.H[L])
    a = -(e - prediction)

    for k in range(L, -1, -1):
      if k==0:
        grad_Weights[k] = a.dot((train_data).T)
      else:
        grad_Weights[k] = a.dot((self.H[k-1]).T)

      grad_Biases[k] = a
      if k!=0:
        second = self.diff_activation_function(self.A[k-1], activation)
        first = (self.Weights[k].T).dot(a)
        a =  np.multiply(first,second)

    return (grad_Weights,grad_Biases)



######################################### converting label to corresponding one hot vector  ###################################################

  def oneHot(self, num, size = 10):
    """Return a (size, 1) integer column with a 1 at index num and 0 elsewhere."""
    flags = [0 for _ in range(size)]
    flags[num] = 1
    return np.array(flags).reshape(size, 1)




######################################### Calculate accuracy  of our model  ###################################################

  def calculate_accuracy(self, test_data, test_labels, limit):
    predictions = []
    count = 0
    for example in test_data[:limit]:
      predictions.append(self.forwardPropagation(example.reshape(784,1)))
    predicted_labels = list(map(np.argmax, predictions))
    for p,l in zip(predicted_labels, test_labels[:limit]):
      if p == l:
        count += 1
    return (count/limit)*100



######################################### Calculate cross entropy loss of our model  ###################################################

  def ce_loss(self, data, labels, limit):
    predictions = []
    count = 0
    for example in data[:limit]:
      predictions.append(self.forwardPropagation(example.reshape(784,1)))
    #-log(predictions[labels[i]])
    total_ce_loss = 0
    for i in range(limit):
      total_ce_loss += -math.log(predictions[i][labels[i]])
    average_ce_loss = total_ce_loss / limit
    return average_ce_loss



  def squared_error_loss(self, data, labels, limit):
    predictions = []
    count = 0
    for example in data[:limit]:
      predictions.append(self.forwardPropagation(example.reshape(784,1)))
    #
    total_sqe_loss = 0
    for i in range(limit):
      total_sqe_loss += sum(np.power(self.oneHot(labels[i])-predictions[i],2))
    average_sqe_loss = total_sqe_loss[0] / limit
    return average_sqe_loss




######################################### Training of our model  ###################################################

  def trainingAlgo(self, opt = 'adam', gamma = 0.9, eta = 1e-4, batch_size = 1, max_epochs = 1,alpha = 0.1, eps = 1e-6, beta = 0.9, limit = 500, vlimit= 500, tlimit = 500):


#------------ loading train data, train labels, validation data, validation labels , test data and test labels  -----------------
    (train_data , train_labels , validation_data , validation_labels , test_data , test_labels) = ld.load()


    N = train_data.shape[0]

    #-------------------------L is number of hidden layers in our model-------------------------------------------
    L = self.L    


    if opt=='adam' or 'nadam':
      m_w,m_b = [0]*(L+1),[0]*(L+1)
      step = 1


#---------------------------------run for loops for maximum number of epochs--------------------------------------------
    for i in range(max_epochs):

      print("\nEpoch : ", i+1)
      g_w, g_b = [0]*(L+1), [0]*(L+1)
      count = 0

      X_train,y_train = shuffle(train_data,train_labels,random_state=0)

      for x, y in zip(X_train[:limit], y_train[:limit]):

        predictions=self.forwardPropagation(x.reshape(784,1))
        (grad_Weights, grad_Biases) = self.backPropagation(x.reshape(784,1),y,predictions)

        for j in range(L+1):
          g_w[j] = g_w[j] + grad_Weights[j]
          g_b[j] = g_b[j] + grad_Biases[j]
        count = count + 1

        if count % batch_size == 0 or count == N :

#-----------------------------update weights and biases for momentum based stochastic gradient descent optimizer-----------------------------------------
          if opt=='momentum':
            for j in range(0,L+1):
              self.Update_Weights[j] = gamma * self.Update_Weights[j] + (eta * grad_Weights[j])
              self.Weights[j] =self.Weights[j]-self.Update_Weights[j]

              self.Update_Biases[j] = gamma * self.Update_Biases[j] + (eta * grad_Biases[j])
              self.Biases[j]  = self.Biases[j] - self.Update_Biases[j]



#-----------------------------update weights and biases for Stochastic gradient descent optimizer-----------------------------------------
          elif opt=='sgd':
            for j in range(L + 1):
              self.Weights[j]   = self.Weights[j]-(eta*grad_Weights[j])
              self.Biases[j]  = self.Biases[j]-(eta * grad_Biases[j])



#-----------------------------update weights and biases for nestrov optimizer-----------------------------------------
          elif opt=='nestrov':
            for j in range(L + 1):
              self.Update_Weights[j] = (gamma * self.Update_Weights[j]) + (eta * g_w[j])
              self.Weights[j]  = self.Weights[j] - ( gamma * prev_w[j] + eta * g_w[j] )

              self.Update_Biases[j] = (gamma *  self.Update_Biases[j]) + (eta * g_b[j])
              self.Biases[j]  = self.Biases[j]- ( gamma *  self.Update_Biases[j] + eta * g_b[j] )



#-----------------------------update weights and biases for RMSProp optimizer-----------------------------------------
          elif opt=='rmsprop':
            for j in range(L + 1):
              self.Update_Weights[j] = beta * self.Update_Weights[j] + (1 - beta) * g_w[j]**2
              self.Weights[j] = ( 1 - eta*alpha ) * self.Update_Weights[j] - eta /( np.sqrt(self.Update_Weights[j])+ eps) * grad_Weights[j]

              self.Update_Biases[j]= beta *  self.Update_Biases[j] + (1 - beta) * g_b[j]**2
              self.Biases[j]  = self.Biases[j] -(eta / (np.sqrt( self.Update_Biases[j]) +eps)) * grad_Biases[j]




#-----------------------------update weights and biases for Adam optimizer-----------------------------------------
          elif opt=='adam':
            # pass
            beta1 = 0.9
            beta2 = 0.999
            for j in range(L + 1):

              self.Update_Weights[j] = beta2 * self.Update_Weights[j] + (1 - beta2) * g_w[j]**2
              m_w[j]=beta1*m_w[j] +(1-beta1) * g_w[j]
              m_w_hat=m_w[j]/(1-math.pow(beta1,step))
              v_w_hat=self.Update_Weights[j]/(1-math.pow(beta2,step))
              self.Weights[j]=(1-eta*alpha/N)*self.Weights[j] -(eta/(np.sqrt(v_w_hat)+eps))*m_w_hat

              self.Update_Biases[j] = beta2 * self.Update_Biases[j] + (1 - beta2) * g_b[j]**2
              m_b[j]=beta1*m_b[j] +(1-beta1) * g_b[j]
              m_b_hat=m_b[j]/(1-math.pow(beta1,step))
              v_b_hat=self.Update_Biases[j]/(1-math.pow(beta2,step))
              self.Biases[j] = self.Biases[j] -(eta / (np.sqrt(v_b_hat)+eps))* m_b_hat



#-----------------------------updating weights and biases for Nadam optimizer-----------------------------------------
          elif opt=='nadam':
            # pass
            beta1 = 0.9
            beta2 = 0.999
            for j in range(L + 1):

              self.Update_Weights[j] = beta2 * self.Update_Weights[j] + (1 - beta2) * g_w[j] ** 2
              m_w[j] = beta1 * m_w[j] + (1 - beta1) * g_w[j]
              m_w_hat = m_w[j] / (1 - math.pow(beta1, step))
              m_w_hat = beta1 * m_w_hat + ((1 - beta1) * g_w[j]) / (1 - math.pow(beta1, step))
              v_w_hat=self.Update_Weights[j]/(1-math.pow(beta2,step))
              self.Weights[j] = (1 - eta * alpha) * self.Weights[j] -(eta /( np.sqrt(v_w_hat) + eps)) * m_w_hat

              self.Update_Biases[j] = beta2 * self.Update_Biases[j] + (1 - beta2) * g_b[j] ** 2
              m_b[j] = beta1 * m_b[j] + (1 - beta1) * g_b[j]
              m_b_hat = beta1 * (m_b[j] / (1 - math.pow(beta1, step))) + ((1 - beta1) * g_b[j]) / (1 - math.pow(beta1, step))
              v_b_hat=self.Update_Biases[j]/(1-math.pow(beta2,step))
              self.Biases[j] = self.Biases[j]-(eta/(np.sqrt(v_b_hat)+eps))*m_b_hat

          else:
            pass

#--------------------------------------------weights and biases has been updated---------------------------------------------
          g_w=[0]*(L+1)
          g_b=[0]*(L+1)
          step = step + 1

     #--------------------------------------------calculating training accuracy-----------------------------------------
      train_acc = self.calculate_accuracy(train_data, train_labels, limit)


      #--------------------------------------------calculating validation accuracy-----------------------------------------
      val_acc = self.calculate_accuracy(validation_data, validation_labels, vlimit)


      #--------------------------------------------calculating testing accuracy-----------------------------------------
      test_acc = self.calculate_accuracy(test_data, test_labels, tlimit)


      #----------------------------------------   calculating cross entropy loss for training  ----------------------------------------
      train_ce_loss = self.ce_loss(train_data, train_labels, limit)


      #----------------------------------------   calculating cross entropy loss for validation  ----------------------------------------
      valid_ce_loss = self.ce_loss(validation_data, validation_labels, vlimit)

      
      #----------------------------------------   calculating cross entropy loss for testing ----------------------------------------
      test_ce_loss = self.ce_loss(test_data, test_labels, tlimit)




    
#================================================== Creating Confusion Matrix  =====================================================

#----------------------------------------  predict labels for test data ---------------------------------------------------
    predictions=[]
    for x, y in zip(test_data, test_labels):
      predictions.append(np.argmax(self.forwardPropagation(x.reshape(784,1))))

#------------------------------------------------class names for given dataset ----------------------------------------------------------
    class_names = ["t-shirt/top", 'trouser/pants', 'pullover shirt', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    wandb.log({"conf_mat" : wandb.plot.confusion_matrix(probs=None, y_true = test_labels, preds = predictions, class_names = class_names)})
  


#-----------------------------return from function training algo -----------------------------------------------------------
    return




#================================================================== end of class ========================================================================================================================================================================


In [None]:
#===================================================== default initialisation ==========================================================================================================================================================

# Network architecture defaults
number_of_hidden_layers = 1
number_of_neurons_in_hidden_layer = 128
activation_function = "sigmoid"
weight_initialisation = "xavier"

# Optimiser defaults
optimiser = "adam"
gamma = 0.9
eta = 1e-4
alpha = 0.005
batch_size = 1
max_epochs = 1

# Use every example of each split
train_limit, test_limit, validation_limit = len(train_data), len(test_data), len(validation_data)

In [None]:
#=================================================================== Sweep configuration =============================================================================================================================
# Random search over architecture and optimiser settings; the sweep maximises 'train_acc'.
sweep_config = {
    "method": "random",
    "metric": {
        "name": "train_acc",
        "goal": "maximize",
    },
    "parameters": {
        "num_hidden": {"values": [3, 4, 5]},
        "hidden_layer_size": {"values": [20, 25, 32, 128]},
        "learning_rate": {"values": [1e-3, 1e-4]},
        "num_epochs": {"values": [5, 10]},
        "batch_size": {"values": [16, 32, 64]},
        "optimisation": {"values": ["sgd", "momentum", "nesterov", "rmsprop", "adam", "nadam"]},
        "activation_function": {"values": ["sigmoid", "tanh", "relu", "lrelu"]},
        "weight_initialisation": {"values": ["random", "xavier"]},
        "weight_decay": {"values": [0.0005, 0.5, 0]},
    },
}
sweep_id = wandb.sweep(sweep_config, project="sweep_1")

Create sweep with ID: cxw987bn
Sweep URL: https://wandb.ai/anshikag_2210/sweep_1/sweeps/cxw987bn


In [None]:
def run():
  """Run one sweep trial: start a wandb run, build the network from its config, and train it."""
  trial = wandb.init()
  cfg = trial.config

  # 784 inputs and 10 output classes; everything else comes from the sweep config.
  network = FeedForwardNeuralNetwork(
      784,
      10,
      cfg.hidden_layer_size,
      cfg.num_hidden,
      cfg.activation_function,
      cfg.weight_initialisation,
  )
  network.trainingAlgo(
      opt = cfg.optimisation,
      gamma = 0.9,
      eta = cfg.learning_rate,
      batch_size = cfg.batch_size,
      max_epochs = cfg.num_epochs,
      alpha = cfg.weight_decay,
      limit = train_limit,
      vlimit = validation_limit,
      tlimit = test_limit,
  )

In [None]:
# Start a sweep agent that calls run() once for each configuration it receives.
wandb.agent(sweep_id, function=run)

[34m[1mwandb[0m: Agent Starting Run: 0o8f9gpx with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 4
[34m[1mwandb[0m: 	optimisation: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialisation: xavier
[34m[1mwandb[0m: Currently logged in as: [33manshikag_2210[0m (use `wandb login --relogin` to force relogin)



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,81.08889
val_acc,80.85
test_acc,79.69
train_ce_loss,0.55636
valid_ce_loss,0.5678
test_ce_loss,0.58642
epoch,9.0
_runtime,500.0
_timestamp,1616018089.0
_step,10.0


0,1
train_acc,▁▅▆▇▇▇████
val_acc,▁▅▆▇▇▇████
test_acc,▁▅▆▇▇█████
train_ce_loss,█▅▃▂▂▂▁▁▁▁
valid_ce_loss,█▅▃▂▂▂▁▁▁▁
test_ce_loss,█▅▃▂▂▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▇▇██
_timestamp,▁▂▃▃▄▅▆▇▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: 6v0y09x6 with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 25
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 4
[34m[1mwandb[0m: 	optimisation: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.14681365958…

0,1
train_acc,82.02593
val_acc,81.63333
test_acc,80.1
train_ce_loss,0.46618
valid_ce_loss,0.49855
test_ce_loss,0.52086
epoch,4.0
_runtime,342.0
_timestamp,1616018437.0
_step,5.0


0,1
train_acc,▁▅▇██
val_acc,▁▆███
test_acc,▁▅███
train_ce_loss,█▄▂▁▁
valid_ce_loss,█▄▂▁▁
test_ce_loss,█▄▂▁▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: v1ogpokm with config:
[34m[1mwandb[0m: 	activation_function: lrelu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.14638069705…

0,1
train_acc,28.42222
val_acc,27.75
test_acc,28.44
train_ce_loss,1.91736
valid_ce_loss,1.92271
test_ce_loss,1.91767
epoch,4.0
_runtime,176.0
_timestamp,1616018618.0
_step,5.0


0,1
train_acc,▁▃▄▆█
val_acc,▁▃▄▆█
test_acc,▁▄▄▆█
train_ce_loss,█▆▄▃▁
valid_ce_loss,█▆▄▃▁
test_ce_loss,█▆▄▃▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: 2l7xi7gn with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.14669532509…

0,1
train_acc,76.67593
val_acc,76.66667
test_acc,76.28
train_ce_loss,0.61741
valid_ce_loss,0.62028
test_ce_loss,0.63762
epoch,4.0
_runtime,1010.0
_timestamp,1616019633.0
_step,5.0


0,1
train_acc,▁█▇▆▇
val_acc,▁█▇▆▇
test_acc,▁█▇▇▇
train_ce_loss,█▁▂▂▂
valid_ce_loss,█▁▂▂▂
test_ce_loss,█▁▂▂▂
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: v7bt1j27 with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,10.02963
val_acc,9.73333
test_acc,10.0
train_ce_loss,2.62111
valid_ce_loss,2.62072
test_ce_loss,2.62107
epoch,4.0
_runtime,369.0
_timestamp,1616020008.0
_step,5.0


0,1
train_acc,▁▁▁▁▁
val_acc,▁▁▁▁▁
test_acc,▁▁▁▁▁
train_ce_loss,▁▁▁▁▁
valid_ce_loss,▁▁▁▁▁
test_ce_loss,▁▁▁▁▁
epoch,▁▃▅▆█
_runtime,▁▃▅▆██
_timestamp,▁▃▅▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: 4tsbgg0i with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,87.79444
val_acc,86.4
test_acc,85.55
train_ce_loss,0.33486
valid_ce_loss,0.37518
test_ce_loss,0.39872
epoch,4.0
_runtime,216.0
_timestamp,1616020230.0
_step,5.0


0,1
train_acc,▁▄▆▇█
val_acc,▁▄▇██
test_acc,▁▄▇██
train_ce_loss,█▄▃▂▁
valid_ce_loss,█▄▂▁▁
test_ce_loss,█▄▂▁▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: bstr946q with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.14657718120…

0,1
train_acc,80.61667
val_acc,79.7
test_acc,79.34
train_ce_loss,0.64299
valid_ce_loss,0.67083
test_ce_loss,0.70937
epoch,9.0
_runtime,1261.0
_timestamp,1616021497.0
_step,10.0


0,1
train_acc,▁▄▅▅▆▆▆▆▇█
val_acc,▁▄▅▄▆▆▆▆▇█
test_acc,▁▄▅▄▆▆▆▆▇█
train_ce_loss,█▃▂▄▁▂▃▅▄▂
valid_ce_loss,█▃▂▄▁▃▃▆▅▂
test_ce_loss,█▃▂▄▁▃▄▇▇▄
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: z3letfhi with config:
[34m[1mwandb[0m: 	activation_function: lrelu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 25
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.14661654135…

0,1
train_acc,67.23889
val_acc,67.55
test_acc,66.28
train_ce_loss,0.81694
valid_ce_loss,0.81602
test_ce_loss,0.83629
epoch,4.0
_runtime,186.0
_timestamp,1616021689.0
_step,5.0


0,1
train_acc,▁▅▇▇█
val_acc,▁▅▇▇█
test_acc,▁▅▇▇█
train_ce_loss,█▄▂▁▁
valid_ce_loss,█▄▂▁▁
test_ce_loss,█▄▂▁▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: rbbbie0x with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 4
[34m[1mwandb[0m: 	optimisation: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,69.9
val_acc,70.26667
test_acc,68.63
train_ce_loss,0.96306
valid_ce_loss,0.95381
test_ce_loss,0.9771
epoch,9.0
_runtime,1470.0
_timestamp,1616023166.0
_step,10.0


0,1
train_acc,▁▅▆▆▇▇████
val_acc,▁▅▆▆▇▇████
test_acc,▁▅▆▆▇▇████
train_ce_loss,█▆▅▄▃▂▂▂▁▁
valid_ce_loss,█▆▅▄▃▂▂▂▁▁
test_ce_loss,█▆▅▄▃▂▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: 8fj7tlko with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.14501992031…

0,1
train_acc,19.07963
val_acc,19.26667
test_acc,18.61
train_ce_loss,2.31993
valid_ce_loss,2.31806
test_ce_loss,2.32168
epoch,4.0
_runtime,615.0
_timestamp,1616023788.0
_step,5.0


0,1
train_acc,▁▁▁▁▁
val_acc,▁▁▁▁▁
test_acc,▁▁▁▁▁
train_ce_loss,▁▁▁▁▁
valid_ce_loss,▁▁▁▁▁
test_ce_loss,▁▁▁▁▁
epoch,▁▃▅▆█
_runtime,▁▃▅▆██
_timestamp,▁▃▅▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: 20xl7472 with config:
[34m[1mwandb[0m: 	activation_function: lrelu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 4
[34m[1mwandb[0m: 	optimisation: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.14681365958…

0,1
train_acc,87.42222
val_acc,85.93333
test_acc,85.38
train_ce_loss,0.33942
valid_ce_loss,0.37172
test_ce_loss,0.39905
epoch,4.0
_runtime,684.0
_timestamp,1616024479.0
_step,5.0


0,1
train_acc,▁▄▆▇█
val_acc,▁▄▆▇█
test_acc,▁▅▆▇█
train_ce_loss,█▅▃▂▁
valid_ce_loss,█▅▃▂▁
test_ce_loss,█▅▃▂▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: u45bf3li with config:
[34m[1mwandb[0m: 	activation_function: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16259298618…

0,1
train_acc,9.94444
val_acc,10.5
test_acc,10.0
train_ce_loss,2.3026
valid_ce_loss,2.30264
test_ce_loss,2.3026
epoch,4.0
_runtime,268.0
_timestamp,1616024757.0
_step,5.0


0,1
train_acc,▁▁▁▁▁
val_acc,▁▁▁▁▁
test_acc,▁▁▁▁▁
train_ce_loss,▁▂▃▅█
valid_ce_loss,▁▂▄▆█
test_ce_loss,▁▂▃▅█
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: su5t2guv with config:
[34m[1mwandb[0m: 	activation_function: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16101026045…

0,1
train_acc,66.03333
val_acc,66.43333
test_acc,65.76
train_ce_loss,0.91525
valid_ce_loss,0.91327
test_ce_loss,0.92541
epoch,9.0
_runtime,1375.0
_timestamp,1616026141.0
_step,10.0


0,1
train_acc,▁▅▆▇▇▇▇███
val_acc,▁▅▆▇▇▇████
test_acc,▁▅▆▇▇▇▇███
train_ce_loss,█▄▃▂▂▂▂▁▁▁
valid_ce_loss,█▄▃▂▂▂▂▁▁▁
test_ce_loss,█▄▃▂▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: d0rk11no with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16033534189…

0,1
train_acc,6.06667
val_acc,5.61667
test_acc,5.81
train_ce_loss,2.35539
valid_ce_loss,2.3595
test_ce_loss,2.3583
epoch,4.0
_runtime,243.0
_timestamp,1616026394.0
_step,5.0


0,1
train_acc,▁▁▁▁▁
val_acc,▁▁▁▁▁
test_acc,▁▁▁▁▁
train_ce_loss,▁▁▁▁▁
valid_ce_loss,▁▁▁▁▁
test_ce_loss,▁▁▁▁▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: w5ee3lix with config:
[34m[1mwandb[0m: 	activation_function: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 25
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16117987885…

0,1
train_acc,73.3
val_acc,73.13333
test_acc,72.92
train_ce_loss,0.72354
valid_ce_loss,0.72485
test_ce_loss,0.74024
epoch,9.0
_runtime,514.0
_timestamp,1616026920.0
_step,10.0


0,1
train_acc,▁▅▆▇▇▇▇███
val_acc,▁▅▆▇▇▇▇███
test_acc,▁▅▆▇▇▇▇███
train_ce_loss,█▅▄▃▂▂▂▁▁▁
valid_ce_loss,█▅▄▃▂▂▂▁▁▁
test_ce_loss,█▅▄▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: vkopj89v with config:
[34m[1mwandb[0m: 	activation_function: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 4
[34m[1mwandb[0m: 	optimisation: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16152019002…

0,1
train_acc,7.59815
val_acc,7.41667
test_acc,7.3
train_ce_loss,2.30259
valid_ce_loss,2.30259
test_ce_loss,2.30259
epoch,4.0
_runtime,565.0
_timestamp,1616027494.0
_step,5.0


0,1
train_acc,▁▁▁▁▁
val_acc,▁▁▁▁▁
test_acc,▁▁▁▁▁
train_ce_loss,▁▁▁▁▁
valid_ce_loss,▁▁▁▁▁
test_ce_loss,▁▁▁▁▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: z09x19a8 with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16147757255…

0,1
train_acc,66.14444
val_acc,66.56667
test_acc,65.26
train_ce_loss,1.12347
valid_ce_loss,1.1179
test_ce_loss,1.1335
epoch,4.0
_runtime,269.0
_timestamp,1616027773.0
_step,5.0


0,1
train_acc,▁▅▇▇█
val_acc,▁▅▇▇█
test_acc,▁▅▇▇█
train_ce_loss,█▅▄▂▁
valid_ce_loss,█▅▄▂▁
test_ce_loss,█▅▄▂▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: 0zx8j5xc with config:
[34m[1mwandb[0m: 	activation_function: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16259298618…

0,1
train_acc,10.08333
val_acc,9.25
test_acc,10.0
train_ce_loss,2.3046
valid_ce_loss,2.30701
test_ce_loss,2.30484
epoch,9.0
_runtime,1284.0
_timestamp,1616029067.0
_step,10.0


0,1
train_acc,▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
test_acc,▁▁▁▁▁▁▁▁▁▁
train_ce_loss,▁▂▄▅▆▆▇▇██
valid_ce_loss,▁▃▄▅▆▇▇███
test_ce_loss,▁▂▄▅▆▇▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: 5cewti76 with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,83.06481
val_acc,82.45
test_acc,81.86
train_ce_loss,0.45746
valid_ce_loss,0.46212
test_ce_loss,0.49149
epoch,4.0
_runtime,708.0
_timestamp,1616029784.0
_step,5.0


0,1
train_acc,▁▅▆▇█
val_acc,▁▄▇█▆
test_acc,▁▄▅▆█
train_ce_loss,█▄▃▂▁
valid_ce_loss,█▄▃▂▁
test_ce_loss,█▄▃▂▁
epoch,▁▃▅▆█
_runtime,▁▃▅▆██
_timestamp,▁▃▅▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: gctl3c1v with config:
[34m[1mwandb[0m: 	activation_function: lrelu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16164817749…

0,1
train_acc,86.08333
val_acc,85.1
test_acc,84.1
train_ce_loss,0.37363
valid_ce_loss,0.40194
test_ce_loss,0.43771
epoch,4.0
_runtime,239.0
_timestamp,1616030032.0
_step,5.0


0,1
train_acc,▁▄▇▇█
val_acc,▁▃▇▇█
test_acc,▁▃▇▇█
train_ce_loss,█▅▂▂▁
valid_ce_loss,█▅▂▂▁
test_ce_loss,█▅▂▂▁
epoch,▁▃▅▆█
_runtime,▁▃▅▆██
_timestamp,▁▃▅▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: gcxbj3mq with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16259298618…

0,1
train_acc,10.08333
val_acc,9.25
test_acc,10.0
train_ce_loss,2.30399
valid_ce_loss,2.30607
test_ce_loss,2.3042
epoch,4.0
_runtime,222.0
_timestamp,1616030263.0
_step,5.0


0,1
train_acc,▁████
val_acc,█▁▁▁▁
test_acc,▁▁▁▁▁
train_ce_loss,▁▃▅▆█
valid_ce_loss,▁▃▅▇█
test_ce_loss,▁▃▅▆█
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: pk0ytxs0 with config:
[34m[1mwandb[0m: 	activation_function: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 25
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16160549247…

0,1
train_acc,9.13889
val_acc,9.11667
test_acc,8.88
train_ce_loss,2.30259
valid_ce_loss,2.30259
test_ce_loss,2.30259
epoch,9.0
_runtime,500.0
_timestamp,1616030772.0
_step,10.0


0,1
train_acc,▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
test_acc,▁▁▁▁▁▁▁▁▁▁
train_ce_loss,▁▁▁▁▁▁▁▁▁▁
valid_ce_loss,▁▁▁▁▁▁▁▁▁▁
test_ce_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: 3cbgvj8a with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 4
[34m[1mwandb[0m: 	optimisation: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16177636796…

0,1
train_acc,86.9037
val_acc,85.81667
test_acc,84.82
train_ce_loss,0.34908
valid_ce_loss,0.39358
test_ce_loss,0.42826
epoch,4.0
_runtime,823.0
_timestamp,1616031606.0
_step,5.0


0,1
train_acc,▂▁▄▅█
val_acc,▂▁▄▄█
test_acc,▃▁▄▆█
train_ce_loss,▇█▅▃▁
valid_ce_loss,▆█▅▃▁
test_ce_loss,▅█▅▂▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: hnq2l2uo with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 4
[34m[1mwandb[0m: 	optimisation: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16156282998…

0,1
train_acc,79.15
val_acc,78.15
test_acc,78.58
train_ce_loss,0.58104
valid_ce_loss,0.58746
test_ce_loss,0.60728
epoch,9.0
_runtime,1742.0
_timestamp,1616033358.0
_step,10.0


0,1
train_acc,▁▁▂▄▅▆▆▇▇█
val_acc,▂▁▁▄▆▆█▇▅▆
test_acc,▂▁▂▄▄▅▆▇██
train_ce_loss,█▆▆▄▃▃▂▂▁▁
valid_ce_loss,█▇▆▅▄▃▃▂▂▁
test_ce_loss,█▇▆▅▄▃▃▂▂▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: s4xgd1hi with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 4
[34m[1mwandb[0m: 	optimisation: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,7.30556
val_acc,7.35
test_acc,7.03
train_ce_loss,2.41154
valid_ce_loss,2.40222
test_ce_loss,2.41251
epoch,9.0
_runtime,506.0
_timestamp,1616033873.0
_step,10.0


0,1
train_acc,▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
test_acc,▁▁▁▁▁▁▁▁▁▁
train_ce_loss,▁▁▁▁▁▁▁▁▁▁
valid_ce_loss,▁▁▁▁▁▁▁▁▁▁
test_ce_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: vsbrqyhu with config:
[34m[1mwandb[0m: 	activation_function: lrelu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16259298618…

0,1
train_acc,9.94444
val_acc,10.5
test_acc,10.0
train_ce_loss,2.30261
valid_ce_loss,2.30267
test_ce_loss,2.30261
epoch,9.0
_runtime,499.0
_timestamp,1616034381.0
_step,10.0


0,1
train_acc,▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
test_acc,▁▁▁▁▁▁▁▁▁▁
train_ce_loss,▁▁▁▂▂▃▄▅▇█
valid_ce_loss,▁▂▂▃▄▅▅▆▇█
test_ce_loss,▁▁▂▂▃▄▅▆▇█
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: mqy4ud7h with config:
[34m[1mwandb[0m: 	activation_function: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16092558506…

0,1
train_acc,67.43704
val_acc,67.91667
test_acc,67.15
train_ce_loss,0.87632
valid_ce_loss,0.87622
test_ce_loss,0.89139
epoch,9.0
_runtime,1314.0
_timestamp,1616035704.0
_step,10.0


0,1
train_acc,▁▅▆▇▇▇▇███
val_acc,▁▅▆▇▇▇████
test_acc,▁▅▆▇▇▇▇███
train_ce_loss,█▄▃▃▂▂▂▁▁▁
valid_ce_loss,█▄▃▃▂▂▂▁▁▁
test_ce_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: k1ccvslx with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run k1ccvslx errored: ValueError('math domain error')
[34m[1mwandb[0m: [32m[41mERROR[0m Run k1ccvslx errored: ValueError('math domain error')
[34m[1mwandb[0m: Agent Starting Run: 42kvh8kw with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1




VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run 42kvh8kw errored: ValueError('math domain error')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 42kvh8kw errored: ValueError('math domain error')
[34m[1mwandb[0m: Agent Starting Run: fq02l674 with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16139240506…

0,1
train_acc,76.87963
val_acc,76.31667
test_acc,75.84
train_ce_loss,0.63813
valid_ce_loss,0.6438
test_ce_loss,0.66036
epoch,9.0
_runtime,1221.0
_timestamp,1616037071.0
_step,10.0


0,1
train_acc,▁▄▅▆▆▇▇███
val_acc,▁▃▅▆▇▇▇███
test_acc,▁▃▅▆▆▇▇▇██
train_ce_loss,█▅▃▃▂▂▂▁▁▁
valid_ce_loss,█▅▃▃▂▂▁▁▁▁
test_ce_loss,█▅▃▃▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: 484nyplj with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 25
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: nesterov
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16259298618…

0,1
train_acc,10.01667
val_acc,9.85
test_acc,10.0
train_ce_loss,2.51974
valid_ce_loss,2.52626
test_ce_loss,2.52039
epoch,9.0
_runtime,777.0
_timestamp,1616037858.0
_step,10.0


0,1
train_acc,▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
test_acc,▁▁▁▁▁▁▁▁▁▁
train_ce_loss,▁▁▁▁▁▁▁▁▁▁
valid_ce_loss,▁▁▁▁▁▁▁▁▁▁
test_ce_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: u1u0ho5p with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run u1u0ho5p errored: ValueError('math domain error')
[34m[1mwandb[0m: [32m[41mERROR[0m Run u1u0ho5p errored: ValueError('math domain error')
[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8ortdydi with config:
[34m[1mwandb[0m: 	activation_function: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,10.01667
val_acc,9.85
test_acc,10.0
train_ce_loss,2.3026
valid_ce_loss,2.30262
test_ce_loss,2.3026
epoch,9.0
_runtime,1482.0
_timestamp,1616039412.0
_step,10.0


0,1
train_acc,▁▁▁▁██████
val_acc,████▁▁▁▁▁▁
test_acc,▁▁▁▁▁▁▁▁▁▁
train_ce_loss,▁▁▁▂▂▃▄▅▆█
valid_ce_loss,▁▂▂▃▄▄▅▆▇█
test_ce_loss,▁▁▂▂▃▃▄▆▇█
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▂▃▃▄▄▅▇██
_timestamp,▁▂▂▃▃▄▄▅▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: gw7q3set with config:
[34m[1mwandb[0m: 	activation_function: lrelu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 25
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,88.49074
val_acc,86.85
test_acc,86.25
train_ce_loss,0.30796
valid_ce_loss,0.35588
test_ce_loss,0.38406
epoch,9.0
_runtime,372.0
_timestamp,1616039793.0
_step,10.0


0,1
train_acc,▁▃▄▅▅▆▇▇██
val_acc,▁▃▄▆▆▇████
test_acc,▁▃▅▅▆▇▇███
train_ce_loss,█▆▅▄▃▃▂▂▁▁
valid_ce_loss,█▆▅▄▃▂▂▁▁▁
test_ce_loss,█▆▅▄▃▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: btso0drf with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16164817749…

0,1
train_acc,63.89074
val_acc,64.8
test_acc,64.04
train_ce_loss,0.89491
valid_ce_loss,0.89562
test_ce_loss,0.902
epoch,9.0
_runtime,534.0
_timestamp,1616040336.0
_step,10.0


0,1
train_acc,▁▂▃▅▆▆▆▇██
val_acc,▁▂▃▅▆▆▆▇██
test_acc,▁▂▃▅▆▆▆▇██
train_ce_loss,█▆▅▄▃▃▂▂▁▁
valid_ce_loss,█▆▅▄▃▃▂▂▁▁
test_ce_loss,█▆▅▄▃▃▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: 6ymj8747 with config:
[34m[1mwandb[0m: 	activation_function: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 4
[34m[1mwandb[0m: 	optimisation: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16259298618…

0,1
train_acc,10.08333
val_acc,9.25
test_acc,10.0
train_ce_loss,2.30751
valid_ce_loss,2.3106
test_ce_loss,2.30782
epoch,9.0
_runtime,1203.0
_timestamp,1616041548.0
_step,10.0


0,1
train_acc,▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
test_acc,▁▁▁▁▁▁▁▁▁▁
train_ce_loss,▁█████████
valid_ce_loss,▁█████████
test_ce_loss,▁█████████
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: ueuaplpa with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 25
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,87.60741
val_acc,86.3
test_acc,85.33
train_ce_loss,0.35824
valid_ce_loss,0.3879
test_ce_loss,0.41491
epoch,4.0
_runtime,268.0
_timestamp,1616041825.0
_step,5.0


0,1
train_acc,▁▄▇▇█
val_acc,▁▄▇██
test_acc,▁▄▇██
train_ce_loss,█▄▂▂▁
valid_ce_loss,█▄▂▁▁
test_ce_loss,█▄▂▁▁
epoch,▁▃▅▆█
_runtime,▁▃▅▆██
_timestamp,▁▃▅▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: qyvp6fp2 with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 4
[34m[1mwandb[0m: 	optimisation: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16177636796…

0,1
train_acc,90.42778
val_acc,87.83333
test_acc,86.99
train_ce_loss,0.25912
valid_ce_loss,0.35842
test_ce_loss,0.37793
epoch,9.0
_runtime,1445.0
_timestamp,1616043280.0
_step,10.0


0,1
train_acc,▁▃▆▆▆▇▆▇▇█
val_acc,▁▄▇█▇▇▆▇▇█
test_acc,▁▄▇▇▇█▆▇▇█
train_ce_loss,█▆▃▃▃▂▃▂▂▁
valid_ce_loss,█▅▁▁▂▃▅▃▄▃
test_ce_loss,█▅▂▁▂▂▄▂▃▃
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: kgyifakq with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16259298618…

0,1
train_acc,10.02963
val_acc,9.73333
test_acc,10.0
train_ce_loss,2.62111
valid_ce_loss,2.62072
test_ce_loss,2.62107
epoch,4.0
_runtime,377.0
_timestamp,1616043666.0
_step,5.0


0,1
train_acc,▁▁▁▁▁
val_acc,▁▁▁▁▁
test_acc,▁▁▁▁▁
train_ce_loss,▁▁▁▁▁
valid_ce_loss,▁▁▁▁▁
test_ce_loss,▁▁▁▁▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: a9c0fqkz with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.15974941268…

0,1
train_acc,19.07963
val_acc,19.26667
test_acc,18.61
train_ce_loss,2.31993
valid_ce_loss,2.31806
test_ce_loss,2.32168
epoch,9.0
_runtime,1204.0
_timestamp,1616044880.0
_step,10.0


0,1
train_acc,▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
test_acc,▁▁▁▁▁▁▁▁▁▁
train_ce_loss,▁▁▁▁▁▁▁▁▁▁
valid_ce_loss,▁▁▁▁▁▁▁▁▁▁
test_ce_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: hqdms56z with config:
[34m[1mwandb[0m: 	activation_function: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16143497757…

0,1
train_acc,73.92778
val_acc,73.53333
test_acc,73.28
train_ce_loss,0.66582
valid_ce_loss,0.68625
test_ce_loss,0.69786
epoch,4.0
_runtime,702.0
_timestamp,1616045592.0
_step,5.0


0,1
train_acc,▁▆▇██
val_acc,▁▆▇▇█
test_acc,▁▆▇▇█
train_ce_loss,█▄▃▂▁
valid_ce_loss,█▄▂▂▁
test_ce_loss,█▄▂▂▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: fjartts1 with config:
[34m[1mwandb[0m: 	activation_function: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,57.79259
val_acc,58.11667
test_acc,57.51
train_ce_loss,1.08827
valid_ce_loss,1.07962
test_ce_loss,1.09642
epoch,4.0
_runtime,186.0
_timestamp,1616045787.0
_step,5.0


0,1
train_acc,▁▂▆▇█
val_acc,▁▂▆▇█
test_acc,▁▂▅▇█
train_ce_loss,█▄▃▂▁
valid_ce_loss,█▄▃▂▁
test_ce_loss,█▄▃▂▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: jikb4n9m with config:
[34m[1mwandb[0m: 	activation_function: lrelu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16186194128…

0,1
train_acc,89.7037
val_acc,87.88333
test_acc,87.46
train_ce_loss,0.27514
valid_ce_loss,0.33075
test_ce_loss,0.35428
epoch,4.0
_runtime,655.0
_timestamp,1616046451.0
_step,5.0


0,1
train_acc,▁▄▆▇█
val_acc,▁▃▆▇█
test_acc,▁▄▆▆█
train_ce_loss,█▅▃▂▁
valid_ce_loss,█▅▃▂▁
test_ce_loss,█▅▃▂▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: mp2lpapl with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16092558506…

0,1
train_acc,63.86481
val_acc,64.01667
test_acc,63.98
train_ce_loss,0.88499
valid_ce_loss,0.8835
test_ce_loss,0.88999
epoch,4.0
_runtime,673.0
_timestamp,1616047133.0
_step,5.0


0,1
train_acc,▁▅▇██
val_acc,▁▆▇██
test_acc,▁▅▇██
train_ce_loss,█▄▂▂▁
valid_ce_loss,█▄▂▁▁
test_ce_loss,█▄▂▂▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: c87uz6d3 with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 25
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Run c87uz6d3 errored: ValueError('math domain error')
[34m[1mwandb[0m: [32m[41mERROR[0m Run c87uz6d3 errored: ValueError('math domain error')
[34m[1mwandb[0m: Agent Starting Run: ylqxe74s with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16216216216…

0,1
train_acc,32.23333
val_acc,32.18333
test_acc,32.24
train_ce_loss,1.4572
valid_ce_loss,1.4597
test_ce_loss,1.45948
epoch,9.0
_runtime,832.0
_timestamp,1616048039.0
_step,10.0


0,1
train_acc,▁▃▃▃▅▆▇███
val_acc,▁▄▄▃▅▆▇███
test_acc,▁▃▄▃▅▆▇███
train_ce_loss,█▆▄▃▃▂▂▁▁▁
valid_ce_loss,█▆▄▃▃▂▂▁▁▁
test_ce_loss,█▆▄▃▃▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: 5cb3muts with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 4
[34m[1mwandb[0m: 	optimisation: nesterov
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialisation: random



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,10.01667
val_acc,9.85
test_acc,10.0
train_ce_loss,2.39383
valid_ce_loss,2.40495
test_ce_loss,2.39495
epoch,9.0
_runtime,658.0
_timestamp,1616048706.0
_step,10.0


0,1
train_acc,▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁
test_acc,▁▁▁▁▁▁▁▁▁▁
train_ce_loss,▁▁▁▁▁▁▁▁▁▁
valid_ce_loss,▁▁▁▁▁▁▁▁▁▁
test_ce_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▅▆▇██
_timestamp,▁▂▃▃▄▅▅▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: i6pn3y3i with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16143497757…

0,1
train_acc,40.76296
val_acc,40.21667
test_acc,40.44
train_ce_loss,1.53394
valid_ce_loss,1.53172
test_ce_loss,1.53974
epoch,4.0
_runtime,391.0
_timestamp,1616049106.0
_step,5.0


0,1
train_acc,▁▃▄▆█
val_acc,▁▃▄▇█
test_acc,▁▃▄▆█
train_ce_loss,█▆▃▂▁
valid_ce_loss,█▆▃▂▁
test_ce_loss,█▆▃▂▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: qjww0ejd with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 25
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.16160549247…

0,1
train_acc,85.22593
val_acc,84.48333
test_acc,84.02
train_ce_loss,0.43194
valid_ce_loss,0.44302
test_ce_loss,0.46708
epoch,4.0
_runtime,325.0
_timestamp,1616049441.0
_step,5.0


0,1
train_acc,▁▅▇▇█
val_acc,▁▅▆▇█
test_acc,▁▅▇██
train_ce_loss,█▄▂▁▁
valid_ce_loss,█▄▂▁▁
test_ce_loss,█▄▂▁▁
epoch,▁▃▅▆█
_runtime,▁▃▄▆██
_timestamp,▁▃▄▆██
_step,▁▂▄▅▇█


[34m[1mwandb[0m: Agent Starting Run: 97l4pb4t with config:
[34m[1mwandb[0m: 	activation_function: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 25
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,10.06481
val_acc,9.41667
test_acc,10.0
train_ce_loss,2.3026
valid_ce_loss,2.3033
test_ce_loss,2.30268
epoch,9.0
_runtime,364.0
_timestamp,1616049814.0
_step,10.0


0,1
train_acc,▁█████████
val_acc,▁▁▁▁▁▁▁▁▁▁
test_acc,▁▁▁▁▁▁▁▁▁▁
train_ce_loss,▃▂▁▁▁▂▃▄▆█
valid_ce_loss,▁▂▂▃▄▅▅▆▇█
test_ce_loss,▁▁▁▁▂▃▄▅▆█
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: 0npta69a with config:
[34m[1mwandb[0m: 	activation_function: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden: 5
[34m[1mwandb[0m: 	optimisation: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1

Epoch :  2

Epoch :  3

Epoch :  4

Epoch :  5

Epoch :  6

Epoch :  7

Epoch :  8

Epoch :  9

Epoch :  10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,74.96481
val_acc,75.91667
test_acc,74.52
train_ce_loss,0.71313
valid_ce_loss,0.71549
test_ce_loss,0.73455
epoch,9.0
_runtime,1817.0
_timestamp,1616051640.0
_step,10.0


0,1
train_acc,▁▂▃▅▆▇▇███
val_acc,▁▂▃▅▆▇▇███
test_acc,▁▂▃▅▆▇▇███
train_ce_loss,█▇▆▄▃▂▂▁▁▁
valid_ce_loss,█▇▆▄▃▂▂▁▁▁
test_ce_loss,█▇▆▄▃▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
_runtime,▁▂▃▃▄▅▆▆▇██
_timestamp,▁▂▃▃▄▅▆▆▇██
_step,▁▂▂▃▄▅▅▆▇▇█


[34m[1mwandb[0m: Agent Starting Run: 38rqfcd3 with config:
[34m[1mwandb[0m: 	activation_function: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden: 3
[34m[1mwandb[0m: 	optimisation: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initialisation: xavier



Epoch :  1


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [None]:
#========================================= wandb finish function ====================================================
# Close out the Weights & Biases session after the sweep cell above has stopped
# (its recorded output ends with "Ctrl + C detected. Stopping sweep.").
# NOTE(review): per the wandb documentation, finish() marks the currently active
# run as finished and uploads any remaining data — confirm against the installed
# wandb version, and confirm a run is still active at this point.

wandb.finish()