<a href="https://colab.research.google.com/github/arunangshudutta/DA6401_assignments/blob/main/assignment_1/Assignment_1_Q4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore', category=RuntimeWarning)

from keras.datasets import fashion_mnist
from sklearn.model_selection import train_test_split

import wandb

# Functions

In [2]:
def weights_initialization(num_neurons, initializer):
  """
  num_neurons = list of number of neurons at each layer starting from the input layer and ending at output layer
  initializer = 'random' or 'xavier'

  Returns: initialized weight matrices and bias vectors
  """
  mean=0
  std_dev=1

  W_matrices = []
  b_vectors = []

  for i in range(len(num_neurons)-1):
    rows = num_neurons[i+1]
    cols = num_neurons[i]

    if initializer == 'random':

      weight_matrix = np.random.normal(mean, std_dev, size=(rows, cols))

    elif initializer == 'Xavier':

      upper_bound = np.sqrt(6.0/(rows + cols))
      lower_bound = -1*upper_bound
      weight_matrix = np.random.uniform(low = lower_bound, high = upper_bound, size = (rows, cols))

    else:
      print('initializer invalid')


    bias_vector = np.zeros((rows,1))

    W_matrices.append(weight_matrix)
    b_vectors.append(bias_vector)


  return W_matrices, b_vectors

########################################## ACTIVATION FUNCTIONS
def relu(x):
  """
  Rectified Linear Unit (ReLU) activation function
  """
  return np.maximum(0, x)

def sigmoid(x):
  """
  Sigmoid activation function
  """
  # x = np.float128(x)
  return 1 / (1 + np.exp(-x))

def tanh(x):
  """
  Hyperbolic tangent (tanh) activation function
  """
  # x = np.float128(x)
  return np.tanh(x)
def softmax(x):

  """
  Softmax function for output layer
  """
  # x = np.float128(x)
  return np.exp(x) / np.sum(np.exp(x), axis=0)

def activation_output(x, activation_function):
  """
  activation_function = 'ReLU', 'sigmoid', 'tanh'
  """
  if activation_function == 'ReLU':
    return relu(x)
  elif activation_function == 'sigmoid':
    return sigmoid(x)
  elif activation_function == 'tanh':
    return tanh(x)
  elif activation_function == 'softmax':
    return softmax(x)
  else:
    print('activation function invalid')

######################################### DERIVATIVE OF ACTIVATION FUNCTION
def sigmoid_derivative(x):
  s = sigmoid(x)
  return s * (1 - s)

def tanh_derivative(x):
  t = tanh(x)
  return 1 - t**2

def relu_derivative(x):
  return 1*(x>0)

def activation_derivative(x, activation_function):
  """
  activation_function = 'ReLU', 'sigmoid', 'tanh'
  """
  if activation_function == 'ReLU':
    return relu_derivative(x)
  elif activation_function == 'sigmoid':
    return sigmoid_derivative(x)
  elif activation_function == 'tanh':
    return tanh_derivative(x)
  else:
    print('activation function invalid')

####################################### Forward Propagation

def layer_output_FP(x, weight_matrix, bias_vector, activation_function):
  pre_activation = np.add(np.matmul(weight_matrix, x), bias_vector)
  post_activation = activation_output(pre_activation, activation_function)
  return pre_activation, post_activation

def forward_propagation(ip_data, W_matrices, b_vectors, activation_functions):
  """
  forward propagation
  """

  layer_op = []
  layer_op.append(ip_data)

  layer_ip = []

  for i in range(len(W_matrices)):

    weight_matrix = W_matrices[i]
    bias_vector = b_vectors[i]

    activation_function = activation_functions[i]

    pre_activation, post_activation = layer_output_FP(layer_op[i], weight_matrix, bias_vector, activation_function)

    layer_op.append(post_activation)
    layer_ip.append(pre_activation)

  return layer_ip, layer_op

####################################### Back Propagation

def back_propagation(W_matrices, b_vectors, y_true, layer_ip, layer_op, activation_functions, batch_size, w_d):

  DWs = []
  Dbs = []
  for i in range(len(W_matrices)):
    k = len(W_matrices) - i


    if k == len(W_matrices):
      Da = -np.add(y_true, -layer_op[k])
      Dw = (np.matmul(Da, layer_op[k-1].T) + w_d*W_matrices[k-1])/batch_size
    else:

      Dh = np.matmul(W_matrices[k].T, Da)
      Dg = activation_derivative(layer_ip[k-1], activation_functions[k-1])
      Da = np.multiply(Dh, Dg)
      Dw = (np.matmul(Da, layer_op[k-1].T) + w_d*W_matrices[k-1])/batch_size
    Db = np.sum(Da, axis=1, keepdims=True)/batch_size

    DWs.append(Dw)
    Dbs.append(Db)

  return DWs, Dbs


################################## optimization functions

def update_weights_gd(W_matrices, b_vectors, DWs, Dbs, learning_rate = 0.1):
  DWs.reverse()
  Dbs.reverse()

  for i in range(len(DWs)):

    W_matrices[i] = W_matrices[i] - learning_rate*DWs[i]
    b_vectors[i] = b_vectors[i] - learning_rate*Dbs[i]
  return W_matrices, b_vectors

def update_weights_momentum(W_matrices, b_vectors, DWs, Dbs, u_past_w, u_past_b, learning_rate = 0.1, beta = 0.5):
  DWs.reverse()
  Dbs.reverse()
  u_w = u_past_w
  u_b = u_past_b
  for i in range(len(DWs)):

    u_w[i] = beta*u_past_w[i] + DWs[i]
    u_b[i] = beta*u_past_b[i] + Dbs[i]

    W_matrices[i] = W_matrices[i] - learning_rate*u_w[i]
    b_vectors[i] = b_vectors[i] - learning_rate*u_b[i]

  return W_matrices, b_vectors, u_w, u_b

def update_weights_adagrad(W_matrices, b_vectors, DWs, Dbs, u_past_w, u_past_b, learning_rate = 0.1):
  DWs.reverse()
  Dbs.reverse()

  u_w = u_past_w
  u_b = u_past_b
  eps = 1e-8
  for i in range(len(DWs)):
    u_w[i] = u_past_w[i] + DWs[i]**2
    u_b[i] = u_past_b[i] + Dbs[i]**2

    W_matrices[i] = W_matrices[i] - learning_rate*DWs[i]/(np.sqrt(u_w[i]) + eps)
    b_vectors[i] = b_vectors[i] - learning_rate*Dbs[i]/(np.sqrt(u_b[i]) + eps)

  return W_matrices, b_vectors, u_w, u_b

def update_weights_rmsprop(W_matrices, b_vectors, DWs, Dbs, u_past_w, u_past_b, learning_rate = 0.1, beta = 0.5):
  DWs.reverse()
  Dbs.reverse()

  u_w = u_past_w
  u_b = u_past_b
  eps = 1e-8
  for i in range(len(DWs)):
    u_w[i] = beta*u_past_w[i] + (1-beta)*DWs[i]**2
    u_b[i] = beta*u_past_b[i] + (1-beta)*Dbs[i]**2

    W_matrices[i] = W_matrices[i] - learning_rate*DWs[i]/(np.sqrt(u_w[i]) + eps)
    b_vectors[i] = b_vectors[i] - learning_rate*Dbs[i]/(np.sqrt(u_b[i]) + eps)

  return W_matrices, b_vectors, u_w, u_b

def update_weights_adam(W_matrices, b_vectors, DWs, Dbs, mw_past, mb_past, vw_past, vb_past, t, learning_rate = 0.1, beta1 = 0.5, beta2 =0.5):
  DWs.reverse()
  Dbs.reverse()
  mw = mw_past
  mb = mb_past
  vw = vw_past
  vb = vb_past
  eps = 1e-8

  for i in range(len(DWs)):
    mw[i] = beta1*mw_past[i] + (1-beta1)*DWs[i]
    mb[i] = beta1*mb_past[i] + (1-beta1)*Dbs[i]

    mw_cap = mw[i]/(1 - beta1**t)
    mb_cap = mb[i]/(1 - beta1**t)

    vw[i] = beta2*vw_past[i] + (1-beta2)*DWs[i]**2
    vb[i] = beta2*vb_past[i] + (1-beta2)*Dbs[i]**2
    vw_cap = vw[i]/(1 - beta2**t)
    vb_cap = vb[i]/(1 - beta2**t)

    W_matrices[i] = W_matrices[i] - learning_rate*mw_cap/(np.sqrt(vw_cap) + eps)
    b_vectors[i] = b_vectors[i] - learning_rate*mb_cap/(np.sqrt(vb_cap) + eps)

  return W_matrices, b_vectors, mw, mb, vw, vb

def update_weights_nadam(W_matrices, b_vectors, DWs, Dbs, mw_past, mb_past, vw_past, vb_past,t,  learning_rate = 0.1, beta1 = 0.5, beta2 =0.5):
  DWs.reverse()
  Dbs.reverse()
  mw = mw_past
  mb = mb_past
  vw = vw_past
  vb = vb_past
  eps = 1e-8

  for i in range(len(DWs)):
    mw[i] = beta1*mw_past[i] + (1-beta1)*DWs[i]
    mb[i] = beta1*mb_past[i] + (1-beta1)*Dbs[i]

    mw_cap = mw[i]/(1 - beta1**(t+1))
    mb_cap = mb[i]/(1 - beta1**(t+1))

    vw[i] = beta2*vw_past[i] + (1-beta2)*DWs[i]**2
    vb[i] = beta2*vb_past[i] + (1-beta2)*Dbs[i]**2
    vw_cap = vw[i]/(1 - beta2**(t+1))
    vb_cap = vb[i]/(1 - beta2**(t+1))

    W_matrices[i] = W_matrices[i] - learning_rate*(beta1*mw_cap + ((1-beta1)/(1 - beta1**(t+1)))*DWs[i])/(np.sqrt(vw_cap) + eps)
    b_vectors[i] = b_vectors[i] - learning_rate*(beta1*mb_cap + ((1-beta1)/(1 - beta1**(t+1)))*Dbs[i])/(np.sqrt(vb_cap) + eps)

  return W_matrices, b_vectors, mw, mb, vw, vb

def look_ahead_nag(W_s, b_s, u_past_w, u_past_b, beta = 0.5):
  for i in range(len(W_s)):
    W_s[i] = W_s[i] - beta*u_past_w[i]
    b_s[i] = b_s[i] - beta*u_past_b[i]
  return W_s, b_s

############################# Loss and accuracy

def one_hot_encode(integers, num_classes=None):
  if num_classes is None:
      num_classes = np.max(integers) + 1
  return np.eye(num_classes)[integers]

def cross_entropy_loss(y_true, y_pred, batch_size):
  # Clip the predicted probabilities to avoid numerical instability
  y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
  loss_value = np.sum(np.sum(y_true*np.log(y_pred), axis=0))/batch_size
  return loss_value*(-1)



def accuracy(y_true, y_pred, batch_size):
  n_correct = 0
  for i in range(0, batch_size, 1) :
    if y_true[:,i].argmax() == y_pred[:,i].argmax() :
      n_correct += 1
  return 100 * n_correct / batch_size

###################################### dataset

def load_split_dataset():
  # Load Fashion MNIST dataset
  (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

  # Split the training set into training and validation sets
  X_train, X_val, Y_train, Y_val = train_test_split(train_images, train_labels, test_size=0.3, random_state=42)

  data_size = X_train.shape[0]
  X_train = (X_train.reshape(data_size, -1).T)/255
  Y_train = one_hot_encode(Y_train, 10).T

  data_size = X_val.shape[0]
  X_val = (X_val.reshape(data_size, -1).T)/255
  Y_val = one_hot_encode(Y_val, 10).T

  data_size = test_images.shape[0]
  X_test = (test_images.reshape(data_size, -1).T)/255
  Y_test = one_hot_encode(test_labels, 10).T

  return X_train, Y_train, X_val, Y_val, X_test, Y_test

# Train function

In [3]:
def train_model_q4(X_train,Y_train, X_test, Y_test, epoch=1,batch_size=25, num_neurons_hidden = [10], activation_functions = ['sigmoid'],
                weights_init_type='random', optimizer = 'sgd', learning_rate = 0.1, opti_beta = [0.5, 0.5], w_d = 0):

  """
  X has shape (number of features, number of samples in train data set)
  Y has shape (number of classes, number of samples in train data set)

  num_neurons_hidden = list of number of neurons at each hidden layer

  """
  num_ip_neurons = X_train.shape[0]
  num_op_neurons = Y_train.shape[0]
  num_neurons = [num_ip_neurons] + num_neurons_hidden + [num_op_neurons]
  activation_functions = activation_functions + ['softmax']

  W_s, b_s = weights_initialization(num_neurons, weights_init_type)


  num_batches = np.floor(X_train.shape[1]/batch_size)
  print(num_batches)

  if optimizer == 'momentum':
    u_past_w = [x * 0 for x in W_s]
    u_past_b = [x * 0 for x in b_s]

  elif optimizer == 'nag':
    u_past_w = [x * 0 for x in W_s]
    u_past_b = [x * 0 for x in b_s]

  elif optimizer == 'rmsprop':
    u_past_w = [x * 0 for x in W_s]
    u_past_b = [x * 0 for x in b_s]

  elif optimizer == 'adagrad':
    u_past_w = [x * 0 for x in W_s]
    u_past_b = [x * 0 for x in b_s]

  elif optimizer == 'adam':
    mw_past = [x * 0 for x in W_s]
    mb_past = [x * 0 for x in b_s]
    vw_past = [x * 0 for x in W_s]
    vb_past = [x * 0 for x in b_s]
    t = 1

  elif optimizer == 'nadam':
    mw_past = [x * 0 for x in W_s]
    mb_past = [x * 0 for x in b_s]
    vw_past = [x * 0 for x in W_s]
    vb_past = [x * 0 for x in b_s]
    t = 1

  for i in range(epoch):



    for j in tqdm(range(int(num_batches))):
      batch_X = X_train[:,j*batch_size:(j+1)*batch_size]
      batch_Y = Y_train[:,j*batch_size:(j+1)*batch_size]


      if optimizer == 'sgd':
        ip, op = forward_propagation(batch_X, W_s, b_s, activation_functions)
        DWs, Dbs = back_propagation(W_s, b_s, batch_Y, ip, op, activation_functions, batch_size, w_d)
        W_s, b_s = update_weights_gd(W_s, b_s, DWs, Dbs, learning_rate)

      elif optimizer == 'momentum':
        ip, op = forward_propagation(batch_X, W_s, b_s, activation_functions)
        DWs, Dbs = back_propagation(W_s, b_s, batch_Y, ip, op, activation_functions, batch_size, w_d)
        W_s, b_s, u_past_w, u_past_b  = update_weights_momentum(W_s, b_s, DWs, Dbs, u_past_w, u_past_b, learning_rate, opti_beta[0])

      elif optimizer == 'adagrad':
        ip, op = forward_propagation(batch_X, W_s, b_s, activation_functions)
        DWs, Dbs = back_propagation(W_s, b_s, batch_Y, ip, op, activation_functions, batch_size, w_d)
        W_s, b_s, u_past_w, u_past_b  = update_weights_adagrad(W_s, b_s, DWs, Dbs, u_past_w, u_past_b, learning_rate)

      elif optimizer == 'rmsprop':
        ip, op = forward_propagation(batch_X, W_s, b_s, activation_functions)
        DWs, Dbs = back_propagation(W_s, b_s, batch_Y, ip, op, activation_functions, batch_size, w_d)
        W_s, b_s, u_past_w, u_past_b  = update_weights_rmsprop(W_s, b_s, DWs, Dbs, u_past_w, u_past_b, learning_rate, opti_beta[0])

      elif optimizer == 'adam':
        ip, op = forward_propagation(batch_X, W_s, b_s, activation_functions)
        DWs, Dbs = back_propagation(W_s, b_s, batch_Y, ip, op, activation_functions, batch_size, w_d)
        W_s, b_s, mw_past, mb_past, vw_past, vb_past = update_weights_adam(W_s, b_s, DWs, Dbs, mw_past, mb_past, vw_past, vb_past, t, learning_rate, opti_beta[0], opti_beta[1])
        t =t +1

      elif optimizer == 'nadam':
        ip, op = forward_propagation(batch_X, W_s, b_s, activation_functions)
        DWs, Dbs = back_propagation(W_s, b_s, batch_Y, ip, op, activation_functions, batch_size, w_d)
        W_s, b_s, mw_past, mb_past, vw_past, vb_past = update_weights_nadam(W_s, b_s, DWs, Dbs, mw_past, mb_past, vw_past, vb_past, t, learning_rate, opti_beta[0], opti_beta[1])
        t =t +1
      elif optimizer == 'nag':
        PWs, Pbs = look_ahead_nag(W_s, b_s, u_past_w, u_past_b, opti_beta[0])
        ip, op = forward_propagation(batch_X, PWs, Pbs, activation_functions)
        DWs, Dbs = back_propagation(PWs, Pbs, batch_Y, ip, op, activation_functions, batch_size, w_d)
        W_s, b_s, u_past_w, u_past_b  = update_weights_momentum(W_s, b_s, DWs, Dbs, u_past_w, u_past_b, learning_rate, opti_beta[0])

    ip_all, op_all = forward_propagation(X_train, W_s, b_s, activation_functions)
    loss_tr = cross_entropy_loss(Y_train, op_all[-1], X_train.shape[1])
    acc_tr = accuracy(Y_train, op_all[-1], batch_size)


    ip_all, op_all = forward_propagation(X_test, W_s, b_s, activation_functions)
    loss_val = cross_entropy_loss(Y_test, op_all[-1], X_test.shape[1])
    acc_val = accuracy(Y_test, op_all[-1], batch_size)

    wandb.log({'tr_loss' : loss_tr, 'tr_accuracy' : acc_tr, 'val_loss' : loss_val, 'val_accuracy' : acc_val})



In [4]:
sweep_config = {
    'method': 'bayes',
    'name' : 'Bayesian_sweep_cross_entropy',
    'metric': {
      'name': 'valid accuracy',
      'goal': 'maximize'
    },
    'parameters': {
        'epochs': {
            'values': [5, 10]
        },
        'num_layers': {
            'values': [3, 4, 5]
        },
         'hidden_size': {
            'values': [32, 64, 128]
        },
        'weight_decay': {
            'values': [0, 0.0005, 0.5]
        },
         'learning_rate': {
            'values': [0.001, 0.0001]
        },
         'optimizer': {
            'values': ['sgd', 'momentum', 'nag', 'rmsprop', 'adam', 'nadam']
        },
        'batch_size': {
            'values': [16, 32, 64]
        },
         'weight_init': {
            'values': ['random', 'Xavier']
        },
        'activation': {
            'values': ['sigmoid', 'tanh', 'ReLU']
        },
    }
}

sweep_id = wandb.sweep(sweep = sweep_config, project = 'dl_assgn_1_q_4')

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: sdlyh3ti
Sweep URL: https://wandb.ai/arunangshudutta218-iitm/dl_assgn_1_q_4/sweeps/sdlyh3ti


In [5]:
def main():
  with wandb.init() as run:

    epochs = wandb.config.epochs
    nhl = wandb.config.num_layers
    sz = wandb.config.hidden_size
    w_d = wandb.config.weight_decay
    lr = wandb.config.learning_rate
    optimizer = wandb.config.optimizer
    b_sz = wandb.config.batch_size
    weight_init = wandb.config.weight_init
    act_fun = wandb.config.activation

    neuros_num = []
    act_func = []
    for i in range(nhl):
      neuros_num.append(sz)
      act_func.append(act_fun)

    wandb.run.name = "e_{}_hl_{}_hs_{}_lr_{}_opt_{}_bs_{}_init_{}_ac_{}_l2_{}".format(epochs, nhl, sz, lr, optimizer, b_sz, weight_init, act_fun, w_d)

    train_model_q4(X_train, Y_train, X_val, Y_val, epoch=epochs, batch_size=b_sz, num_neurons_hidden = neuros_num, activation_functions = act_func,
                weights_init_type=weight_init, optimizer = optimizer, learning_rate = lr, opti_beta = [0.5, 0.5], w_d = w_d)


X_train, Y_train, X_val, Y_val, X_test, Y_test = load_split_dataset()

wandb.agent(sweep_id, function = main, count = 100)
wandb.finish()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


[34m[1mwandb[0m: Agent Starting Run: 7nslyene with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random
[34m[1mwandb[0m: Currently logged in as: [33marunangshudutta218[0m ([33marunangshudutta218-iitm[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


1312.0


100%|██████████| 1312/1312 [00:02<00:00, 562.26it/s]
100%|██████████| 1312/1312 [00:03<00:00, 386.31it/s]
100%|██████████| 1312/1312 [00:01<00:00, 696.94it/s]
100%|██████████| 1312/1312 [00:01<00:00, 725.19it/s]
100%|██████████| 1312/1312 [00:01<00:00, 749.42it/s]


0,1
tr_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
tr_accuracy,15.625
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 3b3h62qy with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:04<00:00, 586.87it/s]
100%|██████████| 2625/2625 [00:06<00:00, 377.60it/s]
100%|██████████| 2625/2625 [00:04<00:00, 574.51it/s]
100%|██████████| 2625/2625 [00:06<00:00, 401.43it/s]
100%|██████████| 2625/2625 [00:04<00:00, 575.58it/s]


0,1
tr_accuracy,▁▁███
tr_loss,█▄▂▂▁
val_accuracy,▁████
val_loss,█▄▂▂▁

0,1
tr_accuracy,93.75
tr_loss,0.45997
val_accuracy,93.75
val_loss,0.46568


[34m[1mwandb[0m: Agent Starting Run: ayb5ef3b with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:04<00:00, 617.18it/s]
100%|██████████| 2625/2625 [00:05<00:00, 517.91it/s]
100%|██████████| 2625/2625 [00:05<00:00, 473.11it/s]
100%|██████████| 2625/2625 [00:04<00:00, 601.14it/s]
100%|██████████| 2625/2625 [00:06<00:00, 402.65it/s]


0,1
tr_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
tr_accuracy,18.75
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: czi4a04j with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:02<00:00, 1099.00it/s]
100%|██████████| 2625/2625 [00:02<00:00, 1041.77it/s]
100%|██████████| 2625/2625 [00:02<00:00, 913.08it/s]
100%|██████████| 2625/2625 [00:04<00:00, 608.22it/s] 
100%|██████████| 2625/2625 [00:02<00:00, 1042.87it/s]
100%|██████████| 2625/2625 [00:02<00:00, 1072.11it/s]
100%|██████████| 2625/2625 [00:02<00:00, 984.84it/s]
100%|██████████| 2625/2625 [00:04<00:00, 623.81it/s] 
100%|██████████| 2625/2625 [00:02<00:00, 1044.04it/s]
100%|██████████| 2625/2625 [00:02<00:00, 1057.45it/s]


0,1
tr_accuracy,▁▄▄▄▄▇█▇▇▇
tr_loss,█▇▅▄▄▃▂▂▁▁
val_accuracy,▁▂▃▅▆▆▇▇██
val_loss,█▇▅▄▄▃▂▂▁▁

0,1
tr_accuracy,62.5
tr_loss,1.21375
val_accuracy,68.75
val_loss,1.22209


[34m[1mwandb[0m: Agent Starting Run: cmh5h7f8 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:07<00:00, 184.34it/s]
100%|██████████| 1312/1312 [00:08<00:00, 147.13it/s]
100%|██████████| 1312/1312 [00:09<00:00, 136.96it/s]
100%|██████████| 1312/1312 [00:07<00:00, 181.64it/s]
100%|██████████| 1312/1312 [00:06<00:00, 201.02it/s]
100%|██████████| 1312/1312 [00:09<00:00, 144.16it/s]
100%|██████████| 1312/1312 [00:08<00:00, 154.55it/s]
100%|██████████| 1312/1312 [00:06<00:00, 194.86it/s]
100%|██████████| 1312/1312 [00:09<00:00, 145.59it/s]
100%|██████████| 1312/1312 [00:08<00:00, 147.23it/s]


0,1
tr_accuracy,▁▁▁▁▁▁▁▁▁▁
tr_loss,█▇▆▆▅▄▃▃▂▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▆▆▅▄▃▃▂▁

0,1
tr_accuracy,15.625
tr_loss,2.3026
val_accuracy,3.125
val_loss,2.30252


[34m[1mwandb[0m: Agent Starting Run: 5ugoi2k1 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:04<00:00, 317.45it/s]
100%|██████████| 1312/1312 [00:04<00:00, 300.33it/s]
100%|██████████| 1312/1312 [00:03<00:00, 355.67it/s]
100%|██████████| 1312/1312 [00:04<00:00, 279.02it/s]
100%|██████████| 1312/1312 [00:03<00:00, 359.13it/s]
100%|██████████| 1312/1312 [00:03<00:00, 391.36it/s]
100%|██████████| 1312/1312 [00:05<00:00, 245.84it/s]
100%|██████████| 1312/1312 [00:05<00:00, 224.84it/s]
100%|██████████| 1312/1312 [00:04<00:00, 299.57it/s]
100%|██████████| 1312/1312 [00:04<00:00, 309.44it/s]


0,1
tr_accuracy,█▁▁▁▁▁▁▁▁▁
tr_loss,█▆▅▄▃▂▂▂▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▅▄▃▂▂▂▁▁

0,1
tr_accuracy,78.125
tr_loss,0.54632
val_accuracy,87.5
val_loss,0.55066


[34m[1mwandb[0m: Agent Starting Run: e5chhmww with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:10<00:00, 239.15it/s]
100%|██████████| 2625/2625 [00:12<00:00, 214.25it/s]
100%|██████████| 2625/2625 [00:11<00:00, 226.73it/s]
100%|██████████| 2625/2625 [00:11<00:00, 227.90it/s]
100%|██████████| 2625/2625 [00:11<00:00, 223.00it/s]
100%|██████████| 2625/2625 [00:11<00:00, 219.20it/s]
100%|██████████| 2625/2625 [00:11<00:00, 219.67it/s]
100%|██████████| 2625/2625 [00:11<00:00, 222.06it/s]
100%|██████████| 2625/2625 [00:10<00:00, 253.67it/s]
100%|██████████| 2625/2625 [00:09<00:00, 279.68it/s]


0,1
tr_accuracy,▁█████████
tr_loss,█▆▄▃▃▂▂▁▁▁
val_accuracy,▁▅▅▅▅▅█▅▅▅
val_loss,█▅▄▃▂▂▁▁▁▁

0,1
tr_accuracy,93.75
tr_loss,0.34221
val_accuracy,93.75
val_loss,0.38023


[34m[1mwandb[0m: Agent Starting Run: 1h7wlxrd with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:04<00:00, 151.52it/s]
100%|██████████| 656/656 [00:06<00:00, 100.03it/s]
100%|██████████| 656/656 [00:04<00:00, 155.99it/s]
100%|██████████| 656/656 [00:06<00:00, 99.32it/s] 
100%|██████████| 656/656 [00:04<00:00, 158.74it/s]


0,1
tr_accuracy,▁▁▆▇█
tr_loss,█▃▂▁▁
val_accuracy,▁██▆▇
val_loss,█▃▂▁▁

0,1
tr_accuracy,75.0
tr_loss,0.84945
val_accuracy,67.1875
val_loss,0.88163


[34m[1mwandb[0m: Agent Starting Run: j7ryc44h with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:01<00:00, 647.08it/s]
100%|██████████| 656/656 [00:00<00:00, 678.36it/s]
100%|██████████| 656/656 [00:00<00:00, 667.05it/s]
100%|██████████| 656/656 [00:00<00:00, 728.09it/s]
100%|██████████| 656/656 [00:00<00:00, 682.82it/s]


0,1
tr_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
tr_accuracy,12.5
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: q3st6jn5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:12<00:00, 101.33it/s]
100%|██████████| 1312/1312 [00:12<00:00, 107.76it/s]
100%|██████████| 1312/1312 [00:12<00:00, 109.20it/s]
100%|██████████| 1312/1312 [00:12<00:00, 105.25it/s]
100%|██████████| 1312/1312 [00:12<00:00, 108.75it/s]


0,1
tr_accuracy,█▁███
tr_loss,█▅▂▂▁
val_accuracy,▁▁▁▁▁
val_loss,█▅▂▂▁

0,1
tr_accuracy,75.0
tr_loss,0.56684
val_accuracy,84.375
val_loss,0.5695


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: batvg02p with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:02<00:00, 450.71it/s]
100%|██████████| 1312/1312 [00:02<00:00, 473.52it/s]
100%|██████████| 1312/1312 [00:05<00:00, 223.37it/s]
100%|██████████| 1312/1312 [00:02<00:00, 452.37it/s]
100%|██████████| 1312/1312 [00:02<00:00, 450.77it/s]


0,1
tr_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
tr_accuracy,15.625
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: rrcl1p3r with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:02<00:00, 573.52it/s]
100%|██████████| 1312/1312 [00:02<00:00, 570.80it/s]
100%|██████████| 1312/1312 [00:02<00:00, 508.35it/s]
100%|██████████| 1312/1312 [00:04<00:00, 306.43it/s]
100%|██████████| 1312/1312 [00:02<00:00, 534.67it/s]
100%|██████████| 1312/1312 [00:02<00:00, 586.64it/s]
100%|██████████| 1312/1312 [00:02<00:00, 600.58it/s]
100%|██████████| 1312/1312 [00:04<00:00, 302.74it/s]
100%|██████████| 1312/1312 [00:02<00:00, 584.06it/s]
100%|██████████| 1312/1312 [00:02<00:00, 581.18it/s]


0,1
tr_accuracy,▁▆▁▃█▃█▆██
tr_loss,█▆▄▄▃▂▂▁▁▁
val_accuracy,▁▄▄▇▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
tr_accuracy,87.5
tr_loss,0.3921
val_accuracy,93.75
val_loss,0.45307


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: smiwto81 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:16<00:00, 161.40it/s]
100%|██████████| 2625/2625 [00:16<00:00, 155.86it/s]
100%|██████████| 2625/2625 [00:16<00:00, 160.60it/s]
100%|██████████| 2625/2625 [00:15<00:00, 170.75it/s]
100%|██████████| 2625/2625 [00:15<00:00, 166.80it/s]
100%|██████████| 2625/2625 [00:16<00:00, 161.61it/s]
100%|██████████| 2625/2625 [00:20<00:00, 126.16it/s]
100%|██████████| 2625/2625 [00:15<00:00, 166.08it/s]
100%|██████████| 2625/2625 [00:15<00:00, 168.76it/s]
100%|██████████| 2625/2625 [00:15<00:00, 167.34it/s]


0,1
tr_accuracy,█▇███▇▆▆▃▁
tr_loss,▆▃▁▁▁▂▄▅▇█
val_accuracy,▁▆▃▆████▆▃
val_loss,▆▃▁▁▁▂▄▅▇█

0,1
tr_accuracy,25.0
tr_loss,1.59597
val_accuracy,68.75
val_loss,1.59681


[34m[1mwandb[0m: Agent Starting Run: s4tpli4b with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:03<00:00, 169.03it/s]
100%|██████████| 656/656 [00:05<00:00, 114.41it/s]
100%|██████████| 656/656 [00:03<00:00, 170.66it/s]
100%|██████████| 656/656 [00:03<00:00, 165.90it/s]
100%|██████████| 656/656 [00:05<00:00, 130.85it/s]


0,1
tr_accuracy,▁▂▅▆█
tr_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
tr_accuracy,45.3125
tr_loss,1.76176
val_accuracy,42.1875
val_loss,1.77075


[34m[1mwandb[0m: Agent Starting Run: g72fdiwb with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:02<00:00, 257.87it/s]
100%|██████████| 656/656 [00:02<00:00, 223.37it/s]
100%|██████████| 656/656 [00:01<00:00, 378.29it/s]
100%|██████████| 656/656 [00:01<00:00, 404.26it/s]
100%|██████████| 656/656 [00:01<00:00, 392.28it/s]


0,1
tr_accuracy,▁▃▆▇█
tr_loss,█▃▂▁▁
val_accuracy,▂▁▅▆█
val_loss,█▃▂▁▁

0,1
tr_accuracy,54.6875
tr_loss,1.55893
val_accuracy,53.125
val_loss,1.56058


[34m[1mwandb[0m: Agent Starting Run: s1kpg52b with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:04<00:00, 271.46it/s]
100%|██████████| 1312/1312 [00:07<00:00, 187.02it/s]
100%|██████████| 1312/1312 [00:04<00:00, 285.36it/s]
100%|██████████| 1312/1312 [00:06<00:00, 203.56it/s]
100%|██████████| 1312/1312 [00:04<00:00, 282.93it/s]
100%|██████████| 1312/1312 [00:04<00:00, 284.51it/s]
100%|██████████| 1312/1312 [00:06<00:00, 209.05it/s]
100%|██████████| 1312/1312 [00:04<00:00, 281.46it/s]
100%|██████████| 1312/1312 [00:07<00:00, 185.68it/s]
100%|██████████| 1312/1312 [00:04<00:00, 277.73it/s]


0,1
tr_accuracy,▁▄▅▅▇▇▇███
tr_loss,█▅▃▂▂▂▁▁▁▁
val_accuracy,▁▄▄▆▆▆▇███
val_loss,█▅▃▂▂▂▁▁▁▁

0,1
tr_accuracy,81.25
tr_loss,0.67467
val_accuracy,78.125
val_loss,0.67925


[34m[1mwandb[0m: Agent Starting Run: mw6l3mzf with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:04<00:00, 591.03it/s]
100%|██████████| 2625/2625 [00:07<00:00, 357.80it/s]
100%|██████████| 2625/2625 [00:04<00:00, 577.70it/s]
100%|██████████| 2625/2625 [00:05<00:00, 469.68it/s]
100%|██████████| 2625/2625 [00:05<00:00, 456.37it/s]
100%|██████████| 2625/2625 [00:04<00:00, 596.41it/s]
100%|██████████| 2625/2625 [00:06<00:00, 379.74it/s]
100%|██████████| 2625/2625 [00:04<00:00, 583.37it/s]
100%|██████████| 2625/2625 [00:05<00:00, 444.98it/s]
100%|██████████| 2625/2625 [00:05<00:00, 499.49it/s]


0,1
tr_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
tr_accuracy,18.75
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: lhcxtn2m with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:06<00:00, 408.93it/s]
100%|██████████| 2625/2625 [00:03<00:00, 703.69it/s]
100%|██████████| 2625/2625 [00:04<00:00, 648.48it/s]
100%|██████████| 2625/2625 [00:06<00:00, 424.53it/s]
100%|██████████| 2625/2625 [00:03<00:00, 714.69it/s]


0,1
tr_accuracy,▁▁▁▁█
tr_loss,█▄▂▁▁
val_accuracy,▁▁███
val_loss,█▄▂▁▁

0,1
tr_accuracy,100.0
tr_loss,0.63412
val_accuracy,87.5
val_loss,0.63809


[34m[1mwandb[0m: Agent Starting Run: i56vr1g4 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


656.0


100%|██████████| 656/656 [00:02<00:00, 314.68it/s]
100%|██████████| 656/656 [00:01<00:00, 398.33it/s]
100%|██████████| 656/656 [00:01<00:00, 591.36it/s]
100%|██████████| 656/656 [00:01<00:00, 644.80it/s]
100%|██████████| 656/656 [00:01<00:00, 633.73it/s]
100%|██████████| 656/656 [00:01<00:00, 626.35it/s]
100%|██████████| 656/656 [00:01<00:00, 637.14it/s]
100%|██████████| 656/656 [00:01<00:00, 644.88it/s]
100%|██████████| 656/656 [00:01<00:00, 620.95it/s]
100%|██████████| 656/656 [00:02<00:00, 263.76it/s]


0,1
tr_accuracy,█▁█▁▁▁▁▁▁▁
tr_loss,█▅▅▁▁▁▁▁▁▁
val_accuracy,▁█▁███████
val_loss,█▅▅▁▁▁▁▁▁▁

0,1
tr_accuracy,7.8125
tr_loss,2.30495
val_accuracy,7.8125
val_loss,2.30521


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3g6lrb7a with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


656.0


100%|██████████| 656/656 [00:01<00:00, 372.81it/s]
100%|██████████| 656/656 [00:01<00:00, 379.88it/s]
100%|██████████| 656/656 [00:01<00:00, 375.28it/s]
100%|██████████| 656/656 [00:01<00:00, 381.64it/s]
100%|██████████| 656/656 [00:02<00:00, 222.89it/s]


0,1
tr_accuracy,▁▁▁▁▁
tr_loss,▁▁▁▁▁
val_accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
tr_accuracy,7.8125
tr_loss,2.30495
val_accuracy,7.8125
val_loss,2.30521


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hh6lvpaz with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:02<00:00, 460.35it/s]
100%|██████████| 1312/1312 [00:02<00:00, 444.51it/s]
100%|██████████| 1312/1312 [00:03<00:00, 434.33it/s]
100%|██████████| 1312/1312 [00:05<00:00, 257.19it/s]
100%|██████████| 1312/1312 [00:02<00:00, 458.66it/s]


0,1
tr_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
tr_accuracy,15.625
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 5pjh1i8t with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:10<00:00, 128.93it/s]
100%|██████████| 1312/1312 [00:10<00:00, 122.15it/s]
100%|██████████| 1312/1312 [00:11<00:00, 119.06it/s]
100%|██████████| 1312/1312 [00:12<00:00, 102.22it/s]
100%|██████████| 1312/1312 [00:12<00:00, 101.75it/s]
100%|██████████| 1312/1312 [00:13<00:00, 97.32it/s] 
100%|██████████| 1312/1312 [00:12<00:00, 103.40it/s]
100%|██████████| 1312/1312 [00:12<00:00, 101.36it/s]
100%|██████████| 1312/1312 [00:12<00:00, 102.84it/s]
100%|██████████| 1312/1312 [00:11<00:00, 113.25it/s]


0,1
tr_accuracy,▁▄▂▄▂▂█▅▇▆
tr_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▂▂▇▂▁▅▇███
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
tr_accuracy,25.0
tr_loss,7.48744
val_accuracy,28.125
val_loss,7.582


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: aafbdzs7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:07<00:00, 173.16it/s]
100%|██████████| 1312/1312 [00:09<00:00, 137.35it/s]
100%|██████████| 1312/1312 [00:10<00:00, 125.86it/s]
100%|██████████| 1312/1312 [00:10<00:00, 125.37it/s]
100%|██████████| 1312/1312 [00:07<00:00, 179.65it/s]


0,1
tr_accuracy,▁▁██▇
tr_loss,█▆▂▁▁
val_accuracy,▁▇▆█▇
val_loss,█▆▂▁▁

0,1
tr_accuracy,90.625
tr_loss,0.51733
val_accuracy,84.375
val_loss,0.55019


[34m[1mwandb[0m: Agent Starting Run: 58q3v7a9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


656.0


100%|██████████| 656/656 [00:02<00:00, 257.87it/s]
100%|██████████| 656/656 [00:02<00:00, 227.74it/s]
100%|██████████| 656/656 [00:04<00:00, 158.37it/s]
100%|██████████| 656/656 [00:02<00:00, 254.66it/s]
100%|██████████| 656/656 [00:02<00:00, 263.22it/s]
100%|██████████| 656/656 [00:03<00:00, 182.00it/s]
100%|██████████| 656/656 [00:03<00:00, 208.93it/s]
100%|██████████| 656/656 [00:02<00:00, 252.30it/s]
100%|██████████| 656/656 [00:02<00:00, 262.68it/s]
100%|██████████| 656/656 [00:04<00:00, 144.70it/s]


0,1
tr_accuracy,▁█████████
tr_loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,█▁▁▁▁▁▁▁▁▁
val_loss,█▁▁▁▁▁▁▁▁▁

0,1
tr_accuracy,10.9375
tr_loss,2.30255
val_accuracy,9.375
val_loss,2.3026


[34m[1mwandb[0m: Agent Starting Run: u0a62qbt with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:02<00:00, 452.16it/s]
100%|██████████| 1312/1312 [00:05<00:00, 258.98it/s]
100%|██████████| 1312/1312 [00:02<00:00, 466.42it/s]
100%|██████████| 1312/1312 [00:02<00:00, 468.33it/s]
100%|██████████| 1312/1312 [00:03<00:00, 374.36it/s]
100%|██████████| 1312/1312 [00:04<00:00, 320.22it/s]
100%|██████████| 1312/1312 [00:02<00:00, 442.62it/s]
100%|██████████| 1312/1312 [00:02<00:00, 470.26it/s]
100%|██████████| 1312/1312 [00:05<00:00, 249.39it/s]
100%|██████████| 1312/1312 [00:02<00:00, 474.10it/s]


0,1
tr_accuracy,▁▁▃▄▅▅▆▆█▆
tr_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▂▄▅▅▅▆▇██
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
tr_accuracy,84.375
tr_loss,0.47662
val_accuracy,96.875
val_loss,0.48567


[34m[1mwandb[0m: Agent Starting Run: uswp9qbm with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:05<00:00, 235.71it/s]
100%|██████████| 1312/1312 [00:04<00:00, 298.13it/s]
100%|██████████| 1312/1312 [00:06<00:00, 208.07it/s]
100%|██████████| 1312/1312 [00:04<00:00, 290.34it/s]
100%|██████████| 1312/1312 [00:06<00:00, 193.88it/s]
100%|██████████| 1312/1312 [00:04<00:00, 289.57it/s]
100%|██████████| 1312/1312 [00:06<00:00, 191.86it/s]
100%|██████████| 1312/1312 [00:04<00:00, 283.44it/s]
100%|██████████| 1312/1312 [00:07<00:00, 181.16it/s]
100%|██████████| 1312/1312 [00:04<00:00, 279.16it/s]


0,1
tr_accuracy,▅▅▃▁█▃▆▃▁▁
tr_loss,▆▆▂▄▁▇▆█▇▄
val_accuracy,▄▁▄█▁▄▃▃██
val_loss,▆▆▂▄▁▇▆█▇▄

0,1
tr_accuracy,3.125
tr_loss,16.87281
val_accuracy,25.0
val_loss,16.74632


[34m[1mwandb[0m: Agent Starting Run: wkvps5ob with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


656.0


100%|██████████| 656/656 [00:01<00:00, 621.21it/s]
100%|██████████| 656/656 [00:01<00:00, 648.65it/s]
100%|██████████| 656/656 [00:01<00:00, 634.43it/s]
100%|██████████| 656/656 [00:01<00:00, 640.21it/s]
100%|██████████| 656/656 [00:00<00:00, 663.23it/s]


0,1
tr_accuracy,▁▆▇██
tr_loss,█▄▂▁▁
val_accuracy,▁▆███
val_loss,█▄▂▁▁

0,1
tr_accuracy,76.5625
tr_loss,0.75224
val_accuracy,75.0
val_loss,0.75673


[34m[1mwandb[0m: Agent Starting Run: yyv7189j with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:07<00:00, 167.80it/s]
100%|██████████| 1312/1312 [00:09<00:00, 137.89it/s]
100%|██████████| 1312/1312 [00:09<00:00, 136.42it/s]
100%|██████████| 1312/1312 [00:07<00:00, 184.62it/s]
100%|██████████| 1312/1312 [00:08<00:00, 155.81it/s]
100%|██████████| 1312/1312 [00:09<00:00, 134.02it/s]
100%|██████████| 1312/1312 [00:08<00:00, 147.43it/s]
100%|██████████| 1312/1312 [00:07<00:00, 181.71it/s]
100%|██████████| 1312/1312 [00:09<00:00, 134.14it/s]
100%|██████████| 1312/1312 [00:09<00:00, 132.58it/s]


0,1
tr_accuracy,█▃▃▃▃▂▂▂▂▁
tr_loss,▁▇▇███████
val_accuracy,▆▃▁▃█████▇
val_loss,▁▇▇███████

0,1
tr_accuracy,37.5
tr_loss,1.37468
val_accuracy,62.5
val_loss,1.37357


[34m[1mwandb[0m: Agent Starting Run: 1du2gbxv with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:02<00:00, 313.45it/s]
100%|██████████| 656/656 [00:03<00:00, 216.11it/s]
100%|██████████| 656/656 [00:01<00:00, 482.93it/s]
100%|██████████| 656/656 [00:01<00:00, 474.64it/s]
100%|██████████| 656/656 [00:01<00:00, 488.26it/s]


0,1
tr_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
tr_accuracy,12.5
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: aryevzk5 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:11<00:00, 233.88it/s]
100%|██████████| 2625/2625 [00:10<00:00, 238.85it/s]
100%|██████████| 2625/2625 [00:10<00:00, 247.90it/s]
100%|██████████| 2625/2625 [00:08<00:00, 303.48it/s]
100%|██████████| 2625/2625 [00:10<00:00, 245.43it/s]


0,1
tr_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
tr_accuracy,18.75
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: lcmokav4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:05<00:00, 505.39it/s]
100%|██████████| 2625/2625 [00:03<00:00, 724.90it/s]
100%|██████████| 2625/2625 [00:04<00:00, 584.04it/s]
100%|██████████| 2625/2625 [00:04<00:00, 549.18it/s]
100%|██████████| 2625/2625 [00:03<00:00, 742.44it/s]


0,1
tr_accuracy,▁▁▁▁▁
tr_loss,█▄▃▁▁
val_accuracy,▁▁███
val_loss,█▄▃▁▂

0,1
tr_accuracy,93.75
tr_loss,0.37481
val_accuracy,93.75
val_loss,0.4118


[34m[1mwandb[0m: Agent Starting Run: u07c2yv6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:06<00:00, 423.42it/s]
100%|██████████| 2625/2625 [00:08<00:00, 305.13it/s]
100%|██████████| 2625/2625 [00:05<00:00, 443.91it/s]
100%|██████████| 2625/2625 [00:08<00:00, 309.52it/s]
100%|██████████| 2625/2625 [00:05<00:00, 447.74it/s]


0,1
tr_accuracy,▁▅██▆
tr_loss,█▄▃▁▁
val_accuracy,▂▁█▇▆
val_loss,█▄▃▁▁

0,1
tr_accuracy,75.0
tr_loss,0.7874
val_accuracy,81.25
val_loss,0.80519


[34m[1mwandb[0m: Agent Starting Run: wazcybdg with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:04<00:00, 275.26it/s]
100%|██████████| 1312/1312 [00:07<00:00, 179.22it/s]
100%|██████████| 1312/1312 [00:05<00:00, 254.31it/s]
100%|██████████| 1312/1312 [00:07<00:00, 177.15it/s]
100%|██████████| 1312/1312 [00:04<00:00, 263.95it/s]


0,1
tr_accuracy,▁▂▄▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▆▇▇█
val_loss,█▄▃▂▁

0,1
tr_accuracy,56.25
tr_loss,2.04688
val_accuracy,56.25
val_loss,2.23493


[34m[1mwandb[0m: Agent Starting Run: fcfsczau with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:04<00:00, 603.32it/s]
100%|██████████| 2625/2625 [00:06<00:00, 377.52it/s]
100%|██████████| 2625/2625 [00:04<00:00, 586.02it/s]
100%|██████████| 2625/2625 [00:04<00:00, 605.29it/s]
100%|██████████| 2625/2625 [00:06<00:00, 388.25it/s]


0,1
tr_accuracy,▁▅▇▇█
tr_loss,█▄▂▁▁
val_accuracy,▁▅███
val_loss,█▄▂▂▁

0,1
tr_accuracy,81.25
tr_loss,0.71787
val_accuracy,87.5
val_loss,0.74191


[34m[1mwandb[0m: Agent Starting Run: s1n6137v with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:07<00:00, 354.71it/s]
100%|██████████| 2625/2625 [00:04<00:00, 580.33it/s]
100%|██████████| 2625/2625 [00:07<00:00, 361.35it/s]
100%|██████████| 2625/2625 [00:04<00:00, 582.93it/s]
100%|██████████| 2625/2625 [00:07<00:00, 367.71it/s]
100%|██████████| 2625/2625 [00:04<00:00, 569.17it/s]
100%|██████████| 2625/2625 [00:05<00:00, 457.71it/s]
100%|██████████| 2625/2625 [00:05<00:00, 513.00it/s]
100%|██████████| 2625/2625 [00:04<00:00, 568.56it/s]
100%|██████████| 2625/2625 [00:06<00:00, 418.43it/s]


0,1
tr_accuracy,▁▃▁▃▅▆█▆▆█
tr_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▃▄▅▅▆▆███
val_loss,█▆▅▄▃▃▂▂▁▁

0,1
tr_accuracy,37.5
tr_loss,3.55888
val_accuracy,43.75
val_loss,3.61752


[34m[1mwandb[0m: Agent Starting Run: qmghr58c with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:08<00:00, 317.32it/s]
100%|██████████| 2625/2625 [00:09<00:00, 279.61it/s]
100%|██████████| 2625/2625 [00:06<00:00, 397.36it/s]
100%|██████████| 2625/2625 [00:09<00:00, 275.06it/s]
100%|██████████| 2625/2625 [00:06<00:00, 393.43it/s]


0,1
tr_accuracy,▁▆███
tr_loss,█▄▂▁▁
val_accuracy,▁▃▆██
val_loss,█▄▂▁▁

0,1
tr_accuracy,87.5
tr_loss,0.56359
val_accuracy,100.0
val_loss,0.57132


[34m[1mwandb[0m: Agent Starting Run: bba7fpw3 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


656.0


100%|██████████| 656/656 [00:03<00:00, 183.51it/s]
100%|██████████| 656/656 [00:06<00:00, 104.21it/s]
100%|██████████| 656/656 [00:03<00:00, 183.61it/s]
100%|██████████| 656/656 [00:04<00:00, 160.09it/s]
100%|██████████| 656/656 [00:05<00:00, 127.43it/s]


0,1
tr_accuracy,▁▅▆██
tr_loss,█▅▃▂▁
val_accuracy,▁▇▇▇█
val_loss,█▅▃▂▁

0,1
tr_accuracy,75.0
tr_loss,0.8479
val_accuracy,76.5625
val_loss,0.85013


[34m[1mwandb[0m: Agent Starting Run: wore326o with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:07<00:00, 172.76it/s]
100%|██████████| 1312/1312 [00:04<00:00, 305.25it/s]
100%|██████████| 1312/1312 [00:07<00:00, 170.66it/s]
100%|██████████| 1312/1312 [00:04<00:00, 308.52it/s]
100%|██████████| 1312/1312 [00:04<00:00, 287.18it/s]
100%|██████████| 1312/1312 [00:06<00:00, 215.16it/s]
100%|██████████| 1312/1312 [00:04<00:00, 299.42it/s]
100%|██████████| 1312/1312 [00:07<00:00, 180.94it/s]
100%|██████████| 1312/1312 [00:04<00:00, 287.61it/s]
100%|██████████| 1312/1312 [00:06<00:00, 188.77it/s]


0,1
tr_accuracy,▁▅▆▆▆▇▇▇▇█
tr_loss,█▇▆▄▃▃▂▂▁▁
val_accuracy,▁▆████████
val_loss,█▇▆▄▃▃▂▂▁▁

0,1
tr_accuracy,71.875
tr_loss,0.88919
val_accuracy,75.0
val_loss,0.88956


[34m[1mwandb[0m: Agent Starting Run: 5xpab2q4 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:02<00:00, 937.37it/s] 
100%|██████████| 2625/2625 [00:03<00:00, 782.46it/s]
100%|██████████| 2625/2625 [00:05<00:00, 494.93it/s]
100%|██████████| 2625/2625 [00:02<00:00, 961.51it/s]
100%|██████████| 2625/2625 [00:02<00:00, 970.07it/s]
100%|██████████| 2625/2625 [00:02<00:00, 938.10it/s]
100%|██████████| 2625/2625 [00:05<00:00, 493.60it/s]
100%|██████████| 2625/2625 [00:02<00:00, 965.30it/s]
100%|██████████| 2625/2625 [00:02<00:00, 965.64it/s]
100%|██████████| 2625/2625 [00:02<00:00, 946.97it/s]


0,1
tr_accuracy,▁▁▂▃▃▃▃▃▆█
tr_loss,██▇▇▆▅▄▃▂▁
val_accuracy,▁▁▂▇▆▆▆▆▇█
val_loss,██▇▇▆▅▅▃▂▁

0,1
tr_accuracy,43.75
tr_loss,1.8113
val_accuracy,56.25
val_loss,1.80953


[34m[1mwandb[0m: Agent Starting Run: iujdjj2r with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:12<00:00, 215.19it/s]
100%|██████████| 2625/2625 [00:09<00:00, 263.38it/s]
100%|██████████| 2625/2625 [00:11<00:00, 233.41it/s]
100%|██████████| 2625/2625 [00:12<00:00, 216.90it/s]
100%|██████████| 2625/2625 [00:12<00:00, 205.31it/s]
100%|██████████| 2625/2625 [00:12<00:00, 211.92it/s]
100%|██████████| 2625/2625 [00:12<00:00, 210.79it/s]
100%|██████████| 2625/2625 [00:10<00:00, 249.02it/s]
100%|██████████| 2625/2625 [00:10<00:00, 262.04it/s]
100%|██████████| 2625/2625 [00:12<00:00, 215.17it/s]


0,1
tr_accuracy,▁▁▁▁▁▁▁▁▁▁
tr_loss,█▄▃▂▂▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
tr_accuracy,6.25
tr_loss,2.30272
val_accuracy,0.0
val_loss,2.30274


[34m[1mwandb[0m: Agent Starting Run: jya1mmxr with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:05<00:00, 471.08it/s]
100%|██████████| 2625/2625 [00:08<00:00, 313.95it/s]
100%|██████████| 2625/2625 [00:06<00:00, 428.30it/s]
100%|██████████| 2625/2625 [00:08<00:00, 306.12it/s]
100%|██████████| 2625/2625 [00:05<00:00, 467.29it/s]


0,1
tr_accuracy,▁█▁▁█
tr_loss,▁▃█▆▁
val_accuracy,█▁▁▁▁
val_loss,▁▃█▆▁

0,1
tr_accuracy,12.5
tr_loss,17.60697
val_accuracy,0.0
val_loss,17.63282


[34m[1mwandb[0m: Agent Starting Run: w9dkjtt3 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:03<00:00, 393.91it/s]
100%|██████████| 1312/1312 [00:04<00:00, 305.24it/s]
100%|██████████| 1312/1312 [00:03<00:00, 342.52it/s]
100%|██████████| 1312/1312 [00:03<00:00, 419.76it/s]
100%|██████████| 1312/1312 [00:03<00:00, 341.41it/s]
100%|██████████| 1312/1312 [00:04<00:00, 298.75it/s]
100%|██████████| 1312/1312 [00:03<00:00, 409.18it/s]
100%|██████████| 1312/1312 [00:03<00:00, 357.91it/s]
100%|██████████| 1312/1312 [00:04<00:00, 290.13it/s]
100%|██████████| 1312/1312 [00:03<00:00, 418.25it/s]


0,1
tr_accuracy,▆█▄▃▅▃▄▃▁▆
tr_loss,▆▁▃▄▃▆▅▄█▃
val_accuracy,▁█▇▇▇▁▇▄▁▃
val_loss,▆▁▃▄▃▆▅▄█▃

0,1
tr_accuracy,37.5
tr_loss,1.41395
val_accuracy,37.5
val_loss,1.41455


[34m[1mwandb[0m: Agent Starting Run: ff9b25nn with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:03<00:00, 800.34it/s]
100%|██████████| 2625/2625 [00:03<00:00, 723.45it/s]
100%|██████████| 2625/2625 [00:05<00:00, 470.55it/s]
100%|██████████| 2625/2625 [00:03<00:00, 750.65it/s]
100%|██████████| 2625/2625 [00:03<00:00, 783.78it/s]


0,1
tr_accuracy,█▁▁██
tr_loss,█▄▃▂▁
val_accuracy,▁▆▆██
val_loss,█▄▃▂▁

0,1
tr_accuracy,93.75
tr_loss,0.63012
val_accuracy,100.0
val_loss,0.63857


[34m[1mwandb[0m: Agent Starting Run: djbzmdw1 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


656.0


100%|██████████| 656/656 [00:01<00:00, 398.92it/s]
100%|██████████| 656/656 [00:01<00:00, 401.69it/s]
100%|██████████| 656/656 [00:02<00:00, 266.75it/s]
100%|██████████| 656/656 [00:03<00:00, 211.64it/s]
100%|██████████| 656/656 [00:01<00:00, 405.58it/s]


0,1
tr_accuracy,▁▇▆▇█
tr_loss,█▄▂▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▂▂▁

0,1
tr_accuracy,84.375
tr_loss,0.53008
val_accuracy,93.75
val_loss,0.54413


[34m[1mwandb[0m: Agent Starting Run: 0lx3b2dn with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:11<00:00, 117.43it/s]
100%|██████████| 1312/1312 [00:11<00:00, 114.68it/s]
100%|██████████| 1312/1312 [00:10<00:00, 124.94it/s]
100%|██████████| 1312/1312 [00:08<00:00, 157.39it/s]
100%|██████████| 1312/1312 [00:10<00:00, 124.05it/s]
100%|██████████| 1312/1312 [00:11<00:00, 116.47it/s]
100%|██████████| 1312/1312 [00:11<00:00, 112.55it/s]
100%|██████████| 1312/1312 [00:09<00:00, 144.57it/s]
100%|██████████| 1312/1312 [00:09<00:00, 140.15it/s]
100%|██████████| 1312/1312 [00:10<00:00, 119.44it/s]


0,1
tr_accuracy,▁▃▃▆▅▆▆▆▆█
tr_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▁████████
val_loss,█▄▂▁▁▁▂▁▂▂

0,1
tr_accuracy,93.75
tr_loss,0.33765
val_accuracy,93.75
val_loss,0.4032


[34m[1mwandb[0m: Agent Starting Run: 00n1c7b7 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:10<00:00, 255.88it/s]
100%|██████████| 2625/2625 [00:09<00:00, 267.76it/s]
100%|██████████| 2625/2625 [00:12<00:00, 214.69it/s]
100%|██████████| 2625/2625 [00:12<00:00, 218.01it/s]
100%|██████████| 2625/2625 [00:15<00:00, 167.15it/s]


0,1
tr_accuracy,▁████
tr_loss,█▄▂▂▁
val_accuracy,▁████
val_loss,█▄▂▂▁

0,1
tr_accuracy,100.0
tr_loss,0.44755
val_accuracy,93.75
val_loss,0.45845


[34m[1mwandb[0m: Agent Starting Run: 62hge6ql with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:04<00:00, 294.87it/s]
100%|██████████| 1312/1312 [00:03<00:00, 343.62it/s]
100%|██████████| 1312/1312 [00:02<00:00, 447.33it/s]
100%|██████████| 1312/1312 [00:03<00:00, 417.87it/s]
100%|██████████| 1312/1312 [00:04<00:00, 272.14it/s]


0,1
tr_accuracy,▁▅█▇█
tr_loss,█▆▄▂▁
val_accuracy,▁▄▅██
val_loss,█▆▄▂▁

0,1
tr_accuracy,59.375
tr_loss,1.4778
val_accuracy,68.75
val_loss,1.47861


[34m[1mwandb[0m: Agent Starting Run: 7h32zyeq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:08<00:00, 147.79it/s]
100%|██████████| 1312/1312 [00:05<00:00, 219.34it/s]
100%|██████████| 1312/1312 [00:08<00:00, 151.34it/s]
100%|██████████| 1312/1312 [00:05<00:00, 221.36it/s]
100%|██████████| 1312/1312 [00:07<00:00, 174.12it/s]


0,1
tr_accuracy,▁▅▆▆█
tr_loss,█▅▃▂▁
val_accuracy,▁▇█▅█
val_loss,█▅▃▂▁

0,1
tr_accuracy,62.5
tr_loss,2.99297
val_accuracy,40.625
val_loss,3.16436


[34m[1mwandb[0m: Agent Starting Run: 1njc8xal with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:01<00:00, 728.05it/s]
100%|██████████| 1312/1312 [00:01<00:00, 719.28it/s]
100%|██████████| 1312/1312 [00:01<00:00, 736.27it/s]
100%|██████████| 1312/1312 [00:01<00:00, 751.05it/s]
100%|██████████| 1312/1312 [00:02<00:00, 554.23it/s]


0,1
tr_accuracy,▁████
tr_loss,█▆▃▂▁
val_accuracy,▁▅███
val_loss,█▆▃▂▁

0,1
tr_accuracy,78.125
tr_loss,0.55014
val_accuracy,84.375
val_loss,0.55512


[34m[1mwandb[0m: Agent Starting Run: h6l7pwm2 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:02<00:00, 253.50it/s]
100%|██████████| 656/656 [00:02<00:00, 261.71it/s]
100%|██████████| 656/656 [00:04<00:00, 152.16it/s]
100%|██████████| 656/656 [00:02<00:00, 246.04it/s]
100%|██████████| 656/656 [00:02<00:00, 265.57it/s]
100%|██████████| 656/656 [00:02<00:00, 264.67it/s]
100%|██████████| 656/656 [00:05<00:00, 124.60it/s]
100%|██████████| 656/656 [00:02<00:00, 247.39it/s]
100%|██████████| 656/656 [00:02<00:00, 239.78it/s]
100%|██████████| 656/656 [00:03<00:00, 190.21it/s]


0,1
tr_accuracy,▁▂▂▄▇▇▆▇██
tr_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▂▂▃▆▅▆▇▇█
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
tr_accuracy,46.875
tr_loss,1.58779
val_accuracy,60.9375
val_loss,1.58548


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: q8dubpr6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


656.0


100%|██████████| 656/656 [00:08<00:00, 79.38it/s]
100%|██████████| 656/656 [00:09<00:00, 66.95it/s]
100%|██████████| 656/656 [00:10<00:00, 63.65it/s]
100%|██████████| 656/656 [00:10<00:00, 60.84it/s]
100%|██████████| 656/656 [00:09<00:00, 66.24it/s]
100%|██████████| 656/656 [00:08<00:00, 78.27it/s]
100%|██████████| 656/656 [00:09<00:00, 72.29it/s]
100%|██████████| 656/656 [00:10<00:00, 62.92it/s]
100%|██████████| 656/656 [00:10<00:00, 61.15it/s]
100%|██████████| 656/656 [00:09<00:00, 69.84it/s]


0,1
tr_accuracy,▁▁▁▁▁▁▁▁██
tr_loss,██▇▆▅▄▃▃▁▁
val_accuracy,██▅▅▁▁▁▁▁▁
val_loss,██▇▆▅▄▃▃▁▁

0,1
tr_accuracy,84.375
tr_loss,0.4653
val_accuracy,87.5
val_loss,0.47601


[34m[1mwandb[0m: Agent Starting Run: nhaax88l with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:12<00:00, 208.72it/s]
100%|██████████| 2625/2625 [00:12<00:00, 206.20it/s]
100%|██████████| 2625/2625 [00:11<00:00, 232.32it/s]
100%|██████████| 2625/2625 [00:09<00:00, 279.55it/s]
100%|██████████| 2625/2625 [00:11<00:00, 230.04it/s]
100%|██████████| 2625/2625 [00:11<00:00, 223.39it/s]
100%|██████████| 2625/2625 [00:11<00:00, 221.43it/s]
100%|██████████| 2625/2625 [00:11<00:00, 222.43it/s]
100%|██████████| 2625/2625 [00:08<00:00, 291.97it/s]
100%|██████████| 2625/2625 [00:10<00:00, 241.69it/s]


0,1
tr_accuracy,▁▂▃▄▄▆▇▇▇█
tr_loss,██▇▆▅▄▃▂▁▁
val_accuracy,▁▂▃▅▆█████
val_loss,██▇▆▅▄▃▂▁▁

0,1
tr_accuracy,75.0
tr_loss,0.95272
val_accuracy,87.5
val_loss,0.95574


[34m[1mwandb[0m: Agent Starting Run: j4fgxump with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:01<00:00, 1011.18it/s]
100%|██████████| 1312/1312 [00:01<00:00, 987.98it/s]
100%|██████████| 1312/1312 [00:01<00:00, 913.27it/s] 
100%|██████████| 1312/1312 [00:01<00:00, 1012.17it/s]
100%|██████████| 1312/1312 [00:01<00:00, 865.15it/s]


0,1
tr_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
tr_accuracy,15.625
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 6brjj522 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:05<00:00, 118.70it/s]
100%|██████████| 656/656 [00:05<00:00, 120.24it/s]
100%|██████████| 656/656 [00:05<00:00, 123.09it/s]
100%|██████████| 656/656 [00:05<00:00, 111.73it/s]
100%|██████████| 656/656 [00:05<00:00, 112.78it/s]


0,1
tr_accuracy,▁▁▆██
tr_loss,█▅▃▂▁
val_accuracy,▁▄▃▃█
val_loss,█▆▃▂▁

0,1
tr_accuracy,21.875
tr_loss,5.61407
val_accuracy,28.125
val_loss,5.89666


[34m[1mwandb[0m: Agent Starting Run: sd11gknk with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:13<00:00, 198.49it/s]
100%|██████████| 2625/2625 [00:13<00:00, 194.14it/s]
100%|██████████| 2625/2625 [00:13<00:00, 201.41it/s]
100%|██████████| 2625/2625 [00:13<00:00, 195.66it/s]
100%|██████████| 2625/2625 [00:11<00:00, 219.62it/s]
100%|██████████| 2625/2625 [00:10<00:00, 247.29it/s]
100%|██████████| 2625/2625 [00:11<00:00, 222.35it/s]
100%|██████████| 2625/2625 [00:12<00:00, 208.45it/s]
100%|██████████| 2625/2625 [00:12<00:00, 205.37it/s]
100%|██████████| 2625/2625 [00:12<00:00, 202.39it/s]


0,1
tr_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
tr_accuracy,18.75
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: l8azkyvr with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:10<00:00, 242.57it/s]
100%|██████████| 2625/2625 [00:10<00:00, 243.83it/s]
100%|██████████| 2625/2625 [00:08<00:00, 311.82it/s]
100%|██████████| 2625/2625 [00:10<00:00, 261.62it/s]
100%|██████████| 2625/2625 [00:10<00:00, 241.57it/s]


0,1
tr_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
tr_accuracy,18.75
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 3oftfdje with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:08<00:00, 77.76it/s] 
100%|██████████| 656/656 [00:06<00:00, 109.22it/s]
100%|██████████| 656/656 [00:09<00:00, 71.88it/s] 
100%|██████████| 656/656 [00:06<00:00, 108.12it/s]
100%|██████████| 656/656 [00:08<00:00, 80.58it/s] 


0,1
tr_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
tr_accuracy,12.5
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: f0yjmylb with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:04<00:00, 596.32it/s]
100%|██████████| 2625/2625 [00:03<00:00, 662.83it/s]
100%|██████████| 2625/2625 [00:06<00:00, 418.62it/s]
100%|██████████| 2625/2625 [00:03<00:00, 671.72it/s]
100%|██████████| 2625/2625 [00:05<00:00, 476.26it/s]


0,1
tr_accuracy,▁▁███
tr_loss,█▄▃▂▁
val_accuracy,▁████
val_loss,█▄▃▂▁

0,1
tr_accuracy,93.75
tr_loss,0.61813
val_accuracy,81.25
val_loss,0.62192


[34m[1mwandb[0m: Agent Starting Run: du3uu4ns with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:05<00:00, 518.62it/s]
100%|██████████| 2625/2625 [00:07<00:00, 368.89it/s]
100%|██████████| 2625/2625 [00:04<00:00, 539.86it/s]
100%|██████████| 2625/2625 [00:07<00:00, 349.31it/s]
100%|██████████| 2625/2625 [00:04<00:00, 526.60it/s]


0,1
tr_accuracy,▁▁▁█▅
tr_loss,█▄▃▂▁
val_accuracy,▁▁▁██
val_loss,█▄▃▂▁

0,1
tr_accuracy,93.75
tr_loss,0.46831
val_accuracy,93.75
val_loss,0.48029


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qgr6z6xp with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:03<00:00, 202.14it/s]
100%|██████████| 656/656 [00:01<00:00, 348.84it/s]
100%|██████████| 656/656 [00:01<00:00, 376.45it/s]
100%|██████████| 656/656 [00:02<00:00, 312.79it/s]
100%|██████████| 656/656 [00:01<00:00, 393.89it/s]
100%|██████████| 656/656 [00:02<00:00, 225.85it/s]
100%|██████████| 656/656 [00:02<00:00, 242.62it/s]
100%|██████████| 656/656 [00:01<00:00, 392.29it/s]
100%|██████████| 656/656 [00:01<00:00, 387.14it/s]
100%|██████████| 656/656 [00:01<00:00, 394.06it/s]


0,1
tr_accuracy,▂▁▂▃▂▄▇▆██
tr_loss,█▇▆▅▄▃▃▂▁▁
val_accuracy,▁▃▅▅▅▅▇▇█▇
val_loss,█▇▆▅▄▃▃▂▁▁

0,1
tr_accuracy,20.3125
tr_loss,5.54073
val_accuracy,20.3125
val_loss,5.56008


[34m[1mwandb[0m: Agent Starting Run: 9lneiw27 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:02<00:00, 549.33it/s]
100%|██████████| 1312/1312 [00:02<00:00, 589.52it/s]
100%|██████████| 1312/1312 [00:04<00:00, 307.35it/s]
100%|██████████| 1312/1312 [00:02<00:00, 448.35it/s]
100%|██████████| 1312/1312 [00:02<00:00, 597.42it/s]
100%|██████████| 1312/1312 [00:02<00:00, 591.84it/s]
100%|██████████| 1312/1312 [00:02<00:00, 578.50it/s]
100%|██████████| 1312/1312 [00:04<00:00, 308.23it/s]
100%|██████████| 1312/1312 [00:02<00:00, 508.84it/s]
100%|██████████| 1312/1312 [00:02<00:00, 576.58it/s]


0,1
tr_accuracy,▁▂▂▆▇▆▆▇██
tr_loss,█▅▄▂▂▂▂▂▁▁
val_accuracy,▁▅██▇▅▅▅▇▇
val_loss,█▅▄▂▁▁▂▂▁▁

0,1
tr_accuracy,90.625
tr_loss,0.38838
val_accuracy,90.625
val_loss,0.43763


[34m[1mwandb[0m: Agent Starting Run: yaejbaer with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:04<00:00, 315.26it/s]
100%|██████████| 1312/1312 [00:04<00:00, 295.19it/s]
100%|██████████| 1312/1312 [00:05<00:00, 248.59it/s]
100%|██████████| 1312/1312 [00:04<00:00, 308.48it/s]
100%|██████████| 1312/1312 [00:07<00:00, 171.94it/s]
100%|██████████| 1312/1312 [00:04<00:00, 305.24it/s]
100%|██████████| 1312/1312 [00:05<00:00, 247.37it/s]
100%|██████████| 1312/1312 [00:05<00:00, 250.28it/s]
100%|██████████| 1312/1312 [00:04<00:00, 308.11it/s]
100%|██████████| 1312/1312 [00:07<00:00, 187.33it/s]


0,1
tr_accuracy,▂▇▅█▂▂▃▂▄▁
tr_loss,▆▁▄▂█▃▅▅▃▇
val_accuracy,▄█▇▆▄▄▁▄▆▇
val_loss,▆▁▄▂█▃▅▅▃▇

0,1
tr_accuracy,3.125
tr_loss,16.14733
val_accuracy,25.0
val_loss,16.14707


[34m[1mwandb[0m: Agent Starting Run: c14dd1sj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:07<00:00, 176.31it/s]
100%|██████████| 1312/1312 [00:04<00:00, 294.43it/s]
100%|██████████| 1312/1312 [00:06<00:00, 201.98it/s]
100%|██████████| 1312/1312 [00:04<00:00, 304.40it/s]
100%|██████████| 1312/1312 [00:04<00:00, 291.86it/s]


0,1
tr_accuracy,▄▁▆▄█
tr_loss,█▄▂▂▁
val_accuracy,▃▁▅█▆
val_loss,█▄▃▂▁

0,1
tr_accuracy,84.375
tr_loss,0.86039
val_accuracy,68.75
val_loss,0.8761


[34m[1mwandb[0m: Agent Starting Run: 1tv6hssn with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:01<00:00, 688.68it/s]
100%|██████████| 1312/1312 [00:03<00:00, 432.83it/s]
100%|██████████| 1312/1312 [00:04<00:00, 280.98it/s]
100%|██████████| 1312/1312 [00:02<00:00, 500.12it/s]
100%|██████████| 1312/1312 [00:02<00:00, 520.53it/s]
100%|██████████| 1312/1312 [00:02<00:00, 511.50it/s]
100%|██████████| 1312/1312 [00:04<00:00, 270.79it/s]
100%|██████████| 1312/1312 [00:02<00:00, 518.36it/s]
100%|██████████| 1312/1312 [00:02<00:00, 534.40it/s]
100%|██████████| 1312/1312 [00:02<00:00, 547.78it/s]


0,1
tr_accuracy,▅▅▄▅▇▁▅▄██
tr_loss,█▅▄▁▁▄▁▃▁▂
val_accuracy,▁▁▃▆▃▆▁▆█▃
val_loss,█▅▅▁▂▆▃▆▅▇

0,1
tr_accuracy,90.625
tr_loss,0.43559
val_accuracy,90.625
val_loss,0.50172


[34m[1mwandb[0m: Agent Starting Run: ebpkj15c with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:03<00:00, 371.10it/s]
100%|██████████| 1312/1312 [00:03<00:00, 388.09it/s]
100%|██████████| 1312/1312 [00:05<00:00, 219.53it/s]
100%|██████████| 1312/1312 [00:03<00:00, 383.62it/s]
100%|██████████| 1312/1312 [00:03<00:00, 382.54it/s]


0,1
tr_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
tr_accuracy,15.625
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: m228mxih with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:04<00:00, 269.56it/s]
100%|██████████| 1312/1312 [00:06<00:00, 207.21it/s]
100%|██████████| 1312/1312 [00:04<00:00, 277.18it/s]
100%|██████████| 1312/1312 [00:07<00:00, 182.73it/s]
100%|██████████| 1312/1312 [00:04<00:00, 289.79it/s]
100%|██████████| 1312/1312 [00:07<00:00, 179.47it/s]
100%|██████████| 1312/1312 [00:04<00:00, 279.17it/s]
100%|██████████| 1312/1312 [00:07<00:00, 174.21it/s]
100%|██████████| 1312/1312 [00:04<00:00, 277.36it/s]
100%|██████████| 1312/1312 [00:06<00:00, 191.24it/s]


0,1
tr_accuracy,▁▁▂▂▂▅▅▇▇█
tr_loss,█▅▃▂▂▂▁▁▁▁
val_accuracy,▁▁█▆▆▆▆▆▆▆
val_loss,█▅▃▂▂▂▁▁▁▁

0,1
tr_accuracy,87.5
tr_loss,0.54403
val_accuracy,87.5
val_loss,0.54872


[34m[1mwandb[0m: Agent Starting Run: l3hgwqzd with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


656.0


100%|██████████| 656/656 [00:02<00:00, 280.67it/s]
100%|██████████| 656/656 [00:03<00:00, 167.57it/s]
100%|██████████| 656/656 [00:02<00:00, 298.40it/s]
100%|██████████| 656/656 [00:02<00:00, 327.51it/s]
100%|██████████| 656/656 [00:02<00:00, 308.27it/s]


0,1
tr_accuracy,▁▁▁▁▁
tr_loss,█▅▃▂▁
val_accuracy,▁▁▁▁▁
val_loss,█▅▃▂▁

0,1
tr_accuracy,14.0625
tr_loss,2.3195
val_accuracy,12.5
val_loss,2.31941


[34m[1mwandb[0m: Agent Starting Run: zcglbmg1 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:02<00:00, 294.54it/s]
100%|██████████| 656/656 [00:02<00:00, 304.33it/s]
100%|██████████| 656/656 [00:02<00:00, 303.42it/s]
100%|██████████| 656/656 [00:03<00:00, 210.40it/s]
100%|██████████| 656/656 [00:03<00:00, 170.58it/s]
100%|██████████| 656/656 [00:02<00:00, 291.23it/s]
100%|██████████| 656/656 [00:02<00:00, 299.66it/s]
100%|██████████| 656/656 [00:02<00:00, 282.99it/s]
100%|██████████| 656/656 [00:04<00:00, 153.92it/s]
100%|██████████| 656/656 [00:02<00:00, 273.88it/s]


0,1
tr_accuracy,▁▂▄▅▅▇▇▇██
tr_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▃▄▅▆▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
tr_accuracy,56.25
tr_loss,1.25733
val_accuracy,57.8125
val_loss,1.28038


[34m[1mwandb[0m: Agent Starting Run: 3jcv9ssd with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:05<00:00, 125.61it/s]
100%|██████████| 656/656 [00:03<00:00, 189.75it/s]
100%|██████████| 656/656 [00:03<00:00, 206.38it/s]
100%|██████████| 656/656 [00:03<00:00, 167.15it/s]
100%|██████████| 656/656 [00:04<00:00, 131.75it/s]


0,1
tr_accuracy,▁▄▇██
tr_loss,█▄▂▁▁
val_accuracy,▁▄▇▇█
val_loss,█▄▂▁▁

0,1
tr_accuracy,67.1875
tr_loss,1.03192
val_accuracy,70.3125
val_loss,1.03854


[34m[1mwandb[0m: Agent Starting Run: efr2mwmy with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:04<00:00, 301.22it/s]
100%|██████████| 1312/1312 [00:05<00:00, 250.67it/s]
100%|██████████| 1312/1312 [00:03<00:00, 380.49it/s]
100%|██████████| 1312/1312 [00:03<00:00, 362.65it/s]
100%|██████████| 1312/1312 [00:06<00:00, 214.62it/s]
100%|██████████| 1312/1312 [00:03<00:00, 383.85it/s]
100%|██████████| 1312/1312 [00:03<00:00, 383.58it/s]
100%|██████████| 1312/1312 [00:05<00:00, 238.24it/s]
100%|██████████| 1312/1312 [00:04<00:00, 324.60it/s]
100%|██████████| 1312/1312 [00:03<00:00, 374.35it/s]


0,1
tr_accuracy,▁▃▆▆█▆▆▆▆▆
tr_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▁▅▅▅▅▅▅▆█
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
tr_accuracy,90.625
tr_loss,0.42027
val_accuracy,96.875
val_loss,0.4567


[34m[1mwandb[0m: Agent Starting Run: e2u7odbb with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:03<00:00, 718.04it/s]
100%|██████████| 2625/2625 [00:04<00:00, 638.81it/s]
100%|██████████| 2625/2625 [00:05<00:00, 445.79it/s]
100%|██████████| 2625/2625 [00:03<00:00, 757.47it/s]
100%|██████████| 2625/2625 [00:03<00:00, 741.52it/s]


0,1
tr_accuracy,▁█▁▃▁
tr_loss,▃▁▄█▄
val_accuracy,██▅▁▁
val_loss,▃▁▄█▄

0,1
tr_accuracy,6.25
tr_loss,6.09492
val_accuracy,0.0
val_loss,6.12421


[34m[1mwandb[0m: Agent Starting Run: omrsnhe5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:13<00:00, 98.54it/s] 
100%|██████████| 1312/1312 [00:12<00:00, 103.96it/s]
100%|██████████| 1312/1312 [00:13<00:00, 99.07it/s]
100%|██████████| 1312/1312 [00:13<00:00, 99.05it/s]
100%|██████████| 1312/1312 [00:12<00:00, 102.53it/s]
100%|██████████| 1312/1312 [00:12<00:00, 106.20it/s]
100%|██████████| 1312/1312 [00:12<00:00, 109.09it/s]
100%|██████████| 1312/1312 [00:12<00:00, 107.43it/s]
100%|██████████| 1312/1312 [00:12<00:00, 105.93it/s]
100%|██████████| 1312/1312 [00:11<00:00, 109.62it/s]


0,1
tr_accuracy,▁▆▆▆██████
tr_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▁▁▁▁▁▁███
val_loss,█▄▃▃▂▂▂▁▁▁

0,1
tr_accuracy,81.25
tr_loss,0.49403
val_accuracy,93.75
val_loss,0.50018


[34m[1mwandb[0m: Agent Starting Run: 6u0k0go9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


656.0


100%|██████████| 656/656 [00:06<00:00, 101.51it/s]
100%|██████████| 656/656 [00:05<00:00, 127.04it/s]
100%|██████████| 656/656 [00:06<00:00, 95.75it/s]
100%|██████████| 656/656 [00:05<00:00, 120.52it/s]
100%|██████████| 656/656 [00:08<00:00, 79.39it/s]


0,1
tr_accuracy,▁▅▇▇█
tr_loss,█▃▂▁▁
val_accuracy,▁▅▇██
val_loss,█▃▂▁▁

0,1
tr_accuracy,87.5
tr_loss,0.44045
val_accuracy,89.0625
val_loss,0.45586


[34m[1mwandb[0m: Agent Starting Run: 7lejz3jz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:10<00:00, 239.15it/s]
100%|██████████| 2625/2625 [00:11<00:00, 231.53it/s]
100%|██████████| 2625/2625 [00:08<00:00, 325.84it/s]
100%|██████████| 2625/2625 [00:10<00:00, 246.21it/s]
100%|██████████| 2625/2625 [00:10<00:00, 241.47it/s]


0,1
tr_accuracy,▁▁▁▁▁
tr_loss,█▅▃▁▁
val_accuracy,███▁█
val_loss,█▅▃▁▁

0,1
tr_accuracy,93.75
tr_loss,0.44657
val_accuracy,93.75
val_loss,0.48081


[34m[1mwandb[0m: Agent Starting Run: z46phv3v with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:06<00:00, 188.93it/s]
100%|██████████| 1312/1312 [00:06<00:00, 198.56it/s]
100%|██████████| 1312/1312 [00:07<00:00, 186.58it/s]
100%|██████████| 1312/1312 [00:06<00:00, 194.48it/s]
100%|██████████| 1312/1312 [00:06<00:00, 210.43it/s]
100%|██████████| 1312/1312 [00:07<00:00, 170.01it/s]
100%|██████████| 1312/1312 [00:05<00:00, 233.46it/s]
100%|██████████| 1312/1312 [00:07<00:00, 172.05it/s]
100%|██████████| 1312/1312 [00:05<00:00, 242.70it/s]
100%|██████████| 1312/1312 [00:08<00:00, 154.43it/s]


0,1
tr_accuracy,▁▃▄▅▇▇▇▇▇█
tr_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▅▇▇████▇▇
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
tr_accuracy,81.25
tr_loss,0.5795
val_accuracy,75.0
val_loss,0.59306


[34m[1mwandb[0m: Agent Starting Run: kjixjc11 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:01<00:00, 378.80it/s]
100%|██████████| 656/656 [00:01<00:00, 345.05it/s]
100%|██████████| 656/656 [00:00<00:00, 701.77it/s]
100%|██████████| 656/656 [00:00<00:00, 707.11it/s]
100%|██████████| 656/656 [00:00<00:00, 662.93it/s]


0,1
tr_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
tr_accuracy,12.5
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: djn53i5q with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


656.0


100%|██████████| 656/656 [00:02<00:00, 304.15it/s]
100%|██████████| 656/656 [00:02<00:00, 284.74it/s]
100%|██████████| 656/656 [00:01<00:00, 346.41it/s]
100%|██████████| 656/656 [00:01<00:00, 349.01it/s]
100%|██████████| 656/656 [00:03<00:00, 187.57it/s]


0,1
tr_accuracy,▁▁▁▁▁
tr_loss,█▅▃▂▁
val_accuracy,▁▁▁▁▁
val_loss,█▅▃▂▁

0,1
tr_accuracy,14.0625
tr_loss,2.31054
val_accuracy,12.5
val_loss,2.30984


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: o1x14ghd with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:10<00:00, 126.63it/s]
100%|██████████| 1312/1312 [00:09<00:00, 136.66it/s]
100%|██████████| 1312/1312 [00:11<00:00, 114.77it/s]
100%|██████████| 1312/1312 [00:11<00:00, 110.05it/s]
100%|██████████| 1312/1312 [00:12<00:00, 104.95it/s]
100%|██████████| 1312/1312 [00:11<00:00, 110.81it/s]
100%|██████████| 1312/1312 [00:10<00:00, 124.29it/s]
100%|██████████| 1312/1312 [00:09<00:00, 132.81it/s]
100%|██████████| 1312/1312 [00:11<00:00, 112.39it/s]
100%|██████████| 1312/1312 [00:12<00:00, 108.28it/s]


0,1
tr_accuracy,▁▁▁▄▄▅▂▇██
tr_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▅▅▅▅▅█▅██
val_loss,█▄▃▂▁▁▁▁▁▂

0,1
tr_accuracy,93.75
tr_loss,0.36908
val_accuracy,90.625
val_loss,0.48778


[34m[1mwandb[0m: Agent Starting Run: sij8qs0u with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:10<00:00, 64.23it/s]
100%|██████████| 656/656 [00:10<00:00, 60.84it/s]
100%|██████████| 656/656 [00:10<00:00, 61.75it/s]
100%|██████████| 656/656 [00:09<00:00, 67.11it/s]
100%|██████████| 656/656 [00:08<00:00, 79.86it/s]
100%|██████████| 656/656 [00:09<00:00, 68.89it/s]
100%|██████████| 656/656 [00:10<00:00, 62.44it/s]
100%|██████████| 656/656 [00:10<00:00, 61.45it/s]
100%|██████████| 656/656 [00:10<00:00, 63.10it/s]
100%|██████████| 656/656 [00:08<00:00, 72.93it/s]


0,1
tr_accuracy,▁▃▅█▇▅▅▇▇▇
tr_loss,█▃▂▁▁▁▁▁▁▁
val_accuracy,▁▅▄▇▆▇▇███
val_loss,█▃▂▁▁▁▁▁▁▁

0,1
tr_accuracy,87.5
tr_loss,0.46862
val_accuracy,90.625
val_loss,0.47683


[34m[1mwandb[0m: Agent Starting Run: 89rtpnkh with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:05<00:00, 474.84it/s]
100%|██████████| 2625/2625 [00:05<00:00, 508.70it/s]
100%|██████████| 2625/2625 [00:04<00:00, 585.76it/s]
100%|██████████| 2625/2625 [00:06<00:00, 382.85it/s]
100%|██████████| 2625/2625 [00:04<00:00, 623.35it/s]


0,1
tr_accuracy,▁████
tr_loss,█▄▂▂▁
val_accuracy,▁████
val_loss,█▄▂▂▁

0,1
tr_accuracy,93.75
tr_loss,0.42687
val_accuracy,93.75
val_loss,0.4358


[34m[1mwandb[0m: Agent Starting Run: gc6361fh with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:24<00:00, 108.78it/s]
100%|██████████| 2625/2625 [00:24<00:00, 106.69it/s]
100%|██████████| 2625/2625 [00:24<00:00, 108.50it/s]
100%|██████████| 2625/2625 [00:23<00:00, 110.89it/s]
100%|██████████| 2625/2625 [00:24<00:00, 105.83it/s]
100%|██████████| 2625/2625 [00:24<00:00, 106.24it/s]
100%|██████████| 2625/2625 [00:24<00:00, 107.97it/s]
100%|██████████| 2625/2625 [00:23<00:00, 113.90it/s]
100%|██████████| 2625/2625 [00:24<00:00, 107.07it/s]
100%|██████████| 2625/2625 [00:26<00:00, 99.56it/s] 


0,1
tr_accuracy,▆█▇█▇▇▄▄▁▁
tr_loss,▃▂▁▁▂▃▅▆██
val_accuracy,▅▅▆▇█▆▅▁▁▁
val_loss,▃▂▁▁▂▃▅▆██

0,1
tr_accuracy,6.25
tr_loss,2.00366
val_accuracy,25.0
val_loss,2.00491


[34m[1mwandb[0m: Agent Starting Run: ax6v4ug2 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:04<00:00, 313.12it/s]
100%|██████████| 1312/1312 [00:04<00:00, 274.59it/s]
100%|██████████| 1312/1312 [00:06<00:00, 194.44it/s]
100%|██████████| 1312/1312 [00:04<00:00, 320.34it/s]
100%|██████████| 1312/1312 [00:04<00:00, 299.47it/s]
100%|██████████| 1312/1312 [00:06<00:00, 200.50it/s]
100%|██████████| 1312/1312 [00:04<00:00, 295.56it/s]
100%|██████████| 1312/1312 [00:06<00:00, 194.86it/s]
100%|██████████| 1312/1312 [00:04<00:00, 287.29it/s]
100%|██████████| 1312/1312 [00:04<00:00, 294.02it/s]


0,1
tr_accuracy,▅▅▅▁▅▅▅▅█▅
tr_loss,█▆▅▄▃▃▂▁▁▁
val_accuracy,▁▆▆▆▆▆████
val_loss,█▆▅▄▄▃▂▁▁▁

0,1
tr_accuracy,81.25
tr_loss,0.50371
val_accuracy,90.625
val_loss,0.50911


[34m[1mwandb[0m: Agent Starting Run: 5d8xsc0c with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:08<00:00, 72.97it/s]
100%|██████████| 656/656 [00:08<00:00, 74.77it/s]
100%|██████████| 656/656 [00:10<00:00, 60.57it/s]
100%|██████████| 656/656 [00:11<00:00, 55.77it/s]
100%|██████████| 656/656 [00:11<00:00, 58.10it/s]
100%|██████████| 656/656 [00:08<00:00, 74.77it/s]
100%|██████████| 656/656 [00:07<00:00, 82.09it/s]
100%|██████████| 656/656 [00:10<00:00, 60.11it/s]
100%|██████████| 656/656 [00:11<00:00, 58.25it/s]
100%|██████████| 656/656 [00:10<00:00, 60.67it/s]


0,1
tr_accuracy,▁▅▅▅▆▆▇▇▇█
tr_loss,█▄▃▃▂▂▁▁▁▁
val_accuracy,▁▄▇█▇▇▆▆▆▆
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
tr_accuracy,89.0625
tr_loss,0.56685
val_accuracy,78.125
val_loss,0.61252


[34m[1mwandb[0m: Agent Starting Run: piya366x with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:04<00:00, 308.50it/s]
100%|██████████| 1312/1312 [00:02<00:00, 528.90it/s]
100%|██████████| 1312/1312 [00:02<00:00, 586.95it/s]
100%|██████████| 1312/1312 [00:02<00:00, 570.36it/s]
100%|██████████| 1312/1312 [00:02<00:00, 491.77it/s]
100%|██████████| 1312/1312 [00:04<00:00, 321.53it/s]
100%|██████████| 1312/1312 [00:02<00:00, 570.26it/s]
100%|██████████| 1312/1312 [00:02<00:00, 485.45it/s]
100%|██████████| 1312/1312 [00:02<00:00, 568.88it/s]
100%|██████████| 1312/1312 [00:03<00:00, 330.61it/s]


0,1
tr_accuracy,▁▁▁▁▁▁▁▁▁▁
tr_loss,█▅▄▃▂▂▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
tr_accuracy,15.625
tr_loss,2.30237
val_accuracy,3.125
val_loss,2.30221


[34m[1mwandb[0m: Agent Starting Run: tpvt4pm8 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:02<00:00, 460.90it/s]
100%|██████████| 1312/1312 [00:03<00:00, 364.47it/s]
100%|██████████| 1312/1312 [00:04<00:00, 315.89it/s]
100%|██████████| 1312/1312 [00:03<00:00, 424.82it/s]
100%|██████████| 1312/1312 [00:02<00:00, 481.09it/s]


0,1
tr_accuracy,▁▁▅▅█
tr_loss,█▄▂▂▁
val_accuracy,▁▅▆██
val_loss,█▄▃▂▁

0,1
tr_accuracy,62.5
tr_loss,0.89854
val_accuracy,78.125
val_loss,0.90965


[34m[1mwandb[0m: Agent Starting Run: cugv5onm with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


656.0


100%|██████████| 656/656 [00:01<00:00, 488.62it/s]
100%|██████████| 656/656 [00:01<00:00, 477.58it/s]
100%|██████████| 656/656 [00:01<00:00, 490.71it/s]
100%|██████████| 656/656 [00:01<00:00, 514.37it/s]
100%|██████████| 656/656 [00:02<00:00, 306.90it/s]


0,1
tr_accuracy,▁▁▁▁▁
tr_loss,▁▁▁▁▁
val_accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
tr_accuracy,7.8125
tr_loss,2.30495
val_accuracy,7.8125
val_loss,2.30521


[34m[1mwandb[0m: Agent Starting Run: 3oq5j926 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:08<00:00, 298.37it/s]
100%|██████████| 2625/2625 [00:06<00:00, 388.63it/s]
100%|██████████| 2625/2625 [00:09<00:00, 271.91it/s]
100%|██████████| 2625/2625 [00:06<00:00, 392.73it/s]
100%|██████████| 2625/2625 [00:08<00:00, 301.04it/s]
100%|██████████| 2625/2625 [00:07<00:00, 344.02it/s]
100%|██████████| 2625/2625 [00:06<00:00, 387.34it/s]
100%|██████████| 2625/2625 [00:09<00:00, 279.80it/s]
100%|██████████| 2625/2625 [00:06<00:00, 400.47it/s]
100%|██████████| 2625/2625 [00:09<00:00, 287.83it/s]


0,1
tr_accuracy,▃▆▃▁▃▃▆██▃
tr_loss,█▃▄▄▁▃▇▂▂▅
val_accuracy,▁▁███▅▁▅▅█
val_loss,█▃▄▄▁▃▇▂▂▅

0,1
tr_accuracy,6.25
tr_loss,20.70581
val_accuracy,12.5
val_loss,20.67569


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5zjcggaf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:03<00:00, 342.72it/s]
100%|██████████| 1312/1312 [00:06<00:00, 200.39it/s]
100%|██████████| 1312/1312 [00:03<00:00, 333.81it/s]
100%|██████████| 1312/1312 [00:04<00:00, 271.86it/s]
100%|██████████| 1312/1312 [00:05<00:00, 252.02it/s]
100%|██████████| 1312/1312 [00:04<00:00, 318.11it/s]
100%|██████████| 1312/1312 [00:06<00:00, 197.90it/s]
100%|██████████| 1312/1312 [00:04<00:00, 313.34it/s]
100%|██████████| 1312/1312 [00:03<00:00, 330.34it/s]
100%|██████████| 1312/1312 [00:06<00:00, 200.66it/s]


0,1
tr_accuracy,▁▃▅▅▆██▇▇█
tr_loss,█▄▃▂▂▁▁▁▁▁
val_accuracy,▁▄▃▄▅▅▇▇▇█
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
tr_accuracy,31.25
tr_loss,2.31815
val_accuracy,28.125
val_loss,2.32794


[34m[1mwandb[0m: Agent Starting Run: y4em07kp with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


656.0


100%|██████████| 656/656 [00:07<00:00, 90.98it/s]
100%|██████████| 656/656 [00:05<00:00, 119.73it/s]
100%|██████████| 656/656 [00:07<00:00, 82.54it/s]
100%|██████████| 656/656 [00:04<00:00, 135.21it/s]
100%|██████████| 656/656 [00:07<00:00, 89.07it/s]


0,1
tr_accuracy,▁▄███
tr_loss,█▆▅▃▁
val_accuracy,▁▅▇██
val_loss,█▆▅▃▁

0,1
tr_accuracy,31.25
tr_loss,2.17737
val_accuracy,46.875
val_loss,2.17698


[34m[1mwandb[0m: Agent Starting Run: xdhgn9p7 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:09<00:00, 287.75it/s]
100%|██████████| 2625/2625 [00:14<00:00, 179.27it/s]
100%|██████████| 2625/2625 [00:14<00:00, 181.40it/s]
100%|██████████| 2625/2625 [00:12<00:00, 202.80it/s]
100%|██████████| 2625/2625 [00:14<00:00, 181.50it/s]


0,1
tr_accuracy,▁█▁██
tr_loss,▄▁▆▅█
val_accuracy,▁▅█▅▁
val_loss,▂▁▅▆█

0,1
tr_accuracy,100.0
tr_loss,0.49533
val_accuracy,81.25
val_loss,0.54985


[34m[1mwandb[0m: Agent Starting Run: mw7uvhgu with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:16<00:00, 162.03it/s]
100%|██████████| 2625/2625 [00:16<00:00, 156.71it/s]
100%|██████████| 2625/2625 [00:15<00:00, 164.85it/s]
100%|██████████| 2625/2625 [00:16<00:00, 163.17it/s]
100%|██████████| 2625/2625 [00:16<00:00, 159.80it/s]


0,1
tr_accuracy,▁▃▅▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▃█▅▇
val_loss,█▄▃▂▁

0,1
tr_accuracy,75.0
tr_loss,2.77238
val_accuracy,68.75
val_loss,3.02643


[34m[1mwandb[0m: Agent Starting Run: 73cygps7 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


1312.0


100%|██████████| 1312/1312 [00:07<00:00, 184.20it/s]
100%|██████████| 1312/1312 [00:04<00:00, 281.35it/s]
100%|██████████| 1312/1312 [00:05<00:00, 250.99it/s]
100%|██████████| 1312/1312 [00:05<00:00, 243.08it/s]
100%|██████████| 1312/1312 [00:04<00:00, 300.46it/s]


0,1
tr_accuracy,▁▅▅▅█
tr_loss,█▇▅▂▁
val_accuracy,▅█▁▁▁
val_loss,█▇▅▂▁

0,1
tr_accuracy,84.375
tr_loss,0.536
val_accuracy,90.625
val_loss,0.54017


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zuuuyl7v with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:04<00:00, 147.22it/s]
100%|██████████| 656/656 [00:02<00:00, 234.67it/s]
100%|██████████| 656/656 [00:02<00:00, 228.94it/s]
100%|██████████| 656/656 [00:03<00:00, 211.64it/s]
100%|██████████| 656/656 [00:04<00:00, 152.18it/s]
100%|██████████| 656/656 [00:02<00:00, 234.16it/s]
100%|██████████| 656/656 [00:02<00:00, 232.41it/s]
100%|██████████| 656/656 [00:03<00:00, 165.67it/s]
100%|██████████| 656/656 [00:04<00:00, 148.85it/s]
100%|██████████| 656/656 [00:02<00:00, 232.91it/s]


0,1
tr_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
tr_accuracy,12.5
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: pc2maa13 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:06<00:00, 382.35it/s]
100%|██████████| 2625/2625 [00:05<00:00, 502.94it/s]
100%|██████████| 2625/2625 [00:07<00:00, 363.18it/s]
100%|██████████| 2625/2625 [00:04<00:00, 562.43it/s]
100%|██████████| 2625/2625 [00:07<00:00, 335.25it/s]


0,1
tr_accuracy,▁▇██▇
tr_loss,█▄▂▁▁
val_accuracy,▁▂▆▇█
val_loss,█▄▂▁▁

0,1
tr_accuracy,62.5
tr_loss,0.9165
val_accuracy,62.5
val_loss,0.92693


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 554x2c6i with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


2625.0


100%|██████████| 2625/2625 [00:20<00:00, 130.78it/s]
100%|██████████| 2625/2625 [00:22<00:00, 115.55it/s]
100%|██████████| 2625/2625 [00:20<00:00, 127.89it/s]
100%|██████████| 2625/2625 [00:21<00:00, 122.66it/s]
100%|██████████| 2625/2625 [00:23<00:00, 109.76it/s]


0,1
tr_accuracy,▁▃▃▃█
tr_loss,▅█▁▅▅
val_accuracy,▃▃█▁▁
val_loss,▅█▁▅▅

0,1
tr_accuracy,18.75
tr_loss,26.80601
val_accuracy,6.25
val_loss,26.79158


[34m[1mwandb[0m: Agent Starting Run: wdggnhbz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:03<00:00, 202.82it/s]
100%|██████████| 656/656 [00:02<00:00, 256.86it/s]
100%|██████████| 656/656 [00:01<00:00, 398.30it/s]
100%|██████████| 656/656 [00:02<00:00, 321.57it/s]
100%|██████████| 656/656 [00:01<00:00, 355.48it/s]
100%|██████████| 656/656 [00:01<00:00, 354.82it/s]
100%|██████████| 656/656 [00:03<00:00, 200.14it/s]
100%|██████████| 656/656 [00:02<00:00, 267.92it/s]
100%|██████████| 656/656 [00:01<00:00, 367.23it/s]
100%|██████████| 656/656 [00:01<00:00, 366.03it/s]


0,1
tr_accuracy,▁▅▄▅▅▅▅▇▇█
tr_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▄▂▅▄▄▇█▇█
val_loss,█▄▃▃▂▂▂▁▁▁

0,1
tr_accuracy,92.1875
tr_loss,0.43448
val_accuracy,87.5
val_loss,0.46612


[34m[1mwandb[0m: Agent Starting Run: yydcumi0 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


656.0


100%|██████████| 656/656 [00:03<00:00, 170.73it/s]
100%|██████████| 656/656 [00:04<00:00, 133.17it/s]
100%|██████████| 656/656 [00:04<00:00, 136.45it/s]
100%|██████████| 656/656 [00:03<00:00, 183.94it/s]
100%|██████████| 656/656 [00:05<00:00, 125.65it/s]


0,1
tr_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
tr_accuracy,12.5
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: jdzjl5ci with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


1312.0


100%|██████████| 1312/1312 [00:04<00:00, 302.80it/s]
100%|██████████| 1312/1312 [00:04<00:00, 284.00it/s]
100%|██████████| 1312/1312 [00:03<00:00, 428.07it/s]
100%|██████████| 1312/1312 [00:03<00:00, 425.89it/s]
100%|██████████| 1312/1312 [00:05<00:00, 257.84it/s]


0,1
tr_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
tr_accuracy,15.625
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: smcwegpj with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


2625.0


100%|██████████| 2625/2625 [00:11<00:00, 235.50it/s]
100%|██████████| 2625/2625 [00:07<00:00, 336.14it/s]
100%|██████████| 2625/2625 [00:10<00:00, 250.84it/s]
100%|██████████| 2625/2625 [00:09<00:00, 268.60it/s]
100%|██████████| 2625/2625 [00:08<00:00, 320.27it/s]
100%|██████████| 2625/2625 [00:11<00:00, 237.63it/s]
100%|██████████| 2625/2625 [00:08<00:00, 317.11it/s]
100%|██████████| 2625/2625 [00:09<00:00, 270.68it/s]
100%|██████████| 2625/2625 [00:10<00:00, 245.31it/s]
100%|██████████| 2625/2625 [00:08<00:00, 327.33it/s]


0,1
tr_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
tr_accuracy,18.75
tr_loss,
val_accuracy,6.25
val_loss,


[34m[1mwandb[0m: Agent Starting Run: z4d76a76 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


656.0


100%|██████████| 656/656 [00:01<00:00, 340.19it/s]
100%|██████████| 656/656 [00:01<00:00, 361.78it/s]
100%|██████████| 656/656 [00:02<00:00, 260.10it/s]
100%|██████████| 656/656 [00:03<00:00, 192.61it/s]
100%|██████████| 656/656 [00:01<00:00, 346.25it/s]
100%|██████████| 656/656 [00:02<00:00, 317.35it/s]
100%|██████████| 656/656 [00:01<00:00, 357.32it/s]
100%|██████████| 656/656 [00:01<00:00, 370.75it/s]
100%|██████████| 656/656 [00:03<00:00, 218.61it/s]
100%|██████████| 656/656 [00:02<00:00, 220.67it/s]


0,1
tr_accuracy,▁█▃▃▃▃▃▃▃▃
tr_loss,▁▅████████
val_accuracy,▃█▁▁▁▁▁▁▁▁
val_loss,▁▄████████

0,1
tr_accuracy,10.9375
tr_loss,2.3026
val_accuracy,9.375
val_loss,2.30263
