In [None]:
import wandb

In [None]:
import argparse, os, logging, random, time
import numpy as np
import math
import time
import scipy.sparse
import lightgbm as lgb
import data_helpers as dh

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

from sklearn.utils.extmath import softmax

from torch.autograd import Variable
from torch.nn.parameter import Parameter
from torch.optim import Optimizer, AdamW, SGD

import gc

In [None]:
# Display the PyTorch version this notebook was run with.
torch.__version__

'1.10.0+cu102'

In [None]:
# Display the torchvision version this notebook was run with.
torchvision.__version__

'0.11.1+cu102'

In [None]:
import pdb

# Pick the compute device once. `type_prefix` is the namespace (torch or
# torch.cuda) from which legacy typed tensors such as LongTensor are taken.
_cuda_ok = torch.cuda.is_available()
device = torch.device('cuda') if _cuda_ok else torch.device('cpu')
type_prefix = torch.cuda if _cuda_ok else torch

if _cuda_ok:
    # Newly created tensors default to CUDA float tensors from here on.
    torch.set_default_tensor_type(torch.cuda.FloatTensor)

In [None]:
def one_hot(y, numslot, mask=None):
    """Scatter-encode integer indices as one-hot vectors.

    Every element of ``y`` is used as an index into ``numslot`` slots. The
    flat ``(y.numel(), numslot)`` encoding is finally reshaped so that the
    result has ``y.shape[0]`` rows.

    Args:
        y: tensor of indices; cast to a LongTensor before scattering.
        numslot: width of each one-hot encoding.
        mask: optional tensor multiplied element-wise into the flat encoding
            before the final reshape.

    Returns:
        float32 tensor created on the module-level ``device`` with shape
        ``(y.shape[0], -1)``.
    """
    indices = y.type(type_prefix.LongTensor).reshape(-1, 1)
    encoded = torch.zeros(
        indices.size(0), numslot,
        device=device, dtype=torch.float32, requires_grad=False,
    )
    encoded.scatter_(1, indices, 1)
    if mask is not None:
        encoded = encoded * mask
    return encoded.reshape(y.shape[0], -1)

In [None]:
class BatchDense(nn.Module):
    """``batch`` independent dense layers evaluated with a single ``baddbmm``.

    Parameters are stored stacked: ``weight`` has shape
    ``(batch, in_features, out_features)`` and ``bias`` has shape
    ``(batch, 1, out_features)``.

    Args:
        batch: number of independent layers.
        in_features: input width of each layer.
        out_features: output width of each layer.
        bias_init: optional array-like with the initial bias values. It must
            either contain ``batch * out_features`` values or be broadcastable
            to ``(batch, 1, out_features)``. ``None`` initialises the bias to 0.
    """

    def __init__(self, batch, in_features, out_features, bias_init=None):
        super(BatchDense, self).__init__()
        self.batch = batch
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(batch, in_features, out_features))
        self.bias = Parameter(torch.Tensor(batch, 1, out_features))
        self.reset_parameters(bias_init)

    def reset_parameters(self, bias_init=None):
        """Re-draw the weights uniformly in +/- sqrt(6 / (in + out)) and set the bias."""
        bound = math.sqrt(6.0 / (self.in_features + self.out_features))
        with torch.no_grad():
            self.weight.uniform_(-bound, bound)
            if bias_init is None:
                self.bias.zero_()
                return
            # Copy into the existing parameter instead of swapping `.data` for
            # a raw `from_numpy` tensor: that tensor could be float64, live on
            # the CPU, or have the wrong shape, and `baddbmm` in `forward`
            # would then fail unless the caller re-cast the whole module.
            init = torch.as_tensor(
                np.asarray(bias_init), dtype=self.bias.dtype, device=self.bias.device
            )
            if init.numel() == self.bias.numel():
                init = init.reshape(self.bias.shape)
            # Any other shape must broadcast to the bias shape; copy_ raises if not.
            self.bias.copy_(init)

    def forward(self, x):
        """Apply layer ``i`` to the ``i``-th slice of ``x``.

        ``x`` is viewed as ``(rows, batch, -1)``; the result is flattened back
        to ``(rows, -1)``.
        """
        n_rows = x.size(0)
        # Todo: avoid the swap axis
        per_layer = x.view(n_rows, self.batch, -1).transpose(0, 1).contiguous()
        out = torch.baddbmm(self.bias, per_layer, self.weight)
        return out.transpose(0, 1).contiguous().view(n_rows, -1)

In [None]:
class EmbeddingModel(nn.Module):
    """Embeds leaf indices per model group and maps each embedding to a scalar.

    The input indices are one-hot encoded, multiplied by a per-group embedding
    matrix, batch-normalised, passed through dropout and finally through a
    ``BatchDense`` layer that produces one output per group.

    Args:
        n_models: number of model groups.
        max_ntree_per_split: trees per group (sizes the embedding table).
        embsize: embedding width per group.
        maxleaf: number of one-hot slots per tree.
        n_output: accepted but not used inside this class.
        out_bias: optional initial bias forwarded to the output ``BatchDense``.
        task: ``'regression'`` uses ``MSELoss``; anything else uses ``BCELoss``
            and applies a sigmoid to the summed output.
    """

    def __init__(self, n_models, max_ntree_per_split, embsize, maxleaf, n_output, out_bias=None, task='regression'):
        super().__init__()
        self.task = task
        self.n_models = n_models
        self.maxleaf = maxleaf
        self.fcs = nn.ModuleList()  # registered but never populated in this class
        self.max_ntree_per_split = max_ntree_per_split

        # One (trees * leaves) x embsize table per group, drawn from a normal
        # distribution with std sqrt(1 / max_ntree_per_split).
        leaf_slots = max_ntree_per_split * maxleaf
        self.embed_w = Parameter(torch.Tensor(n_models, leaf_slots, embsize))
        self.embed_w.data.normal_(0, math.sqrt(1.0 / max_ntree_per_split))

        self.bout = BatchDense(n_models, embsize, 1, out_bias)
        self.bn = nn.BatchNorm1d(embsize * n_models)
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
        self.dropout = torch.nn.Dropout()
        self.criterion = nn.MSELoss() if task == 'regression' else nn.BCELoss()

    def batchmul(self, x, models, embed_w, length):
        """One-hot encode ``x`` and multiply each group's slice by its table."""
        rows = x.size(0)
        hot = one_hot(x, length).view(rows, models, -1)
        per_group = torch.bmm(hot.transpose(0, 1).contiguous(), embed_w)
        return per_group.transpose(0, 1).contiguous().view(rows, -1)

    def lastlayer(self, x):
        """Return the batch-normalised embeddings (before dropout and output layer)."""
        embedded = self.batchmul(x, self.n_models, self.embed_w, self.maxleaf)
        return self.bn(embedded)

    def forward(self, x):
        """Return ``(summed_output, per_group_output)``.

        The summed output goes through a sigmoid unless the task is
        ``'regression'``.
        """
        hidden = self.dropout(self.lastlayer(x))
        per_group = self.bout(hidden.view(x.size(0), self.n_models, -1))
        total = per_group.sum(dim=-1, keepdim=True)
        if self.task == 'regression':
            return total, per_group
        return self.sigmoid(total), per_group

    def joint_loss(self, out, target, out_inner, target_inner, *args):
        """MSE between the per-group outputs and their targets.

        ``out``, ``target`` and any extra positional arguments are ignored.
        """
        inner_criterion = nn.MSELoss()
        return inner_criterion(out_inner, target_inner)

    def true_loss(self, out, target):
        """Task criterion (chosen in ``__init__``) applied to ``out`` and ``target``."""
        return self.criterion(out, target)

In [None]:
def eval_metrics(task, true, pred):
    """Compute evaluation metrics for predictions.

    Args:
        task: ``'binary'`` returns ``(log_loss, roc_auc)``; any other value
            returns the mean squared error.
        true: ground-truth values (a numpy array for the binary task, since
            ``.astype`` is called on it).
        pred: predicted values, same layout as ``true``.

    Returns:
        ``(logloss, auc)`` for the binary task, otherwise the MSE.
    """
    # The import cells visible in this notebook only pull `softmax` out of
    # sklearn, which never binds the name `sklearn`; the previous
    # `sklearn.metrics.*` references therefore raised NameError. Import the
    # metrics module explicitly here.
    from sklearn import metrics

    if task == 'binary':
        logloss = metrics.log_loss(true.astype(np.float64), pred.astype(np.float64))
        auc = metrics.roc_auc_score(true, pred)
        return (logloss, auc)
    return metrics.mean_squared_error(true, pred)

def EvalTestset(test_x, test_y, model, test_batch_size, test_x_opt=None):
    """Evaluate ``model`` on a held-out set in mini-batches.

    The model is switched to eval mode and run under ``torch.no_grad()``.

    Args:
        test_x: evaluation inputs; sliced by row and cast to float32.
        test_y: evaluation targets as a numpy array, sliced by row.
        model: module exposing ``true_loss(outputs, targets)``. If it returns
            a tuple, only the first element is scored.
        test_batch_size: number of rows per evaluation batch.
        test_x_opt: optional second input array passed to the model alongside
            ``test_x``.

    Returns:
        ``(mean_loss, predictions)``: the row-weighted mean of
        ``model.true_loss`` over all batches, and a numpy array with the batch
        outputs concatenated along axis 0.
    """
    test_len = test_x.shape[0]
    test_num_batch = math.ceil(test_len / test_batch_size)
    sum_loss = 0.0
    y_preds = []
    model.eval()
    with torch.no_grad():
        for jdx in range(test_num_batch):
            tst_st = jdx * test_batch_size
            tst_ed = min(test_len, tst_st + test_batch_size)
            inputs = torch.from_numpy(test_x[tst_st:tst_ed].astype(np.float32)).to(device)
            if test_x_opt is not None:
                inputs_opt = torch.from_numpy(test_x_opt[tst_st:tst_ed].astype(np.float32)).to(device)
                outputs = model(inputs, inputs_opt)
            else:
                outputs = model(inputs)
            targets = torch.from_numpy(test_y[tst_st:tst_ed]).to(device)
            if isinstance(outputs, tuple):
                outputs = outputs[0]
            # Collect host-side numpy arrays: np.concatenate cannot convert
            # CUDA tensors, so appending the raw tensors broke GPU runs.
            y_preds.append(outputs.detach().cpu().numpy())
            loss_tst = model.true_loss(outputs, targets).item()
            sum_loss += (tst_ed - tst_st) * loss_tst
    return sum_loss / test_len, np.concatenate(y_preds, 0)

def TrainWithLog(loss_dr, loss_init, loss_de, log_freq, test_freq, task, test_batch_size,
                 train_x, train_y,
                 train_y_opt, test_x, test_y, model, opt,
                 epoch, batch_size, n_output, key="",
                 train_x_opt=None, test_x_opt=None):
    """Train ``model`` in mini-batches, reporting to stdout and Weights & Biases.

    Training rows are reshuffled every epoch. The model is evaluated with
    ``EvalTestset`` every ``test_freq`` steps and after the last batch of
    each epoch.

    Args:
        loss_dr, loss_init, loss_de: the ratio handed to ``model.joint_loss``
            is ``loss_init * max(0.3, loss_dr ** (epoch_index // loss_de))``.
        log_freq: print the running training loss every ``log_freq`` steps.
        test_freq: evaluate every ``test_freq`` steps.
        task: ``'binary'`` additionally computes and logs AUC via
            ``eval_metrics``.
        test_batch_size: batch size used for evaluation.
        train_x: training inputs (numpy array or scipy CSR matrix).
        train_y: training targets, indexed as a 2-D numpy array.
        train_y_opt: optional inner targets. When given and the model returns
            a tuple, ``model.joint_loss`` is back-propagated instead of
            ``model.true_loss``.
        test_x, test_y: evaluation inputs and targets.
        model: module exposing ``true_loss`` (and ``joint_loss`` if needed).
        opt: optimizer stepping the model parameters.
        epoch: number of epochs to run.
        batch_size: training batch size.
        n_output: not used by this function; kept for interface compatibility.
        key: prefix for the printed messages.
        train_x_opt, test_x_opt: optional second inputs for training and
            evaluation.

    Returns:
        The lowest evaluation loss observed.
    """
    if isinstance(test_x, scipy.sparse.csr_matrix):
        test_x = test_x.todense()
    # Keep the epoch count under its own name so the loop variable below does
    # not shadow the `epoch` parameter.
    n_epochs = epoch
    train_len = train_x.shape[0]
    global_iter = 0
    trn_batch_size = batch_size
    train_num_batch = math.ceil(train_len / trn_batch_size)
    total_iterations = n_epochs * train_num_batch
    start_time = time.time()
    total_time = 0.0
    min_loss = float("Inf")
    max_auc = 0.0
    for epoch_idx in range(n_epochs):
        shuffled_indices = np.random.permutation(np.arange(train_x.shape[0]))
        Loss_trn_epoch = 0.0
        Loss_trn_log = 0.0
        log_st = 0
        for local_iter in range(train_num_batch):
            trn_st = local_iter * trn_batch_size
            trn_ed = min(train_len, trn_st + trn_batch_size)
            batch_rows = shuffled_indices[trn_st:trn_ed]
            batch_trn_x = train_x[batch_rows]
            if isinstance(batch_trn_x, scipy.sparse.csr_matrix):
                batch_trn_x = batch_trn_x.todense()
            inputs = torch.from_numpy(batch_trn_x.astype(np.float32)).to(device)
            targets = torch.from_numpy(train_y[batch_rows, :]).to(device)
            model.train()
            if train_x_opt is not None:
                inputs_opt = torch.from_numpy(train_x_opt[batch_rows].astype(np.float32)).to(device)
                outputs = model(inputs, inputs_opt)
            else:
                outputs = model(inputs)
            opt.zero_grad()
            if isinstance(outputs, tuple) and train_y_opt is not None:
                targets_inner = torch.from_numpy(train_y_opt[batch_rows, :]).to(device)
                loss_ratio = loss_init * max(0.3, loss_dr ** (epoch_idx // loss_de))
                if len(outputs) == 3:
                    loss_val = model.joint_loss(outputs[0], targets, outputs[1], targets_inner, loss_ratio, outputs[2])
                else:
                    loss_val = model.joint_loss(outputs[0], targets, outputs[1], targets_inner, loss_ratio)
                loss_val.backward()
                # Only the joint loss is back-propagated; the true loss is
                # recomputed purely for reporting.
                loss_val = model.true_loss(outputs[0], targets)
            elif isinstance(outputs, tuple):
                loss_val = model.true_loss(outputs[0], targets)
                loss_val.backward()
            else:
                loss_val = model.true_loss(outputs, targets)
                loss_val.backward()
            opt.step()
            loss_val = loss_val.item()
            wandb.log({"batch loss":loss_val})
            global_iter += 1
            Loss_trn_epoch += (trn_ed - trn_st) * loss_val
            Loss_trn_log += (trn_ed - trn_st) * loss_val
            if global_iter % log_freq == 0:
                print(key+"Epoch-{:0>3d} {:>5d} Batches, Step {:>6d}, Training Loss: {:>9.6f} (AllAvg {:>9.6f})"
                            .format(epoch_idx, local_iter + 1, global_iter, Loss_trn_log/(trn_ed-log_st), Loss_trn_epoch/trn_ed))
                log_st = trn_ed
                Loss_trn_log = 0.0
            if global_iter % test_freq == 0 or local_iter == train_num_batch - 1:
                # The previous guard compared the nn.Module itself to the
                # strings 'deepgbm'/'d1', which is never true, and hid errors
                # behind a bare `except`. Report the mixing coefficients
                # whenever the model actually exposes them.
                for label, attr in (('Alpha: ', 'alpha'), ('Beta: ', 'beta')):
                    if hasattr(model, attr):
                        print(label + str(getattr(model, attr)))
                torch.cuda.empty_cache()
                test_loss, pred_y = EvalTestset(test_x, test_y, model, test_batch_size, test_x_opt)
                wandb.log({"loss":test_loss})
                current_used_time = time.time() - start_time
                start_time = time.time()
                wandb.log({"createdAt":start_time})
                total_time += current_used_time
                remaining_time = (total_iterations - (global_iter) ) * (total_time / (global_iter))
                if task == 'binary':
                    metrics = eval_metrics(task, test_y, pred_y)
                    _, test_auc = metrics
                    wandb.log({"test batch auc":test_auc})
                    max_auc = max(max_auc, test_auc)
                    wandb.log({"test max auc":max_auc})
                    print(key+"Evaluate Result:\nEpoch-{:0>3d} {:>5d} Batches, Step {:>6d}, Testing Loss: {:>9.6f}, Testing AUC: {:8.6f}, Used Time: {:>5.1f}m, Remaining Time: {:5.1f}m"
                            .format(epoch_idx, local_iter + 1, global_iter, test_loss, test_auc, total_time/60.0, remaining_time/60.0))
                else:
                    print(key+"Evaluate Result:\nEpoch-{:0>3d} {:>5d} Batches, Step {:>6d}, Testing Loss: {:>9.6f}, Used Time: {:>5.1f}m, Remaining Time: {:5.1f}m"
                            .format(epoch_idx, local_iter + 1, global_iter, test_loss, total_time/60.0, remaining_time/60.0))
                min_loss = min(min_loss, test_loss)
                wandb.log({"test min loss": min_loss})
                print("-------------------------------------------------------------------------------")
        print("Best Metric: %s"%(str(max_auc) if task=='binary' else str(min_loss)))
        print("####################################################################################")
    print("Final Best Metric: %s"%(str(max_auc) if task=='binary' else str(min_loss)))
    return min_loss

def GetEmbPred(model, fun, X, test_batch_size):
    """Apply ``fun`` to ``X`` batch by batch and stack the results.

    Args:
        model: module switched to eval mode before inference.
        fun: callable applied to each batch tensor (e.g. a bound method of
            ``model``); must return a tensor.
        X: numpy array of inputs, sliced along axis 0.
        test_batch_size: number of rows per batch.

    Returns:
        numpy array with the per-batch results concatenated along axis 0.
    """
    model.eval()
    n_rows = X.shape[0]
    n_batches = math.ceil(n_rows / test_batch_size)
    chunks = []
    with torch.no_grad():
        for batch_no in range(n_batches):
            start = batch_no * test_batch_size
            stop = min(n_rows, start + test_batch_size)
            batch = torch.from_numpy(X[start:stop]).to(device)
            chunks.append(fun(batch).data.cpu().numpy())
        stacked = np.concatenate(chunks, 0)
    return stacked


In [None]:
# Current working directory and its `data` subdirectory.
HOME_DIR = os.getcwd()
DATA_DIR = os.path.join(HOME_DIR, 'data')

In [None]:
# Load the dataset through the project helper `data_helpers.load_data`.
# NOTE(review): this is a hard-coded absolute path and does not use DATA_DIR
# defined above - confirm which location is intended.
num_data = dh.load_data('/work/neurotrees/articles code reproduction/DeepGBM/data/data_offline_num')

2022-02-06 19:25:37,559 [INFO] data loaded.
 train_x shape: (3918, 12). train_y shape: (3918, 1).
 test_x shape: (980, 12). test_y shape: (980, 1).


In [None]:
# Unpack the loaded dataset into train/test features and targets.
train_x, train_y, test_x, test_y = num_data

In [None]:
# Directory holding the `*_wine_100.pickle` artefacts that `train()` reads.
# NOTE(review): hard-coded absolute path - adjust when running elsewhere.
PATH_TO_PICKLE = '/work/neurotrees/experiments/DeepGBM-decomposition/wine-dataset'

In [None]:
import pickle

    

In [None]:
# Random-search sweep that minimises the logged "loss" metric. Each entry
# under `parameters` lists the discrete values the sweep may pick from.
sweep_config = dict(
    method='random',  # grid, random
    metric=dict(name='loss', goal='minimize'),
    parameters=dict(
        emb_epoch=dict(values=[2, 5, 10]),
        batch_size=dict(values=[256, 128, 64, 32]),
        emb_lr=dict(values=[1e-2, 1e-3, 1e-4, 3e-4, 3e-5, 1e-5]),
        optimizer=dict(values=['adamW', 'sgd']),
    ),
)

In [None]:
# Register the sweep under the "deepgbm-wandb" project; the returned id is
# what the agent cell further down uses to fetch runs.
sweep_id = wandb.sweep(sweep_config, project="deepgbm-wandb")

Create sweep with ID: 3qvzlypv
Sweep URL: https://wandb.ai/iloncka/deepgbm-wandb/sweeps/3qvzlypv


In [None]:
# embsize = 20
# maxleaf = 64
# task = "regression"
# l2_reg = 1e-6
# emb_lr = 1e-3
# emb_epoch = 2
# batch_size = 512
# test_batch_size = 100 
# loss_init = 1.0
# loss_dr = 0.7
# loss_de = 2
# log_freq = 500
# test_freq = 300
# key = ""

# n_output = train_y.shape[1]
def _load_wine_artifact(stem):
    """Unpickle ``<stem>_wine_100.pickle`` from ``PATH_TO_PICKLE`` and return the object."""
    # NOTE(security): pickle.load can execute arbitrary code; only point
    # PATH_TO_PICKLE at artefacts you produced yourself.
    with open(os.path.join(PATH_TO_PICKLE, stem + '_wine_100.pickle'), 'rb') as f:
        return pickle.load(f)


def train():
    """Run one embedding-model training run, configured through ``wandb.config``.

    Loads the pickled tree artefacts, starts a W&B run seeded with the default
    hyper-parameters below (a sweep agent overrides the ones it controls),
    builds an ``EmbeddingModel`` plus optimizer, and trains with
    ``TrainWithLog``.

    Returns:
        None.

    Raises:
        ValueError: if ``config.optimizer`` is neither ``'sgd'`` nor
            ``'adamW'``.
    """
    n_models = _load_wine_artifact('n_models')
    max_ntree_per_split = _load_wine_artifact('max_ntree_per_split')
    group_average = _load_wine_artifact('group_average')
    leaf_preds = _load_wine_artifact('leaf_preds')
    test_leaf_preds = _load_wine_artifact('test_leaf_preds')
    tree_outputs = _load_wine_artifact('tree_outputs')

    # Default values for the hyper-parameters; a sweep overrides a subset.
    config_defaults = dict(
        n_models=n_models,
        max_ntree_per_split=max_ntree_per_split,
        group_average=group_average,
        embsize=20,
        maxleaf=64,
        task="regression",
        l2_reg=1e-6,
        emb_lr=1e-3,
        emb_epoch=2,
        batch_size=512,
        test_batch_size=100,
        loss_init=1.0,
        loss_dr=0.7,
        loss_de=2,
        log_freq=500,
        test_freq=300,
        key="",
        n_output=train_y.shape[1],
        # `config.optimizer` is read below; without a default here a direct
        # `train()` call outside a sweep could not resolve it.
        optimizer='adamW',
    )

    # Initialize a new wandb run
    wandb.init(config=config_defaults)

    # Config is a variable that holds and saves hyperparameters and inputs
    config = wandb.config

    emb_model = EmbeddingModel(config.n_models, config.max_ntree_per_split,
                               config.embsize,
                               config.maxleaf + 1, config.n_output,
                               config.group_average, task=config.task).float().to(device)

    if config.optimizer == 'sgd':
        opt = SGD(emb_model.parameters(), lr=config.emb_lr, momentum=0.9)
    elif config.optimizer == 'adamW':
        opt = AdamW(emb_model.parameters(), lr=config.emb_lr, weight_decay=config.l2_reg)
    else:
        # Fail loudly instead of hitting an unbound `opt` further down.
        raise ValueError("Unsupported optimizer: %r (expected 'sgd' or 'adamW')" % (config.optimizer,))

    # Rearrange the per-group tree outputs to (n_samples, n_models) so they can
    # serve as the inner targets of the joint loss.
    tree_outputs = np.asarray(tree_outputs).reshape(
        (config.n_models, leaf_preds.shape[0])).transpose((1, 0))

    TrainWithLog(config.loss_dr, config.loss_init, config.loss_de, config.log_freq,
                 config.test_freq,
                 config.task, config.test_batch_size,
                 leaf_preds, train_y, tree_outputs,
                 test_leaf_preds, test_y, emb_model, opt,
                 config.emb_epoch, config.batch_size, config.n_output, config.key + "emb-")

    # NOTE(review): the three values below are computed but neither returned
    # nor stored; kept so the run performs the same work as before.
    output_w = emb_model.bout.weight.data.cpu().numpy().reshape(config.n_models * config.embsize, config.n_output)
    output_b = np.array(emb_model.bout.bias.data.cpu().numpy().sum())
    train_embs = GetEmbPred(emb_model, emb_model.lastlayer, leaf_preds,
                            config.test_batch_size)
    del tree_outputs, leaf_preds, test_leaf_preds
    gc.collect()


In [None]:
# Run a single training pass directly (no sweep agent), so wandb.config is
# seeded from `config_defaults` inside train().
train()

emb-Evaluate Result:
Epoch-000    31 Batches, Step     31, Testing Loss: 27.826828, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 27.826828470035476
####################################################################################
emb-Evaluate Result:
Epoch-001    31 Batches, Step     62, Testing Loss: 17.162858, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 17.162857834173707
####################################################################################
Final Best Metric: 17.162857834173707


In [None]:
# Start a W&B agent that calls `train` once per run received from the sweep.
# NOTE(review): the saved output below lists `epochs` / `learning_rate`, not
# the `emb_epoch` / `emb_lr` keys defined in `sweep_config` - it appears to
# predate the current config; re-run to refresh it.
wandb.agent(sweep_id, train)

[34m[1mwandb[0m: Agent Starting Run: fcen0bvl with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: sgd


emb-Evaluate Result:
Epoch-000   123 Batches, Step    123, Testing Loss:  8.830520, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 8.830519637283015
####################################################################################
emb-Evaluate Result:
Epoch-001   123 Batches, Step    246, Testing Loss:  4.390103, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 4.390102590833392
####################################################################################
Final Best Metric: 4.390102590833392


0,1
batch loss,█▆▆▆▆▆▄▄▄▄▃▃▃▃▂▂▂▃▂▂▂▂▂▁▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▂
test batch loss,█▁
test min loss,█▁

0,1
batch loss,8.74636
test batch loss,4.3901
test min loss,4.3901


[34m[1mwandb[0m: Agent Starting Run: 2yxraxip with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: sgd


emb-Evaluate Result:
Epoch-000    62 Batches, Step     62, Testing Loss: 17.439319, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 17.43931939650555
####################################################################################
emb-Evaluate Result:
Epoch-001    62 Batches, Step    124, Testing Loss:  9.659047, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 9.659047496562101
####################################################################################
Final Best Metric: 9.659047496562101


0,1
batch loss,▇▇▆█▆▅▆▆▅▄▅▅▄▄▄▄▃▃▃▂▃▃▂▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▂
test batch loss,█▁
test min loss,█▁

0,1
batch loss,17.5583
test batch loss,9.65905
test min loss,9.65905


[34m[1mwandb[0m: Agent Starting Run: 216xymx7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: adamW


emb-Evaluate Result:
Epoch-000    62 Batches, Step     62, Testing Loss: 20.528013, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 20.52801311259367
####################################################################################
emb-Evaluate Result:
Epoch-001    62 Batches, Step    124, Testing Loss: 13.369922, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 13.36992174265336
####################################################################################
Final Best Metric: 13.36992174265336


0,1
batch loss,▆▆▆▆█▄▆▄▅▄▅▆▄▃▃▄▄▄▄▂▂▃▃▃▂▂▂▂▂▂▁▂▂▂▁▁▂▁▁▃
test batch loss,█▁
test min loss,█▁

0,1
batch loss,25.36608
test batch loss,13.36992
test min loss,13.36992


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vvhjigvj with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: sgd


emb-Evaluate Result:
Epoch-000    62 Batches, Step     62, Testing Loss: 18.200510, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 18.200510375353755
####################################################################################
emb-Evaluate Result:
Epoch-001    62 Batches, Step    124, Testing Loss:  9.549342, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 9.549341688350756
####################################################################################
Final Best Metric: 9.549341688350756


0,1
batch loss,▇█▆█▇▆▇▅▄▅▅▄▄▂▄▃▃▄▄▃▃▃▃▂▂▃▂▁▃▂▂▂▂▂▁▂▁▁▁▁
test batch loss,█▁
test min loss,█▁

0,1
batch loss,10.05197
test batch loss,9.54934
test min loss,9.54934


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: smwbysht with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0003
[34m[1mwandb[0m: 	optimizer: adamW


emb-Evaluate Result:
Epoch-000    16 Batches, Step     16, Testing Loss: 34.706379, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 34.70637928709692
####################################################################################
emb-Evaluate Result:
Epoch-001    16 Batches, Step     32, Testing Loss: 27.569772, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 27.569772253231125
####################################################################################
Final Best Metric: 27.569772253231125


0,1
batch loss,▇▅█▆█▆▇▇▇▆▆▅▆▆▆▆▄▄▄▃▃▄▃▂▁▂▃▄▃▁▂▄
test batch loss,█▁
test min loss,█▁

0,1
batch loss,40.98534
test batch loss,27.56977
test min loss,27.56977


[34m[1mwandb[0m: Agent Starting Run: ok1ua4dv with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: sgd


emb-Evaluate Result:
Epoch-000   123 Batches, Step    123, Testing Loss:  9.839742, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 9.839742076640226
####################################################################################
emb-Evaluate Result:
Epoch-001   123 Batches, Step    246, Testing Loss:  4.360511, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 4.360510972081398
####################################################################################
Final Best Metric: 4.360510972081398


0,1
batch loss,▅▇█▄▄▃▃▃▃▃▂▃▂▂▂▃▂▂▂▂▂▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test batch loss,█▁
test min loss,█▁

0,1
batch loss,4.19167
test batch loss,4.36051
test min loss,4.36051


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: t2xy2poy with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adamW


emb-Evaluate Result:
Epoch-000   123 Batches, Step    123, Testing Loss: 13.715305, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 13.715304744486906
####################################################################################
emb-Evaluate Result:
Epoch-001   123 Batches, Step    246, Testing Loss:  5.912545, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 5.912545330670415
####################################################################################
Final Best Metric: 5.912545330670415


0,1
batch loss,██▆██▇▅▅▅▅▆▅▄▅▄▄▃▂▃▃▅▃▂▃▃▄▂▃▂▁▂▂▂▃▁▁▂▂▁▁
test batch loss,█▁
test min loss,█▁

0,1
batch loss,8.82458
test batch loss,5.91255
test min loss,5.91255


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ux56dvfg with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	optimizer: sgd


emb-Evaluate Result:
Epoch-000    31 Batches, Step     31, Testing Loss: 27.702553, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 27.70255287326112
####################################################################################
emb-Evaluate Result:
Epoch-001    31 Batches, Step     62, Testing Loss: 16.673847, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 16.673847198486328
####################################################################################
Final Best Metric: 16.673847198486328


0,1
batch loss,▇▇█▆██▆▆▇▇▅▇▆▆▄▆▄▄▅▄▄▃▃▄▃▃▃▃▃▂▂▂▂▃▂▂▂▂▂▁
test batch loss,█▁
test min loss,█▁

0,1
batch loss,17.98993
test batch loss,16.67385
test min loss,16.67385


[34m[1mwandb[0m: Agent Starting Run: fhac6xfj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	optimizer: sgd


emb-Evaluate Result:
Epoch-000    31 Batches, Step     31, Testing Loss: 27.878355, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 27.878354598064814
####################################################################################
emb-Evaluate Result:
Epoch-001    31 Batches, Step     62, Testing Loss: 16.856367, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 16.856366644100266
####################################################################################
Final Best Metric: 16.856366644100266


0,1
batch loss,▇▇▇█▇▇▇▇█▆█▆▆▄▅▆▅▅▅▆▄▃▃▄▃▂▂▃▂▂▂▂▂▁▁▂▁▁▁▁
test batch loss,█▁
test min loss,█▁

0,1
batch loss,23.4097
test batch loss,16.85637
test min loss,16.85637


[34m[1mwandb[0m: Agent Starting Run: 1b5f3rz5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0003
[34m[1mwandb[0m: 	optimizer: adamW


emb-Evaluate Result:
Epoch-000    62 Batches, Step     62, Testing Loss: 23.145026, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 23.145026187507472
####################################################################################
emb-Evaluate Result:
Epoch-001    62 Batches, Step    124, Testing Loss: 12.960984, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 12.960984405206174
####################################################################################
Final Best Metric: 12.960984405206174


0,1
batch loss,▆▇█▆▆▅▅▅▅▆▅▅▅▄▄▄▄▃▃▃▂▄▃▂▃▂▂▂▂▃▃▂▂▂▂▂▂▁▂▁
test batch loss,█▁
test min loss,█▁

0,1
batch loss,14.89263
test batch loss,12.96098
test min loss,12.96098


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: z6b1ey2m with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: sgd


emb-Evaluate Result:
Epoch-000   123 Batches, Step    123, Testing Loss:  9.009322, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 9.009322458383988
####################################################################################
emb-Evaluate Result:
Epoch-001   123 Batches, Step    246, Testing Loss:  4.240288, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 4.24028789267248
####################################################################################
Final Best Metric: 4.24028789267248


0,1
batch loss,▆█▆▆▅▅▅▅▃▃▃▂▃▃▃▂▂▂▂▂▂▁▁▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁
test batch loss,█▁
test min loss,█▁

0,1
batch loss,5.02417
test batch loss,4.24029
test min loss,4.24029


[34m[1mwandb[0m: Agent Starting Run: r4wuao77 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	optimizer: sgd


emb-Evaluate Result:
Epoch-000   123 Batches, Step    123, Testing Loss:  8.607315, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 8.607315267835345
####################################################################################
emb-Evaluate Result:
Epoch-001   123 Batches, Step    246, Testing Loss:  4.402598, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 4.402598055041566
####################################################################################
Final Best Metric: 4.402598055041566


0,1
batch loss,█▇▅▆▅▄▄▅▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁
test batch loss,█▁
test min loss,█▁

0,1
batch loss,4.66091
test batch loss,4.4026
test min loss,4.4026


[34m[1mwandb[0m: Agent Starting Run: hpdu8e0l with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	optimizer: sgd


emb-Evaluate Result:
Epoch-000    31 Batches, Step     31, Testing Loss: 26.685686, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 26.685685761120855
####################################################################################
emb-Evaluate Result:
Epoch-001    31 Batches, Step     62, Testing Loss: 16.787947, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 16.78794712923011
####################################################################################
Final Best Metric: 16.78794712923011


0,1
batch loss,██▆█▇▅▆▅▆▅▆▅▅▅▄▄▄▄▃▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▃▁▂▁▁▂
test batch loss,█▁
test min loss,█▁

0,1
batch loss,21.61503
test batch loss,16.78795
test min loss,16.78795


[34m[1mwandb[0m: Agent Starting Run: f55uz4xh with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	optimizer: sgd


emb-Evaluate Result:
Epoch-000    62 Batches, Step     62, Testing Loss: 19.157319, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 19.15731869911661
####################################################################################
emb-Evaluate Result:
Epoch-001    62 Batches, Step    124, Testing Loss: 10.589746, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 10.58974569671008
####################################################################################
Final Best Metric: 10.58974569671008


0,1
batch loss,▇▅█▇▇█▆▅▆▅▅▅▄▄▃▄▃▃▃▃▃▄▂▃▂▃▃▂▂▂▃▂▂▂▂▂▂▂▂▁
test batch loss,█▁
test min loss,█▁

0,1
batch loss,7.71197
test batch loss,10.58975
test min loss,10.58975


[34m[1mwandb[0m: Agent Starting Run: f7dumqd8 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	optimizer: adamW


emb-Evaluate Result:
Epoch-000    62 Batches, Step     62, Testing Loss: 22.281786, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 22.281786393146124
####################################################################################
emb-Evaluate Result:
Epoch-001    62 Batches, Step    124, Testing Loss: 13.911987, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 13.911987168448311
####################################################################################
Final Best Metric: 13.911987168448311


0,1
batch loss,▆▇█▇▆▆▆▆▅▃▃▃▄▄▃▃▄▃▃▄▃▃▄▂▂▂▁▂▂▂▂▂▁▁▂▂▂▁▁▁
test batch loss,█▁
test min loss,█▁

0,1
batch loss,18.96733
test batch loss,13.91199
test min loss,13.91199


[34m[1mwandb[0m: Agent Starting Run: sihgo8h8 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 2
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	optimizer: adamW


emb-Evaluate Result:
Epoch-000    62 Batches, Step     62, Testing Loss: 23.503257, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 23.5032569729552
####################################################################################
emb-Evaluate Result:
Epoch-001    62 Batches, Step    124, Testing Loss: 14.909015, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 14.909014974321638
####################################################################################
Final Best Metric: 14.909014974321638


0,1
batch loss,█▇▄▅▅▅▄▄▅▅▄▃▄▃▃▂▃▃▃▂▂▃▃▃▂▂▂▂▂▁▂▁▂▁▁▂▂▂▁▂
test batch loss,█▁
test min loss,█▁

0,1
batch loss,23.18174
test batch loss,14.90901
test min loss,14.90901


[34m[1mwandb[0m: Agent Starting Run: grdobgfi with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 2
[34m[1mwandb[0m: 	learning_rate: 0.0003
[34m[1mwandb[0m: 	optimizer: adamW


emb-Evaluate Result:
Epoch-000    16 Batches, Step     16, Testing Loss: 34.677828, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 34.677827562604634
####################################################################################
emb-Evaluate Result:
Epoch-001    16 Batches, Step     32, Testing Loss: 28.301034, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 28.301033759603694
####################################################################################
Final Best Metric: 28.301033759603694


0,1
batch loss,▇▇▇▇█▇█▇▆▆▅▄▆▅▆▇▅▅▅▄▄▅▃▄▃▄▄▃▂▂▂▁
test batch loss,█▁
test min loss,█▁

0,1
batch loss,31.2646
test batch loss,28.30103
test min loss,28.30103


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: w07k8zhz with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: adamW


emb-Evaluate Result:
Epoch-000    31 Batches, Step     31, Testing Loss: 29.640209, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 29.64020923692353
####################################################################################
emb-Evaluate Result:
Epoch-001    31 Batches, Step     62, Testing Loss: 21.519539, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 21.519538957245494
####################################################################################
Final Best Metric: 21.519538957245494


0,1
batch loss,▆█▆▆▅▅▅▄▅▄▄▄▄▃▄▄▃▄▃▃▃▃▃▃▃▃▂▃▂▂▂▂▁▁▂▂▁▂▂▁
test batch loss,█▁
test min loss,█▁

0,1
batch loss,26.86617
test batch loss,21.51954
test min loss,21.51954


[34m[1mwandb[0m: Agent Starting Run: hfs8fgi9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	optimizer: adamW


emb-Evaluate Result:
Epoch-000    62 Batches, Step     62, Testing Loss: 20.559949, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 20.559948823889908
####################################################################################
emb-Evaluate Result:
Epoch-001    62 Batches, Step    124, Testing Loss: 11.872314, Used Time:   0.0m, Remaining Time:   0.0m
-------------------------------------------------------------------------------
Best Metric: 11.872313908168248
####################################################################################
Final Best Metric: 11.872313908168248


0,1
batch loss,█▇▇▆▇▆▆▅▅▆▅▆▆▅▆▄▅▅▄▄▅▃▃▃▄▃▃▃▃▂▂▃▂▃▂▂▃▂▁▁
test batch loss,█▁
test min loss,█▁

0,1
batch loss,8.61398
test batch loss,11.87231
test min loss,11.87231


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=de072003-a9db-4342-8067-19a4b45feff1' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>