In [1]:
import pickle
import math
import time
import copy
import numpy as np
import matplotlib.pyplot as plt
import wandb
import os
import pickle
%matplotlib inline

import hummingbird.ml
from hummingbird.ml import constants
import torch
import lightgbm as lgb
from os.path import join
from s2and.data import ANDData

from s2and.model import PairwiseModeler
from s2and.featurizer import FeaturizationInfo, featurize
from s2and.eval import pairwise_eval

from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_recall_fscore_support

from livelossplot import PlotLosses



RuntimeError: module compiled against API version 0xe but this version of numpy is 0xd

In [2]:
from IPython import embed

In [4]:
# Authenticate with Weights & Biases; the training run and the sweep further down log to it.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mdhdhagar[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
def count_parameters(model):
    """Return the number of trainable scalar values in ``model``.

    Only parameters whose ``requires_grad`` flag is set contribute to the total.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [4]:
# Convert the S2 LGBM classifier to torch

def convert_gbdt_to_torch(classifier_model, test_input=None, dropout=0.1,
                          fine_tune=True, force_gemm=False,
                          fine_tune_temp=None,
                          fine_tune_activation='tanh'):
    """Convert a trained tree classifier into a torch model via Hummingbird.

    Args:
        classifier_model: The fitted classifier handed to ``hummingbird.ml.convert``.
        test_input: Optional sample input forwarded to the converter.
        dropout: Dropout probability stored under ``constants.FINE_TUNE_DROPOUT_PROB``
            (only set when ``fine_tune`` is true).
        fine_tune: When true, ``constants.FINE_TUNE`` is enabled in the extra config.
        force_gemm: When true, the tree implementation is forced to ``"gemm"``.
        fine_tune_temp: Dict stored under ``constants.FINE_TUNE_TEMP``. Defaults to
            ``{'train': 1., 'eval': 1., 'requires_grad': False}``. The key is spelled
            ``'requires_grad'`` to match what every caller in this notebook passes;
            the previous default used ``'required_grad'``.
            NOTE(review): confirm the key name against the Hummingbird build in use.
        fine_tune_activation: Value stored under ``constants.FINE_TUNE_ACTIVATION``.

    Returns:
        The ``.model`` attribute of the object returned by ``hummingbird.ml.convert``.
    """
    # Build the default per call so no dict instance is shared between invocations.
    if fine_tune_temp is None:
        fine_tune_temp = {'train': 1., 'eval': 1., 'requires_grad': False}

    extra_config = {}
    if fine_tune:
        extra_config.update({
            constants.FINE_TUNE: True,
            constants.FINE_TUNE_DROPOUT_PROB: dropout
        })
    if force_gemm:
        extra_config[constants.TREE_IMPLEMENTATION] = "gemm"
    # Copy so that the converter can never mutate the caller's dict.
    extra_config[constants.FINE_TUNE_TEMP] = dict(fine_tune_temp)
    extra_config[constants.FINE_TUNE_ACTIVATION] = fine_tune_activation

    humming = hummingbird.ml.convert(classifier_model, "torch", test_input=test_input, extra_config=extra_config)

    return humming.model

In [85]:
# Load S2AND production classifier and convert to NN

# NOTE(security): pickle.load can execute arbitrary code; only load model files from a trusted source.
with open("data/production_model.pickle", "rb") as _pkl_file:  # full_union_seed_3
    prod_model = pickle.load(_pkl_file)

# The pickle holds either the model object itself or a dict that stores it under "clusterer".
# isinstance also accepts dict subclasses (e.g. OrderedDict), which `type(...) == dict` rejected.
if isinstance(prod_model, dict):
    prod_model = prod_model["clusterer"]

In [8]:
# # Compute average of AUROC and F1 for the full union models
# FEATURES_TO_USE = [
#     "name_similarity",
#     "affiliation_similarity",
#     "email_similarity",
#     "coauthor_similarity",
#     "venue_similarity",
#     "year_diff",
#     "title_similarity",
#     "reference_features",
#     "misc_features",
#     "name_counts",
#     "embedding_similarity",
#     "journal_similarity",
#     "advanced_name_similarity",
# ]

# NAMELESS_FEATURES_TO_USE = [
#     feature_name for feature_name in FEATURES_TO_USE
#     if feature_name not in {"name_similarity", "advanced_name_similarity", "name_counts"}
# ]

# auroc_all, f1_all = [], []

# for i in range(5):
#     # Load dataset

#     dataset_name = "pubmed"  # "kisti"  # "aminer"
#     parent_dir = f"data/{dataset_name}"
#     dataset = ANDData(
#         signatures=join(parent_dir, f"{dataset_name}_signatures.json"),
#         papers=join(parent_dir, f"{dataset_name}_papers.json"),
#         mode="train",
#         specter_embeddings=join(parent_dir, f"{dataset_name}_specter.pickle"),
#         clusters=join(parent_dir, f"{dataset_name}_clusters.json"),
#         block_type="s2",
#         train_pairs_size=100000,
#         val_pairs_size=10000,
#         test_pairs_size=10000,
#         name=dataset_name,
#         n_jobs=8,
#         random_seed=i+1
#     )

#     featurization_info = FeaturizationInfo()
#     FEATURIZER_INFO = FeaturizationInfo(features_to_use=FEATURES_TO_USE)
#     NAMELESS_FEATURIZER_INFO = FeaturizationInfo(features_to_use=NAMELESS_FEATURES_TO_USE)
#     # the cache will make it faster to train multiple times - it stores the features on disk for you
#     train, val, test = featurize(dataset, FEATURIZER_INFO, n_jobs=8, use_cache=True, 
#                                  nameless_featurizer_info=NAMELESS_FEATURIZER_INFO)

#     # Get x,y data for each split
#     X_train, y_train, X_train_NL = train
#     X_val, y_val, X_val_NL = val
#     X_test, y_test, X_test_NL = test
    
#     with open(f"data/full_union_seed_{i+1}.pickle", "rb") as _pkl_file:
#         prod_model = pickle.load(_pkl_file)
        
#         y_pred = prod_model.classifier.predict_proba(X_test)
#         y_prob = y_pred[:, 1]
#         y_pred_NL = prod_model.nameless_classifier.predict_proba(X_test_NL)
#         y_prob_NL = y_pred_NL[:, 1]
#         y_prob = (y_prob + y_prob_NL) / 2
#         y = y_test
#         fpr, tpr, _ = roc_curve(y, y_prob)
#         roc_auc = auc(fpr, tpr)
#         thresh_for_f1 = 0.5
#         pr, rc, f1, _ = precision_recall_fscore_support(y, y_prob > thresh_for_f1, beta=1.0, average="macro")
        
#         auroc_all.append(roc_auc)
#         f1_all.append(f1)
#         print(auroc_all)
#         print(f1_all)
#         print()
# #         print("LightGBM (AUROC, F1):", roc_auc, np.round(f1, 3))
# print("Mean AUROC:", np.mean(auroc_all))
# print("Mean F1:", np.mean(f1_all))

In [5]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device={device}")

Using device=cuda


In [63]:
# Load the pickled zbmath split file "splits_rand1.pkl" into `splits`.
out_dir = os.path.join("data", "zbmath", "clf_random_splits")
out_fname = os.path.join(out_dir, "splits_rand1.pkl")
with open(out_fname, mode='rb') as fh:
    splits = pickle.load(fh)

In [41]:
# Overall fraction of NaN entries in the training feature matrix.
np.isnan(splits['X_train']).sum() / splits['X_train'].size

0.40719205128205127

In [136]:
# Percentage of NaN entries per feature column, rounded to whole percents.
np.round(np.isnan(splits['X_train']).sum(axis=0) / len(splits['X_train']) * 100)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        3.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0., 18., 18.,  0.,  0.,  0.,  0.,  9.,  0.,  0.,  0.,  0.,  0.])

In [76]:
# Bind X_train to the training feature matrix stored in `splits` (same object; no copy is made).
X_train = splits['X_train']

In [81]:
# Per-feature NaN fraction; only features with less than 20% missing values are kept.
missing_per_feat = np.isnan(X_train).sum(axis=0) / len(X_train)
keep_feat_mask = np.less(missing_per_feat, 0.2)
X_train[:, keep_feat_mask].shape

(100000, 18)

In [133]:
# Overwrite every column not selected by keep_feat_mask with zeros (in place).
X_train[:, ~keep_feat_mask] = 0

In [135]:
# Shape of the block of columns that keep_feat_mask does NOT select.
X_train[:, ~keep_feat_mask].shape

(100000, 21)

In [119]:
# Convert the production classifier into a fine-tunable torch model: GEMM tree implementation,
# tanh activation, and a temperature of 1e-8 for both train and eval ('requires_grad': False).
hb_model = convert_gbdt_to_torch(prod_model.classifier, fine_tune=True, force_gemm=True,
                                 fine_tune_temp={'train': 1e-8, 'eval': 1e-8, 
                                                 'requires_grad': False}, 
                                 fine_tune_activation='tanh')

Finished loading model, total used 259 iterations


In [115]:
# Zero the first-operator weights of every input column not selected by keep_feat_mask.
hb_model._operators[0].weight_1.data[:, ~keep_feat_mask] = 0.

In [130]:
# Scratch expression (boolean negation); it binds no name, so no other cell depends on it.
not True

False

In [118]:
# Print the shape of every parameter tensor of the converted model.
for p in hb_model.parameters():
    print(p.shape)

torch.Size([13986, 39])
torch.Size([13986, 1])
torch.Size([259, 55, 54])
torch.Size([259, 55, 1])
torch.Size([259, 1, 55])
torch.Size([259, 1, 1])
torch.Size([])


In [126]:
# Fill the ENTIRE first-operator weight matrix with zeros in place; zeros_ returns the
# same parameter, which is what the cell displays.
torch.nn.init.zeros_(hb_model._operators[0].weight_1)

Parameter containing:
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], requires_grad=True)

In [127]:
# Column sums of the first-operator weights for the features not selected by keep_feat_mask.
hb_model._operators[0].weight_1[:, ~keep_feat_mask].sum(dim=0)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       grad_fn=<SumBackward1>)

In [99]:
# Display the bias_1 parameter of the first operator.
hb_model._operators[0].bias_1

Parameter containing:
tensor([[-1.5853e+00],
        [-3.0990e+03],
        [-1.0669e-01],
        ...,
        [ 0.0000e+00],
        [ 0.0000e+00],
        [ 0.0000e+00]], requires_grad=True)

In [10]:
# Get data tensors and optionally convert NANs

def get_tensors(X_train, y_train, X_val, y_val, X_test, y_test, convert_nan=True, nan_val=-1):
    """Wrap the six split arrays in torch tensors.

    Args:
        X_train, y_train, X_val, y_val, X_test, y_test: Array-likes accepted by ``torch.tensor``.
        convert_nan: When true, NaNs in the three feature tensors are replaced via
            ``torch.nan_to_num``; the label tensors are never touched.
        nan_val: Replacement value passed to ``torch.nan_to_num`` for NaN entries.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)`` as tensors, in that order.
    """
    X_train_t, y_train_t, X_val_t, y_val_t, X_test_t, y_test_t = (
        torch.tensor(array) for array in (X_train, y_train, X_val, y_val, X_test, y_test)
    )

    if convert_nan:
        X_train_t, X_val_t, X_test_t = (
            torch.nan_to_num(features, nan_val) for features in (X_train_t, X_val_t, X_test_t)
        )

    return X_train_t, y_train_t, X_val_t, y_val_t, X_test_t, y_test_t

In [11]:
# Attempts to release memory

def release_memory(hm_model, X_train_tensor, y_train_tensor, X_val_tensor, 
                   y_val_tensor, X_test_tensor, y_test_tensor):
    """Best-effort attempt to hand unused GPU memory back to the driver.

    This function can only drop its OWN references to the arguments. Any variable
    in the calling scope that still points at a model or tensor keeps that object
    (and its GPU memory) alive, so callers must ``del`` or rebind their own names
    before calling this function for the memory to actually be freed.

    Args:
        hm_model, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor,
        X_test_tensor, y_test_tensor: Objects whose local references are dropped.

    Returns:
        None.
    """
    import gc

    # Drop this frame's references; this matters when temporaries were passed in.
    del hm_model, X_train_tensor, y_train_tensor, X_val_tensor
    del y_val_tensor, X_test_tensor, y_test_tensor
    # Collect reference cycles first so their cached blocks can be released below.
    gc.collect()
    torch.cuda.empty_cache()

In [12]:
# Load dataset
# Featurize the dataset once per random seed (1..5) and pickle each train/val/test split.
dataset_name = "zbmath"  # "pubmed"  # "arnetminer"  # "qian"
parent_dir = f"data/{dataset_name}"
out_dir = os.path.join("data", dataset_name, "clf_random_splits")

for dataset_random_seed in range(1, 6):
    dataset = ANDData(
        signatures=os.path.join(parent_dir, f"{dataset_name}_signatures.json"),
        papers=os.path.join(parent_dir, f"{dataset_name}_papers.json"),
        mode="train",
        specter_embeddings=os.path.join(parent_dir, f"{dataset_name}_specter.pickle"),
        clusters=os.path.join(parent_dir, f"{dataset_name}_clusters.json"),
        block_type="s2",
        train_pairs_size=100000,
        val_pairs_size=10000,
        test_pairs_size=10000,
        name=dataset_name,
        n_jobs=8,
        random_seed=dataset_random_seed
    )

    # use_cache=True stores the computed features on disk so repeated runs are faster.
    featurization_info = FeaturizationInfo()
    train, val, test = featurize(dataset, featurization_info, n_jobs=8, use_cache=True)

    # Each split is a 3-tuple; only the features and labels are kept.
    X_train, y_train, _ = train
    X_val, y_val, _ = val
    X_test, y_test, _ = test

    os.makedirs(out_dir, exist_ok=True)
    dict_to_save = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test
    }
    out_fname = os.path.join(out_dir, f"splits_rand{dataset_random_seed}.pkl")
    with open(out_fname, mode='wb') as fh:
        pickle.dump(dict_to_save, fh, protocol=pickle.HIGHEST_PROTOCOL)

In [13]:
# Bind the model name and every split-tensor name to None.
model = X_train_tensor = y_train_tensor = X_val_tensor = None
y_val_tensor = X_test_tensor = y_test_tensor = None

In [14]:
from neumiss import NeuMissBlock, NeuMissDEQBlock
from torch import nn

class NeuMissHB(torch.nn.Module):
    """A ``NeuMissBlock`` of fixed depth chained in front of a wrapped model.

    The input goes through the NeuMiss block first; its output is then fed to
    ``hb_model`` and that model's result is returned unchanged.
    """

    def __init__(self, n_features, neumiss_depth, hb_model):
        super().__init__()
        self.neumiss = NeuMissBlock(depth=neumiss_depth, n_features=n_features)
        self.gbdtnn = hb_model

    def forward(self, x):
        """Apply the NeuMiss block to ``x``, then the wrapped model."""
        return self.gbdtnn(self.neumiss(x))
    
    
class NeuMissDEQHB(torch.nn.Module):
    """A ``NeuMissDEQBlock`` chained in front of a wrapped model.

    The input goes through the DEQ NeuMiss block first; its output is then fed
    to ``hb_model`` and that model's result is returned unchanged.
    """

    def __init__(self, n_features, hb_model):
        super().__init__()
        self.neumiss = NeuMissDEQBlock(n_features=n_features)
        self.gbdtnn = hb_model

    def forward(self, x):
        """Apply the DEQ NeuMiss block to ``x``, then the wrapped model."""
        return self.gbdtnn(self.neumiss(x))
    
    
class NeuMissVanilla(torch.nn.Module):
    """Feed-forward ReLU network, optionally preceded by a ``NeuMissBlock``.

    Layer layout of ``self.linear_layer`` (an ``nn.Sequential``):
    ``[NeuMissBlock] -> Linear(n_features, hidden_dim) ->
    n_hidden_layers x ([BatchNorm1d] -> ReLU -> Dropout -> Linear(hidden_dim, hidden_dim)) ->
    [BatchNorm1d] -> ReLU -> Dropout -> Linear(hidden_dim, 1)``.

    The output is a single raw score per row; no sigmoid is applied here.

    Args:
        n_features: Width of the input.
        neumiss_depth: ``depth`` passed to ``NeuMissBlock`` (used only if ``add_neumiss``).
        hidden_dim: Width of every hidden layer.
        n_hidden_layers: Number of hidden ``Linear(hidden_dim, hidden_dim)`` layers.
        dropout_p: Dropout probability.
        add_neumiss: Prepend a ``NeuMissBlock`` when true.
        add_batchnorm: Insert ``BatchNorm1d`` before every activation when true.
    """

    def __init__(self, n_features, neumiss_depth, hidden_dim=1024, n_hidden_layers=10, dropout_p=0.1,
                 add_neumiss=True, add_batchnorm=False):
        super().__init__()

        def _activation_block():
            """Build a fresh [BatchNorm1d] -> ReLU -> Dropout group."""
            norm = [nn.BatchNorm1d(hidden_dim)] if add_batchnorm else []
            return norm + [nn.ReLU(), nn.Dropout(p=dropout_p)]

        layers = []
        if add_neumiss:
            layers.append(NeuMissBlock(n_features=n_features, depth=neumiss_depth))
        layers.append(nn.Linear(n_features, hidden_dim))
        # Create NEW modules per hidden layer. Multiplying a list of modules by
        # n_hidden_layers repeats the same objects, which ties all hidden layers
        # to one shared set of weights.
        for _ in range(n_hidden_layers):
            layers.extend(_activation_block())
            layers.append(nn.Linear(hidden_dim, hidden_dim))
        layers.extend(_activation_block())
        layers.append(nn.Linear(hidden_dim, 1))

        self.linear_layer = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw score produced by the layer stack for ``x``."""
        return self.linear_layer(x)

    
class NeuMissVanillaLeaky(torch.nn.Module):
    """Feed-forward LeakyReLU network, optionally preceded by a ``NeuMissBlock``.

    Layer layout of ``self.linear_layer`` (an ``nn.Sequential``):
    ``[NeuMissBlock] -> Linear(n_features, hidden_dim) ->
    n_hidden_layers x ([BatchNorm1d] -> LeakyReLU -> Dropout -> Linear(hidden_dim, hidden_dim)) ->
    [BatchNorm1d] -> LeakyReLU -> Dropout -> Linear(hidden_dim, 1)``.

    The output is a single raw score per row; no sigmoid is applied here.

    Args:
        n_features: Width of the input.
        neumiss_depth: ``depth`` passed to ``NeuMissBlock`` (used only if ``add_neumiss``).
        hidden_dim: Width of every hidden layer.
        n_hidden_layers: Number of hidden ``Linear(hidden_dim, hidden_dim)`` layers.
        dropout_p: Dropout probability.
        negative_slope: Slope of every ``LeakyReLU``.
        add_batchnorm: Insert ``BatchNorm1d`` before every activation when true.
        add_neumiss: Prepend a ``NeuMissBlock`` when true.
    """

    def __init__(self, n_features, neumiss_depth, hidden_dim=1024, n_hidden_layers=10, dropout_p=0.1,
                 negative_slope=0.01, add_batchnorm=False, add_neumiss=True):
        super().__init__()

        def _activation_block():
            """Build a fresh [BatchNorm1d] -> LeakyReLU -> Dropout group."""
            norm = [nn.BatchNorm1d(hidden_dim)] if add_batchnorm else []
            return norm + [nn.LeakyReLU(negative_slope=negative_slope), nn.Dropout(p=dropout_p)]

        layers = []
        if add_neumiss:
            layers.append(NeuMissBlock(n_features=n_features, depth=neumiss_depth))
        layers.append(nn.Linear(n_features, hidden_dim))
        # Create NEW modules per hidden layer. Multiplying a list of modules by
        # n_hidden_layers repeats the same objects, which ties all hidden layers
        # to one shared set of weights.
        for _ in range(n_hidden_layers):
            layers.extend(_activation_block())
            layers.append(nn.Linear(hidden_dim, hidden_dim))
        layers.extend(_activation_block())
        layers.append(nn.Linear(hidden_dim, 1))

        self.linear_layer = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw score produced by the layer stack for ``x``."""
        return self.linear_layer(x)
    

# Re-initialise a model in place: Xavier-uniform (tanh gain) when activation == "tanh",
# Kaiming/He-uniform for any other activation name; bias-like parameters are set to 0.01.
def init_weights(model, activation="tanh", vanilla=False):
    """Re-initialise the parameters of ``model`` in place.

    Args:
        model: Module whose parameters are overwritten.
        activation: ``"tanh"`` selects Xavier-uniform with the tanh gain; any other
            value is passed as ``nonlinearity`` to ``kaiming_uniform_``.
        vanilla: Selects how parameters are classified.
            * True  - by name: parameters whose name contains ``'weight'`` are
              re-drawn only if they have more than one dimension; otherwise
              parameters whose name contains ``'bias'`` are filled with 0.01.
            * False - by shape: zero-dimensional parameters are left alone, parameters
              whose last dimension is 1 are filled with 0.01, all others are re-drawn.
    """
    def _redraw(tensor):
        # Xavier for tanh, He/Kaiming for everything else ("relu" / "leaky_relu").
        if activation == "tanh":
            torch.nn.init.xavier_uniform_(tensor, gain=nn.init.calculate_gain("tanh"))
        else:
            torch.nn.init.kaiming_uniform_(tensor, nonlinearity=activation)

    if vanilla:
        for name, param in model.named_parameters():
            if 'weight' in name:
                if param.data.dim() > 1:
                    _redraw(param.data)
            elif 'bias' in name:
                param.data.fill_(0.01)
        return

    for param in model.parameters():
        if param.dim() == 0:
            continue
        if param.size(-1) == 1:
            param.data.fill_(0.01)
        else:
            _redraw(param.data)

In [24]:
# Model classes whose forward pass output is passed through torch.sigmoid by
# predict_proba / evaluate before being treated as a probability.
needs_sigmoid = [NeuMissVanilla, NeuMissVanillaLeaky,]
# Position of each metric inside the tuple returned by evaluate().
metric_to_idx = {'auroc': 0, 'f1': 1}

def predict_proba(model, input):
    """Return the model's probability output for ``input``.

    Models listed in ``needs_sigmoid`` receive a float-cast input and their output
    is passed through ``torch.sigmoid``. For every other model, element ``[1]`` of
    the model output is taken and its column 1 is returned.
    """
    if model.__class__ not in needs_sigmoid:
        return model(input)[1][:, 1]
    logits = model(input.type(torch.float))
    return torch.sigmoid(logits)
    
def evaluate(model, input, output, mode="macro", return_pred_only=False, 
             batch_size=None, overfit_one_batch=False, loss_fn=None, pos_weight=None):
    """Score ``model`` on ``input`` and compare the predictions with ``output``.

    Intended to be called under ``torch.no_grad()`` with the model in eval mode.

    Args:
        model: Model to score. Classes listed in ``needs_sigmoid`` are treated as
            emitting raw logits; all others are read via ``model(x)[1][:, 1]``.
        input: Feature tensor. It is moved to ``device`` only in the batched path;
            without ``batch_size`` it is used exactly as passed.
        output: CPU label tensor (converted with ``.numpy()``).
        mode: ``average`` argument for ``precision_recall_fscore_support``.
        return_pred_only: When true, return just the probability array.
        batch_size: If set, ``input`` is scored in slices of this size and the
            predictions are flattened to 1-D.
        overfit_one_batch: With ``batch_size`` set, score only the first batch and
            truncate the labels to match.
        loss_fn: Optional loss module. If given, the loss is appended to the result.
        pos_weight: Weight for positive labels; only applied here when ``loss_fn``
            is a ``BCELoss`` (by writing ``loss_fn.weight``, which mutates ``loss_fn``).

    Returns:
        ``y_prob`` if ``return_pred_only``; otherwise ``(auroc, f1)`` or
        ``(auroc, f1, loss)`` when ``loss_fn`` is given. F1 uses a 0.5 threshold and
        is rounded to 3 decimals.
    """
    outputs_logits = model.__class__ in needs_sigmoid

    def _forward(batch):
        """Return ``(raw_scores, probabilities)`` for one batch as numpy arrays."""
        if outputs_logits:
            raw = model(batch.type(torch.float))
            prob = torch.sigmoid(raw)
        else:
            # These models' outputs are used directly as probabilities
            # (presumably column 1 is the positive class - TODO confirm).
            raw = prob = model(batch)[1][:, 1]
        # detach() keeps this usable even if the caller forgot torch.no_grad().
        return raw.detach().cpu().numpy(), prob.detach().cpu().numpy()

    if batch_size is None:
        y_raw, y_prob = _forward(input)
    else:
        stop = batch_size if overfit_one_batch else len(input)
        raw_chunks, prob_chunks = [], []
        for i in range(0, stop, batch_size):
            _raw, _prob = _forward(input[i:i+batch_size].to(device))
            raw_chunks.append(_raw)
            prob_chunks.append(_prob)
        y_raw = np.concatenate(raw_chunks, axis=0).flatten()
        y_prob = np.concatenate(prob_chunks, axis=0).flatten()

    if return_pred_only:
        return y_prob

    y = output.numpy()

    if batch_size is not None and overfit_one_batch:
        y = y[:batch_size]

    fpr, tpr, _ = roc_curve(y, y_prob)
    roc_auc = auc(fpr, tpr)
    thresh_for_f1 = 0.5
    _, _, f1, _ = precision_recall_fscore_support(y, y_prob > thresh_for_f1, beta=1.0, average=mode,
                                                  zero_division=0)

    if loss_fn is not None:
        y_true = torch.tensor(y).type(torch.float)
        if 'BCELoss' in loss_fn.__class__.__name__:
            if pos_weight is None:
                loss_fn.weight = None
            else:
                # Per-sample weights: pos_weight for positives, 1 for negatives.
                loss_weight = pos_weight * y_true
                loss_weight[loss_weight==0] = 1.
                loss_fn.weight = loss_weight
        # BCEWithLogitsLoss applies the sigmoid itself, so it must be fed the raw
        # logits. Feeding it y_prob applied the sigmoid twice and distorted the
        # reported loss (which also drives the LR scheduler).
        if outputs_logits and isinstance(loss_fn, torch.nn.BCEWithLogitsLoss):
            scores = y_raw
        else:
            scores = y_prob
        loss = loss_fn(torch.tensor(scores).type(torch.float).view_as(y_true), y_true)
        return roc_auc, np.round(f1, 3), loss.item()

    return roc_auc, np.round(f1, 3)

In [22]:
def train(dataset_name="pubmed", verbose=False, hyperparams = {
    # Dataset
    "dataset": "pubmed",
    "dataset_random_seed": 1,
    # Training config
    "lr": 1e-4, 
    "n_epochs": 200,
    "weighted_loss": True,
    "batch_size": 10000,
    "use_lr_scheduler": True,
    "lr_factor": 0.6,
    "lr_min": 1e-6,
    "lr_scheduler_patience": 10,
    "weight_decay": 0.,
    "dev_opt_metric": 'auroc',  # 'f1'
    "overfit_one_batch": False,
    "convert_nan": False,
    "nan_value": -1,
    # Model config
    "hb_model": False,
    "hb_temp": 1e-8,
    "hb_activation": 'tanh',
    "neumiss_deq": False,
    "neumiss_depth": 20,
    "vanilla_hidden_dim": 1024,
    "vanilla_n_hidden_layers": 1,
    "vanilla_dropout": 0.,
    "vanilla_batchnorm": True,
    "vanilla_activation": "leaky_relu",
    "reinit_model": False
}):
    """Train one model inside a wandb run and log train/dev/test metrics.

    Steps: load the pickled split ``data/<dataset>/clf_random_splits/splits_rand<seed>.pkl``,
    build the model selected by the config, optionally re-initialise it, train with
    AdamW (+ optional ReduceLROnPlateau on the dev loss), keep a deep copy of the
    model with the best dev metric, and log initial / per-epoch / best / final metrics.

    Args:
        dataset_name: Not read by this function; the dataset is taken from the
            ``"dataset"`` entry of the run config. Kept for caller compatibility.
        verbose: Print progress in addition to logging to wandb.
        hyperparams: Default run config passed to ``wandb.init``. The dict is only
            read, never mutated. Values are then read back from ``wandb.config``.

    Raises:
        ValueError: If ``hb_model`` is false and ``vanilla_activation`` is neither
            ``'relu'`` nor ``'leaky_relu'``.

    Relies on the module-level names ``prod_model``, ``device``, ``needs_sigmoid`` and
    ``metric_to_idx``.
    """
    # Start wandb run
    with wandb.init(config=hyperparams):
        hyp = wandb.config
        
        # Load data
        out_dir = os.path.join("data", hyp["dataset"], "clf_random_splits")
        out_fname = os.path.join(out_dir, f"splits_rand{hyp['dataset_random_seed']}.pkl")
        with open(out_fname, 'rb') as fh:
            splits = pickle.load(fh)

        # Get tensors
        X_train_tensor, y_train_tensor, \
        X_val_tensor, y_val_tensor, \
        X_test_tensor, y_test_tensor = get_tensors(splits['X_train'], splits['y_train'], splits['X_val'], 
                                                   splits['y_val'], splits['X_test'], splits['y_test'], 
                                                   convert_nan=hyp['convert_nan'], nan_val=hyp['nan_value'])
        del splits

        # Take the feature count from the split that was just loaded, not from a
        # notebook-global X_train that may belong to a different dataset or not exist.
        n_features = X_train_tensor.shape[1]

        # Create model
        if hyp['hb_model']:
            hb_model = convert_gbdt_to_torch(prod_model.classifier, fine_tune=True, force_gemm=True,
                                             fine_tune_temp={'train': hyp['hb_temp'], 'eval': hyp['hb_temp'], 
                                                             'requires_grad': False}, 
                                             fine_tune_activation=hyp['hb_activation'])
            if hyp['convert_nan']:
                # NaNs were already replaced, so no imputation block is needed.
                model = hb_model
            else:
                if hyp['neumiss_deq']:
                    model = NeuMissDEQHB(n_features=n_features, hb_model=hb_model)
                else:
                    model = NeuMissHB(n_features=n_features, neumiss_depth=hyp['neumiss_depth'], 
                                      hb_model=hb_model)
        else:
            if hyp['vanilla_activation'] == 'relu':
                model = NeuMissVanilla(n_features=n_features, neumiss_depth=hyp['neumiss_depth'], 
                                       hidden_dim=hyp['vanilla_hidden_dim'], 
                                       n_hidden_layers=hyp['vanilla_n_hidden_layers'], 
                                       dropout_p=hyp['vanilla_dropout'], add_neumiss=not hyp['convert_nan'], 
                                       add_batchnorm=hyp['vanilla_batchnorm'])
            elif hyp['vanilla_activation'] == 'leaky_relu':
                model = NeuMissVanillaLeaky(n_features=n_features, neumiss_depth=hyp['neumiss_depth'], 
                                            hidden_dim=hyp['vanilla_hidden_dim'], 
                                            n_hidden_layers=hyp['vanilla_n_hidden_layers'], 
                                            dropout_p=hyp['vanilla_dropout'], 
                                            add_neumiss=not hyp['convert_nan'], 
                                            add_batchnorm=hyp['vanilla_batchnorm'])
            else:
                raise ValueError(
                    f"Unsupported vanilla_activation={hyp['vanilla_activation']!r}; "
                    "expected 'relu' or 'leaky_relu'")

        # (Interactive embed() breakpoints removed: they blocked every run, including sweep agents.)
        if hyp['reinit_model']:
            if hyp['hb_model']:
                init_weights(model if hyp['convert_nan'] else model.gbdtnn, activation=hyp['hb_activation'], vanilla=False)
            else:
                init_weights(model.linear_layer, activation=hyp['vanilla_activation'], vanilla=True)

        # Training code
        batch_size = hyp['batch_size']
        weighted_loss = hyp['weighted_loss']
        overfit_one_batch = hyp['overfit_one_batch']
        dev_opt_metric = hyp['dev_opt_metric']
        n_epochs = hyp['n_epochs']
        use_lr_scheduler = hyp['use_lr_scheduler']

        model.to(device)
        wandb.watch(model)

        y_train_tensor = y_train_tensor.float()  # Converting to keep output and prediction dtypes consistent

        # pos_weight = (#negatives / #positives) over the labels that will be trained on.
        pos_weight = None
        if weighted_loss:
            if overfit_one_batch:
                pos_weight = (batch_size - y_train_tensor[:batch_size].sum()) / y_train_tensor[:batch_size].sum()
            else:
                pos_weight = (len(y_train_tensor) - y_train_tensor.sum()) / y_train_tensor.sum()

        if verbose:
            print(f"Loss function pos_weight={pos_weight}")
        # Logit-emitting models use BCEWithLogitsLoss; probability-emitting models use BCELoss.
        loss_fn = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight) if model.__class__ in needs_sigmoid else \
                                                torch.nn.BCELoss()

        # Parameters whose name contains '.mu' are excluded from weight decay.
        group_no_wd, group_wd = [], []
        for name, param in model.named_parameters():
            if '.mu' in name:
                group_no_wd.append(param)
            else:
                group_wd.append(param)
        optimizer = torch.optim.AdamW([{'params': group_wd, 'weight_decay': hyp['weight_decay']},
                                       {'params': group_no_wd, 'weight_decay': 0}], lr=hyp['lr'])
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                               mode='min', 
                                                               factor=hyp['lr_factor'], 
                                                               min_lr=hyp['lr_min'],
                                                               patience=hyp['lr_scheduler_patience'], 
                                                               verbose=verbose)

        if verbose:
            print("Training:\n")
        with torch.no_grad():
            model.eval()
            init_eval_train = evaluate(model, X_train_tensor, y_train_tensor, 
                                       batch_size=batch_size, overfit_one_batch=overfit_one_batch)
            init_eval_dev = evaluate(model, X_val_tensor.to(device), y_val_tensor)
            init_eval_test = evaluate(model, X_test_tensor.to(device), y_test_tensor)

            if verbose:
                print(f"Initial model evaluation:")
                print("Train AUROC, F1:", init_eval_train)
                print("Dev AUROC, F1:", init_eval_dev)
                print("Test AUROC, F1:", init_eval_test)

            wandb.log({
                'train_auroc': init_eval_train[0],
                'train_f1': init_eval_train[1],
                'dev_auroc': init_eval_dev[0],
                'dev_f1': init_eval_dev[1],
                'test_auroc': init_eval_test[0],
                'test_f1': init_eval_test[1]})

        if verbose:
            print(f"\nDev metric to optimize: {dev_opt_metric}")

        best_model_on_dev = None
        best_metric = -1.
        best_dev_f1 = -1.
        best_dev_auroc = -1.
        best_epoch = -1

        model.train()
        loss = None

        start_time = time.time()
        for i in range(n_epochs):  # epoch
            running_loss = []
            wandb.log({'epoch': i+1})

            for j in range(0, batch_size if overfit_one_batch else len(X_train_tensor), batch_size):
                X_batch = X_train_tensor[j:j+batch_size].to(device)
                y_batch = y_train_tensor[j:j+batch_size].to(device)

                optimizer.zero_grad()
                y_ = predict_proba(model, X_batch)
                assert y_.requires_grad

                # BCELoss has no pos_weight argument, so emulate it with per-sample weights.
                if weighted_loss and 'BCELoss' in loss_fn.__class__.__name__:
                    weights = (pos_weight * y_batch)
                    weights[weights==0] = 1.
                    loss_fn.weight = weights
                loss = loss_fn(y_.view_as(y_batch), y_batch)
                running_loss.append(loss.item())
                loss.backward()
                optimizer.step()

                # Print batch loss
                if verbose:
                    print("\tBatch", f"[{j}:{j+batch_size}]", ":", running_loss[-1])
                wandb.log({'train_loss_batch': running_loss[-1]})

            # Print epoch validation accuracy
            with torch.no_grad():
                model.eval()
                dev_auroc_f1_loss = evaluate(model, X_val_tensor.to(device), y_val_tensor, 
                                             loss_fn=loss_fn, pos_weight=pos_weight)
                if verbose:
                    print("Epoch", i+1, ":", "Dev AUROC,F1,loss:", dev_auroc_f1_loss)
                if dev_auroc_f1_loss[metric_to_idx[dev_opt_metric]] > best_metric:
                    if verbose:
                        print(f"New best dev {dev_opt_metric}; storing model")
                    best_epoch = i
                    best_metric = dev_auroc_f1_loss[metric_to_idx[dev_opt_metric]]
                    best_dev_f1 = dev_auroc_f1_loss[1]
                    best_dev_auroc = dev_auroc_f1_loss[0]
                    best_model_on_dev = copy.deepcopy(model)
                if overfit_one_batch:
                    train_auroc_f1 = evaluate(model, X_batch.to(device), y_batch.cpu())
            model.train()

            wandb.log({
                'train_loss_epoch': np.mean(running_loss),
                'dev_auroc': dev_auroc_f1_loss[0],
                'dev_f1': dev_auroc_f1_loss[1],
                'dev_loss': dev_auroc_f1_loss[2],
            })
            if overfit_one_batch:
                wandb.log({'train_auroc': train_auroc_f1[0], 
                           'train_f1': train_auroc_f1[1]})

            # Update lr schedule
            if use_lr_scheduler:
                scheduler.step(dev_auroc_f1_loss[2])  # running_loss

        end_time = time.time()

        # No epoch ever improved on the initial sentinel (e.g. n_epochs == 0 or a NaN
        # dev metric): fall back to the final model instead of failing on None below.
        if best_model_on_dev is None:
            best_model_on_dev = model

        with torch.no_grad():
            best_model_on_dev.eval()
            if verbose:
                print("----------------")
                print(f"Initial model evaluation:")
                print("Train AUROC, F1:", init_eval_train)
                print("Dev AUROC, F1:", init_eval_dev)
                print("Test AUROC, F1:", init_eval_test)
                print()

            best_eval_train = evaluate(best_model_on_dev, X_train_tensor, y_train_tensor, 
                                       batch_size=batch_size, overfit_one_batch=overfit_one_batch)
            best_eval_dev = (best_dev_auroc, best_dev_f1)
            best_eval_test = evaluate(best_model_on_dev, X_test_tensor.to(device), y_test_tensor)
            if verbose:
                print(f"Best dev eval on Epoch {best_epoch}:")
                print("Train AUROC, F1:", best_eval_train)
                print("Dev AUROC, F1:", best_eval_dev)
                print("Test AUROC, F1:", best_eval_test)
                print(f"Time taken: {end_time - start_time}")
                print()
            wandb.log({
                'best_train_auroc': best_eval_train[0],
                'best_train_f1': best_eval_train[1],
                'best_dev_auroc': best_eval_dev[0],
                'best_dev_f1': best_eval_dev[1],
                'best_test_auroc': best_eval_test[0],
                'best_test_f1': best_eval_test[1]
            })

            model.eval()
            final_eval_train = evaluate(model, X_train_tensor, y_train_tensor, 
                                        batch_size=batch_size, overfit_one_batch=overfit_one_batch)
            final_eval_dev = evaluate(model, X_val_tensor.to(device), y_val_tensor)
            final_eval_test = evaluate(model, X_test_tensor.to(device), y_test_tensor)
            if verbose:
                print(f"Final model eval on Epoch {n_epochs}:")
                print("Train AUROC, F1:", final_eval_train)
                print("Dev AUROC, F1:", final_eval_dev)
                print("Test AUROC, F1:", final_eval_test)
            wandb.log({
                'train_auroc': final_eval_train[0],
                'train_f1': final_eval_train[1],
                'dev_auroc': final_eval_dev[0],
                'dev_f1': final_eval_dev[1],
                'test_auroc': final_eval_test[0],
                'test_f1': final_eval_test[1]
            })

In [23]:
# Single run: converted GBDT model (hb_model=True), NaNs replaced by 0 (so no NeuMiss block is
# added), weights re-initialised before training, unweighted loss, dev AUROC used for model selection.
train(hyperparams={
    # Dataset
    "dataset": "pubmed",
    "dataset_random_seed": 1,
    # Training config
    "lr": 2e-4, 
    "n_epochs": 200,
    "weighted_loss": False,
    "batch_size": 10000,
    "use_lr_scheduler": True,
    "lr_factor": 0.6,
    "lr_min": 1e-6,
    "lr_scheduler_patience": 10,
    "weight_decay": 0.01,
    "dev_opt_metric": 'auroc',
    "overfit_one_batch": False,
    "convert_nan": True,
    "nan_value": 0,
    # Model config
    "hb_model": True,
    "hb_temp": 1e-8,
    "hb_activation": 'tanh',
    "neumiss_deq": False,
    "neumiss_depth": 20,
    "vanilla_hidden_dim": 1024,
    "vanilla_n_hidden_layers": 1,
    "vanilla_dropout": 0.,
    "vanilla_batchnorm": True,
    "vanilla_activation": "leaky_relu",
    "reinit_model": True
})

Finished loading model, total used 259 iterations
Python 3.7.0 (default, Oct  9 2018, 10:31:47) 
Type 'copyright', 'credits' or 'license' for more information
IPython 7.31.1 -- An enhanced Interactive Python. Type '?' for help.

In [1]: exit

Python 3.7.0 (default, Oct  9 2018, 10:31:47) 
Type 'copyright', 'credits' or 'license' for more information
IPython 7.31.1 -- An enhanced Interactive Python. Type '?' for help.

In [1]: model
Out[1]: 
Executor(
  (_operators): ModuleList(
    (0): GEMMGBDTImplTraining(
      (dropout): Dropout(p=0.1, inplace=False)
    )
  )
)

In [2]: model._operators[0]
Out[2]: 
GEMMGBDTImplTraining(
  (dropout): Dropout(p=0.1, inplace=False)
)

In [3]: model._operators[0].weight_1
Out[3]: 
Parameter containing:
tensor([[-0.0189,  0.0061,  0.0325,  ..., -0.0050, -0.0257,  0.0047],
        [-0.0335, -0.0151, -0.0013,  ..., -0.0271, -0.0177, -0.0278],
        [-0.0243, -0.0187,  0.0304,  ..., -0.0040,  0.0078, -0.0155],
        ...,
        [-0.0247, -0.0001, -0.

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▅▆▇▇█▇█████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁████████████▇██████████████████████████
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.93152
best_dev_f1,0.801
best_test_auroc,0.73505
best_test_f1,0.556
best_train_auroc,0.8955
best_train_f1,0.769
dev_auroc,0.89493
dev_f1,0.812
dev_loss,0.46493
epoch,200.0


In [None]:
dataset_name = "pubmed"

# Bayesian hyperparameter sweep, optimized for the logged `dev_auroc` metric.
# `hb_model` is pinned to False, so only the vanilla_* / neumiss_* options are
# searched and the hb_* options are left commented out.
sweep_config = {
    'method': 'bayes',
    'name': 'vanilla_neumiss',
    'metric': {
        'goal': 'maximize',
        'name': 'dev_auroc'
    },
    'parameters': {
        # Training
        # The learning-rate range spans four orders of magnitude, so sample it
        # log-uniformly. With only min/max given, the value is drawn uniformly on a
        # linear scale, which puts ~90% of the draws in [1e-3, 1e-2] and almost never
        # explores the small-lr end of the range.
        # `log_uniform` expects its bounds as natural-log exponents: the sampled
        # value lies between exp(min) and exp(max), i.e. between 1e-6 and 1e-2.
        'lr': {
            'distribution': 'log_uniform',
            'min': math.log(1e-6),
            'max': math.log(1e-2)
        },
        'weighted_loss': {'values': [True, False]},
        'weight_decay': {'values': [1e-1, 1e-2, 1e-3, 0]},
        'dev_opt_metric': {'values': ['auroc', 'f1']},
        # Fixed (not searched): NaN conversion stays off for this sweep.
        "convert_nan": {'value': False},
        "nan_value": {'value': -1},
        # Model
        "hb_model": {'value': False},
#         "hb_temp": {'values': [1., 1e-1, 1e-2, 1e-4, 1e-6, 1e-8]},
#         "hb_activation": {'value': 'tanh'},
        "neumiss_deq": {'value': False},
        "neumiss_depth": {'values': [5, 10, 20]},
        "vanilla_hidden_dim": {'values': [128, 256, 512, 1024, 2048]},
        "vanilla_n_hidden_layers": {'values': [1, 2, 3]},
        "vanilla_dropout": {'values': [0., 0.1, 0.2, 0.4]},
        "vanilla_batchnorm": {'values': [True, False]},
        "reinit_model": {'values': [False, True]}
    },
    # Hyperband early termination of under-performing runs.
    'early_terminate': {
        'type': 'hyperband',
        'min_iter': 40
    }
}

# Init sweep
sweep_id = wandb.sweep(sweep=sweep_config, project="missing-values", entity="dhdhagar")
# Start sweep job. `train` is defined outside this cell; the lambda adapts it to the
# zero-argument callable that `wandb.agent` invokes for each run. No `count` is
# passed, so the agent is not limited to a fixed number of runs.
wandb.agent(sweep_id, function=lambda: train(dataset_name=dataset_name))

Create sweep with ID: rsnwx2s5
Sweep URL: https://wandb.ai/dhdhagar/missing-values/sweeps/rsnwx2s5


[34m[1mwandb[0m: Agent Starting Run: hiirn3rs with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00032670294650931494
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▆▆▆▅▆▆▆▆▇▇▆▇▇▇▇███▇▇▇▇▇███████████████
dev_f1,▁▃▆▃▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█▇█▇██▇▇▇▇▇█████████
dev_loss,█▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.77295
best_dev_f1,0.439
best_test_auroc,0.59735
best_test_f1,0.535
best_train_auroc,0.82661
best_train_f1,0.71
dev_auroc,0.7768
dev_f1,0.439
dev_loss,0.68713
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: xehu2gwp with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008108765634013153
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▅▆▆▆▆▆▇▇▇█████████████████████████████
dev_f1,▄▁██▇██▇▇███████████████████████████████
dev_loss,▃█▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.90016
best_dev_f1,0.853
best_test_auroc,0.74897
best_test_f1,0.561
best_train_auroc,0.89255
best_train_f1,0.79
dev_auroc,0.91555
dev_f1,0.839
dev_loss,1.38832
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: eyr2cngr with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0017674205120856326
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: fgfalw23 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005030329289178962
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.143004…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▂▁▂▂▃▆▇▇▇▇▇▇███████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▅▁▂▂▃▃█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.29489
best_dev_f1,0.401
best_test_auroc,0.51241
best_test_f1,0.489
best_train_auroc,0.31641
best_train_f1,0.465
dev_auroc,0.89806
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g1p5q4ht with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008483758100161476
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▁█▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▃▆▃▂▁▆▇▇▇███████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▆▇▅▄▅▄▃▂▃▃▃▃▃▃▂▂▂▃▂▃▁▃▂▂▁▃▂▂▂▂▂▂▂▂▂▂▂▁▂
train_loss_epoch,█▅▄▄▄▃▃▂▂▂▂▂▃▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.50045
dev_f1,0.401
dev_loss,1.9783
epoch,150.0
test_auroc,0.46073
test_f1,0.077
train_auroc,0.47873
train_f1,0.164
train_loss_batch,1.00899
train_loss_epoch,1.00722


[34m[1mwandb[0m: Agent Starting Run: 5v8y08hl with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008887111386605038
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.69585
best_dev_f1,0.696
best_test_auroc,0.50332
best_test_f1,0.502
best_train_auroc,0.5739
best_train_f1,0.517
dev_auroc,0.48937
dev_f1,0.421
dev_loss,1.97245
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: jfvr7z37 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007769450655278945
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: fubgrwzn with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006900233528897464
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄█▇▇▆▅▅▆▅▅▅▄▅▄▅▄▅▅▄▅▅▅▅▅▄▅▅▅▅▅▄▄▅▅▄▅▅▅▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.56876
best_dev_f1,0.401
best_test_auroc,0.56634
best_test_f1,0.489
best_train_auroc,0.54871
best_train_f1,0.465
dev_auroc,0.72529
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: lyg8rnve with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005705638679899629
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,145.0
test_auroc,0.48679
test_f1,0.051
train_auroc,0.47069
train_f1,0.128
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: 7w5sgb6b with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002176829538042058
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▃▃▄▃▃▃▃▄▃▃▂▃▁▃▃▁▃▄▂▅▁▃▃▇▃▃▃▃▃▅▄▄▃▄▄▁▃▃▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.54612
best_dev_f1,0.401
best_test_auroc,0.5254
best_test_f1,0.489
best_train_auroc,0.48221
best_train_f1,0.465
dev_auroc,0.50322
dev_f1,0.401
dev_loss,0.69318
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: hzg59v5j with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008186694384489778
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁███████████████████████████████████████
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁█▁▁█▁▁▁▁▁▁▁▁▁▁█▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,114.0
test_auroc,0.46508
test_f1,0.489
train_auroc,0.56898
train_f1,0.543
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: l968fc63 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00410858184656242
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁███████████████████████████████████████
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: g6ykydy4 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0011874953574069883
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.246434…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.248
best_test_auroc,0.5
best_test_f1,0.041
best_train_auroc,0.5
best_train_f1,0.116
dev_auroc,0.5
dev_f1,0.248
dev_loss,0.98406
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: vyb3iwq3 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0056782531505204055
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142947…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▁▂▂▂▂▁▂▃▄▄▄▅▅▆▆▆▆▇▇▇██████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▂▂▂▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.61462
best_dev_f1,0.401
best_test_auroc,0.5458
best_test_f1,0.489
best_train_auroc,0.48559
best_train_f1,0.465
dev_auroc,0.81693
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: yxg6ixf9 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004100633688322859
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▂▁▁▁▁▁▁▁▆▆▇▇▇▇█████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▂▁▁▁▁▂▁▂█▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.46163
best_dev_f1,0.401
best_test_auroc,0.43412
best_test_f1,0.489
best_train_auroc,0.40481
best_train_f1,0.465
dev_auroc,0.81718
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: f09ifcoh with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007230611858815508
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: qavfocbn with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005344340091624927
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.246556…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5tue2n8i with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006255345413633211
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▄▆▅▄▄▄▅▃▁▄▅▃▄▅▃▄▅▅▅▃▇▄▄▅▅▆▄▅▄▅▄▄▇▅▃▄▄▇▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.59717
best_dev_f1,0.401
best_test_auroc,0.50588
best_test_f1,0.489
best_train_auroc,0.53607
best_train_f1,0.465
dev_auroc,0.49475
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7s7ifote with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00400211008508067
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▄▃▃▃▃▂▂▂▂▂▂▂▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.40328
best_dev_f1,0.401
best_test_auroc,0.42042
best_test_f1,0.489
best_train_auroc,0.39664
best_train_f1,0.465
dev_auroc,0.31587
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ky17ejhb with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005968093867530095
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: rux0q2sx with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004435460606973292
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▂▂▂▃▆▄▅▄▅▅▆▇▇▇▇███████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▇▁▁▁█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.57978
best_dev_f1,0.401
best_test_auroc,0.54301
best_test_f1,0.489
best_train_auroc,0.38454
best_train_f1,0.465
dev_auroc,0.85237
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: pmllynty with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003607132564990536
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁█▇▆▇▇▆▇▇▇▇▇▇▇▇█████████████████████████
dev_f1,█▂▂▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
dev_loss,█▂▄▂▂▂▂▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.53503
best_dev_f1,0.469
best_test_auroc,0.46374
best_test_f1,0.481
best_train_auroc,0.65587
best_train_f1,0.589
dev_auroc,0.83049
dev_f1,0.43
dev_loss,0.69063
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 03g7nutf with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008622272105889403
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
dev_f1,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁█▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5065
best_dev_f1,0.419
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.49943
best_train_f1,0.468
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kkd82a0t with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002339922702909049
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▅▁▁▅▅▄▂▄▂▂▂▃▂▂▃▄▅▂▅▆▄▄▅▂▅▅▅▄▃▃▃▄▁▂▄▄▂▃▄█
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.55237
best_dev_f1,0.401
best_test_auroc,0.46281
best_test_f1,0.489
best_train_auroc,0.50542
best_train_f1,0.465
dev_auroc,0.58938
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 23tyhsgt with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006943877557223951
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁███████████████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49888
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.49927
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: w9dhyxsl with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003588689896970329
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142626…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▆▆▇▇▇▇█▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.45384
best_dev_f1,0.401
best_test_auroc,0.53267
best_test_f1,0.489
best_train_auroc,0.5037
best_train_f1,0.465
dev_auroc,0.50015
dev_f1,0.401
dev_loss,0.69318
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: fc5j2xem with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004673376787425661
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,████▁███████████████████████████████████
dev_f1,▁▁▁▁████████████████████████████████████
dev_loss,████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49873
best_dev_f1,0.401
best_test_auroc,0.49966
best_test_f1,0.494
best_train_auroc,0.49715
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: cx0ocfow with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0019290961049938469
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▁▁▁▆███████████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▂▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.44441
best_dev_f1,0.401
best_test_auroc,0.52831
best_test_f1,0.489
best_train_auroc,0.65156
best_train_f1,0.465
dev_auroc,0.88979
dev_f1,0.401
dev_loss,0.69331
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: e1qiqzyd with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003354178991864787
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142533…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▁▅▆▇▅▅▅▆▇▆▇▆▇▇▆▇▇▇█▇███████████████████
dev_f1,▁▁▁▂▂▂▂▂▂▂▂▆▆█████▇▅████████████████████
dev_loss,███████▇▆▄█▃▂▂▂▃▁▁▂▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.88049
best_dev_f1,0.82
best_test_auroc,0.58594
best_test_f1,0.565
best_train_auroc,0.8406
best_train_f1,0.736
dev_auroc,0.90263
dev_f1,0.818
dev_loss,0.6215
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 7c99ye5b with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006276708391101279
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▆▇█▇███▇▇▆▆▆▇▇▆▆▇▆▆▅▆▆▆▆▅▆▅▅▅▅▅▅▅▅▅▅▅▅
dev_f1,▁▁▂▁▂▂▂▂▂▂▂▂▂▂▅▇▇█▇▇████████████████████
dev_loss,█▇▆▆▆▆▆▆▆▆▆▆▅▄▃▃▂▂▂▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.79611
best_dev_f1,0.556
best_test_auroc,0.69619
best_test_f1,0.595
best_train_auroc,0.80955
best_train_f1,0.741
dev_auroc,0.7961
dev_f1,0.554
dev_loss,0.67376
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: um29tvn0 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0035723140667725654
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▄▆▄▇▇▇▆██▇████████████████████████████
dev_f1,▁▁▂▂▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▅▅▇▅▄▅▄██▇█▇▇███████
dev_loss,█▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▅▄▄▄▄▃▄▂▂▂▃▂▂▂▂▂▁▂▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.92169
best_dev_f1,0.553
best_test_auroc,0.81203
best_test_f1,0.634
best_train_auroc,0.87011
best_train_f1,0.707
dev_auroc,0.92157
dev_f1,0.553
dev_loss,0.67545
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: zbfraq33 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005106962522435059
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▆▇▇█▆▅▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▅▅▅▅▄▄▅▅▄▅▅
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.52706
best_dev_f1,0.402
best_test_auroc,0.47118
best_test_f1,0.489
best_train_auroc,0.60288
best_train_f1,0.465
dev_auroc,0.57847
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: goj447jt with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0023650315266413873
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.246678…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: o9jkf8ex with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0013939483509437332
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: n7x1ue6j with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0020179096753011744
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.49995
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: zgca21sy with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0013596269926788817
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142443…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▅▆▇▇▇▇██████▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▂▁▁▁▂▂▇▇▇▇▇▇▇▇▇▇████████████████████████
dev_loss,█▅▅▅▄▃▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.8015
best_dev_f1,0.549
best_test_auroc,0.61973
best_test_f1,0.586
best_train_auroc,0.83125
best_train_f1,0.708
dev_auroc,0.81167
dev_f1,0.549
dev_loss,0.67538
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ph57fdbe with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0042702933694507925
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▆▃▃▁▅▅▆▆▆▇▇▇▇▇▇█████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▇▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.686
best_dev_f1,0.401
best_test_auroc,0.55371
best_test_f1,0.489
best_train_auroc,0.39269
best_train_f1,0.465
dev_auroc,0.77395
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 4cjd3tbk with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008793440209508578
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▆▁▇▅▇▇▇█▅▇▆▅▇▇▆▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▅█▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.65317
best_dev_f1,0.401
best_test_auroc,0.5778
best_test_f1,0.489
best_train_auroc,0.65938
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: vp0jg8t2 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003156728851083499
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄█▆█▅▄▅▅▄▄▅▄▆▅▅▁▂▄▂▁▁▁▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▂▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.75905
best_dev_f1,0.401
best_test_auroc,0.62539
best_test_f1,0.489
best_train_auroc,0.63348
best_train_f1,0.465
dev_auroc,0.61015
dev_f1,0.401
dev_loss,0.69321
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 4fo2s67a with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006645574286838982
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.040 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.144064…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▅█▇▇▇▇▆▅▄▂▂▂▁▁▂▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.73459
best_dev_f1,0.401
best_test_auroc,0.59459
best_test_f1,0.489
best_train_auroc,0.73421
best_train_f1,0.465
dev_auroc,0.59102
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: xq0jdtcn with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0010031056698106512
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: tkmdpu6b with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00201854236080092
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: q82of77o with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0038186235967688754
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▁███████████████████████████████████████
dev_f1,▁███████████████████████████████████████
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▁▁▁▁▁█▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁█▁▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,122.0
test_auroc,0.49699
test_f1,0.489
train_auroc,0.48335
train_f1,0.472
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: 9crn0yr0 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006090621128800535
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▁▂▃▂▄▂▂▂█▂▂▂▂▂▂▃▂▃▂▂▂▂▃▂▂▂▃▂▂▂▂▂▂▄▃▂▄▃▂▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.50007
dev_f1,0.401
dev_loss,0.69315
epoch,173.0
test_auroc,0.49409
test_f1,0.424
train_auroc,0.50777
train_f1,0.455
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: 9zo89z76 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007925790917790635
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.245977…

[34m[1mwandb[0m: [32m[41mERROR[0m Problem finishing run
Traceback (most recent call last):
  File "/work/dagarwal_umass_edu/.conda/envs/s2and/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 1958, in _atexit_cleanup
    self._on_finish()
  File "/work/dagarwal_umass_edu/.conda/envs/s2and/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 2294, in _on_finish
    self._backend.cleanup()
  File "/work/dagarwal_umass_edu/.conda/envs/s2and/lib/python3.7/site-packages/wandb/sdk/backend/backend.py", line 252, in cleanup
    self.interface.join()
  File "/work/dagarwal_umass_edu/.conda/envs/s2and/lib/python3.7/site-packages/wandb/sdk/interface/interface_shared.py", line 576, in join
    self._router.join()
  File "/work/dagarwal_umass_edu/.conda/envs/s2and/lib/python3.7/site-packages/wandb/sdk/interface/router.py", line 100, in join
    self._thread.join()
  File "/work/dagarwal_umass_edu/.conda/envs/s2and/lib/python3.7/threading.py", line 1032, in join
    self._wait_for

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
dev_auroc,█▇▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁███████████████████████████████████████
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.38134
dev_f1,0.401
dev_loss,0.69315
epoch,71.0
test_auroc,0.5
test_f1,0.041
train_auroc,0.44357
train_f1,0.116
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: a8i9izjr with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0066150395164147275
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: czv7h1zw with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006580410657932952
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.332226…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fcrb4cf4 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005836297156860907
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: jmeagmmx with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00369936481329266
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▃▃▃▁▂▄▄▃▄▅▇█▆▆▇▇▆▆▆▇█▇▇█▇██████▇▇██▇██
dev_f1,▁▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▆▇██████████████
dev_loss,█████████████▇▇▇▇▇▇▇▇▆▇▅▄▃▂▂▁▂▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.86337
best_dev_f1,0.819
best_test_auroc,0.57676
best_test_f1,0.578
best_train_auroc,0.81615
best_train_f1,0.745
dev_auroc,0.8677
dev_f1,0.819
dev_loss,0.62242
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: n5p8cqmw with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005903471774457767
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 92a5kmwl with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002767278818041254
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▄▄▅▄▄▇▇███████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.52905
best_dev_f1,0.401
best_test_auroc,0.46568
best_test_f1,0.489
best_train_auroc,0.38071
best_train_f1,0.465
dev_auroc,0.76226
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: qmqdo7g2 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0027537120995469087
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▆▂▂▂▁▁▁▁▁▁▁▁▁▁▇▇▇▇██████████████████████
dev_f1,█▃▃▃▃▃▃▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▆▆▆▆▆▆▆▆▆▆▆▆▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49475
best_dev_f1,0.463
best_test_auroc,0.50085
best_test_f1,0.491
best_train_auroc,0.60009
best_train_f1,0.581
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: dsjzoqlz with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004342080325165095
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▇▇▆▇▇▇▇▇▁▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.52509
best_dev_f1,0.401
best_test_auroc,0.46106
best_test_f1,0.489
best_train_auroc,0.5395
best_train_f1,0.465
dev_auroc,0.50522
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: lv9alkul with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008299732560658775
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄█▇▆▇▅▇▇▃▆▇▅▆▅▆▆▆▆▆▆▆▇▆▇▇▇▇▇█████▇█████
dev_f1,▁▂▂▂▃▂▇▂▆▂▆▆▆▆▆███▇█▇███████████████████
dev_loss,█▇▇▇▇▆▆▃▃▅▄▃▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.70883
best_dev_f1,0.572
best_test_auroc,0.57341
best_test_f1,0.554
best_train_auroc,0.69382
best_train_f1,0.705
dev_auroc,0.84669
dev_f1,0.568
dev_loss,0.67145
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 8hypek0w with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006973190696492078
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▄▁▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▅▆▇▇▆▆▇▆▆▆█▆▆▆▆▆▆▆
dev_f1,▁███████████████████████████████████████
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.50611
dev_f1,0.401
dev_loss,0.69315
epoch,64.0
test_auroc,0.44533
test_f1,0.136
train_auroc,0.48957
train_f1,0.218
train_loss_batch,0.69317
train_loss_epoch,0.69317


[34m[1mwandb[0m: Agent Starting Run: fsamdtuo with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005593497445487166
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.248
best_test_auroc,0.5
best_test_f1,0.041
best_train_auroc,0.5
best_train_f1,0.116
dev_auroc,0.5
dev_f1,0.248
dev_loss,1.56488
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 5ei7fkax with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001788931838302104
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▆▄▄▄▅▄▄▅▄▄▄▄▄▄▅▄▄▇▄▄▄█▄▄▄▄▅▄▄▄▃▅▄▅▄▄▄▁▅▃
dev_f1,▁███████████████████████████████████████
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.52821
best_dev_f1,0.401
best_test_auroc,0.51552
best_test_f1,0.489
best_train_auroc,0.47799
best_train_f1,0.465
dev_auroc,0.47797
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 43e3b0o7 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005525795660433489
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁███████████████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49977
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.49985
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: g1agz83q with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005739961790525125
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁███████████████████████████████████████
dev_f1,▁███████████████████████████████████████
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: xhyj9kib with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0017407759833361744
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.248
best_test_auroc,0.5
best_test_f1,0.041
best_train_auroc,0.5
best_train_f1,0.116
dev_auroc,0.5
dev_f1,0.248
dev_loss,0.98406
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: mu7too6f with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003327492731714474
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.246475…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ok2qwsoa with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0019188602723713745
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▆▆▇▇████▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▆▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.55261
best_dev_f1,0.401
best_test_auroc,0.50047
best_test_f1,0.489
best_train_auroc,0.69397
best_train_f1,0.465
dev_auroc,0.84427
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 8wohs75w with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002772675093349934
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▅▁▅▅▆▇██████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,██▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.4531
best_dev_f1,0.414
best_test_auroc,0.49294
best_test_f1,0.33
best_train_auroc,0.49519
best_train_f1,0.451
dev_auroc,0.48132
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ltjgfj42 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0002750900510254642
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▅▇▆▇▆▆▆▇▇▇▇▇██████████████████████████
dev_f1,▁███████████████████████████████████████
dev_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.53337
best_dev_f1,0.401
best_test_auroc,0.58163
best_test_f1,0.489
best_train_auroc,0.65307
best_train_f1,0.465
dev_auroc,0.89395
dev_f1,0.401
dev_loss,0.69351
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: uel5zxdc with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00753940932047166
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁█▅▅▅▅▃▄▃▄▃▄▄▄▄▃▃▄▄▄▅▄▅▂▄▄▄▃▂▄▃▂▃▃▃▃▃▄▂▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.52847
best_dev_f1,0.401
best_test_auroc,0.4625
best_test_f1,0.489
best_train_auroc,0.68428
best_train_f1,0.465
dev_auroc,0.62013
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: upty8g5g with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002479626476665729
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: f4wq9my5 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004847603728727104
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▅▇▇▇▄▇▇▇▇▇▇▇▇▆▇▇▇▇▆▇▇▁▇▇▇▇▇█▇▇▇▇▇▇▇▆▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.4863
best_dev_f1,0.401
best_test_auroc,0.5342
best_test_f1,0.489
best_train_auroc,0.49533
best_train_f1,0.465
dev_auroc,0.50271
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: d8ccx2rk with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002521962561544365
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁███████████████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49608
best_dev_f1,0.401
best_test_auroc,0.49215
best_test_f1,0.489
best_train_auroc,0.49891
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: i1iyumu9 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.009194687193846918
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▃▃▂▃▃▃▂▂▂▁▂▁▁▁▁▁▂▂▁▁▂▂▁▂▂▁▂▁▁▁▁▂▁▂▁▂▂▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.54155
best_dev_f1,0.401
best_test_auroc,0.48058
best_test_f1,0.489
best_train_auroc,0.45639
best_train_f1,0.465
dev_auroc,0.35422
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: hs4z4ag1 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0022268544333010785
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142336…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁█▆▇▆▇▅▆▅▅▆▆▆▄▆▇▇▆▇▆▇█▇▅▇▆▇▇██▅▇▇█▇▆▆▅▅▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.39099
best_dev_f1,0.401
best_test_auroc,0.50847
best_test_f1,0.489
best_train_auroc,0.40728
best_train_f1,0.465
dev_auroc,0.54268
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: gkchlaie with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0019568003812168886
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: lhn6tudm with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004166684992089916
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: edzos45f with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004093941142457813
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁█▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
dev_f1,▄█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.59338
best_dev_f1,0.592
best_test_auroc,0.48615
best_test_f1,0.461
best_train_auroc,0.52547
best_train_f1,0.516
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: izd9uvrp with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0026628432211273285
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▇▅▆▅▄▄▅▄▃▅▃▅█▆▄▄▄▅▄▁▄▃▅▆▆█▅▅▅▁▃▅▅▅▄▄▅▅▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▅▂▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.45172
best_dev_f1,0.401
best_test_auroc,0.45533
best_test_f1,0.489
best_train_auroc,0.59749
best_train_f1,0.465
dev_auroc,0.53577
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: io8t4rbn with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004500441304959099
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.246414…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: d1oo2wez with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003587403260725056
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▁▇▆▇████▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.61835
best_dev_f1,0.401
best_test_auroc,0.46528
best_test_f1,0.489
best_train_auroc,0.64174
best_train_f1,0.465
dev_auroc,0.73901
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 0kyraen8 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0034048756052439204
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▅▇▇▇█▇█▇███▇▇▇▇▇█▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.47985
best_dev_f1,0.401
best_test_auroc,0.46114
best_test_f1,0.489
best_train_auroc,0.59034
best_train_f1,0.465
dev_auroc,0.78496
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 1lfxyd6c with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0030678568794848403
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▅▅▄▅▇▆▂▅▁▅▄▂▆▄▄▅▅▃▇▆▇▃▆▄▁▆▄▂▅▄▄▇▅▃▆▅▄█
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.43724
best_dev_f1,0.401
best_test_auroc,0.52909
best_test_f1,0.489
best_train_auroc,0.49621
best_train_f1,0.465
dev_auroc,0.5905
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0gxtzvnq with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0012470050257347555
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁███████████████████████████████████████
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▁▁▁▁▁▁▁█▁▁▁▁▁▁█▁▁▁▁▁▁█▁▁▁▁▁▁█▁▁▁▁▁▁█▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,90.0
test_auroc,0.50186
test_f1,0.041
train_auroc,0.42019
train_f1,0.133
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: njblakhv with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005850342201943476
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▃█▇▆▆▆▆▆▇▇███████████████████████
dev_f1,▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▇▇▇▇▇▇▇▇▇▇█▇█████████████
dev_loss,████████▇▇▇▇▇▆▄▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.89453
best_dev_f1,0.557
best_test_auroc,0.77738
best_test_f1,0.638
best_train_auroc,0.86284
best_train_f1,0.685
dev_auroc,0.89704
dev_f1,0.557
dev_loss,0.67417
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: zx1d7n2d with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0018327667036277448
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▇██▇▇▇▇█▆▆▇▇▇▆▇▇▆▆▆▇▆▇▇▅▇▆▇▆▇▇▆▆▇▆▆█▆▆▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.50832
best_dev_f1,0.401
best_test_auroc,0.45003
best_test_f1,0.489
best_train_auroc,0.49937
best_train_f1,0.465
dev_auroc,0.62642
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: k20n3tpx with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0014488100263955483
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.246384…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: itslzpcd with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00348299405633178
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: nzc2uv1c with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002919654931035728
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▂▁▃▂▂▃▄█▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇████████
dev_f1,▁▁▁▁▁▁▁▁▂▂▂▂▁▂▂▂▂▂▂▂▂▂▂▃▆▄▆▆▆▆▆▇▆▇▇▇▇███
dev_loss,█████████████████▇▇▇▇▆▅▅▄▄▄▄▃▃▂▂▃▂▂▂▂▂▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.89015
best_dev_f1,0.818
best_test_auroc,0.61276
best_test_f1,0.568
best_train_auroc,0.85473
best_train_f1,0.716
dev_auroc,0.89532
dev_f1,0.818
dev_loss,0.63113
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fb07cht5 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0046919133781590525
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▁▁▁▁▁▁▁▁▅█████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁██████████████████████████████
dev_loss,▇█████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49896
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.49974
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: q0uo3zq5 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.000981237774508674
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▁▂▅▇▇▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▂▁▄▇▇▇▇▆▇▇▇█████████████▇███████████████
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.81062
best_dev_f1,0.435
best_test_auroc,0.56519
best_test_f1,0.499
best_train_auroc,0.82068
best_train_f1,0.694
dev_auroc,0.80784
dev_f1,0.434
dev_loss,0.68993
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: b14j9qdg with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0011251270959716266
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅███████████████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.53923
best_dev_f1,0.401
best_test_auroc,0.58026
best_test_f1,0.489
best_train_auroc,0.72031
best_train_f1,0.465
dev_auroc,0.86508
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: hg34to37 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004058240479134095
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: y2cmelga with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0027479966507722303
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▄▆▅▂▄▅▄▆▇▅▅▂▆▄▅▂▃▄▄▃▄▄█▅▄▅▃▄▂▄▄▄▄▄▃▄▆▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.43997
best_dev_f1,0.401
best_test_auroc,0.44872
best_test_f1,0.489
best_train_auroc,0.49047
best_train_f1,0.465
dev_auroc,0.46254
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: g5vcdr2l with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0008553256957523292
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: y25a3p72 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002976475398834128
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁███████████████████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49472
best_dev_f1,0.4
best_test_auroc,0.49948
best_test_f1,0.489
best_train_auroc,0.49974
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.248
dev_loss,0.98406
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: g8f6bm1t with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0009477187105572976
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▃▁▂▄▃▄▄▂█▅▄▆▂▂▇▆▃▂▄▃▆▅▃▅▅▂▄▆▃▄▂▅▂▃▇█▄▃▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.55031
best_dev_f1,0.401
best_test_auroc,0.5122
best_test_f1,0.489
best_train_auroc,0.68328
best_train_f1,0.465
dev_auroc,0.72229
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d3uj6dr2 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004436860800326629
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▇▇███▄▄▆▇▇▇███████████████████████████
dev_f1,▁▄▆▆▇▆█▅▄▁▁▁▁▁▄▄▄▄▄▄▅▄▅▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
dev_loss,█▅▄▄▃▃▄▄▅▅▅▅▅▅▄▄▃▃▂▂▁▁▂▄▅▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.91085
best_dev_f1,0.442
best_test_auroc,0.79774
best_test_f1,0.592
best_train_auroc,0.7945
best_train_f1,0.661
dev_auroc,0.90587
dev_f1,0.431
dev_loss,0.69076
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: vbjs54bw with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0005604507189990857
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.56162
best_dev_f1,0.478
best_test_auroc,0.53179
best_test_f1,0.282
best_train_auroc,0.59501
best_train_f1,0.438
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: demo2ma8 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005550839984947287
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.59566
best_dev_f1,0.597
best_test_auroc,0.49086
best_test_f1,0.414
best_train_auroc,0.51043
best_train_f1,0.489
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: hcq8h079 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00037593419666072993
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▄▃▅▅▆▅▇▆▆█▅▆▆▆▇▇▆▇▇▆▇▇▇▆▆▇▇▆▇▇▆▇▆▇▆▇▆▆
dev_f1,▁███████████████████████████████████████
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.41588
best_dev_f1,0.401
best_test_auroc,0.52676
best_test_f1,0.489
best_train_auroc,0.58312
best_train_f1,0.465
dev_auroc,0.72866
dev_f1,0.401
dev_loss,0.69323
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: swnltwai with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004609389892052366
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▇▇████████████████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.41906
best_dev_f1,0.414
best_test_auroc,0.48017
best_test_f1,0.387
best_train_auroc,0.52102
best_train_f1,0.486
dev_auroc,0.59269
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 3300asdk with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0025092192394676116
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▃▃▂▃▄▃▃▄▃▄▂▄▃▃▂▂▃▄▄▁▄▃▃▂▂▃▂▂▂▄▂▅▄▃▂▄▂█▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49615
best_dev_f1,0.401
best_test_auroc,0.56623
best_test_f1,0.489
best_train_auroc,0.52602
best_train_f1,0.465
dev_auroc,0.5187
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 33g00qqn with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008647169230582015
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▂▂▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.57816
best_dev_f1,0.404
best_test_auroc,0.47912
best_test_f1,0.489
best_train_auroc,0.53619
best_train_f1,0.502
dev_auroc,0.5089
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3xsfewsb with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0011816287353864025
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.332062…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▁▁▅▄▃▅▂▃▄▅▄▄▃▆▄▄▅▆▅▄▅▄▄▆▃▃▄▆▅█▆▃▄▅▄▂█▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.46634
best_dev_f1,0.401
best_test_auroc,0.541
best_test_f1,0.489
best_train_auroc,0.44988
best_train_f1,0.465
dev_auroc,0.537
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: kqf0v83y with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0036071309752112274
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁███████████████████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.4981
best_dev_f1,0.404
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.50969
best_train_f1,0.489
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9cv9vp7o with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0019389460766537083
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.143199…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▂▁▁▁▃▇▇▇███████████▇██████████████▇████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.56439
best_dev_f1,0.401
best_test_auroc,0.51101
best_test_f1,0.489
best_train_auroc,0.5626
best_train_f1,0.465
dev_auroc,0.88151
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: cbkpk5yq with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003496243774864913
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.332081…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁█▇▇▇▇▇▇▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁█▇▇▇▇▇▇▇▇▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49407
best_dev_f1,0.411
best_test_auroc,0.45824
best_test_f1,0.489
best_train_auroc,0.52706
best_train_f1,0.465
dev_auroc,0.48823
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: stp8ge37 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00319355921584219
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 7peu8z1u with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006197918806160447
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.44152
best_dev_f1,0.401
best_test_auroc,0.4756
best_test_f1,0.489
best_train_auroc,0.4957
best_train_f1,0.465
dev_auroc,0.42129
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 8o0v583f with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0007748499893532082
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅██████████████████████████████████████
dev_f1,▄▁▆▆▆▆▆▇▇▇▇▇▇▇▇▇█▇██████████████████████
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.78883
best_dev_f1,0.442
best_test_auroc,0.58057
best_test_f1,0.544
best_train_auroc,0.83115
best_train_f1,0.709
dev_auroc,0.79108
dev_f1,0.441
dev_loss,0.68858
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: tmumqu0k with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.009904996595085086
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▇▇▇▇▇▇▇▇▇█████████████████████████████
dev_f1,▁▅▇▇██▇█████████████████████████████████
dev_loss,█▃▄▃▂▂▁▃▃▃▂▃▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.89684
best_dev_f1,0.84
best_test_auroc,0.73613
best_test_f1,0.532
best_train_auroc,0.89093
best_train_f1,0.789
dev_auroc,0.90225
dev_f1,0.84
dev_loss,1.42231
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 4aseldmy with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0019880509321618498
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142503…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▁▂▂▆▇▇▇▇███████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.48517
best_dev_f1,0.401
best_test_auroc,0.55473
best_test_f1,0.489
best_train_auroc,0.63918
best_train_f1,0.465
dev_auroc,0.84975
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: y9y8x19r with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008290348259551264
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▅▅▆▆▆▇▇▇▇▆▇▇▇█████████████████████████
dev_f1,▁▁▂█▇████▇█▇▇███████████████████████████
dev_loss,▃█▃▂▂▂▂▂▃▁▂▁▂▁▁▁▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.89981
best_dev_f1,0.846
best_test_auroc,0.74478
best_test_f1,0.584
best_train_auroc,0.90345
best_train_f1,0.798
dev_auroc,0.91272
dev_f1,0.838
dev_loss,1.39635
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 0049iuju with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00138876261033307
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▁▁▁▁▁▁▁▁▂▂▂▃▄▆█████████████████████████
dev_f1,▂▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▇▇▇▇▇▇███▇▇▇████████
dev_loss,█▇▇▇▇▇▇▇▇▇▇▇▇▆▆▆▅▄▃▃▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.87058
best_dev_f1,0.546
best_test_auroc,0.5904
best_test_f1,0.548
best_train_auroc,0.82396
best_train_f1,0.677
dev_auroc,0.86974
dev_f1,0.546
dev_loss,0.67518
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: s9ronxg0 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00300962440962986
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▅▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49866
best_dev_f1,0.418
best_test_auroc,0.47421
best_test_f1,0.456
best_train_auroc,0.51695
best_train_f1,0.492
dev_auroc,0.43994
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 142ph7f4 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006767329617019713
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃█▇▆█▇▆▇▆▇▇▅█▄▇▃▄▆▅▄▅▆▃▅▅▆▄▅▆▅▅▅▃▃▅▁▅▅▄▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▆▁▅▅▇▇▇█████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.74742
best_dev_f1,0.401
best_test_auroc,0.57876
best_test_f1,0.489
best_train_auroc,0.72292
best_train_f1,0.465
dev_auroc,0.79172
dev_f1,0.401
dev_loss,1.9783
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ykh6puwh with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0027780411479586434
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▂▁▃▃▃▂▄▄▅▅▆▆▇▆▆▇█▇▇▇█▇█▇▇█▇██▇▇▇▇▇██▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.52595
best_dev_f1,0.401
best_test_auroc,0.45473
best_test_f1,0.489
best_train_auroc,0.52026
best_train_f1,0.465
dev_auroc,0.74284
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 8b6461k7 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0024762236279211257
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▇▆██▇█▇█▇█▇▇▆▆▆▆█▆▆▇▅▆▅▇▆▆▇▅▆▆▅▆▇▇▆▆▆▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.53771
best_dev_f1,0.401
best_test_auroc,0.51449
best_test_f1,0.489
best_train_auroc,0.54867
best_train_f1,0.465
dev_auroc,0.77708
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: lwh3waj8 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0001823165279355396
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


VBox(children=(Label(value='0.006 MB of 0.017 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.334907…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▄▅▆▁▁▁▁▁▂▃▄▅▆▇████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,█▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
dev_loss,█▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.47954
best_dev_f1,0.463
best_test_auroc,0.53504
best_test_f1,0.336
best_train_auroc,0.55258
best_train_f1,0.438
dev_auroc,0.58382
dev_f1,0.429
dev_loss,0.69087
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: i7mp9j5c with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006645551171382844
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▄▅▄▃▄▄▂▄▂▄▅▃▂▄▃▄▅▄▃▂█▄▄▄▅▅▄▃▃▃▅▁▄▃▄▂▃▃▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.52825
best_dev_f1,0.401
best_test_auroc,0.4625
best_test_f1,0.489
best_train_auroc,0.56537
best_train_f1,0.465
dev_auroc,0.55699
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: oksjqe2d with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001469751933803908
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▅▅▆▅▆▄▅▆▅▅▆▄▆▆▅▅▇▅▅▅█▅▅▅▃▅▅▅▅▅▆▇▇▅▁▅▇▄▄▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▇▇▆▄▄▄▄▃▄▄▄▄▃▄▄▄▄▂▂▂▂▂▁▂▂▂▂▁▂▂▂▂▁▂▂▂▂▁▂
train_loss_epoch,█▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.49019
dev_f1,0.401
dev_loss,0.69315
epoch,128.0
test_auroc,0.52626
test_f1,0.457
train_auroc,0.56358
train_f1,0.472
train_loss_batch,0.70025
train_loss_epoch,0.69998


[34m[1mwandb[0m: Agent Starting Run: 7fza69yq with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002573470869240479
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅██████████████████████████████████████
dev_f1,▇█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.48765
best_dev_f1,0.409
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.55159
best_train_f1,0.562
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: dy3z4lkc with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0001367606425007385
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▃▂▃▄█▃▄▃▄▆▂▂▄▁▄▃▁▅▃▄▅▇▄▆▂▂▄▂▅▄▅▆▂▂▄▁▂▇▃
dev_f1,▁███████████████████████████████████████
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49538
best_dev_f1,0.401
best_test_auroc,0.5361
best_test_f1,0.489
best_train_auroc,0.52637
best_train_f1,0.465
dev_auroc,0.50315
dev_f1,0.401
dev_loss,0.69323
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 1pwamrcq with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.009321018723278838
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.50135
best_dev_f1,0.401
best_test_auroc,0.50897
best_test_f1,0.489
best_train_auroc,0.50464
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: nszl2s13 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0014861797547494338
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.332044…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▅▅▅▅▅▅▄▄▅▅▅▅▆▅▆▆▆▆▇▇▆▆▇▆▇▇▇▇██▇███████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▆▁█████▆█████████████████
dev_loss,██▇▇▇▇▇▇▇▇▇▇▇▇▆▄▃▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.90523
best_dev_f1,0.822
best_test_auroc,0.6175
best_test_f1,0.575
best_train_auroc,0.85152
best_train_f1,0.748
dev_auroc,0.90955
dev_f1,0.821
dev_loss,0.62116
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 21ex4y48 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.009412818563331754
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▆▆▆▆▆▆▇▇▇▇▇███████████████████████████
dev_f1,▁▅▇███▇▇██▇████▇████████▇██▇█████▇▇▇█▇▇▇
dev_loss,▃█▂▄▂▃▁▁▁▂▁▁▁▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.90956
best_dev_f1,0.87
best_test_auroc,0.75367
best_test_f1,0.605
best_train_auroc,0.90233
best_train_f1,0.796
dev_auroc,0.90964
dev_f1,0.794
dev_loss,1.43391
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: kk6l1fhd with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003894790482343881
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▃▂▃▅▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████
dev_f1,▁▁▁▁▁▁▂▂▁▁▂▃▃▃▆▆▃▄▆▃▄█▃██▇██████████████
dev_loss,████████▇▇▆▆▆▅▆▄▄▃▄▄▄▃▂▂▂▂▁▂▁▁▂▂▁▁▂▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.83058
best_dev_f1,0.819
best_test_auroc,0.58786
best_test_f1,0.571
best_train_auroc,0.82208
best_train_f1,0.742
dev_auroc,0.87263
dev_f1,0.818
dev_loss,0.62664
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: tluat2wd with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0026106228363952837
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 6xu4c06w with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004518499679999527
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃███▇▇███▇█████████████████████████████
dev_f1,▁▃▅▆▆▆▆▆▆▆█▅▇▇▆▇▇▇█████▇████████████████
dev_loss,█▅▃▂▃▄▄▃▃▂▃▃▃▃▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.83817
best_dev_f1,0.442
best_test_auroc,0.71163
best_test_f1,0.605
best_train_auroc,0.75639
best_train_f1,0.677
dev_auroc,0.89346
dev_f1,0.44
dev_loss,0.68967
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 6qgvno8q with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0074029767827113934
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▅▄▇▆▇▅▇▇▇▇▇▇▇▇▇▇▇▅▇▇▇▇▇▇██████████████
dev_f1,▁▁▂▁▁▂▂▂▁▁▂▂▇██████▂████████████████████
dev_loss,█████████▇▇▇▂▂▂▂▂▂▁█▃▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.91474
best_dev_f1,0.822
best_test_auroc,0.62175
best_test_f1,0.578
best_train_auroc,0.86688
best_train_f1,0.755
dev_auroc,0.91474
dev_f1,0.822
dev_loss,0.62018
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ck53z7om with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006044003865738384
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 17a1cno3 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008210638744320119
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.248
best_test_auroc,0.5
best_test_f1,0.041
best_train_auroc,0.5
best_train_f1,0.116
dev_auroc,0.5
dev_f1,0.248
dev_loss,0.98406
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: s0eoeazt with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0016641629552945597
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▅▇▇▄█▇▆▇█▇▃▆▅▅██▇█▇█▇▆▇▇▄▂▆▅▇▅▆▆▆▅▆▆▅▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5535
best_dev_f1,0.401
best_test_auroc,0.53132
best_test_f1,0.489
best_train_auroc,0.66506
best_train_f1,0.465
dev_auroc,0.68042
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ensalyrm with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003548347514556741
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.246565…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: nufotrr6 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003894274873422514
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142356…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▄▅▅▄▅▆▆▄▅▃▅▄▄▆▄▄▅▅▄▅▄▅▅▅▅▅▄▄▆▄▅█▃▄▅▄▅▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.37493
best_dev_f1,0.401
best_test_auroc,0.54892
best_test_f1,0.489
best_train_auroc,0.44916
best_train_f1,0.465
dev_auroc,0.5563
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: kn4qufw4 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004043979092612401
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.50574
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: xjr88a7v with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0008017336107369498
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.014 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.411943…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▇▂█▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
dev_f1,▅▂▇▁█▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
dev_loss,▃▃▁▁▆███████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5732
best_dev_f1,0.556
best_test_auroc,0.47743
best_test_f1,0.417
best_train_auroc,0.47907
best_train_f1,0.428
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: w9h9k4p0 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004280602609217362
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▅▅▅▅▅▅▅▅▅▆▅▆▆▆▆▆▇▆▇▇▇▇▇███████████████
dev_f1,▁▁▂▂▂▂▂▂▂▂▂▂▃███████████████████████████
dev_loss,██████████▇▆▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.86017
best_dev_f1,0.821
best_test_auroc,0.56137
best_test_f1,0.565
best_train_auroc,0.82028
best_train_f1,0.741
dev_auroc,0.86778
dev_f1,0.819
dev_loss,0.62151
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ejhi838q with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0005607462515711547
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▄▅▄▇██▆▆▄▇▇▄▅▅▅▂▇▄▄▇▂▄▇▂▅▄▄▄▄▄▅▃▃▅▄▁▇▅▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▇██████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49563
best_dev_f1,0.401
best_test_auroc,0.51052
best_test_f1,0.489
best_train_auroc,0.5165
best_train_f1,0.465
dev_auroc,0.41327
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: uqq4jd9n with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002068945637432258
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 24gkhffg with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006508412486877451
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▁███████████████████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,73.0
test_auroc,0.50462
test_f1,0.43
train_auroc,0.52799
train_f1,0.513
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: 7swmkfcd with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0050900290787030495
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▄▅▅▆▆▅█▆▇▆▅▃▆▅▁▆▄█▆▅▄▆▇▆▇▇▂▇▄▆▄█▇█▄▅▆▂▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49533
best_dev_f1,0.401
best_test_auroc,0.46243
best_test_f1,0.489
best_train_auroc,0.54851
best_train_f1,0.465
dev_auroc,0.5409
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 81rjs3h7 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0022332204953414707
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▅▄▄▆▆▃▅▄▆▆▆▅▃▇▆█▆▇▆▅▆▇▇▃▄▅▆▄▂▃▄▆▆▅▅▄▅▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▄█▁▂▃▅▅▅▆▇▇▇████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.58008
best_dev_f1,0.401
best_test_auroc,0.54909
best_test_f1,0.489
best_train_auroc,0.68051
best_train_f1,0.465
dev_auroc,0.73843
dev_f1,0.401
dev_loss,1.96064
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 4102l9or with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00038380828750764353
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


0,1
dev_auroc,█▄▂▂▂▁▂▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
dev_f1,█▄▄▃▃▁▁▂▃▇▇▇███▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▃▂▂▂▂▁▂▁▁▂▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▂
train_loss_epoch,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.50017
dev_f1,0.423
dev_loss,0.69351
epoch,87.0
test_auroc,0.50616
test_f1,0.325
train_auroc,0.53923
train_f1,0.401
train_loss_batch,0.70862
train_loss_epoch,0.70677


[34m[1mwandb[0m: Agent Starting Run: btmgsuw5 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005717010692656717
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▇▇▆▆▆▆▇▆▇▇▇█▇██▇▇▇▇████████████████████
dev_f1,▂▁▂▂▂▂▂▄▆▆▆▆▆▆▆▆▇▆▆▇▆▇▇█▇▇██████████████
dev_loss,█▇▇▇▇▇▇▃▂▃▂▂▃▁▃▁▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.87495
best_dev_f1,0.561
best_test_auroc,0.7006
best_test_f1,0.627
best_train_auroc,0.81574
best_train_f1,0.701
dev_auroc,0.88887
dev_f1,0.558
dev_loss,0.67433
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: kuiwmcsn with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008352107309448003
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▁███████████████████████████████████████
dev_f1,▁███████████████████████████████████████
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,███▇▆▇▇▆▄▅▅▃▂▃▃▂▂▂▂▂▁▂▂▂▁▂▂▂▁▂▂▂▁▂▂▂▁▂▂▂
train_loss_epoch,█▇▇▇▇▇▇▅▅▅▃▃▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,79.0
test_auroc,0.4991
test_f1,0.053
train_auroc,0.49596
train_f1,0.122
train_loss_batch,0.99803
train_loss_epoch,1.00104


[34m[1mwandb[0m: Agent Starting Run: oj0p6iio with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00195754606047396
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▃▂▂▄▂▂▁▃▂▂▂▂▄▂▂▁▃▂▂▂▂▄▂▂▁▃▂▂▂▂▄▂▂▁▃▂▂▂▃
train_loss_epoch,█▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,144.0
test_auroc,0.50245
test_f1,0.053
train_auroc,0.50246
train_f1,0.123
train_loss_batch,0.71237
train_loss_epoch,0.7125


[34m[1mwandb[0m: Agent Starting Run: wq4vptj9 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001784815621999185
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▅▅▇▇▇▇▇▇▇▇▇▇▇▇████████████████████████
dev_f1,▂▁▁▁▁▁▁▆▄▆▇▇█▆▆█▇██▇████████████████████
dev_loss,█▇▇▇▇▇▇▄▃▄▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.80684
best_dev_f1,0.57
best_test_auroc,0.58615
best_test_f1,0.539
best_train_auroc,0.82061
best_train_f1,0.68
dev_auroc,0.84007
dev_f1,0.568
dev_loss,0.67177
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: e5dccpyv with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002231711852887379
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▅▁██▆▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
dev_f1,▁███████████████████████████████████████
dev_loss,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.53199
dev_f1,0.401
dev_loss,0.69315
epoch,116.0
test_auroc,0.5
test_f1,0.041
train_auroc,0.49995
train_f1,0.116
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: exy4n5dz with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008463700475639185
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.332208…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: qmdx016p with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006310450400985134
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▇▇▁▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.50425
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.50198
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: fepy6yqe with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0012646968839937487
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▇█▆▆▂▆▆▅▅▅▅▃▆▅▅▅▆▅▆▅▆▆▅▆▅▇▁▆▅▆▆▅▆▆▃▅▅▆▅▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▅██████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.63092
best_dev_f1,0.401
best_test_auroc,0.52747
best_test_f1,0.489
best_train_auroc,0.62307
best_train_f1,0.465
dev_auroc,0.5005
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: fybyfeze with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00499940772803072
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▇▃█▃▃▃▅▂▃▂▂▃▃▃▂▃▃▃▄▃▃▃▃▃▃▃▃▄▃▃▃▄▃▃▄▃▃▄▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.63428
best_dev_f1,0.401
best_test_auroc,0.56305
best_test_f1,0.489
best_train_auroc,0.59498
best_train_f1,0.465
dev_auroc,0.51433
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 0ti1qlrj with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0053600037498535424
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: op9eaq70 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0034452393808475784
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: vdn6uvnr with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008909630355981954
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▅▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.69484
best_dev_f1,0.401
best_test_auroc,0.59041
best_test_f1,0.489
best_train_auroc,0.63682
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: c4qf8jtc with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001929618037677914
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.51912
best_dev_f1,0.401
best_test_auroc,0.50064
best_test_f1,0.489
best_train_auroc,0.49908
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: nx33hkxw with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0027798310913525505
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▂▂▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.63951
best_dev_f1,0.401
best_test_auroc,0.52718
best_test_f1,0.489
best_train_auroc,0.50183
best_train_f1,0.465
dev_auroc,0.44688
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: i4mjsx7q with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006791553974013885
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▅▅▅▅▅▆▇▇▇▆▆▇▇▆▆▇▇▇█▇▇▇▇█▇██▇██████████
dev_f1,▁▁▁▁▂▂▂▂▂▂▂▃▆▂██████████████████████████
dev_loss,█████████▇▆▇▄▆▇▁▂▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.90509
best_dev_f1,0.821
best_test_auroc,0.56922
best_test_f1,0.578
best_train_auroc,0.77715
best_train_f1,0.746
dev_auroc,0.9041
dev_f1,0.821
dev_loss,0.62216
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 9zhnvcxh with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0027818088550255354
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.332225…

0,1
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁█▁
train_loss_epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,167.0
test_auroc,0.53092
test_f1,0.124
train_auroc,0.52141
train_f1,0.247
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: nmbuf2e9 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002436811097589993
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▇██████████████████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.55556
best_dev_f1,0.417
best_test_auroc,0.53598
best_test_f1,0.468
best_train_auroc,0.57629
best_train_f1,0.511
dev_auroc,0.65169
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: x585yjbb with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0064830583286029616
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49291
best_dev_f1,0.417
best_test_auroc,0.47529
best_test_f1,0.489
best_train_auroc,0.55037
best_train_f1,0.532
dev_auroc,0.56626
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: oq369mdq with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0061576498191465625
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.143243…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: zfki6s6t with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0053001660827805995
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 0uv70hyy with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.009319175749222511
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.52137
best_dev_f1,0.482
best_test_auroc,0.52299
best_test_f1,0.525
best_train_auroc,0.59205
best_train_f1,0.602
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: v3f7yqxn with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004619612316158403
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: y1iqcgv8 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0006373922019213972
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▅█████████████████████████████████████
dev_f1,▄█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ikqjjqn5 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003147297484405251
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: e9at7cuw with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007524998115254786
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.332335…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▇█▇▆▇▆▇▃▄▃▂▃▂▃▆▄▂▃▅▆▁▃▂▂▂▃▃▂▃▃▃▇▃▂▇▃▃▄▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.7685
best_dev_f1,0.401
best_test_auroc,0.5649
best_test_f1,0.489
best_train_auroc,0.73101
best_train_f1,0.465
dev_auroc,0.47275
dev_f1,0.401
dev_loss,1.97829
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: quje7oax with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0038447860057955192
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 15f9tmsm with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006855260196055812
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▅▆▅▃▅▇▇█▄▅▇▃▅▆▂▄▇▇▄▅▆▃▆▁▄▆▄▃▄▆▄▆▅▆▅▅▅▅▅▃
train_loss_epoch,▂▃▄▇▄▅▅█▃▆▄▄▄▄▅▆▅▇▃▅▃▅▄▃▃▄▆▁▂▄▅▂▄▄▁▃▅▅▄▂

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,179.0
test_auroc,0.51638
test_f1,0.438
train_auroc,0.50634
train_f1,0.479
train_loss_batch,0.69317
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: j7at0whd with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003728039084163876
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▄▄██▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▆▆▇▆▆▆▆▇▆▆▇▆▆▆▆
dev_f1,▂▁▁▁▂▂▂▂▂▂▂▂▃▂▃▃▃▅▅▆▇▇▇█████▇███████████
dev_loss,█▆▆▆▆▆▆▆▆▆▅▅▅▅▅▅▄▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.75878
best_dev_f1,0.57
best_test_auroc,0.57889
best_test_f1,0.556
best_train_auroc,0.79775
best_train_f1,0.743
dev_auroc,0.76045
dev_f1,0.566
dev_loss,0.67244
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: vxmk08so with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0020822837495152054
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▆▆▃▇▇▇▇▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,███▅████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▅▃▅▄▃▃▃▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.61004
best_dev_f1,0.608
best_test_auroc,0.54167
best_test_f1,0.518
best_train_auroc,0.62467
best_train_f1,0.572
dev_auroc,0.48925
dev_f1,0.401
dev_loss,0.70403
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 6k5f0wmv with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005964489422114637
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49856
best_dev_f1,0.401
best_test_auroc,0.51469
best_test_f1,0.489
best_train_auroc,0.53739
best_train_f1,0.465
dev_auroc,0.45986
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 4bvjddbh with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001933253493424484
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: syex92uw with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006016407599522913
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.55586
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: weqxf9dl with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00231042100756078
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▆█▆▆▆▇▇▆▆▆▆▆▄▆▆▅▆▁▆▄▄▅▅▅▇▅▆▇▂▃▃▄▃▃▄▄▆▂▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.63065
best_dev_f1,0.401
best_test_auroc,0.59875
best_test_f1,0.489
best_train_auroc,0.70885
best_train_f1,0.465
dev_auroc,0.59024
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: gg7qwomk with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004404939117750313
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142307…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.66064
best_dev_f1,0.402
best_test_auroc,0.4952
best_test_f1,0.489
best_train_auroc,0.57073
best_train_f1,0.465
dev_auroc,0.62684
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: nqn59h90 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001514312966807259
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▄▄▄▄▃▄▄▄▄▄▄▄▄▄▄▄▄▆▄▄▄▄▄▄▄▄▄▁▄▄▄▄▄▄▄▄▄█▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.50008
best_dev_f1,0.401
best_test_auroc,0.49555
best_test_f1,0.489
best_train_auroc,0.499
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: apgp3s16 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00496499801968573
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▄▄▂▆▄▃▃▅▆▇▅▆█▇▇▃▆▄▅▄▆▆▁▆█▆▅▆▇▇▇▇▆▆▇▇▆▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.42716
best_dev_f1,0.401
best_test_auroc,0.44795
best_test_f1,0.489
best_train_auroc,0.51079
best_train_f1,0.465
dev_auroc,0.52192
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: vhu8bacj with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006852044688849202
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▁███████████████████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,172.0
test_auroc,0.58375
test_f1,0.498
train_auroc,0.60454
train_f1,0.545
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: 2dt50air with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003791259764655524
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


VBox(children=(Label(value='0.006 MB of 0.023 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.248938…

0,1
dev_auroc,▁███████████████████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▁▁▂▁▁▁▂▁▁▁▂▁▁▁▁▁▁▂▁▁▁▂▁▁▁▂▁▁▁▁▁▁▂▁▁▁▂▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.49844
dev_f1,0.401
dev_loss,0.69454
epoch,171.0
test_auroc,0.45759
test_f1,0.125
train_auroc,0.47146
train_f1,0.26
train_loss_batch,0.69626
train_loss_epoch,0.69655


[34m[1mwandb[0m: Agent Starting Run: 3f9fyuiy with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005929459145285526
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: i8p58t5o with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002149448731694041
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▄▃▄▅▅▆▆▅▆▆▆▆▅▆▆▆▇▆▇▆▆▇▆█▇▇▇▇██▇█▇█████
dev_f1,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▆▂▂▆▆███████████████
dev_loss,█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▆▆▅▆▃▂▂▁▁▁▁▄▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.86251
best_dev_f1,0.821
best_test_auroc,0.5702
best_test_f1,0.58
best_train_auroc,0.732
best_train_f1,0.742
dev_auroc,0.86157
dev_f1,0.82
dev_loss,0.62185
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: it4ajkmr with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0025353261932871394
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2k4k1mn5 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0027610421581221773
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▇▇▇▇▇▇▇▇▇█████████████████████████████
dev_f1,▁█▇███▆▇████████████████████████████████
dev_loss,█▆█▃▂▁▁▁▁▂▂▁▂▁▂▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.88015
best_dev_f1,0.829
best_test_auroc,0.75775
best_test_f1,0.548
best_train_auroc,0.89588
best_train_f1,0.792
dev_auroc,0.87996
dev_f1,0.812
dev_loss,1.40455
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 5rjc1dxl with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0009437749990586592
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.143549…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▇▇████████████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.87006
best_dev_f1,0.401
best_test_auroc,0.64331
best_test_f1,0.489
best_train_auroc,0.81273
best_train_f1,0.465
dev_auroc,0.86801
dev_f1,0.401
dev_loss,1.9783
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ewrlamkm with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0053116015991850184
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.248
best_test_auroc,0.5
best_test_f1,0.041
best_train_auroc,0.5
best_train_f1,0.116
dev_auroc,0.5
dev_f1,0.248
dev_loss,1.56488
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 3ss2z2nt with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004669603325459131
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▆▇█▅▆▇█▅▆▇█▅▇▇█▁▇▇█▁▇██▁▇██▆▇█▅▆▇█▅▆▇█▅▇
train_loss_epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,165.0
test_auroc,0.5
test_f1,0.489
train_auroc,0.5
train_f1,0.465
train_loss_batch,1.19011
train_loss_epoch,1.20156


[34m[1mwandb[0m: Agent Starting Run: 5p6pqp2q with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0003554982515714886
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▅▅▅▅▅▅▆▆▇█▄▇▄▁▄██▄▄▄▄▄▄▄▅▆▄▅▄▅▅▅▄▇▅▄▇▇▅▅
dev_f1,▁███████████████████████████████████████
dev_loss,▁▂▅▆▇▇▇▇████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.7375
best_dev_f1,0.401
best_test_auroc,0.58053
best_test_f1,0.489
best_train_auroc,0.65268
best_train_f1,0.465
dev_auroc,0.50083
dev_f1,0.401
dev_loss,1.95652
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: hc7mizkq with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005990001789592788
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▅▁▁▁▁▁▁█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.49663
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: iyr2vrh9 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004395370553994276
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▄▅▅▆▆▆▇▇▇▇▇▆▆▆▇▇▇█▇▇███▇██████████████
dev_f1,▁▁▂▂▂▁▁▂▁▄▆█████████████████████████████
dev_loss,██████▇▇▃▃▇▃▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.89909
best_dev_f1,0.826
best_test_auroc,0.60703
best_test_f1,0.575
best_train_auroc,0.8377
best_train_f1,0.748
dev_auroc,0.91076
dev_f1,0.82
dev_loss,0.62067
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 9ie811h5 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006513278286791923
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▁▃▁▄▅▇█▆▇▇█▇███████████▇▇███████████▇██
dev_f1,▁▁▁▁▂▂▂▂▂▃▂▁▃▅▆▆▅▅▅▆▆▇▆▇▆▆██████████████
dev_loss,██████████▅█▄▄▃▃▄▅▅▄▄▃▃▃▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.90387
best_dev_f1,0.821
best_test_auroc,0.71367
best_test_f1,0.572
best_train_auroc,0.84068
best_train_f1,0.771
dev_auroc,0.89688
dev_f1,0.819
dev_loss,0.62604
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ztodgp8p with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004564094672597603
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄█▃▃▃▃▃▃▆▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
dev_f1,▁█▆▆▆▆▆▆▇▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
dev_loss,█▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.59352
best_dev_f1,0.595
best_test_auroc,0.50148
best_test_f1,0.308
best_train_auroc,0.52633
best_train_f1,0.429
dev_auroc,0.50214
dev_f1,0.419
dev_loss,0.69764
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: sv9tqqjp with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004743724901483197
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▇█▃▄▃▃▃▃▆▄▆▃█▄▃▄▄▆▄▃▆▄▄▄▄▃▄▄▄▁▄▄▃▄▄▃▄▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.44727
best_dev_f1,0.401
best_test_auroc,0.5381
best_test_f1,0.489
best_train_auroc,0.56535
best_train_f1,0.465
dev_auroc,0.57732
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 9montbhy with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003801567594889832
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▂▃▄▃▆▆▆▇█▇▇▇▅▆▆▆▆▆▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.64291
best_dev_f1,0.401
best_test_auroc,0.57686
best_test_f1,0.489
best_train_auroc,0.51178
best_train_f1,0.465
dev_auroc,0.77871
dev_f1,0.401
dev_loss,0.69318
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 04gli9qj with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004659957515575722
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▇▇▇▇▇██▇▇█▆▇██▇█████▇██▇██████████████
dev_f1,▁▁▁▁▁▁▂▂▂▂▂▁▁▃▆▆█▆▆▆█▆▇▆█▇██████████████
dev_loss,████████▇█▇▇█▃▂▂▃▃▂▂▂▂▃▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.85334
best_dev_f1,0.819
best_test_auroc,0.59835
best_test_f1,0.565
best_train_auroc,0.8321
best_train_f1,0.744
dev_auroc,0.88807
dev_f1,0.818
dev_loss,0.62243
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: n5tr9ii7 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006644892168964413
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: tf1jogvk with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002084355474087958
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 2veeslri with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 7.654871200351376e-05
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5167
best_dev_f1,0.498
best_test_auroc,0.51029
best_test_f1,0.468
best_train_auroc,0.50813
best_train_f1,0.503
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: gwg5b89t with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002474515752847087
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.4998
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 9vmd412i with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0021614710786567125
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
dev_f1,▁█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.47232
best_dev_f1,0.472
best_test_auroc,0.49237
best_test_f1,0.491
best_train_auroc,0.62179
best_train_f1,0.582
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 53qdyvtb with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0046078632344387395
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6nyy9ahf with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0031991146602830607
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▅▇▄▇▃▄▂▄▇▄▆█▆▆█▃▇▅▄▅▇▄▆▇▇▅▃▅▇▅▁▆▅▆▇▄▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.50702
best_dev_f1,0.401
best_test_auroc,0.48998
best_test_f1,0.489
best_train_auroc,0.452
best_train_f1,0.465
dev_auroc,0.52719
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 3rtea3ew with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004565767543643143
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.248
best_test_auroc,0.5
best_test_f1,0.041
best_train_auroc,0.5
best_train_f1,0.116
dev_auroc,0.5
dev_f1,0.248
dev_loss,1.56488
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: uvsb6s5w with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0055213420562818544
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▆▆▇██▇█▇▆▆▆▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49296
best_dev_f1,0.401
best_test_auroc,0.52073
best_test_f1,0.489
best_train_auroc,0.432
best_train_f1,0.465
dev_auroc,0.71728
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: aa352tjo with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002755738049988334
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,█▁▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.55786
best_dev_f1,0.474
best_test_auroc,0.55356
best_test_f1,0.486
best_train_auroc,0.59874
best_train_f1,0.533
dev_auroc,0.81188
dev_f1,0.436
dev_loss,0.69043
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ogpbv50w with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004139847132521092
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.332098…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▂▂▂▄▅▆▇▇▇▇▇███▇██████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▆▆▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇█▇
dev_loss,█████████▇▇▇▇▇▇▅▅▃▄▃▃▃▃▂▃▂▂▂▂▂▂▁▂▂▂▂▁▂▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.8859
best_dev_f1,0.809
best_test_auroc,0.62908
best_test_f1,0.575
best_train_auroc,0.86691
best_train_f1,0.691
dev_auroc,0.88176
dev_f1,0.774
dev_loss,0.63979
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: xaop3xzi with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005418255992606141
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▅▅▇▅▅█▅▅▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▆▅▅▅▅▅▆▆▆▆▆▆▆▆▆
dev_f1,▁▁▁▁▂▁▂▂▂▂▂▂▂▂▂▂▃▂▇█████████████████████
dev_loss,█████▇▇▇▇▇▇▇▇▇▇▆▅▃▂▁▂▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.86756
best_dev_f1,0.824
best_test_auroc,0.5695
best_test_f1,0.6
best_train_auroc,0.73279
best_train_f1,0.749
dev_auroc,0.8427
dev_f1,0.823
dev_loss,0.62164
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4a1zepi2 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0024357945265268063
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▇█▇▆▇▆▆▄▂▁▄▃▃▂▆▅▁▁▂▆▂▄▅▅▂▂▄▅▄▂▃▅▃▃▁▁▂▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.58805
best_dev_f1,0.401
best_test_auroc,0.60181
best_test_f1,0.489
best_train_auroc,0.68978
best_train_f1,0.465
dev_auroc,0.43822
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: w0mseneu with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0026516053257329905
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: gg72wyqs with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005595080902438391
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▂█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.62981
best_dev_f1,0.637
best_test_auroc,0.50694
best_test_f1,0.508
best_train_auroc,0.56689
best_train_f1,0.577
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: cw6ubwzt with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.009653331385651998
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: kt2wmnax with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002900059903131154
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▆▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,█▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁██████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.62015
best_dev_f1,0.622
best_test_auroc,0.50345
best_test_f1,0.496
best_train_auroc,0.57106
best_train_f1,0.588
dev_auroc,0.57385
dev_f1,0.537
dev_loss,0.82594
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: xwsshei1 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0029684892622258733
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄█▅▂▄▂▆▃▅▃▄▂▃▁▃▂▅▃▄▄▂▃▃▂▂▃▂▂▂▃▅▃▃▅▃▃▃▂▂▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.53279
best_dev_f1,0.401
best_test_auroc,0.4906
best_test_f1,0.489
best_train_auroc,0.54432
best_train_f1,0.465
dev_auroc,0.51497
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: c84ua430 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004219249168596273
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▇▁▇▅▄▄▅▇████▇▆▇▇▆▇▇▇▇▇██▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.71319
best_dev_f1,0.401
best_test_auroc,0.58285
best_test_f1,0.489
best_train_auroc,0.61949
best_train_f1,0.465
dev_auroc,0.72146
dev_f1,0.401
dev_loss,0.69317
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 0ahlml59 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004949644357532503
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▁▁▁▁▆▆▆▆▇▇███████████████▇██▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.43216
best_dev_f1,0.401
best_test_auroc,0.50014
best_test_f1,0.489
best_train_auroc,0.31492
best_train_f1,0.465
dev_auroc,0.75409
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5qlrqgk6 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006695171166769337
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 7j3zzz4w with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0006352811530587951
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▁▂▁▃▅▆▆▆▆▆▆▇▇▇▇▇███████████▇████▇██████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.59346
best_dev_f1,0.401
best_test_auroc,0.55354
best_test_f1,0.489
best_train_auroc,0.57202
best_train_f1,0.465
dev_auroc,0.81145
dev_f1,0.401
dev_loss,0.69328
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: vrf8czce with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003803007942063403
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▅▆▇███████████████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.56094
best_dev_f1,0.423
best_test_auroc,0.49834
best_test_f1,0.488
best_train_auroc,0.50715
best_train_f1,0.478
dev_auroc,0.62686
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: barkstxw with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006304034042754778
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▄▄▄▄▃▃▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.65745
best_dev_f1,0.551
best_test_auroc,0.53806
best_test_f1,0.29
best_train_auroc,0.53254
best_train_f1,0.405
dev_auroc,0.58013
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kzbac2vz with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004038471901403575
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▆▆▆▆▇▇▇▇██████████████████████████████
dev_f1,▁▂██▇▇███▇████▇██████████▇▇▇████▇██▇▇▇██
dev_loss,█▅▅▅▅▄▅▃▄▄▄▅▂▁▁▂▂▁▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.90975
best_dev_f1,0.83
best_test_auroc,0.74913
best_test_f1,0.592
best_train_auroc,0.88047
best_train_f1,0.75
dev_auroc,0.89467
dev_f1,0.832
dev_loss,1.37456
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: fpkjldzv with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0008954215973130636
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: fql31rqx with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0023672305841050695
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▃▅▁█▃▅▅▅▆▇▆▆▄▅▅▆▅▅▅▆▄▃▃▄▇▅▅▃▄▄▄▁▅▅▄▁▃▁▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.47774
best_dev_f1,0.401
best_test_auroc,0.51089
best_test_f1,0.489
best_train_auroc,0.37891
best_train_f1,0.465
dev_auroc,0.45571
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: r9lpf4m5 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00017790350787510754
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▂▇▇▇▇▆▆▆▇█████████████████████████████
dev_f1,▁███████████████████████████████████████
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.45106
best_dev_f1,0.401
best_test_auroc,0.56937
best_test_f1,0.489
best_train_auroc,0.69643
best_train_f1,0.465
dev_auroc,0.86731
dev_f1,0.401
dev_loss,0.69322
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 08pick5m with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0046354818584066725
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8mkx05mj with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002561470860955345
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: elugmk00 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0005448425749338619
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▆▃▁▁▁▂▂▂▃▃▄▄▅▇▆▇▇▇██████████████████████
dev_f1,▃▂▁▁▂▃▃▃▃▃▃▃▅▆▆▇▇█▇█████████████████████
dev_loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.66698
best_dev_f1,0.428
best_test_auroc,0.5575
best_test_f1,0.494
best_train_auroc,0.79287
best_train_f1,0.672
dev_auroc,0.68858
dev_f1,0.428
dev_loss,0.69145
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: olbqtb6o with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0009013420040865474
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▁▁▂▂▇████▇█▇▇█████▇▇██████████▇▇██▇███
dev_f1,▁███████████████████████████████████████
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.38824
best_dev_f1,0.401
best_test_auroc,0.49087
best_test_f1,0.489
best_train_auroc,0.57304
best_train_f1,0.465
dev_auroc,0.84701
dev_f1,0.401
dev_loss,0.69318
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: htx6dcb4 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001996048182958971
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: hpdjyq6p with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006463205357564059
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.62758
best_dev_f1,0.401
best_test_auroc,0.47929
best_test_f1,0.489
best_train_auroc,0.41018
best_train_f1,0.465
dev_auroc,0.50525
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: omwozba1 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008876057777190466
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


0,1
dev_auroc,█▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▃▂▁█▂▂▄▃▂▁█▂▁█▂▁▄▃▂▁█▂▁▄▂▁▄▃▂▁█▂▁▄▂▁▁▃▂▁
train_loss_epoch,▁▇▇▅▇▆▆▇▅▇▆█▆▅▆▅▅▆▆▆█▆▇▇█▇█▅█▆█▇▇▅▆▆▆▆▇▆

0,1
dev_auroc,0.50059
dev_f1,0.25
dev_loss,0.98331
epoch,190.0
test_auroc,0.49388
test_f1,0.324
train_auroc,0.43666
train_f1,0.402
train_loss_batch,1.17838
train_loss_epoch,1.18257


[34m[1mwandb[0m: Agent Starting Run: t6u3xh96 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001884534914322948
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▇▇▇▁▇▇▇▇▇▇▇▄▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.50345
best_dev_f1,0.401
best_test_auroc,0.47536
best_test_f1,0.489
best_train_auroc,0.50103
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 4whqi87z with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007076631396293739
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 9ynfcu7z with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005593091834492637
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▇██▆▆▅▅▄▅▄▃▃▁▂▂▂▂▂▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▆▇████████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.83875
best_dev_f1,0.401
best_test_auroc,0.61507
best_test_f1,0.489
best_train_auroc,0.81395
best_train_f1,0.465
dev_auroc,0.66115
dev_f1,0.401
dev_loss,1.97259
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 6os9k693 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0024016826680333635
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▂▂▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5367
best_dev_f1,0.401
best_test_auroc,0.44955
best_test_f1,0.489
best_train_auroc,0.5409
best_train_f1,0.465
dev_auroc,0.79213
dev_f1,0.401
dev_loss,0.69326
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: x3bodbpz with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005706651100351132
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.143380…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▃▂▃▂▂▄▅▅▅▆▆▇███████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▂▂▇▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.58661
best_dev_f1,0.401
best_test_auroc,0.55353
best_test_f1,0.489
best_train_auroc,0.47499
best_train_f1,0.465
dev_auroc,0.76222
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 2jnbj2wb with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001753070579922542
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.040 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.143921…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▄▃█▃▅▃▃▃▃▃▄▆▃▃▃▄▃▄▄▄▅▁▄▄▃▃▄▄▇▆▄▃▄▆▃▄▃▃▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.51656
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: au8ck9bz with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006527074541188287
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: b2i4bcq0 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004424092906609624
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▅▆▅▅▇█▂█▅▅▅▃▄▆▅▁▂▄▄▇▄▆▄▃▁▁▄▆▅▅▆▆▅▂▁▄▅▂▅▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.47508
best_dev_f1,0.401
best_test_auroc,0.45813
best_test_f1,0.489
best_train_auroc,0.54305
best_train_f1,0.465
dev_auroc,0.49524
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: gjgfwmwi with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004858256625876213
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▅▅▅▆▅▆▅▄▅▇▁▅▇▅▅▆▅▅▅▇▆▅▆▅▇▅▅▅▃▆▅▄▅▄▅▅▅▅▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.54202
best_dev_f1,0.401
best_test_auroc,0.47537
best_test_f1,0.489
best_train_auroc,0.54235
best_train_f1,0.465
dev_auroc,0.5009
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: oxqedszv with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003755721702943268
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.50024
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: qeiqiliy with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001371874203809769
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▆▇█▇█▇▅▅▅▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.86183
best_dev_f1,0.401
best_test_auroc,0.64145
best_test_f1,0.489
best_train_auroc,0.75359
best_train_f1,0.465
dev_auroc,0.79823
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: k8576v2h with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00887165940290142
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▆█▇▇▇██▅█▁▅▃▅▂▅▆▇▆▂▂▆▅▄▂▂▂▆▅▂▄▂▆▂▄▅▂▂▂▂▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.6795
best_dev_f1,0.401
best_test_auroc,0.59073
best_test_f1,0.489
best_train_auroc,0.6788
best_train_f1,0.465
dev_auroc,0.46906
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 7sjl3kib with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0027178628060372497
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.143058…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▆▇▇███████████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.34502
best_dev_f1,0.401
best_test_auroc,0.48228
best_test_f1,0.489
best_train_auroc,0.57326
best_train_f1,0.465
dev_auroc,0.91251
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: s1k1vzdv with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008622581581683256
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142105…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.61736
best_dev_f1,0.401
best_test_auroc,0.48134
best_test_f1,0.489
best_train_auroc,0.4001
best_train_f1,0.465
dev_auroc,0.54635
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 6ruakn0u with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0016797654837150807
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▅▇▁▂██▇▆▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.54179
dev_f1,0.401
dev_loss,0.69315
epoch,61.0
test_auroc,0.47109
test_f1,0.468
train_auroc,0.49064
train_f1,0.485
train_loss_batch,0.69316
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: jqtc57f3 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0016322977626327486
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▁▃▅▃▅▅▅▆▆▆▇▇▇▇▇████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.75096
best_dev_f1,0.401
best_test_auroc,0.54834
best_test_f1,0.489
best_train_auroc,0.73943
best_train_f1,0.465
dev_auroc,0.81039
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: nbqex0r0 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005140174693840343
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▆▁▂▆▇▇▇▇▇███████████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.54168
best_dev_f1,0.414
best_test_auroc,0.49051
best_test_f1,0.507
best_train_auroc,0.50951
best_train_f1,0.524
dev_auroc,0.62237
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: e2cubqnu with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004278246173001199
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█████████████████████████████▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁███████████
dev_loss,▅▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁███████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49623
best_dev_f1,0.251
best_test_auroc,0.48688
best_test_f1,0.117
best_train_auroc,0.49972
best_train_f1,0.136
dev_auroc,0.49623
dev_f1,0.251
dev_loss,0.98349
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9r8bchgx with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008120996898644824
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


VBox(children=(Label(value='0.030 MB of 0.040 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.750708…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▆▅▃▄▇██▇▇█████████████████████████████
dev_f1,▁▇█▇▇▇█▇▇███████████████████████████████
dev_loss,▄▃▂█▂▃▁▅▃▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.90857
best_dev_f1,0.863
best_test_auroc,0.73745
best_test_f1,0.596
best_train_auroc,0.8599
best_train_f1,0.794
dev_auroc,0.91668
dev_f1,0.862
dev_loss,1.43157
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ytuo5edr with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00994469570998724
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▅▃▇▃▃▃▇▄▇▃▅▃▆▃▆▁█▄▃▃▃▃▅▇▄▄▇▃▄▄▂▆▃▆▃▇▇▃▄▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.72495
best_dev_f1,0.401
best_test_auroc,0.56834
best_test_f1,0.489
best_train_auroc,0.56009
best_train_f1,0.465
dev_auroc,0.45736
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: lraabgrh with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00013709194749794755
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.143340…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▅▅▅▅▅▅▅▅▅▆▅█▅▅▅▅▅▅▅▄▅▅▄▅▅▅▅▅▅▅█▄▅█▁▅▄▅▅▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▅▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.53755
best_dev_f1,0.401
best_test_auroc,0.46907
best_test_f1,0.489
best_train_auroc,0.51402
best_train_f1,0.465
dev_auroc,0.49839
dev_f1,0.401
dev_loss,0.71446
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: yk0bqxi8 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0005972438449337976
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.55622
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: m5y00aph with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004108843443685566
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▇███▇▇█▆▇▇█▇▇▇▇████████████████████████
dev_f1,▁▁▂▁▂▄▂▁▆▆▆▄▇▇▇▇▇█▇▇▇▇▇▇▇██▇█▇██████████
dev_loss,██▇▇▆▇▄▅▇▆▄▃▃▂▂▂▃▂▂▂▂▂▁▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.89596
best_dev_f1,0.571
best_test_auroc,0.6004
best_test_f1,0.572
best_train_auroc,0.84838
best_train_f1,0.69
dev_auroc,0.89803
dev_f1,0.563
dev_loss,0.67248
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0whfrg1a with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007311428673059729
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▇▇██████▆▃▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▆▆▁▇████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.77307
best_dev_f1,0.401
best_test_auroc,0.60151
best_test_f1,0.489
best_train_auroc,0.7462
best_train_f1,0.465
dev_auroc,0.40207
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: r6dku0nm with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0046717642953235765
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▅▅▆▆▆▅▆▆▆▆▇▇▇▇▇▇███████████████▇█▇████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.37037
best_dev_f1,0.401
best_test_auroc,0.52543
best_test_f1,0.489
best_train_auroc,0.50896
best_train_f1,0.465
dev_auroc,0.72953
dev_f1,0.401
dev_loss,0.69323
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 0k9u6xrw with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005934352252207137
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▃▃▃▃▁▃▃▃▃▃▃▃█▃▃▃▃▃▃▃▃▃▃▅▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.77333
best_dev_f1,0.401
best_test_auroc,0.5696
best_test_f1,0.489
best_train_auroc,0.6452
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: mcn5c7yw with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0064216174494701826
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▇▆██▆▇▄▅▅▇▆█▇▇▅██▇██▇▇████████████████
dev_f1,▁▁▂▂▁▂▂▂▂▂▂▂▂▂▂█▂▅███▅██████████████████
dev_loss,██▇▇▇▇▆▇▇▇▆▇▅▇▆▅▇▅▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.89564
best_dev_f1,0.539
best_test_auroc,0.65728
best_test_f1,0.612
best_train_auroc,0.82464
best_train_f1,0.697
dev_auroc,0.893
dev_f1,0.529
dev_loss,0.67634
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: eqwcho1j with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008755832957709183
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▇█▄▂▁▃▁▂▂▃▂▃▂▂▂▂▂▂▂▂▂▂▂▂▃▃▂▂▃▂▂▃▂▂▂▂▂▂▃▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.70773
best_dev_f1,0.401
best_test_auroc,0.61676
best_test_f1,0.489
best_train_auroc,0.66073
best_train_f1,0.465
dev_auroc,0.52228
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: f0h8gey6 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0020820912107132336
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▆▇▇▇▇█████████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.40607
best_dev_f1,0.401
best_test_auroc,0.43771
best_test_f1,0.489
best_train_auroc,0.54655
best_train_f1,0.465
dev_auroc,0.85813
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 28h6itvc with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002357719209644232
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.55629
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sva9bp4q with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00282054335861772
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.246474…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.248
best_test_auroc,0.5
best_test_f1,0.041
best_train_auroc,0.5
best_train_f1,0.116
dev_auroc,0.5
dev_f1,0.248
dev_loss,1.56488
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 1hm2q7rl with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004950423260632159
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,██▂▂▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,██▄▁▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▇█▇███████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.55908
best_dev_f1,0.559
best_test_auroc,0.49158
best_test_f1,0.396
best_train_auroc,0.51099
best_train_f1,0.468
dev_auroc,0.48912
dev_f1,0.396
dev_loss,1.98736
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: n3vurn1j with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003539686786139399
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
dev_auroc,▁███████████████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁█▁▁▁▁█▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,59.0
test_auroc,0.43795
test_f1,0.489
train_auroc,0.49689
train_f1,0.465
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: iz6sdeew with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005841169944407098
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
dev_auroc,▁▄▅▅▅▄▄▆█▆▄▃▃▄▄▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.44617
dev_f1,0.401
dev_loss,0.69315
epoch,145.0
test_auroc,0.48521
test_f1,0.38
train_auroc,0.49871
train_f1,0.374
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: ypvidlib with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0018931636791946012
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▆▇███▇▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.48381
best_dev_f1,0.401
best_test_auroc,0.53631
best_test_f1,0.489
best_train_auroc,0.58004
best_train_f1,0.465
dev_auroc,0.69866
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ff9wngwu with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0020927261803963047
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▄▅▆▆▇█▇▇▆▄▆▇▇▇▇▇▇▇████████████████████
dev_f1,▁▁▁▁▂▂▂▂▂▂▆▂▄▇▇█▇█▇▇▇▇██████████████████
dev_loss,████▇▇▇▇▅▃▄▇▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.81045
best_dev_f1,0.569
best_test_auroc,0.59089
best_test_f1,0.559
best_train_auroc,0.83902
best_train_f1,0.701
dev_auroc,0.86101
dev_f1,0.568
dev_loss,0.67204
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: pn69400x with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0005018566492359089
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▃▇▆▅▄▃▂▄▅▄▄▆▆▅▆▅▅▆▆▇▇▇▇▆▇▇▇███████████
dev_f1,▁███████████████████████████████████████
dev_loss,█▅▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.63564
best_dev_f1,0.401
best_test_auroc,0.59998
best_test_f1,0.489
best_train_auroc,0.66901
best_train_f1,0.465
dev_auroc,0.70126
dev_f1,0.401
dev_loss,0.69319
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: x5fc4suw with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0034613260946438924
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142570…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▆▅▆▆▇▇▇▇▇██▇██████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.43391
best_dev_f1,0.413
best_test_auroc,0.45581
best_test_f1,0.507
best_train_auroc,0.54331
best_train_f1,0.519
dev_auroc,0.47537
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 98244mpl with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007207986115167315
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: kayhmbgb with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003573516759525602
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.55624
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: fpb3axp5 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.009524145230561786
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.037 MB of 0.040 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.916126…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▆▇█▆▆▅▃▂▅▁▂▆▂▄▇▇▆▃█▇▆▇█▆▅▅▆▆▆▇▇▆▅▇▅▆▅▅▆▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▇▁▅▆▅▅▆▇▇███████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.8282
best_dev_f1,0.401
best_test_auroc,0.56086
best_test_f1,0.489
best_train_auroc,0.77153
best_train_f1,0.465
dev_auroc,0.82259
dev_f1,0.401
dev_loss,1.9783
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: nkpko1ey with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0069451493421100186
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃█▄▃▄▅▅▅▆▄▄▄▄▄▅▆▃▃▆▆▄▄▄▅▅▄▁▂▄▄▄▅▅▆▆▄▄▄▅▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49037
best_dev_f1,0.401
best_test_auroc,0.52365
best_test_f1,0.489
best_train_auroc,0.48134
best_train_f1,0.465
dev_auroc,0.55463
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: tijivtx3 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003043568247011213
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: fpvnvets with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0007556891463911313
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▅▆▄▆▅▆▄▅▆▆▃▇▅▆▄▆█▅▆▅▅▅▆▃▅▅▆▄▆▇▆▇▆▅▄▁▆▅▄▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.50047
best_dev_f1,0.401
best_test_auroc,0.50732
best_test_f1,0.489
best_train_auroc,0.53256
best_train_f1,0.465
dev_auroc,0.48296
dev_f1,0.401
dev_loss,0.69326
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 1jtz9s8s with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00461710116272292
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▄▄▅▄▄▄▄▄▄█▄▄▄▅▄▄▅▄▄▄▄▄▅▅▄▄▄▅▄▄▄▄▄▄▄▄▄▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.45223
best_dev_f1,0.401
best_test_auroc,0.48421
best_test_f1,0.489
best_train_auroc,0.41296
best_train_f1,0.465
dev_auroc,0.50981
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: pk5xw5zp with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006186314618472318
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.332371…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▄▄▄▄▄▃▄▁▄▄▃▄▄▁▃▄▄▄▄▄▅▄▂▄▇▄▃▄▄▅▄▃▄▄▄▄▄▄▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.53207
best_dev_f1,0.401
best_test_auroc,0.44933
best_test_f1,0.489
best_train_auroc,0.51417
best_train_f1,0.465
dev_auroc,0.4956
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ut61dmdm with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.000952162842730338
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▅▄▅▅▅▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇▇▇▇██████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▄▆▆▆▆█▆██▇███▆█████████
dev_loss,█▆▆▆▆▆▆▆▆▆▆▆▆▆▅▅▄▄▃▃▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.88188
best_dev_f1,0.82
best_test_auroc,0.62604
best_test_f1,0.574
best_train_auroc,0.86317
best_train_f1,0.732
dev_auroc,0.89767
dev_f1,0.82
dev_loss,0.6228
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 2fc3go10 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0011434939825053084
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▄▄▄▄▄▅▅▆▅▆▆▇▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▄▆▆▆▆███████████████████
dev_loss,█▇▆▆▆▆▆▆▆▆▆▆▆▆▆▅▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.90609
best_dev_f1,0.822
best_test_auroc,0.62717
best_test_f1,0.578
best_train_auroc,0.85177
best_train_f1,0.731
dev_auroc,0.90589
dev_f1,0.822
dev_loss,0.62286
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: amzu8o4u with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0025388977319733743
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▇▇▇▇█████▇▇▇▇█████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.60585
best_dev_f1,0.541
best_test_auroc,0.54728
best_test_f1,0.513
best_train_auroc,0.49279
best_train_f1,0.513
dev_auroc,0.60134
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: tcduf0ke with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0016733252798680491
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▇▄▅▅▅▅▆▆▇▇██████████████▇█▇▇▇▇▇▇█▇█▇█▇▇
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.57113
best_dev_f1,0.408
best_test_auroc,0.546
best_test_f1,0.48
best_train_auroc,0.48842
best_train_f1,0.495
dev_auroc,0.59869
dev_f1,0.404
dev_loss,0.69291
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: yg0pnplf with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00029418846370769976
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▁▄▃▂▃▃▄▅▇▆▆▆▆▆▆▇▇▇▇▇█▆▇▆▇▇▇█▆▆▇▇▆▆▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.44457
best_dev_f1,0.401
best_test_auroc,0.54844
best_test_f1,0.489
best_train_auroc,0.49656
best_train_f1,0.465
dev_auroc,0.64392
dev_f1,0.401
dev_loss,0.69437
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qbgqfvif with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0007629545698802692
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142680…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▇▇▇▇▇▇▇▇▇█████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.38782
best_dev_f1,0.401
best_test_auroc,0.57832
best_test_f1,0.489
best_train_auroc,0.7141
best_train_f1,0.465
dev_auroc,0.89453
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: mmz4xbzz with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0002036826363231173
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▆▇██▁▇██▁▇██▁▇▇█▁▇▇█▁▇▇█▅▇▇█▅▆▇█▅▆▇█▅▆▇█
train_loss_epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.248
dev_loss,1.56488
epoch,155.0
test_auroc,0.45245
test_f1,0.489
train_auroc,0.53737
train_f1,0.465
train_loss_batch,1.40292
train_loss_epoch,1.4128


[34m[1mwandb[0m: Agent Starting Run: r1caw4gd with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.009137443385849847
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


VBox(children=(Label(value='0.006 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.247185…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▁▂▂▁▆▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▆▆▇▇▇▇▇▇▇▇▇▇▇
dev_f1,█▂▂▂▄▃▃▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▄▄▅▅▅▅▅▅▅▅▅▅▅
dev_loss,▄███▇▇█▁▂▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.62824
best_dev_f1,0.58
best_test_auroc,0.53238
best_test_f1,0.476
best_train_auroc,0.56804
best_train_f1,0.483
dev_auroc,0.59974
dev_f1,0.501
dev_loss,1.53138
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: lthlqtjg with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00039872230319192936
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▆█▇▁▃▂▃▃▃▃▃▂▃▃▃▄▅▅▅▄▄▅▅▄▄▃▄▅▄▄▄▃▅▅▅▄▄▃▄▅
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.45005
dev_f1,0.401
dev_loss,0.69321
epoch,55.0
test_auroc,0.49467
test_f1,0.494
train_auroc,0.51489
train_f1,0.514
train_loss_batch,0.69317
train_loss_epoch,0.69317


[34m[1mwandb[0m: Agent Starting Run: f5ek8zw0 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00873490678896526
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


0,1
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
dev_f1,█▄▄▄▄▄▄▄▄▄▄▄▄▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▄▄▄▄▄▄▄▄▄▄▄▄▆██████████████████████████
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▇█▇▇▇█▇▇█▇▇▇▄▅▁▄▄▄▁▄▄▄▁▄▄▄▄▄▄▃▄▄▄▃▄▄▄▃▄
train_loss_epoch,█▇▇▇▇▇▇▇▇▇▇▇▇▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.49964
dev_f1,0.402
dev_loss,1.9781
epoch,165.0
test_auroc,0.5124
test_f1,0.12
train_auroc,0.48405
train_f1,0.314
train_loss_batch,1.21118
train_loss_epoch,1.20234


[34m[1mwandb[0m: Agent Starting Run: 43unwv3g with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003432312047247476
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▃▆▅▅▁▅▅▅▅▅▅▅▃▂█▅▂▆▅▅▅▃█▅▅▇▅▅▅▅▅▅▆▅▅▅▆▅▅▅
dev_f1,▁███████████████████████████████████████
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.50725
dev_f1,0.401
dev_loss,0.69315
epoch,82.0
test_auroc,0.45398
test_f1,0.234
train_auroc,0.48572
train_f1,0.266
train_loss_batch,0.69316
train_loss_epoch,0.69316


[34m[1mwandb[0m: Agent Starting Run: 4jpodmdc with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0048119280861593705
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▂▃▄▅████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.23692
best_dev_f1,0.401
best_test_auroc,0.5105
best_test_f1,0.489
best_train_auroc,0.33804
best_train_f1,0.465
dev_auroc,0.79156
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: qmiyjfdq with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00358993406675753
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▇█████▇██████▇██▇▇▇▇▇▇▇▇█▇▇██▇███▇▇▇███
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▇▅▅▄▇▆▆▇▇██████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.58604
best_dev_f1,0.401
best_test_auroc,0.49365
best_test_f1,0.489
best_train_auroc,0.71419
best_train_f1,0.465
dev_auroc,0.77905
dev_f1,0.401
dev_loss,1.97827
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: x32o060r with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005406264676173507
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▂▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.592
best_dev_f1,0.401
best_test_auroc,0.56201
best_test_f1,0.489
best_train_auroc,0.50226
best_train_f1,0.465
dev_auroc,0.46228
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: o77k510n with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.000348614831187619
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.327941…

0,1
dev_auroc,▄▅██▆▅▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
dev_f1,▁███▇▇█▇▇███████████████████████████████
dev_loss,█▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▆▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▃▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.44702
dev_f1,0.404
dev_loss,0.69289
epoch,63.0
test_auroc,0.46966
test_f1,0.164
train_auroc,0.4934
train_f1,0.159
train_loss_batch,0.69806
train_loss_epoch,0.69729


[34m[1mwandb[0m: Agent Starting Run: mgr0zxrp with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0018088059522676375
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ephwh2j6 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004410640461511561
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▃██████████████████████████████████████
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▇▅▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▁▂▂▂▁▂▂▂▁▂▂▂▁
train_loss_epoch,█▆▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,163.0
test_auroc,0.5
test_f1,0.041
train_auroc,0.5
train_f1,0.116
train_loss_batch,1.00334
train_loss_epoch,0.99965


[34m[1mwandb[0m: Agent Starting Run: zq7sv1be with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003437191101097705
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▃▄▅▅▅▆▆▆▇▇▇▇▆▇▇▆▇▇▇▇▇█████████████████
dev_f1,▄▁▁▂▂▂▂▂▂▂▂▅▅█▅█████████████████████████
dev_loss,██▇▇▆▇▇▆▇▆▅▄▃▂▃▂▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.75209
best_dev_f1,0.53
best_test_auroc,0.64015
best_test_f1,0.592
best_train_auroc,0.7152
best_train_f1,0.667
dev_auroc,0.81123
dev_f1,0.525
dev_loss,0.67717
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 6mn18vgi with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0014796962473902135
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ftdpquoa with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008155907433393337
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃█▃▃▃▃▃▃▃▃▃▃▃▁▃▃▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.56772
best_dev_f1,0.401
best_test_auroc,0.45539
best_test_f1,0.489
best_train_auroc,0.55542
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: j31r617d with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006382920616552753
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gw4eynfx with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008395942376474529
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.332298…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▇█▇▇▆▄▆▂▂▂▂▂▂▂▂▁▂▂▁▂▃▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.74167
best_dev_f1,0.401
best_test_auroc,0.60851
best_test_f1,0.489
best_train_auroc,0.70317
best_train_f1,0.465
dev_auroc,0.4749
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 9u7wc2wk with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00202341789501813
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.040 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.144768…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████████████
dev_f1,▁▇█████████▇████████████████████████████
dev_loss,█▅▄▄▄▃▃▃▂▃▂▁▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.88967
best_dev_f1,0.85
best_test_auroc,0.77552
best_test_f1,0.594
best_train_auroc,0.90086
best_train_f1,0.803
dev_auroc,0.88966
dev_f1,0.852
dev_loss,1.38377
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: cq29t1x7 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0024246561397013463
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 35g36bwm with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005659109727354284
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.248
best_test_auroc,0.5
best_test_f1,0.041
best_train_auroc,0.5
best_train_f1,0.116
dev_auroc,0.5
dev_f1,0.248
dev_loss,1.56488
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: y1wfthia with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0018545165244962436
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: i6dvuas4 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004929021644484661
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.332589…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▇▅▇▅▄▅▄▅▆▆▅▆▅▆▇▄▅▅▆▅▆▅▆▅▆█▅▆▅▆▆▆▅▄▅▄▅▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.3664
best_dev_f1,0.401
best_test_auroc,0.45346
best_test_f1,0.489
best_train_auroc,0.50518
best_train_f1,0.465
dev_auroc,0.52604
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 66uam569 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0054096790152620144
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.040 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.144453…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▇██▆▆▅▅▆▅▅▅▅▅▅▅▅▆▅▅▆▆▅▆▆▅▅▆▆▆▅▅▆▆▅▆▆▅▆▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.62651
best_dev_f1,0.401
best_test_auroc,0.51893
best_test_f1,0.489
best_train_auroc,0.71978
best_train_f1,0.465
dev_auroc,0.7558
dev_f1,0.401
dev_loss,1.9783
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 95asenp4 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0021877999140457963
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▇██▇▇███▇▇▇▇▇▇▇███████████████████████
dev_f1,▁▇▇▇█▇▇███▇█████████████████████████████
dev_loss,█▅▃▆▁▃▁▅▃▁▁▂▂▂▂▂▂▂▁▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.88335
best_dev_f1,0.849
best_test_auroc,0.68096
best_test_f1,0.551
best_train_auroc,0.88173
best_train_f1,0.77
dev_auroc,0.88084
dev_f1,0.823
dev_loss,1.3789
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 1v18h79z with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007530308585788766
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▆▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.51562
best_dev_f1,0.401
best_test_auroc,0.45941
best_test_f1,0.489
best_train_auroc,0.52959
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 28u8fc5c with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0019153124284369849
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: zzpqbcqh with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00201284067796361
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 0cufwoex with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00040917303307396016
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▆▆▆▆▆▆▆▇▇▇█▇██████████████████████████
dev_f1,▁███████████████████████████████████████
dev_loss,█▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49514
best_dev_f1,0.401
best_test_auroc,0.57534
best_test_f1,0.489
best_train_auroc,0.69285
best_train_f1,0.465
dev_auroc,0.8603
dev_f1,0.401
dev_loss,0.69353
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sw60btxy with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004186851720204329
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▃▅▄▃▄▄▃▂▁▃▃▄▄▁▄▄▃▂▂▅▁▄▃▂▂▂▄▄▂█▆▂▅▁▁▄▅▄▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.45702
best_dev_f1,0.401
best_test_auroc,0.43959
best_test_f1,0.489
best_train_auroc,0.53803
best_train_f1,0.465
dev_auroc,0.49866
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nwqxztk2 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0036680060292151343
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.65757
best_dev_f1,0.421
best_test_auroc,0.46343
best_test_f1,0.505
best_train_auroc,0.5185
best_train_f1,0.505
dev_auroc,0.5799
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 8d3gqfl4 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005357139744788596
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▆▇██▁▇▇█▅▆▇██▁▇▇█▅▆▇█▅▆▇██▁▇▇█▅▆▇██▁▇▇█▁
train_loss_epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,146.0
test_auroc,0.49206
test_f1,0.043
train_auroc,0.49626
train_f1,0.123
train_loss_batch,1.20573
train_loss_epoch,1.20156


[34m[1mwandb[0m: Agent Starting Run: ik8ra4bo with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00493983392557712
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: mk2g23hr with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0030699512710038834
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁███████████████████████████████████████
dev_f1,▁███████████████████████████████████████
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.4983
best_dev_f1,0.25
best_test_auroc,0.5
best_test_f1,0.041
best_train_auroc,0.50086
best_train_f1,0.119
dev_auroc,0.4983
dev_f1,0.25
dev_loss,0.98345
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sm2v1854 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004334092051184405
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142218…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▂▂▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.71002
best_dev_f1,0.401
best_test_auroc,0.47784
best_test_f1,0.489
best_train_auroc,0.40788
best_train_f1,0.465
dev_auroc,0.66792
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 8rw5otd3 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003142886378605604
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▂▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.67974
best_dev_f1,0.401
best_test_auroc,0.57256
best_test_f1,0.489
best_train_auroc,0.46669
best_train_f1,0.465
dev_auroc,0.40895
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ll38xyyo with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008755804961301234
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▆▆▆▆▅▆▆▆▆▆▆▆▅▅▆▆▆█▆▆▆▆▆▆▆▆▆▅▆▆▆▆▆▆▆▆▆▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.4812
best_dev_f1,0.401
best_test_auroc,0.46637
best_test_f1,0.489
best_train_auroc,0.46133
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: t91i3706 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008324406666805268
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 11ulpqt2 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0022640427103353873
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▆███▅▄▃▃▃▄▃▄▅▅▄▄▄▅▃▄▅▆▅▅▃▄▅▅▅▃▄▅▄▅▄▅▄▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.56184
best_dev_f1,0.401
best_test_auroc,0.56762
best_test_f1,0.489
best_train_auroc,0.63585
best_train_f1,0.465
dev_auroc,0.71536
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: gnhf4eu4 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0027042911221063877
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 6dulx8d6 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006637047619678962
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.62653
best_dev_f1,0.401
best_test_auroc,0.51286
best_test_f1,0.519
best_train_auroc,0.53344
best_train_f1,0.517
dev_auroc,0.51912
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: se4a9aef with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005869550855704841
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 8fbazp0b with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0019339470807865056
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄█▄▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁█▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.61197
best_dev_f1,0.419
best_test_auroc,0.56267
best_test_f1,0.476
best_train_auroc,0.5085
best_train_f1,0.52
dev_auroc,0.58055
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: jxik8sdt with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0015468657032887516
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.55629
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: q6ii5lo6 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0057166487240428495
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 2vsfw1rw with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007365979856380117
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃██▇▅▇▆▆▅▅▅▅▄▄▄▃▂▃▄▃▂▄▂▃▂▁▃▂▄▂▄▃▄▂▄▅▂▃▃▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▂▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.86913
best_dev_f1,0.401
best_test_auroc,0.6202
best_test_f1,0.489
best_train_auroc,0.74366
best_train_f1,0.465
dev_auroc,0.65601
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 7x0dfvk5 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0006729245824423926
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▄▄▄▄▄▄▅▅▅▆▆▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁███████████████████████████████████████
dev_loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.52733
best_dev_f1,0.401
best_test_auroc,0.53294
best_test_f1,0.489
best_train_auroc,0.62354
best_train_f1,0.465
dev_auroc,0.77487
dev_f1,0.401
dev_loss,0.69317
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: k0yahm0t with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 7.501783973355753e-06
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▂▃▅▅▅▅▇█▇█████████████████████████████
dev_f1,▁▁▁▂▄▅▅▅▆▇██████████████████████████████
dev_loss,███▇▅▅▄▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.88079
best_dev_f1,0.814
best_test_auroc,0.65956
best_test_f1,0.54
best_train_auroc,0.84816
best_train_f1,0.784
dev_auroc,0.88079
dev_f1,0.814
dev_loss,1.44235
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: jq4zp1x7 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0023164764168255615
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▅▃▃▃▇▂▃▃▄▄▃▃▄▃▂▃▃▂▃▃▂▃▄█▃▄▁▃▃▂▃▄▄▃▃▂▃▃▃▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.53511
best_dev_f1,0.401
best_test_auroc,0.54884
best_test_f1,0.489
best_train_auroc,0.49116
best_train_f1,0.465
dev_auroc,0.50799
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 0hm8j8es with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0040775673890158124
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▁█▇▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▅█▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
dev_loss,▁▁▁▃▃███████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.59543
best_dev_f1,0.584
best_test_auroc,0.51705
best_test_f1,0.454
best_train_auroc,0.57399
best_train_f1,0.52
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: dxbnu6ib with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004243507380295771
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 1b65isrq with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0023728540675684554
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zhuotjcj with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004855560261545393
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅██████████████████████████████████████
dev_f1,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.60463
best_dev_f1,0.586
best_test_auroc,0.50292
best_test_f1,0.487
best_train_auroc,0.582
best_train_f1,0.571
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 1kmfidj7 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004758441661008102
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▇▇▇▆▇████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.59297
best_dev_f1,0.532
best_test_auroc,0.54007
best_test_f1,0.36
best_train_auroc,0.47638
best_train_f1,0.481
dev_auroc,0.61536
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: bk2mr15g with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007601653899733834
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁███████████████████████████████████████
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▁▁▁▁▁█▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁█▁▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,121.0
test_auroc,0.5
test_f1,0.041
train_auroc,0.30413
train_f1,0.131
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: k6emui9y with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0005993149061863874
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: oasb8tdo with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006209107806224746
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▇█████▆▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.74088
best_dev_f1,0.401
best_test_auroc,0.62728
best_test_f1,0.489
best_train_auroc,0.65779
best_train_f1,0.465
dev_auroc,0.39361
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 51dwn02d with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0016837272252696823
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142783…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▇▆███▇▇▇█▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,█▃▁▁▁▁▁▁▂▂▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
dev_loss,█▂▃▃▃▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.41043
best_dev_f1,0.414
best_test_auroc,0.45546
best_test_f1,0.49
best_train_auroc,0.58211
best_train_f1,0.528
dev_auroc,0.56897
dev_f1,0.407
dev_loss,0.69254
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: lni2oobh with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0014726907724647877
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▇▇█▇██████████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▄▆▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.57584
best_dev_f1,0.401
best_test_auroc,0.58698
best_test_f1,0.489
best_train_auroc,0.67941
best_train_f1,0.465
dev_auroc,0.87895
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: hkfym17e with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004786776305220078
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.023 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.247676…

0,1
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▆▇██▁▇▇█▅▆▇██▁▇▇█▅▆▇██▁▇▇█▅▆▇██▁▇▇█▅▆▇█▅
train_loss_epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,144.0
test_auroc,0.51947
test_f1,0.2
train_auroc,0.46747
train_f1,0.184
train_loss_batch,1.20612
train_loss_epoch,1.20156


[34m[1mwandb[0m: Agent Starting Run: xdbtu37j with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00851912879590564
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▁▁▅████████████████████████████████████
dev_f1,▆▁▁█▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
dev_loss,▆██▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49585
best_dev_f1,0.402
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.50577
best_train_f1,0.488
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: q2e6swzm with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003284512007720941
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁███▇▇██████████████████████████████████
dev_f1,▇▁▁▁██▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▆█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49151
best_dev_f1,0.426
best_test_auroc,0.50082
best_test_f1,0.5
best_train_auroc,0.49469
best_train_f1,0.488
dev_auroc,0.49933
dev_f1,0.401
dev_loss,0.69379
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: o8161srw with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004164695023364357
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.57093
best_dev_f1,0.567
best_test_auroc,0.50086
best_test_f1,0.501
best_train_auroc,0.53147
best_train_f1,0.534
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gmxsed37 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002189230868956882
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▆▅▅▅▅▄▅▅▅▅▆▆▆▆▆▇▇▆▇▇▇▇▇███████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▂▆▅▁██▇█▇████████████████████
dev_loss,████████▇▇▆▆▅▄▆▂▂▂▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.91572
best_dev_f1,0.827
best_test_auroc,0.60601
best_test_f1,0.615
best_train_auroc,0.83781
best_train_f1,0.749
dev_auroc,0.9096
dev_f1,0.823
dev_loss,0.61857
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 7fgzvtqd with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0041332350026603544
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▁▁▃▄▅▄▅▅▅▅▅▆▅▆▆▆▆▆▇▇▇▇▇███████████████
dev_f1,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▄▄██████████████████
dev_loss,███████████████████▇▆▄▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.89615
best_dev_f1,0.819
best_test_auroc,0.5851
best_test_f1,0.589
best_train_auroc,0.85344
best_train_f1,0.729
dev_auroc,0.90609
dev_f1,0.816
dev_loss,0.62282
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: lkvikl3o with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003552303388050806
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁███▇▇▇▇▇▆▆▆▆▆▇▆▇▆▇▆▆▆▆▇▆▆▆▆▆▆▇▇▆▇▆▆▆▆▆▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49569
best_dev_f1,0.401
best_test_auroc,0.51365
best_test_f1,0.489
best_train_auroc,0.38456
best_train_f1,0.465
dev_auroc,0.58226
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 0kckuyk1 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0011751708150304276
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁█▄▆▆▆▆▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████
dev_f1,▂▄▆▆▆▆▆▁▄▅█▅▄▆▆▆▆▅▆▇▇▆▆▇▆▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.76932
best_dev_f1,0.435
best_test_auroc,0.7028
best_test_f1,0.503
best_train_auroc,0.73402
best_train_f1,0.653
dev_auroc,0.81146
dev_f1,0.432
dev_loss,0.68913
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: e0t8ie40 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002566262761022594
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qkbqmday with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002135290352581704
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁███▇▇▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.38132
best_dev_f1,0.401
best_test_auroc,0.57209
best_test_f1,0.489
best_train_auroc,0.64829
best_train_f1,0.465
dev_auroc,0.79159
dev_f1,0.401
dev_loss,0.69335
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vazumdgv with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005499659193455506
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▄▄▄▃▁▆▆▆▆▆▇▇▇▇▇▇█▇▇▇▇▇██▇▇██▇██████████
dev_f1,▁▁▁▁▁▁▁▂▂▂▁▂▂▄█▆█▇██████████████████████
dev_loss,████████████▇▅▃▃▃▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.89089
best_dev_f1,0.822
best_test_auroc,0.56964
best_test_f1,0.587
best_train_auroc,0.80611
best_train_f1,0.741
dev_auroc,0.88482
dev_f1,0.821
dev_loss,0.61913
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: qaqdoby4 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.009742010050799124
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: lfio6w6x with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0008567888334955264
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5559
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 9epifmb7 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001829517177825319
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142367…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▂▁▆▆███▇▇█████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▃▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.84007
best_dev_f1,0.401
best_test_auroc,0.66647
best_test_f1,0.489
best_train_auroc,0.69605
best_train_f1,0.465
dev_auroc,0.83501
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: h4pir0hx with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008548844862097487
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142566…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▁▁▁▅▅▅▆▅▆▆▆▇▆▇▇▆▇▇▆▆▇▇▇▇▇▇█▇▇██▇██████
dev_f1,▁▁▁▁▁▁▁▁▁▁▆▆▇▅▇▂████████████████████████
dev_loss,███████▇▅▅▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.89873
best_dev_f1,0.82
best_test_auroc,0.59337
best_test_f1,0.574
best_train_auroc,0.82134
best_train_f1,0.755
dev_auroc,0.89995
dev_f1,0.819
dev_loss,0.62441
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9280rsgf with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001746827376728542
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▂▂▆▆▆▇▇█▆▇▇█▇█▇▇▇▇▇▇██████████████████
dev_f1,▄▁▁▁▂▂▂▂▂▂▂▂▆▄▆▆▆▆▆▆████████████████████
dev_loss,█▅▆▅▅▅▅▄▄▄▅▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.8304
best_dev_f1,0.57
best_test_auroc,0.58477
best_test_f1,0.559
best_train_auroc,0.80438
best_train_f1,0.704
dev_auroc,0.87426
dev_f1,0.568
dev_loss,0.67334
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: p31eepr8 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0030577484825221393
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁███████████████████████████████████████
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,███▅███▅███▇███▇██▇▇██▇▇█▇▇▇█▇▇▇▆▇▇▇▆▇▇▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,81.0
test_auroc,0.49638
test_f1,0.048
train_auroc,0.46218
train_f1,0.17
train_loss_batch,1.19987
train_loss_epoch,1.20156


[34m[1mwandb[0m: Agent Starting Run: 6yxkw9gj with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008128419785944432
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
dev_auroc,▃▅▂▅▃▅▅▄▅▅▅▅▄▅█▇▄▅▄▄▅▄▃▄▂▄▃▂▂▃▂▄▃▁▄▂▃▄▂▂
dev_f1,▁███████████████████████████████████████
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.43902
dev_f1,0.401
dev_loss,0.69315
epoch,71.0
test_auroc,0.48063
test_f1,0.479
train_auroc,0.49421
train_f1,0.473
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: dc56zyfz with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0005557310853649658
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▇▇████████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▇██████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.87966
best_dev_f1,0.401
best_test_auroc,0.63429
best_test_f1,0.489
best_train_auroc,0.79451
best_train_f1,0.465
dev_auroc,0.85279
dev_f1,0.401
dev_loss,1.9783
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: t1rq4qtc with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001338307195380554
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂█▇▆▆▅▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▃▃▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.55771
best_dev_f1,0.401
best_test_auroc,0.59653
best_test_f1,0.489
best_train_auroc,0.70969
best_train_f1,0.465
dev_auroc,0.76944
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: xn5ppd9k with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0021262423204575125
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▁▁████████████████████████████████████
dev_f1,▁███▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.49993
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0uttxq0q with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0061981895246743315
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: v9373imv with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0036967811982135616
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 6x5zez2j with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007735713339146485
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▇▅▇▄▅▆▇▆▇▆█▆▄▄▅▄▅█▆▆▇▆▅▆▅▆▆▆▇▇▆▅▇▆▇▆▅▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁█████████████████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.54088
best_dev_f1,0.401
best_test_auroc,0.45749
best_test_f1,0.489
best_train_auroc,0.54348
best_train_f1,0.465
dev_auroc,0.58614
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: lzzv6z4r with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004188140977879993
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: g6xxvmlo with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0013322569376922357
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▅▆▇▇▇█████████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█
dev_f1,▂▁▁▅▆▆▆▆▆▆▇▇▇▆▇▇▇▇▇▇▇▇█▇█▇███▇▇▇█████▇▇▇
dev_loss,█▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.89447
best_dev_f1,0.431
best_test_auroc,0.57312
best_test_f1,0.496
best_train_auroc,0.84462
best_train_f1,0.612
dev_auroc,0.85476
dev_f1,0.439
dev_loss,0.68631
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: lijgjfsr with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0014636887087827557
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▆▆▇▇▇▇█▇▇█▇▇██████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.55723
best_dev_f1,0.401
best_test_auroc,0.52031
best_test_f1,0.489
best_train_auroc,0.72546
best_train_f1,0.465
dev_auroc,0.88118
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: wry916l4 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0045080099080542215
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: zhv5on6s with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002063130780496603
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁███████████████████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.48261
best_dev_f1,0.408
best_test_auroc,0.51976
best_test_f1,0.517
best_train_auroc,0.5312
best_train_f1,0.533
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: oxp9ns2j with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0005482028128964105
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▂▂▂▁▁▂▃▄▅▅▅▆▆▆▇█████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███
dev_f1,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▃▂▃▃▃▃▃▃▃▃▇▇▇▇▇▇▇▇██▇██▇▇█
dev_loss,█▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.65755
best_dev_f1,0.486
best_test_auroc,0.5689
best_test_f1,0.505
best_train_auroc,0.80507
best_train_f1,0.66
dev_auroc,0.66417
dev_f1,0.485
dev_loss,0.68269
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 0hux44dy with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0016029550065640703
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142055…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▅▅▅▆▇▇█▇▇▇█▇▇█▇▇███▇██▇▇▆▇▇██▇▇█▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.52982
best_dev_f1,0.401
best_test_auroc,0.48387
best_test_f1,0.489
best_train_auroc,0.58762
best_train_f1,0.465
dev_auroc,0.80172
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: nh7zmo50 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0022621216226080556
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▆▆▇▆▇▅█▄▄█▆▇▆▆▅▆▅▇▆▇▅▄▇▆▆▆▅▆▆▆▆▇▅▆▅▅█▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.42464
best_dev_f1,0.401
best_test_auroc,0.52686
best_test_f1,0.489
best_train_auroc,0.39824
best_train_f1,0.465
dev_auroc,0.60525
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: tnhpzqh1 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0010497593079717355
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.247697…

0,1
dev_auroc,▁███████████████████████████████████████
dev_f1,▁███████████████████████████████████████
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▆▇▇██▁▇▇██▁▇▇██▁▆▇██▅▆▇██▅▆▇▇█▅▆▇▇█▅▁▇▇█
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,124.0
test_auroc,0.42002
test_f1,0.489
train_auroc,0.51278
train_f1,0.45
train_loss_batch,1.21783
train_loss_epoch,1.20156


[34m[1mwandb[0m: Agent Starting Run: z01c0xio with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004332636852592189
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 6s439nzy with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003996025892923173
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▇▆▆▇▆▇▆▆▆▆▆▆▆▆▇▇▇▇▇▇██████████████████
dev_f1,▁▃▅▅▅▅▅▅▆▆▆▆▆▆██▇▇▇▇▇▇▇█▇▇▇██▇██▇███▇▇██
dev_loss,█▇▅▄▄▅▄▅▄▄▄▄▄▃▃▂▃▃▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.84634
best_dev_f1,0.444
best_test_auroc,0.80364
best_test_f1,0.592
best_train_auroc,0.81438
best_train_f1,0.69
dev_auroc,0.87349
dev_f1,0.441
dev_loss,0.68909
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: gqa1xrhh with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00423991699912774
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▆▇█▅▇▇█▁▇█▅▇▇█▁▇█▅▆▇█▁▇█▅▆▇█▁▇█▅▇▇█▁▇█▅▇
train_loss_epoch,█▅▅▅▄▇▆▅▃▆▆▅▇▅▄▅▇▅▄▄▅▇▅▃▃▁▅▃▄▃▃▂▃▄▂▃▂▅▄▃

0,1
dev_auroc,0.5
dev_f1,0.248
dev_loss,1.56488
epoch,180.0
test_auroc,0.4881
test_f1,0.182
train_auroc,0.45743
train_f1,0.202
train_loss_batch,1.40279
train_loss_epoch,1.41271


[34m[1mwandb[0m: Agent Starting Run: 32l3mm5y with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00582848285018626
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▂▂▃▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████████████
dev_f1,▁▁▁▁▁▁▂▂▂▂▆▆▆▆▆▇▆▆▇▇▇███████████████████
dev_loss,██████▇██▆▅▅▄▃▄▄▂▃▃▂▂▃▂▁▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.88089
best_dev_f1,0.57
best_test_auroc,0.58011
best_test_f1,0.578
best_train_auroc,0.84701
best_train_f1,0.723
dev_auroc,0.88041
dev_f1,0.57
dev_loss,0.66408
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: vjmaka3o with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004546916355164474
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▇▇█████▇▄▆▅▃▃▃▄▁▃▃▃▃▃▃▂▄▂▃▃▃▂▃▃▄▁▂▄▆▃▃▅▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.77231
best_dev_f1,0.401
best_test_auroc,0.54516
best_test_f1,0.489
best_train_auroc,0.71452
best_train_f1,0.465
dev_auroc,0.49732
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: wqsb2sjx with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002190102358277205
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,██▁▅▆▅▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,█▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.59986
best_dev_f1,0.421
best_test_auroc,0.51226
best_test_f1,0.489
best_train_auroc,0.54821
best_train_f1,0.473
dev_auroc,0.56166
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 2ymujq3q with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002251009561070084
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅█▆▇▅▅▅▅▅▅▅▅▄▅▆▆█▅▇▇▆▇▆▆▅▆▆▆▆▇▅▇▆▆▇▇▇▆▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.53367
best_dev_f1,0.401
best_test_auroc,0.5917
best_test_f1,0.489
best_train_auroc,0.6782
best_train_f1,0.465
dev_auroc,0.68093
dev_f1,0.401
dev_loss,0.69317
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: rgtajrg8 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005848973434488265
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: xk8t8xo5 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00026107074294424336
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▇▇▇███████████████████████████████████
dev_f1,▁███████████████████████████████████████
dev_loss,▁▅▆▅▆▆▇▇████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.77117
best_dev_f1,0.401
best_test_auroc,0.60582
best_test_f1,0.489
best_train_auroc,0.70514
best_train_f1,0.465
dev_auroc,0.86659
dev_f1,0.401
dev_loss,1.9628
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: wpd9zaxa with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004494443745847574
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▆▆▇▇▇▇▆▇▇█▇▇█▇████████████████████████
dev_f1,▁▁▂▂▂▂▂▂▂▆▂▆▆▆▇▆▇▆▆▇▇▇▇█████████████████
dev_loss,██▇▇▇▇▇▇▆▃▄▂▁▂▂▂▃▁▂▁▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.90236
best_dev_f1,0.572
best_test_auroc,0.60216
best_test_f1,0.566
best_train_auroc,0.8496
best_train_f1,0.704
dev_auroc,0.90453
dev_f1,0.571
dev_loss,0.67184
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: h9nbf7kk with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0014556771236191996
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▂▁▁▂▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▂▁▁▁▂▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.50036
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.49978
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 36miiome with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004710078958504811
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃█▅▃▄▄▂▅▅▂▄▃▅▄▄▃▇▄▅▁▆▁▅▅▅▄▄▂▅▃▄▅▃▄▂▆▃▄▅▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.442
best_dev_f1,0.401
best_test_auroc,0.54848
best_test_f1,0.489
best_train_auroc,0.54201
best_train_f1,0.465
dev_auroc,0.46467
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: myhlszf1 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0023920957975842663
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▇▇█████████████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▇▅▆▇█▇▇████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.80413
best_dev_f1,0.401
best_test_auroc,0.60061
best_test_f1,0.489
best_train_auroc,0.7756
best_train_f1,0.465
dev_auroc,0.79343
dev_f1,0.401
dev_loss,1.97791
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: gx6hi8xd with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004704459963168396
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▇▇▇███████████████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.47961
best_dev_f1,0.407
best_test_auroc,0.51525
best_test_f1,0.443
best_train_auroc,0.46486
best_train_f1,0.496
dev_auroc,0.57892
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: pfydm5fi with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002892999428525247
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142670…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▄▄▄▅▅▄▃▄▅▅▇▇▇▇▇▇█▇██████████████████
dev_f1,▁▁▁▁▂▂▂▁▁▂▂▂▂▂▃▅▂███████████████████████
dev_loss,███████████▇▇▇▇▃█▂▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.87768
best_dev_f1,0.82
best_test_auroc,0.58859
best_test_f1,0.569
best_train_auroc,0.84565
best_train_f1,0.738
dev_auroc,0.89421
dev_f1,0.82
dev_loss,0.62178
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ww3yi9qv with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0025081789927584733
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▂▁▅▆▆▇▆▃▃▂▂▂▂▂▃▂▄▄▅▇▇██████████████████
dev_f1,▁▁▁▆▅▆▇▇▇▇▇▇▇█▇█████████████████████████
dev_loss,█▇▇▆▆▆▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▄▄▄▃▃▃▃▃▃▃▃▃▃▂▂▂▁▃▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.60141
best_dev_f1,0.443
best_test_auroc,0.5512
best_test_f1,0.536
best_train_auroc,0.68666
best_train_f1,0.687
dev_auroc,0.88461
dev_f1,0.443
dev_loss,0.68295
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: nt1x80t4 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003664124199286196
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▅▃▃▆▃▆▇█▆▄█▇▆▅▆▆▇▇▇█▇▇▇▇██████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▇▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.63753
best_dev_f1,0.401
best_test_auroc,0.56757
best_test_f1,0.489
best_train_auroc,0.53776
best_train_f1,0.465
dev_auroc,0.76774
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: o1lyqtty with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003252132210641445
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▅▇█▄██▂▄▄▄▃▄▆▃▄▃▄▄▃▄▂▅▇▄▅▅▁▆▁▅▁▂▅▆▄▃▃▁▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.41687
best_dev_f1,0.401
best_test_auroc,0.45054
best_test_f1,0.489
best_train_auroc,0.51893
best_train_f1,0.465
dev_auroc,0.45708
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: efvv3aqa with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0035890795561308426
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.248
best_test_auroc,0.5
best_test_f1,0.041
best_train_auroc,0.5
best_train_f1,0.116
dev_auroc,0.5
dev_f1,0.248
dev_loss,1.56488
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: b60a9et1 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0023438484815745406
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: kvl9ndqy with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002652153610366054
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▃▃▄▅▆▅▆▆▆▆▇▇█▇▇▇▇█▇▇█▇█▇██████████████
dev_f1,▄▁▁▁▁▂▂▂▂▂▂▇▄▄▂▄▆▆▇▇▇▇▇█▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_loss,█▇▇▇▇▇▇▇▇▅▃▆▂▂▆▃▂▂▂▂▂▁▂▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.84419
best_dev_f1,0.556
best_test_auroc,0.70511
best_test_f1,0.611
best_train_auroc,0.78536
best_train_f1,0.688
dev_auroc,0.88132
dev_f1,0.532
dev_loss,0.67607
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 4amvvp4g with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001491811654620635
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▁▅▅▅▅▆▆▆▆▆▇▆▇▇▇▇▇▇▇████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.52324
best_dev_f1,0.401
best_test_auroc,0.59409
best_test_f1,0.489
best_train_auroc,0.66868
best_train_f1,0.465
dev_auroc,0.8228
dev_f1,0.401
dev_loss,0.69319
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ldbxb4eg with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004149371579269036
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▆▅▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.73377
best_dev_f1,0.401
best_test_auroc,0.49425
best_test_f1,0.489
best_train_auroc,0.42718
best_train_f1,0.465
dev_auroc,0.51798
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: g0fmkirc with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0016778340201440577
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.331953…

0,1
dev_auroc,▄▁▇▇█▆▇▆▅▅▄▅▅▆▆▆▆▆▇▆▆▅▄▆▆▅▆▅▇▆▆▇▆▆▇▆▅▆▅█
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.55153
dev_f1,0.401
dev_loss,0.69315
epoch,56.0
test_auroc,0.539
test_f1,0.425
train_auroc,0.53004
train_f1,0.493
train_loss_batch,0.69222
train_loss_epoch,0.69131


[34m[1mwandb[0m: Agent Starting Run: ju85dk97 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005873489662293133
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: mm447fg3 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008181966918110937
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▇██▇▇▇▇▆▇▇▇████████▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▂▂▂▂▇▇▇▇███████████████████████████████
dev_loss,▄▆█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.91529
best_dev_f1,0.49
best_test_auroc,0.63947
best_test_f1,0.499
best_train_auroc,0.84937
best_train_f1,0.688
dev_auroc,0.89755
dev_f1,0.837
dev_loss,1.40353
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: yw51zjvu with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003335095315614865
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 71tn7ger with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0020105944220118175
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▅▇███▇▇▇▅▆▇▄▆▆▅▄▆▅▅▃▅▂▄▃▂▄▅▄▄▂▁▃▅▄▃▄▅▅▅▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.82654
best_dev_f1,0.401
best_test_auroc,0.62976
best_test_f1,0.489
best_train_auroc,0.69882
best_train_f1,0.465
dev_auroc,0.61848
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 0cxovx2n with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007806516175776886
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▅▅▄▆▅▅▁▄▅▄▅▃▄▇▅▅▆▅▃█▇▆▆▃▆▅▇▄▆▆▅▂▄▇▆▇▅▇▄▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.42116
best_dev_f1,0.401
best_test_auroc,0.51352
best_test_f1,0.489
best_train_auroc,0.39452
best_train_f1,0.465
dev_auroc,0.41949
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: pfdihctf with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0022963596380573895
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁█▇▇▇▇▆▇▇▇▇▇████████████████████████████
dev_f1,▁███▇█▇█████████████████████████████████
dev_loss,█▄▄▂▁▁▁▁▂▁▂▁▁▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.85745
best_dev_f1,0.832
best_test_auroc,0.76306
best_test_f1,0.611
best_train_auroc,0.86996
best_train_f1,0.759
dev_auroc,0.87401
dev_f1,0.826
dev_loss,1.41684
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: fbo3v4i1 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002759795430093117
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: n2bem87u with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00010493360197461202
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▄▅▆▆▆▆▆▇▇█▇▇█▆▇▆▆██▇▇▇▇██▇▇█▇▇▇▇█▇▆▆█▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▅▇█████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.78691
best_dev_f1,0.401
best_test_auroc,0.61027
best_test_f1,0.489
best_train_auroc,0.79857
best_train_f1,0.465
dev_auroc,0.75357
dev_f1,0.401
dev_loss,1.97535
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: sh924hdo with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004743613588612296
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 76wy4iur with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0012182245000804063
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▆██▇██████████████████████████████████
dev_f1,▄▁▆▇▇▇██▇█▇█████████████████████████████
dev_loss,█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.82551
best_dev_f1,0.435
best_test_auroc,0.67936
best_test_f1,0.572
best_train_auroc,0.80802
best_train_f1,0.665
dev_auroc,0.85633
dev_f1,0.434
dev_loss,0.68996
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: jbjcw2sk with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0012070470026910485
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁███████████████████████████████████████
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: meiri64v with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005871761359758027
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 6becz5mm with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007585406016426809
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▁█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▃▇█████▇▇▇▁▄▅▇▇▇▇▇▇▇████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,██▆▅▄▅▄▄▄▄▄▄▄▂▃▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂▂
train_loss_epoch,██▇▅▄▄▄▄▄▃▃▃▃▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,146.0
test_auroc,0.47413
test_f1,0.41
train_auroc,0.48674
train_f1,0.479
train_loss_batch,0.98756
train_loss_epoch,1.00914


[34m[1mwandb[0m: Agent Starting Run: lxx526va with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004928600779742057
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▂▄▂▂▂▂▃▅▃▇████████████████████████████
dev_f1,▄▁▁▁▁▂▂▂▃▃▃▃▃▃▅▅▅▅██▅█▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
dev_loss,█▇▇▇▇▇▇▇▆▆▆▅▆▆▁▃▃▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.90745
best_dev_f1,0.536
best_test_auroc,0.72068
best_test_f1,0.624
best_train_auroc,0.83923
best_train_f1,0.686
dev_auroc,0.90542
dev_f1,0.483
dev_loss,0.68055
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 0r6yp3m6 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007271785379863874
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▆▇▇▇██████████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.41233
best_dev_f1,0.401
best_test_auroc,0.4577
best_test_f1,0.489
best_train_auroc,0.56859
best_train_f1,0.465
dev_auroc,0.48442
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ga74m3eu with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0041528792044010126
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.023 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.247513…

0,1
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▆▇█▅▇▇█▁▇█▅▆▇█▁▇██▆▇█▅▇▇█▁▇█▅▆▇█▁▇██▆▇█▁
train_loss_epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,178.0
test_auroc,0.4662
test_f1,0.49
train_auroc,0.54926
train_f1,0.504
train_loss_batch,1.21119
train_loss_epoch,1.20156


[34m[1mwandb[0m: Agent Starting Run: it3muqxr with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007782434393395443
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: dc5n9ovo with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002647819622048103
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▆▆▇▇▇████████▇▇▇█▇▇▇▇▇▇▇▇▇▇▇█▇█▇▇█████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▆▇▇████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.90787
best_dev_f1,0.401
best_test_auroc,0.63634
best_test_f1,0.489
best_train_auroc,0.76681
best_train_f1,0.465
dev_auroc,0.89603
dev_f1,0.401
dev_loss,1.9783
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 7fc9qk54 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002366374342772442
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142516…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▇▆▃▇▆▄▃▅▄▂█▆▄▆▆▁▃▃▃▃▂▅▄▅▅▅▃▅▁▂▃▃▃▆▇▄▅▃▃▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.535
best_dev_f1,0.401
best_test_auroc,0.53948
best_test_f1,0.489
best_train_auroc,0.55536
best_train_f1,0.465
dev_auroc,0.51121
dev_f1,0.401
dev_loss,0.69317
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: cyudtf0z with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008949131427566603
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 82x8ds4t with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0013300172399505223
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.246363…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: dnxwvx2k with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0025948092686347724
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▇██████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97665
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: orhyfaqj with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00023669856339551015
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁███████████████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49978
best_dev_f1,0.401
best_test_auroc,0.49995
best_test_f1,0.489
best_train_auroc,0.50153
best_train_f1,0.466
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 23m3y1xw with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0038683567835986624
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▆█▇█▇▅▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.78185
best_dev_f1,0.401
best_test_auroc,0.62286
best_test_f1,0.489
best_train_auroc,0.68844
best_train_f1,0.465
dev_auroc,0.614
dev_f1,0.401
dev_loss,1.9783
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: szewitzb with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 2.043794976845306e-05
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▆▅▇▇▆▆█▆▇█▆▇▆▅▅▇▅▅▆▆▅▆▆▅▅▆▅▃▅▅▅▅▅▃▃▂▃▆▅▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▆▄▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5275
best_dev_f1,0.401
best_test_auroc,0.49879
best_test_f1,0.489
best_train_auroc,0.50846
best_train_f1,0.465
dev_auroc,0.39977
dev_f1,0.401
dev_loss,0.70129
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: wviyn99p with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0017747518253961403
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: otwjlsei with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004546200210436361
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▃▂▁▅▅▇▇▇▇▇▇▇▇██████████▇▇████▇▇▇▇██████
dev_f1,▁▁▁▁▁▁▁▁▅▇▆▇▇█▇██▇██████████████████████
dev_loss,██████▇▆▄▄▄▄▃▃▃▃▃▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.87787
best_dev_f1,0.847
best_test_auroc,0.6068
best_test_f1,0.581
best_train_auroc,0.81413
best_train_f1,0.769
dev_auroc,0.87843
dev_f1,0.827
dev_loss,0.61913
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ezeqnehy with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0012036368706030516
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▁▅▅▅▅▅▅▅▆▅▆▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████
dev_f1,▁▁▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▇▇█▆███████████
dev_loss,██████████▇▇▇▇▇▇▇▇▇▆▆▅▆▆▄▄▃▃▂▂▂▂▁▁▁▁▂▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.83444
best_dev_f1,0.815
best_test_auroc,0.58324
best_test_f1,0.57
best_train_auroc,0.83678
best_train_f1,0.724
dev_auroc,0.86023
dev_f1,0.815
dev_loss,0.6266
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: fc00fcxb with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006041175620016667
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.141962…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▄▂▂▂▂▁▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.48933
best_dev_f1,0.401
best_test_auroc,0.42815
best_test_f1,0.489
best_train_auroc,0.43061
best_train_f1,0.465
dev_auroc,0.45707
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: b5gstf2a with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007196024943316304
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142456…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▂▂▃▃▃▁▁▁▂▁▁▁▁▁▁▁▁▁▂▂▂▂▁▂▂▂▂▂▂▂▂▂▁▂▁▂▂▂▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▃▄▆▆▇▇▇▇▇▇▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.60611
best_dev_f1,0.401
best_test_auroc,0.48183
best_test_f1,0.489
best_train_auroc,0.49377
best_train_f1,0.465
dev_auroc,0.50083
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: odc6333e with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0009159935850348346
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: vic0syxe with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005658602315761238
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: gplpdsra with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005321434176843443
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁███████████████████████████████████████
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,98.0
test_auroc,0.51062
test_f1,0.3
train_auroc,0.4858
train_f1,0.234
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: dvnvtv04 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005365870083936988
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: p98yks1s with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0010155238905135231
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▆▆▇▆▇▆▃▆▅▇▂▆▄▇▆▇▅▅▆▆▂▅▂▅▂▃█▆▃▃▅▃▆▇▆▂▆▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.83508
best_dev_f1,0.401
best_test_auroc,0.60917
best_test_f1,0.489
best_train_auroc,0.72126
best_train_f1,0.465
dev_auroc,0.72776
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: d39jp0jg with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0006588572112804024
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: u0pd5jqy with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.009823188835592325
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 9yu15e0i with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0020589259264289
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: sht9afr8 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002578702871630659
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: bg073yva with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00905805209733973
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gzttyqj5 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0002926554447249941
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.55562
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: v9idsbaj with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0022177355385461055
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▃▄▄▄▄▅▄▄▄▄▄▄▄█▅▄▄▄█▄▄▄▄▃▄▄▁▄▄▄▄▄▄▄▃▄▄▂▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49584
best_dev_f1,0.401
best_test_auroc,0.50054
best_test_f1,0.489
best_train_auroc,0.50685
best_train_f1,0.465
dev_auroc,0.5309
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: iclfznz8 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004543887452426181
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▅▃▃▄▅▅▆▅▆▇▆▇▇▆▇█▇▇▇▇▇█████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▄▅▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.59626
best_dev_f1,0.401
best_test_auroc,0.56037
best_test_f1,0.489
best_train_auroc,0.48154
best_train_f1,0.465
dev_auroc,0.7872
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: tevnlzia with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 4.796770039539197e-05
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▆▇▇▇▇▇████████████████████████████████
dev_f1,▁█████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▃▇█▆▄▃▃▂▂▂▁▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.86707
best_dev_f1,0.248
best_test_auroc,0.6248
best_test_f1,0.041
best_train_auroc,0.7926
best_train_f1,0.116
dev_auroc,0.85946
dev_f1,0.248
dev_loss,1.64841
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: mfwal08p with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002317733030941347
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▃▃▄▄▃▃▄▃▃▄▃▄▄▃▄▃▃▄▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.42144
best_dev_f1,0.401
best_test_auroc,0.45485
best_test_f1,0.489
best_train_auroc,0.39716
best_train_f1,0.465
dev_auroc,0.29483
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: n07n8ezv with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0006509051874240535
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▆▆█▇▇▇▇▇▇▇████▇▇█████████████████████
dev_f1,▁▁▁▇▇▇▇▇▇▇▇▇▇████▇██████████████████████
dev_loss,███▂▂▁▂▂▂▂▂▂▂▁▁▁▂▂▂▁▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.81385
best_dev_f1,0.832
best_test_auroc,0.66104
best_test_f1,0.547
best_train_auroc,0.79142
best_train_f1,0.735
dev_auroc,0.81092
dev_f1,0.828
dev_loss,1.43948
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 7uezco33 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0070862798741311035
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 7j1lc6mq with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001553537830344844
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.332226…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▆▇██▇▇▇▇█▇█████▇██████▇▇██████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5799
best_dev_f1,0.401
best_test_auroc,0.49548
best_test_f1,0.489
best_train_auroc,0.5884
best_train_f1,0.465
dev_auroc,0.81801
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 0il203xk with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001440308619166175
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▅▅▄▄▅▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇██████████
dev_f1,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▄▄▅▅▅█████████████████
dev_loss,███████▇▇▇▇▇▇▇▇▆▇▇▄▄▄▃▂▂▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.87752
best_dev_f1,0.819
best_test_auroc,0.63337
best_test_f1,0.572
best_train_auroc,0.84633
best_train_f1,0.733
dev_auroc,0.88911
dev_f1,0.819
dev_loss,0.62288
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: zj1wm2dn with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0009550827133051134
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5901
best_dev_f1,0.582
best_test_auroc,0.50815
best_test_f1,0.489
best_train_auroc,0.51987
best_train_f1,0.509
dev_auroc,0.50015
dev_f1,0.402
dev_loss,0.6932
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: gv0v6sd0 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0059919582736700705
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 6u3yah34 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004295757614354212
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▁▁▁▁▁▁▁▁▁▃▆▇▆▇▇▇██████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,███████████▁██▁▁▁███████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.33123
best_dev_f1,0.401
best_test_auroc,0.41177
best_test_f1,0.489
best_train_auroc,0.47
best_train_f1,0.465
dev_auroc,0.73157
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 5ooybx1j with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004725691724582886
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▆▄▄▄▅▄█▄▄▄▅▄▄▁▅▃▃▄▁▅▄▃▃▄▄▂▄▃▂▃▃▄▂▄▅▄▄▅▅▆
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.53139
dev_f1,0.401
dev_loss,0.69315
epoch,118.0
test_auroc,0.54624
test_f1,0.228
train_auroc,0.51097
train_f1,0.317
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: o8gb75dx with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003289740742104293
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▆▂▃▃▄▅▆▇▇█▇█▇▄▄▄▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▂▂▂▂▃▃▄▇█▆▇▆▅▄▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.70529
best_dev_f1,0.401
best_test_auroc,0.49178
best_test_f1,0.489
best_train_auroc,0.42498
best_train_f1,0.465
dev_auroc,0.63928
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 2q1y9p3l with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003163565658504891
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


[34m[1mwandb[0m: [32m[41mERROR[0m Problem finishing run
Traceback (most recent call last):
  File "/work/dagarwal_umass_edu/.conda/envs/s2and/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 1958, in _atexit_cleanup
    self._on_finish()
  File "/work/dagarwal_umass_edu/.conda/envs/s2and/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 2266, in _on_finish
    _ = exit_handle.wait(timeout=-1, on_progress=self._on_progress_exit)
  File "/work/dagarwal_umass_edu/.conda/envs/s2and/lib/python3.7/site-packages/wandb/sdk/lib/mailbox.py", line 261, in wait
    found = self._slot._get_and_clear(timeout=wait_timeout)
  File "/work/dagarwal_umass_edu/.conda/envs/s2and/lib/python3.7/site-packages/wandb/sdk/lib/mailbox.py", line 118, in _get_and_clear
    if self._wait(timeout=timeout):
  File "/work/dagarwal_umass_edu/.conda/envs/s2and/lib/python3.7/site-packages/wandb/sdk/lib/mailbox.py", line 114, in _wait
    return self._event.wait(timeout=timeout)
  File "/work/dagarwa

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.023 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.247402…

0,1
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▅▆▁▅▆▄▅▆▄▅▅▆▅▅▆▁▅▆▁▅▆▄▅▆▆▅▅▆▅▅▆▁▅▆▄▅▆▄▅
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,196.0
test_auroc,0.50872
test_f1,0.457
train_auroc,0.50241
train_f1,0.489
train_loss_batch,1.15109
train_loss_epoch,1.20156


[34m[1mwandb[0m: Agent Starting Run: a9wi7yzi with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0010616830833146266
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.143034…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▇▃▅▅▁▄█▄▅▄▄▄▄▄▄▄▄▄▄▄▃▄▄▄▂▄▄▄▄▄▃▄▄▄▄▄▅▃▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.51049
best_dev_f1,0.401
best_test_auroc,0.48238
best_test_f1,0.489
best_train_auroc,0.49169
best_train_f1,0.465
dev_auroc,0.52754
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ey3yityk with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007060018412282426
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: enrjojff with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002003430186349911
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▂▅▃█▄▃▁▃▃▂▃▃▃▃▃▃▂▃▃▃▆▄▃▂▃▁▃▃▃▁▂▃▃▃▃▃▃▃▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.76783
best_dev_f1,0.401
best_test_auroc,0.53626
best_test_f1,0.489
best_train_auroc,0.55707
best_train_f1,0.465
dev_auroc,0.5003
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 8k1d66tg with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001648743799224112
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: yr0qm0gu with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0042601396010542485
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄██▁▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
dev_f1,▂██▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▅████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.61913
best_dev_f1,0.587
best_test_auroc,0.5467
best_test_f1,0.443
best_train_auroc,0.5497
best_train_f1,0.445
dev_auroc,0.48241
dev_f1,0.444
dev_loss,0.87913
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 8er3u3gd with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0061970132020347304
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.143071…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▂▂▃▂▂▂▂▄▅█▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▇▇▇▆▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.54057
best_dev_f1,0.401
best_test_auroc,0.46341
best_test_f1,0.489
best_train_auroc,0.53263
best_train_f1,0.465
dev_auroc,0.68003
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: iaw0jdki with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00012200127763542522
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.143242…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▃▃▄▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇█▇▇▇▇██████████
dev_f1,▁███████████████████████████████████████
dev_loss,█▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.51099
best_dev_f1,0.401
best_test_auroc,0.54481
best_test_f1,0.489
best_train_auroc,0.56927
best_train_f1,0.465
dev_auroc,0.77075
dev_f1,0.401
dev_loss,0.69638
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: kbce4rgq with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.009512795850379322
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 9eek9hza with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.000802994490577428
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▁▃▄▆▆▇█████▇█▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁███████████████████████████████████████
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.60386
best_dev_f1,0.401
best_test_auroc,0.55607
best_test_f1,0.489
best_train_auroc,0.55439
best_train_f1,0.465
dev_auroc,0.83153
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 8yb0e4gt with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002381371780675683
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,▆▇██▁▇██▁▇▇█▁▇▇█▅▇▇█▅▆▇█▅▆▇█▅▆▇██▁▇██▁▇▇
train_loss_epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,154.0
test_auroc,0.45005
test_f1,0.489
train_auroc,0.59353
train_f1,0.567
train_loss_batch,1.15109
train_loss_epoch,1.20156


[34m[1mwandb[0m: Agent Starting Run: ekonf99g with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001451672937870275
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
dev_auroc,▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▇███████████████▆▆▆
dev_f1,▇▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▇██████████████████
dev_loss,▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁███
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,███▇████████████▇███▃▃▂▃▃▂▂▃▃▁▃▃▃▃▃▂▂▂▁▂
train_loss_epoch,▆███████████████████▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁

0,1
dev_auroc,0.54131
dev_f1,0.435
dev_loss,1.59971
epoch,98.0
test_auroc,0.51279
test_f1,0.478
train_auroc,0.51636
train_f1,0.514
train_loss_batch,1.30669
train_loss_epoch,1.29277


[34m[1mwandb[0m: Agent Starting Run: 3boaieqe with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0056329722718767
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▁▂▂▂▃▅▆▇████▇▇████▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.61494
best_dev_f1,0.401
best_test_auroc,0.53667
best_test_f1,0.489
best_train_auroc,0.37701
best_train_f1,0.465
dev_auroc,0.83677
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 9w38t6va with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0011086204719431
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▆▃█▆▅▃▄▃▆▂▁▆▇▅▅▆▆▆▆▆▆▆▆▆▆▅▅▅▅▆▆▅▅▆▆▆▆▆▆▆
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.4901
best_dev_f1,0.452
best_test_auroc,0.56145
best_test_f1,0.469
best_train_auroc,0.45028
best_train_f1,0.477
dev_auroc,0.48819
dev_f1,0.401
dev_loss,0.69317
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: t5701y5a with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0023510504601369476
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▃▆█▇▇█▇▇▇▇▆▇▇█▇██▇████████████████████
dev_f1,▂▁▁▂▂▂▂▂▇▆▆▆▇▆▇▆▇▇▇▇▇▇█▇▇███▇███████████
dev_loss,█▆▆▅▅▆▄▄▄▂▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.86944
best_dev_f1,0.572
best_test_auroc,0.60217
best_test_f1,0.558
best_train_auroc,0.81065
best_train_f1,0.704
dev_auroc,0.89883
dev_f1,0.57
dev_loss,0.67239
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 0onw6bms with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0059257844895505745
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: kq1k1m3p with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002037050573797523
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▆▇▇████████▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▆▆▆▆▆▆▆▆▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.45299
best_dev_f1,0.401
best_test_auroc,0.45826
best_test_f1,0.489
best_train_auroc,0.59078
best_train_f1,0.465
dev_auroc,0.77377
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 9f4emo4i with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00840070655189344
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▆▅▅▅▆▆▆▇▇▇▇▇▇▇▇█▇█████████████████████
dev_f1,▁▅▇▇▆▆▆▇▇▇▇▇█▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_loss,▇█▄▅▄▃▂▃▄▃▂▃▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.9017
best_dev_f1,0.822
best_test_auroc,0.77992
best_test_f1,0.64
best_train_auroc,0.90536
best_train_f1,0.809
dev_auroc,0.9017
dev_f1,0.822
dev_loss,1.35692
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: x1rp9wmo with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0004112286176683581
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▅▃▃▄▃▄▄▄▄▃▄█▄▄▄▃▄▄█▄▃▅▆▅▅▄▄▄▄▄▃▆▄▁▄▅▄▆▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.47685
best_dev_f1,0.401
best_test_auroc,0.53385
best_test_f1,0.489
best_train_auroc,0.51138
best_train_f1,0.465
dev_auroc,0.55293
dev_f1,0.401
dev_loss,0.69319
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: fu5g1x3h with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0012293087911809468
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▇▃▄▅▆▆▆▅▇▆▄▅▁▆▄▆▇▄▃▅█▄▆▇▄▄▄▆▄▇▄▇▄▄▅▄▇▄▆▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.70611
best_dev_f1,0.401
best_test_auroc,0.57129
best_test_f1,0.489
best_train_auroc,0.61146
best_train_f1,0.465
dev_auroc,0.56713
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: m9uanljx with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0004425281828142793
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▁▄▅▆▆▇▇▇▆▆▇▇▇▇█▇▇█▇█▇▇█▇███████████████
dev_f1,▁███████████████████████████████████████
dev_loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.54879
best_dev_f1,0.401
best_test_auroc,0.55809
best_test_f1,0.489
best_train_auroc,0.65499
best_train_f1,0.465
dev_auroc,0.88068
dev_f1,0.401
dev_loss,0.69318
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 8x528r1x with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.000628349441507618
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅█▇▇█▇▇▇▇██████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▅▆▇▇▇██████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.76317
best_dev_f1,0.401
best_test_auroc,0.63013
best_test_f1,0.489
best_train_auroc,0.73599
best_train_f1,0.465
dev_auroc,0.89513
dev_f1,0.401
dev_loss,1.97415
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nyi56dul with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002540780115205237
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: h50kt27c with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0001971703155718563
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▂▃▄▄▆▆▇▆▅▆▆▆▇▇▇█▇▇█▇▇▇██▇▇█▇▇▇█▇█▇▇▇██
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.54093
best_dev_f1,0.401
best_test_auroc,0.54985
best_test_f1,0.489
best_train_auroc,0.58845
best_train_f1,0.465
dev_auroc,0.66192
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: rgg7g9oi with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001248398151104768
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142903…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▆▃▅▃▆▄▅▅▄▁▅▅▆▇▆▇▄▂▃▅▅▄▄▁▄▅▆▅▂▁▃▅█▅▄▅▅▅█
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.48012
best_dev_f1,0.401
best_test_auroc,0.50617
best_test_f1,0.489
best_train_auroc,0.44589
best_train_f1,0.465
dev_auroc,0.56424
dev_f1,0.401
dev_loss,0.69322
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bdemcv9z with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001942932811041897
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.55877
best_dev_f1,0.558
best_test_auroc,0.5482
best_test_f1,0.431
best_train_auroc,0.51286
best_train_f1,0.486
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: z3keeztk with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004895462402257399
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁█▇▇▇▇▇▇▆▆▇▇▇▇▇█▆█▇▇▇█▇▇▇▇█▇█▇▇█▇▇▇▇▆███
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.42683
best_dev_f1,0.401
best_test_auroc,0.42409
best_test_f1,0.489
best_train_auroc,0.49586
best_train_f1,0.465
dev_auroc,0.59131
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ueob36dl with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0045550103472942905
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▄▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.57049
best_dev_f1,0.401
best_test_auroc,0.45592
best_test_f1,0.492
best_train_auroc,0.50324
best_train_f1,0.466
dev_auroc,0.5385
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: vb0qc8cj with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0004080975841469359
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▃▄▄▄▅▅▅▆▆▇▇▇▇▇▇▇▇█████████████████████
dev_f1,▅▄▁▁▁▁▁▂▄▅▇▆▆▆▇▇▇▇██████████████████████
dev_loss,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.77056
best_dev_f1,0.434
best_test_auroc,0.63615
best_test_f1,0.491
best_train_auroc,0.70228
best_train_f1,0.622
dev_auroc,0.78205
dev_f1,0.434
dev_loss,0.69067
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 7iok0o51 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 9.666843540967254e-05
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▃▄▄▄▃▆▇▂▃▅▁▅▂▇▆▅▇▅▇▆▅▆▇▄▅▅▆▅▄█▇▄▅▅▅█▆█▇
dev_f1,▁███████████████████████████████████████
dev_loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49395
best_dev_f1,0.401
best_test_auroc,0.54744
best_test_f1,0.489
best_train_auroc,0.45219
best_train_f1,0.465
dev_auroc,0.54887
dev_f1,0.401
dev_loss,0.69447
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: xzxfjojf with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0031613668851456626
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.143266…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▇▇▇▇▇▇▇▆█▇▇▇▇█▇███████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▇▇▆▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.61833
best_dev_f1,0.401
best_test_auroc,0.53339
best_test_f1,0.489
best_train_auroc,0.53773
best_train_f1,0.465
dev_auroc,0.80338
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4o3pzc3z with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0001071825987865916
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▄▄▄▅▅▅▅▅▅▆▆▆▇█████████████████████████
dev_f1,▁███████████████████████████████████████
dev_loss,█▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.61925
best_dev_f1,0.401
best_test_auroc,0.60662
best_test_f1,0.489
best_train_auroc,0.67147
best_train_f1,0.465
dev_auroc,0.88485
dev_f1,0.401
dev_loss,0.69372
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1zkaz3vk with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007273054591773843
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▆█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▂▂██████████████████████████████████
dev_loss,██████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.55014
best_dev_f1,0.55
best_test_auroc,0.49336
best_test_f1,0.49
best_train_auroc,0.50803
best_train_f1,0.497
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: vezntaat with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0010413142385134356
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▇████▇▇▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
dev_f1,▁▁▇▅▇▇▆▆▇▇▇█████████████████████████████
dev_loss,█▇▃▃▄▄▃▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.81353
best_dev_f1,0.815
best_test_auroc,0.74106
best_test_f1,0.555
best_train_auroc,0.87868
best_train_f1,0.782
dev_auroc,0.81587
dev_f1,0.815
dev_loss,1.36119
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 82q49d38 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0026694408920212125
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▁▄▄▄▄▄▃▄▆▄▅▆▇▇▇████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▇██████████████████████████████
dev_loss,█▆▆▆▆▆▆▅▃▂▄▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.78847
best_dev_f1,0.541
best_test_auroc,0.74158
best_test_f1,0.618
best_train_auroc,0.78043
best_train_f1,0.681
dev_auroc,0.88784
dev_f1,0.541
dev_loss,0.67514
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: n3v5fkqd with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0005651937201697335
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▇▁▄▃▃▄▄▃▅▆▆▃▇▆█▄▇▄▇█▇▆▆█▅▇▇▆▅▃▇▆▆▅▅▆▇▇▄▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5724
best_dev_f1,0.401
best_test_auroc,0.56433
best_test_f1,0.489
best_train_auroc,0.48957
best_train_f1,0.465
dev_auroc,0.53029
dev_f1,0.401
dev_loss,0.69329
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: s70jmnim with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00769534812801552
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▂▃▂▄▄▄▄▅▆▇▇▇▇▇▇███████████████████████
dev_f1,▁▅▄▅▇█▅▇▃████▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
dev_loss,▄█▂▂▂▂▁▁▁▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.87938
best_dev_f1,0.828
best_test_auroc,0.76877
best_test_f1,0.602
best_train_auroc,0.89
best_train_f1,0.79
dev_auroc,0.88826
dev_f1,0.805
dev_loss,1.41159
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: xifuhjqm with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0035233325746640218
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▆▇▇███████████████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.53816
best_dev_f1,0.403
best_test_auroc,0.48462
best_test_f1,0.499
best_train_auroc,0.46851
best_train_f1,0.465
dev_auroc,0.63692
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 5ohiqf6d with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.009304877550259652
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 5w52hvkn with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001278213384766136
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▂▃▃▂▃▆▂▃▃▂▃▁▂▁▃▂▃▂▂▃▄▄▄▅▃▄▃▃▄▅▃▄▃▃█▄▃▃▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.4997
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.49992
best_train_f1,0.465
dev_auroc,0.51398
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: gi62b41k with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0019455484109268456
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


VBox(children=(Label(value='0.006 MB of 0.017 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.335253…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▁▁▃█▄▆▄▃▅▄▄▆▇▇▇████████████████████████
dev_f1,▁▂▇▇▇▇▇▇▇▇██████████████████████████████
dev_loss,█▂▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.51917
best_dev_f1,0.433
best_test_auroc,0.56188
best_test_f1,0.534
best_train_auroc,0.72535
best_train_f1,0.659
dev_auroc,0.53779
dev_f1,0.433
dev_loss,0.69056
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: wp4mmcde with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0015626236859821031
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁██▇▇▇████████████████████████▇████▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▇██████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.87605
best_dev_f1,0.401
best_test_auroc,0.62549
best_test_f1,0.489
best_train_auroc,0.75054
best_train_f1,0.465
dev_auroc,0.84636
dev_f1,0.401
dev_loss,1.9783
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: gepsgcn1 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00184053740348088
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▃▄▄▄▅▄▃▄▄▅█▄▄▅▅▅▅▅▅▄▃▅▅▃▅▅▅▆▅▃▁▅▅▄▅▄▄▅▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5007
best_dev_f1,0.401
best_test_auroc,0.55444
best_test_f1,0.489
best_train_auroc,0.48626
best_train_f1,0.465
dev_auroc,0.50885
dev_f1,0.401
dev_loss,0.69371
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 9ki57n4d with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0035036056245365764
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▇▆▇▇▆▆▇▇▆▇▆▇▆▇▇▇▇▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▃▁▆▅▇▇██████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.82422
best_dev_f1,0.401
best_test_auroc,0.64686
best_test_f1,0.489
best_train_auroc,0.81616
best_train_f1,0.465
dev_auroc,0.77055
dev_f1,0.401
dev_loss,1.9783
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 8qhfpggv with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00445984068740157
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.73323
best_dev_f1,0.401
best_test_auroc,0.51944
best_test_f1,0.489
best_train_auroc,0.41708
best_train_f1,0.465
dev_auroc,0.54507
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d1uz0c00 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0032900780727768953
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.143249…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▃▄▇▇▅▇█▇▇█▆▃▅▇▇███████████████████████
dev_f1,▂▁▁▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▇▇▇▇█▇▇█████████
dev_loss,███▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▆▆▆▅▅▅▃▂▂▁▂▁▁▁▂▁▂▂▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.90903
best_dev_f1,0.562
best_test_auroc,0.72954
best_test_f1,0.62
best_train_auroc,0.84544
best_train_f1,0.706
dev_auroc,0.91114
dev_f1,0.562
dev_loss,0.67629
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: y5njo62c with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0026092652032060123
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄█▄▁▄▆▅▄▆▅▅▅▅▇▅▅▅▄▆▅▆▆▅▅▄▅▄▅▅▅▅▅▅▅▅▅▅▅▄▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.48794
best_dev_f1,0.401
best_test_auroc,0.53774
best_test_f1,0.489
best_train_auroc,0.52687
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: vwjkdu36 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004295407618941925
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▅▁▅▄▅▅▅▅▆▆▇▇▇▇▇▇████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.42612
best_dev_f1,0.401
best_test_auroc,0.46111
best_test_f1,0.489
best_train_auroc,0.52067
best_train_f1,0.465
dev_auroc,0.4319
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: tiyrszce with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0025514923512986163
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▂▁▁▅▄▅▅▅▅▅▅▅▅▅▅▅▆▆▇▆▇▇▇█▇██████████████
dev_f1,▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▆▂▂█▂██████████████
dev_loss,███████████████████▆█▅▂█▃█▅▂▂▂▁▁▂▂▁▁▁▁▂▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.88975
best_dev_f1,0.822
best_test_auroc,0.60086
best_test_f1,0.587
best_train_auroc,0.84378
best_train_f1,0.764
dev_auroc,0.89078
dev_f1,0.822
dev_loss,0.61911
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: rjbn5wwu with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006542552412723046
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▇▆▇▇█▆▅▅█▆▆▆▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▆▇▇████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.84845
best_dev_f1,0.401
best_test_auroc,0.62586
best_test_f1,0.489
best_train_auroc,0.71499
best_train_f1,0.465
dev_auroc,0.76589
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: fxkzvfkz with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002420519821341715
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▂▄▇▇▆▆▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇██████████████████
dev_f1,▂▁▁▁▁▁▁▄▄▄▄▆▆▆▆█▆██▆████████████████████
dev_loss,███▇▇▇▆▆▄▇▄▄▃▃▂▂▂▃▂▃▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.88252
best_dev_f1,0.82
best_test_auroc,0.60519
best_test_f1,0.569
best_train_auroc,0.83815
best_train_f1,0.697
dev_auroc,0.9051
dev_f1,0.82
dev_loss,0.62162
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: wrcmgnnw with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0029950424985930916
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 9u1smdqi with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006858905970913309
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.50827
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.50296
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 6somvlsf with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003024622448807533
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▇█▅▇▅▆▅▅▅▆▇▇▆▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▅▅▃▂▂▂▂▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.57414
best_dev_f1,0.401
best_test_auroc,0.51326
best_test_f1,0.489
best_train_auroc,0.59508
best_train_f1,0.465
dev_auroc,0.78977
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gvzwzxf0 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002639586225433583
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▆▆▇▇▇▇████████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.84142
best_dev_f1,0.401
best_test_auroc,0.54595
best_test_f1,0.489
best_train_auroc,0.5441
best_train_f1,0.465
dev_auroc,0.82446
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: dip7q1pm with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0043656619673402295
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▇▁▁▅▄▅▇▇████████████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.61742
best_dev_f1,0.406
best_test_auroc,0.53506
best_test_f1,0.481
best_train_auroc,0.49614
best_train_f1,0.497
dev_auroc,0.64015
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: o2e6jq06 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0048816490539135724
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.332208…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: sxyos8vs with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0008430650376948904
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▅▄▃▄▄▂▄▅▅▅▃▅▅▄▅▅▅█▃▅▄▇▄▄▄▃▄▄▄▃▅▄▆▁▄▄▅▄▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.50194
best_dev_f1,0.401
best_test_auroc,0.50423
best_test_f1,0.489
best_train_auroc,0.57058
best_train_f1,0.465
dev_auroc,0.51572
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: sti58rmm with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007705008912534066
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▆▆▆▇▇▇▇▇█▇▇███████████████████████████
dev_f1,▃▅█▁▇█████▇█▇███████████████████████████
dev_loss,▆▃▂▃▃█▅▂▂▂▃▁▂▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.88193
best_dev_f1,0.855
best_test_auroc,0.71458
best_test_f1,0.522
best_train_auroc,0.88115
best_train_f1,0.789
dev_auroc,0.91399
dev_f1,0.841
dev_loss,1.40869
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: besa6uiy with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0010838884299278824
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▂▃▃▂▂▃█▅▅▃▂▂▆▄█▃▁▆▂▄▄▃▄▇▇▁▃▇▅▁▇▆▂▅██▄▅▅▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.49433
best_dev_f1,0.401
best_test_auroc,0.55136
best_test_f1,0.489
best_train_auroc,0.60606
best_train_f1,0.465
dev_auroc,0.55596
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: esvwo56a with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003255329354002929
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.248
best_test_auroc,0.5
best_test_f1,0.041
best_train_auroc,0.5
best_train_f1,0.116
dev_auroc,0.5
dev_f1,0.248
dev_loss,0.98406
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 43p98nsl with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006788316621069981
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▃▂▂▂▃▂▂▂▂▂▂▃▂▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.3463
best_dev_f1,0.401
best_test_auroc,0.38046
best_test_f1,0.489
best_train_auroc,0.4037
best_train_f1,0.465
dev_auroc,0.26444
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 3fmg0s3m with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001900490211711541
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▃▁▁▁▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
dev_f1,█▁▁▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
dev_loss,▁▁█▅▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.54219
best_dev_f1,0.541
best_test_auroc,0.51048
best_test_f1,0.475
best_train_auroc,0.49691
best_train_f1,0.494
dev_auroc,0.49782
dev_f1,0.479
dev_loss,0.74144
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ay7ypwsj with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007665376150201705
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.55304
best_dev_f1,0.547
best_test_auroc,0.51465
best_test_f1,0.346
best_train_auroc,0.4491
best_train_f1,0.449
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: qg89jnh0 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0028046333784250625
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▂▂▂▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.40117
best_dev_f1,0.401
best_test_auroc,0.5219
best_test_f1,0.489
best_train_auroc,0.50582
best_train_f1,0.465
dev_auroc,0.27665
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 69hqxhr1 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0017466232177303649
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆█▇▇██▇▇▇▆▆▆▆▆▆▆▆▆▆▆▆▆▅▆▆▆▅▅▅▅▅▆▆▆▅▅▅▆▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▇▇▇████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.75942
best_dev_f1,0.401
best_test_auroc,0.63279
best_test_f1,0.489
best_train_auroc,0.70626
best_train_f1,0.465
dev_auroc,0.82588
dev_f1,0.401
dev_loss,1.97796
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 4bi5xewj with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003186114584171908
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.332317…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▁▄▃▄▃▃▃▃▃▃▂▃▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▁▃▃█▃▃▃▃
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.8013
best_dev_f1,0.401
best_test_auroc,0.61344
best_test_f1,0.489
best_train_auroc,0.64759
best_train_f1,0.465
dev_auroc,0.4982
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: uazx6rbz with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005486609231370412
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: True


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▆▇▇▇▇█████████████████████████████████
dev_f1,▁▆▇▅▇▇▇██▇██████████████████████████████
dev_loss,▆▄▁█▂▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.88543
best_dev_f1,0.825
best_test_auroc,0.68024
best_test_f1,0.544
best_train_auroc,0.86564
best_train_f1,0.771
dev_auroc,0.8867
dev_f1,0.819
dev_loss,1.40957
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: n34j1rxn with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0011069986520300583
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▁▁▂▃▄▅▇▆▇▇▇▇▇████▇█████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.58266
best_dev_f1,0.401
best_test_auroc,0.58436
best_test_f1,0.489
best_train_auroc,0.65207
best_train_f1,0.465
dev_auroc,0.76045
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: a121osc0 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.005984230204578513
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.248
best_test_auroc,0.5
best_test_f1,0.041
best_train_auroc,0.5
best_train_f1,0.116
dev_auroc,0.5
dev_f1,0.248
dev_loss,0.98406
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: voo298mw with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0008805162701322133
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▃▄▅▆▇▇▇▇▇█████████████████████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.75242
best_dev_f1,0.401
best_test_auroc,0.57656
best_test_f1,0.489
best_train_auroc,0.7348
best_train_f1,0.465
dev_auroc,0.82353
dev_f1,0.401
dev_loss,1.9783
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 2q71f69w with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00811787652494976
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: poe2hi5h with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00037373609048402137
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142766…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▄▂▁▂▂▃▄▅▄▅▆▆▇▆▅▇▆▇██▆▇▆▇█▆▇▇▇█▇█▇▇▇▇▇▇▇█
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.51906
best_dev_f1,0.401
best_test_auroc,0.55415
best_test_f1,0.489
best_train_auroc,0.44006
best_train_f1,0.465
dev_auroc,0.62004
dev_f1,0.401
dev_loss,0.69316
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: qnwl8ryi with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003363800725821345
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▂█▃▅▅▅▅▅▅▅▅▆▅▄▅▆▅▅▅▅▅▅▅▅▅▄▇▅▅▅▁▃▅▅▅▆▅▄▅▃
dev_f1,▁███████████████████████████████████████
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.48396
dev_f1,0.401
dev_loss,0.69315
epoch,126.0
test_auroc,0.46305
test_f1,0.155
train_auroc,0.46467
train_f1,0.222
train_loss_batch,0.69315
train_loss_epoch,0.69315


[34m[1mwandb[0m: Agent Starting Run: 41wxr5eh with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003603792857520716
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▇▇▇▇▇▇▇▇▇▇████████████████████████████
dev_f1,▁▃▇▇██████▇▇████████████████████████████
dev_loss,█▃▄▃▂▄▂▃▁▃▂▁▂▁▂▂▂▂▂▂▂▂▂▂▂▁▁▂▂▂▂▂▁▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.88194
best_dev_f1,0.847
best_test_auroc,0.71908
best_test_f1,0.552
best_train_auroc,0.87986
best_train_f1,0.78
dev_auroc,0.9146
dev_f1,0.846
dev_loss,1.41247
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fcue83y3 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00982894267344107
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.248
best_test_auroc,0.5
best_test_f1,0.041
best_train_auroc,0.5
best_train_f1,0.116
dev_auroc,0.5
dev_f1,0.248
dev_loss,0.98406
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: hzwavsx1 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0056332311535171485
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆█▆▆▇▅▆▃▃▄▂▃▅▅▄▆▅▄▃▄▅▅▄▆▆▂▄▅▅▆▅▅▄▄▆▅▃▆▆
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.51874
best_dev_f1,0.401
best_test_auroc,0.47693
best_test_f1,0.489
best_train_auroc,0.70295
best_train_f1,0.465
dev_auroc,0.7786
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: flmzq8al with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0016595026111097554
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▂▁▂▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.46095
best_dev_f1,0.405
best_test_auroc,0.4891
best_test_f1,0.529
best_train_auroc,0.54408
best_train_f1,0.479
dev_auroc,0.40892
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bwjhsn2s with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0013564979607683198
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.041 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.142576…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁██▇▇█▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇▇▇▇▇▆▇▇▇▇▇█
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.41763
best_dev_f1,0.401
best_test_auroc,0.4564
best_test_f1,0.489
best_train_auroc,0.55216
best_train_f1,0.465
dev_auroc,0.60506
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: f6608098 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0013599038554932992
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.56643
best_dev_f1,0.552
best_test_auroc,0.54777
best_test_f1,0.524
best_train_auroc,0.5057
best_train_f1,0.505
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: n3ugh1uq with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.007239529919382938
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▆▅▇▇▆▅▅▅▄▄▅▅▅▅▅▆▇▆▇▇▇▇▇▇▇▇▇▇▇█▇▇▇██████
dev_f1,▁▁▂▁▂▂▂▂▂▂▂▂▂▂▂▂█▆▆█████████████████████
dev_loss,█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.88257
best_dev_f1,0.823
best_test_auroc,0.57438
best_test_f1,0.583
best_train_auroc,0.74586
best_train_f1,0.742
dev_auroc,0.8783
dev_f1,0.819
dev_loss,0.62224
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: q20yiaki with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008913577149368176
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃█▂▂▂▂▄▅▃▁▂▂▂▂▂▂▄▄▃▃▃▄▄▄▄▃▄▃▄▄▄▄▃▄▄▄▄▄▄▄
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.28946
best_dev_f1,0.401
best_test_auroc,0.51203
best_test_f1,0.489
best_train_auroc,0.59132
best_train_f1,0.465
dev_auroc,0.29365
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: wnvaaxg8 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.00030915675121837916
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▃▁▂▂▁▂▄▃▂▃▃▃▄▄▃▄▆▇▅▅▇▆▆▇▇▇▇█▇█▇▆▆█▆▇▇▆▇
dev_f1,▁███████████████████████████████████████
dev_loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.43598
best_dev_f1,0.401
best_test_auroc,0.53506
best_test_f1,0.489
best_train_auroc,0.47109
best_train_f1,0.465
dev_auroc,0.49709
dev_f1,0.401
dev_loss,0.6933
epoch,200.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2l28clzi with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003116561762712287
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.024 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.246202…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.55745
best_dev_f1,0.558
best_test_auroc,0.53994
best_test_f1,0.53
best_train_auroc,0.57944
best_train_f1,0.564
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 1k6cwo36 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0028061255286392305
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
dev_auroc,▄▂▄▄▄▄▄▄▄▄▄▄▄▄▃▃▄▄▄▄▄▄▄▄▄▄▄▅▄▄▄▄▄█▄▄▄▄▁▄
dev_f1,▁███████████████████████████████████████
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█████████▇▅▄▄▄▃▄▃▃▂▂▃▂▂▂▁▂▃▂▁▂▂▃▁▁▁▂▂▁▁▁
train_loss_epoch,█▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,122.0
test_auroc,0.50197
test_f1,0.047
train_auroc,0.49982
train_f1,0.123
train_loss_batch,0.68115
train_loss_epoch,0.68


[34m[1mwandb[0m: Agent Starting Run: 8j2erszs with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0016472489030844078
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▁▄▅▅▅▄▆▇▆▅▄▅▃▅▅█▅▅▅▇▆▅▅▆▄▅▅▅▅▅▆▆▅▅▅▄▅▆▅
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.43169
best_dev_f1,0.401
best_test_auroc,0.5375
best_test_f1,0.489
best_train_auroc,0.49411
best_train_f1,0.465
dev_auroc,0.53025
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: v7l5pljz with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.006060635290245184
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.325810…

0,1
dev_auroc,█▁███▇███▇█▇▇▇▇▇▇█▇█▇█▇▇▇▇▇█▇█▇▇█▇▇▇▇█▇▇
dev_f1,▁███████████████████████████████████████
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_auroc,▁
test_f1,▁
train_auroc,▁
train_f1,▁
train_loss_batch,█▇▇▇▇▇▇▅▃█▇▇▆▅▅▅▄▄█▄▃▄▄▃▂▄▄▂▅▄▃▃▄▃▁▇▄▂▃▄
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
dev_auroc,0.49581
dev_f1,0.401
dev_loss,0.69315
epoch,94.0
test_auroc,0.5
test_f1,0.041
train_auroc,0.50001
train_f1,0.116
train_loss_batch,0.69295
train_loss_epoch,0.69301


[34m[1mwandb[0m: Agent Starting Run: hj7eyvrv with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004560788235047678
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▃▄▃▂▁▂▁▂▂▃▄▇▇▆▆▇██▆▇▇▇▇▇███▇████████████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▄▃▂▂▂▂▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.76115
best_dev_f1,0.401
best_test_auroc,0.56469
best_test_f1,0.489
best_train_auroc,0.60283
best_train_f1,0.465
dev_auroc,0.75756
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: qe6hurwy with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.001363840399343695
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 2048
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁███████████████████████████████████████
dev_f1,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁███████████████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.50617
best_dev_f1,0.428
best_test_auroc,0.50218
best_test_f1,0.494
best_train_auroc,0.5855
best_train_f1,0.604
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.97831
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: bcqrosbs with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.002487920710055503
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.332244…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁███▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇▇▇▇████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.50183
best_dev_f1,0.401
best_test_auroc,0.52874
best_test_f1,0.489
best_train_auroc,0.6239
best_train_f1,0.465
dev_auroc,0.70868
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: wxj8o985 with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 6.282257743912754e-05
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▂▄▅▆▆▇▇▇▇▇█████████████████████████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,1.83004
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 29qw7oqx with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: auroc
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.008902968466226619
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.2
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▃▁▅▄▅▅▄▆▆▄▄▄▄▆▅▇▆▇▆▆██▆█▆██▆▆▆███▇████
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,█▅▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.79918
best_dev_f1,0.401
best_test_auroc,0.48375
best_test_f1,0.489
best_train_auroc,0.55792
best_train_f1,0.465
dev_auroc,0.7857
dev_f1,0.401
dev_loss,0.69325
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: a0lbhlsk with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0034202157646548124
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 10
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 256
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: 	weighted_loss: True


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▅▅▄▄▅▅▅▅▆▆▆▆▆▆▆████████████████████████
dev_f1,▁▇▇▇▇█▅▇████▇███████████████████████████
dev_loss,▇█▅▃▂▂▂▂▂▂▁▁▂▂▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.87615
best_dev_f1,0.841
best_test_auroc,0.74557
best_test_f1,0.61
best_train_auroc,0.89002
best_train_f1,0.781
dev_auroc,0.92286
dev_f1,0.823
dev_loss,1.37956
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: 7z2cu4pj with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.0012393125676047312
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: True
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 1024
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.001
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▄▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇██████████
dev_f1,▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▆▆▆▇▇████████████████
dev_loss,█████▇▇▇▇▇▇▇▇▇▇▇▆▅▅▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.89911
best_dev_f1,0.82
best_test_auroc,0.60359
best_test_f1,0.581
best_train_auroc,0.84788
best_train_f1,0.733
dev_auroc,0.90102
dev_f1,0.818
dev_loss,0.62538
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: tlp5255r with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.003244107010490045
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 20
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: False
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.4
[34m[1mwandb[0m: 	vanilla_hidden_dim: 512
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
best_dev_auroc,▁
best_dev_f1,▁
best_test_auroc,▁
best_test_f1,▁
best_train_auroc,▁
best_train_f1,▁
dev_auroc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_f1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
dev_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
best_dev_auroc,0.5
best_dev_f1,0.401
best_test_auroc,0.5
best_test_f1,0.489
best_train_auroc,0.5
best_train_f1,0.465
dev_auroc,0.5
dev_f1,0.401
dev_loss,0.69315
epoch,200.0


[34m[1mwandb[0m: Agent Starting Run: ux2nrn3t with config:
[34m[1mwandb[0m: 	convert_nan: False
[34m[1mwandb[0m: 	dev_opt_metric: f1
[34m[1mwandb[0m: 	hb_model: False
[34m[1mwandb[0m: 	lr: 0.004497638378730614
[34m[1mwandb[0m: 	nan_value: -1
[34m[1mwandb[0m: 	neumiss_depth: 5
[34m[1mwandb[0m: 	neumiss_deq: False
[34m[1mwandb[0m: 	reinit_model: True
[34m[1mwandb[0m: 	vanilla_batchnorm: False
[34m[1mwandb[0m: 	vanilla_dropout: 0.1
[34m[1mwandb[0m: 	vanilla_hidden_dim: 128
[34m[1mwandb[0m: 	vanilla_n_hidden_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weighted_loss: False


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


## Sweeps TODO:
- vanilla_neumiss_deq
- vanilla_constant_0
- vanilla_constant_-1
- hb_neumiss
- hb_neumiss_deq
- hb_constant_0
- hb_constant_-1

In [None]:
# Report AUROC / F1 for the train, dev and test splits.
# The model is switched to eval mode and scored without gradient tracking.
hm_model.to(device)
hm_model.eval()
with torch.no_grad():
    print("----------------")
    print(
        "Train AUROC, F1:",
        evaluate(
            hm_model,
            X_train_tensor,
            y_train_tensor,
            batch_size=batch_size,
            overfit_one_batch=overfit_one_batch,
        ),
    )
    print(
        "Dev AUROC, F1:",
        evaluate(hm_model, X_val_tensor.to(device), y_val_tensor),
    )
    print(
        "Test AUROC, F1:",
        evaluate(hm_model, X_test_tensor.to(device), y_test_tensor),
    )
    print("----------------")

In [None]:
# Scratch notes only: this cell holds a bare string listing hyperparameter
# combinations that were tried; it performs no computation.
# NOTE(review): `wd` / `lr` presumably stand for weight decay / learning rate — confirm.
"""
Hyperparams tried:

- wd=0, lr=1e-3

- wd=1e-6, lr=1e-4
"""

In [None]:
# hummingbird model with no imputation
# All nan's

# hummingbird model (w/o FINE_TUNING) with no imputation
# Test AUROC,F1: (0.5001742767514814, 0.123)

In [None]:
# -------
# WITH FINE_TUNING=TRUE:

# hummingbird model trained with -1-imputation
# Test AUROC,F1: (0.8209078054716143, 0.719)

# hummingbird model with -1-imputation (no training)
# Test AUROC,F1: (0.7352596440016811, 0.522)

# -------
# WITH FINE_TUNING=FALSE:

# hummingbird model (w/o FINE_TUNING) with -1-imputation
# Test AUROC, F1: (0.9277387451790313, 0.788)

# -------
# WITH TEMPERATURE=1e-8:

# hummingbird model with -1-imputation (no training)
# Test AUROC, F1: (0.9002576493481701, 0.766)

# hummingbird model trained with -1-imputation
# Test AUROC, F1: (0.9735705699070798, 0.892)

In [None]:
# hummingbird model trained with 0-imputation
# Test AUROC,F1: (0.8258480238334197, 0.718)

# hummingbird model with 0-imputation (no training)
# Test AUROC,F1: (0.7909741574142352, 0.531)

# hummingbird model (w/o FINE_TUNING) with 0-imputation
# Test AUROC, F1: (0.9277387451790313, 0.788)

# -------
# WITH TEMPERATURE=1e-8:

# hummingbird model with 0-imputation (no training)
# Test AUROC, F1: (0.9468537228791971, 0.86)

# hummingbird model trained with 0-imputation
# Test AUROC, F1: (0.9766426615975862, 0.884)

In [None]:
# hummingbird model trained with -10000-imputation
# Test AUROC,F1: (0.8402508150638835, 0.684)

# hummingbird model with -10000-imputation (no training)
# Test AUROC,F1: (0.7339247733297103, 0.576)

# hummingbird model (w/o FINE_TUNING) with -10000-imputation
# Test AUROC, F1: (0.9277387451790313, 0.788)

# -------
# WITH TEMPERATURE=1e-8:

# hummingbird model with -10000-imputation (no training)
# Test AUROC, F1: (0.9002576493481701, 0.766)

# hummingbird model trained with -10000-imputation
# Test AUROC, F1: (0.9703568548980691, 0.891)

In [None]:
# lightgbm model with no imputation (i.e. with missing values)
# Test AUROC, F1: (0.9906858063325308, 0.934)

In [None]:
# lightgbm model with -1-imputation
# Test AUROC, F1: (0.9277387451790313, 0.788)

In [None]:
# lightgbm model with 0-imputation
# Test AUROC, F1: (0.9277387451790313, 0.788)

In [None]:
# lightgbm model with -10000-imputation
# Test AUROC, F1: (0.9277387451790313, 0.788)

In [None]:
# lightgbm model with +10000-imputation
# Test AUROC, F1: (0.8665702326967875, 0.685)

In [195]:
# Display the production model's classifier; its repr lists the hyperparameters it was configured with.
prod_model.classifier

LGBMClassifier(colsample_bytree=0.9476923894333807,
               learning_rate=0.09140456060593606, max_depth=45, metric='auc',
               min_child_samples=1779, min_child_weight=1.4729203134152992e-06,
               min_split_gain=1.184108047915407,
               monotone_constraints='1,1,1,0,0,0,0,1,1,1,0,1,0,-1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,-1,-1,-1,0,-1,0,0,0,0,0,0',
               monotone_constraints_method='advanced',
               monotone_penalty=1.009268715649836, n_estimators=1796, n_jobs=16,
               num_leaves=55, objective='binary', random_state=42,
               reg_alpha=7.44123492799962e-05,
               reg_lambda=1.8145145831930983e-05, subsample=0.7930734824387159,
               tree_learner='data', verbose=-1)

In [147]:
# Score the production LightGBM classifier on the test split and report
# AUROC plus macro-averaged F1 at a fixed probability threshold.
thresh_for_f1 = 0.5
y = y_test

# predict_proba output is indexed at column 1 to get the score used for ranking/thresholding
y_pred = prod_model.classifier.predict_proba(X_test_tensor.cpu().numpy())
y_prob = y_pred[:, 1]

fpr, tpr, _ = roc_curve(y, y_prob)
roc_auc = auc(fpr, tpr)

pr, rc, f1, _ = precision_recall_fscore_support(
    y,
    y_prob > thresh_for_f1,
    beta=1.0,
    average="macro",
)
print("LightGBM (AUROC, F1):", roc_auc, np.round(f1, 3))

LightGBM (AUROC, F1): 0.9906858063325308 0.934


In [111]:
# Hummingbird-converted model without any further fine-tuning and constant imputation.
# Both splits are scored by the same loop body so the threshold, averaging mode and
# output format cannot drift apart between test and dev.
hm_model.eval()
hm_model.to(device)

thresh_for_f1 = 0.5  # probability cut-off used to binarise predictions for F1

# Order matters: test first, then dev, so the printed lines (and the values left
# in `y`, `y_prob`, `roc_auc`, `f1`, ... after the cell) match a sequential run.
for split_name, X_split, y in (
    ("test", X_test_tensor, y_test),
    ("dev", X_val_tensor, y_val),
):
    with torch.no_grad():
        y_pred = hm_model(X_split.to(device))

    # The model output is indexed as [1][:, 1]: second element of the returned
    # pair, column 1 — presumably the positive-class probability (confirm
    # against the Hummingbird output format).
    y_prob = y_pred[1][:, 1].cpu().numpy()

    fpr, tpr, _ = roc_curve(y, y_prob)
    roc_auc = auc(fpr, tpr)
    pr, rc, f1, _ = precision_recall_fscore_support(
        y, y_prob > thresh_for_f1, beta=1.0, average="macro"
    )
    print(f"Hummingbird {split_name} (AUROC, F1):", roc_auc, np.round(f1, 3))

Hummingbird test (AUROC, F1): 0.4521736348042731 0.078
Hummingbird dev (AUROC, F1): 0.5465048542422006 0.085


In [18]:
# Quick sanity check on the dimensions of the test feature tensor.
X_test_tensor.shape

torch.Size([10000, 39])

In [None]:
# import inspect
# print(inspect.getsource(hm_model._operators[0].aggregate))