In [10]:
import math

import torch
import torch.nn as nn

from diffusers.optimization import get_scheduler
from tqdm import tqdm

import pandas as pd
import numpy as np

import random

import os
device = "cuda"
def set_seed(seed: int = 42) -> None:
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ["PYTHONHASHSEED"] = str(seed)
    print(f"Random seed set as {seed}")

set_seed(0)


Random seed set as 0


In [None]:
save_location = "/home/bhux/workplace/output/hptune/"
# save_location = "/media/bhux/alpha/xsd_mvp/test/"

# uniform, lg, gaussian

NUMERICAL = 12
CATEGORICAL = 0

INFILLING_TYPE = ''
NOISE_TYPE = ''

In [12]:
from utils import remove_outliers, norm, sample_local_gaussian, convert_categorical, unison_shuffled_copies

In [13]:
PATH = "./data/xtended_data_all.csv"
EMB_PATH = "./data/xtended_emb_all_deberta_pubchem.npy"

f = pd.read_csv(PATH)
drug_embeddings = np.load(EMB_PATH)
smiles = f['Drug'].values
vlists = {
    col: f[col].values for col in f.drop(labels=['Drug'], axis=1).columns[:NUMERICAL] 
}

inmask = remove_outliers([v for _,v in vlists.items()])
print(sum(inmask))
smiles = smiles[inmask]
vlists = {
    k: v[inmask] for k,v in vlists.items()
}

vlists = {
    k: norm(v) for k,v in vlists.items()
}

# for col in f.drop(labels=['Drug'], axis=1).columns[:NUMERICAL]:
#     vlists[col+"_cat"] = vlists[col]

nullmask = np.stack([
    np.isnan(v)==False for _,v in vlists.items()
    ], axis=-1)

dmss = []
for k,v in vlists.items():
    vlists[k], dms = sample_local_gaussian(v, numbins=15)
    dmss.append(dms)

for col in f.drop(labels=['Drug'], axis=1).columns[:NUMERICAL]:
    vlists[col+"_cat"] = convert_categorical(vlists[col], numbins=15)

# for col in f.drop(labels=['Drug'], axis=1).columns[NUMERICAL:]:
#     nan = np.isnan(vlists[col])
#     vlists[col] += 1
#     vlists[col][nan] = 0

# dmss = []
# for k,v in vlists.items():
#     dms = get_local_gaussian(v, numbins=50)
#     dmss.append(dms)

dataset = []
for i, gt in enumerate(zip(*[v for _,v in vlists.items()])):
    dataset.append({
        "sm": smiles[i],
        "ft": drug_embeddings[i],
        "ma": nullmask[i],
        "gt": np.array(gt),
        "od": np.array(gt[NUMERICAL:]),
    })
    # print(gt)
    # print(nullmask[i])
    # break

valCount = np.sum(nullmask, axis=0)*0.1
dataset, rcomb = unison_shuffled_copies(dataset, nullmask)
trdataset = []
valdataset = []
for c, d in zip(rcomb, dataset):
    inc = False
    for i, j in enumerate(list(c)):
        if j and valCount[i] > 0:
            valCount[i] -= 1
            inc = True
    if inc:
        valdataset.append(d)
    else:
        trdataset.append(d)

print(len(trdataset))
print(len(valdataset))
print(len(list(vlists.keys())))

28443
14
14
14
14
14
14
14
14
14
14
14
14
25603
2840
24


In [14]:
from torch.utils.data import Dataset

class GaucamolDataset(Dataset):
    def __init__(self, dataset) -> None:
        self.dataset = dataset
    
    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        return self.dataset[idx]
    
    def update(self, idx, delta):
        item = self.dataset[idx]["gt"]
        self.dataset[idx]["gt"] = item + delta

trainset = GaucamolDataset(trdataset)
valset = GaucamolDataset(valdataset)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=128, shuffle=True)
steps_per_epoch = len(trainset)
DMSS = dmss[:NUMERICAL]

In [15]:
def train(diffusion, ema, gamma, dataloader, optimizer, lr_scheduler, two_noise=False):
    diffusion.train()
    running_loss = 0
    global_step = 0
    for i, batch in enumerate(dataloader):
        ft = batch['ft'].to(device).float()
        gt = batch['gt'].to(device).float()
        od = batch['od'].to(device).long()
        mask = batch['ma'].to(device)
        bs = ft.shape[0]

        optimizer.zero_grad()
        loss_multi, loss_gauss = diffusion.mixed_loss(ft, gt, od, mask, DMSS)

        loss = loss_multi + loss_gauss
        
        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        ema.update_params(gamma)
        gamma = ema.update_gamma(global_step)

        running_loss += loss.item()
        global_step += 1
    return running_loss/global_step

In [16]:
from sklearn.metrics import mean_squared_error
import csv
from utils import ohe_to_categories

def evaluate(e, ema, dataloader):
    ema.ema_model.eval()
    before_mse = 0
    running_mse = 0
    global_step = 0
    vals = {}
    device = 'cuda'
    ema.ema_model.to(device)
    with torch.no_grad():
        for i, batch in enumerate(dataloader):
            sm = batch['sm']
            mask = batch['ma'].repeat(1,2)
            ft = batch['ft'].to(device).float()
            gt = batch['gt'].to(device).float()
            od = batch['od'].to(device).long()
            bs = ft.shape[0]

            x_in, generated_ys = ema.ema_model.sample(ft, bs, od, DMSS, clip_sample=True)

            raw_mse = mean_squared_error(gt[mask].flatten().cpu(), x_in[mask].flatten().cpu())
            mse = mean_squared_error(gt[mask].flatten().cpu(), generated_ys[mask].flatten().cpu())

            for s, g in zip(sm, list(generated_ys.cpu().numpy())):
                vals[s] = g
            
            before_mse += raw_mse
            running_mse += mse
            global_step += 1

    with open(save_location+'{}_dict.csv'.format(e), 'w') as csv_file:  
        writer = csv.writer(csv_file)
        for key, value in vals.items():
            writer.writerow([key, value])

    return running_mse / global_step, before_mse / global_step
            

In [17]:
import logging
import sys
import optuna
from sdt import SDT
from diffusion import GaussianMultinomialDiffusion
torch.set_printoptions(profile="full")
from ema import EMA

num_epochs = 10

def objective(trial):
    lr = trial.suggest_float(
        "lr", 1e-5, 1e-2, log=True
    ) #0.0005

    wd = trial.suggest_float(
        "wd", 1e-5, 1e-2, log=True
    ) #1e-4

    warmup = trial.suggest_categorical(
        "warmup", [50, 100, 150, 200, 250, 300]
    ) #200

    gamma = trial.suggest_float(
        "gamma", 0.97, 0.999, log=True
    ) #0.994

    total_num_steps = (steps_per_epoch * num_epochs)

    model = SDT(
        time_dim = trial.suggest_categorical(
            "time_dim", [16, 32, 64]
        ), #64,
        cond_size = 768,
        patch_size = trial.suggest_categorical(
            "patch_size", [8, 16, 32, 64]
        ), #16
        y_dim = NUMERICAL+15*(NUMERICAL+CATEGORICAL),
        dim = 768,
        depth = trial.suggest_int(
            "depth", 4, 12
        ), #8,
        heads = trial.suggest_int(
            "heads", 4, 12
        ), #8,
        mlp_dim = trial.suggest_categorical(
            "mlp_dim", [256, 512, 768, 1024]
        ), #768,
        dropout = 0.1,
        emb_dropout = trial.suggest_float(
            "emb_dropout", 0, 0.2
        ), #0.1,
        num_classes = 15,
    )
    model.to(device)

    total_params = sum(p.numel() for p in model.parameters())
    print(f"Number of parameters: {total_params}")

    diffusion = GaussianMultinomialDiffusion(
        num_classes = np.array([15 for _ in range(NUMERICAL+CATEGORICAL)]),
        num_numerical_features = NUMERICAL,
        denoise_fn = model,
        device = device,
    )
    diffusion.to(device)

    ema = EMA(diffusion, gamma, total_num_steps)

    optimizer = torch.optim.AdamW(
            model.parameters(),
            lr=lr,
            weight_decay=wd,
        )

    lr_scheduler = get_scheduler(
            trial.suggest_categorical(
                "schedule", 
                [
                    "cosine", 
                    "linear", 
                    "cosine_with_restarts", 
                    "constant", 
                    "constant_with_warmup",
                ]
            ), #"cosine",
            optimizer=optimizer,
            num_warmup_steps=warmup,
            num_training_steps=total_num_steps,
        )
    
    l = ""
    loss = 0
    for e in range(num_epochs):
        loss = train(diffusion, ema, gamma, trainloader, optimizer, lr_scheduler)

    mse, bmse = evaluate(e, ema, valloader)
    return mse / bmse


In [18]:
import pickle
optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))
study_name = save_location+"2024-10-28-optimize-ximagand"  # Unique identifier of the study.
storage_name = "sqlite:///{}.db".format(study_name)

study = optuna.create_study(study_name=study_name, storage=storage_name, direction="minimize", load_if_exists=True)
study.optimize(objective, n_trials=1000)

trial = study.best_trial

print("Accuracy: {}".format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

with open(save_location+"2024-10-28-sampler.pkl", "wb") as fout:
    pickle.dump(study.sampler, fout)

[I 2024-10-28 12:40:50,246] A new study created in RDB with name: /home/bhux/workplace/output/hptune/2024-10-28-optimize-ximagand


A new study created in RDB with name: /home/bhux/workplace/output/hptune/2024-10-28-optimize-ximagand
Number of parameters: 29971392
torch.Size([180])


[I 2024-10-28 12:42:25,755] Trial 0 finished with value: 0.7338032230201502 and parameters: {'lr': 0.005661939082123917, 'wd': 3.844047186212455e-05, 'warmup': 150, 'gamma': 0.9969809598397036, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.15517643554336202, 'schedule': 'linear'}. Best is trial 0 with value: 0.7338032230201502.


Trial 0 finished with value: 0.7338032230201502 and parameters: {'lr': 0.005661939082123917, 'wd': 3.844047186212455e-05, 'warmup': 150, 'gamma': 0.9969809598397036, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.15517643554336202, 'schedule': 'linear'}. Best is trial 0 with value: 0.7338032230201502.
Number of parameters: 13231744
torch.Size([180])


[I 2024-10-28 12:43:33,058] Trial 1 finished with value: 0.7491792282284524 and parameters: {'lr': 0.0010003508350898316, 'wd': 0.0031310129043516394, 'warmup': 250, 'gamma': 0.9892668983404174, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.18459599241966998, 'schedule': 'constant_with_warmup'}. Best is trial 0 with value: 0.7338032230201502.


Trial 1 finished with value: 0.7491792282284524 and parameters: {'lr': 0.0010003508350898316, 'wd': 0.0031310129043516394, 'warmup': 250, 'gamma': 0.9892668983404174, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.18459599241966998, 'schedule': 'constant_with_warmup'}. Best is trial 0 with value: 0.7338032230201502.
Number of parameters: 40723408
torch.Size([180])


[I 2024-10-28 12:50:46,706] Trial 2 finished with value: 0.7632330752097526 and parameters: {'lr': 0.00024892691711846565, 'wd': 1.3048712648135097e-05, 'warmup': 200, 'gamma': 0.9745219753500601, 'time_dim': 32, 'patch_size': 8, 'depth': 12, 'heads': 5, 'mlp_dim': 768, 'emb_dropout': 0.15104404120242576, 'schedule': 'linear'}. Best is trial 0 with value: 0.7338032230201502.


Trial 2 finished with value: 0.7632330752097526 and parameters: {'lr': 0.00024892691711846565, 'wd': 1.3048712648135097e-05, 'warmup': 200, 'gamma': 0.9745219753500601, 'time_dim': 32, 'patch_size': 8, 'depth': 12, 'heads': 5, 'mlp_dim': 768, 'emb_dropout': 0.15104404120242576, 'schedule': 'linear'}. Best is trial 0 with value: 0.7338032230201502.
Number of parameters: 24062304
torch.Size([180])


[I 2024-10-28 12:53:10,212] Trial 3 finished with value: 0.7619196799926412 and parameters: {'lr': 0.0014050523084509884, 'wd': 7.545206212612988e-05, 'warmup': 200, 'gamma': 0.9943183043506232, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.15249020054475207, 'schedule': 'constant_with_warmup'}. Best is trial 0 with value: 0.7338032230201502.


Trial 3 finished with value: 0.7619196799926412 and parameters: {'lr': 0.0014050523084509884, 'wd': 7.545206212612988e-05, 'warmup': 200, 'gamma': 0.9943183043506232, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.15249020054475207, 'schedule': 'constant_with_warmup'}. Best is trial 0 with value: 0.7338032230201502.
Number of parameters: 36658048
torch.Size([180])


[I 2024-10-28 12:55:28,500] Trial 4 finished with value: 0.7273139397392495 and parameters: {'lr': 9.293595528126885e-05, 'wd': 1.356567737410119e-05, 'warmup': 300, 'gamma': 0.9920334994621506, 'time_dim': 16, 'patch_size': 32, 'depth': 8, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.10225608812944638, 'schedule': 'constant_with_warmup'}. Best is trial 4 with value: 0.7273139397392495.


Trial 4 finished with value: 0.7273139397392495 and parameters: {'lr': 9.293595528126885e-05, 'wd': 1.356567737410119e-05, 'warmup': 300, 'gamma': 0.9920334994621506, 'time_dim': 16, 'patch_size': 32, 'depth': 8, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.10225608812944638, 'schedule': 'constant_with_warmup'}. Best is trial 4 with value: 0.7273139397392495.
Number of parameters: 20994016
torch.Size([180])


[I 2024-10-28 12:57:49,311] Trial 5 finished with value: 0.8052943772133337 and parameters: {'lr': 3.3493427211827755e-05, 'wd': 0.0007414742521148054, 'warmup': 300, 'gamma': 0.9730516685348614, 'time_dim': 32, 'patch_size': 16, 'depth': 8, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.17340614284276162, 'schedule': 'constant'}. Best is trial 4 with value: 0.7273139397392495.


Trial 5 finished with value: 0.8052943772133337 and parameters: {'lr': 3.3493427211827755e-05, 'wd': 0.0007414742521148054, 'warmup': 300, 'gamma': 0.9730516685348614, 'time_dim': 32, 'patch_size': 16, 'depth': 8, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.17340614284276162, 'schedule': 'constant'}. Best is trial 4 with value: 0.7273139397392495.
Number of parameters: 33581888
torch.Size([180])


[I 2024-10-28 12:59:33,311] Trial 6 finished with value: 0.7343366019937063 and parameters: {'lr': 0.002579869678950948, 'wd': 0.001351757426854936, 'warmup': 250, 'gamma': 0.995594432842274, 'time_dim': 32, 'patch_size': 64, 'depth': 7, 'heads': 12, 'mlp_dim': 768, 'emb_dropout': 0.17987947419899572, 'schedule': 'linear'}. Best is trial 4 with value: 0.7273139397392495.


Trial 6 finished with value: 0.7343366019937063 and parameters: {'lr': 0.002579869678950948, 'wd': 0.001351757426854936, 'warmup': 250, 'gamma': 0.995594432842274, 'time_dim': 32, 'patch_size': 64, 'depth': 7, 'heads': 12, 'mlp_dim': 768, 'emb_dropout': 0.17987947419899572, 'schedule': 'linear'}. Best is trial 4 with value: 0.7273139397392495.
Number of parameters: 43756416
torch.Size([180])


[I 2024-10-28 13:02:25,354] Trial 7 finished with value: 0.7863239660973019 and parameters: {'lr': 2.7889526952386237e-05, 'wd': 0.00013027474377899366, 'warmup': 250, 'gamma': 0.9734925249508066, 'time_dim': 16, 'patch_size': 32, 'depth': 11, 'heads': 10, 'mlp_dim': 512, 'emb_dropout': 0.010451303906483989, 'schedule': 'cosine_with_restarts'}. Best is trial 4 with value: 0.7273139397392495.


Trial 7 finished with value: 0.7863239660973019 and parameters: {'lr': 2.7889526952386237e-05, 'wd': 0.00013027474377899366, 'warmup': 250, 'gamma': 0.9734925249508066, 'time_dim': 16, 'patch_size': 32, 'depth': 11, 'heads': 10, 'mlp_dim': 512, 'emb_dropout': 0.010451303906483989, 'schedule': 'cosine_with_restarts'}. Best is trial 4 with value: 0.7273139397392495.
Number of parameters: 12277584
torch.Size([180])


[I 2024-10-28 13:04:59,496] Trial 8 finished with value: 0.762516857590855 and parameters: {'lr': 2.8364168258611997e-05, 'wd': 1.4436006001290992e-05, 'warmup': 150, 'gamma': 0.9734304774095697, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.0006075728020662474, 'schedule': 'linear'}. Best is trial 4 with value: 0.7273139397392495.


Trial 8 finished with value: 0.762516857590855 and parameters: {'lr': 2.8364168258611997e-05, 'wd': 1.4436006001290992e-05, 'warmup': 150, 'gamma': 0.9734304774095697, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.0006075728020662474, 'schedule': 'linear'}. Best is trial 4 with value: 0.7273139397392495.
Number of parameters: 20755776
torch.Size([180])


[I 2024-10-28 13:06:13,207] Trial 9 finished with value: 0.737636363374086 and parameters: {'lr': 4.3537347365572585e-05, 'wd': 1.6125003547860836e-05, 'warmup': 150, 'gamma': 0.9864126553672399, 'time_dim': 64, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1066887327217406, 'schedule': 'cosine_with_restarts'}. Best is trial 4 with value: 0.7273139397392495.


Trial 9 finished with value: 0.737636363374086 and parameters: {'lr': 4.3537347365572585e-05, 'wd': 1.6125003547860836e-05, 'warmup': 150, 'gamma': 0.9864126553672399, 'time_dim': 64, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1066887327217406, 'schedule': 'cosine_with_restarts'}. Best is trial 4 with value: 0.7273139397392495.
Number of parameters: 41413376
torch.Size([180])


[I 2024-10-28 13:08:47,059] Trial 10 finished with value: 0.7473570434112146 and parameters: {'lr': 0.00015797225327019514, 'wd': 0.0003112129492285367, 'warmup': 300, 'gamma': 0.9801849806511167, 'time_dim': 64, 'patch_size': 32, 'depth': 9, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.055350660186377215, 'schedule': 'cosine'}. Best is trial 4 with value: 0.7273139397392495.


Trial 10 finished with value: 0.7473570434112146 and parameters: {'lr': 0.00015797225327019514, 'wd': 0.0003112129492285367, 'warmup': 300, 'gamma': 0.9801849806511167, 'time_dim': 64, 'patch_size': 32, 'depth': 9, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.055350660186377215, 'schedule': 'cosine'}. Best is trial 4 with value: 0.7273139397392495.
Number of parameters: 44743104
torch.Size([180])


[I 2024-10-28 13:10:57,299] Trial 11 finished with value: 0.6859774391857464 and parameters: {'lr': 0.005441930295349724, 'wd': 5.639829056341927e-05, 'warmup': 100, 'gamma': 0.9982782790695918, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.1027569619707743, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 11 finished with value: 0.6859774391857464 and parameters: {'lr': 0.005441930295349724, 'wd': 5.639829056341927e-05, 'warmup': 100, 'gamma': 0.9982782790695918, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.1027569619707743, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 47700928
torch.Size([180])


[I 2024-10-28 13:13:14,673] Trial 12 finished with value: 0.7083014152131848 and parameters: {'lr': 0.0001525187642819598, 'wd': 4.885404136506796e-05, 'warmup': 100, 'gamma': 0.9911973663163559, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.10174541185910546, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 12 finished with value: 0.7083014152131848 and parameters: {'lr': 0.0001525187642819598, 'wd': 4.885404136506796e-05, 'warmup': 100, 'gamma': 0.9911973663163559, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.10174541185910546, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 51633088
torch.Size([180])


[I 2024-10-28 13:15:41,519] Trial 13 finished with value: 0.7523992400211797 and parameters: {'lr': 0.0005422526756646062, 'wd': 0.00015919140756394848, 'warmup': 100, 'gamma': 0.9989514209986641, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 12, 'mlp_dim': 1024, 'emb_dropout': 0.07284092200628173, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 13 finished with value: 0.7523992400211797 and parameters: {'lr': 0.0005422526756646062, 'wd': 0.00015919140756394848, 'warmup': 100, 'gamma': 0.9989514209986641, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 12, 'mlp_dim': 1024, 'emb_dropout': 0.07284092200628173, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 47925568
torch.Size([180])


[I 2024-10-28 13:17:59,685] Trial 14 finished with value: 0.758313064589365 and parameters: {'lr': 0.007960867970842942, 'wd': 5.367121834944494e-05, 'warmup': 100, 'gamma': 0.9908154899470053, 'time_dim': 64, 'patch_size': 64, 'depth': 10, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12386426609462967, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 14 finished with value: 0.758313064589365 and parameters: {'lr': 0.007960867970842942, 'wd': 5.367121834944494e-05, 'warmup': 100, 'gamma': 0.9908154899470053, 'time_dim': 64, 'patch_size': 64, 'depth': 10, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12386426609462967, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 49667008
torch.Size([180])


[I 2024-10-28 13:20:21,862] Trial 15 finished with value: 0.7529823423969934 and parameters: {'lr': 0.00043759247853938595, 'wd': 0.007709361814481995, 'warmup': 50, 'gamma': 0.9815844206982035, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.07286432600145525, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 15 finished with value: 0.7529823423969934 and parameters: {'lr': 0.00043759247853938595, 'wd': 0.007709361814481995, 'warmup': 50, 'gamma': 0.9815844206982035, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.07286432600145525, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 57155520
torch.Size([180])


[I 2024-10-28 13:23:02,571] Trial 16 finished with value: 0.799551075465544 and parameters: {'lr': 1.0444082071195577e-05, 'wd': 3.3495581048553866e-05, 'warmup': 100, 'gamma': 0.9860506412211432, 'time_dim': 16, 'patch_size': 64, 'depth': 12, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.053246517415882745, 'schedule': 'constant'}. Best is trial 11 with value: 0.6859774391857464.


Trial 16 finished with value: 0.799551075465544 and parameters: {'lr': 1.0444082071195577e-05, 'wd': 3.3495581048553866e-05, 'warmup': 100, 'gamma': 0.9860506412211432, 'time_dim': 16, 'patch_size': 64, 'depth': 12, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.053246517415882745, 'schedule': 'constant'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 44743104
torch.Size([180])


[I 2024-10-28 13:25:12,398] Trial 17 finished with value: 0.7249686851501063 and parameters: {'lr': 0.00010841733711936918, 'wd': 0.00017736064552899552, 'warmup': 100, 'gamma': 0.9935769664260916, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.12242932662464531, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 17 finished with value: 0.7249686851501063 and parameters: {'lr': 0.00010841733711936918, 'wd': 0.00017736064552899552, 'warmup': 100, 'gamma': 0.9935769664260916, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.12242932662464531, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 32567520
torch.Size([180])


[I 2024-10-28 13:28:27,277] Trial 18 finished with value: 0.742313193412558 and parameters: {'lr': 0.001896985455402169, 'wd': 0.0005783510372613634, 'warmup': 100, 'gamma': 0.9887486022251647, 'time_dim': 64, 'patch_size': 16, 'depth': 9, 'heads': 8, 'mlp_dim': 512, 'emb_dropout': 0.08379050652753986, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 18 finished with value: 0.742313193412558 and parameters: {'lr': 0.001896985455402169, 'wd': 0.0005783510372613634, 'warmup': 100, 'gamma': 0.9887486022251647, 'time_dim': 64, 'patch_size': 16, 'depth': 9, 'heads': 8, 'mlp_dim': 512, 'emb_dropout': 0.08379050652753986, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 56839632
torch.Size([180])


[I 2024-10-28 13:38:05,412] Trial 19 finished with value: 0.713622757369376 and parameters: {'lr': 0.0032467300690600487, 'wd': 6.648274189329374e-05, 'warmup': 50, 'gamma': 0.9989705660645277, 'time_dim': 32, 'patch_size': 8, 'depth': 11, 'heads': 12, 'mlp_dim': 1024, 'emb_dropout': 0.12464191125083529, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 19 finished with value: 0.713622757369376 and parameters: {'lr': 0.0032467300690600487, 'wd': 6.648274189329374e-05, 'warmup': 50, 'gamma': 0.9989705660645277, 'time_dim': 32, 'patch_size': 8, 'depth': 11, 'heads': 12, 'mlp_dim': 1024, 'emb_dropout': 0.12464191125083529, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 50265536
torch.Size([180])


[I 2024-10-28 13:40:30,130] Trial 20 finished with value: 0.6951781418232187 and parameters: {'lr': 0.0006663806372966342, 'wd': 2.81316882087653e-05, 'warmup': 100, 'gamma': 0.9809920790351045, 'time_dim': 16, 'patch_size': 64, 'depth': 11, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.028948634430787643, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 20 finished with value: 0.6951781418232187 and parameters: {'lr': 0.0006663806372966342, 'wd': 2.81316882087653e-05, 'warmup': 100, 'gamma': 0.9809920790351045, 'time_dim': 16, 'patch_size': 64, 'depth': 11, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.028948634430787643, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 50265536
torch.Size([180])


[I 2024-10-28 13:42:54,936] Trial 21 finished with value: 0.728818241667366 and parameters: {'lr': 0.0008007278412135711, 'wd': 2.7179258806286144e-05, 'warmup': 100, 'gamma': 0.9796309927976998, 'time_dim': 16, 'patch_size': 64, 'depth': 11, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.03713896015196602, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 21 finished with value: 0.728818241667366 and parameters: {'lr': 0.0008007278412135711, 'wd': 2.7179258806286144e-05, 'warmup': 100, 'gamma': 0.9796309927976998, 'time_dim': 16, 'patch_size': 64, 'depth': 11, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.03713896015196602, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 47700928
torch.Size([180])


[I 2024-10-28 13:45:12,430] Trial 22 finished with value: 0.7366810507017749 and parameters: {'lr': 0.0002518732587444511, 'wd': 0.00010972613492323671, 'warmup': 100, 'gamma': 0.9774502331340503, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.025085105251710246, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 22 finished with value: 0.7366810507017749 and parameters: {'lr': 0.0002518732587444511, 'wd': 0.00010972613492323671, 'warmup': 100, 'gamma': 0.9774502331340503, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.025085105251710246, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 39434688
torch.Size([180])


[I 2024-10-28 13:47:11,150] Trial 23 finished with value: 0.694896524034381 and parameters: {'lr': 0.003542572308520106, 'wd': 2.617307061596788e-05, 'warmup': 100, 'gamma': 0.9831761961717884, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.09137868125521614, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 23 finished with value: 0.694896524034381 and parameters: {'lr': 0.003542572308520106, 'wd': 2.617307061596788e-05, 'warmup': 100, 'gamma': 0.9831761961717884, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.09137868125521614, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 29390272
torch.Size([180])


[I 2024-10-28 13:48:44,282] Trial 24 finished with value: 0.7820669303981299 and parameters: {'lr': 0.004663590253808788, 'wd': 2.5595704503199547e-05, 'warmup': 100, 'gamma': 0.9837701118029007, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.08628319733752926, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 24 finished with value: 0.7820669303981299 and parameters: {'lr': 0.004663590253808788, 'wd': 2.5595704503199547e-05, 'warmup': 100, 'gamma': 0.9837701118029007, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.08628319733752926, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 31954880
torch.Size([180])


[I 2024-10-28 13:50:23,652] Trial 25 finished with value: 0.781675378927512 and parameters: {'lr': 0.009411592808850755, 'wd': 2.750005607088566e-05, 'warmup': 100, 'gamma': 0.9833252866675231, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.05064754454212152, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 25 finished with value: 0.781675378927512 and parameters: {'lr': 0.009411592808850755, 'wd': 2.750005607088566e-05, 'warmup': 100, 'gamma': 0.9833252866675231, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.05064754454212152, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 21733056
torch.Size([180])


[I 2024-10-28 13:51:43,938] Trial 26 finished with value: 0.7475641975307256 and parameters: {'lr': 0.00374355226071992, 'wd': 1.0168982910789228e-05, 'warmup': 100, 'gamma': 0.9788436904988241, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.021983521959056646, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 26 finished with value: 0.7475641975307256 and parameters: {'lr': 0.00374355226071992, 'wd': 1.0168982910789228e-05, 'warmup': 100, 'gamma': 0.9788436904988241, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.021983521959056646, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 39437152
torch.Size([180])


[I 2024-10-28 13:55:38,144] Trial 27 finished with value: 0.7319676320291628 and parameters: {'lr': 0.0018149858339579841, 'wd': 0.0003051017013656468, 'warmup': 200, 'gamma': 0.976858780632923, 'time_dim': 16, 'patch_size': 16, 'depth': 11, 'heads': 8, 'mlp_dim': 512, 'emb_dropout': 0.19942935082730956, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 27 finished with value: 0.7319676320291628 and parameters: {'lr': 0.0018149858339579841, 'wd': 0.0003051017013656468, 'warmup': 200, 'gamma': 0.976858780632923, 'time_dim': 16, 'patch_size': 16, 'depth': 11, 'heads': 8, 'mlp_dim': 512, 'emb_dropout': 0.19942935082730956, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 50320592
torch.Size([180])


[I 2024-10-28 14:04:25,004] Trial 28 finished with value: 0.7750236838718921 and parameters: {'lr': 0.0009144138243761503, 'wd': 7.919697401505555e-05, 'warmup': 50, 'gamma': 0.97031298319678, 'time_dim': 64, 'patch_size': 8, 'depth': 12, 'heads': 9, 'mlp_dim': 768, 'emb_dropout': 0.06766141248739328, 'schedule': 'cosine_with_restarts'}. Best is trial 11 with value: 0.6859774391857464.


Trial 28 finished with value: 0.7750236838718921 and parameters: {'lr': 0.0009144138243761503, 'wd': 7.919697401505555e-05, 'warmup': 50, 'gamma': 0.97031298319678, 'time_dim': 64, 'patch_size': 8, 'depth': 12, 'heads': 9, 'mlp_dim': 768, 'emb_dropout': 0.06766141248739328, 'schedule': 'cosine_with_restarts'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 34959936
torch.Size([180])


[I 2024-10-28 14:06:11,871] Trial 29 finished with value: 0.747696770508552 and parameters: {'lr': 0.005211537801230174, 'wd': 2.2116092479813012e-05, 'warmup': 150, 'gamma': 0.9863683037863661, 'time_dim': 32, 'patch_size': 64, 'depth': 7, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.11403671199286108, 'schedule': 'constant'}. Best is trial 11 with value: 0.6859774391857464.


Trial 29 finished with value: 0.747696770508552 and parameters: {'lr': 0.005211537801230174, 'wd': 2.2116092479813012e-05, 'warmup': 150, 'gamma': 0.9863683037863661, 'time_dim': 32, 'patch_size': 64, 'depth': 7, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.11403671199286108, 'schedule': 'constant'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 31954880
torch.Size([180])


[I 2024-10-28 14:07:51,278] Trial 30 finished with value: 0.6932763971169544 and parameters: {'lr': 0.006226919271419465, 'wd': 3.8591777922339986e-05, 'warmup': 100, 'gamma': 0.9822539096390442, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.13233140509424557, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 30 finished with value: 0.6932763971169544 and parameters: {'lr': 0.006226919271419465, 'wd': 3.8591777922339986e-05, 'warmup': 100, 'gamma': 0.9822539096390442, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.13233140509424557, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 31954880
torch.Size([180])


[I 2024-10-28 14:09:30,659] Trial 31 finished with value: 0.7790411551266948 and parameters: {'lr': 0.006760692169982902, 'wd': 4.298874979251599e-05, 'warmup': 100, 'gamma': 0.9825524427224082, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.13478885503382804, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 31 finished with value: 0.7790411551266948 and parameters: {'lr': 0.006760692169982902, 'wd': 4.298874979251599e-05, 'warmup': 100, 'gamma': 0.9825524427224082, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.13478885503382804, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 35895744
torch.Size([180])


[I 2024-10-28 14:11:20,002] Trial 32 finished with value: 0.7556610490757188 and parameters: {'lr': 0.002543316090989935, 'wd': 4.224673868543385e-05, 'warmup': 100, 'gamma': 0.9852649858074765, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.13702105068192894, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 32 finished with value: 0.7556610490757188 and parameters: {'lr': 0.002543316090989935, 'wd': 4.224673868543385e-05, 'warmup': 100, 'gamma': 0.9852649858074765, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.13702105068192894, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 33527744
torch.Size([180])


[I 2024-10-28 14:13:04,244] Trial 33 finished with value: 0.7503006699533977 and parameters: {'lr': 0.005602728785275595, 'wd': 1.9633861934440417e-05, 'warmup': 100, 'gamma': 0.9812163293177676, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.08674441676269282, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 33 finished with value: 0.7503006699533977 and parameters: {'lr': 0.005602728785275595, 'wd': 1.9633861934440417e-05, 'warmup': 100, 'gamma': 0.9812163293177676, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.08674441676269282, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 21713856
torch.Size([180])


[I 2024-10-28 14:14:17,046] Trial 34 finished with value: 0.7311980724951822 and parameters: {'lr': 0.0012915311269537828, 'wd': 7.064792156209113e-05, 'warmup': 250, 'gamma': 0.98762655714276, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.14188493479319722, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 34 finished with value: 0.7311980724951822 and parameters: {'lr': 0.0012915311269537828, 'wd': 7.064792156209113e-05, 'warmup': 250, 'gamma': 0.98762655714276, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.14188493479319722, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 27996800
torch.Size([180])


[I 2024-10-28 14:16:08,271] Trial 35 finished with value: 0.7091173871901076 and parameters: {'lr': 0.003348708240945003, 'wd': 9.345494405630152e-05, 'warmup': 200, 'gamma': 0.9773298793527013, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.15796592135283444, 'schedule': 'linear'}. Best is trial 11 with value: 0.6859774391857464.


Trial 35 finished with value: 0.7091173871901076 and parameters: {'lr': 0.003348708240945003, 'wd': 9.345494405630152e-05, 'warmup': 200, 'gamma': 0.9773298793527013, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.15796592135283444, 'schedule': 'linear'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 23523920
torch.Size([180])


[I 2024-10-28 14:20:54,479] Trial 36 finished with value: 0.7381580778833372 and parameters: {'lr': 0.009301334918335371, 'wd': 3.564200661328304e-05, 'warmup': 100, 'gamma': 0.9843603372677948, 'time_dim': 16, 'patch_size': 8, 'depth': 9, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.16168067712024467, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 36 finished with value: 0.7381580778833372 and parameters: {'lr': 0.009301334918335371, 'wd': 3.564200661328304e-05, 'warmup': 100, 'gamma': 0.9843603372677948, 'time_dim': 16, 'patch_size': 8, 'depth': 9, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.16168067712024467, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 48158176
torch.Size([180])


[I 2024-10-28 14:25:21,985] Trial 37 finished with value: 0.7756022389824324 and parameters: {'lr': 0.0006141040903007937, 'wd': 1.0118725430486383e-05, 'warmup': 300, 'gamma': 0.9756174818203615, 'time_dim': 32, 'patch_size': 16, 'depth': 11, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.09637857777381106, 'schedule': 'constant'}. Best is trial 11 with value: 0.6859774391857464.


Trial 37 finished with value: 0.7756022389824324 and parameters: {'lr': 0.0006141040903007937, 'wd': 1.0118725430486383e-05, 'warmup': 300, 'gamma': 0.9756174818203615, 'time_dim': 32, 'patch_size': 16, 'depth': 11, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.09637857777381106, 'schedule': 'constant'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 33525696
torch.Size([180])


[I 2024-10-28 14:27:08,095] Trial 38 finished with value: 0.781918579167868 and parameters: {'lr': 0.0013680012839534974, 'wd': 0.00021725702443719982, 'warmup': 100, 'gamma': 0.9821662689323526, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 9, 'mlp_dim': 768, 'emb_dropout': 0.039440218118452844, 'schedule': 'cosine_with_restarts'}. Best is trial 11 with value: 0.6859774391857464.


Trial 38 finished with value: 0.781918579167868 and parameters: {'lr': 0.0013680012839534974, 'wd': 0.00021725702443719982, 'warmup': 100, 'gamma': 0.9821662689323526, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 9, 'mlp_dim': 768, 'emb_dropout': 0.039440218118452844, 'schedule': 'cosine_with_restarts'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 25032064
torch.Size([180])


[I 2024-10-28 14:28:48,384] Trial 39 finished with value: 0.7414759227188322 and parameters: {'lr': 0.0003534678154766577, 'wd': 1.684662590500454e-05, 'warmup': 250, 'gamma': 0.980419443235974, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.11298240879087668, 'schedule': 'linear'}. Best is trial 11 with value: 0.6859774391857464.


Trial 39 finished with value: 0.7414759227188322 and parameters: {'lr': 0.0003534678154766577, 'wd': 1.684662590500454e-05, 'warmup': 250, 'gamma': 0.980419443235974, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.11298240879087668, 'schedule': 'linear'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 19815744
torch.Size([180])


[I 2024-10-28 14:30:01,587] Trial 40 finished with value: 0.754401110815266 and parameters: {'lr': 0.0021980604086777196, 'wd': 0.0023345610810899097, 'warmup': 200, 'gamma': 0.978654882226768, 'time_dim': 32, 'patch_size': 64, 'depth': 7, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16699026763545832, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 40 finished with value: 0.754401110815266 and parameters: {'lr': 0.0021980604086777196, 'wd': 0.0023345610810899097, 'warmup': 200, 'gamma': 0.978654882226768, 'time_dim': 32, 'patch_size': 64, 'depth': 7, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16699026763545832, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 47700928
torch.Size([180])


[I 2024-10-28 14:32:19,643] Trial 41 finished with value: 0.7403232108291004 and parameters: {'lr': 0.00017117177537995828, 'wd': 4.929454585772789e-05, 'warmup': 100, 'gamma': 0.9906217410502587, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.10818031921221793, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 41 finished with value: 0.7403232108291004 and parameters: {'lr': 0.00017117177537995828, 'wd': 4.929454585772789e-05, 'warmup': 100, 'gamma': 0.9906217410502587, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.10818031921221793, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 45734848
torch.Size([180])


[I 2024-10-28 14:34:34,276] Trial 42 finished with value: 0.7666034397087972 and parameters: {'lr': 4.9857960125435544e-05, 'wd': 5.198024505230063e-05, 'warmup': 100, 'gamma': 0.9958155471077115, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.09633755579124845, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 42 finished with value: 0.7666034397087972 and parameters: {'lr': 4.9857960125435544e-05, 'wd': 5.198024505230063e-05, 'warmup': 100, 'gamma': 0.9958155471077115, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.09633755579124845, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 42973632
torch.Size([180])


[I 2024-10-28 14:36:40,294] Trial 43 finished with value: 0.7061315163859572 and parameters: {'lr': 7.984325022235208e-05, 'wd': 3.249128311157968e-05, 'warmup': 100, 'gamma': 0.9929571675120732, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1421477707699611, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 43 finished with value: 0.7061315163859572 and parameters: {'lr': 7.984325022235208e-05, 'wd': 3.249128311157968e-05, 'warmup': 100, 'gamma': 0.9929571675120732, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1421477707699611, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 37660608
torch.Size([180])


[I 2024-10-28 14:38:39,194] Trial 44 finished with value: 0.7318539827051663 and parameters: {'lr': 7.130910072090279e-05, 'wd': 1.9740608520776504e-05, 'warmup': 150, 'gamma': 0.993664215414738, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 11, 'mlp_dim': 512, 'emb_dropout': 0.14535764609085589, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 44 finished with value: 0.7318539827051663 and parameters: {'lr': 7.130910072090279e-05, 'wd': 1.9740608520776504e-05, 'warmup': 150, 'gamma': 0.993664215414738, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 11, 'mlp_dim': 512, 'emb_dropout': 0.14535764609085589, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 38246336
torch.Size([180])


[I 2024-10-28 14:40:33,980] Trial 45 finished with value: 0.7063125403652435 and parameters: {'lr': 0.004191139645625846, 'wd': 3.194824001000223e-05, 'warmup': 300, 'gamma': 0.9967734542351507, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13171623864243998, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 45 finished with value: 0.7063125403652435 and parameters: {'lr': 0.004191139645625846, 'wd': 3.194824001000223e-05, 'warmup': 300, 'gamma': 0.9967734542351507, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13171623864243998, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 39650016
torch.Size([180])


[I 2024-10-28 14:44:16,309] Trial 46 finished with value: 0.7638154612145021 and parameters: {'lr': 0.006399311772089457, 'wd': 1.4415780749370635e-05, 'warmup': 100, 'gamma': 0.9892862776146254, 'time_dim': 64, 'patch_size': 16, 'depth': 9, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.149597436107063, 'schedule': 'cosine_with_restarts'}. Best is trial 11 with value: 0.6859774391857464.


Trial 46 finished with value: 0.7638154612145021 and parameters: {'lr': 0.006399311772089457, 'wd': 1.4415780749370635e-05, 'warmup': 100, 'gamma': 0.9892862776146254, 'time_dim': 64, 'patch_size': 16, 'depth': 9, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.149597436107063, 'schedule': 'cosine_with_restarts'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 26015104
torch.Size([180])


[I 2024-10-28 14:45:59,067] Trial 47 finished with value: 0.7713487356552289 and parameters: {'lr': 0.003042168064215243, 'wd': 0.0006435763310414186, 'warmup': 100, 'gamma': 0.9921708758210959, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 12, 'mlp_dim': 1024, 'emb_dropout': 0.1798101750636387, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 47 finished with value: 0.7713487356552289 and parameters: {'lr': 0.003042168064215243, 'wd': 0.0006435763310414186, 'warmup': 100, 'gamma': 0.9921708758210959, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 12, 'mlp_dim': 1024, 'emb_dropout': 0.1798101750636387, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 54796224
torch.Size([180])


[I 2024-10-28 14:48:34,375] Trial 48 finished with value: 0.7296669330153068 and parameters: {'lr': 0.0002393632732069619, 'wd': 0.00011456963299860722, 'warmup': 100, 'gamma': 0.9975679987043454, 'time_dim': 16, 'patch_size': 64, 'depth': 12, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.12987675234282792, 'schedule': 'linear'}. Best is trial 11 with value: 0.6859774391857464.


Trial 48 finished with value: 0.7296669330153068 and parameters: {'lr': 0.0002393632732069619, 'wd': 0.00011456963299860722, 'warmup': 100, 'gamma': 0.9975679987043454, 'time_dim': 16, 'patch_size': 64, 'depth': 12, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.12987675234282792, 'schedule': 'linear'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 23904848
torch.Size([180])


[I 2024-10-28 14:52:58,034] Trial 49 finished with value: 0.784217834017407 and parameters: {'lr': 1.8842847288877124e-05, 'wd': 6.54770282449184e-05, 'warmup': 50, 'gamma': 0.9852838980227477, 'time_dim': 16, 'patch_size': 8, 'depth': 7, 'heads': 5, 'mlp_dim': 768, 'emb_dropout': 0.06166729001525422, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 49 finished with value: 0.784217834017407 and parameters: {'lr': 1.8842847288877124e-05, 'wd': 6.54770282449184e-05, 'warmup': 50, 'gamma': 0.9852838980227477, 'time_dim': 16, 'patch_size': 8, 'depth': 7, 'heads': 5, 'mlp_dim': 768, 'emb_dropout': 0.06166729001525422, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 33523648
torch.Size([180])


[I 2024-10-28 14:54:45,995] Trial 50 finished with value: 0.7145127694616847 and parameters: {'lr': 8.350556826057477e-05, 'wd': 1.2679209167794491e-05, 'warmup': 250, 'gamma': 0.9950395460209573, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 11, 'mlp_dim': 512, 'emb_dropout': 0.11845990947339381, 'schedule': 'constant'}. Best is trial 11 with value: 0.6859774391857464.


Trial 50 finished with value: 0.7145127694616847 and parameters: {'lr': 8.350556826057477e-05, 'wd': 1.2679209167794491e-05, 'warmup': 250, 'gamma': 0.9950395460209573, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 11, 'mlp_dim': 512, 'emb_dropout': 0.11845990947339381, 'schedule': 'constant'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 38246336
torch.Size([180])


[I 2024-10-28 14:56:40,713] Trial 51 finished with value: 0.7011852168394368 and parameters: {'lr': 0.004243134033251746, 'wd': 3.487566358622236e-05, 'warmup': 300, 'gamma': 0.9979071295530669, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13688358302963516, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 51 finished with value: 0.7011852168394368 and parameters: {'lr': 0.004243134033251746, 'wd': 3.487566358622236e-05, 'warmup': 300, 'gamma': 0.9979071295530669, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13688358302963516, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 38246336
torch.Size([180])


[I 2024-10-28 14:58:35,224] Trial 52 finished with value: 0.7184315651678368 and parameters: {'lr': 0.0066308649461727705, 'wd': 3.659402665614413e-05, 'warmup': 300, 'gamma': 0.9971771225374265, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1531262613312496, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 52 finished with value: 0.7184315651678368 and parameters: {'lr': 0.0066308649461727705, 'wd': 3.659402665614413e-05, 'warmup': 300, 'gamma': 0.9971771225374265, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1531262613312496, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 42973632
torch.Size([180])


[I 2024-10-28 15:00:41,603] Trial 53 finished with value: 0.7236623176281928 and parameters: {'lr': 0.004519596395861272, 'wd': 2.8878847195125387e-05, 'warmup': 300, 'gamma': 0.9981335512960804, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1420772192360444, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 53 finished with value: 0.7236623176281928 and parameters: {'lr': 0.004519596395861272, 'wd': 2.8878847195125387e-05, 'warmup': 300, 'gamma': 0.9981335512960804, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1420772192360444, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 45734848
torch.Size([180])


[I 2024-10-28 15:02:56,220] Trial 54 finished with value: 0.7284415047675152 and parameters: {'lr': 0.002674654788237773, 'wd': 2.2176053363582402e-05, 'warmup': 300, 'gamma': 0.9960036087348757, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.16432268981048054, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 54 finished with value: 0.7284415047675152 and parameters: {'lr': 0.002674654788237773, 'wd': 2.2176053363582402e-05, 'warmup': 300, 'gamma': 0.9960036087348757, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.16432268981048054, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 44967744
torch.Size([180])


[I 2024-10-28 15:05:07,405] Trial 55 finished with value: 0.781674969777169 and parameters: {'lr': 0.009943764365614144, 'wd': 5.8440665192045736e-05, 'warmup': 100, 'gamma': 0.9938684424313906, 'time_dim': 64, 'patch_size': 64, 'depth': 9, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.00041137535432840197, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 55 finished with value: 0.781674969777169 and parameters: {'lr': 0.009943764365614144, 'wd': 5.8440665192045736e-05, 'warmup': 100, 'gamma': 0.9938684424313906, 'time_dim': 64, 'patch_size': 64, 'depth': 9, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.00041137535432840197, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 33519040
torch.Size([180])


[I 2024-10-28 15:06:50,530] Trial 56 finished with value: 0.691438235963643 and parameters: {'lr': 0.0016605480692815757, 'wd': 1.7300513099942465e-05, 'warmup': 300, 'gamma': 0.9829625914364842, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.07754834555449593, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 56 finished with value: 0.691438235963643 and parameters: {'lr': 0.0016605480692815757, 'wd': 1.7300513099942465e-05, 'warmup': 300, 'gamma': 0.9829625914364842, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.07754834555449593, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 24133184
torch.Size([180])


[I 2024-10-28 15:08:12,866] Trial 57 finished with value: 0.7351319538516007 and parameters: {'lr': 0.001691419909070763, 'wd': 1.7982374185210088e-05, 'warmup': 300, 'gamma': 0.9812534359503667, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 12, 'mlp_dim': 256, 'emb_dropout': 0.08063557006274774, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 57 finished with value: 0.7351319538516007 and parameters: {'lr': 0.001691419909070763, 'wd': 1.7982374185210088e-05, 'warmup': 300, 'gamma': 0.9812534359503667, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 12, 'mlp_dim': 256, 'emb_dropout': 0.08063557006274774, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 27612096
torch.Size([180])


[I 2024-10-28 15:09:40,851] Trial 58 finished with value: 0.7028842575659807 and parameters: {'lr': 0.0010676296310574026, 'wd': 0.007745978655349674, 'warmup': 300, 'gamma': 0.9831020414148696, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.09042315589391488, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 58 finished with value: 0.7028842575659807 and parameters: {'lr': 0.0010676296310574026, 'wd': 0.007745978655349674, 'warmup': 300, 'gamma': 0.9831020414148696, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.09042315589391488, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 39809888
torch.Size([180])


[I 2024-10-28 15:13:23,451] Trial 59 finished with value: 0.7189213349100376 and parameters: {'lr': 0.0008071181804427901, 'wd': 1.3244563030962819e-05, 'warmup': 300, 'gamma': 0.9800700986775538, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.07806014226732969, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 59 finished with value: 0.7189213349100376 and parameters: {'lr': 0.0008071181804427901, 'wd': 1.3244563030962819e-05, 'warmup': 300, 'gamma': 0.9800700986775538, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.07806014226732969, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 33503616
torch.Size([180])


[I 2024-10-28 15:15:31,918] Trial 60 finished with value: 0.7449654152781571 and parameters: {'lr': 0.0005925954299563169, 'wd': 0.0009918619323562264, 'warmup': 150, 'gamma': 0.9876588864809906, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.10430099809171353, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 60 finished with value: 0.7449654152781571 and parameters: {'lr': 0.0005925954299563169, 'wd': 0.0009918619323562264, 'warmup': 150, 'gamma': 0.9876588864809906, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.10430099809171353, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 27612096
torch.Size([180])


[I 2024-10-28 15:16:59,964] Trial 61 finished with value: 0.7186607705355005 and parameters: {'lr': 0.0011544187677233534, 'wd': 0.000475259551575986, 'warmup': 300, 'gamma': 0.9830710607510879, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.09440275285799654, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 61 finished with value: 0.7186607705355005 and parameters: {'lr': 0.0011544187677233534, 'wd': 0.000475259551575986, 'warmup': 300, 'gamma': 0.9830710607510879, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.09440275285799654, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 26432448
torch.Size([180])


[I 2024-10-28 15:18:25,123] Trial 62 finished with value: 0.7045755181415236 and parameters: {'lr': 0.002212608673927731, 'wd': 0.008166326955757548, 'warmup': 300, 'gamma': 0.9843058967256357, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.08992380732696442, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 62 finished with value: 0.7045755181415236 and parameters: {'lr': 0.002212608673927731, 'wd': 0.008166326955757548, 'warmup': 300, 'gamma': 0.9843058967256357, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.08992380732696442, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 32142784
torch.Size([180])


[I 2024-10-28 15:20:05,285] Trial 63 finished with value: 0.7312582446431789 and parameters: {'lr': 0.0010210835106257458, 'wd': 0.0032574459905342396, 'warmup': 300, 'gamma': 0.9824041715056158, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.061024482973211905, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 63 finished with value: 0.7312582446431789 and parameters: {'lr': 0.0010210835106257458, 'wd': 0.0032574459905342396, 'warmup': 300, 'gamma': 0.9824041715056158, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.061024482973211905, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 19337152
torch.Size([180])


[I 2024-10-28 15:21:11,111] Trial 64 finished with value: 0.7606539901428916 and parameters: {'lr': 0.00409433212344819, 'wd': 0.009979417795269629, 'warmup': 300, 'gamma': 0.9789403546230366, 'time_dim': 16, 'patch_size': 64, 'depth': 4, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.012974770981729812, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 64 finished with value: 0.7606539901428916 and parameters: {'lr': 0.00409433212344819, 'wd': 0.009979417795269629, 'warmup': 300, 'gamma': 0.9789403546230366, 'time_dim': 16, 'patch_size': 64, 'depth': 4, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.012974770981729812, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 21115328
torch.Size([180])


[I 2024-10-28 15:22:22,837] Trial 65 finished with value: 0.7686465789404429 and parameters: {'lr': 0.0017616023406908136, 'wd': 0.004043934710796016, 'warmup': 300, 'gamma': 0.9813260441884673, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.0766411630584394, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 65 finished with value: 0.7686465789404429 and parameters: {'lr': 0.0017616023406908136, 'wd': 0.004043934710796016, 'warmup': 300, 'gamma': 0.9813260441884673, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.0766411630584394, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 31012560
torch.Size([180])


[I 2024-10-28 15:27:40,264] Trial 66 finished with value: 0.7556334961889272 and parameters: {'lr': 0.000464535638716839, 'wd': 2.4371595348085044e-05, 'warmup': 50, 'gamma': 0.9856251732329215, 'time_dim': 64, 'patch_size': 8, 'depth': 7, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.07024337096805665, 'schedule': 'cosine_with_restarts'}. Best is trial 11 with value: 0.6859774391857464.


Trial 66 finished with value: 0.7556334961889272 and parameters: {'lr': 0.000464535638716839, 'wd': 2.4371595348085044e-05, 'warmup': 50, 'gamma': 0.9856251732329215, 'time_dim': 64, 'patch_size': 8, 'depth': 7, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.07024337096805665, 'schedule': 'cosine_with_restarts'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 30377920
torch.Size([180])


[I 2024-10-28 15:29:20,281] Trial 67 finished with value: 0.741045162593961 and parameters: {'lr': 0.007977411576594954, 'wd': 0.0001670534210200038, 'warmup': 200, 'gamma': 0.9836323262003978, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 9, 'mlp_dim': 512, 'emb_dropout': 0.10909176283248205, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 67 finished with value: 0.741045162593961 and parameters: {'lr': 0.007977411576594954, 'wd': 0.0001670534210200038, 'warmup': 200, 'gamma': 0.9836323262003978, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 9, 'mlp_dim': 512, 'emb_dropout': 0.10909176283248205, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 52428224
torch.Size([180])


[I 2024-10-28 15:31:50,112] Trial 68 finished with value: 0.7464095636589252 and parameters: {'lr': 0.005110504681289356, 'wd': 4.4410697156908005e-05, 'warmup': 300, 'gamma': 0.9868908912419185, 'time_dim': 16, 'patch_size': 64, 'depth': 11, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.041212881494126254, 'schedule': 'constant'}. Best is trial 11 with value: 0.6859774391857464.


Trial 68 finished with value: 0.7464095636589252 and parameters: {'lr': 0.005110504681289356, 'wd': 4.4410697156908005e-05, 'warmup': 300, 'gamma': 0.9868908912419185, 'time_dim': 16, 'patch_size': 64, 'depth': 11, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.041212881494126254, 'schedule': 'constant'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 32205632
torch.Size([180])


[I 2024-10-28 15:33:32,000] Trial 69 finished with value: 0.7907813925439033 and parameters: {'lr': 0.0007234682042336843, 'wd': 0.005463284544067572, 'warmup': 100, 'gamma': 0.9806844666523241, 'time_dim': 32, 'patch_size': 64, 'depth': 7, 'heads': 11, 'mlp_dim': 768, 'emb_dropout': 0.11952519198703732, 'schedule': 'linear'}. Best is trial 11 with value: 0.6859774391857464.


Trial 69 finished with value: 0.7907813925439033 and parameters: {'lr': 0.0007234682042336843, 'wd': 0.005463284544067572, 'warmup': 100, 'gamma': 0.9806844666523241, 'time_dim': 32, 'patch_size': 64, 'depth': 7, 'heads': 11, 'mlp_dim': 768, 'emb_dropout': 0.11952519198703732, 'schedule': 'linear'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 27230144
torch.Size([180])


[I 2024-10-28 15:35:05,050] Trial 70 finished with value: 0.6962360489943484 and parameters: {'lr': 0.0033902764150212338, 'wd': 7.820250874014415e-05, 'warmup': 300, 'gamma': 0.9821841684480476, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.1268672160466333, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 70 finished with value: 0.6962360489943484 and parameters: {'lr': 0.0033902764150212338, 'wd': 7.820250874014415e-05, 'warmup': 300, 'gamma': 0.9821841684480476, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.1268672160466333, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 27230144
torch.Size([180])


[I 2024-10-28 15:36:38,038] Trial 71 finished with value: 0.7502630350299764 and parameters: {'lr': 0.0033451843200857905, 'wd': 9.075124182376099e-05, 'warmup': 300, 'gamma': 0.9824456373127606, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.12502446867158992, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 71 finished with value: 0.7502630350299764 and parameters: {'lr': 0.0033451843200857905, 'wd': 9.075124182376099e-05, 'warmup': 300, 'gamma': 0.9824456373127606, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.12502446867158992, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 20529600
torch.Size([180])


[I 2024-10-28 15:37:51,704] Trial 72 finished with value: 0.7499698239127525 and parameters: {'lr': 0.002568171076756764, 'wd': 0.0018092537149436913, 'warmup': 300, 'gamma': 0.9845018953284934, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.09938558232821809, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 72 finished with value: 0.7499698239127525 and parameters: {'lr': 0.002568171076756764, 'wd': 0.0018092537149436913, 'warmup': 300, 'gamma': 0.9845018953284934, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.09938558232821809, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 25657280
torch.Size([180])


[I 2024-10-28 15:39:21,165] Trial 73 finished with value: 0.718910889600621 and parameters: {'lr': 0.001588563563765425, 'wd': 4.078560905864621e-05, 'warmup': 300, 'gamma': 0.9817002879921993, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.09024487148523523, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 73 finished with value: 0.718910889600621 and parameters: {'lr': 0.001588563563765425, 'wd': 4.078560905864621e-05, 'warmup': 300, 'gamma': 0.9817002879921993, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.09024487148523523, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 20938688
torch.Size([180])


[I 2024-10-28 15:40:39,206] Trial 74 finished with value: 0.7872992232361866 and parameters: {'lr': 0.00207902844087417, 'wd': 8.394168045447387e-05, 'warmup': 300, 'gamma': 0.9832185371024514, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.11365267672356094, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 74 finished with value: 0.7872992232361866 and parameters: {'lr': 0.00207902844087417, 'wd': 8.394168045447387e-05, 'warmup': 300, 'gamma': 0.9832185371024514, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.11365267672356094, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 29390272
torch.Size([180])


[I 2024-10-28 15:42:12,249] Trial 75 finished with value: 0.7176433187137107 and parameters: {'lr': 0.0036322322564139703, 'wd': 0.00013806349541402204, 'warmup': 250, 'gamma': 0.9795905356869665, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1276266060645069, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 75 finished with value: 0.7176433187137107 and parameters: {'lr': 0.0036322322564139703, 'wd': 0.00013806349541402204, 'warmup': 250, 'gamma': 0.9795905356869665, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1276266060645069, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 38271936
torch.Size([180])


[I 2024-10-28 15:44:17,818] Trial 76 finished with value: 0.7583247517364476 and parameters: {'lr': 0.005751668235245827, 'wd': 0.0002455307830769641, 'warmup': 100, 'gamma': 0.9845754358383232, 'time_dim': 16, 'patch_size': 64, 'depth': 12, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.10435031683699464, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 76 finished with value: 0.7583247517364476 and parameters: {'lr': 0.005751668235245827, 'wd': 0.0002455307830769641, 'warmup': 100, 'gamma': 0.9845754358383232, 'time_dim': 16, 'patch_size': 64, 'depth': 12, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.10435031683699464, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 47691616
torch.Size([180])


[I 2024-10-28 15:48:41,371] Trial 77 finished with value: 0.7684106794257696 and parameters: {'lr': 0.007774152040956168, 'wd': 5.915456053531561e-05, 'warmup': 100, 'gamma': 0.9819924563603442, 'time_dim': 16, 'patch_size': 16, 'depth': 10, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.04786220542737317, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 77 finished with value: 0.7684106794257696 and parameters: {'lr': 0.007774152040956168, 'wd': 5.915456053531561e-05, 'warmup': 100, 'gamma': 0.9819924563603442, 'time_dim': 16, 'patch_size': 16, 'depth': 10, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.04786220542737317, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 36673472
torch.Size([180])


[I 2024-10-28 15:50:32,649] Trial 78 finished with value: 0.7603027811147479 and parameters: {'lr': 0.0029378554074580244, 'wd': 2.6878632563964e-05, 'warmup': 150, 'gamma': 0.9780277881323702, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.02738094289729967, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 78 finished with value: 0.7603027811147479 and parameters: {'lr': 0.0029378554074580244, 'wd': 2.6878632563964e-05, 'warmup': 150, 'gamma': 0.9780277881323702, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.02738094289729967, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 41450192
torch.Size([180])


[I 2024-10-28 15:57:34,757] Trial 79 finished with value: 0.7709667753486911 and parameters: {'lr': 0.0010036858202837169, 'wd': 1.6367633375074167e-05, 'warmup': 100, 'gamma': 0.976156558340346, 'time_dim': 64, 'patch_size': 8, 'depth': 9, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.13853644527031847, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 79 finished with value: 0.7709667753486911 and parameters: {'lr': 0.0010036858202837169, 'wd': 1.6367633375074167e-05, 'warmup': 100, 'gamma': 0.976156558340346, 'time_dim': 64, 'patch_size': 8, 'depth': 9, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.13853644527031847, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 33503616
torch.Size([180])


[I 2024-10-28 15:59:43,416] Trial 80 finished with value: 0.7173380221181852 and parameters: {'lr': 0.0013275382141337733, 'wd': 3.183093994895245e-05, 'warmup': 200, 'gamma': 0.980518200705333, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.11773412853898718, 'schedule': 'cosine_with_restarts'}. Best is trial 11 with value: 0.6859774391857464.


Trial 80 finished with value: 0.7173380221181852 and parameters: {'lr': 0.0013275382141337733, 'wd': 3.183093994895245e-05, 'warmup': 200, 'gamma': 0.980518200705333, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.11773412853898718, 'schedule': 'cosine_with_restarts'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 26432448
torch.Size([180])


[I 2024-10-28 16:01:09,066] Trial 81 finished with value: 0.7482982968739518 and parameters: {'lr': 0.0021888088881149015, 'wd': 0.005753880676553141, 'warmup': 300, 'gamma': 0.9831001295849962, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.08908847479459969, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 81 finished with value: 0.7482982968739518 and parameters: {'lr': 0.0021888088881149015, 'wd': 0.005753880676553141, 'warmup': 300, 'gamma': 0.9831001295849962, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.08908847479459969, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 21115328
torch.Size([180])


[I 2024-10-28 16:02:20,402] Trial 82 finished with value: 0.700154030530373 and parameters: {'lr': 0.00440991349608944, 'wd': 0.00964952346930463, 'warmup': 300, 'gamma': 0.9842538804995035, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09898081150401092, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 82 finished with value: 0.700154030530373 and parameters: {'lr': 0.00440991349608944, 'wd': 0.00964952346930463, 'warmup': 300, 'gamma': 0.9842538804995035, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09898081150401092, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 21115328
torch.Size([180])


[I 2024-10-28 16:03:31,705] Trial 83 finished with value: 0.7512907125606171 and parameters: {'lr': 0.0049458157249279485, 'wd': 2.133193299729076e-05, 'warmup': 300, 'gamma': 0.9838111252388319, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.10016568998524024, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 83 finished with value: 0.7512907125606171 and parameters: {'lr': 0.0049458157249279485, 'wd': 2.133193299729076e-05, 'warmup': 300, 'gamma': 0.9838111252388319, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.10016568998524024, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 20132288
torch.Size([180])


[I 2024-10-28 16:04:40,021] Trial 84 finished with value: 0.7275193791008161 and parameters: {'lr': 0.003802143942942422, 'wd': 3.830017320366615e-05, 'warmup': 300, 'gamma': 0.9849618360943109, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.08194600306828062, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 84 finished with value: 0.7275193791008161 and parameters: {'lr': 0.003802143942942422, 'wd': 3.830017320366615e-05, 'warmup': 300, 'gamma': 0.9849618360943109, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.08194600306828062, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 13830080
torch.Size([180])


[I 2024-10-28 16:05:34,036] Trial 85 finished with value: 0.775649972400871 and parameters: {'lr': 0.007754870190028376, 'wd': 0.00970047828414638, 'warmup': 300, 'gamma': 0.9858769176808602, 'time_dim': 16, 'patch_size': 64, 'depth': 4, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1340723537397367, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 85 finished with value: 0.775649972400871 and parameters: {'lr': 0.007754870190028376, 'wd': 0.00970047828414638, 'warmup': 300, 'gamma': 0.9858769176808602, 'time_dim': 16, 'patch_size': 64, 'depth': 4, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1340723537397367, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 35895744
torch.Size([180])


[I 2024-10-28 16:07:23,316] Trial 86 finished with value: 0.7454948090700451 and parameters: {'lr': 0.005765798067930953, 'wd': 0.006074141527065244, 'warmup': 50, 'gamma': 0.9810103819783372, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.10941599545082956, 'schedule': 'constant'}. Best is trial 11 with value: 0.6859774391857464.


Trial 86 finished with value: 0.7454948090700451 and parameters: {'lr': 0.005765798067930953, 'wd': 0.006074141527065244, 'warmup': 50, 'gamma': 0.9810103819783372, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.10941599545082956, 'schedule': 'constant'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 25317440
torch.Size([180])


[I 2024-10-28 16:08:45,489] Trial 87 finished with value: 0.7652328031948206 and parameters: {'lr': 0.004367751762651013, 'wd': 4.793342407152071e-05, 'warmup': 100, 'gamma': 0.979613691082898, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09519683529188872, 'schedule': 'linear'}. Best is trial 11 with value: 0.6859774391857464.


Trial 87 finished with value: 0.7652328031948206 and parameters: {'lr': 0.004367751762651013, 'wd': 4.793342407152071e-05, 'warmup': 100, 'gamma': 0.979613691082898, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09519683529188872, 'schedule': 'linear'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 25657280
torch.Size([180])


[I 2024-10-28 16:10:14,942] Trial 88 finished with value: 0.7511604412871529 and parameters: {'lr': 0.002996595667108237, 'wd': 7.069663978211314e-05, 'warmup': 300, 'gamma': 0.9826974552343843, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.06742865731434766, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 88 finished with value: 0.7511604412871529 and parameters: {'lr': 0.002996595667108237, 'wd': 7.069663978211314e-05, 'warmup': 300, 'gamma': 0.9826974552343843, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.06742865731434766, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 30764736
torch.Size([180])


[I 2024-10-28 16:11:52,567] Trial 89 finished with value: 0.7371458071953366 and parameters: {'lr': 0.006819413515974617, 'wd': 2.4096995549182684e-05, 'warmup': 100, 'gamma': 0.9867985664649097, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 10, 'mlp_dim': 768, 'emb_dropout': 0.14766855309430615, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 89 finished with value: 0.7371458071953366 and parameters: {'lr': 0.006819413515974617, 'wd': 2.4096995549182684e-05, 'warmup': 100, 'gamma': 0.9867985664649097, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 10, 'mlp_dim': 768, 'emb_dropout': 0.14766855309430615, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 50265536
torch.Size([180])


[I 2024-10-28 16:14:17,736] Trial 90 finished with value: 0.7413666806806785 and parameters: {'lr': 0.0015298346487292627, 'wd': 1.1862541083665458e-05, 'warmup': 250, 'gamma': 0.9984488253651652, 'time_dim': 16, 'patch_size': 64, 'depth': 11, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.008577552890403378, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 90 finished with value: 0.7413666806806785 and parameters: {'lr': 0.0015298346487292627, 'wd': 1.1862541083665458e-05, 'warmup': 250, 'gamma': 0.9984488253651652, 'time_dim': 16, 'patch_size': 64, 'depth': 11, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.008577552890403378, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 26432448
torch.Size([180])


[I 2024-10-28 16:15:43,022] Trial 91 finished with value: 0.7727574999751229 and parameters: {'lr': 0.0022328460259711533, 'wd': 0.00844668284335318, 'warmup': 300, 'gamma': 0.9819480990999949, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.08465178896792577, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 91 finished with value: 0.7727574999751229 and parameters: {'lr': 0.0022328460259711533, 'wd': 0.00844668284335318, 'warmup': 300, 'gamma': 0.9819480990999949, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.08465178896792577, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 22098368
torch.Size([180])


[I 2024-10-28 16:16:56,781] Trial 92 finished with value: 0.7534739836654508 and parameters: {'lr': 0.0026827199141375583, 'wd': 0.006696401542457065, 'warmup': 300, 'gamma': 0.9840748872797173, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.07547575738389815, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 92 finished with value: 0.7534739836654508 and parameters: {'lr': 0.0026827199141375583, 'wd': 0.006696401542457065, 'warmup': 300, 'gamma': 0.9840748872797173, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.07547575738389815, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 26432448
torch.Size([180])


[I 2024-10-28 16:18:22,056] Trial 93 finished with value: 0.745028516966993 and parameters: {'lr': 0.0011741779063122015, 'wd': 0.004656176300820536, 'warmup': 300, 'gamma': 0.9848903477378929, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.09178944418702709, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 93 finished with value: 0.745028516966993 and parameters: {'lr': 0.0011741779063122015, 'wd': 0.004656176300820536, 'warmup': 300, 'gamma': 0.9848903477378929, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.09178944418702709, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 20132288
torch.Size([180])


[I 2024-10-28 16:19:30,630] Trial 94 finished with value: 0.7611980695352378 and parameters: {'lr': 0.0018787679219590314, 'wd': 0.00803377388972192, 'warmup': 300, 'gamma': 0.9714787901700128, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.0987740095504798, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.


Trial 94 finished with value: 0.7611980695352378 and parameters: {'lr': 0.0018787679219590314, 'wd': 0.00803377388972192, 'warmup': 300, 'gamma': 0.9714787901700128, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.0987740095504798, 'schedule': 'constant_with_warmup'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 25252800
torch.Size([180])


[I 2024-10-28 16:20:52,862] Trial 95 finished with value: 0.6929167949791173 and parameters: {'lr': 0.0035916924776984668, 'wd': 0.006931815894827677, 'warmup': 300, 'gamma': 0.983912494913816, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.121400534491673, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 95 finished with value: 0.6929167949791173 and parameters: {'lr': 0.0035916924776984668, 'wd': 0.006931815894827677, 'warmup': 300, 'gamma': 0.983912494913816, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.121400534491673, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 29380960
torch.Size([180])


[I 2024-10-28 16:23:44,171] Trial 96 finished with value: 0.7393264987818828 and parameters: {'lr': 0.003763589780888229, 'wd': 0.004688460863451052, 'warmup': 100, 'gamma': 0.9837643049222154, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.12148330690644683, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 96 finished with value: 0.7393264987818828 and parameters: {'lr': 0.003763589780888229, 'wd': 0.004688460863451052, 'warmup': 100, 'gamma': 0.9837643049222154, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.12148330690644683, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 16977856
torch.Size([180])


[I 2024-10-28 16:24:44,477] Trial 97 finished with value: 0.8033036352744847 and parameters: {'lr': 0.004737376429752579, 'wd': 1.8282472023766728e-05, 'warmup': 300, 'gamma': 0.9829126373706537, 'time_dim': 16, 'patch_size': 64, 'depth': 4, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.11580760351037592, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 97 finished with value: 0.8033036352744847 and parameters: {'lr': 0.004737376429752579, 'wd': 1.8282472023766728e-05, 'warmup': 300, 'gamma': 0.9829126373706537, 'time_dim': 16, 'patch_size': 64, 'depth': 4, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.11580760351037592, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 51617664
torch.Size([180])


[I 2024-10-28 16:27:49,043] Trial 98 finished with value: 0.7089651936354381 and parameters: {'lr': 0.006331616985967347, 'wd': 0.0003848853374178485, 'warmup': 150, 'gamma': 0.996382997463033, 'time_dim': 16, 'patch_size': 32, 'depth': 10, 'heads': 12, 'mlp_dim': 1024, 'emb_dropout': 0.1276655727371504, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 98 finished with value: 0.7089651936354381 and parameters: {'lr': 0.006331616985967347, 'wd': 0.0003848853374178485, 'warmup': 150, 'gamma': 0.996382997463033, 'time_dim': 16, 'patch_size': 32, 'depth': 10, 'heads': 12, 'mlp_dim': 1024, 'emb_dropout': 0.1276655727371504, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 41204160
torch.Size([180])


[I 2024-10-28 16:29:51,285] Trial 99 finished with value: 0.7572544707890623 and parameters: {'lr': 0.0008540175363543749, 'wd': 0.007114775882027769, 'warmup': 100, 'gamma': 0.9877775530213008, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.1369895918355276, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 99 finished with value: 0.7572544707890623 and parameters: {'lr': 0.0008540175363543749, 'wd': 0.007114775882027769, 'warmup': 100, 'gamma': 0.9877775530213008, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.1369895918355276, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 30606656
torch.Size([180])


[I 2024-10-28 16:31:27,985] Trial 100 finished with value: 0.6994846279251371 and parameters: {'lr': 0.0033976056271661772, 'wd': 2.885201056515546e-05, 'warmup': 300, 'gamma': 0.9948189361101825, 'time_dim': 64, 'patch_size': 64, 'depth': 8, 'heads': 5, 'mlp_dim': 1024, 'emb_dropout': 0.11126333765669466, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 100 finished with value: 0.6994846279251371 and parameters: {'lr': 0.0033976056271661772, 'wd': 2.885201056515546e-05, 'warmup': 300, 'gamma': 0.9948189361101825, 'time_dim': 64, 'patch_size': 64, 'depth': 8, 'heads': 5, 'mlp_dim': 1024, 'emb_dropout': 0.11126333765669466, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 32179520
torch.Size([180])


[I 2024-10-28 16:33:07,755] Trial 101 finished with value: 0.7863974879456792 and parameters: {'lr': 0.0032290854290303226, 'wd': 2.909741238467715e-05, 'warmup': 300, 'gamma': 0.9948783090884139, 'time_dim': 64, 'patch_size': 64, 'depth': 8, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.12263426070501428, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 101 finished with value: 0.7863974879456792 and parameters: {'lr': 0.0032290854290303226, 'wd': 2.909741238467715e-05, 'warmup': 300, 'gamma': 0.9948783090884139, 'time_dim': 64, 'patch_size': 64, 'depth': 8, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.12263426070501428, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 30606656
torch.Size([180])


[I 2024-10-28 16:34:44,405] Trial 102 finished with value: 0.7601341116668467 and parameters: {'lr': 0.005628957686486033, 'wd': 5.55103010257702e-05, 'warmup': 300, 'gamma': 0.9979292612388262, 'time_dim': 64, 'patch_size': 64, 'depth': 8, 'heads': 5, 'mlp_dim': 1024, 'emb_dropout': 0.11098072407585151, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 102 finished with value: 0.7601341116668467 and parameters: {'lr': 0.005628957686486033, 'wd': 5.55103010257702e-05, 'warmup': 300, 'gamma': 0.9979292612388262, 'time_dim': 64, 'patch_size': 64, 'depth': 8, 'heads': 5, 'mlp_dim': 1024, 'emb_dropout': 0.11098072407585151, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 32581440
torch.Size([180])


[I 2024-10-28 16:36:25,689] Trial 103 finished with value: 0.7262296963043392 and parameters: {'lr': 0.0006855978727377132, 'wd': 3.678197598547747e-05, 'warmup': 300, 'gamma': 0.9971865838261289, 'time_dim': 64, 'patch_size': 64, 'depth': 9, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.10464801061799275, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 103 finished with value: 0.7262296963043392 and parameters: {'lr': 0.0006855978727377132, 'wd': 3.678197598547747e-05, 'warmup': 300, 'gamma': 0.9971865838261289, 'time_dim': 64, 'patch_size': 64, 'depth': 9, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.10464801061799275, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 30628048
torch.Size([180])


[I 2024-10-28 16:41:41,190] Trial 104 finished with value: 0.7259293387928754 and parameters: {'lr': 0.004314721624369186, 'wd': 3.100773460667922e-05, 'warmup': 300, 'gamma': 0.9986758296527617, 'time_dim': 64, 'patch_size': 8, 'depth': 8, 'heads': 5, 'mlp_dim': 1024, 'emb_dropout': 0.13159337765041815, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 104 finished with value: 0.7259293387928754 and parameters: {'lr': 0.004314721624369186, 'wd': 3.100773460667922e-05, 'warmup': 300, 'gamma': 0.9986758296527617, 'time_dim': 64, 'patch_size': 8, 'depth': 8, 'heads': 5, 'mlp_dim': 1024, 'emb_dropout': 0.13159337765041815, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 29611328
torch.Size([180])


[I 2024-10-28 16:43:18,841] Trial 105 finished with value: 0.7284696392491451 and parameters: {'lr': 0.00246274577220613, 'wd': 1.4928999749548226e-05, 'warmup': 300, 'gamma': 0.9814818594595347, 'time_dim': 64, 'patch_size': 64, 'depth': 7, 'heads': 11, 'mlp_dim': 512, 'emb_dropout': 0.10648408717710206, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 105 finished with value: 0.7284696392491451 and parameters: {'lr': 0.00246274577220613, 'wd': 1.4928999749548226e-05, 'warmup': 300, 'gamma': 0.9814818594595347, 'time_dim': 64, 'patch_size': 64, 'depth': 7, 'heads': 11, 'mlp_dim': 512, 'emb_dropout': 0.10648408717710206, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 30446656
torch.Size([180])


[I 2024-10-28 16:44:55,263] Trial 106 finished with value: 0.7741504262502241 and parameters: {'lr': 0.00046868324560047454, 'wd': 2.3195331418883006e-05, 'warmup': 100, 'gamma': 0.9834147508114047, 'time_dim': 32, 'patch_size': 64, 'depth': 8, 'heads': 5, 'mlp_dim': 1024, 'emb_dropout': 0.12499500605912084, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.


Trial 106 finished with value: 0.7741504262502241 and parameters: {'lr': 0.00046868324560047454, 'wd': 2.3195331418883006e-05, 'warmup': 100, 'gamma': 0.9834147508114047, 'time_dim': 32, 'patch_size': 64, 'depth': 8, 'heads': 5, 'mlp_dim': 1024, 'emb_dropout': 0.12499500605912084, 'schedule': 'cosine'}. Best is trial 11 with value: 0.6859774391857464.
Number of parameters: 14230208
torch.Size([180])


[I 2024-10-28 16:45:51,602] Trial 107 finished with value: 0.6578020265896588 and parameters: {'lr': 0.008829189217751884, 'wd': 4.2298162703445356e-05, 'warmup': 200, 'gamma': 0.9824131322055429, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.11333375377263626, 'schedule': 'cosine_with_restarts'}. Best is trial 107 with value: 0.6578020265896588.


Trial 107 finished with value: 0.6578020265896588 and parameters: {'lr': 0.008829189217751884, 'wd': 4.2298162703445356e-05, 'warmup': 200, 'gamma': 0.9824131322055429, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.11333375377263626, 'schedule': 'cosine_with_restarts'}. Best is trial 107 with value: 0.6578020265896588.
Number of parameters: 14230208
torch.Size([180])


[I 2024-10-28 16:46:47,805] Trial 108 finished with value: 0.6159034405503282 and parameters: {'lr': 0.008350534462504278, 'wd': 4.245103267748519e-05, 'warmup': 200, 'gamma': 0.9822439326684171, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15740880727184586, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 108 finished with value: 0.6159034405503282 and parameters: {'lr': 0.008350534462504278, 'wd': 4.245103267748519e-05, 'warmup': 200, 'gamma': 0.9822439326684171, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15740880727184586, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 14230208
torch.Size([180])


[I 2024-10-28 16:47:44,085] Trial 109 finished with value: 0.7625219084347375 and parameters: {'lr': 0.00722940964382329, 'wd': 0.00010990914520020479, 'warmup': 200, 'gamma': 0.9799399002541171, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16984422311454642, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 109 finished with value: 0.7625219084347375 and parameters: {'lr': 0.00722940964382329, 'wd': 0.00010990914520020479, 'warmup': 200, 'gamma': 0.9799399002541171, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16984422311454642, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 14230208
torch.Size([180])


[I 2024-10-28 16:48:40,323] Trial 110 finished with value: 0.7928364062875453 and parameters: {'lr': 0.00889195231126109, 'wd': 4.7547408265776096e-05, 'warmup': 200, 'gamma': 0.9823967057876746, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17641857994852955, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 110 finished with value: 0.7928364062875453 and parameters: {'lr': 0.00889195231126109, 'wd': 4.7547408265776096e-05, 'warmup': 200, 'gamma': 0.9823967057876746, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17641857994852955, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 14230208
torch.Size([180])


[I 2024-10-28 16:49:36,621] Trial 111 finished with value: 0.6671572794080777 and parameters: {'lr': 0.008933596633272907, 'wd': 6.367624592531667e-05, 'warmup': 200, 'gamma': 0.9807418809425035, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.11316439717089953, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 111 finished with value: 0.6671572794080777 and parameters: {'lr': 0.008933596633272907, 'wd': 6.367624592531667e-05, 'warmup': 200, 'gamma': 0.9807418809425035, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.11316439717089953, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 14230208
torch.Size([180])


[I 2024-10-28 16:50:32,825] Trial 112 finished with value: 0.6932797758619083 and parameters: {'lr': 0.009053256219556442, 'wd': 4.159916939920606e-05, 'warmup': 200, 'gamma': 0.9790481023876314, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.11502350776959577, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 112 finished with value: 0.6932797758619083 and parameters: {'lr': 0.009053256219556442, 'wd': 4.159916939920606e-05, 'warmup': 200, 'gamma': 0.9790481023876314, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.11502350776959577, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 14230208
torch.Size([180])


[I 2024-10-28 16:51:29,045] Trial 113 finished with value: 0.7739906575128008 and parameters: {'lr': 0.008748788277841358, 'wd': 7.733305398457011e-05, 'warmup': 200, 'gamma': 0.979133489071246, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18780511251647208, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 113 finished with value: 0.7739906575128008 and parameters: {'lr': 0.008748788277841358, 'wd': 7.733305398457011e-05, 'warmup': 200, 'gamma': 0.979133489071246, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18780511251647208, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 11469760
torch.Size([180])


[I 2024-10-28 16:52:17,243] Trial 114 finished with value: 0.7354257756797195 and parameters: {'lr': 0.009477401215519795, 'wd': 6.007311009379552e-05, 'warmup': 200, 'gamma': 0.9781866663106825, 'time_dim': 16, 'patch_size': 64, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.11467633206221352, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 114 finished with value: 0.7354257756797195 and parameters: {'lr': 0.009477401215519795, 'wd': 6.007311009379552e-05, 'warmup': 200, 'gamma': 0.9781866663106825, 'time_dim': 16, 'patch_size': 64, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.11467633206221352, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 13247168
torch.Size([180])


[I 2024-10-28 16:53:11,764] Trial 115 finished with value: 0.7712037178455377 and parameters: {'lr': 0.00614167563803979, 'wd': 4.235533363572482e-05, 'warmup': 200, 'gamma': 0.980940151526627, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.15682296796300824, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 115 finished with value: 0.7712037178455377 and parameters: {'lr': 0.00614167563803979, 'wd': 4.235533363572482e-05, 'warmup': 200, 'gamma': 0.980940151526627, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.15682296796300824, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 11469760
torch.Size([180])


[I 2024-10-28 16:53:59,934] Trial 116 finished with value: 0.7547787638122265 and parameters: {'lr': 0.0073141996622721585, 'wd': 5.149780804307586e-05, 'warmup': 200, 'gamma': 0.9817504238613436, 'time_dim': 16, 'patch_size': 64, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1117952315026742, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 116 finished with value: 0.7547787638122265 and parameters: {'lr': 0.0073141996622721585, 'wd': 5.149780804307586e-05, 'warmup': 200, 'gamma': 0.9817504238613436, 'time_dim': 16, 'patch_size': 64, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1117952315026742, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 13471808
torch.Size([180])


[I 2024-10-28 16:54:54,743] Trial 117 finished with value: 0.7792857654732946 and parameters: {'lr': 0.009971828740064772, 'wd': 1.9656317170644718e-05, 'warmup': 200, 'gamma': 0.9804162207538955, 'time_dim': 64, 'patch_size': 64, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.11817905001297743, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 117 finished with value: 0.7792857654732946 and parameters: {'lr': 0.009971828740064772, 'wd': 1.9656317170644718e-05, 'warmup': 200, 'gamma': 0.9804162207538955, 'time_dim': 64, 'patch_size': 64, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.11817905001297743, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 16:56:36,481] Trial 118 finished with value: 0.7327692732482047 and parameters: {'lr': 0.005186953822988492, 'wd': 2.702434149635188e-05, 'warmup': 200, 'gamma': 0.9792596557029084, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.10774224008955044, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 118 finished with value: 0.7327692732482047 and parameters: {'lr': 0.005186953822988492, 'wd': 2.702434149635188e-05, 'warmup': 200, 'gamma': 0.9792596557029084, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.10774224008955044, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 14230208
torch.Size([180])


[I 2024-10-28 16:57:32,917] Trial 119 finished with value: 0.7134060487523102 and parameters: {'lr': 0.00834261931133517, 'wd': 3.441575541270625e-05, 'warmup': 200, 'gamma': 0.9785767479710901, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1219609613475425, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 119 finished with value: 0.7134060487523102 and parameters: {'lr': 0.00834261931133517, 'wd': 3.441575541270625e-05, 'warmup': 200, 'gamma': 0.9785767479710901, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1219609613475425, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 24100288
torch.Size([180])


[I 2024-10-28 16:59:00,025] Trial 120 finished with value: 0.7009192897986412 and parameters: {'lr': 0.005446234236201474, 'wd': 4.0481480039603785e-05, 'warmup': 200, 'gamma': 0.9822118417535926, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.10358514299326768, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 120 finished with value: 0.7009192897986412 and parameters: {'lr': 0.005446234236201474, 'wd': 4.0481480039603785e-05, 'warmup': 200, 'gamma': 0.9822118417535926, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.10358514299326768, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 14230208
torch.Size([180])


[I 2024-10-28 16:59:56,372] Trial 121 finished with value: 0.7316412933636229 and parameters: {'lr': 0.006794711867326817, 'wd': 6.627257038733832e-05, 'warmup': 200, 'gamma': 0.9855228604622889, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1285837334332827, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 121 finished with value: 0.7316412933636229 and parameters: {'lr': 0.006794711867326817, 'wd': 6.627257038733832e-05, 'warmup': 200, 'gamma': 0.9855228604622889, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1285837334332827, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 15213248
torch.Size([180])


[I 2024-10-28 17:00:55,789] Trial 122 finished with value: 0.7814892648334262 and parameters: {'lr': 0.004700588243854013, 'wd': 4.6196158541281804e-05, 'warmup': 200, 'gamma': 0.9813266699984996, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.09617204279652473, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 122 finished with value: 0.7814892648334262 and parameters: {'lr': 0.004700588243854013, 'wd': 4.6196158541281804e-05, 'warmup': 200, 'gamma': 0.9813266699984996, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.09617204279652473, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 18170304
torch.Size([180])


[I 2024-10-28 17:02:03,766] Trial 123 finished with value: 0.6939746529190577 and parameters: {'lr': 0.008016651198670621, 'wd': 0.0009700111504527355, 'warmup': 200, 'gamma': 0.9840991557571117, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.11504908728259397, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 123 finished with value: 0.6939746529190577 and parameters: {'lr': 0.008016651198670621, 'wd': 0.0009700111504527355, 'warmup': 200, 'gamma': 0.9840991557571117, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.11504908728259397, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16990656
torch.Size([180])


[I 2024-10-28 17:03:08,130] Trial 124 finished with value: 0.7580864061251251 and parameters: {'lr': 0.008259313567821202, 'wd': 0.0010465686694945437, 'warmup': 200, 'gamma': 0.982728162094294, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.11514175732286927, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 124 finished with value: 0.7580864061251251 and parameters: {'lr': 0.008259313567821202, 'wd': 0.0010465686694945437, 'warmup': 200, 'gamma': 0.982728162094294, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.11514175732286927, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 18154880
torch.Size([180])


[I 2024-10-28 17:04:33,333] Trial 125 finished with value: 0.7801027343849549 and parameters: {'lr': 0.0062660984722784, 'wd': 0.0023453676744727755, 'warmup': 200, 'gamma': 0.9805996574846569, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.03288557373332174, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 125 finished with value: 0.7801027343849549 and parameters: {'lr': 0.0062660984722784, 'wd': 0.0023453676744727755, 'warmup': 200, 'gamma': 0.9805996574846569, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.03288557373332174, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 19751104
torch.Size([180])


[I 2024-10-28 17:05:45,672] Trial 126 finished with value: 0.7460748058300002 and parameters: {'lr': 0.007321799509804248, 'wd': 8.501512802967948e-05, 'warmup': 200, 'gamma': 0.9835740460713644, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1199608548144072, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 126 finished with value: 0.7460748058300002 and parameters: {'lr': 0.007321799509804248, 'wd': 8.501512802967948e-05, 'warmup': 200, 'gamma': 0.9835740460713644, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1199608548144072, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 15811008
torch.Size([180])


[I 2024-10-28 17:06:48,138] Trial 127 finished with value: 0.7358829304908676 and parameters: {'lr': 0.0003272404751522635, 'wd': 2.5542477814867107e-05, 'warmup': 200, 'gamma': 0.9799891679868669, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.11091665999466482, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 127 finished with value: 0.7358829304908676 and parameters: {'lr': 0.0003272404751522635, 'wd': 2.5542477814867107e-05, 'warmup': 200, 'gamma': 0.9799891679868669, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.11091665999466482, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 18256336
torch.Size([180])


[I 2024-10-28 17:10:29,194] Trial 128 finished with value: 0.7689770583784995 and parameters: {'lr': 0.003653427004820861, 'wd': 5.439997108997875e-05, 'warmup': 50, 'gamma': 0.9771241425086306, 'time_dim': 32, 'patch_size': 8, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.1324002956579492, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 128 finished with value: 0.7689770583784995 and parameters: {'lr': 0.003653427004820861, 'wd': 5.439997108997875e-05, 'warmup': 50, 'gamma': 0.9771241425086306, 'time_dim': 32, 'patch_size': 8, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.1324002956579492, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 33553344
torch.Size([180])


[I 2024-10-28 17:12:21,829] Trial 129 finished with value: 0.7259820625302915 and parameters: {'lr': 0.00813773559772062, 'wd': 3.0415332813657133e-05, 'warmup': 100, 'gamma': 0.9820205732356032, 'time_dim': 16, 'patch_size': 64, 'depth': 12, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.10195642653320418, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 129 finished with value: 0.7259820625302915 and parameters: {'lr': 0.00813773559772062, 'wd': 3.0415332813657133e-05, 'warmup': 100, 'gamma': 0.9820205732356032, 'time_dim': 16, 'patch_size': 64, 'depth': 12, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.10195642653320418, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 15403968
torch.Size([180])


[I 2024-10-28 17:13:18,660] Trial 130 finished with value: 0.7289539880190089 and parameters: {'lr': 0.009962116304280389, 'wd': 3.7611324662888475e-05, 'warmup': 200, 'gamma': 0.989894197851745, 'time_dim': 16, 'patch_size': 64, 'depth': 4, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11641734538604943, 'schedule': 'linear'}. Best is trial 108 with value: 0.6159034405503282.


Trial 130 finished with value: 0.7289539880190089 and parameters: {'lr': 0.009962116304280389, 'wd': 3.7611324662888475e-05, 'warmup': 200, 'gamma': 0.989894197851745, 'time_dim': 16, 'patch_size': 64, 'depth': 4, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11641734538604943, 'schedule': 'linear'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 15213248
torch.Size([180])


[I 2024-10-28 17:14:18,129] Trial 131 finished with value: 0.7472005237185637 and parameters: {'lr': 0.0051070470111896586, 'wd': 9.560879484059502e-05, 'warmup': 100, 'gamma': 0.9840851886154041, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.10720561563200189, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 131 finished with value: 0.7472005237185637 and parameters: {'lr': 0.0051070470111896586, 'wd': 9.560879484059502e-05, 'warmup': 100, 'gamma': 0.9840851886154041, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.10720561563200189, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 15213248
torch.Size([180])


[I 2024-10-28 17:15:17,536] Trial 132 finished with value: 0.7531317418789527 and parameters: {'lr': 0.004152225225817028, 'wd': 0.00013197444592061343, 'warmup': 250, 'gamma': 0.9830173947745569, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.10027530673722938, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.


Trial 132 finished with value: 0.7531317418789527 and parameters: {'lr': 0.004152225225817028, 'wd': 0.00013197444592061343, 'warmup': 250, 'gamma': 0.9830173947745569, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.10027530673722938, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 14230208
torch.Size([180])


[I 2024-10-28 17:16:13,877] Trial 133 finished with value: 0.7368881577793179 and parameters: {'lr': 0.006428028089288617, 'wd': 6.229859233666615e-05, 'warmup': 200, 'gamma': 0.9849877545824502, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14514783916376703, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.


Trial 133 finished with value: 0.7368881577793179 and parameters: {'lr': 0.006428028089288617, 'wd': 6.229859233666615e-05, 'warmup': 200, 'gamma': 0.9849877545824502, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14514783916376703, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 27041472
torch.Size([180])


[I 2024-10-28 17:17:48,092] Trial 134 finished with value: 0.6680796654631865 and parameters: {'lr': 0.003428528638384866, 'wd': 0.000824709411748595, 'warmup': 100, 'gamma': 0.9844703809950868, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.12657540261930614, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 134 finished with value: 0.6680796654631865 and parameters: {'lr': 0.003428528638384866, 'wd': 0.000824709411748595, 'warmup': 100, 'gamma': 0.9844703809950868, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.12657540261930614, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 27041472
torch.Size([180])


[I 2024-10-28 17:19:22,320] Trial 135 finished with value: 0.7426833366357587 and parameters: {'lr': 0.0029673100114652054, 'wd': 0.0010287248100758128, 'warmup': 100, 'gamma': 0.9926751292298672, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.12548944435731277, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 135 finished with value: 0.7426833366357587 and parameters: {'lr': 0.0029673100114652054, 'wd': 0.0010287248100758128, 'warmup': 100, 'gamma': 0.9926751292298672, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.12548944435731277, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 25272000
torch.Size([180])


[I 2024-10-28 17:20:51,208] Trial 136 finished with value: 0.7574477481684093 and parameters: {'lr': 0.0034238824956460425, 'wd': 0.0006404602319162532, 'warmup': 100, 'gamma': 0.9816284450919162, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.12081558055580104, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 136 finished with value: 0.7574477481684093 and parameters: {'lr': 0.0034238824956460425, 'wd': 0.0006404602319162532, 'warmup': 100, 'gamma': 0.9816284450919162, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.12081558055580104, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 28630208
torch.Size([180])


[I 2024-10-28 17:22:32,132] Trial 137 finished with value: 0.7157708284045436 and parameters: {'lr': 0.005552687879753957, 'wd': 0.0008669464011710381, 'warmup': 100, 'gamma': 0.9826386422607498, 'time_dim': 16, 'patch_size': 64, 'depth': 11, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.11231034609689945, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 137 finished with value: 0.7157708284045436 and parameters: {'lr': 0.005552687879753957, 'wd': 0.0008669464011710381, 'warmup': 100, 'gamma': 0.9826386422607498, 'time_dim': 16, 'patch_size': 64, 'depth': 11, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.11231034609689945, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 25496640
torch.Size([180])


[I 2024-10-28 17:24:01,022] Trial 138 finished with value: 0.7299251677636912 and parameters: {'lr': 0.007208291466256462, 'wd': 0.00019180965115612154, 'warmup': 100, 'gamma': 0.9748965235754499, 'time_dim': 64, 'patch_size': 64, 'depth': 9, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.13838076021245171, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 138 finished with value: 0.7299251677636912 and parameters: {'lr': 0.007208291466256462, 'wd': 0.00019180965115612154, 'warmup': 100, 'gamma': 0.9748965235754499, 'time_dim': 64, 'patch_size': 64, 'depth': 9, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.13838076021245171, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 28810944
torch.Size([180])


[I 2024-10-28 17:25:39,544] Trial 139 finished with value: 0.7412078735307707 and parameters: {'lr': 0.0026983039065172527, 'wd': 0.0014017829024113163, 'warmup': 150, 'gamma': 0.9809349162345964, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.12895039532028035, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.


Trial 139 finished with value: 0.7412078735307707 and parameters: {'lr': 0.0026983039065172527, 'wd': 0.0014017829024113163, 'warmup': 150, 'gamma': 0.9809349162345964, 'time_dim': 16, 'patch_size': 64, 'depth': 9, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.12895039532028035, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 27232192
torch.Size([180])


[I 2024-10-28 17:27:11,833] Trial 140 finished with value: 0.7055912362231958 and parameters: {'lr': 0.00844253722281956, 'wd': 0.000512580056876842, 'warmup': 100, 'gamma': 0.9777699983053216, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.12361784203733138, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 140 finished with value: 0.7055912362231958 and parameters: {'lr': 0.00844253722281956, 'wd': 0.000512580056876842, 'warmup': 100, 'gamma': 0.9777699983053216, 'time_dim': 16, 'patch_size': 64, 'depth': 8, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.12361784203733138, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 19147968
torch.Size([180])


[I 2024-10-28 17:28:18,950] Trial 141 finished with value: 0.7628770047213084 and parameters: {'lr': 0.004120569275793185, 'wd': 0.0003471815358475046, 'warmup': 200, 'gamma': 0.9844682632670917, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11772856559957567, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.


Trial 141 finished with value: 0.7628770047213084 and parameters: {'lr': 0.004120569275793185, 'wd': 0.0003471815358475046, 'warmup': 200, 'gamma': 0.9844682632670917, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11772856559957567, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 41802688
torch.Size([180])


[I 2024-10-28 17:30:23,272] Trial 142 finished with value: 0.715186377223451 and parameters: {'lr': 0.0035404034470624744, 'wd': 0.002855712412886095, 'warmup': 100, 'gamma': 0.9863594903073687, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09357419886496371, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.


Trial 142 finished with value: 0.715186377223451 and parameters: {'lr': 0.0035404034470624744, 'wd': 0.002855712412886095, 'warmup': 100, 'gamma': 0.9863594903073687, 'time_dim': 16, 'patch_size': 64, 'depth': 10, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09357419886496371, 'schedule': 'cosine_with_restarts'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 18170304
torch.Size([180])


[I 2024-10-28 17:31:31,363] Trial 143 finished with value: 0.6746386000637261 and parameters: {'lr': 0.004813546548892008, 'wd': 2.134270184599233e-05, 'warmup': 200, 'gamma': 0.9912726307711255, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.1074539058568604, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.


Trial 143 finished with value: 0.6746386000637261 and parameters: {'lr': 0.004813546548892008, 'wd': 2.134270184599233e-05, 'warmup': 200, 'gamma': 0.9912726307711255, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.1074539058568604, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 18170304
torch.Size([180])


[I 2024-10-28 17:32:39,278] Trial 144 finished with value: 0.7124411242377189 and parameters: {'lr': 0.00576094469669608, 'wd': 1.9844509198915822e-05, 'warmup': 200, 'gamma': 0.9832264506218781, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.10930948507694926, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.


Trial 144 finished with value: 0.7124411242377189 and parameters: {'lr': 0.00576094469669608, 'wd': 1.9844509198915822e-05, 'warmup': 200, 'gamma': 0.9832264506218781, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.10930948507694926, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16990656
torch.Size([180])


[I 2024-10-28 17:33:43,780] Trial 145 finished with value: 0.716048495204814 and parameters: {'lr': 0.00024831501190942064, 'wd': 1.7074704493621307e-05, 'warmup': 200, 'gamma': 0.9903708160316517, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.10557608347548501, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.


Trial 145 finished with value: 0.716048495204814 and parameters: {'lr': 0.00024831501190942064, 'wd': 1.7074704493621307e-05, 'warmup': 200, 'gamma': 0.9903708160316517, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.10557608347548501, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 22888896
torch.Size([180])


[I 2024-10-28 17:35:04,015] Trial 146 finished with value: 0.7844315473464754 and parameters: {'lr': 1.0063648800377533e-05, 'wd': 3.308504489084147e-05, 'warmup': 200, 'gamma': 0.9944614125108494, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 11, 'mlp_dim': 256, 'emb_dropout': 0.11624844704335292, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.


Trial 146 finished with value: 0.7844315473464754 and parameters: {'lr': 1.0063648800377533e-05, 'wd': 3.308504489084147e-05, 'warmup': 200, 'gamma': 0.9944614125108494, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 11, 'mlp_dim': 256, 'emb_dropout': 0.11624844704335292, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 22503616
torch.Size([180])


[I 2024-10-28 17:36:24,439] Trial 147 finished with value: 0.6994529720406595 and parameters: {'lr': 0.00019301746398433747, 'wd': 2.186876103749283e-05, 'warmup': 200, 'gamma': 0.9914490128146436, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.017569429252767076, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.


Trial 147 finished with value: 0.6994529720406595 and parameters: {'lr': 0.00019301746398433747, 'wd': 2.186876103749283e-05, 'warmup': 200, 'gamma': 0.9914490128146436, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.017569429252767076, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 22494304
torch.Size([180])


[I 2024-10-28 17:38:50,619] Trial 148 finished with value: 0.7682834870976227 and parameters: {'lr': 0.006829327115504692, 'wd': 1.48998651139115e-05, 'warmup': 200, 'gamma': 0.9916301398171201, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.018911543558946354, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.


Trial 148 finished with value: 0.7682834870976227 and parameters: {'lr': 0.006829327115504692, 'wd': 1.48998651139115e-05, 'warmup': 200, 'gamma': 0.9916301398171201, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.018911543558946354, 'schedule': 'cosine'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 22503616
torch.Size([180])


[I 2024-10-28 17:40:11,054] Trial 149 finished with value: 0.6986319610809923 and parameters: {'lr': 0.00011498924192339818, 'wd': 2.3621925009309244e-05, 'warmup': 200, 'gamma': 0.9821250934560485, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.018333525600501763, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 149 finished with value: 0.6986319610809923 and parameters: {'lr': 0.00011498924192339818, 'wd': 2.3621925009309244e-05, 'warmup': 200, 'gamma': 0.9821250934560485, 'time_dim': 16, 'patch_size': 64, 'depth': 7, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.018333525600501763, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-28 17:41:39,143] Trial 150 finished with value: 0.6176510247496961 and parameters: {'lr': 0.008966346423287303, 'wd': 0.0014062701160464269, 'warmup': 200, 'gamma': 0.9819937430975207, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.18609825467691218, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 150 finished with value: 0.6176510247496961 and parameters: {'lr': 0.008966346423287303, 'wd': 0.0014062701160464269, 'warmup': 200, 'gamma': 0.9819937430975207, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.18609825467691218, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-28 17:43:07,084] Trial 151 finished with value: 0.7067831069932 and parameters: {'lr': 0.00013814517853210343, 'wd': 0.0012915445076820342, 'warmup': 200, 'gamma': 0.9821538363681906, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.18435199468562413, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 151 finished with value: 0.7067831069932 and parameters: {'lr': 0.00013814517853210343, 'wd': 0.0012915445076820342, 'warmup': 200, 'gamma': 0.9821538363681906, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.18435199468562413, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-28 17:44:35,362] Trial 152 finished with value: 0.7515837773639081 and parameters: {'lr': 4.7835928143128053e-05, 'wd': 0.000259824535111675, 'warmup': 200, 'gamma': 0.9811742058763074, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.19208484908579065, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 152 finished with value: 0.7515837773639081 and parameters: {'lr': 4.7835928143128053e-05, 'wd': 0.000259824535111675, 'warmup': 200, 'gamma': 0.9811742058763074, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.19208484908579065, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-28 17:46:03,311] Trial 153 finished with value: 0.6834914359243214 and parameters: {'lr': 0.00011195717871311684, 'wd': 0.0007570302951649809, 'warmup': 200, 'gamma': 0.9836913335148757, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.024529114089539228, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 153 finished with value: 0.6834914359243214 and parameters: {'lr': 0.00011195717871311684, 'wd': 0.0007570302951649809, 'warmup': 200, 'gamma': 0.9836913335148757, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.024529114089539228, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-28 17:47:31,339] Trial 154 finished with value: 0.7320759067639891 and parameters: {'lr': 0.00040126668628682094, 'wd': 0.0008422157777461362, 'warmup': 200, 'gamma': 0.9836006538775308, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.0125010476882461, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 154 finished with value: 0.7320759067639891 and parameters: {'lr': 0.00040126668628682094, 'wd': 0.0008422157777461362, 'warmup': 200, 'gamma': 0.9836006538775308, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.0125010476882461, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-28 17:48:59,557] Trial 155 finished with value: 0.7052327213137001 and parameters: {'lr': 0.00896908013496756, 'wd': 0.0016090820501871788, 'warmup': 200, 'gamma': 0.9839655737186161, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16180647720798502, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 155 finished with value: 0.7052327213137001 and parameters: {'lr': 0.00896908013496756, 'wd': 0.0016090820501871788, 'warmup': 200, 'gamma': 0.9839655737186161, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16180647720798502, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 17:50:20,138] Trial 156 finished with value: 0.6366949591316562 and parameters: {'lr': 0.008130015436672883, 'wd': 0.0006125485062177268, 'warmup': 100, 'gamma': 0.9826384296782555, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.023995814251063646, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 156 finished with value: 0.6366949591316562 and parameters: {'lr': 0.008130015436672883, 'wd': 0.0006125485062177268, 'warmup': 100, 'gamma': 0.9826384296782555, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.023995814251063646, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 17:51:40,549] Trial 157 finished with value: 0.8108435341499599 and parameters: {'lr': 1.2767007436529098e-05, 'wd': 0.0008079033328967167, 'warmup': 100, 'gamma': 0.9826445754256673, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.032901557668663345, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 157 finished with value: 0.8108435341499599 and parameters: {'lr': 1.2767007436529098e-05, 'wd': 0.0008079033328967167, 'warmup': 100, 'gamma': 0.9826445754256673, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.032901557668663345, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 17:53:00,978] Trial 158 finished with value: 0.7771838890934283 and parameters: {'lr': 2.9544147394491832e-05, 'wd': 0.000637597716814659, 'warmup': 100, 'gamma': 0.9884928782339217, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.026473930583945128, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 158 finished with value: 0.7771838890934283 and parameters: {'lr': 2.9544147394491832e-05, 'wd': 0.000637597716814659, 'warmup': 100, 'gamma': 0.9884928782339217, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.026473930583945128, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 17:54:21,412] Trial 159 finished with value: 0.6812365523442983 and parameters: {'lr': 0.008369212653696506, 'wd': 0.0005213269630056175, 'warmup': 100, 'gamma': 0.9848744928487174, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.041630358323605246, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 159 finished with value: 0.6812365523442983 and parameters: {'lr': 0.008369212653696506, 'wd': 0.0005213269630056175, 'warmup': 100, 'gamma': 0.9848744928487174, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.041630358323605246, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 17:55:41,857] Trial 160 finished with value: 0.7357823983776556 and parameters: {'lr': 0.0086329734421785, 'wd': 0.00044742501428699985, 'warmup': 100, 'gamma': 0.9847704009825922, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.04962863435843466, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 160 finished with value: 0.7357823983776556 and parameters: {'lr': 0.0086329734421785, 'wd': 0.00044742501428699985, 'warmup': 100, 'gamma': 0.9847704009825922, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.04962863435843466, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 17:57:02,289] Trial 161 finished with value: 0.6734646727121801 and parameters: {'lr': 0.009945539986952, 'wd': 0.0012063535174873033, 'warmup': 100, 'gamma': 0.9833765934521181, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.005467195429482749, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 161 finished with value: 0.6734646727121801 and parameters: {'lr': 0.009945539986952, 'wd': 0.0012063535174873033, 'warmup': 100, 'gamma': 0.9833765934521181, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.005467195429482749, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 17:58:22,758] Trial 162 finished with value: 0.7460130155502617 and parameters: {'lr': 0.009975594726512995, 'wd': 0.0011547503011721137, 'warmup': 100, 'gamma': 0.9854802297422598, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.008298587324372914, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 162 finished with value: 0.7460130155502617 and parameters: {'lr': 0.009975594726512995, 'wd': 0.0011547503011721137, 'warmup': 100, 'gamma': 0.9854802297422598, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.008298587324372914, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 17:59:43,433] Trial 163 finished with value: 0.6377262766068481 and parameters: {'lr': 0.007860969364631888, 'wd': 0.0005383376879568496, 'warmup': 100, 'gamma': 0.9838516007573592, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.04316488146652457, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 163 finished with value: 0.6377262766068481 and parameters: {'lr': 0.007860969364631888, 'wd': 0.0005383376879568496, 'warmup': 100, 'gamma': 0.9838516007573592, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.04316488146652457, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:01:03,934] Trial 164 finished with value: 0.7073108934573314 and parameters: {'lr': 0.007369806753313646, 'wd': 0.0006104532236739293, 'warmup': 100, 'gamma': 0.9835577871491991, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.04569037297099849, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 164 finished with value: 0.7073108934573314 and parameters: {'lr': 0.007369806753313646, 'wd': 0.0006104532236739293, 'warmup': 100, 'gamma': 0.9835577871491991, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.04569037297099849, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:02:24,526] Trial 165 finished with value: 0.7039617921317487 and parameters: {'lr': 0.007872071953401628, 'wd': 0.0004989059638208245, 'warmup': 100, 'gamma': 0.9842581018337476, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.04308876015473535, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 165 finished with value: 0.7039617921317487 and parameters: {'lr': 0.007872071953401628, 'wd': 0.0004989059638208245, 'warmup': 100, 'gamma': 0.9842581018337476, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.04308876015473535, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:03:45,463] Trial 166 finished with value: 0.7586788849797906 and parameters: {'lr': 0.006430561409158814, 'wd': 0.0007273701236374484, 'warmup': 100, 'gamma': 0.9830566444603273, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.05824172838146099, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 166 finished with value: 0.7586788849797906 and parameters: {'lr': 0.006430561409158814, 'wd': 0.0007273701236374484, 'warmup': 100, 'gamma': 0.9830566444603273, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.05824172838146099, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:05:06,488] Trial 167 finished with value: 0.7372847484702332 and parameters: {'lr': 0.00911001391350295, 'wd': 0.0009304721793159522, 'warmup': 50, 'gamma': 0.9858209528310776, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.031854760399158355, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 167 finished with value: 0.7372847484702332 and parameters: {'lr': 0.00911001391350295, 'wd': 0.0009304721793159522, 'warmup': 50, 'gamma': 0.9858209528310776, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.031854760399158355, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 17039872
torch.Size([180])


[I 2024-10-28 18:06:26,882] Trial 168 finished with value: 0.7615398350225027 and parameters: {'lr': 0.007631427095692752, 'wd': 0.0007358858246770422, 'warmup': 200, 'gamma': 0.9849372704427503, 'time_dim': 32, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.004535512176882593, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 168 finished with value: 0.7615398350225027 and parameters: {'lr': 0.007631427095692752, 'wd': 0.0007358858246770422, 'warmup': 200, 'gamma': 0.9849372704427503, 'time_dim': 32, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.004535512176882593, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:07:47,279] Trial 169 finished with value: 0.6584772993695966 and parameters: {'lr': 0.009992676793995644, 'wd': 0.0005450953021479023, 'warmup': 100, 'gamma': 0.9838648393500737, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.035050992179278206, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 169 finished with value: 0.6584772993695966 and parameters: {'lr': 0.009992676793995644, 'wd': 0.0005450953021479023, 'warmup': 100, 'gamma': 0.9838648393500737, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.035050992179278206, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:09:07,644] Trial 170 finished with value: 0.7157908260173942 and parameters: {'lr': 0.009279141432635602, 'wd': 0.0004118451919364619, 'warmup': 100, 'gamma': 0.9826991081175476, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03857072034353637, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 170 finished with value: 0.7157908260173942 and parameters: {'lr': 0.009279141432635602, 'wd': 0.0004118451919364619, 'warmup': 100, 'gamma': 0.9826991081175476, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03857072034353637, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:10:28,016] Trial 171 finished with value: 0.7121797054812281 and parameters: {'lr': 0.007958178340743508, 'wd': 0.0005372753822914692, 'warmup': 100, 'gamma': 0.9838839922270467, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.022162630427828817, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 171 finished with value: 0.7121797054812281 and parameters: {'lr': 0.007958178340743508, 'wd': 0.0005372753822914692, 'warmup': 100, 'gamma': 0.9838839922270467, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.022162630427828817, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:11:48,455] Trial 172 finished with value: 0.754714276270624 and parameters: {'lr': 0.009862256050472917, 'wd': 0.0006128635634114201, 'warmup': 100, 'gamma': 0.9834719936703765, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03818327428600243, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 172 finished with value: 0.754714276270624 and parameters: {'lr': 0.009862256050472917, 'wd': 0.0006128635634114201, 'warmup': 100, 'gamma': 0.9834719936703765, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03818327428600243, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 18:12:58,241] Trial 173 finished with value: 0.7028871952763177 and parameters: {'lr': 0.006794115327214193, 'wd': 0.0007304662622001508, 'warmup': 100, 'gamma': 0.9846185491530476, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.027872830752793062, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 173 finished with value: 0.7028871952763177 and parameters: {'lr': 0.006794115327214193, 'wd': 0.0007304662622001508, 'warmup': 100, 'gamma': 0.9846185491530476, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.027872830752793062, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:14:18,682] Trial 174 finished with value: 0.7905618010830754 and parameters: {'lr': 0.008366572629987898, 'wd': 0.001193105929314069, 'warmup': 200, 'gamma': 0.9815467461982667, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.05410477010928587, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 174 finished with value: 0.7905618010830754 and parameters: {'lr': 0.008366572629987898, 'wd': 0.001193105929314069, 'warmup': 200, 'gamma': 0.9815467461982667, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.05410477010928587, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 20113024
torch.Size([180])


[I 2024-10-28 18:15:47,368] Trial 175 finished with value: 0.7665082981510589 and parameters: {'lr': 0.0061701835010253255, 'wd': 0.0005430135968016298, 'warmup': 100, 'gamma': 0.9830577997910832, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 12, 'mlp_dim': 256, 'emb_dropout': 0.03530602950187393, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 175 finished with value: 0.7665082981510589 and parameters: {'lr': 0.0061701835010253255, 'wd': 0.0005430135968016298, 'warmup': 100, 'gamma': 0.9830577997910832, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 12, 'mlp_dim': 256, 'emb_dropout': 0.03530602950187393, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 18154880
torch.Size([180])


[I 2024-10-28 18:17:12,490] Trial 176 finished with value: 0.6475199062796247 and parameters: {'lr': 0.007434665843379028, 'wd': 0.0018815274733768532, 'warmup': 250, 'gamma': 0.9870352778621585, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.15275791171607078, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 176 finished with value: 0.6475199062796247 and parameters: {'lr': 0.007434665843379028, 'wd': 0.0018815274733768532, 'warmup': 250, 'gamma': 0.9870352778621585, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.15275791171607078, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:18:32,865] Trial 177 finished with value: 0.7279142878911067 and parameters: {'lr': 0.00511083310261815, 'wd': 0.0014992270926872423, 'warmup': 250, 'gamma': 0.9822987999266614, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1737312936257545, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 177 finished with value: 0.7279142878911067 and parameters: {'lr': 0.00511083310261815, 'wd': 0.0014992270926872423, 'warmup': 250, 'gamma': 0.9822987999266614, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1737312936257545, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 15197824
torch.Size([180])


[I 2024-10-28 18:19:46,632] Trial 178 finished with value: 0.754379390661813 and parameters: {'lr': 0.006730438511203551, 'wd': 0.0019245226615997998, 'warmup': 100, 'gamma': 0.9860335938534436, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.0235940684871552, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.


Trial 178 finished with value: 0.754379390661813 and parameters: {'lr': 0.006730438511203551, 'wd': 0.0019245226615997998, 'warmup': 100, 'gamma': 0.9860335938534436, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.0235940684871552, 'schedule': 'constant'}. Best is trial 108 with value: 0.6159034405503282.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:21:06,966] Trial 179 finished with value: 0.5534605159263463 and parameters: {'lr': 0.009773130154297444, 'wd': 0.0003570653379801602, 'warmup': 250, 'gamma': 0.9850697438992357, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1680840716922411, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 179 finished with value: 0.5534605159263463 and parameters: {'lr': 0.009773130154297444, 'wd': 0.0003570653379801602, 'warmup': 250, 'gamma': 0.9850697438992357, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1680840716922411, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:22:27,308] Trial 180 finished with value: 0.735255964159214 and parameters: {'lr': 0.00997468848937994, 'wd': 0.00039287337810643745, 'warmup': 250, 'gamma': 0.9870184953946524, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15698487564063732, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 180 finished with value: 0.735255964159214 and parameters: {'lr': 0.00997468848937994, 'wd': 0.00039287337810643745, 'warmup': 250, 'gamma': 0.9870184953946524, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15698487564063732, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:23:47,687] Trial 181 finished with value: 0.7925138445767641 and parameters: {'lr': 0.007432466721035131, 'wd': 0.00032748479071911335, 'warmup': 250, 'gamma': 0.9850605358256064, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16162335739497774, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 181 finished with value: 0.7925138445767641 and parameters: {'lr': 0.007432466721035131, 'wd': 0.00032748479071911335, 'warmup': 250, 'gamma': 0.9850605358256064, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16162335739497774, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:25:08,063] Trial 182 finished with value: 0.6992090432085267 and parameters: {'lr': 0.00596379409482001, 'wd': 0.00048014510565904425, 'warmup': 250, 'gamma': 0.984281461471824, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.043395290321224834, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 182 finished with value: 0.6992090432085267 and parameters: {'lr': 0.00596379409482001, 'wd': 0.00048014510565904425, 'warmup': 250, 'gamma': 0.984281461471824, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.043395290321224834, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 18:26:17,719] Trial 183 finished with value: 0.6392802045593394 and parameters: {'lr': 0.008703175436093195, 'wd': 0.001802419773482565, 'warmup': 250, 'gamma': 0.9834426829182279, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.19133444030669353, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 183 finished with value: 0.6392802045593394 and parameters: {'lr': 0.008703175436093195, 'wd': 0.001802419773482565, 'warmup': 250, 'gamma': 0.9834426829182279, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.19133444030669353, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:27:38,034] Trial 184 finished with value: 0.7541799388533549 and parameters: {'lr': 6.966583046024496e-05, 'wd': 0.002512533293416028, 'warmup': 250, 'gamma': 0.9853001013199842, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16862730994174177, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 184 finished with value: 0.7541799388533549 and parameters: {'lr': 6.966583046024496e-05, 'wd': 0.002512533293416028, 'warmup': 250, 'gamma': 0.9853001013199842, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16862730994174177, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:28:58,311] Trial 185 finished with value: 0.7672734028354119 and parameters: {'lr': 0.008354170535891884, 'wd': 0.00029820170714219197, 'warmup': 250, 'gamma': 0.9835660336006584, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18866211035408878, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 185 finished with value: 0.7672734028354119 and parameters: {'lr': 0.008354170535891884, 'wd': 0.00029820170714219197, 'warmup': 250, 'gamma': 0.9835660336006584, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18866211035408878, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 15197824
torch.Size([180])


[I 2024-10-28 18:30:12,235] Trial 186 finished with value: 0.7417343261125314 and parameters: {'lr': 0.007294853662598945, 'wd': 0.001651021413635384, 'warmup': 250, 'gamma': 0.9827808149586127, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.1517210269048784, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 186 finished with value: 0.7417343261125314 and parameters: {'lr': 0.007294853662598945, 'wd': 0.001651021413635384, 'warmup': 250, 'gamma': 0.9827808149586127, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.1517210269048784, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 22489984
torch.Size([180])


[I 2024-10-28 18:31:51,640] Trial 187 finished with value: 0.6881578501574566 and parameters: {'lr': 0.005710463228566841, 'wd': 0.002139615891039415, 'warmup': 250, 'gamma': 0.981724625753125, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.19407080303767135, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 187 finished with value: 0.6881578501574566 and parameters: {'lr': 0.005710463228566841, 'wd': 0.002139615891039415, 'warmup': 250, 'gamma': 0.981724625753125, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.19407080303767135, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 19336064
torch.Size([180])


[I 2024-10-28 18:33:18,807] Trial 188 finished with value: 0.7506471967425472 and parameters: {'lr': 0.009956028816431308, 'wd': 0.0034538707100797905, 'warmup': 250, 'gamma': 0.9844916821789476, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.19675929393479782, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 188 finished with value: 0.7506471967425472 and parameters: {'lr': 0.009956028816431308, 'wd': 0.0034538707100797905, 'warmup': 250, 'gamma': 0.9844916821789476, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.19675929393479782, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 23866240
torch.Size([180])


[I 2024-10-28 18:35:03,867] Trial 189 finished with value: 0.7738967867337136 and parameters: {'lr': 0.0054270786107444334, 'wd': 0.0018629882524301275, 'warmup': 250, 'gamma': 0.9815249433698953, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1975188375007812, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 189 finished with value: 0.7738967867337136 and parameters: {'lr': 0.0054270786107444334, 'wd': 0.0018629882524301275, 'warmup': 250, 'gamma': 0.9815249433698953, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1975188375007812, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 19336064
torch.Size([180])


[I 2024-10-28 18:36:31,475] Trial 190 finished with value: 0.739222250430376 and parameters: {'lr': 0.008555995564670101, 'wd': 0.002171135864498218, 'warmup': 250, 'gamma': 0.9833509609733859, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.18072342504737213, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 190 finished with value: 0.739222250430376 and parameters: {'lr': 0.008555995564670101, 'wd': 0.002171135864498218, 'warmup': 250, 'gamma': 0.9833509609733859, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.18072342504737213, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 19336064
torch.Size([180])


[I 2024-10-28 18:37:58,710] Trial 191 finished with value: 0.7200755210691587 and parameters: {'lr': 0.006203683054227572, 'wd': 0.0014327889242568795, 'warmup': 250, 'gamma': 0.982409071695741, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.19164028208007947, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 191 finished with value: 0.7200755210691587 and parameters: {'lr': 0.006203683054227572, 'wd': 0.0014327889242568795, 'warmup': 250, 'gamma': 0.982409071695741, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.19164028208007947, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 19735680
torch.Size([180])


[I 2024-10-28 18:39:29,962] Trial 192 finished with value: 0.7597096729849662 and parameters: {'lr': 0.004741482430454294, 'wd': 0.00042750724770124324, 'warmup': 250, 'gamma': 0.9817582337829764, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18420647304430202, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 192 finished with value: 0.7597096729849662 and parameters: {'lr': 0.004741482430454294, 'wd': 0.00042750724770124324, 'warmup': 250, 'gamma': 0.9817582337829764, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18420647304430202, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 18149504
torch.Size([180])


[I 2024-10-28 18:40:48,738] Trial 193 finished with value: 0.7477939150227033 and parameters: {'lr': 0.007213820859181611, 'wd': 0.0020314307539922158, 'warmup': 250, 'gamma': 0.9839979171153921, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.19365250797241257, 'schedule': 'linear'}. Best is trial 179 with value: 0.5534605159263463.


Trial 193 finished with value: 0.7477939150227033 and parameters: {'lr': 0.007213820859181611, 'wd': 0.0020314307539922158, 'warmup': 250, 'gamma': 0.9839979171153921, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.19365250797241257, 'schedule': 'linear'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 18156416
torch.Size([180])


[I 2024-10-28 18:42:12,873] Trial 194 finished with value: 0.6592715155483008 and parameters: {'lr': 0.008380377268604553, 'wd': 0.0011206110531578413, 'warmup': 250, 'gamma': 0.9831811888500295, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 512, 'emb_dropout': 0.14830746613151324, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 194 finished with value: 0.6592715155483008 and parameters: {'lr': 0.008380377268604553, 'wd': 0.0011206110531578413, 'warmup': 250, 'gamma': 0.9831811888500295, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 512, 'emb_dropout': 0.14830746613151324, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 18156416
torch.Size([180])


[I 2024-10-28 18:43:37,438] Trial 195 finished with value: 0.7499111336689287 and parameters: {'lr': 0.008657810183318463, 'wd': 0.0011124994685761706, 'warmup': 250, 'gamma': 0.9830948655562484, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 512, 'emb_dropout': 0.1997219793491196, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 195 finished with value: 0.7499111336689287 and parameters: {'lr': 0.008657810183318463, 'wd': 0.0011124994685761706, 'warmup': 250, 'gamma': 0.9830948655562484, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 512, 'emb_dropout': 0.1997219793491196, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 18156416
torch.Size([180])


[I 2024-10-28 18:45:01,551] Trial 196 finished with value: 0.7583985038293412 and parameters: {'lr': 0.0077603387585517235, 'wd': 0.0017027609210463375, 'warmup': 250, 'gamma': 0.9840430860704065, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 512, 'emb_dropout': 0.1536670042180563, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 196 finished with value: 0.7583985038293412 and parameters: {'lr': 0.0077603387585517235, 'wd': 0.0017027609210463375, 'warmup': 250, 'gamma': 0.9840430860704065, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 512, 'emb_dropout': 0.1536670042180563, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 18154880
torch.Size([180])


[I 2024-10-28 18:46:26,770] Trial 197 finished with value: 0.7732615862412251 and parameters: {'lr': 0.008556871400008688, 'wd': 0.0008039996646598698, 'warmup': 250, 'gamma': 0.985353777424774, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.1469751867155902, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 197 finished with value: 0.7732615862412251 and parameters: {'lr': 0.008556871400008688, 'wd': 0.0008039996646598698, 'warmup': 250, 'gamma': 0.985353777424774, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.1469751867155902, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 18156416
torch.Size([180])


[I 2024-10-28 18:47:50,987] Trial 198 finished with value: 0.6947151035949112 and parameters: {'lr': 0.009942132605722721, 'wd': 0.0009547407744569381, 'warmup': 250, 'gamma': 0.9885656821173459, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 512, 'emb_dropout': 0.17665228579100883, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 198 finished with value: 0.6947151035949112 and parameters: {'lr': 0.009942132605722721, 'wd': 0.0009547407744569381, 'warmup': 250, 'gamma': 0.9885656821173459, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 512, 'emb_dropout': 0.17665228579100883, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 17165184
torch.Size([180])


[I 2024-10-28 18:49:10,512] Trial 199 finished with value: 0.7574315340769132 and parameters: {'lr': 0.006255697293226243, 'wd': 0.0013388211983150432, 'warmup': 250, 'gamma': 0.9826383704322208, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.18783426052273755, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 199 finished with value: 0.7574315340769132 and parameters: {'lr': 0.006255697293226243, 'wd': 0.0013388211983150432, 'warmup': 250, 'gamma': 0.9826383704322208, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.18783426052273755, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 17047808
torch.Size([180])


[I 2024-10-28 18:50:31,601] Trial 200 finished with value: 0.755033784990955 and parameters: {'lr': 0.007014155812191607, 'wd': 0.0007055828670071643, 'warmup': 250, 'gamma': 0.9867809664645366, 'time_dim': 32, 'patch_size': 32, 'depth': 7, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.1948121194970226, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 200 finished with value: 0.755033784990955 and parameters: {'lr': 0.007014155812191607, 'wd': 0.0007055828670071643, 'warmup': 250, 'gamma': 0.9867809664645366, 'time_dim': 32, 'patch_size': 32, 'depth': 7, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.1948121194970226, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:51:52,126] Trial 201 finished with value: 0.775709653829478 and parameters: {'lr': 0.005356481116666207, 'wd': 0.0011901832559917628, 'warmup': 150, 'gamma': 0.9817967151987196, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03070748060478641, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 201 finished with value: 0.775709653829478 and parameters: {'lr': 0.005356481116666207, 'wd': 0.0011901832559917628, 'warmup': 150, 'gamma': 0.9817967151987196, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03070748060478641, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 19336064
torch.Size([180])


[I 2024-10-28 18:53:19,420] Trial 202 finished with value: 0.7617403534634343 and parameters: {'lr': 0.007772930269224214, 'wd': 0.0005832018279625986, 'warmup': 100, 'gamma': 0.9833507511482589, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.14375115070509337, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 202 finished with value: 0.7617403534634343 and parameters: {'lr': 0.007772930269224214, 'wd': 0.0005832018279625986, 'warmup': 100, 'gamma': 0.9833507511482589, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.14375115070509337, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 19735680
torch.Size([180])


[I 2024-10-28 18:54:50,632] Trial 203 finished with value: 0.7560914952457571 and parameters: {'lr': 0.008736139431464807, 'wd': 0.002574564310620256, 'warmup': 100, 'gamma': 0.9823328261114966, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15996991111327624, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 203 finished with value: 0.7560914952457571 and parameters: {'lr': 0.008736139431464807, 'wd': 0.002574564310620256, 'warmup': 100, 'gamma': 0.9823328261114966, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15996991111327624, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 18:56:11,111] Trial 204 finished with value: 0.7731000448863548 and parameters: {'lr': 0.006909185399104334, 'wd': 1.1953433676334995e-05, 'warmup': 100, 'gamma': 0.9847070664102172, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.01536598573450669, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 204 finished with value: 0.7731000448863548 and parameters: {'lr': 0.006909185399104334, 'wd': 1.1953433676334995e-05, 'warmup': 100, 'gamma': 0.9847070664102172, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.01536598573450669, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 15234640
torch.Size([180])


[I 2024-10-28 18:59:19,712] Trial 205 finished with value: 0.7400517589073449 and parameters: {'lr': 0.006082835086166596, 'wd': 0.0008469640074583942, 'warmup': 100, 'gamma': 0.9808052203360705, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.16439381720000962, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 205 finished with value: 0.7400517589073449 and parameters: {'lr': 0.006082835086166596, 'wd': 0.0008469640074583942, 'warmup': 100, 'gamma': 0.9808052203360705, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.16439381720000962, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 15795584
torch.Size([180])


[I 2024-10-28 19:00:37,031] Trial 206 finished with value: 0.7967962481856816 and parameters: {'lr': 0.00476800362745944, 'wd': 0.0006620165337350521, 'warmup': 100, 'gamma': 0.9837308885797694, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.006337009066647488, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 206 finished with value: 0.7967962481856816 and parameters: {'lr': 0.00476800362745944, 'wd': 0.0006620165337350521, 'warmup': 100, 'gamma': 0.9837308885797694, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.006337009066647488, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:01:46,725] Trial 207 finished with value: 0.7095961008865578 and parameters: {'lr': 0.00871601020283984, 'wd': 0.0012986750867605061, 'warmup': 50, 'gamma': 0.9828161618792669, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15049809744757964, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 207 finished with value: 0.7095961008865578 and parameters: {'lr': 0.00871601020283984, 'wd': 0.0012986750867605061, 'warmup': 50, 'gamma': 0.9828161618792669, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15049809744757964, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 19:03:07,239] Trial 208 finished with value: 0.7110439885609423 and parameters: {'lr': 0.007550249829202322, 'wd': 0.001570666557849024, 'warmup': 250, 'gamma': 0.9820222414538717, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18215166182204032, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 208 finished with value: 0.7110439885609423 and parameters: {'lr': 0.007550249829202322, 'wd': 0.001570666557849024, 'warmup': 250, 'gamma': 0.9820222414538717, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18215166182204032, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 23866240
torch.Size([180])


[I 2024-10-28 19:04:51,970] Trial 209 finished with value: 0.7864482162748072 and parameters: {'lr': 0.005629652055723814, 'wd': 0.0005452834951219927, 'warmup': 100, 'gamma': 0.9812916402303774, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.14046747278845195, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 209 finished with value: 0.7864482162748072 and parameters: {'lr': 0.005629652055723814, 'wd': 0.0005452834951219927, 'warmup': 100, 'gamma': 0.9812916402303774, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.14046747278845195, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 19:06:12,385] Trial 210 finished with value: 0.760414282408282 and parameters: {'lr': 0.006824924643965514, 'wd': 0.0011638403878187207, 'warmup': 100, 'gamma': 0.9833872521678592, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.002136307344926749, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 210 finished with value: 0.760414282408282 and parameters: {'lr': 0.006824924643965514, 'wd': 0.0011638403878187207, 'warmup': 100, 'gamma': 0.9833872521678592, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.002136307344926749, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:07:22,050] Trial 211 finished with value: 0.6774312182158989 and parameters: {'lr': 0.008934392013514831, 'wd': 4.6968865763303504e-05, 'warmup': 200, 'gamma': 0.9843546920489089, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15604794557448637, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 211 finished with value: 0.6774312182158989 and parameters: {'lr': 0.008934392013514831, 'wd': 4.6968865763303504e-05, 'warmup': 200, 'gamma': 0.9843546920489089, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15604794557448637, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:08:31,735] Trial 212 finished with value: 0.7453977406856028 and parameters: {'lr': 0.00982911871690341, 'wd': 4.750914317077381e-05, 'warmup': 200, 'gamma': 0.9845790401510783, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15774277326132283, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 212 finished with value: 0.7453977406856028 and parameters: {'lr': 0.00982911871690341, 'wd': 4.750914317077381e-05, 'warmup': 200, 'gamma': 0.9845790401510783, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15774277326132283, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:09:41,535] Trial 213 finished with value: 0.7042423019883509 and parameters: {'lr': 0.008187689407778406, 'wd': 5.376954889242819e-05, 'warmup': 200, 'gamma': 0.9840066174401444, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15295500984116173, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 213 finished with value: 0.7042423019883509 and parameters: {'lr': 0.008187689407778406, 'wd': 5.376954889242819e-05, 'warmup': 200, 'gamma': 0.9840066174401444, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15295500984116173, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 19:11:22,955] Trial 214 finished with value: 0.784031571269697 and parameters: {'lr': 0.00874493182313546, 'wd': 0.0004830580117707834, 'warmup': 200, 'gamma': 0.9893798934214609, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1675429741882864, 'schedule': 'linear'}. Best is trial 179 with value: 0.5534605159263463.


Trial 214 finished with value: 0.784031571269697 and parameters: {'lr': 0.00874493182313546, 'wd': 0.0004830580117707834, 'warmup': 200, 'gamma': 0.9893798934214609, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1675429741882864, 'schedule': 'linear'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 25256576
torch.Size([180])


[I 2024-10-28 19:13:15,517] Trial 215 finished with value: 0.7543439310166362 and parameters: {'lr': 0.007527864388480515, 'wd': 0.0009122668004693647, 'warmup': 200, 'gamma': 0.9829761627102255, 'time_dim': 16, 'patch_size': 32, 'depth': 9, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14839805634325845, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 215 finished with value: 0.7543439310166362 and parameters: {'lr': 0.007527864388480515, 'wd': 0.0009122668004693647, 'warmup': 200, 'gamma': 0.9829761627102255, 'time_dim': 16, 'patch_size': 32, 'depth': 9, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14839805634325845, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:14:25,389] Trial 216 finished with value: 0.6919075119197796 and parameters: {'lr': 0.009924070816898238, 'wd': 0.002131340177348823, 'warmup': 250, 'gamma': 0.9836732180210409, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03582911742580722, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 216 finished with value: 0.6919075119197796 and parameters: {'lr': 0.009924070816898238, 'wd': 0.002131340177348823, 'warmup': 250, 'gamma': 0.9836732180210409, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03582911742580722, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:15:35,229] Trial 217 finished with value: 0.7230833669409433 and parameters: {'lr': 0.00912537130612328, 'wd': 0.002207785193795496, 'warmup': 250, 'gamma': 0.9861006132642639, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.036907707840832735, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 217 finished with value: 0.7230833669409433 and parameters: {'lr': 0.00912537130612328, 'wd': 0.002207785193795496, 'warmup': 250, 'gamma': 0.9861006132642639, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.036907707840832735, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 20113024
torch.Size([180])


[I 2024-10-28 19:17:03,766] Trial 218 finished with value: 0.7038196324146937 and parameters: {'lr': 0.009971409258821059, 'wd': 0.00037480056094877046, 'warmup': 250, 'gamma': 0.9838551182691658, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 12, 'mlp_dim': 256, 'emb_dropout': 0.039475592022677176, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 218 finished with value: 0.7038196324146937 and parameters: {'lr': 0.009971409258821059, 'wd': 0.00037480056094877046, 'warmup': 250, 'gamma': 0.9838551182691658, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 12, 'mlp_dim': 256, 'emb_dropout': 0.039475592022677176, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:18:13,548] Trial 219 finished with value: 0.6399629682104211 and parameters: {'lr': 0.008255501347030127, 'wd': 0.0026510347039649267, 'warmup': 250, 'gamma': 0.9851233257206499, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.034881751871410335, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 219 finished with value: 0.6399629682104211 and parameters: {'lr': 0.008255501347030127, 'wd': 0.0026510347039649267, 'warmup': 250, 'gamma': 0.9851233257206499, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.034881751871410335, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:19:23,362] Trial 220 finished with value: 0.7627501137567837 and parameters: {'lr': 0.008115909258685567, 'wd': 0.001962738183157115, 'warmup': 250, 'gamma': 0.9851162576741133, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03311062937486741, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 220 finished with value: 0.7627501137567837 and parameters: {'lr': 0.008115909258685567, 'wd': 0.001962738183157115, 'warmup': 250, 'gamma': 0.9851162576741133, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03311062937486741, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:20:33,239] Trial 221 finished with value: 0.7813253438169235 and parameters: {'lr': 0.009881099747853287, 'wd': 0.002039530507414701, 'warmup': 250, 'gamma': 0.984505554010274, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.045470121264867394, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 221 finished with value: 0.7813253438169235 and parameters: {'lr': 0.009881099747853287, 'wd': 0.002039530507414701, 'warmup': 250, 'gamma': 0.984505554010274, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.045470121264867394, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:21:43,036] Trial 222 finished with value: 0.6232280745410037 and parameters: {'lr': 0.007696746673667552, 'wd': 0.0038458547351349153, 'warmup': 250, 'gamma': 0.985327439599307, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.027432197581387714, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 222 finished with value: 0.6232280745410037 and parameters: {'lr': 0.007696746673667552, 'wd': 0.0038458547351349153, 'warmup': 250, 'gamma': 0.985327439599307, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.027432197581387714, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:22:52,911] Trial 223 finished with value: 0.7964345719562641 and parameters: {'lr': 0.007524368483274546, 'wd': 0.0030285479689695248, 'warmup': 250, 'gamma': 0.986061308644522, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02588945924740198, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 223 finished with value: 0.7964345719562641 and parameters: {'lr': 0.007524368483274546, 'wd': 0.0030285479689695248, 'warmup': 250, 'gamma': 0.986061308644522, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02588945924740198, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:24:02,770] Trial 224 finished with value: 0.7628991723620403 and parameters: {'lr': 0.008548185407565044, 'wd': 0.0039039411460228216, 'warmup': 250, 'gamma': 0.9846988990695476, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.020510778004525904, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 224 finished with value: 0.7628991723620403 and parameters: {'lr': 0.008548185407565044, 'wd': 0.0039039411460228216, 'warmup': 250, 'gamma': 0.9846988990695476, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.020510778004525904, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 11454336
torch.Size([180])


[I 2024-10-28 19:25:01,814] Trial 225 finished with value: 0.6928613382674805 and parameters: {'lr': 0.0071315323178951055, 'wd': 0.0026268642443106464, 'warmup': 250, 'gamma': 0.9851209640630937, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.029792994233744798, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 225 finished with value: 0.6928613382674805 and parameters: {'lr': 0.0071315323178951055, 'wd': 0.0026268642443106464, 'warmup': 250, 'gamma': 0.9851209640630937, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.029792994233744798, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:26:11,638] Trial 226 finished with value: 0.7505776122736355 and parameters: {'lr': 0.008817834380623201, 'wd': 0.0023457617266458043, 'warmup': 250, 'gamma': 0.9855810010524133, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03682630409423195, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 226 finished with value: 0.7505776122736355 and parameters: {'lr': 0.008817834380623201, 'wd': 0.0023457617266458043, 'warmup': 250, 'gamma': 0.9855810010524133, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03682630409423195, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:27:21,415] Trial 227 finished with value: 0.7739619558163284 and parameters: {'lr': 0.00646888379990361, 'wd': 0.0017822573285222521, 'warmup': 250, 'gamma': 0.9835332510746582, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.04173619885669836, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 227 finished with value: 0.7739619558163284 and parameters: {'lr': 0.00646888379990361, 'wd': 0.0017822573285222521, 'warmup': 250, 'gamma': 0.9835332510746582, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.04173619885669836, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:28:31,154] Trial 228 finished with value: 0.7293622698067692 and parameters: {'lr': 0.009887197196723742, 'wd': 0.0034961554838302904, 'warmup': 250, 'gamma': 0.9844393098598118, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.029026716433150142, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 228 finished with value: 0.7293622698067692 and parameters: {'lr': 0.009887197196723742, 'wd': 0.0034961554838302904, 'warmup': 250, 'gamma': 0.9844393098598118, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.029026716433150142, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:29:41,004] Trial 229 finished with value: 0.7069883199309825 and parameters: {'lr': 0.007788651736616637, 'wd': 0.0028496457846063287, 'warmup': 250, 'gamma': 0.9875865996913387, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03487885045835748, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 229 finished with value: 0.7069883199309825 and parameters: {'lr': 0.007788651736616637, 'wd': 0.0028496457846063287, 'warmup': 250, 'gamma': 0.9875865996913387, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03487885045835748, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:30:50,764] Trial 230 finished with value: 0.5722178155166716 and parameters: {'lr': 0.006808150754901706, 'wd': 0.0015330156734959318, 'warmup': 200, 'gamma': 0.9830537550905766, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.022307557196964558, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 230 finished with value: 0.5722178155166716 and parameters: {'lr': 0.006808150754901706, 'wd': 0.0015330156734959318, 'warmup': 200, 'gamma': 0.9830537550905766, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.022307557196964558, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:32:00,514] Trial 231 finished with value: 0.713991908109936 and parameters: {'lr': 0.006486263521237331, 'wd': 0.001909889703887798, 'warmup': 200, 'gamma': 0.9827311975963636, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02617660422484613, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 231 finished with value: 0.713991908109936 and parameters: {'lr': 0.006486263521237331, 'wd': 0.001909889703887798, 'warmup': 200, 'gamma': 0.9827311975963636, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02617660422484613, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:33:10,143] Trial 232 finished with value: 0.7304384672932981 and parameters: {'lr': 0.007954038035811593, 'wd': 0.001548584943173902, 'warmup': 200, 'gamma': 0.9831876236810725, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.0214424514886571, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 232 finished with value: 0.7304384672932981 and parameters: {'lr': 0.007954038035811593, 'wd': 0.001548584943173902, 'warmup': 200, 'gamma': 0.9831876236810725, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.0214424514886571, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:34:19,774] Trial 233 finished with value: 0.6327606637509062 and parameters: {'lr': 0.008813823049812937, 'wd': 0.0016287745397089365, 'warmup': 200, 'gamma': 0.9840247473960694, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.016051066511825113, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 233 finished with value: 0.6327606637509062 and parameters: {'lr': 0.008813823049812937, 'wd': 0.0016287745397089365, 'warmup': 200, 'gamma': 0.9840247473960694, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.016051066511825113, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:35:29,484] Trial 234 finished with value: 0.7401089987887577 and parameters: {'lr': 0.006852841053685219, 'wd': 0.0015797145994868706, 'warmup': 200, 'gamma': 0.9932656840761643, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.010154301068024665, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 234 finished with value: 0.7401089987887577 and parameters: {'lr': 0.006852841053685219, 'wd': 0.0015797145994868706, 'warmup': 200, 'gamma': 0.9932656840761643, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.010154301068024665, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 19:36:49,951] Trial 235 finished with value: 0.7350473895153565 and parameters: {'lr': 0.0058554284067986835, 'wd': 0.00135419562938363, 'warmup': 200, 'gamma': 0.9842625018514648, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.015222289921244556, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 235 finished with value: 0.7350473895153565 and parameters: {'lr': 0.0058554284067986835, 'wd': 0.00135419562938363, 'warmup': 200, 'gamma': 0.9842625018514648, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.015222289921244556, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 13231744
torch.Size([180])


[I 2024-10-28 19:37:57,199] Trial 236 finished with value: 0.7860914553879266 and parameters: {'lr': 0.008583577921541635, 'wd': 0.0010571805879649693, 'warmup': 200, 'gamma': 0.9822060524330234, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.02358611900727591, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 236 finished with value: 0.7860914553879266 and parameters: {'lr': 0.008583577921541635, 'wd': 0.0010571805879649693, 'warmup': 200, 'gamma': 0.9822060524330234, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.02358611900727591, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 15386496
torch.Size([180])


[I 2024-10-28 19:39:09,014] Trial 237 finished with value: 0.71092741727894 and parameters: {'lr': 0.0073972182157830985, 'wd': 0.0017362464161731798, 'warmup': 200, 'gamma': 0.9855943898466435, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 11, 'mlp_dim': 256, 'emb_dropout': 0.015271560364041302, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 237 finished with value: 0.71092741727894 and parameters: {'lr': 0.0073972182157830985, 'wd': 0.0017362464161731798, 'warmup': 200, 'gamma': 0.9855943898466435, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 11, 'mlp_dim': 256, 'emb_dropout': 0.015271560364041302, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 17039872
torch.Size([180])


[I 2024-10-28 19:40:29,405] Trial 238 finished with value: 0.7404753494758265 and parameters: {'lr': 0.008668998088336703, 'wd': 0.004402322092960336, 'warmup': 200, 'gamma': 0.9849701897690912, 'time_dim': 32, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02021567737824182, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 238 finished with value: 0.7404753494758265 and parameters: {'lr': 0.008668998088336703, 'wd': 0.004402322092960336, 'warmup': 200, 'gamma': 0.9849701897690912, 'time_dim': 32, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02021567737824182, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 18149504
torch.Size([180])


[I 2024-10-28 19:41:48,266] Trial 239 finished with value: 0.7736063240670408 and parameters: {'lr': 3.8011031055570515e-05, 'wd': 0.0013528783977550841, 'warmup': 200, 'gamma': 0.9830892336165549, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.19072722869873757, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 239 finished with value: 0.7736063240670408 and parameters: {'lr': 3.8011031055570515e-05, 'wd': 0.0013528783977550841, 'warmup': 200, 'gamma': 0.9830892336165549, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.19072722869873757, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 17012048
torch.Size([180])


[I 2024-10-28 19:45:15,414] Trial 240 finished with value: 0.7245459194371358 and parameters: {'lr': 0.006848538244739398, 'wd': 0.000628912136854406, 'warmup': 150, 'gamma': 0.983872441107485, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1562841486333925, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 240 finished with value: 0.7245459194371358 and parameters: {'lr': 0.006848538244739398, 'wd': 0.000628912136854406, 'warmup': 150, 'gamma': 0.983872441107485, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1562841486333925, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:46:25,176] Trial 241 finished with value: 0.6909259891127063 and parameters: {'lr': 0.009836139135759822, 'wd': 0.0018176173942947723, 'warmup': 250, 'gamma': 0.9836125077266844, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03046119657166333, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 241 finished with value: 0.6909259891127063 and parameters: {'lr': 0.009836139135759822, 'wd': 0.0018176173942947723, 'warmup': 250, 'gamma': 0.9836125077266844, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03046119657166333, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:47:35,070] Trial 242 finished with value: 0.6963517518658022 and parameters: {'lr': 0.008723491147002839, 'wd': 0.0017270339941207467, 'warmup': 200, 'gamma': 0.9826359364513916, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.028556539076352423, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 242 finished with value: 0.6963517518658022 and parameters: {'lr': 0.008723491147002839, 'wd': 0.0017270339941207467, 'warmup': 200, 'gamma': 0.9826359364513916, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.028556539076352423, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:48:45,019] Trial 243 finished with value: 0.7624241088207939 and parameters: {'lr': 0.009976995565398751, 'wd': 0.0014462873143443059, 'warmup': 250, 'gamma': 0.9834334237979186, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.031428169960171115, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 243 finished with value: 0.7624241088207939 and parameters: {'lr': 0.009976995565398751, 'wd': 0.0014462873143443059, 'warmup': 250, 'gamma': 0.9834334237979186, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.031428169960171115, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:49:54,868] Trial 244 finished with value: 0.6132800712747694 and parameters: {'lr': 0.007739189853113007, 'wd': 0.0007752208644293883, 'warmup': 200, 'gamma': 0.9842007725856343, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02564385832896521, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 244 finished with value: 0.6132800712747694 and parameters: {'lr': 0.007739189853113007, 'wd': 0.0007752208644293883, 'warmup': 200, 'gamma': 0.9842007725856343, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02564385832896521, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:51:04,734] Trial 245 finished with value: 0.7155515518752457 and parameters: {'lr': 0.007828100942545796, 'wd': 0.0007795022756391747, 'warmup': 200, 'gamma': 0.9845695884531472, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.024602697786317282, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 245 finished with value: 0.7155515518752457 and parameters: {'lr': 0.007828100942545796, 'wd': 0.0007795022756391747, 'warmup': 200, 'gamma': 0.9845695884531472, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.024602697786317282, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:52:14,699] Trial 246 finished with value: 0.7212441694434413 and parameters: {'lr': 0.008581954186120054, 'wd': 0.0006990307863279896, 'warmup': 200, 'gamma': 0.9842134736275457, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02334850965103815, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 246 finished with value: 0.7212441694434413 and parameters: {'lr': 0.008581954186120054, 'wd': 0.0006990307863279896, 'warmup': 200, 'gamma': 0.9842134736275457, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02334850965103815, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:53:24,601] Trial 247 finished with value: 0.7632610179293429 and parameters: {'lr': 0.007473550089100562, 'wd': 0.001257984477232943, 'warmup': 200, 'gamma': 0.9852404496561545, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.029141368235364336, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 247 finished with value: 0.7632610179293429 and parameters: {'lr': 0.007473550089100562, 'wd': 0.001257984477232943, 'warmup': 200, 'gamma': 0.9852404496561545, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.029141368235364336, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:54:34,441] Trial 248 finished with value: 0.7541393138647142 and parameters: {'lr': 0.006059114902150108, 'wd': 0.0005794591841880689, 'warmup': 200, 'gamma': 0.9839540181632267, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.018622907096054374, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 248 finished with value: 0.7541393138647142 and parameters: {'lr': 0.006059114902150108, 'wd': 0.0005794591841880689, 'warmup': 200, 'gamma': 0.9839540181632267, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.018622907096054374, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:55:44,286] Trial 249 finished with value: 0.7532871492333222 and parameters: {'lr': 0.008785362370473494, 'wd': 0.00044112484806726137, 'warmup': 200, 'gamma': 0.9922335267208294, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02296736079877953, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 249 finished with value: 0.7532871492333222 and parameters: {'lr': 0.008785362370473494, 'wd': 0.00044112484806726137, 'warmup': 200, 'gamma': 0.9922335267208294, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02296736079877953, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:56:54,020] Trial 250 finished with value: 0.782142955178347 and parameters: {'lr': 0.0076831847464706605, 'wd': 0.0009193947810095906, 'warmup': 250, 'gamma': 0.9864241177882762, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.026565972986806855, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 250 finished with value: 0.782142955178347 and parameters: {'lr': 0.0076831847464706605, 'wd': 0.0009193947810095906, 'warmup': 250, 'gamma': 0.9864241177882762, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.026565972986806855, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 19:58:03,924] Trial 251 finished with value: 0.7428278607741053 and parameters: {'lr': 0.006577656993146245, 'wd': 0.0010880122212312637, 'warmup': 200, 'gamma': 0.9848143477039452, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1730235040607912, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 251 finished with value: 0.7428278607741053 and parameters: {'lr': 0.006577656993146245, 'wd': 0.0010880122212312637, 'warmup': 200, 'gamma': 0.9848143477039452, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1730235040607912, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 19336064
torch.Size([180])


[I 2024-10-28 19:59:31,267] Trial 252 finished with value: 0.7328493648189538 and parameters: {'lr': 0.008851870676880206, 'wd': 0.0015832588419590716, 'warmup': 100, 'gamma': 0.9819516109207485, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.01404420957543463, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 252 finished with value: 0.7328493648189538 and parameters: {'lr': 0.008851870676880206, 'wd': 0.0015832588419590716, 'warmup': 100, 'gamma': 0.9819516109207485, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.01404420957543463, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 20:00:51,721] Trial 253 finished with value: 0.8097398555201728 and parameters: {'lr': 2.172315791636847e-05, 'wd': 0.0022346991179993535, 'warmup': 250, 'gamma': 0.9834656909463687, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16268271150098873, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 253 finished with value: 0.8097398555201728 and parameters: {'lr': 2.172315791636847e-05, 'wd': 0.0022346991179993535, 'warmup': 250, 'gamma': 0.9834656909463687, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16268271150098873, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 20:01:54,485] Trial 254 finished with value: 0.6140316325196176 and parameters: {'lr': 0.0074870913502483654, 'wd': 6.387986751688621e-05, 'warmup': 50, 'gamma': 0.984067997089029, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.19457189869954108, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 254 finished with value: 0.6140316325196176 and parameters: {'lr': 0.0074870913502483654, 'wd': 6.387986751688621e-05, 'warmup': 50, 'gamma': 0.984067997089029, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.19457189869954108, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14615936
torch.Size([180])


[I 2024-10-28 20:03:06,743] Trial 255 finished with value: 0.6934995655082494 and parameters: {'lr': 0.005573697369974611, 'wd': 5.306224142824392e-05, 'warmup': 50, 'gamma': 0.9953996729837855, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.1962258176462406, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 255 finished with value: 0.6934995655082494 and parameters: {'lr': 0.005573697369974611, 'wd': 5.306224142824392e-05, 'warmup': 50, 'gamma': 0.9953996729837855, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.1962258176462406, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 14622048
torch.Size([180])


[I 2024-10-28 20:04:52,871] Trial 256 finished with value: 0.7599222748292411 and parameters: {'lr': 0.006943894499315441, 'wd': 5.932694550575331e-05, 'warmup': 50, 'gamma': 0.9856500220597174, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.18965112715415447, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 256 finished with value: 0.7599222748292411 and parameters: {'lr': 0.006943894499315441, 'wd': 5.932694550575331e-05, 'warmup': 50, 'gamma': 0.9856500220597174, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.18965112715415447, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 20:05:55,567] Trial 257 finished with value: 0.6124011763854311 and parameters: {'lr': 0.0075847013385567956, 'wd': 7.55827063000479e-05, 'warmup': 50, 'gamma': 0.9842516337864127, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.1992226255818883, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 257 finished with value: 0.6124011763854311 and parameters: {'lr': 0.0075847013385567956, 'wd': 7.55827063000479e-05, 'warmup': 50, 'gamma': 0.9842516337864127, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.1992226255818883, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 20:06:58,129] Trial 258 finished with value: 0.7867255912038451 and parameters: {'lr': 0.00789637206621782, 'wd': 7.137201561555268e-05, 'warmup': 50, 'gamma': 0.9843310068507991, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.19809196231307669, 'schedule': 'linear'}. Best is trial 179 with value: 0.5534605159263463.


Trial 258 finished with value: 0.7867255912038451 and parameters: {'lr': 0.00789637206621782, 'wd': 7.137201561555268e-05, 'warmup': 50, 'gamma': 0.9843310068507991, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.19809196231307669, 'schedule': 'linear'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 9881472
torch.Size([180])


[I 2024-10-28 20:07:51,371] Trial 259 finished with value: 0.7516819233376314 and parameters: {'lr': 0.007157860639332498, 'wd': 6.28245278642083e-05, 'warmup': 50, 'gamma': 0.9848351850915262, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.10303385895378384, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 259 finished with value: 0.7516819233376314 and parameters: {'lr': 0.007157860639332498, 'wd': 6.28245278642083e-05, 'warmup': 50, 'gamma': 0.9848351850915262, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.10303385895378384, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 13231744
torch.Size([180])


[I 2024-10-28 20:08:58,554] Trial 260 finished with value: 0.7810041975305752 and parameters: {'lr': 0.00028860611793950415, 'wd': 4.602583015922444e-05, 'warmup': 50, 'gamma': 0.9843062437355258, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1882885968814761, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 260 finished with value: 0.7810041975305752 and parameters: {'lr': 0.00028860611793950415, 'wd': 4.602583015922444e-05, 'warmup': 50, 'gamma': 0.9843062437355258, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1882885968814761, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 20:10:01,105] Trial 261 finished with value: 0.7273771783260324 and parameters: {'lr': 0.00803996244480355, 'wd': 7.20385242313855e-05, 'warmup': 50, 'gamma': 0.9854168677636422, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.14998430554247766, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 261 finished with value: 0.7273771783260324 and parameters: {'lr': 0.00803996244480355, 'wd': 7.20385242313855e-05, 'warmup': 50, 'gamma': 0.9854168677636422, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.14998430554247766, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 13231744
torch.Size([180])


[I 2024-10-28 20:11:08,433] Trial 262 finished with value: 0.5941828900112543 and parameters: {'lr': 0.006398312943092567, 'wd': 0.0002847976869041244, 'warmup': 50, 'gamma': 0.9838477525748168, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.011087196235100252, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 262 finished with value: 0.5941828900112543 and parameters: {'lr': 0.006398312943092567, 'wd': 0.0002847976869041244, 'warmup': 50, 'gamma': 0.9838477525748168, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.011087196235100252, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 20:12:11,128] Trial 263 finished with value: 0.749700753884081 and parameters: {'lr': 0.006454959962385422, 'wd': 0.00026259533045877646, 'warmup': 50, 'gamma': 0.9838200733049856, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.011700412071789495, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 263 finished with value: 0.749700753884081 and parameters: {'lr': 0.006454959962385422, 'wd': 0.00026259533045877646, 'warmup': 50, 'gamma': 0.9838200733049856, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.011700412071789495, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 13231744
torch.Size([180])


[I 2024-10-28 20:13:18,418] Trial 264 finished with value: 0.7095277803934471 and parameters: {'lr': 0.008859131822725958, 'wd': 0.000146718130822075, 'warmup': 50, 'gamma': 0.9827653046880932, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.011110060791790994, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 264 finished with value: 0.7095277803934471 and parameters: {'lr': 0.008859131822725958, 'wd': 0.000146718130822075, 'warmup': 50, 'gamma': 0.9827653046880932, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.011110060791790994, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 20:14:21,194] Trial 265 finished with value: 0.7155117989131451 and parameters: {'lr': 0.00763197431711424, 'wd': 0.0007700059621059454, 'warmup': 50, 'gamma': 0.9841408063089785, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.005960816688203306, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 265 finished with value: 0.7155117989131451 and parameters: {'lr': 0.00763197431711424, 'wd': 0.0007700059621059454, 'warmup': 50, 'gamma': 0.9841408063089785, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.005960816688203306, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 13296384
torch.Size([180])


[I 2024-10-28 20:15:28,652] Trial 266 finished with value: 0.724583814121887 and parameters: {'lr': 0.008978329237357214, 'wd': 0.00019464132931600075, 'warmup': 50, 'gamma': 0.9832274586537818, 'time_dim': 32, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.01513778628000554, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 266 finished with value: 0.724583814121887 and parameters: {'lr': 0.008978329237357214, 'wd': 0.00019464132931600075, 'warmup': 50, 'gamma': 0.9832274586537818, 'time_dim': 32, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.01513778628000554, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 10667904
torch.Size([180])


[I 2024-10-28 20:16:25,716] Trial 267 finished with value: 0.8082640424320379 and parameters: {'lr': 0.007036628212905552, 'wd': 9.508583787140321e-05, 'warmup': 50, 'gamma': 0.9849896380875091, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.16562576478188656, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 267 finished with value: 0.8082640424320379 and parameters: {'lr': 0.007036628212905552, 'wd': 9.508583787140321e-05, 'warmup': 50, 'gamma': 0.9849896380875091, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.16562576478188656, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 20:17:28,336] Trial 268 finished with value: 0.7634394012186687 and parameters: {'lr': 0.008029168331210907, 'wd': 0.0005548880325254323, 'warmup': 200, 'gamma': 0.9839689496164623, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15453886188935645, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.


Trial 268 finished with value: 0.7634394012186687 and parameters: {'lr': 0.008029168331210907, 'wd': 0.0005548880325254323, 'warmup': 200, 'gamma': 0.9839689496164623, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15453886188935645, 'schedule': 'constant'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 13231744
torch.Size([180])


[I 2024-10-28 20:18:35,521] Trial 269 finished with value: 0.7489866475900386 and parameters: {'lr': 6.196521412559815e-05, 'wd': 0.0004874813873553326, 'warmup': 50, 'gamma': 0.9846424445107782, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.018586627107653755, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.


Trial 269 finished with value: 0.7489866475900386 and parameters: {'lr': 6.196521412559815e-05, 'wd': 0.0004874813873553326, 'warmup': 50, 'gamma': 0.9846424445107782, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.018586627107653755, 'schedule': 'cosine_with_restarts'}. Best is trial 179 with value: 0.5534605159263463.
Number of parameters: 12285520
torch.Size([180])


[I 2024-10-28 20:21:14,487] Trial 270 finished with value: 0.5098186230222284 and parameters: {'lr': 0.0063362736082415464, 'wd': 0.0006768766015576597, 'warmup': 200, 'gamma': 0.9833274757009607, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.0022351922007032587, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 270 finished with value: 0.5098186230222284 and parameters: {'lr': 0.0063362736082415464, 'wd': 0.0006768766015576597, 'warmup': 200, 'gamma': 0.9833274757009607, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.0022351922007032587, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12510160
torch.Size([180])


[I 2024-10-28 20:23:53,469] Trial 271 finished with value: 0.7578972273042467 and parameters: {'lr': 0.0060018619338198876, 'wd': 0.0006153399365462584, 'warmup': 200, 'gamma': 0.9830377173535203, 'time_dim': 64, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.0018774385108569908, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 271 finished with value: 0.7578972273042467 and parameters: {'lr': 0.0060018619338198876, 'wd': 0.0006153399365462584, 'warmup': 200, 'gamma': 0.9830377173535203, 'time_dim': 64, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.0018774385108569908, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12285520
torch.Size([180])


[I 2024-10-28 20:26:32,131] Trial 272 finished with value: 0.7462488281516653 and parameters: {'lr': 0.006995120140489714, 'wd': 0.00010948835055232406, 'warmup': 200, 'gamma': 0.9824363945058318, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.0009376125149917289, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 272 finished with value: 0.7462488281516653 and parameters: {'lr': 0.006995120140489714, 'wd': 0.00010948835055232406, 'warmup': 200, 'gamma': 0.9824363945058318, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.0009376125149917289, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 20:27:34,946] Trial 273 finished with value: 0.7826988934285426 and parameters: {'lr': 0.0050617920475019215, 'wd': 4.122563454761651e-05, 'warmup': 200, 'gamma': 0.9856815417771099, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.006560891503382394, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 273 finished with value: 0.7826988934285426 and parameters: {'lr': 0.0050617920475019215, 'wd': 4.122563454761651e-05, 'warmup': 200, 'gamma': 0.9856815417771099, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.006560891503382394, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12285520
torch.Size([180])


[I 2024-10-28 20:30:13,757] Trial 274 finished with value: 0.7560984466604108 and parameters: {'lr': 0.009935544182317058, 'wd': 0.0007040215877322, 'warmup': 50, 'gamma': 0.9830590803832768, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.19974061447160255, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 274 finished with value: 0.7560984466604108 and parameters: {'lr': 0.009935544182317058, 'wd': 0.0007040215877322, 'warmup': 50, 'gamma': 0.9830590803832768, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.19974061447160255, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12285520
torch.Size([180])


[I 2024-10-28 20:32:51,708] Trial 275 finished with value: 0.7000369229116044 and parameters: {'lr': 0.006427081134504759, 'wd': 0.0003145790809176313, 'warmup': 200, 'gamma': 0.9844591957192974, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.19428502264332345, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 275 finished with value: 0.7000369229116044 and parameters: {'lr': 0.006427081134504759, 'wd': 0.0003145790809176313, 'warmup': 200, 'gamma': 0.9844591957192974, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.19428502264332345, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15234640
torch.Size([180])


[I 2024-10-28 20:35:59,908] Trial 276 finished with value: 0.7152905523735298 and parameters: {'lr': 0.008006691490314606, 'wd': 0.0009949501318199537, 'warmup': 100, 'gamma': 0.986469264504685, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.008924281313092015, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 276 finished with value: 0.7152905523735298 and parameters: {'lr': 0.008006691490314606, 'wd': 0.0009949501318199537, 'warmup': 100, 'gamma': 0.986469264504685, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.008924281313092015, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13231744
torch.Size([180])


[I 2024-10-28 20:37:07,119] Trial 277 finished with value: 0.7909784438277943 and parameters: {'lr': 0.008896929359019989, 'wd': 0.0005188133580296164, 'warmup': 150, 'gamma': 0.9834860332291488, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.00402044610147126, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 277 finished with value: 0.7909784438277943 and parameters: {'lr': 0.008896929359019989, 'wd': 0.0005188133580296164, 'warmup': 150, 'gamma': 0.9834860332291488, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.00402044610147126, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 20:38:16,971] Trial 278 finished with value: 0.6442344085373128 and parameters: {'lr': 0.007522961496873062, 'wd': 0.00037204222889896597, 'warmup': 200, 'gamma': 0.9849451293137345, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15949799127297, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 278 finished with value: 0.6442344085373128 and parameters: {'lr': 0.007522961496873062, 'wd': 0.00037204222889896597, 'warmup': 200, 'gamma': 0.9849451293137345, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15949799127297, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 20:39:19,715] Trial 279 finished with value: 0.7166360222080158 and parameters: {'lr': 0.006089169131692068, 'wd': 0.00037827840888819724, 'warmup': 200, 'gamma': 0.9872965822582517, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16035943387417004, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 279 finished with value: 0.7166360222080158 and parameters: {'lr': 0.006089169131692068, 'wd': 0.00037827840888819724, 'warmup': 200, 'gamma': 0.9872965822582517, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16035943387417004, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14602112
torch.Size([180])


[I 2024-10-28 20:40:26,611] Trial 280 finished with value: 0.7629057706424417 and parameters: {'lr': 0.0070288930784106465, 'wd': 8.098536227169964e-05, 'warmup': 200, 'gamma': 0.9825276660987842, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.15818217597409348, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 280 finished with value: 0.7629057706424417 and parameters: {'lr': 0.0070288930784106465, 'wd': 8.098536227169964e-05, 'warmup': 200, 'gamma': 0.9825276660987842, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.15818217597409348, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15203936
torch.Size([180])


[I 2024-10-28 20:42:14,070] Trial 281 finished with value: 0.7395042505786954 and parameters: {'lr': 0.005527898908859592, 'wd': 0.0002912482685746241, 'warmup': 200, 'gamma': 0.9839994118778269, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.17204972598944246, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 281 finished with value: 0.7395042505786954 and parameters: {'lr': 0.005527898908859592, 'wd': 0.0002912482685746241, 'warmup': 200, 'gamma': 0.9839994118778269, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.17204972598944246, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 20:45:10,414] Trial 282 finished with value: 0.6615146953922156 and parameters: {'lr': 0.009919127686282938, 'wd': 0.00023272127236980954, 'warmup': 200, 'gamma': 0.9833975119200097, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1528788946023406, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 282 finished with value: 0.6615146953922156 and parameters: {'lr': 0.009919127686282938, 'wd': 0.00023272127236980954, 'warmup': 200, 'gamma': 0.9833975119200097, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1528788946023406, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 20:48:06,054] Trial 283 finished with value: 0.6441169370084318 and parameters: {'lr': 0.00995268766810678, 'wd': 0.00024270453742130662, 'warmup': 200, 'gamma': 0.9833066327589252, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15177750099499696, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 283 finished with value: 0.6441169370084318 and parameters: {'lr': 0.00995268766810678, 'wd': 0.00024270453742130662, 'warmup': 200, 'gamma': 0.9833066327589252, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15177750099499696, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 20:51:02,036] Trial 284 finished with value: 0.7463862047605737 and parameters: {'lr': 0.009974851096231821, 'wd': 0.00027470624427110334, 'warmup': 200, 'gamma': 0.9832640523027997, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14628836263491643, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 284 finished with value: 0.7463862047605737 and parameters: {'lr': 0.009974851096231821, 'wd': 0.00027470624427110334, 'warmup': 200, 'gamma': 0.9832640523027997, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14628836263491643, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 20:53:57,945] Trial 285 finished with value: 0.6289981880543498 and parameters: {'lr': 0.009972336110119134, 'wd': 0.00022615321021365813, 'warmup': 200, 'gamma': 0.982450903028064, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1503923823136747, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 285 finished with value: 0.6289981880543498 and parameters: {'lr': 0.009972336110119134, 'wd': 0.00022615321021365813, 'warmup': 200, 'gamma': 0.982450903028064, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1503923823136747, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 20:56:55,045] Trial 286 finished with value: 0.7642617017399488 and parameters: {'lr': 0.007910120318031125, 'wd': 0.00021105345244700434, 'warmup': 200, 'gamma': 0.9823941359599107, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15152516160056195, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 286 finished with value: 0.7642617017399488 and parameters: {'lr': 0.007910120318031125, 'wd': 0.00021105345244700434, 'warmup': 200, 'gamma': 0.9823941359599107, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15152516160056195, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 20:59:51,334] Trial 287 finished with value: 0.7210605679829183 and parameters: {'lr': 0.008828151148742625, 'wd': 0.00034191021641467867, 'warmup': 200, 'gamma': 0.9811401985380417, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1428012919013084, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 287 finished with value: 0.7210605679829183 and parameters: {'lr': 0.008828151148742625, 'wd': 0.00034191021641467867, 'warmup': 200, 'gamma': 0.9811401985380417, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1428012919013084, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 21:02:47,458] Trial 288 finished with value: 0.7158895112263177 and parameters: {'lr': 0.009999015906555775, 'wd': 0.00023886735739464806, 'warmup': 200, 'gamma': 0.9821730313849808, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15514646068829485, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 288 finished with value: 0.7158895112263177 and parameters: {'lr': 0.009999015906555775, 'wd': 0.00023886735739464806, 'warmup': 200, 'gamma': 0.9821730313849808, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15514646068829485, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13268560
torch.Size([180])


[I 2024-10-28 21:05:35,757] Trial 289 finished with value: 0.6642989605949464 and parameters: {'lr': 0.007889199001569706, 'wd': 0.0002579888609737429, 'warmup': 200, 'gamma': 0.9828193074024425, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1514045764631464, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 289 finished with value: 0.6642989605949464 and parameters: {'lr': 0.007889199001569706, 'wd': 0.0002579888609737429, 'warmup': 200, 'gamma': 0.9828193074024425, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1514045764631464, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13268560
torch.Size([180])


[I 2024-10-28 21:08:24,103] Trial 290 finished with value: 0.716211638808742 and parameters: {'lr': 0.007579867589051139, 'wd': 0.00016444492077532637, 'warmup': 200, 'gamma': 0.9828614944012138, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1486863340177243, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 290 finished with value: 0.716211638808742 and parameters: {'lr': 0.007579867589051139, 'wd': 0.00016444492077532637, 'warmup': 200, 'gamma': 0.9828614944012138, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1486863340177243, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13333200
torch.Size([180])


[I 2024-10-28 21:11:12,404] Trial 291 finished with value: 0.7079100261840874 and parameters: {'lr': 0.008369186205170773, 'wd': 0.0002562785405781771, 'warmup': 200, 'gamma': 0.9818208415781908, 'time_dim': 32, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.14617062778654302, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 291 finished with value: 0.7079100261840874 and parameters: {'lr': 0.008369186205170773, 'wd': 0.0002562785405781771, 'warmup': 200, 'gamma': 0.9818208415781908, 'time_dim': 32, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.14617062778654302, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13268560
torch.Size([180])


[I 2024-10-28 21:14:01,105] Trial 292 finished with value: 0.7393679289509145 and parameters: {'lr': 0.0072031305304409805, 'wd': 0.00022848300605330605, 'warmup': 200, 'gamma': 0.9804763200696475, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1540179424259321, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 292 finished with value: 0.7393679289509145 and parameters: {'lr': 0.0072031305304409805, 'wd': 0.00022848300605330605, 'warmup': 200, 'gamma': 0.9804763200696475, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1540179424259321, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13268560
torch.Size([180])


[I 2024-10-28 21:16:50,103] Trial 293 finished with value: 0.7096200047339245 and parameters: {'lr': 0.008639838821750925, 'wd': 0.00022061664954825479, 'warmup': 200, 'gamma': 0.9825909412597849, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1513094789574311, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 293 finished with value: 0.7096200047339245 and parameters: {'lr': 0.008639838821750925, 'wd': 0.00022061664954825479, 'warmup': 200, 'gamma': 0.9825909412597849, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1513094789574311, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13268560
torch.Size([180])


[I 2024-10-28 21:19:38,550] Trial 294 finished with value: 0.7767475963275046 and parameters: {'lr': 0.0066158953285663895, 'wd': 0.0003015116870364663, 'warmup': 200, 'gamma': 0.9815029632300091, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1787644296394716, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 294 finished with value: 0.7767475963275046 and parameters: {'lr': 0.0066158953285663895, 'wd': 0.0003015116870364663, 'warmup': 200, 'gamma': 0.9815029632300091, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1787644296394716, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14476240
torch.Size([180])


[I 2024-10-28 21:22:34,922] Trial 295 finished with value: 0.7205170549217397 and parameters: {'lr': 0.007810969511395672, 'wd': 0.00033049158549581235, 'warmup': 200, 'gamma': 0.9829659211982424, 'time_dim': 64, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16449308801355458, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 295 finished with value: 0.7205170549217397 and parameters: {'lr': 0.007810969511395672, 'wd': 0.00033049158549581235, 'warmup': 200, 'gamma': 0.9829659211982424, 'time_dim': 64, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16449308801355458, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 21:25:31,337] Trial 296 finished with value: 0.6802171628675577 and parameters: {'lr': 0.008905821917307355, 'wd': 0.0001872343017947523, 'warmup': 200, 'gamma': 0.983679500447203, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15927986005674843, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 296 finished with value: 0.6802171628675577 and parameters: {'lr': 0.008905821917307355, 'wd': 0.0001872343017947523, 'warmup': 200, 'gamma': 0.983679500447203, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15927986005674843, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 21:28:28,517] Trial 297 finished with value: 0.6999290135443568 and parameters: {'lr': 0.0073828082439581074, 'wd': 0.00025174106505785617, 'warmup': 50, 'gamma': 0.9822598876436487, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15072401331192115, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 297 finished with value: 0.6999290135443568 and parameters: {'lr': 0.0073828082439581074, 'wd': 0.00025174106505785617, 'warmup': 50, 'gamma': 0.9822598876436487, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15072401331192115, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12285520
torch.Size([180])


[I 2024-10-28 21:31:06,392] Trial 298 finished with value: 0.6936488165454577 and parameters: {'lr': 0.009980901349402818, 'wd': 0.00039952442151062757, 'warmup': 200, 'gamma': 0.983597684801103, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15510399304689676, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.


Trial 298 finished with value: 0.6936488165454577 and parameters: {'lr': 0.009980901349402818, 'wd': 0.00039952442151062757, 'warmup': 200, 'gamma': 0.983597684801103, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15510399304689676, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 11491152
torch.Size([180])


[I 2024-10-28 21:33:32,394] Trial 299 finished with value: 0.7577966693618322 and parameters: {'lr': 0.00645586832640442, 'wd': 0.00023206850262969564, 'warmup': 200, 'gamma': 0.9829115427569108, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14729483549687422, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 299 finished with value: 0.7577966693618322 and parameters: {'lr': 0.00645586832640442, 'wd': 0.00023206850262969564, 'warmup': 200, 'gamma': 0.9829115427569108, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14729483549687422, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13268560
torch.Size([180])


[I 2024-10-28 21:36:20,788] Trial 300 finished with value: 0.7414626313454238 and parameters: {'lr': 0.00807657962836686, 'wd': 0.0003161214491497704, 'warmup': 200, 'gamma': 0.983337391358916, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.18679019534012253, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 300 finished with value: 0.7414626313454238 and parameters: {'lr': 0.00807657962836686, 'wd': 0.0003161214491497704, 'warmup': 200, 'gamma': 0.983337391358916, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.18679019534012253, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 18186320
torch.Size([180])


[I 2024-10-28 21:39:44,373] Trial 301 finished with value: 0.772641211080716 and parameters: {'lr': 0.009963336483188766, 'wd': 0.00019751551214582553, 'warmup': 200, 'gamma': 0.9819442273084175, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.19320557240047792, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 301 finished with value: 0.772641211080716 and parameters: {'lr': 0.009963336483188766, 'wd': 0.00019751551214582553, 'warmup': 200, 'gamma': 0.9819442273084175, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.19320557240047792, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 21:42:40,368] Trial 302 finished with value: 0.6314113683470319 and parameters: {'lr': 0.008631168989155426, 'wd': 0.0004379147510139244, 'warmup': 50, 'gamma': 0.9861740608491965, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1603071465743876, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 302 finished with value: 0.6314113683470319 and parameters: {'lr': 0.008631168989155426, 'wd': 0.0004379147510139244, 'warmup': 50, 'gamma': 0.9861740608491965, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1603071465743876, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 21:45:36,647] Trial 303 finished with value: 0.739883559929015 and parameters: {'lr': 0.007179946438115345, 'wd': 0.00034933995610367697, 'warmup': 50, 'gamma': 0.9867690674808375, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15941963872319936, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 303 finished with value: 0.739883559929015 and parameters: {'lr': 0.007179946438115345, 'wd': 0.00034933995610367697, 'warmup': 50, 'gamma': 0.9867690674808375, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15941963872319936, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 21:48:32,588] Trial 304 finished with value: 0.70258892526642 and parameters: {'lr': 0.008695258758563404, 'wd': 0.00043332142889783085, 'warmup': 50, 'gamma': 0.9852104238763456, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16164182992361129, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 304 finished with value: 0.70258892526642 and parameters: {'lr': 0.008695258758563404, 'wd': 0.00043332142889783085, 'warmup': 50, 'gamma': 0.9852104238763456, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16164182992361129, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 21:51:29,594] Trial 305 finished with value: 0.7715382515865292 and parameters: {'lr': 0.0061859359851521665, 'wd': 0.0002732589685094777, 'warmup': 50, 'gamma': 0.9860466125431487, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1677666478896457, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 305 finished with value: 0.7715382515865292 and parameters: {'lr': 0.0061859359851521665, 'wd': 0.0002732589685094777, 'warmup': 50, 'gamma': 0.9860466125431487, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1677666478896457, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13268560
torch.Size([180])


[I 2024-10-28 21:54:17,802] Trial 306 finished with value: 0.6982301906025016 and parameters: {'lr': 0.007960268683761509, 'wd': 0.00045244643639794855, 'warmup': 50, 'gamma': 0.9877973735185644, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.14350640400125012, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 306 finished with value: 0.6982301906025016 and parameters: {'lr': 0.007960268683761509, 'wd': 0.00045244643639794855, 'warmup': 50, 'gamma': 0.9877973735185644, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.14350640400125012, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 11491152
torch.Size([180])


[I 2024-10-28 21:56:43,035] Trial 307 finished with value: 0.7430006100308052 and parameters: {'lr': 0.006858573938331686, 'wd': 0.00034618222343191246, 'warmup': 50, 'gamma': 0.9858355411590621, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1604156356949073, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 307 finished with value: 0.7430006100308052 and parameters: {'lr': 0.006858573938331686, 'wd': 0.00034618222343191246, 'warmup': 50, 'gamma': 0.9858355411590621, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1604156356949073, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12285520
torch.Size([180])


[I 2024-10-28 21:59:21,233] Trial 308 finished with value: 0.6770399710997065 and parameters: {'lr': 0.008936050012642083, 'wd': 0.00039088806784996923, 'warmup': 50, 'gamma': 0.9853202738939402, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15443392543876808, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 308 finished with value: 0.6770399710997065 and parameters: {'lr': 0.008936050012642083, 'wd': 0.00039088806784996923, 'warmup': 50, 'gamma': 0.9853202738939402, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15443392543876808, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 22:02:17,473] Trial 309 finished with value: 0.7599080058328188 and parameters: {'lr': 0.009988662062978193, 'wd': 0.0001708598330325526, 'warmup': 250, 'gamma': 0.9863640522122145, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15117206898795169, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 309 finished with value: 0.7599080058328188 and parameters: {'lr': 0.009988662062978193, 'wd': 0.0001708598330325526, 'warmup': 250, 'gamma': 0.9863640522122145, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15117206898795169, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 22:05:14,094] Trial 310 finished with value: 0.7709707791723156 and parameters: {'lr': 0.0077646381188458576, 'wd': 0.0002868607195715806, 'warmup': 250, 'gamma': 0.9840689637033528, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.06404111928722718, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 310 finished with value: 0.7709707791723156 and parameters: {'lr': 0.0077646381188458576, 'wd': 0.0002868607195715806, 'warmup': 250, 'gamma': 0.9840689637033528, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.06404111928722718, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13231744
torch.Size([180])


[I 2024-10-28 22:06:21,492] Trial 311 finished with value: 0.7296028285163658 and parameters: {'lr': 0.005793372575381053, 'wd': 0.0004698753684139476, 'warmup': 150, 'gamma': 0.9729695630283259, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.15611137564496017, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 311 finished with value: 0.7296028285163658 and parameters: {'lr': 0.005793372575381053, 'wd': 0.0004698753684139476, 'warmup': 150, 'gamma': 0.9729695630283259, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.15611137564496017, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 22:09:18,166] Trial 312 finished with value: 0.7946416501697795 and parameters: {'lr': 0.0067308112014604115, 'wd': 0.0036553105627679995, 'warmup': 50, 'gamma': 0.9871132627743401, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1847959737259278, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 312 finished with value: 0.7946416501697795 and parameters: {'lr': 0.0067308112014604115, 'wd': 0.0036553105627679995, 'warmup': 50, 'gamma': 0.9871132627743401, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1847959737259278, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14279424
torch.Size([180])


[I 2024-10-28 22:10:28,088] Trial 313 finished with value: 0.7235975348970143 and parameters: {'lr': 0.008761048700728398, 'wd': 0.0003962394087170018, 'warmup': 250, 'gamma': 0.9850203142817185, 'time_dim': 32, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16511830263036734, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 313 finished with value: 0.7235975348970143 and parameters: {'lr': 0.008761048700728398, 'wd': 0.0003962394087170018, 'warmup': 250, 'gamma': 0.9850203142817185, 'time_dim': 32, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16511830263036734, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 22:11:30,841] Trial 314 finished with value: 0.7535781955238746 and parameters: {'lr': 0.007575618820853003, 'wd': 0.0002258792981171815, 'warmup': 200, 'gamma': 0.983850773526731, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.020324693387239197, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 314 finished with value: 0.7535781955238746 and parameters: {'lr': 0.007575618820853003, 'wd': 0.0002258792981171815, 'warmup': 200, 'gamma': 0.983850773526731, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.020324693387239197, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 22:12:40,862] Trial 315 finished with value: 0.7451405753888487 and parameters: {'lr': 0.008737851297178339, 'wd': 0.0005703823092280082, 'warmup': 200, 'gamma': 0.9825561488653074, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1917945887620417, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 315 finished with value: 0.7451405753888487 and parameters: {'lr': 0.008737851297178339, 'wd': 0.0005703823092280082, 'warmup': 200, 'gamma': 0.9825561488653074, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1917945887620417, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13237856
torch.Size([180])


[I 2024-10-28 22:14:17,703] Trial 316 finished with value: 0.6765796136118712 and parameters: {'lr': 0.007267871587082805, 'wd': 0.001520299734611569, 'warmup': 50, 'gamma': 0.9846270835419704, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.016102533415205088, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 316 finished with value: 0.6765796136118712 and parameters: {'lr': 0.007267871587082805, 'wd': 0.001520299734611569, 'warmup': 50, 'gamma': 0.9846270835419704, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.016102533415205088, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 22:15:27,621] Trial 317 finished with value: 0.7342093048813425 and parameters: {'lr': 0.005443903326226551, 'wd': 0.004859899581797002, 'warmup': 250, 'gamma': 0.9830937607767717, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1509899774140597, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 317 finished with value: 0.7342093048813425 and parameters: {'lr': 0.005443903326226551, 'wd': 0.004859899581797002, 'warmup': 250, 'gamma': 0.9830937607767717, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1509899774140597, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14476240
torch.Size([180])


[I 2024-10-28 22:18:25,042] Trial 318 finished with value: 0.7598790441171454 and parameters: {'lr': 0.007916290449542038, 'wd': 0.00028014427211435223, 'warmup': 200, 'gamma': 0.9856355519153266, 'time_dim': 64, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15698082790985599, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 318 finished with value: 0.7598790441171454 and parameters: {'lr': 0.007916290449542038, 'wd': 0.00028014427211435223, 'warmup': 200, 'gamma': 0.9856355519153266, 'time_dim': 64, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15698082790985599, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13028224
torch.Size([180])


[I 2024-10-28 22:19:29,097] Trial 319 finished with value: 0.7794268371521444 and parameters: {'lr': 0.006264320524053333, 'wd': 0.00035594017206496396, 'warmup': 200, 'gamma': 0.9882064363557447, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.1381706537658482, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 319 finished with value: 0.7794268371521444 and parameters: {'lr': 0.006264320524053333, 'wd': 0.00035594017206496396, 'warmup': 200, 'gamma': 0.9882064363557447, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.1381706537658482, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 22:20:38,968] Trial 320 finished with value: 0.726361006971591 and parameters: {'lr': 0.009964838023093368, 'wd': 0.0029075954572584627, 'warmup': 250, 'gamma': 0.9842100030225432, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14884896728808963, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.


Trial 320 finished with value: 0.726361006971591 and parameters: {'lr': 0.009964838023093368, 'wd': 0.0029075954572584627, 'warmup': 250, 'gamma': 0.9842100030225432, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14884896728808963, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 22:21:41,691] Trial 321 finished with value: 0.7061962778648825 and parameters: {'lr': 0.008866530914538361, 'wd': 0.00012053812602930252, 'warmup': 200, 'gamma': 0.9834776961877589, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.19619617723306434, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 321 finished with value: 0.7061962778648825 and parameters: {'lr': 0.008866530914538361, 'wd': 0.00012053812602930252, 'warmup': 200, 'gamma': 0.9834776961877589, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.19619617723306434, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 22:24:38,431] Trial 322 finished with value: 0.6732245178619468 and parameters: {'lr': 0.006878378701725656, 'wd': 0.00043034836116037075, 'warmup': 50, 'gamma': 0.986518668336333, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03294350854463213, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 322 finished with value: 0.6732245178619468 and parameters: {'lr': 0.006878378701725656, 'wd': 0.00043034836116037075, 'warmup': 50, 'gamma': 0.986518668336333, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03294350854463213, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 22:25:48,356] Trial 323 finished with value: 0.701999243829573 and parameters: {'lr': 0.007998869180192833, 'wd': 0.0005796036525022696, 'warmup': 200, 'gamma': 0.9825802860621488, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16331100819504046, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 323 finished with value: 0.701999243829573 and parameters: {'lr': 0.007998869180192833, 'wd': 0.0005796036525022696, 'warmup': 200, 'gamma': 0.9825802860621488, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16331100819504046, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 18149504
torch.Size([180])


[I 2024-10-28 22:27:07,607] Trial 324 finished with value: 0.7264990356136206 and parameters: {'lr': 0.008768437671659787, 'wd': 0.0004991762151916688, 'warmup': 250, 'gamma': 0.9846921395789253, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.025407333671645455, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 324 finished with value: 0.7264990356136206 and parameters: {'lr': 0.008768437671659787, 'wd': 0.0004991762151916688, 'warmup': 250, 'gamma': 0.9846921395789253, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.025407333671645455, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 22:28:10,603] Trial 325 finished with value: 0.6239530693890663 and parameters: {'lr': 0.00630409562687906, 'wd': 0.0006386239054790138, 'warmup': 200, 'gamma': 0.9837655688090047, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.17064335324154228, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 325 finished with value: 0.6239530693890663 and parameters: {'lr': 0.00630409562687906, 'wd': 0.0006386239054790138, 'warmup': 200, 'gamma': 0.9837655688090047, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.17064335324154228, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 22:29:13,460] Trial 326 finished with value: 0.7713232962924809 and parameters: {'lr': 0.005055339430307046, 'wd': 0.0018455675096460135, 'warmup': 50, 'gamma': 0.9839249399100256, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.176044037895158, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 326 finished with value: 0.7713232962924809 and parameters: {'lr': 0.005055339430307046, 'wd': 0.0018455675096460135, 'warmup': 50, 'gamma': 0.9839249399100256, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.176044037895158, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 22:30:16,174] Trial 327 finished with value: 0.7379947305985273 and parameters: {'lr': 0.005931586929341237, 'wd': 0.0006939826278362357, 'warmup': 250, 'gamma': 0.984913594137455, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16699180369971045, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 327 finished with value: 0.7379947305985273 and parameters: {'lr': 0.005931586929341237, 'wd': 0.0006939826278362357, 'warmup': 250, 'gamma': 0.984913594137455, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16699180369971045, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 22:31:18,941] Trial 328 finished with value: 0.7766812525556088 and parameters: {'lr': 0.006666021232652432, 'wd': 0.000650994167146535, 'warmup': 200, 'gamma': 0.9835664214480369, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.1700253580335842, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 328 finished with value: 0.7766812525556088 and parameters: {'lr': 0.006666021232652432, 'wd': 0.000650994167146535, 'warmup': 200, 'gamma': 0.9835664214480369, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.1700253580335842, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 9881472
torch.Size([180])


[I 2024-10-28 22:32:12,292] Trial 329 finished with value: 0.7162283095575928 and parameters: {'lr': 0.005797145600381776, 'wd': 0.0013701055347433552, 'warmup': 200, 'gamma': 0.9858114448517378, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16916639101685646, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 329 finished with value: 0.7162283095575928 and parameters: {'lr': 0.005797145600381776, 'wd': 0.0013701055347433552, 'warmup': 200, 'gamma': 0.9858114448517378, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16916639101685646, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 22:33:14,912] Trial 330 finished with value: 0.800289983803693 and parameters: {'lr': 0.0071246088247836365, 'wd': 0.0005184750015576896, 'warmup': 250, 'gamma': 0.9846074794477744, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.18149151085774745, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 330 finished with value: 0.800289983803693 and parameters: {'lr': 0.0071246088247836365, 'wd': 0.0005184750015576896, 'warmup': 250, 'gamma': 0.9846074794477744, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.18149151085774745, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14615936
torch.Size([180])


[I 2024-10-28 22:34:26,881] Trial 331 finished with value: 0.756382516062665 and parameters: {'lr': 0.009924375319974102, 'wd': 0.0025263650644348316, 'warmup': 200, 'gamma': 0.9840948203557889, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16059766021574326, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 331 finished with value: 0.756382516062665 and parameters: {'lr': 0.009924375319974102, 'wd': 0.0025263650644348316, 'warmup': 200, 'gamma': 0.9840948203557889, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16059766021574326, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16182144
torch.Size([180])


[I 2024-10-28 22:35:42,626] Trial 332 finished with value: 0.880069515122106 and parameters: {'lr': 0.008771288506730436, 'wd': 0.0006326477068552312, 'warmup': 150, 'gamma': 0.9853107090408111, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.19986444852916901, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 332 finished with value: 0.880069515122106 and parameters: {'lr': 0.008771288506730436, 'wd': 0.0006326477068552312, 'warmup': 150, 'gamma': 0.9853107090408111, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.19986444852916901, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 22:36:52,659] Trial 333 finished with value: 0.720649562963336 and parameters: {'lr': 0.0073103377880521314, 'wd': 0.0005505580082377227, 'warmup': 50, 'gamma': 0.9819867458220444, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17719570961297249, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 333 finished with value: 0.720649562963336 and parameters: {'lr': 0.0073103377880521314, 'wd': 0.0005505580082377227, 'warmup': 50, 'gamma': 0.9819867458220444, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17719570961297249, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 22:38:02,944] Trial 334 finished with value: 0.7370023275122689 and parameters: {'lr': 0.0065200414628142425, 'wd': 0.0008451209588913517, 'warmup': 200, 'gamma': 0.9831572857649921, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.011517694180317156, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 334 finished with value: 0.7370023275122689 and parameters: {'lr': 0.0065200414628142425, 'wd': 0.0008451209588913517, 'warmup': 200, 'gamma': 0.9831572857649921, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.011517694180317156, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 17039872
torch.Size([180])


[I 2024-10-28 22:39:23,731] Trial 335 finished with value: 0.7375774058983319 and parameters: {'lr': 0.008178148940087443, 'wd': 3.673937190135253e-05, 'warmup': 250, 'gamma': 0.9837224578833633, 'time_dim': 32, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.019724336987880543, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 335 finished with value: 0.7375774058983319 and parameters: {'lr': 0.008178148940087443, 'wd': 3.673937190135253e-05, 'warmup': 250, 'gamma': 0.9837224578833633, 'time_dim': 32, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.019724336987880543, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 22:40:33,776] Trial 336 finished with value: 0.7430983836106471 and parameters: {'lr': 0.0052333443209999116, 'wd': 0.00045337919274958887, 'warmup': 200, 'gamma': 0.9842117698686488, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17384799500263462, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 336 finished with value: 0.7430983836106471 and parameters: {'lr': 0.0052333443209999116, 'wd': 0.00045337919274958887, 'warmup': 200, 'gamma': 0.9842117698686488, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17384799500263462, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 22:41:54,515] Trial 337 finished with value: 0.6855522989765263 and parameters: {'lr': 0.00896405292663985, 'wd': 0.0016128423393616188, 'warmup': 200, 'gamma': 0.9832338891985929, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1919427089374172, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 337 finished with value: 0.6855522989765263 and parameters: {'lr': 0.00896405292663985, 'wd': 0.0016128423393616188, 'warmup': 200, 'gamma': 0.9832338891985929, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1919427089374172, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 22:42:57,201] Trial 338 finished with value: 0.7674026221229183 and parameters: {'lr': 0.007661912375740354, 'wd': 0.0012933610528021338, 'warmup': 50, 'gamma': 0.981465852576339, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.028687981094421076, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 338 finished with value: 0.7674026221229183 and parameters: {'lr': 0.007661912375740354, 'wd': 0.0012933610528021338, 'warmup': 50, 'gamma': 0.981465852576339, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.028687981094421076, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 22:44:07,154] Trial 339 finished with value: 0.6833080403479027 and parameters: {'lr': 0.009949953180211355, 'wd': 0.0007111562848315792, 'warmup': 250, 'gamma': 0.982235819128655, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1854692536399062, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 339 finished with value: 0.6833080403479027 and parameters: {'lr': 0.009949953180211355, 'wd': 0.0007111562848315792, 'warmup': 250, 'gamma': 0.982235819128655, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1854692536399062, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 22:45:27,904] Trial 340 finished with value: 0.7011633505258809 and parameters: {'lr': 0.006219641092011077, 'wd': 0.0020575892677466836, 'warmup': 200, 'gamma': 0.9860334384786502, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02240082211550737, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 340 finished with value: 0.7011633505258809 and parameters: {'lr': 0.006219641092011077, 'wd': 0.0020575892677466836, 'warmup': 200, 'gamma': 0.9860334384786502, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02240082211550737, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14439424
torch.Size([180])


[I 2024-10-28 22:46:38,014] Trial 341 finished with value: 0.7979625121756635 and parameters: {'lr': 0.00792996711073704, 'wd': 0.0003063704403150961, 'warmup': 200, 'gamma': 0.9852151510326991, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.034060963029887655, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.


Trial 341 finished with value: 0.7979625121756635 and parameters: {'lr': 0.00792996711073704, 'wd': 0.0003063704403150961, 'warmup': 200, 'gamma': 0.9852151510326991, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.034060963029887655, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 22:47:40,801] Trial 342 finished with value: 0.6812167519552734 and parameters: {'lr': 0.008930563372062932, 'wd': 0.0032696461605732758, 'warmup': 250, 'gamma': 0.9844080071858446, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16303493771706673, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 342 finished with value: 0.6812167519552734 and parameters: {'lr': 0.008930563372062932, 'wd': 0.0032696461605732758, 'warmup': 250, 'gamma': 0.9844080071858446, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16303493771706673, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16981344
torch.Size([180])


[I 2024-10-28 22:49:38,578] Trial 343 finished with value: 0.7675279524316664 and parameters: {'lr': 0.006945979514372058, 'wd': 0.0003723086388691504, 'warmup': 200, 'gamma': 0.9828500288564177, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15587673251369008, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 343 finished with value: 0.7675279524316664 and parameters: {'lr': 0.006945979514372058, 'wd': 0.0003723086388691504, 'warmup': 200, 'gamma': 0.9828500288564177, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15587673251369008, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13028224
torch.Size([180])


[I 2024-10-28 22:50:42,611] Trial 344 finished with value: 0.7793780809973788 and parameters: {'lr': 0.007938986423451292, 'wd': 0.0005868692959106476, 'warmup': 50, 'gamma': 0.9834557465862467, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.016034996585733985, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 344 finished with value: 0.7793780809973788 and parameters: {'lr': 0.007938986423451292, 'wd': 0.0005868692959106476, 'warmup': 50, 'gamma': 0.9834557465862467, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.016034996585733985, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 22:51:45,376] Trial 345 finished with value: 0.7261068651645027 and parameters: {'lr': 0.005934727665267738, 'wd': 0.0014726845428097836, 'warmup': 200, 'gamma': 0.9869970549813479, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.19569361689716328, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 345 finished with value: 0.7261068651645027 and parameters: {'lr': 0.005934727665267738, 'wd': 0.0014726845428097836, 'warmup': 200, 'gamma': 0.9869970549813479, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.19569361689716328, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 22:52:55,369] Trial 346 finished with value: 0.7517498134221171 and parameters: {'lr': 0.004567864959524275, 'wd': 0.0017654642384231588, 'warmup': 250, 'gamma': 0.9841348055718544, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02640413633777243, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 346 finished with value: 0.7517498134221171 and parameters: {'lr': 0.004567864959524275, 'wd': 0.0017654642384231588, 'warmup': 250, 'gamma': 0.9841348055718544, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02640413633777243, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 22:54:05,280] Trial 347 finished with value: 0.7100479957221312 and parameters: {'lr': 0.009951777971947686, 'wd': 0.00043815505581708085, 'warmup': 200, 'gamma': 0.9849288288312428, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15935744650455658, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 347 finished with value: 0.7100479957221312 and parameters: {'lr': 0.009951777971947686, 'wd': 0.00043815505581708085, 'warmup': 200, 'gamma': 0.9849288288312428, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15935744650455658, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 18149504
torch.Size([180])


[I 2024-10-28 22:55:24,306] Trial 348 finished with value: 0.7640748201037253 and parameters: {'lr': 0.007126564039044464, 'wd': 0.0010903883016312608, 'warmup': 50, 'gamma': 0.9826264003637277, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.009283987599365318, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 348 finished with value: 0.7640748201037253 and parameters: {'lr': 0.007126564039044464, 'wd': 0.0010903883016312608, 'warmup': 50, 'gamma': 0.9826264003637277, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.009283987599365318, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 22:56:34,285] Trial 349 finished with value: 0.7743621978625407 and parameters: {'lr': 0.008823836601874581, 'wd': 0.0007982752380304783, 'warmup': 200, 'gamma': 0.9836069213916175, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14435617224194225, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 349 finished with value: 0.7743621978625407 and parameters: {'lr': 0.008823836601874581, 'wd': 0.0007982752380304783, 'warmup': 200, 'gamma': 0.9836069213916175, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14435617224194225, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14652752
torch.Size([180])


[I 2024-10-28 22:59:41,520] Trial 350 finished with value: 0.7550626639405624 and parameters: {'lr': 0.006558155867667795, 'wd': 0.00047493018360917987, 'warmup': 250, 'gamma': 0.9821027309514, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.03781169947723472, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 350 finished with value: 0.7550626639405624 and parameters: {'lr': 0.006558155867667795, 'wd': 0.00047493018360917987, 'warmup': 250, 'gamma': 0.9821027309514, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.03781169947723472, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-28 23:01:02,248] Trial 351 finished with value: 0.7924400594884335 and parameters: {'lr': 0.007978138289770759, 'wd': 0.0006312158542503045, 'warmup': 200, 'gamma': 0.983126822049374, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.07053163779347812, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 351 finished with value: 0.7924400594884335 and parameters: {'lr': 0.007978138289770759, 'wd': 0.0006312158542503045, 'warmup': 200, 'gamma': 0.983126822049374, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.07053163779347812, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15197824
torch.Size([180])


[I 2024-10-28 23:02:16,127] Trial 352 finished with value: 0.7548812569813391 and parameters: {'lr': 0.008929509454777208, 'wd': 0.00019829997424900445, 'warmup': 200, 'gamma': 0.9856127588533369, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.16527960208764808, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 352 finished with value: 0.7548812569813391 and parameters: {'lr': 0.008929509454777208, 'wd': 0.00019829997424900445, 'warmup': 200, 'gamma': 0.9856127588533369, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.16527960208764808, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 23:03:26,141] Trial 353 finished with value: 0.7060346419894914 and parameters: {'lr': 0.00752597123292046, 'wd': 0.003991956294909619, 'warmup': 250, 'gamma': 0.984411618356479, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.170864725554993, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 353 finished with value: 0.7060346419894914 and parameters: {'lr': 0.00752597123292046, 'wd': 0.003991956294909619, 'warmup': 250, 'gamma': 0.984411618356479, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.170864725554993, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-28 23:06:23,313] Trial 354 finished with value: 0.6825236665783663 and parameters: {'lr': 0.005643160662939041, 'wd': 0.0003446462534790836, 'warmup': 50, 'gamma': 0.9862668393802975, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02220125821224981, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 354 finished with value: 0.6825236665783663 and parameters: {'lr': 0.005643160662939041, 'wd': 0.0003446462534790836, 'warmup': 50, 'gamma': 0.9862668393802975, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02220125821224981, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 23:07:33,190] Trial 355 finished with value: 0.7782461174233387 and parameters: {'lr': 0.00020078639095877054, 'wd': 0.002250245807763819, 'warmup': 200, 'gamma': 0.9839067950869076, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1902544819836362, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 355 finished with value: 0.7782461174233387 and parameters: {'lr': 0.00020078639095877054, 'wd': 0.002250245807763819, 'warmup': 200, 'gamma': 0.9839067950869076, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1902544819836362, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14615936
torch.Size([180])


[I 2024-10-28 23:08:45,062] Trial 356 finished with value: 0.7648825719545506 and parameters: {'lr': 0.008624398702632185, 'wd': 0.0001547907528644572, 'warmup': 150, 'gamma': 0.9816040404753417, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.0004651621621086857, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 356 finished with value: 0.7648825719545506 and parameters: {'lr': 0.008624398702632185, 'wd': 0.0001547907528644572, 'warmup': 150, 'gamma': 0.9816040404753417, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.0004651621621086857, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14279424
torch.Size([180])


[I 2024-10-28 23:09:55,082] Trial 357 finished with value: 0.7205946941914816 and parameters: {'lr': 0.006604142978074764, 'wd': 0.0005257746020885182, 'warmup': 250, 'gamma': 0.9847201796942952, 'time_dim': 32, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15383805813825915, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 357 finished with value: 0.7205946941914816 and parameters: {'lr': 0.006604142978074764, 'wd': 0.0005257746020885182, 'warmup': 250, 'gamma': 0.9847201796942952, 'time_dim': 32, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15383805813825915, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19372880
torch.Size([180])


[I 2024-10-28 23:13:37,941] Trial 358 finished with value: 0.7401131865949182 and parameters: {'lr': 0.007575830568761307, 'wd': 0.0009013570716339762, 'warmup': 200, 'gamma': 0.9826631153750867, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.029561801876391475, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 358 finished with value: 0.7401131865949182 and parameters: {'lr': 0.007575830568761307, 'wd': 0.0009013570716339762, 'warmup': 200, 'gamma': 0.9826631153750867, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.029561801876391475, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 11454336
torch.Size([180])


[I 2024-10-28 23:14:37,199] Trial 359 finished with value: 0.6625237419684016 and parameters: {'lr': 0.009085427062271178, 'wd': 3.313588541503257e-05, 'warmup': 50, 'gamma': 0.9834481085705958, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.013036286528470062, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 359 finished with value: 0.6625237419684016 and parameters: {'lr': 0.009085427062271178, 'wd': 3.313588541503257e-05, 'warmup': 50, 'gamma': 0.9834481085705958, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.013036286528470062, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 23:16:19,003] Trial 360 finished with value: 0.5429833472338291 and parameters: {'lr': 0.009944202441283538, 'wd': 0.0011867262451527034, 'warmup': 200, 'gamma': 0.9851562625900823, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1581780370337251, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 360 finished with value: 0.5429833472338291 and parameters: {'lr': 0.009944202441283538, 'wd': 0.0011867262451527034, 'warmup': 200, 'gamma': 0.9851562625900823, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1581780370337251, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13231744
torch.Size([180])


[I 2024-10-28 23:17:26,405] Trial 361 finished with value: 0.7360861466342591 and parameters: {'lr': 0.006964485642810978, 'wd': 0.0012653845245664045, 'warmup': 250, 'gamma': 0.9853588128407929, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.15845384305177948, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 361 finished with value: 0.7360861466342591 and parameters: {'lr': 0.006964485642810978, 'wd': 0.0012653845245664045, 'warmup': 250, 'gamma': 0.9853588128407929, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.15845384305177948, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14445536
torch.Size([180])


[I 2024-10-28 23:19:08,066] Trial 362 finished with value: 0.7824854837465663 and parameters: {'lr': 0.008216569786862887, 'wd': 0.001100318444813773, 'warmup': 200, 'gamma': 0.9861850328829795, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18147845784279026, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 362 finished with value: 0.7824854837465663 and parameters: {'lr': 0.008216569786862887, 'wd': 0.001100318444813773, 'warmup': 200, 'gamma': 0.9861850328829795, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18147845784279026, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14622048
torch.Size([180])


[I 2024-10-28 23:20:54,456] Trial 363 finished with value: 0.7593525969321656 and parameters: {'lr': 0.0005518837563829021, 'wd': 0.0016316145145222787, 'warmup': 100, 'gamma': 0.985104657090387, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.018525254073753827, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 363 finished with value: 0.7593525969321656 and parameters: {'lr': 0.0005518837563829021, 'wd': 0.0016316145145222787, 'warmup': 100, 'gamma': 0.985104657090387, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.018525254073753827, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 23:22:36,329] Trial 364 finished with value: 0.6808293144298584 and parameters: {'lr': 0.00989647317723488, 'wd': 0.0013108189949378479, 'warmup': 200, 'gamma': 0.9857039551735067, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18833699606987775, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 364 finished with value: 0.6808293144298584 and parameters: {'lr': 0.00989647317723488, 'wd': 0.0013108189949378479, 'warmup': 200, 'gamma': 0.9857039551735067, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18833699606987775, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 18160992
torch.Size([180])


[I 2024-10-28 23:24:41,281] Trial 365 finished with value: 0.7033923546300572 and parameters: {'lr': 0.00622019023228252, 'wd': 0.0009347254309901091, 'warmup': 50, 'gamma': 0.9867279717323991, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.16326972858920322, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 365 finished with value: 0.7033923546300572 and parameters: {'lr': 0.00622019023228252, 'wd': 0.0009347254309901091, 'warmup': 50, 'gamma': 0.9867279717323991, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.16326972858920322, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 23:26:23,161] Trial 366 finished with value: 0.6512720809431937 and parameters: {'lr': 0.005141852282547015, 'wd': 0.005449452328812054, 'warmup': 250, 'gamma': 0.9845023752876821, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16733523505758352, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 366 finished with value: 0.6512720809431937 and parameters: {'lr': 0.005141852282547015, 'wd': 0.005449452328812054, 'warmup': 250, 'gamma': 0.9845023752876821, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16733523505758352, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 23:28:05,068] Trial 367 finished with value: 0.7643662154081262 and parameters: {'lr': 0.004747277685151838, 'wd': 0.002531657480851385, 'warmup': 200, 'gamma': 0.9848222130934553, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16557031772144318, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 367 finished with value: 0.7643662154081262 and parameters: {'lr': 0.004747277685151838, 'wd': 0.002531657480851385, 'warmup': 200, 'gamma': 0.9848222130934553, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16557031772144318, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 23:29:46,875] Trial 368 finished with value: 0.7254989953084806 and parameters: {'lr': 0.005200928967739824, 'wd': 0.0072781859268956145, 'warmup': 250, 'gamma': 0.9842506667984243, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17277398104483496, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 368 finished with value: 0.7254989953084806 and parameters: {'lr': 0.005200928967739824, 'wd': 0.0072781859268956145, 'warmup': 250, 'gamma': 0.9842506667984243, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17277398104483496, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 23:30:56,885] Trial 369 finished with value: 0.8078507652949363 and parameters: {'lr': 0.006115849074775746, 'wd': 0.00041254536042299766, 'warmup': 200, 'gamma': 0.9854470656583247, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17005990939554533, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 369 finished with value: 0.8078507652949363 and parameters: {'lr': 0.006115849074775746, 'wd': 0.00041254536042299766, 'warmup': 200, 'gamma': 0.9854470656583247, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17005990939554533, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-28 23:32:06,868] Trial 370 finished with value: 0.7532819505859749 and parameters: {'lr': 0.005324397126600568, 'wd': 0.005180738062315308, 'warmup': 100, 'gamma': 0.9876238114240122, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16721428427690205, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 370 finished with value: 0.7532819505859749 and parameters: {'lr': 0.005324397126600568, 'wd': 0.005180738062315308, 'warmup': 100, 'gamma': 0.9876238114240122, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16721428427690205, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 23:33:49,267] Trial 371 finished with value: 0.7581081150370708 and parameters: {'lr': 0.006980383566718414, 'wd': 0.004266119345833141, 'warmup': 50, 'gamma': 0.9701395071192667, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.19461345830997526, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 371 finished with value: 0.7581081150370708 and parameters: {'lr': 0.006980383566718414, 'wd': 0.004266119345833141, 'warmup': 50, 'gamma': 0.9701395071192667, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.19461345830997526, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 23:35:31,111] Trial 372 finished with value: 0.6582609840295925 and parameters: {'lr': 0.00764071895835634, 'wd': 0.0017986579663215147, 'warmup': 200, 'gamma': 0.9847076538664462, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.0330744826332486, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 372 finished with value: 0.6582609840295925 and parameters: {'lr': 0.00764071895835634, 'wd': 0.0017986579663215147, 'warmup': 200, 'gamma': 0.9847076538664462, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.0330744826332486, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 23:37:13,381] Trial 373 finished with value: 0.7541820684182492 and parameters: {'lr': 0.005887072696871683, 'wd': 0.0017054753866976833, 'warmup': 200, 'gamma': 0.9850509468117389, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.025148547454808053, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 373 finished with value: 0.7541820684182492 and parameters: {'lr': 0.005887072696871683, 'wd': 0.0017054753866976833, 'warmup': 200, 'gamma': 0.9850509468117389, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.025148547454808053, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 23:38:55,065] Trial 374 finished with value: 0.7515247882111478 and parameters: {'lr': 0.007018679664898488, 'wd': 0.0018508902090704385, 'warmup': 200, 'gamma': 0.9858993055191153, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16107542254253204, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.


Trial 374 finished with value: 0.7515247882111478 and parameters: {'lr': 0.007018679664898488, 'wd': 0.0018508902090704385, 'warmup': 200, 'gamma': 0.9858993055191153, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16107542254253204, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 23:40:36,860] Trial 375 finished with value: 0.7105482640536341 and parameters: {'lr': 0.007678228649489939, 'wd': 0.002929386332708127, 'warmup': 200, 'gamma': 0.9845547622972299, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.08026452747853624, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 375 finished with value: 0.7105482640536341 and parameters: {'lr': 0.007678228649489939, 'wd': 0.002929386332708127, 'warmup': 200, 'gamma': 0.9845547622972299, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.08026452747853624, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 11460448
torch.Size([180])


[I 2024-10-28 23:42:02,125] Trial 376 finished with value: 0.8021523273568223 and parameters: {'lr': 0.006371112981358999, 'wd': 0.00543497710525097, 'warmup': 200, 'gamma': 0.9852803048059561, 'time_dim': 16, 'patch_size': 16, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17505809624469637, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 376 finished with value: 0.8021523273568223 and parameters: {'lr': 0.006371112981358999, 'wd': 0.00543497710525097, 'warmup': 200, 'gamma': 0.9852803048059561, 'time_dim': 16, 'patch_size': 16, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17505809624469637, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15203936
torch.Size([180])


[I 2024-10-28 23:43:49,665] Trial 377 finished with value: 0.7815652290969061 and parameters: {'lr': 0.004303301563546112, 'wd': 0.0019506912807513404, 'warmup': 200, 'gamma': 0.9846746612912572, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.033467956736087115, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 377 finished with value: 0.7815652290969061 and parameters: {'lr': 0.004303301563546112, 'wd': 0.0019506912807513404, 'warmup': 200, 'gamma': 0.9846746612912572, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.033467956736087115, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 18155616
torch.Size([180])


[I 2024-10-28 23:45:43,381] Trial 378 finished with value: 0.7227032566590088 and parameters: {'lr': 0.005506375491489843, 'wd': 0.0015321918645043885, 'warmup': 200, 'gamma': 0.9865323387582066, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.19992531008825065, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 378 finished with value: 0.7227032566590088 and parameters: {'lr': 0.005506375491489843, 'wd': 0.0015321918645043885, 'warmup': 200, 'gamma': 0.9865323387582066, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.19992531008825065, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 23:47:25,174] Trial 379 finished with value: 0.7172349117869687 and parameters: {'lr': 0.007619160294014381, 'wd': 0.0059270923396043985, 'warmup': 200, 'gamma': 0.9840858146302164, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.0282627119870868, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 379 finished with value: 0.7172349117869687 and parameters: {'lr': 0.007619160294014381, 'wd': 0.0059270923396043985, 'warmup': 200, 'gamma': 0.9840858146302164, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.0282627119870868, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14285536
torch.Size([180])


[I 2024-10-28 23:49:07,270] Trial 380 finished with value: 0.7791203190289618 and parameters: {'lr': 0.00849341877269135, 'wd': 0.0021292062006556516, 'warmup': 250, 'gamma': 0.9859744526943803, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.08561821907661965, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 380 finished with value: 0.7791203190289618 and parameters: {'lr': 0.00849341877269135, 'wd': 0.0021292062006556516, 'warmup': 250, 'gamma': 0.9859744526943803, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.08561821907661965, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 23:50:49,009] Trial 381 finished with value: 0.7296646339633481 and parameters: {'lr': 0.006512276906832427, 'wd': 0.0014106007333633977, 'warmup': 200, 'gamma': 0.9810691283616123, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16856336661153787, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 381 finished with value: 0.7296646339633481 and parameters: {'lr': 0.006512276906832427, 'wd': 0.0014106007333633977, 'warmup': 200, 'gamma': 0.9810691283616123, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16856336661153787, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12254816
torch.Size([180])


[I 2024-10-28 23:52:20,814] Trial 382 finished with value: 0.7134054873701039 and parameters: {'lr': 0.007405586587829805, 'wd': 3.9737189278921796e-05, 'warmup': 200, 'gamma': 0.9849902726949201, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.0505643055350987, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 382 finished with value: 0.7134054873701039 and parameters: {'lr': 0.007405586587829805, 'wd': 3.9737189278921796e-05, 'warmup': 200, 'gamma': 0.9849902726949201, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.0505643055350987, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 23:54:02,539] Trial 383 finished with value: 0.7171572371951815 and parameters: {'lr': 0.008274911458962442, 'wd': 0.0024373115644198444, 'warmup': 250, 'gamma': 0.9842822606397048, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.006045944109847948, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 383 finished with value: 0.7171572371951815 and parameters: {'lr': 0.008274911458962442, 'wd': 0.0024373115644198444, 'warmup': 250, 'gamma': 0.9842822606397048, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.006045944109847948, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15428576
torch.Size([180])


[I 2024-10-28 23:55:50,078] Trial 384 finished with value: 0.7484106934557989 and parameters: {'lr': 0.005234731354198496, 'wd': 0.006047114643395074, 'warmup': 50, 'gamma': 0.9838350809830834, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.016794914412667096, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 384 finished with value: 0.7484106934557989 and parameters: {'lr': 0.005234731354198496, 'wd': 0.006047114643395074, 'warmup': 50, 'gamma': 0.9838350809830834, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.016794914412667096, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-28 23:57:31,864] Trial 385 finished with value: 0.7545747995098321 and parameters: {'lr': 0.006872839649184701, 'wd': 4.929309168477078e-05, 'warmup': 200, 'gamma': 0.9846842910769392, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15768791578266897, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 385 finished with value: 0.7545747995098321 and parameters: {'lr': 0.006872839649184701, 'wd': 4.929309168477078e-05, 'warmup': 200, 'gamma': 0.9846842910769392, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15768791578266897, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 11454336
torch.Size([180])


[I 2024-10-28 23:58:31,131] Trial 386 finished with value: 0.7907793598074441 and parameters: {'lr': 0.008780734283280238, 'wd': 0.0018763696134088846, 'warmup': 200, 'gamma': 0.9854918023685254, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02184056068410302, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 386 finished with value: 0.7907793598074441 and parameters: {'lr': 0.008780734283280238, 'wd': 0.0018763696134088846, 'warmup': 200, 'gamma': 0.9854918023685254, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02184056068410302, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-28 23:59:33,994] Trial 387 finished with value: 0.7307491888603017 and parameters: {'lr': 0.006153282437383762, 'wd': 0.003459814922506506, 'warmup': 250, 'gamma': 0.9826107482679494, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.040155434376844705, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 387 finished with value: 0.7307491888603017 and parameters: {'lr': 0.006153282437383762, 'wd': 0.003459814922506506, 'warmup': 250, 'gamma': 0.9826107482679494, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.040155434376844705, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-29 00:01:15,838] Trial 388 finished with value: 0.7141321663450836 and parameters: {'lr': 0.009941176013380524, 'wd': 0.00010184598537940745, 'warmup': 150, 'gamma': 0.983790998967119, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17909144880600106, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 388 finished with value: 0.7141321663450836 and parameters: {'lr': 0.009941176013380524, 'wd': 0.00010184598537940745, 'warmup': 150, 'gamma': 0.983790998967119, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17909144880600106, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15197824
torch.Size([180])


[I 2024-10-29 00:02:30,022] Trial 389 finished with value: 0.7820818156608195 and parameters: {'lr': 0.007773018619075841, 'wd': 0.0015519236488561754, 'warmup': 200, 'gamma': 0.9817917361537487, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.16211427381675467, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 389 finished with value: 0.7820818156608195 and parameters: {'lr': 0.007773018619075841, 'wd': 0.0015519236488561754, 'warmup': 200, 'gamma': 0.9817917361537487, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.16211427381675467, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 00:03:40,075] Trial 390 finished with value: 0.6956805499138613 and parameters: {'lr': 0.00890002530788266, 'wd': 0.0006996789126674244, 'warmup': 50, 'gamma': 0.9870799570916021, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.012675531516618556, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 390 finished with value: 0.6956805499138613 and parameters: {'lr': 0.00890002530788266, 'wd': 0.0006996789126674244, 'warmup': 50, 'gamma': 0.9870799570916021, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.012675531516618556, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 00:04:50,163] Trial 391 finished with value: 0.7434574752458585 and parameters: {'lr': 0.007141929155886778, 'wd': 0.004307335435485569, 'warmup': 200, 'gamma': 0.9843877096891477, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16512144468488119, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 391 finished with value: 0.7434574752458585 and parameters: {'lr': 0.007141929155886778, 'wd': 0.004307335435485569, 'warmup': 200, 'gamma': 0.9843877096891477, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16512144468488119, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 00:06:00,140] Trial 392 finished with value: 0.7799100209574733 and parameters: {'lr': 0.005766952776143119, 'wd': 0.00031250396668877357, 'warmup': 250, 'gamma': 0.9832108770745062, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18516797676856372, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.


Trial 392 finished with value: 0.7799100209574733 and parameters: {'lr': 0.005766952776143119, 'wd': 0.00031250396668877357, 'warmup': 250, 'gamma': 0.9832108770745062, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18516797676856372, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 00:07:10,241] Trial 393 finished with value: 0.7761179258691397 and parameters: {'lr': 0.007936361924785128, 'wd': 0.0003804779611058228, 'warmup': 200, 'gamma': 0.9823284309106312, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.030848313320872144, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 393 finished with value: 0.7761179258691397 and parameters: {'lr': 0.007936361924785128, 'wd': 0.0003804779611058228, 'warmup': 200, 'gamma': 0.9823284309106312, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.030848313320872144, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12285520
torch.Size([180])


[I 2024-10-29 00:09:47,803] Trial 394 finished with value: 0.7069537202309281 and parameters: {'lr': 0.008890881584633019, 'wd': 0.0022208120080503676, 'warmup': 50, 'gamma': 0.9851293632842278, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.1922584844132928, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 394 finished with value: 0.7069537202309281 and parameters: {'lr': 0.008890881584633019, 'wd': 0.0022208120080503676, 'warmup': 50, 'gamma': 0.9851293632842278, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.1922584844132928, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13231744
torch.Size([180])


[I 2024-10-29 00:10:55,053] Trial 395 finished with value: 0.7480041712711696 and parameters: {'lr': 0.006928110755658434, 'wd': 4.455320223629389e-05, 'warmup': 200, 'gamma': 0.98577147368079, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.02535539060953022, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 395 finished with value: 0.7480041712711696 and parameters: {'lr': 0.006928110755658434, 'wd': 4.455320223629389e-05, 'warmup': 200, 'gamma': 0.98577147368079, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.02535539060953022, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 11460448
torch.Size([180])


[I 2024-10-29 00:12:20,119] Trial 396 finished with value: 0.796229101200923 and parameters: {'lr': 0.009999730061928381, 'wd': 5.820409993383148e-05, 'warmup': 250, 'gamma': 0.9838800401245437, 'time_dim': 16, 'patch_size': 16, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16182615670090097, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 396 finished with value: 0.796229101200923 and parameters: {'lr': 0.009999730061928381, 'wd': 5.820409993383148e-05, 'warmup': 250, 'gamma': 0.9838800401245437, 'time_dim': 16, 'patch_size': 16, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16182615670090097, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 00:13:30,651] Trial 397 finished with value: 0.724049989684898 and parameters: {'lr': 0.0080004901119739, 'wd': 8.717111406946601e-05, 'warmup': 200, 'gamma': 0.9829131004051247, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.19641800709483828, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 397 finished with value: 0.724049989684898 and parameters: {'lr': 0.0080004901119739, 'wd': 8.717111406946601e-05, 'warmup': 200, 'gamma': 0.9829131004051247, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.19641800709483828, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 00:14:40,687] Trial 398 finished with value: 0.7291423984564108 and parameters: {'lr': 0.006250735744155704, 'wd': 0.0016543968533957861, 'warmup': 200, 'gamma': 0.9862070725864103, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17129784150152608, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 398 finished with value: 0.7291423984564108 and parameters: {'lr': 0.006250735744155704, 'wd': 0.0016543968533957861, 'warmup': 200, 'gamma': 0.9862070725864103, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17129784150152608, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15234640
torch.Size([180])


[I 2024-10-29 00:17:49,692] Trial 399 finished with value: 0.7623663857026934 and parameters: {'lr': 0.007378140016987803, 'wd': 0.0012087967602517292, 'warmup': 250, 'gamma': 0.9846413368012719, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.1581196106907654, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 399 finished with value: 0.7623663857026934 and parameters: {'lr': 0.007378140016987803, 'wd': 0.0012087967602517292, 'warmup': 250, 'gamma': 0.9846413368012719, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.1581196106907654, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 00:19:01,602] Trial 400 finished with value: 0.8014927706255975 and parameters: {'lr': 0.004678810038755224, 'wd': 0.0014356536405444429, 'warmup': 50, 'gamma': 0.9835466852469547, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.02266137692313674, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 400 finished with value: 0.8014927706255975 and parameters: {'lr': 0.004678810038755224, 'wd': 0.0014356536405444429, 'warmup': 50, 'gamma': 0.9835466852469547, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.02266137692313674, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 00:20:11,552] Trial 401 finished with value: 0.7205731105685218 and parameters: {'lr': 0.008912453653928466, 'wd': 0.002787811866552614, 'warmup': 200, 'gamma': 0.9842103320490962, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.019480923747254617, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 401 finished with value: 0.7205731105685218 and parameters: {'lr': 0.008912453653928466, 'wd': 0.002787811866552614, 'warmup': 200, 'gamma': 0.9842103320490962, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.019480923747254617, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 00:21:21,649] Trial 402 finished with value: 0.749214206251154 and parameters: {'lr': 0.0056044472101820465, 'wd': 0.006619763692892624, 'warmup': 250, 'gamma': 0.9853624144456926, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1886173108295589, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 402 finished with value: 0.749214206251154 and parameters: {'lr': 0.0056044472101820465, 'wd': 0.006619763692892624, 'warmup': 250, 'gamma': 0.9853624144456926, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1886173108295589, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13333200
torch.Size([180])


[I 2024-10-29 00:24:09,847] Trial 403 finished with value: 0.7883921023773182 and parameters: {'lr': 1.2141161104256791e-05, 'wd': 0.001958812309547508, 'warmup': 200, 'gamma': 0.982121432146722, 'time_dim': 32, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.03218609483157093, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 403 finished with value: 0.7883921023773182 and parameters: {'lr': 1.2141161104256791e-05, 'wd': 0.001958812309547508, 'warmup': 200, 'gamma': 0.982121432146722, 'time_dim': 32, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.03218609483157093, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16186976
torch.Size([180])


[I 2024-10-29 00:26:01,244] Trial 404 finished with value: 0.5907265379738206 and parameters: {'lr': 0.008085316934405574, 'wd': 0.008621441546888218, 'warmup': 200, 'gamma': 0.9831637155061692, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15600443826533755, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 404 finished with value: 0.5907265379738206 and parameters: {'lr': 0.008085316934405574, 'wd': 0.008621441546888218, 'warmup': 200, 'gamma': 0.9831637155061692, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15600443826533755, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 00:27:04,096] Trial 405 finished with value: 0.7664351856821378 and parameters: {'lr': 0.008926057371732752, 'wd': 0.0076808068633133575, 'warmup': 50, 'gamma': 0.9830627946176163, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16599560620365078, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 405 finished with value: 0.7664351856821378 and parameters: {'lr': 0.008926057371732752, 'wd': 0.0076808068633133575, 'warmup': 50, 'gamma': 0.9830627946176163, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16599560620365078, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13027200
torch.Size([180])


[I 2024-10-29 00:28:08,009] Trial 406 finished with value: 0.7764989662727527 and parameters: {'lr': 0.006589807083882416, 'wd': 0.009181314441519524, 'warmup': 250, 'gamma': 0.9813264837762391, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15398006818733048, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 406 finished with value: 0.7764989662727527 and parameters: {'lr': 0.006589807083882416, 'wd': 0.009181314441519524, 'warmup': 250, 'gamma': 0.9813264837762391, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15398006818733048, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16405504
torch.Size([180])


[I 2024-10-29 00:29:24,670] Trial 407 finished with value: 0.7487286192955462 and parameters: {'lr': 0.008068945524126394, 'wd': 0.008192838596501412, 'warmup': 200, 'gamma': 0.9824632145385578, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.009005792535062873, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 407 finished with value: 0.7487286192955462 and parameters: {'lr': 0.008068945524126394, 'wd': 0.008192838596501412, 'warmup': 200, 'gamma': 0.9824632145385578, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.009005792535062873, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 00:30:40,863] Trial 408 finished with value: 0.61845410738775 and parameters: {'lr': 0.009901114414460295, 'wd': 0.0006440164233629454, 'warmup': 200, 'gamma': 0.983048733358215, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15795860789786134, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 408 finished with value: 0.61845410738775 and parameters: {'lr': 0.009901114414460295, 'wd': 0.0006440164233629454, 'warmup': 200, 'gamma': 0.983048733358215, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15795860789786134, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 00:31:57,239] Trial 409 finished with value: 0.6916173616787037 and parameters: {'lr': 0.009037947709774261, 'wd': 0.004824062146360912, 'warmup': 200, 'gamma': 0.9835989404836134, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15772858558230385, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 409 finished with value: 0.6916173616787037 and parameters: {'lr': 0.009037947709774261, 'wd': 0.004824062146360912, 'warmup': 200, 'gamma': 0.9835989404836134, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15772858558230385, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 00:33:13,604] Trial 410 finished with value: 0.746962675289269 and parameters: {'lr': 0.006881475883687553, 'wd': 0.0007567269134979735, 'warmup': 50, 'gamma': 0.9830312455937932, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15483371715670058, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 410 finished with value: 0.746962675289269 and parameters: {'lr': 0.006881475883687553, 'wd': 0.0007567269134979735, 'warmup': 50, 'gamma': 0.9830312455937932, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15483371715670058, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 00:34:29,813] Trial 411 finished with value: 0.7302281596963125 and parameters: {'lr': 0.009120447480712706, 'wd': 0.0006218697980600671, 'warmup': 250, 'gamma': 0.9838137103140476, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16282861122612793, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 411 finished with value: 0.7302281596963125 and parameters: {'lr': 0.009120447480712706, 'wd': 0.0006218697980600671, 'warmup': 250, 'gamma': 0.9838137103140476, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16282861122612793, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 00:35:45,989] Trial 412 finished with value: 0.7394488562410177 and parameters: {'lr': 0.00793683875234351, 'wd': 0.0005336590697381719, 'warmup': 200, 'gamma': 0.9832223852402091, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15834902233428633, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 412 finished with value: 0.7394488562410177 and parameters: {'lr': 0.00793683875234351, 'wd': 0.0005336590697381719, 'warmup': 200, 'gamma': 0.9832223852402091, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15834902233428633, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 00:37:02,360] Trial 413 finished with value: 0.6675195098609729 and parameters: {'lr': 0.009624654317229107, 'wd': 0.0006200049685696063, 'warmup': 150, 'gamma': 0.9840957653872003, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.14971739617785154, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 413 finished with value: 0.6675195098609729 and parameters: {'lr': 0.009624654317229107, 'wd': 0.0006200049685696063, 'warmup': 150, 'gamma': 0.9840957653872003, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.14971739617785154, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 00:38:18,813] Trial 414 finished with value: 0.7423975325407243 and parameters: {'lr': 0.006121256752390111, 'wd': 0.006406893249171521, 'warmup': 200, 'gamma': 0.9880608579595032, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16173103422001775, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 414 finished with value: 0.7423975325407243 and parameters: {'lr': 0.006121256752390111, 'wd': 0.006406893249171521, 'warmup': 200, 'gamma': 0.9880608579595032, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16173103422001775, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 00:39:21,706] Trial 415 finished with value: 0.7280671870018652 and parameters: {'lr': 0.005063523232515185, 'wd': 0.009848505212684584, 'warmup': 250, 'gamma': 0.9827595306576038, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15227889187195665, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 415 finished with value: 0.7280671870018652 and parameters: {'lr': 0.005063523232515185, 'wd': 0.009848505212684584, 'warmup': 250, 'gamma': 0.9827595306576038, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15227889187195665, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16217680
torch.Size([180])


[I 2024-10-29 00:42:36,116] Trial 416 finished with value: 0.7792281915533714 and parameters: {'lr': 0.009918184788004353, 'wd': 0.0008191381567125954, 'warmup': 200, 'gamma': 0.9835815468069385, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15566502798908477, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 416 finished with value: 0.7792281915533714 and parameters: {'lr': 0.009918184788004353, 'wd': 0.0008191381567125954, 'warmup': 200, 'gamma': 0.9835815468069385, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15566502798908477, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13027200
torch.Size([180])


[I 2024-10-29 00:43:40,262] Trial 417 finished with value: 0.6341980517374405 and parameters: {'lr': 0.00738255027709561, 'wd': 0.000482464938623618, 'warmup': 50, 'gamma': 0.984295496246105, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16598495118950535, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 417 finished with value: 0.6341980517374405 and parameters: {'lr': 0.00738255027709561, 'wd': 0.000482464938623618, 'warmup': 50, 'gamma': 0.984295496246105, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16598495118950535, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13027200
torch.Size([180])


[I 2024-10-29 00:44:44,390] Trial 418 finished with value: 0.6303072479805912 and parameters: {'lr': 0.008001484064853442, 'wd': 0.0004614363709793888, 'warmup': 50, 'gamma': 0.9822436043277253, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16165966786271996, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 418 finished with value: 0.6303072479805912 and parameters: {'lr': 0.008001484064853442, 'wd': 0.0004614363709793888, 'warmup': 50, 'gamma': 0.9822436043277253, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16165966786271996, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13027200
torch.Size([180])


[I 2024-10-29 00:45:48,271] Trial 419 finished with value: 0.7576057502699396 and parameters: {'lr': 0.008210321148895987, 'wd': 0.0004673638896602792, 'warmup': 50, 'gamma': 0.9816724902422113, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.1593288687577375, 'schedule': 'constant_with_warmup'}. Best is trial 270 with value: 0.5098186230222284.


Trial 419 finished with value: 0.7576057502699396 and parameters: {'lr': 0.008210321148895987, 'wd': 0.0004673638896602792, 'warmup': 50, 'gamma': 0.9816724902422113, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.1593288687577375, 'schedule': 'constant_with_warmup'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13027200
torch.Size([180])


[I 2024-10-29 00:46:52,265] Trial 420 finished with value: 0.7243493765944601 and parameters: {'lr': 0.008203868587517437, 'wd': 0.0004097420378157689, 'warmup': 50, 'gamma': 0.9821083650372732, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.1633646632204959, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 420 finished with value: 0.7243493765944601 and parameters: {'lr': 0.008203868587517437, 'wd': 0.0004097420378157689, 'warmup': 50, 'gamma': 0.9821083650372732, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.1633646632204959, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13027200
torch.Size([180])


[I 2024-10-29 00:47:56,523] Trial 421 finished with value: 0.7107736794018836 and parameters: {'lr': 0.00718334376762527, 'wd': 0.00047071961032010585, 'warmup': 50, 'gamma': 0.9821878468846421, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16905780381130028, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 421 finished with value: 0.7107736794018836 and parameters: {'lr': 0.00718334376762527, 'wd': 0.00047071961032010585, 'warmup': 50, 'gamma': 0.9821878468846421, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16905780381130028, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13027200
torch.Size([180])


[I 2024-10-29 00:49:00,553] Trial 422 finished with value: 0.6948703611385146 and parameters: {'lr': 0.009905947112574108, 'wd': 0.0005392705729535096, 'warmup': 50, 'gamma': 0.9807491221960548, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16613750431318922, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 422 finished with value: 0.6948703611385146 and parameters: {'lr': 0.009905947112574108, 'wd': 0.0005392705729535096, 'warmup': 50, 'gamma': 0.9807491221960548, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16613750431318922, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13027200
torch.Size([180])


[I 2024-10-29 00:50:04,531] Trial 423 finished with value: 0.7615854670020111 and parameters: {'lr': 0.00848206892189629, 'wd': 0.0003691252486267777, 'warmup': 50, 'gamma': 0.982853572824251, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16086953589683456, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 423 finished with value: 0.7615854670020111 and parameters: {'lr': 0.00848206892189629, 'wd': 0.0003691252486267777, 'warmup': 50, 'gamma': 0.982853572824251, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16086953589683456, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16174976
torch.Size([180])


[I 2024-10-29 00:51:16,064] Trial 424 finished with value: 0.6988701635144794 and parameters: {'lr': 0.009926320800974713, 'wd': 0.0006640971020614261, 'warmup': 50, 'gamma': 0.9832961744818441, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.1737301134935246, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 424 finished with value: 0.6988701635144794 and parameters: {'lr': 0.009926320800974713, 'wd': 0.0006640971020614261, 'warmup': 50, 'gamma': 0.9832961744818441, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.1737301134935246, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13027200
torch.Size([180])


[I 2024-10-29 00:52:20,131] Trial 425 finished with value: 0.6920060433654771 and parameters: {'lr': 0.007048123030527684, 'wd': 0.0004295680271677134, 'warmup': 50, 'gamma': 0.9825482800080292, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.195993733015301, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.


Trial 425 finished with value: 0.6920060433654771 and parameters: {'lr': 0.007048123030527684, 'wd': 0.0004295680271677134, 'warmup': 50, 'gamma': 0.9825482800080292, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.195993733015301, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13091840
torch.Size([180])


[I 2024-10-29 00:53:24,674] Trial 426 finished with value: 0.7231816642497283 and parameters: {'lr': 0.00800456523609731, 'wd': 0.0005674730736569052, 'warmup': 50, 'gamma': 0.9813455279922157, 'time_dim': 32, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.0030942170566974165, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 426 finished with value: 0.7231816642497283 and parameters: {'lr': 0.00800456523609731, 'wd': 0.0005674730736569052, 'warmup': 50, 'gamma': 0.9813455279922157, 'time_dim': 32, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.0030942170566974165, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13027200
torch.Size([180])


[I 2024-10-29 00:54:28,735] Trial 427 finished with value: 0.7608920113411697 and parameters: {'lr': 0.00646720050268832, 'wd': 0.0005239585171231846, 'warmup': 50, 'gamma': 0.9838773699358533, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.016017495111583485, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 427 finished with value: 0.7608920113411697 and parameters: {'lr': 0.00646720050268832, 'wd': 0.0005239585171231846, 'warmup': 50, 'gamma': 0.9838773699358533, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.016017495111583485, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13813632
torch.Size([180])


[I 2024-10-29 00:55:35,847] Trial 428 finished with value: 0.7470725766489276 and parameters: {'lr': 0.009976547716474445, 'wd': 0.0004789394924349974, 'warmup': 50, 'gamma': 0.9830320294902375, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.1642119981977437, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 428 finished with value: 0.7470725766489276 and parameters: {'lr': 0.009976547716474445, 'wd': 0.0004789394924349974, 'warmup': 50, 'gamma': 0.9830320294902375, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.1642119981977437, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13064016
torch.Size([180])


[I 2024-10-29 00:58:15,779] Trial 429 finished with value: 0.759332200404836 and parameters: {'lr': 0.008494167351719767, 'wd': 0.0003299245277106417, 'warmup': 50, 'gamma': 0.9820836657141477, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.1574580127218009, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 429 finished with value: 0.759332200404836 and parameters: {'lr': 0.008494167351719767, 'wd': 0.0003299245277106417, 'warmup': 50, 'gamma': 0.9820836657141477, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.1574580127218009, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13251840
torch.Size([180])


[I 2024-10-29 00:59:20,057] Trial 430 finished with value: 0.6608763362003823 and parameters: {'lr': 0.007120623786702635, 'wd': 0.00039541967261004144, 'warmup': 50, 'gamma': 0.9841911557327765, 'time_dim': 64, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.18313607825238523, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 430 finished with value: 0.6608763362003823 and parameters: {'lr': 0.007120623786702635, 'wd': 0.00039541967261004144, 'warmup': 50, 'gamma': 0.9841911557327765, 'time_dim': 64, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.18313607825238523, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13027200
torch.Size([180])


[I 2024-10-29 01:00:24,065] Trial 431 finished with value: 0.6975183183067301 and parameters: {'lr': 0.008626689110955472, 'wd': 0.0006318212252115354, 'warmup': 50, 'gamma': 0.9834614720080672, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.19993724537838087, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 431 finished with value: 0.6975183183067301 and parameters: {'lr': 0.008626689110955472, 'wd': 0.0006318212252115354, 'warmup': 50, 'gamma': 0.9834614720080672, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.19993724537838087, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 38256512
torch.Size([180])


[I 2024-10-29 01:03:01,078] Trial 432 finished with value: 0.7391650845721678 and parameters: {'lr': 0.007614572063779462, 'wd': 0.0007238566932556376, 'warmup': 200, 'gamma': 0.9849981393561861, 'time_dim': 16, 'patch_size': 32, 'depth': 12, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.19054827913799222, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 432 finished with value: 0.7391650845721678 and parameters: {'lr': 0.007614572063779462, 'wd': 0.0007238566932556376, 'warmup': 200, 'gamma': 0.9849981393561861, 'time_dim': 16, 'patch_size': 32, 'depth': 12, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.19054827913799222, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13064016
torch.Size([180])


[I 2024-10-29 01:05:41,183] Trial 433 finished with value: 0.7703932484056313 and parameters: {'lr': 0.006065582208924508, 'wd': 0.00028568555433237307, 'warmup': 50, 'gamma': 0.9825339060333355, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.012703912578028064, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 433 finished with value: 0.7703932484056313 and parameters: {'lr': 0.006065582208924508, 'wd': 0.00028568555433237307, 'warmup': 50, 'gamma': 0.9825339060333355, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.012703912578028064, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 01:06:57,516] Trial 434 finished with value: 0.7051165682325868 and parameters: {'lr': 0.008790709120144844, 'wd': 0.0004943338797885857, 'warmup': 200, 'gamma': 0.9818073942586885, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15740791293011747, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 434 finished with value: 0.7051165682325868 and parameters: {'lr': 0.008790709120144844, 'wd': 0.0004943338797885857, 'warmup': 200, 'gamma': 0.9818073942586885, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.15740791293011747, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 17163904
torch.Size([180])


[I 2024-10-29 01:08:17,425] Trial 435 finished with value: 0.6891723143012296 and parameters: {'lr': 0.007300605382644179, 'wd': 0.0010039512475790182, 'warmup': 200, 'gamma': 0.9842983207100218, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.17165402762168436, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 435 finished with value: 0.6891723143012296 and parameters: {'lr': 0.007300605382644179, 'wd': 0.0010039512475790182, 'warmup': 200, 'gamma': 0.9842983207100218, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.17165402762168436, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 01:09:33,668] Trial 436 finished with value: 0.7167973990126094 and parameters: {'lr': 0.009946049661575975, 'wd': 0.0004209701769131962, 'warmup': 50, 'gamma': 0.983676777281212, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.17933554184527034, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 436 finished with value: 0.7167973990126094 and parameters: {'lr': 0.009946049661575975, 'wd': 0.0004209701769131962, 'warmup': 50, 'gamma': 0.983676777281212, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.17933554184527034, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 01:10:36,444] Trial 437 finished with value: 0.7357117780186528 and parameters: {'lr': 0.006553587822479863, 'wd': 0.00035800098374419437, 'warmup': 200, 'gamma': 0.9829137449577697, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.007782583316948121, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 437 finished with value: 0.7357117780186528 and parameters: {'lr': 0.006553587822479863, 'wd': 0.00035800098374419437, 'warmup': 200, 'gamma': 0.9829137449577697, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.007782583316948121, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 9918288
torch.Size([180])


[I 2024-10-29 01:12:46,996] Trial 438 finished with value: 0.6761008822999873 and parameters: {'lr': 0.008193117730135162, 'wd': 0.00013232742216244276, 'warmup': 200, 'gamma': 0.9850942824952661, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.018595695797387526, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 438 finished with value: 0.6761008822999873 and parameters: {'lr': 0.008193117730135162, 'wd': 0.00013232742216244276, 'warmup': 200, 'gamma': 0.9850942824952661, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.018595695797387526, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 17163904
torch.Size([180])


[I 2024-10-29 01:14:07,506] Trial 439 finished with value: 0.7903097637694768 and parameters: {'lr': 0.005784253594651829, 'wd': 0.0007976842888905724, 'warmup': 50, 'gamma': 0.9763676889878932, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.1618557014095495, 'schedule': 'constant_with_warmup'}. Best is trial 270 with value: 0.5098186230222284.


Trial 439 finished with value: 0.7903097637694768 and parameters: {'lr': 0.005784253594651829, 'wd': 0.0007976842888905724, 'warmup': 50, 'gamma': 0.9763676889878932, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.1618557014095495, 'schedule': 'constant_with_warmup'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 01:15:24,383] Trial 440 finished with value: 0.6914075217363628 and parameters: {'lr': 0.007515362106153736, 'wd': 0.0006278138921638442, 'warmup': 200, 'gamma': 0.9833730510814609, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.1678519476271779, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 440 finished with value: 0.6914075217363628 and parameters: {'lr': 0.007515362106153736, 'wd': 0.0006278138921638442, 'warmup': 200, 'gamma': 0.9833730510814609, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.1678519476271779, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 01:16:27,755] Trial 441 finished with value: 0.7346748560274597 and parameters: {'lr': 0.0009067607316468415, 'wd': 0.0005745949962533486, 'warmup': 200, 'gamma': 0.9843088032845663, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.026718251472052687, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 441 finished with value: 0.7346748560274597 and parameters: {'lr': 0.0009067607316468415, 'wd': 0.0005745949962533486, 'warmup': 200, 'gamma': 0.9843088032845663, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.026718251472052687, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 10704720
torch.Size([180])


[I 2024-10-29 01:18:47,005] Trial 442 finished with value: 0.7501040775032591 and parameters: {'lr': 0.008837309706343098, 'wd': 0.0004610644663023565, 'warmup': 50, 'gamma': 0.9826967769123769, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.19391181660521398, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 442 finished with value: 0.7501040775032591 and parameters: {'lr': 0.008837309706343098, 'wd': 0.0004610644663023565, 'warmup': 50, 'gamma': 0.9826967769123769, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.19391181660521398, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 01:19:50,354] Trial 443 finished with value: 0.6401122356482606 and parameters: {'lr': 0.007062356314841709, 'wd': 0.0006940176585957247, 'warmup': 200, 'gamma': 0.9846893770831596, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.1525101072865251, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 443 finished with value: 0.6401122356482606 and parameters: {'lr': 0.007062356314841709, 'wd': 0.0006940176585957247, 'warmup': 200, 'gamma': 0.9846893770831596, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.1525101072865251, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 01:20:53,637] Trial 444 finished with value: 0.723662892326419 and parameters: {'lr': 0.00999819019005063, 'wd': 0.0006919057425315194, 'warmup': 100, 'gamma': 0.9838625865666532, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.1488528056704751, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 444 finished with value: 0.723662892326419 and parameters: {'lr': 0.00999819019005063, 'wd': 0.0006919057425315194, 'warmup': 100, 'gamma': 0.9838625865666532, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.1488528056704751, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 01:21:56,847] Trial 445 finished with value: 0.7287467681185081 and parameters: {'lr': 0.006516719126055957, 'wd': 0.0010192555085768219, 'warmup': 200, 'gamma': 0.983323995472312, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.14172613178791993, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.


Trial 445 finished with value: 0.7287467681185081 and parameters: {'lr': 0.006516719126055957, 'wd': 0.0010192555085768219, 'warmup': 200, 'gamma': 0.983323995472312, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.14172613178791993, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 01:22:59,618] Trial 446 finished with value: 0.6189626059253976 and parameters: {'lr': 0.0056860104695858555, 'wd': 0.0005725292404763075, 'warmup': 200, 'gamma': 0.9810527543185332, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.14617575835025062, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 446 finished with value: 0.6189626059253976 and parameters: {'lr': 0.0056860104695858555, 'wd': 0.0005725292404763075, 'warmup': 200, 'gamma': 0.9810527543185332, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.14617575835025062, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 01:24:02,482] Trial 447 finished with value: 0.792629183981448 and parameters: {'lr': 0.005820003286630454, 'wd': 0.0008713150518873927, 'warmup': 50, 'gamma': 0.9803949413727677, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.14641385372354626, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 447 finished with value: 0.792629183981448 and parameters: {'lr': 0.005820003286630454, 'wd': 0.0008713150518873927, 'warmup': 50, 'gamma': 0.9803949413727677, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.14641385372354626, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 01:25:05,221] Trial 448 finished with value: 0.6699117337640136 and parameters: {'lr': 0.0051561035774326264, 'wd': 0.0007394207441345567, 'warmup': 200, 'gamma': 0.9811432664375965, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15357674414161732, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 448 finished with value: 0.6699117337640136 and parameters: {'lr': 0.0051561035774326264, 'wd': 0.0007394207441345567, 'warmup': 200, 'gamma': 0.9811432664375965, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15357674414161732, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 22491776
torch.Size([180])


[I 2024-10-29 01:26:39,348] Trial 449 finished with value: 0.7608709832456588 and parameters: {'lr': 0.006467917768410158, 'wd': 0.0005487561186673116, 'warmup': 150, 'gamma': 0.9805673513210242, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.17563129839444314, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 449 finished with value: 0.7608709832456588 and parameters: {'lr': 0.006467917768410158, 'wd': 0.0005487561186673116, 'warmup': 150, 'gamma': 0.9805673513210242, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.17563129839444314, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 01:27:42,185] Trial 450 finished with value: 0.6811452405818048 and parameters: {'lr': 0.004792233328561968, 'wd': 0.0006505702556214071, 'warmup': 200, 'gamma': 0.9801140355352449, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.024705869005975736, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 450 finished with value: 0.6811452405818048 and parameters: {'lr': 0.004792233328561968, 'wd': 0.0006505702556214071, 'warmup': 200, 'gamma': 0.9801140355352449, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.024705869005975736, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12313344
torch.Size([180])


[I 2024-10-29 01:28:45,315] Trial 451 finished with value: 0.7425029806568947 and parameters: {'lr': 0.004185308524700624, 'wd': 0.0008100298510935862, 'warmup': 50, 'gamma': 0.9815051751357249, 'time_dim': 32, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.0212376488408743, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 451 finished with value: 0.7425029806568947 and parameters: {'lr': 0.004185308524700624, 'wd': 0.0008100298510935862, 'warmup': 50, 'gamma': 0.9815051751357249, 'time_dim': 32, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.0212376488408743, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 01:29:48,138] Trial 452 finished with value: 0.6769068776138163 and parameters: {'lr': 0.005829228926650098, 'wd': 0.0005619124060235772, 'warmup': 200, 'gamma': 0.981677283381465, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.01500523431365265, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 452 finished with value: 0.6769068776138163 and parameters: {'lr': 0.005829228926650098, 'wd': 0.0005619124060235772, 'warmup': 200, 'gamma': 0.981677283381465, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.01500523431365265, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 10106112
torch.Size([180])


[I 2024-10-29 01:30:41,705] Trial 453 finished with value: 0.73742343195044 and parameters: {'lr': 0.006914080989929119, 'wd': 0.0006459719286621819, 'warmup': 100, 'gamma': 0.9808876185879845, 'time_dim': 64, 'patch_size': 32, 'depth': 4, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.18847619198960622, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 453 finished with value: 0.73742343195044 and parameters: {'lr': 0.006914080989929119, 'wd': 0.0006459719286621819, 'warmup': 100, 'gamma': 0.9808876185879845, 'time_dim': 64, 'patch_size': 32, 'depth': 4, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.18847619198960622, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 01:31:44,477] Trial 454 finished with value: 0.6790140843076843 and parameters: {'lr': 0.007456850646323892, 'wd': 0.0007037375211846613, 'warmup': 200, 'gamma': 0.9821676137742387, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15181074268801403, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 454 finished with value: 0.6790140843076843 and parameters: {'lr': 0.007456850646323892, 'wd': 0.0007037375211846613, 'warmup': 200, 'gamma': 0.9821676137742387, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15181074268801403, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 01:32:47,283] Trial 455 finished with value: 0.7598491847343803 and parameters: {'lr': 0.005471145724574137, 'wd': 0.0005345830586795658, 'warmup': 50, 'gamma': 0.9858747152215844, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.0001625734084777855, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 455 finished with value: 0.7598491847343803 and parameters: {'lr': 0.005471145724574137, 'wd': 0.0005345830586795658, 'warmup': 50, 'gamma': 0.9858747152215844, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.0001625734084777855, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 01:33:50,138] Trial 456 finished with value: 0.797638313192464 and parameters: {'lr': 0.006291164031263344, 'wd': 0.00088812661300705, 'warmup': 200, 'gamma': 0.9843644994206404, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.14618765568478015, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 456 finished with value: 0.797638313192464 and parameters: {'lr': 0.006291164031263344, 'wd': 0.00088812661300705, 'warmup': 200, 'gamma': 0.9843644994206404, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.14618765568478015, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 01:34:52,773] Trial 457 finished with value: 0.776273088311024 and parameters: {'lr': 0.007885046785762137, 'wd': 0.0005876622723498543, 'warmup': 300, 'gamma': 0.9855383179560876, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.057669489058173026, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 457 finished with value: 0.776273088311024 and parameters: {'lr': 0.007885046785762137, 'wd': 0.0005876622723498543, 'warmup': 300, 'gamma': 0.9855383179560876, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.057669489058173026, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 01:35:55,535] Trial 458 finished with value: 0.765656402857192 and parameters: {'lr': 0.006889834682487467, 'wd': 0.00048693430608451647, 'warmup': 200, 'gamma': 0.9821894757837621, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15565791814306815, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 458 finished with value: 0.765656402857192 and parameters: {'lr': 0.006889834682487467, 'wd': 0.00048693430608451647, 'warmup': 200, 'gamma': 0.9821894757837621, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15565791814306815, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-29 01:37:23,498] Trial 459 finished with value: 0.6367017168363591 and parameters: {'lr': 0.008380831163803242, 'wd': 0.0007789251794443609, 'warmup': 50, 'gamma': 0.9848823675721528, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.009269457016416963, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 459 finished with value: 0.6367017168363591 and parameters: {'lr': 0.008380831163803242, 'wd': 0.0007789251794443609, 'warmup': 50, 'gamma': 0.9848823675721528, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.009269457016416963, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-29 01:38:51,771] Trial 460 finished with value: 0.6684857376465334 and parameters: {'lr': 0.008445651571658089, 'wd': 7.656655819783626e-05, 'warmup': 50, 'gamma': 0.9851821878284669, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.007405840113142182, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 460 finished with value: 0.6684857376465334 and parameters: {'lr': 0.008445651571658089, 'wd': 7.656655819783626e-05, 'warmup': 50, 'gamma': 0.9851821878284669, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.007405840113142182, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-29 01:40:19,631] Trial 461 finished with value: 0.7586572276529281 and parameters: {'lr': 0.008709648504151157, 'wd': 0.0011612815682147141, 'warmup': 50, 'gamma': 0.9813419120890309, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.010612881598361351, 'schedule': 'constant_with_warmup'}. Best is trial 270 with value: 0.5098186230222284.


Trial 461 finished with value: 0.7586572276529281 and parameters: {'lr': 0.008709648504151157, 'wd': 0.0011612815682147141, 'warmup': 50, 'gamma': 0.9813419120890309, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.010612881598361351, 'schedule': 'constant_with_warmup'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-29 01:41:47,691] Trial 462 finished with value: 0.7514677192169844 and parameters: {'lr': 0.0012074243608950785, 'wd': 0.0009348278181482112, 'warmup': 50, 'gamma': 0.9826482668156599, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.008015476389879329, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 462 finished with value: 0.7514677192169844 and parameters: {'lr': 0.0012074243608950785, 'wd': 0.0009348278181482112, 'warmup': 50, 'gamma': 0.9826482668156599, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.008015476389879329, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-29 01:43:15,638] Trial 463 finished with value: 0.670867632830779 and parameters: {'lr': 0.00801879420138322, 'wd': 0.0008435589766940037, 'warmup': 50, 'gamma': 0.9795871387318839, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.012182441094318767, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 463 finished with value: 0.670867632830779 and parameters: {'lr': 0.00801879420138322, 'wd': 0.0008435589766940037, 'warmup': 50, 'gamma': 0.9795871387318839, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.012182441094318767, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-29 01:44:43,754] Trial 464 finished with value: 0.73714116636378 and parameters: {'lr': 0.0014737862900912465, 'wd': 0.0004929913362143618, 'warmup': 50, 'gamma': 0.9840106315570918, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.01739027069561408, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 464 finished with value: 0.73714116636378 and parameters: {'lr': 0.0014737862900912465, 'wd': 0.0004929913362143618, 'warmup': 50, 'gamma': 0.9840106315570918, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.01739027069561408, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-29 01:46:11,778] Trial 465 finished with value: 0.7040072794341755 and parameters: {'lr': 0.008992189704412805, 'wd': 6.572478123246729e-05, 'warmup': 50, 'gamma': 0.9846892535017, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.00220148473781302, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 465 finished with value: 0.7040072794341755 and parameters: {'lr': 0.008992189704412805, 'wd': 6.572478123246729e-05, 'warmup': 50, 'gamma': 0.9846892535017, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.00220148473781302, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-29 01:47:39,964] Trial 466 finished with value: 0.7627631342685544 and parameters: {'lr': 0.007686261091526396, 'wd': 0.0006166455920146434, 'warmup': 50, 'gamma': 0.9838823033237393, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.006080893032240513, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 466 finished with value: 0.7627631342685544 and parameters: {'lr': 0.007686261091526396, 'wd': 0.0006166455920146434, 'warmup': 50, 'gamma': 0.9838823033237393, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.006080893032240513, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 22488192
torch.Size([180])


[I 2024-10-29 01:49:20,260] Trial 467 finished with value: 0.7530106719626153 and parameters: {'lr': 0.006425150636490721, 'wd': 0.0007481720889891548, 'warmup': 50, 'gamma': 0.9818493912564276, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.013037169855744678, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 467 finished with value: 0.7530106719626153 and parameters: {'lr': 0.006425150636490721, 'wd': 0.0007481720889891548, 'warmup': 50, 'gamma': 0.9818493912564276, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.013037169855744678, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-29 01:50:48,241] Trial 468 finished with value: 0.728852607044449 and parameters: {'lr': 0.008803522829848734, 'wd': 0.0004314084723498985, 'warmup': 100, 'gamma': 0.9711391743352825, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.1948917444202169, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 468 finished with value: 0.728852607044449 and parameters: {'lr': 0.008803522829848734, 'wd': 0.0004314084723498985, 'warmup': 100, 'gamma': 0.9711391743352825, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.1948917444202169, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13813632
torch.Size([180])


[I 2024-10-29 01:51:55,339] Trial 469 finished with value: 0.6425732149003281 and parameters: {'lr': 0.007567588953992564, 'wd': 0.001278883461954961, 'warmup': 50, 'gamma': 0.983056930800599, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.18476320257736706, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.


Trial 469 finished with value: 0.6425732149003281 and parameters: {'lr': 0.007567588953992564, 'wd': 0.001278883461954961, 'warmup': 50, 'gamma': 0.983056930800599, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.18476320257736706, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-29 01:53:23,457] Trial 470 finished with value: 0.6959686153798732 and parameters: {'lr': 0.00040389691732930014, 'wd': 0.0005864847070152083, 'warmup': 50, 'gamma': 0.9857363888666082, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.02177216378924963, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 470 finished with value: 0.6959686153798732 and parameters: {'lr': 0.00040389691732930014, 'wd': 0.0005864847070152083, 'warmup': 50, 'gamma': 0.9857363888666082, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.02177216378924963, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15860224
torch.Size([180])


[I 2024-10-29 01:54:40,917] Trial 471 finished with value: 0.745496856864837 and parameters: {'lr': 8.91721675525148e-05, 'wd': 0.0009804152288587127, 'warmup': 100, 'gamma': 0.984798621848516, 'time_dim': 32, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.028145078765519492, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 471 finished with value: 0.745496856864837 and parameters: {'lr': 8.91721675525148e-05, 'wd': 0.0009804152288587127, 'warmup': 100, 'gamma': 0.984798621848516, 'time_dim': 32, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.028145078765519492, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16174976
torch.Size([180])


[I 2024-10-29 01:55:52,564] Trial 472 finished with value: 0.7299706781111721 and parameters: {'lr': 0.00013828825927597374, 'wd': 0.000763130006593084, 'warmup': 50, 'gamma': 0.9824824904504539, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.16956654248533753, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 472 finished with value: 0.7299706781111721 and parameters: {'lr': 0.00013828825927597374, 'wd': 0.000763130006593084, 'warmup': 50, 'gamma': 0.9824824904504539, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.16956654248533753, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 01:57:08,991] Trial 473 finished with value: 0.7270115560457322 and parameters: {'lr': 0.008841958975461589, 'wd': 0.0005258235192555209, 'warmup': 50, 'gamma': 0.9836860403965966, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.004874735155447199, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 473 finished with value: 0.7270115560457322 and parameters: {'lr': 0.008841958975461589, 'wd': 0.0005258235192555209, 'warmup': 50, 'gamma': 0.9836860403965966, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.004874735155447199, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15795584
torch.Size([180])


[I 2024-10-29 01:58:26,576] Trial 474 finished with value: 0.7284400505904434 and parameters: {'lr': 0.00587712241171254, 'wd': 0.0010905799696900008, 'warmup': 200, 'gamma': 0.984257153646818, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1648273256335846, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 474 finished with value: 0.7284400505904434 and parameters: {'lr': 0.00587712241171254, 'wd': 0.0010905799696900008, 'warmup': 200, 'gamma': 0.984257153646818, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1648273256335846, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16405504
torch.Size([180])


[I 2024-10-29 01:59:43,150] Trial 475 finished with value: 0.7261902259739639 and parameters: {'lr': 0.007017175240744397, 'wd': 0.00043309639403733797, 'warmup': 150, 'gamma': 0.9853970601659598, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.010694349092329546, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 475 finished with value: 0.7261902259739639 and parameters: {'lr': 0.007017175240744397, 'wd': 0.00043309639403733797, 'warmup': 150, 'gamma': 0.9853970601659598, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.010694349092329546, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13231744
torch.Size([180])


[I 2024-10-29 02:00:50,372] Trial 476 finished with value: 0.7126379648284557 and parameters: {'lr': 0.008167881897692492, 'wd': 0.0006861014664822302, 'warmup': 50, 'gamma': 0.9863495209444535, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.19702915823524178, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 476 finished with value: 0.7126379648284557 and parameters: {'lr': 0.008167881897692492, 'wd': 0.0006861014664822302, 'warmup': 50, 'gamma': 0.9863495209444535, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.19702915823524178, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 02:02:06,666] Trial 477 finished with value: 0.7282127326022216 and parameters: {'lr': 0.007014393141238905, 'wd': 0.0005002804852043769, 'warmup': 200, 'gamma': 0.9832891395261922, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.014847472929799039, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 477 finished with value: 0.7282127326022216 and parameters: {'lr': 0.007014393141238905, 'wd': 0.0005002804852043769, 'warmup': 200, 'gamma': 0.9832891395261922, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.014847472929799039, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13027200
torch.Size([180])


[I 2024-10-29 02:03:10,669] Trial 478 finished with value: 0.6591175971534331 and parameters: {'lr': 0.008949039183031122, 'wd': 0.0013130219282563122, 'warmup': 100, 'gamma': 0.982834211352185, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.018341542556803786, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 478 finished with value: 0.6591175971534331 and parameters: {'lr': 0.008949039183031122, 'wd': 0.0013130219282563122, 'warmup': 100, 'gamma': 0.982834211352185, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.018341542556803786, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15203936
torch.Size([180])


[I 2024-10-29 02:04:58,225] Trial 479 finished with value: 0.7053884355045695 and parameters: {'lr': 0.009990959093159689, 'wd': 0.0032651293177064704, 'warmup': 300, 'gamma': 0.9820153683075226, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.004279971921349625, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 479 finished with value: 0.7053884355045695 and parameters: {'lr': 0.009990959093159689, 'wd': 0.0032651293177064704, 'warmup': 300, 'gamma': 0.9820153683075226, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.004279971921349625, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 18146944
torch.Size([180])


[I 2024-10-29 02:06:21,200] Trial 480 finished with value: 0.680419428634936 and parameters: {'lr': 0.007944233005722995, 'wd': 0.0005756631601722284, 'warmup': 200, 'gamma': 0.9836673486944659, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 10, 'mlp_dim': 256, 'emb_dropout': 0.16090867935281983, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 480 finished with value: 0.680419428634936 and parameters: {'lr': 0.007944233005722995, 'wd': 0.0005756631601722284, 'warmup': 200, 'gamma': 0.9836673486944659, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 10, 'mlp_dim': 256, 'emb_dropout': 0.16090867935281983, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14615936
torch.Size([180])


[I 2024-10-29 02:07:33,237] Trial 481 finished with value: 0.6910031879477381 and parameters: {'lr': 0.0007742789258603806, 'wd': 0.0004003775623003711, 'warmup': 50, 'gamma': 0.9846787228183651, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.13426958787779433, 'schedule': 'cosine'}. Best is trial 270 with value: 0.5098186230222284.


Trial 481 finished with value: 0.6910031879477381 and parameters: {'lr': 0.0007742789258603806, 'wd': 0.0004003775623003711, 'warmup': 50, 'gamma': 0.9846787228183651, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.13426958787779433, 'schedule': 'cosine'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 02:08:49,430] Trial 482 finished with value: 0.7214931808724852 and parameters: {'lr': 0.00527674594501567, 'wd': 0.0007683412881928215, 'warmup': 200, 'gamma': 0.9808193835559353, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.19237202173546897, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 482 finished with value: 0.7214931808724852 and parameters: {'lr': 0.00527674594501567, 'wd': 0.0007683412881928215, 'warmup': 200, 'gamma': 0.9808193835559353, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.19237202173546897, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12240768
torch.Size([180])


[I 2024-10-29 02:09:51,833] Trial 483 finished with value: 0.7168926470365802 and parameters: {'lr': 0.006300430722338451, 'wd': 0.001520106160229242, 'warmup': 200, 'gamma': 0.9842194113706363, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.023042338633389114, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 483 finished with value: 0.7168926470365802 and parameters: {'lr': 0.006300430722338451, 'wd': 0.001520106160229242, 'warmup': 200, 'gamma': 0.9842194113706363, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.023042338633389114, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 02:11:07,971] Trial 484 finished with value: 0.7517916766738976 and parameters: {'lr': 0.00998755997724514, 'wd': 0.0006206996655544142, 'warmup': 50, 'gamma': 0.9829773554850584, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.17191529392880003, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 484 finished with value: 0.7517916766738976 and parameters: {'lr': 0.00998755997724514, 'wd': 0.0006206996655544142, 'warmup': 50, 'gamma': 0.9829773554850584, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.17191529392880003, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14615936
torch.Size([180])


[I 2024-10-29 02:12:19,873] Trial 485 finished with value: 0.777847341869231 and parameters: {'lr': 1.9026798045517226e-05, 'wd': 0.00047181467970891684, 'warmup': 200, 'gamma': 0.9851132841957843, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.17849044813503456, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 485 finished with value: 0.777847341869231 and parameters: {'lr': 1.9026798045517226e-05, 'wd': 0.00047181467970891684, 'warmup': 200, 'gamma': 0.9851132841957843, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.17849044813503456, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13237856
torch.Size([180])


[I 2024-10-29 02:13:56,715] Trial 486 finished with value: 0.7300416749804891 and parameters: {'lr': 0.007471018808235869, 'wd': 0.0003332538443144443, 'warmup': 100, 'gamma': 0.9824502170492011, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.16578149326647526, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 486 finished with value: 0.7300416749804891 and parameters: {'lr': 0.007471018808235869, 'wd': 0.0003332538443144443, 'warmup': 100, 'gamma': 0.9824502170492011, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.16578149326647526, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13231744
torch.Size([180])


[I 2024-10-29 02:15:04,014] Trial 487 finished with value: 0.7736380492449902 and parameters: {'lr': 0.008731076948928893, 'wd': 0.0011700133865954582, 'warmup': 50, 'gamma': 0.9816497788374313, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.0755143648818562, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 487 finished with value: 0.7736380492449902 and parameters: {'lr': 0.008731076948928893, 'wd': 0.0011700133865954582, 'warmup': 50, 'gamma': 0.9816497788374313, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.0755143648818562, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 02:16:14,093] Trial 488 finished with value: 0.7157564622943997 and parameters: {'lr': 0.005936042287671774, 'wd': 0.0008251401033993291, 'warmup': 200, 'gamma': 0.9836734802549086, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02843151322296275, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 488 finished with value: 0.7157564622943997 and parameters: {'lr': 0.005936042287671774, 'wd': 0.0008251401033993291, 'warmup': 200, 'gamma': 0.9836734802549086, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02843151322296275, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 02:17:24,008] Trial 489 finished with value: 0.662982514885016 and parameters: {'lr': 0.004640185944861434, 'wd': 0.00052934923035265, 'warmup': 200, 'gamma': 0.9856649192818239, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03655943088329655, 'schedule': 'constant_with_warmup'}. Best is trial 270 with value: 0.5098186230222284.


Trial 489 finished with value: 0.662982514885016 and parameters: {'lr': 0.004640185944861434, 'wd': 0.00052934923035265, 'warmup': 200, 'gamma': 0.9856649192818239, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03655943088329655, 'schedule': 'constant_with_warmup'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16983168
torch.Size([180])


[I 2024-10-29 02:18:45,197] Trial 490 finished with value: 0.7110539520164417 and parameters: {'lr': 0.006858416694080817, 'wd': 0.0006978997708825247, 'warmup': 50, 'gamma': 0.9845914682781626, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.18653135753931882, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 490 finished with value: 0.7110539520164417 and parameters: {'lr': 0.006858416694080817, 'wd': 0.0006978997708825247, 'warmup': 50, 'gamma': 0.9845914682781626, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.18653135753931882, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 11491152
torch.Size([180])


[I 2024-10-29 02:21:10,666] Trial 491 finished with value: 0.7632417828853838 and parameters: {'lr': 0.008013923034896358, 'wd': 0.00011678109298062731, 'warmup': 200, 'gamma': 0.9833116205433738, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1597113288487334, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 491 finished with value: 0.7632417828853838 and parameters: {'lr': 0.008013923034896358, 'wd': 0.00011678109298062731, 'warmup': 200, 'gamma': 0.9833116205433738, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1597113288487334, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 24056192
torch.Size([180])


[I 2024-10-29 02:22:50,427] Trial 492 finished with value: 0.7223369521178028 and parameters: {'lr': 0.008778421526666474, 'wd': 0.0009601539929192634, 'warmup': 200, 'gamma': 0.9842289565874535, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.19000662688480585, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 492 finished with value: 0.7223369521178028 and parameters: {'lr': 0.008778421526666474, 'wd': 0.0009601539929192634, 'warmup': 200, 'gamma': 0.9842289565874535, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.19000662688480585, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 37330176
torch.Size([180])


[I 2024-10-29 02:25:24,306] Trial 493 finished with value: 0.7193424085571537 and parameters: {'lr': 0.007376261429133966, 'wd': 0.0014504468070029365, 'warmup': 50, 'gamma': 0.9811973855726511, 'time_dim': 32, 'patch_size': 32, 'depth': 11, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.009552677237811847, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 493 finished with value: 0.7193424085571537 and parameters: {'lr': 0.007376261429133966, 'wd': 0.0014504468070029365, 'warmup': 50, 'gamma': 0.9811973855726511, 'time_dim': 32, 'patch_size': 32, 'depth': 11, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.009552677237811847, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 18160992
torch.Size([180])


[I 2024-10-29 02:27:29,238] Trial 494 finished with value: 0.7666640698482698 and parameters: {'lr': 0.005591505517603386, 'wd': 0.0023780649773840166, 'warmup': 100, 'gamma': 0.9863952327038258, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.19960161083588127, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 494 finished with value: 0.7666640698482698 and parameters: {'lr': 0.005591505517603386, 'wd': 0.0023780649773840166, 'warmup': 100, 'gamma': 0.9863952327038258, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.19960161083588127, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 02:28:39,333] Trial 495 finished with value: 0.6824528710539965 and parameters: {'lr': 0.00635565719829304, 'wd': 0.000597033219944529, 'warmup': 150, 'gamma': 0.9827443167068831, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.04569322815638769, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 495 finished with value: 0.6824528710539965 and parameters: {'lr': 0.00635565719829304, 'wd': 0.000597033219944529, 'warmup': 150, 'gamma': 0.9827443167068831, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.04569322815638769, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 02:29:42,053] Trial 496 finished with value: 0.7833288548546987 and parameters: {'lr': 0.003955409872163971, 'wd': 0.0004449971795720918, 'warmup': 200, 'gamma': 0.9849483732635655, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.018299595579119008, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.


Trial 496 finished with value: 0.7833288548546987 and parameters: {'lr': 0.003955409872163971, 'wd': 0.0004449971795720918, 'warmup': 200, 'gamma': 0.9849483732635655, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.018299595579119008, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 02:30:52,037] Trial 497 finished with value: 0.7163052949603325 and parameters: {'lr': 0.008715228072437792, 'wd': 0.0016181070805068051, 'warmup': 250, 'gamma': 0.9840092946968914, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1571523613812121, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 497 finished with value: 0.7163052949603325 and parameters: {'lr': 0.008715228072437792, 'wd': 0.0016181070805068051, 'warmup': 250, 'gamma': 0.9840092946968914, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1571523613812121, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-29 02:33:49,103] Trial 498 finished with value: 0.7903782642463183 and parameters: {'lr': 0.007756440888653614, 'wd': 3.0425682654963676e-05, 'warmup': 50, 'gamma': 0.9798771935878864, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1623739052138343, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 498 finished with value: 0.7903782642463183 and parameters: {'lr': 0.007756440888653614, 'wd': 3.0425682654963676e-05, 'warmup': 50, 'gamma': 0.9798771935878864, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1623739052138343, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16182144
torch.Size([180])


[I 2024-10-29 02:35:04,912] Trial 499 finished with value: 0.6562464696689208 and parameters: {'lr': 0.009998144637647243, 'wd': 0.0036937867946854755, 'warmup': 200, 'gamma': 0.9823082627523487, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.024471461070755506, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 499 finished with value: 0.6562464696689208 and parameters: {'lr': 0.009998144637647243, 'wd': 0.0036937867946854755, 'warmup': 200, 'gamma': 0.9823082627523487, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.024471461070755506, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19559168
torch.Size([180])


[I 2024-10-29 02:36:33,198] Trial 500 finished with value: 0.6835216707722013 and parameters: {'lr': 0.0068529603758432185, 'wd': 5.5104263855214756e-05, 'warmup': 200, 'gamma': 0.9833410564347049, 'time_dim': 64, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16705472806691168, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 500 finished with value: 0.6835216707722013 and parameters: {'lr': 0.0068529603758432185, 'wd': 5.5104263855214756e-05, 'warmup': 200, 'gamma': 0.9833410564347049, 'time_dim': 64, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16705472806691168, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 9881472
torch.Size([180])


[I 2024-10-29 02:37:26,612] Trial 501 finished with value: 0.7461493163346377 and parameters: {'lr': 0.007825037748995144, 'wd': 9.826926653044968e-05, 'warmup': 50, 'gamma': 0.9853508341468545, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15513868542062173, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 501 finished with value: 0.7461493163346377 and parameters: {'lr': 0.007825037748995144, 'wd': 9.826926653044968e-05, 'warmup': 50, 'gamma': 0.9853508341468545, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15513868542062173, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13237856
torch.Size([180])


[I 2024-10-29 02:39:03,638] Trial 502 finished with value: 0.6993509376992736 and parameters: {'lr': 0.008951425647335224, 'wd': 0.0006666342986365411, 'warmup': 200, 'gamma': 0.981757313083107, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.03029798000974398, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 502 finished with value: 0.6993509376992736 and parameters: {'lr': 0.008951425647335224, 'wd': 0.0006666342986365411, 'warmup': 200, 'gamma': 0.981757313083107, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.03029798000974398, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 02:40:19,944] Trial 503 finished with value: 0.6586597966641023 and parameters: {'lr': 0.006509864030191642, 'wd': 0.000888361954562207, 'warmup': 250, 'gamma': 0.983842603455557, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.18267300177290358, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 503 finished with value: 0.6586597966641023 and parameters: {'lr': 0.006509864030191642, 'wd': 0.000888361954562207, 'warmup': 250, 'gamma': 0.983842603455557, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.18267300177290358, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-29 02:43:16,834] Trial 504 finished with value: 0.7689150855294541 and parameters: {'lr': 0.0053371405235659855, 'wd': 0.0003973802631689358, 'warmup': 300, 'gamma': 0.986020712410666, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.01402958844297419, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 504 finished with value: 0.7689150855294541 and parameters: {'lr': 0.0053371405235659855, 'wd': 0.0003973802631689358, 'warmup': 300, 'gamma': 0.986020712410666, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.01402958844297419, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 02:44:26,967] Trial 505 finished with value: 0.7379637714407439 and parameters: {'lr': 0.007968344751136452, 'wd': 0.0005271404929992618, 'warmup': 100, 'gamma': 0.9830230069020292, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.19272660718838686, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 505 finished with value: 0.7379637714407439 and parameters: {'lr': 0.007968344751136452, 'wd': 0.0005271404929992618, 'warmup': 100, 'gamma': 0.9830230069020292, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.19272660718838686, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19334528
torch.Size([180])


[I 2024-10-29 02:45:54,991] Trial 506 finished with value: 0.672820667742242 and parameters: {'lr': 0.008764960216255325, 'wd': 0.0010675957168965996, 'warmup': 200, 'gamma': 0.9844578095258963, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.0005683806617979019, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 506 finished with value: 0.672820667742242 and parameters: {'lr': 0.008764960216255325, 'wd': 0.0010675957168965996, 'warmup': 200, 'gamma': 0.9844578095258963, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.0005683806617979019, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 18150784
torch.Size([180])


[I 2024-10-29 02:47:13,562] Trial 507 finished with value: 0.751446676423752 and parameters: {'lr': 0.009960397502718479, 'wd': 0.0007584954787117521, 'warmup': 50, 'gamma': 0.9837025510528764, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.1744982010227762, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 507 finished with value: 0.751446676423752 and parameters: {'lr': 0.009960397502718479, 'wd': 0.0007584954787117521, 'warmup': 50, 'gamma': 0.9837025510528764, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.1744982010227762, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19735680
torch.Size([180])


[I 2024-10-29 02:48:44,949] Trial 508 finished with value: 0.6760027098189224 and parameters: {'lr': 0.006965089197658153, 'wd': 0.0006179947158063961, 'warmup': 200, 'gamma': 0.9824042478083779, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15987617060647566, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 508 finished with value: 0.6760027098189224 and parameters: {'lr': 0.006965089197658153, 'wd': 0.0006179947158063961, 'warmup': 200, 'gamma': 0.9824042478083779, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15987617060647566, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 02:49:54,906] Trial 509 finished with value: 0.7480468094623483 and parameters: {'lr': 0.006165826751282809, 'wd': 0.002925602513956107, 'warmup': 250, 'gamma': 0.9851076332641964, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16893208716321162, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 509 finished with value: 0.7480468094623483 and parameters: {'lr': 0.006165826751282809, 'wd': 0.002925602513956107, 'warmup': 250, 'gamma': 0.9851076332641964, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16893208716321162, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13819744
torch.Size([180])


[I 2024-10-29 02:51:32,276] Trial 510 finished with value: 0.7530594273367027 and parameters: {'lr': 0.007674615307170282, 'wd': 0.0003503667514827273, 'warmup': 50, 'gamma': 0.9830459253577025, 'time_dim': 16, 'patch_size': 16, 'depth': 4, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.020907903989631327, 'schedule': 'cosine'}. Best is trial 270 with value: 0.5098186230222284.


Trial 510 finished with value: 0.7530594273367027 and parameters: {'lr': 0.007674615307170282, 'wd': 0.0003503667514827273, 'warmup': 50, 'gamma': 0.9830459253577025, 'time_dim': 16, 'patch_size': 16, 'depth': 4, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.020907903989631327, 'schedule': 'cosine'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16217680
torch.Size([180])


[I 2024-10-29 02:54:47,640] Trial 511 finished with value: 0.6817051625540914 and parameters: {'lr': 0.008689746632932458, 'wd': 0.00047064548707343123, 'warmup': 200, 'gamma': 0.9845512576809314, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.19617765039206495, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 511 finished with value: 0.6817051625540914 and parameters: {'lr': 0.008689746632932458, 'wd': 0.00047064548707343123, 'warmup': 200, 'gamma': 0.9845512576809314, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.19617765039206495, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12248704
torch.Size([180])


[I 2024-10-29 02:55:50,655] Trial 512 finished with value: 0.7301341921900355 and parameters: {'lr': 0.007248764989310328, 'wd': 7.309899245187162e-05, 'warmup': 200, 'gamma': 0.9811658342436186, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.006607625868757726, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 512 finished with value: 0.7301341921900355 and parameters: {'lr': 0.007248764989310328, 'wd': 7.309899245187162e-05, 'warmup': 200, 'gamma': 0.9811658342436186, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.006607625868757726, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15197824
torch.Size([180])


[I 2024-10-29 02:57:04,629] Trial 513 finished with value: 0.6669531755325765 and parameters: {'lr': 0.009989034627472264, 'wd': 0.0012787710789352803, 'warmup': 50, 'gamma': 0.9839706410358703, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.16426208900339823, 'schedule': 'constant_with_warmup'}. Best is trial 270 with value: 0.5098186230222284.


Trial 513 finished with value: 0.6669531755325765 and parameters: {'lr': 0.009989034627472264, 'wd': 0.0012787710789352803, 'warmup': 50, 'gamma': 0.9839706410358703, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.16426208900339823, 'schedule': 'constant_with_warmup'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16975232
torch.Size([180])


[I 2024-10-29 02:58:25,433] Trial 514 finished with value: 0.7749421427412218 and parameters: {'lr': 0.0047283042444277845, 'wd': 0.001758036001100865, 'warmup': 200, 'gamma': 0.985540315987501, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1486320079632133, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 514 finished with value: 0.7749421427412218 and parameters: {'lr': 0.0047283042444277845, 'wd': 0.001758036001100865, 'warmup': 200, 'gamma': 0.985540315987501, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1486320079632133, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14279424
torch.Size([180])


[I 2024-10-29 02:59:35,476] Trial 515 finished with value: 0.7562360694114122 and parameters: {'lr': 0.005873436983165624, 'wd': 0.0005722923953888934, 'warmup': 100, 'gamma': 0.9820782197134569, 'time_dim': 32, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15518061692337495, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 515 finished with value: 0.7562360694114122 and parameters: {'lr': 0.005873436983165624, 'wd': 0.0005722923953888934, 'warmup': 100, 'gamma': 0.9820782197134569, 'time_dim': 32, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15518061692337495, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 03:00:47,664] Trial 516 finished with value: 0.7880676902559896 and parameters: {'lr': 0.007978874093806966, 'wd': 0.0021640516549045197, 'warmup': 200, 'gamma': 0.9834728724233063, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.02581329668469111, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 516 finished with value: 0.7880676902559896 and parameters: {'lr': 0.007978874093806966, 'wd': 0.0021640516549045197, 'warmup': 200, 'gamma': 0.9834728724233063, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.02581329668469111, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 17012048
torch.Size([180])


[I 2024-10-29 03:04:14,899] Trial 517 finished with value: 0.7418058461310651 and parameters: {'lr': 0.006809959418228635, 'wd': 0.00038318038970391687, 'warmup': 250, 'gamma': 0.9827021126927615, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.016496465691462722, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 517 finished with value: 0.7418058461310651 and parameters: {'lr': 0.006809959418228635, 'wd': 0.00038318038970391687, 'warmup': 250, 'gamma': 0.9827021126927615, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.016496465691462722, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 10667904
torch.Size([180])


[I 2024-10-29 03:05:12,157] Trial 518 finished with value: 0.7633738021864513 and parameters: {'lr': 0.008745892292882643, 'wd': 0.0014392445311775593, 'warmup': 50, 'gamma': 0.9847690223241863, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.035553545730369805, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 518 finished with value: 0.7633738021864513 and parameters: {'lr': 0.008745892292882643, 'wd': 0.0014392445311775593, 'warmup': 50, 'gamma': 0.9847690223241863, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.035553545730369805, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16411616
torch.Size([180])


[I 2024-10-29 03:07:03,696] Trial 519 finished with value: 0.679229588230606 and parameters: {'lr': 0.0075393329080047965, 'wd': 0.0003116205632716246, 'warmup': 200, 'gamma': 0.9841971508971092, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.0108265494745552, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 519 finished with value: 0.679229588230606 and parameters: {'lr': 0.0075393329080047965, 'wd': 0.0003116205632716246, 'warmup': 200, 'gamma': 0.9841971508971092, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.0108265494745552, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19129984
torch.Size([180])


[I 2024-10-29 03:08:29,709] Trial 520 finished with value: 0.6643010399955284 and parameters: {'lr': 0.008908147514014962, 'wd': 0.0006719182917863948, 'warmup': 50, 'gamma': 0.9834219611093352, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 11, 'mlp_dim': 256, 'emb_dropout': 0.1593096989605012, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 520 finished with value: 0.6643010399955284 and parameters: {'lr': 0.008908147514014962, 'wd': 0.0006719182917863948, 'warmup': 50, 'gamma': 0.9834219611093352, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 11, 'mlp_dim': 256, 'emb_dropout': 0.1593096989605012, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 03:09:39,826] Trial 521 finished with value: 0.7865601538014213 and parameters: {'lr': 0.0056121623107639835, 'wd': 0.0005002746032620424, 'warmup': 250, 'gamma': 0.9861012431189518, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.187465586785797, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 521 finished with value: 0.7865601538014213 and parameters: {'lr': 0.0056121623107639835, 'wd': 0.0005002746032620424, 'warmup': 250, 'gamma': 0.9861012431189518, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.187465586785797, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13231744
torch.Size([180])


[I 2024-10-29 03:10:47,237] Trial 522 finished with value: 0.7303406190170104 and parameters: {'lr': 0.006553092624048709, 'wd': 0.0008175860557422055, 'warmup': 200, 'gamma': 0.9817057719937314, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1640281722809002, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 522 finished with value: 0.7303406190170104 and parameters: {'lr': 0.006553092624048709, 'wd': 0.0008175860557422055, 'warmup': 200, 'gamma': 0.9817057719937314, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.1640281722809002, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 18154880
torch.Size([180])


[I 2024-10-29 03:12:12,776] Trial 523 finished with value: 0.6738606231202083 and parameters: {'lr': 0.009958789819942507, 'wd': 8.689193519124532e-05, 'warmup': 150, 'gamma': 0.9803188540646135, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.1707930683979267, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.


Trial 523 finished with value: 0.6738606231202083 and parameters: {'lr': 0.009958789819942507, 'wd': 8.689193519124532e-05, 'warmup': 150, 'gamma': 0.9803188540646135, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.1707930683979267, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-29 03:15:09,232] Trial 524 finished with value: 0.6389017630024 and parameters: {'lr': 0.007871509527710043, 'wd': 0.0004517601206314605, 'warmup': 200, 'gamma': 0.9851234635157942, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.031202316388718763, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 524 finished with value: 0.6389017630024 and parameters: {'lr': 0.007871509527710043, 'wd': 0.0004517601206314605, 'warmup': 200, 'gamma': 0.9851234635157942, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.031202316388718763, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 11492176
torch.Size([180])


[I 2024-10-29 03:17:31,423] Trial 525 finished with value: 0.7439946263291147 and parameters: {'lr': 5.764243433377907e-05, 'wd': 0.00043102231653489376, 'warmup': 200, 'gamma': 0.9830684164296901, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 4, 'mlp_dim': 512, 'emb_dropout': 0.02011116457013559, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 525 finished with value: 0.7439946263291147 and parameters: {'lr': 5.764243433377907e-05, 'wd': 0.00043102231653489376, 'warmup': 200, 'gamma': 0.9830684164296901, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 4, 'mlp_dim': 512, 'emb_dropout': 0.02011116457013559, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-29 03:20:28,188] Trial 526 finished with value: 0.7478676850825975 and parameters: {'lr': 0.007156359437764619, 'wd': 0.00044511423099740326, 'warmup': 200, 'gamma': 0.984180615335827, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.005814691966212074, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 526 finished with value: 0.7478676850825975 and parameters: {'lr': 0.007156359437764619, 'wd': 0.00044511423099740326, 'warmup': 200, 'gamma': 0.984180615335827, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.005814691966212074, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 24089936
torch.Size([180])


[I 2024-10-29 03:25:09,940] Trial 527 finished with value: 0.7399239472250373 and parameters: {'lr': 0.006186405284123863, 'wd': 0.0005121215575223895, 'warmup': 200, 'gamma': 0.9866589025741355, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 12, 'mlp_dim': 256, 'emb_dropout': 0.01436938598653708, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 527 finished with value: 0.7399239472250373 and parameters: {'lr': 0.006186405284123863, 'wd': 0.0005121215575223895, 'warmup': 200, 'gamma': 0.9866589025741355, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 12, 'mlp_dim': 256, 'emb_dropout': 0.01436938598653708, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-29 03:28:05,994] Trial 528 finished with value: 0.7956105658177298 and parameters: {'lr': 0.008048488543459574, 'wd': 0.0005634073228222718, 'warmup': 200, 'gamma': 0.9825436799591017, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02417329420872476, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 528 finished with value: 0.7956105658177298 and parameters: {'lr': 0.008048488543459574, 'wd': 0.0005634073228222718, 'warmup': 200, 'gamma': 0.9825436799591017, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.02417329420872476, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 18187600
torch.Size([180])


[I 2024-10-29 03:31:23,024] Trial 529 finished with value: 0.7712814513941989 and parameters: {'lr': 0.005356601785092745, 'wd': 0.00035128406298163444, 'warmup': 200, 'gamma': 0.9851258930897363, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.19976848347184303, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 529 finished with value: 0.7712814513941989 and parameters: {'lr': 0.005356601785092745, 'wd': 0.00035128406298163444, 'warmup': 200, 'gamma': 0.9851258930897363, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.19976848347184303, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16217680
torch.Size([180])


[I 2024-10-29 03:34:37,789] Trial 530 finished with value: 0.7221950240665732 and parameters: {'lr': 0.000280841499953915, 'wd': 0.0003956946024067981, 'warmup': 200, 'gamma': 0.9837435447627597, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.06571450204227741, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 530 finished with value: 0.7221950240665732 and parameters: {'lr': 0.000280841499953915, 'wd': 0.0003956946024067981, 'warmup': 200, 'gamma': 0.9837435447627597, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.06571450204227741, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 17012048
torch.Size([180])


[I 2024-10-29 03:38:04,794] Trial 531 finished with value: 0.6457803156326783 and parameters: {'lr': 0.008823835819254925, 'wd': 0.00047240981746503346, 'warmup': 200, 'gamma': 0.9846836125788884, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.19182614034722548, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 531 finished with value: 0.6457803156326783 and parameters: {'lr': 0.008823835819254925, 'wd': 0.00047240981746503346, 'warmup': 200, 'gamma': 0.9846836125788884, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.19182614034722548, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16217680
torch.Size([180])


[I 2024-10-29 03:41:19,540] Trial 532 finished with value: 0.7520761084200199 and parameters: {'lr': 0.006909087790883373, 'wd': 0.0006603326821861076, 'warmup': 200, 'gamma': 0.9819001843726288, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.17685711252353783, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 532 finished with value: 0.7520761084200199 and parameters: {'lr': 0.006909087790883373, 'wd': 0.0006603326821861076, 'warmup': 200, 'gamma': 0.9819001843726288, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.17685711252353783, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 28053840
torch.Size([180])


[I 2024-10-29 03:46:49,856] Trial 533 finished with value: 0.7602594990278275 and parameters: {'lr': 0.00818078187959995, 'wd': 2.67371009402971e-05, 'warmup': 200, 'gamma': 0.9830210894929651, 'time_dim': 16, 'patch_size': 8, 'depth': 10, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15808572941385826, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 533 finished with value: 0.7602594990278275 and parameters: {'lr': 0.00818078187959995, 'wd': 2.67371009402971e-05, 'warmup': 200, 'gamma': 0.9830210894929651, 'time_dim': 16, 'patch_size': 8, 'depth': 10, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15808572941385826, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 9918288
torch.Size([180])


[I 2024-10-29 03:49:00,025] Trial 534 finished with value: 0.7740355049096025 and parameters: {'lr': 0.00758853251244669, 'wd': 0.0003070845740453818, 'warmup': 100, 'gamma': 0.9856424737848605, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.152528567837333, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 534 finished with value: 0.7740355049096025 and parameters: {'lr': 0.00758853251244669, 'wd': 0.0003070845740453818, 'warmup': 100, 'gamma': 0.9856424737848605, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.152528567837333, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14220896
torch.Size([180])


[I 2024-10-29 03:50:41,690] Trial 535 finished with value: 0.7585529113612929 and parameters: {'lr': 0.006224305913338732, 'wd': 0.0005887391884973127, 'warmup': 50, 'gamma': 0.982384746591441, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03014720832438679, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 535 finished with value: 0.7585529113612929 and parameters: {'lr': 0.006224305913338732, 'wd': 0.0005887391884973127, 'warmup': 50, 'gamma': 0.982384746591441, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03014720832438679, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16217680
torch.Size([180])


[I 2024-10-29 03:53:56,154] Trial 536 finished with value: 0.7533749272093517 and parameters: {'lr': 0.008864376995653995, 'wd': 0.0007612674167008805, 'warmup': 200, 'gamma': 0.983683184824994, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16200411173652587, 'schedule': 'cosine'}. Best is trial 270 with value: 0.5098186230222284.


Trial 536 finished with value: 0.7533749272093517 and parameters: {'lr': 0.008864376995653995, 'wd': 0.0007612674167008805, 'warmup': 200, 'gamma': 0.983683184824994, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16200411173652587, 'schedule': 'cosine'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16990656
torch.Size([180])


[I 2024-10-29 03:55:00,698] Trial 537 finished with value: 0.7360674695187884 and parameters: {'lr': 0.007099940224995286, 'wd': 1.013476319114289e-05, 'warmup': 200, 'gamma': 0.9809469174348069, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.011452028004617313, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 537 finished with value: 0.7360674695187884 and parameters: {'lr': 0.007099940224995286, 'wd': 1.013476319114289e-05, 'warmup': 200, 'gamma': 0.9809469174348069, 'time_dim': 16, 'patch_size': 64, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.011452028004617313, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15203936
torch.Size([180])


[I 2024-10-29 03:56:47,779] Trial 538 finished with value: 0.7424645175192222 and parameters: {'lr': 0.008920258268425827, 'wd': 0.0009651345379929384, 'warmup': 50, 'gamma': 0.9841946700959131, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.018494194836945886, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 538 finished with value: 0.7424645175192222 and parameters: {'lr': 0.008920258268425827, 'wd': 0.0009651345379929384, 'warmup': 50, 'gamma': 0.9841946700959131, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.018494194836945886, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14316240
torch.Size([180])


[I 2024-10-29 03:59:44,145] Trial 539 finished with value: 0.7499958251997486 and parameters: {'lr': 0.0018900954276443954, 'wd': 0.0004314134362342371, 'warmup': 300, 'gamma': 0.9846959806750154, 'time_dim': 32, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14065327348782306, 'schedule': 'constant_with_warmup'}. Best is trial 270 with value: 0.5098186230222284.


Trial 539 finished with value: 0.7499958251997486 and parameters: {'lr': 0.0018900954276443954, 'wd': 0.0004314134362342371, 'warmup': 300, 'gamma': 0.9846959806750154, 'time_dim': 32, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14065327348782306, 'schedule': 'constant_with_warmup'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16983168
torch.Size([180])


[I 2024-10-29 04:01:05,541] Trial 540 finished with value: 0.7315657358330838 and parameters: {'lr': 0.00776172363999796, 'wd': 0.0005276407659896002, 'warmup': 200, 'gamma': 0.9833627992039501, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16589034631091484, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 540 finished with value: 0.7315657358330838 and parameters: {'lr': 0.00776172363999796, 'wd': 0.0005276407659896002, 'warmup': 200, 'gamma': 0.9833627992039501, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16589034631091484, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 18149504
torch.Size([180])


[I 2024-10-29 04:02:24,828] Trial 541 finished with value: 0.7594505224733373 and parameters: {'lr': 0.0046112582105685695, 'wd': 0.0002800823820124405, 'warmup': 50, 'gamma': 0.9827076226586625, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.19475315620138556, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 541 finished with value: 0.7594505224733373 and parameters: {'lr': 0.0046112582105685695, 'wd': 0.0002800823820124405, 'warmup': 50, 'gamma': 0.9827076226586625, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.19475315620138556, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13251840
torch.Size([180])


[I 2024-10-29 04:03:29,194] Trial 542 finished with value: 0.7651190238904237 and parameters: {'lr': 0.005289037319641849, 'wd': 0.0006709444462003536, 'warmup': 200, 'gamma': 0.9851403479994116, 'time_dim': 64, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.1566120936193365, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 542 finished with value: 0.7651190238904237 and parameters: {'lr': 0.005289037319641849, 'wd': 0.0006709444462003536, 'warmup': 200, 'gamma': 0.9851403479994116, 'time_dim': 64, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.1566120936193365, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 04:04:39,424] Trial 543 finished with value: 0.7805505284523391 and parameters: {'lr': 2.5560896325526546e-05, 'wd': 0.0003813836614293243, 'warmup': 100, 'gamma': 0.983990726837916, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.026523821873695303, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 543 finished with value: 0.7805505284523391 and parameters: {'lr': 2.5560896325526546e-05, 'wd': 0.0003813836614293243, 'warmup': 100, 'gamma': 0.983990726837916, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.026523821873695303, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14652752
torch.Size([180])


[I 2024-10-29 04:07:45,557] Trial 544 finished with value: 0.7487800091969484 and parameters: {'lr': 0.009967126415928007, 'wd': 3.6185919501238374e-05, 'warmup': 200, 'gamma': 0.9814673303066507, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16958737578515154, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 544 finished with value: 0.7487800091969484 and parameters: {'lr': 0.009967126415928007, 'wd': 3.6185919501238374e-05, 'warmup': 200, 'gamma': 0.9814673303066507, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.16958737578515154, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13231744
torch.Size([180])


[I 2024-10-29 04:08:52,823] Trial 545 finished with value: 0.7848668293506694 and parameters: {'lr': 0.009992494602746236, 'wd': 0.0008617704264402811, 'warmup': 50, 'gamma': 0.9856989557956441, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.004755996700040014, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 545 finished with value: 0.7848668293506694 and parameters: {'lr': 0.009992494602746236, 'wd': 0.0008617704264402811, 'warmup': 50, 'gamma': 0.9856989557956441, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.004755996700040014, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 04:10:02,706] Trial 546 finished with value: 0.7311699273509149 and parameters: {'lr': 0.006510412381515705, 'wd': 0.000592021835625669, 'warmup': 200, 'gamma': 0.9821784802417208, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17990066545580308, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 546 finished with value: 0.7311699273509149 and parameters: {'lr': 0.006510412381515705, 'wd': 0.000592021835625669, 'warmup': 200, 'gamma': 0.9821784802417208, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.17990066545580308, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15203936
torch.Size([180])


[I 2024-10-29 04:11:49,978] Trial 547 finished with value: 0.7540317775678068 and parameters: {'lr': 0.007922040913680062, 'wd': 0.00044797228542728265, 'warmup': 200, 'gamma': 0.9844748792797474, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.1497865973883952, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 547 finished with value: 0.7540317775678068 and parameters: {'lr': 0.007922040913680062, 'wd': 0.00044797228542728265, 'warmup': 200, 'gamma': 0.9844748792797474, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.1497865973883952, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 21695360
torch.Size([180])


[I 2024-10-29 04:13:25,314] Trial 548 finished with value: 0.711071397745291 and parameters: {'lr': 0.006149451672915719, 'wd': 0.0005004056727201284, 'warmup': 50, 'gamma': 0.9833412840288814, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 512, 'emb_dropout': 0.020732106727693992, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 548 finished with value: 0.711071397745291 and parameters: {'lr': 0.006149451672915719, 'wd': 0.0005004056727201284, 'warmup': 50, 'gamma': 0.9833412840288814, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 8, 'mlp_dim': 512, 'emb_dropout': 0.020732106727693992, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 11469760
torch.Size([180])


[I 2024-10-29 04:14:13,664] Trial 549 finished with value: 0.7455567679991996 and parameters: {'lr': 0.007220508981164844, 'wd': 0.0010992864066708748, 'warmup': 200, 'gamma': 0.9828629826708141, 'time_dim': 16, 'patch_size': 64, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18484971542234996, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 549 finished with value: 0.7455567679991996 and parameters: {'lr': 0.007220508981164844, 'wd': 0.0010992864066708748, 'warmup': 200, 'gamma': 0.9828629826708141, 'time_dim': 16, 'patch_size': 64, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.18484971542234996, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19350400
torch.Size([180])


[I 2024-10-29 04:15:44,037] Trial 550 finished with value: 0.6869797762132528 and parameters: {'lr': 0.008245829090643971, 'wd': 0.0007220529192423389, 'warmup': 50, 'gamma': 0.9839181707200504, 'time_dim': 16, 'patch_size': 32, 'depth': 8, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.14522436602571792, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.


Trial 550 finished with value: 0.6869797762132528 and parameters: {'lr': 0.008245829090643971, 'wd': 0.0007220529192423389, 'warmup': 50, 'gamma': 0.9839181707200504, 'time_dim': 16, 'patch_size': 32, 'depth': 8, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.14522436602571792, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-29 04:18:40,791] Trial 551 finished with value: 0.5963424080766782 and parameters: {'lr': 0.008963094098345817, 'wd': 0.0005785868652910465, 'warmup': 100, 'gamma': 0.9860905159824258, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16138461321302558, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 551 finished with value: 0.5963424080766782 and parameters: {'lr': 0.008963094098345817, 'wd': 0.0005785868652910465, 'warmup': 100, 'gamma': 0.9860905159824258, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16138461321302558, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13268560
torch.Size([180])


[I 2024-10-29 04:21:30,204] Trial 552 finished with value: 0.7547516163386991 and parameters: {'lr': 0.00849453553740842, 'wd': 0.0005332416040220398, 'warmup': 100, 'gamma': 0.9868611507382756, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.16004176130655068, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 552 finished with value: 0.7547516163386991 and parameters: {'lr': 0.00849453553740842, 'wd': 0.0005332416040220398, 'warmup': 100, 'gamma': 0.9868611507382756, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.16004176130655068, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16217680
torch.Size([180])


[I 2024-10-29 04:24:45,522] Trial 553 finished with value: 0.7596964362930739 and parameters: {'lr': 0.005720080911841564, 'wd': 0.0006029536019323241, 'warmup': 100, 'gamma': 0.986428631172111, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16177558784161558, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 553 finished with value: 0.7596964362930739 and parameters: {'lr': 0.005720080911841564, 'wd': 0.0006029536019323241, 'warmup': 100, 'gamma': 0.986428631172111, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16177558784161558, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 20153680
torch.Size([180])


[I 2024-10-29 04:28:20,523] Trial 554 finished with value: 0.756962038261024 and parameters: {'lr': 0.0069648587944075874, 'wd': 0.0006490978795070037, 'warmup': 100, 'gamma': 0.9873099286301733, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.05335543950891413, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 554 finished with value: 0.756962038261024 and parameters: {'lr': 0.0069648587944075874, 'wd': 0.0006490978795070037, 'warmup': 100, 'gamma': 0.9873099286301733, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.05335543950891413, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 17200720
torch.Size([180])


[I 2024-10-29 04:31:47,465] Trial 555 finished with value: 0.7697240986741731 and parameters: {'lr': 0.00916631373938109, 'wd': 0.0004791567764343815, 'warmup': 100, 'gamma': 0.9858666858828641, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.16666710903839838, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 555 finished with value: 0.7697240986741731 and parameters: {'lr': 0.00916631373938109, 'wd': 0.0004791567764343815, 'warmup': 100, 'gamma': 0.9858666858828641, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.16666710903839838, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14652752
torch.Size([180])


[I 2024-10-29 04:34:55,273] Trial 556 finished with value: 0.7550450428889637 and parameters: {'lr': 0.007496526293093679, 'wd': 0.000717326137445214, 'warmup': 100, 'gamma': 0.9866322559311043, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15435761091766534, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 556 finished with value: 0.7550450428889637 and parameters: {'lr': 0.007496526293093679, 'wd': 0.000717326137445214, 'warmup': 100, 'gamma': 0.9866322559311043, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15435761091766534, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16217680
torch.Size([180])


[I 2024-10-29 04:38:09,478] Trial 557 finished with value: 0.7302527547910305 and parameters: {'lr': 0.009907744347296623, 'wd': 6.336988798427965e-05, 'warmup': 100, 'gamma': 0.9862756238367384, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16421534035621405, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 557 finished with value: 0.7302527547910305 and parameters: {'lr': 0.009907744347296623, 'wd': 6.336988798427965e-05, 'warmup': 100, 'gamma': 0.9862756238367384, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16421534035621405, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 11491152
torch.Size([180])


[I 2024-10-29 04:40:34,229] Trial 558 finished with value: 0.7472159097876524 and parameters: {'lr': 0.006606165836115728, 'wd': 0.0003541131988475059, 'warmup': 100, 'gamma': 0.9854696342497327, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15756307756260785, 'schedule': 'cosine'}. Best is trial 270 with value: 0.5098186230222284.


Trial 558 finished with value: 0.7472159097876524 and parameters: {'lr': 0.006606165836115728, 'wd': 0.0003541131988475059, 'warmup': 100, 'gamma': 0.9854696342497327, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15756307756260785, 'schedule': 'cosine'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-29 04:43:30,755] Trial 559 finished with value: 0.734151918855732 and parameters: {'lr': 0.008720217910862268, 'wd': 0.0005406213133458186, 'warmup': 100, 'gamma': 0.9858378831052402, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16882039243134137, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 559 finished with value: 0.734151918855732 and parameters: {'lr': 0.008720217910862268, 'wd': 0.0005406213133458186, 'warmup': 100, 'gamma': 0.9858378831052402, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16882039243134137, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15897040
torch.Size([180])


[I 2024-10-29 04:46:49,589] Trial 560 finished with value: 0.7981004405470328 and parameters: {'lr': 0.005129006377910752, 'wd': 0.00041131894632984113, 'warmup': 100, 'gamma': 0.9851144810511148, 'time_dim': 32, 'patch_size': 8, 'depth': 6, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.16209606341428237, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 560 finished with value: 0.7981004405470328 and parameters: {'lr': 0.005129006377910752, 'wd': 0.00041131894632984113, 'warmup': 100, 'gamma': 0.9851144810511148, 'time_dim': 32, 'patch_size': 8, 'depth': 6, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.16209606341428237, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-29 04:49:45,931] Trial 561 finished with value: 0.7969121907150016 and parameters: {'lr': 0.007949466893405225, 'wd': 0.0005954472253566453, 'warmup': 200, 'gamma': 0.9736686292516217, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15312555744494197, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 561 finished with value: 0.7969121907150016 and parameters: {'lr': 0.007949466893405225, 'wd': 0.0005954472253566453, 'warmup': 200, 'gamma': 0.9736686292516217, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15312555744494197, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16217680
torch.Size([180])


[I 2024-10-29 04:53:00,596] Trial 562 finished with value: 0.7263406237794243 and parameters: {'lr': 0.006039093564837837, 'wd': 0.0009174546970096816, 'warmup': 200, 'gamma': 0.984876457384424, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.009300714211123436, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 562 finished with value: 0.7263406237794243 and parameters: {'lr': 0.006039093564837837, 'wd': 0.0009174546970096816, 'warmup': 200, 'gamma': 0.984876457384424, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.009300714211123436, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 18153056
torch.Size([180])


[I 2024-10-29 04:55:01,069] Trial 563 finished with value: 0.7672790489210741 and parameters: {'lr': 0.007123134654822029, 'wd': 0.0007861986753663524, 'warmup': 50, 'gamma': 0.9861195660205012, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 256, 'emb_dropout': 0.17254368626266225, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 563 finished with value: 0.7672790489210741 and parameters: {'lr': 0.007123134654822029, 'wd': 0.0007861986753663524, 'warmup': 50, 'gamma': 0.9861195660205012, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 256, 'emb_dropout': 0.17254368626266225, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 17236688
torch.Size([180])


[I 2024-10-29 04:58:28,789] Trial 564 finished with value: 0.7122708891440291 and parameters: {'lr': 0.008837809205232451, 'wd': 0.0004895445821121669, 'warmup': 200, 'gamma': 0.9854947853582499, 'time_dim': 64, 'patch_size': 8, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15906494019436695, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 564 finished with value: 0.7122708891440291 and parameters: {'lr': 0.008837809205232451, 'wd': 0.0004895445821121669, 'warmup': 200, 'gamma': 0.9854947853582499, 'time_dim': 64, 'patch_size': 8, 'depth': 6, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15906494019436695, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12264128
torch.Size([180])


[I 2024-10-29 04:59:20,578] Trial 565 finished with value: 0.7961697949234554 and parameters: {'lr': 0.0042029265177564315, 'wd': 0.0003221279447381033, 'warmup': 100, 'gamma': 0.9848371707339855, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.016670075264450752, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 565 finished with value: 0.7961697949234554 and parameters: {'lr': 0.0042029265177564315, 'wd': 0.0003221279447381033, 'warmup': 100, 'gamma': 0.9848371707339855, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.016670075264450752, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14608224
torch.Size([180])


[I 2024-10-29 05:00:55,535] Trial 566 finished with value: 0.7567323857303733 and parameters: {'lr': 0.009985790194094334, 'wd': 0.0006599487716909556, 'warmup': 150, 'gamma': 0.9871516023866264, 'time_dim': 16, 'patch_size': 16, 'depth': 4, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.023282677647887988, 'schedule': 'constant_with_warmup'}. Best is trial 270 with value: 0.5098186230222284.


Trial 566 finished with value: 0.7567323857303733 and parameters: {'lr': 0.009985790194094334, 'wd': 0.0006599487716909556, 'warmup': 150, 'gamma': 0.9871516023866264, 'time_dim': 16, 'patch_size': 16, 'depth': 4, 'heads': 6, 'mlp_dim': 768, 'emb_dropout': 0.023282677647887988, 'schedule': 'constant_with_warmup'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14251600
torch.Size([180])


[I 2024-10-29 05:03:51,950] Trial 567 finished with value: 0.7730699134387543 and parameters: {'lr': 0.0075017111599859155, 'wd': 5.213296469065868e-05, 'warmup': 200, 'gamma': 0.9844144996959846, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16469332210740745, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 567 finished with value: 0.7730699134387543 and parameters: {'lr': 0.0075017111599859155, 'wd': 5.213296469065868e-05, 'warmup': 200, 'gamma': 0.9844144996959846, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16469332210740745, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15197824
torch.Size([180])


[I 2024-10-29 05:05:06,205] Trial 568 finished with value: 0.726360623235861 and parameters: {'lr': 0.00657113700130741, 'wd': 0.00839982036301237, 'warmup': 50, 'gamma': 0.9815068133850676, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.15599612996869672, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 568 finished with value: 0.726360623235861 and parameters: {'lr': 0.00657113700130741, 'wd': 0.00839982036301237, 'warmup': 50, 'gamma': 0.9815068133850676, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.15599612996869672, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14615936
torch.Size([180])


[I 2024-10-29 05:06:18,539] Trial 569 finished with value: 0.7558980100488983 and parameters: {'lr': 0.008090630720134651, 'wd': 0.0004428494106347878, 'warmup': 200, 'gamma': 0.9807305038161934, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.027706130208769723, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 569 finished with value: 0.7558980100488983 and parameters: {'lr': 0.008090630720134651, 'wd': 0.0004428494106347878, 'warmup': 200, 'gamma': 0.9807305038161934, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.027706130208769723, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19735680
torch.Size([180])


[I 2024-10-29 05:07:50,139] Trial 570 finished with value: 0.773870810038409 and parameters: {'lr': 0.008897232595793758, 'wd': 0.000550386794962657, 'warmup': 50, 'gamma': 0.9823279614512572, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.013956005262425547, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 570 finished with value: 0.773870810038409 and parameters: {'lr': 0.008897232595793758, 'wd': 0.000550386794962657, 'warmup': 50, 'gamma': 0.9823279614512572, 'time_dim': 16, 'patch_size': 32, 'depth': 7, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.013956005262425547, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13268560
torch.Size([180])


[I 2024-10-29 05:10:39,138] Trial 571 finished with value: 0.7597737625690734 and parameters: {'lr': 0.005532788045901029, 'wd': 0.00025721348088311453, 'warmup': 200, 'gamma': 0.9854657338647979, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.17264832600449637, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 571 finished with value: 0.7597737625690734 and parameters: {'lr': 0.005532788045901029, 'wd': 0.00025721348088311453, 'warmup': 200, 'gamma': 0.9854657338647979, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.17264832600449637, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 17163904
torch.Size([180])


[I 2024-10-29 05:11:59,068] Trial 572 finished with value: 0.7707016471293817 and parameters: {'lr': 0.007227525412478878, 'wd': 0.0007376515944676957, 'warmup': 300, 'gamma': 0.9843454414378748, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.14836423721493663, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 572 finished with value: 0.7707016471293817 and parameters: {'lr': 0.007227525412478878, 'wd': 0.0007376515944676957, 'warmup': 300, 'gamma': 0.9843454414378748, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 9, 'mlp_dim': 256, 'emb_dropout': 0.14836423721493663, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13027200
torch.Size([180])


[I 2024-10-29 05:13:03,158] Trial 573 finished with value: 0.670197128099354 and parameters: {'lr': 0.0088519118012271, 'wd': 0.00037894059187048174, 'warmup': 200, 'gamma': 0.9836918391541062, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.003951547178073407, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 573 finished with value: 0.670197128099354 and parameters: {'lr': 0.0088519118012271, 'wd': 0.00037894059187048174, 'warmup': 200, 'gamma': 0.9836918391541062, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.003951547178073407, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16182144
torch.Size([180])


[I 2024-10-29 05:14:18,788] Trial 574 finished with value: 0.7780590343056467 and parameters: {'lr': 0.006396801207477763, 'wd': 0.0005921760608902418, 'warmup': 100, 'gamma': 0.9829993620932586, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.16120735005409592, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 574 finished with value: 0.7780590343056467 and parameters: {'lr': 0.006396801207477763, 'wd': 0.0005921760608902418, 'warmup': 100, 'gamma': 0.9829993620932586, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 512, 'emb_dropout': 0.16120735005409592, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12254816
torch.Size([180])


[I 2024-10-29 05:15:50,563] Trial 575 finished with value: 0.7379168776052881 and parameters: {'lr': 0.008002614335360701, 'wd': 0.00014676996281557305, 'warmup': 50, 'gamma': 0.9819620632980022, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15249729158911352, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.


Trial 575 finished with value: 0.7379168776052881 and parameters: {'lr': 0.008002614335360701, 'wd': 0.00014676996281557305, 'warmup': 50, 'gamma': 0.9819620632980022, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15249729158911352, 'schedule': 'linear'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16196288
torch.Size([180])


[I 2024-10-29 05:16:52,303] Trial 576 finished with value: 0.6771959099983251 and parameters: {'lr': 0.00997206304051287, 'wd': 0.0004636588800501839, 'warmup': 200, 'gamma': 0.9851063811045198, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16770596525374534, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 576 finished with value: 0.6771959099983251 and parameters: {'lr': 0.00997206304051287, 'wd': 0.0004636588800501839, 'warmup': 200, 'gamma': 0.9851063811045198, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.16770596525374534, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 24094544
torch.Size([180])


[I 2024-10-29 05:21:05,287] Trial 577 finished with value: 0.7488886052671205 and parameters: {'lr': 0.007336883111687036, 'wd': 0.0008681817635186659, 'warmup': 200, 'gamma': 0.9861614169133172, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.041653220326195985, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 577 finished with value: 0.7488886052671205 and parameters: {'lr': 0.007336883111687036, 'wd': 0.0008681817635186659, 'warmup': 200, 'gamma': 0.9861614169133172, 'time_dim': 16, 'patch_size': 8, 'depth': 6, 'heads': 6, 'mlp_dim': 1024, 'emb_dropout': 0.041653220326195985, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 05:22:15,356] Trial 578 finished with value: 0.6828435395754654 and parameters: {'lr': 0.005862504939561094, 'wd': 0.0005168024421934225, 'warmup': 50, 'gamma': 0.9842902458328248, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15769833719426735, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 578 finished with value: 0.6828435395754654 and parameters: {'lr': 0.005862504939561094, 'wd': 0.0005168024421934225, 'warmup': 50, 'gamma': 0.9842902458328248, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.15769833719426735, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15197824
torch.Size([180])


[I 2024-10-29 05:23:29,415] Trial 579 finished with value: 0.7067663866188506 and parameters: {'lr': 0.00878161843265718, 'wd': 0.0006355257303415706, 'warmup': 200, 'gamma': 0.9834698115884906, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.009700414596244185, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 579 finished with value: 0.7067663866188506 and parameters: {'lr': 0.00878161843265718, 'wd': 0.0006355257303415706, 'warmup': 200, 'gamma': 0.9834698115884906, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 256, 'emb_dropout': 0.009700414596244185, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 15795584
torch.Size([180])


[I 2024-10-29 05:24:47,006] Trial 580 finished with value: 0.8183743907346681 and parameters: {'lr': 0.004863142583654572, 'wd': 0.00039593292136933124, 'warmup': 100, 'gamma': 0.9827328827523881, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.0003484431554451701, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 580 finished with value: 0.8183743907346681 and parameters: {'lr': 0.004863142583654572, 'wd': 0.00039593292136933124, 'warmup': 100, 'gamma': 0.9827328827523881, 'time_dim': 16, 'patch_size': 32, 'depth': 6, 'heads': 5, 'mlp_dim': 256, 'emb_dropout': 0.0003484431554451701, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 11454336
torch.Size([180])


[I 2024-10-29 05:25:46,283] Trial 581 finished with value: 0.7666957278379555 and parameters: {'lr': 0.006674235397435695, 'wd': 0.0006745862490361215, 'warmup': 200, 'gamma': 0.9811974849036864, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1621001856194985, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 581 finished with value: 0.7666957278379555 and parameters: {'lr': 0.006674235397435695, 'wd': 0.0006745862490361215, 'warmup': 200, 'gamma': 0.9811974849036864, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.1621001856194985, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12350160
torch.Size([180])


[I 2024-10-29 05:28:25,138] Trial 582 finished with value: 0.7213695848501605 and parameters: {'lr': 0.007947796044174232, 'wd': 4.532789916207604e-05, 'warmup': 50, 'gamma': 0.9840013061947493, 'time_dim': 32, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.020289916042344175, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 582 finished with value: 0.7213695848501605 and parameters: {'lr': 0.007947796044174232, 'wd': 4.532789916207604e-05, 'warmup': 50, 'gamma': 0.9840013061947493, 'time_dim': 32, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.020289916042344175, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 16180864
torch.Size([180])


[I 2024-10-29 05:29:41,302] Trial 583 finished with value: 0.7643967811702091 and parameters: {'lr': 0.00997116586553562, 'wd': 0.0008084975455496778, 'warmup': 200, 'gamma': 0.9846997420520825, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.0244203320622285, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 583 finished with value: 0.7643967811702091 and parameters: {'lr': 0.00997116586553562, 'wd': 0.0008084975455496778, 'warmup': 200, 'gamma': 0.9846997420520825, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.0244203320622285, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 05:30:51,177] Trial 584 finished with value: 0.739702412995505 and parameters: {'lr': 0.007869093164560852, 'wd': 0.0005454635789667526, 'warmup': 50, 'gamma': 0.9823296372636523, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03149272724334259, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.


Trial 584 finished with value: 0.739702412995505 and parameters: {'lr': 0.007869093164560852, 'wd': 0.0005454635789667526, 'warmup': 50, 'gamma': 0.9823296372636523, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.03149272724334259, 'schedule': 'cosine_with_restarts'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14622048
torch.Size([180])


[I 2024-10-29 05:32:37,614] Trial 585 finished with value: 0.7651380502914173 and parameters: {'lr': 0.006560639654421063, 'wd': 0.006808159961493324, 'warmup': 200, 'gamma': 0.9855589499294753, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15631284467745546, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 585 finished with value: 0.7651380502914173 and parameters: {'lr': 0.006560639654421063, 'wd': 0.006808159961493324, 'warmup': 200, 'gamma': 0.9855589499294753, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.15631284467745546, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 05:33:47,647] Trial 586 finished with value: 0.743824406581214 and parameters: {'lr': 0.008817166409116988, 'wd': 0.0010165634904146547, 'warmup': 200, 'gamma': 0.9868396382944877, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16482294803484834, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 586 finished with value: 0.743824406581214 and parameters: {'lr': 0.008817166409116988, 'wd': 0.0010165634904146547, 'warmup': 200, 'gamma': 0.9868396382944877, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.16482294803484834, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 13064016
torch.Size([180])


[I 2024-10-29 05:36:27,407] Trial 587 finished with value: 0.7746566384212386 and parameters: {'lr': 0.005674178719799897, 'wd': 0.0003357180177936019, 'warmup': 100, 'gamma': 0.9833715578092199, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.016048107455412363, 'schedule': 'cosine'}. Best is trial 270 with value: 0.5098186230222284.


Trial 587 finished with value: 0.7746566384212386 and parameters: {'lr': 0.005674178719799897, 'wd': 0.0003357180177936019, 'warmup': 100, 'gamma': 0.9833715578092199, 'time_dim': 16, 'patch_size': 8, 'depth': 4, 'heads': 8, 'mlp_dim': 256, 'emb_dropout': 0.016048107455412363, 'schedule': 'cosine'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 12473344
torch.Size([180])


[I 2024-10-29 05:37:30,379] Trial 588 finished with value: 0.7394060600751503 and parameters: {'lr': 0.007232799216476892, 'wd': 0.00048641109208010197, 'warmup': 50, 'gamma': 0.9850070668956632, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.151398998247224, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 588 finished with value: 0.7394060600751503 and parameters: {'lr': 0.007232799216476892, 'wd': 0.00048641109208010197, 'warmup': 50, 'gamma': 0.9850070668956632, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 256, 'emb_dropout': 0.151398998247224, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 14214784
torch.Size([180])


[I 2024-10-29 05:38:40,389] Trial 589 finished with value: 0.6364507698726996 and parameters: {'lr': 0.008707202170024051, 'wd': 0.00041698933984426524, 'warmup': 200, 'gamma': 0.9818645768528892, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14511419864988623, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 589 finished with value: 0.6364507698726996 and parameters: {'lr': 0.008707202170024051, 'wd': 0.00041698933984426524, 'warmup': 200, 'gamma': 0.9818645768528892, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 6, 'mlp_dim': 256, 'emb_dropout': 0.14511419864988623, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 05:40:03,636] Trial 590 finished with value: 0.5515645845422917 and parameters: {'lr': 0.009062206540280891, 'wd': 0.0003977580733293694, 'warmup': 150, 'gamma': 0.981859769755393, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14146979683103994, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 590 finished with value: 0.5515645845422917 and parameters: {'lr': 0.009062206540280891, 'wd': 0.0003977580733293694, 'warmup': 150, 'gamma': 0.981859769755393, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14146979683103994, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 05:41:27,014] Trial 591 finished with value: 0.6338083954039144 and parameters: {'lr': 0.009918401247835171, 'wd': 0.00036284106713342326, 'warmup': 200, 'gamma': 0.9811874952910573, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13753205611118471, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 591 finished with value: 0.6338083954039144 and parameters: {'lr': 0.009918401247835171, 'wd': 0.00036284106713342326, 'warmup': 200, 'gamma': 0.9811874952910573, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13753205611118471, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 05:42:50,339] Trial 592 finished with value: 0.745283358364 and parameters: {'lr': 0.009965911059187229, 'wd': 0.00038844509521155024, 'warmup': 150, 'gamma': 0.9801372776343252, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13661461312429624, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 592 finished with value: 0.745283358364 and parameters: {'lr': 0.009965911059187229, 'wd': 0.00038844509521155024, 'warmup': 150, 'gamma': 0.9801372776343252, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13661461312429624, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 05:44:13,593] Trial 593 finished with value: 0.749522507464343 and parameters: {'lr': 0.009049440525296647, 'wd': 0.00028658266298397367, 'warmup': 150, 'gamma': 0.981250981542996, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1413235932194119, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 593 finished with value: 0.749522507464343 and parameters: {'lr': 0.009049440525296647, 'wd': 0.00028658266298397367, 'warmup': 150, 'gamma': 0.981250981542996, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1413235932194119, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 05:45:36,762] Trial 594 finished with value: 0.7074853123843332 and parameters: {'lr': 0.00021243701523038794, 'wd': 0.00031373505799746544, 'warmup': 150, 'gamma': 0.9805707870986341, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14602654843483, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 594 finished with value: 0.7074853123843332 and parameters: {'lr': 0.00021243701523038794, 'wd': 0.00031373505799746544, 'warmup': 150, 'gamma': 0.9805707870986341, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14602654843483, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 05:47:00,391] Trial 595 finished with value: 0.5712220295222408 and parameters: {'lr': 0.008893339556531897, 'wd': 0.000276557205297163, 'warmup': 150, 'gamma': 0.9807625656806673, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1377330764906138, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 595 finished with value: 0.5712220295222408 and parameters: {'lr': 0.008893339556531897, 'wd': 0.000276557205297163, 'warmup': 150, 'gamma': 0.9807625656806673, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1377330764906138, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 05:48:23,890] Trial 596 finished with value: 0.7442029527385021 and parameters: {'lr': 0.009042680851815265, 'wd': 0.0002787085798520209, 'warmup': 150, 'gamma': 0.9792574929120499, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1370172323534062, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 596 finished with value: 0.7442029527385021 and parameters: {'lr': 0.009042680851815265, 'wd': 0.0002787085798520209, 'warmup': 150, 'gamma': 0.9792574929120499, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1370172323534062, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 05:49:47,281] Trial 597 finished with value: 0.7258203561612201 and parameters: {'lr': 0.009801040299705079, 'wd': 0.00023204568468041868, 'warmup': 150, 'gamma': 0.9802763168126627, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1311648528376911, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.


Trial 597 finished with value: 0.7258203561612201 and parameters: {'lr': 0.009801040299705079, 'wd': 0.00023204568468041868, 'warmup': 150, 'gamma': 0.9802763168126627, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1311648528376911, 'schedule': 'constant'}. Best is trial 270 with value: 0.5098186230222284.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 05:51:10,482] Trial 598 finished with value: 0.49804964861220785 and parameters: {'lr': 0.009980944985851876, 'wd': 0.0002561887904737375, 'warmup': 150, 'gamma': 0.9793712795623954, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1423146531023044, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 598 finished with value: 0.49804964861220785 and parameters: {'lr': 0.009980944985851876, 'wd': 0.0002561887904737375, 'warmup': 150, 'gamma': 0.9793712795623954, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1423146531023044, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 05:52:33,710] Trial 599 finished with value: 0.7563638504179868 and parameters: {'lr': 0.008895740498067368, 'wd': 0.00021077058399875633, 'warmup': 150, 'gamma': 0.9789841583364732, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14016244091023616, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 599 finished with value: 0.7563638504179868 and parameters: {'lr': 0.008895740498067368, 'wd': 0.00021077058399875633, 'warmup': 150, 'gamma': 0.9789841583364732, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14016244091023616, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 05:53:56,989] Trial 600 finished with value: 0.7590512103749549 and parameters: {'lr': 3.9526856037772684e-05, 'wd': 0.00017335431004624326, 'warmup': 150, 'gamma': 0.9789259133907411, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1456498714334911, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 600 finished with value: 0.7590512103749549 and parameters: {'lr': 3.9526856037772684e-05, 'wd': 0.00017335431004624326, 'warmup': 150, 'gamma': 0.9789259133907411, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1456498714334911, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19147968
torch.Size([180])


[I 2024-10-29 05:55:04,290] Trial 601 finished with value: 0.6729123960721135 and parameters: {'lr': 0.009854273996771861, 'wd': 0.00022291899443543022, 'warmup': 150, 'gamma': 0.9801578095748049, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13697648876471788, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 601 finished with value: 0.6729123960721135 and parameters: {'lr': 0.009854273996771861, 'wd': 0.00022291899443543022, 'warmup': 150, 'gamma': 0.9801578095748049, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13697648876471788, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 05:56:27,578] Trial 602 finished with value: 0.7334800462024033 and parameters: {'lr': 0.00979109648871633, 'wd': 0.00026609427105438123, 'warmup': 150, 'gamma': 0.9781683567475866, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.138627884166792, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 602 finished with value: 0.7334800462024033 and parameters: {'lr': 0.00979109648871633, 'wd': 0.00026609427105438123, 'warmup': 150, 'gamma': 0.9781683567475866, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.138627884166792, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 05:57:50,901] Trial 603 finished with value: 0.773075760414967 and parameters: {'lr': 0.009853068635187555, 'wd': 0.00029223751711022583, 'warmup': 150, 'gamma': 0.9772833325281912, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14235681228616878, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 603 finished with value: 0.773075760414967 and parameters: {'lr': 0.009853068635187555, 'wd': 0.00029223751711022583, 'warmup': 150, 'gamma': 0.9772833325281912, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14235681228616878, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 05:59:14,191] Trial 604 finished with value: 0.7573498184405447 and parameters: {'lr': 0.008529131872824131, 'wd': 0.00019647743332298692, 'warmup': 150, 'gamma': 0.9781619843546118, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13167397101842168, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 604 finished with value: 0.7573498184405447 and parameters: {'lr': 0.008529131872824131, 'wd': 0.00019647743332298692, 'warmup': 150, 'gamma': 0.9781619843546118, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13167397101842168, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:00:37,418] Trial 605 finished with value: 0.7172540075071471 and parameters: {'lr': 0.009995181015755798, 'wd': 0.0003249876870593646, 'warmup': 150, 'gamma': 0.9809578932428793, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13432082079459318, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 605 finished with value: 0.7172540075071471 and parameters: {'lr': 0.009995181015755798, 'wd': 0.0003249876870593646, 'warmup': 150, 'gamma': 0.9809578932428793, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13432082079459318, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19203296
torch.Size([180])


[I 2024-10-29 06:02:37,127] Trial 606 finished with value: 0.7968714555411668 and parameters: {'lr': 0.008059530680343185, 'wd': 0.00023859057889517532, 'warmup': 150, 'gamma': 0.9812383397822935, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13674844870684078, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 606 finished with value: 0.7968714555411668 and parameters: {'lr': 0.008059530680343185, 'wd': 0.00023859057889517532, 'warmup': 150, 'gamma': 0.9812383397822935, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13674844870684078, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:04:01,045] Trial 607 finished with value: 0.7189106043128644 and parameters: {'lr': 0.0022775829775009576, 'wd': 0.00025723697419572776, 'warmup': 150, 'gamma': 0.9800138307201391, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14305300379343017, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 607 finished with value: 0.7189106043128644 and parameters: {'lr': 0.0022775829775009576, 'wd': 0.00025723697419572776, 'warmup': 150, 'gamma': 0.9800138307201391, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14305300379343017, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:05:24,677] Trial 608 finished with value: 0.7425041180634313 and parameters: {'lr': 0.008719262584622973, 'wd': 0.00029049074097062467, 'warmup': 200, 'gamma': 0.9798909658365433, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13028288611930244, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 608 finished with value: 0.7425041180634313 and parameters: {'lr': 0.008719262584622973, 'wd': 0.00029049074097062467, 'warmup': 200, 'gamma': 0.9798909658365433, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13028288611930244, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:06:48,092] Trial 609 finished with value: 0.5469453583044893 and parameters: {'lr': 0.009942898399762059, 'wd': 0.00025314489503859555, 'warmup': 150, 'gamma': 0.9805577818604461, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13909543106236666, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 609 finished with value: 0.5469453583044893 and parameters: {'lr': 0.009942898399762059, 'wd': 0.00025314489503859555, 'warmup': 150, 'gamma': 0.9805577818604461, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13909543106236666, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19357184
torch.Size([180])


[I 2024-10-29 06:08:11,790] Trial 610 finished with value: 0.664675603214527 and parameters: {'lr': 0.00993467285728483, 'wd': 0.0002533485858744457, 'warmup': 150, 'gamma': 0.9807184634270527, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1338927299836231, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 610 finished with value: 0.664675603214527 and parameters: {'lr': 0.00993467285728483, 'wd': 0.0002533485858744457, 'warmup': 150, 'gamma': 0.9807184634270527, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1338927299836231, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:09:35,035] Trial 611 finished with value: 0.7307065334945988 and parameters: {'lr': 0.008708308831835689, 'wd': 0.00018971891237020693, 'warmup': 150, 'gamma': 0.9794760487470469, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1401065111111602, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 611 finished with value: 0.7307065334945988 and parameters: {'lr': 0.008708308831835689, 'wd': 0.00018971891237020693, 'warmup': 150, 'gamma': 0.9794760487470469, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1401065111111602, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19147968
torch.Size([180])


[I 2024-10-29 06:10:42,366] Trial 612 finished with value: 0.7392958645630533 and parameters: {'lr': 0.008780472745067887, 'wd': 0.00023750117992871013, 'warmup': 150, 'gamma': 0.9798054856306065, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13565596645974606, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 612 finished with value: 0.7392958645630533 and parameters: {'lr': 0.008780472745067887, 'wd': 0.00023750117992871013, 'warmup': 150, 'gamma': 0.9798054856306065, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13565596645974606, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:12:05,529] Trial 613 finished with value: 0.7168166564116359 and parameters: {'lr': 0.00017039152514420277, 'wd': 0.00021812651597439086, 'warmup': 150, 'gamma': 0.9806809896258658, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14248779698198946, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 613 finished with value: 0.7168166564116359 and parameters: {'lr': 0.00017039152514420277, 'wd': 0.00021812651597439086, 'warmup': 150, 'gamma': 0.9806809896258658, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14248779698198946, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 06:14:05,100] Trial 614 finished with value: 0.695838184273309 and parameters: {'lr': 0.009892793781980155, 'wd': 0.0002985044636455337, 'warmup': 150, 'gamma': 0.9787859970941972, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14110406651446933, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 614 finished with value: 0.695838184273309 and parameters: {'lr': 0.009892793781980155, 'wd': 0.0002985044636455337, 'warmup': 150, 'gamma': 0.9787859970941972, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14110406651446933, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:15:28,369] Trial 615 finished with value: 0.7154319619059236 and parameters: {'lr': 0.00796182051169446, 'wd': 0.00031512853924430846, 'warmup': 150, 'gamma': 0.9805091624420059, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14452437089185524, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 615 finished with value: 0.7154319619059236 and parameters: {'lr': 0.00796182051169446, 'wd': 0.00031512853924430846, 'warmup': 150, 'gamma': 0.9805091624420059, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14452437089185524, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:16:51,680] Trial 616 finished with value: 0.7261030975639176 and parameters: {'lr': 0.008733753578575644, 'wd': 0.0002727066357002473, 'warmup': 150, 'gamma': 0.9813069479041567, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1395543597561129, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 616 finished with value: 0.7261030975639176 and parameters: {'lr': 0.008733753578575644, 'wd': 0.0002727066357002473, 'warmup': 150, 'gamma': 0.9813069479041567, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1395543597561129, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:18:15,034] Trial 617 finished with value: 0.6728148958281921 and parameters: {'lr': 0.009932271485087234, 'wd': 0.0003377686709059417, 'warmup': 150, 'gamma': 0.9794479981960382, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1470249387376149, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 617 finished with value: 0.6728148958281921 and parameters: {'lr': 0.009932271485087234, 'wd': 0.0003377686709059417, 'warmup': 150, 'gamma': 0.9794479981960382, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1470249387376149, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:19:38,310] Trial 618 finished with value: 0.7155309902652518 and parameters: {'lr': 0.007481028837550828, 'wd': 0.00016815947861283136, 'warmup': 150, 'gamma': 0.9807877618317166, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12658424188878936, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 618 finished with value: 0.7155309902652518 and parameters: {'lr': 0.007481028837550828, 'wd': 0.00016815947861283136, 'warmup': 150, 'gamma': 0.9807877618317166, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12658424188878936, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:21:01,622] Trial 619 finished with value: 0.7204409807216657 and parameters: {'lr': 0.000680738696927424, 'wd': 0.00023328342502772543, 'warmup': 150, 'gamma': 0.980310428311263, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13773176305122012, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 619 finished with value: 0.7204409807216657 and parameters: {'lr': 0.000680738696927424, 'wd': 0.00023328342502772543, 'warmup': 150, 'gamma': 0.980310428311263, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13773176305122012, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 06:23:00,313] Trial 620 finished with value: 0.7383372905257548 and parameters: {'lr': 0.009989511672976095, 'wd': 0.0002705850466660077, 'warmup': 150, 'gamma': 0.9793926833411729, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14591137410042457, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.


Trial 620 finished with value: 0.7383372905257548 and parameters: {'lr': 0.009989511672976095, 'wd': 0.0002705850466660077, 'warmup': 150, 'gamma': 0.9793926833411729, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14591137410042457, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:24:23,550] Trial 621 finished with value: 0.7349487447242823 and parameters: {'lr': 0.000510679941847517, 'wd': 0.00020339096706070928, 'warmup': 150, 'gamma': 0.9810887516307955, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13379273890595506, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 621 finished with value: 0.7349487447242823 and parameters: {'lr': 0.000510679941847517, 'wd': 0.00020339096706070928, 'warmup': 150, 'gamma': 0.9810887516307955, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13379273890595506, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:25:46,956] Trial 622 finished with value: 0.7565655723643369 and parameters: {'lr': 0.008134019633772275, 'wd': 0.00033535487239272604, 'warmup': 150, 'gamma': 0.9796895380151331, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14780393886472423, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 622 finished with value: 0.7565655723643369 and parameters: {'lr': 0.008134019633772275, 'wd': 0.00033535487239272604, 'warmup': 150, 'gamma': 0.9796895380151331, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14780393886472423, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19147968
torch.Size([180])


[I 2024-10-29 06:26:54,224] Trial 623 finished with value: 0.7009978340364379 and parameters: {'lr': 0.0071221893921787965, 'wd': 0.0003524865091222548, 'warmup': 150, 'gamma': 0.9815463090885529, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14971584054903642, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 623 finished with value: 0.7009978340364379 and parameters: {'lr': 0.0071221893921787965, 'wd': 0.0003524865091222548, 'warmup': 150, 'gamma': 0.9815463090885529, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14971584054903642, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:28:17,772] Trial 624 finished with value: 0.6839214128945853 and parameters: {'lr': 0.009978320067978174, 'wd': 0.00028111869574373786, 'warmup': 150, 'gamma': 0.98171630200353, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14066487058425098, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 624 finished with value: 0.6839214128945853 and parameters: {'lr': 0.009978320067978174, 'wd': 0.00028111869574373786, 'warmup': 150, 'gamma': 0.98171630200353, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14066487058425098, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:29:41,095] Trial 625 finished with value: 0.680592978391239 and parameters: {'lr': 0.008740402136548019, 'wd': 0.00024274288464085368, 'warmup': 150, 'gamma': 0.9804441429531895, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14358194204141073, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 625 finished with value: 0.680592978391239 and parameters: {'lr': 0.008740402136548019, 'wd': 0.00024274288464085368, 'warmup': 150, 'gamma': 0.9804441429531895, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14358194204141073, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:31:04,406] Trial 626 finished with value: 0.742750105866954 and parameters: {'lr': 0.0076768897641238225, 'wd': 0.0003469362960468773, 'warmup': 150, 'gamma': 0.9809540730263714, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1281513197756795, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 626 finished with value: 0.742750105866954 and parameters: {'lr': 0.0076768897641238225, 'wd': 0.0003469362960468773, 'warmup': 150, 'gamma': 0.9809540730263714, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1281513197756795, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:32:28,023] Trial 627 finished with value: 0.7167613936879466 and parameters: {'lr': 0.008686937585520006, 'wd': 0.0002691894340813662, 'warmup': 150, 'gamma': 0.9785208115963026, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1498998973775807, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 627 finished with value: 0.7167613936879466 and parameters: {'lr': 0.008686937585520006, 'wd': 0.0002691894340813662, 'warmup': 150, 'gamma': 0.9785208115963026, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1498998973775807, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19203296
torch.Size([180])


[I 2024-10-29 06:34:28,558] Trial 628 finished with value: 0.6917029652831488 and parameters: {'lr': 0.006969340076360351, 'wd': 0.0001317732007985679, 'warmup': 150, 'gamma': 0.9815927217375866, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13767858965810234, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 628 finished with value: 0.6917029652831488 and parameters: {'lr': 0.006969340076360351, 'wd': 0.0001317732007985679, 'warmup': 150, 'gamma': 0.9815927217375866, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13767858965810234, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:35:52,046] Trial 629 finished with value: 0.5935793267473146 and parameters: {'lr': 0.009969032935364289, 'wd': 0.00020600206764636377, 'warmup': 150, 'gamma': 0.9800144757267748, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1399244738960472, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 629 finished with value: 0.5935793267473146 and parameters: {'lr': 0.009969032935364289, 'wd': 0.00020600206764636377, 'warmup': 150, 'gamma': 0.9800144757267748, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1399244738960472, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 06:37:04,179] Trial 630 finished with value: 0.7198395330016639 and parameters: {'lr': 0.0010850135075199587, 'wd': 0.00023640999813627308, 'warmup': 150, 'gamma': 0.9794101486293167, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.14375421031846722, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 630 finished with value: 0.7198395330016639 and parameters: {'lr': 0.0010850135075199587, 'wd': 0.00023640999813627308, 'warmup': 150, 'gamma': 0.9794101486293167, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.14375421031846722, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:38:27,906] Trial 631 finished with value: 0.776330766645488 and parameters: {'lr': 0.007850631335063958, 'wd': 0.0001914337956614727, 'warmup': 150, 'gamma': 0.9796875926166911, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14640707535601574, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 631 finished with value: 0.776330766645488 and parameters: {'lr': 0.007850631335063958, 'wd': 0.0001914337956614727, 'warmup': 150, 'gamma': 0.9796875926166911, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14640707535601574, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:39:51,220] Trial 632 finished with value: 0.7287386197112108 and parameters: {'lr': 0.006388639017286887, 'wd': 0.0002052602393869186, 'warmup': 150, 'gamma': 0.9797449595397287, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1494792841626988, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 632 finished with value: 0.7287386197112108 and parameters: {'lr': 0.006388639017286887, 'wd': 0.0002052602393869186, 'warmup': 150, 'gamma': 0.9797449595397287, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1494792841626988, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16408064
torch.Size([180])


[I 2024-10-29 06:41:03,562] Trial 633 finished with value: 0.8105829374196126 and parameters: {'lr': 0.008736169430202268, 'wd': 0.0002156219111919137, 'warmup': 150, 'gamma': 0.9787351619847504, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.15238366700225514, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 633 finished with value: 0.8105829374196126 and parameters: {'lr': 0.008736169430202268, 'wd': 0.0002156219111919137, 'warmup': 150, 'gamma': 0.9787351619847504, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.15238366700225514, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17181888
torch.Size([180])


[I 2024-10-29 06:42:06,014] Trial 634 finished with value: 0.8002954164044466 and parameters: {'lr': 0.007295026483389953, 'wd': 0.00025207155511573434, 'warmup': 150, 'gamma': 0.9790337509553261, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 5, 'mlp_dim': 768, 'emb_dropout': 0.1431410202602715, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 634 finished with value: 0.8002954164044466 and parameters: {'lr': 0.007295026483389953, 'wd': 0.00025207155511573434, 'warmup': 150, 'gamma': 0.9790337509553261, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 5, 'mlp_dim': 768, 'emb_dropout': 0.1431410202602715, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 06:43:18,054] Trial 635 finished with value: 0.6515490137785382 and parameters: {'lr': 0.008625466684430042, 'wd': 0.0001738878106773939, 'warmup': 150, 'gamma': 0.9798855244866522, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.1500402466968387, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 635 finished with value: 0.6515490137785382 and parameters: {'lr': 0.008625466684430042, 'wd': 0.0001738878106773939, 'warmup': 150, 'gamma': 0.9798855244866522, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.1500402466968387, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:44:41,325] Trial 636 finished with value: 0.7367392651757771 and parameters: {'lr': 0.0065547017624166815, 'wd': 0.00020758541724858764, 'warmup': 150, 'gamma': 0.9802932954086588, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14043292204360017, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 636 finished with value: 0.7367392651757771 and parameters: {'lr': 0.0065547017624166815, 'wd': 0.00020758541724858764, 'warmup': 150, 'gamma': 0.9802932954086588, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14043292204360017, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17172576
torch.Size([180])


[I 2024-10-29 06:46:30,845] Trial 637 finished with value: 0.722910995833276 and parameters: {'lr': 0.009983718962990619, 'wd': 0.00015945960744121599, 'warmup': 150, 'gamma': 0.9804589270914764, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 5, 'mlp_dim': 768, 'emb_dropout': 0.13226725690926777, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 637 finished with value: 0.722910995833276 and parameters: {'lr': 0.009983718962990619, 'wd': 0.00015945960744121599, 'warmup': 150, 'gamma': 0.9804589270914764, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 5, 'mlp_dim': 768, 'emb_dropout': 0.13226725690926777, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 35108480
torch.Size([180])


[I 2024-10-29 06:48:47,426] Trial 638 finished with value: 0.7772009407298358 and parameters: {'lr': 0.007873010979105002, 'wd': 0.0002861036124504274, 'warmup': 150, 'gamma': 0.9776639494615424, 'time_dim': 16, 'patch_size': 32, 'depth': 11, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.15435916027896196, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 638 finished with value: 0.7772009407298358 and parameters: {'lr': 0.007873010979105002, 'wd': 0.0002861036124504274, 'warmup': 150, 'gamma': 0.9776639494615424, 'time_dim': 16, 'patch_size': 32, 'depth': 11, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.15435916027896196, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:50:11,312] Trial 639 finished with value: 0.7234162013572648 and parameters: {'lr': 0.00787914532542319, 'wd': 7.224016125095969e-05, 'warmup': 150, 'gamma': 0.9816581788133772, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14628931988248522, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 639 finished with value: 0.7234162013572648 and parameters: {'lr': 0.00787914532542319, 'wd': 7.224016125095969e-05, 'warmup': 150, 'gamma': 0.9816581788133772, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14628931988248522, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 06:51:23,620] Trial 640 finished with value: 0.7667388177507998 and parameters: {'lr': 0.005888942858112677, 'wd': 0.009971926387789824, 'warmup': 150, 'gamma': 0.9806913411240644, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.1489828379113123, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 640 finished with value: 0.7667388177507998 and parameters: {'lr': 0.005888942858112677, 'wd': 0.009971926387789824, 'warmup': 150, 'gamma': 0.9806913411240644, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.1489828379113123, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:52:47,256] Trial 641 finished with value: 0.7086848525561266 and parameters: {'lr': 0.008731613703681295, 'wd': 0.004436045238117199, 'warmup': 150, 'gamma': 0.982330515775735, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.15432029255847587, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.


Trial 641 finished with value: 0.7086848525561266 and parameters: {'lr': 0.008731613703681295, 'wd': 0.004436045238117199, 'warmup': 150, 'gamma': 0.982330515775735, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.15432029255847587, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 06:54:10,705] Trial 642 finished with value: 0.7303259891327308 and parameters: {'lr': 0.009965963579776924, 'wd': 0.00026155643825138286, 'warmup': 150, 'gamma': 0.9800709853418899, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14059591692721413, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 642 finished with value: 0.7303259891327308 and parameters: {'lr': 0.009965963579776924, 'wd': 0.00026155643825138286, 'warmup': 150, 'gamma': 0.9800709853418899, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14059591692721413, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 15205216
torch.Size([180])


[I 2024-10-29 06:55:55,551] Trial 643 finished with value: 0.7908325542039103 and parameters: {'lr': 0.006738206733607616, 'wd': 5.944626339384681e-05, 'warmup': 150, 'gamma': 0.9819129741732422, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 5, 'mlp_dim': 512, 'emb_dropout': 0.1520243268087348, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 643 finished with value: 0.7908325542039103 and parameters: {'lr': 0.006738206733607616, 'wd': 5.944626339384681e-05, 'warmup': 150, 'gamma': 0.9819129741732422, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 5, 'mlp_dim': 512, 'emb_dropout': 0.1520243268087348, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 06:57:07,662] Trial 644 finished with value: 0.7767200349938526 and parameters: {'lr': 0.007525104530219578, 'wd': 0.00011303521217732385, 'warmup': 300, 'gamma': 0.9810674049971856, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.14362939324613097, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 644 finished with value: 0.7767200349938526 and parameters: {'lr': 0.007525104530219578, 'wd': 0.00011303521217732385, 'warmup': 300, 'gamma': 0.9810674049971856, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.14362939324613097, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 21114048
torch.Size([180])


[I 2024-10-29 06:58:20,160] Trial 645 finished with value: 0.7540897017048749 and parameters: {'lr': 0.004920007706242331, 'wd': 3.385383672692534e-05, 'warmup': 200, 'gamma': 0.9819658305278346, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 9, 'mlp_dim': 768, 'emb_dropout': 0.08879627456085828, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 645 finished with value: 0.7540897017048749 and parameters: {'lr': 0.004920007706242331, 'wd': 3.385383672692534e-05, 'warmup': 200, 'gamma': 0.9819658305278346, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 9, 'mlp_dim': 768, 'emb_dropout': 0.08879627456085828, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 06:59:32,041] Trial 646 finished with value: 0.6039995697447791 and parameters: {'lr': 0.00880166751852467, 'wd': 0.00019129286570251046, 'warmup': 150, 'gamma': 0.9815023226859697, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.1560390587950684, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 646 finished with value: 0.6039995697447791 and parameters: {'lr': 0.00880166751852467, 'wd': 0.00019129286570251046, 'warmup': 150, 'gamma': 0.9815023226859697, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.1560390587950684, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 07:00:44,254] Trial 647 finished with value: 0.7882907486455493 and parameters: {'lr': 0.0038212020291517016, 'wd': 0.00016894022337748517, 'warmup': 150, 'gamma': 0.9815788298857704, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.15673185011982194, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 647 finished with value: 0.7882907486455493 and parameters: {'lr': 0.0038212020291517016, 'wd': 0.00016894022337748517, 'warmup': 150, 'gamma': 0.9815788298857704, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.15673185011982194, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 07:01:56,218] Trial 648 finished with value: 0.7595720861088816 and parameters: {'lr': 0.008758924823022835, 'wd': 0.0001862114027356911, 'warmup': 150, 'gamma': 0.9809791224844159, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.15820550717914156, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 648 finished with value: 0.7595720861088816 and parameters: {'lr': 0.008758924823022835, 'wd': 0.0001862114027356911, 'warmup': 150, 'gamma': 0.9809791224844159, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.15820550717914156, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 07:03:08,300] Trial 649 finished with value: 0.677082276338558 and parameters: {'lr': 0.009996014283536213, 'wd': 0.00015024129256999402, 'warmup': 150, 'gamma': 0.9822296433132169, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.14863699879557943, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 649 finished with value: 0.677082276338558 and parameters: {'lr': 0.009996014283536213, 'wd': 0.00015024129256999402, 'warmup': 150, 'gamma': 0.9822296433132169, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.14863699879557943, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 07:04:20,340] Trial 650 finished with value: 0.7148483530383327 and parameters: {'lr': 0.007266241619371404, 'wd': 0.00018954544301486717, 'warmup': 150, 'gamma': 0.9805590225204639, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.15408560701724433, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 650 finished with value: 0.7148483530383327 and parameters: {'lr': 0.007266241619371404, 'wd': 0.00018954544301486717, 'warmup': 150, 'gamma': 0.9805590225204639, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.15408560701724433, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 07:05:32,476] Trial 651 finished with value: 0.7434069350526948 and parameters: {'lr': 0.006502481800469389, 'wd': 0.00022182863445540082, 'warmup': 150, 'gamma': 0.9816182021526594, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.15164094235752965, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 651 finished with value: 0.7434069350526948 and parameters: {'lr': 0.006502481800469389, 'wd': 0.00022182863445540082, 'warmup': 150, 'gamma': 0.9816182021526594, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.15164094235752965, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16220240
torch.Size([180])


[I 2024-10-29 07:08:39,862] Trial 652 finished with value: 0.76617952979489 and parameters: {'lr': 0.00816500661507208, 'wd': 0.0002167147252074774, 'warmup': 150, 'gamma': 0.9800822465753388, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.1593872646080831, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 652 finished with value: 0.76617952979489 and parameters: {'lr': 0.00816500661507208, 'wd': 0.0002167147252074774, 'warmup': 150, 'gamma': 0.9800822465753388, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.1593872646080831, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16254176
torch.Size([180])


[I 2024-10-29 07:10:24,011] Trial 653 finished with value: 0.7484977418221175 and parameters: {'lr': 7.78458835277213e-05, 'wd': 0.0002431728507784144, 'warmup': 150, 'gamma': 0.9819590114507102, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.13562593354526814, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 653 finished with value: 0.7484977418221175 and parameters: {'lr': 7.78458835277213e-05, 'wd': 0.0002431728507784144, 'warmup': 150, 'gamma': 0.9819590114507102, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.13562593354526814, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 07:11:36,044] Trial 654 finished with value: 0.7384058221435572 and parameters: {'lr': 0.005675193471482952, 'wd': 0.0001776009644377363, 'warmup': 150, 'gamma': 0.9811198363392685, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.14691243070706397, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 654 finished with value: 0.7384058221435572 and parameters: {'lr': 0.005675193471482952, 'wd': 0.0001776009644377363, 'warmup': 150, 'gamma': 0.9811198363392685, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.14691243070706397, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16408064
torch.Size([180])


[I 2024-10-29 07:12:48,660] Trial 655 finished with value: 0.765521604200595 and parameters: {'lr': 0.008488379419536285, 'wd': 8.029902914295572e-05, 'warmup': 150, 'gamma': 0.9792810821702735, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.1557280521616343, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 655 finished with value: 0.765521604200595 and parameters: {'lr': 0.008488379419536285, 'wd': 8.029902914295572e-05, 'warmup': 150, 'gamma': 0.9792810821702735, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.1557280521616343, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 28793728
torch.Size([180])


[I 2024-10-29 07:14:42,767] Trial 656 finished with value: 0.6615764350962022 and parameters: {'lr': 0.007030653649000268, 'wd': 4.092830282229562e-05, 'warmup': 150, 'gamma': 0.9889341170690381, 'time_dim': 16, 'patch_size': 32, 'depth': 8, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.16161725697705795, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 656 finished with value: 0.6615764350962022 and parameters: {'lr': 0.007030653649000268, 'wd': 4.092830282229562e-05, 'warmup': 150, 'gamma': 0.9889341170690381, 'time_dim': 16, 'patch_size': 32, 'depth': 8, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.16161725697705795, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16220240
torch.Size([180])


[I 2024-10-29 07:17:48,776] Trial 657 finished with value: 0.6510997411854164 and parameters: {'lr': 0.00895858485077355, 'wd': 0.00028609976323471414, 'warmup': 150, 'gamma': 0.9823666025247261, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.15844704830952422, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 657 finished with value: 0.6510997411854164 and parameters: {'lr': 0.00895858485077355, 'wd': 0.00028609976323471414, 'warmup': 150, 'gamma': 0.9823666025247261, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.15844704830952422, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 07:19:01,334] Trial 658 finished with value: 0.684158418088265 and parameters: {'lr': 0.0076706225157074604, 'wd': 0.0002263542543335619, 'warmup': 150, 'gamma': 0.9806103324234108, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.14160193045619301, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 658 finished with value: 0.684158418088265 and parameters: {'lr': 0.0076706225157074604, 'wd': 0.0002263542543335619, 'warmup': 150, 'gamma': 0.9806103324234108, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.14160193045619301, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19147968
torch.Size([180])


[I 2024-10-29 07:20:08,534] Trial 659 finished with value: 0.6899133238110348 and parameters: {'lr': 0.006000711855551391, 'wd': 0.00030570431415261924, 'warmup': 150, 'gamma': 0.9815507381946859, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.15278046459099406, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 659 finished with value: 0.6899133238110348 and parameters: {'lr': 0.006000711855551391, 'wd': 0.00030570431415261924, 'warmup': 150, 'gamma': 0.9815507381946859, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.15278046459099406, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 07:21:20,470] Trial 660 finished with value: 0.6679925012384362 and parameters: {'lr': 0.009991920532666631, 'wd': 0.005106138525275972, 'warmup': 150, 'gamma': 0.9825295933170591, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.13257986430684565, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 660 finished with value: 0.6679925012384362 and parameters: {'lr': 0.009991920532666631, 'wd': 0.005106138525275972, 'warmup': 150, 'gamma': 0.9825295933170591, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.13257986430684565, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 15199104
torch.Size([180])


[I 2024-10-29 07:22:33,517] Trial 661 finished with value: 0.7872189995869936 and parameters: {'lr': 0.008786766225833834, 'wd': 0.00025826007313411287, 'warmup': 150, 'gamma': 0.978558521470003, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 512, 'emb_dropout': 0.14802139694316538, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 661 finished with value: 0.7872189995869936 and parameters: {'lr': 0.008786766225833834, 'wd': 0.00025826007313411287, 'warmup': 150, 'gamma': 0.978558521470003, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 512, 'emb_dropout': 0.14802139694316538, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 07:24:33,037] Trial 662 finished with value: 0.6339120282244252 and parameters: {'lr': 0.006964286541331088, 'wd': 0.000252500476027778, 'warmup': 300, 'gamma': 0.9811183754588312, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.16079120487422852, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 662 finished with value: 0.6339120282244252 and parameters: {'lr': 0.006964286541331088, 'wd': 0.000252500476027778, 'warmup': 300, 'gamma': 0.9811183754588312, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.16079120487422852, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18150784
torch.Size([180])


[I 2024-10-29 07:25:51,488] Trial 663 finished with value: 0.7316030493962897 and parameters: {'lr': 0.00790944896302662, 'wd': 0.00010174620746032314, 'warmup': 200, 'gamma': 0.9821861207293372, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.1555401883275262, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 663 finished with value: 0.7316030493962897 and parameters: {'lr': 0.00790944896302662, 'wd': 0.00010174620746032314, 'warmup': 200, 'gamma': 0.9821861207293372, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.1555401883275262, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 38262656
torch.Size([180])


[I 2024-10-29 07:28:18,233] Trial 664 finished with value: 0.6427632111396295 and parameters: {'lr': 0.005053476054960032, 'wd': 0.0002002712620418399, 'warmup': 200, 'gamma': 0.980344678069188, 'time_dim': 16, 'patch_size': 32, 'depth': 12, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.14339558446046044, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 664 finished with value: 0.6427632111396295 and parameters: {'lr': 0.005053476054960032, 'wd': 0.0002002712620418399, 'warmup': 200, 'gamma': 0.980344678069188, 'time_dim': 16, 'patch_size': 32, 'depth': 12, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.14339558446046044, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19169360
torch.Size([180])


[I 2024-10-29 07:31:54,007] Trial 665 finished with value: 0.6984881390601012 and parameters: {'lr': 0.008940723220941582, 'wd': 0.00031434518807257715, 'warmup': 150, 'gamma': 0.9827747878396441, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.15042710451263513, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 665 finished with value: 0.6984881390601012 and parameters: {'lr': 0.008940723220941582, 'wd': 0.00031434518807257715, 'warmup': 150, 'gamma': 0.9827747878396441, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.15042710451263513, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 16183424
torch.Size([180])


[I 2024-10-29 07:33:06,062] Trial 666 finished with value: 0.6566781566316161 and parameters: {'lr': 0.0064849419100174045, 'wd': 0.00015801307236219068, 'warmup': 200, 'gamma': 0.981653864903437, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.16455507386159401, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 666 finished with value: 0.6566781566316161 and parameters: {'lr': 0.0064849419100174045, 'wd': 0.00015801307236219068, 'warmup': 200, 'gamma': 0.981653864903437, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.16455507386159401, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17166464
torch.Size([180])


[I 2024-10-29 07:34:22,606] Trial 667 finished with value: 0.7753428857386867 and parameters: {'lr': 0.007840842104674174, 'wd': 0.007321288645684877, 'warmup': 200, 'gamma': 0.9795257634810243, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 768, 'emb_dropout': 0.1753126541218258, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 667 finished with value: 0.7753428857386867 and parameters: {'lr': 0.007840842104674174, 'wd': 0.007321288645684877, 'warmup': 200, 'gamma': 0.9795257634810243, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 5, 'mlp_dim': 768, 'emb_dropout': 0.1753126541218258, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17202000
torch.Size([180])


[I 2024-10-29 07:37:43,967] Trial 668 finished with value: 0.6046616068608757 and parameters: {'lr': 0.009948211532646937, 'wd': 0.00021048699091586481, 'warmup': 150, 'gamma': 0.994056785316981, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1375746022714993, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 668 finished with value: 0.6046616068608757 and parameters: {'lr': 0.009948211532646937, 'wd': 0.00021048699091586481, 'warmup': 150, 'gamma': 0.994056785316981, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1375746022714993, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17165184
torch.Size([180])


[I 2024-10-29 07:39:03,833] Trial 669 finished with value: 0.8576080701484883 and parameters: {'lr': 0.009027352135278202, 'wd': 0.00013878185466261646, 'warmup': 150, 'gamma': 0.9899686318797869, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.13884248569692584, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 669 finished with value: 0.8576080701484883 and parameters: {'lr': 0.009027352135278202, 'wd': 0.00013878185466261646, 'warmup': 150, 'gamma': 0.9899686318797869, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.13884248569692584, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17165184
torch.Size([180])


[I 2024-10-29 07:40:23,412] Trial 670 finished with value: 0.7589583056514185 and parameters: {'lr': 0.008700558004865435, 'wd': 0.00019902836695167932, 'warmup': 150, 'gamma': 0.9808741408219858, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.13345683405455033, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 670 finished with value: 0.7589583056514185 and parameters: {'lr': 0.008700558004865435, 'wd': 0.00019902836695167932, 'warmup': 150, 'gamma': 0.9808741408219858, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.13345683405455033, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17180608
torch.Size([180])


[I 2024-10-29 07:41:27,273] Trial 671 finished with value: 0.6859634549065582 and parameters: {'lr': 0.00992251011220097, 'wd': 0.00021392266113511024, 'warmup': 150, 'gamma': 0.9936062648651317, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.13879739442298994, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 671 finished with value: 0.6859634549065582 and parameters: {'lr': 0.00992251011220097, 'wd': 0.00021392266113511024, 'warmup': 150, 'gamma': 0.9936062648651317, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.13879739442298994, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17171296
torch.Size([180])


[I 2024-10-29 07:43:22,364] Trial 672 finished with value: 0.7454237512749596 and parameters: {'lr': 0.009916684964535221, 'wd': 0.00018777574575889162, 'warmup': 150, 'gamma': 0.9752957162201791, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1230681949903932, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 672 finished with value: 0.7454237512749596 and parameters: {'lr': 0.009916684964535221, 'wd': 0.00018777574575889162, 'warmup': 150, 'gamma': 0.9752957162201791, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1230681949903932, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17266640
torch.Size([180])


[I 2024-10-29 07:46:42,914] Trial 673 finished with value: 0.7393420442225004 and parameters: {'lr': 0.007529262208275946, 'wd': 0.00022963869775108318, 'warmup': 150, 'gamma': 0.9919851124466414, 'time_dim': 32, 'patch_size': 8, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.13041282984091282, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 673 finished with value: 0.7393420442225004 and parameters: {'lr': 0.007529262208275946, 'wd': 0.00022963869775108318, 'warmup': 150, 'gamma': 0.9919851124466414, 'time_dim': 32, 'patch_size': 8, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.13041282984091282, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17165184
torch.Size([180])


[I 2024-10-29 07:48:02,684] Trial 674 finished with value: 0.6982953238916259 and parameters: {'lr': 0.004275878207439903, 'wd': 0.0002537975217920301, 'warmup': 150, 'gamma': 0.992397375062351, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1362474205772822, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.


Trial 674 finished with value: 0.6982953238916259 and parameters: {'lr': 0.004275878207439903, 'wd': 0.0002537975217920301, 'warmup': 150, 'gamma': 0.992397375062351, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1362474205772822, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17165184
torch.Size([180])


[I 2024-10-29 07:49:22,298] Trial 675 finished with value: 0.7117630998090859 and parameters: {'lr': 0.007872336886070912, 'wd': 0.00016228072866732502, 'warmup': 150, 'gamma': 0.9739660437615811, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.128855402051325, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 675 finished with value: 0.7117630998090859 and parameters: {'lr': 0.007872336886070912, 'wd': 0.00016228072866732502, 'warmup': 150, 'gamma': 0.9739660437615811, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.128855402051325, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 07:50:45,716] Trial 676 finished with value: 0.7182585617701734 and parameters: {'lr': 0.0067101504635972255, 'wd': 0.00018698682747428884, 'warmup': 150, 'gamma': 0.9974670847016154, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13868306041937817, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 676 finished with value: 0.7182585617701734 and parameters: {'lr': 0.0067101504635972255, 'wd': 0.00018698682747428884, 'warmup': 150, 'gamma': 0.9974670847016154, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13868306041937817, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17389824
torch.Size([180])


[I 2024-10-29 07:52:05,646] Trial 677 finished with value: 0.7963063103140424 and parameters: {'lr': 0.008832236366471825, 'wd': 0.00012777673620268843, 'warmup': 150, 'gamma': 0.982797707192882, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1998611461594194, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 677 finished with value: 0.7963063103140424 and parameters: {'lr': 0.008832236366471825, 'wd': 0.00012777673620268843, 'warmup': 150, 'gamma': 0.982797707192882, 'time_dim': 64, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1998611461594194, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17202000
torch.Size([180])


[I 2024-10-29 07:55:27,201] Trial 678 finished with value: 0.7622106438683125 and parameters: {'lr': 0.006174998294994716, 'wd': 2.4081886723899326e-05, 'warmup': 150, 'gamma': 0.9799561809192757, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1454228187535344, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 678 finished with value: 0.7622106438683125 and parameters: {'lr': 0.006174998294994716, 'wd': 2.4081886723899326e-05, 'warmup': 150, 'gamma': 0.9799561809192757, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.1454228187535344, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17165184
torch.Size([180])


[I 2024-10-29 07:56:46,924] Trial 679 finished with value: 0.7306984447711724 and parameters: {'lr': 0.005441453989341475, 'wd': 0.00023716259536724036, 'warmup': 300, 'gamma': 0.9913593202979912, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.14295798151855296, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 679 finished with value: 0.7306984447711724 and parameters: {'lr': 0.005441453989341475, 'wd': 0.00023716259536724036, 'warmup': 300, 'gamma': 0.9913593202979912, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.14295798151855296, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17165184
torch.Size([180])


[I 2024-10-29 07:58:06,577] Trial 680 finished with value: 0.6702487905401846 and parameters: {'lr': 0.008741712006283864, 'wd': 0.00021174136366665543, 'warmup': 150, 'gamma': 0.9983513233721476, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.13372738764925995, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 680 finished with value: 0.6702487905401846 and parameters: {'lr': 0.008741712006283864, 'wd': 0.00021174136366665543, 'warmup': 150, 'gamma': 0.9983513233721476, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 512, 'emb_dropout': 0.13372738764925995, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18156896
torch.Size([180])


[I 2024-10-29 08:00:01,854] Trial 681 finished with value: 0.7311115416712625 and parameters: {'lr': 0.007266562365032924, 'wd': 8.877450543871016e-05, 'warmup': 200, 'gamma': 0.9822233928021906, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.14492473650936177, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 681 finished with value: 0.7311115416712625 and parameters: {'lr': 0.007266562365032924, 'wd': 8.877450543871016e-05, 'warmup': 200, 'gamma': 0.9822233928021906, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.14492473650936177, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 20115584
torch.Size([180])


[I 2024-10-29 08:01:27,274] Trial 682 finished with value: 0.6774468808869257 and parameters: {'lr': 0.009941288285830128, 'wd': 0.0003033576137898308, 'warmup': 200, 'gamma': 0.9929891496625013, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.14829877237203393, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 682 finished with value: 0.6774468808869257 and parameters: {'lr': 0.009941288285830128, 'wd': 0.0003033576137898308, 'warmup': 200, 'gamma': 0.9929891496625013, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.14829877237203393, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 14252880
torch.Size([180])


[I 2024-10-29 08:04:19,639] Trial 683 finished with value: 0.7354002633788453 and parameters: {'lr': 0.00783567187033996, 'wd': 0.00024206203444319572, 'warmup': 150, 'gamma': 0.9949563166704286, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 512, 'emb_dropout': 0.1418146451706414, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 683 finished with value: 0.7354002633788453 and parameters: {'lr': 0.00783567187033996, 'wd': 0.00024206203444319572, 'warmup': 150, 'gamma': 0.9949563166704286, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 4, 'mlp_dim': 512, 'emb_dropout': 0.1418146451706414, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 08:05:43,050] Trial 684 finished with value: 0.7639315193760269 and parameters: {'lr': 0.009964628224192, 'wd': 5.379443623061506e-05, 'warmup': 200, 'gamma': 0.9813768524028144, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1358642097916456, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 684 finished with value: 0.7639315193760269 and parameters: {'lr': 0.009964628224192, 'wd': 5.379443623061506e-05, 'warmup': 200, 'gamma': 0.9813768524028144, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1358642097916456, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 20131008
torch.Size([180])


[I 2024-10-29 08:06:52,573] Trial 685 finished with value: 0.7169681830263459 and parameters: {'lr': 0.006970504151444677, 'wd': 6.721431522486533e-05, 'warmup': 200, 'gamma': 0.9938372818493046, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.0932047791071479, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 685 finished with value: 0.7169681830263459 and parameters: {'lr': 0.006970504151444677, 'wd': 6.721431522486533e-05, 'warmup': 200, 'gamma': 0.9938372818493046, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.0932047791071479, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 13029248
torch.Size([180])


[I 2024-10-29 08:07:53,891] Trial 686 finished with value: 0.6425052250201365 and parameters: {'lr': 0.008679287211410055, 'wd': 2.922659603091967e-05, 'warmup': 150, 'gamma': 0.9955013031901684, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.1811446224203018, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 686 finished with value: 0.6425052250201365 and parameters: {'lr': 0.008679287211410055, 'wd': 2.922659603091967e-05, 'warmup': 150, 'gamma': 0.9955013031901684, 'time_dim': 16, 'patch_size': 32, 'depth': 4, 'heads': 4, 'mlp_dim': 768, 'emb_dropout': 0.1811446224203018, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19131264
torch.Size([180])


[I 2024-10-29 08:09:19,611] Trial 687 finished with value: 0.6709136991423644 and parameters: {'lr': 0.009976960993149667, 'wd': 0.0002831251245568089, 'warmup': 200, 'gamma': 0.9819152077783749, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 9, 'mlp_dim': 512, 'emb_dropout': 0.14024552175191074, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 687 finished with value: 0.6709136991423644 and parameters: {'lr': 0.009976960993149667, 'wd': 0.0002831251245568089, 'warmup': 200, 'gamma': 0.9819152077783749, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 9, 'mlp_dim': 512, 'emb_dropout': 0.14024552175191074, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19132544
torch.Size([180])


[I 2024-10-29 08:10:43,183] Trial 688 finished with value: 0.7656773769448648 and parameters: {'lr': 0.0058736707612384274, 'wd': 0.00018896169203035448, 'warmup': 150, 'gamma': 0.9828292212681466, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1956377226542781, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 688 finished with value: 0.7656773769448648 and parameters: {'lr': 0.0058736707612384274, 'wd': 0.00018896169203035448, 'warmup': 150, 'gamma': 0.9828292212681466, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1956377226542781, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 17203280
torch.Size([180])


[I 2024-10-29 08:13:59,166] Trial 689 finished with value: 0.7721020979361454 and parameters: {'lr': 0.0028875220086471915, 'wd': 4.6616797011936126e-05, 'warmup': 200, 'gamma': 0.9808241033903072, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 768, 'emb_dropout': 0.15186541076231838, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 689 finished with value: 0.7721020979361454 and parameters: {'lr': 0.0028875220086471915, 'wd': 4.6616797011936126e-05, 'warmup': 200, 'gamma': 0.9808241033903072, 'time_dim': 16, 'patch_size': 8, 'depth': 5, 'heads': 5, 'mlp_dim': 768, 'emb_dropout': 0.15186541076231838, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22082944
torch.Size([180])


[I 2024-10-29 08:15:31,229] Trial 690 finished with value: 0.5636936001217565 and parameters: {'lr': 0.007905489215459176, 'wd': 0.0003283822510559991, 'warmup': 200, 'gamma': 0.9988568821256518, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.14730243611762137, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 690 finished with value: 0.5636936001217565 and parameters: {'lr': 0.007905489215459176, 'wd': 0.0003283822510559991, 'warmup': 200, 'gamma': 0.9988568821256518, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.14730243611762137, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18156896
torch.Size([180])


[I 2024-10-29 08:17:26,648] Trial 691 finished with value: 0.5779922027801837 and parameters: {'lr': 0.007927608503810987, 'wd': 0.0002879523468716546, 'warmup': 200, 'gamma': 0.9958792368248675, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.14139946379342463, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 691 finished with value: 0.5779922027801837 and parameters: {'lr': 0.007927608503810987, 'wd': 0.0002879523468716546, 'warmup': 200, 'gamma': 0.9958792368248675, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.14139946379342463, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18156896
torch.Size([180])


[I 2024-10-29 08:19:22,326] Trial 692 finished with value: 0.7726436161333998 and parameters: {'lr': 0.006626019443453857, 'wd': 0.000346961469020092, 'warmup': 200, 'gamma': 0.9959587873566891, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.1349557445008851, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 692 finished with value: 0.7726436161333998 and parameters: {'lr': 0.006626019443453857, 'wd': 0.000346961469020092, 'warmup': 200, 'gamma': 0.9959587873566891, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.1349557445008851, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18156896
torch.Size([180])


[I 2024-10-29 08:21:17,616] Trial 693 finished with value: 0.7089148830118641 and parameters: {'lr': 0.005950982202798342, 'wd': 0.00027867856484981376, 'warmup': 200, 'gamma': 0.9981064558530812, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.13910631072560428, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 693 finished with value: 0.7089148830118641 and parameters: {'lr': 0.005950982202798342, 'wd': 0.00027867856484981376, 'warmup': 200, 'gamma': 0.9981064558530812, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.13910631072560428, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18156896
torch.Size([180])


[I 2024-10-29 08:23:12,608] Trial 694 finished with value: 0.7835882136742529 and parameters: {'lr': 0.0016529284644246716, 'wd': 0.00030691601629442573, 'warmup': 200, 'gamma': 0.9973279428671066, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.14324980299042486, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 694 finished with value: 0.7835882136742529 and parameters: {'lr': 0.0016529284644246716, 'wd': 0.00030691601629442573, 'warmup': 200, 'gamma': 0.9973279428671066, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.14324980299042486, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18156896
torch.Size([180])


[I 2024-10-29 08:25:08,324] Trial 695 finished with value: 0.7269904324726126 and parameters: {'lr': 0.0046700251499512384, 'wd': 0.00030654713675101465, 'warmup': 200, 'gamma': 0.9966266267495363, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.14146293130933324, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 695 finished with value: 0.7269904324726126 and parameters: {'lr': 0.0046700251499512384, 'wd': 0.00030654713675101465, 'warmup': 200, 'gamma': 0.9966266267495363, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.14146293130933324, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18221536
torch.Size([180])


[I 2024-10-29 08:27:03,058] Trial 696 finished with value: 0.7453329406400832 and parameters: {'lr': 0.00012200118750682215, 'wd': 0.0002637668561111, 'warmup': 200, 'gamma': 0.9945745817632012, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.13686001364807487, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 696 finished with value: 0.7453329406400832 and parameters: {'lr': 0.00012200118750682215, 'wd': 0.0002637668561111, 'warmup': 200, 'gamma': 0.9945745817632012, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.13686001364807487, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18156896
torch.Size([180])


[I 2024-10-29 08:28:58,584] Trial 697 finished with value: 0.749387775814234 and parameters: {'lr': 0.007113662387860305, 'wd': 0.00027929691972840455, 'warmup': 200, 'gamma': 0.9832305038321002, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.12986503072433753, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 697 finished with value: 0.749387775814234 and parameters: {'lr': 0.007113662387860305, 'wd': 0.00027929691972840455, 'warmup': 200, 'gamma': 0.9832305038321002, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.12986503072433753, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18156896
torch.Size([180])


[I 2024-10-29 08:30:54,405] Trial 698 finished with value: 0.7791330172088502 and parameters: {'lr': 0.005111323526408339, 'wd': 0.006073818805664438, 'warmup': 200, 'gamma': 0.9910016451976463, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.14477064162884057, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 698 finished with value: 0.7791330172088502 and parameters: {'lr': 0.005111323526408339, 'wd': 0.006073818805664438, 'warmup': 200, 'gamma': 0.9910016451976463, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.14477064162884057, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18156896
torch.Size([180])


[I 2024-10-29 08:32:50,262] Trial 699 finished with value: 0.7534819562545935 and parameters: {'lr': 0.0076641559277991645, 'wd': 0.0003175189291852829, 'warmup': 200, 'gamma': 0.9963305206448064, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.13934833536816424, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.


Trial 699 finished with value: 0.7534819562545935 and parameters: {'lr': 0.0076641559277991645, 'wd': 0.0003175189291852829, 'warmup': 200, 'gamma': 0.9963305206448064, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.13934833536816424, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18381536
torch.Size([180])


[I 2024-10-29 08:34:45,850] Trial 700 finished with value: 0.7276024570173026 and parameters: {'lr': 0.00633982758866137, 'wd': 0.000366120539978767, 'warmup': 200, 'gamma': 0.9976185349290326, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.1473472654503239, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 700 finished with value: 0.7276024570173026 and parameters: {'lr': 0.00633982758866137, 'wd': 0.000366120539978767, 'warmup': 200, 'gamma': 0.9976185349290326, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.1473472654503239, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18156896
torch.Size([180])


[I 2024-10-29 08:36:41,582] Trial 701 finished with value: 0.6688709047248337 and parameters: {'lr': 0.007937520859575898, 'wd': 0.0002449585864835938, 'warmup': 200, 'gamma': 0.99805118536207, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.13492862151495996, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 701 finished with value: 0.6688709047248337 and parameters: {'lr': 0.007937520859575898, 'wd': 0.0002449585864835938, 'warmup': 200, 'gamma': 0.99805118536207, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.13492862151495996, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18156896
torch.Size([180])


[I 2024-10-29 08:38:37,020] Trial 702 finished with value: 0.6674661330359654 and parameters: {'lr': 0.0070752303847900775, 'wd': 0.00034947989493107546, 'warmup': 150, 'gamma': 0.9987937441759037, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.1444508983783021, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 702 finished with value: 0.6674661330359654 and parameters: {'lr': 0.0070752303847900775, 'wd': 0.00034947989493107546, 'warmup': 150, 'gamma': 0.9987937441759037, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.1444508983783021, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18156896
torch.Size([180])


[I 2024-10-29 08:40:32,679] Trial 703 finished with value: 0.7695718981300612 and parameters: {'lr': 0.005664559805002686, 'wd': 0.0039035004426528153, 'warmup': 200, 'gamma': 0.9957697191862637, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.12738578753216634, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 703 finished with value: 0.7695718981300612 and parameters: {'lr': 0.005664559805002686, 'wd': 0.0039035004426528153, 'warmup': 200, 'gamma': 0.9957697191862637, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.12738578753216634, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 18150784
torch.Size([180])


[I 2024-10-29 08:41:51,120] Trial 704 finished with value: 0.6900105481068248 and parameters: {'lr': 0.008196940210655862, 'wd': 0.0002739012575516002, 'warmup': 200, 'gamma': 0.9964413702702825, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.1329340411083018, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 704 finished with value: 0.6900105481068248 and parameters: {'lr': 0.008196940210655862, 'wd': 0.0002739012575516002, 'warmup': 200, 'gamma': 0.9964413702702825, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 4, 'mlp_dim': 1024, 'emb_dropout': 0.1329340411083018, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 21099904
torch.Size([180])


[I 2024-10-29 08:43:20,716] Trial 705 finished with value: 0.686774529928527 and parameters: {'lr': 0.006950552972133358, 'wd': 0.00033480751277982084, 'warmup': 150, 'gamma': 0.9968464197291272, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.14085582687859047, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 705 finished with value: 0.686774529928527 and parameters: {'lr': 0.006950552972133358, 'wd': 0.00033480751277982084, 'warmup': 150, 'gamma': 0.9968464197291272, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.14085582687859047, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22082944
torch.Size([180])


[I 2024-10-29 08:44:52,955] Trial 706 finished with value: 0.6191749682067867 and parameters: {'lr': 0.00871353633254632, 'wd': 0.00023645162608160633, 'warmup': 200, 'gamma': 0.9943233662217128, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.18937986929754963, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 706 finished with value: 0.6191749682067867 and parameters: {'lr': 0.00871353633254632, 'wd': 0.00023645162608160633, 'warmup': 200, 'gamma': 0.9943233662217128, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.18937986929754963, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22082944
torch.Size([180])


[I 2024-10-29 08:46:24,942] Trial 707 finished with value: 0.7099327309803598 and parameters: {'lr': 0.008694179582222218, 'wd': 0.00023533986142958252, 'warmup': 200, 'gamma': 0.9943762958801791, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.19002199714118054, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 707 finished with value: 0.7099327309803598 and parameters: {'lr': 0.008694179582222218, 'wd': 0.00023533986142958252, 'warmup': 200, 'gamma': 0.9943762958801791, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.19002199714118054, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 08:48:38,570] Trial 708 finished with value: 0.6559677556790987 and parameters: {'lr': 0.008818969491222373, 'wd': 0.00022867279090223774, 'warmup': 150, 'gamma': 0.996167161846968, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.18842307811350614, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 708 finished with value: 0.6559677556790987 and parameters: {'lr': 0.008818969491222373, 'wd': 0.00022867279090223774, 'warmup': 150, 'gamma': 0.996167161846968, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.18842307811350614, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22082944
torch.Size([180])


[I 2024-10-29 08:50:10,362] Trial 709 finished with value: 0.7543742655400035 and parameters: {'lr': 0.008096055386082898, 'wd': 0.00027324513997200174, 'warmup': 200, 'gamma': 0.9932140107999685, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.19160255214625826, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 709 finished with value: 0.7543742655400035 and parameters: {'lr': 0.008096055386082898, 'wd': 0.00027324513997200174, 'warmup': 200, 'gamma': 0.9932140107999685, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.19160255214625826, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22098368
torch.Size([180])


[I 2024-10-29 08:51:24,146] Trial 710 finished with value: 0.770767142931776 and parameters: {'lr': 0.00897298526924212, 'wd': 0.0002093658766023502, 'warmup': 200, 'gamma': 0.9927283289845038, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.19653509248115433, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 710 finished with value: 0.770767142931776 and parameters: {'lr': 0.00897298526924212, 'wd': 0.0002093658766023502, 'warmup': 200, 'gamma': 0.9927283289845038, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.19653509248115433, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22082944
torch.Size([180])


[I 2024-10-29 08:52:56,017] Trial 711 finished with value: 0.636248246737492 and parameters: {'lr': 0.00787964196713658, 'wd': 0.00017246019354581055, 'warmup': 150, 'gamma': 0.9949562460495802, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.18663350210551455, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 711 finished with value: 0.636248246737492 and parameters: {'lr': 0.00787964196713658, 'wd': 0.00017246019354581055, 'warmup': 150, 'gamma': 0.9949562460495802, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.18663350210551455, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22082944
torch.Size([180])


[I 2024-10-29 08:54:28,124] Trial 712 finished with value: 0.6957354442670605 and parameters: {'lr': 0.009036032646436433, 'wd': 0.0002968900922336773, 'warmup': 300, 'gamma': 0.9944133949706968, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.19680217292471794, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 712 finished with value: 0.6957354442670605 and parameters: {'lr': 0.009036032646436433, 'wd': 0.0002968900922336773, 'warmup': 300, 'gamma': 0.9944133949706968, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.19680217292471794, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22082944
torch.Size([180])


[I 2024-10-29 08:55:59,959] Trial 713 finished with value: 0.7383823332656283 and parameters: {'lr': 0.00033340270202502, 'wd': 0.0002145026566908244, 'warmup': 250, 'gamma': 0.9937569355657616, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.19026055687345333, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 713 finished with value: 0.7383823332656283 and parameters: {'lr': 0.00033340270202502, 'wd': 0.0002145026566908244, 'warmup': 250, 'gamma': 0.9937569355657616, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.19026055687345333, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22082944
torch.Size([180])


[I 2024-10-29 08:57:32,383] Trial 714 finished with value: 0.7207507138128012 and parameters: {'lr': 0.007522636147342371, 'wd': 0.0002586122621773265, 'warmup': 200, 'gamma': 0.9903030815511598, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.18628560944732936, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 714 finished with value: 0.7207507138128012 and parameters: {'lr': 0.007522636147342371, 'wd': 0.0002586122621773265, 'warmup': 200, 'gamma': 0.9903030815511598, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.18628560944732936, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 08:59:46,459] Trial 715 finished with value: 0.7737725707061743 and parameters: {'lr': 0.008691821022007728, 'wd': 0.0003198330960714199, 'warmup': 150, 'gamma': 0.979122018329089, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.19173771232088138, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 715 finished with value: 0.7737725707061743 and parameters: {'lr': 0.008691821022007728, 'wd': 0.0003198330960714199, 'warmup': 150, 'gamma': 0.979122018329089, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.19173771232088138, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22082944
torch.Size([180])


[I 2024-10-29 09:01:18,236] Trial 716 finished with value: 0.7008138637985688 and parameters: {'lr': 0.009993378484166867, 'wd': 0.0002445899651009303, 'warmup': 200, 'gamma': 0.9954409295554737, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.14694162976260713, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 716 finished with value: 0.7008138637985688 and parameters: {'lr': 0.009993378484166867, 'wd': 0.0002445899651009303, 'warmup': 200, 'gamma': 0.9954409295554737, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.14694162976260713, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 20115584
torch.Size([180])


[I 2024-10-29 09:02:43,931] Trial 717 finished with value: 0.5694758920706178 and parameters: {'lr': 0.007478888837894916, 'wd': 0.00014863050590447333, 'warmup': 200, 'gamma': 0.9985521094524411, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.13789597788038394, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 717 finished with value: 0.5694758920706178 and parameters: {'lr': 0.007478888837894916, 'wd': 0.00014863050590447333, 'warmup': 200, 'gamma': 0.9985521094524411, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 768, 'emb_dropout': 0.13789597788038394, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22082944
torch.Size([180])


[I 2024-10-29 09:04:15,797] Trial 718 finished with value: 0.5274121776694156 and parameters: {'lr': 0.007154633934382134, 'wd': 0.00017071732832545756, 'warmup': 200, 'gamma': 0.9987351590311231, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.1357912257238472, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 718 finished with value: 0.5274121776694156 and parameters: {'lr': 0.007154633934382134, 'wd': 0.00017071732832545756, 'warmup': 200, 'gamma': 0.9987351590311231, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.1357912257238472, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22082944
torch.Size([180])


[I 2024-10-29 09:05:47,864] Trial 719 finished with value: 0.6013437482952854 and parameters: {'lr': 0.0066801257381137995, 'wd': 0.0001528370134747595, 'warmup': 200, 'gamma': 0.9988981949678452, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.12975485999116382, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 719 finished with value: 0.6013437482952854 and parameters: {'lr': 0.0066801257381137995, 'wd': 0.0001528370134747595, 'warmup': 200, 'gamma': 0.9988981949678452, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.12975485999116382, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22147584
torch.Size([180])


[I 2024-10-29 09:07:20,056] Trial 720 finished with value: 0.6224413524914433 and parameters: {'lr': 0.006827288748792695, 'wd': 0.00013871765296490316, 'warmup': 200, 'gamma': 0.9988909904180505, 'time_dim': 32, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.12912932816054715, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 720 finished with value: 0.6224413524914433 and parameters: {'lr': 0.006827288748792695, 'wd': 0.00013871765296490316, 'warmup': 200, 'gamma': 0.9988909904180505, 'time_dim': 32, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.12912932816054715, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22082944
torch.Size([180])


[I 2024-10-29 09:08:52,073] Trial 721 finished with value: 0.6113070829639531 and parameters: {'lr': 0.006980787451410322, 'wd': 0.00014336467053700382, 'warmup': 200, 'gamma': 0.9989246221031506, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.1320886516225932, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 721 finished with value: 0.6113070829639531 and parameters: {'lr': 0.006980787451410322, 'wd': 0.00014336467053700382, 'warmup': 200, 'gamma': 0.9989246221031506, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.1320886516225932, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22098368
torch.Size([180])


[I 2024-10-29 09:10:05,817] Trial 722 finished with value: 0.6385419844356269 and parameters: {'lr': 0.0062423719820016, 'wd': 0.00013306334012103387, 'warmup': 200, 'gamma': 0.9989663882073233, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.12698327448909025, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 722 finished with value: 0.6385419844356269 and parameters: {'lr': 0.0062423719820016, 'wd': 0.00013306334012103387, 'warmup': 200, 'gamma': 0.9989663882073233, 'time_dim': 16, 'patch_size': 64, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.12698327448909025, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 09:12:19,477] Trial 723 finished with value: 0.6112535630659144 and parameters: {'lr': 0.006254603752304732, 'wd': 0.00014552264926765172, 'warmup': 200, 'gamma': 0.998346805170409, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.12319884658200991, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 723 finished with value: 0.6112535630659144 and parameters: {'lr': 0.006254603752304732, 'wd': 0.00014552264926765172, 'warmup': 200, 'gamma': 0.998346805170409, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.12319884658200991, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 09:14:32,978] Trial 724 finished with value: 0.7135056789045046 and parameters: {'lr': 0.00497255382837971, 'wd': 0.00011761998473953368, 'warmup': 200, 'gamma': 0.9982301053041698, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.12304953106072432, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 724 finished with value: 0.7135056789045046 and parameters: {'lr': 0.00497255382837971, 'wd': 0.00011761998473953368, 'warmup': 200, 'gamma': 0.9982301053041698, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.12304953106072432, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 43984096
torch.Size([180])


[I 2024-10-29 09:18:38,317] Trial 725 finished with value: 0.635907264087876 and parameters: {'lr': 0.005419122838257144, 'wd': 0.00015031650123839576, 'warmup': 200, 'gamma': 0.9986052162538188, 'time_dim': 64, 'patch_size': 16, 'depth': 10, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.1251676354077957, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 725 finished with value: 0.635907264087876 and parameters: {'lr': 0.005419122838257144, 'wd': 0.00015031650123839576, 'warmup': 200, 'gamma': 0.9986052162538188, 'time_dim': 64, 'patch_size': 16, 'depth': 10, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.1251676354077957, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 09:20:51,803] Trial 726 finished with value: 0.6239208764926631 and parameters: {'lr': 0.006147777796403309, 'wd': 0.00014848398707890828, 'warmup': 200, 'gamma': 0.9989933934300632, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.1297664049583176, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.


Trial 726 finished with value: 0.6239208764926631 and parameters: {'lr': 0.006147777796403309, 'wd': 0.00014848398707890828, 'warmup': 200, 'gamma': 0.9989933934300632, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.1297664049583176, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 09:23:05,674] Trial 727 finished with value: 0.6460088676969052 and parameters: {'lr': 0.004500427662363786, 'wd': 0.00013290923268488182, 'warmup': 200, 'gamma': 0.9979542126164592, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.13113422225655058, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 727 finished with value: 0.6460088676969052 and parameters: {'lr': 0.004500427662363786, 'wd': 0.00013290923268488182, 'warmup': 200, 'gamma': 0.9979542126164592, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.13113422225655058, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 09:25:19,748] Trial 728 finished with value: 0.6718921009284596 and parameters: {'lr': 0.0055304970710839145, 'wd': 0.00016096301101783814, 'warmup': 200, 'gamma': 0.9984173367856972, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.13435709984364255, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 728 finished with value: 0.6718921009284596 and parameters: {'lr': 0.0055304970710839145, 'wd': 0.00016096301101783814, 'warmup': 200, 'gamma': 0.9984173367856972, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.13435709984364255, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 09:27:33,208] Trial 729 finished with value: 0.7129796314298893 and parameters: {'lr': 0.006191170040024371, 'wd': 0.00011784928408803293, 'warmup': 200, 'gamma': 0.9979522545678504, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.11977083449740214, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 729 finished with value: 0.7129796314298893 and parameters: {'lr': 0.006191170040024371, 'wd': 0.00011784928408803293, 'warmup': 200, 'gamma': 0.9979522545678504, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.11977083449740214, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 09:29:46,711] Trial 730 finished with value: 0.7335840012264399 and parameters: {'lr': 0.005660458478227769, 'wd': 0.00010460512221026852, 'warmup': 200, 'gamma': 0.9975412926430469, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.12602845694858517, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 730 finished with value: 0.7335840012264399 and parameters: {'lr': 0.005660458478227769, 'wd': 0.00010460512221026852, 'warmup': 200, 'gamma': 0.9975412926430469, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.12602845694858517, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 23072096
torch.Size([180])


[I 2024-10-29 09:32:05,623] Trial 731 finished with value: 0.6030152786806867 and parameters: {'lr': 0.006939932873175684, 'wd': 0.0001486428179130247, 'warmup': 150, 'gamma': 0.9989225107014312, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.13344427247519, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 731 finished with value: 0.6030152786806867 and parameters: {'lr': 0.006939932873175684, 'wd': 0.0001486428179130247, 'warmup': 150, 'gamma': 0.9989225107014312, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.13344427247519, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 23072096
torch.Size([180])


[I 2024-10-29 09:34:24,842] Trial 732 finished with value: 0.6433999028954309 and parameters: {'lr': 0.005098418483666624, 'wd': 0.00015181398084922196, 'warmup': 150, 'gamma': 0.9986325332212342, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.13160547854915808, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 732 finished with value: 0.6433999028954309 and parameters: {'lr': 0.005098418483666624, 'wd': 0.00015181398084922196, 'warmup': 150, 'gamma': 0.9986325332212342, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.13160547854915808, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 23072096
torch.Size([180])


[I 2024-10-29 09:36:44,022] Trial 733 finished with value: 0.696973946253978 and parameters: {'lr': 0.006711923482792796, 'wd': 0.00013046272434471458, 'warmup': 150, 'gamma': 0.9970998209227732, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.1328159779540544, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 733 finished with value: 0.696973946253978 and parameters: {'lr': 0.006711923482792796, 'wd': 0.00013046272434471458, 'warmup': 150, 'gamma': 0.9970998209227732, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.1328159779540544, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 23072096
torch.Size([180])


[I 2024-10-29 09:39:03,191] Trial 734 finished with value: 0.6310283141670889 and parameters: {'lr': 0.006612702680395491, 'wd': 0.00015566093549086921, 'warmup': 150, 'gamma': 0.9984949751802682, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.12633481925486983, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 734 finished with value: 0.6310283141670889 and parameters: {'lr': 0.006612702680395491, 'wd': 0.00015566093549086921, 'warmup': 150, 'gamma': 0.9984949751802682, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.12633481925486983, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 09:41:26,332] Trial 735 finished with value: 0.5361590981497991 and parameters: {'lr': 0.006181088936399636, 'wd': 0.0001715658693272719, 'warmup': 150, 'gamma': 0.9989466836861115, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12062485802949537, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 735 finished with value: 0.5361590981497991 and parameters: {'lr': 0.006181088936399636, 'wd': 0.0001715658693272719, 'warmup': 150, 'gamma': 0.9989466836861115, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12062485802949537, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 23072096
torch.Size([180])


[I 2024-10-29 09:43:45,007] Trial 736 finished with value: 0.660089361781759 and parameters: {'lr': 0.004300598489929464, 'wd': 0.00017574098166988941, 'warmup': 150, 'gamma': 0.9988628655866703, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.1163763272766746, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 736 finished with value: 0.660089361781759 and parameters: {'lr': 0.004300598489929464, 'wd': 0.00017574098166988941, 'warmup': 150, 'gamma': 0.9988628655866703, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.1163763272766746, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 09:46:08,539] Trial 737 finished with value: 0.5484374309073603 and parameters: {'lr': 0.005009537397192806, 'wd': 0.00012649766150520244, 'warmup': 150, 'gamma': 0.9989653020183034, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12863547507453532, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 737 finished with value: 0.5484374309073603 and parameters: {'lr': 0.005009537397192806, 'wd': 0.00012649766150520244, 'warmup': 150, 'gamma': 0.9989653020183034, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12863547507453532, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 09:48:31,451] Trial 738 finished with value: 0.5813803827060201 and parameters: {'lr': 0.004724237547909366, 'wd': 0.0001248009085993394, 'warmup': 150, 'gamma': 0.9989985738954967, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.11986042144489006, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 738 finished with value: 0.5813803827060201 and parameters: {'lr': 0.004724237547909366, 'wd': 0.0001248009085993394, 'warmup': 150, 'gamma': 0.9989985738954967, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.11986042144489006, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 09:50:54,528] Trial 739 finished with value: 0.7039539124197852 and parameters: {'lr': 0.004000972863452318, 'wd': 0.00012027412398126095, 'warmup': 150, 'gamma': 0.9979598085611607, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12216434241344282, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 739 finished with value: 0.7039539124197852 and parameters: {'lr': 0.004000972863452318, 'wd': 0.00012027412398126095, 'warmup': 150, 'gamma': 0.9979598085611607, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12216434241344282, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 09:53:17,164] Trial 740 finished with value: 0.6219832152171361 and parameters: {'lr': 0.004660689414144841, 'wd': 0.00014285543415502392, 'warmup': 150, 'gamma': 0.9989961883703449, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1206682684762473, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 740 finished with value: 0.6219832152171361 and parameters: {'lr': 0.004660689414144841, 'wd': 0.00014285543415502392, 'warmup': 150, 'gamma': 0.9989961883703449, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1206682684762473, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 09:55:39,959] Trial 741 finished with value: 0.6773818816954993 and parameters: {'lr': 0.0035726042560120103, 'wd': 0.00011798243567463782, 'warmup': 150, 'gamma': 0.9983493985762006, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.11823114622625373, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 741 finished with value: 0.6773818816954993 and parameters: {'lr': 0.0035726042560120103, 'wd': 0.00011798243567463782, 'warmup': 150, 'gamma': 0.9983493985762006, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.11823114622625373, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 09:58:02,689] Trial 742 finished with value: 0.6684276944174926 and parameters: {'lr': 0.0038976253293751756, 'wd': 0.00013610766504485884, 'warmup': 150, 'gamma': 0.9979161928872379, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.11138302905343726, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 742 finished with value: 0.6684276944174926 and parameters: {'lr': 0.0038976253293751756, 'wd': 0.00013610766504485884, 'warmup': 150, 'gamma': 0.9979161928872379, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.11138302905343726, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:00:25,691] Trial 743 finished with value: 0.6625849102886305 and parameters: {'lr': 0.004656115474602204, 'wd': 0.00014165410056784456, 'warmup': 150, 'gamma': 0.9973359305794837, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.11921957815154641, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 743 finished with value: 0.6625849102886305 and parameters: {'lr': 0.004656115474602204, 'wd': 0.00014165410056784456, 'warmup': 150, 'gamma': 0.9973359305794837, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.11921957815154641, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:02:48,663] Trial 744 finished with value: 0.7042634127196102 and parameters: {'lr': 0.005054700151666433, 'wd': 0.00016681980472490178, 'warmup': 150, 'gamma': 0.9982964281277508, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12986885757365418, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 744 finished with value: 0.7042634127196102 and parameters: {'lr': 0.005054700151666433, 'wd': 0.00016681980472490178, 'warmup': 150, 'gamma': 0.9982964281277508, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12986885757365418, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 25038176
torch.Size([180])


[I 2024-10-29 10:05:17,156] Trial 745 finished with value: 0.6151589440258325 and parameters: {'lr': 0.004287793895347358, 'wd': 0.00011852084636204112, 'warmup': 150, 'gamma': 0.9988469969993078, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.12436143265685282, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 745 finished with value: 0.6151589440258325 and parameters: {'lr': 0.004287793895347358, 'wd': 0.00011852084636204112, 'warmup': 150, 'gamma': 0.9988469969993078, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.12436143265685282, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24119776
torch.Size([180])


[I 2024-10-29 10:07:40,021] Trial 746 finished with value: 0.7183324066212158 and parameters: {'lr': 0.005122999802849057, 'wd': 0.00010342337473815568, 'warmup': 150, 'gamma': 0.9975606116500754, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12753744165734465, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 746 finished with value: 0.7183324066212158 and parameters: {'lr': 0.005122999802849057, 'wd': 0.00010342337473815568, 'warmup': 150, 'gamma': 0.9975606116500754, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12753744165734465, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:10:02,896] Trial 747 finished with value: 0.6305553222415133 and parameters: {'lr': 0.005418339747307733, 'wd': 0.0001695372585957558, 'warmup': 150, 'gamma': 0.9989783077620288, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1272578442130961, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 747 finished with value: 0.6305553222415133 and parameters: {'lr': 0.005418339747307733, 'wd': 0.0001695372585957558, 'warmup': 150, 'gamma': 0.9989783077620288, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1272578442130961, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:12:26,044] Trial 748 finished with value: 0.7056931799232767 and parameters: {'lr': 0.005402425270629406, 'wd': 0.0001385803994271513, 'warmup': 150, 'gamma': 0.9984011492691923, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12187532375020768, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 748 finished with value: 0.7056931799232767 and parameters: {'lr': 0.005402425270629406, 'wd': 0.0001385803994271513, 'warmup': 150, 'gamma': 0.9984011492691923, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12187532375020768, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 25262816
torch.Size([180])


[I 2024-10-29 10:14:55,014] Trial 749 finished with value: 0.7564526703984982 and parameters: {'lr': 0.005607071265425651, 'wd': 0.00015672868031947092, 'warmup': 150, 'gamma': 0.9978025030699065, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.12306600156781189, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 749 finished with value: 0.7564526703984982 and parameters: {'lr': 0.005607071265425651, 'wd': 0.00015672868031947092, 'warmup': 150, 'gamma': 0.9978025030699065, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.12306600156781189, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:17:17,766] Trial 750 finished with value: 0.670000213049496 and parameters: {'lr': 0.004941683222899952, 'wd': 0.00012530475329029507, 'warmup': 150, 'gamma': 0.998246401788926, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1322602131763405, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 750 finished with value: 0.670000213049496 and parameters: {'lr': 0.004941683222899952, 'wd': 0.00012530475329029507, 'warmup': 150, 'gamma': 0.998246401788926, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1322602131763405, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:19:40,754] Trial 751 finished with value: 0.6388407695219046 and parameters: {'lr': 0.004259571258645934, 'wd': 0.00017189390823260488, 'warmup': 150, 'gamma': 0.9985944613101335, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1339467620284577, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 751 finished with value: 0.6388407695219046 and parameters: {'lr': 0.004259571258645934, 'wd': 0.00017189390823260488, 'warmup': 150, 'gamma': 0.9985944613101335, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1339467620284577, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:22:03,487] Trial 752 finished with value: 0.7316391991168518 and parameters: {'lr': 0.0031207691964544905, 'wd': 0.00014634299196815856, 'warmup': 150, 'gamma': 0.9968308206146297, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.11587674259236046, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 752 finished with value: 0.7316391991168518 and parameters: {'lr': 0.0031207691964544905, 'wd': 0.00014634299196815856, 'warmup': 150, 'gamma': 0.9968308206146297, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.11587674259236046, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:24:26,432] Trial 753 finished with value: 0.6880448616533822 and parameters: {'lr': 0.005521142714901249, 'wd': 0.00010825499469114206, 'warmup': 150, 'gamma': 0.997856631063828, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12510306420918388, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 753 finished with value: 0.6880448616533822 and parameters: {'lr': 0.005521142714901249, 'wd': 0.00010825499469114206, 'warmup': 150, 'gamma': 0.997856631063828, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12510306420918388, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 23072096
torch.Size([180])


[I 2024-10-29 10:26:45,493] Trial 754 finished with value: 0.6286919127893309 and parameters: {'lr': 0.005802076362085249, 'wd': 0.00015117444024788642, 'warmup': 150, 'gamma': 0.9988479462293763, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.13542961229806313, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 754 finished with value: 0.6286919127893309 and parameters: {'lr': 0.005802076362085249, 'wd': 0.00015117444024788642, 'warmup': 150, 'gamma': 0.9988479462293763, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.13542961229806313, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:29:07,422] Trial 755 finished with value: 0.8248197831430041 and parameters: {'lr': 1.6205243967035872e-05, 'wd': 0.00017607449400256075, 'warmup': 150, 'gamma': 0.9989860790114904, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13659662155603117, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 755 finished with value: 0.8248197831430041 and parameters: {'lr': 1.6205243967035872e-05, 'wd': 0.00017607449400256075, 'warmup': 150, 'gamma': 0.9989860790114904, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13659662155603117, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:31:30,456] Trial 756 finished with value: 0.671032573208348 and parameters: {'lr': 0.006147698520446579, 'wd': 0.00012887264485446348, 'warmup': 150, 'gamma': 0.9982739261942136, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12921806643126119, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 756 finished with value: 0.671032573208348 and parameters: {'lr': 0.006147698520446579, 'wd': 0.00012887264485446348, 'warmup': 150, 'gamma': 0.9982739261942136, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12921806643126119, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 25038176
torch.Size([180])


[I 2024-10-29 10:33:58,989] Trial 757 finished with value: 0.7371441426983067 and parameters: {'lr': 0.00470159127521813, 'wd': 0.00018453901778965514, 'warmup': 150, 'gamma': 0.9972702049412885, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.11301169619469054, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 757 finished with value: 0.7371441426983067 and parameters: {'lr': 0.00470159127521813, 'wd': 0.00018453901778965514, 'warmup': 150, 'gamma': 0.9972702049412885, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.11301169619469054, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:36:21,756] Trial 758 finished with value: 0.6312809800623549 and parameters: {'lr': 0.006143337284556715, 'wd': 0.00016397355712523704, 'warmup': 150, 'gamma': 0.9982340050771521, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13083983216683628, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 758 finished with value: 0.6312809800623549 and parameters: {'lr': 0.006143337284556715, 'wd': 0.00016397355712523704, 'warmup': 150, 'gamma': 0.9982340050771521, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13083983216683628, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:38:44,670] Trial 759 finished with value: 0.7170414323351799 and parameters: {'lr': 0.0062790839269217325, 'wd': 0.00010795936210045597, 'warmup': 150, 'gamma': 0.9979715637989405, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13676149244135, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 759 finished with value: 0.7170414323351799 and parameters: {'lr': 0.0062790839269217325, 'wd': 0.00010795936210045597, 'warmup': 150, 'gamma': 0.9979715637989405, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13676149244135, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:41:07,671] Trial 760 finished with value: 0.7219779551974514 and parameters: {'lr': 0.006551698086915384, 'wd': 0.00014153920718399014, 'warmup': 150, 'gamma': 0.9972439160642979, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.123984893458076, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 760 finished with value: 0.7219779551974514 and parameters: {'lr': 0.006551698086915384, 'wd': 0.00014153920718399014, 'warmup': 150, 'gamma': 0.9972439160642979, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.123984893458076, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:43:30,181] Trial 761 finished with value: 0.6294036293498736 and parameters: {'lr': 0.003397981773058818, 'wd': 0.00018427217224489855, 'warmup': 150, 'gamma': 0.998952423766977, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12924523170739755, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 761 finished with value: 0.6294036293498736 and parameters: {'lr': 0.003397981773058818, 'wd': 0.00018427217224489855, 'warmup': 150, 'gamma': 0.998952423766977, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12924523170739755, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 23072096
torch.Size([180])


[I 2024-10-29 10:45:49,090] Trial 762 finished with value: 0.6141390484051842 and parameters: {'lr': 0.004891407041065194, 'wd': 0.00015243958801340924, 'warmup': 150, 'gamma': 0.998974386333552, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.11982566063141017, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 762 finished with value: 0.6141390484051842 and parameters: {'lr': 0.004891407041065194, 'wd': 0.00015243958801340924, 'warmup': 150, 'gamma': 0.998974386333552, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.11982566063141017, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:48:12,047] Trial 763 finished with value: 0.6327713893903355 and parameters: {'lr': 0.00680828234420223, 'wd': 0.0001159739608217709, 'warmup': 150, 'gamma': 0.9982041538808988, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1321382102643953, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 763 finished with value: 0.6327713893903355 and parameters: {'lr': 0.00680828234420223, 'wd': 0.0001159739608217709, 'warmup': 150, 'gamma': 0.9982041538808988, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1321382102643953, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 10:50:33,962] Trial 764 finished with value: 0.7640005263948484 and parameters: {'lr': 3.3452342494197856e-05, 'wd': 0.00013494864283307943, 'warmup': 150, 'gamma': 0.9976926947972408, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13655379104687893, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 764 finished with value: 0.7640005263948484 and parameters: {'lr': 3.3452342494197856e-05, 'wd': 0.00013494864283307943, 'warmup': 150, 'gamma': 0.9976926947972408, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13655379104687893, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 23072096
torch.Size([180])


[I 2024-10-29 10:52:52,527] Trial 765 finished with value: 0.6522733424659678 and parameters: {'lr': 0.00582385057165357, 'wd': 0.00018994668359225377, 'warmup': 150, 'gamma': 0.9984806254440399, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.13772320395307303, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.


Trial 765 finished with value: 0.6522733424659678 and parameters: {'lr': 0.00582385057165357, 'wd': 0.00018994668359225377, 'warmup': 150, 'gamma': 0.9984806254440399, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.13772320395307303, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 25038176
torch.Size([180])


[I 2024-10-29 10:55:21,007] Trial 766 finished with value: 0.6312375627255732 and parameters: {'lr': 0.00688455466150863, 'wd': 0.00015661012551707076, 'warmup': 150, 'gamma': 0.998468763514408, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.1276972408244237, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 766 finished with value: 0.6312375627255732 and parameters: {'lr': 0.00688455466150863, 'wd': 0.00015661012551707076, 'warmup': 150, 'gamma': 0.998468763514408, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.1276972408244237, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 23072096
torch.Size([180])


[I 2024-10-29 10:57:39,268] Trial 767 finished with value: 0.7245491820185695 and parameters: {'lr': 0.005280135986124356, 'wd': 0.00017682789975544125, 'warmup': 150, 'gamma': 0.9977054389537315, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.12410829951022645, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 767 finished with value: 0.7245491820185695 and parameters: {'lr': 0.005280135986124356, 'wd': 0.00017682789975544125, 'warmup': 150, 'gamma': 0.9977054389537315, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.12410829951022645, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24119776
torch.Size([180])


[I 2024-10-29 11:00:02,301] Trial 768 finished with value: 0.6782821850090907 and parameters: {'lr': 0.006055761232540945, 'wd': 0.00012360122548919826, 'warmup': 150, 'gamma': 0.9983196939252095, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13623776176295418, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 768 finished with value: 0.6782821850090907 and parameters: {'lr': 0.006055761232540945, 'wd': 0.00012360122548919826, 'warmup': 150, 'gamma': 0.9983196939252095, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13623776176295418, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 11:02:15,611] Trial 769 finished with value: 0.6174124863113241 and parameters: {'lr': 0.0069500994964505685, 'wd': 9.792317966814315e-05, 'warmup': 150, 'gamma': 0.9989745237990727, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.13179929381012392, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 769 finished with value: 0.6174124863113241 and parameters: {'lr': 0.0069500994964505685, 'wd': 9.792317966814315e-05, 'warmup': 150, 'gamma': 0.9989745237990727, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.13179929381012392, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 11:04:38,266] Trial 770 finished with value: 0.7052380763958219 and parameters: {'lr': 0.003844236809298446, 'wd': 0.00015686971641390614, 'warmup': 150, 'gamma': 0.9972626882125541, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.10839382246997566, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 770 finished with value: 0.7052380763958219 and parameters: {'lr': 0.003844236809298446, 'wd': 0.00015686971641390614, 'warmup': 150, 'gamma': 0.9972626882125541, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.10839382246997566, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 11:07:01,200] Trial 771 finished with value: 0.6789446335640379 and parameters: {'lr': 0.007068099243310967, 'wd': 0.00018741879000873618, 'warmup': 150, 'gamma': 0.9978726895551778, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13944239376654222, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 771 finished with value: 0.6789446335640379 and parameters: {'lr': 0.007068099243310967, 'wd': 0.00018741879000873618, 'warmup': 150, 'gamma': 0.9978726895551778, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13944239376654222, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 11:09:15,038] Trial 772 finished with value: 0.5976198739277238 and parameters: {'lr': 0.0058579428227694, 'wd': 0.00012542015547726269, 'warmup': 150, 'gamma': 0.9989911808477563, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.13431895945075173, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 772 finished with value: 0.5976198739277238 and parameters: {'lr': 0.0058579428227694, 'wd': 0.00012542015547726269, 'warmup': 150, 'gamma': 0.9989911808477563, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.13431895945075173, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 23072096
torch.Size([180])


[I 2024-10-29 11:11:32,909] Trial 773 finished with value: 0.742532735654946 and parameters: {'lr': 0.0006165454960402629, 'wd': 0.00012626617650300816, 'warmup': 150, 'gamma': 0.9970088666188044, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.11890431427746494, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 773 finished with value: 0.742532735654946 and parameters: {'lr': 0.0006165454960402629, 'wd': 0.00012626617650300816, 'warmup': 150, 'gamma': 0.9970088666188044, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.11890431427746494, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24279776
torch.Size([180])


[I 2024-10-29 11:13:56,493] Trial 774 finished with value: 0.6795808283077118 and parameters: {'lr': 0.004363890540210216, 'wd': 9.201431054614028e-05, 'warmup': 150, 'gamma': 0.9984780313603603, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1346967446110788, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 774 finished with value: 0.6795808283077118 and parameters: {'lr': 0.004363890540210216, 'wd': 9.201431054614028e-05, 'warmup': 150, 'gamma': 0.9984780313603603, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.1346967446110788, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 23072096
torch.Size([180])


[I 2024-10-29 11:16:15,621] Trial 775 finished with value: 0.6763154852901961 and parameters: {'lr': 0.005101134005698463, 'wd': 0.00010928768991410347, 'warmup': 150, 'gamma': 0.9978360840501799, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.13843409014725236, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 775 finished with value: 0.6763154852901961 and parameters: {'lr': 0.005101134005698463, 'wd': 0.00010928768991410347, 'warmup': 150, 'gamma': 0.9978360840501799, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.13843409014725236, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 39425376
torch.Size([180])


[I 2024-10-29 11:19:57,904] Trial 776 finished with value: 0.6579961608503516 and parameters: {'lr': 0.0045400016564590895, 'wd': 0.00019929607860011064, 'warmup': 150, 'gamma': 0.9985025905767942, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.12517454775802325, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 776 finished with value: 0.6579961608503516 and parameters: {'lr': 0.0045400016564590895, 'wd': 0.00019929607860011064, 'warmup': 150, 'gamma': 0.9985025905767942, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.12517454775802325, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 23072096
torch.Size([180])


[I 2024-10-29 11:22:16,449] Trial 777 finished with value: 0.6413234438994727 and parameters: {'lr': 0.005687014082364036, 'wd': 0.0001282028661948102, 'warmup': 150, 'gamma': 0.9989268893734036, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.13386285794242572, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 777 finished with value: 0.6413234438994727 and parameters: {'lr': 0.005687014082364036, 'wd': 0.0001282028661948102, 'warmup': 150, 'gamma': 0.9989268893734036, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.13386285794242572, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 11:24:39,251] Trial 778 finished with value: 0.6519689580006321 and parameters: {'lr': 0.006204322914753079, 'wd': 0.00016845926357855553, 'warmup': 150, 'gamma': 0.9989877535128843, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13982613242423741, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 778 finished with value: 0.6519689580006321 and parameters: {'lr': 0.006204322914753079, 'wd': 0.00016845926357855553, 'warmup': 150, 'gamma': 0.9989877535128843, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13982613242423741, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 23072096
torch.Size([180])


[I 2024-10-29 11:26:58,075] Trial 779 finished with value: 0.6731188774924931 and parameters: {'lr': 0.005066524356181141, 'wd': 0.00013721133018696304, 'warmup': 150, 'gamma': 0.9981945645119928, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.12828597075696488, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 779 finished with value: 0.6731188774924931 and parameters: {'lr': 0.005066524356181141, 'wd': 0.00013721133018696304, 'warmup': 150, 'gamma': 0.9981945645119928, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.12828597075696488, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 25038176
torch.Size([180])


[I 2024-10-29 11:29:26,975] Trial 780 finished with value: 0.6318249373089815 and parameters: {'lr': 0.005745684610427074, 'wd': 0.00020085705768092203, 'warmup': 150, 'gamma': 0.9978787126678164, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.13955009373667113, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 780 finished with value: 0.6318249373089815 and parameters: {'lr': 0.005745684610427074, 'wd': 0.00020085705768092203, 'warmup': 150, 'gamma': 0.9978787126678164, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.13955009373667113, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26021216
torch.Size([180])


[I 2024-10-29 11:31:57,834] Trial 781 finished with value: 0.7286625209994115 and parameters: {'lr': 0.007350807844926449, 'wd': 0.0001653123900729032, 'warmup': 150, 'gamma': 0.9967749565252049, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 12, 'mlp_dim': 1024, 'emb_dropout': 0.13495909632985675, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 781 finished with value: 0.7286625209994115 and parameters: {'lr': 0.007350807844926449, 'wd': 0.0001653123900729032, 'warmup': 150, 'gamma': 0.9967749565252049, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 12, 'mlp_dim': 1024, 'emb_dropout': 0.13495909632985675, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 11:34:12,001] Trial 782 finished with value: 0.6787709550516193 and parameters: {'lr': 0.0062184151000438045, 'wd': 0.00011045687289470773, 'warmup': 150, 'gamma': 0.9977081223101474, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.11598886016318484, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 782 finished with value: 0.6787709550516193 and parameters: {'lr': 0.0062184151000438045, 'wd': 0.00011045687289470773, 'warmup': 150, 'gamma': 0.9977081223101474, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.11598886016318484, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 11:36:25,760] Trial 783 finished with value: 0.6433668148230145 and parameters: {'lr': 0.007414672395850704, 'wd': 0.00019930257567976655, 'warmup': 150, 'gamma': 0.9983229881376234, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.09757378166505594, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 783 finished with value: 0.6433668148230145 and parameters: {'lr': 0.007414672395850704, 'wd': 0.00019930257567976655, 'warmup': 150, 'gamma': 0.9983229881376234, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.09757378166505594, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 23072096
torch.Size([180])


[I 2024-10-29 11:38:44,735] Trial 784 finished with value: 0.713137566770796 and parameters: {'lr': 0.004491735805253411, 'wd': 0.0001396640874108901, 'warmup': 150, 'gamma': 0.9984219579759985, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.1300088277562022, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 784 finished with value: 0.713137566770796 and parameters: {'lr': 0.004491735805253411, 'wd': 0.0001396640874108901, 'warmup': 150, 'gamma': 0.9984219579759985, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 9, 'mlp_dim': 1024, 'emb_dropout': 0.1300088277562022, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 11:41:07,548] Trial 785 finished with value: 0.7379699423283476 and parameters: {'lr': 0.006475504235872648, 'wd': 0.0001551873229653956, 'warmup': 150, 'gamma': 0.997344718432699, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.14190671962001908, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 785 finished with value: 0.7379699423283476 and parameters: {'lr': 0.006475504235872648, 'wd': 0.0001551873229653956, 'warmup': 150, 'gamma': 0.997344718432699, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.14190671962001908, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 11:43:20,147] Trial 786 finished with value: 0.7706219383449423 and parameters: {'lr': 0.00010569140657314279, 'wd': 0.00018013350956787904, 'warmup': 150, 'gamma': 0.9989877842805843, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.1378964187527982, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.


Trial 786 finished with value: 0.7706219383449423 and parameters: {'lr': 0.00010569140657314279, 'wd': 0.00018013350956787904, 'warmup': 150, 'gamma': 0.9989877842805843, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.1378964187527982, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 11:45:33,971] Trial 787 finished with value: 0.6463561097733705 and parameters: {'lr': 0.0076543951005252665, 'wd': 9.69391046930416e-05, 'warmup': 150, 'gamma': 0.9984303157336087, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.1246984436949288, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 787 finished with value: 0.6463561097733705 and parameters: {'lr': 0.0076543951005252665, 'wd': 9.69391046930416e-05, 'warmup': 150, 'gamma': 0.9984303157336087, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.1246984436949288, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 21106016
torch.Size([180])


[I 2024-10-29 11:47:44,016] Trial 788 finished with value: 0.6661561613505298 and parameters: {'lr': 0.005338259097263278, 'wd': 0.00020106909949638534, 'warmup': 150, 'gamma': 0.9975361039256078, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13121943233881084, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 788 finished with value: 0.6661561613505298 and parameters: {'lr': 0.005338259097263278, 'wd': 0.00020106909949638534, 'warmup': 150, 'gamma': 0.9975361039256078, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13121943233881084, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 24055136
torch.Size([180])


[I 2024-10-29 11:50:06,905] Trial 789 finished with value: 0.6170675637212021 and parameters: {'lr': 0.007560246517357384, 'wd': 0.00021004692368710567, 'warmup': 150, 'gamma': 0.9984312338573809, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12225217344684502, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 789 finished with value: 0.6170675637212021 and parameters: {'lr': 0.007560246517357384, 'wd': 0.00021004692368710567, 'warmup': 150, 'gamma': 0.9984312338573809, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.12225217344684502, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22153696
torch.Size([180])


[I 2024-10-29 11:52:20,761] Trial 790 finished with value: 0.6474740257420916 and parameters: {'lr': 0.006381133094292065, 'wd': 0.0001251891856939845, 'warmup': 150, 'gamma': 0.9979104396004023, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.13632458158068028, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 790 finished with value: 0.6474740257420916 and parameters: {'lr': 0.006381133094292065, 'wd': 0.0001251891856939845, 'warmup': 150, 'gamma': 0.9979104396004023, 'time_dim': 32, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.13632458158068028, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 11:54:20,550] Trial 791 finished with value: 0.6133804523003437 and parameters: {'lr': 0.00741613008665864, 'wd': 0.00016644394444141798, 'warmup': 150, 'gamma': 0.9989623862791955, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14272377635832187, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 791 finished with value: 0.6133804523003437 and parameters: {'lr': 0.00741613008665864, 'wd': 0.00016644394444141798, 'warmup': 150, 'gamma': 0.9989623862791955, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14272377635832187, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22089056
torch.Size([180])


[I 2024-10-29 11:56:33,195] Trial 792 finished with value: 0.7460253778848366 and parameters: {'lr': 0.0009226103438701926, 'wd': 0.00011426183624598507, 'warmup': 300, 'gamma': 0.9981016456638447, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.1333445105285872, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 792 finished with value: 0.7460253778848366 and parameters: {'lr': 0.0009226103438701926, 'wd': 0.00011426183624598507, 'warmup': 300, 'gamma': 0.9981016456638447, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 8, 'mlp_dim': 1024, 'emb_dropout': 0.1333445105285872, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22312416
torch.Size([180])


[I 2024-10-29 11:58:46,283] Trial 793 finished with value: 0.7327571985790798 and parameters: {'lr': 0.005446381113639947, 'wd': 0.000151310818160483, 'warmup': 150, 'gamma': 0.997173492754345, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 768, 'emb_dropout': 0.13953881457383488, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 793 finished with value: 0.7327571985790798 and parameters: {'lr': 0.005446381113639947, 'wd': 0.000151310818160483, 'warmup': 150, 'gamma': 0.997173492754345, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 10, 'mlp_dim': 768, 'emb_dropout': 0.13953881457383488, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 21106016
torch.Size([180])


[I 2024-10-29 12:00:56,414] Trial 794 finished with value: 0.7375414166359131 and parameters: {'lr': 0.003821893150017599, 'wd': 0.00017130877644068798, 'warmup': 150, 'gamma': 0.9965882410081589, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1283740865529984, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 794 finished with value: 0.7375414166359131 and parameters: {'lr': 0.003821893150017599, 'wd': 0.00017130877644068798, 'warmup': 150, 'gamma': 0.9965882410081589, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1283740865529984, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 21106016
torch.Size([180])


[I 2024-10-29 12:03:06,209] Trial 795 finished with value: 0.5325033772958178 and parameters: {'lr': 0.007925509709444237, 'wd': 0.00020770327278868183, 'warmup': 150, 'gamma': 0.9989757840145475, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13516287702770013, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 795 finished with value: 0.5325033772958178 and parameters: {'lr': 0.007925509709444237, 'wd': 0.00020770327278868183, 'warmup': 150, 'gamma': 0.9989757840145475, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13516287702770013, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:05:05,535] Trial 796 finished with value: 0.625522953336368 and parameters: {'lr': 0.008131524068139016, 'wd': 0.00020912674530810396, 'warmup': 150, 'gamma': 0.9989388114625798, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1346596085811997, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.


Trial 796 finished with value: 0.625522953336368 and parameters: {'lr': 0.008131524068139016, 'wd': 0.00020912674530810396, 'warmup': 150, 'gamma': 0.9989388114625798, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1346596085811997, 'schedule': 'linear'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 21106016
torch.Size([180])


[I 2024-10-29 12:07:15,621] Trial 797 finished with value: 0.6970149203335263 and parameters: {'lr': 0.008202991580906437, 'wd': 0.00022422151118917073, 'warmup': 150, 'gamma': 0.9978556851708147, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1404709293538405, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 797 finished with value: 0.6970149203335263 and parameters: {'lr': 0.008202991580906437, 'wd': 0.00022422151118917073, 'warmup': 150, 'gamma': 0.9978556851708147, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1404709293538405, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:09:15,088] Trial 798 finished with value: 0.5954601326000727 and parameters: {'lr': 0.007951638198728224, 'wd': 0.000200369063687365, 'warmup': 150, 'gamma': 0.9989424014725906, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13644047914493762, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 798 finished with value: 0.5954601326000727 and parameters: {'lr': 0.007951638198728224, 'wd': 0.000200369063687365, 'warmup': 150, 'gamma': 0.9989424014725906, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13644047914493762, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:11:14,499] Trial 799 finished with value: 0.6620283189734881 and parameters: {'lr': 0.00710989170468482, 'wd': 0.00017951528834341888, 'warmup': 150, 'gamma': 0.9984317299342029, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13222463236589668, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 799 finished with value: 0.6620283189734881 and parameters: {'lr': 0.00710989170468482, 'wd': 0.00017951528834341888, 'warmup': 150, 'gamma': 0.9984317299342029, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13222463236589668, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:13:13,670] Trial 800 finished with value: 0.57486833717798 and parameters: {'lr': 0.007056380185644935, 'wd': 0.00018631273728495835, 'warmup': 150, 'gamma': 0.9989683325214537, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1422737412021322, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 800 finished with value: 0.57486833717798 and parameters: {'lr': 0.007056380185644935, 'wd': 0.00018631273728495835, 'warmup': 150, 'gamma': 0.9989683325214537, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1422737412021322, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:15:12,889] Trial 801 finished with value: 0.5625495427989023 and parameters: {'lr': 0.0064072803654724, 'wd': 0.0001993308258836, 'warmup': 150, 'gamma': 0.9989859752506586, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14152826962926132, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 801 finished with value: 0.5625495427989023 and parameters: {'lr': 0.0064072803654724, 'wd': 0.0001993308258836, 'warmup': 150, 'gamma': 0.9989859752506586, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14152826962926132, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:17:12,072] Trial 802 finished with value: 0.5646691218222023 and parameters: {'lr': 0.005884777800306948, 'wd': 0.00021583097355789416, 'warmup': 150, 'gamma': 0.9989345381871049, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14269083664711307, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 802 finished with value: 0.5646691218222023 and parameters: {'lr': 0.005884777800306948, 'wd': 0.00021583097355789416, 'warmup': 150, 'gamma': 0.9989345381871049, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14269083664711307, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:19:11,358] Trial 803 finished with value: 0.6746385598745808 and parameters: {'lr': 0.0047231875905231, 'wd': 0.00022458660758016438, 'warmup': 150, 'gamma': 0.9986249404946189, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1417143459921658, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 803 finished with value: 0.6746385598745808 and parameters: {'lr': 0.0047231875905231, 'wd': 0.00022458660758016438, 'warmup': 150, 'gamma': 0.9986249404946189, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1417143459921658, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:21:10,180] Trial 804 finished with value: 0.7270536806426179 and parameters: {'lr': 0.00040015251709548094, 'wd': 0.0002186248353199274, 'warmup': 150, 'gamma': 0.9982638414434002, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14408594540425027, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 804 finished with value: 0.7270536806426179 and parameters: {'lr': 0.00040015251709548094, 'wd': 0.0002186248353199274, 'warmup': 150, 'gamma': 0.9982638414434002, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14408594540425027, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:23:09,132] Trial 805 finished with value: 0.7373534277541632 and parameters: {'lr': 0.0013563147528751957, 'wd': 0.00023933454766551814, 'warmup': 150, 'gamma': 0.9983918273129304, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14176801218156423, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 805 finished with value: 0.7373534277541632 and parameters: {'lr': 0.0013563147528751957, 'wd': 0.00023933454766551814, 'warmup': 150, 'gamma': 0.9983918273129304, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14176801218156423, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:25:08,665] Trial 806 finished with value: 0.6175920663612489 and parameters: {'lr': 0.006028707650788983, 'wd': 0.0002572711554743963, 'warmup': 150, 'gamma': 0.9989607004735985, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14065556062591292, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 806 finished with value: 0.6175920663612489 and parameters: {'lr': 0.006028707650788983, 'wd': 0.0002572711554743963, 'warmup': 150, 'gamma': 0.9989607004735985, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14065556062591292, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:27:08,441] Trial 807 finished with value: 0.7100423439863653 and parameters: {'lr': 0.005261764963316855, 'wd': 0.00019171915384521768, 'warmup': 150, 'gamma': 0.9976607276855605, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1418243182433769, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.


Trial 807 finished with value: 0.7100423439863653 and parameters: {'lr': 0.005261764963316855, 'wd': 0.00019171915384521768, 'warmup': 150, 'gamma': 0.9976607276855605, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1418243182433769, 'schedule': 'constant'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:29:07,902] Trial 808 finished with value: 0.664010647391792 and parameters: {'lr': 0.005585531117553218, 'wd': 0.00021043877960248713, 'warmup': 150, 'gamma': 0.9983734319224397, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14442788205583423, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 808 finished with value: 0.664010647391792 and parameters: {'lr': 0.005585531117553218, 'wd': 0.00021043877960248713, 'warmup': 150, 'gamma': 0.9983734319224397, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14442788205583423, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:31:07,190] Trial 809 finished with value: 0.6570012172082377 and parameters: {'lr': 0.0045441103538218725, 'wd': 0.0002501235316751376, 'warmup': 150, 'gamma': 0.9989374567608722, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13789367417007084, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 809 finished with value: 0.6570012172082377 and parameters: {'lr': 0.0045441103538218725, 'wd': 0.0002501235316751376, 'warmup': 150, 'gamma': 0.9989374567608722, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13789367417007084, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:33:06,539] Trial 810 finished with value: 0.7116222895442403 and parameters: {'lr': 0.006162256397895978, 'wd': 0.0002130851888828648, 'warmup': 150, 'gamma': 0.9977322647482361, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14479338685179347, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 810 finished with value: 0.7116222895442403 and parameters: {'lr': 0.006162256397895978, 'wd': 0.0002130851888828648, 'warmup': 150, 'gamma': 0.9977322647482361, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14479338685179347, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:35:05,907] Trial 811 finished with value: 0.6874956569820552 and parameters: {'lr': 0.004035450261483882, 'wd': 0.0002768951608150621, 'warmup': 150, 'gamma': 0.9989761687392784, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1399219601721963, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 811 finished with value: 0.6874956569820552 and parameters: {'lr': 0.004035450261483882, 'wd': 0.0002768951608150621, 'warmup': 150, 'gamma': 0.9989761687392784, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1399219601721963, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 15459296
torch.Size([180])


[I 2024-10-29 12:36:45,230] Trial 812 finished with value: 0.6770202732252999 and parameters: {'lr': 0.0026136883328747727, 'wd': 0.00019508230587117264, 'warmup': 150, 'gamma': 0.9980474240856074, 'time_dim': 32, 'patch_size': 16, 'depth': 4, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13754154030481697, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 812 finished with value: 0.6770202732252999 and parameters: {'lr': 0.0026136883328747727, 'wd': 0.00019508230587117264, 'warmup': 150, 'gamma': 0.9980474240856074, 'time_dim': 32, 'patch_size': 16, 'depth': 4, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13754154030481697, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19138656
torch.Size([180])


[I 2024-10-29 12:38:44,474] Trial 813 finished with value: 0.7032175131094938 and parameters: {'lr': 0.005108065315493213, 'wd': 0.0002456673730773078, 'warmup': 150, 'gamma': 0.9983909349277255, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14367566032763515, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 813 finished with value: 0.7032175131094938 and parameters: {'lr': 0.005108065315493213, 'wd': 0.0002456673730773078, 'warmup': 150, 'gamma': 0.9983909349277255, 'time_dim': 16, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14367566032763515, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 12:41:43,887] Trial 814 finished with value: 0.5820576923494912 and parameters: {'lr': 0.006994699832182379, 'wd': 0.00021352855753147592, 'warmup': 150, 'gamma': 0.9989816847638078, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13735168493949493, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 814 finished with value: 0.5820576923494912 and parameters: {'lr': 0.006994699832182379, 'wd': 0.00021352855753147592, 'warmup': 150, 'gamma': 0.9989816847638078, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13735168493949493, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 37858656
torch.Size([180])


[I 2024-10-29 12:45:23,508] Trial 815 finished with value: 0.7431182517665854 and parameters: {'lr': 0.007000284420123947, 'wd': 0.00022513528114928688, 'warmup': 150, 'gamma': 0.9974013991016747, 'time_dim': 16, 'patch_size': 16, 'depth': 10, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14408051499593869, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 815 finished with value: 0.7431182517665854 and parameters: {'lr': 0.007000284420123947, 'wd': 0.00022513528114928688, 'warmup': 150, 'gamma': 0.9974013991016747, 'time_dim': 16, 'patch_size': 16, 'depth': 10, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14408051499593869, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 19363296
torch.Size([180])


[I 2024-10-29 12:47:23,179] Trial 816 finished with value: 0.6361332441585484 and parameters: {'lr': 0.007639506207921641, 'wd': 0.00018755141329065048, 'warmup': 150, 'gamma': 0.9980960897710782, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.10397785450973682, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 816 finished with value: 0.6361332441585484 and parameters: {'lr': 0.007639506207921641, 'wd': 0.00018755141329065048, 'warmup': 150, 'gamma': 0.9980960897710782, 'time_dim': 64, 'patch_size': 16, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.10397785450973682, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 12:50:22,470] Trial 817 finished with value: 0.7179455810861731 and parameters: {'lr': 0.006991196419957764, 'wd': 0.0002611447762896623, 'warmup': 150, 'gamma': 0.9983285282655833, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14708960719227868, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 817 finished with value: 0.7179455810861731 and parameters: {'lr': 0.006991196419957764, 'wd': 0.0002611447762896623, 'warmup': 150, 'gamma': 0.9983285282655833, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14708960719227868, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 34114656
torch.Size([180])


[I 2024-10-29 12:53:41,840] Trial 818 finished with value: 0.745676642133492 and parameters: {'lr': 0.007854692321316076, 'wd': 0.0002173765535842972, 'warmup': 150, 'gamma': 0.9729153610915693, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13824907116928017, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 818 finished with value: 0.745676642133492 and parameters: {'lr': 0.007854692321316076, 'wd': 0.0002173765535842972, 'warmup': 150, 'gamma': 0.9729153610915693, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13824907116928017, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 34114656
torch.Size([180])


[I 2024-10-29 12:56:59,696] Trial 819 finished with value: 0.7713717772064189 and parameters: {'lr': 0.00648954406294357, 'wd': 0.0003082592364334572, 'warmup': 150, 'gamma': 0.9969276860206708, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14059578857132296, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 819 finished with value: 0.7713717772064189 and parameters: {'lr': 0.00648954406294357, 'wd': 0.0003082592364334572, 'warmup': 150, 'gamma': 0.9969276860206708, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14059578857132296, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 41602656
torch.Size([180])


[I 2024-10-29 13:00:59,490] Trial 820 finished with value: 0.6598757362692388 and parameters: {'lr': 0.008126839865671467, 'wd': 0.00023803764215990888, 'warmup': 150, 'gamma': 0.9979025821042894, 'time_dim': 16, 'patch_size': 16, 'depth': 11, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14595885362159292, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 820 finished with value: 0.6598757362692388 and parameters: {'lr': 0.008126839865671467, 'wd': 0.00023803764215990888, 'warmup': 150, 'gamma': 0.9979025821042894, 'time_dim': 16, 'patch_size': 16, 'depth': 11, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14595885362159292, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 37858656
torch.Size([180])


[I 2024-10-29 13:04:39,451] Trial 821 finished with value: 0.5914102815845286 and parameters: {'lr': 0.007324612247795516, 'wd': 0.00019337739749036002, 'warmup': 150, 'gamma': 0.9989442225044031, 'time_dim': 16, 'patch_size': 16, 'depth': 10, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13640011782828465, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 821 finished with value: 0.5914102815845286 and parameters: {'lr': 0.007324612247795516, 'wd': 0.00019337739749036002, 'warmup': 150, 'gamma': 0.9989442225044031, 'time_dim': 16, 'patch_size': 16, 'depth': 10, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13640011782828465, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 15394656
torch.Size([180])


[I 2024-10-29 13:06:19,804] Trial 822 finished with value: 0.6500785411268655 and parameters: {'lr': 0.005968017400909, 'wd': 0.00020046108653560301, 'warmup': 150, 'gamma': 0.998935908826543, 'time_dim': 16, 'patch_size': 16, 'depth': 4, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13542737960645929, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 822 finished with value: 0.6500785411268655 and parameters: {'lr': 0.005968017400909, 'wd': 0.00020046108653560301, 'warmup': 150, 'gamma': 0.998935908826543, 'time_dim': 16, 'patch_size': 16, 'depth': 4, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13542737960645929, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 13:08:59,426] Trial 823 finished with value: 0.5775454096031268 and parameters: {'lr': 0.006736704926362395, 'wd': 0.0001856574562286381, 'warmup': 150, 'gamma': 0.9989873160080761, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13758763742017874, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 823 finished with value: 0.5775454096031268 and parameters: {'lr': 0.006736704926362395, 'wd': 0.0001856574562286381, 'warmup': 150, 'gamma': 0.9989873160080761, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13758763742017874, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 13:11:38,783] Trial 824 finished with value: 0.7505782227192923 and parameters: {'lr': 0.005673613545463989, 'wd': 0.00017971684495354857, 'warmup': 150, 'gamma': 0.9983938156332555, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14300634142193136, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 824 finished with value: 0.7505782227192923 and parameters: {'lr': 0.005673613545463989, 'wd': 0.00017971684495354857, 'warmup': 150, 'gamma': 0.9983938156332555, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14300634142193136, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 13:14:17,936] Trial 825 finished with value: 0.6698097783691705 and parameters: {'lr': 0.006371106462090933, 'wd': 0.00017511226594158643, 'warmup': 150, 'gamma': 0.9984317095652111, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1373881111139829, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 825 finished with value: 0.6698097783691705 and parameters: {'lr': 0.006371106462090933, 'wd': 0.00017511226594158643, 'warmup': 150, 'gamma': 0.9984317095652111, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1373881111139829, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 45346656
torch.Size([180])


[I 2024-10-29 13:18:36,945] Trial 826 finished with value: 0.6135464409483283 and parameters: {'lr': 0.005502951006917141, 'wd': 0.00022978570122969018, 'warmup': 150, 'gamma': 0.998984246580383, 'time_dim': 16, 'patch_size': 16, 'depth': 12, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14053884140950734, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 826 finished with value: 0.6135464409483283 and parameters: {'lr': 0.005502951006917141, 'wd': 0.00022978570122969018, 'warmup': 150, 'gamma': 0.998984246580383, 'time_dim': 16, 'patch_size': 16, 'depth': 12, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14053884140950734, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 13:21:35,763] Trial 827 finished with value: 0.7354562985602168 and parameters: {'lr': 0.00489819437263142, 'wd': 0.0001813946434285066, 'warmup': 150, 'gamma': 0.9974229370447387, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14253427368652685, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 827 finished with value: 0.7354562985602168 and parameters: {'lr': 0.00489819437263142, 'wd': 0.0001813946434285066, 'warmup': 150, 'gamma': 0.9974229370447387, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14253427368652685, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 13:24:34,871] Trial 828 finished with value: 0.7159860515378714 and parameters: {'lr': 0.006868004848232985, 'wd': 0.0002735644882589532, 'warmup': 150, 'gamma': 0.997799955371518, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14032631826962552, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 828 finished with value: 0.7159860515378714 and parameters: {'lr': 0.006868004848232985, 'wd': 0.0002735644882589532, 'warmup': 150, 'gamma': 0.997799955371518, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14032631826962552, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 13:27:14,353] Trial 829 finished with value: 0.6713304882503331 and parameters: {'lr': 0.006428519566375605, 'wd': 0.0002130135195720315, 'warmup': 150, 'gamma': 0.9984680619947405, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14718854793131542, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 829 finished with value: 0.6713304882503331 and parameters: {'lr': 0.006428519566375605, 'wd': 0.0002130135195720315, 'warmup': 150, 'gamma': 0.9984680619947405, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14718854793131542, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 13:30:13,416] Trial 830 finished with value: 0.7301065531307066 and parameters: {'lr': 0.006929452051832936, 'wd': 0.000180281672929738, 'warmup': 150, 'gamma': 0.9979936448107333, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13643315093840055, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 830 finished with value: 0.7301065531307066 and parameters: {'lr': 0.006929452051832936, 'wd': 0.000180281672929738, 'warmup': 150, 'gamma': 0.9979936448107333, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13643315093840055, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 13:33:12,866] Trial 831 finished with value: 0.6736369569937656 and parameters: {'lr': 0.0055491682926516505, 'wd': 0.0002535131785414024, 'warmup': 150, 'gamma': 0.9766075798911431, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1449320421466768, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 831 finished with value: 0.6736369569937656 and parameters: {'lr': 0.0055491682926516505, 'wd': 0.0002535131785414024, 'warmup': 150, 'gamma': 0.9766075798911431, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1449320421466768, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 13:36:12,275] Trial 832 finished with value: 0.6580277326479542 and parameters: {'lr': 0.007272859248914904, 'wd': 0.00021347473410288586, 'warmup': 150, 'gamma': 0.9983801190083529, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13403089465373316, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 832 finished with value: 0.6580277326479542 and parameters: {'lr': 0.007272859248914904, 'wd': 0.00021347473410288586, 'warmup': 150, 'gamma': 0.9983801190083529, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13403089465373316, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 13:38:51,087] Trial 833 finished with value: 0.7394711052864289 and parameters: {'lr': 0.0062665361565239445, 'wd': 0.00029048523404440505, 'warmup': 150, 'gamma': 0.9973712281259149, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13862421746067105, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 833 finished with value: 0.7394711052864289 and parameters: {'lr': 0.0062665361565239445, 'wd': 0.00029048523404440505, 'warmup': 150, 'gamma': 0.9973712281259149, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13862421746067105, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 41602656
torch.Size([180])


[I 2024-10-29 13:42:49,726] Trial 834 finished with value: 0.7075294853410479 and parameters: {'lr': 0.0048849465042716525, 'wd': 0.00017135119693799042, 'warmup': 150, 'gamma': 0.9983124924208907, 'time_dim': 16, 'patch_size': 16, 'depth': 11, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1430906091049699, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 834 finished with value: 0.7075294853410479 and parameters: {'lr': 0.0048849465042716525, 'wd': 0.00017135119693799042, 'warmup': 150, 'gamma': 0.9983124924208907, 'time_dim': 16, 'patch_size': 16, 'depth': 11, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1430906091049699, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 34114656
torch.Size([180])


[I 2024-10-29 13:46:08,743] Trial 835 finished with value: 0.6969050232959237 and parameters: {'lr': 0.007317637692088967, 'wd': 0.00023228180529971218, 'warmup': 150, 'gamma': 0.9978376455552624, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13652904554732753, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 835 finished with value: 0.6969050232959237 and parameters: {'lr': 0.007317637692088967, 'wd': 0.00023228180529971218, 'warmup': 150, 'gamma': 0.9978376455552624, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13652904554732753, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 37923296
torch.Size([180])


[I 2024-10-29 13:49:48,320] Trial 836 finished with value: 0.6451191500434176 and parameters: {'lr': 0.0058588431992042085, 'wd': 0.00018585833269884206, 'warmup': 150, 'gamma': 0.998921001724613, 'time_dim': 32, 'patch_size': 16, 'depth': 10, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13244023481975314, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 836 finished with value: 0.6451191500434176 and parameters: {'lr': 0.0058588431992042085, 'wd': 0.00018585833269884206, 'warmup': 150, 'gamma': 0.998921001724613, 'time_dim': 32, 'patch_size': 16, 'depth': 10, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13244023481975314, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 13:52:47,688] Trial 837 finished with value: 0.5833771377421098 and parameters: {'lr': 0.0076239952437547375, 'wd': 0.00025083692365233425, 'warmup': 150, 'gamma': 0.9989705783580377, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14779070677934938, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 837 finished with value: 0.5833771377421098 and parameters: {'lr': 0.0076239952437547375, 'wd': 0.00025083692365233425, 'warmup': 150, 'gamma': 0.9989705783580377, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14779070677934938, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 13:55:47,108] Trial 838 finished with value: 0.6335660192505189 and parameters: {'lr': 0.007844749875622349, 'wd': 0.0002049449556503757, 'warmup': 150, 'gamma': 0.9984487637219265, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1460176472879837, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 838 finished with value: 0.6335660192505189 and parameters: {'lr': 0.007844749875622349, 'wd': 0.0002049449556503757, 'warmup': 150, 'gamma': 0.9984487637219265, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1460176472879837, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 13:58:46,578] Trial 839 finished with value: 0.6743571614178206 and parameters: {'lr': 0.00802331081836931, 'wd': 0.00024579629705616553, 'warmup': 150, 'gamma': 0.9971793894543002, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14791837669707292, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 839 finished with value: 0.6743571614178206 and parameters: {'lr': 0.00802331081836931, 'wd': 0.00024579629705616553, 'warmup': 150, 'gamma': 0.9971793894543002, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14791837669707292, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30595296
torch.Size([180])


[I 2024-10-29 14:01:45,421] Trial 840 finished with value: 0.7757509489544594 and parameters: {'lr': 5.282984111541751e-05, 'wd': 0.0002088127027606876, 'warmup': 150, 'gamma': 0.9989145365287772, 'time_dim': 64, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1479648225670567, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 840 finished with value: 0.7757509489544594 and parameters: {'lr': 5.282984111541751e-05, 'wd': 0.0002088127027606876, 'warmup': 150, 'gamma': 0.9989145365287772, 'time_dim': 64, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1479648225670567, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 14:04:44,964] Trial 841 finished with value: 0.5988953744582415 and parameters: {'lr': 0.007118446570712526, 'wd': 0.00016680446027769887, 'warmup': 150, 'gamma': 0.9989621257206683, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14263521508773433, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 841 finished with value: 0.5988953744582415 and parameters: {'lr': 0.007118446570712526, 'wd': 0.00016680446027769887, 'warmup': 150, 'gamma': 0.9989621257206683, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14263521508773433, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 14:07:44,453] Trial 842 finished with value: 0.707148942456976 and parameters: {'lr': 0.008237802467225667, 'wd': 0.00023071989386130545, 'warmup': 150, 'gamma': 0.9977909347879573, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.0835550952825929, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 842 finished with value: 0.707148942456976 and parameters: {'lr': 0.008237802467225667, 'wd': 0.00023071989386130545, 'warmup': 150, 'gamma': 0.9977909347879573, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.0835550952825929, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 14:10:44,171] Trial 843 finished with value: 0.6149496790238809 and parameters: {'lr': 0.006881511998770704, 'wd': 0.0002731781576157417, 'warmup': 150, 'gamma': 0.9989986855857887, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13923055194093342, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 843 finished with value: 0.6149496790238809 and parameters: {'lr': 0.006881511998770704, 'wd': 0.0002731781576157417, 'warmup': 150, 'gamma': 0.9989986855857887, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13923055194093342, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 14:13:22,631] Trial 844 finished with value: 0.7372431343676988 and parameters: {'lr': 0.00024725615106553937, 'wd': 0.00018988788426877468, 'warmup': 150, 'gamma': 0.9989963975268165, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14295574491644472, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 844 finished with value: 0.7372431343676988 and parameters: {'lr': 0.00024725615106553937, 'wd': 0.00018988788426877468, 'warmup': 150, 'gamma': 0.9989963975268165, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14295574491644472, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 14:16:21,783] Trial 845 finished with value: 0.7177063409705539 and parameters: {'lr': 0.008024037467968367, 'wd': 0.000155920586251168, 'warmup': 150, 'gamma': 0.9983077742270663, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1494102016778386, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 845 finished with value: 0.7177063409705539 and parameters: {'lr': 0.008024037467968367, 'wd': 0.000155920586251168, 'warmup': 150, 'gamma': 0.9983077742270663, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1494102016778386, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 14:19:20,854] Trial 846 finished with value: 0.720283168239291 and parameters: {'lr': 0.006436046235412128, 'wd': 0.00019740297462086508, 'warmup': 150, 'gamma': 0.9977491778890757, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13929572157415085, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 846 finished with value: 0.720283168239291 and parameters: {'lr': 0.006436046235412128, 'wd': 0.00019740297462086508, 'warmup': 150, 'gamma': 0.9977491778890757, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13929572157415085, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 14:22:20,340] Trial 847 finished with value: 0.6231644815205459 and parameters: {'lr': 0.00863572533013023, 'wd': 0.00026021241260655695, 'warmup': 150, 'gamma': 0.9983083276219469, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14527627606082144, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 847 finished with value: 0.6231644815205459 and parameters: {'lr': 0.00863572533013023, 'wd': 0.00026021241260655695, 'warmup': 150, 'gamma': 0.9983083276219469, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14527627606082144, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 14:25:20,045] Trial 848 finished with value: 0.685509942678687 and parameters: {'lr': 0.0074439675454166885, 'wd': 0.00016374108725096022, 'warmup': 150, 'gamma': 0.9984058120265024, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13549242950810073, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 848 finished with value: 0.685509942678687 and parameters: {'lr': 0.0074439675454166885, 'wd': 0.00016374108725096022, 'warmup': 150, 'gamma': 0.9984058120265024, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13549242950810073, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 14:27:59,518] Trial 849 finished with value: 0.7441951367439098 and parameters: {'lr': 0.00866996593882177, 'wd': 0.0003129243754956299, 'warmup': 300, 'gamma': 0.9980298149146499, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14119129202285946, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 849 finished with value: 0.7441951367439098 and parameters: {'lr': 0.00866996593882177, 'wd': 0.0003129243754956299, 'warmup': 300, 'gamma': 0.9980298149146499, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14119129202285946, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 14:30:58,828] Trial 850 finished with value: 0.7545942068673206 and parameters: {'lr': 0.0070066129338101, 'wd': 0.00021186580660618323, 'warmup': 150, 'gamma': 0.9967490733209149, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07295845414820097, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 850 finished with value: 0.7545942068673206 and parameters: {'lr': 0.0070066129338101, 'wd': 0.00021186580660618323, 'warmup': 150, 'gamma': 0.9967490733209149, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07295845414820097, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 47689056
torch.Size([180])


[I 2024-10-29 14:35:21,935] Trial 851 finished with value: 0.7346258543634165 and parameters: {'lr': 0.0058107598497126505, 'wd': 0.0002461757958676421, 'warmup': 150, 'gamma': 0.9976467297474787, 'time_dim': 16, 'patch_size': 16, 'depth': 10, 'heads': 12, 'mlp_dim': 768, 'emb_dropout': 0.13146371766136808, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 851 finished with value: 0.7346258543634165 and parameters: {'lr': 0.0058107598497126505, 'wd': 0.0002461757958676421, 'warmup': 150, 'gamma': 0.9976467297474787, 'time_dim': 16, 'patch_size': 16, 'depth': 10, 'heads': 12, 'mlp_dim': 768, 'emb_dropout': 0.13146371766136808, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 14:38:01,246] Trial 852 finished with value: 0.5418259936573431 and parameters: {'lr': 0.008848922987677536, 'wd': 0.000188006784949712, 'warmup': 150, 'gamma': 0.9989896106795546, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14848546422289768, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 852 finished with value: 0.5418259936573431 and parameters: {'lr': 0.008848922987677536, 'wd': 0.000188006784949712, 'warmup': 150, 'gamma': 0.9989896106795546, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14848546422289768, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 14:40:40,717] Trial 853 finished with value: 0.6178057754977145 and parameters: {'lr': 0.006735449177193494, 'wd': 0.00029168366395301544, 'warmup': 150, 'gamma': 0.9989753071800426, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1492331889969491, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 853 finished with value: 0.6178057754977145 and parameters: {'lr': 0.006735449177193494, 'wd': 0.00029168366395301544, 'warmup': 150, 'gamma': 0.9989753071800426, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1492331889969491, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 34114656
torch.Size([180])


[I 2024-10-29 14:44:00,451] Trial 854 finished with value: 0.6039736058056958 and parameters: {'lr': 0.007646335598637, 'wd': 0.00017121675998508473, 'warmup': 150, 'gamma': 0.9984601897094996, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1488161234262983, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 854 finished with value: 0.6039736058056958 and parameters: {'lr': 0.007646335598637, 'wd': 0.00017121675998508473, 'warmup': 150, 'gamma': 0.9984601897094996, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1488161234262983, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 14:46:39,730] Trial 855 finished with value: 0.6420595195042614 and parameters: {'lr': 0.005265177153202891, 'wd': 0.00017734197089171175, 'warmup': 150, 'gamma': 0.9973902910424975, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14600220746834885, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 855 finished with value: 0.6420595195042614 and parameters: {'lr': 0.005265177153202891, 'wd': 0.00017734197089171175, 'warmup': 150, 'gamma': 0.9973902910424975, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14600220746834885, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 14:49:19,207] Trial 856 finished with value: 0.7129123999437933 and parameters: {'lr': 0.006096524425651731, 'wd': 0.00015410508633444052, 'warmup': 150, 'gamma': 0.9984371854787952, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.06210731866881071, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 856 finished with value: 0.7129123999437933 and parameters: {'lr': 0.006096524425651731, 'wd': 0.00015410508633444052, 'warmup': 150, 'gamma': 0.9984371854787952, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.06210731866881071, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30435296
torch.Size([180])


[I 2024-10-29 14:52:18,608] Trial 857 finished with value: 0.626301850003801 and parameters: {'lr': 0.007412052964083928, 'wd': 0.000235013621982323, 'warmup': 150, 'gamma': 0.9984297070034616, 'time_dim': 32, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14650126144846143, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 857 finished with value: 0.626301850003801 and parameters: {'lr': 0.007412052964083928, 'wd': 0.000235013621982323, 'warmup': 150, 'gamma': 0.9984297070034616, 'time_dim': 32, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14650126144846143, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 14:55:18,105] Trial 858 finished with value: 0.5944805851213504 and parameters: {'lr': 0.008363041254390857, 'wd': 0.00019139246941928299, 'warmup': 150, 'gamma': 0.998951192354422, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14420292165554754, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 858 finished with value: 0.5944805851213504 and parameters: {'lr': 0.008363041254390857, 'wd': 0.00019139246941928299, 'warmup': 150, 'gamma': 0.998951192354422, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14420292165554754, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 34114656
torch.Size([180])


[I 2024-10-29 14:58:37,247] Trial 859 finished with value: 0.6836958032349854 and parameters: {'lr': 0.0064691493871269915, 'wd': 0.00022848491938989252, 'warmup': 150, 'gamma': 0.997942543112184, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1508219546254123, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 859 finished with value: 0.6836958032349854 and parameters: {'lr': 0.0064691493871269915, 'wd': 0.00022848491938989252, 'warmup': 150, 'gamma': 0.997942543112184, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1508219546254123, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 15:01:16,750] Trial 860 finished with value: 0.7539245593435908 and parameters: {'lr': 0.004418793802003392, 'wd': 0.0002705200028818146, 'warmup': 150, 'gamma': 0.9972045465460015, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1369830435561896, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 860 finished with value: 0.7539245593435908 and parameters: {'lr': 0.004418793802003392, 'wd': 0.0002705200028818146, 'warmup': 150, 'gamma': 0.9972045465460015, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1369830435561896, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 34339296
torch.Size([180])


[I 2024-10-29 15:04:35,931] Trial 861 finished with value: 0.6756242329438125 and parameters: {'lr': 0.007655274810613668, 'wd': 0.0001633333792288057, 'warmup': 150, 'gamma': 0.9983884243212291, 'time_dim': 64, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14218108564158036, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 861 finished with value: 0.6756242329438125 and parameters: {'lr': 0.007655274810613668, 'wd': 0.0001633333792288057, 'warmup': 150, 'gamma': 0.9983884243212291, 'time_dim': 64, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14218108564158036, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 15:07:34,806] Trial 862 finished with value: 0.7427893593642791 and parameters: {'lr': 0.005973370805142043, 'wd': 0.00019019402298104678, 'warmup': 150, 'gamma': 0.9977940409555844, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13430832340739648, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 862 finished with value: 0.7427893593642791 and parameters: {'lr': 0.005973370805142043, 'wd': 0.00019019402298104678, 'warmup': 150, 'gamma': 0.9977940409555844, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13430832340739648, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 15:10:14,481] Trial 863 finished with value: 0.6273918927946929 and parameters: {'lr': 0.008712143475107492, 'wd': 0.00033750208669714046, 'warmup': 150, 'gamma': 0.9989370373434044, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.15032714990184087, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 863 finished with value: 0.6273918927946929 and parameters: {'lr': 0.008712143475107492, 'wd': 0.00033750208669714046, 'warmup': 150, 'gamma': 0.9989370373434044, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.15032714990184087, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 15:12:53,722] Trial 864 finished with value: 0.6916617774579178 and parameters: {'lr': 0.0051675775456448145, 'wd': 0.00021221207292299252, 'warmup': 150, 'gamma': 0.9983794079033717, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13884525775277148, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 864 finished with value: 0.6916617774579178 and parameters: {'lr': 0.0051675775456448145, 'wd': 0.00021221207292299252, 'warmup': 150, 'gamma': 0.9983794079033717, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13884525775277148, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 15:15:53,042] Trial 865 finished with value: 0.678316335147356 and parameters: {'lr': 0.006736071734207359, 'wd': 0.00014078623470537303, 'warmup': 150, 'gamma': 0.997838612894231, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1446828160660914, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 865 finished with value: 0.678316335147356 and parameters: {'lr': 0.006736071734207359, 'wd': 0.00014078623470537303, 'warmup': 150, 'gamma': 0.997838612894231, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1446828160660914, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 15:18:52,378] Trial 866 finished with value: 0.7789912715912608 and parameters: {'lr': 0.007965210871009947, 'wd': 0.0002971586186360369, 'warmup': 300, 'gamma': 0.9969393407035747, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13048891506555735, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 866 finished with value: 0.7789912715912608 and parameters: {'lr': 0.007965210871009947, 'wd': 0.0002971586186360369, 'warmup': 300, 'gamma': 0.9969393407035747, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13048891506555735, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 15:21:51,312] Trial 867 finished with value: 0.7553398613391941 and parameters: {'lr': 0.002100624249963611, 'wd': 0.00017336816638689176, 'warmup': 150, 'gamma': 0.9961582872281425, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14923440590763853, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 867 finished with value: 0.7553398613391941 and parameters: {'lr': 0.002100624249963611, 'wd': 0.00017336816638689176, 'warmup': 150, 'gamma': 0.9961582872281425, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14923440590763853, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 15:24:30,839] Trial 868 finished with value: 0.7230567758931338 and parameters: {'lr': 0.007243716911426914, 'wd': 0.0002401132483124702, 'warmup': 150, 'gamma': 0.9718134976722358, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14258213884053597, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 868 finished with value: 0.7230567758931338 and parameters: {'lr': 0.007243716911426914, 'wd': 0.0002401132483124702, 'warmup': 150, 'gamma': 0.9718134976722358, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14258213884053597, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 15:27:30,574] Trial 869 finished with value: 0.6151955304041093 and parameters: {'lr': 0.008839920635843376, 'wd': 0.00014312895545005448, 'warmup': 150, 'gamma': 0.9989404199051769, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13489507080459848, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 869 finished with value: 0.6151955304041093 and parameters: {'lr': 0.008839920635843376, 'wd': 0.00014312895545005448, 'warmup': 150, 'gamma': 0.9989404199051769, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13489507080459848, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 41602656
torch.Size([180])


[I 2024-10-29 15:31:30,198] Trial 870 finished with value: 0.6735425169279352 and parameters: {'lr': 0.005829926025787421, 'wd': 0.00025519345059903646, 'warmup': 150, 'gamma': 0.9983936186393207, 'time_dim': 16, 'patch_size': 16, 'depth': 11, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13847300170425267, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 870 finished with value: 0.6735425169279352 and parameters: {'lr': 0.005829926025787421, 'wd': 0.00025519345059903646, 'warmup': 150, 'gamma': 0.9983936186393207, 'time_dim': 16, 'patch_size': 16, 'depth': 11, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13847300170425267, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 37858656
torch.Size([180])


[I 2024-10-29 15:35:10,403] Trial 871 finished with value: 0.6156776912928015 and parameters: {'lr': 0.008891127002138859, 'wd': 0.00018754870157883024, 'warmup': 150, 'gamma': 0.9989516147930105, 'time_dim': 16, 'patch_size': 16, 'depth': 10, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1470053722210517, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 871 finished with value: 0.6156776912928015 and parameters: {'lr': 0.008891127002138859, 'wd': 0.00018754870157883024, 'warmup': 150, 'gamma': 0.9989516147930105, 'time_dim': 16, 'patch_size': 16, 'depth': 10, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1470053722210517, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 34114656
torch.Size([180])


[I 2024-10-29 15:38:29,573] Trial 872 finished with value: 0.6725934067814434 and parameters: {'lr': 0.006641972586499205, 'wd': 0.00021343593722201036, 'warmup': 150, 'gamma': 0.9980359150101357, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1422043352740824, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 872 finished with value: 0.6725934067814434 and parameters: {'lr': 0.006641972586499205, 'wd': 0.00021343593722201036, 'warmup': 150, 'gamma': 0.9980359150101357, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1422043352740824, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 15:41:09,554] Trial 873 finished with value: 0.5861939438540713 and parameters: {'lr': 0.00761602765920456, 'wd': 0.0003380902558240254, 'warmup': 150, 'gamma': 0.9983613024252793, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12850004395801135, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 873 finished with value: 0.5861939438540713 and parameters: {'lr': 0.00761602765920456, 'wd': 0.0003380902558240254, 'warmup': 150, 'gamma': 0.9983613024252793, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12850004395801135, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 15:43:48,907] Trial 874 finished with value: 0.7152747863517728 and parameters: {'lr': 0.00879936688403523, 'wd': 0.0003476791893037702, 'warmup': 150, 'gamma': 0.9973679860613067, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1268186373381573, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 874 finished with value: 0.7152747863517728 and parameters: {'lr': 0.00879936688403523, 'wd': 0.0003476791893037702, 'warmup': 150, 'gamma': 0.9973679860613067, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1268186373381573, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 15:46:48,236] Trial 875 finished with value: 0.7288146620026802 and parameters: {'lr': 0.003441588921869603, 'wd': 0.0003598040586022037, 'warmup': 150, 'gamma': 0.9976079697958241, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1291757183942353, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 875 finished with value: 0.7288146620026802 and parameters: {'lr': 0.003441588921869603, 'wd': 0.0003598040586022037, 'warmup': 150, 'gamma': 0.9976079697958241, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1291757183942353, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 15:49:27,111] Trial 876 finished with value: 0.6665999141755328 and parameters: {'lr': 0.007808824778145054, 'wd': 0.00031965176663841096, 'warmup': 150, 'gamma': 0.9983707124484771, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11964389237371367, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 876 finished with value: 0.6665999141755328 and parameters: {'lr': 0.007808824778145054, 'wd': 0.00031965176663841096, 'warmup': 150, 'gamma': 0.9983707124484771, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11964389237371367, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 15:52:06,104] Trial 877 finished with value: 0.7294984303261234 and parameters: {'lr': 0.005228582707654018, 'wd': 0.00037637463100176387, 'warmup': 150, 'gamma': 0.9980156210589701, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12668091737282502, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 877 finished with value: 0.7294984303261234 and parameters: {'lr': 0.005228582707654018, 'wd': 0.00037637463100176387, 'warmup': 150, 'gamma': 0.9980156210589701, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12668091737282502, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 15:54:45,496] Trial 878 finished with value: 0.738165498100537 and parameters: {'lr': 0.00641706746031116, 'wd': 0.00031833948872731307, 'warmup': 150, 'gamma': 0.9967614705763179, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12894673063438555, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 878 finished with value: 0.738165498100537 and parameters: {'lr': 0.00641706746031116, 'wd': 0.00031833948872731307, 'warmup': 150, 'gamma': 0.9967614705763179, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12894673063438555, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 15:57:24,631] Trial 879 finished with value: 0.7289447452429983 and parameters: {'lr': 0.008980186455383751, 'wd': 0.00027424822933686113, 'warmup': 150, 'gamma': 0.9892387840222665, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.15035126303444782, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 879 finished with value: 0.7289447452429983 and parameters: {'lr': 0.008980186455383751, 'wd': 0.00027424822933686113, 'warmup': 150, 'gamma': 0.9892387840222665, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.15035126303444782, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:00:03,957] Trial 880 finished with value: 0.527757218882889 and parameters: {'lr': 0.007773810893648391, 'wd': 0.00032850746577724036, 'warmup': 300, 'gamma': 0.9989830576050682, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11482119901242868, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 880 finished with value: 0.527757218882889 and parameters: {'lr': 0.007773810893648391, 'wd': 0.00032850746577724036, 'warmup': 300, 'gamma': 0.9989830576050682, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11482119901242868, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:02:42,506] Trial 881 finished with value: 0.7319423902318789 and parameters: {'lr': 6.741407050651731e-05, 'wd': 0.00036723245355568677, 'warmup': 300, 'gamma': 0.9983903787899024, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12569378619060512, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 881 finished with value: 0.7319423902318789 and parameters: {'lr': 6.741407050651731e-05, 'wd': 0.00036723245355568677, 'warmup': 300, 'gamma': 0.9983903787899024, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12569378619060512, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:05:21,094] Trial 882 finished with value: 0.7325985599268179 and parameters: {'lr': 0.0001530032194701452, 'wd': 0.0003007990696694036, 'warmup': 300, 'gamma': 0.9984821576954817, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12404864054130027, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 882 finished with value: 0.7325985599268179 and parameters: {'lr': 0.0001530032194701452, 'wd': 0.0003007990696694036, 'warmup': 300, 'gamma': 0.9984821576954817, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12404864054130027, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:08:00,673] Trial 883 finished with value: 0.7236213793327789 and parameters: {'lr': 0.005766056749566606, 'wd': 0.0003405060147425029, 'warmup': 300, 'gamma': 0.9976030188791688, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11453886828901011, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 883 finished with value: 0.7236213793327789 and parameters: {'lr': 0.005766056749566606, 'wd': 0.0003405060147425029, 'warmup': 300, 'gamma': 0.9976030188791688, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11453886828901011, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26851296
torch.Size([180])


[I 2024-10-29 16:10:39,938] Trial 884 finished with value: 0.6253167401377311 and parameters: {'lr': 0.007092368860020351, 'wd': 0.0003861679039417748, 'warmup': 300, 'gamma': 0.998950421151634, 'time_dim': 64, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11925429815615994, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 884 finished with value: 0.6253167401377311 and parameters: {'lr': 0.007092368860020351, 'wd': 0.0003861679039417748, 'warmup': 300, 'gamma': 0.998950421151634, 'time_dim': 64, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11925429815615994, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:13:19,258] Trial 885 finished with value: 0.7366342901467768 and parameters: {'lr': 0.004539559529792092, 'wd': 0.0003040639490699078, 'warmup': 150, 'gamma': 0.9758874106844675, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1173441114552201, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 885 finished with value: 0.7366342901467768 and parameters: {'lr': 0.004539559529792092, 'wd': 0.0003040639490699078, 'warmup': 150, 'gamma': 0.9758874106844675, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1173441114552201, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:15:58,613] Trial 886 finished with value: 0.5989228619114405 and parameters: {'lr': 0.007888431844662522, 'wd': 0.00027785274241949197, 'warmup': 300, 'gamma': 0.9989909246155645, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11556445557312534, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 886 finished with value: 0.5989228619114405 and parameters: {'lr': 0.007888431844662522, 'wd': 0.00027785274241949197, 'warmup': 300, 'gamma': 0.9989909246155645, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11556445557312534, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:18:37,985] Trial 887 finished with value: 0.6985296528724465 and parameters: {'lr': 0.006660949372584268, 'wd': 0.000339435773899785, 'warmup': 300, 'gamma': 0.9980196641437807, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.10797559476123253, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 887 finished with value: 0.6985296528724465 and parameters: {'lr': 0.006660949372584268, 'wd': 0.000339435773899785, 'warmup': 300, 'gamma': 0.9980196641437807, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.10797559476123253, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:21:17,374] Trial 888 finished with value: 0.6526865647484495 and parameters: {'lr': 0.0089117712850985, 'wd': 0.0002665725370045526, 'warmup': 300, 'gamma': 0.9984329410675525, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11249072998906609, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 888 finished with value: 0.6526865647484495 and parameters: {'lr': 0.0089117712850985, 'wd': 0.0002665725370045526, 'warmup': 300, 'gamma': 0.9984329410675525, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.11249072998906609, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:23:56,470] Trial 889 finished with value: 0.7345389687020547 and parameters: {'lr': 0.0058401872903819894, 'wd': 0.0003049481771502509, 'warmup': 300, 'gamma': 0.9973527461794578, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1120971145904302, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 889 finished with value: 0.7345389687020547 and parameters: {'lr': 0.0058401872903819894, 'wd': 0.0003049481771502509, 'warmup': 300, 'gamma': 0.9973527461794578, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1120971145904302, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:26:35,787] Trial 890 finished with value: 0.5368665482694736 and parameters: {'lr': 0.007685658040573718, 'wd': 0.0002479205354680446, 'warmup': 150, 'gamma': 0.9989732119588033, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13264936860744891, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 890 finished with value: 0.5368665482694736 and parameters: {'lr': 0.007685658040573718, 'wd': 0.0002479205354680446, 'warmup': 150, 'gamma': 0.9989732119588033, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13264936860744891, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:29:15,366] Trial 891 finished with value: 0.6493843469049327 and parameters: {'lr': 0.009954903861021847, 'wd': 0.00024100230027274938, 'warmup': 300, 'gamma': 0.9985249762636346, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13435010944322542, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 891 finished with value: 0.6493843469049327 and parameters: {'lr': 0.009954903861021847, 'wd': 0.00024100230027274938, 'warmup': 300, 'gamma': 0.9985249762636346, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13435010944322542, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:31:54,246] Trial 892 finished with value: 0.7127696146912972 and parameters: {'lr': 0.004942572880230894, 'wd': 0.0002325675188392359, 'warmup': 300, 'gamma': 0.997826944752963, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13275692044420495, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 892 finished with value: 0.7127696146912972 and parameters: {'lr': 0.004942572880230894, 'wd': 0.0002325675188392359, 'warmup': 300, 'gamma': 0.997826944752963, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13275692044420495, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:34:33,429] Trial 893 finished with value: 0.6028573509521451 and parameters: {'lr': 0.006599299046005232, 'wd': 0.0002646226787227322, 'warmup': 300, 'gamma': 0.9989418636775447, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1218074194475833, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 893 finished with value: 0.6028573509521451 and parameters: {'lr': 0.006599299046005232, 'wd': 0.0002646226787227322, 'warmup': 300, 'gamma': 0.9989418636775447, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1218074194475833, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 32131680
torch.Size([180])


[I 2024-10-29 16:37:39,389] Trial 894 finished with value: 0.6070106514682525 and parameters: {'lr': 0.007812800932275075, 'wd': 0.00022706940524161096, 'warmup': 300, 'gamma': 0.9989652526011346, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 11, 'mlp_dim': 768, 'emb_dropout': 0.10545957525778643, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 894 finished with value: 0.6070106514682525 and parameters: {'lr': 0.007812800932275075, 'wd': 0.00022706940524161096, 'warmup': 300, 'gamma': 0.9989652526011346, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 11, 'mlp_dim': 768, 'emb_dropout': 0.10545957525778643, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22882656
torch.Size([180])


[I 2024-10-29 16:39:58,882] Trial 895 finished with value: 0.5955040574256754 and parameters: {'lr': 0.008837563377575956, 'wd': 0.00026013280527347133, 'warmup': 150, 'gamma': 0.9989984486000738, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1319508146467735, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 895 finished with value: 0.5955040574256754 and parameters: {'lr': 0.008837563377575956, 'wd': 0.00026013280527347133, 'warmup': 150, 'gamma': 0.9989984486000738, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1319508146467735, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 16:42:58,309] Trial 896 finished with value: 0.7284274245850889 and parameters: {'lr': 0.00596879724989975, 'wd': 0.00020421583411187863, 'warmup': 150, 'gamma': 0.9979666090780254, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13885142141740386, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 896 finished with value: 0.7284274245850889 and parameters: {'lr': 0.00596879724989975, 'wd': 0.00020421583411187863, 'warmup': 150, 'gamma': 0.9979666090780254, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13885142141740386, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:45:38,216] Trial 897 finished with value: 0.7179950633388035 and parameters: {'lr': 0.007220910140665139, 'wd': 0.0001627894063511184, 'warmup': 150, 'gamma': 0.9983901234096657, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13953040981573953, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 897 finished with value: 0.7179950633388035 and parameters: {'lr': 0.007220910140665139, 'wd': 0.0001627894063511184, 'warmup': 150, 'gamma': 0.9983901234096657, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13953040981573953, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:48:17,382] Trial 898 finished with value: 0.7233391519769028 and parameters: {'lr': 0.008808371010083828, 'wd': 0.0002290112014784797, 'warmup': 300, 'gamma': 0.9970839433728501, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13301850828406786, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 898 finished with value: 0.7233391519769028 and parameters: {'lr': 0.008808371010083828, 'wd': 0.0002290112014784797, 'warmup': 300, 'gamma': 0.9970839433728501, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13301850828406786, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30755424
torch.Size([180])


[I 2024-10-29 16:51:14,953] Trial 899 finished with value: 0.7344275269196278 and parameters: {'lr': 0.005277136352083738, 'wd': 0.00027835829860216554, 'warmup': 150, 'gamma': 0.997928198992299, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 10, 'mlp_dim': 768, 'emb_dropout': 0.10088176101737224, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 899 finished with value: 0.7344275269196278 and parameters: {'lr': 0.005277136352083738, 'wd': 0.00027835829860216554, 'warmup': 150, 'gamma': 0.997928198992299, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 10, 'mlp_dim': 768, 'emb_dropout': 0.10088176101737224, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 16:54:14,416] Trial 900 finished with value: 0.6599840889161616 and parameters: {'lr': 0.00803588182902635, 'wd': 0.00029370905762167665, 'warmup': 150, 'gamma': 0.998263739483018, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14308331660007428, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 900 finished with value: 0.6599840889161616 and parameters: {'lr': 0.00803588182902635, 'wd': 0.00029370905762167665, 'warmup': 150, 'gamma': 0.998263739483018, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14308331660007428, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 16:56:53,960] Trial 901 finished with value: 0.6924866293161497 and parameters: {'lr': 0.009946329064278954, 'wd': 0.0002089622711417376, 'warmup': 150, 'gamma': 0.9984173198096375, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13584324008804538, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 901 finished with value: 0.6924866293161497 and parameters: {'lr': 0.009946329064278954, 'wd': 0.0002089622711417376, 'warmup': 150, 'gamma': 0.9984173198096375, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13584324008804538, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30435296
torch.Size([180])


[I 2024-10-29 16:59:52,958] Trial 902 finished with value: 0.7752823869654679 and parameters: {'lr': 0.00416611368658784, 'wd': 0.0002432199293755304, 'warmup': 150, 'gamma': 0.9777369593471215, 'time_dim': 32, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12112317722928163, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 902 finished with value: 0.7752823869654679 and parameters: {'lr': 0.00416611368658784, 'wd': 0.0002432199293755304, 'warmup': 150, 'gamma': 0.9777369593471215, 'time_dim': 32, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12112317722928163, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 17:02:32,075] Trial 903 finished with value: 0.7090295100039053 and parameters: {'lr': 0.0065741033343463245, 'wd': 0.00013733349350977412, 'warmup': 150, 'gamma': 0.9975282245838725, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1317334194203905, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 903 finished with value: 0.7090295100039053 and parameters: {'lr': 0.0065741033343463245, 'wd': 0.00013733349350977412, 'warmup': 150, 'gamma': 0.9975282245838725, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1317334194203905, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 17:05:11,395] Trial 904 finished with value: 0.6415291985189007 and parameters: {'lr': 0.0073412510566647805, 'wd': 0.00017777300377218834, 'warmup': 150, 'gamma': 0.9984471568044387, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.10958787158385395, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 904 finished with value: 0.6415291985189007 and parameters: {'lr': 0.0073412510566647805, 'wd': 0.00017777300377218834, 'warmup': 150, 'gamma': 0.9984471568044387, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.10958787158385395, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 17:08:10,334] Trial 905 finished with value: 0.6312934662114881 and parameters: {'lr': 0.008828408367026438, 'wd': 0.0002585253780874707, 'warmup': 150, 'gamma': 0.9984817821869681, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14014949686735995, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 905 finished with value: 0.6312934662114881 and parameters: {'lr': 0.008828408367026438, 'wd': 0.0002585253780874707, 'warmup': 150, 'gamma': 0.9984817821869681, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14014949686735995, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 17:10:49,243] Trial 906 finished with value: 0.6982705711525035 and parameters: {'lr': 0.0060514845227719145, 'wd': 0.00015488242121816995, 'warmup': 150, 'gamma': 0.9977207400848636, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14362635559571107, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 906 finished with value: 0.6982705711525035 and parameters: {'lr': 0.0060514845227719145, 'wd': 0.00015488242121816995, 'warmup': 150, 'gamma': 0.9977207400848636, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14362635559571107, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30980064
torch.Size([180])


[I 2024-10-29 17:13:47,090] Trial 907 finished with value: 0.673498508955409 and parameters: {'lr': 0.008028989525767227, 'wd': 0.0002062307253767315, 'warmup': 150, 'gamma': 0.9963664167742083, 'time_dim': 64, 'patch_size': 16, 'depth': 7, 'heads': 10, 'mlp_dim': 768, 'emb_dropout': 0.1372273391919871, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 907 finished with value: 0.673498508955409 and parameters: {'lr': 0.008028989525767227, 'wd': 0.0002062307253767315, 'warmup': 150, 'gamma': 0.9963664167742083, 'time_dim': 64, 'patch_size': 16, 'depth': 7, 'heads': 10, 'mlp_dim': 768, 'emb_dropout': 0.1372273391919871, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 17:16:26,371] Trial 908 finished with value: 0.6351513164314786 and parameters: {'lr': 0.009990692361975279, 'wd': 0.00018724353740753904, 'warmup': 300, 'gamma': 0.9989908620439638, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12722275697851834, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 908 finished with value: 0.6351513164314786 and parameters: {'lr': 0.009990692361975279, 'wd': 0.00018724353740753904, 'warmup': 300, 'gamma': 0.9989908620439638, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.12722275697851834, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 33507936
torch.Size([180])


[I 2024-10-29 17:19:35,944] Trial 909 finished with value: 0.6778570310467611 and parameters: {'lr': 0.00692883717062132, 'wd': 0.0003154615078739355, 'warmup': 150, 'gamma': 0.9979451601913711, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 12, 'mlp_dim': 768, 'emb_dropout': 0.14473057379998802, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 909 finished with value: 0.6778570310467611 and parameters: {'lr': 0.00692883717062132, 'wd': 0.0003154615078739355, 'warmup': 150, 'gamma': 0.9979451601913711, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 12, 'mlp_dim': 768, 'emb_dropout': 0.14473057379998802, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 34114656
torch.Size([180])


[I 2024-10-29 17:22:53,940] Trial 910 finished with value: 0.6437142585062818 and parameters: {'lr': 0.005258702447200539, 'wd': 0.00013016558191127284, 'warmup': 150, 'gamma': 0.9989572975031871, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1171797916563241, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 910 finished with value: 0.6437142585062818 and parameters: {'lr': 0.005258702447200539, 'wd': 0.00013016558191127284, 'warmup': 150, 'gamma': 0.9989572975031871, 'time_dim': 16, 'patch_size': 16, 'depth': 9, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1171797916563241, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 17:25:33,309] Trial 911 finished with value: 0.7255963708020147 and parameters: {'lr': 0.00807614268609426, 'wd': 0.00015852136773290243, 'warmup': 150, 'gamma': 0.9969318390040659, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13151701745476568, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 911 finished with value: 0.7255963708020147 and parameters: {'lr': 0.00807614268609426, 'wd': 0.00015852136773290243, 'warmup': 150, 'gamma': 0.9969318390040659, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13151701745476568, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 30370656
torch.Size([180])


[I 2024-10-29 17:28:32,970] Trial 912 finished with value: 0.6690517762111515 and parameters: {'lr': 0.0064434967011516246, 'wd': 0.0002820508195042766, 'warmup': 150, 'gamma': 0.9983429217544992, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1374278558044526, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 912 finished with value: 0.6690517762111515 and parameters: {'lr': 0.0064434967011516246, 'wd': 0.0002820508195042766, 'warmup': 150, 'gamma': 0.9983429217544992, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1374278558044526, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22882656
torch.Size([180])


[I 2024-10-29 17:30:52,340] Trial 913 finished with value: 0.6110999980339702 and parameters: {'lr': 0.00721293367910529, 'wd': 0.0002275071815185848, 'warmup': 150, 'gamma': 0.998958357488683, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1406873772827574, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 913 finished with value: 0.6110999980339702 and parameters: {'lr': 0.00721293367910529, 'wd': 0.0002275071815185848, 'warmup': 150, 'gamma': 0.998958357488683, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1406873772827574, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26421600
torch.Size([180])


[I 2024-10-29 17:33:27,296] Trial 914 finished with value: 0.737756854236365 and parameters: {'lr': 0.008849330715413481, 'wd': 0.0002506654847628682, 'warmup': 250, 'gamma': 0.997518201606907, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 10, 'mlp_dim': 768, 'emb_dropout': 0.14686282276257162, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 914 finished with value: 0.737756854236365 and parameters: {'lr': 0.008849330715413481, 'wd': 0.0002506654847628682, 'warmup': 250, 'gamma': 0.997518201606907, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 10, 'mlp_dim': 768, 'emb_dropout': 0.14686282276257162, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 17:36:06,325] Trial 915 finished with value: 0.6791840744976638 and parameters: {'lr': 0.005605798531044194, 'wd': 0.0003883971121539582, 'warmup': 150, 'gamma': 0.9984542682800497, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13529314847583537, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 915 finished with value: 0.6791840744976638 and parameters: {'lr': 0.005605798531044194, 'wd': 0.0003883971121539582, 'warmup': 150, 'gamma': 0.9984542682800497, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.13529314847583537, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 33518432
torch.Size([180])


[I 2024-10-29 17:39:19,987] Trial 916 finished with value: 0.7122795760454691 and parameters: {'lr': 0.007809840660174705, 'wd': 0.0001898426144970684, 'warmup': 150, 'gamma': 0.9979579996155848, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.12578314430600585, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 916 finished with value: 0.7122795760454691 and parameters: {'lr': 0.007809840660174705, 'wd': 0.0001898426144970684, 'warmup': 150, 'gamma': 0.9979579996155848, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.12578314430600585, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26626656
torch.Size([180])


[I 2024-10-29 17:41:59,202] Trial 917 finished with value: 0.6778882300750769 and parameters: {'lr': 0.004676364435976446, 'wd': 0.0002223584193890198, 'warmup': 150, 'gamma': 0.9984549368347324, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14177538915840798, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 917 finished with value: 0.6778882300750769 and parameters: {'lr': 0.004676364435976446, 'wd': 0.0002223584193890198, 'warmup': 150, 'gamma': 0.9984549368347324, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.14177538915840798, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22882656
torch.Size([180])


[I 2024-10-29 17:44:18,737] Trial 918 finished with value: 0.5980195247344315 and parameters: {'lr': 0.006360580800189069, 'wd': 0.00017073016972946853, 'warmup': 150, 'gamma': 0.9989519388990947, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1305023213851173, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 918 finished with value: 0.5980195247344315 and parameters: {'lr': 0.006360580800189069, 'wd': 0.00017073016972946853, 'warmup': 150, 'gamma': 0.9989519388990947, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1305023213851173, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 33518432
torch.Size([180])


[I 2024-10-29 17:47:32,513] Trial 919 finished with value: 0.5765776116442646 and parameters: {'lr': 0.008958841962717073, 'wd': 0.00029724324708437985, 'warmup': 300, 'gamma': 0.998999406611434, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.14654445960310863, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 919 finished with value: 0.5765776116442646 and parameters: {'lr': 0.008958841962717073, 'wd': 0.00029724324708437985, 'warmup': 300, 'gamma': 0.998999406611434, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.14654445960310863, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29380960
torch.Size([180])


[I 2024-10-29 17:50:25,076] Trial 920 finished with value: 0.6406750074394622 and parameters: {'lr': 0.009971309190338885, 'wd': 0.00037126780132536093, 'warmup': 300, 'gamma': 0.9989917614707707, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13557610106128332, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 920 finished with value: 0.6406750074394622 and parameters: {'lr': 0.009971309190338885, 'wd': 0.00037126780132536093, 'warmup': 300, 'gamma': 0.9989917614707707, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13557610106128332, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29380960
torch.Size([180])


[I 2024-10-29 17:53:17,652] Trial 921 finished with value: 0.711857907149099 and parameters: {'lr': 0.008960068845381013, 'wd': 0.00032565040343836776, 'warmup': 300, 'gamma': 0.9980023857942929, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1396294813260541, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 921 finished with value: 0.711857907149099 and parameters: {'lr': 0.008960068845381013, 'wd': 0.00032565040343836776, 'warmup': 300, 'gamma': 0.9980023857942929, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1396294813260541, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 28782432
torch.Size([180])


[I 2024-10-29 17:56:04,203] Trial 922 finished with value: 0.6862656134761704 and parameters: {'lr': 0.008766511976783254, 'wd': 0.0003018991575201954, 'warmup': 300, 'gamma': 0.9971195504629383, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.14478267502414222, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 922 finished with value: 0.6862656134761704 and parameters: {'lr': 0.008766511976783254, 'wd': 0.0003018991575201954, 'warmup': 300, 'gamma': 0.9971195504629383, 'time_dim': 16, 'patch_size': 16, 'depth': 6, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.14478267502414222, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29380960
torch.Size([180])


[I 2024-10-29 17:58:56,869] Trial 923 finished with value: 0.6870222233206954 and parameters: {'lr': 0.009057848297314406, 'wd': 0.00011642775513875044, 'warmup': 300, 'gamma': 0.9984018995459427, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.11981918011474141, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 923 finished with value: 0.6870222233206954 and parameters: {'lr': 0.009057848297314406, 'wd': 0.00011642775513875044, 'warmup': 300, 'gamma': 0.9984018995459427, 'time_dim': 16, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.11981918011474141, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 39809888
torch.Size([180])


[I 2024-10-29 18:02:41,377] Trial 924 finished with value: 0.7241823173998607 and parameters: {'lr': 0.008051733013142541, 'wd': 0.0003735837811084971, 'warmup': 300, 'gamma': 0.9975793355523077, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.1322547968921439, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 924 finished with value: 0.7241823173998607 and parameters: {'lr': 0.008051733013142541, 'wd': 0.0003735837811084971, 'warmup': 300, 'gamma': 0.9975793355523077, 'time_dim': 16, 'patch_size': 16, 'depth': 8, 'heads': 11, 'mlp_dim': 1024, 'emb_dropout': 0.1322547968921439, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 25308128
torch.Size([180])


[I 2024-10-29 18:05:12,864] Trial 925 finished with value: 0.5655817566628258 and parameters: {'lr': 0.00877864204612887, 'wd': 0.0002834535081454901, 'warmup': 300, 'gamma': 0.9989791844232025, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13862448227344767, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 925 finished with value: 0.5655817566628258 and parameters: {'lr': 0.00877864204612887, 'wd': 0.0002834535081454901, 'warmup': 300, 'gamma': 0.9989791844232025, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13862448227344767, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 25308128
torch.Size([180])


[I 2024-10-29 18:07:44,453] Trial 926 finished with value: 0.6831125967209767 and parameters: {'lr': 0.00900020363424244, 'wd': 0.00026502909787081644, 'warmup': 300, 'gamma': 0.9979779363102458, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.14360836349362707, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 926 finished with value: 0.6831125967209767 and parameters: {'lr': 0.00900020363424244, 'wd': 0.00026502909787081644, 'warmup': 300, 'gamma': 0.9979779363102458, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.14360836349362707, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 28847072
torch.Size([180])


[I 2024-10-29 18:10:31,038] Trial 927 finished with value: 0.6081327591774618 and parameters: {'lr': 0.009109141085541448, 'wd': 0.0003829828796459071, 'warmup': 300, 'gamma': 0.9984637723824406, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.11296297823589732, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 927 finished with value: 0.6081327591774618 and parameters: {'lr': 0.009109141085541448, 'wd': 0.0003829828796459071, 'warmup': 300, 'gamma': 0.9984637723824406, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.11296297823589732, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 25468128
torch.Size([180])


[I 2024-10-29 18:13:03,006] Trial 928 finished with value: 0.6314986889755981 and parameters: {'lr': 0.009805348109760475, 'wd': 0.00031310723986131627, 'warmup': 300, 'gamma': 0.9983896498172202, 'time_dim': 64, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1408131427242067, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 928 finished with value: 0.6314986889755981 and parameters: {'lr': 0.009805348109760475, 'wd': 0.00031310723986131627, 'warmup': 300, 'gamma': 0.9983896498172202, 'time_dim': 64, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1408131427242067, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 18:15:54,865] Trial 929 finished with value: 0.6728661269070794 and parameters: {'lr': 0.00817535945628087, 'wd': 0.0003156924891063519, 'warmup': 300, 'gamma': 0.997893957385967, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.14668475486760724, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 929 finished with value: 0.6728661269070794 and parameters: {'lr': 0.00817535945628087, 'wd': 0.0003156924891063519, 'warmup': 300, 'gamma': 0.997893957385967, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.14668475486760724, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 25308128
torch.Size([180])


[I 2024-10-29 18:18:26,251] Trial 930 finished with value: 0.5940424082466016 and parameters: {'lr': 0.008901156396332254, 'wd': 0.0003298420944365197, 'warmup': 300, 'gamma': 0.998967704007515, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1285295501047506, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.


Trial 930 finished with value: 0.5940424082466016 and parameters: {'lr': 0.008901156396332254, 'wd': 0.0003298420944365197, 'warmup': 300, 'gamma': 0.998967704007515, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1285295501047506, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 18:21:18,676] Trial 931 finished with value: 0.6714673331018258 and parameters: {'lr': 0.007961621489755303, 'wd': 0.0002915899747936239, 'warmup': 300, 'gamma': 0.9972552132118458, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13540918546054168, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 931 finished with value: 0.6714673331018258 and parameters: {'lr': 0.007961621489755303, 'wd': 0.0002915899747936239, 'warmup': 300, 'gamma': 0.9972552132118458, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13540918546054168, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 25308128
torch.Size([180])


[I 2024-10-29 18:23:50,188] Trial 932 finished with value: 0.7060032284918818 and parameters: {'lr': 0.009996359166322438, 'wd': 0.0002822929478367148, 'warmup': 300, 'gamma': 0.9983502407676593, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.12262417813519721, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 932 finished with value: 0.7060032284918818 and parameters: {'lr': 0.009996359166322438, 'wd': 0.0002822929478367148, 'warmup': 300, 'gamma': 0.9983502407676593, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.12262417813519721, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 28847072
torch.Size([180])


[I 2024-10-29 18:26:37,464] Trial 933 finished with value: 0.7088150928422916 and parameters: {'lr': 0.009897550690234343, 'wd': 0.00034217817344294176, 'warmup': 300, 'gamma': 0.9979635552509675, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13927334401750158, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 933 finished with value: 0.7088150928422916 and parameters: {'lr': 0.009897550690234343, 'wd': 0.00034217817344294176, 'warmup': 300, 'gamma': 0.9979635552509675, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.13927334401750158, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 18:29:30,175] Trial 934 finished with value: 0.6922832280380127 and parameters: {'lr': 0.009983629255094044, 'wd': 0.00014447582410139292, 'warmup': 300, 'gamma': 0.9967364455533072, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1425223196279392, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 934 finished with value: 0.6922832280380127 and parameters: {'lr': 0.009983629255094044, 'wd': 0.00014447582410139292, 'warmup': 300, 'gamma': 0.9967364455533072, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1425223196279392, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 25308128
torch.Size([180])


[I 2024-10-29 18:32:01,569] Trial 935 finished with value: 0.688979964059394 and parameters: {'lr': 0.007960379577063196, 'wd': 0.00040220462760405415, 'warmup': 300, 'gamma': 0.9985533963532833, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.149891770884234, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 935 finished with value: 0.688979964059394 and parameters: {'lr': 0.007960379577063196, 'wd': 0.00040220462760405415, 'warmup': 300, 'gamma': 0.9985533963532833, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.149891770884234, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 18:34:53,454] Trial 936 finished with value: 0.7061858864870253 and parameters: {'lr': 0.007009965166336815, 'wd': 0.00026066934155846347, 'warmup': 300, 'gamma': 0.9976156908623195, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13378696927540334, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 936 finished with value: 0.7061858864870253 and parameters: {'lr': 0.007009965166336815, 'wd': 0.00026066934155846347, 'warmup': 300, 'gamma': 0.9976156908623195, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13378696927540334, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 25308128
torch.Size([180])


[I 2024-10-29 18:37:25,675] Trial 937 finished with value: 0.6821696112569101 and parameters: {'lr': 0.008693329342492126, 'wd': 0.00030092702034538194, 'warmup': 300, 'gamma': 0.9984324618603121, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.14578324881977328, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.


Trial 937 finished with value: 0.6821696112569101 and parameters: {'lr': 0.008693329342492126, 'wd': 0.00030092702034538194, 'warmup': 300, 'gamma': 0.9984324618603121, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.14578324881977328, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 18:40:17,455] Trial 938 finished with value: 0.6756774277010943 and parameters: {'lr': 0.007426681942464064, 'wd': 0.0002785046650045562, 'warmup': 300, 'gamma': 0.9985048420092242, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1382123273253106, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 938 finished with value: 0.6756774277010943 and parameters: {'lr': 0.007426681942464064, 'wd': 0.0002785046650045562, 'warmup': 300, 'gamma': 0.9985048420092242, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1382123273253106, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 28847072
torch.Size([180])


[I 2024-10-29 18:43:03,768] Trial 939 finished with value: 0.6165024212682167 and parameters: {'lr': 0.009991918972040768, 'wd': 0.00034837952856817383, 'warmup': 300, 'gamma': 0.9989991152467443, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.14199767489172743, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 939 finished with value: 0.6165024212682167 and parameters: {'lr': 0.009991918972040768, 'wd': 0.00034837952856817383, 'warmup': 300, 'gamma': 0.9989991152467443, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 10, 'mlp_dim': 1024, 'emb_dropout': 0.14199767489172743, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 18:45:56,433] Trial 940 finished with value: 0.5531512331804729 and parameters: {'lr': 0.008161119796039342, 'wd': 0.00012178545263129354, 'warmup': 300, 'gamma': 0.9989971751737166, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.07926418436898468, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 940 finished with value: 0.5531512331804729 and parameters: {'lr': 0.008161119796039342, 'wd': 0.00012178545263129354, 'warmup': 300, 'gamma': 0.9989971751737166, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.07926418436898468, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 18:48:48,809] Trial 941 finished with value: 0.6576011193469214 and parameters: {'lr': 0.008425349382243066, 'wd': 9.126067148328963e-05, 'warmup': 300, 'gamma': 0.997410919303669, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1457438313727792, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 941 finished with value: 0.6576011193469214 and parameters: {'lr': 0.008425349382243066, 'wd': 9.126067148328963e-05, 'warmup': 300, 'gamma': 0.997410919303669, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1457438313727792, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 18:51:41,223] Trial 942 finished with value: 0.6981834061296075 and parameters: {'lr': 0.008236190523560005, 'wd': 0.00010690994495038113, 'warmup': 300, 'gamma': 0.9959062008787337, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13679286779277178, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 942 finished with value: 0.6981834061296075 and parameters: {'lr': 0.008236190523560005, 'wd': 0.00010690994495038113, 'warmup': 300, 'gamma': 0.9959062008787337, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13679286779277178, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 18:54:33,659] Trial 943 finished with value: 0.5768263238638881 and parameters: {'lr': 0.00762112205952246, 'wd': 0.00023797904849723754, 'warmup': 300, 'gamma': 0.998981661029229, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09747629984699568, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 943 finished with value: 0.5768263238638881 and parameters: {'lr': 0.00762112205952246, 'wd': 0.00023797904849723754, 'warmup': 300, 'gamma': 0.998981661029229, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09747629984699568, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 18:57:25,945] Trial 944 finished with value: 0.7285968650311774 and parameters: {'lr': 0.007111028567077417, 'wd': 0.0002203262194900918, 'warmup': 300, 'gamma': 0.9979743510318951, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.07865388859801359, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 944 finished with value: 0.7285968650311774 and parameters: {'lr': 0.007111028567077417, 'wd': 0.0002203262194900918, 'warmup': 300, 'gamma': 0.9979743510318951, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.07865388859801359, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:00:18,299] Trial 945 finished with value: 0.6078089978849402 and parameters: {'lr': 0.008675351568861754, 'wd': 0.0001684179147414282, 'warmup': 300, 'gamma': 0.99843706694231, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.05808243913842298, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 945 finished with value: 0.6078089978849402 and parameters: {'lr': 0.008675351568861754, 'wd': 0.0001684179147414282, 'warmup': 300, 'gamma': 0.99843706694231, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.05808243913842298, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:03:10,521] Trial 946 finished with value: 0.6805471577077756 and parameters: {'lr': 0.00715687010772532, 'wd': 0.00023050902502675364, 'warmup': 300, 'gamma': 0.9979871071475753, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1303321818000489, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 946 finished with value: 0.6805471577077756 and parameters: {'lr': 0.00715687010772532, 'wd': 0.00023050902502675364, 'warmup': 300, 'gamma': 0.9979871071475753, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1303321818000489, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:06:02,521] Trial 947 finished with value: 0.6597626709430545 and parameters: {'lr': 0.008950366287781121, 'wd': 0.00018840126361779989, 'warmup': 300, 'gamma': 0.998417417532938, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.06771517857892746, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 947 finished with value: 0.6597626709430545 and parameters: {'lr': 0.008950366287781121, 'wd': 0.00018840126361779989, 'warmup': 300, 'gamma': 0.998417417532938, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.06771517857892746, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:08:54,373] Trial 948 finished with value: 0.6484657968796848 and parameters: {'lr': 0.007730664212273513, 'wd': 0.00014313505883730232, 'warmup': 300, 'gamma': 0.9989478717129244, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09502250980548822, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 948 finished with value: 0.6484657968796848 and parameters: {'lr': 0.007730664212273513, 'wd': 0.00014313505883730232, 'warmup': 300, 'gamma': 0.9989478717129244, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09502250980548822, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:11:47,154] Trial 949 finished with value: 0.5796179143601429 and parameters: {'lr': 0.008853986726484037, 'wd': 0.00024398825701008294, 'warmup': 300, 'gamma': 0.998523110565394, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09565777818536927, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 949 finished with value: 0.5796179143601429 and parameters: {'lr': 0.008853986726484037, 'wd': 0.00024398825701008294, 'warmup': 300, 'gamma': 0.998523110565394, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09565777818536927, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:14:39,726] Trial 950 finished with value: 0.6784111397748077 and parameters: {'lr': 0.006835762339746927, 'wd': 0.00019994480677512703, 'warmup': 300, 'gamma': 0.9980038475150063, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.04781637675804062, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 950 finished with value: 0.6784111397748077 and parameters: {'lr': 0.006835762339746927, 'wd': 0.00019994480677512703, 'warmup': 300, 'gamma': 0.9980038475150063, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.04781637675804062, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:17:30,800] Trial 951 finished with value: 0.72454004354013 and parameters: {'lr': 0.0007662982774304767, 'wd': 0.00015513103117804645, 'warmup': 300, 'gamma': 0.9979719195734529, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.0878647957354136, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 951 finished with value: 0.72454004354013 and parameters: {'lr': 0.0007662982774304767, 'wd': 0.00015513103117804645, 'warmup': 300, 'gamma': 0.9979719195734529, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.0878647957354136, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:20:22,416] Trial 952 finished with value: 0.7016820118158031 and parameters: {'lr': 0.00806300046542461, 'wd': 0.00018424098881385993, 'warmup': 300, 'gamma': 0.9984584640419871, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1502448961022808, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 952 finished with value: 0.7016820118158031 and parameters: {'lr': 0.00806300046542461, 'wd': 0.00018424098881385993, 'warmup': 300, 'gamma': 0.9984584640419871, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1502448961022808, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:23:15,318] Trial 953 finished with value: 0.5961873492234357 and parameters: {'lr': 0.007141121086543533, 'wd': 0.00024577594547402795, 'warmup': 300, 'gamma': 0.9989288789017184, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.0996772921125107, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 953 finished with value: 0.5961873492234357 and parameters: {'lr': 0.007141121086543533, 'wd': 0.00024577594547402795, 'warmup': 300, 'gamma': 0.9989288789017184, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.0996772921125107, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:26:07,773] Trial 954 finished with value: 0.7251832009670999 and parameters: {'lr': 0.009881487160876534, 'wd': 0.00020776773066167394, 'warmup': 300, 'gamma': 0.9743156751715258, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.0809510474299746, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 954 finished with value: 0.7251832009670999 and parameters: {'lr': 0.009881487160876534, 'wd': 0.00020776773066167394, 'warmup': 300, 'gamma': 0.9743156751715258, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.0809510474299746, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:28:59,806] Trial 955 finished with value: 0.7839729606896635 and parameters: {'lr': 0.006404460870249294, 'wd': 0.00016996094372504104, 'warmup': 300, 'gamma': 0.9974761517509019, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13309903919422747, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 955 finished with value: 0.7839729606896635 and parameters: {'lr': 0.006404460870249294, 'wd': 0.00016996094372504104, 'warmup': 300, 'gamma': 0.9974761517509019, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.13309903919422747, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:31:51,761] Trial 956 finished with value: 0.6526142957438557 and parameters: {'lr': 0.007930139922869847, 'wd': 0.00022648691103949214, 'warmup': 300, 'gamma': 0.9985183865241642, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.10557573530423223, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 956 finished with value: 0.6526142957438557 and parameters: {'lr': 0.007930139922869847, 'wd': 0.00022648691103949214, 'warmup': 300, 'gamma': 0.9985183865241642, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.10557573530423223, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:34:44,345] Trial 957 finished with value: 0.5982019455011277 and parameters: {'lr': 0.009967677548286358, 'wd': 0.00013899508850728377, 'warmup': 300, 'gamma': 0.9989084923429576, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09402199462541486, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 957 finished with value: 0.5982019455011277 and parameters: {'lr': 0.009967677548286358, 'wd': 0.00013899508850728377, 'warmup': 300, 'gamma': 0.9989084923429576, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09402199462541486, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:37:36,529] Trial 958 finished with value: 0.6650868184188983 and parameters: {'lr': 0.008741999813590444, 'wd': 0.00025449532141570745, 'warmup': 300, 'gamma': 0.9978985668715434, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.05574327517374919, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 958 finished with value: 0.6650868184188983 and parameters: {'lr': 0.008741999813590444, 'wd': 0.00025449532141570745, 'warmup': 300, 'gamma': 0.9978985668715434, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.05574327517374919, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:40:28,512] Trial 959 finished with value: 0.7210247929863017 and parameters: {'lr': 0.006405803555255058, 'wd': 0.0002000136510644724, 'warmup': 300, 'gamma': 0.9984094840522512, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.10312179841154089, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 959 finished with value: 0.7210247929863017 and parameters: {'lr': 0.006405803555255058, 'wd': 0.0002000136510644724, 'warmup': 300, 'gamma': 0.9984094840522512, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.10312179841154089, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:43:20,341] Trial 960 finished with value: 0.6827825333034084 and parameters: {'lr': 0.007673438617132994, 'wd': 0.0001232149916947639, 'warmup': 300, 'gamma': 0.9786157010963225, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.08503448825755525, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 960 finished with value: 0.6827825333034084 and parameters: {'lr': 0.007673438617132994, 'wd': 0.0001232149916947639, 'warmup': 300, 'gamma': 0.9786157010963225, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.08503448825755525, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:46:12,616] Trial 961 finished with value: 0.7201820484686573 and parameters: {'lr': 0.008671352855739134, 'wd': 0.00029421188338823925, 'warmup': 300, 'gamma': 0.970979837392332, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1353329567031435, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 961 finished with value: 0.7201820484686573 and parameters: {'lr': 0.008671352855739134, 'wd': 0.00029421188338823925, 'warmup': 300, 'gamma': 0.970979837392332, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1353329567031435, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:49:05,191] Trial 962 finished with value: 0.734892837296371 and parameters: {'lr': 0.007075253319826682, 'wd': 0.00016266245298835543, 'warmup': 300, 'gamma': 0.9975335663148054, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09879885825742139, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 962 finished with value: 0.734892837296371 and parameters: {'lr': 0.007075253319826682, 'wd': 0.00016266245298835543, 'warmup': 300, 'gamma': 0.9975335663148054, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.09879885825742139, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 19:51:43,994] Trial 963 finished with value: 0.841109964613139 and parameters: {'lr': 1.162964526875891e-05, 'wd': 0.0003741216734909114, 'warmup': 300, 'gamma': 0.9989035876973689, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1392804935587281, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 963 finished with value: 0.841109964613139 and parameters: {'lr': 1.162964526875891e-05, 'wd': 0.0003741216734909114, 'warmup': 300, 'gamma': 0.9989035876973689, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1392804935587281, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 19:54:22,963] Trial 964 finished with value: 0.7184846796466093 and parameters: {'lr': 0.0004875383439112197, 'wd': 0.0012972800760529734, 'warmup': 300, 'gamma': 0.998430499361891, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1291152675100822, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 964 finished with value: 0.7184846796466093 and parameters: {'lr': 0.0004875383439112197, 'wd': 0.0012972800760529734, 'warmup': 300, 'gamma': 0.998430499361891, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1291152675100822, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 19:57:14,744] Trial 965 finished with value: 0.6415550493054984 and parameters: {'lr': 0.006062758260816129, 'wd': 0.00022850265668484482, 'warmup': 300, 'gamma': 0.9989709642619895, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1479197388358801, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 965 finished with value: 0.6415550493054984 and parameters: {'lr': 0.006062758260816129, 'wd': 0.00022850265668484482, 'warmup': 300, 'gamma': 0.9989709642619895, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.1479197388358801, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 19:59:54,073] Trial 966 finished with value: 0.568162038167577 and parameters: {'lr': 0.008767465972577273, 'wd': 0.000268623290782527, 'warmup': 300, 'gamma': 0.9989672598918201, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07690640170762553, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 966 finished with value: 0.568162038167577 and parameters: {'lr': 0.008767465972577273, 'wd': 0.000268623290782527, 'warmup': 300, 'gamma': 0.9989672598918201, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07690640170762553, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 20:02:46,155] Trial 967 finished with value: 0.6307857592286854 and parameters: {'lr': 0.008891415053435093, 'wd': 0.00034038372517629655, 'warmup': 300, 'gamma': 0.9980121356564056, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.07486194201436765, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 967 finished with value: 0.6307857592286854 and parameters: {'lr': 0.008891415053435093, 'wd': 0.00034038372517629655, 'warmup': 300, 'gamma': 0.9980121356564056, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.07486194201436765, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 20:05:25,816] Trial 968 finished with value: 0.7248748521161579 and parameters: {'lr': 0.008875419959229944, 'wd': 0.0002750838502581431, 'warmup': 300, 'gamma': 0.99071835680925, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.10833768235733557, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 968 finished with value: 0.7248748521161579 and parameters: {'lr': 0.008875419959229944, 'wd': 0.0002750838502581431, 'warmup': 300, 'gamma': 0.99071835680925, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.10833768235733557, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 20:08:05,274] Trial 969 finished with value: 0.7711903544249549 and parameters: {'lr': 0.00992297243670384, 'wd': 0.0002862097372618585, 'warmup': 300, 'gamma': 0.9770834537267538, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08043207703289279, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 969 finished with value: 0.7711903544249549 and parameters: {'lr': 0.00992297243670384, 'wd': 0.0002862097372618585, 'warmup': 300, 'gamma': 0.9770834537267538, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08043207703289279, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 29445600
torch.Size([180])


[I 2024-10-29 20:10:57,919] Trial 970 finished with value: 0.7185568836263899 and parameters: {'lr': 0.009923987267760653, 'wd': 0.00032601791525661193, 'warmup': 300, 'gamma': 0.9971098442647565, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.07540458413051053, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 970 finished with value: 0.7185568836263899 and parameters: {'lr': 0.009923987267760653, 'wd': 0.00032601791525661193, 'warmup': 300, 'gamma': 0.9971098442647565, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.07540458413051053, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 20:13:37,728] Trial 971 finished with value: 0.599063914851223 and parameters: {'lr': 0.008150485153698527, 'wd': 0.00039810988657674594, 'warmup': 300, 'gamma': 0.9989714317560924, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08615310936743938, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 971 finished with value: 0.599063914851223 and parameters: {'lr': 0.008150485153698527, 'wd': 0.00039810988657674594, 'warmup': 300, 'gamma': 0.9989714317560924, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08615310936743938, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 25308128
torch.Size([180])


[I 2024-10-29 20:16:09,304] Trial 972 finished with value: 0.6884764706797001 and parameters: {'lr': 0.007986472464705305, 'wd': 0.0002516368633875956, 'warmup': 300, 'gamma': 0.9979865593873968, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.08837318541380741, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 972 finished with value: 0.6884764706797001 and parameters: {'lr': 0.007986472464705305, 'wd': 0.0002516368633875956, 'warmup': 300, 'gamma': 0.9979865593873968, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.08837318541380741, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 20:18:48,779] Trial 973 finished with value: 0.6734525007977388 and parameters: {'lr': 0.008760239378280571, 'wd': 0.00030772769410574666, 'warmup': 300, 'gamma': 0.9983439765644444, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07134843406363488, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 973 finished with value: 0.6734525007977388 and parameters: {'lr': 0.008760239378280571, 'wd': 0.00030772769410574666, 'warmup': 300, 'gamma': 0.9983439765644444, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07134843406363488, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 25308128
torch.Size([180])


[I 2024-10-29 20:21:20,022] Trial 974 finished with value: 0.7491495253996527 and parameters: {'lr': 0.00997124821262006, 'wd': 0.0002612747707590902, 'warmup': 300, 'gamma': 0.9977436418794122, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.050374130936015726, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.


Trial 974 finished with value: 0.7491495253996527 and parameters: {'lr': 0.00997124821262006, 'wd': 0.0002612747707590902, 'warmup': 300, 'gamma': 0.9977436418794122, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 1024, 'emb_dropout': 0.050374130936015726, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 20:23:59,574] Trial 975 finished with value: 0.585409556679752 and parameters: {'lr': 0.007550179272600182, 'wd': 0.00034184312314456826, 'warmup': 300, 'gamma': 0.9989997561327925, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08964491245156472, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 975 finished with value: 0.585409556679752 and parameters: {'lr': 0.007550179272600182, 'wd': 0.00034184312314456826, 'warmup': 300, 'gamma': 0.9989997561327925, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08964491245156472, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 20:26:38,970] Trial 976 finished with value: 0.5531302857840462 and parameters: {'lr': 0.008928591483043206, 'wd': 0.0002759099587135828, 'warmup': 300, 'gamma': 0.9989969348751494, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08314913618947353, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 976 finished with value: 0.5531302857840462 and parameters: {'lr': 0.008928591483043206, 'wd': 0.0002759099587135828, 'warmup': 300, 'gamma': 0.9989969348751494, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08314913618947353, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 33572576
torch.Size([180])


[I 2024-10-29 20:29:48,897] Trial 977 finished with value: 0.6873212522577825 and parameters: {'lr': 0.008828953698838734, 'wd': 0.00038293155798091157, 'warmup': 300, 'gamma': 0.9984152859075521, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 12, 'mlp_dim': 768, 'emb_dropout': 0.06335409029956672, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 977 finished with value: 0.6873212522577825 and parameters: {'lr': 0.008828953698838734, 'wd': 0.00038293155798091157, 'warmup': 300, 'gamma': 0.9984152859075521, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 12, 'mlp_dim': 768, 'emb_dropout': 0.06335409029956672, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22947296
torch.Size([180])


[I 2024-10-29 20:32:08,647] Trial 978 finished with value: 0.68606784463255 and parameters: {'lr': 0.008845046333266767, 'wd': 0.0003066933613610774, 'warmup': 300, 'gamma': 0.9975995506692787, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.09123858489843631, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 978 finished with value: 0.68606784463255 and parameters: {'lr': 0.008845046333266767, 'wd': 0.0003066933613610774, 'warmup': 300, 'gamma': 0.9975995506692787, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.09123858489843631, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 20:34:48,060] Trial 979 finished with value: 0.7293173378052199 and parameters: {'lr': 0.009180002589487877, 'wd': 0.00028473869687151084, 'warmup': 300, 'gamma': 0.9783076166227391, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.15146095782796423, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 979 finished with value: 0.7293173378052199 and parameters: {'lr': 0.009180002589487877, 'wd': 0.00028473869687151084, 'warmup': 300, 'gamma': 0.9783076166227391, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.15146095782796423, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22947296
torch.Size([180])


[I 2024-10-29 20:37:07,458] Trial 980 finished with value: 0.6504143250494825 and parameters: {'lr': 0.009012978197408122, 'wd': 0.00033216873907396905, 'warmup': 300, 'gamma': 0.9983868816347756, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08003203775578098, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 980 finished with value: 0.6504143250494825 and parameters: {'lr': 0.009012978197408122, 'wd': 0.00033216873907396905, 'warmup': 300, 'gamma': 0.9983868816347756, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08003203775578098, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 20:39:46,711] Trial 981 finished with value: 0.6862271362215668 and parameters: {'lr': 0.007910647987198809, 'wd': 0.00040172757815707365, 'warmup': 300, 'gamma': 0.9970473697234145, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07172040487477159, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 981 finished with value: 0.6862271362215668 and parameters: {'lr': 0.007910647987198809, 'wd': 0.00040172757815707365, 'warmup': 300, 'gamma': 0.9970473697234145, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07172040487477159, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 20:42:26,429] Trial 982 finished with value: 0.6055421415571485 and parameters: {'lr': 0.009910144760764697, 'wd': 0.00027223512206800155, 'warmup': 300, 'gamma': 0.9989709977132116, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.06200300481423708, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 982 finished with value: 0.6055421415571485 and parameters: {'lr': 0.009910144760764697, 'wd': 0.00027223512206800155, 'warmup': 300, 'gamma': 0.9989709977132116, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.06200300481423708, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 15459296
torch.Size([180])


[I 2024-10-29 20:44:06,573] Trial 983 finished with value: 0.6947250458341275 and parameters: {'lr': 0.007985714399816336, 'wd': 0.00031644721237572386, 'warmup': 300, 'gamma': 0.9981098453426774, 'time_dim': 32, 'patch_size': 16, 'depth': 4, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.04542910601142158, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 983 finished with value: 0.6947250458341275 and parameters: {'lr': 0.007985714399816336, 'wd': 0.00031644721237572386, 'warmup': 300, 'gamma': 0.9981098453426774, 'time_dim': 32, 'patch_size': 16, 'depth': 4, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.04542910601142158, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22947296
torch.Size([180])


[I 2024-10-29 20:46:25,748] Trial 984 finished with value: 0.7845503723985199 and parameters: {'lr': 0.00994039001333478, 'wd': 0.0003392741124943083, 'warmup': 250, 'gamma': 0.9984056217476983, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08259193691546533, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 984 finished with value: 0.7845503723985199 and parameters: {'lr': 0.00994039001333478, 'wd': 0.0003392741124943083, 'warmup': 250, 'gamma': 0.9984056217476983, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08259193691546533, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 20:49:04,977] Trial 985 finished with value: 0.7176859056646092 and parameters: {'lr': 0.00732675649577341, 'wd': 0.00010172818045386099, 'warmup': 300, 'gamma': 0.997459111655785, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07488136019922009, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 985 finished with value: 0.7176859056646092 and parameters: {'lr': 0.00732675649577341, 'wd': 0.00010172818045386099, 'warmup': 300, 'gamma': 0.997459111655785, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07488136019922009, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 20:51:44,514] Trial 986 finished with value: 0.6285534863594036 and parameters: {'lr': 0.009971568035914861, 'wd': 0.0002610746449781352, 'warmup': 300, 'gamma': 0.9989975196074986, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.0857606482567, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.


Trial 986 finished with value: 0.6285534863594036 and parameters: {'lr': 0.009971568035914861, 'wd': 0.0002610746449781352, 'warmup': 300, 'gamma': 0.9989975196074986, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.0857606482567, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22947296
torch.Size([180])


[I 2024-10-29 20:54:03,283] Trial 987 finished with value: 0.6993796806404377 and parameters: {'lr': 0.000289009115086777, 'wd': 0.0002222855676450796, 'warmup': 300, 'gamma': 0.9980706997678472, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07006776519204512, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 987 finished with value: 0.6993796806404377 and parameters: {'lr': 0.000289009115086777, 'wd': 0.0002222855676450796, 'warmup': 300, 'gamma': 0.9980706997678472, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07006776519204512, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22947296
torch.Size([180])


[I 2024-10-29 20:56:23,119] Trial 988 finished with value: 0.6855664224851713 and parameters: {'lr': 0.008242974664194654, 'wd': 0.00028347625897816607, 'warmup': 300, 'gamma': 0.9985016229762443, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08375284217111063, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 988 finished with value: 0.6855664224851713 and parameters: {'lr': 0.008242974664194654, 'wd': 0.00028347625897816607, 'warmup': 300, 'gamma': 0.9985016229762443, 'time_dim': 32, 'patch_size': 16, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08375284217111063, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 26691296
torch.Size([180])


[I 2024-10-29 20:59:02,926] Trial 989 finished with value: 0.6801037244726307 and parameters: {'lr': 0.007215542805130138, 'wd': 0.000372936579106698, 'warmup': 250, 'gamma': 0.9979247885079303, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08141938556665826, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 989 finished with value: 0.6801037244726307 and parameters: {'lr': 0.007215542805130138, 'wd': 0.000372936579106698, 'warmup': 250, 'gamma': 0.9979247885079303, 'time_dim': 32, 'patch_size': 16, 'depth': 7, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08141938556665826, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 15459296
torch.Size([180])


[I 2024-10-29 21:00:42,959] Trial 990 finished with value: 0.6570258988883034 and parameters: {'lr': 0.008757485673876643, 'wd': 0.0002507220406444643, 'warmup': 300, 'gamma': 0.988274554289896, 'time_dim': 32, 'patch_size': 16, 'depth': 4, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08219846636926947, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 990 finished with value: 0.6570258988883034 and parameters: {'lr': 0.008757485673876643, 'wd': 0.0002507220406444643, 'warmup': 300, 'gamma': 0.988274554289896, 'time_dim': 32, 'patch_size': 16, 'depth': 4, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.08219846636926947, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22956608
torch.Size([180])


[I 2024-10-29 21:02:00,796] Trial 991 finished with value: 0.5748764066830574 and parameters: {'lr': 0.008061271840188781, 'wd': 0.0002138963750844473, 'warmup': 300, 'gamma': 0.9989729287010253, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07675830805769376, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 991 finished with value: 0.5748764066830574 and parameters: {'lr': 0.008061271840188781, 'wd': 0.0002138963750844473, 'warmup': 300, 'gamma': 0.9989729287010253, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07675830805769376, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22956608
torch.Size([180])


[I 2024-10-29 21:03:18,738] Trial 992 finished with value: 0.7543154017749398 and parameters: {'lr': 0.006547068214500275, 'wd': 0.0002040941081642853, 'warmup': 300, 'gamma': 0.9752391159131243, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07833773612903125, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 992 finished with value: 0.7543154017749398 and parameters: {'lr': 0.006547068214500275, 'wd': 0.0002040941081642853, 'warmup': 300, 'gamma': 0.9752391159131243, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07833773612903125, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22956608
torch.Size([180])


[I 2024-10-29 21:04:36,894] Trial 993 finished with value: 0.7340594312238101 and parameters: {'lr': 0.0073433534464740135, 'wd': 0.00021138695809816202, 'warmup': 250, 'gamma': 0.9896397331035307, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.078531010947683, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 993 finished with value: 0.7340594312238101 and parameters: {'lr': 0.0073433534464740135, 'wd': 0.00021138695809816202, 'warmup': 250, 'gamma': 0.9896397331035307, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.078531010947683, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22956608
torch.Size([180])


[I 2024-10-29 21:05:54,744] Trial 994 finished with value: 0.6323538543317867 and parameters: {'lr': 0.006380729396785108, 'wd': 0.00013403122730110887, 'warmup': 300, 'gamma': 0.9989817501884644, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07344875760239156, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 994 finished with value: 0.6323538543317867 and parameters: {'lr': 0.006380729396785108, 'wd': 0.00013403122730110887, 'warmup': 300, 'gamma': 0.9989817501884644, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07344875760239156, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22956608
torch.Size([180])


[I 2024-10-29 21:07:12,482] Trial 995 finished with value: 0.5895036912058178 and parameters: {'lr': 0.007629967342909924, 'wd': 0.0001734482690851294, 'warmup': 300, 'gamma': 0.9989993096973355, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.06648142598245943, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 995 finished with value: 0.5895036912058178 and parameters: {'lr': 0.007629967342909924, 'wd': 0.0001734482690851294, 'warmup': 300, 'gamma': 0.9989993096973355, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.06648142598245943, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22956608
torch.Size([180])


[I 2024-10-29 21:08:30,350] Trial 996 finished with value: 0.7552719654997796 and parameters: {'lr': 0.006692760458864939, 'wd': 0.00021119108400630555, 'warmup': 300, 'gamma': 0.9974088996898187, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.06967240189730622, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 996 finished with value: 0.7552719654997796 and parameters: {'lr': 0.006692760458864939, 'wd': 0.00021119108400630555, 'warmup': 300, 'gamma': 0.9974088996898187, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.06967240189730622, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22956608
torch.Size([180])


[I 2024-10-29 21:09:48,082] Trial 997 finished with value: 0.6963335207608533 and parameters: {'lr': 0.00838318061368901, 'wd': 0.00018551522595627585, 'warmup': 250, 'gamma': 0.9967091443915953, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07829810283664607, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 997 finished with value: 0.6963335207608533 and parameters: {'lr': 0.00838318061368901, 'wd': 0.00018551522595627585, 'warmup': 250, 'gamma': 0.9967091443915953, 'time_dim': 32, 'patch_size': 64, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07829810283664607, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22941184
torch.Size([180])


[I 2024-10-29 21:11:24,813] Trial 998 finished with value: 0.701618436050434 and parameters: {'lr': 0.007246816636377364, 'wd': 0.0014893913157145112, 'warmup': 300, 'gamma': 0.9983429016583535, 'time_dim': 32, 'patch_size': 32, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07832708718652247, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.


Trial 998 finished with value: 0.701618436050434 and parameters: {'lr': 0.007246816636377364, 'wd': 0.0014893913157145112, 'warmup': 300, 'gamma': 0.9983429016583535, 'time_dim': 32, 'patch_size': 32, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07832708718652247, 'schedule': 'constant_with_warmup'}. Best is trial 598 with value: 0.49804964861220785.
Number of parameters: 22941184
torch.Size([180])


[I 2024-10-29 21:13:01,578] Trial 999 finished with value: 0.7236711885250081 and parameters: {'lr': 0.005926061675366004, 'wd': 0.00023534831523452086, 'warmup': 300, 'gamma': 0.997903198396636, 'time_dim': 32, 'patch_size': 32, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07554766372128713, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.


Trial 999 finished with value: 0.7236711885250081 and parameters: {'lr': 0.005926061675366004, 'wd': 0.00023534831523452086, 'warmup': 300, 'gamma': 0.997903198396636, 'time_dim': 32, 'patch_size': 32, 'depth': 6, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.07554766372128713, 'schedule': 'cosine'}. Best is trial 598 with value: 0.49804964861220785.
Accuracy: 0.49804964861220785
Best hyperparameters: {'lr': 0.009980944985851876, 'wd': 0.0002561887904737375, 'warmup': 150, 'gamma': 0.9793712795623954, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1423146531023044, 'schedule': 'constant'}


In [19]:
print("Accuracy: {}".format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

Accuracy: 0.49804964861220785
Best hyperparameters: {'lr': 0.009980944985851876, 'wd': 0.0002561887904737375, 'warmup': 150, 'gamma': 0.9793712795623954, 'time_dim': 16, 'patch_size': 32, 'depth': 5, 'heads': 7, 'mlp_dim': 768, 'emb_dropout': 0.1423146531023044, 'schedule': 'constant'}
