In [1]:
import tqdm

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

import os
import random

import argparse
import json
import pandas as pd
import tqdm
import argparse
import warnings
import joblib

import optuna
from optuna import Trial, visualization
from optuna.samplers import TPESampler

from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold

from src.data import dl_data_load, dl_data_split, dl_data_loader

warnings.filterwarnings(action='ignore')

In [2]:
SEED = 42
def seed_everything(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)

seed_everything(SEED)

In [3]:
# Metrics
def rmse(real: list, predict: list) -> float:
    pred = np.array(predict)
    return np.sqrt(np.mean((real-pred) ** 2))


class RMSELoss(torch.nn.Module):
    def __init__(self):
        super(RMSELoss,self).__init__()
        self.eps = 1e-6

    def forward(self, x, y):
        criterion = nn.MSELoss()
        loss = torch.sqrt(criterion(x, y)+self.eps)
        return loss

In [4]:
import numpy as np
import torch
import torch.nn as nn


# factorization을 통해 얻은 feature를 embedding 합니다.
class FeaturesEmbedding(nn.Module):
    def __init__(self, field_dims: np.ndarray, embed_dim: int):
        super().__init__()
        self.embedding = torch.nn.Embedding(sum(field_dims), embed_dim)
        self.offsets = np.array((0, *np.cumsum(field_dims)[:-1]), dtype=np.int32)
        torch.nn.init.xavier_uniform_(self.embedding.weight.data)


    def forward(self, x: torch.Tensor):
        x = x + x.new_tensor(self.offsets).unsqueeze(0)
        return self.embedding(x)


# cross product transformation을 구현합니다.
class CrossNetwork(nn.Module):
    def __init__(self, input_dim: int, num_layers: int):
        super().__init__()
        self.num_layers = num_layers
        self.w = torch.nn.ModuleList([
            torch.nn.Linear(input_dim, 1, bias=False) for _ in range(num_layers)
        ])
        self.b = torch.nn.ParameterList([
            torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)
        ])


    def forward(self, x: torch.Tensor):
        x0 = x
        for i in range(self.num_layers):
            xw = self.w[i](x)
            x = x0 * xw + self.b[i] + x
        return x


# NCF 모델은 MLP와 GMF를 합하여 최종 결과를 도출합니다.
# MLP을 구현합니다.
class MultiLayerPerceptron(nn.Module):
    def __init__(self, input_dim, embed_dims, dropout, output_layer=True):
        super().__init__()
        layers = list()
        for embed_dim in embed_dims:
            layers.append(torch.nn.Linear(input_dim, embed_dim))
            layers.append(torch.nn.BatchNorm1d(embed_dim))
            layers.append(torch.nn.ReLU())
            layers.append(torch.nn.Dropout(p=dropout))
            input_dim = embed_dim
        if output_layer:
            layers.append(torch.nn.Linear(input_dim, 1))
        self.mlp = torch.nn.Sequential(*layers)


    def forward(self, x):
        return self.mlp(x)


# Crossnetwork 결과를 MLP layer에 넣어 최종결과를 도출합니다.
class _DeepCrossNetworkModel(nn.Module):
    def __init__(self, args, data):
        super().__init__()
        self.field_dims = data['field_dims']
        self.embedding = FeaturesEmbedding(self.field_dims, args.embed_dim)
        self.embed_output_dim = len(self.field_dims) * args.embed_dim
        self.cn = CrossNetwork(self.embed_output_dim, args.num_layers)
        self.mlp = MultiLayerPerceptron(self.embed_output_dim, args.mlp_dims, args.dropout, output_layer=False)
        self.cd_linear = nn.Linear(args.mlp_dims[0], 1, bias=False)


    def forward(self, x: torch.Tensor):
        embed_x = self.embedding(x).view(-1, self.embed_output_dim)
        x_l1 = self.cn(embed_x)
        x_out = self.mlp(x_l1)
        p = self.cd_linear(x_out)
        return p.squeeze(1)

In [5]:
class DeepCrossNetworkModel:

    def __init__(self, args, data):
        super().__init__()

        self.criterion = RMSELoss()

        self.train_dataloader = data['train_dataloader']
        self.valid_dataloader = data['valid_dataloader']
        self.field_dims = data['field_dims']

        self.embed_dim = args.embed_dim
        self.epochs = args.epochs
        self.learning_rate = args.lr
        self.weight_decay = args.weight_decay
        self.log_interval = 100

        self.device = args.device

        self.mlp_dims = args.mlp_dims
        self.dropout = args.dropout
        self.num_layers = args.num_layers

        self.model = _DeepCrossNetworkModel(args,data).to(self.device)
        self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=self.learning_rate, amsgrad=True, weight_decay=self.weight_decay)


    def train(self):
      # model: type, optimizer: torch.optim, train_dataloader: DataLoader, criterion: torch.nn, device: str, log_interval: int=100
        for epoch in range(self.epochs):
            self.model.train()
            total_loss = 0
            tk0 = tqdm.tqdm(self.train_dataloader, smoothing=0, mininterval=1.0)
            for i, (fields, target) in enumerate(tk0):
                fields, target = fields.to(self.device), target.to(self.device)
                y = self.model(fields)
                loss = self.criterion(y, target.float())
                self.model.zero_grad()
                loss.backward()
                self.optimizer.step()
                total_loss += loss.item()
                if (i + 1) % self.log_interval == 0:
                    tk0.set_postfix(loss=total_loss / self.log_interval)
                    total_loss = 0

            rmse_score = self.predict_train()
            print('epoch:', epoch, 'validation: rmse:', rmse_score)


    def predict_train(self):
        self.model.eval()
        targets, predicts = list(), list()
        with torch.no_grad():
            for fields, target in tqdm.tqdm(self.valid_dataloader, smoothing=0, mininterval=1.0):
                fields, target = fields.to(self.device), target.to(self.device)
                y = self.model(fields)
                targets.extend(target.tolist())
                predicts.extend(y.tolist())
        return rmse(targets, predicts)

    def predict(self, dataloader):
        self.model.eval()
        predicts = list()
        with torch.no_grad():
            for fields in tqdm.tqdm(dataloader, smoothing=0, mininterval=1.0):
                fields = fields[0].to(self.device)
                y = self.model(fields)
                predicts.extend(y.tolist())
        return predicts
    

In [6]:
import argparse
# default args
args = argparse.Namespace(
  seed=42,
  data_path = '/opt/ml/data/',
    batch_size = 1024,
    data_shuffle = True,
    test_size = 0.2,
    epochs = 5,
    device = 'cuda',
    lr = 0.001,
    weight_decay = 0.0001,
    embed_dim = 16,
    mlp_dims = [16,16],
    dropout = 0.2,
    num_layers = 3
)

In [7]:
dl_dataset = dl_data_load(args)

In [8]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
folds = []
for train_idx, valid_idx in skf.split(dl_dataset['train'].drop(['rating'], axis=1), dl_dataset['train']['rating']):
    folds.append((train_idx, valid_idx))

In [9]:
def objective(trial):
    # trial 객체를 이용해 하이퍼파라미터 탐색 공간 설정
    args.batch_size = trial.suggest_categorical('batch_size', [128, 256, 512, 1024])
    args.lr = trial.suggest_loguniform('lr',0.001,0.01)
    args.weight_decay = trial.suggest_loguniform('weight_decay', 1e-8, 1e-3)
    args.embed_dim = trial.suggest_int('embed_dim', 1, 16) # 16이 default
    mlp_dim_layers = trial.suggest_int('mlp_dim_layers',1,4)
    # args.mlp_dims = [trial.suggest_int('mlp_dims',1,20)] * mlp_dim_layers
    args.dropout = trial.suggest_categorical("dropout",[0.2,0.25,0.3])
    dl_data = dl_data_split(args,dl_dataset)
    dl_data = dl_data_loader(args,dl_data)
    model = DeepCrossNetworkModel(args, dl_data)
    model.train()
    loss = model.predict_train()
    return loss

In [10]:
for fold in range(0,5):
    print(f'===================================={fold+1}============================================')
    train_idx, valid_idx = folds[fold]
    X_train = dl_dataset['train'].drop(['rating'],axis = 1).iloc[train_idx]
    X_valid = dl_dataset['train'].drop(['rating'],axis = 1).iloc[valid_idx]
    y_train = dl_dataset['train']['rating'].iloc[train_idx]
    y_valid = dl_dataset['train']['rating'].iloc[valid_idx]

    sampler = optuna.samplers.TPESampler(SEED)
    study = optuna.create_study(
        study_name = 'DCN_parameter_opt',
        direction = 'minimize',
        sampler = sampler,
    )
    study.optimize(objective, n_trials=5)    
    args.__dict__.update(study.best_params)
    
    dl_dataset['X_train'], dl_dataset['X_valid'], dl_dataset['y_train'], dl_dataset['y_valid'] = X_train, X_valid, y_train, y_valid
    dl_data = dl_data_loader(args,dl_dataset)
    
    model = DeepCrossNetworkModel(args, dl_data)
    model.train()
    
    pred = model.predict(dl_data['test_dataloader'])
    dl_dataset[f'pred_{fold}'] = pred
    print(f'================================================================================\n\n')

[32m[I 2023-04-18 10:44:08,704][0m A new study created in memory with name: DCN_parameter_opt[0m




100%|██████████| 959/959 [00:08<00:00, 119.71it/s, loss=2.59]
100%|██████████| 240/240 [00:00<00:00, 339.72it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.3827694707320184


100%|██████████| 959/959 [00:07<00:00, 127.58it/s, loss=2.43]
100%|██████████| 240/240 [00:00<00:00, 339.03it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.3273959715615784


100%|██████████| 959/959 [00:07<00:00, 128.98it/s, loss=2.22]
100%|██████████| 240/240 [00:00<00:00, 337.91it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.3456933704415497


100%|██████████| 959/959 [00:07<00:00, 129.56it/s, loss=2.11]
100%|██████████| 240/240 [00:00<00:00, 328.81it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3544210108062766


100%|██████████| 959/959 [00:07<00:00, 128.81it/s, loss=2.01]
100%|██████████| 240/240 [00:00<00:00, 338.49it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3521835454349813


100%|██████████| 240/240 [00:00<00:00, 288.67it/s]
[32m[I 2023-04-18 10:44:53,804][0m Trial 0 finished with value: 2.352183545434981 and parameters: {'batch_size': 256, 'lr': 0.004901743046370156, 'weight_decay': 8.073942772357392e-07, 'embed_dim': 12, 'mlp_dim_layers': 4, 'dropout': 0.3}. Best is trial 0 with value: 2.352183545434981.[0m
100%|██████████| 480/480 [00:04<00:00, 104.68it/s, loss=2.66]
100%|██████████| 120/120 [00:00<00:00, 205.39it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.3680995441218746


100%|██████████| 480/480 [00:04<00:00, 104.29it/s, loss=2.51]
100%|██████████| 120/120 [00:00<00:00, 205.49it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.4188236995423695


100%|██████████| 480/480 [00:04<00:00, 99.02it/s, loss=2.36] 
100%|██████████| 120/120 [00:00<00:00, 206.70it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.4546745417877815


100%|██████████| 480/480 [00:04<00:00, 104.04it/s, loss=2.21]
100%|██████████| 120/120 [00:00<00:00, 169.64it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.4199535229664124


100%|██████████| 480/480 [00:04<00:00, 103.02it/s, loss=2.11]
100%|██████████| 120/120 [00:00<00:00, 200.67it/s]
  0%|          | 0/120 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3812817545890663


100%|██████████| 120/120 [00:00<00:00, 204.53it/s]
[32m[I 2023-04-18 10:45:20,982][0m Trial 1 finished with value: 2.381281754589066 and parameters: {'batch_size': 512, 'lr': 0.0032689182746475456, 'weight_decay': 0.00018542241644339892, 'embed_dim': 6, 'mlp_dim_layers': 1, 'dropout': 0.25}. Best is trial 0 with value: 2.352183545434981.[0m
100%|██████████| 959/959 [00:07<00:00, 132.84it/s, loss=2.53]
100%|██████████| 240/240 [00:00<00:00, 342.71it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2920005049958467


100%|██████████| 959/959 [00:07<00:00, 130.12it/s, loss=2.32]
100%|██████████| 240/240 [00:00<00:00, 287.13it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.248819518677856


100%|██████████| 959/959 [00:07<00:00, 131.48it/s, loss=2.11]
100%|██████████| 240/240 [00:00<00:00, 287.86it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.3073949999949797


100%|██████████| 959/959 [00:07<00:00, 129.65it/s, loss=1.97]
100%|██████████| 240/240 [00:00<00:00, 335.04it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3122339235863976


100%|██████████| 959/959 [00:07<00:00, 129.08it/s, loss=1.93]
100%|██████████| 240/240 [00:00<00:00, 339.89it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.307590087496725


100%|██████████| 240/240 [00:00<00:00, 342.81it/s]
[32m[I 2023-04-18 10:46:02,422][0m Trial 2 finished with value: 2.3075900874967243 and parameters: {'batch_size': 256, 'lr': 0.008204970025471013, 'weight_decay': 1.0874529663254856e-06, 'embed_dim': 10, 'mlp_dim_layers': 4, 'dropout': 0.25}. Best is trial 2 with value: 2.3075900874967243.[0m
100%|██████████| 959/959 [00:07<00:00, 128.18it/s, loss=2.72]
100%|██████████| 240/240 [00:00<00:00, 339.10it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.4463647579754424


100%|██████████| 959/959 [00:07<00:00, 126.80it/s, loss=2.48]
100%|██████████| 240/240 [00:00<00:00, 338.92it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.3668380093559223


100%|██████████| 959/959 [00:07<00:00, 128.86it/s, loss=2.3] 
100%|██████████| 240/240 [00:00<00:00, 330.38it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.4466144419874722


100%|██████████| 959/959 [00:07<00:00, 127.97it/s, loss=2.17]
100%|██████████| 240/240 [00:00<00:00, 290.03it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.448055334544037


100%|██████████| 959/959 [00:07<00:00, 130.08it/s, loss=2.09]
100%|██████████| 240/240 [00:00<00:00, 288.28it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.49070666066147


100%|██████████| 240/240 [00:00<00:00, 339.53it/s]
[32m[I 2023-04-18 10:46:44,520][0m Trial 3 finished with value: 2.4907066606614703 and parameters: {'batch_size': 256, 'lr': 0.0011735264894528277, 'weight_decay': 2.68963746983304e-08, 'embed_dim': 11, 'mlp_dim_layers': 4, 'dropout': 0.3}. Best is trial 2 with value: 2.3075900874967243.[0m
100%|██████████| 480/480 [00:04<00:00, 101.78it/s, loss=2.86]
100%|██████████| 120/120 [00:00<00:00, 167.76it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.7020191428451024


100%|██████████| 480/480 [00:04<00:00, 101.22it/s, loss=2.57]
100%|██████████| 120/120 [00:00<00:00, 206.00it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.578077288328174


100%|██████████| 480/480 [00:04<00:00, 99.67it/s, loss=2.35] 
100%|██████████| 120/120 [00:00<00:00, 204.64it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.4935454968305586


100%|██████████| 480/480 [00:04<00:00, 101.95it/s, loss=2.2] 
100%|██████████| 120/120 [00:00<00:00, 205.01it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.460468226451979


100%|██████████| 480/480 [00:04<00:00, 99.29it/s, loss=2.1] 
100%|██████████| 120/120 [00:00<00:00, 205.55it/s]
  0%|          | 0/120 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.4648167809154926


100%|██████████| 120/120 [00:00<00:00, 207.23it/s]
[32m[I 2023-04-18 10:47:12,234][0m Trial 4 finished with value: 2.4648167809154926 and parameters: {'batch_size': 512, 'lr': 0.0014181427219100778, 'weight_decay': 3.10505650951897e-07, 'embed_dim': 15, 'mlp_dim_layers': 2, 'dropout': 0.3}. Best is trial 2 with value: 2.3075900874967243.[0m
100%|██████████| 959/959 [00:07<00:00, 129.89it/s, loss=2.54]
100%|██████████| 240/240 [00:00<00:00, 337.33it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.262760938165383


100%|██████████| 959/959 [00:07<00:00, 128.39it/s, loss=2.35]
100%|██████████| 240/240 [00:00<00:00, 339.06it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.2482941778892753


100%|██████████| 959/959 [00:07<00:00, 129.51it/s, loss=2.13]
100%|██████████| 240/240 [00:00<00:00, 340.55it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.2883071018379266


100%|██████████| 959/959 [00:07<00:00, 130.72it/s, loss=2.01]
100%|██████████| 240/240 [00:00<00:00, 287.49it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.295540901439252


100%|██████████| 959/959 [00:07<00:00, 131.39it/s, loss=1.93]
100%|██████████| 240/240 [00:00<00:00, 290.16it/s]
  0%|          | 0/300 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3396660015430033


100%|██████████| 300/300 [00:00<00:00, 485.09it/s]
[32m[I 2023-04-18 10:47:53,713][0m A new study created in memory with name: DCN_parameter_opt[0m
  0%|          | 0/1918 [00:00<?, ?it/s]





100%|██████████| 1918/1918 [00:12<00:00, 154.09it/s, loss=2.41]
100%|██████████| 480/480 [00:00<00:00, 516.77it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.257382515896743


100%|██████████| 1918/1918 [00:12<00:00, 153.80it/s, loss=2.38]
100%|██████████| 480/480 [00:00<00:00, 513.19it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.225663910417007


100%|██████████| 1918/1918 [00:12<00:00, 152.61it/s, loss=2.33]
100%|██████████| 480/480 [00:00<00:00, 516.99it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.191639204687936


100%|██████████| 1918/1918 [00:12<00:00, 154.27it/s, loss=2.3] 
100%|██████████| 480/480 [00:00<00:00, 519.66it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.193500352180839


100%|██████████| 1918/1918 [00:12<00:00, 152.29it/s, loss=2.25]
100%|██████████| 480/480 [00:00<00:00, 517.96it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.2034739692405765


100%|██████████| 480/480 [00:00<00:00, 520.09it/s]
[32m[I 2023-04-18 10:49:02,003][0m Trial 0 finished with value: 2.2034739692405765 and parameters: {'batch_size': 128, 'lr': 0.0036871457801145196, 'weight_decay': 0.0006398968602270167, 'embed_dim': 4, 'mlp_dim_layers': 1, 'dropout': 0.2}. Best is trial 0 with value: 2.2034739692405765.[0m
100%|██████████| 240/240 [00:03<00:00, 64.42it/s, loss=5.59]
100%|██████████| 60/60 [00:00<00:00, 91.93it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 3.5742228160665594


100%|██████████| 240/240 [00:03<00:00, 65.35it/s, loss=2.87]
100%|██████████| 60/60 [00:00<00:00, 91.33it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.3866926262964467


100%|██████████| 240/240 [00:03<00:00, 65.01it/s, loss=2.63]
100%|██████████| 60/60 [00:00<00:00, 90.65it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.411709416114082


100%|██████████| 240/240 [00:03<00:00, 64.90it/s, loss=2.46]
100%|██████████| 60/60 [00:00<00:00, 91.85it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.49312249960285


100%|██████████| 240/240 [00:03<00:00, 65.39it/s, loss=2.32]
100%|██████████| 60/60 [00:00<00:00, 93.00it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.457138732293868


100%|██████████| 60/60 [00:00<00:00, 114.07it/s]
[32m[I 2023-04-18 10:49:24,506][0m Trial 1 finished with value: 2.457138732293868 and parameters: {'batch_size': 1024, 'lr': 0.0018383727086108104, 'weight_decay': 0.0001946075629055048, 'embed_dim': 11, 'mlp_dim_layers': 2, 'dropout': 0.25}. Best is trial 0 with value: 2.2034739692405765.[0m
100%|██████████| 240/240 [00:03<00:00, 63.22it/s, loss=2.6] 
100%|██████████| 60/60 [00:00<00:00, 113.01it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2632192646813896


100%|██████████| 240/240 [00:03<00:00, 65.37it/s, loss=2.29]
100%|██████████| 60/60 [00:00<00:00, 93.05it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.2713964078789513


100%|██████████| 240/240 [00:03<00:00, 64.79it/s, loss=2.05]
100%|██████████| 60/60 [00:00<00:00, 91.67it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.313632108089927


100%|██████████| 240/240 [00:03<00:00, 63.77it/s, loss=1.89]
100%|██████████| 60/60 [00:00<00:00, 91.57it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3199680470109407


100%|██████████| 240/240 [00:03<00:00, 64.88it/s, loss=1.79]
100%|██████████| 60/60 [00:00<00:00, 91.91it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.344585489104775


100%|██████████| 60/60 [00:00<00:00, 92.58it/s]
[32m[I 2023-04-18 10:49:47,192][0m Trial 2 finished with value: 2.344585489104775 and parameters: {'batch_size': 1024, 'lr': 0.009999751184119594, 'weight_decay': 7.59525244383508e-08, 'embed_dim': 16, 'mlp_dim_layers': 4, 'dropout': 0.2}. Best is trial 0 with value: 2.2034739692405765.[0m
100%|██████████| 1918/1918 [00:12<00:00, 153.35it/s, loss=2.47]
100%|██████████| 480/480 [00:00<00:00, 516.46it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2516259352035513


100%|██████████| 1918/1918 [00:12<00:00, 150.82it/s, loss=2.26]
100%|██████████| 480/480 [00:00<00:00, 519.56it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.203128383225817


100%|██████████| 1918/1918 [00:12<00:00, 151.16it/s, loss=2.08]
100%|██████████| 480/480 [00:00<00:00, 519.63it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.258963615515835


100%|██████████| 1918/1918 [00:12<00:00, 150.32it/s, loss=1.92]
100%|██████████| 480/480 [00:00<00:00, 517.28it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3030445829903137


100%|██████████| 1918/1918 [00:12<00:00, 151.14it/s, loss=1.86]
100%|██████████| 480/480 [00:00<00:00, 511.57it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.335959760926378


100%|██████████| 480/480 [00:00<00:00, 513.96it/s]
[32m[I 2023-04-18 10:50:56,393][0m Trial 3 finished with value: 2.3359597609497746 and parameters: {'batch_size': 128, 'lr': 0.005554051815566247, 'weight_decay': 6.432639297276917e-08, 'embed_dim': 16, 'mlp_dim_layers': 1, 'dropout': 0.25}. Best is trial 0 with value: 2.2034739692405765.[0m
100%|██████████| 1918/1918 [00:12<00:00, 152.01it/s, loss=2.6] 
100%|██████████| 480/480 [00:00<00:00, 517.13it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.340022684081587


100%|██████████| 1918/1918 [00:12<00:00, 151.60it/s, loss=2.52]
100%|██████████| 480/480 [00:00<00:00, 516.55it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.3269943477227204


100%|██████████| 1918/1918 [00:12<00:00, 150.43it/s, loss=2.43]
100%|██████████| 480/480 [00:00<00:00, 515.88it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.2844168662415667


100%|██████████| 1918/1918 [00:12<00:00, 151.77it/s, loss=2.34]
100%|██████████| 480/480 [00:00<00:00, 510.84it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.322064373261854


100%|██████████| 1918/1918 [00:12<00:00, 151.38it/s, loss=2.24]
100%|██████████| 480/480 [00:00<00:00, 515.74it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3176492425595785


100%|██████████| 480/480 [00:00<00:00, 513.90it/s]
[32m[I 2023-04-18 10:52:05,559][0m Trial 4 finished with value: 2.3176492426443485 and parameters: {'batch_size': 128, 'lr': 0.0027844636042886222, 'weight_decay': 9.858666254513638e-05, 'embed_dim': 13, 'mlp_dim_layers': 2, 'dropout': 0.3}. Best is trial 0 with value: 2.2034739692405765.[0m
100%|██████████| 1918/1918 [00:12<00:00, 155.88it/s, loss=2.47]
100%|██████████| 480/480 [00:00<00:00, 519.93it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2744501397215218


100%|██████████| 1918/1918 [00:12<00:00, 154.59it/s, loss=2.39]
100%|██████████| 480/480 [00:00<00:00, 514.29it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.223483311684985


100%|██████████| 1918/1918 [00:12<00:00, 153.80it/s, loss=2.36]
100%|██████████| 480/480 [00:00<00:00, 504.66it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.1838107307613233


100%|██████████| 1918/1918 [00:12<00:00, 156.00it/s, loss=2.31]
100%|██████████| 480/480 [00:00<00:00, 519.97it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.1872639463391677


100%|██████████| 1918/1918 [00:12<00:00, 154.58it/s, loss=2.27]
100%|██████████| 480/480 [00:00<00:00, 508.83it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.216300555164033


100%|██████████| 600/600 [00:00<00:00, 682.28it/s]
[32m[I 2023-04-18 10:53:13,142][0m A new study created in memory with name: DCN_parameter_opt[0m
  0%|          | 0/240 [00:00<?, ?it/s]





100%|██████████| 240/240 [00:03<00:00, 66.89it/s, loss=3.07]
100%|██████████| 60/60 [00:00<00:00, 91.23it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2631969754328427


100%|██████████| 240/240 [00:03<00:00, 66.89it/s, loss=2.34]
100%|██████████| 60/60 [00:00<00:00, 93.09it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.2431626630964945


100%|██████████| 240/240 [00:03<00:00, 67.49it/s, loss=2.1] 
100%|██████████| 60/60 [00:00<00:00, 93.53it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.295642251093401


100%|██████████| 240/240 [00:03<00:00, 67.53it/s, loss=1.94]
100%|██████████| 60/60 [00:00<00:00, 93.21it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3159973282198743


100%|██████████| 240/240 [00:03<00:00, 66.22it/s, loss=1.85]
100%|██████████| 60/60 [00:00<00:00, 92.66it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3494263634577592


100%|██████████| 60/60 [00:00<00:00, 114.02it/s]
[32m[I 2023-04-18 10:53:35,008][0m Trial 0 finished with value: 2.3494263634577592 and parameters: {'batch_size': 1024, 'lr': 0.002675083966802814, 'weight_decay': 1.0339272180547674e-05, 'embed_dim': 4, 'mlp_dim_layers': 1, 'dropout': 0.2}. Best is trial 0 with value: 2.3494263634577592.[0m
100%|██████████| 959/959 [00:07<00:00, 129.65it/s, loss=2.47]
100%|██████████| 240/240 [00:00<00:00, 338.07it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2198766267103185


100%|██████████| 959/959 [00:07<00:00, 129.59it/s, loss=2.34]
100%|██████████| 240/240 [00:00<00:00, 285.77it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.217508413415036


100%|██████████| 959/959 [00:07<00:00, 133.21it/s, loss=2.2] 
100%|██████████| 240/240 [00:00<00:00, 288.85it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.234969956572764


100%|██████████| 959/959 [00:07<00:00, 129.72it/s, loss=2.07]
100%|██████████| 240/240 [00:00<00:00, 337.22it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.2687254502288594


100%|██████████| 959/959 [00:07<00:00, 130.93it/s, loss=1.97]
100%|██████████| 240/240 [00:00<00:00, 337.89it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.2899825157830187


100%|██████████| 240/240 [00:00<00:00, 340.17it/s]
[32m[I 2023-04-18 10:54:16,440][0m Trial 1 finished with value: 2.2899825157830187 and parameters: {'batch_size': 256, 'lr': 0.0037501367173630703, 'weight_decay': 9.200661065466652e-05, 'embed_dim': 7, 'mlp_dim_layers': 3, 'dropout': 0.2}. Best is trial 1 with value: 2.2899825157830187.[0m
100%|██████████| 959/959 [00:07<00:00, 126.95it/s, loss=2.51]
100%|██████████| 240/240 [00:00<00:00, 336.49it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2513844698906937


100%|██████████| 959/959 [00:07<00:00, 127.10it/s, loss=2.4] 
100%|██████████| 240/240 [00:00<00:00, 331.18it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.2552272415705956


100%|██████████| 959/959 [00:07<00:00, 130.29it/s, loss=2.25]
100%|██████████| 240/240 [00:00<00:00, 330.15it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.2766267679588053


100%|██████████| 959/959 [00:07<00:00, 126.72it/s, loss=2.1] 
100%|██████████| 240/240 [00:00<00:00, 282.29it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.309366344110383


100%|██████████| 959/959 [00:07<00:00, 133.28it/s, loss=2.01]
100%|██████████| 240/240 [00:00<00:00, 285.49it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.336056100162665


100%|██████████| 240/240 [00:00<00:00, 339.65it/s]
[32m[I 2023-04-18 10:54:58,473][0m Trial 2 finished with value: 2.336056100162665 and parameters: {'batch_size': 256, 'lr': 0.0013541347247122925, 'weight_decay': 0.0003527465942552554, 'embed_dim': 13, 'mlp_dim_layers': 1, 'dropout': 0.2}. Best is trial 1 with value: 2.2899825157830187.[0m
100%|██████████| 240/240 [00:03<00:00, 66.05it/s, loss=3.47]
100%|██████████| 60/60 [00:00<00:00, 91.96it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.319043932429562


100%|██████████| 240/240 [00:03<00:00, 66.57it/s, loss=2.41]
100%|██████████| 60/60 [00:00<00:00, 92.08it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.3988958462800007


100%|██████████| 240/240 [00:03<00:00, 66.21it/s, loss=2.16]
100%|██████████| 60/60 [00:00<00:00, 91.33it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.340244565202598


100%|██████████| 240/240 [00:03<00:00, 66.91it/s, loss=1.99]
100%|██████████| 60/60 [00:00<00:00, 91.50it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3452053869675633


100%|██████████| 240/240 [00:03<00:00, 65.26it/s, loss=1.89]
100%|██████████| 60/60 [00:00<00:00, 114.41it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.395622502211221


100%|██████████| 60/60 [00:00<00:00, 93.04it/s]
[32m[I 2023-04-18 10:55:20,610][0m Trial 3 finished with value: 2.395622502211221 and parameters: {'batch_size': 1024, 'lr': 0.0029006008885749442, 'weight_decay': 1.8432082743171692e-07, 'embed_dim': 9, 'mlp_dim_layers': 2, 'dropout': 0.2}. Best is trial 1 with value: 2.2899825157830187.[0m
100%|██████████| 240/240 [00:03<00:00, 67.00it/s, loss=4.93]
100%|██████████| 60/60 [00:00<00:00, 92.03it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 5.666247004565155


100%|██████████| 240/240 [00:03<00:00, 66.98it/s, loss=2.64]
100%|██████████| 60/60 [00:00<00:00, 90.93it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.2524714673162345


100%|██████████| 240/240 [00:03<00:00, 66.48it/s, loss=2.34]
100%|██████████| 60/60 [00:00<00:00, 91.45it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.3136916164829646


100%|██████████| 240/240 [00:03<00:00, 67.52it/s, loss=2.15]
100%|██████████| 60/60 [00:00<00:00, 91.72it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.4727440856117675


100%|██████████| 240/240 [00:03<00:00, 66.61it/s, loss=2.04]
100%|██████████| 60/60 [00:00<00:00, 91.29it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.436183830295009


100%|██████████| 60/60 [00:00<00:00, 92.28it/s]
[32m[I 2023-04-18 10:55:42,681][0m Trial 4 finished with value: 2.4361838302950085 and parameters: {'batch_size': 1024, 'lr': 0.0015246629464563048, 'weight_decay': 1.3320447489638006e-07, 'embed_dim': 7, 'mlp_dim_layers': 4, 'dropout': 0.25}. Best is trial 1 with value: 2.2899825157830187.[0m
100%|██████████| 959/959 [00:07<00:00, 130.49it/s, loss=2.48]
100%|██████████| 240/240 [00:00<00:00, 338.26it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2634996546426818


100%|██████████| 959/959 [00:07<00:00, 127.72it/s, loss=2.37]
100%|██████████| 240/240 [00:00<00:00, 339.54it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.2625864283180244


100%|██████████| 959/959 [00:07<00:00, 130.67it/s, loss=2.21]
100%|██████████| 240/240 [00:00<00:00, 338.49it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.288520369676969


100%|██████████| 959/959 [00:07<00:00, 127.87it/s, loss=2.06]
100%|██████████| 240/240 [00:00<00:00, 337.99it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.2940976396873896


100%|██████████| 959/959 [00:07<00:00, 128.75it/s, loss=1.97]
100%|██████████| 240/240 [00:00<00:00, 326.25it/s]
  0%|          | 0/300 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3059874510725216


100%|██████████| 300/300 [00:00<00:00, 483.24it/s]
[32m[I 2023-04-18 10:56:24,178][0m A new study created in memory with name: DCN_parameter_opt[0m






100%|██████████| 1918/1918 [00:12<00:00, 155.03it/s, loss=2.55]
100%|██████████| 480/480 [00:00<00:00, 523.68it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2442125822116576


100%|██████████| 1918/1918 [00:12<00:00, 156.78it/s, loss=2.39]
100%|██████████| 480/480 [00:00<00:00, 517.04it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.241902222183969


100%|██████████| 1918/1918 [00:12<00:00, 155.09it/s, loss=2.17]
100%|██████████| 480/480 [00:00<00:00, 518.48it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.242519030454607


100%|██████████| 1918/1918 [00:12<00:00, 157.18it/s, loss=2.04]
100%|██████████| 480/480 [00:00<00:00, 521.05it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.264687030391026


100%|██████████| 1918/1918 [00:12<00:00, 157.24it/s, loss=1.97]
100%|██████████| 480/480 [00:00<00:00, 523.16it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.297205976881725


100%|██████████| 480/480 [00:00<00:00, 529.82it/s]
[32m[I 2023-04-18 10:57:31,310][0m Trial 0 finished with value: 2.2972059768535407 and parameters: {'batch_size': 128, 'lr': 0.008452670941412195, 'weight_decay': 6.521627654207918e-07, 'embed_dim': 9, 'mlp_dim_layers': 4, 'dropout': 0.3}. Best is trial 0 with value: 2.2972059768535407.[0m
100%|██████████| 480/480 [00:04<00:00, 100.40it/s, loss=2.61]
100%|██████████| 120/120 [00:00<00:00, 204.71it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.3580106789797455


100%|██████████| 480/480 [00:04<00:00, 101.78it/s, loss=2.41]
100%|██████████| 120/120 [00:00<00:00, 205.30it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.354790696230072


100%|██████████| 480/480 [00:04<00:00, 97.63it/s, loss=2.22]
100%|██████████| 120/120 [00:00<00:00, 205.38it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.3921478674312757


100%|██████████| 480/480 [00:04<00:00, 102.18it/s, loss=2.05]
100%|██████████| 120/120 [00:00<00:00, 166.64it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.353462990368224


100%|██████████| 480/480 [00:04<00:00, 104.08it/s, loss=1.97]
100%|██████████| 120/120 [00:00<00:00, 205.16it/s]
  0%|          | 0/120 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3503171282318895


100%|██████████| 120/120 [00:00<00:00, 207.79it/s]
[32m[I 2023-04-18 10:57:58,857][0m Trial 1 finished with value: 2.350317128231889 and parameters: {'batch_size': 512, 'lr': 0.008030621710011598, 'weight_decay': 1.6699752840727248e-07, 'embed_dim': 2, 'mlp_dim_layers': 2, 'dropout': 0.25}. Best is trial 0 with value: 2.2972059768535407.[0m
100%|██████████| 240/240 [00:03<00:00, 64.11it/s, loss=3.73]
100%|██████████| 60/60 [00:00<00:00, 114.32it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 4.547776300249919


100%|██████████| 240/240 [00:03<00:00, 64.01it/s, loss=2.68]
100%|██████████| 60/60 [00:00<00:00, 113.02it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.7807296754927364


100%|██████████| 240/240 [00:03<00:00, 64.08it/s, loss=2.5] 
100%|██████████| 60/60 [00:00<00:00, 114.00it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 3.105392352502984


100%|██████████| 240/240 [00:03<00:00, 66.90it/s, loss=2.35]
100%|██████████| 60/60 [00:00<00:00, 92.95it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 4.710155716740246


100%|██████████| 240/240 [00:03<00:00, 66.89it/s, loss=2.24]
100%|██████████| 60/60 [00:00<00:00, 92.82it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.703732084143652


100%|██████████| 60/60 [00:00<00:00, 92.17it/s]
[32m[I 2023-04-18 10:58:20,971][0m Trial 2 finished with value: 2.703732084143652 and parameters: {'batch_size': 1024, 'lr': 0.0022916248146004395, 'weight_decay': 1.6107072753021038e-08, 'embed_dim': 1, 'mlp_dim_layers': 2, 'dropout': 0.3}. Best is trial 0 with value: 2.2972059768535407.[0m
100%|██████████| 959/959 [00:07<00:00, 126.05it/s, loss=2.69]
100%|██████████| 240/240 [00:00<00:00, 338.12it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.62915814623968


100%|██████████| 959/959 [00:07<00:00, 127.60it/s, loss=2.52]
100%|██████████| 240/240 [00:00<00:00, 339.42it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.5337707695124103


100%|██████████| 959/959 [00:07<00:00, 129.19it/s, loss=2.31]
100%|██████████| 240/240 [00:00<00:00, 341.20it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.5360739197662414


100%|██████████| 959/959 [00:07<00:00, 127.74it/s, loss=2.19]
100%|██████████| 240/240 [00:00<00:00, 286.40it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.5583774032314426


100%|██████████| 959/959 [00:07<00:00, 130.13it/s, loss=2.1] 
100%|██████████| 240/240 [00:00<00:00, 287.37it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.5408293073047727


100%|██████████| 240/240 [00:00<00:00, 329.01it/s]
[32m[I 2023-04-18 10:59:03,176][0m Trial 3 finished with value: 2.5408293073047723 and parameters: {'batch_size': 256, 'lr': 0.0014508622717403171, 'weight_decay': 2.6035100227449678e-08, 'embed_dim': 15, 'mlp_dim_layers': 1, 'dropout': 0.3}. Best is trial 0 with value: 2.2972059768535407.[0m
100%|██████████| 1918/1918 [00:12<00:00, 152.42it/s, loss=2.58]
100%|██████████| 480/480 [00:00<00:00, 521.82it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.3716537187318876


100%|██████████| 1918/1918 [00:12<00:00, 154.78it/s, loss=2.4] 
100%|██████████| 480/480 [00:00<00:00, 524.61it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.3148732334185986


100%|██████████| 1918/1918 [00:12<00:00, 154.00it/s, loss=2.24]
100%|██████████| 480/480 [00:00<00:00, 520.96it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.30142448038075


100%|██████████| 1918/1918 [00:12<00:00, 156.91it/s, loss=2.18]
100%|██████████| 480/480 [00:00<00:00, 529.84it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3418523264087807


100%|██████████| 1918/1918 [00:12<00:00, 157.98it/s, loss=2.08]
100%|██████████| 480/480 [00:00<00:00, 524.78it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.367065535677516


100%|██████████| 480/480 [00:00<00:00, 529.17it/s]
[32m[I 2023-04-18 11:00:10,642][0m Trial 4 finished with value: 2.367065535677516 and parameters: {'batch_size': 128, 'lr': 0.001929327668247665, 'weight_decay': 4.608715593982744e-06, 'embed_dim': 2, 'mlp_dim_layers': 2, 'dropout': 0.25}. Best is trial 0 with value: 2.2972059768535407.[0m
100%|██████████| 1918/1918 [00:12<00:00, 155.18it/s, loss=2.57]
100%|██████████| 480/480 [00:00<00:00, 513.62it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.326694339100461


100%|██████████| 1918/1918 [00:12<00:00, 153.65it/s, loss=2.33]
100%|██████████| 480/480 [00:00<00:00, 515.80it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.215297400316044


100%|██████████| 1918/1918 [00:12<00:00, 156.93it/s, loss=2.13]
100%|██████████| 480/480 [00:00<00:00, 514.36it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.272240940994389


100%|██████████| 1918/1918 [00:12<00:00, 153.97it/s, loss=2.03]
100%|██████████| 480/480 [00:00<00:00, 495.71it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.2771931743118707


100%|██████████| 1918/1918 [00:12<00:00, 158.33it/s, loss=2]   
100%|██████████| 480/480 [00:00<00:00, 519.46it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.324342257049445


100%|██████████| 600/600 [00:00<00:00, 685.43it/s]
[32m[I 2023-04-18 11:01:18,022][0m A new study created in memory with name: DCN_parameter_opt[0m






100%|██████████| 1918/1918 [00:12<00:00, 154.76it/s, loss=2.49]
100%|██████████| 480/480 [00:00<00:00, 516.91it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2790940292193693


100%|██████████| 1918/1918 [00:12<00:00, 153.67it/s, loss=2.35]
100%|██████████| 480/480 [00:00<00:00, 514.91it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.3064606389829043


100%|██████████| 1918/1918 [00:12<00:00, 154.19it/s, loss=2.14]
100%|██████████| 480/480 [00:00<00:00, 515.15it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.297438382630996


100%|██████████| 1918/1918 [00:12<00:00, 157.73it/s, loss=2.01]
100%|██████████| 480/480 [00:00<00:00, 514.78it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3182922926247254


100%|██████████| 1918/1918 [00:12<00:00, 153.45it/s, loss=1.92]
100%|██████████| 480/480 [00:00<00:00, 512.80it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.344101574073307


100%|██████████| 480/480 [00:00<00:00, 519.57it/s]
[32m[I 2023-04-18 11:02:25,817][0m Trial 0 finished with value: 2.344101574128369 and parameters: {'batch_size': 128, 'lr': 0.0024347770342839615, 'weight_decay': 7.536148599398126e-08, 'embed_dim': 10, 'mlp_dim_layers': 3, 'dropout': 0.25}. Best is trial 0 with value: 2.344101574128369.[0m
100%|██████████| 1918/1918 [00:12<00:00, 150.29it/s, loss=2.52]
100%|██████████| 480/480 [00:00<00:00, 510.99it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2610081124996313


100%|██████████| 1918/1918 [00:12<00:00, 152.08it/s, loss=2.31]
100%|██████████| 480/480 [00:00<00:00, 516.15it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.208327970983248


100%|██████████| 1918/1918 [00:12<00:00, 150.73it/s, loss=2.13]
100%|██████████| 480/480 [00:00<00:00, 511.67it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.2286650360808316


100%|██████████| 1918/1918 [00:12<00:00, 151.62it/s, loss=2.02]
100%|██████████| 480/480 [00:00<00:00, 513.93it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.261482543553922


100%|██████████| 1918/1918 [00:12<00:00, 150.59it/s, loss=1.9] 
100%|██████████| 480/480 [00:00<00:00, 512.11it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.307969250076247


100%|██████████| 480/480 [00:00<00:00, 515.79it/s]
[32m[I 2023-04-18 11:03:35,165][0m Trial 1 finished with value: 2.3079692501802187 and parameters: {'batch_size': 128, 'lr': 0.0033534476575600666, 'weight_decay': 1.8135755578972116e-05, 'embed_dim': 14, 'mlp_dim_layers': 2, 'dropout': 0.2}. Best is trial 1 with value: 2.3079692501802187.[0m
100%|██████████| 240/240 [00:03<00:00, 65.00it/s, loss=4.71]
100%|██████████| 60/60 [00:00<00:00, 90.43it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.3596424041798465


100%|██████████| 240/240 [00:03<00:00, 64.76it/s, loss=2.47]
100%|██████████| 60/60 [00:00<00:00, 90.02it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.2633837760628763


100%|██████████| 240/240 [00:03<00:00, 65.11it/s, loss=2.2]
100%|██████████| 60/60 [00:00<00:00, 90.46it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.3487378799257974


100%|██████████| 240/240 [00:03<00:00, 64.65it/s, loss=2.01]
100%|██████████| 60/60 [00:00<00:00, 89.97it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.4777038018464688


100%|██████████| 240/240 [00:03<00:00, 64.56it/s, loss=1.89]
100%|██████████| 60/60 [00:00<00:00, 112.33it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3451487656607406


100%|██████████| 60/60 [00:00<00:00, 89.37it/s]
[32m[I 2023-04-18 11:03:57,803][0m Trial 2 finished with value: 2.345148765660741 and parameters: {'batch_size': 1024, 'lr': 0.0017633481083139772, 'weight_decay': 3.3172519870848185e-06, 'embed_dim': 14, 'mlp_dim_layers': 4, 'dropout': 0.2}. Best is trial 1 with value: 2.3079692501802187.[0m
100%|██████████| 480/480 [00:04<00:00, 103.04it/s, loss=2.6] 
100%|██████████| 120/120 [00:00<00:00, 165.03it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.3010613952639063


100%|██████████| 480/480 [00:04<00:00, 100.21it/s, loss=2.49]
100%|██████████| 120/120 [00:00<00:00, 204.26it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.330111686580304


100%|██████████| 480/480 [00:04<00:00, 97.83it/s, loss=2.4]  
100%|██████████| 120/120 [00:00<00:00, 203.07it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.274240099779693


100%|██████████| 480/480 [00:04<00:00, 102.31it/s, loss=2.28]
100%|██████████| 120/120 [00:00<00:00, 203.41it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3800576774225597


100%|██████████| 480/480 [00:04<00:00, 102.67it/s, loss=2.2] 
100%|██████████| 120/120 [00:00<00:00, 204.86it/s]
  0%|          | 0/120 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.354604735189495


100%|██████████| 120/120 [00:00<00:00, 168.62it/s]
[32m[I 2023-04-18 11:04:25,567][0m Trial 3 finished with value: 2.354604735189495 and parameters: {'batch_size': 512, 'lr': 0.004564134001215193, 'weight_decay': 0.00014020532848559348, 'embed_dim': 13, 'mlp_dim_layers': 1, 'dropout': 0.25}. Best is trial 1 with value: 2.3079692501802187.[0m
100%|██████████| 1918/1918 [00:12<00:00, 155.76it/s, loss=2.44]
100%|██████████| 480/480 [00:00<00:00, 513.13it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2479399902192467


100%|██████████| 1918/1918 [00:12<00:00, 154.43it/s, loss=2.21]
100%|██████████| 480/480 [00:00<00:00, 514.26it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.1947450474806387


100%|██████████| 1918/1918 [00:12<00:00, 153.11it/s, loss=1.99]
100%|██████████| 480/480 [00:00<00:00, 512.38it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.2662490361098135


100%|██████████| 1918/1918 [00:12<00:00, 153.89it/s, loss=1.86]
100%|██████████| 480/480 [00:00<00:00, 509.54it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3052264058080576


100%|██████████| 1918/1918 [00:12<00:00, 151.17it/s, loss=1.76]
100%|██████████| 480/480 [00:00<00:00, 513.18it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3284025838447797


100%|██████████| 480/480 [00:00<00:00, 515.00it/s]
[32m[I 2023-04-18 11:05:33,848][0m Trial 4 finished with value: 2.328402583872195 and parameters: {'batch_size': 128, 'lr': 0.009100833957248361, 'weight_decay': 6.865023591761266e-08, 'embed_dim': 15, 'mlp_dim_layers': 3, 'dropout': 0.2}. Best is trial 1 with value: 2.3079692501802187.[0m
100%|██████████| 1918/1918 [00:12<00:00, 154.98it/s, loss=2.43]
100%|██████████| 480/480 [00:00<00:00, 517.15it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.259870962209891


100%|██████████| 1918/1918 [00:12<00:00, 155.71it/s, loss=2.31]
100%|██████████| 480/480 [00:00<00:00, 517.58it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.242948232661083


100%|██████████| 1918/1918 [00:12<00:00, 154.36it/s, loss=2.13]
100%|██████████| 480/480 [00:00<00:00, 520.17it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.2629062781632787


100%|██████████| 1918/1918 [00:12<00:00, 151.52it/s, loss=1.97]
100%|██████████| 480/480 [00:00<00:00, 520.13it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3060772941606267


100%|██████████| 1918/1918 [00:12<00:00, 152.75it/s, loss=1.88]
100%|██████████| 480/480 [00:00<00:00, 516.85it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.2930950690262355


100%|██████████| 600/600 [00:00<00:00, 669.45it/s]






