In [1]:
import tqdm

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

import os
import random

import argparse
import json
import pandas as pd
import tqdm
import argparse
import warnings
import joblib

import optuna
from optuna import Trial, visualization
from optuna.samplers import TPESampler

from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold

from src.data import dl_data_load, dl_data_split, dl_data_loader

warnings.filterwarnings(action='ignore')

In [2]:
SEED = 42
def seed_everything(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)

seed_everything(SEED)

In [3]:
# Metrics
def rmse(real: list, predict: list) -> float:
    pred = np.array(predict)
    return np.sqrt(np.mean((real-pred) ** 2))


class RMSELoss(torch.nn.Module):
    def __init__(self):
        super(RMSELoss,self).__init__()
        self.eps = 1e-6

    def forward(self, x, y):
        criterion = nn.MSELoss()
        loss = torch.sqrt(criterion(x, y)+self.eps)
        return loss

In [4]:
import numpy as np
import torch
import torch.nn as nn


# FM모델 등에서 활용되는 선형 결합 부분을 정의합니다.
class FeaturesLinear(nn.Module):
    def __init__(self, field_dims: np.ndarray, output_dim: int=1):
        super().__init__()
        self.fc = torch.nn.Embedding(sum(field_dims), output_dim)
        self.bias = torch.nn.Parameter(torch.zeros((output_dim,)))
        self.offsets = np.array((0, *np.cumsum(field_dims)[:-1]), dtype=np.int32)


    def forward(self, x: torch.Tensor):
        x = x + x.new_tensor(self.offsets).unsqueeze(0)
        return torch.sum(self.fc(x), dim=1) + self.bias


# factorization을 통해 얻은 feature를 embedding 합니다.
class FeaturesEmbedding(nn.Module):
    def __init__(self, field_dims: np.ndarray, embed_dim: int):
        super().__init__()
        self.embedding = torch.nn.Embedding(sum(field_dims), embed_dim)
        self.offsets = np.array((0, *np.cumsum(field_dims)[:-1]), dtype=np.int32)
        torch.nn.init.xavier_uniform_(self.embedding.weight.data)


    def forward(self, x: torch.Tensor):
        x = x + x.new_tensor(self.offsets).unsqueeze(0)
        return self.embedding(x)



# NCF 모델은 MLP와 GMF를 합하여 최종 결과를 도출합니다.
# MLP을 구현합니다.
class MultiLayerPerceptron(nn.Module):
    def __init__(self, input_dim, embed_dims, dropout, output_layer=True):
        super().__init__()
        layers = list()
        for embed_dim in embed_dims:
            layers.append(torch.nn.Linear(input_dim, embed_dim))
            layers.append(torch.nn.BatchNorm1d(embed_dim))
            layers.append(torch.nn.ReLU())
            layers.append(torch.nn.Dropout(p=dropout))
            input_dim = embed_dim
        if output_layer:
            layers.append(torch.nn.Linear(input_dim, 1))
        self.mlp = torch.nn.Sequential(*layers)


    def forward(self, x):
        return self.mlp(x)


# Wide: memorization을 담당하는 generalized linear model
# Deep: generalization을 담당하는 feed-forward neural network
# wide and deep model은 위의 wide 와 deep 을 결합하는 모델입니다.
# 데이터를 embedding 하여 MLP 으로 학습시킨 Deep 모델과 parameter에 bias를 더한 linear 모델을 합하여 최종결과를 도출합니다.
class _WideAndDeepModel(nn.Module):
    def __init__(self, args, data):
        super().__init__()
        self.field_dims = data['field_dims']
        self.linear = FeaturesLinear(self.field_dims)
        self.embedding = FeaturesEmbedding(self.field_dims, args.embed_dim)
        self.embed_output_dim = len(self.field_dims) * args.embed_dim
        self.mlp = MultiLayerPerceptron(self.embed_output_dim, args.mlp_dims, args.dropout)


    def forward(self, x: torch.Tensor):
        embed_x = self.embedding(x)
        x = self.linear(x) + self.mlp(embed_x.view(-1, self.embed_output_dim))
        return x.squeeze(1)


In [5]:
class WideAndDeepModel:

    def __init__(self, args, data):
        super().__init__()

        self.criterion = RMSELoss()

        self.train_dataloader = data['train_dataloader']
        self.valid_dataloader = data['valid_dataloader']
        self.field_dims = data['field_dims']

        self.embed_dim = args.embed_dim
        self.epochs = args.epochs
        self.learning_rate = args.lr
        self.weight_decay = args.weight_decay
        self.log_interval = 100

        self.device = args.device

        self.mlp_dims = args.mlp_dims
        self.dropout = args.dropout

        self.model = _WideAndDeepModel(args,data).to(self.device)
        self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=self.learning_rate, amsgrad=True, weight_decay=self.weight_decay)


    def train(self):
      # model: type, optimizer: torch.optim, train_dataloader: DataLoader, criterion: torch.nn, device: str, log_interval: int=100
        for epoch in range(self.epochs):
            self.model.train()
            total_loss = 0
            tk0 = tqdm.tqdm(self.train_dataloader, smoothing=0, mininterval=1.0)
            for i, (fields, target) in enumerate(tk0):
                fields, target = fields.to(self.device), target.to(self.device)
                y = self.model(fields)
                loss = self.criterion(y, target.float())
                self.model.zero_grad()
                loss.backward()
                self.optimizer.step()
                total_loss += loss.item()
                if (i + 1) % self.log_interval == 0:
                    tk0.set_postfix(loss=total_loss / self.log_interval)
                    total_loss = 0

            rmse_score = self.predict_train()
            print('epoch:', epoch, 'validation: rmse:', rmse_score)


    def predict_train(self):
        self.model.eval()
        targets, predicts = list(), list()
        with torch.no_grad():
            for fields, target in tqdm.tqdm(self.valid_dataloader, smoothing=0, mininterval=1.0):
                fields, target = fields.to(self.device), target.to(self.device)
                y = self.model(fields)
                targets.extend(target.tolist())
                predicts.extend(y.tolist())
        return rmse(targets, predicts)


    def predict(self, dataloader):
        self.model.eval()
        predicts = list()
        with torch.no_grad():
            for fields in tqdm.tqdm(dataloader, smoothing=0, mininterval=1.0):
                fields = fields[0].to(self.device)
                y = self.model(fields)
                predicts.extend(y.tolist())
        return predicts

In [6]:
import argparse
# default args
args = argparse.Namespace(
  seed=42,
  data_path = '/opt/ml/data/',
    batch_size = 1024,
    data_shuffle = True,
    test_size = 0.2,
    epochs = 5,
    device = 'cuda',
    lr = 0.001,
    weight_decay = 0.0001,
    embed_dim = 16,
    mlp_dims = [16,16],
    dropout = 0.2,
    num_layers = 3
)

In [7]:
dl_dataset = dl_data_load(args)

In [8]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
folds = []
for train_idx, valid_idx in skf.split(dl_dataset['train'].drop(['rating'], axis=1), dl_dataset['train']['rating']):
    folds.append((train_idx, valid_idx))

In [9]:
def objective(trial):
    # trial 객체를 이용해 하이퍼파라미터 탐색 공간 설정
    args.batch_size = trial.suggest_categorical('batch_size', [128, 256, 512, 1024])
    args.lr = trial.suggest_loguniform('lr',0.001,0.01)
    args.weight_decay = trial.suggest_loguniform('weight_decay', 1e-8, 1e-3)
    args.embed_dim = trial.suggest_int('embed_dim', 1, 16) # 16이 default
    mlp_dim_layers = trial.suggest_int('mlp_dim_layers',1,4)
    # args.mlp_dims = [trial.suggest_int('mlp_dims',1,20)] * mlp_dim_layers
    args.dropout = trial.suggest_categorical("dropout",[0.2,0.25,0.3])
    dl_data = dl_data_split(args,dl_dataset)
    dl_data = dl_data_loader(args,dl_data)
    model = WideAndDeepModel(args, dl_data)
    model.train()
    loss = model.predict_train()
    return loss

In [10]:
for fold in range(0,5):
    print(f'===================================={fold+1}============================================')
    train_idx, valid_idx = folds[fold]
    X_train = dl_dataset['train'].drop(['rating'],axis = 1).iloc[train_idx]
    X_valid = dl_dataset['train'].drop(['rating'],axis = 1).iloc[valid_idx]
    y_train = dl_dataset['train']['rating'].iloc[train_idx]
    y_valid = dl_dataset['train']['rating'].iloc[valid_idx]

    sampler = optuna.samplers.TPESampler(SEED)
    study = optuna.create_study(
        study_name = 'WDN_parameter_opt',
        direction = 'minimize',
        sampler = sampler,
    )
    study.optimize(objective, n_trials=5)    
    args.__dict__.update(study.best_params)
    
    dl_dataset['X_train'], dl_dataset['X_valid'], dl_dataset['y_train'], dl_dataset['y_valid'] = X_train, X_valid, y_train, y_valid
    dl_data = dl_data_loader(args,dl_dataset)
    
    model = WideAndDeepModel(args, dl_data)
    model.train()
    
    pred = model.predict(dl_data['test_dataloader'])
    dl_dataset[f'pred_{fold}'] = pred
    print(f'================================================================================\n\n')

[32m[I 2023-04-18 12:19:52,274][0m A new study created in memory with name: WDN_parameter_opt[0m




100%|██████████| 480/480 [00:04<00:00, 100.94it/s, loss=2.71]
100%|██████████| 120/120 [00:00<00:00, 172.93it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.4232153121331304


100%|██████████| 480/480 [00:04<00:00, 100.07it/s, loss=2.31]
100%|██████████| 120/120 [00:00<00:00, 206.70it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.3773565610369953


100%|██████████| 480/480 [00:04<00:00, 107.35it/s, loss=2]   
100%|██████████| 120/120 [00:00<00:00, 177.34it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.374472448661719


100%|██████████| 480/480 [00:04<00:00, 113.47it/s, loss=1.81]
100%|██████████| 120/120 [00:00<00:00, 213.99it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.374636577737524


100%|██████████| 480/480 [00:04<00:00, 109.79it/s, loss=1.69]
100%|██████████| 120/120 [00:00<00:00, 213.04it/s]
  0%|          | 0/120 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3774447771559415


100%|██████████| 120/120 [00:00<00:00, 213.72it/s]
[32m[I 2023-04-18 12:20:21,398][0m Trial 0 finished with value: 2.3774447771559415 and parameters: {'batch_size': 512, 'lr': 0.0028176974404024602, 'weight_decay': 1.4324109851518631e-07, 'embed_dim': 7, 'mlp_dim_layers': 3, 'dropout': 0.2}. Best is trial 0 with value: 2.3774447771559415.[0m
100%|██████████| 480/480 [00:04<00:00, 111.22it/s, loss=2.28]
100%|██████████| 120/120 [00:00<00:00, 176.87it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.229847806089511


100%|██████████| 480/480 [00:04<00:00, 111.33it/s, loss=2.21]
100%|██████████| 120/120 [00:00<00:00, 216.05it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.2079729944821564


100%|██████████| 480/480 [00:04<00:00, 109.53it/s, loss=2.17]
100%|██████████| 120/120 [00:00<00:00, 216.37it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.2044785741009982


100%|██████████| 480/480 [00:04<00:00, 112.39it/s, loss=2.15]
100%|██████████| 120/120 [00:00<00:00, 213.65it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.212712688004075


100%|██████████| 480/480 [00:04<00:00, 111.31it/s, loss=2.15]
100%|██████████| 120/120 [00:00<00:00, 214.27it/s]
  0%|          | 0/120 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.2178686120077953


100%|██████████| 120/120 [00:00<00:00, 174.82it/s]
[32m[I 2023-04-18 12:20:46,794][0m Trial 1 finished with value: 2.2178686120077953 and parameters: {'batch_size': 512, 'lr': 0.009723114496173605, 'weight_decay': 0.0004647892346145884, 'embed_dim': 2, 'mlp_dim_layers': 1, 'dropout': 0.25}. Best is trial 1 with value: 2.2178686120077953.[0m
100%|██████████| 1918/1918 [00:10<00:00, 177.14it/s, loss=2.27]
100%|██████████| 480/480 [00:00<00:00, 560.91it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2242064964664645


100%|██████████| 1918/1918 [00:11<00:00, 173.45it/s, loss=2.07]
100%|██████████| 480/480 [00:00<00:00, 556.87it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.191389083065414


100%|██████████| 1918/1918 [00:11<00:00, 173.99it/s, loss=1.95]
100%|██████████| 480/480 [00:00<00:00, 559.17it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.2258664925044145


100%|██████████| 1918/1918 [00:11<00:00, 172.63it/s, loss=1.89]
100%|██████████| 480/480 [00:00<00:00, 557.37it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.2603046011422023


100%|██████████| 1918/1918 [00:10<00:00, 175.32it/s, loss=1.8] 
100%|██████████| 480/480 [00:00<00:00, 559.38it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.282394402005929


100%|██████████| 480/480 [00:00<00:00, 558.09it/s]
[32m[I 2023-04-18 12:21:47,113][0m Trial 2 finished with value: 2.282394402005929 and parameters: {'batch_size': 128, 'lr': 0.005025932263559589, 'weight_decay': 3.93207358535373e-05, 'embed_dim': 4, 'mlp_dim_layers': 2, 'dropout': 0.25}. Best is trial 1 with value: 2.2178686120077953.[0m
100%|██████████| 480/480 [00:04<00:00, 113.49it/s, loss=2.85]
100%|██████████| 120/120 [00:00<00:00, 175.15it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.494845351727906


100%|██████████| 480/480 [00:04<00:00, 111.47it/s, loss=2.38]
100%|██████████| 120/120 [00:00<00:00, 213.90it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.318074437974996


100%|██████████| 480/480 [00:04<00:00, 111.44it/s, loss=2.07]
100%|██████████| 120/120 [00:00<00:00, 176.10it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.2978347333980707


100%|██████████| 480/480 [00:04<00:00, 111.58it/s, loss=1.9] 
100%|██████████| 120/120 [00:00<00:00, 213.28it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.2974614077763302


100%|██████████| 480/480 [00:04<00:00, 108.41it/s, loss=1.78]
100%|██████████| 120/120 [00:00<00:00, 215.58it/s]
  0%|          | 0/120 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.2974306272084903


100%|██████████| 120/120 [00:00<00:00, 215.45it/s]
[32m[I 2023-04-18 12:22:12,485][0m Trial 3 finished with value: 2.297430627208491 and parameters: {'batch_size': 512, 'lr': 0.0012739104584483817, 'weight_decay': 0.00018556936653747763, 'embed_dim': 2, 'mlp_dim_layers': 2, 'dropout': 0.3}. Best is trial 1 with value: 2.2178686120077953.[0m
100%|██████████| 1918/1918 [00:10<00:00, 176.13it/s, loss=2.24]
100%|██████████| 480/480 [00:00<00:00, 547.99it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.238712301446075


100%|██████████| 1918/1918 [00:10<00:00, 175.63it/s, loss=2.22]
100%|██████████| 480/480 [00:00<00:00, 546.91it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.202515396583101


100%|██████████| 1918/1918 [00:11<00:00, 173.91it/s, loss=2.2] 
100%|██████████| 480/480 [00:00<00:00, 546.23it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.204322013562284


100%|██████████| 1918/1918 [00:11<00:00, 173.23it/s, loss=2.11]
100%|██████████| 480/480 [00:00<00:00, 550.53it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.203267445124648


100%|██████████| 1918/1918 [00:11<00:00, 172.51it/s, loss=2.09]
100%|██████████| 480/480 [00:00<00:00, 546.83it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.2088757682381663


100%|██████████| 480/480 [00:00<00:00, 550.18it/s]
[32m[I 2023-04-18 12:23:13,007][0m Trial 4 finished with value: 2.2088757682381663 and parameters: {'batch_size': 128, 'lr': 0.004269183468115187, 'weight_decay': 0.00041455972205067256, 'embed_dim': 12, 'mlp_dim_layers': 4, 'dropout': 0.2}. Best is trial 4 with value: 2.2088757682381663.[0m
100%|██████████| 1918/1918 [00:11<00:00, 172.72it/s, loss=2.22]
100%|██████████| 480/480 [00:00<00:00, 547.56it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2072400346005105


100%|██████████| 1918/1918 [00:11<00:00, 172.60it/s, loss=2.2] 
100%|██████████| 480/480 [00:00<00:00, 544.60it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.1864114820483063


100%|██████████| 1918/1918 [00:11<00:00, 171.08it/s, loss=2.15]
100%|██████████| 480/480 [00:00<00:00, 549.41it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.186362956315073


100%|██████████| 1918/1918 [00:11<00:00, 173.13it/s, loss=2.1] 
100%|██████████| 480/480 [00:00<00:00, 544.40it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.198367353270101


100%|██████████| 1918/1918 [00:11<00:00, 171.20it/s, loss=2.06]
100%|██████████| 480/480 [00:00<00:00, 548.06it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.208479224695343


100%|██████████| 600/600 [00:00<00:00, 730.28it/s]
[32m[I 2023-04-18 12:24:14,120][0m A new study created in memory with name: WDN_parameter_opt[0m
  0%|          | 0/1918 [00:00<?, ?it/s]





100%|██████████| 1918/1918 [00:11<00:00, 172.99it/s, loss=2.36]
100%|██████████| 480/480 [00:00<00:00, 552.17it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.3187910596255423


100%|██████████| 1918/1918 [00:10<00:00, 175.16it/s, loss=2.1] 
100%|██████████| 480/480 [00:00<00:00, 554.35it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.271193063480319


100%|██████████| 1918/1918 [00:10<00:00, 174.37it/s, loss=1.82]
100%|██████████| 480/480 [00:00<00:00, 547.25it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.2805182947052103


100%|██████████| 1918/1918 [00:10<00:00, 175.33it/s, loss=1.73]
100%|██████████| 480/480 [00:00<00:00, 555.02it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.2917441842415696


100%|██████████| 1918/1918 [00:10<00:00, 176.49it/s, loss=1.62]
100%|██████████| 480/480 [00:00<00:00, 549.74it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3093906047486312


100%|██████████| 480/480 [00:00<00:00, 553.21it/s]
[32m[I 2023-04-18 12:25:14,382][0m Trial 0 finished with value: 2.3093906047486312 and parameters: {'batch_size': 128, 'lr': 0.002002580395578707, 'weight_decay': 9.389841181150396e-06, 'embed_dim': 5, 'mlp_dim_layers': 2, 'dropout': 0.25}. Best is trial 0 with value: 2.3093906047486312.[0m
100%|██████████| 1918/1918 [00:10<00:00, 177.12it/s, loss=2.43]
100%|██████████| 480/480 [00:00<00:00, 553.91it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.377412432157659


100%|██████████| 1918/1918 [00:11<00:00, 172.67it/s, loss=2.16]
100%|██████████| 480/480 [00:00<00:00, 554.13it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.3098360201365966


100%|██████████| 1918/1918 [00:10<00:00, 174.80it/s, loss=1.94]
100%|██████████| 480/480 [00:00<00:00, 552.42it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.299651144923776


100%|██████████| 1918/1918 [00:10<00:00, 176.50it/s, loss=1.78]
100%|██████████| 480/480 [00:00<00:00, 552.64it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3160608099669076


100%|██████████| 1918/1918 [00:10<00:00, 175.66it/s, loss=1.69]
100%|██████████| 480/480 [00:00<00:00, 552.12it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.315943805193532


100%|██████████| 480/480 [00:00<00:00, 551.21it/s]
[32m[I 2023-04-18 12:26:14,482][0m Trial 1 finished with value: 2.315943805193532 and parameters: {'batch_size': 128, 'lr': 0.0024543534066081064, 'weight_decay': 3.333761573712854e-08, 'embed_dim': 2, 'mlp_dim_layers': 3, 'dropout': 0.3}. Best is trial 0 with value: 2.3093906047486312.[0m
100%|██████████| 240/240 [00:03<00:00, 69.34it/s, loss=2.61]
100%|██████████| 60/60 [00:00<00:00, 95.12it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.379522360492324


100%|██████████| 240/240 [00:03<00:00, 69.35it/s, loss=2.16]
100%|██████████| 60/60 [00:00<00:00, 94.78it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.314877732684369


100%|██████████| 240/240 [00:03<00:00, 69.37it/s, loss=1.87]
100%|██████████| 60/60 [00:00<00:00, 94.93it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.3219994483871607


100%|██████████| 240/240 [00:03<00:00, 69.51it/s, loss=1.7] 
100%|██████████| 60/60 [00:00<00:00, 93.23it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3442263220435846


100%|██████████| 240/240 [00:03<00:00, 70.06it/s, loss=1.6] 
100%|██████████| 60/60 [00:00<00:00, 93.34it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3494820024779344


100%|██████████| 60/60 [00:00<00:00, 95.08it/s]
[32m[I 2023-04-18 12:26:35,821][0m Trial 2 finished with value: 2.349482002477934 and parameters: {'batch_size': 1024, 'lr': 0.007804700152247432, 'weight_decay': 9.280931369184646e-08, 'embed_dim': 13, 'mlp_dim_layers': 2, 'dropout': 0.3}. Best is trial 0 with value: 2.3093906047486312.[0m
100%|██████████| 959/959 [00:06<00:00, 144.70it/s, loss=2.6] 
100%|██████████| 240/240 [00:00<00:00, 357.08it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.4164559992111725


100%|██████████| 959/959 [00:06<00:00, 144.12it/s, loss=2.24]
100%|██████████| 240/240 [00:00<00:00, 355.48it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.3506317091347824


100%|██████████| 959/959 [00:06<00:00, 142.23it/s, loss=1.96]
100%|██████████| 240/240 [00:00<00:00, 359.05it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.3566024786703914


100%|██████████| 959/959 [00:06<00:00, 143.74it/s, loss=1.79]
100%|██████████| 240/240 [00:00<00:00, 350.96it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3603018484857046


100%|██████████| 959/959 [00:06<00:00, 143.83it/s, loss=1.67]
100%|██████████| 240/240 [00:00<00:00, 354.12it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.357429204869904


100%|██████████| 240/240 [00:00<00:00, 298.00it/s]
[32m[I 2023-04-18 12:27:13,630][0m Trial 3 finished with value: 2.357429204869904 and parameters: {'batch_size': 256, 'lr': 0.0012556782198673083, 'weight_decay': 8.107637021560101e-08, 'embed_dim': 15, 'mlp_dim_layers': 4, 'dropout': 0.25}. Best is trial 0 with value: 2.3093906047486312.[0m
100%|██████████| 959/959 [00:06<00:00, 147.19it/s, loss=2.33]
100%|██████████| 240/240 [00:00<00:00, 303.00it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2629378576313144


100%|██████████| 959/959 [00:06<00:00, 144.87it/s, loss=1.99]
100%|██████████| 240/240 [00:00<00:00, 359.49it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.252585960229628


100%|██████████| 959/959 [00:06<00:00, 146.71it/s, loss=1.78]
100%|██████████| 240/240 [00:00<00:00, 360.00it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.282665120656574


100%|██████████| 959/959 [00:06<00:00, 144.64it/s, loss=1.62]
100%|██████████| 240/240 [00:00<00:00, 363.01it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.303947273362771


100%|██████████| 959/959 [00:06<00:00, 145.19it/s, loss=1.53]
100%|██████████| 240/240 [00:00<00:00, 356.73it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3316510689723424


100%|██████████| 240/240 [00:00<00:00, 366.49it/s]
[32m[I 2023-04-18 12:27:50,850][0m Trial 4 finished with value: 2.3316510689723424 and parameters: {'batch_size': 256, 'lr': 0.00904695605371658, 'weight_decay': 2.3543619333809757e-07, 'embed_dim': 4, 'mlp_dim_layers': 1, 'dropout': 0.25}. Best is trial 0 with value: 2.3093906047486312.[0m
100%|██████████| 1918/1918 [00:11<00:00, 173.84it/s, loss=2.43]
100%|██████████| 480/480 [00:00<00:00, 550.49it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.3114025857649767


100%|██████████| 1918/1918 [00:10<00:00, 175.21it/s, loss=2.11]
100%|██████████| 480/480 [00:00<00:00, 547.34it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.2671420002644465


100%|██████████| 1918/1918 [00:10<00:00, 177.29it/s, loss=1.87]
100%|██████████| 480/480 [00:00<00:00, 551.90it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.278061815731494


100%|██████████| 1918/1918 [00:10<00:00, 174.75it/s, loss=1.7] 
100%|██████████| 480/480 [00:00<00:00, 552.66it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.295563081737177


100%|██████████| 1918/1918 [00:10<00:00, 177.69it/s, loss=1.62]
100%|██████████| 480/480 [00:00<00:00, 552.12it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.322128807680449


100%|██████████| 600/600 [00:00<00:00, 740.33it/s]
[32m[I 2023-04-18 12:28:50,735][0m A new study created in memory with name: WDN_parameter_opt[0m
  0%|          | 0/1918 [00:00<?, ?it/s]





100%|██████████| 1918/1918 [00:10<00:00, 177.61it/s, loss=2.25]
100%|██████████| 480/480 [00:00<00:00, 551.27it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2510329470196924


100%|██████████| 1918/1918 [00:10<00:00, 175.55it/s, loss=1.97]
100%|██████████| 480/480 [00:00<00:00, 545.61it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.234586750808885


100%|██████████| 1918/1918 [00:10<00:00, 176.22it/s, loss=1.77]
100%|██████████| 480/480 [00:00<00:00, 548.24it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.273043671723908


100%|██████████| 1918/1918 [00:11<00:00, 173.92it/s, loss=1.62]
100%|██████████| 480/480 [00:00<00:00, 549.22it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.2973900711822695


100%|██████████| 1918/1918 [00:11<00:00, 172.96it/s, loss=1.57]
100%|██████████| 480/480 [00:00<00:00, 552.25it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.322386624771801


100%|██████████| 480/480 [00:00<00:00, 556.09it/s]
[32m[I 2023-04-18 12:29:50,879][0m Trial 0 finished with value: 2.322386624771801 and parameters: {'batch_size': 128, 'lr': 0.007167775222700449, 'weight_decay': 2.6668893314202223e-07, 'embed_dim': 2, 'mlp_dim_layers': 3, 'dropout': 0.25}. Best is trial 0 with value: 2.322386624771801.[0m
100%|██████████| 1918/1918 [00:10<00:00, 176.04it/s, loss=2.53]
100%|██████████| 480/480 [00:00<00:00, 551.01it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.3979619877977916


100%|██████████| 1918/1918 [00:10<00:00, 175.58it/s, loss=2.2] 
100%|██████████| 480/480 [00:00<00:00, 550.71it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.342068888580931


100%|██████████| 1918/1918 [00:11<00:00, 173.07it/s, loss=1.96]
100%|██████████| 480/480 [00:00<00:00, 544.22it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.3342733287474435


100%|██████████| 1918/1918 [00:11<00:00, 171.86it/s, loss=1.79]
100%|██████████| 480/480 [00:00<00:00, 547.73it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.359077129478492


100%|██████████| 1918/1918 [00:11<00:00, 171.94it/s, loss=1.7] 
100%|██████████| 480/480 [00:00<00:00, 548.17it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.359456929011909


100%|██████████| 480/480 [00:00<00:00, 554.05it/s]
[32m[I 2023-04-18 12:30:51,576][0m Trial 1 finished with value: 2.359456929011909 and parameters: {'batch_size': 128, 'lr': 0.0010830849557850342, 'weight_decay': 1.9385066332637868e-07, 'embed_dim': 10, 'mlp_dim_layers': 2, 'dropout': 0.3}. Best is trial 0 with value: 2.322386624771801.[0m
100%|██████████| 240/240 [00:03<00:00, 69.82it/s, loss=2.57]
100%|██████████| 60/60 [00:00<00:00, 93.65it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.3631404395190696


100%|██████████| 240/240 [00:03<00:00, 70.20it/s, loss=2.15]
100%|██████████| 60/60 [00:00<00:00, 94.60it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.3245242311638683


100%|██████████| 240/240 [00:03<00:00, 70.66it/s, loss=1.86]
100%|██████████| 60/60 [00:00<00:00, 93.81it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.358117275646426


100%|██████████| 240/240 [00:03<00:00, 71.12it/s, loss=1.68]
100%|██████████| 60/60 [00:00<00:00, 93.92it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3590025640724956


100%|██████████| 240/240 [00:03<00:00, 70.38it/s, loss=1.57]
100%|██████████| 60/60 [00:00<00:00, 95.07it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.386964276587882


100%|██████████| 60/60 [00:00<00:00, 95.95it/s]
[32m[I 2023-04-18 12:31:12,629][0m Trial 2 finished with value: 2.386964276587882 and parameters: {'batch_size': 1024, 'lr': 0.004973133481878592, 'weight_decay': 3.864528663632234e-07, 'embed_dim': 3, 'mlp_dim_layers': 3, 'dropout': 0.2}. Best is trial 0 with value: 2.322386624771801.[0m
100%|██████████| 959/959 [00:06<00:00, 146.29it/s, loss=2.28]
100%|██████████| 240/240 [00:00<00:00, 340.41it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.253094836738577


100%|██████████| 959/959 [00:06<00:00, 146.69it/s, loss=1.98]
100%|██████████| 240/240 [00:00<00:00, 353.37it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.251192786637177


100%|██████████| 959/959 [00:06<00:00, 146.85it/s, loss=1.76]
100%|██████████| 240/240 [00:00<00:00, 356.76it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.2835722869537793


100%|██████████| 959/959 [00:06<00:00, 143.39it/s, loss=1.63]
100%|██████████| 240/240 [00:00<00:00, 357.75it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.2983118002379705


100%|██████████| 959/959 [00:06<00:00, 146.11it/s, loss=1.55]
100%|██████████| 240/240 [00:00<00:00, 356.06it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3202120332414657


100%|██████████| 240/240 [00:00<00:00, 298.20it/s]
[32m[I 2023-04-18 12:31:49,916][0m Trial 3 finished with value: 2.3202120332414657 and parameters: {'batch_size': 256, 'lr': 0.009122074108381327, 'weight_decay': 1.2710750063438665e-06, 'embed_dim': 5, 'mlp_dim_layers': 4, 'dropout': 0.3}. Best is trial 3 with value: 2.3202120332414657.[0m
100%|██████████| 1918/1918 [00:10<00:00, 176.00it/s, loss=2.65]
100%|██████████| 480/480 [00:00<00:00, 557.83it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.4847839621287715


100%|██████████| 1918/1918 [00:11<00:00, 171.54it/s, loss=2.3] 
100%|██████████| 480/480 [00:00<00:00, 560.81it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.3538651064479916


100%|██████████| 1918/1918 [00:10<00:00, 176.86it/s, loss=2.03]
100%|██████████| 480/480 [00:00<00:00, 552.74it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.377960157588685


100%|██████████| 1918/1918 [00:11<00:00, 172.19it/s, loss=1.87]
100%|██████████| 480/480 [00:00<00:00, 549.78it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.360799682198637


100%|██████████| 1918/1918 [00:11<00:00, 171.78it/s, loss=1.73]
100%|██████████| 480/480 [00:00<00:00, 556.72it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3568247704748875


100%|██████████| 480/480 [00:00<00:00, 558.52it/s]
[32m[I 2023-04-18 12:32:50,554][0m Trial 4 finished with value: 2.3568247704748875 and parameters: {'batch_size': 128, 'lr': 0.001074978834852989, 'weight_decay': 2.5632558542262433e-08, 'embed_dim': 10, 'mlp_dim_layers': 3, 'dropout': 0.3}. Best is trial 3 with value: 2.3202120332414657.[0m
100%|██████████| 959/959 [00:06<00:00, 143.75it/s, loss=2.29]
100%|██████████| 240/240 [00:00<00:00, 360.19it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2714015495757782


100%|██████████| 959/959 [00:06<00:00, 143.68it/s, loss=2]   
100%|██████████| 240/240 [00:00<00:00, 356.15it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.255227319852747


100%|██████████| 959/959 [00:06<00:00, 145.48it/s, loss=1.79]
100%|██████████| 240/240 [00:00<00:00, 358.64it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.2854143702735956


100%|██████████| 959/959 [00:06<00:00, 145.81it/s, loss=1.67]
100%|██████████| 240/240 [00:00<00:00, 358.13it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3027309521432175


100%|██████████| 959/959 [00:06<00:00, 144.52it/s, loss=1.59]
100%|██████████| 240/240 [00:00<00:00, 358.98it/s]
  0%|          | 0/300 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.32657771125842


100%|██████████| 300/300 [00:00<00:00, 426.81it/s]
[32m[I 2023-04-18 12:33:27,906][0m A new study created in memory with name: WDN_parameter_opt[0m
  0%|          | 0/240 [00:00<?, ?it/s]





100%|██████████| 240/240 [00:03<00:00, 70.45it/s, loss=2.64]
100%|██████████| 60/60 [00:00<00:00, 96.04it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.469980523829225


100%|██████████| 240/240 [00:03<00:00, 70.37it/s, loss=2.23]
100%|██████████| 60/60 [00:00<00:00, 118.60it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.3543525307591215


100%|██████████| 240/240 [00:03<00:00, 68.14it/s, loss=1.98]
100%|██████████| 60/60 [00:00<00:00, 119.09it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.3314862997411896


100%|██████████| 240/240 [00:03<00:00, 70.37it/s, loss=1.81]
100%|██████████| 60/60 [00:00<00:00, 94.48it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.320954871596311


100%|██████████| 240/240 [00:03<00:00, 70.26it/s, loss=1.69]
100%|██████████| 60/60 [00:00<00:00, 96.01it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.330511551740421


100%|██████████| 60/60 [00:00<00:00, 95.88it/s]
[32m[I 2023-04-18 12:33:48,775][0m Trial 0 finished with value: 2.330511551740421 and parameters: {'batch_size': 1024, 'lr': 0.007335083841325669, 'weight_decay': 2.1719142603289962e-07, 'embed_dim': 1, 'mlp_dim_layers': 3, 'dropout': 0.25}. Best is trial 0 with value: 2.330511551740421.[0m
100%|██████████| 959/959 [00:06<00:00, 143.82it/s, loss=2.37]
100%|██████████| 240/240 [00:00<00:00, 353.46it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.283436014362691


100%|██████████| 959/959 [00:06<00:00, 141.37it/s, loss=2.11]
100%|██████████| 240/240 [00:00<00:00, 353.26it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.2343773477479214


100%|██████████| 959/959 [00:06<00:00, 142.57it/s, loss=1.93]
100%|██████████| 240/240 [00:00<00:00, 357.09it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.259171453562121


100%|██████████| 959/959 [00:06<00:00, 145.13it/s, loss=1.8] 
100%|██████████| 240/240 [00:00<00:00, 356.52it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.26207719888522


100%|██████████| 959/959 [00:06<00:00, 143.26it/s, loss=1.73]
100%|██████████| 240/240 [00:00<00:00, 351.96it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.287139290064785


100%|██████████| 240/240 [00:00<00:00, 297.51it/s]
[32m[I 2023-04-18 12:34:26,679][0m Trial 1 finished with value: 2.287139290064785 and parameters: {'batch_size': 256, 'lr': 0.003846308378524759, 'weight_decay': 3.428168741811899e-05, 'embed_dim': 11, 'mlp_dim_layers': 2, 'dropout': 0.2}. Best is trial 1 with value: 2.287139290064785.[0m
100%|██████████| 1918/1918 [00:11<00:00, 172.54it/s, loss=2.41]
100%|██████████| 480/480 [00:00<00:00, 547.30it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.334351401451493


100%|██████████| 1918/1918 [00:11<00:00, 174.18it/s, loss=2.14]
100%|██████████| 480/480 [00:00<00:00, 547.01it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.294171311088979


100%|██████████| 1918/1918 [00:10<00:00, 175.40it/s, loss=1.89]
100%|██████████| 480/480 [00:00<00:00, 543.91it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.2923640792532867


100%|██████████| 1918/1918 [00:11<00:00, 171.51it/s, loss=1.73]
100%|██████████| 480/480 [00:00<00:00, 550.57it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3126621306850628


100%|██████████| 1918/1918 [00:10<00:00, 174.69it/s, loss=1.65]
100%|██████████| 480/480 [00:00<00:00, 554.66it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3239053837615327


100%|██████████| 480/480 [00:00<00:00, 555.50it/s]
[32m[I 2023-04-18 12:35:27,386][0m Trial 2 finished with value: 2.3239053837615327 and parameters: {'batch_size': 128, 'lr': 0.0023992066395921796, 'weight_decay': 8.922914250188942e-08, 'embed_dim': 11, 'mlp_dim_layers': 1, 'dropout': 0.25}. Best is trial 1 with value: 2.287139290064785.[0m
100%|██████████| 1918/1918 [00:11<00:00, 172.79it/s, loss=2.24]
100%|██████████| 480/480 [00:00<00:00, 539.89it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2103446080386044


100%|██████████| 1918/1918 [00:11<00:00, 172.81it/s, loss=2.2] 
100%|██████████| 480/480 [00:00<00:00, 548.88it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.1950857029919972


100%|██████████| 1918/1918 [00:11<00:00, 173.72it/s, loss=2.16]
100%|██████████| 480/480 [00:00<00:00, 546.29it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.181874796338952


100%|██████████| 1918/1918 [00:11<00:00, 171.77it/s, loss=2.12]
100%|██████████| 480/480 [00:00<00:00, 544.27it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.189881550555305


100%|██████████| 1918/1918 [00:10<00:00, 175.53it/s, loss=2.07]
100%|██████████| 480/480 [00:00<00:00, 549.27it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.2064835601420243


100%|██████████| 480/480 [00:00<00:00, 552.53it/s]
[32m[I 2023-04-18 12:36:28,233][0m Trial 3 finished with value: 2.2064835601420243 and parameters: {'batch_size': 128, 'lr': 0.003311233978450271, 'weight_decay': 0.00047257199946162135, 'embed_dim': 13, 'mlp_dim_layers': 1, 'dropout': 0.25}. Best is trial 3 with value: 2.2064835601420243.[0m
100%|██████████| 959/959 [00:06<00:00, 144.06it/s, loss=2.46]
100%|██████████| 240/240 [00:00<00:00, 356.86it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.358399558397308


100%|██████████| 959/959 [00:06<00:00, 146.30it/s, loss=2.13]
100%|██████████| 240/240 [00:00<00:00, 355.49it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.309262794688088


100%|██████████| 959/959 [00:06<00:00, 143.07it/s, loss=1.83]
100%|██████████| 240/240 [00:00<00:00, 358.49it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.333436714078147


100%|██████████| 959/959 [00:06<00:00, 143.93it/s, loss=1.67]
100%|██████████| 240/240 [00:00<00:00, 355.59it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3503183991395242


100%|██████████| 959/959 [00:06<00:00, 144.13it/s, loss=1.58]
100%|██████████| 240/240 [00:00<00:00, 354.36it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.36229110232401


100%|██████████| 240/240 [00:00<00:00, 353.20it/s]
[32m[I 2023-04-18 12:37:05,748][0m Trial 4 finished with value: 2.36229110232401 and parameters: {'batch_size': 256, 'lr': 0.0031346266540434467, 'weight_decay': 2.3303699131797263e-08, 'embed_dim': 9, 'mlp_dim_layers': 1, 'dropout': 0.2}. Best is trial 3 with value: 2.2064835601420243.[0m
100%|██████████| 1918/1918 [00:11<00:00, 171.57it/s, loss=2.27]
100%|██████████| 480/480 [00:00<00:00, 545.42it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2372618109600677


100%|██████████| 1918/1918 [00:11<00:00, 172.12it/s, loss=2.21]
100%|██████████| 480/480 [00:00<00:00, 545.96it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.2095214500781313


100%|██████████| 1918/1918 [00:11<00:00, 172.11it/s, loss=2.2] 
100%|██████████| 480/480 [00:00<00:00, 546.16it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.1963870529577023


100%|██████████| 1918/1918 [00:10<00:00, 175.08it/s, loss=2.09]
100%|██████████| 480/480 [00:00<00:00, 548.72it/s]
  0%|          | 0/1918 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.2264442605260544


100%|██████████| 1918/1918 [00:10<00:00, 176.83it/s, loss=1.99]
100%|██████████| 480/480 [00:00<00:00, 545.62it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.245246072416863


100%|██████████| 600/600 [00:00<00:00, 730.43it/s]
[32m[I 2023-04-18 12:38:06,424][0m A new study created in memory with name: WDN_parameter_opt[0m






100%|██████████| 959/959 [00:06<00:00, 142.72it/s, loss=2.32]
100%|██████████| 240/240 [00:00<00:00, 294.55it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.239760286047572


100%|██████████| 959/959 [00:06<00:00, 149.33it/s, loss=2.17]
100%|██████████| 240/240 [00:00<00:00, 303.89it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.1901406735345965


100%|██████████| 959/959 [00:06<00:00, 143.91it/s, loss=2.05]
100%|██████████| 240/240 [00:00<00:00, 360.84it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.21012664648661


100%|██████████| 959/959 [00:06<00:00, 143.88it/s, loss=1.93]
100%|██████████| 240/240 [00:00<00:00, 356.64it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.2429228097859815


100%|██████████| 959/959 [00:06<00:00, 144.42it/s, loss=1.84]
100%|██████████| 240/240 [00:00<00:00, 347.48it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.271463542041889


100%|██████████| 240/240 [00:00<00:00, 356.16it/s]
[32m[I 2023-04-18 12:38:44,092][0m Trial 0 finished with value: 2.271463542041889 and parameters: {'batch_size': 256, 'lr': 0.00197208932680101, 'weight_decay': 0.00047008985519568947, 'embed_dim': 13, 'mlp_dim_layers': 1, 'dropout': 0.25}. Best is trial 0 with value: 2.271463542041889.[0m
100%|██████████| 240/240 [00:03<00:00, 69.66it/s, loss=2.94]
100%|██████████| 60/60 [00:00<00:00, 96.02it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.5306002410024977


100%|██████████| 240/240 [00:03<00:00, 70.92it/s, loss=2.43]
100%|██████████| 60/60 [00:00<00:00, 95.72it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.4171610091461924


100%|██████████| 240/240 [00:03<00:00, 70.76it/s, loss=2.12]
100%|██████████| 60/60 [00:00<00:00, 94.31it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.4013441697728317


100%|██████████| 240/240 [00:03<00:00, 70.84it/s, loss=1.95]
100%|██████████| 60/60 [00:00<00:00, 95.41it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.386627889715539


100%|██████████| 240/240 [00:03<00:00, 70.92it/s, loss=1.84]
100%|██████████| 60/60 [00:00<00:00, 95.07it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.405802468354998


100%|██████████| 60/60 [00:00<00:00, 95.46it/s]
[32m[I 2023-04-18 12:39:05,046][0m Trial 1 finished with value: 2.405802468354998 and parameters: {'batch_size': 1024, 'lr': 0.0031654329029121527, 'weight_decay': 1.0697830030974757e-06, 'embed_dim': 2, 'mlp_dim_layers': 1, 'dropout': 0.25}. Best is trial 0 with value: 2.271463542041889.[0m
100%|██████████| 240/240 [00:03<00:00, 68.28it/s, loss=3.49]
100%|██████████| 60/60 [00:00<00:00, 93.57it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.6441116124098336


100%|██████████| 240/240 [00:03<00:00, 68.85it/s, loss=2.36]
100%|██████████| 60/60 [00:00<00:00, 114.79it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.430446462359213


100%|██████████| 240/240 [00:03<00:00, 66.10it/s, loss=2.01]
100%|██████████| 60/60 [00:00<00:00, 108.81it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.4232407319270743


100%|██████████| 240/240 [00:03<00:00, 65.36it/s, loss=1.83]
100%|██████████| 60/60 [00:00<00:00, 115.82it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.4190594028711665


100%|██████████| 240/240 [00:03<00:00, 66.04it/s, loss=1.71]
100%|██████████| 60/60 [00:00<00:00, 115.27it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.4357841518201506


100%|██████████| 60/60 [00:00<00:00, 92.73it/s]
[32m[I 2023-04-18 12:39:26,641][0m Trial 2 finished with value: 2.4357841518201506 and parameters: {'batch_size': 1024, 'lr': 0.00126521917035312, 'weight_decay': 1.9755661842717525e-07, 'embed_dim': 14, 'mlp_dim_layers': 1, 'dropout': 0.2}. Best is trial 0 with value: 2.271463542041889.[0m
100%|██████████| 959/959 [00:06<00:00, 143.29it/s, loss=2.61]
100%|██████████| 240/240 [00:00<00:00, 360.45it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.441891273413714


100%|██████████| 959/959 [00:06<00:00, 146.73it/s, loss=2.25]
100%|██████████| 240/240 [00:00<00:00, 362.14it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.3946919902298562


100%|██████████| 959/959 [00:06<00:00, 146.98it/s, loss=1.94]
100%|██████████| 240/240 [00:00<00:00, 303.03it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.375437917955599


100%|██████████| 959/959 [00:06<00:00, 147.88it/s, loss=1.78]
100%|██████████| 240/240 [00:00<00:00, 304.00it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3702497256017434


100%|██████████| 959/959 [00:06<00:00, 145.16it/s, loss=1.66]
100%|██████████| 240/240 [00:00<00:00, 361.75it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3878928560204797


100%|██████████| 240/240 [00:00<00:00, 364.50it/s]
[32m[I 2023-04-18 12:40:03,956][0m Trial 3 finished with value: 2.3878928560204797 and parameters: {'batch_size': 256, 'lr': 0.002175323416111292, 'weight_decay': 1.467199193257238e-08, 'embed_dim': 11, 'mlp_dim_layers': 1, 'dropout': 0.2}. Best is trial 0 with value: 2.271463542041889.[0m
100%|██████████| 480/480 [00:04<00:00, 111.65it/s, loss=2.47]
100%|██████████| 120/120 [00:00<00:00, 214.53it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.323836340540091


100%|██████████| 480/480 [00:04<00:00, 106.97it/s, loss=2.08]
100%|██████████| 120/120 [00:00<00:00, 214.80it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.2860355038846487


100%|██████████| 480/480 [00:04<00:00, 110.16it/s, loss=1.81]
100%|██████████| 120/120 [00:00<00:00, 171.74it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.296075354282848


100%|██████████| 480/480 [00:04<00:00, 110.87it/s, loss=1.66]
100%|██████████| 120/120 [00:00<00:00, 211.88it/s]
  0%|          | 0/480 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.3143058767715052


100%|██████████| 480/480 [00:04<00:00, 108.23it/s, loss=1.58]
100%|██████████| 120/120 [00:00<00:00, 212.87it/s]
  0%|          | 0/120 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.3230039745273166


100%|██████████| 120/120 [00:00<00:00, 215.43it/s]
[32m[I 2023-04-18 12:40:29,611][0m Trial 4 finished with value: 2.3230039745273166 and parameters: {'batch_size': 512, 'lr': 0.008979417357321965, 'weight_decay': 6.640643881718679e-07, 'embed_dim': 12, 'mlp_dim_layers': 4, 'dropout': 0.3}. Best is trial 0 with value: 2.271463542041889.[0m
100%|██████████| 959/959 [00:06<00:00, 144.26it/s, loss=2.38]
100%|██████████| 240/240 [00:00<00:00, 350.12it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 0 validation: rmse: 2.2801115054913694


100%|██████████| 959/959 [00:06<00:00, 142.96it/s, loss=2.22]
100%|██████████| 240/240 [00:00<00:00, 356.12it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 1 validation: rmse: 2.203367206593882


100%|██████████| 959/959 [00:06<00:00, 145.75it/s, loss=2.12]
100%|██████████| 240/240 [00:00<00:00, 358.82it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 2 validation: rmse: 2.201968335697776


100%|██████████| 959/959 [00:06<00:00, 143.34it/s, loss=1.98]
100%|██████████| 240/240 [00:00<00:00, 356.00it/s]
  0%|          | 0/959 [00:00<?, ?it/s]

epoch: 3 validation: rmse: 2.240342108672214


100%|██████████| 959/959 [00:06<00:00, 144.73it/s, loss=1.87]
100%|██████████| 240/240 [00:00<00:00, 351.98it/s]
  0%|          | 0/300 [00:00<?, ?it/s]

epoch: 4 validation: rmse: 2.2659742684377897


100%|██████████| 300/300 [00:00<00:00, 416.03it/s]






