In [1]:
import optuna
from optuna import Trial, visualization
from optuna.samplers import TPESampler

from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import mean_squared_error, make_scorer

import numpy as np
import pandas as pd
import torch
import torch.nn as nn

import matplotlib.pyplot as plt
import seaborn as sns

import re
from tqdm import tqdm
import random

import time
import pandas as pd
from src.utils import Logger, Setting, models_load
from src.data import context_data_load, context_data_split, context_data_loader
from src.data import dl_data_load, dl_data_split, dl_data_loader
from src.data import image_data_load, image_data_split, image_data_loader
from src.data import text_data_load, text_data_split, text_data_loader
from src.train import train, test

In [2]:
class argparse: # dummy class
    def __init__(self):
        pass
    
args = argparse()

############### USER DEFINE
model = 'DCN'
device = 'cuda' if torch.cuda.is_available() else 'cpu'
seed = 42 # 바꾸지 마시오!

############### BASIC OPTION
args.data_path='data/'
args.saved_model_path='./saved_models'
args.model=model
args.data_shuffle=True
args.test_size=0.2
args.seed=seed
args.use_best_model=True

############### TRAINING OPTION
args.batch_size=1024
args.epochs=30
args.lr=1e-3
args.loss_fn='RMSE'
args.optimizer='ADAM'
args.weight_decay=1e-6
args.early_stop=3

############### GPU
args.device=device

############### FM, FFM, NCF, WDN, DCN Common OPTION
args.embed_dim=16
args.dropout=0.2
args.mlp_dims=(16, 16)

############### DCN
args.num_layers=3

############### CNN_FM
args.cnn_embed_dim=64
args.cnn_latent_dim=12

############### DeepCoNN
args.vector_create=False
args.deepconn_embed_dim=32
args.deepconn_latent_dim=10
args.conv_1d_out_dimt=50
args.kernel_size=3
args.word_dim=768
args.out_dim=32

In [3]:
Setting.seed_everything(args.seed)

In [4]:
data = dl_data_load(args)

ids : 68092 isbn : 149570 author : 60290


In [5]:
# sample_submission = pd.read_csv(args.data_path + 'sample_submission_filtered.csv')
sample_submission = pd.read_csv(args.data_path + 'sample_submission.csv')

In [6]:
def rmse(y, y_pred):
    return mean_squared_error(y, y_pred, squared = False)

n_splits = 10
cv = StratifiedKFold(n_splits=n_splits, random_state=42, shuffle=True)

folds = []
for train_idx, valid_idx in cv.split(data['train'].drop('rating', axis = 1), data['train']['rating']):
    folds.append((train_idx,valid_idx))

    
def objective(trial):
    args.batch_size = trial.suggest_categorical('batch_size',[256, 512, 1024, 2048])
    args.lr = trial.suggest_loguniform('lr',0.001,0.01)
    args.weight_decay = trial.suggest_loguniform('weight_decay',1e-07,1e-06)
    args.embed_dim = trial.suggest_int('embed_dim', 1, 16)
    args.dropout = trial.suggest_categorical("dropout",[0.25,0.4,0.55, 0.7])
    args.num_layers = trial.suggest_int('num_layers',1 , 4)
    args.mlp_dims = [trial.suggest_int('mlp_dims',1,16)]*args.num_layers
    
    setting = Setting()

    log_path = setting.get_log_path(args)
    setting.make_dir(log_path)

    logger = Logger(args, log_path)
    logger.save_args()
    
    model = models_load(args,data)
    minimum_loss, model = train(args, model, data, logger, setting)
    
    return minimum_loss

for fold in range(0, n_splits):
    print(f'===================================={fold+1}============================================')
    train_idx, valid_idx = folds[fold]
    X_train = data['train'].drop('rating', axis = 1).iloc[train_idx]
    X_valid = data['train'].drop('rating', axis = 1).iloc[valid_idx]
    y_train = data['train']['rating'].iloc[train_idx]
    y_valid = data['train']['rating'].iloc[valid_idx]

    data['X_train'], data['X_valid'], data['y_train'], data['y_valid'] = X_train, X_valid, y_train, y_valid
    data = dl_data_loader(args, data)
    
    sampler = optuna.samplers.TPESampler(seed=seed)
    study = optuna.create_study(
        study_name = 'dcnc_parameter_opt',
        direction = 'minimize',
        sampler = sampler)
    
    study.optimize(objective, n_trials=10)

    print('best params {} :'.format(fold+1), study.best_value)
    print(study.best_params)
    temp = args.__dict__
    for key, value in study.best_params.items():
        if key == 'mlp_dims':
            temp[key] = [value] * study.best_params['num_layers']
        else:
            temp[key] = value
    
    setting = Setting()

    log_path = setting.get_log_path(args)
    setting.make_dir(log_path)

    logger = Logger(args, log_path)
    logger.save_args()
    
    model = models_load(args, data)
    _, model = train(args, model, data, logger, setting)
    
    predicts = test(args, model, data, setting)

    sample_submission[f'pred_{fold}'] = predicts
    print(f'================================================================================\n\n')

[32m[I 2023-04-19 11:22:18,590][0m A new study created in memory with name: dcnc_parameter_opt[0m




  args.lr = trial.suggest_loguniform('lr',0.001,0.01)
  args.weight_decay = trial.suggest_loguniform('weight_decay',1e-07,1e-06)
  3%|▎         | 1/30 [00:05<02:38,  5.47s/it]

Epoch: 1, Train_loss: 3.869, valid_loss: 2.378


  7%|▋         | 2/30 [00:10<02:29,  5.33s/it]

Epoch: 2, Train_loss: 2.309, valid_loss: 2.264


 10%|█         | 3/30 [00:15<02:20,  5.19s/it]

Epoch: 3, Train_loss: 2.095, valid_loss: 2.249


 13%|█▎        | 4/30 [00:20<02:13,  5.13s/it]

Epoch: 4, Train_loss: 1.895, valid_loss: 2.284


 17%|█▋        | 5/30 [00:25<02:07,  5.11s/it]

Epoch: 5, Train_loss: 1.757, valid_loss: 2.309


 17%|█▋        | 5/30 [00:30<02:32,  6.09s/it]
[32m[I 2023-04-19 11:22:51,656][0m Trial 0 finished with value: 2.249439533551534 and parameters: {'batch_size': 512, 'lr': 0.001432249371823025, 'weight_decay': 1.4321698289111518e-07, 'embed_dim': 1, 'dropout': 0.25, 'num_layers': 4, 'mlp_dims': 14}. Best is trial 0 with value: 2.249439533551534.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 6, Train_loss: 1.664, valid_loss: 2.346
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:02,  4.24s/it]

Epoch: 1, Train_loss: 2.759, valid_loss: 2.171


  7%|▋         | 2/30 [00:08<01:57,  4.19s/it]

Epoch: 2, Train_loss: 1.945, valid_loss: 2.186


 10%|█         | 3/30 [00:12<01:54,  4.23s/it]

Epoch: 3, Train_loss: 1.694, valid_loss: 2.245


 10%|█         | 3/30 [00:17<02:33,  5.70s/it]
[32m[I 2023-04-19 11:23:08,794][0m Trial 1 finished with value: 2.171184476216634 and parameters: {'batch_size': 2048, 'lr': 0.003347776308515932, 'weight_decay': 2.703616066661999e-07, 'embed_dim': 5, 'dropout': 0.25, 'num_layers': 2, 'mlp_dims': 13}. Best is trial 1 with value: 2.171184476216634.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.539, valid_loss: 2.285
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:11,  4.55s/it]

Epoch: 1, Train_loss: 3.114, valid_loss: 2.185


  7%|▋         | 2/30 [00:09<02:08,  4.60s/it]

Epoch: 2, Train_loss: 2.018, valid_loss: 2.175


 10%|█         | 3/30 [00:13<02:01,  4.49s/it]

Epoch: 3, Train_loss: 1.767, valid_loss: 2.236


 13%|█▎        | 4/30 [00:17<01:51,  4.28s/it]

Epoch: 4, Train_loss: 1.608, valid_loss: 2.276


 13%|█▎        | 4/30 [00:20<02:15,  5.22s/it]
[32m[I 2023-04-19 11:23:29,700][0m Trial 2 finished with value: 2.175238768259684 and parameters: {'batch_size': 1024, 'lr': 0.004050837781329677, 'weight_decay': 1.4808945119975175e-07, 'embed_dim': 2, 'dropout': 0.4, 'num_layers': 1, 'mlp_dims': 11}. Best is trial 1 with value: 2.171184476216634.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 5, Train_loss: 1.507, valid_loss: 2.297
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:24,  4.99s/it]

Epoch: 1, Train_loss: 2.400, valid_loss: 2.159


  7%|▋         | 2/30 [00:09<02:19,  4.98s/it]

Epoch: 2, Train_loss: 1.873, valid_loss: 2.210


 10%|█         | 3/30 [00:14<02:13,  4.95s/it]

Epoch: 3, Train_loss: 1.591, valid_loss: 2.273


 10%|█         | 3/30 [00:19<02:58,  6.61s/it]
[32m[I 2023-04-19 11:23:49,625][0m Trial 3 finished with value: 2.1591196219126383 and parameters: {'batch_size': 1024, 'lr': 0.008115595675970505, 'weight_decay': 1.8145961353490245e-07, 'embed_dim': 11, 'dropout': 0.55, 'num_layers': 4, 'mlp_dims': 13}. Best is trial 3 with value: 2.1591196219126383.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.430, valid_loss: 2.316
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<01:56,  4.03s/it]

Epoch: 1, Train_loss: 5.021, valid_loss: 2.473


  7%|▋         | 2/30 [00:08<01:54,  4.09s/it]

Epoch: 2, Train_loss: 2.378, valid_loss: 2.299


 10%|█         | 3/30 [00:12<01:50,  4.11s/it]

Epoch: 3, Train_loss: 2.140, valid_loss: 2.207


 13%|█▎        | 4/30 [00:16<01:46,  4.09s/it]

Epoch: 4, Train_loss: 1.936, valid_loss: 2.174


 17%|█▋        | 5/30 [00:20<01:43,  4.14s/it]

Epoch: 5, Train_loss: 1.758, valid_loss: 2.204


 20%|██        | 6/30 [00:24<01:39,  4.13s/it]

Epoch: 6, Train_loss: 1.622, valid_loss: 2.254


 20%|██        | 6/30 [00:28<01:55,  4.83s/it]
[32m[I 2023-04-19 11:24:18,598][0m Trial 4 finished with value: 2.1737297693888347 and parameters: {'batch_size': 256, 'lr': 0.0012260057359187524, 'weight_decay': 1.5703008378806713e-07, 'embed_dim': 1, 'dropout': 0.7, 'num_layers': 2, 'mlp_dims': 5}. Best is trial 3 with value: 2.1591196219126383.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 7, Train_loss: 1.531, valid_loss: 2.300
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:23,  4.95s/it]

Epoch: 1, Train_loss: 2.421, valid_loss: 2.152


  7%|▋         | 2/30 [00:09<02:17,  4.90s/it]

Epoch: 2, Train_loss: 1.872, valid_loss: 2.197


 10%|█         | 3/30 [00:14<02:12,  4.91s/it]

Epoch: 3, Train_loss: 1.608, valid_loss: 2.261


 10%|█         | 3/30 [00:19<02:55,  6.49s/it]
[32m[I 2023-04-19 11:24:38,093][0m Trial 5 finished with value: 2.1515968640645347 and parameters: {'batch_size': 1024, 'lr': 0.009702573394120726, 'weight_decay': 5.918951335463648e-07, 'embed_dim': 4, 'dropout': 0.4, 'num_layers': 4, 'mlp_dims': 2}. Best is trial 5 with value: 2.1515968640645347.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.464, valid_loss: 2.307
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:00,  4.16s/it]

Epoch: 1, Train_loss: 3.060, valid_loss: 2.186


  7%|▋         | 2/30 [00:08<01:56,  4.16s/it]

Epoch: 2, Train_loss: 1.988, valid_loss: 2.187


 10%|█         | 3/30 [00:12<01:51,  4.13s/it]

Epoch: 3, Train_loss: 1.746, valid_loss: 2.242


 10%|█         | 3/30 [00:16<02:28,  5.51s/it]
[32m[I 2023-04-19 11:24:54,652][0m Trial 6 finished with value: 2.1861950953801474 and parameters: {'batch_size': 1024, 'lr': 0.0021423874956449057, 'weight_decay': 1.1575995526672756e-07, 'embed_dim': 5, 'dropout': 0.7, 'num_layers': 2, 'mlp_dims': 2}. Best is trial 5 with value: 2.1515968640645347.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.592, valid_loss: 2.278
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<01:57,  4.06s/it]

Epoch: 1, Train_loss: 2.761, valid_loss: 2.178


  7%|▋         | 2/30 [00:08<01:54,  4.10s/it]

Epoch: 2, Train_loss: 1.958, valid_loss: 2.185


 10%|█         | 3/30 [00:12<01:51,  4.13s/it]

Epoch: 3, Train_loss: 1.700, valid_loss: 2.237


 10%|█         | 3/30 [00:16<02:28,  5.50s/it]
[32m[I 2023-04-19 11:25:11,194][0m Trial 7 finished with value: 2.177967921892802 and parameters: {'batch_size': 2048, 'lr': 0.003117422003004632, 'weight_decay': 3.332213575546235e-07, 'embed_dim': 7, 'dropout': 0.7, 'num_layers': 2, 'mlp_dims': 9}. Best is trial 5 with value: 2.1515968640645347.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.547, valid_loss: 2.278
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:24,  5.00s/it]

Epoch: 1, Train_loss: 3.002, valid_loss: 2.218


  7%|▋         | 2/30 [00:09<02:18,  4.95s/it]

Epoch: 2, Train_loss: 2.032, valid_loss: 2.186


 10%|█         | 3/30 [00:14<02:13,  4.95s/it]

Epoch: 3, Train_loss: 1.753, valid_loss: 2.234


 13%|█▎        | 4/30 [00:19<02:08,  4.94s/it]

Epoch: 4, Train_loss: 1.597, valid_loss: 2.270


 13%|█▎        | 4/30 [00:24<02:38,  6.11s/it]
[32m[I 2023-04-19 11:25:35,698][0m Trial 8 finished with value: 2.186497100194295 and parameters: {'batch_size': 256, 'lr': 0.0016935505549297925, 'weight_decay': 1.1939328726535435e-07, 'embed_dim': 5, 'dropout': 0.4, 'num_layers': 4, 'mlp_dims': 13}. Best is trial 5 with value: 2.1515968640645347.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 5, Train_loss: 1.503, valid_loss: 2.300
Early Stoping!!!!!


  3%|▎         | 1/30 [00:03<01:48,  3.75s/it]

Epoch: 1, Train_loss: 2.757, valid_loss: 2.175


  7%|▋         | 2/30 [00:07<01:43,  3.70s/it]

Epoch: 2, Train_loss: 1.955, valid_loss: 2.186


 10%|█         | 3/30 [00:11<01:40,  3.71s/it]

Epoch: 3, Train_loss: 1.724, valid_loss: 2.242


 10%|█         | 3/30 [00:14<02:12,  4.92s/it]
[32m[I 2023-04-19 11:25:50,483][0m Trial 9 finished with value: 2.175071954727173 and parameters: {'batch_size': 512, 'lr': 0.00787211264452507, 'weight_decay': 2.0797133276936325e-07, 'embed_dim': 2, 'dropout': 0.7, 'num_layers': 1, 'mlp_dims': 9}. Best is trial 5 with value: 2.1515968640645347.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.584, valid_loss: 2.282
Early Stoping!!!!!
best params 1 : 2.1515968640645347
{'batch_size': 1024, 'lr': 0.009702573394120726, 'weight_decay': 5.918951335463648e-07, 'embed_dim': 4, 'dropout': 0.4, 'num_layers': 4, 'mlp_dims': 2}


  3%|▎         | 1/30 [00:04<02:19,  4.81s/it]

Epoch: 1, Train_loss: 2.412, valid_loss: 2.159


  7%|▋         | 2/30 [00:09<02:15,  4.85s/it]

Epoch: 2, Train_loss: 1.874, valid_loss: 2.197


 10%|█         | 3/30 [00:14<02:11,  4.89s/it]

Epoch: 3, Train_loss: 1.607, valid_loss: 2.281


 10%|█         | 3/30 [00:19<02:57,  6.56s/it]

Epoch: 4, Train_loss: 1.463, valid_loss: 2.298
Early Stoping!!!!!



[32m[I 2023-04-19 11:26:10,862][0m A new study created in memory with name: dcnc_parameter_opt[0m
  args.lr = trial.suggest_loguniform('lr',0.001,0.01)
  args.weight_decay = trial.suggest_loguniform('weight_decay',1e-07,1e-06)
  0%|          | 0/30 [00:00<?, ?it/s]





  3%|▎         | 1/30 [00:04<02:21,  4.87s/it]

Epoch: 1, Train_loss: 3.993, valid_loss: 2.355


  7%|▋         | 2/30 [00:09<02:16,  4.89s/it]

Epoch: 2, Train_loss: 2.267, valid_loss: 2.196


 10%|█         | 3/30 [00:14<02:11,  4.87s/it]

Epoch: 3, Train_loss: 1.969, valid_loss: 2.194


 13%|█▎        | 4/30 [00:19<02:06,  4.88s/it]

Epoch: 4, Train_loss: 1.745, valid_loss: 2.236


 17%|█▋        | 5/30 [00:24<02:02,  4.91s/it]

Epoch: 5, Train_loss: 1.609, valid_loss: 2.277


 17%|█▋        | 5/30 [00:29<02:26,  5.87s/it]
[32m[I 2023-04-19 11:26:40,217][0m Trial 0 finished with value: 2.1939783891042075 and parameters: {'batch_size': 512, 'lr': 0.001432249371823025, 'weight_decay': 1.4321698289111518e-07, 'embed_dim': 1, 'dropout': 0.25, 'num_layers': 4, 'mlp_dims': 14}. Best is trial 0 with value: 2.1939783891042075.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 6, Train_loss: 1.523, valid_loss: 2.297
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:01,  4.18s/it]

Epoch: 1, Train_loss: 2.797, valid_loss: 2.191


  7%|▋         | 2/30 [00:08<01:55,  4.14s/it]

Epoch: 2, Train_loss: 1.990, valid_loss: 2.189


 10%|█         | 3/30 [00:12<01:51,  4.15s/it]

Epoch: 3, Train_loss: 1.732, valid_loss: 2.240


 13%|█▎        | 4/30 [00:16<01:48,  4.16s/it]

Epoch: 4, Train_loss: 1.564, valid_loss: 2.285


 13%|█▎        | 4/30 [00:20<02:13,  5.15s/it]
[32m[I 2023-04-19 11:27:00,871][0m Trial 1 finished with value: 2.18915909131368 and parameters: {'batch_size': 2048, 'lr': 0.003347776308515932, 'weight_decay': 2.703616066661999e-07, 'embed_dim': 5, 'dropout': 0.25, 'num_layers': 2, 'mlp_dims': 13}. Best is trial 1 with value: 2.18915909131368.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 5, Train_loss: 1.447, valid_loss: 2.306
Early Stoping!!!!!


  3%|▎         | 1/30 [00:03<01:49,  3.78s/it]

Epoch: 1, Train_loss: 3.098, valid_loss: 2.173


  7%|▋         | 2/30 [00:07<01:44,  3.74s/it]

Epoch: 2, Train_loss: 1.998, valid_loss: 2.169


 10%|█         | 3/30 [00:11<01:43,  3.85s/it]

Epoch: 3, Train_loss: 1.759, valid_loss: 2.236


 13%|█▎        | 4/30 [00:15<01:38,  3.79s/it]

Epoch: 4, Train_loss: 1.613, valid_loss: 2.285


 13%|█▎        | 4/30 [00:18<02:03,  4.74s/it]
[32m[I 2023-04-19 11:27:19,866][0m Trial 2 finished with value: 2.1690284172693888 and parameters: {'batch_size': 1024, 'lr': 0.004050837781329677, 'weight_decay': 1.4808945119975175e-07, 'embed_dim': 2, 'dropout': 0.4, 'num_layers': 1, 'mlp_dims': 11}. Best is trial 2 with value: 2.1690284172693888.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 5, Train_loss: 1.520, valid_loss: 2.312
Early Stoping!!!!!


  3%|▎         | 1/30 [00:05<02:27,  5.08s/it]

Epoch: 1, Train_loss: 2.391, valid_loss: 2.150


  7%|▋         | 2/30 [00:10<02:22,  5.09s/it]

Epoch: 2, Train_loss: 1.869, valid_loss: 2.208


 10%|█         | 3/30 [00:15<02:16,  5.07s/it]

Epoch: 3, Train_loss: 1.592, valid_loss: 2.262


 10%|█         | 3/30 [00:20<03:01,  6.74s/it]
[32m[I 2023-04-19 11:27:40,165][0m Trial 3 finished with value: 2.1502406279246014 and parameters: {'batch_size': 1024, 'lr': 0.008115595675970505, 'weight_decay': 1.8145961353490245e-07, 'embed_dim': 11, 'dropout': 0.55, 'num_layers': 4, 'mlp_dims': 13}. Best is trial 3 with value: 2.1502406279246014.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.434, valid_loss: 2.316
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<01:56,  4.03s/it]

Epoch: 1, Train_loss: 5.033, valid_loss: 2.451


  7%|▋         | 2/30 [00:08<01:59,  4.26s/it]

Epoch: 2, Train_loss: 2.370, valid_loss: 2.304


 10%|█         | 3/30 [00:12<01:53,  4.19s/it]

Epoch: 3, Train_loss: 2.153, valid_loss: 2.210


 13%|█▎        | 4/30 [00:17<01:49,  4.20s/it]

Epoch: 4, Train_loss: 1.951, valid_loss: 2.174


 17%|█▋        | 5/30 [00:21<01:46,  4.27s/it]

Epoch: 5, Train_loss: 1.777, valid_loss: 2.197


 20%|██        | 6/30 [00:25<01:40,  4.19s/it]

Epoch: 6, Train_loss: 1.641, valid_loss: 2.245


 20%|██        | 6/30 [00:29<01:58,  4.94s/it]
[32m[I 2023-04-19 11:28:09,821][0m Trial 4 finished with value: 2.173901391029358 and parameters: {'batch_size': 256, 'lr': 0.0012260057359187524, 'weight_decay': 1.5703008378806713e-07, 'embed_dim': 1, 'dropout': 0.7, 'num_layers': 2, 'mlp_dims': 5}. Best is trial 3 with value: 2.1502406279246014.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 7, Train_loss: 1.546, valid_loss: 2.293
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:20,  4.83s/it]

Epoch: 1, Train_loss: 2.411, valid_loss: 2.149


  7%|▋         | 2/30 [00:09<02:16,  4.86s/it]

Epoch: 2, Train_loss: 1.880, valid_loss: 2.186


 10%|█         | 3/30 [00:14<02:12,  4.89s/it]

Epoch: 3, Train_loss: 1.609, valid_loss: 2.244


 10%|█         | 3/30 [00:19<02:56,  6.52s/it]
[32m[I 2023-04-19 11:28:29,420][0m Trial 5 finished with value: 2.1488680760065715 and parameters: {'batch_size': 1024, 'lr': 0.009702573394120726, 'weight_decay': 5.918951335463648e-07, 'embed_dim': 4, 'dropout': 0.4, 'num_layers': 4, 'mlp_dims': 2}. Best is trial 5 with value: 2.1488680760065715.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.459, valid_loss: 2.289
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:00,  4.17s/it]

Epoch: 1, Train_loss: 3.082, valid_loss: 2.180


  7%|▋         | 2/30 [00:08<01:55,  4.12s/it]

Epoch: 2, Train_loss: 1.991, valid_loss: 2.182


 10%|█         | 3/30 [00:12<01:51,  4.12s/it]

Epoch: 3, Train_loss: 1.754, valid_loss: 2.238


 10%|█         | 3/30 [00:16<02:27,  5.45s/it]
[32m[I 2023-04-19 11:28:45,796][0m Trial 6 finished with value: 2.180108133951823 and parameters: {'batch_size': 1024, 'lr': 0.0021423874956449057, 'weight_decay': 1.1575995526672756e-07, 'embed_dim': 5, 'dropout': 0.7, 'num_layers': 2, 'mlp_dims': 2}. Best is trial 5 with value: 2.1488680760065715.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.602, valid_loss: 2.277
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:01,  4.19s/it]

Epoch: 1, Train_loss: 2.800, valid_loss: 2.177


  7%|▋         | 2/30 [00:08<01:56,  4.18s/it]

Epoch: 2, Train_loss: 1.956, valid_loss: 2.182


 10%|█         | 3/30 [00:12<01:51,  4.14s/it]

Epoch: 3, Train_loss: 1.692, valid_loss: 2.236


 10%|█         | 3/30 [00:16<02:28,  5.51s/it]
[32m[I 2023-04-19 11:29:02,370][0m Trial 7 finished with value: 2.1771634340286257 and parameters: {'batch_size': 2048, 'lr': 0.003117422003004632, 'weight_decay': 3.332213575546235e-07, 'embed_dim': 7, 'dropout': 0.7, 'num_layers': 2, 'mlp_dims': 9}. Best is trial 5 with value: 2.1488680760065715.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.541, valid_loss: 2.287
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:19,  4.83s/it]

Epoch: 1, Train_loss: 3.008, valid_loss: 2.201


  7%|▋         | 2/30 [00:09<02:16,  4.88s/it]

Epoch: 2, Train_loss: 2.020, valid_loss: 2.187


 10%|█         | 3/30 [00:14<02:12,  4.90s/it]

Epoch: 3, Train_loss: 1.759, valid_loss: 2.231


 13%|█▎        | 4/30 [00:19<02:06,  4.86s/it]

Epoch: 4, Train_loss: 1.601, valid_loss: 2.268


 13%|█▎        | 4/30 [00:24<02:39,  6.13s/it]
[32m[I 2023-04-19 11:29:26,924][0m Trial 8 finished with value: 2.1868820587793985 and parameters: {'batch_size': 256, 'lr': 0.0016935505549297925, 'weight_decay': 1.1939328726535435e-07, 'embed_dim': 5, 'dropout': 0.4, 'num_layers': 4, 'mlp_dims': 13}. Best is trial 5 with value: 2.1488680760065715.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 5, Train_loss: 1.504, valid_loss: 2.290
Early Stoping!!!!!


  3%|▎         | 1/30 [00:03<01:45,  3.63s/it]

Epoch: 1, Train_loss: 2.785, valid_loss: 2.169


  7%|▋         | 2/30 [00:07<01:42,  3.67s/it]

Epoch: 2, Train_loss: 1.974, valid_loss: 2.177


 10%|█         | 3/30 [00:11<01:39,  3.68s/it]

Epoch: 3, Train_loss: 1.729, valid_loss: 2.244


 10%|█         | 3/30 [00:14<02:12,  4.91s/it]
[32m[I 2023-04-19 11:29:41,667][0m Trial 9 finished with value: 2.1687931060791015 and parameters: {'batch_size': 512, 'lr': 0.00787211264452507, 'weight_decay': 2.0797133276936325e-07, 'embed_dim': 2, 'dropout': 0.7, 'num_layers': 1, 'mlp_dims': 9}. Best is trial 5 with value: 2.1488680760065715.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.577, valid_loss: 2.277
Early Stoping!!!!!
best params 2 : 2.1488680760065715
{'batch_size': 1024, 'lr': 0.009702573394120726, 'weight_decay': 5.918951335463648e-07, 'embed_dim': 4, 'dropout': 0.4, 'num_layers': 4, 'mlp_dims': 2}


  3%|▎         | 1/30 [00:05<02:40,  5.54s/it]

Epoch: 1, Train_loss: 2.417, valid_loss: 2.154


  7%|▋         | 2/30 [00:11<02:35,  5.54s/it]

Epoch: 2, Train_loss: 1.875, valid_loss: 2.203


 10%|█         | 3/30 [00:16<02:26,  5.43s/it]

Epoch: 3, Train_loss: 1.609, valid_loss: 2.265


 10%|█         | 3/30 [00:21<03:15,  7.23s/it]

Epoch: 4, Train_loss: 1.463, valid_loss: 2.311
Early Stoping!!!!!



[32m[I 2023-04-19 11:30:03,916][0m A new study created in memory with name: dcnc_parameter_opt[0m
  args.lr = trial.suggest_loguniform('lr',0.001,0.01)
  args.weight_decay = trial.suggest_loguniform('weight_decay',1e-07,1e-06)
  0%|          | 0/30 [00:00<?, ?it/s]





  3%|▎         | 1/30 [00:04<02:24,  4.98s/it]

Epoch: 1, Train_loss: 3.867, valid_loss: 2.344


  7%|▋         | 2/30 [00:09<02:19,  4.98s/it]

Epoch: 2, Train_loss: 2.207, valid_loss: 2.190


 10%|█         | 3/30 [00:14<02:13,  4.93s/it]

Epoch: 3, Train_loss: 1.922, valid_loss: 2.198


 13%|█▎        | 4/30 [00:19<02:08,  4.94s/it]

Epoch: 4, Train_loss: 1.728, valid_loss: 2.246


 13%|█▎        | 4/30 [00:24<02:39,  6.13s/it]
[32m[I 2023-04-19 11:30:28,468][0m Trial 0 finished with value: 2.1897045373916626 and parameters: {'batch_size': 512, 'lr': 0.001432249371823025, 'weight_decay': 1.4321698289111518e-07, 'embed_dim': 1, 'dropout': 0.25, 'num_layers': 4, 'mlp_dims': 14}. Best is trial 0 with value: 2.1897045373916626.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 5, Train_loss: 1.603, valid_loss: 2.280
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:00,  4.16s/it]

Epoch: 1, Train_loss: 2.775, valid_loss: 2.165


  7%|▋         | 2/30 [00:08<01:56,  4.15s/it]

Epoch: 2, Train_loss: 1.934, valid_loss: 2.186


 10%|█         | 3/30 [00:12<01:51,  4.12s/it]

Epoch: 3, Train_loss: 1.691, valid_loss: 2.246


 10%|█         | 3/30 [00:16<02:28,  5.48s/it]
[32m[I 2023-04-19 11:30:44,954][0m Trial 1 finished with value: 2.1648368358612062 and parameters: {'batch_size': 2048, 'lr': 0.003347776308515932, 'weight_decay': 2.703616066661999e-07, 'embed_dim': 5, 'dropout': 0.25, 'num_layers': 2, 'mlp_dims': 13}. Best is trial 1 with value: 2.1648368358612062.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.540, valid_loss: 2.291
Early Stoping!!!!!


  3%|▎         | 1/30 [00:03<01:44,  3.62s/it]

Epoch: 1, Train_loss: 3.109, valid_loss: 2.189


  7%|▋         | 2/30 [00:07<01:42,  3.65s/it]

Epoch: 2, Train_loss: 2.009, valid_loss: 2.192


 10%|█         | 3/30 [00:11<01:39,  3.68s/it]

Epoch: 3, Train_loss: 1.759, valid_loss: 2.251


 10%|█         | 3/30 [00:14<02:12,  4.90s/it]
[32m[I 2023-04-19 11:30:59,678][0m Trial 2 finished with value: 2.1892889658610026 and parameters: {'batch_size': 1024, 'lr': 0.004050837781329677, 'weight_decay': 1.4808945119975175e-07, 'embed_dim': 2, 'dropout': 0.4, 'num_layers': 1, 'mlp_dims': 11}. Best is trial 1 with value: 2.1648368358612062.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.596, valid_loss: 2.287
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:23,  4.95s/it]

Epoch: 1, Train_loss: 2.393, valid_loss: 2.163


  7%|▋         | 2/30 [00:09<02:18,  4.93s/it]

Epoch: 2, Train_loss: 1.873, valid_loss: 2.207


 10%|█         | 3/30 [00:14<02:13,  4.95s/it]

Epoch: 3, Train_loss: 1.595, valid_loss: 2.260


 10%|█         | 3/30 [00:19<02:58,  6.62s/it]
[32m[I 2023-04-19 11:31:19,612][0m Trial 3 finished with value: 2.1629298051198322 and parameters: {'batch_size': 1024, 'lr': 0.008115595675970505, 'weight_decay': 1.8145961353490245e-07, 'embed_dim': 11, 'dropout': 0.55, 'num_layers': 4, 'mlp_dims': 13}. Best is trial 3 with value: 2.1629298051198322.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.436, valid_loss: 2.301
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<01:57,  4.05s/it]

Epoch: 1, Train_loss: 4.882, valid_loss: 2.438


  7%|▋         | 2/30 [00:08<01:54,  4.08s/it]

Epoch: 2, Train_loss: 2.348, valid_loss: 2.270


 10%|█         | 3/30 [00:12<01:49,  4.05s/it]

Epoch: 3, Train_loss: 2.118, valid_loss: 2.197


 13%|█▎        | 4/30 [00:16<01:45,  4.07s/it]

Epoch: 4, Train_loss: 1.923, valid_loss: 2.178


 17%|█▋        | 5/30 [00:20<01:42,  4.09s/it]

Epoch: 5, Train_loss: 1.752, valid_loss: 2.213


 20%|██        | 6/30 [00:24<01:37,  4.07s/it]

Epoch: 6, Train_loss: 1.623, valid_loss: 2.260


 20%|██        | 6/30 [00:28<01:54,  4.77s/it]
[32m[I 2023-04-19 11:31:48,245][0m Trial 4 finished with value: 2.1775493462880453 and parameters: {'batch_size': 256, 'lr': 0.0012260057359187524, 'weight_decay': 1.5703008378806713e-07, 'embed_dim': 1, 'dropout': 0.7, 'num_layers': 2, 'mlp_dims': 5}. Best is trial 3 with value: 2.1629298051198322.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 7, Train_loss: 1.535, valid_loss: 2.303
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:17,  4.75s/it]

Epoch: 1, Train_loss: 2.413, valid_loss: 2.156


  7%|▋         | 2/30 [00:09<02:13,  4.78s/it]

Epoch: 2, Train_loss: 1.876, valid_loss: 2.206


 10%|█         | 3/30 [00:14<02:09,  4.81s/it]

Epoch: 3, Train_loss: 1.607, valid_loss: 2.281


 10%|█         | 3/30 [00:19<02:53,  6.43s/it]
[32m[I 2023-04-19 11:32:07,576][0m Trial 5 finished with value: 2.1557101806004844 and parameters: {'batch_size': 1024, 'lr': 0.009702573394120726, 'weight_decay': 5.918951335463648e-07, 'embed_dim': 4, 'dropout': 0.4, 'num_layers': 4, 'mlp_dims': 2}. Best is trial 5 with value: 2.1557101806004844.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.456, valid_loss: 2.298
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:00,  4.14s/it]

Epoch: 1, Train_loss: 3.069, valid_loss: 2.186


  7%|▋         | 2/30 [00:08<01:55,  4.12s/it]

Epoch: 2, Train_loss: 1.987, valid_loss: 2.186


 10%|█         | 3/30 [00:12<01:51,  4.13s/it]

Epoch: 3, Train_loss: 1.745, valid_loss: 2.240


 13%|█▎        | 4/30 [00:16<01:47,  4.14s/it]

Epoch: 4, Train_loss: 1.591, valid_loss: 2.274


 13%|█▎        | 4/30 [00:20<02:13,  5.14s/it]
[32m[I 2023-04-19 11:32:28,168][0m Trial 6 finished with value: 2.185812775293986 and parameters: {'batch_size': 1024, 'lr': 0.0021423874956449057, 'weight_decay': 1.1575995526672756e-07, 'embed_dim': 5, 'dropout': 0.7, 'num_layers': 2, 'mlp_dims': 2}. Best is trial 5 with value: 2.1557101806004844.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 5, Train_loss: 1.498, valid_loss: 2.291
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:01,  4.19s/it]

Epoch: 1, Train_loss: 2.792, valid_loss: 2.182


  7%|▋         | 2/30 [00:08<01:55,  4.14s/it]

Epoch: 2, Train_loss: 1.958, valid_loss: 2.187


 10%|█         | 3/30 [00:12<01:52,  4.15s/it]

Epoch: 3, Train_loss: 1.697, valid_loss: 2.239


 10%|█         | 3/30 [00:16<02:28,  5.51s/it]
[32m[I 2023-04-19 11:32:44,767][0m Trial 7 finished with value: 2.181804092725118 and parameters: {'batch_size': 2048, 'lr': 0.003117422003004632, 'weight_decay': 3.332213575546235e-07, 'embed_dim': 7, 'dropout': 0.7, 'num_layers': 2, 'mlp_dims': 9}. Best is trial 5 with value: 2.1557101806004844.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.546, valid_loss: 2.278
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:20,  4.85s/it]

Epoch: 1, Train_loss: 2.995, valid_loss: 2.196


  7%|▋         | 2/30 [00:09<02:16,  4.89s/it]

Epoch: 2, Train_loss: 2.013, valid_loss: 2.184


 10%|█         | 3/30 [00:14<02:11,  4.87s/it]

Epoch: 3, Train_loss: 1.747, valid_loss: 2.247


 13%|█▎        | 4/30 [00:19<02:07,  4.91s/it]

Epoch: 4, Train_loss: 1.599, valid_loss: 2.281


 13%|█▎        | 4/30 [00:24<02:39,  6.14s/it]
[32m[I 2023-04-19 11:33:09,387][0m Trial 8 finished with value: 2.1837634642918906 and parameters: {'batch_size': 256, 'lr': 0.0016935505549297925, 'weight_decay': 1.1939328726535435e-07, 'embed_dim': 5, 'dropout': 0.4, 'num_layers': 4, 'mlp_dims': 13}. Best is trial 5 with value: 2.1557101806004844.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 5, Train_loss: 1.506, valid_loss: 2.301
Early Stoping!!!!!


  3%|▎         | 1/30 [00:03<01:43,  3.58s/it]

Epoch: 1, Train_loss: 2.817, valid_loss: 2.193


  7%|▋         | 2/30 [00:07<01:41,  3.63s/it]

Epoch: 2, Train_loss: 1.990, valid_loss: 2.193


 10%|█         | 3/30 [00:10<01:37,  3.62s/it]

Epoch: 3, Train_loss: 1.722, valid_loss: 2.243


 13%|█▎        | 4/30 [00:14<01:35,  3.66s/it]

Epoch: 4, Train_loss: 1.566, valid_loss: 2.279


 13%|█▎        | 4/30 [00:18<01:59,  4.60s/it]
[32m[I 2023-04-19 11:33:27,803][0m Trial 9 finished with value: 2.192596658070882 and parameters: {'batch_size': 512, 'lr': 0.00787211264452507, 'weight_decay': 2.0797133276936325e-07, 'embed_dim': 2, 'dropout': 0.7, 'num_layers': 1, 'mlp_dims': 9}. Best is trial 5 with value: 2.1557101806004844.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 5, Train_loss: 1.467, valid_loss: 2.310
Early Stoping!!!!!
best params 3 : 2.1557101806004844
{'batch_size': 1024, 'lr': 0.009702573394120726, 'weight_decay': 5.918951335463648e-07, 'embed_dim': 4, 'dropout': 0.4, 'num_layers': 4, 'mlp_dims': 2}


  3%|▎         | 1/30 [00:04<02:19,  4.80s/it]

Epoch: 1, Train_loss: 2.414, valid_loss: 2.156


  7%|▋         | 2/30 [00:09<02:16,  4.86s/it]

Epoch: 2, Train_loss: 1.878, valid_loss: 2.192


 10%|█         | 3/30 [00:15<02:15,  5.03s/it]

Epoch: 3, Train_loss: 1.616, valid_loss: 2.237


 10%|█         | 3/30 [00:20<03:00,  6.70s/it]

Epoch: 4, Train_loss: 1.465, valid_loss: 2.304
Early Stoping!!!!!



[32m[I 2023-04-19 11:33:48,457][0m A new study created in memory with name: dcnc_parameter_opt[0m
  args.lr = trial.suggest_loguniform('lr',0.001,0.01)
  args.weight_decay = trial.suggest_loguniform('weight_decay',1e-07,1e-06)
  0%|          | 0/30 [00:00<?, ?it/s]





  3%|▎         | 1/30 [00:04<02:22,  4.92s/it]

Epoch: 1, Train_loss: 3.909, valid_loss: 2.252


  7%|▋         | 2/30 [00:09<02:18,  4.94s/it]

Epoch: 2, Train_loss: 2.138, valid_loss: 2.178


 10%|█         | 3/30 [00:14<02:12,  4.90s/it]

Epoch: 3, Train_loss: 1.866, valid_loss: 2.221


 13%|█▎        | 4/30 [00:19<02:07,  4.91s/it]

Epoch: 4, Train_loss: 1.687, valid_loss: 2.263


 13%|█▎        | 4/30 [00:24<02:38,  6.11s/it]
[32m[I 2023-04-19 11:34:12,930][0m Trial 0 finished with value: 2.1775854428609214 and parameters: {'batch_size': 512, 'lr': 0.001432249371823025, 'weight_decay': 1.4321698289111518e-07, 'embed_dim': 1, 'dropout': 0.25, 'num_layers': 4, 'mlp_dims': 14}. Best is trial 0 with value: 2.1775854428609214.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 5, Train_loss: 1.574, valid_loss: 2.292
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<02:05,  4.34s/it]

Epoch: 1, Train_loss: 2.768, valid_loss: 2.176


  7%|▋         | 2/30 [00:08<01:59,  4.28s/it]

Epoch: 2, Train_loss: 1.924, valid_loss: 2.197


 10%|█         | 3/30 [00:12<01:53,  4.21s/it]

Epoch: 3, Train_loss: 1.683, valid_loss: 2.251


 10%|█         | 3/30 [00:16<02:30,  5.57s/it]
[32m[I 2023-04-19 11:34:29,695][0m Trial 1 finished with value: 2.1759517908096315 and parameters: {'batch_size': 2048, 'lr': 0.003347776308515932, 'weight_decay': 2.703616066661999e-07, 'embed_dim': 5, 'dropout': 0.25, 'num_layers': 2, 'mlp_dims': 13}. Best is trial 1 with value: 2.1759517908096315.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.536, valid_loss: 2.287
Early Stoping!!!!!


  3%|▎         | 1/30 [00:03<01:45,  3.65s/it]

Epoch: 1, Train_loss: 3.201, valid_loss: 2.213


  7%|▋         | 2/30 [00:07<01:43,  3.68s/it]

Epoch: 2, Train_loss: 2.073, valid_loss: 2.194


 10%|█         | 3/30 [00:11<01:40,  3.71s/it]

Epoch: 3, Train_loss: 1.832, valid_loss: 2.237


 13%|█▎        | 4/30 [00:14<01:35,  3.68s/it]

Epoch: 4, Train_loss: 1.662, valid_loss: 2.276


 13%|█▎        | 4/30 [00:18<02:00,  4.63s/it]
[32m[I 2023-04-19 11:34:48,240][0m Trial 2 finished with value: 2.194154445330302 and parameters: {'batch_size': 1024, 'lr': 0.004050837781329677, 'weight_decay': 1.4808945119975175e-07, 'embed_dim': 2, 'dropout': 0.4, 'num_layers': 1, 'mlp_dims': 11}. Best is trial 1 with value: 2.1759517908096315.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 5, Train_loss: 1.541, valid_loss: 2.296
Early Stoping!!!!!


  3%|▎         | 1/30 [00:05<02:30,  5.17s/it]

Epoch: 1, Train_loss: 2.386, valid_loss: 2.158


  7%|▋         | 2/30 [00:10<02:22,  5.10s/it]

Epoch: 2, Train_loss: 1.862, valid_loss: 2.202


 10%|█         | 3/30 [00:14<02:15,  5.03s/it]

Epoch: 3, Train_loss: 1.590, valid_loss: 2.266


 10%|█         | 3/30 [00:19<02:59,  6.65s/it]
[32m[I 2023-04-19 11:35:08,276][0m Trial 3 finished with value: 2.1583110094070435 and parameters: {'batch_size': 1024, 'lr': 0.008115595675970505, 'weight_decay': 1.8145961353490245e-07, 'embed_dim': 11, 'dropout': 0.55, 'num_layers': 4, 'mlp_dims': 13}. Best is trial 3 with value: 2.1583110094070435.[0m
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch: 4, Train_loss: 1.431, valid_loss: 2.305
Early Stoping!!!!!


  3%|▎         | 1/30 [00:04<01:59,  4.12s/it]

Epoch: 1, Train_loss: 4.773, valid_loss: 2.430


  7%|▋         | 2/30 [00:08<01:54,  4.08s/it]

Epoch: 2, Train_loss: 2.314, valid_loss: 2.234


 10%|█         | 3/30 [00:12<01:50,  4.08s/it]

Epoch: 3, Train_loss: 2.075, valid_loss: 2.177


 13%|█▎        | 4/30 [00:16<01:45,  4.05s/it]

Epoch: 4, Train_loss: 1.879, valid_loss: 2.185


 13%|█▎        | 4/30 [00:18<01:59,  4.61s/it]
[33m[W 2023-04-19 11:35:26,713][0m Trial 4 failed with parameters: {'batch_size': 256, 'lr': 0.0012260057359187524, 'weight_decay': 1.5703008378806713e-07, 'embed_dim': 1, 'dropout': 0.7, 'num_layers': 2, 'mlp_dims': 5} because of the following error: KeyboardInterrupt().[0m
Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_86768/510847217.py", line 30, in objective
    minimum_loss, model = train(args, model, data, logger, setting)
  File "/opt/ml/level1_bookratingprediction-recsys-02/src/train/trainer.py", line 50, in train
    loss.backward()
  File "/opt/conda/lib/python3.8/site-packages/torch/tensor.py", line 221, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/opt/conda/lib/python3.8/site-packages/torch/autograd/__init__.py", line 130, in backward


KeyboardInterrupt: 

In [None]:
sample_submission['rating'] = 0
final_pred = sample_submission['rating'] 

for fold in range(n_splits):
    final_pred += sample_submission[f'pred_{fold}']

sample_submission['rating'] = final_pred / n_splits
temp = sample_submission.copy()

sample_submission = sample_submission[['user_id', 'isbn', 'rating']]

In [None]:
sample_submission.head()

In [None]:
sample_submission['rating'].isna().sum()

In [None]:
sample_submission.to_csv(args.data_path + '/submission_DCNC.csv', index=False)

In [None]:
sns.kdeplot(sample_submission['rating'])

In [None]:
sample_submission.shape