# Imports

In [1]:
import sys
sys.path.append('..')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

from scipy import stats
from scipy.sparse import csr_matrix
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error

import xgb
from catboost import CatBoostRegressor
from xgboost import XGBRegressor, DMatrix
from lightgbm import LGBMRegressor, Dataset

import joblib
import optuna
from optuna.samplers import TPESampler


plt.rcParams['font.family'] = 'Malgun Gothic'
plt.rcParams['axes.unicode_minus'] = False
%reload_ext autotime

time: 0 ns (started: 2024-06-27 13:47:31 +09:00)


# Define Functions

In [2]:
def show_all_columns() -> None:
    """Set pandas display options to show all columns."""
    pd.set_option('display.max_columns', None)  # Show all columns
    pd.set_option('display.expand_frame_repr', False)  # Prevent column wrapping
    pd.set_option('display.max_colwidth', None)  # Show full column content
    
def set_random_seed(seed_value: int) -> None:
    np.random.seed(seed_value)
    
def add_date_features(df: pd.DataFrame, date_column: str, prefix: str) -> pd.DataFrame:
    # Ensure the date column is in datetime format
    if not pd.api.types.is_datetime64_any_dtype(df[date_column]):
        df[date_column] = pd.to_datetime(df[date_column])
        
    # Adding date features
    df[f'{prefix}.year'] = df[date_column].dt.year
    df[f'{prefix}.month'] = df[date_column].dt.month
    df[f'{prefix}.day'] = df[date_column].dt.day
    df[f'{prefix}.quarter'] = df[date_column].dt.quarter
    df[f'{prefix}.week'] = df[date_column].dt.isocalendar().week.astype(np.int32)
    
    return df

def add_time_features(df: pd.DataFrame, hour_column: str, prefix: str = 'hour') -> pd.DataFrame:
    df[f'{prefix}_sin_time'] = np.sin(2 * np.pi * df[hour_column] / 24)
    df[f'{prefix}_cos_time'] = np.cos(2 * np.pi * df[hour_column] / 24)
    return df
    
def calculate_cdd(df: pd.DataFrame, temp_column: str, base_temp: float = 24.0) -> pd.DataFrame:
    df['CDD'] = df[temp_column].apply(lambda x: max(0, x - base_temp))
    return df

def calculate_thi(df: pd.DataFrame, temp_colum: str, humidity_column: str) -> pd.DataFrame:
    df['THI'] = 1.8 * df[temp_colum] - 0.55 * (1 - (df[humidity_column] / 100)) * (1.8 * df[temp_colum] - 26) + 32
    return df

def clean_column_name_train(df: pd.DataFrame, prefix_to_remove: str='electric_train.') -> pd.DataFrame:
    """
    데이터프레임의 칼럼명에서 접두사를 제거하고, 칼럼명을 매핑에 따라 변경.

    Args:
        df (pd.DataFrame): 원본 데이터프레임

    Returns:
        pd.DataFrame: 칼럼명이 정리된 데이터프레임
    """

    mapping = {
    'tm': 'TM', 'hh24': 'HH24', 'weekday': 'weekday', 'week_name': 'week_name', 'sum_qctr': 'sum_qctr',
    'n': 'n', 'sum_load': 'sum_load', 'n_mean_load': 'n_mean_load', 'elec': 'elec',
    'num': 'NUM', 'stn': 'STN', 'nph_ta': 'nph_ta', 'nph_hm': 'nph_hm', 'nph_ws_10m': 'nph_ws_10m',
    'nph_rn_60m': 'nph_rn_60m', 'nph_ta_chi': 'nph_ta_chi'
    }

    # 접두사 제거
    df.columns = [col.replace(prefix_to_remove, '') for col in df.columns]

    # 칼럼명 변경
    df = df.rename(columns=mapping)

    return df

def get_rscore(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    실제 값과 예측 값 사이의 피어슨 상관 계수(R-score)를 계산.
    
    이 함수는 두 데이터 집합 간의 선형 상관 관계를 측정하는 피어슨 상관 계수를 계산한다.
    반환되는 값은 -1과 1 사이이며:
     - 1은 완벽한 양의 선형 관계
     - -1은 완벽한 음의 선형 관계
     - 0은 선형 관계가 없음을 나타냄.

    Args:
        y_true (np.ndarray): 실제 값의 배열
        y_pred (np.ndarray): 예측 값의 배열

    Returns:
        float: 'y_true'와 'y_pred' 사이의 피어슨 상관 계수
    """
    return np.corrcoef(y_true, y_pred)[0, 1]

time: 0 ns (started: 2024-06-27 13:47:32 +09:00)


In [None]:
show_all_columns()
set_random_seed(seed_value=42)

# Load Data

In [3]:
org = pd.read_csv("../data/electric_train.csv")
test = pd.read_csv("../data/electric_test.csv")

train = org.copy()

time: 11.6 s (started: 2024-06-27 13:47:33 +09:00)


## Feature Engineering

각 데이터에 date features 추가

In [6]:
train = add_date_features(train, 'electric_train.tm', 'electric_train')
test = add_date_features(test, 'TM', 'electric_test')

time: 6.84 s (started: 2024-06-27 13:47:45 +09:00)


이상치 제거

In [7]:
outlier_base = 7

# 전력기상지수의 이상치 탐지 (예제로 Z-score 방법 사용)
z_scores = np.abs(stats.zscore(train['electric_train.elec']))
outliers = train[z_scores > outlier_base]

# 지점번호별 월 평균 전력기상지수 계산
mean_by_station_and_month = train.groupby(['electric_train.stn', 'electric_train.month'])['electric_train.elec'].mean().reset_index()
mean_by_station_and_month.rename(columns={'electric_train.elec': 'electric_train.elec_month'}, inplace=True)

# 원본 데이터와 월별 평균값을 결합 (merge)
train = train.reset_index()
train = pd.merge(train, mean_by_station_and_month, on=['electric_train.stn', 'electric_train.month'], how='left')

# 이상치를 월별 평균값으로 대체
train['electric_train.elec'] = np.where(z_scores > outlier_base, train['electric_train.elec_month'], train['electric_train.elec'])

# 필요에 따라 사용된 임시 열 제거
train.drop(columns=['electric_train.elec_month','index'], inplace=True)

time: 4.33 s (started: 2024-06-27 13:47:52 +09:00)


시간 특성에 대해 sin, cos 변환한 값 추가

In [10]:
train = add_time_features(train, 'electric_train.hh24', 'hour')
test = add_time_features(test, 'HH24', 'hour')

time: 203 ms (started: 2024-06-27 13:47:56 +09:00)


불쾌지수 추가

In [12]:
train = calculate_thi(train, 'electric_train.nph_ta', 'electric_train.nph_hm')
test = calculate_thi(test, 'nph_ta', 'nph_hm')

time: 125 ms (started: 2024-06-27 13:47:57 +09:00)


냉방지수 추가

In [14]:
train = calculate_cdd(train, 'electric_train.nph_ta')
test = calculate_cdd(test, 'nph_ta')

time: 3.44 s (started: 2024-06-27 13:48:00 +09:00)


In [16]:
train_df = train.copy()
test_df = test.copy()

time: 797 ms (started: 2024-06-27 13:48:04 +09:00)


In [17]:
train_df

Unnamed: 0,electric_train.num,electric_train.tm,electric_train.hh24,electric_train.n,electric_train.stn,electric_train.sum_qctr,electric_train.sum_load,electric_train.n_mean_load,electric_train.nph_ta,electric_train.nph_hm,electric_train.nph_ws_10m,electric_train.nph_rn_60m,electric_train.nph_ta_chi,electric_train.weekday,electric_train.week_name,electric_train.elec,electric_train.year,electric_train.month,electric_train.day,electric_train.quarter,electric_train.week,hour_sin_time,hour_cos_time,THI,CDD
0,4821,2021-01-01 01:00:00,1,11,884,6950,751.32,68.606449,2.2,62.7,1.8,0.0,-1.0,4,0,99.56,2021,1,1,1,53,2.588190e-01,0.965926,40.481506,0.0
1,4821,2021-01-01 02:00:00,2,11,884,6950,692.60,68.606449,2.3,63.1,2.1,0.0,-0.6,4,0,91.78,2021,1,1,1,53,5.000000e-01,0.866025,40.576487,0.0
2,4821,2021-01-01 03:00:00,3,11,884,6950,597.48,68.606449,2.2,62.4,2.5,0.0,-1.3,4,0,79.17,2021,1,1,1,53,7.071068e-01,0.707107,40.517872,0.0
3,4821,2021-01-01 04:00:00,4,11,884,6950,553.48,68.606449,1.7,63.5,1.7,0.0,-0.2,4,0,73.34,2021,1,1,1,53,8.660254e-01,0.500000,39.665205,0.0
4,4821,2021-01-01 05:00:00,5,11,884,6950,526.24,68.606449,1.7,63.0,1.6,0.0,-0.8,4,0,69.73,2021,1,1,1,53,9.659258e-01,0.258819,39.728290,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7593350,20947,2022-12-31 20:00:00,20,23,671,34200,6779.84,225.461986,2.7,46.3,3.1,0.0,-0.4,5,1,130.74,2022,12,31,4,52,-8.660254e-01,0.500000,43.103699,0.0
7593351,20947,2022-12-31 21:00:00,21,23,671,34200,6802.40,225.461986,2.6,46.8,3.1,0.0,-0.5,5,1,131.18,2022,12,31,4,52,-7.071068e-01,0.707107,42.918232,0.0
7593352,20947,2022-12-31 22:00:00,22,23,671,34200,6706.68,225.461986,2.4,47.4,2.1,0.0,0.2,5,1,129.33,2022,12,31,4,52,-5.000000e-01,0.866025,42.592024,0.0
7593353,20947,2022-12-31 23:00:00,23,23,671,34200,6355.88,225.461986,2.5,47.0,2.1,0.0,0.3,5,1,122.57,2022,12,31,4,52,-2.588190e-01,0.965926,42.767250,0.0


time: 16 ms (started: 2024-06-27 13:48:05 +09:00)


In [18]:
test_df

Unnamed: 0,NUM,TM,HH24,STN,nph_ta,nph_hm,nph_ws_10m,nph_rn_60m,nph_ta_chi,weekday,week_name,elect,electric_test.year,electric_test.month,electric_test.day,electric_test.quarter,electric_test.week,hour_sin_time,hour_cos_time,THI,CDD
0,4816,2023-01-01 01:00:00,1,752,3.0,68.6,2.9,0.0,-0.1,6,1.0,,2023,1,1,1,52,2.588190e-01,0.965926,40.957620,0.0
1,4816,2023-01-01 02:00:00,2,752,3.1,69.4,2.7,0.0,0.3,6,1.0,,2023,1,1,1,52,5.000000e-01,0.866025,41.016686,0.0
2,4816,2023-01-01 03:00:00,3,752,3.6,68.3,2.3,0.0,1.2,6,1.0,,2023,1,1,1,52,7.071068e-01,0.707107,41.883312,0.0
3,4816,2023-01-01 04:00:00,4,752,4.0,69.2,3.1,0.0,1.1,6,1.0,,2023,1,1,1,52,8.660254e-01,0.500000,42.384720,0.0
4,4816,2023-01-01 05:00:00,5,752,4.2,69.5,2.5,0.0,2.0,6,1.0,,2023,1,1,1,52,9.659258e-01,0.258819,42.653310,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2829473,12322,2023-12-31 20:00:00,20,901,5.6,70.8,3.2,0.0,5.8,6,1.0,,2023,12,31,4,52,-8.660254e-01,0.500000,44.636752,0.0
2829474,12322,2023-12-31 21:00:00,21,901,5.3,69.1,3.6,0.0,4.5,6,1.0,,2023,12,31,4,52,-7.071068e-01,0.707107,44.337377,0.0
2829475,12322,2023-12-31 22:00:00,22,901,5.1,70.6,3.0,0.0,5.1,6,1.0,,2023,12,31,4,52,-5.000000e-01,0.866025,43.899794,0.0
2829476,12322,2023-12-31 23:00:00,23,901,5.2,69.7,3.8,0.0,5.1,6,1.0,,2023,12,31,4,52,-2.588190e-01,0.965926,44.133056,0.0


time: 16 ms (started: 2024-06-27 13:48:07 +09:00)


# Train and Valid split

- train set: 2020년, 2021년
- valid set: 2022년

In [19]:
train_df.set_index('electric_train.tm').sort_index()

Unnamed: 0_level_0,electric_train.num,electric_train.hh24,electric_train.n,electric_train.stn,electric_train.sum_qctr,electric_train.sum_load,electric_train.n_mean_load,electric_train.nph_ta,electric_train.nph_hm,electric_train.nph_ws_10m,electric_train.nph_rn_60m,electric_train.nph_ta_chi,electric_train.weekday,electric_train.week_name,electric_train.elec,electric_train.year,electric_train.month,electric_train.day,electric_train.quarter,electric_train.week,hour_sin_time,hour_cos_time,THI,CDD
electric_train.tm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
2020-01-01 01:00:00,13615,1,39,140,63850,9732.96,250.490543,-8.5,74.5,0.9,0.0,-5.8,2,0,99.63,2020,1,1,1,1,2.588190e-01,0.965926,22.492325,0.0
2020-01-01 01:00:00,18235,1,20,565,14750,2401.12,116.010234,-8.8,25.6,1.9,0.0,-4.6,2,0,103.49,2020,1,1,1,1,2.588190e-01,0.965926,33.280928,0.0
2020-01-01 01:00:00,18234,1,32,565,71630,12139.16,363.252769,-8.8,25.6,1.9,0.0,-4.6,2,0,104.43,2020,1,1,1,1,2.588190e-01,0.965926,33.280928,0.0
2020-01-01 01:00:00,18233,1,28,512,45840,6723.84,225.924357,-6.3,31.3,2.5,0.0,-7.8,2,0,106.29,2020,1,1,1,1,2.588190e-01,0.965926,34.768919,0.0
2020-01-01 01:00:00,11272,1,44,941,76450,12949.24,287.250770,2.4,68.0,1.2,0.0,-5.3,2,0,102.45,2020,1,1,1,1,2.588190e-01,0.965926,40.135680,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-01-01 00:00:00,12468,24,40,152,50075,8018.88,187.277655,7.1,60.2,4.5,0.0,-2.3,6,1,107.05,2023,1,1,1,52,-2.449294e-16,1.000000,47.673858,0.0
2023-01-01 00:00:00,16453,24,57,617,85000,13949.76,228.064905,-5.5,81.1,0.2,0.0,-2.9,6,1,107.31,2023,1,1,1,52,-2.449294e-16,1.000000,25.831805,0.0
2023-01-01 00:00:00,11418,24,36,253,63735,11472.28,296.369472,5.6,59.5,1.3,0.0,-1.6,6,1,107.53,2023,1,1,1,52,-2.449294e-16,1.000000,45.626180,0.0
2023-01-01 00:00:00,10487,24,16,710,43950,5834.28,360.043027,7.1,56.1,5.9,0.0,-3.5,6,1,101.28,2023,1,1,1,52,-2.449294e-16,1.000000,47.971969,0.0


time: 1.52 s (started: 2024-06-27 13:48:16 +09:00)


train_df에서 각 격자넘버 별로 관측 기간 확인

In [20]:
tmp = train_df.groupby(by='electric_train.stn')['electric_train.tm'].agg(func=['max', 'min'])
tmp['기간'] = tmp['min'].dt.strftime('%Y-%m-%d') + ' ~ ' + tmp['max'].dt.strftime('%Y-%m-%d')

tmp

Unnamed: 0_level_0,max,min,기간
electric_train.stn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
98,2023-01-01,2020-01-01 01:00:00,2020-01-01 ~ 2023-01-01
99,2021-01-01,2020-01-01 01:00:00,2020-01-01 ~ 2021-01-01
101,2023-01-01,2020-01-01 01:00:00,2020-01-01 ~ 2023-01-01
104,2023-01-01,2020-01-01 01:00:00,2020-01-01 ~ 2023-01-01
106,2022-01-01,2020-01-01 01:00:00,2020-01-01 ~ 2022-01-01
...,...,...,...
942,2023-01-01,2020-01-01 01:00:00,2020-01-01 ~ 2023-01-01
943,2023-01-01,2020-01-01 01:00:00,2020-01-01 ~ 2023-01-01
950,2023-01-01,2020-01-01 01:00:00,2020-01-01 ~ 2023-01-01
974,2023-01-01,2020-01-01 01:00:00,2020-01-01 ~ 2023-01-01


time: 140 ms (started: 2024-06-27 13:48:18 +09:00)


대부분 2020년 1월 1일부터 2023년 1월 1일까지 관측한 것을 확인

In [21]:
tmp['기간'].value_counts()

2020-01-01 ~ 2023-01-01    158
2020-01-01 ~ 2022-01-01     12
2020-01-01 ~ 2021-01-01      6
2021-01-01 ~ 2022-01-01      5
2021-01-01 ~ 2023-01-01      3
2022-01-01 ~ 2023-01-01      1
Name: 기간, dtype: int64

time: 0 ns (started: 2024-06-27 13:48:20 +09:00)


따라서, 관측기간이 2020년 1월 1일부터 2023년 1월 1일까지인 데이터만을 사용

In [24]:
mask = tmp[tmp['기간'] == '2020-01-01 ~ 2023-01-01'].index

mask

Int64Index([ 98, 101, 104, 108, 112, 114, 119, 127, 129, 131,
            ...
            937, 938, 939, 940, 941, 942, 943, 950, 974, 991],
           dtype='int64', name='electric_train.stn', length=158)

time: 16 ms (started: 2024-06-27 13:48:25 +09:00)


In [25]:
data = train_df[train_df['electric_train.stn'].isin(mask)]
data = data.reset_index(drop=True)

time: 1.5 s (started: 2024-06-27 13:48:27 +09:00)


In [26]:
data

Unnamed: 0,electric_train.num,electric_train.tm,electric_train.hh24,electric_train.n,electric_train.stn,electric_train.sum_qctr,electric_train.sum_load,electric_train.n_mean_load,electric_train.nph_ta,electric_train.nph_hm,electric_train.nph_ws_10m,electric_train.nph_rn_60m,electric_train.nph_ta_chi,electric_train.weekday,electric_train.week_name,electric_train.elec,electric_train.year,electric_train.month,electric_train.day,electric_train.quarter,electric_train.week,hour_sin_time,hour_cos_time,THI,CDD
0,5565,2020-01-01 01:00:00,1,58,184,42250,4468.72,79.776051,2.3,65.8,1.7,0.0,-0.3,2,0,96.58,2020,1,1,1,1,2.588190e-01,0.965926,40.251866,0.0
1,5565,2020-01-01 02:00:00,2,58,184,42250,4101.00,79.776051,2.5,69.7,1.8,0.0,-0.2,2,0,88.63,2020,1,1,1,1,5.000000e-01,0.866025,40.082975,0.0
2,5565,2020-01-01 03:00:00,3,58,184,42250,3724.60,79.776051,2.8,69.5,2.3,0.0,-0.2,2,0,80.50,2020,1,1,1,1,7.071068e-01,0.707107,40.556040,0.0
3,5565,2020-01-01 04:00:00,4,58,184,42250,3484.40,79.776051,2.8,66.0,1.6,0.0,0.5,2,0,75.31,2020,1,1,1,1,8.660254e-01,0.500000,40.959520,0.0
4,5565,2020-01-01 05:00:00,5,58,184,42250,3409.12,79.776051,3.0,66.1,1.6,0.0,0.5,2,0,73.68,2020,1,1,1,1,9.659258e-01,0.258819,41.240870,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7198670,20947,2022-12-31 20:00:00,20,23,671,34200,6779.84,225.461986,2.7,46.3,3.1,0.0,-0.4,5,1,130.74,2022,12,31,4,52,-8.660254e-01,0.500000,43.103699,0.0
7198671,20947,2022-12-31 21:00:00,21,23,671,34200,6802.40,225.461986,2.6,46.8,3.1,0.0,-0.5,5,1,131.18,2022,12,31,4,52,-7.071068e-01,0.707107,42.918232,0.0
7198672,20947,2022-12-31 22:00:00,22,23,671,34200,6706.68,225.461986,2.4,47.4,2.1,0.0,0.2,5,1,129.33,2022,12,31,4,52,-5.000000e-01,0.866025,42.592024,0.0
7198673,20947,2022-12-31 23:00:00,23,23,671,34200,6355.88,225.461986,2.5,47.0,2.1,0.0,0.3,5,1,122.57,2022,12,31,4,52,-2.588190e-01,0.965926,42.767250,0.0


time: 16 ms (started: 2024-06-27 13:48:29 +09:00)


data의 column name 정리

In [28]:
data = clean_column_name_train(data)

time: 297 ms (started: 2024-06-27 13:48:34 +09:00)


In [29]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7198675 entries, 0 to 7198674
Data columns (total 25 columns):
 #   Column         Dtype         
---  ------         -----         
 0   NUM            int64         
 1   TM             datetime64[ns]
 2   HH24           int64         
 3   n              int64         
 4   STN            int64         
 5   sum_qctr       int64         
 6   sum_load       float64       
 7   n_mean_load    float64       
 8   nph_ta         float64       
 9   nph_hm         float64       
 10  nph_ws_10m     float64       
 11  nph_rn_60m     float64       
 12  nph_ta_chi     float64       
 13  weekday        int64         
 14  week_name      int64         
 15  elec           float64       
 16  year           int64         
 17  month          int64         
 18  day            int64         
 19  quarter        int64         
 20  week           int32         
 21  hour_sin_time  float64       
 22  hour_cos_time  float64       
 23  THI    

mask로 filtering한 데이터 중 2020년과 2021년 데이터를 train_data로, 2022년 관측 데이터를 valid_data로 선택

In [31]:
train_data, valid_data = data[data['TM'].dt.year < 2022], data[data['TM'].dt.year >= 2022]

time: 1.11 s (started: 2024-06-27 13:48:39 +09:00)


train_data 중 학습에 사용하지 않을 feature 제거

In [32]:
train_remove_columns = ['NUM', 'elec', 'TM', 'n', 'sum_qctr', 'sum_load', 'n_mean_load']

train_features = [column for column in train_data.columns if column not in train_remove_columns]

time: 0 ns (started: 2024-06-27 13:48:41 +09:00)


In [35]:
train_features

['HH24',
 'STN',
 'nph_ta',
 'nph_hm',
 'nph_ws_10m',
 'nph_rn_60m',
 'nph_ta_chi',
 'weekday',
 'week_name',
 'year',
 'month',
 'day',
 'quarter',
 'week',
 'hour_sin_time',
 'hour_cos_time',
 'THI',
 'CDD']

time: 0 ns (started: 2024-06-27 13:48:45 +09:00)


test_df 중 종속변수인 'elect'와 사용하지 않을 feature인 'TM' 제거

In [33]:
test_df.columns

Index(['NUM', 'TM', 'HH24', 'STN', 'nph_ta', 'nph_hm', 'nph_ws_10m',
       'nph_rn_60m', 'nph_ta_chi', 'weekday', 'week_name', 'elect',
       'electric_test.year', 'electric_test.month', 'electric_test.day',
       'electric_test.quarter', 'electric_test.week', 'hour_sin_time',
       'hour_cos_time', 'THI', 'CDD'],
      dtype='object')

time: 0 ns (started: 2024-06-27 13:48:42 +09:00)


In [34]:
test_remove_columns = ['elect', 'TM']

test_features = [column for column in test_df.columns if column not in test_remove_columns]

time: 0 ns (started: 2024-06-27 13:48:44 +09:00)


In [36]:
test_features

['NUM',
 'HH24',
 'STN',
 'nph_ta',
 'nph_hm',
 'nph_ws_10m',
 'nph_rn_60m',
 'nph_ta_chi',
 'weekday',
 'week_name',
 'electric_test.year',
 'electric_test.month',
 'electric_test.day',
 'electric_test.quarter',
 'electric_test.week',
 'hour_sin_time',
 'hour_cos_time',
 'THI',
 'CDD']

time: 0 ns (started: 2024-06-27 13:48:49 +09:00)


선택한 feature로 X_train, X_valid, y_train, y_valid를 만들고, 범주형 특성도 골라냄

In [37]:
X_train = train_data[train_features]
y_train = train_data['elec']
X_valid = valid_data[train_features]
y_valid = valid_data['elec']


categorical_features = ['weekday', 'week_name', 'quarter', 'STN']
numeric_features = [feature for feature in train_features if feature not in categorical_features]

time: 422 ms (started: 2024-06-27 13:48:52 +09:00)


마찬가지로, 최종 예측 테스트세트도 만듦

In [38]:
X_test = test_df[test_features]

time: 187 ms (started: 2024-06-27 13:48:54 +09:00)


앞서 지정한 범주형 특성은 OneHotEncoding을, 그외 수치형 특성은 MinMaxScaling 변환

In [39]:
numeric_transformer = Pipeline(steps=[
    ('scaler', MinMaxScaler())
])

categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# 칼럼 변환기 설정
preprocessor = ColumnTransformer(
    transformers=[
        ('NUM', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# 변환기 적용
X_train = preprocessor.fit_transform(X_train)
X_valid = preprocessor.transform(X_valid)

print(X_train.shape)
print(X_valid.shape)

(4894519, 185)
(2304156, 185)
time: 8.45 s (started: 2024-06-27 13:48:57 +09:00)


X_test의 column name이 앞서 지정한 이름과 달라 통일시켜줌

In [40]:
X_test.columns

Index(['NUM', 'HH24', 'STN', 'nph_ta', 'nph_hm', 'nph_ws_10m', 'nph_rn_60m',
       'nph_ta_chi', 'weekday', 'week_name', 'electric_test.year',
       'electric_test.month', 'electric_test.day', 'electric_test.quarter',
       'electric_test.week', 'hour_sin_time', 'hour_cos_time', 'THI', 'CDD'],
      dtype='object')

time: 0 ns (started: 2024-06-27 13:49:06 +09:00)


In [41]:
for column in X_test.columns:
    if column.startswith('electric_test.'):
        X_test = X_test.rename(columns={column:column.split('.')[1]})

time: 406 ms (started: 2024-06-27 13:49:06 +09:00)


In [42]:
X_test.columns

Index(['NUM', 'HH24', 'STN', 'nph_ta', 'nph_hm', 'nph_ws_10m', 'nph_rn_60m',
       'nph_ta_chi', 'weekday', 'week_name', 'year', 'month', 'day', 'quarter',
       'week', 'hour_sin_time', 'hour_cos_time', 'THI', 'CDD'],
      dtype='object')

time: 0 ns (started: 2024-06-27 13:49:08 +09:00)


X_test도 앞서 만든 변환기로 변환시켜줌

In [43]:
X_test = preprocessor.transform(X_test)

print(X_test.shape)

(2829478, 185)
time: 2.19 s (started: 2024-06-27 13:49:09 +09:00)


# Hyperparameter 튜닝

해당 시계열 데이터에 잘 어울리는 모델로 다음과 같은 3가지를 선택
- CatBoostRegressor
- LGBMRegressor
- XGBoostRegressor

## CatBoostRegressor

In [39]:
def objective_catboost(trial):
    cbrm_param = {
        'iterations': trial.suggest_int("iterations", 4000, 25000),
        'od_wait': trial.suggest_int('od_wait', 500, 2300),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-5, 100, log=True),
        'subsample': trial.suggest_float('subsample', 0, 1),
        'random_strength': trial.suggest_float('random_strength', 10, 50),
        'depth': trial.suggest_int('depth', 1, 15),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 30),
        'leaf_estimation_iterations': trial.suggest_int('leaf_estimation_iterations', 1, 15),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.01, 100, log=True),
        'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.4, 1.0),
    }

    model_cbrm = CatBoostRegressor(**cbrm_param, verbose=0)
    model_cbrm.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], early_stopping_rounds=25, verbose=100)
    predictions = model_cbrm.predict(X_valid)
    rscore = np.corrcoef(y_valid.squeeze(), predictions.squeeze())[0, 1]
    return rscore

study_catboost = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
study_catboost.optimize(objective_catboost, n_trials=50)

[I 2024-06-26 22:49:19,264] A new study created in memory with name: no-name-8dbde11b-8c36-471c-ada7-6f853c5665cb


0:	learn: 22.1198543	test: 22.8981295	best: 22.8981295 (0)	total: 134ms	remaining: 26m 26s
100:	learn: 9.9392260	test: 10.2409679	best: 10.2409679 (100)	total: 11.7s	remaining: 22m 45s
200:	learn: 9.4040252	test: 9.6599507	best: 9.6599507 (200)	total: 23.3s	remaining: 22m 31s
300:	learn: 9.2255396	test: 9.4594528	best: 9.4586674 (296)	total: 35.1s	remaining: 22m 27s
400:	learn: 9.1385554	test: 9.3584713	best: 9.3577231 (399)	total: 46.4s	remaining: 22m 6s
500:	learn: 9.0843719	test: 9.3080886	best: 9.3060003 (499)	total: 57.6s	remaining: 21m 47s
600:	learn: 9.0455040	test: 9.2690153	best: 9.2690153 (600)	total: 1m 8s	remaining: 21m 28s
700:	learn: 9.0150100	test: 9.2483838	best: 9.2427153 (694)	total: 1m 19s	remaining: 21m 12s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 9.242715276
bestIteration = 694

Shrink model to first 695 iterations.


[I 2024-06-26 22:50:46,586] Trial 0 finished with value: 0.9332660549403757 and parameters: {'iterations': 11865, 'od_wait': 2212, 'learning_rate': 0.29106359131330695, 'reg_lambda': 0.1550991398759431, 'subsample': 0.15601864044243652, 'random_strength': 16.239780813448107, 'depth': 1, 'min_data_in_leaf': 26, 'leaf_estimation_iterations': 10, 'bagging_temperature': 6.79657809075816, 'colsample_bylevel': 0.41235069657748147}. Best is trial 0 with value: 0.9332660549403757.


0:	learn: 24.3579018	test: 25.0994847	best: 25.0994847 (0)	total: 247ms	remaining: 1h 40m 29s
100:	learn: 7.8974369	test: 8.1925407	best: 8.1925407 (100)	total: 28s	remaining: 1h 52m 17s
200:	learn: 6.5047026	test: 7.0992337	best: 7.0992337 (200)	total: 59.8s	remaining: 1h 59m 52s
300:	learn: 6.0802822	test: 6.8892220	best: 6.8892220 (300)	total: 1m 24s	remaining: 1h 52m 40s
400:	learn: 5.7956305	test: 6.8403765	best: 6.8339083 (399)	total: 1m 49s	remaining: 1h 49m 10s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.825357003
bestIteration = 430

Shrink model to first 431 iterations.


[I 2024-06-26 22:52:55,671] Trial 1 finished with value: 0.964398770257953 and parameters: {'iterations': 24369, 'od_wait': 1999, 'learning_rate': 0.0265875439832727, 'reg_lambda': 0.00018740223688836313, 'subsample': 0.18340450985343382, 'random_strength': 22.16968971838151, 'depth': 8, 'min_data_in_leaf': 13, 'leaf_estimation_iterations': 5, 'bagging_temperature': 2.801635158716261, 'colsample_bylevel': 0.4836963163912251}. Best is trial 1 with value: 0.964398770257953.


0:	learn: 23.3382516	test: 24.0384581	best: 24.0384581 (0)	total: 357ms	remaining: 1h 21s
100:	learn: 5.9470077	test: 6.9278976	best: 6.9278976 (100)	total: 31.6s	remaining: 52m 14s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.867006545
bestIteration = 147

Shrink model to first 148 iterations.


[I 2024-06-26 22:53:55,797] Trial 2 finished with value: 0.9639352526796601 and parameters: {'iterations': 10135, 'od_wait': 1159, 'learning_rate': 0.08168455894760163, 'reg_lambda': 3.1349580210969097, 'subsample': 0.19967378215835974, 'random_strength': 30.569377536544465, 'depth': 9, 'min_data_in_leaf': 2, 'leaf_estimation_iterations': 10, 'bagging_temperature': 0.04809461967501574, 'colsample_bylevel': 0.43903095579116774}. Best is trial 1 with value: 0.964398770257953.


0:	learn: 17.6476143	test: 18.2010829	best: 18.2010829 (0)	total: 252ms	remaining: 1h 40m 21s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 7.238906989
bestIteration = 48

Shrink model to first 49 iterations.


[I 2024-06-26 22:54:18,449] Trial 3 finished with value: 0.959953165668268 and parameters: {'iterations': 23927, 'od_wait': 2239, 'learning_rate': 0.4138040112561014, 'reg_lambda': 0.0013561145768453491, 'subsample': 0.09767211400638387, 'random_strength': 37.36932106048627, 'depth': 7, 'min_data_in_leaf': 4, 'leaf_estimation_iterations': 8, 'bagging_temperature': 0.013726318898045875, 'colsample_bylevel': 0.9455922412472693}. Best is trial 1 with value: 0.964398770257953.


0:	learn: 24.0117813	test: 24.7188513	best: 24.7188513 (0)	total: 1.34s	remaining: 3h 30m 57s
100:	learn: 5.4794445	test: 7.0492700	best: 7.0429865 (97)	total: 1m 46s	remaining: 2h 43m 41s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 7.011902495
bestIteration = 133

Shrink model to first 134 iterations.


[I 2024-06-26 22:57:13,627] Trial 4 finished with value: 0.9622125395784247 and parameters: {'iterations': 9434, 'od_wait': 1693, 'learning_rate': 0.0420167205437253, 'reg_lambda': 0.043699467835955796, 'subsample': 0.5467102793432796, 'random_strength': 17.394178221021082, 'depth': 15, 'min_data_in_leaf': 24, 'leaf_estimation_iterations': 15, 'bagging_temperature': 37.95853142670641, 'colsample_bylevel': 0.7587399872866512}. Best is trial 1 with value: 0.964398770257953.


0:	learn: 24.4298003	test: 25.1632653	best: 25.1632653 (0)	total: 238ms	remaining: 1h 32m 32s
100:	learn: 9.2668427	test: 9.5577511	best: 9.5577511 (100)	total: 22.9s	remaining: 1h 27m 47s
200:	learn: 7.6005902	test: 7.8583238	best: 7.8583238 (200)	total: 46.8s	remaining: 1h 29m 48s
300:	learn: 6.9923619	test: 7.3430048	best: 7.3430048 (300)	total: 1m 9s	remaining: 1h 28m 51s
400:	learn: 6.6810372	test: 7.1086118	best: 7.1086118 (400)	total: 1m 31s	remaining: 1h 27m 45s
500:	learn: 6.4323557	test: 6.9957634	best: 6.9941365 (496)	total: 1m 54s	remaining: 1h 27m 13s
600:	learn: 6.2427212	test: 6.9290789	best: 6.9287868 (598)	total: 2m 17s	remaining: 1h 26m 48s
700:	learn: 6.0749987	test: 6.8856741	best: 6.8856741 (700)	total: 2m 41s	remaining: 1h 26m 51s
800:	learn: 5.9490729	test: 6.8535021	best: 6.8530506 (797)	total: 3m 5s	remaining: 1h 27m 2s
900:	learn: 5.8228214	test: 6.8320898	best: 6.8293850 (890)	total: 3m 29s	remaining: 1h 26m 55s
1000:	learn: 5.7331475	test: 6.8043655	best: 6.

[I 2024-06-26 23:01:26,980] Trial 5 finished with value: 0.964740551136595 and parameters: {'iterations': 23360, 'od_wait': 659, 'learning_rate': 0.024658447214487383, 'reg_lambda': 2.0729604791291162e-05, 'subsample': 0.32533033076326434, 'random_strength': 25.54709158757928, 'depth': 5, 'min_data_in_leaf': 25, 'leaf_estimation_iterations': 6, 'bagging_temperature': 0.13296521457299515, 'colsample_bylevel': 0.7256176498949491}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 24.7325869	test: 25.4923154	best: 25.4923154 (0)	total: 159ms	remaining: 18m 28s
100:	learn: 17.4335966	test: 18.0224822	best: 18.0224822 (100)	total: 15.9s	remaining: 17m 56s
200:	learn: 14.4552912	test: 14.8902343	best: 14.8902343 (200)	total: 31.4s	remaining: 17m 35s
300:	learn: 13.1079218	test: 13.5404579	best: 13.5404579 (300)	total: 46.9s	remaining: 17m 18s
400:	learn: 12.3942144	test: 12.8304770	best: 12.8304770 (400)	total: 1m 2s	remaining: 16m 54s
500:	learn: 11.9021876	test: 12.3502819	best: 12.3502819 (500)	total: 1m 17s	remaining: 16m 36s
600:	learn: 11.5251449	test: 11.9607238	best: 11.9607238 (600)	total: 1m 32s	remaining: 16m 20s
700:	learn: 11.2308838	test: 11.6504534	best: 11.6504534 (700)	total: 1m 47s	remaining: 16m 3s
800:	learn: 10.9948970	test: 11.3940521	best: 11.3940521 (800)	total: 2m 3s	remaining: 15m 48s
900:	learn: 10.7995398	test: 11.1893842	best: 11.1893842 (900)	total: 2m 18s	remaining: 15m 34s
1000:	learn: 10.6348811	test: 11.0087266	best: 11.0

[I 2024-06-26 23:18:42,471] Trial 6 finished with value: 0.9325381133635324 and parameters: {'iterations': 6959, 'od_wait': 1944, 'learning_rate': 0.01409617514981587, 'reg_lambda': 80.9484535228614, 'subsample': 0.7722447692966574, 'random_strength': 17.948627261366894, 'depth': 1, 'min_data_in_leaf': 25, 'leaf_estimation_iterations': 11, 'bagging_temperature': 8.241925264876453, 'colsample_bylevel': 0.8627622080115674}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 24.7019025	test: 25.4618918	best: 25.4618918 (0)	total: 141ms	remaining: 13m 3s
100:	learn: 16.6169290	test: 17.1571222	best: 17.1571222 (100)	total: 12.1s	remaining: 10m 53s
200:	learn: 13.8022321	test: 14.2324115	best: 14.2324115 (200)	total: 24.4s	remaining: 10m 49s
300:	learn: 12.6665595	test: 13.1091626	best: 13.1091626 (300)	total: 36.3s	remaining: 10m 33s
400:	learn: 12.0341873	test: 12.4695205	best: 12.4695205 (400)	total: 47.8s	remaining: 10m 14s
500:	learn: 11.5738749	test: 12.0022713	best: 12.0022713 (500)	total: 59.4s	remaining: 9m 59s
600:	learn: 11.2369235	test: 11.6516423	best: 11.6516423 (600)	total: 1m 11s	remaining: 9m 49s
700:	learn: 10.9782719	test: 11.3772888	best: 11.3772888 (700)	total: 1m 24s	remaining: 9m 42s
800:	learn: 10.7636194	test: 11.1409605	best: 11.1409605 (800)	total: 1m 36s	remaining: 9m 34s
900:	learn: 10.5813857	test: 10.9414241	best: 10.9414241 (900)	total: 1m 51s	remaining: 9m 34s
1000:	learn: 10.4196572	test: 10.7820404	best: 10.782040

[I 2024-06-26 23:31:11,987] Trial 7 finished with value: 0.9322640888155854 and parameters: {'iterations': 5555, 'od_wait': 1145, 'learning_rate': 0.017050539260269296, 'reg_lambda': 11.00839441018131, 'subsample': 0.6232981268275579, 'random_strength': 23.235920994105967, 'depth': 1, 'min_data_in_leaf': 10, 'leaf_estimation_iterations': 5, 'bagging_temperature': 8.28752236376816, 'colsample_bylevel': 0.7825344828131279}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 24.5306903	test: 25.2752149	best: 25.2752149 (0)	total: 545ms	remaining: 3h 25m 36s
100:	learn: 8.9630965	test: 9.3531878	best: 9.3531878 (100)	total: 51.1s	remaining: 3h 9m 56s
200:	learn: 6.5456822	test: 7.2880899	best: 7.2880899 (200)	total: 2m 2s	remaining: 3h 47m 14s
300:	learn: 5.9148238	test: 6.9650408	best: 6.9650408 (300)	total: 3m 10s	remaining: 3h 54m 57s
400:	learn: 5.6212777	test: 6.8833450	best: 6.8804123 (397)	total: 4m 14s	remaining: 3h 54m 42s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.880412295
bestIteration = 397

Shrink model to first 398 iterations.


[I 2024-06-26 23:35:43,997] Trial 8 finished with value: 0.9637211955990015 and parameters: {'iterations': 22632, 'od_wait': 1350, 'learning_rate': 0.017345566642360945, 'reg_lambda': 0.983362200838292, 'subsample': 0.7607850486168974, 'random_strength': 32.45108790277985, 'depth': 12, 'min_data_in_leaf': 15, 'leaf_estimation_iterations': 8, 'bagging_temperature': 0.5130551760589835, 'colsample_bylevel': 0.4152514760464571}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 21.7649089	test: 22.4403990	best: 22.4403990 (0)	total: 250ms	remaining: 26m 8s
100:	learn: 6.7294638	test: 7.2090545	best: 7.1850158 (97)	total: 22.2s	remaining: 22m 35s
200:	learn: 5.8737792	test: 6.8361044	best: 6.8361044 (200)	total: 45s	remaining: 22m 37s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.827007115
bestIteration = 204

Shrink model to first 205 iterations.


[I 2024-06-26 23:36:40,782] Trial 9 finished with value: 0.9645990842734916 and parameters: {'iterations': 6265, 'od_wait': 556, 'learning_rate': 0.18742210985555696, 'reg_lambda': 0.0015866892068163167, 'subsample': 0.5085706911647028, 'random_strength': 46.30265895704372, 'depth': 4, 'min_data_in_leaf': 13, 'leaf_estimation_iterations': 12, 'bagging_temperature': 0.08226074946221039, 'colsample_bylevel': 0.44618794589727584}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 13.8025868	test: 13.9431160	best: 13.9431160 (0)	total: 114ms	remaining: 34m 9s
100:	learn: 5.6438879	test: 7.2266987	best: 7.1325548 (95)	total: 16.2s	remaining: 47m 34s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 7.132554768
bestIteration = 95

Shrink model to first 96 iterations.


[I 2024-06-26 23:37:05,692] Trial 10 finished with value: 0.9616424217318739 and parameters: {'iterations': 17914, 'od_wait': 510, 'learning_rate': 0.8691089486124977, 'reg_lambda': 1.3357744409416387e-05, 'subsample': 0.3316175144475764, 'random_strength': 41.613885357541506, 'depth': 4, 'min_data_in_leaf': 30, 'leaf_estimation_iterations': 1, 'bagging_temperature': 0.3299263281612618, 'colsample_bylevel': 0.5684565725438225}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 23.1002886	test: 23.7922518	best: 23.7922518 (0)	total: 285ms	remaining: 1h 20m 21s
100:	learn: 6.6345172	test: 7.1179176	best: 7.1179176 (100)	total: 25.7s	remaining: 1h 11m 13s
200:	learn: 5.8947879	test: 6.8605327	best: 6.8605327 (200)	total: 50.8s	remaining: 1h 10m 23s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.83772176
bestIteration = 231

Shrink model to first 232 iterations.


[I 2024-06-26 23:38:16,886] Trial 11 finished with value: 0.9644035778241137 and parameters: {'iterations': 16921, 'od_wait': 542, 'learning_rate': 0.11782800596762827, 'reg_lambda': 0.0024981122910129, 'subsample': 0.3947976271602265, 'random_strength': 49.634334278041514, 'depth': 5, 'min_data_in_leaf': 20, 'leaf_estimation_iterations': 15, 'bagging_temperature': 0.10136998263702947, 'colsample_bylevel': 0.6166547866428138}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 23.1180791	test: 23.8139070	best: 23.8139070 (0)	total: 196ms	remaining: 1h 1m 19s
100:	learn: 7.1552949	test: 7.3960779	best: 7.3960779 (100)	total: 18.7s	remaining: 57m 26s
200:	learn: 6.2177524	test: 6.9623267	best: 6.9623267 (200)	total: 38.2s	remaining: 58m 43s
300:	learn: 5.8385159	test: 6.8617537	best: 6.8423885 (290)	total: 1m 5s	remaining: 1h 6m 50s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.842388535
bestIteration = 290

Shrink model to first 291 iterations.


[I 2024-06-26 23:39:32,091] Trial 12 finished with value: 0.9643137953268338 and parameters: {'iterations': 18740, 'od_wait': 822, 'learning_rate': 0.12001629444458148, 'reg_lambda': 1.7183087219846452e-05, 'subsample': 0.4160029864447672, 'random_strength': 49.840661269897424, 'depth': 4, 'min_data_in_leaf': 8, 'leaf_estimation_iterations': 5, 'bagging_temperature': 0.12936814164755678, 'colsample_bylevel': 0.6598519583572935}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 23.9068988	test: 24.6122042	best: 24.6122042 (0)	total: 460ms	remaining: 1h 47m 45s
100:	learn: 7.3274676	test: 7.5762376	best: 7.5762376 (100)	total: 49.6s	remaining: 1h 54m 12s
200:	learn: 6.4267020	test: 6.9607186	best: 6.9607186 (200)	total: 1m 38s	remaining: 1h 53m 16s
300:	learn: 6.0177305	test: 6.8332117	best: 6.8309605 (287)	total: 2m 25s	remaining: 1h 50m 31s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.79891106
bestIteration = 343

Shrink model to first 344 iterations.


[I 2024-06-26 23:42:30,584] Trial 13 finished with value: 0.9646874078595566 and parameters: {'iterations': 14050, 'od_wait': 826, 'learning_rate': 0.05480206656613185, 'reg_lambda': 0.000916042320509428, 'subsample': 0.9653379460654554, 'random_strength': 41.55481916168644, 'depth': 6, 'min_data_in_leaf': 19, 'leaf_estimation_iterations': 13, 'bagging_temperature': 0.01681464945811167, 'colsample_bylevel': 0.5694035371173286}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 23.8797344	test: 24.6081406	best: 24.6081406 (0)	total: 366ms	remaining: 1h 26m 2s
100:	learn: 6.3133953	test: 7.0108030	best: 7.0108030 (100)	total: 41.2s	remaining: 1h 35m 15s
200:	learn: 5.5948790	test: 6.8513660	best: 6.8454281 (181)	total: 1m 24s	remaining: 1h 37m 39s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.845428073
bestIteration = 181

Shrink model to first 182 iterations.


[I 2024-06-26 23:44:03,237] Trial 14 finished with value: 0.9641912633994432 and parameters: {'iterations': 14121, 'od_wait': 909, 'learning_rate': 0.05179053512731636, 'reg_lambda': 0.0001407607102846306, 'subsample': 0.9375611593660228, 'random_strength': 36.54376012828619, 'depth': 10, 'min_data_in_leaf': 20, 'leaf_estimation_iterations': 1, 'bagging_temperature': 0.015255634251256289, 'colsample_bylevel': 0.553243634739046}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 24.1865650	test: 24.9038850	best: 24.9038850 (0)	total: 422ms	remaining: 2h 20m 53s
100:	learn: 7.6364765	test: 7.9190552	best: 7.9190552 (100)	total: 39.4s	remaining: 2h 9m 45s
200:	learn: 6.5101283	test: 7.1068812	best: 7.1068812 (200)	total: 1m 19s	remaining: 2h 10m 25s
300:	learn: 6.1240031	test: 6.9272242	best: 6.9267145 (299)	total: 1m 57s	remaining: 2h 8m 21s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.875897021
bestIteration = 366

Shrink model to first 367 iterations.


[I 2024-06-26 23:46:41,388] Trial 15 finished with value: 0.9638992310167405 and parameters: {'iterations': 20046, 'od_wait': 850, 'learning_rate': 0.03626047213972506, 'reg_lambda': 0.00015451492113315244, 'subsample': 0.9468049264135636, 'random_strength': 11.180017810840845, 'depth': 6, 'min_data_in_leaf': 19, 'leaf_estimation_iterations': 13, 'bagging_temperature': 0.02476190197132804, 'colsample_bylevel': 0.723041307077803}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 23.5450057	test: 24.2895106	best: 24.2895106 (0)	total: 382ms	remaining: 1h 34m 10s
100:	learn: 5.6078595	test: 6.9389667	best: 6.9070894 (79)	total: 57.5s	remaining: 2h 19m 14s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.907089396
bestIteration = 79

Shrink model to first 80 iterations.


[I 2024-06-26 23:47:46,813] Trial 16 finished with value: 0.9634834048553582 and parameters: {'iterations': 14783, 'od_wait': 1043, 'learning_rate': 0.07049364392362123, 'reg_lambda': 0.007421017314322173, 'subsample': 0.6565427410150755, 'random_strength': 24.858721515991522, 'depth': 11, 'min_data_in_leaf': 29, 'leaf_estimation_iterations': 3, 'bagging_temperature': 0.2671873415336477, 'colsample_bylevel': 0.8279726503470002}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 24.4656767	test: 25.2096808	best: 25.2096808 (0)	total: 169ms	remaining: 1h 6s
100:	learn: 10.6629895	test: 11.0109125	best: 11.0109125 (100)	total: 15.1s	remaining: 53m 9s
200:	learn: 8.7999992	test: 9.0230722	best: 9.0230722 (200)	total: 31.4s	remaining: 55m 8s
300:	learn: 8.0816135	test: 8.2676018	best: 8.2676018 (300)	total: 47.3s	remaining: 55m 12s
400:	learn: 7.6989109	test: 7.8700336	best: 7.8700336 (400)	total: 1m 3s	remaining: 55m 6s
500:	learn: 7.3996124	test: 7.5764895	best: 7.5764895 (500)	total: 1m 17s	remaining: 54m 2s
600:	learn: 7.2025988	test: 7.4175966	best: 7.4175966 (600)	total: 1m 32s	remaining: 53m 21s
700:	learn: 7.0158498	test: 7.3115888	best: 7.3115888 (700)	total: 1m 47s	remaining: 52m 57s
800:	learn: 6.8746968	test: 7.2174191	best: 7.2174191 (800)	total: 2m 3s	remaining: 52m 38s
900:	learn: 6.7587415	test: 7.1513698	best: 7.1513698 (900)	total: 2m 18s	remaining: 52m 27s
1000:	learn: 6.6589357	test: 7.0995917	best: 7.0995917 (1000)	total: 2m 34s	rema

[I 2024-06-26 23:52:00,792] Trial 17 finished with value: 0.9631182295556876 and parameters: {'iterations': 21365, 'od_wait': 1515, 'learning_rate': 0.02594497547249185, 'reg_lambda': 7.989592948682046e-05, 'subsample': 0.030527023535454445, 'random_strength': 27.384729859749385, 'depth': 3, 'min_data_in_leaf': 22, 'leaf_estimation_iterations': 7, 'bagging_temperature': 1.3255240424677173, 'colsample_bylevel': 0.660462160819921}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 24.3908189	test: 25.1224834	best: 25.1224834 (0)	total: 310ms	remaining: 1h 18m 19s
100:	learn: 8.3796389	test: 8.6466293	best: 8.6466293 (100)	total: 28.8s	remaining: 1h 11m 43s
200:	learn: 6.9164501	test: 7.3332832	best: 7.3332832 (200)	total: 58.3s	remaining: 1h 12m 27s
300:	learn: 6.4760250	test: 7.0337264	best: 7.0337264 (300)	total: 1m 26s	remaining: 1h 11m 10s
400:	learn: 6.1521816	test: 6.9002011	best: 6.9002011 (400)	total: 1m 54s	remaining: 1h 10m 16s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.84072225
bestIteration = 474

Shrink model to first 475 iterations.


[I 2024-06-26 23:54:30,794] Trial 18 finished with value: 0.9642523175441111 and parameters: {'iterations': 15181, 'od_wait': 669, 'learning_rate': 0.027402266839797258, 'reg_lambda': 0.0005746035058010457, 'subsample': 0.29767076318089036, 'random_strength': 41.53175069788862, 'depth': 7, 'min_data_in_leaf': 17, 'leaf_estimation_iterations': 7, 'bagging_temperature': 0.038943171975142646, 'colsample_bylevel': 0.5524244437789395}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 24.6801917	test: 25.4310810	best: 25.4310810 (0)	total: 327ms	remaining: 1h 7m
100:	learn: 13.0532593	test: 13.3909493	best: 13.3909493 (100)	total: 35.9s	remaining: 1h 12m 23s
200:	learn: 9.4109178	test: 9.6822301	best: 9.6822301 (200)	total: 1m 12s	remaining: 1h 12m 42s
300:	learn: 8.0999283	test: 8.3787955	best: 8.3787955 (300)	total: 1m 50s	remaining: 1h 13m 14s
400:	learn: 7.4755781	test: 7.7598620	best: 7.7598620 (400)	total: 2m 27s	remaining: 1h 13m 1s
500:	learn: 7.1046262	test: 7.4309591	best: 7.4309591 (500)	total: 3m 4s	remaining: 1h 12m 19s
600:	learn: 6.8672489	test: 7.2352357	best: 7.2352357 (600)	total: 3m 40s	remaining: 1h 11m 27s
700:	learn: 6.6820641	test: 7.1069860	best: 7.1069860 (700)	total: 4m 16s	remaining: 1h 10m 55s
800:	learn: 6.5150763	test: 7.0238427	best: 7.0238215 (799)	total: 4m 53s	remaining: 1h 10m 17s
900:	learn: 6.4017718	test: 6.9625165	best: 6.9625165 (900)	total: 5m 30s	remaining: 1h 9m 42s
1000:	learn: 6.2740294	test: 6.9218589	best: 6.9

[I 2024-06-27 00:01:54,087] Trial 19 finished with value: 0.9639071993000539 and parameters: {'iterations': 12315, 'od_wait': 776, 'learning_rate': 0.010788803713634339, 'reg_lambda': 0.009713083839070448, 'subsample': 0.8314553684028665, 'random_strength': 33.73762073910783, 'depth': 6, 'min_data_in_leaf': 27, 'leaf_estimation_iterations': 13, 'bagging_temperature': 0.16950622017480563, 'colsample_bylevel': 0.9817629182950665}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 23.7060052	test: 24.4027328	best: 24.4027328 (0)	total: 263ms	remaining: 1h 28m 25s
100:	learn: 5.5647227	test: 6.9414417	best: 6.9414417 (100)	total: 48s	remaining: 2h 38m 42s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.934087509
bestIteration = 107

Shrink model to first 108 iterations.


[I 2024-06-27 00:03:03,942] Trial 20 finished with value: 0.9631140544083596 and parameters: {'iterations': 20143, 'od_wait': 1373, 'learning_rate': 0.0644650985552179, 'reg_lambda': 3.529671729609833e-05, 'subsample': 0.26569315978831315, 'random_strength': 40.164756308107975, 'depth': 13, 'min_data_in_leaf': 23, 'leaf_estimation_iterations': 3, 'bagging_temperature': 1.126056197011584, 'colsample_bylevel': 0.8722959816290402}. Best is trial 5 with value: 0.964740551136595.


0:	learn: 21.8107026	test: 22.5057394	best: 22.5057394 (0)	total: 227ms	remaining: 15m 52s
100:	learn: 7.0559779	test: 7.3623684	best: 7.3623684 (100)	total: 20.1s	remaining: 13m 34s
200:	learn: 6.3856013	test: 7.0196745	best: 7.0140771 (195)	total: 40.1s	remaining: 13m 18s
300:	learn: 6.0289746	test: 6.8937167	best: 6.8937167 (300)	total: 1m 1s	remaining: 13m 21s
400:	learn: 5.7914851	test: 6.8000361	best: 6.7969755 (399)	total: 1m 24s	remaining: 13m 20s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.771988913
bestIteration = 463

Shrink model to first 464 iterations.


[I 2024-06-27 00:04:53,251] Trial 21 finished with value: 0.9653193776547078 and parameters: {'iterations': 4205, 'od_wait': 653, 'learning_rate': 0.20974518042426216, 'reg_lambda': 0.0006470673379163174, 'subsample': 0.4504819492171165, 'random_strength': 46.36458402075453, 'depth': 3, 'min_data_in_leaf': 12, 'leaf_estimation_iterations': 13, 'bagging_temperature': 0.0692711669355712, 'colsample_bylevel': 0.4954715806961587}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 21.9242759	test: 22.6212124	best: 22.6212124 (0)	total: 253ms	remaining: 18m 51s
100:	learn: 7.1507767	test: 7.5468762	best: 7.5356435 (98)	total: 19.8s	remaining: 14m 18s
200:	learn: 6.3305148	test: 7.0685896	best: 7.0685896 (200)	total: 40s	remaining: 14m 10s
300:	learn: 6.0347143	test: 6.9666554	best: 6.9649005 (299)	total: 1m 1s	remaining: 14m 9s
400:	learn: 5.7983905	test: 6.8858037	best: 6.8823903 (394)	total: 1m 23s	remaining: 14m 4s
500:	learn: 5.6136314	test: 6.8295460	best: 6.7995832 (490)	total: 1m 45s	remaining: 13m 56s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.79958323
bestIteration = 490

Shrink model to first 491 iterations.


[I 2024-06-27 00:06:47,433] Trial 22 finished with value: 0.964780230866574 and parameters: {'iterations': 4477, 'od_wait': 994, 'learning_rate': 0.20155222519084884, 'reg_lambda': 0.0006410790756185968, 'subsample': 0.4016616184522461, 'random_strength': 46.56939166559379, 'depth': 3, 'min_data_in_leaf': 9, 'leaf_estimation_iterations': 14, 'bagging_temperature': 0.011365544133512479, 'colsample_bylevel': 0.504803996569392}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 21.5852083	test: 22.2766026	best: 22.2766026 (0)	total: 247ms	remaining: 16m 33s
100:	learn: 6.9505100	test: 7.2680308	best: 7.2680308 (100)	total: 20.8s	remaining: 13m 25s
200:	learn: 6.2804901	test: 6.9527404	best: 6.9527404 (200)	total: 41.6s	remaining: 13m 9s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.952472971
bestIteration = 201

Shrink model to first 202 iterations.


[I 2024-06-27 00:07:40,440] Trial 23 finished with value: 0.9631754348727426 and parameters: {'iterations': 4020, 'od_wait': 1019, 'learning_rate': 0.22613950841496952, 'reg_lambda': 0.048211091787694046, 'subsample': 0.4331853761443151, 'random_strength': 45.47457335016499, 'depth': 3, 'min_data_in_leaf': 8, 'leaf_estimation_iterations': 14, 'bagging_temperature': 0.052089080569285975, 'colsample_bylevel': 0.5095231886081665}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 19.5774763	test: 20.0088482	best: 20.0088482 (0)	total: 169ms	remaining: 22m 55s
100:	learn: 7.2529035	test: 7.6856783	best: 7.6851970 (99)	total: 17.4s	remaining: 23m 2s
200:	learn: 6.7436631	test: 7.4200593	best: 7.4196427 (198)	total: 35s	remaining: 23m
300:	learn: 6.5170430	test: 7.2895231	best: 7.2890934 (296)	total: 53s	remaining: 22m 59s
400:	learn: 6.3204939	test: 7.2121459	best: 7.2121459 (400)	total: 1m 11s	remaining: 22m 50s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 7.191733579
bestIteration = 429

Shrink model to first 430 iterations.


[I 2024-06-27 00:09:07,171] Trial 24 finished with value: 0.9609776327440861 and parameters: {'iterations': 8135, 'od_wait': 738, 'learning_rate': 0.46305779506046313, 'reg_lambda': 0.0004125838904150603, 'subsample': 0.5509625923088535, 'random_strength': 44.828804160290844, 'depth': 2, 'min_data_in_leaf': 10, 'leaf_estimation_iterations': 11, 'bagging_temperature': 0.01020849348519926, 'colsample_bylevel': 0.6486587154723804}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 22.6689265	test: 23.3539559	best: 23.3539559 (0)	total: 192ms	remaining: 14m 3s
100:	learn: 7.2341279	test: 7.4739066	best: 7.4739066 (100)	total: 18.6s	remaining: 13m 7s
200:	learn: 6.4949608	test: 7.1309043	best: 7.1256439 (193)	total: 36.3s	remaining: 12m 35s
300:	learn: 6.2186738	test: 6.9902323	best: 6.9902323 (300)	total: 55.5s	remaining: 12m 33s
400:	learn: 6.0244587	test: 6.9052657	best: 6.9031651 (384)	total: 1m 15s	remaining: 12m 34s
500:	learn: 5.8573412	test: 6.8660936	best: 6.8426291 (498)	total: 1m 36s	remaining: 12m 30s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.842629103
bestIteration = 498

Shrink model to first 499 iterations.


[I 2024-06-27 00:10:54,106] Trial 25 finished with value: 0.9644934756281517 and parameters: {'iterations': 4388, 'od_wait': 660, 'learning_rate': 0.15273500671989418, 'reg_lambda': 0.008198644911182976, 'subsample': 0.3587565011200738, 'random_strength': 29.111955807665513, 'depth': 3, 'min_data_in_leaf': 5, 'leaf_estimation_iterations': 9, 'bagging_temperature': 0.031451532292563464, 'colsample_bylevel': 0.7206273069738144}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 19.6931500	test: 20.3953794	best: 20.3953794 (0)	total: 273ms	remaining: 36m 32s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 7.052722882
bestIteration = 71

Shrink model to first 72 iterations.


[I 2024-06-27 00:11:27,724] Trial 26 finished with value: 0.9621021205569812 and parameters: {'iterations': 8021, 'od_wait': 982, 'learning_rate': 0.3235699514487835, 'reg_lambda': 5.322235480080753e-05, 'subsample': 0.46945430819220785, 'random_strength': 37.059556184916886, 'depth': 5, 'min_data_in_leaf': 13, 'leaf_estimation_iterations': 14, 'bagging_temperature': 0.48336624873626016, 'colsample_bylevel': 0.4894642170823952}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 17.9002640	test: 18.1977545	best: 18.1977545 (0)	total: 153ms	remaining: 26m 50s
100:	learn: 7.1005157	test: 7.8104026	best: 7.8104026 (100)	total: 13.7s	remaining: 23m 33s
200:	learn: 6.5824038	test: 7.5370944	best: 7.5321496 (199)	total: 27.6s	remaining: 23m 43s
300:	learn: 6.3377402	test: 7.3692158	best: 7.3601028 (298)	total: 41.7s	remaining: 23m 42s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 7.337843184
bestIteration = 322

Shrink model to first 323 iterations.


[I 2024-06-27 00:12:21,788] Trial 27 finished with value: 0.9590663026261775 and parameters: {'iterations': 10558, 'od_wait': 1170, 'learning_rate': 0.6861107325705791, 'reg_lambda': 0.00034303698751700823, 'subsample': 0.2558268606789321, 'random_strength': 46.89106746120085, 'depth': 2, 'min_data_in_leaf': 7, 'leaf_estimation_iterations': 6, 'bagging_temperature': 0.06715828672854303, 'colsample_bylevel': 0.61781081176476}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 21.5904917	test: 22.2490470	best: 22.2490470 (0)	total: 308ms	remaining: 27m 26s
100:	learn: 6.0925736	test: 6.9783669	best: 6.9395019 (95)	total: 30.4s	remaining: 26m 19s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.939501944
bestIteration = 95

Shrink model to first 96 iterations.


[I 2024-06-27 00:13:03,815] Trial 28 finished with value: 0.9632998302244729 and parameters: {'iterations': 5346, 'od_wait': 649, 'learning_rate': 0.18547640243276667, 'reg_lambda': 1.130623056409198e-05, 'subsample': 0.6005818949249977, 'random_strength': 25.87631745884616, 'depth': 5, 'min_data_in_leaf': 11, 'leaf_estimation_iterations': 12, 'bagging_temperature': 0.1707809712781129, 'colsample_bylevel': 0.5109225577418821}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 21.5532050	test: 22.1163682	best: 22.1163682 (0)	total: 151ms	remaining: 30m 4s
100:	learn: 7.7375601	test: 8.0134715	best: 8.0134715 (100)	total: 15.5s	remaining: 30m 13s
200:	learn: 7.2121829	test: 7.7486399	best: 7.7486399 (200)	total: 30.7s	remaining: 29m 55s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 7.732179499
bestIteration = 206

Shrink model to first 207 iterations.


[I 2024-06-27 00:13:45,027] Trial 29 finished with value: 0.9541000024640568 and parameters: {'iterations': 11943, 'od_wait': 1250, 'learning_rate': 0.24916715513486407, 'reg_lambda': 0.004044208982283682, 'subsample': 0.35559597786451136, 'random_strength': 19.740582179150877, 'depth': 2, 'min_data_in_leaf': 16, 'leaf_estimation_iterations': 10, 'bagging_temperature': 0.02598745611389123, 'colsample_bylevel': 0.6009367623947287}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 18.3825333	test: 18.9236529	best: 18.9236529 (0)	total: 275ms	remaining: 35m 17s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 7.23418638
bestIteration = 30

Shrink model to first 31 iterations.


[I 2024-06-27 00:14:06,874] Trial 30 finished with value: 0.9602648094861739 and parameters: {'iterations': 7698, 'od_wait': 1524, 'learning_rate': 0.32825099878626285, 'reg_lambda': 0.025625997759220944, 'subsample': 0.11264003605236811, 'random_strength': 12.459279744766441, 'depth': 8, 'min_data_in_leaf': 1, 'leaf_estimation_iterations': 9, 'bagging_temperature': 2.614260093539244, 'colsample_bylevel': 0.6920046747483075}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 23.9568018	test: 24.6878390	best: 24.6878390 (0)	total: 421ms	remaining: 1h 33m 28s
100:	learn: 7.5158199	test: 7.7973622	best: 7.7973622 (100)	total: 37.2s	remaining: 1h 21m 11s
200:	learn: 6.5910612	test: 7.0653078	best: 7.0653078 (200)	total: 1m 13s	remaining: 1h 19m 26s
300:	learn: 6.2029870	test: 6.8711823	best: 6.8692067 (298)	total: 1m 49s	remaining: 1h 19m 3s
400:	learn: 5.8307861	test: 6.8293219	best: 6.8274591 (398)	total: 2m 27s	remaining: 1h 18m 55s
500:	learn: 5.5866091	test: 6.7909332	best: 6.7895704 (492)	total: 3m 5s	remaining: 1h 18m 55s
600:	learn: 5.4151780	test: 6.7703827	best: 6.7613360 (586)	total: 3m 43s	remaining: 1h 18m 58s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.761335977
bestIteration = 586

Shrink model to first 587 iterations.


[I 2024-06-27 00:18:01,393] Trial 31 finished with value: 0.9650844360413923 and parameters: {'iterations': 13319, 'od_wait': 895, 'learning_rate': 0.04961846702450295, 'reg_lambda': 0.000875193088720282, 'subsample': 0.6925779746332141, 'random_strength': 44.21785107037128, 'depth': 6, 'min_data_in_leaf': 27, 'leaf_estimation_iterations': 13, 'bagging_temperature': 0.02510259481550901, 'colsample_bylevel': 0.4610056928860618}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 22.9587977	test: 23.6644992	best: 23.6644992 (0)	total: 296ms	remaining: 1h 20m 10s
100:	learn: 7.2588024	test: 7.4751053	best: 7.4751053 (100)	total: 25.3s	remaining: 1h 7m 29s
200:	learn: 6.3341500	test: 6.9515188	best: 6.9515188 (200)	total: 50s	remaining: 1h 6m 36s
300:	learn: 5.9419469	test: 6.8328279	best: 6.8266421 (296)	total: 1m 16s	remaining: 1h 7m 11s
400:	learn: 5.7080900	test: 6.7749821	best: 6.7717857 (395)	total: 1m 42s	remaining: 1h 7m 29s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.76970836
bestIteration = 405

Shrink model to first 406 iterations.


[I 2024-06-27 00:19:59,009] Trial 32 finished with value: 0.9651090951377641 and parameters: {'iterations': 16260, 'od_wait': 927, 'learning_rate': 0.11392347042702941, 'reg_lambda': 0.15472169214329295, 'subsample': 0.4904494217702541, 'random_strength': 43.8514327429127, 'depth': 4, 'min_data_in_leaf': 29, 'leaf_estimation_iterations': 14, 'bagging_temperature': 0.023606263619978265, 'colsample_bylevel': 0.4652719135797898}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 23.5716391	test: 24.2829911	best: 24.2829911 (0)	total: 287ms	remaining: 1h 18m 25s
100:	learn: 8.1578337	test: 8.3151194	best: 8.3151194 (100)	total: 27.2s	remaining: 1h 12m 59s
200:	learn: 7.2144878	test: 7.4669251	best: 7.4669251 (200)	total: 54s	remaining: 1h 12m 29s
300:	learn: 6.7549403	test: 7.1324494	best: 7.1324494 (300)	total: 1m 20s	remaining: 1h 11m 44s
400:	learn: 6.5069955	test: 7.0405098	best: 7.0405098 (400)	total: 1m 45s	remaining: 1h 9m 59s
500:	learn: 6.3351244	test: 6.9769184	best: 6.9769184 (500)	total: 2m 10s	remaining: 1h 8m 57s
600:	learn: 6.2074912	test: 6.9281622	best: 6.9281622 (600)	total: 2m 33s	remaining: 1h 7m
700:	learn: 6.1111352	test: 6.8908482	best: 6.8908482 (700)	total: 2m 56s	remaining: 1h 5m 56s
800:	learn: 6.0253726	test: 6.8540333	best: 6.8528772 (797)	total: 3m 20s	remaining: 1h 4m 57s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.852877186
bestIteration = 797

Shrink model to first 798 iterations.


[I 2024-06-27 00:23:31,656] Trial 33 finished with value: 0.9642882346290732 and parameters: {'iterations': 16383, 'od_wait': 946, 'learning_rate': 0.09199678999036426, 'reg_lambda': 0.8323152138338018, 'subsample': 0.6917408046813708, 'random_strength': 43.53659238699439, 'depth': 3, 'min_data_in_leaf': 28, 'leaf_estimation_iterations': 14, 'bagging_temperature': 0.022399446922404396, 'colsample_bylevel': 0.4017179779342894}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 22.5184817	test: 23.2164228	best: 23.2164228 (0)	total: 381ms	remaining: 1h 23m 33s
100:	learn: 6.2162976	test: 6.9864159	best: 6.9512690 (97)	total: 34.1s	remaining: 1h 13m 39s
200:	learn: 5.3334593	test: 6.8418682	best: 6.8226518 (190)	total: 1m 9s	remaining: 1h 14m 15s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.822651772
bestIteration = 190

Shrink model to first 191 iterations.


[I 2024-06-27 00:24:52,074] Trial 34 finished with value: 0.9644021400781544 and parameters: {'iterations': 13178, 'od_wait': 1061, 'learning_rate': 0.13716910373488478, 'reg_lambda': 0.09587908903680746, 'subsample': 0.4996870180260675, 'random_strength': 47.982733790908185, 'depth': 7, 'min_data_in_leaf': 27, 'leaf_estimation_iterations': 15, 'bagging_temperature': 0.010143713689824937, 'colsample_bylevel': 0.45015567798412054}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 23.1151380	test: 23.8249591	best: 23.8249591 (0)	total: 363ms	remaining: 56m 4s
100:	learn: 7.3295433	test: 7.5490653	best: 7.5490653 (100)	total: 30.6s	remaining: 46m 20s
200:	learn: 6.5296738	test: 7.0437064	best: 7.0437064 (200)	total: 59.8s	remaining: 45m 2s
300:	learn: 6.0748877	test: 6.8946692	best: 6.8946692 (300)	total: 1m 30s	remaining: 45m 9s
400:	learn: 5.8017058	test: 6.8043948	best: 6.8043948 (400)	total: 2m	remaining: 44m 29s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.786920496
bestIteration = 412

Shrink model to first 413 iterations.


[I 2024-06-27 00:27:12,594] Trial 35 finished with value: 0.9649685305949787 and parameters: {'iterations': 9283, 'od_wait': 1237, 'learning_rate': 0.10446987418570079, 'reg_lambda': 0.01648611420121118, 'subsample': 0.5656167014339294, 'random_strength': 43.900138095119935, 'depth': 4, 'min_data_in_leaf': 5, 'leaf_estimation_iterations': 12, 'bagging_temperature': 0.039532026467624576, 'colsample_bylevel': 0.4698884737919286}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 23.1163676	test: 23.8223830	best: 23.8223830 (0)	total: 610ms	remaining: 1h 46m 42s
100:	learn: 6.1856228	test: 6.9966252	best: 6.9966252 (100)	total: 52.8s	remaining: 1h 30m 30s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.920389478
bestIteration = 151

Shrink model to first 152 iterations.


[I 2024-06-27 00:28:51,535] Trial 36 finished with value: 0.9632606326342691 and parameters: {'iterations': 10496, 'od_wait': 1231, 'learning_rate': 0.09529760645194052, 'reg_lambda': 0.3384841345811569, 'subsample': 0.5734303133595103, 'random_strength': 43.90646314813842, 'depth': 8, 'min_data_in_leaf': 6, 'leaf_estimation_iterations': 12, 'bagging_temperature': 0.0496809788563204, 'colsample_bylevel': 0.46375468197583125}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 23.5755770	test: 24.2977256	best: 24.2977256 (0)	total: 278ms	remaining: 42m 54s
100:	learn: 7.7256973	test: 7.8917949	best: 7.8917949 (100)	total: 25.1s	remaining: 37m 55s
200:	learn: 6.8929905	test: 7.2122566	best: 7.2122566 (200)	total: 49.3s	remaining: 37m
300:	learn: 6.3692555	test: 6.9653722	best: 6.9653722 (300)	total: 1m 13s	remaining: 36m 30s
400:	learn: 6.0646091	test: 6.8755192	best: 6.8755192 (400)	total: 1m 40s	remaining: 37m 3s
500:	learn: 5.8969969	test: 6.8065375	best: 6.8065375 (500)	total: 2m 14s	remaining: 39m 1s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.766827588
bestIteration = 551

Shrink model to first 552 iterations.


[I 2024-06-27 00:31:38,396] Trial 37 finished with value: 0.9651138796847376 and parameters: {'iterations': 9247, 'od_wait': 1633, 'learning_rate': 0.07683074840919778, 'reg_lambda': 0.20581460748590005, 'subsample': 0.7085157689251846, 'random_strength': 38.445276081494285, 'depth': 4, 'min_data_in_leaf': 4, 'leaf_estimation_iterations': 11, 'bagging_temperature': 51.51029458212318, 'colsample_bylevel': 0.47206412580622603}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 24.4322314	test: 25.1939219	best: 25.1939219 (0)	total: 182ms	remaining: 48m 56s
100:	learn: 13.0355541	test: 13.4728598	best: 13.4728598 (100)	total: 17.8s	remaining: 46m 53s
200:	learn: 11.6010037	test: 12.0209928	best: 12.0209928 (200)	total: 35s	remaining: 46m 13s
300:	learn: 10.9180833	test: 11.3026935	best: 11.3026935 (300)	total: 52.3s	remaining: 45m 45s
400:	learn: 10.4584671	test: 10.8218569	best: 10.8218569 (400)	total: 1m 9s	remaining: 45m 13s
500:	learn: 10.1517865	test: 10.4842541	best: 10.4842541 (500)	total: 1m 26s	remaining: 44m 40s
600:	learn: 9.9320212	test: 10.2403496	best: 10.2403496 (600)	total: 1m 42s	remaining: 44m 14s
700:	learn: 9.7720497	test: 10.0596698	best: 10.0596698 (700)	total: 1m 59s	remaining: 43m 56s
800:	learn: 9.6499305	test: 9.9286918	best: 9.9286918 (800)	total: 2m 16s	remaining: 43m 35s
900:	learn: 9.5561176	test: 9.8263995	best: 9.8263995 (900)	total: 2m 33s	remaining: 43m 11s
1000:	learn: 9.4803703	test: 9.7466069	best: 9.7459640 (999

[I 2024-06-27 00:43:23,066] Trial 38 finished with value: 0.9340338384309396 and parameters: {'iterations': 16110, 'od_wait': 1880, 'learning_rate': 0.046005736515655433, 'reg_lambda': 0.20292640268692025, 'subsample': 0.7091365304170278, 'random_strength': 39.45002417159371, 'depth': 1, 'min_data_in_leaf': 30, 'leaf_estimation_iterations': 11, 'bagging_temperature': 94.17352111885074, 'colsample_bylevel': 0.5236657596198362}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 24.2233765	test: 24.9598646	best: 24.9598646 (0)	total: 526ms	remaining: 1h 22m 17s
100:	learn: 7.1316805	test: 7.5452747	best: 7.5452747 (100)	total: 48.7s	remaining: 1h 14m 41s
200:	learn: 6.1707173	test: 6.9406625	best: 6.9406625 (200)	total: 1m 37s	remaining: 1h 14m 6s
300:	learn: 5.7630894	test: 6.8632208	best: 6.8462828 (286)	total: 2m 25s	remaining: 1h 13m 19s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.8462828
bestIteration = 286

Shrink model to first 287 iterations.


[I 2024-06-27 00:45:59,513] Trial 39 finished with value: 0.9641849541321207 and parameters: {'iterations': 9391, 'od_wait': 1770, 'learning_rate': 0.03475163338693455, 'reg_lambda': 4.470053722222172, 'subsample': 0.8028956521338732, 'random_strength': 34.95435716094939, 'depth': 9, 'min_data_in_leaf': 26, 'leaf_estimation_iterations': 13, 'bagging_temperature': 33.22330484382635, 'colsample_bylevel': 0.42622490852050565}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 23.4857546	test: 24.1679281	best: 24.1679281 (0)	total: 333ms	remaining: 1h 12m 31s
100:	learn: 6.8159610	test: 7.2001635	best: 7.2001635 (100)	total: 37.2s	remaining: 1h 19m 24s
200:	learn: 6.1047690	test: 6.8901755	best: 6.8874318 (191)	total: 1m 14s	remaining: 1h 19m 38s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.829357328
bestIteration = 243

Shrink model to first 244 iterations.


[I 2024-06-27 00:47:44,310] Trial 40 finished with value: 0.9643940060479309 and parameters: {'iterations': 13050, 'od_wait': 2162, 'learning_rate': 0.0788120139701916, 'reg_lambda': 0.6190594860673078, 'subsample': 0.8745337594354604, 'random_strength': 38.09858252403791, 'depth': 6, 'min_data_in_leaf': 3, 'leaf_estimation_iterations': 10, 'bagging_temperature': 3.353972747823125, 'colsample_bylevel': 0.5351411852810358}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 23.0211361	test: 23.7284761	best: 23.7284761 (0)	total: 272ms	remaining: 41m 20s
100:	learn: 7.1523814	test: 7.4735023	best: 7.4735023 (100)	total: 23.8s	remaining: 35m 28s
200:	learn: 6.3536830	test: 7.0060429	best: 6.9960162 (199)	total: 47.4s	remaining: 35m 4s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.882568694
bestIteration = 272

Shrink model to first 273 iterations.


[I 2024-06-27 00:49:00,637] Trial 41 finished with value: 0.9638501320954116 and parameters: {'iterations': 9127, 'od_wait': 1572, 'learning_rate': 0.11014964507585558, 'reg_lambda': 0.01568897250755912, 'subsample': 0.6475158122666776, 'random_strength': 43.055213761109265, 'depth': 4, 'min_data_in_leaf': 4, 'leaf_estimation_iterations': 12, 'bagging_temperature': 21.78945031436308, 'colsample_bylevel': 0.47476963402339833}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 22.2868515	test: 22.9818750	best: 22.9818750 (0)	total: 337ms	remaining: 1h 37s
100:	learn: 6.4090885	test: 7.0426230	best: 7.0398289 (97)	total: 30.3s	remaining: 53m 28s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.931860539
bestIteration = 138

Shrink model to first 139 iterations.


[I 2024-06-27 00:49:54,714] Trial 42 finished with value: 0.9633782623796013 and parameters: {'iterations': 10800, 'od_wait': 1416, 'learning_rate': 0.15118685099884502, 'reg_lambda': 0.07335595271219314, 'subsample': 0.7459859545306985, 'random_strength': 39.43407763681009, 'depth': 5, 'min_data_in_leaf': 1, 'leaf_estimation_iterations': 11, 'bagging_temperature': 14.966488135182013, 'colsample_bylevel': 0.4758341113250346}. Best is trial 21 with value: 0.9653193776547078.


0:	learn: 23.8688320	test: 24.5758013	best: 24.5758013 (0)	total: 270ms	remaining: 29m 54s
100:	learn: 8.1266008	test: 8.4202821	best: 8.4202821 (100)	total: 24.8s	remaining: 26m 44s
200:	learn: 7.1180948	test: 7.4369761	best: 7.4369761 (200)	total: 48.8s	remaining: 26m 3s
300:	learn: 6.6215634	test: 7.1053956	best: 7.1053956 (300)	total: 1m 12s	remaining: 25m 35s
400:	learn: 6.2742699	test: 6.9527821	best: 6.9527821 (400)	total: 1m 36s	remaining: 25m 7s
500:	learn: 6.0601362	test: 6.8523896	best: 6.8523896 (500)	total: 2m 2s	remaining: 24m 56s
600:	learn: 5.8974098	test: 6.8164578	best: 6.8154683 (593)	total: 2m 27s	remaining: 24m 41s
700:	learn: 5.7615291	test: 6.7577161	best: 6.7577161 (700)	total: 2m 53s	remaining: 24m 28s
800:	learn: 5.6608804	test: 6.7287911	best: 6.7239088 (798)	total: 3m 19s	remaining: 24m 15s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.723908765
bestIteration = 798

Shrink model to first 799 iterations.


[I 2024-06-27 00:53:25,257] Trial 43 finished with value: 0.9656936754794401 and parameters: {'iterations': 6644, 'od_wait': 1695, 'learning_rate': 0.06237746134009798, 'reg_lambda': 2.6397327403374717, 'subsample': 0.4915506441230588, 'random_strength': 48.24674432037646, 'depth': 4, 'min_data_in_leaf': 3, 'leaf_estimation_iterations': 15, 'bagging_temperature': 0.07131640531121744, 'colsample_bylevel': 0.4261590739334049}. Best is trial 43 with value: 0.9656936754794401.


0:	learn: 24.1262746	test: 24.9091042	best: 24.9091042 (0)	total: 180ms	remaining: 19m
100:	learn: 9.9913849	test: 10.3070117	best: 10.3070117 (100)	total: 16.2s	remaining: 16m 40s
200:	learn: 8.8212906	test: 9.0345347	best: 9.0345347 (200)	total: 32.5s	remaining: 16m 31s
300:	learn: 8.1494983	test: 8.3402639	best: 8.3402639 (300)	total: 48.7s	remaining: 16m 13s
400:	learn: 7.7635162	test: 7.9489965	best: 7.9488313 (399)	total: 1m 4s	remaining: 15m 56s
500:	learn: 7.5117589	test: 7.7297891	best: 7.7297891 (500)	total: 1m 20s	remaining: 15m 35s
600:	learn: 7.3371317	test: 7.5962239	best: 7.5951987 (597)	total: 1m 36s	remaining: 15m 16s
700:	learn: 7.1975617	test: 7.5309219	best: 7.5247165 (685)	total: 1m 52s	remaining: 15m
800:	learn: 7.0940259	test: 7.4644476	best: 7.4644476 (800)	total: 2m 8s	remaining: 14m 47s
900:	learn: 7.0018353	test: 7.4144830	best: 7.4144830 (900)	total: 2m 24s	remaining: 14m 32s
1000:	learn: 6.9216175	test: 7.3754279	best: 7.3754279 (1000)	total: 2m 41s	remaini

[I 2024-06-27 00:57:42,061] Trial 44 finished with value: 0.9598375577248339 and parameters: {'iterations': 6326, 'od_wait': 1682, 'learning_rate': 0.06208692677198685, 'reg_lambda': 62.10194865611512, 'subsample': 0.5084100273863639, 'random_strength': 48.477288189245684, 'depth': 2, 'min_data_in_leaf': 3, 'leaf_estimation_iterations': 15, 'bagging_temperature': 81.46774013992871, 'colsample_bylevel': 0.42583691962435}. Best is trial 43 with value: 0.9656936754794401.


0:	learn: 23.6436318	test: 24.3232071	best: 24.3232071 (0)	total: 231ms	remaining: 21m 17s
100:	learn: 7.8808614	test: 8.0583704	best: 8.0583704 (100)	total: 22.4s	remaining: 20m 5s
200:	learn: 6.8646405	test: 7.2030961	best: 7.2030961 (200)	total: 44.7s	remaining: 19m 45s
300:	learn: 6.3315282	test: 6.9186506	best: 6.9186506 (300)	total: 1m 6s	remaining: 19m 16s
400:	learn: 6.0875504	test: 6.8514805	best: 6.8514257 (397)	total: 1m 29s	remaining: 19m 7s
500:	learn: 5.8861834	test: 6.8113963	best: 6.8038947 (492)	total: 1m 53s	remaining: 19m
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.803894749
bestIteration = 492

Shrink model to first 493 iterations.


[I 2024-06-27 00:59:44,681] Trial 45 finished with value: 0.9647693282628447 and parameters: {'iterations': 5530, 'od_wait': 1810, 'learning_rate': 0.07809902251982291, 'reg_lambda': 2.4389278047534937, 'subsample': 0.4500328744978409, 'random_strength': 46.65274363112157, 'depth': 4, 'min_data_in_leaf': 12, 'leaf_estimation_iterations': 14, 'bagging_temperature': 0.06019557671647006, 'colsample_bylevel': 0.4031443056379379}. Best is trial 43 with value: 0.9656936754794401.


0:	learn: 24.1996171	test: 24.9396436	best: 24.9396436 (0)	total: 454ms	remaining: 53m 7s
100:	learn: 7.5868826	test: 7.9042281	best: 7.9042281 (100)	total: 42s	remaining: 47m 57s
200:	learn: 6.5880365	test: 7.1200211	best: 7.1200211 (200)	total: 1m 23s	remaining: 47m 18s
300:	learn: 6.2131774	test: 6.9351809	best: 6.9350156 (299)	total: 2m 4s	remaining: 46m 33s
400:	learn: 5.9404010	test: 6.8744846	best: 6.8674802 (399)	total: 2m 46s	remaining: 45m 54s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.867480194
bestIteration = 399

Shrink model to first 400 iterations.


[I 2024-06-27 01:02:46,836] Trial 46 finished with value: 0.9639142243361698 and parameters: {'iterations': 7028, 'od_wait': 1635, 'learning_rate': 0.03886346662617217, 'reg_lambda': 20.341740002364837, 'subsample': 0.7137947079238524, 'random_strength': 49.967949058166205, 'depth': 7, 'min_data_in_leaf': 22, 'leaf_estimation_iterations': 15, 'bagging_temperature': 0.09893949558034387, 'colsample_bylevel': 0.44228281441053074}. Best is trial 43 with value: 0.9656936754794401.


0:	learn: 24.0463654	test: 24.7747885	best: 24.7747885 (0)	total: 283ms	remaining: 33m 21s
100:	learn: 7.7274835	test: 7.9077987	best: 7.9077987 (100)	total: 27.3s	remaining: 31m 28s
200:	learn: 6.8077704	test: 7.1601163	best: 7.1601163 (200)	total: 53.5s	remaining: 30m 31s
300:	learn: 6.3732778	test: 6.9333594	best: 6.9314422 (297)	total: 1m 19s	remaining: 29m 54s
400:	learn: 5.9750882	test: 6.8846118	best: 6.8824333 (392)	total: 1m 45s	remaining: 29m 22s
500:	learn: 5.7365272	test: 6.8133896	best: 6.8133896 (500)	total: 2m 13s	remaining: 29m 9s
600:	learn: 5.5773978	test: 6.7805102	best: 6.7680759 (581)	total: 2m 41s	remaining: 29m 4s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.768075935
bestIteration = 581

Shrink model to first 582 iterations.


[I 2024-06-27 01:05:35,462] Trial 47 finished with value: 0.9651165912488091 and parameters: {'iterations': 7081, 'od_wait': 1755, 'learning_rate': 0.05306910721118303, 'reg_lambda': 0.2578325627459118, 'subsample': 0.4981843442501711, 'random_strength': 41.779459128046106, 'depth': 5, 'min_data_in_leaf': 15, 'leaf_estimation_iterations': 13, 'bagging_temperature': 0.01690795913152374, 'colsample_bylevel': 0.5919360877481336}. Best is trial 43 with value: 0.9656936754794401.


0:	learn: 24.3276726	test: 25.0492800	best: 25.0492800 (0)	total: 306ms	remaining: 33m 8s
100:	learn: 8.7622922	test: 9.0488476	best: 9.0488476 (100)	total: 30.3s	remaining: 31m 54s
200:	learn: 7.4086215	test: 7.6820136	best: 7.6820136 (200)	total: 1m	remaining: 31m 23s
300:	learn: 6.8395839	test: 7.2014032	best: 7.2014032 (300)	total: 1m 29s	remaining: 30m 37s
400:	learn: 6.5461768	test: 7.0536781	best: 7.0536781 (400)	total: 1m 58s	remaining: 29m 56s
500:	learn: 6.2876233	test: 6.9479911	best: 6.9477497 (498)	total: 2m 27s	remaining: 29m 18s
600:	learn: 6.0964137	test: 6.8884385	best: 6.8884336 (599)	total: 2m 56s	remaining: 28m 50s
700:	learn: 5.9281609	test: 6.8450930	best: 6.8450930 (700)	total: 3m 26s	remaining: 28m 25s
Stopped by overfitting detector  (25 iterations wait)

bestTest = 6.843315323
bestIteration = 703

Shrink model to first 704 iterations.


[I 2024-06-27 01:09:15,617] Trial 48 finished with value: 0.9642453005945273 and parameters: {'iterations': 6491, 'od_wait': 2156, 'learning_rate': 0.030501040555902418, 'reg_lambda': 2.421830916775544, 'subsample': 0.5990230437940431, 'random_strength': 31.339338342419776, 'depth': 5, 'min_data_in_leaf': 15, 'leaf_estimation_iterations': 14, 'bagging_temperature': 0.017497040793714173, 'colsample_bylevel': 0.5830046823943319}. Best is trial 43 with value: 0.9656936754794401.


0:	learn: 24.3016799	test: 25.0642735	best: 25.0642735 (0)	total: 133ms	remaining: 15m 45s
100:	learn: 12.3966946	test: 12.8773943	best: 12.8773943 (100)	total: 13s	remaining: 15m
200:	learn: 11.1601364	test: 11.5774399	best: 11.5774399 (200)	total: 25.6s	remaining: 14m 41s
300:	learn: 10.5351028	test: 10.9228289	best: 10.9228289 (300)	total: 38.4s	remaining: 14m 28s
400:	learn: 10.1513050	test: 10.4943797	best: 10.4943797 (400)	total: 51.1s	remaining: 14m 14s
500:	learn: 9.9005655	test: 10.2153759	best: 10.2153759 (500)	total: 1m 3s	remaining: 13m 57s
600:	learn: 9.7265977	test: 10.0242593	best: 10.0242593 (600)	total: 1m 15s	remaining: 13m 42s
700:	learn: 9.5978496	test: 9.8866156	best: 9.8859314 (699)	total: 1m 28s	remaining: 13m 27s
800:	learn: 9.4967946	test: 9.7776380	best: 9.7776380 (800)	total: 1m 40s	remaining: 13m 13s
900:	learn: 9.4170619	test: 9.6925798	best: 9.6925798 (900)	total: 1m 53s	remaining: 13m
1000:	learn: 9.3533536	test: 9.6241970	best: 9.6241970 (1000)	total: 2m

[I 2024-06-27 01:16:27,572] Trial 49 finished with value: 0.933903897513787 and parameters: {'iterations': 7112, 'od_wait': 1951, 'learning_rate': 0.05968788493640818, 'reg_lambda': 0.19397665467235214, 'subsample': 0.4840591597883353, 'random_strength': 41.24243483693198, 'depth': 1, 'min_data_in_leaf': 17, 'leaf_estimation_iterations': 13, 'bagging_temperature': 0.26861946935998265, 'colsample_bylevel': 0.5422126362824453}. Best is trial 43 with value: 0.9656936754794401.


time: 2h 27min 8s (started: 2024-06-26 22:49:19 +09:00)


In [53]:
print(f"Best trial: {study_catboost.best_trial.value}")
print(f"Best parameters: {study_catboost.best_trial.params}")

Best trial: 0.9656936754794401
Best parameters: {'iterations': 6644, 'od_wait': 1695, 'learning_rate': 0.06237746134009798, 'reg_lambda': 2.6397327403374717, 'subsample': 0.4915506441230588, 'random_strength': 48.24674432037646, 'depth': 4, 'min_data_in_leaf': 3, 'leaf_estimation_iterations': 15, 'bagging_temperature': 0.07131640531121744, 'colsample_bylevel': 0.4261590739334049}
time: 0 ns (started: 2024-06-27 01:56:22 +09:00)


## XGBRegressor

In [54]:
def objective_xgboost(trial):
    xgb_param = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 5),
        'reg_lambda': trial.suggest_float('reg_lambda', 0, 5),
    }

    model_xgb = XGBRegressor(**xgb_param, objective='reg:squarederror')
    model_xgb.fit(X_train, y_train, 
                  eval_set=[(X_valid, y_valid)], 
                  early_stopping_rounds=25, 
                  verbose=False)
    
    predictions = model_xgb.predict(X_valid)
    rscore = np.corrcoef(y_valid.squeeze(), predictions.squeeze())[0, 1]
    return rscore

study_xgboost = optuna.create_study(direction='minimize', sampler=TPESampler(seed=42))
study_xgboost.optimize(objective_xgboost, n_trials=50)

print(f"Best trial: {study_xgboost.best_trial.value}")
print(f"Best parameters: {study_xgboost.best_trial.params}")

[I 2024-06-27 01:56:24,274] A new study created in memory with name: no-name-7bdc766c-faaa-4e32-8cba-f9883b1c040a
[I 2024-06-27 01:56:57,721] Trial 0 finished with value: 0.9601316340876372 and parameters: {'n_estimators': 437, 'learning_rate': 0.28570714885887566, 'max_depth': 8, 'min_child_weight': 6, 'subsample': 0.5780093202212182, 'colsample_bytree': 0.5779972601681014, 'gamma': 0.2904180608409973, 'reg_alpha': 4.330880728874676, 'reg_lambda': 3.005575058716044}. Best is trial 0 with value: 0.9601316340876372.
[I 2024-06-27 02:05:26,030] Trial 1 finished with value: 0.9626324292371459 and parameters: {'n_estimators': 737, 'learning_rate': 0.01596950334578271, 'max_depth': 10, 'min_child_weight': 9, 'subsample': 0.6061695553391381, 'colsample_bytree': 0.5909124836035503, 'gamma': 0.9170225492671691, 'reg_alpha': 1.5212112147976886, 'reg_lambda': 2.6237821581611893}. Best is trial 0 with value: 0.9601316340876372.
[I 2024-06-27 02:06:38,255] Trial 2 finished with value: 0.9631823692

Best trial: 0.9570683878417976
Best parameters: {'n_estimators': 934, 'learning_rate': 0.28175665598309774, 'max_depth': 9, 'min_child_weight': 1, 'subsample': 0.9292853154718507, 'colsample_bytree': 0.9424638756724492, 'gamma': 2.0270422174538107, 'reg_alpha': 0.5941655845608484, 'reg_lambda': 2.145028502301874}
time: 1h 14min 1s (started: 2024-06-27 01:56:24 +09:00)


## LGBMRegressor

In [55]:
def objective_lightgbm(trial):
    lgb_param = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'num_leaves': trial.suggest_int('num_leaves', 2, 256),
        'max_depth': trial.suggest_int('max_depth', -1, 15),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 50),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
        'lambda_l1': trial.suggest_float('lambda_l1', 0, 5),
        'lambda_l2': trial.suggest_float('lambda_l2', 0, 5),
    }

    model_lgb = LGBMRegressor(**lgb_param)
    model_lgb.fit(X_train, y_train, 
                  eval_set=[(X_valid, y_valid)])
    
    predictions = model_lgb.predict(X_valid)
    rscore = np.corrcoef(y_valid.squeeze(), predictions.squeeze())[0, 1]
    return rscore

study_lightgbm = optuna.create_study(direction='minimize', sampler=TPESampler(seed=42))
study_lightgbm.optimize(objective_lightgbm, n_trials=50)

print(f"Best trial: {study_lightgbm.best_trial.value}")
print(f"Best parameters: {study_lightgbm.best_trial.params}")

[I 2024-06-27 03:10:25,668] A new study created in memory with name: no-name-0bc88d1a-d383-4e1c-98b1-a3015e175d7f


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.124869 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:11:21,153] Trial 0 finished with value: 0.9586937923877085 and parameters: {'n_estimators': 437, 'learning_rate': 0.28570714885887566, 'num_leaves': 188, 'max_depth': 9, 'min_data_in_leaf': 8, 'feature_fraction': 0.49359671220172163, 'bagging_fraction': 0.4348501673009197, 'bagging_freq': 9, 'lambda_l1': 3.005575058716044, 'lambda_l2': 3.540362888980227}. Best is trial 0 with value: 0.9586937923877085.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.118610 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:11:29,563] Trial 1 finished with value: 0.9555601755416646 and parameters: {'n_estimators': 118, 'learning_rate': 0.29127385712697834, 'num_leaves': 214, 'max_depth': 2, 'min_data_in_leaf': 10, 'feature_fraction': 0.5100427059120604, 'bagging_fraction': 0.5825453457757226, 'bagging_freq': 6, 'lambda_l1': 2.1597250932105787, 'lambda_l2': 1.4561457009902097}. Best is trial 1 with value: 0.9555601755416646.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.128094 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:12:22,598] Trial 2 finished with value: 0.9650059546783808 and parameters: {'n_estimators': 651, 'learning_rate': 0.05045321958909213, 'num_leaves': 76, 'max_depth': 5, 'min_data_in_leaf': 23, 'feature_fraction': 0.8711055768358081, 'bagging_fraction': 0.5198042692950159, 'bagging_freq': 6, 'lambda_l1': 2.9620728443102124, 'lambda_l2': 0.23225206359998862}. Best is trial 1 with value: 0.9555601755416646.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.095389 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:13:06,900] Trial 3 finished with value: 0.963626349167579 and parameters: {'n_estimators': 647, 'learning_rate': 0.059451995869314544, 'num_leaves': 18, 'max_depth': 15, 'min_data_in_leaf': 49, 'feature_fraction': 0.8850384088698766, 'bagging_fraction': 0.5827682615040224, 'bagging_freq': 1, 'lambda_l1': 3.4211651325607844, 'lambda_l2': 2.2007624686980067}. Best is trial 1 with value: 0.9555601755416646.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.099837 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:13:21,037] Trial 4 finished with value: 0.9626287758179616 and parameters: {'n_estimators': 209, 'learning_rate': 0.15360130393226834, 'num_leaves': 10, 'max_depth': 14, 'min_data_in_leaf': 13, 'feature_fraction': 0.7975133706123891, 'bagging_fraction': 0.5870266456536466, 'bagging_freq': 6, 'lambda_l1': 2.7335513967163982, 'lambda_l2': 0.9242722776276352}. Best is trial 1 with value: 0.9555601755416646.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.126924 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:15:17,698] Trial 5 finished with value: 0.9537362064385119 and parameters: {'n_estimators': 973, 'learning_rate': 0.2347885187747232, 'num_leaves': 241, 'max_depth': 14, 'min_data_in_leaf': 30, 'feature_fraction': 0.9531245410138701, 'bagging_fraction': 0.4530955012311517, 'bagging_freq': 2, 'lambda_l1': 0.22613644455269033, 'lambda_l2': 1.6266516538163218}. Best is trial 5 with value: 0.9537362064385119.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.130238 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:15:52,767] Trial 6 finished with value: 0.964579012308308 and parameters: {'n_estimators': 450, 'learning_rate': 0.08869121921442981, 'num_leaves': 213, 'max_depth': 5, 'min_data_in_leaf': 15, 'feature_fraction': 0.7256176498949491, 'bagging_fraction': 0.4845545349848576, 'bagging_freq': 9, 'lambda_l1': 0.3727532183988541, 'lambda_l2': 4.9344346830025865}. Best is trial 5 with value: 0.9537362064385119.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.149333 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:16:29,420] Trial 7 finished with value: 0.9588372063769726 and parameters: {'n_estimators': 795, 'learning_rate': 0.06762754764491, 'num_leaves': 3, 'max_depth': 12, 'min_data_in_leaf': 36, 'feature_fraction': 0.8374043008245924, 'bagging_fraction': 0.8627622080115674, 'bagging_freq': 1, 'lambda_l1': 1.7923286427213632, 'lambda_l2': 0.5793452976256486}. Best is trial 5 with value: 0.9537362064385119.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.089458 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:17:54,660] Trial 8 finished with value: 0.9588871406666686 and parameters: {'n_estimators': 877, 'learning_rate': 0.19075645677999178, 'num_leaves': 86, 'max_depth': 0, 'min_data_in_leaf': 16, 'feature_fraction': 0.5951099932160482, 'bagging_fraction': 0.8377637070028385, 'bagging_freq': 7, 'lambda_l1': 4.436063712881633, 'lambda_l2': 2.3610746258097466}. Best is trial 5 with value: 0.9537362064385119.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.128685 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:18:23,569] Trial 9 finished with value: 0.9628281133562202 and parameters: {'n_estimators': 207, 'learning_rate': 0.21684098829466855, 'num_leaves': 196, 'max_depth': 8, 'min_data_in_leaf': 39, 'feature_fraction': 0.6962773578186345, 'bagging_fraction': 0.7136396976291964, 'bagging_freq': 5, 'lambda_l1': 0.12709563372047594, 'lambda_l2': 0.5394571349665223}. Best is trial 5 with value: 0.9537362064385119.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.133037 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:20:43,334] Trial 10 finished with value: 0.9534201108653217 and parameters: {'n_estimators': 982, 'learning_rate': 0.23138398443540723, 'num_leaves': 249, 'max_depth': 11, 'min_data_in_leaf': 31, 'feature_fraction': 0.9445988771695107, 'bagging_fraction': 0.9758624475672522, 'bagging_freq': 3, 'lambda_l1': 1.0623493620999978, 'lambda_l2': 3.5162929773066094}. Best is trial 10 with value: 0.9534201108653217.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.133301 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:23:03,241] Trial 11 finished with value: 0.9543685030374567 and parameters: {'n_estimators': 997, 'learning_rate': 0.23495832767373295, 'num_leaves': 250, 'max_depth': 11, 'min_data_in_leaf': 30, 'feature_fraction': 0.9758270708358225, 'bagging_fraction': 0.9945701925466705, 'bagging_freq': 3, 'lambda_l1': 1.1152670894803383, 'lambda_l2': 3.546831194002327}. Best is trial 10 with value: 0.9534201108653217.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.124522 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:25:23,033] Trial 12 finished with value: 0.9558356472067234 and parameters: {'n_estimators': 974, 'learning_rate': 0.13348118463794467, 'num_leaves': 256, 'max_depth': 12, 'min_data_in_leaf': 25, 'feature_fraction': 0.9942035548238166, 'bagging_fraction': 0.743197316358561, 'bagging_freq': 3, 'lambda_l1': 1.0821084221822654, 'lambda_l2': 3.3362280523931753}. Best is trial 10 with value: 0.9534201108653217.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.106090 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:27:04,042] Trial 13 finished with value: 0.9566690602669733 and parameters: {'n_estimators': 806, 'learning_rate': 0.24769458589519983, 'num_leaves': 150, 'max_depth': 10, 'min_data_in_leaf': 35, 'feature_fraction': 0.9277418963580313, 'bagging_fraction': 0.9855355977976394, 'bagging_freq': 3, 'lambda_l1': 1.0296197922685757, 'lambda_l2': 4.398069633632563}. Best is trial 10 with value: 0.9534201108653217.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.100250 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:28:40,098] Trial 14 finished with value: 0.9576815271628997 and parameters: {'n_estimators': 872, 'learning_rate': 0.1988561000022668, 'num_leaves': 151, 'max_depth': 14, 'min_data_in_leaf': 42, 'feature_fraction': 0.7954171628853844, 'bagging_fraction': 0.8719767230619992, 'bagging_freq': 4, 'lambda_l1': 0.05494594621885662, 'lambda_l2': 1.5778766934690505}. Best is trial 10 with value: 0.9534201108653217.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.095992 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:29:51,171] Trial 15 finished with value: 0.9612563040332097 and parameters: {'n_estimators': 698, 'learning_rate': 0.259796938829652, 'num_leaves': 234, 'max_depth': 6, 'min_data_in_leaf': 1, 'feature_fraction': 0.4125481398900424, 'bagging_fraction': 0.40420382185668, 'bagging_freq': 2, 'lambda_l1': 1.6399266073423122, 'lambda_l2': 2.9074820460327535}. Best is trial 10 with value: 0.9534201108653217.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.124975 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:31:00,186] Trial 16 finished with value: 0.9593231931896204 and parameters: {'n_estimators': 504, 'learning_rate': 0.1722886089980672, 'num_leaves': 171, 'max_depth': 13, 'min_data_in_leaf': 30, 'feature_fraction': 0.6948718767360637, 'bagging_fraction': 0.7856495827996224, 'bagging_freq': 4, 'lambda_l1': 0.6935956558099475, 'lambda_l2': 4.069968527889159}. Best is trial 10 with value: 0.9534201108653217.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.134273 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:32:53,085] Trial 17 finished with value: 0.9599196663548063 and parameters: {'n_estimators': 906, 'learning_rate': 0.10210824316320499, 'num_leaves': 110, 'max_depth': 10, 'min_data_in_leaf': 20, 'feature_fraction': 0.9415942892292042, 'bagging_fraction': 0.6772081613210963, 'bagging_freq': 2, 'lambda_l1': 1.613422511667511, 'lambda_l2': 1.9619837223083327}. Best is trial 10 with value: 0.9534201108653217.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.093512 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:34:34,199] Trial 18 finished with value: 0.9556297469155438 and parameters: {'n_estimators': 800, 'learning_rate': 0.25859647903071326, 'num_leaves': 231, 'max_depth': 15, 'min_data_in_leaf': 30, 'feature_fraction': 0.7582074332198886, 'bagging_fraction': 0.6677932050700218, 'bagging_freq': 2, 'lambda_l1': 4.834510220511309, 'lambda_l2': 2.8806809909013324}. Best is trial 10 with value: 0.9534201108653217.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.130327 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:36:24,130] Trial 19 finished with value: 0.9586283494416942 and parameters: {'n_estimators': 718, 'learning_rate': 0.22240512507773927, 'num_leaves': 175, 'max_depth': 8, 'min_data_in_leaf': 45, 'feature_fraction': 0.6385903789183353, 'bagging_fraction': 0.9196483355228753, 'bagging_freq': 4, 'lambda_l1': 0.6357192100443614, 'lambda_l2': 1.2447239188761943}. Best is trial 10 with value: 0.9534201108653217.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.123457 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:36:55,402] Trial 20 finished with value: 0.9622027303920552 and parameters: {'n_estimators': 353, 'learning_rate': 0.1295622742845453, 'num_leaves': 49, 'max_depth': 12, 'min_data_in_leaf': 33, 'feature_fraction': 0.9082782978064106, 'bagging_fraction': 0.6289896469586884, 'bagging_freq': 1, 'lambda_l1': 3.6738901466096086, 'lambda_l2': 2.75309458596762}. Best is trial 10 with value: 0.9534201108653217.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.151012 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:39:16,985] Trial 21 finished with value: 0.9533274851081436 and parameters: {'n_estimators': 1000, 'learning_rate': 0.231638098071013, 'num_leaves': 255, 'max_depth': 11, 'min_data_in_leaf': 30, 'feature_fraction': 0.9984818970885843, 'bagging_fraction': 0.9829689616103408, 'bagging_freq': 3, 'lambda_l1': 1.1166172221317385, 'lambda_l2': 3.5890112050732546}. Best is trial 21 with value: 0.9533274851081436.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.126330 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:41:32,606] Trial 22 finished with value: 0.9556057642358953 and parameters: {'n_estimators': 1000, 'learning_rate': 0.19908951195128494, 'num_leaves': 225, 'max_depth': 11, 'min_data_in_leaf': 26, 'feature_fraction': 0.9807453051543618, 'bagging_fraction': 0.9195393351730121, 'bagging_freq': 3, 'lambda_l1': 1.3767584338635126, 'lambda_l2': 4.298511164779878}. Best is trial 21 with value: 0.9533274851081436.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.132260 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:44:50,289] Trial 23 finished with value: 0.9593628307016497 and parameters: {'n_estimators': 916, 'learning_rate': 0.019170280050830713, 'num_leaves': 252, 'max_depth': 14, 'min_data_in_leaf': 39, 'feature_fraction': 0.9996955383024964, 'bagging_fraction': 0.9271888612266154, 'bagging_freq': 5, 'lambda_l1': 0.6302075707097973, 'lambda_l2': 3.8662055676747866}. Best is trial 21 with value: 0.9533274851081436.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.124851 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:47:01,526] Trial 24 finished with value: 0.9549253908940054 and parameters: {'n_estimators': 950, 'learning_rate': 0.26216596758092453, 'num_leaves': 200, 'max_depth': 10, 'min_data_in_leaf': 22, 'feature_fraction': 0.8581922760354985, 'bagging_fraction': 0.7951195177167036, 'bagging_freq': 2, 'lambda_l1': 2.2678633644818413, 'lambda_l2': 3.2035991850432852}. Best is trial 21 with value: 0.9533274851081436.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.151198 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:48:54,494] Trial 25 finished with value: 0.9552249248179521 and parameters: {'n_estimators': 854, 'learning_rate': 0.17917526539382567, 'num_leaves': 237, 'max_depth': 13, 'min_data_in_leaf': 28, 'feature_fraction': 0.9452541389270354, 'bagging_fraction': 0.9542490541189028, 'bagging_freq': 4, 'lambda_l1': 0.8401964690883612, 'lambda_l2': 1.8788937743940255}. Best is trial 21 with value: 0.9533274851081436.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.092092 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:50:24,727] Trial 26 finished with value: 0.9587604231072754 and parameters: {'n_estimators': 741, 'learning_rate': 0.22576609305031428, 'num_leaves': 216, 'max_depth': 7, 'min_data_in_leaf': 19, 'feature_fraction': 0.8224043723951824, 'bagging_fraction': 0.808445773545466, 'bagging_freq': 8, 'lambda_l1': 2.015635768362244, 'lambda_l2': 4.802678480183166}. Best is trial 21 with value: 0.9533274851081436.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.127961 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:51:37,401] Trial 27 finished with value: 0.9626982460955303 and parameters: {'n_estimators': 932, 'learning_rate': 0.2816780642583227, 'num_leaves': 144, 'max_depth': 3, 'min_data_in_leaf': 33, 'feature_fraction': 0.91279573758914, 'bagging_fraction': 0.8899810542252306, 'bagging_freq': 3, 'lambda_l1': 0.3996148156119238, 'lambda_l2': 2.5460899469862337}. Best is trial 21 with value: 0.9533274851081436.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.132636 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:52:58,161] Trial 28 finished with value: 0.9552046228685411 and parameters: {'n_estimators': 585, 'learning_rate': 0.2080681608925039, 'num_leaves': 242, 'max_depth': 9, 'min_data_in_leaf': 38, 'feature_fraction': 0.9544449282142122, 'bagging_fraction': 0.5152406126961597, 'bagging_freq': 5, 'lambda_l1': 1.305221789473105, 'lambda_l2': 3.9202670450357995}. Best is trial 21 with value: 0.9533274851081436.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.100199 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:54:45,266] Trial 29 finished with value: 0.9545424981240852 and parameters: {'n_estimators': 834, 'learning_rate': 0.2805597151725161, 'num_leaves': 182, 'max_depth': 9, 'min_data_in_leaf': 43, 'feature_fraction': 0.8964557963181212, 'bagging_fraction': 0.7473626969067109, 'bagging_freq': 10, 'lambda_l1': 0.36861691385598816, 'lambda_l2': 3.5999349012078357}. Best is trial 21 with value: 0.9533274851081436.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.123188 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:55:40,141] Trial 30 finished with value: 0.9603051721154776 and parameters: {'n_estimators': 358, 'learning_rate': 0.15731363972694978, 'num_leaves': 201, 'max_depth': 13, 'min_data_in_leaf': 26, 'feature_fraction': 0.5759108329841006, 'bagging_fraction': 0.46901184316702005, 'bagging_freq': 2, 'lambda_l1': 2.483272422894323, 'lambda_l2': 3.1525954429580594}. Best is trial 21 with value: 0.9533274851081436.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.123833 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:57:57,821] Trial 31 finished with value: 0.9533641981962337 and parameters: {'n_estimators': 978, 'learning_rate': 0.2384131856054032, 'num_leaves': 254, 'max_depth': 11, 'min_data_in_leaf': 31, 'feature_fraction': 0.9674405008959904, 'bagging_fraction': 0.9961109651032162, 'bagging_freq': 3, 'lambda_l1': 1.19400349751639, 'lambda_l2': 3.561520814647863}. Best is trial 21 with value: 0.9533274851081436.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.134663 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 03:59:31,459] Trial 32 finished with value: 0.9530233096379402 and parameters: {'n_estimators': 932, 'learning_rate': 0.297667374955342, 'num_leaves': 255, 'max_depth': 11, 'min_data_in_leaf': 31, 'feature_fraction': 0.9530883288518838, 'bagging_fraction': 0.9763592817003283, 'bagging_freq': 1, 'lambda_l1': 1.436311888327562, 'lambda_l2': 4.533077366077679}. Best is trial 32 with value: 0.9530233096379402.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.127831 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:01:18,002] Trial 33 finished with value: 0.9561980766608796 and parameters: {'n_estimators': 910, 'learning_rate': 0.29053063248134603, 'num_leaves': 214, 'max_depth': 8, 'min_data_in_leaf': 33, 'feature_fraction': 0.8527385302286066, 'bagging_fraction': 0.9569940280912965, 'bagging_freq': 1, 'lambda_l1': 1.8607958095555892, 'lambda_l2': 4.411494947109869}. Best is trial 32 with value: 0.9530233096379402.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.130633 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:03:03,482] Trial 34 finished with value: 0.953020075907139 and parameters: {'n_estimators': 768, 'learning_rate': 0.29768778912210975, 'num_leaves': 256, 'max_depth': 11, 'min_data_in_leaf': 24, 'feature_fraction': 0.8929701076355936, 'bagging_fraction': 0.9961897832411183, 'bagging_freq': 4, 'lambda_l1': 1.4020243241192476, 'lambda_l2': 4.662992160957201}. Best is trial 34 with value: 0.953020075907139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.123712 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:04:52,512] Trial 35 finished with value: 0.9558044217405552 and parameters: {'n_estimators': 760, 'learning_rate': 0.273354434764322, 'num_leaves': 220, 'max_depth': 10, 'min_data_in_leaf': 23, 'feature_fraction': 0.8827597930412261, 'bagging_fraction': 0.9438630457680421, 'bagging_freq': 4, 'lambda_l1': 1.4535348768373826, 'lambda_l2': 4.501083047334385}. Best is trial 34 with value: 0.953020075907139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.128356 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:06:16,940] Trial 36 finished with value: 0.9530833499289011 and parameters: {'n_estimators': 640, 'learning_rate': 0.2979438818155126, 'num_leaves': 232, 'max_depth': 11, 'min_data_in_leaf': 19, 'feature_fraction': 0.973795331594105, 'bagging_fraction': 0.8994428615988076, 'bagging_freq': 5, 'lambda_l1': 2.1846201427627436, 'lambda_l2': 4.737520311586423}. Best is trial 34 with value: 0.953020075907139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.108193 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:07:39,116] Trial 37 finished with value: 0.9573322183897415 and parameters: {'n_estimators': 642, 'learning_rate': 0.2997322165612267, 'num_leaves': 239, 'max_depth': 7, 'min_data_in_leaf': 6, 'feature_fraction': 0.8008170251084374, 'bagging_fraction': 0.8970766312199595, 'bagging_freq': 7, 'lambda_l1': 2.2979416456614157, 'lambda_l2': 4.731174217375502}. Best is trial 34 with value: 0.953020075907139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.100309 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:09:05,185] Trial 38 finished with value: 0.9562111034652951 and parameters: {'n_estimators': 576, 'learning_rate': 0.2954604618301775, 'num_leaves': 226, 'max_depth': 9, 'min_data_in_leaf': 11, 'feature_fraction': 0.8858745013802087, 'bagging_fraction': 0.8553801880398232, 'bagging_freq': 6, 'lambda_l1': 2.956173583618517, 'lambda_l2': 4.945743781023701}. Best is trial 34 with value: 0.953020075907139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.138171 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:10:24,397] Trial 39 finished with value: 0.9543925570468113 and parameters: {'n_estimators': 668, 'learning_rate': 0.2741399683650684, 'num_leaves': 204, 'max_depth': 12, 'min_data_in_leaf': 17, 'feature_fraction': 0.9210310335747295, 'bagging_fraction': 0.8318062113582728, 'bagging_freq': 5, 'lambda_l1': 3.287302079932886, 'lambda_l2': 4.108798579000728}. Best is trial 34 with value: 0.953020075907139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.128500 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:11:25,346] Trial 40 finished with value: 0.9615164706964029 and parameters: {'n_estimators': 610, 'learning_rate': 0.2674859820287925, 'num_leaves': 50, 'max_depth': 5, 'min_data_in_leaf': 22, 'feature_fraction': 0.7524259269484609, 'bagging_fraction': 0.9606273114695062, 'bagging_freq': 6, 'lambda_l1': 2.606048564631629, 'lambda_l2': 4.6730546297322855}. Best is trial 34 with value: 0.953020075907139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.099694 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:13:17,781] Trial 41 finished with value: 0.9537193383688891 and parameters: {'n_estimators': 776, 'learning_rate': 0.2474734463578534, 'num_leaves': 254, 'max_depth': 11, 'min_data_in_leaf': 27, 'feature_fraction': 0.9705794622727767, 'bagging_fraction': 0.9902941018551482, 'bagging_freq': 4, 'lambda_l1': 1.980336759437574, 'lambda_l2': 3.7867789760564383}. Best is trial 34 with value: 0.953020075907139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.128758 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:14:17,157] Trial 42 finished with value: 0.9551586212761556 and parameters: {'n_estimators': 521, 'learning_rate': 0.24492420998374542, 'num_leaves': 242, 'max_depth': 12, 'min_data_in_leaf': 24, 'feature_fraction': 0.9968916463442901, 'bagging_fraction': 0.9064986640410386, 'bagging_freq': 1, 'lambda_l1': 1.6841117574817999, 'lambda_l2': 4.193736079731493}. Best is trial 34 with value: 0.953020075907139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.129583 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:15:43,584] Trial 43 finished with value: 0.9533289310514053 and parameters: {'n_estimators': 831, 'learning_rate': 0.2885190529527798, 'num_leaves': 229, 'max_depth': -1, 'min_data_in_leaf': 20, 'feature_fraction': 0.9580162754737885, 'bagging_fraction': 0.9982527232831665, 'bagging_freq': 7, 'lambda_l1': 1.375126184058951, 'lambda_l2': 4.537040985262283}. Best is trial 34 with value: 0.953020075907139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.126769 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:16:34,176] Trial 44 finished with value: 0.9614621133152599 and parameters: {'n_estimators': 853, 'learning_rate': 0.29858557329111046, 'num_leaves': 189, 'max_depth': 2, 'min_data_in_leaf': 19, 'feature_fraction': 0.923174674017935, 'bagging_fraction': 0.9534580230525035, 'bagging_freq': 7, 'lambda_l1': 1.4928778085692613, 'lambda_l2': 4.546283146964443}. Best is trial 34 with value: 0.953020075907139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.101522 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:17:51,020] Trial 45 finished with value: 0.9551884862804846 and parameters: {'n_estimators': 699, 'learning_rate': 0.28586913881504533, 'num_leaves': 227, 'max_depth': -1, 'min_data_in_leaf': 15, 'feature_fraction': 0.8718944697880515, 'bagging_fraction': 0.9987167752770948, 'bagging_freq': 8, 'lambda_l1': 2.1589558748222695, 'lambda_l2': 4.950944377035269}. Best is trial 34 with value: 0.953020075907139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.132332 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:19:04,008] Trial 46 finished with value: 0.9617146154491989 and parameters: {'n_estimators': 883, 'learning_rate': 0.2829575265724725, 'num_leaves': 212, 'max_depth': 4, 'min_data_in_leaf': 21, 'feature_fraction': 0.9614436990705622, 'bagging_fraction': 0.9381081828521838, 'bagging_freq': 8, 'lambda_l1': 1.8410692200376388, 'lambda_l2': 4.532693130650603}. Best is trial 34 with value: 0.953020075907139.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.123304 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:19:43,070] Trial 47 finished with value: 0.9338682929160048 and parameters: {'n_estimators': 821, 'learning_rate': 0.25425872786436887, 'num_leaves': 242, 'max_depth': 1, 'min_data_in_leaf': 18, 'feature_fraction': 0.8255270157606998, 'bagging_fraction': 0.9639133209857684, 'bagging_freq': 6, 'lambda_l1': 0.8472350809425995, 'lambda_l2': 4.695939923947451}. Best is trial 47 with value: 0.9338682929160048.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.132253 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:20:18,702] Trial 48 finished with value: 0.9338036781556204 and parameters: {'n_estimators': 744, 'learning_rate': 0.2545244786177426, 'num_leaves': 246, 'max_depth': 1, 'min_data_in_leaf': 10, 'feature_fraction': 0.8240406890951261, 'bagging_fraction': 0.8743814446734592, 'bagging_freq': 5, 'lambda_l1': 0.8892036978267366, 'lambda_l2': 4.228305239103268}. Best is trial 48 with value: 0.9338036781556204.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.097536 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2029
[LightGBM] [Info] Number of data points in the train set: 4894519, number of used features: 185
[LightGBM] [Info] Start training from score 99.999040


[I 2024-06-27 04:20:50,175] Trial 49 finished with value: 0.9335688013568963 and parameters: {'n_estimators': 672, 'learning_rate': 0.2540899515936272, 'num_leaves': 112, 'max_depth': 1, 'min_data_in_leaf': 11, 'feature_fraction': 0.817072661897291, 'bagging_fraction': 0.8739444832072618, 'bagging_freq': 6, 'lambda_l1': 0.8367563018412693, 'lambda_l2': 4.998220284984137}. Best is trial 49 with value: 0.9335688013568963.


Best trial: 0.9335688013568963
Best parameters: {'n_estimators': 672, 'learning_rate': 0.2540899515936272, 'num_leaves': 112, 'max_depth': 1, 'min_data_in_leaf': 11, 'feature_fraction': 0.817072661897291, 'bagging_fraction': 0.8739444832072618, 'bagging_freq': 6, 'lambda_l1': 0.8367563018412693, 'lambda_l2': 4.998220284984137}
time: 1h 10min 24s (started: 2024-06-27 03:10:25 +09:00)


# Model Compare

In [61]:
study_catboost.best_params

{'iterations': 6644,
 'od_wait': 1695,
 'learning_rate': 0.06237746134009798,
 'reg_lambda': 2.6397327403374717,
 'subsample': 0.4915506441230588,
 'random_strength': 48.24674432037646,
 'depth': 4,
 'min_data_in_leaf': 3,
 'leaf_estimation_iterations': 15,
 'bagging_temperature': 0.07131640531121744,
 'colsample_bylevel': 0.4261590739334049}

time: 0 ns (started: 2024-06-27 07:21:05 +09:00)


In [45]:
study_catboost_best_params = {'iterations': 6644,
                              'od_wait': 1695,
                              'learning_rate': 0.06237746134009798,
                              'reg_lambda': 2.6397327403374717,
                              'subsample': 0.4915506441230588,
                              'random_strength': 48.24674432037646,
                              'depth': 4,
                              'min_data_in_leaf': 3,
                              'leaf_estimation_iterations': 15,
                              'bagging_temperature': 0.07131640531121744,
                              'colsample_bylevel': 0.4261590739334049}

time: 0 ns (started: 2024-06-27 13:49:54 +09:00)


In [65]:
study_lightgbm.best_params

{'n_estimators': 672,
 'learning_rate': 0.2540899515936272,
 'num_leaves': 112,
 'max_depth': 1,
 'min_data_in_leaf': 11,
 'feature_fraction': 0.817072661897291,
 'bagging_fraction': 0.8739444832072618,
 'bagging_freq': 6,
 'lambda_l1': 0.8367563018412693,
 'lambda_l2': 4.998220284984137}

time: 0 ns (started: 2024-06-27 07:24:09 +09:00)


In [46]:
study_lightgbm_best_params = {'n_estimators': 672,
                              'learning_rate': 0.2540899515936272,
                              'num_leaves': 112,
                              'max_depth': 1,
                              'min_data_in_leaf': 11,
                              'feature_fraction': 0.817072661897291,
                              'bagging_fraction': 0.8739444832072618,
                              'bagging_freq': 6,
                              'lambda_l1': 0.8367563018412693,
                              'lambda_l2': 4.998220284984137}

time: 0 ns (started: 2024-06-27 13:50:12 +09:00)


In [67]:
study_xgboost.best_params

{'n_estimators': 934,
 'learning_rate': 0.28175665598309774,
 'max_depth': 9,
 'min_child_weight': 1,
 'subsample': 0.9292853154718507,
 'colsample_bytree': 0.9424638756724492,
 'gamma': 2.0270422174538107,
 'reg_alpha': 0.5941655845608484,
 'reg_lambda': 2.145028502301874}

time: 0 ns (started: 2024-06-27 12:21:05 +09:00)


In [47]:
study_xgboost_best_params = {'n_estimators': 934,
                             'learning_rate': 0.28175665598309774,
                             'max_depth': 9,
                             'min_child_weight': 1,
                             'subsample': 0.9292853154718507,
                             'colsample_bytree': 0.9424638756724492,
                             'gamma': 2.0270422174538107,
                             'reg_alpha': 0.5941655845608484,
                             'reg_lambda': 2.145028502301874}

time: 0 ns (started: 2024-06-27 13:50:30 +09:00)


## Eval Models

In [77]:
def pearson_corrcoef(y_true, y_pred):
    return np.corrcoef(y_true.squeeze(), y_pred.squeeze())[0, 1]

def evaluate_model(model, X_train, X_valid, y_train, y_valid, early_stopping=True):
    if early_stopping:
        model.fit(X_train, y_train, 
                  eval_set=[(X_valid, y_valid)], 
                  early_stopping_rounds=25, 
                  verbose=False)
    else:
        model.fit(X_train, y_train, 
                  eval_set=[(X_valid, y_valid)])
    
    predictions = model.predict(X_valid)
    rscore = pearson_corrcoef(y_valid, predictions)
    return rscore

catboost_model = CatBoostRegressor(**study_catboost.best_params)
xgboost_model = XGBRegressor(**study_xgboost.best_params)
lightgbm_model = LGBMRegressor(**study_lightgbm.best_params)

catboost_rscore = evaluate_model(catboost_model, X_train, X_valid, y_train, y_valid, early_stopping=True)
xgboost_rscore = evaluate_model(xgboost_model, X_train, X_valid, y_train, y_valid, early_stopping=False)
lightgbm_rscore = evaluate_model(lightgbm_model, X_train, X_valid, y_train, y_valid, early_stopping=False)

print(f"CatBoost Pearson correlation coefficient: {catboost_rscore}")
print(f"XGBoost Pearson correlation coefficient: {xgboost_rscore}")
print(f"LightGBM Pearson correlation coefficient: {lightgbm_rscore}")

[0]	validation_0-rmse:73.86474
[1]	validation_0-rmse:53.20470
[2]	validation_0-rmse:38.45468
[3]	validation_0-rmse:28.00462
[4]	validation_0-rmse:20.74678
[5]	validation_0-rmse:15.81235
[6]	validation_0-rmse:12.50904
[7]	validation_0-rmse:10.39212
[8]	validation_0-rmse:9.14442
[9]	validation_0-rmse:8.41317
[10]	validation_0-rmse:8.06978
[11]	validation_0-rmse:7.85157
[12]	validation_0-rmse:7.73178
[13]	validation_0-rmse:7.66012
[14]	validation_0-rmse:7.63827
[15]	validation_0-rmse:7.61266
[16]	validation_0-rmse:7.59197
[17]	validation_0-rmse:7.58415
[18]	validation_0-rmse:7.56603
[19]	validation_0-rmse:7.57124
[20]	validation_0-rmse:7.56109
[21]	validation_0-rmse:7.55358
[22]	validation_0-rmse:7.54548
[23]	validation_0-rmse:7.53493
[24]	validation_0-rmse:7.51740
[25]	validation_0-rmse:7.54882
[26]	validation_0-rmse:7.53553
[27]	validation_0-rmse:7.52651
[28]	validation_0-rmse:7.51966
[29]	validation_0-rmse:7.52864
[30]	validation_0-rmse:7.52156
[31]	validation_0-rmse:7.51635
[32]	valid

# Test Result

In [49]:
X_test

<2829478x185 sparse matrix of type '<class 'numpy.float64'>'
	with 44550939 stored elements in Compressed Sparse Row format>

time: 0 ns (started: 2024-06-27 13:51:49 +09:00)


X_train과 X_valid를 concat한 후 희소행렬로 만듦

In [None]:
X_final_train = np.concatenate((X_train.todense(), X_valid.todense()), axis=0)
y_final_train = np.concatenate((y_train, y_valid), axis=0)

X_final_train_sparse = csr_matrix(X_final_train)

최종으로 얻은 best_parameter로 각 모델을 학습하고, pkl형태로 저장

In [None]:
# Train the final CatBoost model
final_catboost_model = CatBoostRegressor(**study_catboost_best_params)
final_catboost_model.fit(X_final_train_sparse, y_final_train, verbose=False)
joblib.dump(final_catboost_model, 'final_catboost_model.pkl')

In [None]:
# Train the final XGBoost model
final_xgboost_dmatrix = DMatrix(X_final_train_sparse, label=y_final_train)
final_xgboost_model = XGBRegressor(**study_xgboost_best_params, objective='reg:squarederror')
final_xgboost_model.fit(X_final_train_sparse, y_final_train, verbose=False)
joblib.dump(final_xgboost_model, 'final_xgboost_model.pkl')

In [None]:
# Train the final LightGBM model
final_lightgbm_dataset = Dataset(X_final_train_sparse, label=y_final_train)
final_lightgbm_model = LGBMRegressor(**study_lightgbm_best_params)
final_lightgbm_model.fit(X_final_train_sparse, y_final_train)
joblib.dump(final_lightgbm_model, 'final_lightgbm_model.pkl')

## Final Submission

In [64]:
res = pd.read_csv('../data/electric_test.csv')

res['elect'] = final_catboost_model.predict(X_test)

res.to_csv('240312.csv', index=False)

time: 17.1 s (started: 2024-06-27 15:21:22 +09:00)


최종 예측 간 상관관계 확인

In [71]:
cat_res = final_catboost_model.predict(X_test)
xgb_res = final_xgboost_model.predict(X_test)
lgbm_res = final_lightgbm_model.predict(X_test)

time: 37 s (started: 2024-06-27 15:29:40 +09:00)


In [72]:
cat_res

array([101.19458772,  89.08676444,  79.17274224, ..., 129.30724906,
       119.67231616, 108.86112048])

time: 0 ns (started: 2024-06-27 15:30:17 +09:00)


In [73]:
xgb_res

array([ 99.679184,  85.89056 ,  77.196106, ..., 130.03871 , 121.47312 ,
       109.411736], dtype=float32)

time: 0 ns (started: 2024-06-27 15:30:17 +09:00)


In [74]:
lgbm_res

array([103.49096976,  90.8188584 ,  83.39900934, ..., 134.53283669,
       126.64563799, 110.38179456])

time: 0 ns (started: 2024-06-27 15:30:17 +09:00)


In [79]:
from itertools import combinations

results = [cat_res, xgb_res, lgbm_res]

for res1, res2 in combinations(results, 2):
    print(np.corrcoef(res1, res2)[0, 1])

0.9699433887448536
0.956070214462164
0.9381451263514439
time: 94 ms (started: 2024-06-27 15:32:36 +09:00)
