**About :** Trains XGBoost models.

**TODO**:
- Find a better negative-sampling technique.

In [1]:
cd ../src

/workspace/kaggle_otto_rs/src


In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import os
# Expose only GPU 0 to this kernel. This runs in its own cell, ahead of the
# cell that imports cudf / xgboost.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [4]:
import os
import gc
import cudf
import json
import glob
import numba
import xgboost
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from pandarallel import pandarallel
from numerize.numerize import numerize

# Hide FutureWarning messages in cell outputs.
warnings.simplefilter(action="ignore", category=FutureWarning)
# Allow DataFrames to display up to 500 columns and 500 rows.
pd.options.display.max_columns = 500
pd.options.display.max_rows = 500
# Start pandarallel with 32 workers and no progress bar.
pandarallel.initialize(nb_workers=32, progress_bar=False)

INFO: Pandarallel will run on 32 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [5]:
from params import *

from utils.metrics import get_coverage
from utils.plot import plot_importances
from utils.load import *
from utils.logger import save_config, prepare_log_folder, create_logger, init_neptune
from utils.torch import seed_everything

from training.xgb import kfold, optimize

In [6]:
import matplotlib
# Switch matplotlib to the non-interactive Agg backend.
matplotlib.use('Agg')

### Load

In [7]:
# Other version tags that are currently disabled:
# VERSION = "clicks_cv3-tv5.10"
# VERSION = "cv3-tv5.10"
# VERSION = "cv6-tv6.11"
# TEST_VERSION = "cv3-tv5.10"

# VERSION selects the fts_val_* / fts_extra_* feature folders and
# TEST_VERSION the fts_test_* folder (see the glob patterns below).
VERSION = "cv7-tv5.11"
TEST_VERSION = "cv7-tv5.11"

#### Train data
- Idea: negative sampling could reuse candidates from lower (earlier) versions.

In [8]:
# Label column to train on; one of "gt_clicks", "gt_carts", "gt_orders".
TARGET = "gt_orders"
# Forwarded to the training code as Config.pos_ratio.
POS_RATIO = 0.2

In [9]:
# Log folders keyed by target name (only referenced from a commented-out
# Config.probs_file option further down).
PROBS_PATHS = dict(
    gt_orders="../logs/2023-01-14/9/",
    gt_carts="../logs/2023-01-14/8/",
)

In [10]:
# Disabled alternative: a target-specific feature folder for non-click targets.
# if TARGET != "gt_clicks":
#     REGEX = f"../output/features/fts_val_{VERSION}_{TARGET}/*"
# else:

# Glob pattern of the fts_val feature files; passed to kfold as its first argument.
REGEX = f"../output/features/fts_val_{VERSION}/*"
len(glob.glob(REGEX))  # number of files matching the pattern

39

In [11]:
# Glob pattern of the fts_extra feature files; exposed to training as Config.extra_regex.
EXTRA_REGEX = f"../output/features/fts_extra_{VERSION}/*"
len(glob.glob(EXTRA_REGEX))  # number of files matching the pattern

111

In [12]:
# Glob pattern of the fts_test feature files; passed to kfold as its second argument.
TEST_REGEX = f"../output/features/fts_test_{TEST_VERSION}/*"
len(glob.glob(TEST_REGEX))  # number of files matching the pattern

101

In [13]:
# filter_dfs(
#     f"../output/features/fts_val_{VERSION}/*",
#     target="gt_carts",
# )

In [14]:
# def load_sessions(regex):
#     dfs = []
#     for idx, chunk_file in enumerate(glob.glob(regex)):
#         df = cudf.read_parquet(chunk_file, columns=["session"])
#         dfs.append(df.drop_duplicates(keep="first"))

#     return cudf.concat(dfs).reset_index(drop=True)

# sessions = load_sessions( f"../output/features/fts_val_{VERSION}/*")

# from sklearn.model_selection import KFold
# K = 4

# kf = KFold(n_splits=K, shuffle=True, random_state=42)
# splits = kf.split(sessions)

# sessions['fold'] = -1
# for i, (_, val_idx) in enumerate(splits):
#     sessions.loc[val_idx, "fold"] = i

# sessions.to_csv(f"../input/folds_{K}.csv", index=False)

In [15]:
# df_train = load_parquets_cudf_folds(
#     f"../output/features/fts_val_{VERSION}/*",
#     "../input/folds_4.csv",
#     fold=0,
#     pos_ratio=0.1,
#     target=TARGET,
#     train_only=True,
#     columns=['session', 'candidates', 'gt_clicks', 'gt_carts', 'gt_orders'],
#     max_n=1,
# )

In [16]:
# df_train = df_train.sort_values('session', ignore_index=True)
# group = df_train[['session', 'candidates']].groupby('session').size().values

In [17]:
def _build_base_features():
    """Return the base feature names, in the exact order of the former literal list.

    The names follow a regular scheme (event type x weighting x aggregation),
    so they are generated instead of being enumerated by hand. Order is kept
    identical on purpose, since it defines the model's column order.
    """
    event_types = ("clicks", "carts", "orders")
    weightings = ("pos-log", "type-163", "lastday", "time", "recsys")
    aggregations = ("mean", "sum", "max")
    matrices = (
        "123_temporal_20", "123_type136_20", "12__20", "123_type0.590.5_20",
        "cpu-90", "cpu-95", "cpu-99",
        "gpu-116", "gpu-115", "gpu-93",
        "gpu-217", "gpu-226", "gpu-232",
        "gpu-239", "gpu-700", "gpu-701",
    )
    # "" is the variant without a weighting, followed by one variant per weighting.
    matrix_variants = ("",) + tuple(f"_{w}" for w in weightings)

    names = []

    # Popularity features: the raw block has no "time" weighting ...
    for event in event_types:
        names += [f"{event}_popularity_w_{w}" for w in weightings if w != "time"]
    # ... and the "_w" block has neither "lastday" nor "time".
    for event in event_types:
        names += [
            f"{event}_popularity_w_{w}_w"
            for w in weightings
            if w not in ("lastday", "time")
        ]

    # Weight features.
    names += [f"w_{w}" for w in weightings]

    # Matrix features: every variant x every aggregation.
    for matrix in matrices:
        names += [
            f"matrix_{matrix}{variant}_{agg}"
            for variant in matrix_variants
            for agg in aggregations
        ]

    # Candidate / session count features.
    names += [
        "candidate_clicks_before", "candidate_carts_before",
        "candidate_orders_before", "candidate_*_before",
        "n_views", "n_clicks", "n_carts", "n_orders",
    ]

    # Rank features: popularity ("_rank" block, then "_w_rank" block), weights, matrices.
    for suffix in ("_rank", "_w_rank"):
        names += [
            f"{event}_popularity_w_{w}{suffix}"
            for event in event_types
            for w in weightings
        ]
    names += [f"w_{w}_rank" for w in weightings]
    for matrix in matrices:
        names += [f"matrix_{matrix}{variant}_mean_rank" for variant in matrix_variants]

    return names


FEATURES = _build_base_features()  # REMOVE CORRELATED

In [18]:
def _build_popularity_and_embed_features():
    """Return the time-window popularity and embedding feature names.

    Names are generated in the same order as the former literal list:
    popularity per event type and window, then, per embedding, the five
    "last_i" features followed by every weighting x aggregation pair.
    """
    event_types = ("clicks", "carts", "orders")
    windows = ("week", "day", "hour", "hour/day", "day/week")
    embeddings = ("1-9_64_cartbuy", "1_64", "1-5_64")
    weightings = ("pos-log", "type-163", "lastday", "time", "recsys")
    aggregations = ("mean", "sum", "max")

    names = [
        f"popularity_{window}_{event}"
        for event in event_types
        for window in windows
    ]
    for embedding in embeddings:
        names += [f"embed_{embedding}_last_{i}" for i in range(5)]
        names += [
            f"embed_{embedding}_{w}_{agg}"
            for w in weightings
            for agg in aggregations
        ]
    return names


FEATURES += _build_popularity_and_embed_features()


In [19]:
def _build_popularity_and_embed_rank_features():
    """Return the "_rank" names of the popularity and embedding features.

    Same order as the former literal list: popularity ranks per event type
    and window, then, per embedding, the five "last_i" ranks followed by one
    "mean" rank per weighting.
    """
    event_types = ("clicks", "carts", "orders")
    windows = ("week", "day", "hour", "hour/day", "day/week")
    embeddings = ("1-9_64_cartbuy", "1_64", "1-5_64")
    weightings = ("pos-log", "type-163", "lastday", "time", "recsys")

    names = [
        f"popularity_{window}_{event}_rank"
        for event in event_types
        for window in windows
    ]
    for embedding in embeddings:
        names += [f"embed_{embedding}_last_{i}_rank" for i in range(5)]
        names += [f"embed_{embedding}_{w}_mean_rank" for w in weightings]
    return names


FEATURES += _build_popularity_and_embed_rank_features()

In [20]:
def _build_w2v_and_extra_matrix_features():
    """Return the w2v similarity names and the gpu-155 / gpu-157 matrix names.

    Same order as the former literal list: the w2v features, their "_rank"
    counterparts, the aggregated matrix features, then the matrix "mean" ranks.
    """
    w2v_names = (
        "w2v_sim_1", "w2v_sim_2", "w2v_sim_3",
        "w2v_sim_wgt_1", "w2v_sim_wgt_2",
        "w2v_sim_last", "w2v_sim_type_1",
    )
    matrices = ("gpu-155", "gpu-157")
    # "" is the variant without a weighting, followed by one variant per weighting.
    variants = ("", "_pos-log", "_type-163", "_lastday", "_time", "_recsys")
    aggregations = ("mean", "sum", "max")

    names = list(w2v_names)
    names += [f"{name}_rank" for name in w2v_names]
    for matrix in matrices:
        names += [
            f"matrix_{matrix}{variant}_{agg}"
            for variant in variants
            for agg in aggregations
        ]
    for matrix in matrices:
        names += [f"matrix_{matrix}{variant}_mean_rank" for variant in variants]
    return names


FEATURES += _build_w2v_and_extra_matrix_features()

In [21]:
# df_train = load_parquets_cudf_folds(
#     f"../output/features/fts_val_{VERSION}/*",
#     "../input/folds_4.csv",
#     fold=0,
#     pos_ratio=POS_RATIO,
#     target=TARGET,
#     max_n=1,
#     train_only=True,
# )
# [c for c in df_train.columns if c not in FEATURES ]

In [22]:
# Collect every feature whose name contains one of the unwanted substrings
# (all matches of the first pattern, then all matches of the second) ...
TO_REMOVE = [
    name
    for pattern in ("popularity_w_time", "popularity_w_lastday_w")
    for name in FEATURES
    if pattern in name
]

# ... and keep the remaining features in their original order.
FEATURES = [name for name in FEATURES if name not in TO_REMOVE]

In [23]:
# Sanity check: number of features left after the pattern-based removal.
len(FEATURES)

626

In [24]:
# df_train = cudf.from_pandas(df_train)
# corr = df_train[FEATURES].corr()
# corr = corr.to_pandas()
# corr = corr.values

# mask = np.zeros_like(corr, dtype=bool)
# mask[np.triu_indices_from(mask)] = True
# corr[mask] = 0

In [25]:
# TH = 0.99

# for i in range(len(corr)):
#     for j in range(len(corr)):
#         if corr[i, j] > TH:
#             if FEATURES[i] in TO_REMOVE or FEATURES[j] in TO_REMOVE:
#                 continue
#             print(FEATURES[i], FEATURES[j], f'{corr[i, j] :.3f}')

In [26]:
# df = cudf.read_parquet(glob.glob(REGEX)[0])
# df = df.rename(columns={"clicks_popularity_w_pos-log_rank" : "clicks_popularity_w_pos-log_rank_ref"})
# df = cudf.read_parquet(glob.glob(TEST_REGEX)[0])

# from data.fe import add_rank_feature
# for c in ['clicks_popularity_w_pos-log', 'clicks_popularity_w_type-163', 'clicks_popularity_w_lastday']:
#     if c + "_rank" not in df.columns:
#         print(f'Add rank ft for {c}')
#         df = df.reset_index(drop=True)
#         add_rank_feature(df, c)
# (df['clicks_popularity_w_pos-log_rank'] == df['clicks_popularity_w_pos-log_rank_ref']).all()

# for f in tqdm(glob.glob(TEST_REGEX)):
#     dft = cudf.read_parquet(f, columns=['clicks_popularity_w_pos-log_rank'])

### Params

In [27]:
# Hyper-parameters per model name; Config picks its entry with PARAMS[model].
PARAMS = {
    "xgb": {
        "learning_rate": 0.01,
        "max_depth": 8,
        "subsample": 0.9,  # candidates: 0.7 / 0.8 / 0.9
        "colsample_bytree": 0.7,  # candidates: 0.7 / 0.8 / 0.9
        "reg_alpha": 0.01,
        "reg_lambda": 0.1,
        "min_child_weight": 0,
        # "gamma": 0.01,
        "scale_pos_weight": 1,
        "eval_metric": "auc",
        "objective": "binary:logistic",
        "tree_method": "gpu_hist",
        "predictor": "gpu_predictor",
        "random_state": 42,
    },
}

In [28]:
# Features excluded from training (Config.features filters on this list).
# Only the first 100 entries are kept; the list currently holds exactly 100
# names, so shortening the slice drops names from the end.
# NOTE(review): presumably ordered by an importance ranking - confirm.
TO_REMOVE = [
    "popularity_week_orders",
    "candidate_*_before",
    "popularity_week_carts",
    "popularity_week_clicks_rank",
    "popularity_week_orders_rank",
    "popularity_week_clicks",
    "popularity_week_carts_rank",
    "matrix_gpu-226_lastday_max",
    "matrix_cpu-90_lastday_sum",
    "matrix_cpu-90_lastday_max",
    "matrix_12__20_lastday_max",
    "matrix_gpu-700_sum",
    "matrix_123_type136_20_lastday_max",
    "matrix_12__20_time_sum",
    "matrix_gpu-226_lastday_sum",
    "matrix_123_type136_20_lastday_sum",
    "matrix_12__20_lastday_sum",
    "matrix_gpu-217_lastday_max",
    "matrix_cpu-90_time_sum",
    "matrix_cpu-90_max",
    "matrix_gpu-217_lastday_sum",
    "matrix_cpu-90_type-163_sum",
    "matrix_123_type136_20_time_sum",
    "matrix_12__20_type-163_sum",
    "matrix_12__20_pos-log_sum",
    "matrix_cpu-90_lastday_mean",
    "matrix_gpu-226_max",
    "matrix_cpu-90_pos-log_sum",
    "popularity_hour_orders",
    "matrix_12__20_pos-log_max",
    "matrix_gpu-226_time_sum",
    "matrix_12__20_time_mean",
    "matrix_cpu-90_pos-log_max",
    "matrix_cpu-99_time_sum",
    "matrix_123_type0.590.5_20_time_sum",
    "matrix_123_type136_20_type-163_max",
    "matrix_gpu-701_lastday_max",
    "matrix_gpu-217_sum",
    "matrix_gpu-226_sum",
    "matrix_123_type136_20_time_mean",
    "matrix_123_type136_20_type-163_sum",
    "matrix_cpu-99_lastday_sum",
    "matrix_123_type0.590.5_20_lastday_max",
    "matrix_cpu-90_type-163_max",
    "matrix_gpu-226_type-163_sum",
    "matrix_12__20_sum",
    "matrix_gpu-217_time_sum",
    "popularity_hour/day_orders",
    "matrix_12__20_type-163_max",
    "matrix_123_type136_20_pos-log_sum",
    "matrix_gpu-700_lastday_max",
    "matrix_123_type136_20_lastday_mean",
    "matrix_gpu-700_max",
    "matrix_123_type136_20_time_max",
    "matrix_cpu-90_time_max",
    "matrix_cpu-90_sum",
    "matrix_12__20_lastday_mean",
    "matrix_gpu-226_pos-log_sum",
    "matrix_12__20_max",
    "matrix_cpu-99_lastday_max",
    "matrix_cpu-95_lastday_max",
    "matrix_cpu-90_time_mean",
    "matrix_gpu-217_pos-log_sum",
    "matrix_12__20_type-163_mean",
    "matrix_123_temporal_20_time_sum",
    "matrix_cpu-90_pos-log_mean",
    "matrix_123_type136_20_pos-log_max",
    "matrix_123_type136_20_sum",
    "matrix_gpu-226_lastday_mean",
    "matrix_gpu-701_time_sum",
    "matrix_gpu-700_pos-log_sum",
    "matrix_cpu-95_lastday_sum",
    "matrix_12__20_time_max",
    "matrix_gpu-217_type-163_max",
    "matrix_cpu-95_time_mean",
    "matrix_123_type0.590.5_20_lastday_sum",
    "matrix_123_type0.590.5_20_type-163_max",
    "matrix_gpu-226_time_mean",
    "matrix_gpu-701_lastday_sum",
    "matrix_123_temporal_20_lastday_max",
    "matrix_123_type0.590.5_20_pos-log_sum",
    "matrix_cpu-95_time_sum",
    "matrix_123_type0.590.5_20_type-163_sum",
    "matrix_cpu-99_lastday_mean",
    "matrix_cpu-99_time_mean",
    "matrix_12__20_pos-log_mean",
    "matrix_gpu-217_time_mean",
    "matrix_cpu-95_lastday_mean",
    "matrix_gpu-226_type-163_max",
    "matrix_123_temporal_20_lastday_sum",
    "matrix_gpu-217_type-163_sum",
    "matrix_123_type0.590.5_20_type-163_mean",
    "matrix_gpu-226_pos-log_max",
    "matrix_123_type136_20_max",
    "matrix_gpu-226_time_max",
    "matrix_gpu-217_lastday_mean",
    "matrix_cpu-99_type-163_sum",
    "matrix_cpu-99_pos-log_sum",
    "matrix_gpu-700_type-163_max",
    "matrix_gpu-217_time_max",
][:100]

In [29]:
# Run configuration: a plain attribute container handed to init_neptune,
# save_config and kfold in the main cell.
class Config:
    seed = 42
    version = VERSION

    # Fold assignment file and number of folds.
    # NOTE(review): k presumably has to match the folds stored in folds_file - confirm.
    folds_file = "../input/folds_4.csv"
    k = 4
    mode = ""

    # All declared features minus the names listed in TO_REMOVE.
    features = FEATURES
    features = [ft for ft in features if ft not in TO_REMOVE]

    cat_features = []

    target = TARGET
    pos_ratio = POS_RATIO

    use_gt_sessions = True  # filter out sessions with no gt
    use_gt_pos = False  # add candidates from gt
    gt_regex = ""

    # Model name; also the key used to pick the hyper-parameters from PARAMS.
    model = "xgb"

    params = PARAMS[model]

    # NOTE(review): use_es presumably toggles early stopping - confirm in training.xgb.
    use_es = True
    num_boost_round = 10000

    probs_file = None  # alternative: PROBS_PATHS[target]
    probs_mode = ""  # alternatives: "head", "rank_40"
    restrict_all = False

    # Fold indices consumed by the training code.
    # NOTE(review): folds_optimize / n_trials presumably drive the
    # hyper-parameter search - confirm in training.xgb.
    selected_folds = [0, 1, 2, 3]
    folds_optimize = [0, 1, 2, 3]
    n_trials = 25

    pca_components = 0

    # Extra feature files matched by EXTRA_REGEX.
    # NOTE(review): extra_prop is presumably the share of extra samples used - confirm.
    use_extra = True
    extra_regex = EXTRA_REGEX
    extra_prop = 0.5

### Main

In [30]:
# DEBUG_MORE is forwarded to kfold as its `debug` argument.
DEBUG_MORE = False
# When DEBUG is True the main cell skips the log folder, Neptune run and config dump.
DEBUG = False

In [None]:
%%time

log_folder = None
run = None
if not DEBUG:
    log_folder = prepare_log_folder(LOG_PATH)
    run = init_neptune(Config, log_folder)
    print(f'Logging results to {log_folder}')
    create_logger(directory=log_folder, name="logs.txt")

    save_config(Config, log_folder + 'config')

ft_imp = kfold(REGEX, TEST_REGEX, Config, log_folder=log_folder, debug=DEBUG_MORE, run=run)

if run is not None:
    run.stop()


https://app.neptune.ai/KagglingTheo/Otto-Recommender-System/e/OTTO-78
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.

Logging results to ../logs/2023-01-24/17/


Using 502662 extra samples
Train / val session inter 0

-------------  Optimizing XGB Model  -------------


    -> 1.51M training candidates
    -> 3.25M validation candidates



[32m[I 2023-01-24 22:23:43,660][0m A new study created in memory with name: no-name-b40597d8-405f-4a91-96d1-b7d9d730067a[0m


[0]	val-auc:0.95365
[100]	val-auc:0.95891
[200]	val-auc:0.96067
[300]	val-auc:0.96233
[400]	val-auc:0.96403
[500]	val-auc:0.96533
[600]	val-auc:0.96617
[700]	val-auc:0.96677
[800]	val-auc:0.96717
[900]	val-auc:0.96746
[1000]	val-auc:0.96768
[1100]	val-auc:0.96784
[1200]	val-auc:0.96797
[1300]	val-auc:0.96809
[1400]	val-auc:0.96818
[1500]	val-auc:0.96827
[1600]	val-auc:0.96835
[1700]	val-auc:0.96842
[1800]	val-auc:0.96848
[1900]	val-auc:0.96853
[2000]	val-auc:0.96858
[2100]	val-auc:0.96863
[2200]	val-auc:0.96867
[2300]	val-auc:0.96872
[2400]	val-auc:0.96876
[2500]	val-auc:0.96879
[2600]	val-auc:0.96882
[2700]	val-auc:0.96884
[2800]	val-auc:0.96888
[2900]	val-auc:0.96889
[3000]	val-auc:0.96892
[3100]	val-auc:0.96893
[3200]	val-auc:0.96896
[3300]	val-auc:0.96897
[3400]	val-auc:0.96899
[3500]	val-auc:0.96901
[3600]	val-auc:0.96903
[3700]	val-auc:0.96905
[3800]	val-auc:0.96907
[3900]	val-auc:0.96909
[4000]	val-auc:0.96910
[4100]	val-auc:0.96911
[4200]	val-auc:0.96913
[4300]	val-auc:0.96914


[32m[I 2023-01-24 22:32:19,208][0m Trial 0 finished with value: 0.6684145747326855 and parameters: {'max_depth': 7, 'subsample': 0.9135116765200668, 'colsample_bytree': 0.8157391931616342, 'reg_alpha': 2.335440282172311, 'reg_lambda': 7.507458315050695}. Best is trial 0 with value: 0.6684145747326855.[0m


[0]	val-auc:0.95621
[100]	val-auc:0.96083
[200]	val-auc:0.96234
[300]	val-auc:0.96394
[400]	val-auc:0.96546
[500]	val-auc:0.96655
[600]	val-auc:0.96715
[700]	val-auc:0.96762
[800]	val-auc:0.96792
[900]	val-auc:0.96813
[1000]	val-auc:0.96782
[1069]	val-auc:0.96809

-> gt_orders  -  Recall : 0.6675

Params : {'max_depth': 9, 'subsample': '0.682', 'colsample_bytree': '0.946', 'reg_alpha': '3.33e+00', 'reg_lambda': '1.20e-01'},



[32m[I 2023-01-24 22:35:35,746][0m Trial 1 finished with value: 0.6675171158234826 and parameters: {'max_depth': 9, 'subsample': 0.6817518928328737, 'colsample_bytree': 0.9461715717906725, 'reg_alpha': 3.329101900723337, 'reg_lambda': 0.11973841648755788}. Best is trial 0 with value: 0.6684145747326855.[0m


[0]	val-auc:0.95081
[100]	val-auc:0.95775
[200]	val-auc:0.95954
[300]	val-auc:0.96128
[400]	val-auc:0.96307
[500]	val-auc:0.96447
[600]	val-auc:0.96544
[700]	val-auc:0.96611
[800]	val-auc:0.96654
[900]	val-auc:0.96687
[1000]	val-auc:0.96711
[1100]	val-auc:0.96729
[1200]	val-auc:0.96746
[1300]	val-auc:0.96761
[1400]	val-auc:0.96773
[1500]	val-auc:0.96784
[1600]	val-auc:0.96793
[1700]	val-auc:0.96801
[1800]	val-auc:0.96808
[1900]	val-auc:0.96815
[2000]	val-auc:0.96821
[2100]	val-auc:0.96826
[2200]	val-auc:0.96832
[2300]	val-auc:0.96836
[2400]	val-auc:0.96841
[2500]	val-auc:0.96845
[2600]	val-auc:0.96849
[2700]	val-auc:0.96853
[2800]	val-auc:0.96857
[2900]	val-auc:0.96860
[3000]	val-auc:0.96863
[3100]	val-auc:0.96865
[3200]	val-auc:0.96869
[3300]	val-auc:0.96871
[3400]	val-auc:0.96874
[3500]	val-auc:0.96876
[3600]	val-auc:0.96879
[3700]	val-auc:0.96880
[3800]	val-auc:0.96882
[3900]	val-auc:0.96884
[4000]	val-auc:0.96886
[4100]	val-auc:0.96888
[4200]	val-auc:0.96891
[4300]	val-auc:0.96892


[32m[I 2023-01-24 22:43:27,239][0m Trial 2 finished with value: 0.6683248288417651 and parameters: {'max_depth': 6, 'subsample': 0.7967339704036409, 'colsample_bytree': 0.6147018482475863, 'reg_alpha': 0.17040677345344987, 'reg_lambda': 11.139988370624714}. Best is trial 0 with value: 0.6684145747326855.[0m


[0]	val-auc:0.94856
[100]	val-auc:0.95789
[200]	val-auc:0.95998
[300]	val-auc:0.96200
[400]	val-auc:0.96402
[500]	val-auc:0.96541
[600]	val-auc:0.96628
[700]	val-auc:0.96686
[800]	val-auc:0.96726
[900]	val-auc:0.96754
[1000]	val-auc:0.96775
[1100]	val-auc:0.96792
[1200]	val-auc:0.96807
[1300]	val-auc:0.96818
[1400]	val-auc:0.96829
[1500]	val-auc:0.96837
[1600]	val-auc:0.96845
[1700]	val-auc:0.96852
[1800]	val-auc:0.96859
[1900]	val-auc:0.96864
[2000]	val-auc:0.96868
[2100]	val-auc:0.96874
[2200]	val-auc:0.96878
[2300]	val-auc:0.96882
[2400]	val-auc:0.96884
[2500]	val-auc:0.96887
[2600]	val-auc:0.96890
[2700]	val-auc:0.96893
[2800]	val-auc:0.96895
[2900]	val-auc:0.96897
[3000]	val-auc:0.96900
[3100]	val-auc:0.96901
[3200]	val-auc:0.96904
[3300]	val-auc:0.96905
[3400]	val-auc:0.96908
[3500]	val-auc:0.96909
[3600]	val-auc:0.96910
[3700]	val-auc:0.96911
[3800]	val-auc:0.96913
[3900]	val-auc:0.96915
[4000]	val-auc:0.96916
[4100]	val-auc:0.96917
[4200]	val-auc:0.96917
[4252]	val-auc:0.96917


[32m[I 2023-01-24 22:51:15,350][0m Trial 3 finished with value: 0.6683376496833252 and parameters: {'max_depth': 8, 'subsample': 0.6178608248312815, 'colsample_bytree': 0.7957195338178975, 'reg_alpha': 1.3657453701902824, 'reg_lambda': 43.89921642587502}. Best is trial 0 with value: 0.6684145747326855.[0m


[0]	val-auc:0.95384
[100]	val-auc:0.95929
[200]	val-auc:0.96122
[300]	val-auc:0.96305
[400]	val-auc:0.96483
[500]	val-auc:0.96602
[600]	val-auc:0.96682
[700]	val-auc:0.96737
[800]	val-auc:0.96774
[900]	val-auc:0.96799
[1000]	val-auc:0.96819
[1100]	val-auc:0.96834
[1200]	val-auc:0.96848
[1300]	val-auc:0.96858
[1400]	val-auc:0.96866
[1500]	val-auc:0.96874
[1600]	val-auc:0.96881
[1700]	val-auc:0.96888
[1800]	val-auc:0.96893
[1900]	val-auc:0.96898
[2000]	val-auc:0.96901
[2100]	val-auc:0.96905
[2200]	val-auc:0.96909
[2300]	val-auc:0.96911
[2400]	val-auc:0.96913
[2500]	val-auc:0.96915
[2600]	val-auc:0.96917
[2700]	val-auc:0.96919
[2800]	val-auc:0.96921
[2900]	val-auc:0.96922
[3000]	val-auc:0.96923
[3100]	val-auc:0.96923
[3200]	val-auc:0.96924
[3300]	val-auc:0.96925
[3400]	val-auc:0.96926
[3500]	val-auc:0.96927
[3600]	val-auc:0.96928
[3700]	val-auc:0.96928
[3800]	val-auc:0.96930
[3900]	val-auc:0.96931
[4000]	val-auc:0.96931
[4038]	val-auc:0.96931

-> gt_orders  -  Recall : 0.6689

Params : {'

[32m[I 2023-01-24 23:00:31,762][0m Trial 4 finished with value: 0.6688633041872869 and parameters: {'max_depth': 9, 'subsample': 0.8069592352473326, 'colsample_bytree': 0.848912304036441, 'reg_alpha': 9.146857627498331, 'reg_lambda': 8.761877967368239}. Best is trial 4 with value: 0.6688633041872869.[0m


[0]	val-auc:0.95069
[100]	val-auc:0.95768
[200]	val-auc:0.95959
[300]	val-auc:0.96139
[400]	val-auc:0.96315
[500]	val-auc:0.96455
[600]	val-auc:0.96552
[700]	val-auc:0.96617
[800]	val-auc:0.96661
[900]	val-auc:0.96692
[1000]	val-auc:0.96717
[1100]	val-auc:0.96737
[1200]	val-auc:0.96753
[1300]	val-auc:0.96766
[1400]	val-auc:0.96777
[1500]	val-auc:0.96787
[1600]	val-auc:0.96794
[1700]	val-auc:0.96801
[1800]	val-auc:0.96808
[1900]	val-auc:0.96812
[2000]	val-auc:0.96814
[2100]	val-auc:0.96819
[2200]	val-auc:0.96820
[2300]	val-auc:0.96826
[2400]	val-auc:0.96828
[2500]	val-auc:0.96831
[2600]	val-auc:0.96835
[2700]	val-auc:0.96839
[2800]	val-auc:0.96844
[2900]	val-auc:0.96844
[3000]	val-auc:0.96848
[3100]	val-auc:0.96851
[3200]	val-auc:0.96854
[3300]	val-auc:0.96857
[3400]	val-auc:0.96860
[3500]	val-auc:0.96862
[3600]	val-auc:0.96865
[3700]	val-auc:0.96868
[3800]	val-auc:0.96869
[3900]	val-auc:0.96872
[4000]	val-auc:0.96874
[4100]	val-auc:0.96874
[4200]	val-auc:0.96876
[4300]	val-auc:0.96877


[32m[I 2023-01-24 23:06:07,326][0m Trial 5 finished with value: 0.6678504577040437 and parameters: {'max_depth': 6, 'subsample': 0.644496570747001, 'colsample_bytree': 0.6993001314396556, 'reg_alpha': 0.07693481461303038, 'reg_lambda': 2.444602346736312}. Best is trial 4 with value: 0.6688633041872869.[0m


[0]	val-auc:0.95317
[100]	val-auc:0.95948
[200]	val-auc:0.96094
[300]	val-auc:0.96255
[400]	val-auc:0.96412
[500]	val-auc:0.96538
[600]	val-auc:0.96624
[700]	val-auc:0.96681


Done