**About :** Trains XGBoost models.

**TODO**:
- Find a better negative-sampling technique.

In [1]:
cd ../src

/workspace/kaggle_otto_rs/src


In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import os
# Restrict this kernel to GPU 0. Set in its own cell ahead of the cudf / xgboost
# imports in the next cell; NOTE(review): presumably so the GPU libraries only ever
# see this device - confirm before reordering cells.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [4]:
import os
import gc
import cudf
import json
import glob
import numba
import xgboost
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from pandarallel import pandarallel
from numerize.numerize import numerize

# Notebook-wide setup: silence FutureWarning, widen pandas display limits,
# and start the pandarallel worker pool.
warnings.simplefilter("ignore", FutureWarning)
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 500)
pandarallel.initialize(progress_bar=False, nb_workers=32)

INFO: Pandarallel will run on 32 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [6]:
from params import *

from utils.metrics import get_coverage
from utils.plot import plot_importances
from utils.load import *
from utils.logger import save_config, prepare_log_folder, create_logger, init_neptune
from utils.torch import seed_everything

from training.xgb import kfold, optimize

In [7]:
import matplotlib
# Select the non-interactive "Agg" backend. Note that matplotlib.pyplot was already
# imported in the imports cell above, so this switches the backend after the fact.
matplotlib.use('Agg')

### Load

In [8]:
# Feature-set version tag. It is interpolated into the feature directories globbed
# further down (../output/features/fts_val_{VERSION}... and fts_test_{VERSION})
# and stored as Config.version.
# VERSION = "clicks_cv3-tv5.10"
VERSION = "cv3-tv5.10"

# Version tag used to build GT_REGEX (../output/features/fts_val_{GT_VERSION}/*).
GT_VERSION = "gt.7"

#### Train data
- Negative sampling could use candidates from lower versions.

In [9]:
# Stored as Config.pos_ratio. NOTE(review): presumably the positive ratio targeted
# by negative sampling in the loaders, with 0 meaning "no resampling" - confirm.
POS_RATIO = 0.
# Target column to train on. Stored as Config.target; any value other than
# "gt_clicks" also switches REGEX to the target-specific feature directory.
TARGET = "gt_orders"   # "gt_clicks", "gt_carts", "gt_orders"

In [10]:
# Log folders of earlier runs, keyed by target. Only referenced from commented-out
# code in this notebook (probs_file=PROBS_PATHS[TARGET]); Config.probs_file is None.
# There is no "gt_clicks" entry, so re-enabling those lookups with
# TARGET = "gt_clicks" would raise KeyError.
PROBS_PATHS = {
    "gt_orders": "../logs/2023-01-14/9/",
    "gt_carts": "../logs/2023-01-14/8/",
}

In [11]:
# filter_dfs(
#     f"../output/features/fts_val_{VERSION}/*",
#     target="gt_carts",
# )

In [12]:
# def load_sessions(regex):
#     dfs = []
#     for idx, chunk_file in enumerate(glob.glob(regex)):
#         df = cudf.read_parquet(chunk_file, columns=["session"])
#         dfs.append(df.drop_duplicates(keep="first"))

#     return cudf.concat(dfs).reset_index(drop=True)

# sessions = load_sessions( f"../output/features/fts_val_{VERSION}/*")

# from sklearn.model_selection import KFold
# K = 4

# kf = KFold(n_splits=K, shuffle=True, random_state=42)
# splits = kf.split(sessions)

# sessions['fold'] = -1
# for i, (_, val_idx) in enumerate(splits):
#     sessions.loc[val_idx, "fold"] = i

# sessions.to_csv(f"../input/folds_{K}.csv", index=False)

In [13]:
# df_train = load_parquets_cudf_folds(
#     f"../output/features/fts_val_{VERSION}/*",
#     "../input/folds_4.csv",
#     fold=0,
#     pos_ratio=POS_RATIO,
#     target=TARGET,
#     max_n=1,
#     train_only=True,
#     probs_file=PROBS_PATHS[TARGET]
# )

In [14]:
# df_train = load_parquets_cudf_folds(
#     f"../output/features/fts_val_{VERSION}/*",
#     "../input/folds_4.csv",
#     fold=0,
#     pos_ratio=0.1,
#     target=TARGET,
#     train_only=True,
#     columns=['session', 'candidates', 'gt_clicks', 'gt_carts', 'gt_orders'],
#     max_n=1,
# )

In [15]:
# df_train = df_train.sort_values('session', ignore_index=True)
# group = df_train[['session', 'candidates']].groupby('session').size().values

In [16]:
# Base list of feature column names. It is extended by the two `FEATURES += [...]`
# cells below, filtered through TO_REMOVE, and finally assigned to Config.features.
# Judging by the names only (NOTE(review): confirm against the feature pipeline), the
# rows are: popularity columns, "w_*" weights, "matrix_*" mean/sum/max aggregates,
# candidate/session counters, then the "_rank" variants of those columns.
FEATURES = [  # TODO: remove correlated features
    'clicks_popularity_w_pos-log', 'clicks_popularity_w_type-163', 'clicks_popularity_w_lastday', 'clicks_popularity_w_recsys', 
    'carts_popularity_w_pos-log', 'carts_popularity_w_type-163', 'carts_popularity_w_lastday', 'carts_popularity_w_recsys', 
    'orders_popularity_w_pos-log', 'orders_popularity_w_type-163', 'orders_popularity_w_lastday', 'orders_popularity_w_recsys', 
    'clicks_popularity_w_pos-log_w', 'clicks_popularity_w_type-163_w', 'clicks_popularity_w_recsys_w', 
    'carts_popularity_w_pos-log_w', 'carts_popularity_w_type-163_w', 'carts_popularity_w_recsys_w', 
    'orders_popularity_w_pos-log_w', 'orders_popularity_w_type-163_w', 'orders_popularity_w_recsys_w',
    'w_pos-log', 'w_type-163', 'w_lastday', 'w_time', 'w_recsys',
    'matrix_123_temporal_20_mean', 'matrix_123_temporal_20_sum', 'matrix_123_temporal_20_max', 'matrix_123_temporal_20_pos-log_mean', 'matrix_123_temporal_20_pos-log_sum', 'matrix_123_temporal_20_pos-log_max', 'matrix_123_temporal_20_type-163_mean', 'matrix_123_temporal_20_type-163_sum', 'matrix_123_temporal_20_type-163_max', 'matrix_123_temporal_20_lastday_mean', 'matrix_123_temporal_20_lastday_sum', 'matrix_123_temporal_20_lastday_max', 'matrix_123_temporal_20_time_mean', 'matrix_123_temporal_20_time_sum', 'matrix_123_temporal_20_time_max', 'matrix_123_temporal_20_recsys_mean', 'matrix_123_temporal_20_recsys_sum', 'matrix_123_temporal_20_recsys_max',
    'matrix_123_type136_20_mean', 'matrix_123_type136_20_sum', 'matrix_123_type136_20_max', 'matrix_123_type136_20_pos-log_mean', 'matrix_123_type136_20_pos-log_sum', 'matrix_123_type136_20_pos-log_max', 'matrix_123_type136_20_type-163_mean', 'matrix_123_type136_20_type-163_sum', 'matrix_123_type136_20_type-163_max', 'matrix_123_type136_20_lastday_mean', 'matrix_123_type136_20_lastday_sum', 'matrix_123_type136_20_lastday_max', 'matrix_123_type136_20_time_mean', 'matrix_123_type136_20_time_sum', 'matrix_123_type136_20_time_max', 'matrix_123_type136_20_recsys_mean', 'matrix_123_type136_20_recsys_sum', 'matrix_123_type136_20_recsys_max',
    'matrix_12__20_mean', 'matrix_12__20_sum', 'matrix_12__20_max', 'matrix_12__20_pos-log_mean', 'matrix_12__20_pos-log_sum', 'matrix_12__20_pos-log_max', 'matrix_12__20_type-163_mean', 'matrix_12__20_type-163_sum', 'matrix_12__20_type-163_max', 'matrix_12__20_lastday_mean', 'matrix_12__20_lastday_sum', 'matrix_12__20_lastday_max', 'matrix_12__20_time_mean', 'matrix_12__20_time_sum', 'matrix_12__20_time_max', 'matrix_12__20_recsys_mean', 'matrix_12__20_recsys_sum', 'matrix_12__20_recsys_max',
    'matrix_123_type0.590.5_20_mean', 'matrix_123_type0.590.5_20_sum', 'matrix_123_type0.590.5_20_max', 'matrix_123_type0.590.5_20_pos-log_mean', 'matrix_123_type0.590.5_20_pos-log_sum', 'matrix_123_type0.590.5_20_pos-log_max', 'matrix_123_type0.590.5_20_type-163_mean', 'matrix_123_type0.590.5_20_type-163_sum', 'matrix_123_type0.590.5_20_type-163_max', 'matrix_123_type0.590.5_20_lastday_mean', 'matrix_123_type0.590.5_20_lastday_sum', 'matrix_123_type0.590.5_20_lastday_max', 'matrix_123_type0.590.5_20_time_mean', 'matrix_123_type0.590.5_20_time_sum', 'matrix_123_type0.590.5_20_time_max', 'matrix_123_type0.590.5_20_recsys_mean', 'matrix_123_type0.590.5_20_recsys_sum', 'matrix_123_type0.590.5_20_recsys_max',
    'matrix_cpu-90_mean', 'matrix_cpu-90_sum', 'matrix_cpu-90_max', 'matrix_cpu-90_pos-log_mean', 'matrix_cpu-90_pos-log_sum', 'matrix_cpu-90_pos-log_max', 'matrix_cpu-90_type-163_mean', 'matrix_cpu-90_type-163_sum', 'matrix_cpu-90_type-163_max', 'matrix_cpu-90_lastday_mean', 'matrix_cpu-90_lastday_sum', 'matrix_cpu-90_lastday_max', 'matrix_cpu-90_time_mean', 'matrix_cpu-90_time_sum', 'matrix_cpu-90_time_max', 'matrix_cpu-90_recsys_mean', 'matrix_cpu-90_recsys_sum', 'matrix_cpu-90_recsys_max',
    'matrix_cpu-95_mean', 'matrix_cpu-95_sum', 'matrix_cpu-95_max', 'matrix_cpu-95_pos-log_mean', 'matrix_cpu-95_pos-log_sum', 'matrix_cpu-95_pos-log_max', 'matrix_cpu-95_type-163_mean', 'matrix_cpu-95_type-163_sum', 'matrix_cpu-95_type-163_max', 'matrix_cpu-95_lastday_mean', 'matrix_cpu-95_lastday_sum', 'matrix_cpu-95_lastday_max', 'matrix_cpu-95_time_mean', 'matrix_cpu-95_time_sum', 'matrix_cpu-95_time_max', 'matrix_cpu-95_recsys_mean', 'matrix_cpu-95_recsys_sum', 'matrix_cpu-95_recsys_max',
    'matrix_cpu-99_mean', 'matrix_cpu-99_sum', 'matrix_cpu-99_max', 'matrix_cpu-99_pos-log_mean', 'matrix_cpu-99_pos-log_sum', 'matrix_cpu-99_pos-log_max', 'matrix_cpu-99_type-163_mean', 'matrix_cpu-99_type-163_sum', 'matrix_cpu-99_type-163_max', 'matrix_cpu-99_lastday_mean', 'matrix_cpu-99_lastday_sum', 'matrix_cpu-99_lastday_max', 'matrix_cpu-99_time_mean', 'matrix_cpu-99_time_sum', 'matrix_cpu-99_time_max', 'matrix_cpu-99_recsys_mean', 'matrix_cpu-99_recsys_sum', 'matrix_cpu-99_recsys_max',
    'matrix_gpu-116_mean', 'matrix_gpu-116_sum', 'matrix_gpu-116_max', 'matrix_gpu-116_pos-log_mean', 'matrix_gpu-116_pos-log_sum', 'matrix_gpu-116_pos-log_max', 'matrix_gpu-116_type-163_mean', 'matrix_gpu-116_type-163_sum', 'matrix_gpu-116_type-163_max', 'matrix_gpu-116_lastday_mean', 'matrix_gpu-116_lastday_sum', 'matrix_gpu-116_lastday_max', 'matrix_gpu-116_time_mean', 'matrix_gpu-116_time_sum', 'matrix_gpu-116_time_max', 'matrix_gpu-116_recsys_mean', 'matrix_gpu-116_recsys_sum', 'matrix_gpu-116_recsys_max',
    'matrix_gpu-115_mean', 'matrix_gpu-115_sum', 'matrix_gpu-115_max', 'matrix_gpu-115_pos-log_mean', 'matrix_gpu-115_pos-log_sum', 'matrix_gpu-115_pos-log_max', 'matrix_gpu-115_type-163_mean', 'matrix_gpu-115_type-163_sum', 'matrix_gpu-115_type-163_max', 'matrix_gpu-115_lastday_mean', 'matrix_gpu-115_lastday_sum', 'matrix_gpu-115_lastday_max', 'matrix_gpu-115_time_mean', 'matrix_gpu-115_time_sum', 'matrix_gpu-115_time_max', 'matrix_gpu-115_recsys_mean', 'matrix_gpu-115_recsys_sum', 'matrix_gpu-115_recsys_max',
    'matrix_gpu-93_mean', 'matrix_gpu-93_sum', 'matrix_gpu-93_max', 'matrix_gpu-93_pos-log_mean', 'matrix_gpu-93_pos-log_sum', 'matrix_gpu-93_pos-log_max', 'matrix_gpu-93_type-163_mean', 'matrix_gpu-93_type-163_sum', 'matrix_gpu-93_type-163_max', 'matrix_gpu-93_lastday_mean', 'matrix_gpu-93_lastday_sum', 'matrix_gpu-93_lastday_max', 'matrix_gpu-93_time_mean', 'matrix_gpu-93_time_sum', 'matrix_gpu-93_time_max', 'matrix_gpu-93_recsys_mean', 'matrix_gpu-93_recsys_sum', 'matrix_gpu-93_recsys_max',
    'matrix_gpu-217_mean', 'matrix_gpu-217_sum', 'matrix_gpu-217_max', 'matrix_gpu-217_pos-log_mean', 'matrix_gpu-217_pos-log_sum', 'matrix_gpu-217_pos-log_max', 'matrix_gpu-217_type-163_mean', 'matrix_gpu-217_type-163_sum', 'matrix_gpu-217_type-163_max', 'matrix_gpu-217_lastday_mean', 'matrix_gpu-217_lastday_sum', 'matrix_gpu-217_lastday_max', 'matrix_gpu-217_time_mean', 'matrix_gpu-217_time_sum', 'matrix_gpu-217_time_max', 'matrix_gpu-217_recsys_mean', 'matrix_gpu-217_recsys_sum', 'matrix_gpu-217_recsys_max',
    'matrix_gpu-226_mean','matrix_gpu-226_sum','matrix_gpu-226_max','matrix_gpu-226_pos-log_mean','matrix_gpu-226_pos-log_sum','matrix_gpu-226_pos-log_max','matrix_gpu-226_type-163_mean','matrix_gpu-226_type-163_sum','matrix_gpu-226_type-163_max','matrix_gpu-226_lastday_mean','matrix_gpu-226_lastday_sum','matrix_gpu-226_lastday_max','matrix_gpu-226_time_mean','matrix_gpu-226_time_sum','matrix_gpu-226_time_max','matrix_gpu-226_recsys_mean','matrix_gpu-226_recsys_sum','matrix_gpu-226_recsys_max',
    'matrix_gpu-232_mean', 'matrix_gpu-232_sum', 'matrix_gpu-232_max', 'matrix_gpu-232_pos-log_mean', 'matrix_gpu-232_pos-log_sum', 'matrix_gpu-232_pos-log_max', 'matrix_gpu-232_type-163_mean', 'matrix_gpu-232_type-163_sum', 'matrix_gpu-232_type-163_max', 'matrix_gpu-232_lastday_mean', 'matrix_gpu-232_lastday_sum', 'matrix_gpu-232_lastday_max', 'matrix_gpu-232_time_mean', 'matrix_gpu-232_time_sum', 'matrix_gpu-232_time_max', 'matrix_gpu-232_recsys_mean', 'matrix_gpu-232_recsys_sum', 'matrix_gpu-232_recsys_max',
    'matrix_gpu-239_mean', 'matrix_gpu-239_sum', 'matrix_gpu-239_max', 'matrix_gpu-239_pos-log_mean', 'matrix_gpu-239_pos-log_sum', 'matrix_gpu-239_pos-log_max', 'matrix_gpu-239_type-163_mean', 'matrix_gpu-239_type-163_sum', 'matrix_gpu-239_type-163_max', 'matrix_gpu-239_lastday_mean', 'matrix_gpu-239_lastday_sum', 'matrix_gpu-239_lastday_max', 'matrix_gpu-239_time_mean', 'matrix_gpu-239_time_sum', 'matrix_gpu-239_time_max', 'matrix_gpu-239_recsys_mean', 'matrix_gpu-239_recsys_sum', 'matrix_gpu-239_recsys_max',
    'matrix_gpu-700_mean', 'matrix_gpu-700_sum', 'matrix_gpu-700_max', 'matrix_gpu-700_pos-log_mean', 'matrix_gpu-700_pos-log_sum', 'matrix_gpu-700_pos-log_max', 'matrix_gpu-700_type-163_mean', 'matrix_gpu-700_type-163_sum', 'matrix_gpu-700_type-163_max', 'matrix_gpu-700_lastday_mean', 'matrix_gpu-700_lastday_sum', 'matrix_gpu-700_lastday_max', 'matrix_gpu-700_time_mean', 'matrix_gpu-700_time_sum', 'matrix_gpu-700_time_max', 'matrix_gpu-700_recsys_mean', 'matrix_gpu-700_recsys_sum', 'matrix_gpu-700_recsys_max',
    'matrix_gpu-701_mean', 'matrix_gpu-701_sum', 'matrix_gpu-701_max', 'matrix_gpu-701_pos-log_mean', 'matrix_gpu-701_pos-log_sum', 'matrix_gpu-701_pos-log_max', 'matrix_gpu-701_type-163_mean', 'matrix_gpu-701_type-163_sum', 'matrix_gpu-701_type-163_max', 'matrix_gpu-701_lastday_mean', 'matrix_gpu-701_lastday_sum', 'matrix_gpu-701_lastday_max', 'matrix_gpu-701_time_mean', 'matrix_gpu-701_time_sum', 'matrix_gpu-701_time_max', 'matrix_gpu-701_recsys_mean', 'matrix_gpu-701_recsys_sum', 'matrix_gpu-701_recsys_max',
    'candidate_clicks_before', 'candidate_carts_before', 'candidate_orders_before', 'candidate_*_before', 'n_views', 'n_clicks', 'n_carts', 'n_orders',
    'clicks_popularity_w_pos-log_rank', 'clicks_popularity_w_type-163_rank', 'clicks_popularity_w_lastday_rank', 'clicks_popularity_w_time_rank', 'clicks_popularity_w_recsys_rank', 'carts_popularity_w_pos-log_rank', 'carts_popularity_w_type-163_rank', 'carts_popularity_w_lastday_rank', 'carts_popularity_w_time_rank', 'carts_popularity_w_recsys_rank', 'orders_popularity_w_pos-log_rank', 'orders_popularity_w_type-163_rank', 'orders_popularity_w_lastday_rank', 'orders_popularity_w_time_rank', 'orders_popularity_w_recsys_rank',
    'clicks_popularity_w_pos-log_w_rank', 'clicks_popularity_w_type-163_w_rank', 'clicks_popularity_w_lastday_w_rank', 'clicks_popularity_w_time_w_rank', 'clicks_popularity_w_recsys_w_rank', 'carts_popularity_w_pos-log_w_rank', 'carts_popularity_w_type-163_w_rank', 'carts_popularity_w_lastday_w_rank', 'carts_popularity_w_time_w_rank', 'carts_popularity_w_recsys_w_rank', 'orders_popularity_w_pos-log_w_rank', 'orders_popularity_w_type-163_w_rank', 'orders_popularity_w_lastday_w_rank', 'orders_popularity_w_time_w_rank', 'orders_popularity_w_recsys_w_rank',
    'w_pos-log_rank', 'w_type-163_rank', 'w_lastday_rank', 'w_time_rank', 'w_recsys_rank',
    'matrix_123_temporal_20_mean_rank', 'matrix_123_temporal_20_pos-log_mean_rank', 'matrix_123_temporal_20_type-163_mean_rank', 'matrix_123_temporal_20_lastday_mean_rank', 'matrix_123_temporal_20_time_mean_rank', 'matrix_123_temporal_20_recsys_mean_rank', 'matrix_123_type136_20_mean_rank', 'matrix_123_type136_20_pos-log_mean_rank', 'matrix_123_type136_20_type-163_mean_rank', 'matrix_123_type136_20_lastday_mean_rank', 'matrix_123_type136_20_time_mean_rank', 'matrix_123_type136_20_recsys_mean_rank', 
    'matrix_12__20_mean_rank', 'matrix_12__20_pos-log_mean_rank', 'matrix_12__20_type-163_mean_rank', 'matrix_12__20_lastday_mean_rank', 'matrix_12__20_time_mean_rank', 'matrix_12__20_recsys_mean_rank', 'matrix_123_type0.590.5_20_mean_rank', 'matrix_123_type0.590.5_20_pos-log_mean_rank', 'matrix_123_type0.590.5_20_type-163_mean_rank', 'matrix_123_type0.590.5_20_lastday_mean_rank', 'matrix_123_type0.590.5_20_time_mean_rank', 'matrix_123_type0.590.5_20_recsys_mean_rank',
    'matrix_cpu-90_mean_rank', 'matrix_cpu-90_pos-log_mean_rank', 'matrix_cpu-90_type-163_mean_rank', 'matrix_cpu-90_lastday_mean_rank', 'matrix_cpu-90_time_mean_rank', 'matrix_cpu-90_recsys_mean_rank', 'matrix_cpu-95_mean_rank', 'matrix_cpu-95_pos-log_mean_rank', 'matrix_cpu-95_type-163_mean_rank', 'matrix_cpu-95_lastday_mean_rank', 'matrix_cpu-95_time_mean_rank', 'matrix_cpu-95_recsys_mean_rank', 'matrix_cpu-99_mean_rank', 'matrix_cpu-99_pos-log_mean_rank', 'matrix_cpu-99_type-163_mean_rank', 'matrix_cpu-99_lastday_mean_rank', 'matrix_cpu-99_time_mean_rank', 'matrix_cpu-99_recsys_mean_rank',
    'matrix_gpu-116_mean_rank', 'matrix_gpu-116_pos-log_mean_rank', 'matrix_gpu-116_type-163_mean_rank', 'matrix_gpu-116_lastday_mean_rank', 'matrix_gpu-116_time_mean_rank', 'matrix_gpu-116_recsys_mean_rank', 'matrix_gpu-115_mean_rank', 'matrix_gpu-115_pos-log_mean_rank', 'matrix_gpu-115_type-163_mean_rank', 'matrix_gpu-115_lastday_mean_rank', 'matrix_gpu-115_time_mean_rank', 'matrix_gpu-115_recsys_mean_rank', 'matrix_gpu-93_mean_rank', 'matrix_gpu-93_pos-log_mean_rank', 'matrix_gpu-93_type-163_mean_rank', 'matrix_gpu-93_lastday_mean_rank', 'matrix_gpu-93_time_mean_rank', 'matrix_gpu-93_recsys_mean_rank',
    'matrix_gpu-217_mean_rank', 'matrix_gpu-217_pos-log_mean_rank', 'matrix_gpu-217_type-163_mean_rank', 'matrix_gpu-217_lastday_mean_rank', 'matrix_gpu-217_time_mean_rank', 'matrix_gpu-217_recsys_mean_rank', 'matrix_gpu-226_mean_rank', 'matrix_gpu-226_pos-log_mean_rank', 'matrix_gpu-226_type-163_mean_rank', 'matrix_gpu-226_lastday_mean_rank', 'matrix_gpu-226_time_mean_rank', 'matrix_gpu-226_recsys_mean_rank', 'matrix_gpu-232_mean_rank', 'matrix_gpu-232_pos-log_mean_rank', 'matrix_gpu-232_type-163_mean_rank', 'matrix_gpu-232_lastday_mean_rank', 'matrix_gpu-232_time_mean_rank', 'matrix_gpu-232_recsys_mean_rank',
    'matrix_gpu-239_mean_rank', 'matrix_gpu-239_pos-log_mean_rank', 'matrix_gpu-239_type-163_mean_rank', 'matrix_gpu-239_lastday_mean_rank', 'matrix_gpu-239_time_mean_rank', 'matrix_gpu-239_recsys_mean_rank', 'matrix_gpu-700_mean_rank', 'matrix_gpu-700_pos-log_mean_rank', 'matrix_gpu-700_type-163_mean_rank', 'matrix_gpu-700_lastday_mean_rank', 'matrix_gpu-700_time_mean_rank', 'matrix_gpu-700_recsys_mean_rank', 'matrix_gpu-701_mean_rank', 'matrix_gpu-701_pos-log_mean_rank', 'matrix_gpu-701_type-163_mean_rank', 'matrix_gpu-701_lastday_mean_rank', 'matrix_gpu-701_time_mean_rank', 'matrix_gpu-701_recsys_mean_rank',
]

In [17]:
# Appends the "popularity_{week,day,hour,...}_{clicks,carts,orders}" columns and three
# "embed_*" families (last_0..last_4 plus mean/sum/max aggregates) to FEATURES.
# This cell is not idempotent: re-running it appends the same names a second time.
FEATURES += [
    'popularity_week_clicks','popularity_day_clicks','popularity_hour_clicks','popularity_hour/day_clicks','popularity_day/week_clicks','popularity_week_carts','popularity_day_carts','popularity_hour_carts','popularity_hour/day_carts','popularity_day/week_carts','popularity_week_orders','popularity_day_orders','popularity_hour_orders','popularity_hour/day_orders','popularity_day/week_orders',
    'embed_1-9_64_cartbuy_last_0', 'embed_1-9_64_cartbuy_last_1', 'embed_1-9_64_cartbuy_last_2', 'embed_1-9_64_cartbuy_last_3', 'embed_1-9_64_cartbuy_last_4', 'embed_1-9_64_cartbuy_pos-log_mean', 'embed_1-9_64_cartbuy_pos-log_sum', 'embed_1-9_64_cartbuy_pos-log_max', 'embed_1-9_64_cartbuy_type-163_mean', 'embed_1-9_64_cartbuy_type-163_sum', 'embed_1-9_64_cartbuy_type-163_max', 'embed_1-9_64_cartbuy_lastday_mean', 'embed_1-9_64_cartbuy_lastday_sum', 'embed_1-9_64_cartbuy_lastday_max', 'embed_1-9_64_cartbuy_time_mean', 'embed_1-9_64_cartbuy_time_sum', 'embed_1-9_64_cartbuy_time_max', 'embed_1-9_64_cartbuy_recsys_mean', 'embed_1-9_64_cartbuy_recsys_sum', 'embed_1-9_64_cartbuy_recsys_max',
    'embed_1_64_last_0', 'embed_1_64_last_1', 'embed_1_64_last_2', 'embed_1_64_last_3', 'embed_1_64_last_4', 'embed_1_64_pos-log_mean', 'embed_1_64_pos-log_sum', 'embed_1_64_pos-log_max', 'embed_1_64_type-163_mean', 'embed_1_64_type-163_sum', 'embed_1_64_type-163_max', 'embed_1_64_lastday_mean', 'embed_1_64_lastday_sum', 'embed_1_64_lastday_max', 'embed_1_64_time_mean', 'embed_1_64_time_sum', 'embed_1_64_time_max', 'embed_1_64_recsys_mean', 'embed_1_64_recsys_sum', 'embed_1_64_recsys_max',
    'embed_1-5_64_last_0', 'embed_1-5_64_last_1', 'embed_1-5_64_last_2', 'embed_1-5_64_last_3', 'embed_1-5_64_last_4', 'embed_1-5_64_pos-log_mean', 'embed_1-5_64_pos-log_sum', 'embed_1-5_64_pos-log_max', 'embed_1-5_64_type-163_mean', 'embed_1-5_64_type-163_sum', 'embed_1-5_64_type-163_max', 'embed_1-5_64_lastday_mean', 'embed_1-5_64_lastday_sum', 'embed_1-5_64_lastday_max', 'embed_1-5_64_time_mean', 'embed_1-5_64_time_sum', 'embed_1-5_64_time_max', 'embed_1-5_64_recsys_mean', 'embed_1-5_64_recsys_sum', 'embed_1-5_64_recsys_max',
]


In [18]:
# Appends the "_rank" variants of the popularity_* and embed_* columns to FEATURES.
# For the embed_* families only the last_0..last_4 and *_mean columns have a rank here.
# This cell is not idempotent: re-running it appends the same names a second time.
FEATURES += [
    'popularity_week_clicks_rank', 'popularity_day_clicks_rank', 'popularity_hour_clicks_rank', 'popularity_hour/day_clicks_rank', 'popularity_day/week_clicks_rank', 'popularity_week_carts_rank', 'popularity_day_carts_rank', 'popularity_hour_carts_rank', 'popularity_hour/day_carts_rank', 'popularity_day/week_carts_rank', 'popularity_week_orders_rank', 'popularity_day_orders_rank', 'popularity_hour_orders_rank', 'popularity_hour/day_orders_rank', 'popularity_day/week_orders_rank',
    'embed_1-9_64_cartbuy_last_0_rank', 'embed_1-9_64_cartbuy_last_1_rank', 'embed_1-9_64_cartbuy_last_2_rank', 'embed_1-9_64_cartbuy_last_3_rank', 'embed_1-9_64_cartbuy_last_4_rank', 'embed_1-9_64_cartbuy_pos-log_mean_rank', 'embed_1-9_64_cartbuy_type-163_mean_rank', 'embed_1-9_64_cartbuy_lastday_mean_rank', 'embed_1-9_64_cartbuy_time_mean_rank', 'embed_1-9_64_cartbuy_recsys_mean_rank', 'embed_1_64_last_0_rank', 'embed_1_64_last_1_rank', 'embed_1_64_last_2_rank', 'embed_1_64_last_3_rank', 'embed_1_64_last_4_rank', 'embed_1_64_pos-log_mean_rank', 'embed_1_64_type-163_mean_rank', 'embed_1_64_lastday_mean_rank', 'embed_1_64_time_mean_rank', 'embed_1_64_recsys_mean_rank', 'embed_1-5_64_last_0_rank', 'embed_1-5_64_last_1_rank', 'embed_1-5_64_last_2_rank', 'embed_1-5_64_last_3_rank', 'embed_1-5_64_last_4_rank', 'embed_1-5_64_pos-log_mean_rank', 'embed_1-5_64_type-163_mean_rank', 'embed_1-5_64_lastday_mean_rank', 'embed_1-5_64_time_mean_rank', 'embed_1-5_64_recsys_mean_rank'
]

In [19]:
# Collect every feature whose name contains one of the excluded substrings
# (all "popularity_w_time" matches first, then all "popularity_w_lastday_w" matches),
# then drop them from FEATURES while keeping the remaining order.
TO_REMOVE = [
    name
    for pattern in ("popularity_w_time", "popularity_w_lastday_w")
    for name in FEATURES
    if pattern in name
]

FEATURES = [name for name in FEATURES if name not in TO_REMOVE]

In [20]:
len(FEATURES)

564

In [21]:
# df_train = cudf.from_pandas(df_train)
# corr = df_train[FEATURES].corr()
# corr = corr.to_pandas()
# corr = corr.values

# mask = np.zeros_like(corr, dtype=bool)
# mask[np.triu_indices_from(mask)] = True
# corr[mask] = 0

In [22]:
# TH = 0.99

# for i in range(len(corr)):
#     for j in range(len(corr)):
#         if corr[i, j] > TH:
#             if FEATURES[i] in TO_REMOVE or FEATURES[j] in TO_REMOVE:
#                 continue
#             print(FEATURES[i], FEATURES[j], f'{corr[i, j] :.3f}')

In [23]:
# df = cudf.read_parquet(glob.glob(REGEX)[0])
# df = df.rename(columns={"clicks_popularity_w_pos-log_rank" : "clicks_popularity_w_pos-log_rank_ref"})
# df = cudf.read_parquet(glob.glob(TEST_REGEX)[0])

# from data.fe import add_rank_feature
# for c in ['clicks_popularity_w_pos-log', 'clicks_popularity_w_type-163', 'clicks_popularity_w_lastday']:
#     if c + "_rank" not in df.columns:
#         print(f'Add rank ft for {c}')
#         df = df.reset_index(drop=True)
#         add_rank_feature(df, c)
# (df['clicks_popularity_w_pos-log_rank'] == df['clicks_popularity_w_pos-log_rank_ref']).all()

# for f in tqdm(glob.glob(TEST_REGEX)):
#     dft = cudf.read_parquet(f, columns=['clicks_popularity_w_pos-log_rank'])

In [24]:
# Glob pattern for the validation feature files: carts/orders targets read from a
# target-specific directory, clicks from the plain versioned one.
REGEX = (
    f"../output/features/fts_val_{VERSION}/*"
    if TARGET == "gt_clicks"
    else f"../output/features/fts_val_{VERSION}_{TARGET}/*"
)
len(glob.glob(REGEX))

91

In [25]:
# Glob pattern for the test feature files; passed to kfold() in the main cell.
TEST_REGEX = f"../output/features/fts_test_{VERSION}/*"
# Displayed as a sanity check on the number of matching files.
len(glob.glob(TEST_REGEX))

85

In [26]:
# Glob pattern stored as Config.gt_regex.
# NOTE(review): the recorded run shows 0 matching files for this pattern. Config sets
# use_gt_pos = False, so it may simply be unused - confirm before enabling use_gt_pos.
GT_REGEX = f"../output/features/fts_val_{GT_VERSION}/*"
len(glob.glob(GT_REGEX))

0

### Params

In [27]:
# Model hyper-parameters keyed by model name; Config.params picks PARAMS[Config.model].
PARAMS = {
    "xgb": dict(
        learning_rate=0.01,
        max_depth=8,
        subsample=0.9,  # 0.7 / 0.8 / 0.9
        colsample_bytree=0.7,  # 0.7 / 0.8 / 0.9
        reg_alpha=0.01,
        reg_lambda=0.1,
        min_child_weight=0,
        # gamma=0.01,
        # scale_pos_weight=1,
        eval_metric="auc",
        objective="rank:pairwise",  # 'binary:logistic',
        tree_method="gpu_hist",
        predictor="gpu_predictor",
        random_state=42,
    ),
}

In [28]:
# TO_REMOVE = [
#     'candidate_*_before', 'matrix_gpu-700_lastday_max', 'matrix_12__20_lastday_max', 'matrix_gpu-226_lastday_max', 'matrix_cpu-90_lastday_max', 'matrix_gpu-700_sum', 'matrix_gpu-700_pos-log_sum', 'matrix_12__20_lastday_sum',
#     'matrix_gpu-700_pos-log_max', 'matrix_gpu-226_lastday_sum', 'matrix_gpu-700_max', 'matrix_gpu-700_time_sum', 'matrix_123_type136_20_lastday_max', 'matrix_cpu-90_lastday_sum', 'matrix_cpu-90_type-163_max', 'matrix_gpu-700_time_max',
#     'matrix_12__20_time_sum', 'matrix_gpu-700_type-163_sum', 'matrix_gpu-700_lastday_sum', 'matrix_gpu-700_type-163_max', 'matrix_cpu-90_time_sum', 'matrix_123_type136_20_time_sum', 'matrix_gpu-217_lastday_max', 'matrix_12__20_pos-log_sum',
#     'matrix_12__20_type-163_max', 'matrix_12__20_time_max', 'matrix_cpu-90_max', 'matrix_cpu-90_type-163_sum', 'matrix_cpu-99_lastday_max', 'matrix_cpu-90_sum', 'matrix_gpu-226_sum', 'matrix_gpu-226_time_sum', 'matrix_12__20_time_mean',
#     'matrix_12__20_type-163_mean', 'matrix_gpu-700_pos-log_mean', 'matrix_123_type0.590.5_20_lastday_max', 'matrix_gpu-700_time_mean', 'matrix_12__20_type-163_sum', 'matrix_12__20_pos-log_max', 'matrix_123_type136_20_lastday_sum',
#     'matrix_cpu-90_time_mean', 'matrix_gpu-226_max', 'matrix_123_type136_20_type-163_max', 'matrix_gpu-226_type-163_max', 'matrix_gpu-226_lastday_mean', 'matrix_gpu-226_type-163_sum', 'matrix_cpu-99_time_sum', 'matrix_12__20_lastday_mean',
#     'matrix_gpu-700_type-163_mean','matrix_123_type136_20_type-163_sum'
# ][:50]

In [29]:
class Config:
    # Experiment configuration. The class object itself (not an instance) is passed to
    # init_neptune, save_config and kfold in the main cell, so attribute names matter.
    seed = 42
    version = VERSION  # feature-set version tag
    
    # Session -> fold assignment file. NOTE(review): presumably the CSV produced by the
    # commented-out KFold cell above (columns "session", "fold") - confirm.
    folds_file = "../input/folds_4.csv"
    k = 4  # number of folds; matches the "_4" in folds_file
    mode = ""

    features = FEATURES  # feature columns, after the TO_REMOVE filtering
#     features = [ft for ft in features if ft not in TO_REMOVE]

    cat_features = []

    target = TARGET
    pos_ratio = POS_RATIO

    use_gt_sessions = True  # filter out sessions with no gt
    use_gt_pos = False  # add candidates from gt
    gt_regex = GT_REGEX
    
    model = "xgb"  # key into PARAMS

    params = PARAMS[model]

    # NOTE(review): presumably early stopping on / upper bound on boosting rounds - confirm in training.xgb.
    use_es = True
    num_boost_round = 10000
    
    probs_file = None  # PROBS_PATHS[target]
    probs_mode = ""  # "head"  "rank_40"
    restrict_all = False

    # NOTE(review): presumably the folds trained by kfold vs. the folds and trial count
    # used for hyper-parameter search - confirm in training.xgb.
    selected_folds = [0]
    folds_optimize = [0, 1, 2, 3]
    n_trials = 20

### Main

In [30]:
# DEBUG = True skips log-folder creation, Neptune tracking and config saving in the main cell.
DEBUG = False
# DEBUG_MORE = True disables Neptune tracking and is forwarded to kfold(debug=...).
DEBUG_MORE = False

In [None]:
%%time

log_folder = None
run = None
if not DEBUG:
    log_folder = prepare_log_folder(LOG_PATH)
    if not DEBUG_MORE:
        run = init_neptune(Config, log_folder)
    print(f'Logging results to {log_folder}')
    create_logger(directory=log_folder, name="logs.txt")

    save_config(Config, log_folder + 'config')

ft_imp = kfold(REGEX, TEST_REGEX, Config, log_folder=log_folder, debug=DEBUG_MORE, run=run)
run.stop()


https://app.neptune.ai/KagglingTheo/Otto-Recommender-System/e/OTTO-34
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.

Logging results to ../logs/2023-01-21/47/


Files were filtered !
Train / val session inter 0

-------------  Optimizing XGB Model  -------------


    -> 9.75M training candidates
    -> 3.25M validation candidates



[32m[I 2023-01-21 17:25:50,111][0m A new study created in memory with name: no-name-d481fab1-b8d4-44dc-8b26-59f69c1594d6[0m


[0]	val-auc:0.95260
[100]	val-auc:0.96305
[200]	val-auc:0.96405
[300]	val-auc:0.96492
[400]	val-auc:0.96558
[500]	val-auc:0.96607
[600]	val-auc:0.96646
[700]	val-auc:0.96672
[800]	val-auc:0.96692
[900]	val-auc:0.96708
[1000]	val-auc:0.96720
[1100]	val-auc:0.96728
[1200]	val-auc:0.96732
[1300]	val-auc:0.96734
[1400]	val-auc:0.96737
[1500]	val-auc:0.96740
[1600]	val-auc:0.96743
[1700]	val-auc:0.96746
[1800]	val-auc:0.96748
[1900]	val-auc:0.96749
[2000]	val-auc:0.96749
[2076]	val-auc:0.96749

-> gt_orders  -  Recall : 0.6663

Params : {'max_depth': 10, 'subsample': '0.788', 'colsample_bytree': '0.608', 'reg_alpha': '2.13e-05', 'reg_lambda': '3.68e-03'},



[32m[I 2023-01-21 17:49:34,122][0m Trial 0 finished with value: 0.6662991358752789 and parameters: {'max_depth': 10, 'subsample': 0.788484981223704, 'colsample_bytree': 0.6082203460331841, 'reg_alpha': 2.12821878210239e-05, 'reg_lambda': 0.0036821155585651643}. Best is trial 0 with value: 0.6662991358752789.[0m


[0]	val-auc:0.95502
[100]	val-auc:0.95954
[200]	val-auc:0.96076
[300]	val-auc:0.96185
[400]	val-auc:0.96287
[500]	val-auc:0.96371
[600]	val-auc:0.96437
[700]	val-auc:0.96487
[800]	val-auc:0.96534
[900]	val-auc:0.96575
[1000]	val-auc:0.96610
[1100]	val-auc:0.96643
[1200]	val-auc:0.96670
[1300]	val-auc:0.96691
[1400]	val-auc:0.96709
[1500]	val-auc:0.96724
[1600]	val-auc:0.96737
[1700]	val-auc:0.96748
[1800]	val-auc:0.96757
[1900]	val-auc:0.96765
[2000]	val-auc:0.96772
[2100]	val-auc:0.96779
[2200]	val-auc:0.96785
[2300]	val-auc:0.96790
[2400]	val-auc:0.96795
[2500]	val-auc:0.96799
[2600]	val-auc:0.96803
[2700]	val-auc:0.96808
[2800]	val-auc:0.96811
[2900]	val-auc:0.96814
[3000]	val-auc:0.96817
[3100]	val-auc:0.96820
[3200]	val-auc:0.96823
[3300]	val-auc:0.96825
[3400]	val-auc:0.96827
[3500]	val-auc:0.96829
[3600]	val-auc:0.96831
[3700]	val-auc:0.96834
[3800]	val-auc:0.96835
[3900]	val-auc:0.96837
[4000]	val-auc:0.96838
[4100]	val-auc:0.96840
[4200]	val-auc:0.96841
[4300]	val-auc:0.96842


[32m[I 2023-01-21 18:32:04,949][0m Trial 1 finished with value: 0.66665811943896 and parameters: {'max_depth': 6, 'subsample': 0.8816640203082446, 'colsample_bytree': 0.5399534428323278, 'reg_alpha': 0.017420581777930405, 'reg_lambda': 0.2505834536661377}. Best is trial 1 with value: 0.66665811943896.[0m


[0]	val-auc:0.95583
[100]	val-auc:0.96149
[200]	val-auc:0.96271
[300]	val-auc:0.96376
[400]	val-auc:0.96466
[500]	val-auc:0.96537
[600]	val-auc:0.96589
[700]	val-auc:0.96629
[800]	val-auc:0.96662
[900]	val-auc:0.96688
[1000]	val-auc:0.96711
[1100]	val-auc:0.96729
[1200]	val-auc:0.96744
[1300]	val-auc:0.96755
[1400]	val-auc:0.96766
[1500]	val-auc:0.96774
[1600]	val-auc:0.96780
[1700]	val-auc:0.96786
[1800]	val-auc:0.96791
[1900]	val-auc:0.96795
[2000]	val-auc:0.96799
[2100]	val-auc:0.96802
[2200]	val-auc:0.96805
[2300]	val-auc:0.96806
[2400]	val-auc:0.96807
[2500]	val-auc:0.96808
[2585]	val-auc:0.96808

-> gt_orders  -  Recall : 0.6666

Params : {'max_depth': 8, 'subsample': '0.912', 'colsample_bytree': '0.801', 'reg_alpha': '9.18e-05', 'reg_lambda': '1.98e-03'},



[32m[I 2023-01-21 18:54:34,619][0m Trial 2 finished with value: 0.6665683735480397 and parameters: {'max_depth': 8, 'subsample': 0.9117558991061636, 'colsample_bytree': 0.8007529651765688, 'reg_alpha': 9.180452495120153e-05, 'reg_lambda': 0.0019809426731178283}. Best is trial 1 with value: 0.66665811943896.[0m


[0]	val-auc:0.95406
[100]	val-auc:0.95902
[200]	val-auc:0.96044
[300]	val-auc:0.96171
[400]	val-auc:0.96282
[500]	val-auc:0.96367
[600]	val-auc:0.96436
[700]	val-auc:0.96490
[800]	val-auc:0.96533
[900]	val-auc:0.96571
[1000]	val-auc:0.96609
[1100]	val-auc:0.96640
[1200]	val-auc:0.96664
[1300]	val-auc:0.96686
[1400]	val-auc:0.96696
[1500]	val-auc:0.96710
[1600]	val-auc:0.96722
[1700]	val-auc:0.96733
[1800]	val-auc:0.96742
[1900]	val-auc:0.96751
[2000]	val-auc:0.96754
[2100]	val-auc:0.96758
[2200]	val-auc:0.96763
[2300]	val-auc:0.96768
[2400]	val-auc:0.96772
[2500]	val-auc:0.96774
[2600]	val-auc:0.96778
[2700]	val-auc:0.96777
[2707]	val-auc:0.96777

-> gt_orders  -  Recall : 0.6658

Params : {'max_depth': 6, 'subsample': '0.985', 'colsample_bytree': '0.957', 'reg_alpha': '2.24e-03', 'reg_lambda': '2.09e-06'},



[32m[I 2023-01-21 19:13:39,888][0m Trial 3 finished with value: 0.6657991230544373 and parameters: {'max_depth': 6, 'subsample': 0.9847784858794328, 'colsample_bytree': 0.95652589556673, 'reg_alpha': 0.0022361817340353224, 'reg_lambda': 2.091108376172022e-06}. Best is trial 1 with value: 0.66665811943896.[0m


[0]	val-auc:0.95637
[100]	val-auc:0.96072
[200]	val-auc:0.96187
[300]	val-auc:0.96304
[400]	val-auc:0.96398
[500]	val-auc:0.96471
[600]	val-auc:0.96530
[700]	val-auc:0.96577
[800]	val-auc:0.96619
[900]	val-auc:0.96651
[1000]	val-auc:0.96678
[1100]	val-auc:0.96701
[1200]	val-auc:0.96720
[1300]	val-auc:0.96734
[1400]	val-auc:0.96746
[1500]	val-auc:0.96757
[1600]	val-auc:0.96767
[1700]	val-auc:0.96775
[1800]	val-auc:0.96783
[1900]	val-auc:0.96789
[2000]	val-auc:0.96793
[2100]	val-auc:0.96798
[2200]	val-auc:0.96802
[2300]	val-auc:0.96805
[2400]	val-auc:0.96808
[2500]	val-auc:0.96810
[2600]	val-auc:0.96813
[2700]	val-auc:0.96815
[2800]	val-auc:0.96817
[2900]	val-auc:0.96819
[3000]	val-auc:0.96819
[3028]	val-auc:0.96819

-> gt_orders  -  Recall : 0.6666

Params : {'max_depth': 7, 'subsample': '0.738', 'colsample_bytree': '0.632', 'reg_alpha': '1.34e-02', 'reg_lambda': '2.23e-03'},



[32m[I 2023-01-21 19:36:39,830][0m Trial 4 finished with value: 0.6665811943895997 and parameters: {'max_depth': 7, 'subsample': 0.7383660281557252, 'colsample_bytree': 0.6322772781966076, 'reg_alpha': 0.013431657744890562, 'reg_lambda': 0.0022255313469677547}. Best is trial 1 with value: 0.66665811943896.[0m


[0]	val-auc:0.95329
[100]	val-auc:0.96247
[200]	val-auc:0.96359
[300]	val-auc:0.96453
[400]	val-auc:0.96532
[500]	val-auc:0.96589
[600]	val-auc:0.96632
[700]	val-auc:0.96665
[800]	val-auc:0.96689
[900]	val-auc:0.96707
[1000]	val-auc:0.96713
[1100]	val-auc:0.96718
[1200]	val-auc:0.96726
[1300]	val-auc:0.96729
[1400]	val-auc:0.96727
[1418]	val-auc:0.96726

-> gt_orders  -  Recall : 0.6658

Params : {'max_depth': 9, 'subsample': '0.666', 'colsample_bytree': '0.894', 'reg_alpha': '6.63e-04', 'reg_lambda': '8.45e-05'},



[32m[I 2023-01-21 19:51:56,158][0m Trial 5 finished with value: 0.6657863022128773 and parameters: {'max_depth': 9, 'subsample': 0.6663395898823019, 'colsample_bytree': 0.8935969190681103, 'reg_alpha': 0.0006626712389526148, 'reg_lambda': 8.445984340814827e-05}. Best is trial 1 with value: 0.66665811943896.[0m


[0]	val-auc:0.95643
[100]	val-auc:0.96085
[200]	val-auc:0.96192
[300]	val-auc:0.96300
[400]	val-auc:0.96395
[500]	val-auc:0.96469
[600]	val-auc:0.96529
[700]	val-auc:0.96576
[800]	val-auc:0.96617
[900]	val-auc:0.96652
[1000]	val-auc:0.96680
[1100]	val-auc:0.96703
[1200]	val-auc:0.96722
[1300]	val-auc:0.96739
[1400]	val-auc:0.96754
[1500]	val-auc:0.96766
[1600]	val-auc:0.96775
[1700]	val-auc:0.96783
[1800]	val-auc:0.96791
[1900]	val-auc:0.96797
[2000]	val-auc:0.96803
[2100]	val-auc:0.96808
[2200]	val-auc:0.96812
[2300]	val-auc:0.96816
[2400]	val-auc:0.96819
[2500]	val-auc:0.96823
[2600]	val-auc:0.96825
[2700]	val-auc:0.96827
[2800]	val-auc:0.96829
[2900]	val-auc:0.96831
[3000]	val-auc:0.96832
[3100]	val-auc:0.96834
[3200]	val-auc:0.96836
[3300]	val-auc:0.96837
[3400]	val-auc:0.96838
[3500]	val-auc:0.96839
[3600]	val-auc:0.96840
[3700]	val-auc:0.96841
[3800]	val-auc:0.96842
[3900]	val-auc:0.96842
[4000]	val-auc:0.96843
[4100]	val-auc:0.96844
[4200]	val-auc:0.96844
[4300]	val-auc:0.96844


[32m[I 2023-01-21 20:23:29,355][0m Trial 6 finished with value: 0.6666068360727199 and parameters: {'max_depth': 7, 'subsample': 0.7590406439370354, 'colsample_bytree': 0.5106162369059721, 'reg_alpha': 1.7272431052533372e-05, 'reg_lambda': 0.04115219113399547}. Best is trial 1 with value: 0.66665811943896.[0m


[0]	val-auc:0.94729
[100]	val-auc:0.96303
[200]	val-auc:0.96405
[300]	val-auc:0.96486
[400]	val-auc:0.96555
[500]	val-auc:0.96606
[600]	val-auc:0.96644
[700]	val-auc:0.96672
[800]	val-auc:0.96695
[900]	val-auc:0.96709
[1000]	val-auc:0.96721
[1100]	val-auc:0.96728
[1200]	val-auc:0.96735
[1300]	val-auc:0.96739
[1400]	val-auc:0.96743
[1500]	val-auc:0.96743
[1533]	val-auc:0.96743

-> gt_orders  -  Recall : 0.6661

Params : {'max_depth': 10, 'subsample': '0.523', 'colsample_bytree': '0.854', 'reg_alpha': '2.15e-02', 'reg_lambda': '1.01e-01'},



[32m[I 2023-01-21 20:41:37,632][0m Trial 7 finished with value: 0.6660555398856381 and parameters: {'max_depth': 10, 'subsample': 0.5228707953140816, 'colsample_bytree': 0.8535928411898415, 'reg_alpha': 0.021539955595389203, 'reg_lambda': 0.10054106302547766}. Best is trial 1 with value: 0.66665811943896.[0m


[0]	val-auc:0.95389
[100]	val-auc:0.95929
[200]	val-auc:0.96070
[300]	val-auc:0.96185
[400]	val-auc:0.96292
[500]	val-auc:0.96376
[600]	val-auc:0.96442
[700]	val-auc:0.96496
[800]	val-auc:0.96542
[900]	val-auc:0.96580
[1000]	val-auc:0.96615
[1100]	val-auc:0.96645
[1200]	val-auc:0.96668
[1300]	val-auc:0.96689
[1400]	val-auc:0.96700
[1500]	val-auc:0.96712
[1600]	val-auc:0.96724
[1700]	val-auc:0.96734
[1800]	val-auc:0.96742
[1900]	val-auc:0.96743
[2000]	val-auc:0.96749
[2100]	val-auc:0.96754
[2200]	val-auc:0.96746
[2257]	val-auc:0.96748

-> gt_orders  -  Recall : 0.6654

Params : {'max_depth': 6, 'subsample': '0.796', 'colsample_bytree': '0.806', 'reg_alpha': '5.28e-05', 'reg_lambda': '1.42e-05'},



[32m[I 2023-01-21 20:58:15,043][0m Trial 8 finished with value: 0.6654273186491961 and parameters: {'max_depth': 6, 'subsample': 0.7962711723163047, 'colsample_bytree': 0.8062584557517884, 'reg_alpha': 5.2849105006773386e-05, 'reg_lambda': 1.4172921447323346e-05}. Best is trial 1 with value: 0.66665811943896.[0m


[0]	val-auc:0.95547
[100]	val-auc:0.96054
[200]	val-auc:0.96180
[300]	val-auc:0.96291
[400]	val-auc:0.96389
[500]	val-auc:0.96465
[600]	val-auc:0.96525
[700]	val-auc:0.96574
[800]	val-auc:0.96613
[900]	val-auc:0.96644
[1000]	val-auc:0.96672
[1100]	val-auc:0.96696
[1200]	val-auc:0.96716
[1300]	val-auc:0.96730
[1400]	val-auc:0.96742
[1500]	val-auc:0.96751
[1600]	val-auc:0.96758
[1700]	val-auc:0.96765
[1800]	val-auc:0.96768
[1882]	val-auc:0.96768

-> gt_orders  -  Recall : 0.6657

Params : {'max_depth': 7, 'subsample': '0.850', 'colsample_bytree': '0.743', 'reg_alpha': '3.16e-02', 'reg_lambda': '3.59e-05'},



[32m[I 2023-01-21 21:14:02,564][0m Trial 9 finished with value: 0.6657478396881972 and parameters: {'max_depth': 7, 'subsample': 0.8495254921083764, 'colsample_bytree': 0.7431074998726734, 'reg_alpha': 0.03162089876449657, 'reg_lambda': 3.5927929173568555e-05}. Best is trial 1 with value: 0.66665811943896.[0m


[0]	val-auc:0.95690
[100]	val-auc:0.96170
[200]	val-auc:0.96276
[300]	val-auc:0.96380
[400]	val-auc:0.96464
[500]	val-auc:0.96533
[600]	val-auc:0.96586
[700]	val-auc:0.96627
[800]	val-auc:0.96663
[900]	val-auc:0.96693
[1000]	val-auc:0.96717
[1100]	val-auc:0.96736
[1200]	val-auc:0.96753
[1300]	val-auc:0.96766
[1400]	val-auc:0.96778
[1500]	val-auc:0.96787
[1600]	val-auc:0.96795
[1700]	val-auc:0.96801
[1800]	val-auc:0.96807
[1900]	val-auc:0.96812
[2000]	val-auc:0.96817
[2100]	val-auc:0.96820
[2200]	val-auc:0.96823
[2300]	val-auc:0.96824
[2400]	val-auc:0.96826
[2500]	val-auc:0.96827
[2600]	val-auc:0.96828
[2700]	val-auc:0.96828
[2800]	val-auc:0.96828
[2900]	val-auc:0.96829
[3000]	val-auc:0.96829
[3055]	val-auc:0.96829

-> gt_orders  -  Recall : 0.6665

Params : {'max_depth': 8, 'subsample': '0.992', 'colsample_bytree': '0.512', 'reg_alpha': '9.84e-02', 'reg_lambda': '8.08e-01'},



[32m[I 2023-01-21 21:39:05,435][0m Trial 10 finished with value: 0.6665427318649196 and parameters: {'max_depth': 8, 'subsample': 0.9915813300276377, 'colsample_bytree': 0.5117541520880596, 'reg_alpha': 0.09841557986658642, 'reg_lambda': 0.8079740655822105}. Best is trial 1 with value: 0.66665811943896.[0m


[0]	val-auc:0.95634
[100]	val-auc:0.96086
[200]	val-auc:0.96189
[300]	val-auc:0.96300
[400]	val-auc:0.96391
[500]	val-auc:0.96467
[600]	val-auc:0.96525
[700]	val-auc:0.96573
[800]	val-auc:0.96613
[900]	val-auc:0.96648
[1000]	val-auc:0.96677
[1100]	val-auc:0.96702
[1200]	val-auc:0.96722
[1300]	val-auc:0.96738
[1400]	val-auc:0.96752
[1500]	val-auc:0.96764
[1600]	val-auc:0.96773
[1700]	val-auc:0.96781
[1800]	val-auc:0.96789
[1900]	val-auc:0.96795
[2000]	val-auc:0.96800
[2100]	val-auc:0.96805
[2200]	val-auc:0.96810
[2300]	val-auc:0.96814
[2400]	val-auc:0.96817
[2500]	val-auc:0.96820
[2600]	val-auc:0.96823
[2700]	val-auc:0.96826
[2800]	val-auc:0.96829
[2900]	val-auc:0.96831
[3000]	val-auc:0.96832
[3100]	val-auc:0.96833
[3200]	val-auc:0.96835
[3300]	val-auc:0.96836
[3400]	val-auc:0.96837
[3500]	val-auc:0.96838
[3600]	val-auc:0.96839
[3700]	val-auc:0.96840
[3800]	val-auc:0.96840
[3900]	val-auc:0.96841
[4000]	val-auc:0.96842
[4100]	val-auc:0.96842
[4193]	val-auc:0.96842

-> gt_orders  -  Recal

[32m[I 2023-01-21 22:09:00,105][0m Trial 11 finished with value: 0.6665170901817995 and parameters: {'max_depth': 7, 'subsample': 0.8482622201883273, 'colsample_bytree': 0.5150586864909056, 'reg_alpha': 0.0004991989090473534, 'reg_lambda': 0.06122289773389768}. Best is trial 1 with value: 0.66665811943896.[0m


[0]	val-auc:0.95548
[100]	val-auc:0.95952
[200]	val-auc:0.96081


- 0.6663

In [None]:
# Feature-importance plot (helper imported from utils.plot); currently disabled.
# NOTE(review): `ft_imp` and `run` are not defined in the visible part of this
# notebook — confirm both exist before re-enabling this line.
# fig = plot_importances(ft_imp, run=run)

Done