In [None]:
!pip install optuna
!pip install catboost

In [None]:
pip install -U imbalanced-learn

In [None]:
pip install -U torch

In [23]:
import pandas as pd
import numpy as np
import optuna
import matplotlib.pyplot as plt
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
import catboost as cb
from catboost import CatBoostClassifier, Pool, cv
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

In [41]:
# 레이블 인코딩할 칼럼들
cat_columns = [
    "customer_country",
    "business_subarea",
    "business_area",
    "business_unit",
    "customer_type",
    "customer_idx",
    "enterprise",
    "customer_job",
    "inquiry_type",
    "product_category",
    "product_subcategory",
    "product_modelname",
    "customer_position",
    "response_corporate",
    "expected_timeline",
    "category",
    "product_count",
    "timeline_count",
    "idit_all",
    "lead_owner",
    "bant_submit_count",
    "com_reg_count",
    "idx_count",
    "lead_count",
    "enterprise_count",
    "enterprise_weight"
]

def index_processing(context_df, train, test, column_name):
    idx = {v:k for k,v in enumerate(context_df[column_name].unique())}
    train[column_name] = train[column_name].map(idx)
    test[column_name] = test[column_name].map(idx)
    # train.loc[:, column_name] = train[column_name].map(idx)
    # test.loc[:, column_name] = test[column_name].map(idx)
    return idx

def process_context_data(train_df, test_df):
    context_df = pd.concat([train_df[cat_columns], test_df[cat_columns]]).reset_index(drop=True)
    idx = {}
    for col in cat_columns:
        idx_name = index_processing(context_df, train_df, test_df, col)
        idx[col+'2idx'] = idx_name
    return idx, train_df, test_df

def context_data_load():
    ######################## DATA LOAD
    train = pd.read_csv('train_final.csv', low_memory=False)
    test = pd.read_csv('submission_final.csv')

    idx, context_train, context_test = process_context_data(train, test)
    field_dims = np.array([len(toidx) for toidx in idx], dtype=np.int32)

    data = {
            'train':context_train.fillna(0),
            'test':context_test.fillna(0),
            'field_dims':field_dims,
            'cat_columns' : cat_columns,
            }


    return data

def context_data_split(data):
    # SMOTE를 사용하여 데이터 오버샘플링
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(data['train'].drop(['is_converted'], axis=1), data['train']['is_converted'])

    # 샘플링된 데이터를 다시 훈련 데이터와 테스트 데이터로 분할
    X_train, X_valid, y_train, y_valid = train_test_split(X_resampled, 
                                                      y_resampled, 
                                                      test_size=0.2, 
                                                      random_state=42, 
                                                      stratify=y_resampled)

    y_train = y_train.astype(np.int32) ; y_valid = y_valid.astype(np.int32)
    data['X_train'], data['X_valid'], data['y_train'], data['y_valid'], data['X_resampled'], data['y_resampled'] = X_train, X_valid, y_train, y_valid, X_resampled, y_resampled
    
    return data


In [42]:
data = context_data_load()
data = context_data_split(data)

In [None]:
# CatBoost 모델 훈련
train_pool = Pool(data['X_train'], label=data['y_train'], cat_features=data['cat_columns'])
valid_pool = Pool(data['X_valid'], label=data['y_valid'], cat_features=data['cat_columns'])

In [None]:
def objective_catboost(trial):
    param = {
        "random_state": 42,
        "iterations": 
        'early_stopping_rounds': 20,
        'loss_function': 'Logloss',
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.01, 100.00),
        "n_estimators": trial.suggest_int("n_estimators", 500, 3000),
        "max_depth": trial.suggest_int("max_depth", 1, 8),
        'random_strength': trial.suggest_int('random_strength', 0, 50),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-8, 3e-5),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 50),
        "max_bin": trial.suggest_int("max_bin", 150, 300),
    }

    model = cb.CatBoostClassifier(**param, verbose=0)
    model.fit(train_pool, eval_set=valid_pool, use_best_model=True)

    # 검증 데이터셋에 대한 예측 및 정확도 계산
    pred = model.predict(data['X_valid'])
    return f1_score(y_val, y_pred, labels=[True, False])

In [None]:
# Optuna 최적화
study = optuna.create_study(direction='maximize')
study.optimize(objective_catboost, n_trials=2)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial
print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

In [None]:
model = cb.CatBoostClassifier(**trial.params, verbose=False)

model.fit(train_pool)

## K-fold

In [43]:
x_train, x_val, y_train, y_val = data['X_train'], data['X_valid'], data['y_train'], data['y_valid']

In [44]:
from sklearn.model_selection import StratifiedKFold
fold_num = 5
skf = StratifiedKFold(n_splits=fold_num, shuffle=True, random_state=42)
folds = []
for train_idx, valid_idx in skf.split(data['X_resampled'], data['y_resampled']):
    folds.append((train_idx,valid_idx))

In [51]:
def objective(trial):
    param = {
        "random_state":42,
        "objective": "Logloss",
        "cat_features" : data['cat_columns'],
        'learning_rate' : trial.suggest_float('learning_rate', 0.01, 0.5),
        'bagging_temperature' :trial.suggest_float('bagging_temperature', 0.01, 100.00),
        "n_estimators":trial.suggest_int("n_estimators", 1000, 10000),
        "max_depth":trial.suggest_int("max_depth", 4, 16),
        'random_strength' :trial.suggest_int('random_strength', 0, 100),
        "l2_leaf_reg":trial.suggest_float("l2_leaf_reg",1e-8,3e-5),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "max_bin": trial.suggest_int("max_bin", 200, 500),
        'od_type': trial.suggest_categorical('od_type', ['IncToDec', 'Iter']),
    }

    model = cb.CatBoostClassifier(**param, devices = '0')

    model.fit(
        x_train,
        y_train.astype(int),
        eval_set=[(x_val, y_val)],
        early_stopping_rounds = 50,
        verbose=100
    )

    # 검증 데이터셋에 대한 예측 및 정확도 계산
    y_pred = model.predict(x_val)
    return f1_score(y_val.astype(int), y_pred.astype(int), labels=[True, False])

for fold in range(0,fold_num):
    print(f'===================================={fold+1}============================================')
    train_idx, valid_idx = folds[fold]
    X_train = data['X_resampled'].iloc[train_idx]
    X_valid = data['X_resampled'].iloc[valid_idx]
    y_train = data['y_resampled'].iloc[train_idx]
    y_valid = data['y_resampled'].iloc[valid_idx]

    
    sampler = optuna.samplers.TPESampler(seed=42)
    study = optuna.create_study(
        study_name = 'cat_parameter_optuna',
        direction = 'maximize',
        sampler = sampler,
    )
    study.optimize(objective, n_trials=10)

    model = cb.CatBoostClassifier(**study.best_params,
                              devices = '0', random_state = 42, objective = 'Logloss', 
                              cat_features = data['cat_columns'])
    model.fit(X_train, y_train.astype(int), early_stopping_rounds = 50)
                
    pred = model.predict(data['test'].drop(["is_converted", "id"], axis=1))
    data['test'][f'pred_{fold}'] = pred
    print(f'================================================================================\n\n')

[32m[I 2024-02-21 19:21:46,724][0m A new study created in memory with name: cat_parameter_optuna[0m


0:	learn: 0.6931459	test: 0.6931218	best: 0.6931218 (0)	total: 510ms	remaining: 1h 4m 27s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6931218455
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 19:22:41,734][0m Trial 0 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.19352465823520762, 'bagging_temperature': 95.07192349792751, 'n_estimators': 7588, 'max_depth': 11, 'random_strength': 15, 'l2_leaf_reg': 4.688275664882717e-06, 'min_child_samples': 10, 'max_bin': 460, 'od_type': 'Iter'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931470	test: 0.6931445	best: 0.6931445 (0)	total: 143ms	remaining: 20m 16s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6931028401
bestIteration = 13

Shrink model to first 14 iterations.


[32m[I 2024-02-21 19:23:10,274][0m Trial 1 finished with value: 0.5991952414275717 and parameters: {'learning_rate': 0.020086402204943198, 'bagging_temperature': 96.99128611767782, 'n_estimators': 8492, 'max_depth': 6, 'random_strength': 18, 'l2_leaf_reg': 5.51030125050448e-06, 'min_child_samples': 34, 'max_bin': 357, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931453	test: 0.6931068	best: 0.6931068 (0)	total: 122ms	remaining: 7m 21s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930312769
bestIteration = 1

Shrink model to first 2 iterations.


[32m[I 2024-02-21 19:23:33,273][0m Trial 2 finished with value: 0.5551193538197734 and parameters: {'learning_rate': 0.30980791841396593, 'bagging_temperature': 13.957991126597662, 'n_estimators': 3629, 'max_depth': 8, 'random_strength': 46, 'l2_leaf_reg': 2.355742708217648e-05, 'min_child_samples': 24, 'max_bin': 354, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931453	test: 0.6931070	best: 0.6931070 (0)	total: 124ms	remaining: 3m 17s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930020799
bestIteration = 1

Shrink model to first 2 iterations.


[32m[I 2024-02-21 19:29:49,693][0m Trial 3 finished with value: 0.5563916550263596 and parameters: {'learning_rate': 0.3076969774317048, 'bagging_temperature': 17.060707127492282, 'n_estimators': 1585, 'max_depth': 16, 'random_strength': 97, 'l2_leaf_reg': 2.425383647001267e-05, 'min_child_samples': 34, 'max_bin': 229, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931467	test: 0.6931380	best: 0.6931380 (0)	total: 133ms	remaining: 2m 53s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6931380116
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 19:32:30,402][0m Trial 4 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.06979873507394162, 'bagging_temperature': 49.5227392420259, 'n_estimators': 1309, 'max_depth': 15, 'random_strength': 26, 'l2_leaf_reg': 1.987904330777592e-05, 'min_child_samples': 34, 'max_bin': 356, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931445	test: 0.6930842	best: 0.6930842 (0)	total: 173ms	remaining: 27m 15s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6929251901
bestIteration = 1

Shrink model to first 2 iterations.


[32m[I 2024-02-21 19:38:53,904][0m Trial 5 finished with value: 0.5563916550263596 and parameters: {'learning_rate': 0.4850964676046337, 'bagging_temperature': 77.51553100787784, 'n_estimators': 9456, 'max_depth': 15, 'random_strength': 60, 'l2_leaf_reg': 2.7657008308343274e-05, 'min_child_samples': 13, 'max_bin': 258, 'od_type': 'Iter'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931459	test: 0.6931209	best: 0.6931209 (0)	total: 131ms	remaining: 18m 31s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930662659
bestIteration = 1

Shrink model to first 2 iterations.


[32m[I 2024-02-21 19:39:18,429][0m Trial 6 finished with value: 0.5551193538197734 and parameters: {'learning_rate': 0.2004518719478462, 'bagging_temperature': 27.142189687071852, 'n_estimators': 8459, 'max_depth': 8, 'random_strength': 28, 'l2_leaf_reg': 1.6285455533915874e-05, 'min_child_samples': 18, 'max_bin': 441, 'od_type': 'Iter'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931449	test: 0.6930966	best: 0.6930966 (0)	total: 128ms	remaining: 2m 14s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6929665242
bestIteration = 1

Shrink model to first 2 iterations.


[32m[I 2024-02-21 19:42:11,484][0m Trial 7 finished with value: 0.5563916550263596 and parameters: {'learning_rate': 0.3883999369553621, 'bagging_temperature': 19.8795809966019, 'n_estimators': 1049, 'max_depth': 14, 'random_strength': 71, 'l2_leaf_reg': 2.187292496954921e-05, 'min_child_samples': 79, 'max_bin': 222, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931447	test: 0.6930909	best: 0.6930909 (0)	total: 172ms	remaining: 11m 22s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6928588587
bestIteration = 2

Shrink model to first 3 iterations.


[32m[I 2024-02-21 19:42:26,773][0m Trial 8 finished with value: 0.5947427005736793 and parameters: {'learning_rate': 0.4329206786790408, 'bagging_temperature': 62.33357970148751, 'n_estimators': 3978, 'max_depth': 4, 'random_strength': 31, 'l2_leaf_reg': 9.762247827582143e-06, 'min_child_samples': 75, 'max_bin': 391, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931467	test: 0.6931382	best: 0.6931382 (0)	total: 145ms	remaining: 18m 58s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.693113471
bestIteration = 1

Shrink model to first 2 iterations.


[32m[I 2024-02-21 19:43:09,816][0m Trial 9 finished with value: 0.5563916550263596 and parameters: {'learning_rate': 0.06860118050976784, 'bagging_temperature': 71.32734627442727, 'n_estimators': 7847, 'max_depth': 11, 'random_strength': 77, 'l2_leaf_reg': 1.4818929934968078e-05, 'min_child_samples': 55, 'max_bin': 328, 'od_type': 'Iter'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.5304222	total: 344ms	remaining: 43m 28s
1:	learn: 0.3748495	total: 2.42s	remaining: 2h 32m 48s
2:	learn: 0.2484511	total: 4.68s	remaining: 3h 17m 23s
3:	learn: 0.1798216	total: 6.63s	remaining: 3h 29m 37s
4:	learn: 0.1535254	total: 8.54s	remaining: 3h 35m 59s
5:	learn: 0.1276578	total: 11.3s	remaining: 3h 57m 12s
6:	learn: 0.1218502	total: 11.7s	remaining: 3h 31m 58s
7:	learn: 0.1148343	total: 13.4s	remaining: 3h 32m 8s
8:	learn: 0.0993635	total: 15.6s	remaining: 3h 39m 32s
9:	learn: 0.0930921	total: 17.1s	remaining: 3h 36m 8s
10:	learn: 0.0861625	total: 18.8s	remaining: 3h 35m 36s
11:	learn: 0.0861032	total: 18.8s	remaining: 3h 18m 15s
12:	learn: 0.0832942	total: 20.6s	remaining: 3h 20m 2s
13:	learn: 0.0828783	total: 20.9s	remaining: 3h 8m 14s
14:	learn: 0.0812404	total: 22.6s	remaining: 3h 9m 57s
15:	learn: 0.0783314	total: 24.7s	remaining: 3h 14m 55s
16:	learn: 0.0766216	total: 26.4s	remaining: 3h 16m 4s
17:	learn: 0.0766216	total: 26.5s	remaining: 3h 5m 30s
18:	learn: 0

Training has stopped (degenerate solution on iteration 92, probably too small l2-regularization, try to increase it)






[32m[I 2024-02-21 19:45:43,483][0m A new study created in memory with name: cat_parameter_optuna[0m


0:	learn: 0.6931432	test: 0.6931025	best: 0.6931025 (0)	total: 205ms	remaining: 25m 52s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6931024653
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 19:47:09,565][0m Trial 0 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.19352465823520762, 'bagging_temperature': 95.07192349792751, 'n_estimators': 7588, 'max_depth': 11, 'random_strength': 15, 'l2_leaf_reg': 4.688275664882717e-06, 'min_child_samples': 10, 'max_bin': 460, 'od_type': 'Iter'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931467	test: 0.6931425	best: 0.6931425 (0)	total: 233ms	remaining: 32m 55s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930954923
bestIteration = 8

Shrink model to first 9 iterations.


[32m[I 2024-02-21 19:47:39,395][0m Trial 1 finished with value: 0.5595303113052976 and parameters: {'learning_rate': 0.020086402204943198, 'bagging_temperature': 96.99128611767782, 'n_estimators': 8492, 'max_depth': 6, 'random_strength': 18, 'l2_leaf_reg': 5.51030125050448e-06, 'min_child_samples': 34, 'max_bin': 357, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931412	test: 0.6930760	best: 0.6930760 (0)	total: 217ms	remaining: 13m 6s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6919343612
bestIteration = 37

Shrink model to first 38 iterations.


[32m[I 2024-02-21 19:48:33,611][0m Trial 2 finished with value: 0.6025629536696837 and parameters: {'learning_rate': 0.30980791841396593, 'bagging_temperature': 13.957991126597662, 'n_estimators': 3629, 'max_depth': 8, 'random_strength': 46, 'l2_leaf_reg': 2.355742708217648e-05, 'min_child_samples': 24, 'max_bin': 354, 'od_type': 'IncToDec'}. Best is trial 2 with value: 0.6025629536696837.[0m


0:	learn: 0.6931413	test: 0.6930765	best: 0.6930765 (0)	total: 157ms	remaining: 4m 8s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930764856
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 19:55:26,201][0m Trial 3 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.3076969774317048, 'bagging_temperature': 17.060707127492282, 'n_estimators': 1585, 'max_depth': 16, 'random_strength': 97, 'l2_leaf_reg': 2.425383647001267e-05, 'min_child_samples': 34, 'max_bin': 229, 'od_type': 'IncToDec'}. Best is trial 2 with value: 0.6025629536696837.[0m


0:	learn: 0.6931457	test: 0.6931310	best: 0.6931310 (0)	total: 252ms	remaining: 5m 30s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6928436525
bestIteration = 24

Shrink model to first 25 iterations.


[32m[I 2024-02-21 19:58:52,888][0m Trial 4 finished with value: 0.579786749293721 and parameters: {'learning_rate': 0.06979873507394162, 'bagging_temperature': 49.5227392420259, 'n_estimators': 1309, 'max_depth': 15, 'random_strength': 26, 'l2_leaf_reg': 1.987904330777592e-05, 'min_child_samples': 34, 'max_bin': 356, 'od_type': 'IncToDec'}. Best is trial 2 with value: 0.6025629536696837.[0m


0:	learn: 0.6931388	test: 0.6930367	best: 0.6930367 (0)	total: 134ms	remaining: 21m 6s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930367085
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 20:04:22,790][0m Trial 5 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.4850964676046337, 'bagging_temperature': 77.51553100787784, 'n_estimators': 9456, 'max_depth': 15, 'random_strength': 60, 'l2_leaf_reg': 2.7657008308343274e-05, 'min_child_samples': 13, 'max_bin': 258, 'od_type': 'Iter'}. Best is trial 2 with value: 0.6025629536696837.[0m


0:	learn: 0.6931431	test: 0.6931009	best: 0.6931009 (0)	total: 138ms	remaining: 19m 26s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6929255798
bestIteration = 5

Shrink model to first 6 iterations.


[32m[I 2024-02-21 20:04:46,798][0m Trial 6 finished with value: 0.5396103896103897 and parameters: {'learning_rate': 0.2004518719478462, 'bagging_temperature': 27.142189687071852, 'n_estimators': 8459, 'max_depth': 8, 'random_strength': 28, 'l2_leaf_reg': 1.6285455533915874e-05, 'min_child_samples': 18, 'max_bin': 441, 'od_type': 'Iter'}. Best is trial 2 with value: 0.6025629536696837.[0m


0:	learn: 0.6931401	test: 0.6930583	best: 0.6930583 (0)	total: 126ms	remaining: 2m 12s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930583011
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 20:07:20,650][0m Trial 7 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.3883999369553621, 'bagging_temperature': 19.8795809966019, 'n_estimators': 1049, 'max_depth': 14, 'random_strength': 71, 'l2_leaf_reg': 2.187292496954921e-05, 'min_child_samples': 79, 'max_bin': 222, 'od_type': 'IncToDec'}. Best is trial 2 with value: 0.6025629536696837.[0m


0:	learn: 0.6931395	test: 0.6930483	best: 0.6930483 (0)	total: 147ms	remaining: 9m 44s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6927054213
bestIteration = 8

Shrink model to first 9 iterations.


[32m[I 2024-02-21 20:07:37,508][0m Trial 8 finished with value: 0.5951660343578158 and parameters: {'learning_rate': 0.4329206786790408, 'bagging_temperature': 62.33357970148751, 'n_estimators': 3978, 'max_depth': 4, 'random_strength': 31, 'l2_leaf_reg': 9.762247827582143e-06, 'min_child_samples': 75, 'max_bin': 391, 'od_type': 'IncToDec'}. Best is trial 2 with value: 0.6025629536696837.[0m


0:	learn: 0.6931457	test: 0.6931312	best: 0.6931312 (0)	total: 136ms	remaining: 17m 43s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.693120215
bestIteration = 19

Shrink model to first 20 iterations.


[32m[I 2024-02-21 20:08:25,752][0m Trial 9 finished with value: 0.550339393364029 and parameters: {'learning_rate': 0.06860118050976784, 'bagging_temperature': 71.32734627442727, 'n_estimators': 7847, 'max_depth': 11, 'random_strength': 77, 'l2_leaf_reg': 1.4818929934968078e-05, 'min_child_samples': 55, 'max_bin': 328, 'od_type': 'Iter'}. Best is trial 2 with value: 0.6025629536696837.[0m


0:	learn: 0.5203594	total: 421ms	remaining: 25m 29s
1:	learn: 0.3628044	total: 596ms	remaining: 18m 1s
2:	learn: 0.2822596	total: 1.08s	remaining: 21m 47s
3:	learn: 0.2485845	total: 1.51s	remaining: 22m 51s
4:	learn: 0.2160641	total: 1.67s	remaining: 20m 9s
5:	learn: 0.1851047	total: 2.27s	remaining: 22m 49s
6:	learn: 0.1732479	total: 2.57s	remaining: 22m 9s
7:	learn: 0.1649436	total: 3.15s	remaining: 23m 44s
8:	learn: 0.1574685	total: 3.58s	remaining: 24m 2s
9:	learn: 0.1521805	total: 4s	remaining: 24m 6s
10:	learn: 0.1346164	total: 4.57s	remaining: 25m 3s
11:	learn: 0.1301409	total: 5.14s	remaining: 25m 50s
12:	learn: 0.1282427	total: 5.57s	remaining: 25m 48s
13:	learn: 0.1282427	total: 5.62s	remaining: 24m 11s
14:	learn: 0.1249040	total: 6.12s	remaining: 24m 34s
15:	learn: 0.1213669	total: 6.69s	remaining: 25m 11s
16:	learn: 0.1173619	total: 7.28s	remaining: 25m 46s
17:	learn: 0.1096872	total: 7.89s	remaining: 26m 22s
18:	learn: 0.1093600	total: 8.11s	remaining: 25m 41s
19:	learn: 0

Training has stopped (degenerate solution on iteration 84, probably too small l2-regularization, try to increase it)
[32m[I 2024-02-21 20:09:14,366][0m A new study created in memory with name: cat_parameter_optuna[0m




0:	learn: 0.6931014	test: 0.6931052	best: 0.6931052 (0)	total: 514ms	remaining: 1h 4m 57s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930476716
bestIteration = 7

Shrink model to first 8 iterations.


[32m[I 2024-02-21 20:10:09,512][0m Trial 0 finished with value: 0.5133464032574272 and parameters: {'learning_rate': 0.19352465823520762, 'bagging_temperature': 95.07192349792751, 'n_estimators': 7588, 'max_depth': 11, 'random_strength': 15, 'l2_leaf_reg': 4.688275664882717e-06, 'min_child_samples': 10, 'max_bin': 460, 'od_type': 'Iter'}. Best is trial 0 with value: 0.5133464032574272.[0m


0:	learn: 0.6931411	test: 0.6931485	best: 0.6931485 (0)	total: 459ms	remaining: 1h 4m 54s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6931375335
bestIteration = 3

Shrink model to first 4 iterations.


[32m[I 2024-02-21 20:10:29,160][0m Trial 1 finished with value: 0.5836515170176952 and parameters: {'learning_rate': 0.020086402204943198, 'bagging_temperature': 96.99128611767782, 'n_estimators': 8492, 'max_depth': 6, 'random_strength': 18, 'l2_leaf_reg': 5.51030125050448e-06, 'min_child_samples': 34, 'max_bin': 357, 'od_type': 'IncToDec'}. Best is trial 1 with value: 0.5836515170176952.[0m


0:	learn: 0.6931416	test: 0.6930784	best: 0.6930784 (0)	total: 127ms	remaining: 7m 41s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6929974631
bestIteration = 3

Shrink model to first 4 iterations.


[32m[I 2024-02-21 20:10:51,152][0m Trial 2 finished with value: 0.6037865748709123 and parameters: {'learning_rate': 0.30980791841396593, 'bagging_temperature': 13.957991126597662, 'n_estimators': 3629, 'max_depth': 8, 'random_strength': 46, 'l2_leaf_reg': 2.355742708217648e-05, 'min_child_samples': 24, 'max_bin': 354, 'od_type': 'IncToDec'}. Best is trial 2 with value: 0.6037865748709123.[0m


0:	learn: 0.6931417	test: 0.6930788	best: 0.6930788 (0)	total: 137ms	remaining: 3m 37s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930788212
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 20:16:13,272][0m Trial 3 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.3076969774317048, 'bagging_temperature': 17.060707127492282, 'n_estimators': 1585, 'max_depth': 16, 'random_strength': 97, 'l2_leaf_reg': 2.425383647001267e-05, 'min_child_samples': 34, 'max_bin': 229, 'od_type': 'IncToDec'}. Best is trial 2 with value: 0.6037865748709123.[0m


0:	learn: 0.6931458	test: 0.6931315	best: 0.6931315 (0)	total: 134ms	remaining: 2m 55s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6931314969
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 20:19:02,075][0m Trial 4 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.06979873507394162, 'bagging_temperature': 49.5227392420259, 'n_estimators': 1309, 'max_depth': 15, 'random_strength': 26, 'l2_leaf_reg': 1.987904330777592e-05, 'min_child_samples': 34, 'max_bin': 356, 'od_type': 'IncToDec'}. Best is trial 2 with value: 0.6037865748709123.[0m


0:	learn: 0.6931394	test: 0.6930403	best: 0.6930403 (0)	total: 137ms	remaining: 21m 39s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.693040326
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 20:23:57,621][0m Trial 5 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.4850964676046337, 'bagging_temperature': 77.51553100787784, 'n_estimators': 9456, 'max_depth': 15, 'random_strength': 60, 'l2_leaf_reg': 2.7657008308343274e-05, 'min_child_samples': 13, 'max_bin': 258, 'od_type': 'Iter'}. Best is trial 2 with value: 0.6037865748709123.[0m


0:	learn: 0.6931434	test: 0.6931024	best: 0.6931024 (0)	total: 143ms	remaining: 20m 13s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6931024183
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 20:24:21,840][0m Trial 6 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.2004518719478462, 'bagging_temperature': 27.142189687071852, 'n_estimators': 8459, 'max_depth': 8, 'random_strength': 28, 'l2_leaf_reg': 1.6285455533915874e-05, 'min_child_samples': 18, 'max_bin': 441, 'od_type': 'Iter'}. Best is trial 2 with value: 0.6037865748709123.[0m


0:	learn: 0.6931405	test: 0.6930612	best: 0.6930612 (0)	total: 131ms	remaining: 2m 16s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930612257
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 20:26:44,138][0m Trial 7 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.3883999369553621, 'bagging_temperature': 19.8795809966019, 'n_estimators': 1049, 'max_depth': 14, 'random_strength': 71, 'l2_leaf_reg': 2.187292496954921e-05, 'min_child_samples': 79, 'max_bin': 222, 'od_type': 'IncToDec'}. Best is trial 2 with value: 0.6037865748709123.[0m


0:	learn: 0.6931400	test: 0.6930516	best: 0.6930516 (0)	total: 136ms	remaining: 8m 59s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6926732289
bestIteration = 4

Shrink model to first 5 iterations.


[32m[I 2024-02-21 20:26:58,063][0m Trial 8 finished with value: 0.5851505016722408 and parameters: {'learning_rate': 0.4329206786790408, 'bagging_temperature': 62.33357970148751, 'n_estimators': 3978, 'max_depth': 4, 'random_strength': 31, 'l2_leaf_reg': 9.762247827582143e-06, 'min_child_samples': 75, 'max_bin': 391, 'od_type': 'IncToDec'}. Best is trial 2 with value: 0.6037865748709123.[0m


0:	learn: 0.6931458	test: 0.6931318	best: 0.6931318 (0)	total: 150ms	remaining: 19m 38s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930766061
bestIteration = 5

Shrink model to first 6 iterations.


[32m[I 2024-02-21 20:27:45,963][0m Trial 9 finished with value: 0.5645207439198856 and parameters: {'learning_rate': 0.06860118050976784, 'bagging_temperature': 71.32734627442727, 'n_estimators': 7847, 'max_depth': 11, 'random_strength': 77, 'l2_leaf_reg': 1.4818929934968078e-05, 'min_child_samples': 55, 'max_bin': 328, 'od_type': 'Iter'}. Best is trial 2 with value: 0.6037865748709123.[0m


0:	learn: 0.5200892	total: 445ms	remaining: 26m 55s
1:	learn: 0.3223738	total: 1.15s	remaining: 34m 38s
2:	learn: 0.2432432	total: 1.71s	remaining: 34m 29s
3:	learn: 0.1813364	total: 2.22s	remaining: 33m 35s
4:	learn: 0.1671239	total: 2.85s	remaining: 34m 22s
5:	learn: 0.1499187	total: 3.38s	remaining: 34m 1s
6:	learn: 0.1437595	total: 4.02s	remaining: 34m 39s
7:	learn: 0.1382145	total: 4.62s	remaining: 34m 51s
8:	learn: 0.1379257	total: 4.85s	remaining: 32m 29s
9:	learn: 0.1258244	total: 5.35s	remaining: 32m 16s
10:	learn: 0.1201290	total: 5.92s	remaining: 32m 29s
11:	learn: 0.1192429	total: 6.3s	remaining: 31m 37s
12:	learn: 0.1148627	total: 6.8s	remaining: 31m 31s
13:	learn: 0.1120725	total: 7.49s	remaining: 32m 14s
14:	learn: 0.1108737	total: 8s	remaining: 32m 7s
15:	learn: 0.1107354	total: 8.16s	remaining: 30m 42s
16:	learn: 0.1085077	total: 8.7s	remaining: 30m 48s
17:	learn: 0.1063868	total: 9.26s	remaining: 30m 57s
18:	learn: 0.0990408	total: 9.73s	remaining: 30m 49s
19:	learn: 

Training has stopped (degenerate solution on iteration 70, probably too small l2-regularization, try to increase it)
[32m[I 2024-02-21 20:28:25,205][0m A new study created in memory with name: cat_parameter_optuna[0m




0:	learn: 0.6931427	test: 0.6930995	best: 0.6930995 (0)	total: 123ms	remaining: 15m 33s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930994988
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 20:29:15,186][0m Trial 0 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.19352465823520762, 'bagging_temperature': 95.07192349792751, 'n_estimators': 7588, 'max_depth': 11, 'random_strength': 15, 'l2_leaf_reg': 4.688275664882717e-06, 'min_child_samples': 10, 'max_bin': 460, 'od_type': 'Iter'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931467	test: 0.6931422	best: 0.6931422 (0)	total: 127ms	remaining: 17m 57s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930663326
bestIteration = 26

Shrink model to first 27 iterations.


[32m[I 2024-02-21 20:29:41,569][0m Trial 1 finished with value: 0.5543682641595964 and parameters: {'learning_rate': 0.020086402204943198, 'bagging_temperature': 96.99128611767782, 'n_estimators': 8492, 'max_depth': 6, 'random_strength': 18, 'l2_leaf_reg': 5.51030125050448e-06, 'min_child_samples': 34, 'max_bin': 357, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931404	test: 0.6930713	best: 0.6930713 (0)	total: 174ms	remaining: 10m 31s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6928893065
bestIteration = 7

Shrink model to first 8 iterations.


[32m[I 2024-02-21 20:30:06,890][0m Trial 2 finished with value: 0.5905753798873143 and parameters: {'learning_rate': 0.30980791841396593, 'bagging_temperature': 13.957991126597662, 'n_estimators': 3629, 'max_depth': 8, 'random_strength': 46, 'l2_leaf_reg': 2.355742708217648e-05, 'min_child_samples': 24, 'max_bin': 354, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931405	test: 0.6930718	best: 0.6930718 (0)	total: 130ms	remaining: 3m 25s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930718245
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 20:33:43,902][0m Trial 3 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.3076969774317048, 'bagging_temperature': 17.060707127492282, 'n_estimators': 1585, 'max_depth': 16, 'random_strength': 97, 'l2_leaf_reg': 2.425383647001267e-05, 'min_child_samples': 34, 'max_bin': 229, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931454	test: 0.6931299	best: 0.6931299 (0)	total: 133ms	remaining: 2m 53s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.693129871
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 20:36:35,282][0m Trial 4 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.06979873507394162, 'bagging_temperature': 49.5227392420259, 'n_estimators': 1309, 'max_depth': 15, 'random_strength': 26, 'l2_leaf_reg': 1.987904330777592e-05, 'min_child_samples': 34, 'max_bin': 356, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931377	test: 0.6930295	best: 0.6930295 (0)	total: 157ms	remaining: 24m 41s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930294961
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 20:42:13,465][0m Trial 5 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.4850964676046337, 'bagging_temperature': 77.51553100787784, 'n_estimators': 9456, 'max_depth': 15, 'random_strength': 60, 'l2_leaf_reg': 2.7657008308343274e-05, 'min_child_samples': 13, 'max_bin': 258, 'od_type': 'Iter'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931425	test: 0.6930978	best: 0.6930978 (0)	total: 170ms	remaining: 23m 55s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6929486588
bestIteration = 16

Shrink model to first 17 iterations.


[32m[I 2024-02-21 20:42:46,953][0m Trial 6 finished with value: 0.5667657102310524 and parameters: {'learning_rate': 0.2004518719478462, 'bagging_temperature': 27.142189687071852, 'n_estimators': 8459, 'max_depth': 8, 'random_strength': 28, 'l2_leaf_reg': 1.6285455533915874e-05, 'min_child_samples': 18, 'max_bin': 441, 'od_type': 'Iter'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931391	test: 0.6930525	best: 0.6930525 (0)	total: 151ms	remaining: 2m 37s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.693052467
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 20:45:05,810][0m Trial 7 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.3883999369553621, 'bagging_temperature': 19.8795809966019, 'n_estimators': 1049, 'max_depth': 14, 'random_strength': 71, 'l2_leaf_reg': 2.187292496954921e-05, 'min_child_samples': 79, 'max_bin': 222, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931384	test: 0.6930419	best: 0.6930419 (0)	total: 147ms	remaining: 9m 46s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6917042766
bestIteration = 21

Shrink model to first 22 iterations.


[32m[I 2024-02-21 20:45:28,918][0m Trial 8 finished with value: 0.6262040670709953 and parameters: {'learning_rate': 0.4329206786790408, 'bagging_temperature': 62.33357970148751, 'n_estimators': 3978, 'max_depth': 4, 'random_strength': 31, 'l2_leaf_reg': 9.762247827582143e-06, 'min_child_samples': 75, 'max_bin': 391, 'od_type': 'IncToDec'}. Best is trial 8 with value: 0.6262040670709953.[0m


0:	learn: 0.6931455	test: 0.6931302	best: 0.6931302 (0)	total: 174ms	remaining: 22m 42s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6931042685
bestIteration = 1

Shrink model to first 2 iterations.


[32m[I 2024-02-21 20:46:08,305][0m Trial 9 finished with value: 0.49922544665909324 and parameters: {'learning_rate': 0.06860118050976784, 'bagging_temperature': 71.32734627442727, 'n_estimators': 7847, 'max_depth': 11, 'random_strength': 77, 'l2_leaf_reg': 1.4818929934968078e-05, 'min_child_samples': 55, 'max_bin': 328, 'od_type': 'Iter'}. Best is trial 8 with value: 0.6262040670709953.[0m


0:	learn: 0.5038743	total: 378ms	remaining: 25m 1s
1:	learn: 0.2949822	total: 749ms	remaining: 24m 49s
2:	learn: 0.1701374	total: 1.06s	remaining: 23m 23s
3:	learn: 0.1499306	total: 1.39s	remaining: 23m 2s
4:	learn: 0.1457592	total: 1.57s	remaining: 20m 49s
5:	learn: 0.1277521	total: 1.83s	remaining: 20m 11s
6:	learn: 0.1236492	total: 2.07s	remaining: 19m 33s
7:	learn: 0.1206933	total: 2.39s	remaining: 19m 44s
8:	learn: 0.1167380	total: 2.66s	remaining: 19m 33s
9:	learn: 0.1103431	total: 2.95s	remaining: 19m 30s
10:	learn: 0.1080315	total: 3.14s	remaining: 18m 51s
11:	learn: 0.1022107	total: 3.42s	remaining: 18m 49s
12:	learn: 0.1022106	total: 3.48s	remaining: 17m 40s
13:	learn: 0.1015297	total: 3.84s	remaining: 18m 6s
14:	learn: 0.1002387	total: 4.08s	remaining: 17m 57s
15:	learn: 0.0994408	total: 4.45s	remaining: 18m 20s
16:	learn: 0.0978512	total: 4.89s	remaining: 18m 58s
17:	learn: 0.0956639	total: 5.34s	remaining: 19m 34s
18:	learn: 0.0948612	total: 5.67s	remaining: 19m 40s
19:	le

[32m[I 2024-02-21 21:07:26,367][0m A new study created in memory with name: cat_parameter_optuna[0m




0:	learn: 0.6931409	test: 0.6930911	best: 0.6930911 (0)	total: 137ms	remaining: 17m 15s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930911118
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 21:08:10,336][0m Trial 0 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.19352465823520762, 'bagging_temperature': 95.07192349792751, 'n_estimators': 7588, 'max_depth': 11, 'random_strength': 15, 'l2_leaf_reg': 4.688275664882717e-06, 'min_child_samples': 10, 'max_bin': 460, 'od_type': 'Iter'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931465	test: 0.6931413	best: 0.6931413 (0)	total: 138ms	remaining: 19m 30s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6931360028
bestIteration = 1

Shrink model to first 2 iterations.


[32m[I 2024-02-21 21:08:30,558][0m Trial 1 finished with value: 0.599361597642822 and parameters: {'learning_rate': 0.020086402204943198, 'bagging_temperature': 96.99128611767782, 'n_estimators': 8492, 'max_depth': 6, 'random_strength': 18, 'l2_leaf_reg': 5.51030125050448e-06, 'min_child_samples': 34, 'max_bin': 357, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931378	test: 0.6930581	best: 0.6930581 (0)	total: 170ms	remaining: 10m 14s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930093537
bestIteration = 2

Shrink model to first 3 iterations.


[32m[I 2024-02-21 21:08:55,160][0m Trial 2 finished with value: 0.5742549565651108 and parameters: {'learning_rate': 0.30980791841396593, 'bagging_temperature': 13.957991126597662, 'n_estimators': 3629, 'max_depth': 8, 'random_strength': 46, 'l2_leaf_reg': 2.355742708217648e-05, 'min_child_samples': 24, 'max_bin': 354, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931378	test: 0.6930587	best: 0.6930587 (0)	total: 135ms	remaining: 3m 33s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930586663
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 21:14:23,377][0m Trial 3 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.3076969774317048, 'bagging_temperature': 17.060707127492282, 'n_estimators': 1585, 'max_depth': 16, 'random_strength': 97, 'l2_leaf_reg': 2.425383647001267e-05, 'min_child_samples': 34, 'max_bin': 229, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931448	test: 0.6931268	best: 0.6931268 (0)	total: 187ms	remaining: 4m 4s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6931268026
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 21:17:57,362][0m Trial 4 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.06979873507394162, 'bagging_temperature': 49.5227392420259, 'n_estimators': 1309, 'max_depth': 15, 'random_strength': 26, 'l2_leaf_reg': 1.987904330777592e-05, 'min_child_samples': 34, 'max_bin': 356, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931340	test: 0.6930092	best: 0.6930092 (0)	total: 144ms	remaining: 22m 40s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930091849
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 21:24:17,764][0m Trial 5 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.4850964676046337, 'bagging_temperature': 77.51553100787784, 'n_estimators': 9456, 'max_depth': 15, 'random_strength': 60, 'l2_leaf_reg': 2.7657008308343274e-05, 'min_child_samples': 13, 'max_bin': 258, 'od_type': 'Iter'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931407	test: 0.6930891	best: 0.6930891 (0)	total: 160ms	remaining: 22m 34s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930891298
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 21:24:46,330][0m Trial 6 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.2004518719478462, 'bagging_temperature': 27.142189687071852, 'n_estimators': 8459, 'max_depth': 8, 'random_strength': 28, 'l2_leaf_reg': 1.6285455533915874e-05, 'min_child_samples': 18, 'max_bin': 441, 'od_type': 'Iter'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931360	test: 0.6930360	best: 0.6930360 (0)	total: 166ms	remaining: 2m 53s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6930360155
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 21:27:22,905][0m Trial 7 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.3883999369553621, 'bagging_temperature': 19.8795809966019, 'n_estimators': 1049, 'max_depth': 14, 'random_strength': 71, 'l2_leaf_reg': 2.187292496954921e-05, 'min_child_samples': 79, 'max_bin': 222, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931350	test: 0.6930236	best: 0.6930236 (0)	total: 163ms	remaining: 10m 47s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.692376433
bestIteration = 6

Shrink model to first 7 iterations.


[32m[I 2024-02-21 21:27:41,941][0m Trial 8 finished with value: 0.509617581212592 and parameters: {'learning_rate': 0.4329206786790408, 'bagging_temperature': 62.33357970148751, 'n_estimators': 3978, 'max_depth': 4, 'random_strength': 31, 'l2_leaf_reg': 9.762247827582143e-06, 'min_child_samples': 75, 'max_bin': 391, 'od_type': 'IncToDec'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.6931448	test: 0.6931272	best: 0.6931272 (0)	total: 157ms	remaining: 20m 30s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6931271507
bestIteration = 0

Shrink model to first 1 iterations.


[32m[I 2024-02-21 21:28:28,022][0m Trial 9 finished with value: 0.6005559189012426 and parameters: {'learning_rate': 0.06860118050976784, 'bagging_temperature': 71.32734627442727, 'n_estimators': 7847, 'max_depth': 11, 'random_strength': 77, 'l2_leaf_reg': 1.4818929934968078e-05, 'min_child_samples': 55, 'max_bin': 328, 'od_type': 'Iter'}. Best is trial 0 with value: 0.6005559189012426.[0m


0:	learn: 0.5294854	total: 400ms	remaining: 50m 31s
1:	learn: 0.3735725	total: 2.4s	remaining: 2h 31m 30s
2:	learn: 0.2420443	total: 4.17s	remaining: 2h 55m 52s
3:	learn: 0.1789102	total: 5.86s	remaining: 3h 5m 5s
4:	learn: 0.1616202	total: 6.23s	remaining: 2h 37m 22s
5:	learn: 0.1406784	total: 8.39s	remaining: 2h 56m 49s
6:	learn: 0.1204955	total: 9.03s	remaining: 2h 42m 55s
7:	learn: 0.1005215	total: 11.2s	remaining: 2h 56m 49s
8:	learn: 0.0923033	total: 13.3s	remaining: 3h 6m 14s
9:	learn: 0.0901101	total: 14.1s	remaining: 2h 57m 37s
10:	learn: 0.0895053	total: 14.6s	remaining: 2h 47m 38s
11:	learn: 0.0848521	total: 16.6s	remaining: 2h 55m 5s
12:	learn: 0.0815776	total: 18.9s	remaining: 3h 3m 55s
13:	learn: 0.0805783	total: 21s	remaining: 3h 8m 57s
14:	learn: 0.0805702	total: 21s	remaining: 2h 57m 5s
15:	learn: 0.0789279	total: 23.2s	remaining: 3h 3m 16s
16:	learn: 0.0778278	total: 24.2s	remaining: 2h 59m 18s
17:	learn: 0.0746797	total: 25.2s	remaining: 2h 56m 58s
18:	learn: 0.07366

Training has stopped (degenerate solution on iteration 176, probably too small l2-regularization, try to increase it)






In [52]:
data['test']

Unnamed: 0,id,bant_submit,customer_country,business_unit,com_reg_ver_win_rate,customer_idx,customer_type,enterprise,historical_existing_cnt,customer_job,...,idx_count,lead_log,lead_count,enterprise_count,enterprise_weight,pred_0,pred_1,pred_2,pred_3,pred_4
0,19844,0.00,5,1,0.073248,50,0,0,53.0,4,...,0,1.098612,1,1,0,1,1,1,0,1
1,9738,0.25,8,2,0.000000,13019,0,1,0.0,7,...,0,3.135494,1,0,0,1,0,1,1,1
2,8491,1.00,6,1,0.000000,35112,1,1,0.0,11,...,0,4.927254,0,0,0,0,0,0,0,0
3,19895,0.50,8,1,0.118644,35113,4,0,0.0,6,...,1,1.098612,1,0,0,0,0,0,0,0
4,10465,1.00,5,1,0.074949,35114,0,0,2.0,2,...,0,4.672829,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5266,13855,0.50,5,0,0.000000,9124,4,0,10.0,6,...,1,1.945910,1,1,0,0,0,0,0,0
5267,7979,0.25,8,2,0.000000,50,4,0,0.0,7,...,0,5.056246,0,0,0,1,1,1,1,1
5268,12887,0.75,5,0,0.000000,38733,1,0,0.0,3,...,1,3.663562,0,0,0,0,0,0,0,0
5269,17530,0.00,39,2,0.000000,38734,0,1,0.0,7,...,1,1.098612,1,0,0,0,1,0,0,0


In [58]:
fold = 1
data['test'][f'pred_{fold}']

0       1
1       0
2       0
3       0
4       0
       ..
5266    0
5267    1
5268    0
5269    1
5270    1
Name: pred_1, Length: 5271, dtype: int64

In [65]:
# 각 array의 i번째 값들을 추출하여 리스트 생성
df_test = data['test']
df_test['is_converted'] = df_test['pred_0'] + df_test['pred_1'] + df_test['pred_2'] + df_test['pred_3'] + df_test['pred_4']
print(df_test['is_converted'].value_counts())
df_test['is_converted'] = df_test['is_converted'].apply(lambda x: True if x > 0 else False)
test_pred = df_test['is_converted']
df_test[['pred_0', 'pred_1', 'pred_2', 'pred_3', 'pred_4', 'is_converted']]

0    3715
1     476
5     351
2     273
4     241
3     215
Name: is_converted, dtype: int64


Unnamed: 0,pred_0,pred_1,pred_2,pred_3,pred_4,is_converted
0,1,1,1,0,1,True
1,1,0,1,1,1,True
2,0,0,0,0,0,False
3,0,0,0,0,0,False
4,0,0,0,0,0,False
...,...,...,...,...,...,...
5266,0,0,0,0,0,False
5267,1,1,1,1,1,True
5268,0,0,0,0,0,False
5269,0,1,0,0,0,True


In [None]:
def get_clf_eval(y_test, y_pred=None):
    confusion = confusion_matrix(y_test, y_pred, labels=[True, False])
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, labels=[True, False])
    recall = recall_score(y_test, y_pred)
    F1 = f1_score(y_test, y_pred, labels=[True, False])

    print("오차행렬:\n", confusion)
    print("\n정확도: {:.4f}".format(accuracy))
    print("정밀도: {:.4f}".format(precision))
    print("재현율: {:.4f}".format(recall))
    print("F1: {:.4f}".format(F1))

In [None]:
# 테스트 데이터로 예측
pred = model.predict(data['X_valid'])
get_clf_eval(data['y_valid'], pred)

### 테스트 데이터 예측

In [None]:
# 예측에 필요한 데이터 분리
test_pred = model.predict(data['test'].drop(["is_converted", "id"], axis=1))

In [None]:
test_pred = [val == 1 for val in test_pred]

In [None]:
sum(test_pred) # True로 예측된 개수

### 제출 파일 작성

In [67]:
# 제출 데이터 읽어오기 (df_test는 전처리된 데이터가 저장됨)
df_sub = pd.read_csv("submission.csv")
df_sub["is_converted"] = test_pred

df_sub

Unnamed: 0,id,bant_submit,customer_country,business_unit,com_reg_ver_win_rate,customer_idx,customer_type,enterprise,historical_existing_cnt,id_strategic_ver,...,response_corporate,expected_timeline,ver_cus,ver_pro,ver_win_rate_x,ver_win_ratio_per_bu,business_area,business_subarea,lead_owner,is_converted
0,19844,0.00,/ / Brazil,ID,0.073248,47466,End Customer,Enterprise,53.0,,...,LGESP,,1,0,0.001183,0.049840,retail,Electronics & Telco,278,True
1,9738,0.25,400 N State Of Franklin Rd Cloud IT / Johnson...,IT,,5405,End Customer,SMB,,,...,LGEUS,,0,0,0.000013,,transportation,Others,437,True
2,8491,1.00,/ / U.A.E,ID,,13597,Specifier/ Influencer,SMB,,,...,LGEGF,less than 3 months,0,0,0.000060,0.131148,hospital & health care,General Hospital,874,False
3,19895,0.50,/ Madison / United States,ID,0.118644,17204,,Enterprise,,,...,LGEUS,more than a year,0,0,0.001183,0.049840,retail,,194,False
4,10465,1.00,/ Sao Paulo / Brazil,ID,0.074949,2329,End Customer,Enterprise,2.0,1.0,...,LGESP,less than 3 months,1,1,0.003079,0.064566,corporate / office,Engineering,167,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5266,13855,0.50,/São Paulo/Brazil,AS,,40292,,Enterprise,10.0,,...,LGESP,,0,0,,,,,97,False
5267,7979,0.25,General / / United States,IT,,47466,,Enterprise,0.0,,...,LGEUS,,0,0,,,,,438,True
5268,12887,0.75,/ OURO BRANCO / Brazil,AS,,46227,Specifier/ Influencer,Enterprise,,,...,LGESP,less than 3 months,0,0,,,,,97,False
5269,17530,0.00,/ / Germany,IT,,45667,End Customer,SMB,,,...,LGEDG,,0,0,,,,,429,True


In [68]:
# 제출 파일 저장
df_sub.to_csv("submission.csv", index=False)