In [1]:
import pandas as pd
import numpy as np
from xgboost import XGBClassifier, callback
from optuna.integration import XGBoostPruningCallback
import mlflow
from utils.metrics import Metrics_df,print_metrics, precision, recall, f1


In [2]:


import pandas as pd
from sklearn.metrics import roc_auc_score
from Config import config
import warnings
warnings.filterwarnings("ignore")
import optuna  # pip install optuna
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GroupKFold
import numpy as np
from sklearn.model_selection import train_test_split
from datetime import datetime
import gc
import pickle
import tempfile

In [3]:
from lightgbm.basic import LightGBMError
from lightgbm import LGBMClassifier, early_stopping, log_evaluation, Booster
from sklearn.metrics import roc_auc_score
import os

# Loading data and group split

In [4]:
# df_pairs = pd.read_csv("/workspace/Training_data/training/Full_training_data.csv")
# df_pairs = df_pairs.sample(frac=1).reset_index(drop=True)
# kf = GroupKFold(n_splits=5)
# for i, (trn_idx, val_idx) in enumerate(kf.split(df_pairs, df_pairs["placeId1"], df_pairs["placeId1"])):
#     df_pairs.loc[val_idx, "fold"] = i
# df_pairs["fold"].value_counts()
# df_pairs.to_csv("/workspace/Training_data/training/Full_training_data_fold.csv",index=None)

# Loading data

In [5]:
df_pairs = pd.read_csv("/workspace/Training_data/training/Full_training_data_fold.csv")

In [6]:
features = ['similarity', 'haversine', 'name_davies',
       'name_leven', 'name_dice', 'name_jaro', 'name_set_ratio',
       'street_davies', 'street_leven', 'street_jaro', 'email_davies',
       'email_leven', 'email_jaro', 'url_davies', 'url_leven', 'url_jaro',
       'brands_davies', 'brand_leven', 'brand_jaro', 'phone_lcs',
       'subcat_WRatio', 'subcat_ratio', 'subcat_token_set_ratio',
       'Is_direction_match_0', 'Is_direction_match_1', 'Is_direction_match_2',
       'Is_house_match_0', 'Is_house_match_1', 'Is_house_match_2',
       'Is_category_match_0', 'Is_category_match_1', 'Is_subcategory_match_0',
       'Is_subcategory_match_1', 'Is_subcategory_match_2', 'Is_brand_match_0',
       'Is_brand_match_1', 'Is_brand_match_2', 'Is_brand_match_3',
       'Is_related_cat_0', 'Is_related_cat_1', 'Is_name_number_match_0',
       'Is_name_number_match_1', 'Is_name_number_match_2',
       'Is_name_number_match_3', 'is_phone_match_1', 'is_phone_match_2',
       'is_phone_match_3', 'is_phone_match_4', 'Is_email_match_0',
       'Is_email_match_1', 'Is_email_match_2', 'Is_url_match_0',
       'Is_url_match_1', 'Is_url_match_2']

In [7]:
X_train = df_pairs[df_pairs["fold"]!=4]

In [8]:
X_test = df_pairs[df_pairs["fold"]==4]

In [9]:
def objective(trial, X_train,X_test,Y_train,Y_test):
    params = {
        "max_depth": trial.suggest_int("max_depth", 4, 20),
        "colsample_bytree": trial.suggest_loguniform("colsample_bytree", 0.5, 1),
        "subsample": trial.suggest_loguniform("subsample", 0.4, 1),
        "alpha": trial.suggest_loguniform("alpha", 0.01, 10.0),
        "lambda": trial.suggest_loguniform("lambda", 1e-8, 10.0),
        "gamma": trial.suggest_loguniform("lambda", 1e-8, 10.0),
        "min_child_weight": trial.suggest_loguniform("min_child_weight", 1, 300),
    }
    
    
    # cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1121218)
    cback = [XGBoostPruningCallback(trial, "validation_0-auc")]
    
    model = XGBClassifier(
        **params,
        n_estimators=10000,
        learning_rate=0.1,
        objective="binary:logistic",
        eval_metric="auc",
        tree_method="gpu_hist",
        use_label_encoder=False,
        random_state=42,
        enable_categorical=True,
    )

        
        
    model.fit(X_train,Y_train,eval_set=[(X_test, Y_test)],verbose=100,early_stopping_rounds=20,callbacks=cback)
        
    with tempfile.TemporaryFile() as dump_file:        
        pickle.dump(model , dump_file)
        dump_file.seek(0)
        model = pickle.load(dump_file)
    preds = model.predict_proba(X_test)[:, 1]
    del trial
    return roc_auc_score(Y_test, preds)

In [10]:
date_log = datetime.now().strftime("%Y_%m_%d_%H_%M")
study = optuna.create_study(direction="maximize", study_name=f"parameter_tuning_{config.country}_{date_log}")
func = lambda trial: objective(trial, X_train[features],X_test[features],X_train["duplicate_flag"],X_test["duplicate_flag"])
study.optimize(func, n_trials=50)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)


[32m[I 2022-12-09 19:09:35,546][0m A new study created in memory with name: parameter_tuning_NZL_2022_12_09_19_09[0m


[0]	validation_0-auc:0.99730
[100]	validation_0-auc:0.99978
[200]	validation_0-auc:0.99980
[300]	validation_0-auc:0.99981
[400]	validation_0-auc:0.99982
[500]	validation_0-auc:0.99982
[539]	validation_0-auc:0.99982


[32m[I 2022-12-09 19:10:23,185][0m Trial 0 finished with value: 0.9998229974908446 and parameters: {'max_depth': 14, 'colsample_bytree': 0.9186136272179423, 'subsample': 0.6850176145347159, 'alpha': 0.3584883757182529, 'lambda': 0.0001760402607513321, 'min_child_weight': 126.44241820052189}. Best is trial 0 with value: 0.9998229974908446.[0m


[0]	validation_0-auc:0.99676
[100]	validation_0-auc:0.99981
[200]	validation_0-auc:0.99984
[300]	validation_0-auc:0.99985
[400]	validation_0-auc:0.99985
[500]	validation_0-auc:0.99985
[600]	validation_0-auc:0.99986
[636]	validation_0-auc:0.99986


[32m[I 2022-12-09 19:11:15,648][0m Trial 1 finished with value: 0.9998566930332051 and parameters: {'max_depth': 9, 'colsample_bytree': 0.5269845457413087, 'subsample': 0.9346247586259055, 'alpha': 0.012062027528091367, 'lambda': 0.0085505761406352, 'min_child_weight': 94.37961567971094}. Best is trial 1 with value: 0.9998566930332051.[0m


[0]	validation_0-auc:0.99386
[100]	validation_0-auc:0.99981
[200]	validation_0-auc:0.99987
[300]	validation_0-auc:0.99989
[400]	validation_0-auc:0.99990
[500]	validation_0-auc:0.99990
[600]	validation_0-auc:0.99991
[673]	validation_0-auc:0.99991


[32m[I 2022-12-09 19:12:05,592][0m Trial 2 finished with value: 0.99990982095696 and parameters: {'max_depth': 5, 'colsample_bytree': 0.9134134531068125, 'subsample': 0.5624551104679633, 'alpha': 0.22006231087761569, 'lambda': 0.0008226157847129335, 'min_child_weight': 5.536911859149326}. Best is trial 2 with value: 0.99990982095696.[0m


[0]	validation_0-auc:0.99561
[100]	validation_0-auc:0.99984
[200]	validation_0-auc:0.99987
[300]	validation_0-auc:0.99988
[350]	validation_0-auc:0.99988


[32m[I 2022-12-09 19:12:43,471][0m Trial 3 finished with value: 0.9998791323678982 and parameters: {'max_depth': 16, 'colsample_bytree': 0.5692875405843302, 'subsample': 0.5013963728445617, 'alpha': 0.5186345810918846, 'lambda': 3.0375724558498316e-07, 'min_child_weight': 29.248103980035214}. Best is trial 2 with value: 0.99990982095696.[0m


[0]	validation_0-auc:0.98667
[100]	validation_0-auc:0.99973
[200]	validation_0-auc:0.99984
[300]	validation_0-auc:0.99987
[400]	validation_0-auc:0.99988
[500]	validation_0-auc:0.99989
[600]	validation_0-auc:0.99990
[700]	validation_0-auc:0.99990
[800]	validation_0-auc:0.99990
[900]	validation_0-auc:0.99991
[995]	validation_0-auc:0.99991


[32m[I 2022-12-09 19:13:47,209][0m Trial 4 finished with value: 0.9999092758332846 and parameters: {'max_depth': 4, 'colsample_bytree': 0.7144272770930772, 'subsample': 0.6847956927214623, 'alpha': 0.4781098273559269, 'lambda': 0.1900606844502818, 'min_child_weight': 3.1947497533983586}. Best is trial 2 with value: 0.99990982095696.[0m


[0]	validation_0-auc:0.99500


[32m[I 2022-12-09 19:13:50,151][0m Trial 5 pruned. Trial was pruned at iteration 0.[0m


[0]	validation_0-auc:0.99009


[32m[I 2022-12-09 19:13:53,307][0m Trial 6 pruned. Trial was pruned at iteration 0.[0m


[0]	validation_0-auc:0.99309


[32m[I 2022-12-09 19:13:56,348][0m Trial 7 pruned. Trial was pruned at iteration 0.[0m


[0]	validation_0-auc:0.99376


[32m[I 2022-12-09 19:13:59,121][0m Trial 8 pruned. Trial was pruned at iteration 0.[0m


[0]	validation_0-auc:0.99733


[32m[I 2022-12-09 19:14:02,047][0m Trial 9 pruned. Trial was pruned at iteration 1.[0m


[0]	validation_0-auc:0.98538


[32m[I 2022-12-09 19:14:04,447][0m Trial 10 pruned. Trial was pruned at iteration 0.[0m
[32m[I 2022-12-09 19:14:07,027][0m Trial 11 pruned. Trial was pruned at iteration 0.[0m


[0]	validation_0-auc:0.99643


[32m[I 2022-12-09 19:14:10,102][0m Trial 12 pruned. Trial was pruned at iteration 1.[0m


[0]	validation_0-auc:0.99202


[32m[I 2022-12-09 19:14:13,019][0m Trial 13 pruned. Trial was pruned at iteration 0.[0m
[32m[I 2022-12-09 19:14:16,293][0m Trial 14 pruned. Trial was pruned at iteration 0.[0m


[0]	validation_0-auc:0.99670
[100]	validation_0-auc:0.99990
[183]	validation_0-auc:0.99991


[32m[I 2022-12-09 19:14:48,701][0m Trial 15 finished with value: 0.9999141017961478 and parameters: {'max_depth': 10, 'colsample_bytree': 0.7208497316688175, 'subsample': 0.43986234255301915, 'alpha': 0.033631661431039225, 'lambda': 0.0003953963106833996, 'min_child_weight': 1.3631983762556588}. Best is trial 15 with value: 0.9999141017961478.[0m


[0]	validation_0-auc:0.99777
[100]	validation_0-auc:0.99991
[157]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:15:24,892][0m Trial 16 finished with value: 0.9999171322579051 and parameters: {'max_depth': 12, 'colsample_bytree': 0.6161675658839894, 'subsample': 0.4088790389160657, 'alpha': 0.022424635940900765, 'lambda': 3.243373645440172e-06, 'min_child_weight': 1.1007273499097492}. Best is trial 16 with value: 0.9999171322579051.[0m


[0]	validation_0-auc:0.99844
[100]	validation_0-auc:0.99990
[194]	validation_0-auc:0.99991


[32m[I 2022-12-09 19:16:03,830][0m Trial 17 finished with value: 0.99991325225497 and parameters: {'max_depth': 11, 'colsample_bytree': 0.6057191879727409, 'subsample': 0.408666043670173, 'alpha': 0.017803174082064065, 'lambda': 2.007896483158292e-06, 'min_child_weight': 1.0454622287230981}. Best is trial 16 with value: 0.9999171322579051.[0m


[0]	validation_0-auc:0.99809
[100]	validation_0-auc:0.99990
[200]	validation_0-auc:0.99991
[269]	validation_0-auc:0.99991


[32m[I 2022-12-09 19:16:49,943][0m Trial 18 finished with value: 0.9999147447865581 and parameters: {'max_depth': 11, 'colsample_bytree': 0.6326002881021041, 'subsample': 0.44474980966958166, 'alpha': 0.03267971552041733, 'lambda': 1.9194426669799467e-07, 'min_child_weight': 2.099479414040363}. Best is trial 16 with value: 0.9999171322579051.[0m


[0]	validation_0-auc:0.99851


[32m[I 2022-12-09 19:16:54,216][0m Trial 19 pruned. Trial was pruned at iteration 2.[0m


[0]	validation_0-auc:0.99846


[32m[I 2022-12-09 19:17:00,291][0m Trial 20 pruned. Trial was pruned at iteration 18.[0m


[0]	validation_0-auc:0.99740
[100]	validation_0-auc:0.99990
[200]	validation_0-auc:0.99991
[209]	validation_0-auc:0.99991


[32m[I 2022-12-09 19:17:39,656][0m Trial 21 finished with value: 0.9999112062525002 and parameters: {'max_depth': 11, 'colsample_bytree': 0.7399413940240407, 'subsample': 0.4464986647054278, 'alpha': 0.029505254748058964, 'lambda': 5.7535646700390595e-06, 'min_child_weight': 1.6013567783171556}. Best is trial 16 with value: 0.9999171322579051.[0m


[0]	validation_0-auc:0.99835
[100]	validation_0-auc:0.99990
[165]	validation_0-auc:0.99991


[32m[I 2022-12-09 19:18:14,968][0m Trial 22 finished with value: 0.999914141573141 and parameters: {'max_depth': 11, 'colsample_bytree': 0.6217089613250836, 'subsample': 0.4242005696165182, 'alpha': 0.010563679964924277, 'lambda': 1.4901966339341323e-07, 'min_child_weight': 1.04689908202118}. Best is trial 16 with value: 0.9999171322579051.[0m


[0]	validation_0-auc:0.99763
[100]	validation_0-auc:0.99991
[165]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:18:51,342][0m Trial 23 finished with value: 0.9999153889694263 and parameters: {'max_depth': 12, 'colsample_bytree': 0.6132757954518478, 'subsample': 0.5317588788681346, 'alpha': 0.011649070651440479, 'lambda': 1.8247595780434574e-07, 'min_child_weight': 2.087806917744855}. Best is trial 16 with value: 0.9999171322579051.[0m


[0]	validation_0-auc:0.99817
[100]	validation_0-auc:0.99991
[200]	validation_0-auc:0.99992
[207]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:19:34,702][0m Trial 24 finished with value: 0.9999170682910489 and parameters: {'max_depth': 13, 'colsample_bytree': 0.5760211949770218, 'subsample': 0.5223430619992357, 'alpha': 0.02270629982496866, 'lambda': 5.1498600039935845e-08, 'min_child_weight': 3.0158317545237234}. Best is trial 16 with value: 0.9999171322579051.[0m


[0]	validation_0-auc:0.99871
[100]	validation_0-auc:0.99991
[200]	validation_0-auc:0.99992
[224]	validation_0-auc:0.99991


[32m[I 2022-12-09 19:20:17,479][0m Trial 25 finished with value: 0.9999155983309879 and parameters: {'max_depth': 13, 'colsample_bytree': 0.5073359586452293, 'subsample': 0.529410349503827, 'alpha': 0.018620478511706784, 'lambda': 1.1194932982472348e-08, 'min_child_weight': 3.8882206406745836}. Best is trial 16 with value: 0.9999171322579051.[0m


[0]	validation_0-auc:0.99874
[100]	validation_0-auc:0.99990
[200]	validation_0-auc:0.99992
[261]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:21:05,314][0m Trial 26 finished with value: 0.9999167898520964 and parameters: {'max_depth': 13, 'colsample_bytree': 0.5006911302547915, 'subsample': 0.5410980962935483, 'alpha': 0.01998074135432142, 'lambda': 1.0685732431650807e-08, 'min_child_weight': 4.014097726885774}. Best is trial 16 with value: 0.9999171322579051.[0m
[32m[I 2022-12-09 19:21:08,499][0m Trial 27 pruned. Trial was pruned at iteration 0.[0m


[0]	validation_0-auc:0.99861


[32m[I 2022-12-09 19:21:19,545][0m Trial 28 pruned. Trial was pruned at iteration 28.[0m


[0]	validation_0-auc:0.99898


[32m[I 2022-12-09 19:21:37,424][0m Trial 29 pruned. Trial was pruned at iteration 57.[0m


[0]	validation_0-auc:0.99873


[32m[I 2022-12-09 19:21:46,682][0m Trial 30 pruned. Trial was pruned at iteration 25.[0m


[0]	validation_0-auc:0.99837


[32m[I 2022-12-09 19:21:49,736][0m Trial 31 pruned. Trial was pruned at iteration 1.[0m


[0]	validation_0-auc:0.99815


[32m[I 2022-12-09 19:21:53,547][0m Trial 32 pruned. Trial was pruned at iteration 3.[0m


[0]	validation_0-auc:0.99919
[100]	validation_0-auc:0.99991
[200]	validation_0-auc:0.99992
[247]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:22:50,676][0m Trial 33 finished with value: 0.9999220485920526 and parameters: {'max_depth': 15, 'colsample_bytree': 0.5891797914157996, 'subsample': 0.9915131532117468, 'alpha': 0.01601488896353689, 'lambda': 6.471320277059568e-08, 'min_child_weight': 4.639909639888095}. Best is trial 33 with value: 0.9999220485920526.[0m


[0]	validation_0-auc:0.99916
[100]	validation_0-auc:0.99991
[200]	validation_0-auc:0.99992
[271]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:23:46,500][0m Trial 34 finished with value: 0.9999187483792515 and parameters: {'max_depth': 15, 'colsample_bytree': 0.5906790341717117, 'subsample': 0.9678478133518589, 'alpha': 0.011129400083703498, 'lambda': 6.459634185015178e-07, 'min_child_weight': 6.169584493535323}. Best is trial 33 with value: 0.9999220485920526.[0m


[0]	validation_0-auc:0.99903


[32m[I 2022-12-09 19:23:56,435][0m Trial 35 pruned. Trial was pruned at iteration 22.[0m


[0]	validation_0-auc:0.99910
[100]	validation_0-auc:0.99991
[200]	validation_0-auc:0.99992
[207]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:25:00,910][0m Trial 36 finished with value: 0.9999203179095587 and parameters: {'max_depth': 16, 'colsample_bytree': 0.6616550098968121, 'subsample': 0.889204032871335, 'alpha': 0.04499153489873214, 'lambda': 3.719314435287241e-06, 'min_child_weight': 1.7152158649213802}. Best is trial 33 with value: 0.9999220485920526.[0m


[0]	validation_0-auc:0.99902
[100]	validation_0-auc:0.99991
[200]	validation_0-auc:0.99992
[209]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:26:11,879][0m Trial 37 finished with value: 0.9999219777259747 and parameters: {'max_depth': 19, 'colsample_bytree': 0.6787886480834696, 'subsample': 0.9013278308763079, 'alpha': 0.11909681624395013, 'lambda': 7.11022282953649e-05, 'min_child_weight': 1.723543442595701}. Best is trial 33 with value: 0.9999220485920526.[0m


[0]	validation_0-auc:0.99905
[100]	validation_0-auc:0.99991
[200]	validation_0-auc:0.99992
[250]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:27:05,597][0m Trial 38 finished with value: 0.9999180449141836 and parameters: {'max_depth': 18, 'colsample_bytree': 0.677666987233004, 'subsample': 0.8954556467995292, 'alpha': 0.37228399653632, 'lambda': 3.292094730886552e-05, 'min_child_weight': 5.654723480639898}. Best is trial 33 with value: 0.9999220485920526.[0m


[0]	validation_0-auc:0.99916
[100]	validation_0-auc:0.99992
[183]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:28:16,019][0m Trial 39 finished with value: 0.9999205487353651 and parameters: {'max_depth': 20, 'colsample_bytree': 0.6641562725882335, 'subsample': 0.9987960214242532, 'alpha': 0.12105573495525211, 'lambda': 7.949119726546566e-05, 'min_child_weight': 1.715211632735842}. Best is trial 33 with value: 0.9999220485920526.[0m


[0]	validation_0-auc:0.99926
[100]	validation_0-auc:0.99992
[156]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:29:18,123][0m Trial 40 finished with value: 0.9999195666609936 and parameters: {'max_depth': 20, 'colsample_bytree': 0.6768535525530017, 'subsample': 0.8966307432304494, 'alpha': 0.18570645738041813, 'lambda': 9.031493524212372e-05, 'min_child_weight': 1.5733621926785821}. Best is trial 33 with value: 0.9999220485920526.[0m


[0]	validation_0-auc:0.99884
[100]	validation_0-auc:0.99992
[139]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:30:09,755][0m Trial 41 finished with value: 0.9999187249559686 and parameters: {'max_depth': 19, 'colsample_bytree': 0.6639919054882953, 'subsample': 0.8987384201374014, 'alpha': 0.23140663023376593, 'lambda': 9.47518597280454e-05, 'min_child_weight': 1.866036369915277}. Best is trial 33 with value: 0.9999220485920526.[0m


[0]	validation_0-auc:0.99876
[100]	validation_0-auc:0.99991
[196]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:31:21,649][0m Trial 42 finished with value: 0.9999186207010656 and parameters: {'max_depth': 20, 'colsample_bytree': 0.6941307084593156, 'subsample': 0.8395876259026716, 'alpha': 0.13077023605742644, 'lambda': 0.0023222394269973774, 'min_child_weight': 1.5733299004714685}. Best is trial 33 with value: 0.9999220485920526.[0m


[0]	validation_0-auc:0.99926
[100]	validation_0-auc:0.99991
[200]	validation_0-auc:0.99992
[220]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:32:26,460][0m Trial 43 finished with value: 0.9999186680586849 and parameters: {'max_depth': 19, 'colsample_bytree': 0.7564102593084113, 'subsample': 0.9249412762988694, 'alpha': 0.10921625108719825, 'lambda': 4.434280159766121e-05, 'min_child_weight': 2.7167196226636037}. Best is trial 33 with value: 0.9999220485920526.[0m


[0]	validation_0-auc:0.99881
[100]	validation_0-auc:0.99991
[187]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:33:23,439][0m Trial 44 finished with value: 0.9999189947070124 and parameters: {'max_depth': 17, 'colsample_bytree': 0.6506434893062417, 'subsample': 0.8206736574419473, 'alpha': 0.24559384312991625, 'lambda': 7.958816761174589e-06, 'min_child_weight': 1.691275928357683}. Best is trial 33 with value: 0.9999220485920526.[0m


[0]	validation_0-auc:0.99908
[100]	validation_0-auc:0.99991
[200]	validation_0-auc:0.99992
[243]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:34:42,071][0m Trial 45 finished with value: 0.9999201252424098 and parameters: {'max_depth': 19, 'colsample_bytree': 0.6890894485570006, 'subsample': 0.866072846711905, 'alpha': 0.6207557458672079, 'lambda': 0.0005284755065215357, 'min_child_weight': 1.269433801514758}. Best is trial 33 with value: 0.9999220485920526.[0m


[0]	validation_0-auc:0.99873


[32m[I 2022-12-09 19:34:45,682][0m Trial 46 pruned. Trial was pruned at iteration 2.[0m


[0]	validation_0-auc:0.99901


[32m[I 2022-12-09 19:34:56,450][0m Trial 47 pruned. Trial was pruned at iteration 16.[0m


[0]	validation_0-auc:0.99920
[100]	validation_0-auc:0.99991
[168]	validation_0-auc:0.99992


[32m[I 2022-12-09 19:35:58,271][0m Trial 48 finished with value: 0.9999188344917571 and parameters: {'max_depth': 19, 'colsample_bytree': 0.6409458191324515, 'subsample': 0.9486584299767553, 'alpha': 0.04905574988631387, 'lambda': 0.001107445401896605, 'min_child_weight': 2.1698296076809367}. Best is trial 33 with value: 0.9999220485920526.[0m


[0]	validation_0-auc:0.99839


[32m[I 2022-12-09 19:36:01,195][0m Trial 49 pruned. Trial was pruned at iteration 0.[0m


Number of finished trials: 50
Best trial: {'max_depth': 15, 'colsample_bytree': 0.5891797914157996, 'subsample': 0.9915131532117468, 'alpha': 0.01601488896353689, 'lambda': 6.471320277059568e-08, 'min_child_weight': 4.639909639888095}


In [14]:
study.trials_dataframe().to_csv("/workspace/Xgboost_trials.csv",index=None)