In [1]:
! pip install -r requirements.txt -q

[0m

In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
import mlflow
import mlflow.xgboost
import shap


In [3]:
import io
import os

import boto3

# Build the S3-compatible client used by the data-loading cells below.
# Connection settings are read from the environment so that real credentials
# never have to be committed with the notebook:
#   MINIO_ENDPOINT_URL - base URL of the object store
#   MINIO_ACCESS_KEY   - access key id
#   MINIO_SECRET_KEY   - secret access key
# The fallbacks point at the in-cluster MinIO service so the notebook still runs
# when nothing is configured.
# NOTE(review): the fallback key pair looks like a development default - set the
# env vars explicitly for any shared or production deployment.
s3 = boto3.client(
    's3',
    endpoint_url=os.environ.get(
        'MINIO_ENDPOINT_URL',
        'http://minio-service.kubeflow.svc.cluster.local:9000',
    ),
    aws_access_key_id=os.environ.get('MINIO_ACCESS_KEY', 'minio'),
    aws_secret_access_key=os.environ.get('MINIO_SECRET_KEY', 'minio123'),
)

In [4]:
# Fetch the test split from the 'sample-data' bucket and parse the CSV payload
# into a DataFrame.
obj_test = s3.get_object(
    Bucket='sample-data',
    Key='data/data/application_test.csv',
)
with io.BytesIO(obj_test['Body'].read()) as test_buffer:
    test_df = pd.read_csv(test_buffer)

In [5]:
# Fetch the training split from the 'sample-data' bucket and parse the CSV
# payload into a DataFrame.
obj_train = s3.get_object(
    Bucket='sample-data',
    Key='data/data/application_train.csv',
)
with io.BytesIO(obj_train['Body'].read()) as train_buffer:
    train_df = pd.read_csv(train_buffer)

In [6]:
# Partition the training columns by dtype: int64/float64 columns are treated as
# numerical, object columns as categorical.
categorical_features = list(train_df.select_dtypes(include=['object']).columns)
numerical_features = list(train_df.select_dtypes(include=['int64', 'float64']).columns)

# The row identifier and the label are numeric but are not model inputs, so
# drop them from the numerical list when present.
for non_feature in ('SK_ID_CURR', 'TARGET'):
    if non_feature in numerical_features:
        numerical_features.remove(non_feature)

# Report the size and contents of each group.
for heading, names in (
    ("Categorical features:", categorical_features),
    ("\nNumerical features:", numerical_features),
):
    print(heading, len(names))
    print(names)

Categorical features: 16
['NAME_CONTRACT_TYPE', 'CODE_GENDER', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY', 'NAME_TYPE_SUITE', 'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE', 'OCCUPATION_TYPE', 'WEEKDAY_APPR_PROCESS_START', 'ORGANIZATION_TYPE', 'FONDKAPREMONT_MODE', 'HOUSETYPE_MODE', 'WALLSMATERIAL_MODE', 'EMERGENCYSTATE_MODE']

Numerical features: 104
['CNT_CHILDREN', 'AMT_INCOME_TOTAL', 'AMT_CREDIT', 'AMT_ANNUITY', 'AMT_GOODS_PRICE', 'REGION_POPULATION_RELATIVE', 'DAYS_BIRTH', 'DAYS_EMPLOYED', 'DAYS_REGISTRATION', 'DAYS_ID_PUBLISH', 'OWN_CAR_AGE', 'FLAG_MOBIL', 'FLAG_EMP_PHONE', 'FLAG_WORK_PHONE', 'FLAG_CONT_MOBILE', 'FLAG_PHONE', 'FLAG_EMAIL', 'CNT_FAM_MEMBERS', 'REGION_RATING_CLIENT', 'REGION_RATING_CLIENT_W_CITY', 'HOUR_APPR_PROCESS_START', 'REG_REGION_NOT_LIVE_REGION', 'REG_REGION_NOT_WORK_REGION', 'LIVE_REGION_NOT_WORK_REGION', 'REG_CITY_NOT_LIVE_CITY', 'REG_CITY_NOT_WORK_CITY', 'LIVE_CITY_NOT_WORK_CITY', 'EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3'

In [7]:
from src.utils import PreprocessFeatureSelector

# Separate the label from the training features; the test frame is copied as-is.
y_train = train_df["TARGET"].values
X_train = train_df.drop(columns=["TARGET"])
X_test = test_df.copy()

# Version tag and output directories handed to the preprocessing wrapper.
data_version = "v1"
save_train_data_path = "data/processed/train"
save_test_data_path = "data/processed/test"

# Run preprocessing + feature selection in one step via the project wrapper.
pfs = PreprocessFeatureSelector(
    X_train,
    X_test,
    y_train,
    categorical_features,
    numerical_features,
    data_version=data_version,
    save_train_data_path=save_train_data_path,
    save_test_data_path=save_test_data_path,
)
(
    selected_train,
    selected_test,
    selected_features,
    excluded_features,
) = pfs.run()

# Show which features were kept and which were dropped.
for label, feature_names in (
    ("Final Selected Features:", selected_features),
    ("Total Excluded Features:", excluded_features),
):
    print(label, feature_names)

[32m2025-04-15 02:10:33.467[0m | [1mINFO    [0m | [36msrc.utils[0m:[36mrun[0m:[36m119[0m - [1m🚀 Starting preprocessing and filtering...[0m
[32m2025-04-15 02:11:05.937[0m | [1mINFO    [0m | [36msrc.utils[0m:[36mrun[0m:[36m123[0m - [1m✅ Preprocessing complete.[0m
[32m2025-04-15 02:11:05.941[0m | [1mINFO    [0m | [36msrc.utils[0m:[36mrun[0m:[36m125[0m - [1m🧮 Features before filtering: ['NAME_CONTRACT_TYPE', 'CODE_GENDER', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY', 'NAME_TYPE_SUITE', 'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE', 'OCCUPATION_TYPE', 'WEEKDAY_APPR_PROCESS_START', 'ORGANIZATION_TYPE', 'FONDKAPREMONT_MODE', 'HOUSETYPE_MODE', 'WALLSMATERIAL_MODE', 'EMERGENCYSTATE_MODE', 'CNT_CHILDREN', 'AMT_INCOME_TOTAL', 'AMT_CREDIT', 'AMT_ANNUITY', 'AMT_GOODS_PRICE', 'REGION_POPULATION_RELATIVE', 'DAYS_BIRTH', 'DAYS_EMPLOYED', 'DAYS_REGISTRATION', 'DAYS_ID_PUBLISH', 'OWN_CAR_AGE', 'FLAG_MOBIL', 'FLAG_EMP_PHONE', 'FLAG_WORK_PHON

Final Selected Features: ['CODE_GENDER', 'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'ORGANIZATION_TYPE', 'AMT_CREDIT', 'DAYS_LAST_PHONE_CHANGE', 'FLAG_DOCUMENT_3']
Total Excluded Features: ['NAME_CONTRACT_TYPE', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY', 'NAME_TYPE_SUITE', 'NAME_HOUSING_TYPE', 'OCCUPATION_TYPE', 'WEEKDAY_APPR_PROCESS_START', 'FONDKAPREMONT_MODE', 'HOUSETYPE_MODE', 'WALLSMATERIAL_MODE', 'EMERGENCYSTATE_MODE', 'CNT_CHILDREN', 'AMT_ANNUITY', 'DAYS_BIRTH', 'DAYS_EMPLOYED', 'DAYS_REGISTRATION', 'OWN_CAR_AGE', 'FLAG_MOBIL', 'FLAG_WORK_PHONE', 'FLAG_CONT_MOBILE', 'FLAG_PHONE', 'FLAG_EMAIL', 'CNT_FAM_MEMBERS', 'HOUR_APPR_PROCESS_START', 'REG_REGION_NOT_LIVE_REGION', 'REG_REGION_NOT_WORK_REGION', 'LIVE_REGION_NOT_WORK_REGION', 'LIVE_CITY_NOT_WORK_CITY', 'EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3', 'APARTMENTS_AVG', 'BASEMENTAREA_AVG', 'YEARS_BEGINEXPLUATATION_AVG', 'YEARS_BUILD_AVG', 'COMMONAREA_AVG', 'ELEVATORS_AVG', 'ENTRANCES_AVG', 'FLOORSMAX_AVG', 'FLOORSMI

In [8]:
# Inspect class balance of the label in the selected training frame:
# absolute counts first, then the same breakdown as percentages.
target_column = selected_train['TARGET']

print("Target value counts:")
print(target_column.value_counts())

print("\nPercentage distribution:")
print(target_column.value_counts(normalize=True) * 100)

Target value counts:
TARGET
0    282686
1     24825
Name: count, dtype: int64

Percentage distribution:
TARGET
0    91.927118
1     8.072882
Name: proportion, dtype: float64


In [9]:
from src.trainer import UnderWritingTrainer

# Paths of the preprocessed CSVs passed to the trainer.
processed_train = "data/processed/train/processed_train_v1.csv"
processed_test = "data/processed/test/processed_test_v1.csv"

# Keyword arguments for the training run, collected in one place so the
# configuration is easy to read and tweak.
training_options = {
    "model_name": "xgb",
    "processed_train": processed_train,
    "processed_test": processed_test,
    "version": "v2",
    "experiment_name": "XGB",
}

# Train the underwriting model through the project trainer.
trained_model = UnderWritingTrainer.train_model(**training_options)


[32m2025-04-15 02:13:43.329[0m | [1mINFO    [0m | [36msrc.trainer[0m:[36mtrain_model[0m:[36m198[0m - [1mStart training underwriting model xgb with version v2[0m
[32m2025-04-15 02:13:43.864[0m | [1mINFO    [0m | [36msrc.trainer[0m:[36mtrain_model[0m:[36m227[0m - [1mNo categorical features provided for schema mapping.[0m
[I 2025-04-15 02:13:43,896] A new study created in memory with name: no-name-bf6fafde-0c3c-4c13-adf6-d03e3454eb9b


[0]	validation_0-auc:0.63692
[1]	validation_0-auc:0.65568
[2]	validation_0-auc:0.67358
[3]	validation_0-auc:0.67652
[4]	validation_0-auc:0.67562
[5]	validation_0-auc:0.67637
[6]	validation_0-auc:0.67615
[7]	validation_0-auc:0.67793
[8]	validation_0-auc:0.68482
[9]	validation_0-auc:0.68417
[10]	validation_0-auc:0.68387
[11]	validation_0-auc:0.68700


Parameters: { "use_label_encoder" } are not used.



[12]	validation_0-auc:0.69038
[13]	validation_0-auc:0.68967
[14]	validation_0-auc:0.69033
[15]	validation_0-auc:0.68970
[16]	validation_0-auc:0.68854
[17]	validation_0-auc:0.68867
[18]	validation_0-auc:0.68763
[19]	validation_0-auc:0.68874
[20]	validation_0-auc:0.68873
[21]	validation_0-auc:0.68926
[22]	validation_0-auc:0.68991
[23]	validation_0-auc:0.69005
[24]	validation_0-auc:0.69152
[25]	validation_0-auc:0.69142
[26]	validation_0-auc:0.69191
[27]	validation_0-auc:0.69205
[28]	validation_0-auc:0.69176
[29]	validation_0-auc:0.69187
[30]	validation_0-auc:0.69295
[31]	validation_0-auc:0.69343
[32]	validation_0-auc:0.69333
[33]	validation_0-auc:0.69355
[34]	validation_0-auc:0.69394
[35]	validation_0-auc:0.69357
[36]	validation_0-auc:0.69395
[37]	validation_0-auc:0.69406
[38]	validation_0-auc:0.69501
[39]	validation_0-auc:0.69473
[40]	validation_0-auc:0.69481
[41]	validation_0-auc:0.69519
[42]	validation_0-auc:0.69492
[43]	validation_0-auc:0.69515
[44]	validation_0-auc:0.69489
[45]	valid

[I 2025-04-15 02:13:44,799] Trial 0 finished with value: 0.9195323805342829 and parameters: {'max_depth': 2, 'learning_rate': 0.023899944848030215, 'n_estimators': 108, 'subsample': 0.5321515598126965, 'colsample_bytree': 0.8682664878529063}. Best is trial 0 with value: 0.9195323805342829.


[0]	validation_0-auc:0.63692
[1]	validation_0-auc:0.66280
[2]	validation_0-auc:0.66685
[3]	validation_0-auc:0.66721
[4]	validation_0-auc:0.66817
[5]	validation_0-auc:0.66815
[6]	validation_0-auc:0.67228
[7]	validation_0-auc:0.67216
[8]	validation_0-auc:0.67822
[9]	validation_0-auc:0.67803
[10]	validation_0-auc:0.67803
[11]	validation_0-auc:0.68123
[12]	validation_0-auc:0.68512
[13]	validation_0-auc:0.68608
[14]	validation_0-auc:0.68551
[15]	validation_0-auc:0.68418
[16]	validation_0-auc:0.68562


Parameters: { "use_label_encoder" } are not used.



[17]	validation_0-auc:0.68650
[18]	validation_0-auc:0.68604
[19]	validation_0-auc:0.68859
[20]	validation_0-auc:0.68905
[21]	validation_0-auc:0.68827
[22]	validation_0-auc:0.68738
[23]	validation_0-auc:0.68693
[24]	validation_0-auc:0.68789
[25]	validation_0-auc:0.68806
[26]	validation_0-auc:0.68784
[27]	validation_0-auc:0.68728
[28]	validation_0-auc:0.68726
[29]	validation_0-auc:0.68711
[30]	validation_0-auc:0.68934
[31]	validation_0-auc:0.68913
[32]	validation_0-auc:0.68838
[33]	validation_0-auc:0.68934
[34]	validation_0-auc:0.68975
[35]	validation_0-auc:0.68965
[36]	validation_0-auc:0.68942
[37]	validation_0-auc:0.68917
[38]	validation_0-auc:0.68986
[39]	validation_0-auc:0.68981
[40]	validation_0-auc:0.68949
[41]	validation_0-auc:0.68916
[42]	validation_0-auc:0.68924
[43]	validation_0-auc:0.68923
[44]	validation_0-auc:0.68937
[45]	validation_0-auc:0.68928
[46]	validation_0-auc:0.69018
[47]	validation_0-auc:0.68983
[48]	validation_0-auc:0.68973
[49]	validation_0-auc:0.68936
[50]	valid

[I 2025-04-15 02:13:46,769] Trial 1 finished with value: 0.9195323805342829 and parameters: {'max_depth': 2, 'learning_rate': 0.011638797475400562, 'n_estimators': 233, 'subsample': 0.8079434824997958, 'colsample_bytree': 0.8489635568404086}. Best is trial 0 with value: 0.9195323805342829.


[0]	validation_0-auc:0.66315
[1]	validation_0-auc:0.69195
[2]	validation_0-auc:0.69729
[3]	validation_0-auc:0.69944
[4]	validation_0-auc:0.70013
[5]	validation_0-auc:0.69985
[6]	validation_0-auc:0.70138
[7]	validation_0-auc:0.70145
[8]	validation_0-auc:0.70507
[9]	validation_0-auc:0.70437
[10]	validation_0-auc:0.70399
[11]	validation_0-auc:0.70396
[12]	validation_0-auc:0.70405
[13]	validation_0-auc:0.70378
[14]	validation_0-auc:0.70365


Parameters: { "use_label_encoder" } are not used.



[15]	validation_0-auc:0.70353
[16]	validation_0-auc:0.70373
[17]	validation_0-auc:0.70365
[18]	validation_0-auc:0.70397
[19]	validation_0-auc:0.70378
[20]	validation_0-auc:0.70412
[21]	validation_0-auc:0.70396
[22]	validation_0-auc:0.70390
[23]	validation_0-auc:0.70384
[24]	validation_0-auc:0.70459
[25]	validation_0-auc:0.70455
[26]	validation_0-auc:0.70439
[27]	validation_0-auc:0.70448
[28]	validation_0-auc:0.70439
[29]	validation_0-auc:0.70441
[30]	validation_0-auc:0.70496
[31]	validation_0-auc:0.70486
[32]	validation_0-auc:0.70471
[33]	validation_0-auc:0.70435
[34]	validation_0-auc:0.70457
[35]	validation_0-auc:0.70460
[36]	validation_0-auc:0.70452
[37]	validation_0-auc:0.70429
[38]	validation_0-auc:0.70433
[39]	validation_0-auc:0.70437
[40]	validation_0-auc:0.70429
[41]	validation_0-auc:0.70419
[42]	validation_0-auc:0.70423
[43]	validation_0-auc:0.70419
[44]	validation_0-auc:0.70408
[45]	validation_0-auc:0.70404
[46]	validation_0-auc:0.70466
[47]	validation_0-auc:0.70475
[48]	valid

[I 2025-04-15 02:13:48,174] Trial 2 finished with value: 0.9195323805342829 and parameters: {'max_depth': 4, 'learning_rate': 0.001148809568183889, 'n_estimators': 142, 'subsample': 0.786208700109454, 'colsample_bytree': 0.9807788256459284}. Best is trial 0 with value: 0.9195323805342829.


[0]	validation_0-auc:0.67536
[1]	validation_0-auc:0.68523
[2]	validation_0-auc:0.70347
[3]	validation_0-auc:0.71097
[4]	validation_0-auc:0.71412
[5]	validation_0-auc:0.71494
[6]	validation_0-auc:0.71414
[7]	validation_0-auc:0.71488
[8]	validation_0-auc:0.71726
[9]	validation_0-auc:0.71726
[10]	validation_0-auc:0.71830
[11]	validation_0-auc:0.71919
[12]	validation_0-auc:0.71982


Parameters: { "use_label_encoder" } are not used.



[13]	validation_0-auc:0.71960
[14]	validation_0-auc:0.71976
[15]	validation_0-auc:0.72005
[16]	validation_0-auc:0.71998
[17]	validation_0-auc:0.72025
[18]	validation_0-auc:0.72035
[19]	validation_0-auc:0.72035
[20]	validation_0-auc:0.72069
[21]	validation_0-auc:0.72085
[22]	validation_0-auc:0.72117
[23]	validation_0-auc:0.72105
[24]	validation_0-auc:0.72133
[25]	validation_0-auc:0.72131
[26]	validation_0-auc:0.72150
[27]	validation_0-auc:0.72157
[28]	validation_0-auc:0.72169
[29]	validation_0-auc:0.72169
[30]	validation_0-auc:0.72192
[31]	validation_0-auc:0.72203
[32]	validation_0-auc:0.72212
[33]	validation_0-auc:0.72219
[34]	validation_0-auc:0.72224
[35]	validation_0-auc:0.72220
[36]	validation_0-auc:0.72220
[37]	validation_0-auc:0.72228
[38]	validation_0-auc:0.72239
[39]	validation_0-auc:0.72240
[40]	validation_0-auc:0.72240
[41]	validation_0-auc:0.72237
[42]	validation_0-auc:0.72245
[43]	validation_0-auc:0.72260
[44]	validation_0-auc:0.72264
[45]	validation_0-auc:0.72264
[46]	valid

[I 2025-04-15 02:13:49,523] Trial 3 finished with value: 0.9195323805342829 and parameters: {'max_depth': 7, 'learning_rate': 0.06751681945731859, 'n_estimators': 118, 'subsample': 0.5893160336345371, 'colsample_bytree': 0.7182935863006293}. Best is trial 0 with value: 0.9195323805342829.


[0]	validation_0-auc:0.67723
[1]	validation_0-auc:0.70206
[2]	validation_0-auc:0.70779
[3]	validation_0-auc:0.71102
[4]	validation_0-auc:0.71127
[5]	validation_0-auc:0.71216
[6]	validation_0-auc:0.71214
[7]	validation_0-auc:0.71257
[8]	validation_0-auc:0.71437
[9]	validation_0-auc:0.71431
[10]	validation_0-auc:0.71432
[11]	validation_0-auc:0.71392
[12]	validation_0-auc:0.71380
[13]	validation_0-auc:0.71370


Parameters: { "use_label_encoder" } are not used.



[14]	validation_0-auc:0.71355
[15]	validation_0-auc:0.71359
[16]	validation_0-auc:0.71424
[17]	validation_0-auc:0.71412
[18]	validation_0-auc:0.71414
[19]	validation_0-auc:0.71401
[20]	validation_0-auc:0.71440
[21]	validation_0-auc:0.71420
[22]	validation_0-auc:0.71410
[23]	validation_0-auc:0.71393
[24]	validation_0-auc:0.71448
[25]	validation_0-auc:0.71448
[26]	validation_0-auc:0.71437
[27]	validation_0-auc:0.71442
[28]	validation_0-auc:0.71428
[29]	validation_0-auc:0.71433
[30]	validation_0-auc:0.71477
[31]	validation_0-auc:0.71481
[32]	validation_0-auc:0.71467
[33]	validation_0-auc:0.71463
[34]	validation_0-auc:0.71483
[35]	validation_0-auc:0.71504
[36]	validation_0-auc:0.71489
[37]	validation_0-auc:0.71506
[38]	validation_0-auc:0.71495
[39]	validation_0-auc:0.71484
[40]	validation_0-auc:0.71490
[41]	validation_0-auc:0.71490
[42]	validation_0-auc:0.71493
[43]	validation_0-auc:0.71486
[44]	validation_0-auc:0.71498
[45]	validation_0-auc:0.71490
[46]	validation_0-auc:0.71523
[47]	valid

[I 2025-04-15 02:13:54,445] Trial 4 finished with value: 0.9195323805342829 and parameters: {'max_depth': 6, 'learning_rate': 0.0017709184352429478, 'n_estimators': 461, 'subsample': 0.7124264039698285, 'colsample_bytree': 0.9837912587898436}. Best is trial 0 with value: 0.9195323805342829.


[0]	validation_0-auc:0.67370
[1]	validation_0-auc:0.69961
[2]	validation_0-auc:0.70317
[3]	validation_0-auc:0.70498
[4]	validation_0-auc:0.70514
[5]	validation_0-auc:0.70516
[6]	validation_0-auc:0.70464
[7]	validation_0-auc:0.70489
[8]	validation_0-auc:0.70815
[9]	validation_0-auc:0.70796
[10]	validation_0-auc:0.70835
[11]	validation_0-auc:0.70843
[12]	validation_0-auc:0.70895
[13]	validation_0-auc:0.70947
[14]	validation_0-auc:0.70946
[15]	validation_0-auc:0.70959


Parameters: { "use_label_encoder" } are not used.



[16]	validation_0-auc:0.71041
[17]	validation_0-auc:0.71041
[18]	validation_0-auc:0.71074
[19]	validation_0-auc:0.71057
[20]	validation_0-auc:0.71079
[21]	validation_0-auc:0.71080
[22]	validation_0-auc:0.71081
[23]	validation_0-auc:0.71094
[24]	validation_0-auc:0.71175
[25]	validation_0-auc:0.71194
[26]	validation_0-auc:0.71228
[27]	validation_0-auc:0.71243
[28]	validation_0-auc:0.71248
[29]	validation_0-auc:0.71272
[30]	validation_0-auc:0.71319
[31]	validation_0-auc:0.71331
[32]	validation_0-auc:0.71333
[33]	validation_0-auc:0.71356
[34]	validation_0-auc:0.71387
[35]	validation_0-auc:0.71395
[36]	validation_0-auc:0.71389
[37]	validation_0-auc:0.71405
[38]	validation_0-auc:0.71422
[39]	validation_0-auc:0.71420
[40]	validation_0-auc:0.71444
[41]	validation_0-auc:0.71460
[42]	validation_0-auc:0.71464
[43]	validation_0-auc:0.71478
[44]	validation_0-auc:0.71491
[45]	validation_0-auc:0.71490
[46]	validation_0-auc:0.71532
[47]	validation_0-auc:0.71557
[48]	validation_0-auc:0.71565
[49]	valid

[I 2025-04-15 02:13:57,144] Trial 5 finished with value: 0.9195323805342829 and parameters: {'max_depth': 5, 'learning_rate': 0.02577733606458182, 'n_estimators': 282, 'subsample': 0.9334267090229948, 'colsample_bytree': 0.924880361470648}. Best is trial 0 with value: 0.9195323805342829.


[0]	validation_0-auc:0.67495
[1]	validation_0-auc:0.70815
[2]	validation_0-auc:0.71353
[3]	validation_0-auc:0.71791
[4]	validation_0-auc:0.71883
[5]	validation_0-auc:0.71876
[6]	validation_0-auc:0.71861
[7]	validation_0-auc:0.71875
[8]	validation_0-auc:0.71986
[9]	validation_0-auc:0.71986
[10]	validation_0-auc:0.72013
[11]	validation_0-auc:0.72027


Parameters: { "use_label_encoder" } are not used.



[12]	validation_0-auc:0.72053
[13]	validation_0-auc:0.72045
[14]	validation_0-auc:0.72050
[15]	validation_0-auc:0.72052
[16]	validation_0-auc:0.72060
[17]	validation_0-auc:0.72065
[18]	validation_0-auc:0.72072
[19]	validation_0-auc:0.72067
[20]	validation_0-auc:0.72089
[21]	validation_0-auc:0.72103
[22]	validation_0-auc:0.72103
[23]	validation_0-auc:0.72105
[24]	validation_0-auc:0.72097
[25]	validation_0-auc:0.72096
[26]	validation_0-auc:0.72116
[27]	validation_0-auc:0.72110
[28]	validation_0-auc:0.72114
[29]	validation_0-auc:0.72111
[30]	validation_0-auc:0.72110
[31]	validation_0-auc:0.72111
[32]	validation_0-auc:0.72117
[33]	validation_0-auc:0.72118
[34]	validation_0-auc:0.72119
[35]	validation_0-auc:0.72128
[36]	validation_0-auc:0.72118
[37]	validation_0-auc:0.72130
[38]	validation_0-auc:0.72130
[39]	validation_0-auc:0.72127
[40]	validation_0-auc:0.72121
[41]	validation_0-auc:0.72125
[42]	validation_0-auc:0.72131
[43]	validation_0-auc:0.72127
[44]	validation_0-auc:0.72132
[45]	valid

[I 2025-04-15 02:14:00,094] Trial 6 finished with value: 0.9193210087312814 and parameters: {'max_depth': 8, 'learning_rate': 0.09443653522134898, 'n_estimators': 258, 'subsample': 0.8545698955192472, 'colsample_bytree': 0.8353677837097938}. Best is trial 0 with value: 0.9195323805342829.


[0]	validation_0-auc:0.63692
[1]	validation_0-auc:0.65124
[2]	validation_0-auc:0.66822
[3]	validation_0-auc:0.67483
[4]	validation_0-auc:0.67367
[5]	validation_0-auc:0.67344
[6]	validation_0-auc:0.67420
[7]	validation_0-auc:0.67641
[8]	validation_0-auc:0.67975
[9]	validation_0-auc:0.68118
[10]	validation_0-auc:0.68274
[11]	validation_0-auc:0.68360
[12]	validation_0-auc:0.68723
[13]	validation_0-auc:0.68595
[14]	validation_0-auc:0.68483
[15]	validation_0-auc:0.68803
[16]	validation_0-auc:0.68720


Parameters: { "use_label_encoder" } are not used.



[17]	validation_0-auc:0.68914
[18]	validation_0-auc:0.68808
[19]	validation_0-auc:0.68972
[20]	validation_0-auc:0.68871
[21]	validation_0-auc:0.68848
[22]	validation_0-auc:0.68782
[23]	validation_0-auc:0.68780
[24]	validation_0-auc:0.68879
[25]	validation_0-auc:0.68882
[26]	validation_0-auc:0.68821
[27]	validation_0-auc:0.68754
[28]	validation_0-auc:0.68802
[29]	validation_0-auc:0.68924
[30]	validation_0-auc:0.69037
[31]	validation_0-auc:0.69036
[32]	validation_0-auc:0.69049
[33]	validation_0-auc:0.69066
[34]	validation_0-auc:0.69001
[35]	validation_0-auc:0.68955
[36]	validation_0-auc:0.68918
[37]	validation_0-auc:0.68893
[38]	validation_0-auc:0.68985
[39]	validation_0-auc:0.68935
[40]	validation_0-auc:0.68950
[41]	validation_0-auc:0.69049
[42]	validation_0-auc:0.69000
[43]	validation_0-auc:0.69105
[44]	validation_0-auc:0.69091
[45]	validation_0-auc:0.69041
[46]	validation_0-auc:0.69114
[47]	validation_0-auc:0.69049
[48]	validation_0-auc:0.69016
[49]	validation_0-auc:0.68986
[50]	valid

[I 2025-04-15 02:14:01,176] Trial 7 finished with value: 0.9195323805342829 and parameters: {'max_depth': 2, 'learning_rate': 0.0016450052703983775, 'n_estimators': 120, 'subsample': 0.7152204674674919, 'colsample_bytree': 0.6698675321880468}. Best is trial 0 with value: 0.9195323805342829.


[0]	validation_0-auc:0.67482
[1]	validation_0-auc:0.70225
[2]	validation_0-auc:0.70855
[3]	validation_0-auc:0.71109
[4]	validation_0-auc:0.71115
[5]	validation_0-auc:0.71193
[6]	validation_0-auc:0.71228
[7]	validation_0-auc:0.71446
[8]	validation_0-auc:0.71629
[9]	validation_0-auc:0.71606
[10]	validation_0-auc:0.71682
[11]	validation_0-auc:0.71688
[12]	validation_0-auc:0.71700


Parameters: { "use_label_encoder" } are not used.



[13]	validation_0-auc:0.71707
[14]	validation_0-auc:0.71703
[15]	validation_0-auc:0.71722
[16]	validation_0-auc:0.71765
[17]	validation_0-auc:0.71749
[18]	validation_0-auc:0.71750
[19]	validation_0-auc:0.71750
[20]	validation_0-auc:0.71762
[21]	validation_0-auc:0.71775
[22]	validation_0-auc:0.71776
[23]	validation_0-auc:0.71792
[24]	validation_0-auc:0.71860
[25]	validation_0-auc:0.71870
[26]	validation_0-auc:0.71881
[27]	validation_0-auc:0.71894
[28]	validation_0-auc:0.71913
[29]	validation_0-auc:0.71919
[30]	validation_0-auc:0.71957
[31]	validation_0-auc:0.71968
[32]	validation_0-auc:0.71976
[33]	validation_0-auc:0.71989
[34]	validation_0-auc:0.71998
[35]	validation_0-auc:0.72003
[36]	validation_0-auc:0.72003
[37]	validation_0-auc:0.72013
[38]	validation_0-auc:0.72016
[39]	validation_0-auc:0.72031
[40]	validation_0-auc:0.72041
[41]	validation_0-auc:0.72046
[42]	validation_0-auc:0.72064
[43]	validation_0-auc:0.72063
[44]	validation_0-auc:0.72076
[45]	validation_0-auc:0.72091
[46]	valid

[I 2025-04-15 02:14:04,928] Trial 8 finished with value: 0.9194836024258979 and parameters: {'max_depth': 6, 'learning_rate': 0.048001591951286615, 'n_estimators': 351, 'subsample': 0.79648882222405, 'colsample_bytree': 0.9330917986178469}. Best is trial 0 with value: 0.9195323805342829.


[0]	validation_0-auc:0.60091
[1]	validation_0-auc:0.61879
[2]	validation_0-auc:0.69981
[3]	validation_0-auc:0.70456
[4]	validation_0-auc:0.70283
[5]	validation_0-auc:0.71052
[6]	validation_0-auc:0.70904
[7]	validation_0-auc:0.71274
[8]	validation_0-auc:0.71637
[9]	validation_0-auc:0.71706
[10]	validation_0-auc:0.71842
[11]	validation_0-auc:0.71965
[12]	validation_0-auc:0.71952


Parameters: { "use_label_encoder" } are not used.



[13]	validation_0-auc:0.71934
[14]	validation_0-auc:0.71991
[15]	validation_0-auc:0.72015
[16]	validation_0-auc:0.72042
[17]	validation_0-auc:0.72034
[18]	validation_0-auc:0.72030
[19]	validation_0-auc:0.72025
[20]	validation_0-auc:0.72088
[21]	validation_0-auc:0.72114
[22]	validation_0-auc:0.72135
[23]	validation_0-auc:0.72127
[24]	validation_0-auc:0.72120
[25]	validation_0-auc:0.72133
[26]	validation_0-auc:0.72147
[27]	validation_0-auc:0.72158
[28]	validation_0-auc:0.72168
[29]	validation_0-auc:0.72146
[30]	validation_0-auc:0.72152
[31]	validation_0-auc:0.72177
[32]	validation_0-auc:0.72191
[33]	validation_0-auc:0.72208
[34]	validation_0-auc:0.72205
[35]	validation_0-auc:0.72210
[36]	validation_0-auc:0.72200
[37]	validation_0-auc:0.72209
[38]	validation_0-auc:0.72198
[39]	validation_0-auc:0.72207
[40]	validation_0-auc:0.72213
[41]	validation_0-auc:0.72209
[42]	validation_0-auc:0.72213
[43]	validation_0-auc:0.72210
[44]	validation_0-auc:0.72216
[45]	validation_0-auc:0.72214
[46]	valid

[I 2025-04-15 02:14:09,114] Trial 9 finished with value: 0.9194836024258979 and parameters: {'max_depth': 8, 'learning_rate': 0.08876788185244802, 'n_estimators': 366, 'subsample': 0.8574498869342663, 'colsample_bytree': 0.540802535225285}. Best is trial 0 with value: 0.9195323805342829.
Parameters: { "use_label_encoder" } are not used.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
[32m2025-04-15 02:14:09.945[0m | [1mINFO    [0m | [36msrc.trainer[0m:[36mlog_model[0m:[36m107[0m - [1mROC AUC (roc_auc_score): 0.7105458428607876[0m
[32m2025-04-15 02:14:09.946[0m | [1mINFO    [0m | [36msrc.trainer[0m:[36mlog_model[0m:[36m108[0m - [1mROC AUC (computed with auc & roc_curve): 0.7105458428607876[0m
Successfully registered model 'v2_XGBoost'.
2025/04/15 02:14:27 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version 