Author: Ben Turner  
Objective: Develop a method to implement a good baseline model

# Import packages

In [1]:
import pandas as pd
import category_encoders as ce
import numpy as np
import seaborn as sns

from feature_engine.selection import (DropFeatures, DropConstantFeatures, 
                                      DropDuplicateFeatures)

from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold

from sklearn.preprocessing import (StandardScaler, FunctionTransformer, Normalizer, OrdinalEncoder, OneHotEncoder,
                                   RobustScaler, MinMaxScaler, PowerTransformer, MaxAbsScaler, LabelEncoder)

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer, KNNImputer

from sklearn.compose import ColumnTransformer

from sklearn.metrics import roc_auc_score, f1_score, classification_report

from xgboost import XGBClassifier

from sklearn.model_selection import GridSearchCV
from skopt import BayesSearchCV

# Read Data

In [2]:
N_ROWS=10000

In [3]:
df=pd.read_csv('data/application_train.csv', nrows=N_ROWS)

In [4]:
df.head(5)

Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,...,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100002,1,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
1,100003,0,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,100004,0,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,100006,0,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,...,0,0,0,0,,,,,,
4,100007,0,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
X=df.loc[:, ~df.columns.isin(['SK_ID_CURR', 'TARGET'])]
y=df['TARGET'].astype(int)

In [6]:
# using the train test split function
X_train, X_test,y_train, y_test = train_test_split(X,y,random_state=104,test_size=0.25,shuffle=True)

# Feature Selection

In [None]:
for i in list(df.columns):
    
    try:
        sns.violinplot(x=df["TARGET"], y=df["i"], palette="Blues")
    except:
        print(i, 'failed...')
        pass

# Create a Scikit Learn Pipeline

Categoric: Nominal, Ordinal  
Numeric: Discrete, Continious

In [7]:
categorical_feature_mask = X.dtypes==object
categorical_features = X.columns[categorical_feature_mask].tolist()
print(categorical_features)

['NAME_CONTRACT_TYPE', 'CODE_GENDER', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY', 'NAME_TYPE_SUITE', 'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE', 'OCCUPATION_TYPE', 'WEEKDAY_APPR_PROCESS_START', 'ORGANIZATION_TYPE', 'FONDKAPREMONT_MODE', 'HOUSETYPE_MODE', 'WALLSMATERIAL_MODE', 'EMERGENCYSTATE_MODE']


In [8]:
numeric_feature_mask = X.dtypes!=object
numeric_features = X.columns[numeric_feature_mask].tolist()
print(numeric_features)

['CNT_CHILDREN', 'AMT_INCOME_TOTAL', 'AMT_CREDIT', 'AMT_ANNUITY', 'AMT_GOODS_PRICE', 'REGION_POPULATION_RELATIVE', 'DAYS_BIRTH', 'DAYS_EMPLOYED', 'DAYS_REGISTRATION', 'DAYS_ID_PUBLISH', 'OWN_CAR_AGE', 'FLAG_MOBIL', 'FLAG_EMP_PHONE', 'FLAG_WORK_PHONE', 'FLAG_CONT_MOBILE', 'FLAG_PHONE', 'FLAG_EMAIL', 'CNT_FAM_MEMBERS', 'REGION_RATING_CLIENT', 'REGION_RATING_CLIENT_W_CITY', 'HOUR_APPR_PROCESS_START', 'REG_REGION_NOT_LIVE_REGION', 'REG_REGION_NOT_WORK_REGION', 'LIVE_REGION_NOT_WORK_REGION', 'REG_CITY_NOT_LIVE_CITY', 'REG_CITY_NOT_WORK_CITY', 'LIVE_CITY_NOT_WORK_CITY', 'EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3', 'APARTMENTS_AVG', 'BASEMENTAREA_AVG', 'YEARS_BEGINEXPLUATATION_AVG', 'YEARS_BUILD_AVG', 'COMMONAREA_AVG', 'ELEVATORS_AVG', 'ENTRANCES_AVG', 'FLOORSMAX_AVG', 'FLOORSMIN_AVG', 'LANDAREA_AVG', 'LIVINGAPARTMENTS_AVG', 'LIVINGAREA_AVG', 'NONLIVINGAPARTMENTS_AVG', 'NONLIVINGAREA_AVG', 'APARTMENTS_MODE', 'BASEMENTAREA_MODE', 'YEARS_BEGINEXPLUATATION_MODE', 'YEARS_BUILD_MODE', 'COMMONAR

In [9]:
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('cat_enc', OneHotEncoder(handle_unknown='ignore')),
])

In [10]:
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
#     ('norm', Normalizer()),
])

In [11]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

In [12]:
pipe = Pipeline([
    ('preprocessor', preprocessor)
])

In [13]:
cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=3, random_state=1)

In [14]:
check_df=pd.DataFrame(pipe.fit_transform(X_train))
check=np.isinf(pd.DataFrame(check_df)).values.sum()
print('check for inf values:', check)

check for inf values: 0


# Determine Base Pipeline Performance

In [15]:
ratio = float(np.sum(y_train == 0)) / np.sum(y_train==1)

best_params = {
    'learning_rate': 0.01,
    'n_estimators': 100,
    'max_depth': 3,
    'subsample': 0.8,
    'colsample_bytree': 1,
    'gamma': 1,
    'scale_pos_weight': ratio}

pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', XGBClassifier(eval_metric='auc', 
                                 objective='binary:logistic', 
                                 use_label_encoder=False,
                                 **best_params))
])

In [16]:
for parameter in pipeline.get_params():
    print(parameter)

memory
steps
verbose
preprocessor
classifier
preprocessor__n_jobs
preprocessor__remainder
preprocessor__sparse_threshold
preprocessor__transformer_weights
preprocessor__transformers
preprocessor__verbose
preprocessor__verbose_feature_names_out
preprocessor__num
preprocessor__cat
preprocessor__num__memory
preprocessor__num__steps
preprocessor__num__verbose
preprocessor__num__imputer
preprocessor__num__scaler
preprocessor__num__imputer__add_indicator
preprocessor__num__imputer__copy
preprocessor__num__imputer__fill_value
preprocessor__num__imputer__missing_values
preprocessor__num__imputer__strategy
preprocessor__num__imputer__verbose
preprocessor__num__scaler__copy
preprocessor__num__scaler__with_mean
preprocessor__num__scaler__with_std
preprocessor__cat__memory
preprocessor__cat__steps
preprocessor__cat__verbose
preprocessor__cat__imputer
preprocessor__cat__cat_enc
preprocessor__cat__imputer__add_indicator
preprocessor__cat__imputer__copy
preprocessor__cat__imputer__fill_value
preproce

In [17]:
pipeline.fit(X_train, y_train)

In [18]:
train_score=f1_score(y_train, pipeline.predict(X_train))
test_score=f1_score(y_test, pipeline.predict(X_test))
print('Training set score: ' + str(train_score))
print('Test set score: ' + str(test_score))

Training set score: 0.2816696914700544
Test set score: 0.22451081359423272


In [23]:
print('*'*15, 'BASELINE TRAIN RESULTS', '*'*15, '\n')
print(classification_report(y_train, pipeline.predict(X_train)))

print('*'*15, 'BASELINE TEST RESULTS', '*'*15, '\n')
print(classification_report(y_test, pipeline.predict(X_test)))

*************** BASELINE TRAIN RESULTS *************** 

              precision    recall  f1-score   support

           0       0.96      0.74      0.84      6909
           1       0.18      0.66      0.28       591

    accuracy                           0.74      7500
   macro avg       0.57      0.70      0.56      7500
weighted avg       0.90      0.74      0.79      7500

*************** BASELINE TEST RESULTS *************** 

              precision    recall  f1-score   support

           0       0.96      0.71      0.81      2316
           1       0.14      0.59      0.22       184

    accuracy                           0.70      2500
   macro avg       0.55      0.65      0.52      2500
weighted avg       0.90      0.70      0.77      2500



# Determine Best Pipeline Transformations

Could also add: RobustScaler, Normalizer, ('log', FunctionTransformer(np.log1p)), Imputer = KNNImputer, LabelBinarizer

In [24]:
parameters = {'preprocessor__num__scaler': [StandardScaler(), MinMaxScaler(), MaxAbsScaler(), RobustScaler()],
              'preprocessor__cat__cat_enc': [OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-999), 
                                             OneHotEncoder(handle_unknown='ignore'),
                                             ce.LeaveOneOutEncoder(handle_unknown='value', handle_missing='value')]}
 
cv_feat = RepeatedStratifiedKFold(n_splits=2, n_repeats=1, random_state=1)
grid = GridSearchCV(pipeline, parameters, cv=cv_feat, scoring = 'f1', verbose=10).fit(X_train, y_train)
 
train_score=f1_score(y_train, grid.predict(X_train))
test_score=f1_score(y_test, grid.predict(X_test))
print('Training set score: ' + str(train_score))
print('Test set score: ' + str(test_score))

Fitting 2 folds for each of 12 candidates, totalling 24 fits
[CV 1/2; 1/12] START preprocessor__cat__cat_enc=OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-999), preprocessor__num__scaler=StandardScaler()
[CV 1/2; 1/12] END preprocessor__cat__cat_enc=OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-999), preprocessor__num__scaler=StandardScaler();, score=0.250 total time=   1.0s
[CV 2/2; 1/12] START preprocessor__cat__cat_enc=OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-999), preprocessor__num__scaler=StandardScaler()
[CV 2/2; 1/12] END preprocessor__cat__cat_enc=OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-999), preprocessor__num__scaler=StandardScaler();, score=0.259 total time=   1.0s
[CV 1/2; 2/12] START preprocessor__cat__cat_enc=OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-999), preprocessor__num__scaler=MinMaxScaler()
[CV 1/2; 2/12] END preprocessor__cat__cat_enc=OrdinalEncoder(han

In [25]:
print('*'*15, 'TRANSFORMATION TRAIN RESULTS', '*'*15, '\n')
print(classification_report(y_train, grid.predict(X_train)))

print('*'*15, 'TRANSFORMATION TEST RESULTS', '*'*15, '\n')
print(classification_report(y_test, grid.predict(X_test)))

*************** TRANSFORMATION TRAIN RESULTS *************** 

              precision    recall  f1-score   support

           0       0.96      0.74      0.84      6909
           1       0.18      0.66      0.28       591

    accuracy                           0.73      7500
   macro avg       0.57      0.70      0.56      7500
weighted avg       0.90      0.73      0.79      7500

*************** TRANSFORMATION TEST RESULTS *************** 

              precision    recall  f1-score   support

           0       0.96      0.71      0.81      2316
           1       0.14      0.61      0.23       184

    accuracy                           0.70      2500
   macro avg       0.55      0.66      0.52      2500
weighted avg       0.90      0.70      0.77      2500



In [26]:
best_features=grid.best_params_
print(best_features)

{'preprocessor__cat__cat_enc': OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-999), 'preprocessor__num__scaler': StandardScaler()}


# Set-up Best Pipeline

In [28]:
categorical_feature_mask = X.dtypes==object
categorical_features = X.columns[categorical_feature_mask].tolist()

numeric_feature_mask = X.dtypes!=object
numeric_features = X.columns[numeric_feature_mask].tolist()

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('cat_enc', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-999)),
])

numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
#     ('norm', Normalizer()),
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

pipe = Pipeline([
    ('preprocessor', preprocessor)
])

X_train_transformed=pipe.fit_transform(X_train)
X_test_transformed=pipe.fit_transform(X_test)

# Semi-Manual Opt

In [29]:
def get_best_params(opt):
    
    """
    function to add mean score and std score to find the most stable model
    """
    
    df=pd.DataFrame(opt.cv_results_)
    df['score']=df['mean_test_score']+df['std_test_score']
    df=df.sort_values(by='score', ascending=False)
    
    return df['params'].iloc[0]

In [None]:
from sklearn.model_selection import GridSearchCV
import math

"""
following: https://towardsdatascience.com/fine-tuning-xgboost-in-python-like-a-boss-b4543ed8b1e
"""

# define the cross-fold
cv = RepeatedStratifiedKFold(n_splits=2, n_repeats=1, random_state=1)

# begin by determining the optimal scale_pos_weight
ratio = float(np.sum(y_train == 0)) / np.sum(y_train==1)
ratio_root = math.sqrt(ratio)
print('scale_pos_weight ratio: ', ratio)
print('scale_pos_weight ratio root: ', ratio_root)

best_params = {
    'learning_rate': 0.01,
    'n_estimators': 100,
    'max_depth': 3,
    'subsample': 0.8,
    'colsample_bytree': 1,
    'gamma': 1,
    'scale_pos_weight': ratio}

hardcoded_params = {
    'objective': 'reg:binary'
}

find_scale_pos_weight = {
    'scale_pos_weight' : [1, ratio, ratio_root]
    
}

find_learning_rate = {
    'learning_rate': list([i/100 for i in range(0,100)])
}

find_depth_and_child = {
    'max_depth': list(range(3,12,1)),
    'min_child_weight': list(range(1,10,1))
}

find_gamma = {
    'gamma': list([i/10.0 for i in range(0,5)])
}

find_subsample_and_colsample = {
    'subsample':list([i/10.0 for i in range(6,10)]),
    'colsample_bytree':list([i/10.0 for i in range(6,10)])
}

find_alpha = {
    'reg_alpha':[1e-5, 1e-2, 0.1, 1, 100]
}

find_estimators = {
    'n_estimators': list(range(1,1000,1))
}

print('*'*43)
print('*'*15, 'MODEL TUNER', '*'*15)
print('*'*43, '\n')

# find the optimal value of scale pos_weight
print('*'*15, 'SCALE POS WEIGHT', '*'*15, '\n')
xgb = XGBClassifier(eval_metric='auc', use_label_encoder=False, **best_params)
opt = GridSearchCV(xgb, param_grid = find_scale_pos_weight, scoring = 'f1', verbose=10, cv=cv)
opt.fit(X_train_transformed, y_train)
best_scale_pos_weight=get_best_params(opt)
print(best_scale_pos_weight)

# find the optimal value of depth and child
print('*'*15, 'LEARNING_RATE', '*'*15, '\n')
best_params = { **best_params, **best_scale_pos_weight}
xgb = XGBClassifier(eval_metric='auc', use_label_encoder=False, **best_params)
opt = GridSearchCV(xgb, param_grid = find_learning_rate, scoring = 'f1', verbose=10, cv=cv)
opt.fit(X_train_transformed, y_train)
learning_rate=get_best_params(opt)
print(learning_rate)

# find the optimal value of depth and child
print('*'*15, 'DEPTH AND CHILD', '*'*15, '\n')
best_params = { **best_params, **learning_rate}
xgb = XGBClassifier(eval_metric='auc', use_label_encoder=False, **best_params)
opt = GridSearchCV(xgb, param_grid = find_depth_and_child, scoring = 'f1', verbose=10, cv=cv)
opt.fit(X_train_transformed, y_train)
depth_and_child=get_best_params(opt)
print(depth_and_child)

# find the optimal value of depth and child
print('*'*15, 'GAMMA', '*'*15, '\n')
best_params = { **best_params, **depth_and_child}
xgb = XGBClassifier(eval_metric='auc', use_label_encoder=False, **best_params)
opt = GridSearchCV(xgb, param_grid = find_gamma, scoring = 'f1', verbose=10, cv=cv)
opt.fit(X_train_transformed, y_train)
gamma=get_best_params(opt)
print(gamma)

# find the optimal value of depth and child
print('*'*15, 'SUBSAMPLE AND COLSAMPLE', '*'*15, '\n')
best_params = { **best_params, **gamma}
xgb = XGBClassifier(eval_metric='auc', use_label_encoder=False, **best_params)
opt = GridSearchCV(xgb, param_grid = find_subsample_and_colsample, scoring = 'f1', verbose=10, cv=cv)
opt.fit(X_train_transformed, y_train)
subsample_and_colsample=get_best_params(opt)
print(subsample_and_colsample)

# find the optimal value of depth and child
print('*'*15, 'ALPHA', '*'*15, '\n')
best_params = { **best_params, **subsample_and_colsample}
xgb = XGBClassifier(eval_metric='auc', use_label_encoder=False, **best_params)
opt = GridSearchCV(xgb, param_grid = find_alpha, scoring = 'f1', verbose=10, cv=cv)
opt.fit(X_train_transformed, y_train)
alpha=get_best_params(opt)
print(alpha)

# find the optimal value of depth and child
print('*'*15, 'N_ESTIMATORS', '*'*15, '\n')
best_params = { **best_params, **alpha}
xgb = XGBClassifier(eval_metric='auc', use_label_encoder=False, **best_params)
opt = GridSearchCV(xgb, param_grid = find_estimators, scoring = 'f1', verbose=10, cv=cv)
opt.fit(X_train_transformed, y_train)
estimators=get_best_params(opt)
print(estimators)
# change this for early_stopping?

scale_pos_weight ratio:  11.690355329949238
scale_pos_weight ratio root:  3.4191161621023114
*****************************************
*************** MODEL TUNER *************** 

*****************************************
*************** SCALE POS WEIGHT *************** 

Fitting 2 folds for each of 3 candidates, totalling 6 fits
[CV 1/2; 1/3] START scale_pos_weight=1..........................................
[CV 1/2; 1/3] END ...........scale_pos_weight=1;, score=0.000 total time=   0.8s
[CV 2/2; 1/3] START scale_pos_weight=1..........................................
[CV 2/2; 1/3] END ...........scale_pos_weight=1;, score=0.007 total time=   0.9s
[CV 1/2; 2/3] START scale_pos_weight=11.690355329949238.........................
[CV 1/2; 2/3] END scale_pos_weight=11.690355329949238;, score=0.251 total time=   0.8s
[CV 2/2; 2/3] START scale_pos_weight=11.690355329949238.........................
[CV 2/2; 2/3] END scale_pos_weight=11.690355329949238;, score=0.259 total time=   0.8s
[CV 1/2

[CV 2/2; 21/100] END .........learning_rate=0.2;, score=0.290 total time=   1.0s
[CV 1/2; 22/100] START learning_rate=0.21.......................................
[CV 1/2; 22/100] END ........learning_rate=0.21;, score=0.258 total time=   1.0s
[CV 2/2; 22/100] START learning_rate=0.21.......................................
[CV 2/2; 22/100] END ........learning_rate=0.21;, score=0.279 total time=   1.1s
[CV 1/2; 23/100] START learning_rate=0.22.......................................
[CV 1/2; 23/100] END ........learning_rate=0.22;, score=0.262 total time=   0.9s
[CV 2/2; 23/100] START learning_rate=0.22.......................................
[CV 2/2; 23/100] END ........learning_rate=0.22;, score=0.287 total time=   0.8s
[CV 1/2; 24/100] START learning_rate=0.23.......................................
[CV 1/2; 24/100] END ........learning_rate=0.23;, score=0.256 total time=   0.8s
[CV 2/2; 24/100] START learning_rate=0.23.......................................
[CV 2/2; 24/100] END .......

[CV 1/2; 47/100] END ........learning_rate=0.46;, score=0.229 total time=   0.8s
[CV 2/2; 47/100] START learning_rate=0.46.......................................
[CV 2/2; 47/100] END ........learning_rate=0.46;, score=0.264 total time=   0.8s
[CV 1/2; 48/100] START learning_rate=0.47.......................................
[CV 1/2; 48/100] END ........learning_rate=0.47;, score=0.196 total time=   0.8s
[CV 2/2; 48/100] START learning_rate=0.47.......................................
[CV 2/2; 48/100] END ........learning_rate=0.47;, score=0.248 total time=   0.8s
[CV 1/2; 49/100] START learning_rate=0.48.......................................
[CV 1/2; 49/100] END ........learning_rate=0.48;, score=0.215 total time=   0.8s
[CV 2/2; 49/100] START learning_rate=0.48.......................................
[CV 2/2; 49/100] END ........learning_rate=0.48;, score=0.245 total time=   0.8s
[CV 1/2; 50/100] START learning_rate=0.49.......................................
[CV 1/2; 50/100] END .......

[CV 2/2; 72/100] END ........learning_rate=0.71;, score=0.205 total time=   0.8s
[CV 1/2; 73/100] START learning_rate=0.72.......................................
[CV 1/2; 73/100] END ........learning_rate=0.72;, score=0.207 total time=   0.8s
[CV 2/2; 73/100] START learning_rate=0.72.......................................
[CV 2/2; 73/100] END ........learning_rate=0.72;, score=0.215 total time=   0.8s
[CV 1/2; 74/100] START learning_rate=0.73.......................................
[CV 1/2; 74/100] END ........learning_rate=0.73;, score=0.213 total time=   0.8s
[CV 2/2; 74/100] START learning_rate=0.73.......................................
[CV 2/2; 74/100] END ........learning_rate=0.73;, score=0.205 total time=   0.8s
[CV 1/2; 75/100] START learning_rate=0.74.......................................
[CV 1/2; 75/100] END ........learning_rate=0.74;, score=0.189 total time=   0.8s
[CV 2/2; 75/100] START learning_rate=0.74.......................................
[CV 2/2; 75/100] END .......

[CV 1/2; 98/100] END ........learning_rate=0.97;, score=0.173 total time=   0.8s
[CV 2/2; 98/100] START learning_rate=0.97.......................................
[CV 2/2; 98/100] END ........learning_rate=0.97;, score=0.195 total time=   0.8s
[CV 1/2; 99/100] START learning_rate=0.98.......................................
[CV 1/2; 99/100] END ........learning_rate=0.98;, score=0.226 total time=   0.8s
[CV 2/2; 99/100] START learning_rate=0.98.......................................
[CV 2/2; 99/100] END ........learning_rate=0.98;, score=0.193 total time=   0.8s
[CV 1/2; 100/100] START learning_rate=0.99......................................
[CV 1/2; 100/100] END .......learning_rate=0.99;, score=0.202 total time=   0.8s
[CV 2/2; 100/100] START learning_rate=0.99......................................
[CV 2/2; 100/100] END .......learning_rate=0.99;, score=0.218 total time=   0.8s
{'learning_rate': 0.15}
*************** DEPTH AND CHILD *************** 

Fitting 2 folds for each of 81 cand

[CV 2/2; 22/81] END max_depth=5, min_child_weight=4;, score=0.291 total time=   1.2s
[CV 1/2; 23/81] START max_depth=5, min_child_weight=5...........................
[CV 1/2; 23/81] END max_depth=5, min_child_weight=5;, score=0.250 total time=   1.2s
[CV 2/2; 23/81] START max_depth=5, min_child_weight=5...........................
[CV 2/2; 23/81] END max_depth=5, min_child_weight=5;, score=0.270 total time=   1.2s
[CV 1/2; 24/81] START max_depth=5, min_child_weight=6...........................
[CV 1/2; 24/81] END max_depth=5, min_child_weight=6;, score=0.251 total time=   1.2s
[CV 2/2; 24/81] START max_depth=5, min_child_weight=6...........................
[CV 2/2; 24/81] END max_depth=5, min_child_weight=6;, score=0.291 total time=   1.2s
[CV 1/2; 25/81] START max_depth=5, min_child_weight=7...........................
[CV 1/2; 25/81] END max_depth=5, min_child_weight=7;, score=0.253 total time=   1.2s
[CV 2/2; 25/81] START max_depth=5, min_child_weight=7...........................
[CV 

[CV 2/2; 47/81] END max_depth=8, min_child_weight=2;, score=0.176 total time=   1.9s
[CV 1/2; 48/81] START max_depth=8, min_child_weight=3...........................
[CV 1/2; 48/81] END max_depth=8, min_child_weight=3;, score=0.215 total time=   1.9s
[CV 2/2; 48/81] START max_depth=8, min_child_weight=3...........................
[CV 2/2; 48/81] END max_depth=8, min_child_weight=3;, score=0.181 total time=   1.8s
[CV 1/2; 49/81] START max_depth=8, min_child_weight=4...........................
[CV 1/2; 49/81] END max_depth=8, min_child_weight=4;, score=0.206 total time=   1.9s
[CV 2/2; 49/81] START max_depth=8, min_child_weight=4...........................
[CV 2/2; 49/81] END max_depth=8, min_child_weight=4;, score=0.217 total time=   1.8s
[CV 1/2; 50/81] START max_depth=8, min_child_weight=5...........................
[CV 1/2; 50/81] END max_depth=8, min_child_weight=5;, score=0.238 total time=   1.8s
[CV 2/2; 50/81] START max_depth=8, min_child_weight=5...........................
[CV 

[CV 2/2; 72/81] END max_depth=10, min_child_weight=9;, score=0.233 total time=   2.0s
[CV 1/2; 73/81] START max_depth=11, min_child_weight=1..........................
[CV 1/2; 73/81] END max_depth=11, min_child_weight=1;, score=0.179 total time=   2.6s
[CV 2/2; 73/81] START max_depth=11, min_child_weight=1..........................
[CV 2/2; 73/81] END max_depth=11, min_child_weight=1;, score=0.168 total time=   2.5s
[CV 1/2; 74/81] START max_depth=11, min_child_weight=2..........................
[CV 1/2; 74/81] END max_depth=11, min_child_weight=2;, score=0.175 total time=   2.4s
[CV 2/2; 74/81] START max_depth=11, min_child_weight=2..........................
[CV 2/2; 74/81] END max_depth=11, min_child_weight=2;, score=0.184 total time=   2.3s
[CV 1/2; 75/81] START max_depth=11, min_child_weight=3..........................
[CV 1/2; 75/81] END max_depth=11, min_child_weight=3;, score=0.215 total time=   2.3s
[CV 2/2; 75/81] START max_depth=11, min_child_weight=3.........................

[CV 2/2; 10/16] END colsample_bytree=0.8, subsample=0.7;, score=0.277 total time=   0.8s
[CV 1/2; 11/16] START colsample_bytree=0.8, subsample=0.8.......................
[CV 1/2; 11/16] END colsample_bytree=0.8, subsample=0.8;, score=0.266 total time=   0.8s
[CV 2/2; 11/16] START colsample_bytree=0.8, subsample=0.8.......................
[CV 2/2; 11/16] END colsample_bytree=0.8, subsample=0.8;, score=0.323 total time=   0.8s
[CV 1/2; 12/16] START colsample_bytree=0.8, subsample=0.9.......................
[CV 1/2; 12/16] END colsample_bytree=0.8, subsample=0.9;, score=0.249 total time=   0.8s
[CV 2/2; 12/16] START colsample_bytree=0.8, subsample=0.9.......................
[CV 2/2; 12/16] END colsample_bytree=0.8, subsample=0.9;, score=0.300 total time=   0.8s
[CV 1/2; 13/16] START colsample_bytree=0.9, subsample=0.6.......................
[CV 1/2; 13/16] END colsample_bytree=0.9, subsample=0.6;, score=0.255 total time=   0.8s
[CV 2/2; 13/16] START colsample_bytree=0.9, subsample=0.6....

[CV 2/2; 14/999] END ...........n_estimators=14;, score=0.271 total time=   0.1s
[CV 1/2; 15/999] START n_estimators=15..........................................
[CV 1/2; 15/999] END ...........n_estimators=15;, score=0.267 total time=   0.1s
[CV 2/2; 15/999] START n_estimators=15..........................................
[CV 2/2; 15/999] END ...........n_estimators=15;, score=0.273 total time=   0.1s
[CV 1/2; 16/999] START n_estimators=16..........................................
[CV 1/2; 16/999] END ...........n_estimators=16;, score=0.265 total time=   0.1s
[CV 2/2; 16/999] START n_estimators=16..........................................
[CV 2/2; 16/999] END ...........n_estimators=16;, score=0.274 total time=   0.2s
[CV 1/2; 17/999] START n_estimators=17..........................................
[CV 1/2; 17/999] END ...........n_estimators=17;, score=0.256 total time=   0.2s
[CV 2/2; 17/999] START n_estimators=17..........................................
[CV 2/2; 17/999] END .......

[CV 1/2; 40/999] END ...........n_estimators=40;, score=0.256 total time=   0.3s
[CV 2/2; 40/999] START n_estimators=40..........................................
[CV 2/2; 40/999] END ...........n_estimators=40;, score=0.302 total time=   0.3s
[CV 1/2; 41/999] START n_estimators=41..........................................
[CV 1/2; 41/999] END ...........n_estimators=41;, score=0.262 total time=   0.4s
[CV 2/2; 41/999] START n_estimators=41..........................................
[CV 2/2; 41/999] END ...........n_estimators=41;, score=0.298 total time=   0.4s
[CV 1/2; 42/999] START n_estimators=42..........................................
[CV 1/2; 42/999] END ...........n_estimators=42;, score=0.261 total time=   0.4s
[CV 2/2; 42/999] START n_estimators=42..........................................
[CV 2/2; 42/999] END ...........n_estimators=42;, score=0.298 total time=   0.4s
[CV 1/2; 43/999] START n_estimators=43..........................................
[CV 1/2; 43/999] END .......

[CV 2/2; 65/999] END ...........n_estimators=65;, score=0.310 total time=   0.5s
[CV 1/2; 66/999] START n_estimators=66..........................................
[CV 1/2; 66/999] END ...........n_estimators=66;, score=0.260 total time=   0.5s
[CV 2/2; 66/999] START n_estimators=66..........................................
[CV 2/2; 66/999] END ...........n_estimators=66;, score=0.315 total time=   0.6s
[CV 1/2; 67/999] START n_estimators=67..........................................
[CV 1/2; 67/999] END ...........n_estimators=67;, score=0.261 total time=   0.6s
[CV 2/2; 67/999] START n_estimators=67..........................................
[CV 2/2; 67/999] END ...........n_estimators=67;, score=0.316 total time=   0.6s
[CV 1/2; 68/999] START n_estimators=68..........................................
[CV 1/2; 68/999] END ...........n_estimators=68;, score=0.257 total time=   0.6s
[CV 2/2; 68/999] START n_estimators=68..........................................
[CV 2/2; 68/999] END .......

[CV 1/2; 91/999] END ...........n_estimators=91;, score=0.266 total time=   0.7s
[CV 2/2; 91/999] START n_estimators=91..........................................
[CV 2/2; 91/999] END ...........n_estimators=91;, score=0.319 total time=   0.8s
[CV 1/2; 92/999] START n_estimators=92..........................................
[CV 1/2; 92/999] END ...........n_estimators=92;, score=0.268 total time=   0.8s
[CV 2/2; 92/999] START n_estimators=92..........................................
[CV 2/2; 92/999] END ...........n_estimators=92;, score=0.322 total time=   0.8s
[CV 1/2; 93/999] START n_estimators=93..........................................
[CV 1/2; 93/999] END ...........n_estimators=93;, score=0.266 total time=   0.8s
[CV 2/2; 93/999] START n_estimators=93..........................................
[CV 2/2; 93/999] END ...........n_estimators=93;, score=0.321 total time=   0.8s
[CV 1/2; 94/999] START n_estimators=94..........................................
[CV 1/2; 94/999] END .......

[CV 2/2; 116/999] END .........n_estimators=116;, score=0.322 total time=   1.0s
[CV 1/2; 117/999] START n_estimators=117........................................
[CV 1/2; 117/999] END .........n_estimators=117;, score=0.260 total time=   1.0s
[CV 2/2; 117/999] START n_estimators=117........................................
[CV 2/2; 117/999] END .........n_estimators=117;, score=0.319 total time=   1.1s
[CV 1/2; 118/999] START n_estimators=118........................................
[CV 1/2; 118/999] END .........n_estimators=118;, score=0.259 total time=   1.0s
[CV 2/2; 118/999] START n_estimators=118........................................
[CV 2/2; 118/999] END .........n_estimators=118;, score=0.322 total time=   1.0s
[CV 1/2; 119/999] START n_estimators=119........................................
[CV 1/2; 119/999] END .........n_estimators=119;, score=0.254 total time=   1.1s
[CV 2/2; 119/999] START n_estimators=119........................................
[CV 2/2; 119/999] END ......

[CV 1/2; 142/999] END .........n_estimators=142;, score=0.251 total time=   1.1s
[CV 2/2; 142/999] START n_estimators=142........................................
[CV 2/2; 142/999] END .........n_estimators=142;, score=0.303 total time=   1.1s
[CV 1/2; 143/999] START n_estimators=143........................................
[CV 1/2; 143/999] END .........n_estimators=143;, score=0.252 total time=   1.3s
[CV 2/2; 143/999] START n_estimators=143........................................
[CV 2/2; 143/999] END .........n_estimators=143;, score=0.305 total time=   1.2s
[CV 1/2; 144/999] START n_estimators=144........................................
[CV 1/2; 144/999] END .........n_estimators=144;, score=0.255 total time=   1.1s
[CV 2/2; 144/999] START n_estimators=144........................................
[CV 2/2; 144/999] END .........n_estimators=144;, score=0.303 total time=   1.1s
[CV 1/2; 145/999] START n_estimators=145........................................
[CV 1/2; 145/999] END ......

[CV 2/2; 167/999] END .........n_estimators=167;, score=0.312 total time=   1.7s
[CV 1/2; 168/999] START n_estimators=168........................................
[CV 1/2; 168/999] END .........n_estimators=168;, score=0.246 total time=   1.8s
[CV 2/2; 168/999] START n_estimators=168........................................
[CV 2/2; 168/999] END .........n_estimators=168;, score=0.317 total time=   1.4s
[CV 1/2; 169/999] START n_estimators=169........................................
[CV 1/2; 169/999] END .........n_estimators=169;, score=0.246 total time=   1.3s
[CV 2/2; 169/999] START n_estimators=169........................................
[CV 2/2; 169/999] END .........n_estimators=169;, score=0.319 total time=   1.4s
[CV 1/2; 170/999] START n_estimators=170........................................
[CV 1/2; 170/999] END .........n_estimators=170;, score=0.246 total time=   1.3s
[CV 2/2; 170/999] START n_estimators=170........................................
[CV 2/2; 170/999] END ......

[CV 1/2; 193/999] END .........n_estimators=193;, score=0.244 total time=   2.3s
[CV 2/2; 193/999] START n_estimators=193........................................
[CV 2/2; 193/999] END .........n_estimators=193;, score=0.307 total time=   2.3s
[CV 1/2; 194/999] START n_estimators=194........................................
[CV 1/2; 194/999] END .........n_estimators=194;, score=0.249 total time=   1.9s
[CV 2/2; 194/999] START n_estimators=194........................................
[CV 2/2; 194/999] END .........n_estimators=194;, score=0.309 total time=   1.8s
[CV 1/2; 195/999] START n_estimators=195........................................
[CV 1/2; 195/999] END .........n_estimators=195;, score=0.251 total time=   2.0s
[CV 2/2; 195/999] START n_estimators=195........................................
[CV 2/2; 195/999] END .........n_estimators=195;, score=0.302 total time=   1.8s
[CV 1/2; 196/999] START n_estimators=196........................................
[CV 1/2; 196/999] END ......

[CV 2/2; 218/999] END .........n_estimators=218;, score=0.298 total time=   1.8s
[CV 1/2; 219/999] START n_estimators=219........................................
[CV 1/2; 219/999] END .........n_estimators=219;, score=0.243 total time=   1.8s
[CV 2/2; 219/999] START n_estimators=219........................................
[CV 2/2; 219/999] END .........n_estimators=219;, score=0.298 total time=   1.7s
[CV 1/2; 220/999] START n_estimators=220........................................
[CV 1/2; 220/999] END .........n_estimators=220;, score=0.243 total time=   2.6s
[CV 2/2; 220/999] START n_estimators=220........................................
[CV 2/2; 220/999] END .........n_estimators=220;, score=0.297 total time=   2.0s
[CV 1/2; 221/999] START n_estimators=221........................................
[CV 1/2; 221/999] END .........n_estimators=221;, score=0.239 total time=   2.0s
[CV 2/2; 221/999] START n_estimators=221........................................
[CV 2/2; 221/999] END ......

[CV 1/2; 244/999] END .........n_estimators=244;, score=0.238 total time=   2.0s
[CV 2/2; 244/999] START n_estimators=244........................................
[CV 2/2; 244/999] END .........n_estimators=244;, score=0.277 total time=   2.3s
[CV 1/2; 245/999] START n_estimators=245........................................
[CV 1/2; 245/999] END .........n_estimators=245;, score=0.239 total time=   2.2s
[CV 2/2; 245/999] START n_estimators=245........................................
[CV 2/2; 245/999] END .........n_estimators=245;, score=0.273 total time=   2.1s
[CV 1/2; 246/999] START n_estimators=246........................................
[CV 1/2; 246/999] END .........n_estimators=246;, score=0.234 total time=   2.1s
[CV 2/2; 246/999] START n_estimators=246........................................
[CV 2/2; 246/999] END .........n_estimators=246;, score=0.277 total time=   2.2s
[CV 1/2; 247/999] START n_estimators=247........................................
[CV 1/2; 247/999] END ......

[CV 2/2; 269/999] END .........n_estimators=269;, score=0.278 total time=   5.6s
[CV 1/2; 270/999] START n_estimators=270........................................
[CV 1/2; 270/999] END .........n_estimators=270;, score=0.240 total time=   3.1s
[CV 2/2; 270/999] START n_estimators=270........................................
[CV 2/2; 270/999] END .........n_estimators=270;, score=0.277 total time=   2.9s
[CV 1/2; 271/999] START n_estimators=271........................................
[CV 1/2; 271/999] END .........n_estimators=271;, score=0.240 total time=   3.0s
[CV 2/2; 271/999] START n_estimators=271........................................
[CV 2/2; 271/999] END .........n_estimators=271;, score=0.276 total time=   2.8s
[CV 1/2; 272/999] START n_estimators=272........................................
[CV 1/2; 272/999] END .........n_estimators=272;, score=0.239 total time=   2.8s
[CV 2/2; 272/999] START n_estimators=272........................................
[CV 2/2; 272/999] END ......

[CV 1/2; 295/999] END .........n_estimators=295;, score=0.223 total time=   3.1s
[CV 2/2; 295/999] START n_estimators=295........................................
[CV 2/2; 295/999] END .........n_estimators=295;, score=0.243 total time=   3.1s
[CV 1/2; 296/999] START n_estimators=296........................................
[CV 1/2; 296/999] END .........n_estimators=296;, score=0.226 total time=   3.1s
[CV 2/2; 296/999] START n_estimators=296........................................
[CV 2/2; 296/999] END .........n_estimators=296;, score=0.236 total time=   3.0s
[CV 1/2; 297/999] START n_estimators=297........................................
[CV 1/2; 297/999] END .........n_estimators=297;, score=0.222 total time=   3.0s
[CV 2/2; 297/999] START n_estimators=297........................................
[CV 2/2; 297/999] END .........n_estimators=297;, score=0.238 total time=   2.7s
[CV 1/2; 298/999] START n_estimators=298........................................
[CV 1/2; 298/999] END ......

[CV 2/2; 320/999] END .........n_estimators=320;, score=0.229 total time=   3.0s
[CV 1/2; 321/999] START n_estimators=321........................................
[CV 1/2; 321/999] END .........n_estimators=321;, score=0.221 total time=   3.0s
[CV 2/2; 321/999] START n_estimators=321........................................
[CV 2/2; 321/999] END .........n_estimators=321;, score=0.234 total time=   3.6s
[CV 1/2; 322/999] START n_estimators=322........................................
[CV 1/2; 322/999] END .........n_estimators=322;, score=0.221 total time=   2.7s
[CV 2/2; 322/999] START n_estimators=322........................................
[CV 2/2; 322/999] END .........n_estimators=322;, score=0.238 total time=   3.0s
[CV 1/2; 323/999] START n_estimators=323........................................
[CV 1/2; 323/999] END .........n_estimators=323;, score=0.221 total time=   3.0s
[CV 2/2; 323/999] START n_estimators=323........................................
[CV 2/2; 323/999] END ......

[CV 1/2; 346/999] END .........n_estimators=346;, score=0.218 total time=   2.8s
[CV 2/2; 346/999] START n_estimators=346........................................
[CV 2/2; 346/999] END .........n_estimators=346;, score=0.235 total time=   2.8s
[CV 1/2; 347/999] START n_estimators=347........................................
[CV 1/2; 347/999] END .........n_estimators=347;, score=0.216 total time=   2.8s
[CV 2/2; 347/999] START n_estimators=347........................................
[CV 2/2; 347/999] END .........n_estimators=347;, score=0.238 total time=   2.8s
[CV 1/2; 348/999] START n_estimators=348........................................
[CV 1/2; 348/999] END .........n_estimators=348;, score=0.215 total time=   2.8s
[CV 2/2; 348/999] START n_estimators=348........................................
[CV 2/2; 348/999] END .........n_estimators=348;, score=0.239 total time=   2.8s
[CV 1/2; 349/999] START n_estimators=349........................................
[CV 1/2; 349/999] END ......

[CV 2/2; 371/999] END .........n_estimators=371;, score=0.239 total time=   3.2s
[CV 1/2; 372/999] START n_estimators=372........................................
[CV 1/2; 372/999] END .........n_estimators=372;, score=0.204 total time=   3.1s
[CV 2/2; 372/999] START n_estimators=372........................................
[CV 2/2; 372/999] END .........n_estimators=372;, score=0.240 total time=   3.0s
[CV 1/2; 373/999] START n_estimators=373........................................
[CV 1/2; 373/999] END .........n_estimators=373;, score=0.206 total time=   3.1s
[CV 2/2; 373/999] START n_estimators=373........................................
[CV 2/2; 373/999] END .........n_estimators=373;, score=0.240 total time=   3.3s
[CV 1/2; 374/999] START n_estimators=374........................................
[CV 1/2; 374/999] END .........n_estimators=374;, score=0.206 total time=   3.1s
[CV 2/2; 374/999] START n_estimators=374........................................
[CV 2/2; 374/999] END ......

[CV 1/2; 397/999] END .........n_estimators=397;, score=0.213 total time=   3.1s
[CV 2/2; 397/999] START n_estimators=397........................................
[CV 2/2; 397/999] END .........n_estimators=397;, score=0.232 total time=   3.1s
[CV 1/2; 398/999] START n_estimators=398........................................
[CV 1/2; 398/999] END .........n_estimators=398;, score=0.212 total time=   3.2s
[CV 2/2; 398/999] START n_estimators=398........................................
[CV 2/2; 398/999] END .........n_estimators=398;, score=0.236 total time=   3.1s
[CV 1/2; 399/999] START n_estimators=399........................................
[CV 1/2; 399/999] END .........n_estimators=399;, score=0.211 total time=   3.1s
[CV 2/2; 399/999] START n_estimators=399........................................
[CV 2/2; 399/999] END .........n_estimators=399;, score=0.239 total time=   3.1s
[CV 1/2; 400/999] START n_estimators=400........................................
[CV 1/2; 400/999] END ......

[CV 2/2; 422/999] END .........n_estimators=422;, score=0.221 total time=   3.4s
[CV 1/2; 423/999] START n_estimators=423........................................
[CV 1/2; 423/999] END .........n_estimators=423;, score=0.216 total time=   3.2s
[CV 2/2; 423/999] START n_estimators=423........................................
[CV 2/2; 423/999] END .........n_estimators=423;, score=0.222 total time=   3.5s
[CV 1/2; 424/999] START n_estimators=424........................................
[CV 1/2; 424/999] END .........n_estimators=424;, score=0.213 total time=   3.3s
[CV 2/2; 424/999] START n_estimators=424........................................
[CV 2/2; 424/999] END .........n_estimators=424;, score=0.225 total time=   3.3s
[CV 1/2; 425/999] START n_estimators=425........................................
[CV 1/2; 425/999] END .........n_estimators=425;, score=0.213 total time=   3.3s
[CV 2/2; 425/999] START n_estimators=425........................................
[CV 2/2; 425/999] END ......

[CV 1/2; 448/999] END .........n_estimators=448;, score=0.209 total time=   3.9s
[CV 2/2; 448/999] START n_estimators=448........................................
[CV 2/2; 448/999] END .........n_estimators=448;, score=0.221 total time=   3.7s
[CV 1/2; 449/999] START n_estimators=449........................................
[CV 1/2; 449/999] END .........n_estimators=449;, score=0.210 total time=   4.0s
[CV 2/2; 449/999] START n_estimators=449........................................
[CV 2/2; 449/999] END .........n_estimators=449;, score=0.214 total time=   3.6s
[CV 1/2; 450/999] START n_estimators=450........................................
[CV 1/2; 450/999] END .........n_estimators=450;, score=0.210 total time=   3.5s
[CV 2/2; 450/999] START n_estimators=450........................................
[CV 2/2; 450/999] END .........n_estimators=450;, score=0.212 total time=   3.7s
[CV 1/2; 451/999] START n_estimators=451........................................
[CV 1/2; 451/999] END ......

[CV 2/2; 473/999] END .........n_estimators=473;, score=0.205 total time=   4.1s
[CV 1/2; 474/999] START n_estimators=474........................................
[CV 1/2; 474/999] END .........n_estimators=474;, score=0.214 total time=   4.2s
[CV 2/2; 474/999] START n_estimators=474........................................
[CV 2/2; 474/999] END .........n_estimators=474;, score=0.205 total time=  10.2s
[CV 1/2; 475/999] START n_estimators=475........................................
[CV 1/2; 475/999] END .........n_estimators=475;, score=0.211 total time=   6.3s
[CV 2/2; 475/999] START n_estimators=475........................................
[CV 2/2; 475/999] END .........n_estimators=475;, score=0.209 total time=   6.4s
[CV 1/2; 476/999] START n_estimators=476........................................
[CV 1/2; 476/999] END .........n_estimators=476;, score=0.207 total time=   5.3s
[CV 2/2; 476/999] START n_estimators=476........................................
[CV 2/2; 476/999] END ......

[CV 1/2; 499/999] END .........n_estimators=499;, score=0.210 total time=   3.8s
[CV 2/2; 499/999] START n_estimators=499........................................
[CV 2/2; 499/999] END .........n_estimators=499;, score=0.195 total time=   3.8s
[CV 1/2; 500/999] START n_estimators=500........................................
[CV 1/2; 500/999] END .........n_estimators=500;, score=0.209 total time=   3.8s
[CV 2/2; 500/999] START n_estimators=500........................................
[CV 2/2; 500/999] END .........n_estimators=500;, score=0.194 total time=   3.8s
[CV 1/2; 501/999] START n_estimators=501........................................
[CV 1/2; 501/999] END .........n_estimators=501;, score=0.209 total time=   3.8s
[CV 2/2; 501/999] START n_estimators=501........................................
[CV 2/2; 501/999] END .........n_estimators=501;, score=0.200 total time=   3.9s
[CV 1/2; 502/999] START n_estimators=502........................................
[CV 1/2; 502/999] END ......

[CV 2/2; 524/999] END .........n_estimators=524;, score=0.200 total time=   4.0s
[CV 1/2; 525/999] START n_estimators=525........................................
[CV 1/2; 525/999] END .........n_estimators=525;, score=0.207 total time=   4.0s
[CV 2/2; 525/999] START n_estimators=525........................................
[CV 2/2; 525/999] END .........n_estimators=525;, score=0.200 total time=   4.0s
[CV 1/2; 526/999] START n_estimators=526........................................
[CV 1/2; 526/999] END .........n_estimators=526;, score=0.210 total time=   4.6s
[CV 2/2; 526/999] START n_estimators=526........................................
[CV 2/2; 526/999] END .........n_estimators=526;, score=0.199 total time=   4.7s
[CV 1/2; 527/999] START n_estimators=527........................................
[CV 1/2; 527/999] END .........n_estimators=527;, score=0.210 total time=   4.1s
[CV 2/2; 527/999] START n_estimators=527........................................
[CV 2/2; 527/999] END ......

[CV 1/2; 550/999] END .........n_estimators=550;, score=0.211 total time=   4.2s
[CV 2/2; 550/999] START n_estimators=550........................................
[CV 2/2; 550/999] END .........n_estimators=550;, score=0.201 total time=   4.2s
[CV 1/2; 551/999] START n_estimators=551........................................
[CV 1/2; 551/999] END .........n_estimators=551;, score=0.211 total time=   4.2s
[CV 2/2; 551/999] START n_estimators=551........................................
[CV 2/2; 551/999] END .........n_estimators=551;, score=0.204 total time=   4.2s
[CV 1/2; 552/999] START n_estimators=552........................................
[CV 1/2; 552/999] END .........n_estimators=552;, score=0.211 total time=   4.2s
[CV 2/2; 552/999] START n_estimators=552........................................
[CV 2/2; 552/999] END .........n_estimators=552;, score=0.201 total time=   4.2s
[CV 1/2; 553/999] START n_estimators=553........................................
[CV 1/2; 553/999] END ......

[CV 2/2; 575/999] END .........n_estimators=575;, score=0.197 total time=   6.0s
[CV 1/2; 576/999] START n_estimators=576........................................
[CV 1/2; 576/999] END .........n_estimators=576;, score=0.200 total time=   5.5s
[CV 2/2; 576/999] START n_estimators=576........................................
[CV 2/2; 576/999] END .........n_estimators=576;, score=0.197 total time=   4.9s
[CV 1/2; 577/999] START n_estimators=577........................................
[CV 1/2; 577/999] END .........n_estimators=577;, score=0.200 total time=   4.7s
[CV 2/2; 577/999] START n_estimators=577........................................
[CV 2/2; 577/999] END .........n_estimators=577;, score=0.197 total time=   4.4s
[CV 1/2; 578/999] START n_estimators=578........................................
[CV 1/2; 578/999] END .........n_estimators=578;, score=0.200 total time=   4.8s
[CV 2/2; 578/999] START n_estimators=578........................................
[CV 2/2; 578/999] END ......

[CV 1/2; 601/999] END .........n_estimators=601;, score=0.204 total time=   6.7s
[CV 2/2; 601/999] START n_estimators=601........................................
[CV 2/2; 601/999] END .........n_estimators=601;, score=0.204 total time=   4.9s
[CV 1/2; 602/999] START n_estimators=602........................................
[CV 1/2; 602/999] END .........n_estimators=602;, score=0.204 total time=   4.7s
[CV 2/2; 602/999] START n_estimators=602........................................
[CV 2/2; 602/999] END .........n_estimators=602;, score=0.208 total time=   4.6s
[CV 1/2; 603/999] START n_estimators=603........................................
[CV 1/2; 603/999] END .........n_estimators=603;, score=0.204 total time=   4.6s
[CV 2/2; 603/999] START n_estimators=603........................................
[CV 2/2; 603/999] END .........n_estimators=603;, score=0.208 total time=   4.6s
[CV 1/2; 604/999] START n_estimators=604........................................
[CV 1/2; 604/999] END ......

In [None]:
# fit best model
print('*'*15, 'BEST MODEL', '*'*15, '\n')
best_params = { **best_params, **estimators}
print(best_params)
xgb = XGBClassifier(eval_metric='auc', use_label_encoder=False, **best_params)
xgb.fit(X_train_transformed, y_train)

# Comparing Manual Tune vs Baseline

In [None]:
# *************** TRANSFORMATION TRAIN RESULTS *************** 

#               precision    recall  f1-score   support

#            0       0.96      0.74      0.84      6909
#            1       0.18      0.66      0.28       591

#     accuracy                           0.73      7500
#    macro avg       0.57      0.70      0.56      7500
# weighted avg       0.90      0.73      0.79      7500

# *************** TRANSFORMATION TEST RESULTS *************** 

#               precision    recall  f1-score   support

#            0       0.96      0.71      0.81      2316
#            1       0.14      0.61      0.23       184

#     accuracy                           0.70      2500
#    macro avg       0.55      0.66      0.52      2500
# weighted avg       0.90      0.70      0.77      2500

In [None]:
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report

print('actual 1s:', y_train.sum())
print('predicted 1s:', xgb.predict(X_train_transformed).sum())

print('actual 1s:', y_test.sum())
print('predicted 1s:', xgb.predict(X_test_transformed).sum())

f1_train=classification_report(y_train, xgb.predict(X_train_transformed))
f1_test=classification_report(y_test, xgb.predict(X_test_transformed))

print(f1_train)
print(f1_test)

In [None]:

# dxgb=XGBClassifier(eval_metric='auc', use_label_encoder=False)
# dxgb.fit(X_train_transformed, y_train)

# print('actual 1s:', y_train.sum())
# print('predicted 1s:', dxgb.predict(X_train_transformed).sum())

# print('actual 1s:', y_test.sum())
# print('predicted 1s:', dxgb.predict(X_test_transformed).sum())

# f1_train=classification_report(y_train, dxgb.predict(X_train_transformed))
# f1_test=classification_report(y_test, dxgb.predict(X_test_transformed))

# print(f1_train)
# print(f1_test)

In [None]:
# scoring on test dataset
test_df=pd.read_csv('data/application_test.csv')
test_df_=test_df.loc[:, ~test_df.columns.isin(['SK_ID_CURR'])]
test_df_transformed=pipe.transform(test_df_)

submission_df=test_df[['SK_ID_CURR']]
submission_df['TARGET']=opt.predict(test_df_transformed)
submission_df.to_csv('submission.csv', index=False)

# Bayes Opt

In [None]:
N_ITER=10

estimator = XGBClassifier(eval_metric='auc', use_label_encoder=False)

fit_params = {
    'early_stopping_rounds': 10,
    'eval_set':[(X_test, y_test)],
    'verbose': False,
}

ratio = float(np.sum(y_train == 0)) / np.sum(y_train==1)

search_space = {
    'max_depth': (1, 6),
    'n_estimators': (50, 500),
    'min_child_weight': (1, 100),
    'gamma': [0.5, 1, 1.5, 2, 5],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
    'scale_pos_weight' : (0, ratio),
}

opt = BayesSearchCV(
    estimator=estimator,
    search_spaces=search_space,
    fit_params=fit_params,
    cv=cv,
    scoring="roc_auc",
    random_state=42,
    n_iter=N_ITER,
    verbose=1,
    return_train_score=True,
)

opt.fit(X_train_transformed, y_train)