<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Introduction" data-toc-modified-id="Introduction-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Introduction</a></span></li></ul></div>

# Introduction

In [1]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.metrics import log_loss
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
import xgboost as xgb
import warnings
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from catboost import CatBoostClassifier

warnings.filterwarnings('ignore')

In [2]:
dfMasterTable = pd.read_csv('dfMasterTable.csv')

In [3]:
dfMasterTable.fillna(0, inplace=True)

train_df = dfMasterTable[dfMasterTable.date_pred <= 201702]

validation_df = dfMasterTable[dfMasterTable.date_pred == 201703]

clf = XGBClassifier(n_jobs=-1, tree_method="hist", random_state=0, eval_metric="logloss")

X_train = train_df.drop(['msno', 'is_churn', 'date_pred'], 1).values
y_train = train_df['is_churn'].values

X_val = validation_df.drop(['msno', 'is_churn', 'date_pred'], 1).values
y_val = validation_df['is_churn'].values


In [4]:
train_df.columns

Index(['msno', 'is_churn', 'date_pred', 'bd', 'gender', 'city_1', 'city_4',
       'city_5', 'city_13', 'city_15', 'city_22', 'city_other',
       'registered_via_3', 'registered_via_4', 'registered_via_7',
       'registered_via_9', 'registered_via_11', 'registered_via_13',
       'registered_via_other', 'tenure_days', 'actual_amount_paid_min',
       'actual_amount_paid_max', 'actual_amount_paid_mean',
       'actual_amount_paid_std', 'actual_amount_paid_sum',
       'is_auto_renew_mean', 'is_auto_renew_sum', 'is_cancel_sum',
       'is_cancel_mean', 'msno_count_x', 'diff_dates_min', 'diff_dates_max',
       'diff_dates_mean', 'diff_dates_std', 'churn_mean', 'churn_sum',
       'days_last_trx', 'total_secs_sum', 'total_secs_mean', 'total_secs_std',
       'num_unq_sum', 'num_unq_mean', 'num_unq_std', 'msno_count_y',
       'nbr_logins_monthly_mean', 'nbr_logins_monthly_max',
       'nbr_logins_monthly_min', 'nbr_logins_monthly_std',
       'nbr_logins_monthly_count', 'last_login_days

In [5]:
cont_cols = train_df.drop(['msno', 'is_churn', 'date_pred',
                           *[x for x in train_df.columns if 
                             (x.startswith('city') or x.startswith('registered'))]], 
              axis=1).columns

In [6]:
cont_cols

Index(['bd', 'gender', 'tenure_days', 'actual_amount_paid_min',
       'actual_amount_paid_max', 'actual_amount_paid_mean',
       'actual_amount_paid_std', 'actual_amount_paid_sum',
       'is_auto_renew_mean', 'is_auto_renew_sum', 'is_cancel_sum',
       'is_cancel_mean', 'msno_count_x', 'diff_dates_min', 'diff_dates_max',
       'diff_dates_mean', 'diff_dates_std', 'churn_mean', 'churn_sum',
       'days_last_trx', 'total_secs_sum', 'total_secs_mean', 'total_secs_std',
       'num_unq_sum', 'num_unq_mean', 'num_unq_std', 'msno_count_y',
       'nbr_logins_monthly_mean', 'nbr_logins_monthly_max',
       'nbr_logins_monthly_min', 'nbr_logins_monthly_std',
       'nbr_logins_monthly_count', 'last_login_days'],
      dtype='object')

In [7]:
losses_feature_selection = []

for i, column in enumerate(cont_cols):
    keep_cols = cont_cols[:i+1]
    
    X_train, y_train = train_df[keep_cols], train_df['is_churn']
    X_val, y_val = validation_df[keep_cols], validation_df['is_churn']
    evaluation = [( X_train, y_train), ( X_val, y_val)]
    
    
    clf = XGBClassifier(n_jobs=-1, tree_method="hist", random_state=0, eval_metric="logloss")
    clf.fit(X_train, y_train,
            eval_set=evaluation,
            early_stopping_rounds=10,verbose=False)
    
    
    y_pred = clf.predict_proba(X_val)[:, 1]

    loss = log_loss(y_val, y_pred)
    losses_feature_selection.append((list(keep_cols), loss))
    print(losses_feature_selection[i])


(['bd'], 0.2937722463463623)
(['bd', 'gender'], 0.2934531672854632)
(['bd', 'gender', 'tenure_days'], 0.29173480201236485)
(['bd', 'gender', 'tenure_days', 'actual_amount_paid_min'], 0.2575269312973176)
(['bd', 'gender', 'tenure_days', 'actual_amount_paid_min', 'actual_amount_paid_max'], 0.24833040591841152)
(['bd', 'gender', 'tenure_days', 'actual_amount_paid_min', 'actual_amount_paid_max', 'actual_amount_paid_mean'], 0.24644466400989753)
(['bd', 'gender', 'tenure_days', 'actual_amount_paid_min', 'actual_amount_paid_max', 'actual_amount_paid_mean', 'actual_amount_paid_std'], 0.24589454388407697)
(['bd', 'gender', 'tenure_days', 'actual_amount_paid_min', 'actual_amount_paid_max', 'actual_amount_paid_mean', 'actual_amount_paid_std', 'actual_amount_paid_sum'], 0.24321346629509566)
(['bd', 'gender', 'tenure_days', 'actual_amount_paid_min', 'actual_amount_paid_max', 'actual_amount_paid_mean', 'actual_amount_paid_std', 'actual_amount_paid_sum', 'is_auto_renew_mean'], 0.23773488949610733)
([

(['bd', 'gender', 'tenure_days', 'actual_amount_paid_min', 'actual_amount_paid_max', 'actual_amount_paid_mean', 'actual_amount_paid_std', 'actual_amount_paid_sum', 'is_auto_renew_mean', 'is_auto_renew_sum', 'is_cancel_sum', 'is_cancel_mean', 'msno_count_x', 'diff_dates_min', 'diff_dates_max', 'diff_dates_mean', 'diff_dates_std', 'churn_mean', 'churn_sum', 'days_last_trx', 'total_secs_sum', 'total_secs_mean', 'total_secs_std', 'num_unq_sum', 'num_unq_mean', 'num_unq_std', 'msno_count_y', 'nbr_logins_monthly_mean', 'nbr_logins_monthly_max'], 0.2161930957225652)
(['bd', 'gender', 'tenure_days', 'actual_amount_paid_min', 'actual_amount_paid_max', 'actual_amount_paid_mean', 'actual_amount_paid_std', 'actual_amount_paid_sum', 'is_auto_renew_mean', 'is_auto_renew_sum', 'is_cancel_sum', 'is_cancel_mean', 'msno_count_x', 'diff_dates_min', 'diff_dates_max', 'diff_dates_mean', 'diff_dates_std', 'churn_mean', 'churn_sum', 'days_last_trx', 'total_secs_sum', 'total_secs_mean', 'total_secs_std', 'num

In [8]:
city_columns = [x for x in train_df.columns if x.startswith('city')]
registration_columns = [x for x in train_df.columns if x.startswith('registered')]

In [9]:
categ_columns = [city_columns, registration_columns]

In [10]:
categ_columns

[['city_1', 'city_4', 'city_5', 'city_13', 'city_15', 'city_22', 'city_other'],
 ['registered_via_3',
  'registered_via_4',
  'registered_via_7',
  'registered_via_9',
  'registered_via_11',
  'registered_via_13',
  'registered_via_other']]

In [11]:
# adding the categorical columns to the feature selection step

In [12]:
losses_copy = losses_feature_selection.copy()

In [13]:
losses_feature_selection = losses_copy

In [14]:
keep_cols = list(cont_cols)

In [15]:
for i, categ_feature in enumerate(categ_columns):
    keep_cols += categ_feature
    
    X_train, y_train = train_df[keep_cols], train_df['is_churn']
    X_val, y_val = validation_df[keep_cols], validation_df['is_churn']
    evaluation = [( X_train, y_train), ( X_val, y_val)]
    
    
    clf = XGBClassifier(n_jobs=-1, tree_method="hist", random_state=0, eval_metric="logloss")
    clf.fit(X_train, y_train,
            eval_set=evaluation,
            early_stopping_rounds=10,verbose=False)
    
    
    y_pred = clf.predict_proba(X_val)[:, 1]

    loss = log_loss(y_val, y_pred)
    losses_feature_selection.append((list(keep_cols), loss))
    print(losses_feature_selection[len(cont_cols) + i])

    

(['bd', 'gender', 'tenure_days', 'actual_amount_paid_min', 'actual_amount_paid_max', 'actual_amount_paid_mean', 'actual_amount_paid_std', 'actual_amount_paid_sum', 'is_auto_renew_mean', 'is_auto_renew_sum', 'is_cancel_sum', 'is_cancel_mean', 'msno_count_x', 'diff_dates_min', 'diff_dates_max', 'diff_dates_mean', 'diff_dates_std', 'churn_mean', 'churn_sum', 'days_last_trx', 'total_secs_sum', 'total_secs_mean', 'total_secs_std', 'num_unq_sum', 'num_unq_mean', 'num_unq_std', 'msno_count_y', 'nbr_logins_monthly_mean', 'nbr_logins_monthly_max', 'nbr_logins_monthly_min', 'nbr_logins_monthly_std', 'nbr_logins_monthly_count', 'last_login_days', 'city_1', 'city_4', 'city_5', 'city_13', 'city_15', 'city_22', 'city_other'], 0.21639743356974134)
(['bd', 'gender', 'tenure_days', 'actual_amount_paid_min', 'actual_amount_paid_max', 'actual_amount_paid_mean', 'actual_amount_paid_std', 'actual_amount_paid_sum', 'is_auto_renew_mean', 'is_auto_renew_sum', 'is_cancel_sum', 'is_cancel_mean', 'msno_count_x',

In [16]:
losses_feature_selection

[(['bd'], 0.2937722463463623),
 (['bd', 'gender'], 0.2934531672854632),
 (['bd', 'gender', 'tenure_days'], 0.29173480201236485),
 (['bd', 'gender', 'tenure_days', 'actual_amount_paid_min'],
  0.2575269312973176),
 (['bd',
   'gender',
   'tenure_days',
   'actual_amount_paid_min',
   'actual_amount_paid_max'],
  0.24833040591841152),
 (['bd',
   'gender',
   'tenure_days',
   'actual_amount_paid_min',
   'actual_amount_paid_max',
   'actual_amount_paid_mean'],
  0.24644466400989753),
 (['bd',
   'gender',
   'tenure_days',
   'actual_amount_paid_min',
   'actual_amount_paid_max',
   'actual_amount_paid_mean',
   'actual_amount_paid_std'],
  0.24589454388407697),
 (['bd',
   'gender',
   'tenure_days',
   'actual_amount_paid_min',
   'actual_amount_paid_max',
   'actual_amount_paid_mean',
   'actual_amount_paid_std',
   'actual_amount_paid_sum'],
  0.24321346629509566),
 (['bd',
   'gender',
   'tenure_days',
   'actual_amount_paid_min',
   'actual_amount_paid_max',
   'actual_amount_pa

In [17]:
final_losses = [x[1] for x in losses_feature_selection]

In [18]:
final_losses

[0.2937722463463623,
 0.2934531672854632,
 0.29173480201236485,
 0.2575269312973176,
 0.24833040591841152,
 0.24644466400989753,
 0.24589454388407697,
 0.24321346629509566,
 0.23773488949610733,
 0.23764340094556902,
 0.22296633535267948,
 0.2255018699221768,
 0.22522560994882745,
 0.2204095078620457,
 0.21833522867853192,
 0.21674950429229103,
 0.2173984755400892,
 0.21615552670897328,
 0.21698784309054078,
 0.21562437450728694,
 0.21525116637510847,
 0.21472698127755604,
 0.21493352083477213,
 0.2153096169729207,
 0.21533707011587516,
 0.21531986012479765,
 0.215292464439895,
 0.2158674499786104,
 0.2161930957225652,
 0.21675060627079493,
 0.21702214400838973,
 0.2183653408046403,
 0.21622188488434552,
 0.21639743356974134,
 0.2151477048544849]

In [19]:
keep_features = [True] + [True if final_losses[i] < final_losses[i-1] else False for i in range(1, len(final_losses))] 

In [20]:
len(keep_features)

35

In [21]:
len(final_losses)

35

In [22]:
result_feature_selection = list(zip(losses_feature_selection, keep_features))

In [23]:
result_feature_selection

[((['bd'], 0.2937722463463623), True),
 ((['bd', 'gender'], 0.2934531672854632), True),
 ((['bd', 'gender', 'tenure_days'], 0.29173480201236485), True),
 ((['bd', 'gender', 'tenure_days', 'actual_amount_paid_min'],
   0.2575269312973176),
  True),
 ((['bd',
    'gender',
    'tenure_days',
    'actual_amount_paid_min',
    'actual_amount_paid_max'],
   0.24833040591841152),
  True),
 ((['bd',
    'gender',
    'tenure_days',
    'actual_amount_paid_min',
    'actual_amount_paid_max',
    'actual_amount_paid_mean'],
   0.24644466400989753),
  True),
 ((['bd',
    'gender',
    'tenure_days',
    'actual_amount_paid_min',
    'actual_amount_paid_max',
    'actual_amount_paid_mean',
    'actual_amount_paid_std'],
   0.24589454388407697),
  True),
 ((['bd',
    'gender',
    'tenure_days',
    'actual_amount_paid_min',
    'actual_amount_paid_max',
    'actual_amount_paid_mean',
    'actual_amount_paid_std',
    'actual_amount_paid_sum'],
   0.24321346629509566),
  True),
 ((['bd',
    'ge

In [24]:
result_continous = result_feature_selection[:-2]

In [25]:
result_continous

[((['bd'], 0.2937722463463623), True),
 ((['bd', 'gender'], 0.2934531672854632), True),
 ((['bd', 'gender', 'tenure_days'], 0.29173480201236485), True),
 ((['bd', 'gender', 'tenure_days', 'actual_amount_paid_min'],
   0.2575269312973176),
  True),
 ((['bd',
    'gender',
    'tenure_days',
    'actual_amount_paid_min',
    'actual_amount_paid_max'],
   0.24833040591841152),
  True),
 ((['bd',
    'gender',
    'tenure_days',
    'actual_amount_paid_min',
    'actual_amount_paid_max',
    'actual_amount_paid_mean'],
   0.24644466400989753),
  True),
 ((['bd',
    'gender',
    'tenure_days',
    'actual_amount_paid_min',
    'actual_amount_paid_max',
    'actual_amount_paid_mean',
    'actual_amount_paid_std'],
   0.24589454388407697),
  True),
 ((['bd',
    'gender',
    'tenure_days',
    'actual_amount_paid_min',
    'actual_amount_paid_max',
    'actual_amount_paid_mean',
    'actual_amount_paid_std',
    'actual_amount_paid_sum'],
   0.24321346629509566),
  True),
 ((['bd',
    'ge

In [26]:
final_keep_cols = [x[0][0][-1] for x in result_continous if x[1] ]

In [27]:
registration_columns = [x for x in train_df.columns if x.startswith('registered')]

In [28]:
final_keep_cols += registration_columns

In [49]:
final_keep_cols

['bd',
 'gender',
 'tenure_days',
 'actual_amount_paid_min',
 'actual_amount_paid_max',
 'actual_amount_paid_mean',
 'actual_amount_paid_std',
 'actual_amount_paid_sum',
 'is_auto_renew_mean',
 'is_auto_renew_sum',
 'is_cancel_sum',
 'msno_count_x',
 'diff_dates_min',
 'diff_dates_max',
 'diff_dates_mean',
 'churn_mean',
 'days_last_trx',
 'total_secs_sum',
 'total_secs_mean',
 'num_unq_std',
 'msno_count_y',
 'last_login_days',
 'registered_via_3',
 'registered_via_4',
 'registered_via_7',
 'registered_via_9',
 'registered_via_11',
 'registered_via_13',
 'registered_via_other']

In [44]:
training_data = train_df[final_keep_cols]

In [45]:
training_data.head(3)

Unnamed: 0,bd,gender,tenure_days,actual_amount_paid_min,actual_amount_paid_max,actual_amount_paid_mean,actual_amount_paid_std,actual_amount_paid_sum,is_auto_renew_mean,is_auto_renew_sum,...,num_unq_std,msno_count_y,last_login_days,registered_via_3,registered_via_4,registered_via_7,registered_via_9,registered_via_11,registered_via_13,registered_via_other
0,36.0,0.0,4319.0,0.0,149.0,74.5,105.35891,149.0,0.0,0.0,...,16.012807,20.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,38.0,1.0,4318.0,149.0,180.0,150.409091,6.609222,3309.0,0.954545,21.0,...,14.006454,520.0,33.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,27.0,0.0,4126.0,149.0,150.0,149.2,0.421637,1492.0,0.8,8.0,...,35.829472,237.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [47]:
# Create correlation matrix
corr_matrix = training_data.corr().abs()

# Select upper triangle of correlation matrix
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))

# Find index of feature columns with correlation greater than a threshold
to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]

In [48]:
to_drop

[]

In [29]:
X_train, y_train = train_df[final_keep_cols], train_df['is_churn']
X_val, y_val = validation_df[final_keep_cols], validation_df['is_churn']
evaluation = [( X_train, y_train), ( X_val, y_val)]


clf = XGBClassifier(n_jobs=-1, tree_method="hist", random_state=0, eval_metric="logloss")
clf.fit(X_train, y_train,
        eval_set=evaluation,
        early_stopping_rounds=10,verbose=False)


y_pred = clf.predict_proba(X_val)[:, 1]

loss = log_loss(y_val, y_pred)

In [30]:
loss

0.21284646144240338

In [31]:
best_loss = 0
best_model = clf
best_params = {}

In [32]:
from sklearn.metrics import roc_auc_score
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
import numpy as np
import xgboost as xgb
def objective(space):
    # Instantiate the classifier
    clf = xgb.XGBClassifier(
                 learning_rate =0.1,
                 n_estimators=350,
                 max_depth=int(space['max_depth']),
                 min_child_weight=space['min_child_weight'],
                 gamma=space['gamma'],
                 subsample=space['subsample'],
                 colsample_bytree=space['colsample_bytree'],
                 objective= 'binary:logistic',
        n_jobs=-1,
        tree_method='hist',
                 seed=0)
    
    eval_set  = [( X_train, y_train), ( X_val, y_val)]
    
    # Fit the classsifier
    clf.fit(X_train, y_train,
            eval_set=eval_set, eval_metric="logloss",
            early_stopping_rounds=10)
    
    # Predict on Cross Validation data
    pred = clf.predict_proba(X_val)[:,1]
    
    # Calculate our Metric - accuracy
    loss = log_loss(y_val, pred)
    print(loss)
    global best_loss
    global best_model
    global best_params
    if loss < best_loss:
        best_auc = loss
        best_model = clf
        best_params = space
    # return needs to be in this below format. We use negative of accuracy since we want to maximize it.
    return {'loss': loss, 'status': STATUS_OK }

In [33]:
space ={'max_depth': hp.quniform("max_depth", 4, 16, 1),
        'min_child_weight': hp.quniform ('min_child_weight', 1, 10, 1),
        'subsample': hp.uniform ('subsample', 0.7, 1),
        'gamma' : hp.uniform ('gamma', 0.1,0.5),
        'colsample_bytree' : hp.uniform ('colsample_bytree', 0.7,1),
    }

In [34]:
trials = Trials()
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=25,
            trials=trials)
print(best)

[0]	validation_0-logloss:0.61466	validation_1-logloss:0.61803                                                                                                                                 
[1]	validation_0-logloss:0.55034	validation_1-logloss:0.55703                                                                                                                                 
[2]	validation_0-logloss:0.49671	validation_1-logloss:0.50638                                                                                                                                 
[3]	validation_0-logloss:0.45139	validation_1-logloss:0.46371                                                                                                                                 
[4]	validation_0-logloss:0.41289	validation_1-logloss:0.42771                                                                                                                                 
[5]	validation_0-logloss:0.37999	validation_1

[34]	validation_0-logloss:0.14478	validation_1-logloss:0.20775                                                                                                                                
[35]	validation_0-logloss:0.14381	validation_1-logloss:0.20777                                                                                                                                
[36]	validation_0-logloss:0.14301	validation_1-logloss:0.20779                                                                                                                                
[37]	validation_0-logloss:0.14221	validation_1-logloss:0.20788                                                                                                                                
[38]	validation_0-logloss:0.14148	validation_1-logloss:0.20795                                                                                                                                
[39]	validation_0-logloss:0.14086	validation_

[26]	validation_0-logloss:0.17052	validation_1-logloss:0.21684                                                                                                                                
[27]	validation_0-logloss:0.16852	validation_1-logloss:0.21575                                                                                                                                
[28]	validation_0-logloss:0.16677	validation_1-logloss:0.21481                                                                                                                                
[29]	validation_0-logloss:0.16518	validation_1-logloss:0.21407                                                                                                                                
[30]	validation_0-logloss:0.16373	validation_1-logloss:0.21347                                                                                                                                
[31]	validation_0-logloss:0.16227	validation_

[15]	validation_0-logloss:0.20389	validation_1-logloss:0.24565                                                                                                                                
[16]	validation_0-logloss:0.19591	validation_1-logloss:0.23949                                                                                                                                
[17]	validation_0-logloss:0.18881	validation_1-logloss:0.23423                                                                                                                                
[18]	validation_0-logloss:0.18248	validation_1-logloss:0.22966                                                                                                                                
[19]	validation_0-logloss:0.17686	validation_1-logloss:0.22572                                                                                                                                
[20]	validation_0-logloss:0.17181	validation_

[5]	validation_0-logloss:0.38321	validation_1-logloss:0.39968                                                                                                                                 
[6]	validation_0-logloss:0.35513	validation_1-logloss:0.37372                                                                                                                                 
[7]	validation_0-logloss:0.33086	validation_1-logloss:0.35154                                                                                                                                 
[8]	validation_0-logloss:0.30945	validation_1-logloss:0.33208                                                                                                                                 
[9]	validation_0-logloss:0.29084	validation_1-logloss:0.31537                                                                                                                                 
[10]	validation_0-logloss:0.27469	validation_

[39]	validation_0-logloss:0.14027	validation_1-logloss:0.20810                                                                                                                                
[40]	validation_0-logloss:0.13965	validation_1-logloss:0.20828                                                                                                                                
[41]	validation_0-logloss:0.13909	validation_1-logloss:0.20847                                                                                                                                
[42]	validation_0-logloss:0.13862	validation_1-logloss:0.20866                                                                                                                                
[43]	validation_0-logloss:0.13815	validation_1-logloss:0.20889                                                                                                                                
[44]	validation_0-logloss:0.13771	validation_

[31]	validation_0-logloss:0.14353	validation_1-logloss:0.20811                                                                                                                                
[32]	validation_0-logloss:0.14227	validation_1-logloss:0.20781                                                                                                                                
[33]	validation_0-logloss:0.14098	validation_1-logloss:0.20757                                                                                                                                
[34]	validation_0-logloss:0.13975	validation_1-logloss:0.20755                                                                                                                                
[35]	validation_0-logloss:0.13868	validation_1-logloss:0.20756                                                                                                                                
[36]	validation_0-logloss:0.13767	validation_

[11]	validation_0-logloss:0.24935	validation_1-logloss:0.28190                                                                                                                                
[12]	validation_0-logloss:0.23645	validation_1-logloss:0.27097                                                                                                                                
[13]	validation_0-logloss:0.22507	validation_1-logloss:0.26135                                                                                                                                
[14]	validation_0-logloss:0.21500	validation_1-logloss:0.25312                                                                                                                                
[15]	validation_0-logloss:0.20616	validation_1-logloss:0.24602                                                                                                                                
[16]	validation_0-logloss:0.19830	validation_

[5]	validation_0-logloss:0.37999	validation_1-logloss:0.39717                                                                                                                                 
[6]	validation_0-logloss:0.35154	validation_1-logloss:0.37087                                                                                                                                 
[7]	validation_0-logloss:0.32688	validation_1-logloss:0.34832                                                                                                                                 
[8]	validation_0-logloss:0.30548	validation_1-logloss:0.32876                                                                                                                                 
[9]	validation_0-logloss:0.28683	validation_1-logloss:0.31194                                                                                                                                 
[10]	validation_0-logloss:0.27051	validation_

[40]	validation_0-logloss:0.13218	validation_1-logloss:0.20860                                                                                                                                
[41]	validation_0-logloss:0.13154	validation_1-logloss:0.20886                                                                                                                                
[42]	validation_0-logloss:0.13088	validation_1-logloss:0.20906                                                                                                                                
[43]	validation_0-logloss:0.13030	validation_1-logloss:0.20929                                                                                                                                
[44]	validation_0-logloss:0.12982	validation_1-logloss:0.20953                                                                                                                                
0.20778830483293803                          

[33]	validation_0-logloss:0.14284	validation_1-logloss:0.20790                                                                                                                                
[34]	validation_0-logloss:0.14178	validation_1-logloss:0.20788                                                                                                                                
[35]	validation_0-logloss:0.14079	validation_1-logloss:0.20789                                                                                                                                
[36]	validation_0-logloss:0.13993	validation_1-logloss:0.20795                                                                                                                                
[37]	validation_0-logloss:0.13908	validation_1-logloss:0.20802                                                                                                                                
[38]	validation_0-logloss:0.13829	validation_

[24]	validation_0-logloss:0.15395	validation_1-logloss:0.21383                                                                                                                                
[25]	validation_0-logloss:0.15092	validation_1-logloss:0.21249                                                                                                                                
[26]	validation_0-logloss:0.14823	validation_1-logloss:0.21138                                                                                                                                
[27]	validation_0-logloss:0.14579	validation_1-logloss:0.21044                                                                                                                                
[28]	validation_0-logloss:0.14358	validation_1-logloss:0.20972                                                                                                                                
[29]	validation_0-logloss:0.14153	validation_

[18]	validation_0-logloss:0.18465	validation_1-logloss:0.23041                                                                                                                                
[19]	validation_0-logloss:0.17900	validation_1-logloss:0.22648                                                                                                                                
[20]	validation_0-logloss:0.17389	validation_1-logloss:0.22309                                                                                                                                
[21]	validation_0-logloss:0.16950	validation_1-logloss:0.22022                                                                                                                                
[22]	validation_0-logloss:0.16558	validation_1-logloss:0.21789                                                                                                                                
[23]	validation_0-logloss:0.16201	validation_

[9]	validation_0-logloss:0.28157	validation_1-logloss:0.30993                                                                                                                                 
[10]	validation_0-logloss:0.26487	validation_1-logloss:0.29533                                                                                                                                
[11]	validation_0-logloss:0.25008	validation_1-logloss:0.28258                                                                                                                                
[12]	validation_0-logloss:0.23706	validation_1-logloss:0.27159                                                                                                                                
[13]	validation_0-logloss:0.22563	validation_1-logloss:0.26211                                                                                                                                
[14]	validation_0-logloss:0.21542	validation_

In [35]:
best

{'colsample_bytree': 0.7273674654463997,
 'gamma': 0.10477869581569521,
 'max_depth': 14.0,
 'min_child_weight': 9.0,
 'subsample': 0.7273410804180915}

In [36]:
clf = xgb.XGBClassifier(
             learning_rate =0.1,
             n_estimators=350,
             max_depth=int(best['max_depth']),
             min_child_weight=best['min_child_weight'],
             gamma=best['gamma'],
             subsample=best['subsample'],
             colsample_bytree=best['colsample_bytree'],
             objective= 'binary:logistic',
    n_jobs=-1,
    tree_method='hist',
             seed=0)

In [37]:
eval_set  = [( X_val, y_val)]

In [38]:
# Fit the classsifier
clf.fit(X_train, y_train,
        eval_set=eval_set, eval_metric="logloss",
        early_stopping_rounds=10)

[0]	validation_0-logloss:0.61755
[1]	validation_0-logloss:0.55583
[2]	validation_0-logloss:0.50478
[3]	validation_0-logloss:0.46185
[4]	validation_0-logloss:0.42566
[5]	validation_0-logloss:0.39497
[6]	validation_0-logloss:0.36860
[7]	validation_0-logloss:0.34586
[8]	validation_0-logloss:0.32680
[9]	validation_0-logloss:0.30993
[10]	validation_0-logloss:0.29533
[11]	validation_0-logloss:0.28258
[12]	validation_0-logloss:0.27159
[13]	validation_0-logloss:0.26211
[14]	validation_0-logloss:0.25379
[15]	validation_0-logloss:0.24659
[16]	validation_0-logloss:0.24038
[17]	validation_0-logloss:0.23499
[18]	validation_0-logloss:0.23043
[19]	validation_0-logloss:0.22658
[20]	validation_0-logloss:0.22319
[21]	validation_0-logloss:0.22025
[22]	validation_0-logloss:0.21779
[23]	validation_0-logloss:0.21566
[24]	validation_0-logloss:0.21386
[25]	validation_0-logloss:0.21240
[26]	validation_0-logloss:0.21120
[27]	validation_0-logloss:0.21024
[28]	validation_0-logloss:0.20945
[29]	validation_0-loglos

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1,
              colsample_bytree=0.7273674654463997, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None,
              gamma=0.10477869581569521, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.1, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=14, max_leaves=0,
              min_child_weight=9.0, missing=nan, monotone_constraints='()',
              n_estimators=350, n_jobs=-1, num_parallel_tree=1,
              predictor='auto', random_state=0, reg_alpha=0, reg_lambda=1, ...)

In [39]:
pred = clf.predict_proba(X_val)[:,1]

# Calculate our Metric - accuracy
loss = log_loss(y_val, pred)

In [40]:
loss

0.20754205099516446

In [54]:
estimators = [
    ('rf', RandomForestClassifier(n_estimators=300, n_jobs=-1, random_state=42)),
    ('xgb', xgb.XGBClassifier(
             learning_rate =0.1,
             n_estimators=350,
             max_depth=int(best['max_depth']),
             min_child_weight=best['min_child_weight'],
             gamma=best['gamma'],
             subsample=best['subsample'],
             colsample_bytree=best['colsample_bytree'],
             objective= 'binary:logistic',
    n_jobs=-1,
    tree_method='hist',
             seed=0))
]

In [55]:
stack_clf = StackingClassifier(
    estimators=estimators, final_estimator=CatBoostClassifier())


In [56]:
stack_clf.fit(X_train, y_train)

Learning rate set to 0.196164
0:	learn: 0.4246755	total: 131ms	remaining: 2m 11s
1:	learn: 0.2908865	total: 236ms	remaining: 1m 57s
2:	learn: 0.2270230	total: 319ms	remaining: 1m 46s
3:	learn: 0.1931374	total: 403ms	remaining: 1m 40s
4:	learn: 0.1755887	total: 498ms	remaining: 1m 39s
5:	learn: 0.1663519	total: 584ms	remaining: 1m 36s
6:	learn: 0.1605881	total: 669ms	remaining: 1m 34s
7:	learn: 0.1569328	total: 766ms	remaining: 1m 34s
8:	learn: 0.1547229	total: 842ms	remaining: 1m 32s
9:	learn: 0.1533222	total: 944ms	remaining: 1m 33s
10:	learn: 0.1523940	total: 1.03s	remaining: 1m 32s
11:	learn: 0.1518696	total: 1.12s	remaining: 1m 31s
12:	learn: 0.1514695	total: 1.2s	remaining: 1m 30s
13:	learn: 0.1511896	total: 1.3s	remaining: 1m 31s
14:	learn: 0.1510273	total: 1.38s	remaining: 1m 30s
15:	learn: 0.1509154	total: 1.48s	remaining: 1m 30s
16:	learn: 0.1508164	total: 1.58s	remaining: 1m 31s
17:	learn: 0.1507688	total: 1.66s	remaining: 1m 30s
18:	learn: 0.1507337	total: 1.74s	remaining: 1

158:	learn: 0.1501192	total: 13.5s	remaining: 1m 11s
159:	learn: 0.1501162	total: 13.6s	remaining: 1m 11s
160:	learn: 0.1501152	total: 13.7s	remaining: 1m 11s
161:	learn: 0.1501155	total: 13.7s	remaining: 1m 11s
162:	learn: 0.1501142	total: 13.8s	remaining: 1m 10s
163:	learn: 0.1501138	total: 13.9s	remaining: 1m 10s
164:	learn: 0.1501127	total: 13.9s	remaining: 1m 10s
165:	learn: 0.1501120	total: 14s	remaining: 1m 10s
166:	learn: 0.1501105	total: 14.1s	remaining: 1m 10s
167:	learn: 0.1501054	total: 14.2s	remaining: 1m 10s
168:	learn: 0.1501037	total: 14.3s	remaining: 1m 10s
169:	learn: 0.1501006	total: 14.4s	remaining: 1m 10s
170:	learn: 0.1501000	total: 14.4s	remaining: 1m 9s
171:	learn: 0.1500991	total: 14.5s	remaining: 1m 9s
172:	learn: 0.1500990	total: 14.6s	remaining: 1m 9s
173:	learn: 0.1500984	total: 14.7s	remaining: 1m 9s
174:	learn: 0.1500966	total: 14.7s	remaining: 1m 9s
175:	learn: 0.1500953	total: 14.8s	remaining: 1m 9s
176:	learn: 0.1500916	total: 14.9s	remaining: 1m 9s
17

319:	learn: 0.1499131	total: 26.6s	remaining: 56.5s
320:	learn: 0.1499128	total: 26.7s	remaining: 56.4s
321:	learn: 0.1499125	total: 26.7s	remaining: 56.3s
322:	learn: 0.1499113	total: 26.8s	remaining: 56.3s
323:	learn: 0.1499106	total: 26.9s	remaining: 56.1s
324:	learn: 0.1499072	total: 27s	remaining: 56s
325:	learn: 0.1499052	total: 27.1s	remaining: 55.9s
326:	learn: 0.1499052	total: 27.1s	remaining: 55.8s
327:	learn: 0.1499052	total: 27.2s	remaining: 55.7s
328:	learn: 0.1499051	total: 27.2s	remaining: 55.6s
329:	learn: 0.1499045	total: 27.3s	remaining: 55.5s
330:	learn: 0.1499036	total: 27.4s	remaining: 55.3s
331:	learn: 0.1499037	total: 27.4s	remaining: 55.2s
332:	learn: 0.1499032	total: 27.5s	remaining: 55.2s
333:	learn: 0.1499007	total: 27.6s	remaining: 55.1s
334:	learn: 0.1499008	total: 27.7s	remaining: 55s
335:	learn: 0.1499008	total: 27.7s	remaining: 54.8s
336:	learn: 0.1499004	total: 27.8s	remaining: 54.7s
337:	learn: 0.1498994	total: 27.9s	remaining: 54.6s
338:	learn: 0.1498

479:	learn: 0.1498000	total: 39s	remaining: 42.2s
480:	learn: 0.1497997	total: 39s	remaining: 42.1s
481:	learn: 0.1497982	total: 39.1s	remaining: 42s
482:	learn: 0.1497979	total: 39.2s	remaining: 41.9s
483:	learn: 0.1497976	total: 39.3s	remaining: 41.8s
484:	learn: 0.1497976	total: 39.3s	remaining: 41.7s
485:	learn: 0.1497982	total: 39.4s	remaining: 41.7s
486:	learn: 0.1497977	total: 39.5s	remaining: 41.6s
487:	learn: 0.1497967	total: 39.6s	remaining: 41.5s
488:	learn: 0.1497969	total: 39.6s	remaining: 41.4s
489:	learn: 0.1497961	total: 39.7s	remaining: 41.3s
490:	learn: 0.1497960	total: 39.8s	remaining: 41.3s
491:	learn: 0.1497954	total: 39.9s	remaining: 41.2s
492:	learn: 0.1497948	total: 40s	remaining: 41.1s
493:	learn: 0.1497938	total: 40s	remaining: 41s
494:	learn: 0.1497939	total: 40.1s	remaining: 40.9s
495:	learn: 0.1497940	total: 40.2s	remaining: 40.8s
496:	learn: 0.1497918	total: 40.3s	remaining: 40.7s
497:	learn: 0.1497919	total: 40.3s	remaining: 40.7s
498:	learn: 0.1497909	to

641:	learn: 0.1497191	total: 51.2s	remaining: 28.5s
642:	learn: 0.1497187	total: 51.2s	remaining: 28.4s
643:	learn: 0.1497188	total: 51.3s	remaining: 28.4s
644:	learn: 0.1497163	total: 51.4s	remaining: 28.3s
645:	learn: 0.1497160	total: 51.5s	remaining: 28.2s
646:	learn: 0.1497168	total: 51.5s	remaining: 28.1s
647:	learn: 0.1497148	total: 51.6s	remaining: 28s
648:	learn: 0.1497147	total: 51.7s	remaining: 27.9s
649:	learn: 0.1497134	total: 51.8s	remaining: 27.9s
650:	learn: 0.1497134	total: 51.8s	remaining: 27.8s
651:	learn: 0.1497133	total: 51.9s	remaining: 27.7s
652:	learn: 0.1497129	total: 52s	remaining: 27.6s
653:	learn: 0.1497127	total: 52s	remaining: 27.5s
654:	learn: 0.1497128	total: 52.1s	remaining: 27.4s
655:	learn: 0.1497114	total: 52.2s	remaining: 27.4s
656:	learn: 0.1497114	total: 52.2s	remaining: 27.3s
657:	learn: 0.1497112	total: 52.3s	remaining: 27.2s
658:	learn: 0.1497112	total: 52.4s	remaining: 27.1s
659:	learn: 0.1497112	total: 52.5s	remaining: 27s
660:	learn: 0.149710

802:	learn: 0.1496597	total: 1m 2s	remaining: 15.4s
803:	learn: 0.1496563	total: 1m 3s	remaining: 15.4s
804:	learn: 0.1496563	total: 1m 3s	remaining: 15.3s
805:	learn: 0.1496564	total: 1m 3s	remaining: 15.2s
806:	learn: 0.1496563	total: 1m 3s	remaining: 15.1s
807:	learn: 0.1496562	total: 1m 3s	remaining: 15s
808:	learn: 0.1496562	total: 1m 3s	remaining: 15s
809:	learn: 0.1496555	total: 1m 3s	remaining: 14.9s
810:	learn: 0.1496556	total: 1m 3s	remaining: 14.8s
811:	learn: 0.1496555	total: 1m 3s	remaining: 14.7s
812:	learn: 0.1496556	total: 1m 3s	remaining: 14.6s
813:	learn: 0.1496552	total: 1m 3s	remaining: 14.6s
814:	learn: 0.1496531	total: 1m 3s	remaining: 14.5s
815:	learn: 0.1496531	total: 1m 3s	remaining: 14.4s
816:	learn: 0.1496523	total: 1m 3s	remaining: 14.3s
817:	learn: 0.1496506	total: 1m 4s	remaining: 14.2s
818:	learn: 0.1496506	total: 1m 4s	remaining: 14.2s
819:	learn: 0.1496505	total: 1m 4s	remaining: 14.1s
820:	learn: 0.1496502	total: 1m 4s	remaining: 14s
821:	learn: 0.1496

961:	learn: 0.1496090	total: 1m 14s	remaining: 2.94s
962:	learn: 0.1496088	total: 1m 14s	remaining: 2.86s
963:	learn: 0.1496089	total: 1m 14s	remaining: 2.78s
964:	learn: 0.1496088	total: 1m 14s	remaining: 2.71s
965:	learn: 0.1496086	total: 1m 14s	remaining: 2.63s
966:	learn: 0.1496083	total: 1m 14s	remaining: 2.55s
967:	learn: 0.1496083	total: 1m 14s	remaining: 2.48s
968:	learn: 0.1496084	total: 1m 14s	remaining: 2.4s
969:	learn: 0.1496083	total: 1m 15s	remaining: 2.32s
970:	learn: 0.1496084	total: 1m 15s	remaining: 2.24s
971:	learn: 0.1496081	total: 1m 15s	remaining: 2.16s
972:	learn: 0.1496081	total: 1m 15s	remaining: 2.09s
973:	learn: 0.1496058	total: 1m 15s	remaining: 2.01s
974:	learn: 0.1496058	total: 1m 15s	remaining: 1.93s
975:	learn: 0.1496059	total: 1m 15s	remaining: 1.85s
976:	learn: 0.1496046	total: 1m 15s	remaining: 1.78s
977:	learn: 0.1496045	total: 1m 15s	remaining: 1.7s
978:	learn: 0.1496042	total: 1m 15s	remaining: 1.62s
979:	learn: 0.1496040	total: 1m 15s	remaining: 1

StackingClassifier(estimators=[('rf',
                                RandomForestClassifier(n_estimators=300,
                                                       n_jobs=-1,
                                                       random_state=42)),
                               ('xgb',
                                XGBClassifier(base_score=None, booster=None,
                                              callbacks=None,
                                              colsample_bylevel=None,
                                              colsample_bynode=None,
                                              colsample_bytree=0.7049278532210257,
                                              early_stopping_rounds=None,
                                              enable_categorical=False,
                                              eval_metric=None,
                                              gamma=0.10601111353229488,
                                              gpu_...
            

In [57]:
pred = stack_clf.predict_proba(X_val)[:,1]

# Calculate our Metric - accuracy
loss = log_loss(y_val, pred)

In [59]:
loss

0.21242543247170292