<center>
<img src="../../img/ods_stickers.jpg">
## Открытый курс по машинному обучению. Сессия № 2
Автор материала: программист-исследователь Mail.ru Group, старший преподаватель Факультета Компьютерных Наук ВШЭ Юрий Кашницкий. Материал распространяется на условиях лицензии [Creative Commons CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/). Можно использовать в любых целях (редактировать, поправлять и брать за основу), кроме коммерческих, но с обязательным упоминанием автора материала.

# <center>Тема 10. Бустинг
## <center> Часть 10. Продвинутые методы работы с категориальными признаками и CatBoost

In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max.columns', 100)
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

Считаем данные и посмотрим на первые несколько строк. Видим, что у нас тут немало категориальных признаков.

In [2]:
df = pd.read_csv('../../data/bank.csv')

In [3]:
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,0
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,0
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,0
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,0
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,0


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4521 entries, 0 to 4520
Data columns (total 17 columns):
age          4521 non-null int64
job          4521 non-null object
marital      4521 non-null object
education    4521 non-null object
default      4521 non-null object
balance      4521 non-null int64
housing      4521 non-null object
loan         4521 non-null object
contact      4521 non-null object
day          4521 non-null int64
month        4521 non-null object
duration     4521 non-null int64
campaign     4521 non-null int64
pdays        4521 non-null int64
previous     4521 non-null int64
poutcome     4521 non-null object
y            4521 non-null int64
dtypes: int64(8), object(9)
memory usage: 600.5+ KB


Всего 9 признаков со строковыми значениями.

In [5]:
df.columns[df.dtypes == 'object']

Index(['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact',
       'month', 'poutcome'],
      dtype='object')

## Без категориальных признаков
Попытаемся сначала просто проигнорировать категориальные признаки. Обучим случайный лес и посмотрим на ROC AUC на кросс-валидации и на отоженной выборке. Это будет наш бейзлайн. 

In [9]:
df_no_cat, y = df.loc[:, df.dtypes != 'object'].drop('y', axis=1), df['y']

In [10]:
df_no_cat_part, df_no_cat_valid, y_train_part, y_valid = train_test_split(df_no_cat, y,
                                                                            test_size=.3, 
                                                                            stratify=y,
                                                                            random_state=17)

In [11]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=17)

In [12]:
forest = RandomForestClassifier(random_state=17)

In [14]:
np.mean(cross_val_score(forest, df_no_cat_part, y_train_part, cv=skf, scoring='roc_auc'));



In [15]:
forest.fit(df_no_cat_part, y_train_part)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=17, verbose=0,
                       warm_start=False)

In [16]:
roc_auc_score(y_valid, forest.predict_proba(df_no_cat_valid)[:, 1])

0.8246600055509299

In [17]:
forest.feature_importances_

array([0.1473972 , 0.1775853 , 0.12930319, 0.35103475, 0.06646041,
       0.08351306, 0.0447061 ])

## LabelEncoder для категориальных признаков
Сделаем то же самое, но попробуем закодировать категориальные признаки по-простому: с помощью `LabelEncoder`.

In [18]:
from sklearn.preprocessing import LabelEncoder

In [19]:
label_encoder = LabelEncoder()

In [20]:
df_cat_label_enc = df.copy().drop('y', axis=1)
for col in df.columns[df.dtypes == 'object']:
    df_cat_label_enc[col] = label_encoder.fit_transform(df_cat_label_enc[col])

In [21]:
df_cat_label_enc.shape

(4521, 16)

In [22]:
df_cat_label_enc.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome
0,30,10,1,0,0,1787,0,0,0,19,10,79,1,-1,0,3
1,33,7,1,1,0,4789,1,1,0,11,8,220,1,339,4,0
2,35,4,2,2,0,1350,1,0,0,16,0,185,1,330,1,0
3,30,4,1,2,0,1476,1,1,2,3,6,199,4,-1,0,3
4,59,1,1,1,0,0,1,0,2,5,8,226,1,-1,0,3


In [17]:
df_cat_label_enc_part, df_cat_label_enc_valid = train_test_split(df_cat_label_enc, test_size=.3, 
                                                    stratify=y, random_state=17)

In [18]:
np.mean(cross_val_score(forest, df_cat_label_enc_part, y_train_part, cv=skf, scoring='roc_auc'))

0.84296976359098053

In [19]:
forest.fit(df_cat_label_enc_part, y_train_part)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=17, verbose=0, warm_start=False)

In [20]:
roc_auc_score(y_valid, forest.predict_proba(df_cat_label_enc_valid)[:, 1])

0.84814737718567856

## Бинаризация категориальных признаков (dummies, OHE)
Теперь сделаем то, что обычно по умолчанию и делают – бинаризацию категориальных признаков. Dummy-признаки, One-Hot Encoding... с небольшими различиями это об одном же - для каждого значения каждого категориального признака завести свой бинарный признак.

In [23]:
df_cat_dummies = pd.get_dummies(df, columns=df.columns[df.dtypes == 'object']).drop('y', axis=1)

In [24]:
df_cat_dummies.shape

(4521, 51)

In [26]:
df_cat_dummies.head()

Unnamed: 0,age,balance,day,duration,campaign,pdays,previous,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,job_technician,job_unemployed,job_unknown,marital_divorced,marital_married,marital_single,education_primary,education_secondary,education_tertiary,education_unknown,default_no,default_yes,housing_no,housing_yes,loan_no,loan_yes,contact_cellular,contact_telephone,contact_unknown,month_apr,month_aug,month_dec,month_feb,month_jan,month_jul,month_jun,month_mar,month_may,month_nov,month_oct,month_sep,poutcome_failure,poutcome_other,poutcome_success,poutcome_unknown
0,30,1787,19,79,1,-1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1
1,33,4789,11,220,1,339,4,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0
2,35,1350,16,185,1,330,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
3,30,1476,3,199,4,-1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1
4,59,0,5,226,1,-1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1


In [27]:
df_cat_dummies_part, df_cat_dummies_valid = train_test_split(df_cat_dummies, test_size=.3, 
                                                    stratify=y, random_state=17)

In [28]:
np.mean(cross_val_score(forest, df_cat_dummies_part, y_train_part, cv=skf, scoring='roc_auc'))

0.84579530735273

In [29]:
forest.fit(df_cat_dummies_part, y_train_part)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=17, verbose=0,
                       warm_start=False)

In [40]:
roc_auc_score(y_valid, forest.predict_proba(df_cat_dummies_valid)[:, 1])

0.8587742052563034

In [38]:
forest.predict_proba(df_cat_dummies_valid)[:,1]

array([0. , 0.2, 0. , ..., 0.4, 0.1, 0. ])

## Попарные взаимодействия признаков
Пока лес все еще лучше регрессии (хотя мы не тюнили гиперпараметры, но и не будем). Мы хотим идти дальше. Мощной техникой для работы с категориальными признаками будет учет попарных взаимодействий признаков (feature interactions). Построим попарные взаимодействия всех признаков. Вообще тут можно пойти дальше и строить взаимодействия трех и более признаков. Owen Zhang [как-то строил](https://www.youtube.com/watch?v=LgLcfZjNF44) даже 7-way interactions. Чего не сделаешь ради победы на Kaggle! :)

In [41]:
df_interact = df.copy()

In [43]:
cat_features = df.columns[df.dtypes == 'object']
for i, col1 in enumerate(cat_features):
    print(i, col1)
    for j, col2 in enumerate(cat_features[i + 1:]):
        print('WITH',j, col2)
        df_interact[col1 + '_' + col2] = df_interact[col1] + '_' + df_interact[col2] 

0 job
WITH 0 marital
WITH 1 education
WITH 2 default
WITH 3 housing
WITH 4 loan
WITH 5 contact
WITH 6 month
WITH 7 poutcome
1 marital
WITH 0 education
WITH 1 default
WITH 2 housing
WITH 3 loan
WITH 4 contact
WITH 5 month
WITH 6 poutcome
2 education
WITH 0 default
WITH 1 housing
WITH 2 loan
WITH 3 contact
WITH 4 month
WITH 5 poutcome
3 default
WITH 0 housing
WITH 1 loan
WITH 2 contact
WITH 3 month
WITH 4 poutcome
4 housing
WITH 0 loan
WITH 1 contact
WITH 2 month
WITH 3 poutcome
5 loan
WITH 0 contact
WITH 1 month
WITH 2 poutcome
6 contact
WITH 0 month
WITH 1 poutcome
7 month
WITH 0 poutcome
8 poutcome


In [29]:
df_interact.shape

(4521, 53)

In [30]:
df_interact.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y,job_marital,job_education,job_default,job_housing,job_loan,job_contact,job_month,job_poutcome,marital_education,marital_default,marital_housing,marital_loan,marital_contact,marital_month,marital_poutcome,education_default,education_housing,education_loan,education_contact,education_month,education_poutcome,default_housing,default_loan,default_contact,default_month,default_poutcome,housing_loan,housing_contact,housing_month,housing_poutcome,loan_contact,loan_month,loan_poutcome,contact_month,contact_poutcome,month_poutcome
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,0,unemployed_married,unemployed_primary,unemployed_no,unemployed_no,unemployed_no,unemployed_cellular,unemployed_oct,unemployed_unknown,married_primary,married_no,married_no,married_no,married_cellular,married_oct,married_unknown,primary_no,primary_no,primary_no,primary_cellular,primary_oct,primary_unknown,no_no,no_no,no_cellular,no_oct,no_unknown,no_no,no_cellular,no_oct,no_unknown,no_cellular,no_oct,no_unknown,cellular_oct,cellular_unknown,oct_unknown
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,0,services_married,services_secondary,services_no,services_yes,services_yes,services_cellular,services_may,services_failure,married_secondary,married_no,married_yes,married_yes,married_cellular,married_may,married_failure,secondary_no,secondary_yes,secondary_yes,secondary_cellular,secondary_may,secondary_failure,no_yes,no_yes,no_cellular,no_may,no_failure,yes_yes,yes_cellular,yes_may,yes_failure,yes_cellular,yes_may,yes_failure,cellular_may,cellular_failure,may_failure
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,0,management_single,management_tertiary,management_no,management_yes,management_no,management_cellular,management_apr,management_failure,single_tertiary,single_no,single_yes,single_no,single_cellular,single_apr,single_failure,tertiary_no,tertiary_yes,tertiary_no,tertiary_cellular,tertiary_apr,tertiary_failure,no_yes,no_no,no_cellular,no_apr,no_failure,yes_no,yes_cellular,yes_apr,yes_failure,no_cellular,no_apr,no_failure,cellular_apr,cellular_failure,apr_failure
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,0,management_married,management_tertiary,management_no,management_yes,management_yes,management_unknown,management_jun,management_unknown,married_tertiary,married_no,married_yes,married_yes,married_unknown,married_jun,married_unknown,tertiary_no,tertiary_yes,tertiary_yes,tertiary_unknown,tertiary_jun,tertiary_unknown,no_yes,no_yes,no_unknown,no_jun,no_unknown,yes_yes,yes_unknown,yes_jun,yes_unknown,yes_unknown,yes_jun,yes_unknown,unknown_jun,unknown_unknown,jun_unknown
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,0,blue-collar_married,blue-collar_secondary,blue-collar_no,blue-collar_yes,blue-collar_no,blue-collar_unknown,blue-collar_may,blue-collar_unknown,married_secondary,married_no,married_yes,married_no,married_unknown,married_may,married_unknown,secondary_no,secondary_yes,secondary_no,secondary_unknown,secondary_may,secondary_unknown,no_yes,no_no,no_unknown,no_may,no_unknown,yes_no,yes_unknown,yes_may,yes_unknown,no_unknown,no_may,no_unknown,unknown_may,unknown_unknown,may_unknown


## Бинаризация категориальных признаков (dummies, OHE) + попарные взаимодействия
Получилось аж 824 бинарных признака – многовато для такой задачи, и тут случайный лес начинает не справляться, да и логистическая регрессия сработала хуже, чем в прошлый раз.

In [44]:
df_interact_cat_dummies = pd.get_dummies(df_interact, columns=df_interact.columns[df_interact.dtypes == 'object']).drop('y', axis=1)

In [45]:
df_interact_cat_dummies.shape

(4521, 824)

In [46]:
df_interact_cat_dummies_part, df_interact_cat_dummies_valid = train_test_split(df_interact_cat_dummies, test_size=.3, 
                                                    stratify=y, random_state=17)

In [47]:
np.mean(cross_val_score(forest, df_interact_cat_dummies_part, y_train_part, cv=skf, scoring='roc_auc'))

0.7919934473777258

In [48]:
forest.fit(df_interact_cat_dummies_part, y_train_part)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=17, verbose=0,
                       warm_start=False)

In [49]:
roc_auc_score(y_valid, forest.predict_proba(df_interact_cat_dummies_valid)[:, 1])

0.7673039561049553

Случайному лесу уже тяжеловато, когда признаков так много, а вот логистической регрессии – норм.

In [50]:
from sklearn.linear_model import LogisticRegression
logit = LogisticRegression(random_state=17)

In [51]:
np.mean(cross_val_score(logit, df_interact_cat_dummies_part, y_train_part, cv=skf, scoring='roc_auc'))



0.8743901694037088

In [52]:
logit.fit(df_interact_cat_dummies_part, y_train_part)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=17, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [53]:
roc_auc_score(y_valid, logit.predict_proba(df_interact_cat_dummies_valid)[:, 1])

0.8807243963363864

## Mean Target
Теперь будем использовать технику кодирования категориальных признаков средним значением целевого признака. Это очень мощная техника, правда, надо умело ее использовать – легко переобучиться. 
Основная идея – для каждого значения категориального признака посчитать среднее значение целевого признака и заменить категориальный признак на посчитанные средние. Правда, считать средние надо на кросс-валидации, а то легко переобучиться. 
Но далее я адресую к видео топ-участников соревнований Kaggle, от них можно узнать про эту технику из первых уст. 
- [Специализация](https://www.coursera.org/specializations/aml) "Advanced Machine Learning" на Coursera, [курс](https://www.coursera.org/learn/competitive-data-science)", How to Win a Data Science Competition: Learn from Top Kagglers", несколько видео посвящено различным способам построяния признаков с задействованием целевого, и как при этом не переобучиться. Рассказывает Дмитрий Алтухов
- [Лекция](https://www.youtube.com/watch?v=g335THJxkto) с презентацией решения конкурса Kaggle BNP paribas, Станислав Семенов

Похожая техника [используется](https://tech.yandex.com/catboost/doc/dg/concepts/algorithm-main-stages_cat-to-numberic-docpage/) и в CatBoost.

Для начала давайте таким образом закодируем исходные категориальные признаки.

In [54]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

In [55]:
train_df, y = df.copy(), df['y']
train_df_part, valid_df, y_train_part, y_valid = train_test_split(train_df.drop('y', axis=1), y, 
                                                                  test_size=.3, stratify=y, 
                                                                               random_state=17)

In [58]:
y_train_part.mean()

0.11536030341340076

In [59]:
def mean_target_enc(train_df, y_train, valid_df, skf):
    import warnings
    warnings.filterwarnings('ignore')
    
    glob_mean = y_train.mean()
    train_df = pd.concat([train_df, pd.Series(y_train, name='y')], axis=1)
    new_train_df = train_df.copy()
    
    cat_features = train_df.columns[train_df.dtypes == 'object'].tolist()    

    for col in cat_features:
        new_train_df[col + '_mean_target'] = [glob_mean for _ in range(new_train_df.shape[0])]

    for train_idx, valid_idx in skf.split(train_df, y_train):
        train_df_cv, valid_df_cv = train_df.iloc[train_idx, :], train_df.iloc[valid_idx, :]

        for col in cat_features:
            
            means = valid_df_cv[col].map(train_df_cv.groupby(col)['y'].mean())
            valid_df_cv[col + '_mean_target'] = means.fillna(glob_mean)
            
        new_train_df.iloc[valid_idx] = valid_df_cv
    
    new_train_df.drop(cat_features + ['y'], axis=1, inplace=True)
    
    for col in cat_features:
        means = valid_df[col].map(train_df.groupby(col)['y'].mean())
        valid_df[col + '_mean_target'] = means.fillna(glob_mean)
        
    valid_df.drop(train_df.columns[train_df.dtypes == 'object'], axis=1, inplace=True)
    
    return new_train_df, valid_df

In [60]:
train_mean_target_part, valid_mean_target = mean_target_enc(train_df_part, y_train_part, valid_df, skf)

In [64]:
train_mean_target_part.head()

Unnamed: 0,age,balance,day,duration,campaign,pdays,previous,job_mean_target,marital_mean_target,education_mean_target,default_mean_target,housing_mean_target,loan_mean_target,contact_mean_target,month_mean_target,poutcome_mean_target
482,40,694,4,97,4,196,5,0.112128,0.10103,0.111541,0.115849,0.092254,0.122611,0.139435,0.163934,0.120301
566,26,211,29,168,3,-1,0,0.106383,0.128167,0.142292,0.11557,0.143369,0.124474,0.142503,0.123457,0.089806
2209,59,0,18,247,4,-1,0,0.214286,0.100381,0.103987,0.115169,0.144567,0.122782,0.140405,0.094763,0.091522
725,31,1010,15,385,3,364,9,0.132979,0.10418,0.142292,0.11557,0.093286,0.124474,0.142503,0.074264,0.209091
1025,56,1044,3,353,2,-1,0,0.222222,0.10418,0.103825,0.11557,0.143369,0.124474,0.139665,0.085642,0.089806


In [61]:
np.mean(cross_val_score(forest, train_mean_target_part, y_train_part, cv=skf, scoring='roc_auc'))

0.8439875686594386

In [65]:
forest.fit(train_mean_target_part, y_train_part)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=17, verbose=0,
                       warm_start=False)

In [66]:
roc_auc_score(y_valid, forest.predict_proba(valid_mean_target)[:, 1])

0.8670712440487627

## Mean Target + попарные взаимодействия

In [67]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

In [68]:
train_df, y = df_interact.drop('y', axis=1).copy(), df_interact['y']
train_df_part, valid_df, y_train_part, y_valid = train_test_split(train_df, y, 
                                                                  test_size=.3, stratify=y, 
                                                                               random_state=17)

In [69]:
train_mean_target_part, valid_mean_target = mean_target_enc(train_df_part, y_train_part, valid_df, skf)

In [70]:
np.mean(cross_val_score(forest, train_mean_target_part, y_train_part, cv=skf, scoring='roc_auc'))

0.8350562884869193

In [71]:
forest.fit(train_mean_target_part, y_train_part)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=17, verbose=0,
                       warm_start=False)

In [72]:
roc_auc_score(y_valid, forest.predict_proba(valid_mean_target)[:, 1])

0.8690167381882619

Опять лучше справляется логистическая регрессия.

In [73]:
np.mean(cross_val_score(logit, train_mean_target_part, y_train_part, cv=skf, scoring='roc_auc'))

0.8915079258110478

In [74]:
logit.fit(train_mean_target_part, y_train_part)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=17, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [75]:
roc_auc_score(y_valid, logit.predict_proba(valid_mean_target)[:, 1])

0.9053726595358569

## Catboost
В библиотеке [Catboost](https://catboost.yandex), помимо всего прочего, реализована как раз техника кодирования категориальных значений средним значением целевого признака. Результаты получаются хорошими именно когда в данных много важных категориальных признаков. Из минусов можно отметить меньшую (пока что) производительность в сравнении с Xgboost и LightGBM.

In [76]:
from catboost import CatBoostClassifier

In [77]:
ctb = CatBoostClassifier(random_seed=17)

In [78]:
train_df, y = df.drop('y', axis=1), df['y']
train_df_part, valid_df, y_train_part, y_valid = train_test_split(train_df, y, 
                                                                  test_size=.3, stratify=y, 
                                                                  random_state=17)

In [95]:
train_df_part.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3164 entries, 482 to 4173
Data columns (total 16 columns):
age          3164 non-null int64
job          3164 non-null object
marital      3164 non-null object
education    3164 non-null object
default      3164 non-null object
balance      3164 non-null int64
housing      3164 non-null object
loan         3164 non-null object
contact      3164 non-null object
day          3164 non-null int64
month        3164 non-null object
duration     3164 non-null int64
campaign     3164 non-null int64
pdays        3164 non-null int64
previous     3164 non-null int64
poutcome     3164 non-null object
dtypes: int64(7), object(9)
memory usage: 420.2+ KB


In [79]:
cat_features_idx = np.where(train_df_part.dtypes == 'object')[0].tolist()

In [81]:
train_df_part.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome
482,40,technician,married,secondary,no,694,yes,no,cellular,4,feb,97,4,196,5,failure
566,26,self-employed,single,tertiary,no,211,no,no,cellular,29,jan,168,3,-1,0,unknown
2209,59,retired,married,secondary,no,0,no,no,cellular,18,jul,247,4,-1,0,unknown
725,31,management,married,tertiary,no,1010,yes,no,cellular,15,may,385,3,364,9,other
1025,56,retired,married,secondary,no,1044,no,no,telephone,3,jul,353,2,-1,0,unknown


In [80]:
cat_features_idx

[1, 2, 3, 4, 6, 7, 8, 10, 15]

In [82]:
%%time
cv_scores = []
for train_idx, test_idx in skf.split(train_df_part, y_train_part):
    cv_train_df, cv_valid_df = train_df_part.iloc[train_idx, :], train_df_part.iloc[test_idx, :]
    y_cv_train, y_cv_valid = y_train_part.iloc[train_idx], y_train_part.iloc[test_idx]
    
    ctb.fit(cv_train_df, y_cv_train,
        cat_features=cat_features_idx);
    
    cv_scores.append(roc_auc_score(y_cv_valid, ctb.predict_proba(cv_valid_df)[:, 1]))

Learning rate set to 0.02179
0:	learn: 0.6697101	total: 77.5ms	remaining: 1m 17s
1:	learn: 0.6475756	total: 94ms	remaining: 46.9s
2:	learn: 0.6327331	total: 103ms	remaining: 34.1s
3:	learn: 0.6125712	total: 122ms	remaining: 30.4s
4:	learn: 0.5972384	total: 139ms	remaining: 27.7s
5:	learn: 0.5793834	total: 160ms	remaining: 26.4s
6:	learn: 0.5624773	total: 172ms	remaining: 24.4s
7:	learn: 0.5503571	total: 181ms	remaining: 22.5s
8:	learn: 0.5358257	total: 197ms	remaining: 21.7s
9:	learn: 0.5218813	total: 214ms	remaining: 21.2s
10:	learn: 0.5101448	total: 222ms	remaining: 20s
11:	learn: 0.5004410	total: 228ms	remaining: 18.8s
12:	learn: 0.4866016	total: 247ms	remaining: 18.7s
13:	learn: 0.4780427	total: 255ms	remaining: 18s
14:	learn: 0.4691578	total: 263ms	remaining: 17.3s
15:	learn: 0.4591666	total: 272ms	remaining: 16.7s
16:	learn: 0.4493871	total: 292ms	remaining: 16.9s
17:	learn: 0.4388486	total: 304ms	remaining: 16.6s
18:	learn: 0.4296159	total: 324ms	remaining: 16.7s
19:	learn: 0.42

161:	learn: 0.2245866	total: 2.96s	remaining: 15.3s
162:	learn: 0.2243711	total: 2.99s	remaining: 15.3s
163:	learn: 0.2243152	total: 3s	remaining: 15.3s
164:	learn: 0.2240901	total: 3.02s	remaining: 15.3s
165:	learn: 0.2240067	total: 3.04s	remaining: 15.3s
166:	learn: 0.2239649	total: 3.05s	remaining: 15.2s
167:	learn: 0.2238831	total: 3.06s	remaining: 15.2s
168:	learn: 0.2238096	total: 3.07s	remaining: 15.1s
169:	learn: 0.2237452	total: 3.08s	remaining: 15s
170:	learn: 0.2235366	total: 3.1s	remaining: 15s
171:	learn: 0.2234548	total: 3.12s	remaining: 15s
172:	learn: 0.2234021	total: 3.12s	remaining: 14.9s
173:	learn: 0.2232606	total: 3.14s	remaining: 14.9s
174:	learn: 0.2230554	total: 3.15s	remaining: 14.9s
175:	learn: 0.2228509	total: 3.17s	remaining: 14.8s
176:	learn: 0.2227472	total: 3.19s	remaining: 14.9s
177:	learn: 0.2225303	total: 3.22s	remaining: 14.9s
178:	learn: 0.2224975	total: 3.23s	remaining: 14.8s
179:	learn: 0.2224537	total: 3.24s	remaining: 14.8s
180:	learn: 0.2222124	

329:	learn: 0.2068280	total: 5.73s	remaining: 11.6s
330:	learn: 0.2065878	total: 5.75s	remaining: 11.6s
331:	learn: 0.2064039	total: 5.77s	remaining: 11.6s
332:	learn: 0.2063704	total: 5.79s	remaining: 11.6s
333:	learn: 0.2062674	total: 5.81s	remaining: 11.6s
334:	learn: 0.2062094	total: 5.82s	remaining: 11.6s
335:	learn: 0.2061898	total: 5.83s	remaining: 11.5s
336:	learn: 0.2061207	total: 5.84s	remaining: 11.5s
337:	learn: 0.2060259	total: 5.86s	remaining: 11.5s
338:	learn: 0.2059834	total: 5.87s	remaining: 11.5s
339:	learn: 0.2059316	total: 5.88s	remaining: 11.4s
340:	learn: 0.2058016	total: 5.91s	remaining: 11.4s
341:	learn: 0.2057860	total: 5.92s	remaining: 11.4s
342:	learn: 0.2057544	total: 5.93s	remaining: 11.4s
343:	learn: 0.2057413	total: 5.94s	remaining: 11.3s
344:	learn: 0.2057041	total: 5.95s	remaining: 11.3s
345:	learn: 0.2056645	total: 5.96s	remaining: 11.3s
346:	learn: 0.2054840	total: 5.98s	remaining: 11.3s
347:	learn: 0.2052288	total: 6s	remaining: 11.2s
348:	learn: 0.2

488:	learn: 0.1903731	total: 8.88s	remaining: 9.28s
489:	learn: 0.1902892	total: 8.9s	remaining: 9.26s
490:	learn: 0.1902607	total: 8.92s	remaining: 9.25s
491:	learn: 0.1901372	total: 8.94s	remaining: 9.23s
492:	learn: 0.1901149	total: 8.96s	remaining: 9.22s
493:	learn: 0.1900113	total: 8.99s	remaining: 9.21s
494:	learn: 0.1899090	total: 9.01s	remaining: 9.19s
495:	learn: 0.1898813	total: 9.03s	remaining: 9.18s
496:	learn: 0.1898679	total: 9.06s	remaining: 9.17s
497:	learn: 0.1897633	total: 9.08s	remaining: 9.15s
498:	learn: 0.1896690	total: 9.11s	remaining: 9.14s
499:	learn: 0.1896286	total: 9.13s	remaining: 9.13s
500:	learn: 0.1895307	total: 9.15s	remaining: 9.12s
501:	learn: 0.1894903	total: 9.18s	remaining: 9.1s
502:	learn: 0.1894297	total: 9.2s	remaining: 9.09s
503:	learn: 0.1894086	total: 9.22s	remaining: 9.08s
504:	learn: 0.1892852	total: 9.25s	remaining: 9.07s
505:	learn: 0.1892530	total: 9.28s	remaining: 9.06s
506:	learn: 0.1892222	total: 9.3s	remaining: 9.04s
507:	learn: 0.18

651:	learn: 0.1765429	total: 12.7s	remaining: 6.76s
652:	learn: 0.1764438	total: 12.7s	remaining: 6.74s
653:	learn: 0.1763308	total: 12.7s	remaining: 6.72s
654:	learn: 0.1762066	total: 12.7s	remaining: 6.7s
655:	learn: 0.1761157	total: 12.8s	remaining: 6.69s
656:	learn: 0.1760929	total: 12.8s	remaining: 6.67s
657:	learn: 0.1760265	total: 12.8s	remaining: 6.65s
658:	learn: 0.1760130	total: 12.8s	remaining: 6.63s
659:	learn: 0.1759766	total: 12.8s	remaining: 6.62s
660:	learn: 0.1758634	total: 12.9s	remaining: 6.6s
661:	learn: 0.1758349	total: 12.9s	remaining: 6.58s
662:	learn: 0.1756515	total: 12.9s	remaining: 6.56s
663:	learn: 0.1753697	total: 12.9s	remaining: 6.54s
664:	learn: 0.1751571	total: 12.9s	remaining: 6.52s
665:	learn: 0.1750806	total: 13s	remaining: 6.5s
666:	learn: 0.1748543	total: 13s	remaining: 6.49s
667:	learn: 0.1748263	total: 13s	remaining: 6.47s
668:	learn: 0.1747474	total: 13s	remaining: 6.45s
669:	learn: 0.1747129	total: 13.1s	remaining: 6.44s
670:	learn: 0.1746914	t

814:	learn: 0.1651875	total: 16.4s	remaining: 3.73s
815:	learn: 0.1651864	total: 16.5s	remaining: 3.71s
816:	learn: 0.1651653	total: 16.5s	remaining: 3.69s
817:	learn: 0.1651317	total: 16.5s	remaining: 3.67s
818:	learn: 0.1651152	total: 16.5s	remaining: 3.65s
819:	learn: 0.1650261	total: 16.5s	remaining: 3.63s
820:	learn: 0.1649661	total: 16.6s	remaining: 3.61s
821:	learn: 0.1648961	total: 16.6s	remaining: 3.59s
822:	learn: 0.1648062	total: 16.6s	remaining: 3.57s
823:	learn: 0.1647591	total: 16.6s	remaining: 3.55s
824:	learn: 0.1647519	total: 16.6s	remaining: 3.53s
825:	learn: 0.1646037	total: 16.7s	remaining: 3.51s
826:	learn: 0.1645278	total: 16.7s	remaining: 3.49s
827:	learn: 0.1644984	total: 16.7s	remaining: 3.47s
828:	learn: 0.1644583	total: 16.7s	remaining: 3.45s
829:	learn: 0.1644507	total: 16.8s	remaining: 3.43s
830:	learn: 0.1643464	total: 16.8s	remaining: 3.41s
831:	learn: 0.1642277	total: 16.8s	remaining: 3.39s
832:	learn: 0.1641822	total: 16.8s	remaining: 3.37s
833:	learn: 

980:	learn: 0.1554484	total: 20.2s	remaining: 392ms
981:	learn: 0.1553623	total: 20.3s	remaining: 372ms
982:	learn: 0.1553577	total: 20.3s	remaining: 351ms
983:	learn: 0.1553470	total: 20.3s	remaining: 330ms
984:	learn: 0.1553284	total: 20.3s	remaining: 310ms
985:	learn: 0.1552802	total: 20.4s	remaining: 289ms
986:	learn: 0.1552570	total: 20.4s	remaining: 269ms
987:	learn: 0.1552249	total: 20.4s	remaining: 248ms
988:	learn: 0.1551742	total: 20.5s	remaining: 227ms
989:	learn: 0.1551137	total: 20.5s	remaining: 207ms
990:	learn: 0.1550338	total: 20.5s	remaining: 186ms
991:	learn: 0.1550136	total: 20.5s	remaining: 166ms
992:	learn: 0.1549160	total: 20.6s	remaining: 145ms
993:	learn: 0.1548480	total: 20.6s	remaining: 124ms
994:	learn: 0.1548268	total: 20.6s	remaining: 103ms
995:	learn: 0.1548140	total: 20.6s	remaining: 82.8ms
996:	learn: 0.1547916	total: 20.7s	remaining: 62.1ms
997:	learn: 0.1547293	total: 20.7s	remaining: 41.4ms
998:	learn: 0.1546903	total: 20.7s	remaining: 20.7ms
999:	lea

150:	learn: 0.2239172	total: 2.76s	remaining: 15.5s
151:	learn: 0.2239170	total: 2.77s	remaining: 15.4s
152:	learn: 0.2238948	total: 2.77s	remaining: 15.4s
153:	learn: 0.2236587	total: 2.79s	remaining: 15.4s
154:	learn: 0.2235016	total: 2.81s	remaining: 15.3s
155:	learn: 0.2233114	total: 2.84s	remaining: 15.3s
156:	learn: 0.2231185	total: 2.85s	remaining: 15.3s
157:	learn: 0.2230869	total: 2.85s	remaining: 15.2s
158:	learn: 0.2229077	total: 2.87s	remaining: 15.2s
159:	learn: 0.2227456	total: 2.89s	remaining: 15.2s
160:	learn: 0.2227053	total: 2.9s	remaining: 15.1s
161:	learn: 0.2224558	total: 2.92s	remaining: 15.1s
162:	learn: 0.2224445	total: 2.92s	remaining: 15s
163:	learn: 0.2221896	total: 2.94s	remaining: 15s
164:	learn: 0.2218990	total: 2.96s	remaining: 15s
165:	learn: 0.2217280	total: 2.98s	remaining: 15s
166:	learn: 0.2215518	total: 3s	remaining: 15s
167:	learn: 0.2212068	total: 3.02s	remaining: 15s
168:	learn: 0.2211077	total: 3.03s	remaining: 14.9s
169:	learn: 0.2208675	total:

312:	learn: 0.2046460	total: 5.36s	remaining: 11.8s
313:	learn: 0.2044724	total: 5.38s	remaining: 11.8s
314:	learn: 0.2042456	total: 5.4s	remaining: 11.7s
315:	learn: 0.2041778	total: 5.41s	remaining: 11.7s
316:	learn: 0.2041739	total: 5.42s	remaining: 11.7s
317:	learn: 0.2041627	total: 5.43s	remaining: 11.6s
318:	learn: 0.2038998	total: 5.45s	remaining: 11.6s
319:	learn: 0.2038525	total: 5.46s	remaining: 11.6s
320:	learn: 0.2038029	total: 5.47s	remaining: 11.6s
321:	learn: 0.2037573	total: 5.49s	remaining: 11.6s
322:	learn: 0.2036767	total: 5.51s	remaining: 11.5s
323:	learn: 0.2036189	total: 5.53s	remaining: 11.5s
324:	learn: 0.2035472	total: 5.55s	remaining: 11.5s
325:	learn: 0.2034305	total: 5.57s	remaining: 11.5s
326:	learn: 0.2034294	total: 5.58s	remaining: 11.5s
327:	learn: 0.2033930	total: 5.6s	remaining: 11.5s
328:	learn: 0.2032747	total: 5.63s	remaining: 11.5s
329:	learn: 0.2032268	total: 5.64s	remaining: 11.4s
330:	learn: 0.2031785	total: 5.65s	remaining: 11.4s
331:	learn: 0.

471:	learn: 0.1884640	total: 8.62s	remaining: 9.64s
472:	learn: 0.1882509	total: 8.64s	remaining: 9.63s
473:	learn: 0.1880971	total: 8.66s	remaining: 9.61s
474:	learn: 0.1880718	total: 8.69s	remaining: 9.6s
475:	learn: 0.1879759	total: 8.71s	remaining: 9.59s
476:	learn: 0.1879647	total: 8.73s	remaining: 9.57s
477:	learn: 0.1878012	total: 8.75s	remaining: 9.56s
478:	learn: 0.1877875	total: 8.78s	remaining: 9.55s
479:	learn: 0.1877035	total: 8.8s	remaining: 9.54s
480:	learn: 0.1874475	total: 8.82s	remaining: 9.52s
481:	learn: 0.1873254	total: 8.85s	remaining: 9.51s
482:	learn: 0.1873170	total: 8.87s	remaining: 9.49s
483:	learn: 0.1872307	total: 8.89s	remaining: 9.48s
484:	learn: 0.1870678	total: 8.91s	remaining: 9.46s
485:	learn: 0.1870387	total: 8.93s	remaining: 9.45s
486:	learn: 0.1869307	total: 8.95s	remaining: 9.43s
487:	learn: 0.1868018	total: 8.98s	remaining: 9.42s
488:	learn: 0.1866335	total: 9s	remaining: 9.4s
489:	learn: 0.1866274	total: 9.02s	remaining: 9.39s
490:	learn: 0.1864

636:	learn: 0.1723208	total: 12.4s	remaining: 7.06s
637:	learn: 0.1723104	total: 12.4s	remaining: 7.04s
638:	learn: 0.1720589	total: 12.4s	remaining: 7.03s
639:	learn: 0.1719296	total: 12.5s	remaining: 7.01s
640:	learn: 0.1718861	total: 12.5s	remaining: 6.99s
641:	learn: 0.1717717	total: 12.5s	remaining: 6.97s
642:	learn: 0.1716915	total: 12.5s	remaining: 6.95s
643:	learn: 0.1716863	total: 12.5s	remaining: 6.93s
644:	learn: 0.1716405	total: 12.6s	remaining: 6.92s
645:	learn: 0.1716032	total: 12.6s	remaining: 6.9s
646:	learn: 0.1715209	total: 12.6s	remaining: 6.88s
647:	learn: 0.1715060	total: 12.6s	remaining: 6.86s
648:	learn: 0.1714029	total: 12.7s	remaining: 6.85s
649:	learn: 0.1712032	total: 12.7s	remaining: 6.83s
650:	learn: 0.1711950	total: 12.7s	remaining: 6.81s
651:	learn: 0.1709758	total: 12.7s	remaining: 6.79s
652:	learn: 0.1708568	total: 12.7s	remaining: 6.77s
653:	learn: 0.1708243	total: 12.8s	remaining: 6.75s
654:	learn: 0.1707994	total: 12.8s	remaining: 6.74s
655:	learn: 0

800:	learn: 0.1605406	total: 16s	remaining: 3.98s
801:	learn: 0.1605199	total: 16.1s	remaining: 3.96s
802:	learn: 0.1605103	total: 16.1s	remaining: 3.95s
803:	learn: 0.1604913	total: 16.1s	remaining: 3.93s
804:	learn: 0.1604625	total: 16.1s	remaining: 3.91s
805:	learn: 0.1604135	total: 16.2s	remaining: 3.89s
806:	learn: 0.1603211	total: 16.2s	remaining: 3.87s
807:	learn: 0.1602760	total: 16.2s	remaining: 3.85s
808:	learn: 0.1602302	total: 16.2s	remaining: 3.83s
809:	learn: 0.1602271	total: 16.2s	remaining: 3.81s
810:	learn: 0.1602121	total: 16.3s	remaining: 3.79s
811:	learn: 0.1602047	total: 16.3s	remaining: 3.77s
812:	learn: 0.1601825	total: 16.3s	remaining: 3.75s
813:	learn: 0.1601209	total: 16.3s	remaining: 3.73s
814:	learn: 0.1601155	total: 16.4s	remaining: 3.71s
815:	learn: 0.1600962	total: 16.4s	remaining: 3.69s
816:	learn: 0.1599887	total: 16.4s	remaining: 3.67s
817:	learn: 0.1599565	total: 16.4s	remaining: 3.65s
818:	learn: 0.1599443	total: 16.4s	remaining: 3.63s
819:	learn: 0.

965:	learn: 0.1518630	total: 19.7s	remaining: 694ms
966:	learn: 0.1518592	total: 19.7s	remaining: 673ms
967:	learn: 0.1518277	total: 19.7s	remaining: 653ms
968:	learn: 0.1517364	total: 19.8s	remaining: 633ms
969:	learn: 0.1515821	total: 19.8s	remaining: 612ms
970:	learn: 0.1515401	total: 19.8s	remaining: 592ms
971:	learn: 0.1515301	total: 19.8s	remaining: 571ms
972:	learn: 0.1515261	total: 19.9s	remaining: 551ms
973:	learn: 0.1515090	total: 19.9s	remaining: 531ms
974:	learn: 0.1514535	total: 19.9s	remaining: 510ms
975:	learn: 0.1514168	total: 19.9s	remaining: 490ms
976:	learn: 0.1514095	total: 19.9s	remaining: 470ms
977:	learn: 0.1514002	total: 20s	remaining: 449ms
978:	learn: 0.1513795	total: 20s	remaining: 429ms
979:	learn: 0.1513186	total: 20s	remaining: 408ms
980:	learn: 0.1510393	total: 20s	remaining: 388ms
981:	learn: 0.1510201	total: 20.1s	remaining: 368ms
982:	learn: 0.1509816	total: 20.1s	remaining: 348ms
983:	learn: 0.1507540	total: 20.1s	remaining: 327ms
984:	learn: 0.150745

135:	learn: 0.2325461	total: 2.27s	remaining: 14.4s
136:	learn: 0.2319952	total: 2.29s	remaining: 14.4s
137:	learn: 0.2317398	total: 2.31s	remaining: 14.4s
138:	learn: 0.2313730	total: 2.33s	remaining: 14.5s
139:	learn: 0.2311525	total: 2.36s	remaining: 14.5s
140:	learn: 0.2310353	total: 2.38s	remaining: 14.5s
141:	learn: 0.2309937	total: 2.39s	remaining: 14.4s
142:	learn: 0.2307441	total: 2.41s	remaining: 14.4s
143:	learn: 0.2305332	total: 2.43s	remaining: 14.4s
144:	learn: 0.2304257	total: 2.44s	remaining: 14.4s
145:	learn: 0.2303925	total: 2.44s	remaining: 14.3s
146:	learn: 0.2302557	total: 2.46s	remaining: 14.3s
147:	learn: 0.2301950	total: 2.47s	remaining: 14.2s
148:	learn: 0.2297980	total: 2.49s	remaining: 14.2s
149:	learn: 0.2297763	total: 2.5s	remaining: 14.1s
150:	learn: 0.2293269	total: 2.51s	remaining: 14.1s
151:	learn: 0.2291772	total: 2.54s	remaining: 14.1s
152:	learn: 0.2291109	total: 2.55s	remaining: 14.1s
153:	learn: 0.2289818	total: 2.56s	remaining: 14s
154:	learn: 0.2

296:	learn: 0.2120701	total: 4.72s	remaining: 11.2s
297:	learn: 0.2120446	total: 4.74s	remaining: 11.2s
298:	learn: 0.2119699	total: 4.75s	remaining: 11.1s
299:	learn: 0.2117253	total: 4.77s	remaining: 11.1s
300:	learn: 0.2116526	total: 4.79s	remaining: 11.1s
301:	learn: 0.2116431	total: 4.79s	remaining: 11.1s
302:	learn: 0.2115382	total: 4.81s	remaining: 11.1s
303:	learn: 0.2113726	total: 4.83s	remaining: 11.1s
304:	learn: 0.2113612	total: 4.84s	remaining: 11s
305:	learn: 0.2111344	total: 4.86s	remaining: 11s
306:	learn: 0.2110780	total: 4.87s	remaining: 11s
307:	learn: 0.2107998	total: 4.88s	remaining: 11s
308:	learn: 0.2106927	total: 4.9s	remaining: 11s
309:	learn: 0.2103346	total: 4.92s	remaining: 11s
310:	learn: 0.2100046	total: 4.95s	remaining: 11s
311:	learn: 0.2099043	total: 4.96s	remaining: 10.9s
312:	learn: 0.2098483	total: 4.97s	remaining: 10.9s
313:	learn: 0.2098179	total: 4.99s	remaining: 10.9s
314:	learn: 0.2097819	total: 5s	remaining: 10.9s
315:	learn: 0.2097666	total: 5

456:	learn: 0.1943186	total: 7.77s	remaining: 9.23s
457:	learn: 0.1941084	total: 7.79s	remaining: 9.22s
458:	learn: 0.1940618	total: 7.81s	remaining: 9.21s
459:	learn: 0.1939694	total: 7.83s	remaining: 9.19s
460:	learn: 0.1937363	total: 7.85s	remaining: 9.18s
461:	learn: 0.1936562	total: 7.87s	remaining: 9.17s
462:	learn: 0.1936305	total: 7.89s	remaining: 9.16s
463:	learn: 0.1935630	total: 7.92s	remaining: 9.15s
464:	learn: 0.1933830	total: 7.94s	remaining: 9.14s
465:	learn: 0.1932437	total: 7.96s	remaining: 9.13s
466:	learn: 0.1932329	total: 7.99s	remaining: 9.12s
467:	learn: 0.1931335	total: 8.01s	remaining: 9.1s
468:	learn: 0.1930841	total: 8.03s	remaining: 9.09s
469:	learn: 0.1930838	total: 8.04s	remaining: 9.06s
470:	learn: 0.1929662	total: 8.06s	remaining: 9.05s
471:	learn: 0.1928136	total: 8.08s	remaining: 9.04s
472:	learn: 0.1926540	total: 8.1s	remaining: 9.03s
473:	learn: 0.1925682	total: 8.12s	remaining: 9.01s
474:	learn: 0.1923493	total: 8.14s	remaining: 9s
475:	learn: 0.192

618:	learn: 0.1782984	total: 11.3s	remaining: 6.96s
619:	learn: 0.1782389	total: 11.3s	remaining: 6.94s
620:	learn: 0.1781591	total: 11.3s	remaining: 6.92s
621:	learn: 0.1781408	total: 11.4s	remaining: 6.91s
622:	learn: 0.1780364	total: 11.4s	remaining: 6.89s
623:	learn: 0.1779083	total: 11.4s	remaining: 6.88s
624:	learn: 0.1778884	total: 11.4s	remaining: 6.86s
625:	learn: 0.1778452	total: 11.5s	remaining: 6.84s
626:	learn: 0.1776383	total: 11.5s	remaining: 6.83s
627:	learn: 0.1776169	total: 11.5s	remaining: 6.81s
628:	learn: 0.1776060	total: 11.5s	remaining: 6.79s
629:	learn: 0.1776008	total: 11.5s	remaining: 6.78s
630:	learn: 0.1774057	total: 11.6s	remaining: 6.76s
631:	learn: 0.1773550	total: 11.6s	remaining: 6.75s
632:	learn: 0.1772215	total: 11.6s	remaining: 6.73s
633:	learn: 0.1770385	total: 11.6s	remaining: 6.71s
634:	learn: 0.1770291	total: 11.6s	remaining: 6.7s
635:	learn: 0.1769900	total: 11.7s	remaining: 6.68s
636:	learn: 0.1768308	total: 11.7s	remaining: 6.67s
637:	learn: 0

777:	learn: 0.1658192	total: 14.8s	remaining: 4.23s
778:	learn: 0.1656201	total: 14.8s	remaining: 4.21s
779:	learn: 0.1655822	total: 14.9s	remaining: 4.19s
780:	learn: 0.1654867	total: 14.9s	remaining: 4.17s
781:	learn: 0.1654691	total: 14.9s	remaining: 4.16s
782:	learn: 0.1654629	total: 14.9s	remaining: 4.14s
783:	learn: 0.1654069	total: 15s	remaining: 4.12s
784:	learn: 0.1653094	total: 15s	remaining: 4.1s
785:	learn: 0.1651835	total: 15s	remaining: 4.08s
786:	learn: 0.1651741	total: 15s	remaining: 4.07s
787:	learn: 0.1651672	total: 15s	remaining: 4.05s
788:	learn: 0.1651403	total: 15.1s	remaining: 4.03s
789:	learn: 0.1649861	total: 15.1s	remaining: 4.01s
790:	learn: 0.1649831	total: 15.1s	remaining: 3.99s
791:	learn: 0.1649674	total: 15.1s	remaining: 3.97s
792:	learn: 0.1648885	total: 15.1s	remaining: 3.95s
793:	learn: 0.1648311	total: 15.2s	remaining: 3.94s
794:	learn: 0.1647609	total: 15.2s	remaining: 3.92s
795:	learn: 0.1647071	total: 15.2s	remaining: 3.9s
796:	learn: 0.1646650	to

942:	learn: 0.1567823	total: 18.5s	remaining: 1.12s
943:	learn: 0.1566120	total: 18.5s	remaining: 1.1s
944:	learn: 0.1565376	total: 18.5s	remaining: 1.08s
945:	learn: 0.1565195	total: 18.5s	remaining: 1.06s
946:	learn: 0.1564210	total: 18.6s	remaining: 1.04s
947:	learn: 0.1563598	total: 18.6s	remaining: 1.02s
948:	learn: 0.1562111	total: 18.6s	remaining: 1s
949:	learn: 0.1560822	total: 18.6s	remaining: 981ms
950:	learn: 0.1560423	total: 18.7s	remaining: 961ms
951:	learn: 0.1559964	total: 18.7s	remaining: 942ms
952:	learn: 0.1559942	total: 18.7s	remaining: 922ms
953:	learn: 0.1559517	total: 18.7s	remaining: 903ms
954:	learn: 0.1559438	total: 18.7s	remaining: 883ms
955:	learn: 0.1559300	total: 18.8s	remaining: 864ms
956:	learn: 0.1558405	total: 18.8s	remaining: 844ms
957:	learn: 0.1557883	total: 18.8s	remaining: 825ms
958:	learn: 0.1556783	total: 18.8s	remaining: 805ms
959:	learn: 0.1556681	total: 18.9s	remaining: 786ms
960:	learn: 0.1556393	total: 18.9s	remaining: 766ms
961:	learn: 0.15

104:	learn: 0.2362170	total: 1.85s	remaining: 15.7s
105:	learn: 0.2356890	total: 1.87s	remaining: 15.8s
106:	learn: 0.2351051	total: 1.89s	remaining: 15.8s
107:	learn: 0.2345249	total: 1.91s	remaining: 15.8s
108:	learn: 0.2344101	total: 1.92s	remaining: 15.7s
109:	learn: 0.2342375	total: 1.93s	remaining: 15.6s
110:	learn: 0.2341378	total: 1.94s	remaining: 15.6s
111:	learn: 0.2337462	total: 1.97s	remaining: 15.6s
112:	learn: 0.2336146	total: 1.98s	remaining: 15.5s
113:	learn: 0.2334020	total: 2s	remaining: 15.5s
114:	learn: 0.2332967	total: 2s	remaining: 15.4s
115:	learn: 0.2329824	total: 2.03s	remaining: 15.5s
116:	learn: 0.2322818	total: 2.05s	remaining: 15.5s
117:	learn: 0.2315277	total: 2.07s	remaining: 15.5s
118:	learn: 0.2310106	total: 2.09s	remaining: 15.5s
119:	learn: 0.2309050	total: 2.1s	remaining: 15.4s
120:	learn: 0.2308483	total: 2.11s	remaining: 15.3s
121:	learn: 0.2306829	total: 2.12s	remaining: 15.3s
122:	learn: 0.2301762	total: 2.14s	remaining: 15.3s
123:	learn: 0.22994

267:	learn: 0.2055784	total: 4.86s	remaining: 13.3s
268:	learn: 0.2054948	total: 4.88s	remaining: 13.3s
269:	learn: 0.2054585	total: 4.9s	remaining: 13.2s
270:	learn: 0.2052003	total: 4.92s	remaining: 13.2s
271:	learn: 0.2051544	total: 4.95s	remaining: 13.2s
272:	learn: 0.2050848	total: 4.97s	remaining: 13.2s
273:	learn: 0.2047187	total: 5s	remaining: 13.3s
274:	learn: 0.2047178	total: 5.01s	remaining: 13.2s
275:	learn: 0.2046857	total: 5.02s	remaining: 13.2s
276:	learn: 0.2046138	total: 5.04s	remaining: 13.2s
277:	learn: 0.2045837	total: 5.05s	remaining: 13.1s
278:	learn: 0.2045405	total: 5.06s	remaining: 13.1s
279:	learn: 0.2043870	total: 5.07s	remaining: 13s
280:	learn: 0.2043868	total: 5.08s	remaining: 13s
281:	learn: 0.2040445	total: 5.1s	remaining: 13s
282:	learn: 0.2040071	total: 5.12s	remaining: 13s
283:	learn: 0.2039966	total: 5.13s	remaining: 12.9s
284:	learn: 0.2039955	total: 5.14s	remaining: 12.9s
285:	learn: 0.2039820	total: 5.15s	remaining: 12.9s
286:	learn: 0.2039812	tot

434:	learn: 0.1902317	total: 7.84s	remaining: 10.2s
435:	learn: 0.1901281	total: 7.87s	remaining: 10.2s
436:	learn: 0.1899860	total: 7.89s	remaining: 10.2s
437:	learn: 0.1899034	total: 7.91s	remaining: 10.1s
438:	learn: 0.1896490	total: 7.93s	remaining: 10.1s
439:	learn: 0.1895519	total: 7.95s	remaining: 10.1s
440:	learn: 0.1894846	total: 7.97s	remaining: 10.1s
441:	learn: 0.1893705	total: 8s	remaining: 10.1s
442:	learn: 0.1892849	total: 8.02s	remaining: 10.1s
443:	learn: 0.1891977	total: 8.04s	remaining: 10.1s
444:	learn: 0.1891594	total: 8.07s	remaining: 10.1s
445:	learn: 0.1888334	total: 8.09s	remaining: 10s
446:	learn: 0.1887934	total: 8.11s	remaining: 10s
447:	learn: 0.1886885	total: 8.13s	remaining: 10s
448:	learn: 0.1884953	total: 8.15s	remaining: 10s
449:	learn: 0.1884025	total: 8.18s	remaining: 9.99s
450:	learn: 0.1882413	total: 8.2s	remaining: 9.98s
451:	learn: 0.1881532	total: 8.22s	remaining: 9.97s
452:	learn: 0.1879662	total: 8.24s	remaining: 9.96s
453:	learn: 0.1879395	to

598:	learn: 0.1741142	total: 11.5s	remaining: 7.69s
599:	learn: 0.1740908	total: 11.5s	remaining: 7.68s
600:	learn: 0.1740234	total: 11.5s	remaining: 7.66s
601:	learn: 0.1740113	total: 11.6s	remaining: 7.64s
602:	learn: 0.1739800	total: 11.6s	remaining: 7.62s
603:	learn: 0.1739726	total: 11.6s	remaining: 7.61s
604:	learn: 0.1738409	total: 11.6s	remaining: 7.59s
605:	learn: 0.1737544	total: 11.6s	remaining: 7.57s
606:	learn: 0.1736360	total: 11.7s	remaining: 7.55s
607:	learn: 0.1735388	total: 11.7s	remaining: 7.54s
608:	learn: 0.1734404	total: 11.7s	remaining: 7.52s
609:	learn: 0.1734219	total: 11.7s	remaining: 7.5s
610:	learn: 0.1733444	total: 11.8s	remaining: 7.48s
611:	learn: 0.1733191	total: 11.8s	remaining: 7.47s
612:	learn: 0.1732085	total: 11.8s	remaining: 7.45s
613:	learn: 0.1730045	total: 11.8s	remaining: 7.43s
614:	learn: 0.1729689	total: 11.8s	remaining: 7.41s
615:	learn: 0.1728937	total: 11.9s	remaining: 7.4s
616:	learn: 0.1728587	total: 11.9s	remaining: 7.38s
617:	learn: 0.

764:	learn: 0.1641796	total: 15.2s	remaining: 4.67s
765:	learn: 0.1638831	total: 15.2s	remaining: 4.65s
766:	learn: 0.1637847	total: 15.2s	remaining: 4.63s
767:	learn: 0.1637728	total: 15.3s	remaining: 4.61s
768:	learn: 0.1636933	total: 15.3s	remaining: 4.59s
769:	learn: 0.1635495	total: 15.3s	remaining: 4.57s
770:	learn: 0.1633271	total: 15.3s	remaining: 4.55s
771:	learn: 0.1632404	total: 15.4s	remaining: 4.54s
772:	learn: 0.1631744	total: 15.4s	remaining: 4.52s
773:	learn: 0.1630719	total: 15.4s	remaining: 4.5s
774:	learn: 0.1630549	total: 15.4s	remaining: 4.48s
775:	learn: 0.1630312	total: 15.5s	remaining: 4.46s
776:	learn: 0.1627927	total: 15.5s	remaining: 4.44s
777:	learn: 0.1627572	total: 15.5s	remaining: 4.42s
778:	learn: 0.1627430	total: 15.5s	remaining: 4.4s
779:	learn: 0.1626934	total: 15.5s	remaining: 4.38s
780:	learn: 0.1626405	total: 15.6s	remaining: 4.37s
781:	learn: 0.1623335	total: 15.6s	remaining: 4.35s
782:	learn: 0.1623075	total: 15.6s	remaining: 4.33s
783:	learn: 0.

927:	learn: 0.1531290	total: 18.9s	remaining: 1.46s
928:	learn: 0.1529850	total: 18.9s	remaining: 1.44s
929:	learn: 0.1529768	total: 18.9s	remaining: 1.42s
930:	learn: 0.1529617	total: 18.9s	remaining: 1.4s
931:	learn: 0.1529529	total: 19s	remaining: 1.38s
932:	learn: 0.1529277	total: 19s	remaining: 1.36s
933:	learn: 0.1527613	total: 19s	remaining: 1.34s
934:	learn: 0.1527545	total: 19s	remaining: 1.32s
935:	learn: 0.1527415	total: 19.1s	remaining: 1.3s
936:	learn: 0.1526039	total: 19.1s	remaining: 1.28s
937:	learn: 0.1525735	total: 19.1s	remaining: 1.26s
938:	learn: 0.1525309	total: 19.1s	remaining: 1.24s
939:	learn: 0.1524599	total: 19.1s	remaining: 1.22s
940:	learn: 0.1523900	total: 19.2s	remaining: 1.2s
941:	learn: 0.1523269	total: 19.2s	remaining: 1.18s
942:	learn: 0.1523151	total: 19.2s	remaining: 1.16s
943:	learn: 0.1521388	total: 19.2s	remaining: 1.14s
944:	learn: 0.1521114	total: 19.3s	remaining: 1.12s
945:	learn: 0.1520901	total: 19.3s	remaining: 1.1s
946:	learn: 0.1520594	to

90:	learn: 0.2541076	total: 1.68s	remaining: 16.8s
91:	learn: 0.2534121	total: 1.7s	remaining: 16.8s
92:	learn: 0.2531265	total: 1.71s	remaining: 16.7s
93:	learn: 0.2525434	total: 1.74s	remaining: 16.7s
94:	learn: 0.2519988	total: 1.76s	remaining: 16.7s
95:	learn: 0.2518034	total: 1.76s	remaining: 16.6s
96:	learn: 0.2513969	total: 1.78s	remaining: 16.6s
97:	learn: 0.2511282	total: 1.8s	remaining: 16.5s
98:	learn: 0.2508602	total: 1.81s	remaining: 16.5s
99:	learn: 0.2502271	total: 1.82s	remaining: 16.4s
100:	learn: 0.2498865	total: 1.83s	remaining: 16.3s
101:	learn: 0.2492716	total: 1.84s	remaining: 16.2s
102:	learn: 0.2490754	total: 1.86s	remaining: 16.2s
103:	learn: 0.2487814	total: 1.88s	remaining: 16.2s
104:	learn: 0.2485474	total: 1.9s	remaining: 16.2s
105:	learn: 0.2481384	total: 1.93s	remaining: 16.2s
106:	learn: 0.2478463	total: 1.94s	remaining: 16.2s
107:	learn: 0.2473291	total: 1.96s	remaining: 16.2s
108:	learn: 0.2468496	total: 1.98s	remaining: 16.2s
109:	learn: 0.2465920	tot

259:	learn: 0.2221958	total: 4.31s	remaining: 12.3s
260:	learn: 0.2221775	total: 4.33s	remaining: 12.2s
261:	learn: 0.2220796	total: 4.34s	remaining: 12.2s
262:	learn: 0.2219577	total: 4.37s	remaining: 12.2s
263:	learn: 0.2218925	total: 4.38s	remaining: 12.2s
264:	learn: 0.2218633	total: 4.39s	remaining: 12.2s
265:	learn: 0.2217751	total: 4.41s	remaining: 12.2s
266:	learn: 0.2217590	total: 4.42s	remaining: 12.1s
267:	learn: 0.2217306	total: 4.43s	remaining: 12.1s
268:	learn: 0.2217112	total: 4.44s	remaining: 12.1s
269:	learn: 0.2216142	total: 4.46s	remaining: 12s
270:	learn: 0.2214309	total: 4.48s	remaining: 12s
271:	learn: 0.2214177	total: 4.49s	remaining: 12s
272:	learn: 0.2213438	total: 4.5s	remaining: 12s
273:	learn: 0.2213248	total: 4.51s	remaining: 11.9s
274:	learn: 0.2213212	total: 4.51s	remaining: 11.9s
275:	learn: 0.2213123	total: 4.52s	remaining: 11.9s
276:	learn: 0.2211314	total: 4.54s	remaining: 11.9s
277:	learn: 0.2209838	total: 4.56s	remaining: 11.9s
278:	learn: 0.2207216

423:	learn: 0.2081770	total: 7.11s	remaining: 9.66s
424:	learn: 0.2080561	total: 7.14s	remaining: 9.66s
425:	learn: 0.2079230	total: 7.16s	remaining: 9.65s
426:	learn: 0.2079180	total: 7.17s	remaining: 9.62s
427:	learn: 0.2077330	total: 7.19s	remaining: 9.61s
428:	learn: 0.2076760	total: 7.21s	remaining: 9.6s
429:	learn: 0.2076228	total: 7.24s	remaining: 9.59s
430:	learn: 0.2075613	total: 7.26s	remaining: 9.58s
431:	learn: 0.2072572	total: 7.28s	remaining: 9.57s
432:	learn: 0.2072295	total: 7.3s	remaining: 9.56s
433:	learn: 0.2071956	total: 7.33s	remaining: 9.56s
434:	learn: 0.2069587	total: 7.35s	remaining: 9.55s
435:	learn: 0.2069354	total: 7.37s	remaining: 9.54s
436:	learn: 0.2066257	total: 7.39s	remaining: 9.53s
437:	learn: 0.2065938	total: 7.41s	remaining: 9.51s
438:	learn: 0.2065630	total: 7.43s	remaining: 9.5s
439:	learn: 0.2064529	total: 7.45s	remaining: 9.49s
440:	learn: 0.2063736	total: 7.48s	remaining: 9.48s
441:	learn: 0.2063424	total: 7.5s	remaining: 9.47s
442:	learn: 0.20

587:	learn: 0.1900075	total: 11.1s	remaining: 7.75s
588:	learn: 0.1898879	total: 11.1s	remaining: 7.73s
589:	learn: 0.1898169	total: 11.1s	remaining: 7.72s
590:	learn: 0.1896731	total: 11.1s	remaining: 7.71s
591:	learn: 0.1895894	total: 11.2s	remaining: 7.69s
592:	learn: 0.1895413	total: 11.2s	remaining: 7.68s
593:	learn: 0.1892133	total: 11.2s	remaining: 7.66s
594:	learn: 0.1891804	total: 11.2s	remaining: 7.65s
595:	learn: 0.1888803	total: 11.3s	remaining: 7.63s
596:	learn: 0.1888773	total: 11.3s	remaining: 7.62s
597:	learn: 0.1886760	total: 11.3s	remaining: 7.6s
598:	learn: 0.1886641	total: 11.3s	remaining: 7.58s
599:	learn: 0.1885084	total: 11.4s	remaining: 7.57s
600:	learn: 0.1884685	total: 11.4s	remaining: 7.55s
601:	learn: 0.1879500	total: 11.4s	remaining: 7.54s
602:	learn: 0.1878714	total: 11.4s	remaining: 7.53s
603:	learn: 0.1878451	total: 11.5s	remaining: 7.51s
604:	learn: 0.1878008	total: 11.5s	remaining: 7.5s
605:	learn: 0.1874786	total: 11.5s	remaining: 7.48s
606:	learn: 0.

748:	learn: 0.1743996	total: 14.9s	remaining: 4.98s
749:	learn: 0.1743550	total: 14.9s	remaining: 4.96s
750:	learn: 0.1743258	total: 14.9s	remaining: 4.94s
751:	learn: 0.1742356	total: 14.9s	remaining: 4.92s
752:	learn: 0.1742260	total: 14.9s	remaining: 4.9s
753:	learn: 0.1742028	total: 15s	remaining: 4.88s
754:	learn: 0.1741700	total: 15s	remaining: 4.86s
755:	learn: 0.1741296	total: 15s	remaining: 4.84s
756:	learn: 0.1740897	total: 15s	remaining: 4.83s
757:	learn: 0.1738998	total: 15.1s	remaining: 4.81s
758:	learn: 0.1738737	total: 15.1s	remaining: 4.79s
759:	learn: 0.1738573	total: 15.1s	remaining: 4.77s
760:	learn: 0.1737808	total: 15.1s	remaining: 4.75s
761:	learn: 0.1737044	total: 15.1s	remaining: 4.73s
762:	learn: 0.1736618	total: 15.2s	remaining: 4.71s
763:	learn: 0.1735097	total: 15.2s	remaining: 4.69s
764:	learn: 0.1733432	total: 15.2s	remaining: 4.67s
765:	learn: 0.1733200	total: 15.2s	remaining: 4.65s
766:	learn: 0.1732578	total: 15.3s	remaining: 4.64s
767:	learn: 0.1731762

914:	learn: 0.1631338	total: 18.6s	remaining: 1.73s
915:	learn: 0.1631050	total: 18.7s	remaining: 1.71s
916:	learn: 0.1630767	total: 18.7s	remaining: 1.69s
917:	learn: 0.1629800	total: 18.7s	remaining: 1.67s
918:	learn: 0.1629234	total: 18.7s	remaining: 1.65s
919:	learn: 0.1629198	total: 18.8s	remaining: 1.63s
920:	learn: 0.1628805	total: 18.8s	remaining: 1.61s
921:	learn: 0.1628711	total: 18.8s	remaining: 1.59s
922:	learn: 0.1628595	total: 18.8s	remaining: 1.57s
923:	learn: 0.1627309	total: 18.8s	remaining: 1.55s
924:	learn: 0.1627148	total: 18.9s	remaining: 1.53s
925:	learn: 0.1625329	total: 18.9s	remaining: 1.51s
926:	learn: 0.1625216	total: 18.9s	remaining: 1.49s
927:	learn: 0.1625084	total: 18.9s	remaining: 1.47s
928:	learn: 0.1624343	total: 19s	remaining: 1.45s
929:	learn: 0.1623387	total: 19s	remaining: 1.43s
930:	learn: 0.1623154	total: 19s	remaining: 1.41s
931:	learn: 0.1623051	total: 19s	remaining: 1.39s
932:	learn: 0.1622784	total: 19s	remaining: 1.37s
933:	learn: 0.1621699	

In [91]:
cat_features_idx

[1, 2, 3, 4, 6, 7, 8, 10, 15]

In [83]:
np.mean(cv_scores)

0.9005784984368927

In [96]:
valid_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1357 entries, 3552 to 3904
Data columns (total 16 columns):
age          1357 non-null int64
job          1357 non-null object
marital      1357 non-null object
education    1357 non-null object
default      1357 non-null object
balance      1357 non-null int64
housing      1357 non-null object
loan         1357 non-null object
contact      1357 non-null object
day          1357 non-null int64
month        1357 non-null object
duration     1357 non-null int64
campaign     1357 non-null int64
pdays        1357 non-null int64
previous     1357 non-null int64
poutcome     1357 non-null object
dtypes: int64(7), object(9)
memory usage: 180.2+ KB


In [84]:
%%time
ctb.fit(train_df_part, y_train_part,
        cat_features=cat_features_idx);

Learning rate set to 0.023211
0:	learn: 0.6667391	total: 21ms	remaining: 21s
1:	learn: 0.6423247	total: 45.3ms	remaining: 22.6s
2:	learn: 0.6182561	total: 69.3ms	remaining: 23s
3:	learn: 0.5997096	total: 86.9ms	remaining: 21.6s
4:	learn: 0.5804962	total: 109ms	remaining: 21.6s
5:	learn: 0.5632466	total: 130ms	remaining: 21.6s
6:	learn: 0.5466916	total: 152ms	remaining: 21.6s
7:	learn: 0.5297737	total: 163ms	remaining: 20.2s
8:	learn: 0.5180969	total: 172ms	remaining: 18.9s
9:	learn: 0.5032803	total: 193ms	remaining: 19.1s
10:	learn: 0.4931403	total: 205ms	remaining: 18.4s
11:	learn: 0.4836170	total: 213ms	remaining: 17.6s
12:	learn: 0.4713271	total: 225ms	remaining: 17.1s
13:	learn: 0.4577966	total: 246ms	remaining: 17.3s
14:	learn: 0.4473978	total: 257ms	remaining: 16.9s
15:	learn: 0.4371805	total: 266ms	remaining: 16.4s
16:	learn: 0.4258150	total: 287ms	remaining: 16.6s
17:	learn: 0.4178063	total: 301ms	remaining: 16.4s
18:	learn: 0.4085884	total: 320ms	remaining: 16.5s
19:	learn: 0.

162:	learn: 0.2233140	total: 3.18s	remaining: 16.3s
163:	learn: 0.2232837	total: 3.19s	remaining: 16.3s
164:	learn: 0.2230416	total: 3.22s	remaining: 16.3s
165:	learn: 0.2229150	total: 3.23s	remaining: 16.2s
166:	learn: 0.2225240	total: 3.25s	remaining: 16.2s
167:	learn: 0.2223981	total: 3.27s	remaining: 16.2s
168:	learn: 0.2222308	total: 3.29s	remaining: 16.2s
169:	learn: 0.2219625	total: 3.31s	remaining: 16.2s
170:	learn: 0.2219624	total: 3.32s	remaining: 16.1s
171:	learn: 0.2218992	total: 3.33s	remaining: 16.1s
172:	learn: 0.2217438	total: 3.35s	remaining: 16s
173:	learn: 0.2216293	total: 3.36s	remaining: 16s
174:	learn: 0.2216224	total: 3.37s	remaining: 15.9s
175:	learn: 0.2214485	total: 3.39s	remaining: 15.9s
176:	learn: 0.2208879	total: 3.42s	remaining: 15.9s
177:	learn: 0.2206509	total: 3.44s	remaining: 15.9s
178:	learn: 0.2202049	total: 3.47s	remaining: 15.9s
179:	learn: 0.2199237	total: 3.5s	remaining: 15.9s
180:	learn: 0.2197592	total: 3.52s	remaining: 15.9s
181:	learn: 0.219

322:	learn: 0.2046527	total: 6.24s	remaining: 13.1s
323:	learn: 0.2045948	total: 6.26s	remaining: 13.1s
324:	learn: 0.2043876	total: 6.28s	remaining: 13s
325:	learn: 0.2042819	total: 6.3s	remaining: 13s
326:	learn: 0.2042780	total: 6.32s	remaining: 13s
327:	learn: 0.2042055	total: 6.33s	remaining: 13s
328:	learn: 0.2041370	total: 6.35s	remaining: 12.9s
329:	learn: 0.2040857	total: 6.37s	remaining: 12.9s
330:	learn: 0.2040729	total: 6.38s	remaining: 12.9s
331:	learn: 0.2040529	total: 6.38s	remaining: 12.8s
332:	learn: 0.2040467	total: 6.4s	remaining: 12.8s
333:	learn: 0.2037664	total: 6.43s	remaining: 12.8s
334:	learn: 0.2036997	total: 6.46s	remaining: 12.8s
335:	learn: 0.2036414	total: 6.47s	remaining: 12.8s
336:	learn: 0.2036382	total: 6.49s	remaining: 12.8s
337:	learn: 0.2035211	total: 6.51s	remaining: 12.8s
338:	learn: 0.2033213	total: 6.54s	remaining: 12.8s
339:	learn: 0.2031810	total: 6.57s	remaining: 12.7s
340:	learn: 0.2030370	total: 6.59s	remaining: 12.7s
341:	learn: 0.2028125	

484:	learn: 0.1874339	total: 9.84s	remaining: 10.4s
485:	learn: 0.1873080	total: 9.86s	remaining: 10.4s
486:	learn: 0.1870912	total: 9.88s	remaining: 10.4s
487:	learn: 0.1868699	total: 9.91s	remaining: 10.4s
488:	learn: 0.1867034	total: 9.93s	remaining: 10.4s
489:	learn: 0.1864823	total: 9.95s	remaining: 10.4s
490:	learn: 0.1863406	total: 9.98s	remaining: 10.3s
491:	learn: 0.1862131	total: 10s	remaining: 10.3s
492:	learn: 0.1861239	total: 10s	remaining: 10.3s
493:	learn: 0.1860434	total: 10.1s	remaining: 10.3s
494:	learn: 0.1860103	total: 10.1s	remaining: 10.3s
495:	learn: 0.1859596	total: 10.1s	remaining: 10.3s
496:	learn: 0.1858658	total: 10.1s	remaining: 10.2s
497:	learn: 0.1857290	total: 10.1s	remaining: 10.2s
498:	learn: 0.1854923	total: 10.2s	remaining: 10.2s
499:	learn: 0.1853472	total: 10.2s	remaining: 10.2s
500:	learn: 0.1852840	total: 10.2s	remaining: 10.2s
501:	learn: 0.1851221	total: 10.2s	remaining: 10.2s
502:	learn: 0.1850707	total: 10.3s	remaining: 10.1s
503:	learn: 0.18

650:	learn: 0.1718239	total: 13.8s	remaining: 7.42s
651:	learn: 0.1717469	total: 13.9s	remaining: 7.4s
652:	learn: 0.1717251	total: 13.9s	remaining: 7.38s
653:	learn: 0.1716522	total: 13.9s	remaining: 7.36s
654:	learn: 0.1716200	total: 13.9s	remaining: 7.34s
655:	learn: 0.1715326	total: 14s	remaining: 7.32s
656:	learn: 0.1714700	total: 14s	remaining: 7.3s
657:	learn: 0.1714649	total: 14s	remaining: 7.28s
658:	learn: 0.1714209	total: 14s	remaining: 7.26s
659:	learn: 0.1713506	total: 14.1s	remaining: 7.24s
660:	learn: 0.1712946	total: 14.1s	remaining: 7.22s
661:	learn: 0.1712749	total: 14.1s	remaining: 7.2s
662:	learn: 0.1711841	total: 14.1s	remaining: 7.18s
663:	learn: 0.1711587	total: 14.2s	remaining: 7.16s
664:	learn: 0.1710914	total: 14.2s	remaining: 7.14s
665:	learn: 0.1710338	total: 14.2s	remaining: 7.12s
666:	learn: 0.1708826	total: 14.2s	remaining: 7.1s
667:	learn: 0.1707113	total: 14.3s	remaining: 7.09s
668:	learn: 0.1705861	total: 14.3s	remaining: 7.07s
669:	learn: 0.1705471	to

813:	learn: 0.1623182	total: 17.8s	remaining: 4.07s
814:	learn: 0.1622977	total: 17.9s	remaining: 4.05s
815:	learn: 0.1622601	total: 17.9s	remaining: 4.03s
816:	learn: 0.1622195	total: 17.9s	remaining: 4.01s
817:	learn: 0.1621911	total: 17.9s	remaining: 3.99s
818:	learn: 0.1621414	total: 18s	remaining: 3.97s
819:	learn: 0.1621121	total: 18s	remaining: 3.95s
820:	learn: 0.1619116	total: 18s	remaining: 3.92s
821:	learn: 0.1618838	total: 18s	remaining: 3.9s
822:	learn: 0.1618153	total: 18.1s	remaining: 3.88s
823:	learn: 0.1617712	total: 18.1s	remaining: 3.86s
824:	learn: 0.1617411	total: 18.1s	remaining: 3.84s
825:	learn: 0.1616476	total: 18.1s	remaining: 3.82s
826:	learn: 0.1616230	total: 18.2s	remaining: 3.8s
827:	learn: 0.1615287	total: 18.2s	remaining: 3.78s
828:	learn: 0.1614147	total: 18.2s	remaining: 3.75s
829:	learn: 0.1612639	total: 18.2s	remaining: 3.73s
830:	learn: 0.1612382	total: 18.3s	remaining: 3.71s
831:	learn: 0.1610252	total: 18.3s	remaining: 3.69s
832:	learn: 0.1609003	

979:	learn: 0.1522211	total: 21.8s	remaining: 445ms
980:	learn: 0.1522126	total: 21.8s	remaining: 423ms
981:	learn: 0.1522043	total: 21.9s	remaining: 401ms
982:	learn: 0.1521835	total: 21.9s	remaining: 378ms
983:	learn: 0.1520898	total: 21.9s	remaining: 356ms
984:	learn: 0.1520810	total: 21.9s	remaining: 334ms
985:	learn: 0.1520752	total: 22s	remaining: 312ms
986:	learn: 0.1520296	total: 22s	remaining: 289ms
987:	learn: 0.1520048	total: 22s	remaining: 267ms
988:	learn: 0.1519464	total: 22s	remaining: 245ms
989:	learn: 0.1519367	total: 22.1s	remaining: 223ms
990:	learn: 0.1518770	total: 22.1s	remaining: 200ms
991:	learn: 0.1518614	total: 22.1s	remaining: 178ms
992:	learn: 0.1518494	total: 22.1s	remaining: 156ms
993:	learn: 0.1518071	total: 22.1s	remaining: 134ms
994:	learn: 0.1517889	total: 22.2s	remaining: 111ms
995:	learn: 0.1516644	total: 22.2s	remaining: 89.1ms
996:	learn: 0.1516565	total: 22.2s	remaining: 66.8ms
997:	learn: 0.1516168	total: 22.2s	remaining: 44.6ms
998:	learn: 0.151

<catboost.core.CatBoostClassifier at 0xef00f28>

In [85]:
roc_auc_score(y_valid, ctb.predict_proba(valid_df)[:, 1])

0.9171577104549627