In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [3]:
def log_progress(sequence, every=None, size=None, name='Items'):
    from ipywidgets import IntProgress, HTML, VBox
    from IPython.display import display

    is_iterator = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            is_iterator = True
    if size is not None:
        if every is None:
            if size <= 200:
                every = 1
            else:
                every = int(size / 200)     # every 0.5%
    else:
        assert every is not None, 'sequence is iterator, set every'

    if is_iterator:
        progress = IntProgress(min=0, max=1, value=1)
        progress.bar_style = 'info'
    else:
        progress = IntProgress(min=0, max=size, value=0)
    label = HTML()
    box = VBox(children=[label, progress])
    display(box)

    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            if index == 1 or index % every == 0:
                if is_iterator:
                    label.value = '{name}: {index} / ?'.format(
                        name=name,
                        index=index
                    )
                else:
                    progress.value = index
                    label.value = u'{name}: {index} / {size}'.format(
                        name=name,
                        index=index,
                        size=size
                    )
            yield record
    except:
        progress.bar_style = 'danger'
        raise
    else:
        progress.bar_style = 'success'
        progress.value = index
        label.value = "{name}: {index}".format(
            name=name,
            index=str(index or '?')
        )

### Load pre-processed data

In [4]:
X_train = pd.read_csv('./readied_data_Q2/X_train.csv')
X_test= pd.read_csv('./readied_data_Q2/X_test.csv')
y_train = pd.read_csv('./readied_data_Q2/y_train.csv', header = None)
y_test = pd.read_csv('./readied_data_Q2/y_test.csv', header = None)

In [5]:
X_train.drop(columns = ['Unnamed: 0'], inplace = True)
X_test.drop(columns = ['Unnamed: 0'], inplace = True)

In [6]:
y_train.drop(columns = [y_train.columns.values.tolist()[0]], inplace = True)
y_test.drop(columns = [y_test.columns.values.tolist()[0]], inplace = True)

In [7]:
y_train.columns = ['target']
y_test.columns = ['target']

In [8]:
y_train = pd.Series(y_train['target'])
y_test = pd.Series(y_test['target'])

### Perform Feature Selection

In [9]:
from cLiML.model_prep import feature_select_logistic_reg

In [38]:
df_feature = feature_select_logistic_reg(X_train, y_train, cv = 5)

In [11]:
#df_feature.head()

In [39]:
#df_feature.to_csv('./readied_data_Q2/df_feature.csv')

In [25]:
#df_feature = pd.read_csv('./readied_data_Q2/df_feature.csv')

### Logistic Regression

In [53]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression


In [82]:
indexs = []
test_score = []
for i in log_progress(range(0, 200, 10)):
    skb = df_feature[df_feature['score'] >= i]['variable_skb'].values.tolist()
    indexs.append(i)
    
    lr = LogisticRegression()
    lr.fit(X_train[skb], y_train)
    test_score.append(lr.score(X_test[skb], y_test))
    

VBox(children=(HTML(value=''), IntProgress(value=0, max=20)))

In [83]:
indexs

[0,
 10,
 20,
 30,
 40,
 50,
 60,
 70,
 80,
 90,
 100,
 110,
 120,
 130,
 140,
 150,
 160,
 170,
 180,
 190]

In [84]:
test_score

[0.8445378151260504,
 0.865546218487395,
 0.8592436974789915,
 0.865546218487395,
 0.865546218487395,
 0.884453781512605,
 0.8718487394957983,
 0.8550420168067226,
 0.8697478991596639,
 0.8634453781512605,
 0.865546218487395,
 0.8571428571428571,
 0.8613445378151261,
 0.8571428571428571,
 0.8529411764705882,
 0.8550420168067226,
 0.8529411764705882,
 0.8571428571428571,
 0.8466386554621849,
 0.8445378151260504]

In [85]:
skb = df_feature[df_feature['score'] >= 50]['variable_skb'].values.tolist()

lr = LogisticRegression()
lr.fit(X_train[skb], y_train);

In [86]:
lr.score(X_train[skb],y_train)

0.9088514225500527

In [87]:
lr.score(X_test[skb], y_test)

0.884453781512605

### Logistic Regression with tuning

In [88]:
from sklearn.model_selection import GridSearchCV

In [101]:
np.arange(750, 820)/10000

array([0.075 , 0.0751, 0.0752, 0.0753, 0.0754, 0.0755, 0.0756, 0.0757,
       0.0758, 0.0759, 0.076 , 0.0761, 0.0762, 0.0763, 0.0764, 0.0765,
       0.0766, 0.0767, 0.0768, 0.0769, 0.077 , 0.0771, 0.0772, 0.0773,
       0.0774, 0.0775, 0.0776, 0.0777, 0.0778, 0.0779, 0.078 , 0.0781,
       0.0782, 0.0783, 0.0784, 0.0785, 0.0786, 0.0787, 0.0788, 0.0789,
       0.079 , 0.0791, 0.0792, 0.0793, 0.0794, 0.0795, 0.0796, 0.0797,
       0.0798, 0.0799, 0.08  , 0.0801, 0.0802, 0.0803, 0.0804, 0.0805,
       0.0806, 0.0807, 0.0808, 0.0809, 0.081 , 0.0811, 0.0812, 0.0813,
       0.0814, 0.0815, 0.0816, 0.0817, 0.0818, 0.0819])

In [102]:
params = {'penalty': ['l1'],
         'C': np.arange(750, 820)/10000}

lrt = LogisticRegression()
lrt_gs = GridSearchCV(lrt, params, n_jobs = -1, cv = 10)
lrt_gs.fit(X_train[skb], y_train)

GridSearchCV(cv=10, error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'penalty': ['l1'], 'C': array([0.075 , 0.0751, ..., 0.0818, 0.0819])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [103]:
lrt_gs.best_params_

{'C': 0.0773, 'penalty': 'l1'}

In [104]:
lrt_gs.score(X_train[skb], y_train)

0.8967334035827187

In [105]:
lrt_gs.score(X_test[skb], y_test)

0.8718487394957983

### Naive Bayes Classification

In [106]:
from sklearn.naive_bayes import MultinomialNB

In [116]:
nb = MultinomialNB()
nb.fit(X_train[skb], y_train)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [117]:
nb.score(X_train[skb], y_train)

0.8461538461538461

In [118]:
nb.score(X_test[skb], y_test)

0.7899159663865546

### Random Forest 

In [119]:
from sklearn.ensemble import RandomForestClassifier

  from numpy.core.umath_tests import inner1d


In [120]:
rfc = RandomForestClassifier()
rfc.fit(X_train[skb], y_train)


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [122]:
rfc.score(X_train[skb], y_train)

0.9947312961011591

In [123]:
rfc.score(X_test[skb], y_test)

0.8487394957983193

### Random Forest Grid Search

In [135]:
np.arange(5, 135,  15)

array([  5,  20,  35,  50,  65,  80,  95, 110, 125])

In [131]:
X_train[skb].shape

(1898, 135)

In [136]:
params = {'max_depth': [20],
         'max_features': np.arange(5, 135,  15)}

rfct = RandomForestClassifier()
rfct_gs = GridSearchCV(rfct, params, n_jobs = -1, cv = 10)
rfct_gs.fit(X_train[skb], y_train)

GridSearchCV(cv=10, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'max_depth': [20], 'max_features': array([  5,  20,  35,  50,  65,  80,  95, 110, 125])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [137]:
rfct_gs.best_params_

{'max_depth': 20, 'max_features': 50}

In [138]:
rfct_gs.score(X_train[skb], y_train)

0.9936775553213909

In [139]:
rfct_gs.score(X_test[skb], y_test)

0.8487394957983193

### Boosting

In [141]:
from xgboost import XGBClassifier

xgb = XGBClassifier(N_JOBS = -1)
xgb.fit(X_train[skb],y_train)

XGBClassifier(N_JOBS=-1, base_score=0.5, booster='gbtree',
       colsample_bylevel=1, colsample_bytree=1, gamma=0, learning_rate=0.1,
       max_delta_step=0, max_depth=3, min_child_weight=1, missing=None,
       n_estimators=100, n_jobs=1, nthread=None,
       objective='binary:logistic', random_state=0, reg_alpha=0,
       reg_lambda=1, scale_pos_weight=1, seed=None, silent=True,
       subsample=1)

In [143]:
xgb.score(X_train[skb], y_train)

  if diff:


0.9409905163329821

In [144]:
xgb.score(X_test[skb], y_test)

  if diff:


0.8676470588235294

### CATBOOST

In [145]:
from catboost import CatBoostClassifier

In [146]:
catboost = CatBoostClassifier(custom_loss=['Accuracy'])

catboost.fit(X_train[skb], y_train)

0:	learn: 0.6771650	total: 85.5ms	remaining: 1m 25s
1:	learn: 0.6621859	total: 98.2ms	remaining: 49s
2:	learn: 0.6514667	total: 111ms	remaining: 36.9s
3:	learn: 0.6385619	total: 125ms	remaining: 31.1s
4:	learn: 0.6257465	total: 138ms	remaining: 27.4s
5:	learn: 0.6162246	total: 151ms	remaining: 25.1s
6:	learn: 0.6041730	total: 163ms	remaining: 23.2s
7:	learn: 0.5951921	total: 175ms	remaining: 21.8s
8:	learn: 0.5861066	total: 191ms	remaining: 21s
9:	learn: 0.5784876	total: 203ms	remaining: 20.1s
10:	learn: 0.5698906	total: 215ms	remaining: 19.4s
11:	learn: 0.5598014	total: 227ms	remaining: 18.7s
12:	learn: 0.5526813	total: 240ms	remaining: 18.2s
13:	learn: 0.5436999	total: 252ms	remaining: 17.8s
14:	learn: 0.5340177	total: 266ms	remaining: 17.4s
15:	learn: 0.5263588	total: 279ms	remaining: 17.2s
16:	learn: 0.5190402	total: 312ms	remaining: 18s
17:	learn: 0.5111493	total: 347ms	remaining: 18.9s
18:	learn: 0.5056065	total: 362ms	remaining: 18.7s
19:	learn: 0.4996775	total: 377ms	remaining:

167:	learn: 0.2664608	total: 3.86s	remaining: 19.1s
168:	learn: 0.2659880	total: 3.9s	remaining: 19.2s
169:	learn: 0.2652913	total: 3.93s	remaining: 19.2s
170:	learn: 0.2644366	total: 3.97s	remaining: 19.2s
171:	learn: 0.2638902	total: 3.99s	remaining: 19.2s
172:	learn: 0.2633400	total: 4.02s	remaining: 19.2s
173:	learn: 0.2628037	total: 4.05s	remaining: 19.3s
174:	learn: 0.2621558	total: 4.11s	remaining: 19.4s
175:	learn: 0.2617334	total: 4.12s	remaining: 19.3s
176:	learn: 0.2611653	total: 4.16s	remaining: 19.3s
177:	learn: 0.2608031	total: 4.19s	remaining: 19.4s
178:	learn: 0.2602952	total: 4.23s	remaining: 19.4s
179:	learn: 0.2600794	total: 4.26s	remaining: 19.4s
180:	learn: 0.2598074	total: 4.31s	remaining: 19.5s
181:	learn: 0.2593226	total: 4.35s	remaining: 19.6s
182:	learn: 0.2590565	total: 4.38s	remaining: 19.5s
183:	learn: 0.2586895	total: 4.41s	remaining: 19.5s
184:	learn: 0.2583007	total: 4.43s	remaining: 19.5s
185:	learn: 0.2577291	total: 4.47s	remaining: 19.6s
186:	learn: 0

326:	learn: 0.2076135	total: 7.79s	remaining: 16s
327:	learn: 0.2071803	total: 7.83s	remaining: 16s
328:	learn: 0.2069900	total: 7.85s	remaining: 16s
329:	learn: 0.2069268	total: 7.88s	remaining: 16s
330:	learn: 0.2066357	total: 7.9s	remaining: 16s
331:	learn: 0.2062066	total: 7.93s	remaining: 15.9s
332:	learn: 0.2059628	total: 7.95s	remaining: 15.9s
333:	learn: 0.2057613	total: 7.97s	remaining: 15.9s
334:	learn: 0.2054455	total: 7.99s	remaining: 15.9s
335:	learn: 0.2050133	total: 8.02s	remaining: 15.8s
336:	learn: 0.2047191	total: 8.05s	remaining: 15.8s
337:	learn: 0.2042138	total: 8.07s	remaining: 15.8s
338:	learn: 0.2037537	total: 8.1s	remaining: 15.8s
339:	learn: 0.2033670	total: 8.11s	remaining: 15.7s
340:	learn: 0.2032309	total: 8.13s	remaining: 15.7s
341:	learn: 0.2029029	total: 8.14s	remaining: 15.7s
342:	learn: 0.2025262	total: 8.15s	remaining: 15.6s
343:	learn: 0.2023608	total: 8.16s	remaining: 15.6s
344:	learn: 0.2019996	total: 8.18s	remaining: 15.5s
345:	learn: 0.2017523	to

488:	learn: 0.1587963	total: 11.2s	remaining: 11.7s
489:	learn: 0.1585946	total: 11.2s	remaining: 11.7s
490:	learn: 0.1583369	total: 11.3s	remaining: 11.7s
491:	learn: 0.1580153	total: 11.3s	remaining: 11.7s
492:	learn: 0.1578307	total: 11.3s	remaining: 11.6s
493:	learn: 0.1575116	total: 11.3s	remaining: 11.6s
494:	learn: 0.1570052	total: 11.3s	remaining: 11.6s
495:	learn: 0.1564860	total: 11.4s	remaining: 11.5s
496:	learn: 0.1563208	total: 11.4s	remaining: 11.5s
497:	learn: 0.1559978	total: 11.4s	remaining: 11.5s
498:	learn: 0.1558180	total: 11.4s	remaining: 11.4s
499:	learn: 0.1556072	total: 11.4s	remaining: 11.4s
500:	learn: 0.1554429	total: 11.5s	remaining: 11.4s
501:	learn: 0.1551119	total: 11.5s	remaining: 11.4s
502:	learn: 0.1548092	total: 11.5s	remaining: 11.4s
503:	learn: 0.1542202	total: 11.5s	remaining: 11.3s
504:	learn: 0.1541148	total: 11.6s	remaining: 11.3s
505:	learn: 0.1537961	total: 11.6s	remaining: 11.3s
506:	learn: 0.1534964	total: 11.6s	remaining: 11.3s
507:	learn: 

649:	learn: 0.1212743	total: 14.8s	remaining: 7.96s
650:	learn: 0.1211400	total: 14.8s	remaining: 7.95s
651:	learn: 0.1210671	total: 14.9s	remaining: 7.93s
652:	learn: 0.1208773	total: 14.9s	remaining: 7.91s
653:	learn: 0.1206305	total: 14.9s	remaining: 7.89s
654:	learn: 0.1205606	total: 14.9s	remaining: 7.87s
655:	learn: 0.1204536	total: 15s	remaining: 7.84s
656:	learn: 0.1203461	total: 15s	remaining: 7.81s
657:	learn: 0.1202939	total: 15s	remaining: 7.79s
658:	learn: 0.1201506	total: 15s	remaining: 7.76s
659:	learn: 0.1200172	total: 15s	remaining: 7.74s
660:	learn: 0.1198542	total: 15.1s	remaining: 7.72s
661:	learn: 0.1196106	total: 15.1s	remaining: 7.7s
662:	learn: 0.1194760	total: 15.1s	remaining: 7.68s
663:	learn: 0.1192708	total: 15.1s	remaining: 7.65s
664:	learn: 0.1189908	total: 15.1s	remaining: 7.63s
665:	learn: 0.1188007	total: 15.2s	remaining: 7.6s
666:	learn: 0.1186716	total: 15.2s	remaining: 7.57s
667:	learn: 0.1186226	total: 15.2s	remaining: 7.55s
668:	learn: 0.1185148	to

811:	learn: 0.0986717	total: 18.5s	remaining: 4.29s
812:	learn: 0.0985434	total: 18.6s	remaining: 4.27s
813:	learn: 0.0984463	total: 18.6s	remaining: 4.26s
814:	learn: 0.0981772	total: 18.7s	remaining: 4.24s
815:	learn: 0.0979219	total: 18.7s	remaining: 4.22s
816:	learn: 0.0976172	total: 18.8s	remaining: 4.2s
817:	learn: 0.0975394	total: 18.8s	remaining: 4.18s
818:	learn: 0.0974240	total: 18.8s	remaining: 4.16s
819:	learn: 0.0973430	total: 18.9s	remaining: 4.14s
820:	learn: 0.0972882	total: 18.9s	remaining: 4.12s
821:	learn: 0.0971847	total: 18.9s	remaining: 4.09s
822:	learn: 0.0970211	total: 18.9s	remaining: 4.07s
823:	learn: 0.0969176	total: 18.9s	remaining: 4.04s
824:	learn: 0.0968631	total: 18.9s	remaining: 4.02s
825:	learn: 0.0966163	total: 19s	remaining: 3.99s
826:	learn: 0.0965503	total: 19s	remaining: 3.97s
827:	learn: 0.0965350	total: 19s	remaining: 3.95s
828:	learn: 0.0964729	total: 19s	remaining: 3.93s
829:	learn: 0.0964080	total: 19.1s	remaining: 3.9s
830:	learn: 0.0962197	

978:	learn: 0.0817295	total: 22.8s	remaining: 489ms
979:	learn: 0.0816273	total: 22.8s	remaining: 466ms
980:	learn: 0.0815536	total: 22.9s	remaining: 443ms
981:	learn: 0.0815240	total: 22.9s	remaining: 420ms
982:	learn: 0.0814552	total: 22.9s	remaining: 396ms
983:	learn: 0.0813773	total: 22.9s	remaining: 373ms
984:	learn: 0.0813182	total: 23s	remaining: 350ms
985:	learn: 0.0812825	total: 23s	remaining: 326ms
986:	learn: 0.0812317	total: 23s	remaining: 303ms
987:	learn: 0.0811905	total: 23.1s	remaining: 280ms
988:	learn: 0.0810773	total: 23.1s	remaining: 257ms
989:	learn: 0.0810454	total: 23.1s	remaining: 233ms
990:	learn: 0.0809872	total: 23.1s	remaining: 210ms
991:	learn: 0.0808908	total: 23.2s	remaining: 187ms
992:	learn: 0.0808192	total: 23.2s	remaining: 163ms
993:	learn: 0.0807455	total: 23.2s	remaining: 140ms
994:	learn: 0.0806195	total: 23.2s	remaining: 117ms
995:	learn: 0.0804466	total: 23.3s	remaining: 93.5ms
996:	learn: 0.0804111	total: 23.3s	remaining: 70.1ms
997:	learn: 0.08

<catboost.core.CatBoostClassifier at 0x1a1ff240b8>

In [147]:
catboost.score(X_train[skb], y_train)

0.9894625922023182

In [148]:
catboost.score(X_test[skb], y_test)

0.8760504201680672

### Bagging and Vote Classifier

In [160]:
X_train
[skb].shape

(476, 135)

In [165]:
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import BaggingClassifier

# logistic regression
lr = LogisticRegression()

bag_lr = BaggingClassifier(lr,n_estimators = 20, max_features = 60, n_jobs = -1)

# Naive Bayes
nb = MultinomialNB()

bag_nb = BaggingClassifier(nb,n_estimators = 20, max_features = 60, n_jobs = -1)

# XGBoost
xgb = XGBClassifier(N_JOBS = -1)

# catboost
catboost = CatBoostClassifier(custom_loss=['Accuracy'])

# VotingClassifier
vc = VotingClassifier(estimators = [('bag_lr', bag_lr), ('bag_nb', bag_nb), ('xgb', xgb), ('catboost', catboost)], voting = 'soft', n_jobs = -1)


In [166]:
vc.fit(X_train[skb], y_train)

  **self._backend_args)
  **self._backend_args)


0:	learn: 0.6761197	total: 37.7ms	remaining: 37.6s
1:	learn: 0.6617181	total: 97.2ms	remaining: 48.5s
2:	learn: 0.6472268	total: 153ms	remaining: 50.9s
3:	learn: 0.6342943	total: 205ms	remaining: 51s
4:	learn: 0.6207862	total: 266ms	remaining: 52.9s
5:	learn: 0.6063180	total: 326ms	remaining: 53.9s
6:	learn: 0.5966457	total: 383ms	remaining: 54.3s
7:	learn: 0.5855846	total: 435ms	remaining: 54s
8:	learn: 0.5764833	total: 494ms	remaining: 54.4s
9:	learn: 0.5676920	total: 548ms	remaining: 54.2s
10:	learn: 0.5580898	total: 587ms	remaining: 52.7s
11:	learn: 0.5497934	total: 632ms	remaining: 52s
12:	learn: 0.5426894	total: 670ms	remaining: 50.9s
13:	learn: 0.5340712	total: 713ms	remaining: 50.2s
14:	learn: 0.5255871	total: 752ms	remaining: 49.4s
15:	learn: 0.5173603	total: 793ms	remaining: 48.8s
16:	learn: 0.5104591	total: 846ms	remaining: 48.9s
17:	learn: 0.5031691	total: 886ms	remaining: 48.3s
18:	learn: 0.4996715	total: 936ms	remaining: 48.3s
19:	learn: 0.4929036	total: 986ms	remaining: 

161:	learn: 0.2683765	total: 7.29s	remaining: 37.7s
162:	learn: 0.2679401	total: 7.33s	remaining: 37.7s
163:	learn: 0.2672783	total: 7.37s	remaining: 37.6s
164:	learn: 0.2668859	total: 7.39s	remaining: 37.4s
165:	learn: 0.2661309	total: 7.43s	remaining: 37.4s
166:	learn: 0.2658220	total: 7.45s	remaining: 37.2s
167:	learn: 0.2651374	total: 7.5s	remaining: 37.1s
168:	learn: 0.2647045	total: 7.52s	remaining: 37s
169:	learn: 0.2639765	total: 7.56s	remaining: 36.9s
170:	learn: 0.2636104	total: 7.59s	remaining: 36.8s
171:	learn: 0.2632020	total: 7.63s	remaining: 36.7s
172:	learn: 0.2628733	total: 7.66s	remaining: 36.6s
173:	learn: 0.2619482	total: 7.69s	remaining: 36.5s
174:	learn: 0.2616385	total: 7.73s	remaining: 36.4s
175:	learn: 0.2612667	total: 7.76s	remaining: 36.3s
176:	learn: 0.2609292	total: 7.79s	remaining: 36.2s
177:	learn: 0.2604539	total: 7.83s	remaining: 36.1s
178:	learn: 0.2602329	total: 7.86s	remaining: 36s
179:	learn: 0.2596026	total: 7.89s	remaining: 36s
180:	learn: 0.25917

320:	learn: 0.2076803	total: 13.3s	remaining: 28.2s
321:	learn: 0.2074866	total: 13.3s	remaining: 28.1s
322:	learn: 0.2070606	total: 13.4s	remaining: 28.1s
323:	learn: 0.2066764	total: 13.4s	remaining: 28s
324:	learn: 0.2060625	total: 13.4s	remaining: 27.9s
325:	learn: 0.2058874	total: 13.5s	remaining: 27.9s
326:	learn: 0.2056028	total: 13.5s	remaining: 27.8s
327:	learn: 0.2052999	total: 13.6s	remaining: 27.8s
328:	learn: 0.2048873	total: 13.6s	remaining: 27.7s
329:	learn: 0.2044802	total: 13.6s	remaining: 27.7s
330:	learn: 0.2043531	total: 13.7s	remaining: 27.6s
331:	learn: 0.2039804	total: 13.7s	remaining: 27.6s
332:	learn: 0.2035897	total: 13.8s	remaining: 27.6s
333:	learn: 0.2030362	total: 13.8s	remaining: 27.5s
334:	learn: 0.2027636	total: 13.8s	remaining: 27.4s
335:	learn: 0.2025631	total: 13.9s	remaining: 27.4s
336:	learn: 0.2020472	total: 13.9s	remaining: 27.3s
337:	learn: 0.2018740	total: 13.9s	remaining: 27.3s
338:	learn: 0.2015248	total: 14s	remaining: 27.2s
339:	learn: 0.20

479:	learn: 0.1573017	total: 20.1s	remaining: 21.8s
480:	learn: 0.1567765	total: 20.2s	remaining: 21.8s
481:	learn: 0.1564508	total: 20.2s	remaining: 21.7s
482:	learn: 0.1561501	total: 20.2s	remaining: 21.7s
483:	learn: 0.1560836	total: 20.3s	remaining: 21.6s
484:	learn: 0.1559915	total: 20.3s	remaining: 21.6s
485:	learn: 0.1557165	total: 20.4s	remaining: 21.5s
486:	learn: 0.1555296	total: 20.4s	remaining: 21.5s
487:	learn: 0.1550908	total: 20.5s	remaining: 21.5s
488:	learn: 0.1546806	total: 20.5s	remaining: 21.4s
489:	learn: 0.1543483	total: 20.5s	remaining: 21.4s
490:	learn: 0.1538193	total: 20.6s	remaining: 21.3s
491:	learn: 0.1536104	total: 20.6s	remaining: 21.3s
492:	learn: 0.1532306	total: 20.7s	remaining: 21.2s
493:	learn: 0.1527610	total: 20.7s	remaining: 21.2s
494:	learn: 0.1524027	total: 20.7s	remaining: 21.1s
495:	learn: 0.1521326	total: 20.8s	remaining: 21.1s
496:	learn: 0.1520700	total: 20.8s	remaining: 21.1s
497:	learn: 0.1517431	total: 20.9s	remaining: 21s
498:	learn: 0.

638:	learn: 0.1220486	total: 26.9s	remaining: 15.2s
639:	learn: 0.1218968	total: 26.9s	remaining: 15.2s
640:	learn: 0.1218066	total: 27s	remaining: 15.1s
641:	learn: 0.1217368	total: 27s	remaining: 15.1s
642:	learn: 0.1215935	total: 27.1s	remaining: 15s
643:	learn: 0.1215721	total: 27.1s	remaining: 15s
644:	learn: 0.1213630	total: 27.1s	remaining: 14.9s
645:	learn: 0.1210943	total: 27.2s	remaining: 14.9s
646:	learn: 0.1209796	total: 27.2s	remaining: 14.8s
647:	learn: 0.1208509	total: 27.2s	remaining: 14.8s
648:	learn: 0.1208429	total: 27.3s	remaining: 14.7s
649:	learn: 0.1207168	total: 27.3s	remaining: 14.7s
650:	learn: 0.1204384	total: 27.4s	remaining: 14.7s
651:	learn: 0.1201535	total: 27.4s	remaining: 14.6s
652:	learn: 0.1200080	total: 27.4s	remaining: 14.6s
653:	learn: 0.1199991	total: 27.5s	remaining: 14.5s
654:	learn: 0.1198028	total: 27.5s	remaining: 14.5s
655:	learn: 0.1196720	total: 27.6s	remaining: 14.5s
656:	learn: 0.1195506	total: 27.6s	remaining: 14.4s
657:	learn: 0.119461

797:	learn: 0.1000425	total: 33.6s	remaining: 8.51s
798:	learn: 0.0999039	total: 33.7s	remaining: 8.46s
799:	learn: 0.0998566	total: 33.7s	remaining: 8.42s
800:	learn: 0.0997275	total: 33.7s	remaining: 8.38s
801:	learn: 0.0997181	total: 33.8s	remaining: 8.34s
802:	learn: 0.0995187	total: 33.8s	remaining: 8.29s
803:	learn: 0.0994606	total: 33.8s	remaining: 8.25s
804:	learn: 0.0993822	total: 33.9s	remaining: 8.21s
805:	learn: 0.0993332	total: 33.9s	remaining: 8.17s
806:	learn: 0.0992209	total: 34s	remaining: 8.12s
807:	learn: 0.0990454	total: 34s	remaining: 8.08s
808:	learn: 0.0990312	total: 34.1s	remaining: 8.04s
809:	learn: 0.0990118	total: 34.1s	remaining: 8s
810:	learn: 0.0988716	total: 34.1s	remaining: 7.95s
811:	learn: 0.0985067	total: 34.2s	remaining: 7.91s
812:	learn: 0.0982576	total: 34.2s	remaining: 7.87s
813:	learn: 0.0981139	total: 34.3s	remaining: 7.83s
814:	learn: 0.0980007	total: 34.3s	remaining: 7.79s
815:	learn: 0.0976886	total: 34.3s	remaining: 7.74s
816:	learn: 0.09765

956:	learn: 0.0859776	total: 40.3s	remaining: 1.81s
957:	learn: 0.0859476	total: 40.3s	remaining: 1.77s
958:	learn: 0.0858725	total: 40.3s	remaining: 1.73s
959:	learn: 0.0858311	total: 40.4s	remaining: 1.68s
960:	learn: 0.0857883	total: 40.4s	remaining: 1.64s
961:	learn: 0.0857675	total: 40.5s	remaining: 1.6s
962:	learn: 0.0857574	total: 40.5s	remaining: 1.56s
963:	learn: 0.0856639	total: 40.6s	remaining: 1.51s
964:	learn: 0.0856104	total: 40.6s	remaining: 1.47s
965:	learn: 0.0856014	total: 40.6s	remaining: 1.43s
966:	learn: 0.0855637	total: 40.7s	remaining: 1.39s
967:	learn: 0.0855076	total: 40.7s	remaining: 1.35s
968:	learn: 0.0854777	total: 40.8s	remaining: 1.3s
969:	learn: 0.0854056	total: 40.8s	remaining: 1.26s
970:	learn: 0.0853762	total: 40.8s	remaining: 1.22s
971:	learn: 0.0852481	total: 40.9s	remaining: 1.18s
972:	learn: 0.0851793	total: 40.9s	remaining: 1.14s
973:	learn: 0.0850713	total: 41s	remaining: 1.09s
974:	learn: 0.0849751	total: 41s	remaining: 1.05s
975:	learn: 0.0848

VotingClassifier(estimators=[('bag_lr', BaggingClassifier(base_estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=...True,
       subsample=1)), ('catboost', <catboost.core.CatBoostClassifier object at 0x1a1fed53c8>)],
         flatten_transform=None, n_jobs=-1, voting='soft', weights=None)

In [167]:
vc.score(X_train[skb], y_train)

  if diff:


0.9452054794520548

In [168]:
vc.score(X_test[skb], y_test)

  if diff:


0.8865546218487395