# Supervised Learning

Given the **train.csv** and **test.csv**, perform the following actions.
1. Create classifiers from train.csv data, in order to predict ``att10`` as label based on the
rest of the attributes. Use at least 5 algorithms (k-NN, Decision Tree, Logistic
Regression, Voting, Averaging, Bagging, Random Forest, Averaging, Voting, AdaBoost,
XGBoost, LightGBM, CatBoost, or Stacking). Use AUC for your model evaluation
performance.
Submit your .ipynb file that contains your coding process in creating classifiers.
2. Choose the best Classifier based on highest AUC and use it for predicting the test.csv
data.
Submit your .csv file that contains the prediction of test.csv data. Sample of prediction
can be found in **sample_prediction.csv** file. 

In [2]:
# Import the libraries and load the dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
pd.options.display.max_columns = None
pd.options.display.max_rows = 15

data_train = pd.read_csv(r'dataset/train.csv')
data_test = pd.read_csv(r'dataset/test.csv')

display(data_train.sample(5))
display(data_test.sample(5))

print('Train data shape {}'.format(data_train.shape))
print('Test data shape {}'.format(data_test.shape))

Unnamed: 0,att1,att2,att3,att4,att5,att6,att7,att8a,att8b,att8c,att8d,att8e,att8f,att8g,att8h,att8i,att8j,att9a,att9b,att9c,att10
1565,0.91,0.55,3,223,3,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0
7395,0.84,0.89,4,187,2,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0
6636,0.5,0.4,3,180,4,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0
499,0.59,0.72,3,182,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0
2627,0.69,0.82,4,137,2,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0


Unnamed: 0,att1,att2,att3,att4,att5,att6,att7,att8a,att8b,att8c,att8d,att8e,att8f,att8g,att8h,att8i,att8j,att9a,att9b,att9c
855,0.18,0.46,4,249,4,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
860,0.59,0.73,3,172,6,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
259,0.85,0.86,3,255,2,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0
1632,0.1,0.84,6,261,4,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
1369,0.87,0.98,4,173,3,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0


Train data shape (8000, 21)
Test data shape (2000, 20)


We can see that the training and test data already have the same columns, except that the label column `att10` is missing on the test data.

In [5]:
data_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8000 entries, 0 to 7999
Data columns (total 21 columns):
att1     8000 non-null float64
att2     8000 non-null float64
att3     8000 non-null int64
att4     8000 non-null int64
att5     8000 non-null int64
att6     8000 non-null int64
att7     8000 non-null int64
att8a    8000 non-null int64
att8b    8000 non-null int64
att8c    8000 non-null int64
att8d    8000 non-null int64
att8e    8000 non-null int64
att8f    8000 non-null int64
att8g    8000 non-null int64
att8h    8000 non-null int64
att8i    8000 non-null int64
att8j    8000 non-null int64
att9a    8000 non-null int64
att9b    8000 non-null int64
att9c    8000 non-null int64
att10    8000 non-null int64
dtypes: float64(2), int64(19)
memory usage: 1.3 MB


All of the features are numerical and have no missing values, and as we can infer that the `att8` and `att9` are the results of one hot encode of a categorical feature.

In [21]:
display(data_train[data_train.columns[:7]].describe())

Unnamed: 0,att1,att2,att3,att4,att5,att6,att7
count,8000.0,8000.0,8000.0,8000.0,8000.0,8000.0,8000.0
mean,0.61226,0.717594,3.791375,201.245,3.486375,0.145375,0.020625
std,0.248338,0.170957,1.230463,49.837947,1.446055,0.352501,0.142134
min,0.09,0.36,2.0,96.0,2.0,0.0,0.0
25%,0.44,0.56,3.0,156.0,3.0,0.0,0.0
50%,0.64,0.72,4.0,201.0,3.0,0.0,0.0
75%,0.82,0.87,5.0,245.0,4.0,0.0,0.0
max,1.0,1.0,7.0,310.0,10.0,1.0,1.0


In [43]:
display(data_train['att10'].value_counts(normalize=True))

0    0.760125
1    0.239875
Name: att10, dtype: float64

In [72]:
from sklearn.metrics import fbeta_score, roc_auc_score
from time import time

def train_predict(learner, X_train, y_train, X_test, y_test): 
    '''
    inputs:
       - learner: the learning algorithm to be trained and predicted on
       - X_train: features training set
       - y_train: income training set
       - X_test: features testing set
       - y_test: income testing set
    '''
    
    results = {}
    
    # Fit the learner to the training data 
    start = time() # Get start time
    learner.fit(X_train, y_train)
    end = time() # Get end time
    
    # Save the model
    results['model'] = learner
    
    # Calculate the training time
    results['train_time'] = end - start
        
    # Get the predictions on the test set(X_test),
    # then get predictions on the first 300 training samples(X_train) using .predict()
    start = time() # Get start time
    predictions_test = learner.predict(X_test)
    predictions_train = learner.predict(X_train[:300])
    end = time() # Get end time
    
    # Calculate the total prediction time
    results['pred_time'] = end - start
            
    # Compute roc_auc on the first 300 training samples which is y_train[:300]
    results['roc_auc_train'] = roc_auc_score(y_train[:300], predictions_train)
        
    # Compute roc_auc on test set using accuracy_score()
    results['roc_auc_test'] = roc_auc_score(y_test, predictions_test)
    
    # Compute F-score on the the first 300 training samples using fbeta_score()
    results['f_train'] = fbeta_score(y_train[:300], predictions_train, beta=0.5)
        
    # Compute F-score on the test set which is y_test
    results['f_test'] = fbeta_score(y_test, predictions_test, beta=0.5)
       
    # Success
    print("{} trained on {} samples.".format(learner.__class__.__name__, len(X_train)))
        
    # Return the results
    return results

In [69]:
from sklearn.model_selection import KFold

# Drop the last column of each one hot encoded features
X = data_train.drop(['att8j', 'att9c', 'att10'], axis=1)
X = np.array(X)
y = data_train['att10']

kf = KFold(n_splits=5, shuffle=True)


In [73]:
# Import the five supervised learning models from sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB 
from sklearn.tree import DecisionTreeClassifier
import xgboost as xgb
import catboost as cb

# Initialize the five models
clf_A = LogisticRegression()
clf_B = MultinomialNB()
clf_C = DecisionTreeClassifier()
clf_D = xgb.XGBClassifier()
clf_E = cb.CatBoostClassifier()

# Collect results on the learners
results = {}
for clf in [clf_A, clf_B, clf_C, clf_D, clf_E]:
    clf_name = clf.__class__.__name__
    results[clf_name] = {}
    count = 1
    for train, valid in kf.split(X):
        results[clf_name][count] = train_predict(clf, X[train], y[train], X[valid], y[valid])
        count+=1

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


LogisticRegression trained on 6400 samples.
LogisticRegression trained on 6400 samples.
LogisticRegression trained on 6400 samples.
LogisticRegression trained on 6400 samples.
LogisticRegression trained on 6400 samples.
MultinomialNB trained on 6400 samples.
MultinomialNB trained on 6400 samples.
MultinomialNB trained on 6400 samples.
MultinomialNB trained on 6400 samples.
MultinomialNB trained on 6400 samples.
DecisionTreeClassifier trained on 6400 samples.

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)



DecisionTreeClassifier trained on 6400 samples.
DecisionTreeClassifier trained on 6400 samples.
DecisionTreeClassifier trained on 6400 samples.
DecisionTreeClassifier trained on 6400 samples.
XGBClassifier trained on 6400 samples.
XGBClassifier trained on 6400 samples.
XGBClassifier trained on 6400 samples.
XGBClassifier trained on 6400 samples.
XGBClassifier trained on 6400 samples.
Learning rate set to 0.028332
0:	learn: 0.6458392	total: 6.29ms	remaining: 6.29s
1:	learn: 0.6028726	total: 12.5ms	remaining: 6.22s
2:	learn: 0.5660619	total: 17.9ms	remaining: 5.96s
3:	learn: 0.5282945	total: 23.6ms	remaining: 5.87s
4:	learn: 0.4930996	total: 34.1ms	remaining: 6.78s
5:	learn: 0.4620377	total: 41ms	remaining: 6.8s
6:	learn: 0.4348961	total: 48.1ms	remaining: 6.82s
7:	learn: 0.4110067	total: 55ms	remaining: 6.81s
8:	learn: 0.3869799	total: 60.8ms	remaining: 6.7s
9:	learn: 0.3660858	total: 69.7ms	remaining: 6.9s
10:	learn: 0.3470746	total: 76.4ms	remaining: 6.87s
11:	learn: 0.3288842	total:

167:	learn: 0.0751175	total: 1.2s	remaining: 5.93s
168:	learn: 0.0750101	total: 1.2s	remaining: 5.92s
169:	learn: 0.0748796	total: 1.21s	remaining: 5.92s
170:	learn: 0.0747074	total: 1.22s	remaining: 5.91s
171:	learn: 0.0745666	total: 1.22s	remaining: 5.9s
172:	learn: 0.0744172	total: 1.23s	remaining: 5.89s
173:	learn: 0.0742171	total: 1.24s	remaining: 5.88s
174:	learn: 0.0740968	total: 1.25s	remaining: 5.87s
175:	learn: 0.0739975	total: 1.25s	remaining: 5.87s
176:	learn: 0.0738332	total: 1.26s	remaining: 5.86s
177:	learn: 0.0736920	total: 1.27s	remaining: 5.86s
178:	learn: 0.0734201	total: 1.27s	remaining: 5.85s
179:	learn: 0.0732172	total: 1.28s	remaining: 5.85s
180:	learn: 0.0730977	total: 1.29s	remaining: 5.84s
181:	learn: 0.0729411	total: 1.3s	remaining: 5.84s
182:	learn: 0.0727873	total: 1.31s	remaining: 5.83s
183:	learn: 0.0724607	total: 1.31s	remaining: 5.83s
184:	learn: 0.0722607	total: 1.32s	remaining: 5.83s
185:	learn: 0.0721432	total: 1.33s	remaining: 5.82s
186:	learn: 0.07

343:	learn: 0.0547023	total: 2.43s	remaining: 4.63s
344:	learn: 0.0546216	total: 2.44s	remaining: 4.63s
345:	learn: 0.0545596	total: 2.44s	remaining: 4.62s
346:	learn: 0.0544788	total: 2.45s	remaining: 4.61s
347:	learn: 0.0544091	total: 2.46s	remaining: 4.61s
348:	learn: 0.0542943	total: 2.47s	remaining: 4.61s
349:	learn: 0.0541519	total: 2.48s	remaining: 4.6s
350:	learn: 0.0539607	total: 2.48s	remaining: 4.59s
351:	learn: 0.0538436	total: 2.49s	remaining: 4.59s
352:	learn: 0.0537798	total: 2.5s	remaining: 4.58s
353:	learn: 0.0536980	total: 2.51s	remaining: 4.57s
354:	learn: 0.0536169	total: 2.51s	remaining: 4.57s
355:	learn: 0.0535373	total: 2.52s	remaining: 4.56s
356:	learn: 0.0534434	total: 2.53s	remaining: 4.55s
357:	learn: 0.0533587	total: 2.54s	remaining: 4.55s
358:	learn: 0.0532545	total: 2.54s	remaining: 4.54s
359:	learn: 0.0531568	total: 2.55s	remaining: 4.54s
360:	learn: 0.0530703	total: 2.56s	remaining: 4.53s
361:	learn: 0.0530359	total: 2.57s	remaining: 4.53s
362:	learn: 0.

518:	learn: 0.0422624	total: 3.66s	remaining: 3.39s
519:	learn: 0.0422180	total: 3.67s	remaining: 3.38s
520:	learn: 0.0421691	total: 3.67s	remaining: 3.38s
521:	learn: 0.0421032	total: 3.68s	remaining: 3.37s
522:	learn: 0.0420543	total: 3.69s	remaining: 3.36s
523:	learn: 0.0420351	total: 3.69s	remaining: 3.36s
524:	learn: 0.0419841	total: 3.7s	remaining: 3.35s
525:	learn: 0.0419395	total: 3.71s	remaining: 3.34s
526:	learn: 0.0418479	total: 3.71s	remaining: 3.33s
527:	learn: 0.0417965	total: 3.72s	remaining: 3.33s
528:	learn: 0.0417615	total: 3.73s	remaining: 3.32s
529:	learn: 0.0417295	total: 3.73s	remaining: 3.31s
530:	learn: 0.0416266	total: 3.74s	remaining: 3.31s
531:	learn: 0.0415629	total: 3.75s	remaining: 3.3s
532:	learn: 0.0415195	total: 3.75s	remaining: 3.29s
533:	learn: 0.0414330	total: 3.76s	remaining: 3.28s
534:	learn: 0.0413730	total: 3.77s	remaining: 3.28s
535:	learn: 0.0412827	total: 3.77s	remaining: 3.27s
536:	learn: 0.0412516	total: 3.78s	remaining: 3.26s
537:	learn: 0.

698:	learn: 0.0343144	total: 4.89s	remaining: 2.11s
699:	learn: 0.0342422	total: 4.9s	remaining: 2.1s
700:	learn: 0.0341463	total: 4.91s	remaining: 2.09s
701:	learn: 0.0341036	total: 4.92s	remaining: 2.09s
702:	learn: 0.0340843	total: 4.92s	remaining: 2.08s
703:	learn: 0.0340576	total: 4.93s	remaining: 2.07s
704:	learn: 0.0340455	total: 4.94s	remaining: 2.06s
705:	learn: 0.0340061	total: 4.94s	remaining: 2.06s
706:	learn: 0.0339903	total: 4.95s	remaining: 2.05s
707:	learn: 0.0339666	total: 4.96s	remaining: 2.04s
708:	learn: 0.0339187	total: 4.96s	remaining: 2.04s
709:	learn: 0.0338774	total: 4.97s	remaining: 2.03s
710:	learn: 0.0338372	total: 4.98s	remaining: 2.02s
711:	learn: 0.0337888	total: 4.98s	remaining: 2.02s
712:	learn: 0.0336937	total: 4.99s	remaining: 2.01s
713:	learn: 0.0336747	total: 5s	remaining: 2s
714:	learn: 0.0336453	total: 5s	remaining: 2s
715:	learn: 0.0335795	total: 5.01s	remaining: 1.99s
716:	learn: 0.0335513	total: 5.02s	remaining: 1.98s
717:	learn: 0.0335116	tota

875:	learn: 0.0279067	total: 6.13s	remaining: 867ms
876:	learn: 0.0278673	total: 6.13s	remaining: 860ms
877:	learn: 0.0278552	total: 6.14s	remaining: 853ms
878:	learn: 0.0278180	total: 6.15s	remaining: 847ms
879:	learn: 0.0277725	total: 6.16s	remaining: 840ms
880:	learn: 0.0277465	total: 6.16s	remaining: 833ms
881:	learn: 0.0277128	total: 6.17s	remaining: 825ms
882:	learn: 0.0276704	total: 6.18s	remaining: 818ms
883:	learn: 0.0276511	total: 6.18s	remaining: 811ms
884:	learn: 0.0276387	total: 6.19s	remaining: 804ms
885:	learn: 0.0275981	total: 6.2s	remaining: 797ms
886:	learn: 0.0275820	total: 6.2s	remaining: 790ms
887:	learn: 0.0275645	total: 6.21s	remaining: 783ms
888:	learn: 0.0275340	total: 6.22s	remaining: 776ms
889:	learn: 0.0275214	total: 6.22s	remaining: 769ms
890:	learn: 0.0274737	total: 6.23s	remaining: 762ms
891:	learn: 0.0274603	total: 6.24s	remaining: 755ms
892:	learn: 0.0274061	total: 6.24s	remaining: 748ms
893:	learn: 0.0273519	total: 6.25s	remaining: 741ms
894:	learn: 0.

39:	learn: 0.1473307	total: 285ms	remaining: 6.85s
40:	learn: 0.1449163	total: 292ms	remaining: 6.84s
41:	learn: 0.1427262	total: 300ms	remaining: 6.83s
42:	learn: 0.1413223	total: 307ms	remaining: 6.83s
43:	learn: 0.1392843	total: 315ms	remaining: 6.83s
44:	learn: 0.1379640	total: 323ms	remaining: 6.85s
45:	learn: 0.1361908	total: 330ms	remaining: 6.85s
46:	learn: 0.1349866	total: 337ms	remaining: 6.83s
47:	learn: 0.1334227	total: 344ms	remaining: 6.82s
48:	learn: 0.1324431	total: 350ms	remaining: 6.8s
49:	learn: 0.1310092	total: 357ms	remaining: 6.78s
50:	learn: 0.1301738	total: 364ms	remaining: 6.76s
51:	learn: 0.1290573	total: 370ms	remaining: 6.75s
52:	learn: 0.1276851	total: 377ms	remaining: 6.74s
53:	learn: 0.1265236	total: 384ms	remaining: 6.72s
54:	learn: 0.1255087	total: 390ms	remaining: 6.71s
55:	learn: 0.1245477	total: 397ms	remaining: 6.69s
56:	learn: 0.1233100	total: 404ms	remaining: 6.68s
57:	learn: 0.1222918	total: 411ms	remaining: 6.67s
58:	learn: 0.1213973	total: 418m

213:	learn: 0.0722283	total: 1.52s	remaining: 5.58s
214:	learn: 0.0720981	total: 1.52s	remaining: 5.57s
215:	learn: 0.0719738	total: 1.53s	remaining: 5.56s
216:	learn: 0.0717731	total: 1.54s	remaining: 5.56s
217:	learn: 0.0716543	total: 1.55s	remaining: 5.55s
218:	learn: 0.0714984	total: 1.55s	remaining: 5.55s
219:	learn: 0.0713446	total: 1.56s	remaining: 5.54s
220:	learn: 0.0712261	total: 1.57s	remaining: 5.54s
221:	learn: 0.0711509	total: 1.58s	remaining: 5.53s
222:	learn: 0.0709453	total: 1.58s	remaining: 5.52s
223:	learn: 0.0708393	total: 1.59s	remaining: 5.51s
224:	learn: 0.0706957	total: 1.6s	remaining: 5.5s
225:	learn: 0.0706074	total: 1.6s	remaining: 5.5s
226:	learn: 0.0704899	total: 1.61s	remaining: 5.49s
227:	learn: 0.0704008	total: 1.62s	remaining: 5.48s
228:	learn: 0.0702708	total: 1.63s	remaining: 5.47s
229:	learn: 0.0701125	total: 1.63s	remaining: 5.46s
230:	learn: 0.0699737	total: 1.64s	remaining: 5.46s
231:	learn: 0.0698966	total: 1.65s	remaining: 5.45s
232:	learn: 0.06

388:	learn: 0.0540496	total: 2.75s	remaining: 4.31s
389:	learn: 0.0539496	total: 2.75s	remaining: 4.3s
390:	learn: 0.0539129	total: 2.76s	remaining: 4.3s
391:	learn: 0.0538507	total: 2.77s	remaining: 4.29s
392:	learn: 0.0537525	total: 2.78s	remaining: 4.29s
393:	learn: 0.0537145	total: 2.78s	remaining: 4.28s
394:	learn: 0.0536027	total: 2.79s	remaining: 4.27s
395:	learn: 0.0535099	total: 2.8s	remaining: 4.27s
396:	learn: 0.0533864	total: 2.81s	remaining: 4.26s
397:	learn: 0.0532679	total: 2.81s	remaining: 4.25s
398:	learn: 0.0532048	total: 2.82s	remaining: 4.25s
399:	learn: 0.0531392	total: 2.83s	remaining: 4.24s
400:	learn: 0.0530181	total: 2.83s	remaining: 4.23s
401:	learn: 0.0529480	total: 2.84s	remaining: 4.22s
402:	learn: 0.0528587	total: 2.85s	remaining: 4.22s
403:	learn: 0.0528053	total: 2.85s	remaining: 4.21s
404:	learn: 0.0527195	total: 2.86s	remaining: 4.2s
405:	learn: 0.0526586	total: 2.87s	remaining: 4.2s
406:	learn: 0.0525010	total: 2.88s	remaining: 4.19s
407:	learn: 0.052

563:	learn: 0.0419759	total: 3.97s	remaining: 3.07s
564:	learn: 0.0418862	total: 3.98s	remaining: 3.06s
565:	learn: 0.0418372	total: 3.99s	remaining: 3.06s
566:	learn: 0.0417959	total: 4s	remaining: 3.05s
567:	learn: 0.0417415	total: 4s	remaining: 3.04s
568:	learn: 0.0416931	total: 4.01s	remaining: 3.04s
569:	learn: 0.0416401	total: 4.02s	remaining: 3.03s
570:	learn: 0.0415368	total: 4.02s	remaining: 3.02s
571:	learn: 0.0414457	total: 4.03s	remaining: 3.02s
572:	learn: 0.0414197	total: 4.04s	remaining: 3.01s
573:	learn: 0.0413866	total: 4.05s	remaining: 3s
574:	learn: 0.0413473	total: 4.05s	remaining: 3s
575:	learn: 0.0412980	total: 4.06s	remaining: 2.99s
576:	learn: 0.0412147	total: 4.07s	remaining: 2.98s
577:	learn: 0.0411546	total: 4.07s	remaining: 2.97s
578:	learn: 0.0411100	total: 4.08s	remaining: 2.97s
579:	learn: 0.0410433	total: 4.09s	remaining: 2.96s
580:	learn: 0.0409780	total: 4.09s	remaining: 2.95s
581:	learn: 0.0408696	total: 4.1s	remaining: 2.94s
582:	learn: 0.0408266	tot

742:	learn: 0.0339103	total: 5.21s	remaining: 1.8s
743:	learn: 0.0338486	total: 5.22s	remaining: 1.79s
744:	learn: 0.0338172	total: 5.22s	remaining: 1.79s
745:	learn: 0.0338012	total: 5.23s	remaining: 1.78s
746:	learn: 0.0337022	total: 5.24s	remaining: 1.77s
747:	learn: 0.0336446	total: 5.25s	remaining: 1.77s
748:	learn: 0.0336006	total: 5.26s	remaining: 1.76s
749:	learn: 0.0335645	total: 5.26s	remaining: 1.75s
750:	learn: 0.0335380	total: 5.27s	remaining: 1.75s
751:	learn: 0.0334632	total: 5.28s	remaining: 1.74s
752:	learn: 0.0333702	total: 5.28s	remaining: 1.73s
753:	learn: 0.0333570	total: 5.29s	remaining: 1.73s
754:	learn: 0.0333110	total: 5.3s	remaining: 1.72s
755:	learn: 0.0332926	total: 5.3s	remaining: 1.71s
756:	learn: 0.0332751	total: 5.31s	remaining: 1.7s
757:	learn: 0.0332390	total: 5.32s	remaining: 1.7s
758:	learn: 0.0331845	total: 5.32s	remaining: 1.69s
759:	learn: 0.0331014	total: 5.33s	remaining: 1.68s
760:	learn: 0.0330891	total: 5.34s	remaining: 1.68s
761:	learn: 0.033

920:	learn: 0.0277283	total: 6.44s	remaining: 552ms
921:	learn: 0.0277174	total: 6.44s	remaining: 545ms
922:	learn: 0.0276944	total: 6.45s	remaining: 538ms
923:	learn: 0.0276851	total: 6.46s	remaining: 531ms
924:	learn: 0.0276537	total: 6.46s	remaining: 524ms
925:	learn: 0.0276281	total: 6.47s	remaining: 517ms
926:	learn: 0.0276106	total: 6.48s	remaining: 510ms
927:	learn: 0.0275686	total: 6.49s	remaining: 503ms
928:	learn: 0.0275598	total: 6.49s	remaining: 496ms
929:	learn: 0.0275114	total: 6.5s	remaining: 489ms
930:	learn: 0.0274920	total: 6.5s	remaining: 482ms
931:	learn: 0.0274714	total: 6.51s	remaining: 475ms
932:	learn: 0.0274391	total: 6.52s	remaining: 468ms
933:	learn: 0.0274055	total: 6.52s	remaining: 461ms
934:	learn: 0.0273961	total: 6.53s	remaining: 454ms
935:	learn: 0.0273770	total: 6.54s	remaining: 447ms
936:	learn: 0.0273422	total: 6.54s	remaining: 440ms
937:	learn: 0.0273132	total: 6.55s	remaining: 433ms
938:	learn: 0.0272735	total: 6.56s	remaining: 426ms
939:	learn: 0.

88:	learn: 0.0967402	total: 620ms	remaining: 6.34s
89:	learn: 0.0965033	total: 627ms	remaining: 6.34s
90:	learn: 0.0959504	total: 634ms	remaining: 6.33s
91:	learn: 0.0953471	total: 641ms	remaining: 6.33s
92:	learn: 0.0950538	total: 649ms	remaining: 6.33s
93:	learn: 0.0946969	total: 662ms	remaining: 6.38s
94:	learn: 0.0943186	total: 671ms	remaining: 6.39s
95:	learn: 0.0939300	total: 678ms	remaining: 6.39s
96:	learn: 0.0936639	total: 685ms	remaining: 6.38s
97:	learn: 0.0932814	total: 692ms	remaining: 6.37s
98:	learn: 0.0930090	total: 700ms	remaining: 6.37s
99:	learn: 0.0925741	total: 707ms	remaining: 6.36s
100:	learn: 0.0921251	total: 714ms	remaining: 6.36s
101:	learn: 0.0916346	total: 721ms	remaining: 6.35s
102:	learn: 0.0913791	total: 729ms	remaining: 6.35s
103:	learn: 0.0910692	total: 735ms	remaining: 6.33s
104:	learn: 0.0906437	total: 743ms	remaining: 6.33s
105:	learn: 0.0903849	total: 750ms	remaining: 6.32s
106:	learn: 0.0900293	total: 757ms	remaining: 6.32s
107:	learn: 0.0897835	to

255:	learn: 0.0636015	total: 1.85s	remaining: 5.38s
256:	learn: 0.0635468	total: 1.86s	remaining: 5.37s
257:	learn: 0.0634285	total: 1.86s	remaining: 5.36s
258:	learn: 0.0632414	total: 1.87s	remaining: 5.35s
259:	learn: 0.0630089	total: 1.88s	remaining: 5.35s
260:	learn: 0.0629301	total: 1.89s	remaining: 5.34s
261:	learn: 0.0628586	total: 1.89s	remaining: 5.33s
262:	learn: 0.0627729	total: 1.9s	remaining: 5.33s
263:	learn: 0.0626585	total: 1.91s	remaining: 5.32s
264:	learn: 0.0625163	total: 1.91s	remaining: 5.31s
265:	learn: 0.0624503	total: 1.93s	remaining: 5.32s
266:	learn: 0.0623698	total: 1.94s	remaining: 5.31s
267:	learn: 0.0622578	total: 1.94s	remaining: 5.31s
268:	learn: 0.0621287	total: 1.95s	remaining: 5.31s
269:	learn: 0.0620667	total: 1.96s	remaining: 5.3s
270:	learn: 0.0620065	total: 1.97s	remaining: 5.3s
271:	learn: 0.0619508	total: 1.98s	remaining: 5.3s
272:	learn: 0.0618431	total: 1.99s	remaining: 5.29s
273:	learn: 0.0617349	total: 2s	remaining: 5.29s
274:	learn: 0.06163

419:	learn: 0.0492668	total: 3.08s	remaining: 4.26s
420:	learn: 0.0492136	total: 3.09s	remaining: 4.25s
421:	learn: 0.0491622	total: 3.1s	remaining: 4.25s
422:	learn: 0.0490808	total: 3.11s	remaining: 4.24s
423:	learn: 0.0490262	total: 3.11s	remaining: 4.23s
424:	learn: 0.0489824	total: 3.12s	remaining: 4.22s
425:	learn: 0.0488878	total: 3.13s	remaining: 4.22s
426:	learn: 0.0488135	total: 3.14s	remaining: 4.21s
427:	learn: 0.0487494	total: 3.14s	remaining: 4.2s
428:	learn: 0.0486997	total: 3.15s	remaining: 4.19s
429:	learn: 0.0486077	total: 3.16s	remaining: 4.19s
430:	learn: 0.0485647	total: 3.17s	remaining: 4.18s
431:	learn: 0.0484839	total: 3.17s	remaining: 4.17s
432:	learn: 0.0483902	total: 3.18s	remaining: 4.16s
433:	learn: 0.0483355	total: 3.19s	remaining: 4.16s
434:	learn: 0.0482844	total: 3.19s	remaining: 4.15s
435:	learn: 0.0482140	total: 3.2s	remaining: 4.14s
436:	learn: 0.0481222	total: 3.21s	remaining: 4.13s
437:	learn: 0.0480202	total: 3.21s	remaining: 4.12s
438:	learn: 0.0

589:	learn: 0.0395992	total: 4.32s	remaining: 3s
590:	learn: 0.0395745	total: 4.33s	remaining: 2.99s
591:	learn: 0.0395220	total: 4.33s	remaining: 2.99s
592:	learn: 0.0395016	total: 4.34s	remaining: 2.98s
593:	learn: 0.0393846	total: 4.35s	remaining: 2.97s
594:	learn: 0.0392959	total: 4.36s	remaining: 2.96s
595:	learn: 0.0392014	total: 4.36s	remaining: 2.96s
596:	learn: 0.0391262	total: 4.37s	remaining: 2.95s
597:	learn: 0.0390413	total: 4.38s	remaining: 2.94s
598:	learn: 0.0389970	total: 4.38s	remaining: 2.93s
599:	learn: 0.0389737	total: 4.39s	remaining: 2.93s
600:	learn: 0.0389400	total: 4.4s	remaining: 2.92s
601:	learn: 0.0389138	total: 4.4s	remaining: 2.91s
602:	learn: 0.0388683	total: 4.41s	remaining: 2.9s
603:	learn: 0.0388389	total: 4.42s	remaining: 2.9s
604:	learn: 0.0387905	total: 4.42s	remaining: 2.89s
605:	learn: 0.0387540	total: 4.43s	remaining: 2.88s
606:	learn: 0.0386802	total: 4.44s	remaining: 2.87s
607:	learn: 0.0386484	total: 4.44s	remaining: 2.87s
608:	learn: 0.03862

760:	learn: 0.0316460	total: 5.55s	remaining: 1.74s
761:	learn: 0.0316171	total: 5.56s	remaining: 1.74s
762:	learn: 0.0315698	total: 5.57s	remaining: 1.73s
763:	learn: 0.0315292	total: 5.58s	remaining: 1.72s
764:	learn: 0.0315102	total: 5.58s	remaining: 1.72s
765:	learn: 0.0314229	total: 5.59s	remaining: 1.71s
766:	learn: 0.0314062	total: 5.6s	remaining: 1.7s
767:	learn: 0.0313710	total: 5.61s	remaining: 1.69s
768:	learn: 0.0313473	total: 5.61s	remaining: 1.69s
769:	learn: 0.0313247	total: 5.62s	remaining: 1.68s
770:	learn: 0.0313083	total: 5.63s	remaining: 1.67s
771:	learn: 0.0312676	total: 5.64s	remaining: 1.67s
772:	learn: 0.0312528	total: 5.64s	remaining: 1.66s
773:	learn: 0.0311836	total: 5.65s	remaining: 1.65s
774:	learn: 0.0311594	total: 5.66s	remaining: 1.64s
775:	learn: 0.0311403	total: 5.67s	remaining: 1.64s
776:	learn: 0.0311043	total: 5.67s	remaining: 1.63s
777:	learn: 0.0310425	total: 5.68s	remaining: 1.62s
778:	learn: 0.0310294	total: 5.68s	remaining: 1.61s
779:	learn: 0.

935:	learn: 0.0263806	total: 6.79s	remaining: 464ms
936:	learn: 0.0263589	total: 6.79s	remaining: 457ms
937:	learn: 0.0263193	total: 6.8s	remaining: 450ms
938:	learn: 0.0262830	total: 6.81s	remaining: 443ms
939:	learn: 0.0262624	total: 6.82s	remaining: 435ms
940:	learn: 0.0262506	total: 6.83s	remaining: 428ms
941:	learn: 0.0262234	total: 6.83s	remaining: 421ms
942:	learn: 0.0262064	total: 6.84s	remaining: 413ms
943:	learn: 0.0261794	total: 6.85s	remaining: 406ms
944:	learn: 0.0261612	total: 6.85s	remaining: 399ms
945:	learn: 0.0261265	total: 6.86s	remaining: 392ms
946:	learn: 0.0261027	total: 6.87s	remaining: 384ms
947:	learn: 0.0260787	total: 6.88s	remaining: 377ms
948:	learn: 0.0260488	total: 6.88s	remaining: 370ms
949:	learn: 0.0260179	total: 6.89s	remaining: 363ms
950:	learn: 0.0260058	total: 6.9s	remaining: 355ms
951:	learn: 0.0259797	total: 6.9s	remaining: 348ms
952:	learn: 0.0259611	total: 6.91s	remaining: 341ms
953:	learn: 0.0259367	total: 6.92s	remaining: 334ms
954:	learn: 0.0

104:	learn: 0.0898169	total: 725ms	remaining: 6.18s
105:	learn: 0.0894907	total: 732ms	remaining: 6.17s
106:	learn: 0.0891586	total: 741ms	remaining: 6.18s
107:	learn: 0.0889111	total: 747ms	remaining: 6.17s
108:	learn: 0.0885954	total: 754ms	remaining: 6.16s
109:	learn: 0.0881452	total: 761ms	remaining: 6.15s
110:	learn: 0.0877335	total: 767ms	remaining: 6.14s
111:	learn: 0.0873083	total: 774ms	remaining: 6.13s
112:	learn: 0.0869482	total: 781ms	remaining: 6.13s
113:	learn: 0.0867863	total: 787ms	remaining: 6.12s
114:	learn: 0.0864189	total: 794ms	remaining: 6.11s
115:	learn: 0.0860941	total: 801ms	remaining: 6.11s
116:	learn: 0.0858803	total: 808ms	remaining: 6.1s
117:	learn: 0.0856839	total: 814ms	remaining: 6.09s
118:	learn: 0.0853531	total: 821ms	remaining: 6.08s
119:	learn: 0.0851466	total: 828ms	remaining: 6.07s
120:	learn: 0.0848634	total: 835ms	remaining: 6.06s
121:	learn: 0.0846292	total: 841ms	remaining: 6.05s
122:	learn: 0.0844176	total: 848ms	remaining: 6.04s
123:	learn: 0

282:	learn: 0.0605059	total: 1.97s	remaining: 5s
283:	learn: 0.0604001	total: 1.98s	remaining: 5s
284:	learn: 0.0603201	total: 1.99s	remaining: 4.99s
285:	learn: 0.0602507	total: 2s	remaining: 4.98s
286:	learn: 0.0601926	total: 2s	remaining: 4.98s
287:	learn: 0.0601087	total: 2.01s	remaining: 4.97s
288:	learn: 0.0600116	total: 2.02s	remaining: 4.97s
289:	learn: 0.0598633	total: 2.03s	remaining: 4.97s
290:	learn: 0.0597355	total: 2.04s	remaining: 4.97s
291:	learn: 0.0596469	total: 2.05s	remaining: 4.97s
292:	learn: 0.0595217	total: 2.06s	remaining: 4.96s
293:	learn: 0.0594029	total: 2.06s	remaining: 4.96s
294:	learn: 0.0593423	total: 2.08s	remaining: 4.96s
295:	learn: 0.0592149	total: 2.08s	remaining: 4.96s
296:	learn: 0.0591175	total: 2.09s	remaining: 4.95s
297:	learn: 0.0590871	total: 2.1s	remaining: 4.94s
298:	learn: 0.0590177	total: 2.1s	remaining: 4.93s
299:	learn: 0.0589296	total: 2.11s	remaining: 4.93s
300:	learn: 0.0588509	total: 2.12s	remaining: 4.92s
301:	learn: 0.0587924	tota

462:	learn: 0.0465515	total: 3.35s	remaining: 3.89s
463:	learn: 0.0464929	total: 3.36s	remaining: 3.88s
464:	learn: 0.0464263	total: 3.37s	remaining: 3.88s
465:	learn: 0.0463470	total: 3.38s	remaining: 3.87s
466:	learn: 0.0462961	total: 3.38s	remaining: 3.86s
467:	learn: 0.0462092	total: 3.39s	remaining: 3.85s
468:	learn: 0.0461604	total: 3.4s	remaining: 3.85s
469:	learn: 0.0461203	total: 3.4s	remaining: 3.84s
470:	learn: 0.0460208	total: 3.41s	remaining: 3.83s
471:	learn: 0.0459622	total: 3.42s	remaining: 3.83s
472:	learn: 0.0459181	total: 3.43s	remaining: 3.82s
473:	learn: 0.0458558	total: 3.44s	remaining: 3.82s
474:	learn: 0.0458227	total: 3.44s	remaining: 3.81s
475:	learn: 0.0457861	total: 3.45s	remaining: 3.8s
476:	learn: 0.0457434	total: 3.46s	remaining: 3.79s
477:	learn: 0.0456898	total: 3.47s	remaining: 3.79s
478:	learn: 0.0456423	total: 3.47s	remaining: 3.78s
479:	learn: 0.0455953	total: 3.48s	remaining: 3.77s
480:	learn: 0.0455362	total: 3.49s	remaining: 3.77s
481:	learn: 0.0

642:	learn: 0.0378029	total: 4.75s	remaining: 2.64s
643:	learn: 0.0377483	total: 4.76s	remaining: 2.63s
644:	learn: 0.0377046	total: 4.77s	remaining: 2.63s
645:	learn: 0.0376889	total: 4.78s	remaining: 2.62s
646:	learn: 0.0376579	total: 4.78s	remaining: 2.61s
647:	learn: 0.0375752	total: 4.79s	remaining: 2.6s
648:	learn: 0.0375499	total: 4.8s	remaining: 2.6s
649:	learn: 0.0375044	total: 4.8s	remaining: 2.59s
650:	learn: 0.0374718	total: 4.81s	remaining: 2.58s
651:	learn: 0.0374548	total: 4.83s	remaining: 2.58s
652:	learn: 0.0374024	total: 4.83s	remaining: 2.57s
653:	learn: 0.0373717	total: 4.84s	remaining: 2.56s
654:	learn: 0.0373438	total: 4.85s	remaining: 2.55s
655:	learn: 0.0372833	total: 4.86s	remaining: 2.55s
656:	learn: 0.0372459	total: 4.87s	remaining: 2.54s
657:	learn: 0.0372225	total: 4.87s	remaining: 2.53s
658:	learn: 0.0372038	total: 4.88s	remaining: 2.52s
659:	learn: 0.0371877	total: 4.89s	remaining: 2.52s
660:	learn: 0.0371678	total: 4.9s	remaining: 2.51s
661:	learn: 0.037

814:	learn: 0.0313142	total: 6s	remaining: 1.36s
815:	learn: 0.0312758	total: 6.01s	remaining: 1.35s
816:	learn: 0.0312609	total: 6.02s	remaining: 1.35s
817:	learn: 0.0312457	total: 6.03s	remaining: 1.34s
818:	learn: 0.0312063	total: 6.03s	remaining: 1.33s
819:	learn: 0.0311884	total: 6.04s	remaining: 1.33s
820:	learn: 0.0311760	total: 6.05s	remaining: 1.32s
821:	learn: 0.0311510	total: 6.06s	remaining: 1.31s
822:	learn: 0.0311293	total: 6.06s	remaining: 1.3s
823:	learn: 0.0310859	total: 6.07s	remaining: 1.3s
824:	learn: 0.0310100	total: 6.08s	remaining: 1.29s
825:	learn: 0.0308767	total: 6.09s	remaining: 1.28s
826:	learn: 0.0307565	total: 6.09s	remaining: 1.27s
827:	learn: 0.0307318	total: 6.1s	remaining: 1.27s
828:	learn: 0.0306646	total: 6.11s	remaining: 1.26s
829:	learn: 0.0306369	total: 6.12s	remaining: 1.25s
830:	learn: 0.0306172	total: 6.13s	remaining: 1.25s
831:	learn: 0.0305836	total: 6.13s	remaining: 1.24s
832:	learn: 0.0305389	total: 6.14s	remaining: 1.23s
833:	learn: 0.0304

982:	learn: 0.0258552	total: 7.25s	remaining: 125ms
983:	learn: 0.0257985	total: 7.26s	remaining: 118ms
984:	learn: 0.0257834	total: 7.27s	remaining: 111ms
985:	learn: 0.0257696	total: 7.28s	remaining: 103ms
986:	learn: 0.0257331	total: 7.29s	remaining: 96ms
987:	learn: 0.0256795	total: 7.29s	remaining: 88.6ms
988:	learn: 0.0256298	total: 7.3s	remaining: 81.2ms
989:	learn: 0.0256151	total: 7.31s	remaining: 73.8ms
990:	learn: 0.0255754	total: 7.32s	remaining: 66.4ms
991:	learn: 0.0255306	total: 7.32s	remaining: 59.1ms
992:	learn: 0.0255021	total: 7.33s	remaining: 51.7ms
993:	learn: 0.0254588	total: 7.34s	remaining: 44.3ms
994:	learn: 0.0254475	total: 7.34s	remaining: 36.9ms
995:	learn: 0.0254114	total: 7.35s	remaining: 29.5ms
996:	learn: 0.0253552	total: 7.36s	remaining: 22.2ms
997:	learn: 0.0253103	total: 7.37s	remaining: 14.8ms
998:	learn: 0.0252709	total: 7.38s	remaining: 7.38ms
999:	learn: 0.0252265	total: 7.38s	remaining: 0us
CatBoostClassifier trained on 6400 samples.
Learning rat

158:	learn: 0.0772238	total: 1.18s	remaining: 6.24s
159:	learn: 0.0770884	total: 1.19s	remaining: 6.23s
160:	learn: 0.0769731	total: 1.19s	remaining: 6.22s
161:	learn: 0.0768405	total: 1.2s	remaining: 6.21s
162:	learn: 0.0766198	total: 1.21s	remaining: 6.2s
163:	learn: 0.0765046	total: 1.22s	remaining: 6.2s
164:	learn: 0.0762952	total: 1.22s	remaining: 6.2s
165:	learn: 0.0761285	total: 1.24s	remaining: 6.21s
166:	learn: 0.0757125	total: 1.24s	remaining: 6.2s
167:	learn: 0.0755383	total: 1.25s	remaining: 6.19s
168:	learn: 0.0753031	total: 1.26s	remaining: 6.18s
169:	learn: 0.0751015	total: 1.26s	remaining: 6.16s
170:	learn: 0.0748873	total: 1.27s	remaining: 6.15s
171:	learn: 0.0747209	total: 1.27s	remaining: 6.14s
172:	learn: 0.0745856	total: 1.28s	remaining: 6.13s
173:	learn: 0.0744412	total: 1.29s	remaining: 6.12s
174:	learn: 0.0742906	total: 1.3s	remaining: 6.11s
175:	learn: 0.0741138	total: 1.3s	remaining: 6.1s
176:	learn: 0.0738857	total: 1.31s	remaining: 6.09s
177:	learn: 0.073751

333:	learn: 0.0554967	total: 2.41s	remaining: 4.8s
334:	learn: 0.0554200	total: 2.42s	remaining: 4.8s
335:	learn: 0.0552506	total: 2.42s	remaining: 4.79s
336:	learn: 0.0551518	total: 2.43s	remaining: 4.79s
337:	learn: 0.0550551	total: 2.44s	remaining: 4.78s
338:	learn: 0.0549406	total: 2.45s	remaining: 4.77s
339:	learn: 0.0548955	total: 2.45s	remaining: 4.76s
340:	learn: 0.0548044	total: 2.46s	remaining: 4.75s
341:	learn: 0.0547124	total: 2.47s	remaining: 4.75s
342:	learn: 0.0546249	total: 2.47s	remaining: 4.74s
343:	learn: 0.0544992	total: 2.48s	remaining: 4.73s
344:	learn: 0.0544376	total: 2.49s	remaining: 4.72s
345:	learn: 0.0543350	total: 2.49s	remaining: 4.71s
346:	learn: 0.0542489	total: 2.5s	remaining: 4.7s
347:	learn: 0.0541045	total: 2.51s	remaining: 4.7s
348:	learn: 0.0540145	total: 2.51s	remaining: 4.69s
349:	learn: 0.0539001	total: 2.52s	remaining: 4.68s
350:	learn: 0.0538250	total: 2.53s	remaining: 4.67s
351:	learn: 0.0536634	total: 2.53s	remaining: 4.66s
352:	learn: 0.053

500:	learn: 0.0420994	total: 3.64s	remaining: 3.63s
501:	learn: 0.0420395	total: 3.65s	remaining: 3.62s
502:	learn: 0.0419818	total: 3.65s	remaining: 3.61s
503:	learn: 0.0419373	total: 3.66s	remaining: 3.6s
504:	learn: 0.0418245	total: 3.67s	remaining: 3.6s
505:	learn: 0.0417497	total: 3.68s	remaining: 3.59s
506:	learn: 0.0416948	total: 3.68s	remaining: 3.58s
507:	learn: 0.0416194	total: 3.69s	remaining: 3.57s
508:	learn: 0.0415691	total: 3.7s	remaining: 3.57s
509:	learn: 0.0415036	total: 3.71s	remaining: 3.57s
510:	learn: 0.0414306	total: 3.72s	remaining: 3.56s
511:	learn: 0.0413928	total: 3.73s	remaining: 3.55s
512:	learn: 0.0413240	total: 3.74s	remaining: 3.55s
513:	learn: 0.0412640	total: 3.74s	remaining: 3.54s
514:	learn: 0.0411938	total: 3.75s	remaining: 3.53s
515:	learn: 0.0411372	total: 3.76s	remaining: 3.53s
516:	learn: 0.0410831	total: 3.77s	remaining: 3.52s
517:	learn: 0.0410263	total: 3.77s	remaining: 3.51s
518:	learn: 0.0409727	total: 3.78s	remaining: 3.5s
519:	learn: 0.04

668:	learn: 0.0335578	total: 5.05s	remaining: 2.5s
669:	learn: 0.0335349	total: 5.05s	remaining: 2.49s
670:	learn: 0.0334881	total: 5.06s	remaining: 2.48s
671:	learn: 0.0334332	total: 5.07s	remaining: 2.48s
672:	learn: 0.0334031	total: 5.08s	remaining: 2.47s
673:	learn: 0.0333680	total: 5.09s	remaining: 2.46s
674:	learn: 0.0333033	total: 5.1s	remaining: 2.46s
675:	learn: 0.0332911	total: 5.12s	remaining: 2.45s
676:	learn: 0.0332553	total: 5.13s	remaining: 2.45s
677:	learn: 0.0332269	total: 5.13s	remaining: 2.44s
678:	learn: 0.0332027	total: 5.14s	remaining: 2.43s
679:	learn: 0.0330971	total: 5.15s	remaining: 2.42s
680:	learn: 0.0330627	total: 5.16s	remaining: 2.42s
681:	learn: 0.0330282	total: 5.16s	remaining: 2.41s
682:	learn: 0.0329581	total: 5.17s	remaining: 2.4s
683:	learn: 0.0329362	total: 5.18s	remaining: 2.39s
684:	learn: 0.0329194	total: 5.18s	remaining: 2.38s
685:	learn: 0.0328816	total: 5.19s	remaining: 2.38s
686:	learn: 0.0328024	total: 5.2s	remaining: 2.37s
687:	learn: 0.03

831:	learn: 0.0278090	total: 6.29s	remaining: 1.27s
832:	learn: 0.0277709	total: 6.29s	remaining: 1.26s
833:	learn: 0.0277205	total: 6.3s	remaining: 1.25s
834:	learn: 0.0277047	total: 6.31s	remaining: 1.25s
835:	learn: 0.0276773	total: 6.32s	remaining: 1.24s
836:	learn: 0.0276490	total: 6.33s	remaining: 1.23s
837:	learn: 0.0276027	total: 6.33s	remaining: 1.22s
838:	learn: 0.0275563	total: 6.34s	remaining: 1.22s
839:	learn: 0.0275448	total: 6.35s	remaining: 1.21s
840:	learn: 0.0275156	total: 6.36s	remaining: 1.2s
841:	learn: 0.0274885	total: 6.36s	remaining: 1.19s
842:	learn: 0.0274466	total: 6.37s	remaining: 1.19s
843:	learn: 0.0273820	total: 6.38s	remaining: 1.18s
844:	learn: 0.0273643	total: 6.38s	remaining: 1.17s
845:	learn: 0.0273488	total: 6.39s	remaining: 1.16s
846:	learn: 0.0273209	total: 6.4s	remaining: 1.16s
847:	learn: 0.0272748	total: 6.4s	remaining: 1.15s
848:	learn: 0.0272574	total: 6.41s	remaining: 1.14s
849:	learn: 0.0272280	total: 6.42s	remaining: 1.13s
850:	learn: 0.02

CatBoostClassifier trained on 6400 samples.


In [115]:
results

{'LogisticRegression': {1: {'model': LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                      intercept_scaling=1, l1_ratio=None, max_iter=100,
                      multi_class='warn', n_jobs=None, penalty='l2',
                      random_state=None, solver='warn', tol=0.0001, verbose=0,
                      warm_start=False),
   'train_time': 0.03500008583068848,
   'pred_time': 0.0009999275207519531,
   'roc_auc_train': 0.6357330782109543,
   'roc_auc_test': 0.6430668191917449,
   'f_train': 0.5341880341880342,
   'f_test': 0.5270972531551597},
  2: {'model': LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                      intercept_scaling=1, l1_ratio=None, max_iter=100,
                      multi_class='warn', n_jobs=None, penalty='l2',
                      random_state=None, solver='warn', tol=0.0001, verbose=0,
                      warm_start=False),
   'train_time': 0.01900005340576172,
   'pred_t

In [123]:
clf_min_time = {}
for clf in [clf_A, clf_B, clf_C, clf_D, clf_E]:
    clf_name = clf.__class__.__name__
    clf_min_time[clf_name] = min([results[clf_name][i]["train_time"] for i in range(1, 6)])

In [109]:
clf_max_score = {}
for clf in [clf_A, clf_B, clf_C, clf_D, clf_E]:
    clf_name = clf.__class__.__name__
    clf_max_score[clf_name] = max([results[clf_name][i]["roc_auc_test"] for i in range(1, 6)])

In [124]:
clf_min_time

{'LogisticRegression': 0.01699995994567871,
 'MultinomialNB': 0.0019998550415039062,
 'DecisionTreeClassifier': 0.020999908447265625,
 'XGBClassifier': 0.6319999694824219,
 'CatBoostClassifier': 7.491999626159668}

In [116]:
clf_max_score

{'LogisticRegression': 0.6622892306656228,
 'MultinomialNB': 0.5,
 'DecisionTreeClassifier': 0.9697420423214483,
 'XGBClassifier': 0.9628392609977735,
 'CatBoostClassifier': 0.9720688830028632}

In [126]:
print('Classifier with best score is {}'.format(max(clf_max_score, key=clf_max_score.get)))
print('Classifier with fastest training time is {}'.format(min(clf_min_time, key=clf_min_time.get)))

Classifier with best score is CatBoostClassifier
Classifier with fastest training time is MultinomialNB


From above result I will choose the `XGBClassifier` because the score and the training time is good.

In [129]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [132]:
# Import 'GridSearchCV', 'make_scorer'
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV

# Initialize the classifier
clf = xgb.XGBClassifier(random_state=42)

# Created the parameters dict for xgb classifier
parameters = {
    'max_depth':range(1, 10), 
    'learning_rate':[0.01, 0.05, 0.1],      
    'gamma':range(0, 5)
}

# Make an roc_auc scoring object using make_scorer()
scorer = make_scorer(roc_auc_score)

# Perform grid search on the classifier using 'scorer' as the scoring method using GridSearchCV()
grid_obj = GridSearchCV(clf, parameters, scoring=scorer)

# Fit the grid search object to the training data and find the optimal parameters using fit()
grid_fit = grid_obj.fit(X_train, y_train)

# Get the estimator
best_clf = grid_fit.best_estimator_

# Make predictions using the unoptimized and model
predictions = (clf.fit(X_train, y_train)).predict(X_test)
best_predictions = best_clf.predict(X_test)



Unoptimized model
------


NameError: name 'accuracy_score' is not defined

In [133]:
# Report the before-and-afterscores
print("Unoptimized model\n------")
print("ROC_AUC score on testing data: {:.4f}".format(roc_auc_score(y_test, predictions)))
print("F-score on testing data: {:.4f}".format(fbeta_score(y_test, predictions, beta = 0.5)))
print("\nOptimized Model\n------")
print("Final ROC_AUC score on the testing data: {:.4f}".format(roc_auc_score(y_test, best_predictions)))
print("Final F-score on the testing data: {:.4f}".format(fbeta_score(y_test, best_predictions, beta = 0.5)))

Unoptimized model
------
ROC_AUC score on testing data: 0.9647
F-score on testing data: 0.9649

Optimized Model
------
Final ROC_AUC score on the testing data: 0.9726
Final F-score on the testing data: 0.9801


In [143]:
features_columns = data_train.drop(['att8j', 'att9c', 'att10'], axis=1).columns
col_importance_score = {}
for i in range(len(features_columns)):
    col_importance_score[features_columns[i]] = best_clf.feature_importances_[i]

In [146]:
col_importance_score

{'att1': 0.2954557,
 'att2': 0.07647457,
 'att3': 0.13411395,
 'att4': 0.045030687,
 'att5': 0.23844256,
 'att6': 0.023917245,
 'att7': 0.013406309,
 'att8a': 0.012392218,
 'att8b': 0.010416632,
 'att8c': 0.009066616,
 'att8d': 0.024436198,
 'att8e': 0.018360063,
 'att8f': 0.0154176075,
 'att8g': 0.012611132,
 'att8h': 0.020124257,
 'att8i': 0.014575411,
 'att9a': 0.014196939,
 'att9b': 0.021561962}

In [149]:
print('Top five important features based on the best XGBClassifier: {}'\
.format(sorted(col_importance_score, key=col_importance_score.get, reverse=True)[:5]))

Top five important features based on the best XGBClassifier: ['att1', 'att5', 'att3', 'att2', 'att4']


In [169]:
# Import functionality for cloning a model
from sklearn.base import clone

# Reduce the feature space
X_reduced = data_train[['att1', 'att5', 'att3', 'att2', 'att4']]
X_train_reduced, X_test_reduced, y_train, y_test = train_test_split(X_reduced, y, random_state=42)

# Train on the "best" model found from grid search earlier
clf = clone(best_clf).fit(X_train_reduced, y_train)

# Make new predictions
reduced_predictions = clf.predict(X_test_reduced)
best_predictions = best_clf.predict(X_test)

# Report scores from the final model using both versions of data
print("Final Model trained on full data\n------")
print("ROC_AUC on testing data: {:.4f}".format(roc_auc_score(y_test, best_predictions)))
print("F-score on testing data: {:.4f}".format(fbeta_score(y_test, best_predictions, beta = 0.5)))
print("\nFinal Model trained on reduced data\n------")
print("ROC_AUC on testing data: {:.4f}".format(roc_auc_score(y_test, reduced_predictions)))
print("F-score on testing data: {:.4f}".format(fbeta_score(y_test, reduced_predictions, beta = 0.5)))

Final Model trained on full data
------
ROC_AUC on testing data: 0.9726
F-score on testing data: 0.9801

Final Model trained on reduced data
------
ROC_AUC on testing data: 0.9745
F-score on testing data: 0.9793


In [177]:
X = data_test.drop(['att8j', 'att9c'], axis=1)
X = np.array(X)
y_pred = best_clf.predict_proba(X)

In [225]:
data_pred = {'no':[i for i in range(1, len(y_pred)+1)], 'prediction_for_class_1':[i[1] for i in y_pred]}

In [226]:
df_pred = pd.DataFrame(data=data_pred)

In [229]:
df_pred.to_csv('answer.csv', index=False)