## Import Packages

In [26]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")


from sklearn.preprocessing import LabelEncoder
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

## Load data

In [27]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
sampleSubmission = pd.read_csv('sample_submission.csv')

In [28]:
train.head()

Unnamed: 0,customer_id,age,job,marital,education,default,housing,loan,contact,month,...,campaign,pdays,previous,poutcome,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed,subscribed
0,customer_id_39075,31,admin.,married,university.degree,no,no,no,cellular,dec,...,3,999,1,failure,-2.97,46.3565,-23.1,1.711,5023.5,0
1,customer_id_34855,31,technician,single,university.degree,no,no,no,telephone,may,...,4,999,0,nonexistent,-1.77,46.4465,-32.34,2.252,5099.1,0
2,customer_id_7107,47,blue-collar,married,basic.6y,unknown,yes,no,telephone,may,...,2,999,0,nonexistent,1.13,46.997,-25.48,5.862,5191.0,0
3,customer_id_31614,36,services,married,university.degree,no,no,no,cellular,may,...,1,999,1,failure,-1.77,46.4465,-32.34,2.329,5099.1,0
4,customer_id_34878,34,admin.,single,high.school,no,no,no,cellular,may,...,9,999,0,nonexistent,-1.77,46.4465,-32.34,2.252,5099.1,0


In [29]:
test.head()

Unnamed: 0,customer_id,age,job,marital,education,default,housing,loan,contact,month,...,duration,campaign,pdays,previous,poutcome,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed
0,customer_id_32884,59,technician,married,high.school,no,no,yes,cellular,may,...,6.183333,1,999,1,failure,-1.77,46.4465,-32.34,2.301,5099.1
1,customer_id_3169,57,unknown,married,unknown,unknown,yes,no,telephone,may,...,4.75,2,999,0,nonexistent,1.13,46.997,-25.48,5.862,5191.0
2,customer_id_32206,35,blue-collar,married,basic.9y,no,no,no,cellular,may,...,0.866667,1,999,1,failure,-1.77,46.4465,-32.34,2.315,5099.1
3,customer_id_9403,38,admin.,married,high.school,no,no,no,telephone,jun,...,5.916667,4,999,0,nonexistent,1.43,47.2325,-29.26,5.969,5228.1
4,customer_id_14020,29,housemaid,married,high.school,no,yes,no,cellular,jul,...,3.15,2,999,0,nonexistent,1.43,46.959,-29.89,5.965,5228.1


In [30]:
sampleSubmission.head()

Unnamed: 0,customer_id,subscribed
0,customer_id_32884,1
1,customer_id_3169,1
2,customer_id_32206,1
3,customer_id_9403,1
4,customer_id_14020,1


## Exploratory Data Analysis

In [31]:
train.shape, test.shape

((28831, 22), (12357, 21))

In [32]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28831 entries, 0 to 28830
Data columns (total 22 columns):
customer_id       28831 non-null object
age               28831 non-null int64
job               28831 non-null object
marital           28831 non-null object
education         28831 non-null object
default           28831 non-null object
housing           28831 non-null object
loan              28831 non-null object
contact           28831 non-null object
month             28831 non-null object
day_of_week       28831 non-null object
duration          28831 non-null float64
campaign          28831 non-null int64
pdays             28831 non-null int64
previous          28831 non-null int64
poutcome          28831 non-null object
emp_var_rate      28831 non-null float64
cons_price_idx    28831 non-null float64
cons_conf_idx     28831 non-null float64
euribor3m         28831 non-null float64
nr_employed       28831 non-null float64
subscribed        28831 non-null int64
dtypes: fl

In [33]:
train.describe()

Unnamed: 0,age,duration,campaign,pdays,previous,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed,subscribed
count,28831.0,28831.0,28831.0,28831.0,28831.0,28831.0,28831.0,28831.0,28831.0,28831.0,28831.0
mean,42.011203,4.297919,2.575769,963.215844,0.172592,0.113202,46.788632,-28.360564,4.623599,5167.01188,0.112761
std,10.450128,4.336882,2.752303,185.077567,0.494338,1.570978,0.289847,3.244405,1.735202,72.542598,0.316305
min,19.0,0.0,1.0,0.0,0.0,-3.37,46.1005,-35.56,1.636,4963.6,0.0
25%,34.0,1.7,1.0,999.0,0.0,-1.77,46.5375,-29.89,2.346,5099.1,0.0
50%,40.0,3.0,2.0,999.0,0.0,1.13,46.8745,-29.26,5.859,5191.0,0.0
75%,49.0,5.3,3.0,999.0,0.0,1.43,46.997,-25.48,5.963,5228.1,0.0
max,100.0,81.966667,43.0,999.0,7.0,1.43,47.3835,-18.83,6.047,5228.1,1.0


In [34]:
target = [col for col in train.columns if col not in test.columns]
target

['subscribed']

In [35]:
# Check for missing values
train.isna().sum().sum(), test.isna().sum().sum(), sampleSubmission.isna().sum().sum()

(0, 0, 0)

## Converting some columns to dummies using label encoder

In [36]:
# Instantiate the model
label_encoder = LabelEncoder()

In [37]:
train.head().T

Unnamed: 0,0,1,2,3,4
customer_id,customer_id_39075,customer_id_34855,customer_id_7107,customer_id_31614,customer_id_34878
age,31,31,47,36,34
job,admin.,technician,blue-collar,services,admin.
marital,married,single,married,married,single
education,university.degree,university.degree,basic.6y,university.degree,high.school
default,no,no,unknown,no,no
housing,no,no,yes,no,no
loan,no,no,no,no,no
contact,cellular,telephone,telephone,cellular,cellular
month,dec,may,may,may,may


In [38]:
# Contact
train['contact'] = label_encoder.fit_transform(train['contact'])
test['contact'] = label_encoder.fit_transform(test['contact'])

# Default
train['default'] = label_encoder.fit_transform(train['default'])
test['default'] = label_encoder.fit_transform(test['default'])

# Hosuing
#train['housing'] = label_encoder.fit_transform(train['housing'])
#test['housing'] = label_encoder.fit_transform(test['housing'])

# Loan
#train['loan'] = label_encoder.fit_transform(train['loan'])
#test['loan'] = label_encoder.fit_transform(test['loan'])

# Poutcome
train['poutcome'] = label_encoder.fit_transform(train['poutcome'])
test['poutcome'] = label_encoder.fit_transform(test['poutcome'])

# Day of the week
#train['day_of_week'] = label_encoder.fit_transform(train['day_of_week'])
#test['day_of_week'] = label_encoder.fit_transform(test['day_of_week'])

# Month
#train['month'] = label_encoder.fit_transform(train['month'])
#test['month'] = label_encoder.fit_transform(test['month'])

# Marital
#train['marital'] = label_encoder.fit_transform(train['marital'])
#test['marital'] = label_encoder.fit_transform(test['marital'])

# Job
train['job'] = label_encoder.fit_transform(train['job'])
test['job'] = label_encoder.fit_transform(test['job'])

# Education
train['education'] = label_encoder.fit_transform(train['education'])
test['education'] = label_encoder.fit_transform(test['education'])

# Pdays
#train['pdays'] = train['pdays']/train['pdays'].mean()
#test['pdays'] = test['pdays']/test['pdays'].mean()

# nr_employed
#train['nr_employed'] = train['nr_employed']/train['nr_employed'].mean()
#test['nr_employed'] = test['nr_employed']/test['nr_employed'].mean()

In [39]:
# Create a new column for ids
train['id'] = train['customer_id'].apply(lambda x:x.split('_')[2])
test['id'] = test['customer_id'].apply(lambda x:x.split('_')[2])

In [40]:
train['id'] = train['id'].astype('int')
test['id'] = test['id'].astype('int')

## Grouping the features into Categorical and Numerical features

In [41]:
categorical_feat = [col for col in train.columns if train[col].dtype=='O']
num_feat = [col for col in train.columns if col not in categorical_feat]

In [42]:
train[categorical_feat]

Unnamed: 0,customer_id,marital,housing,loan,month,day_of_week
0,customer_id_39075,married,no,no,dec,mon
1,customer_id_34855,single,no,no,may,fri
2,customer_id_7107,married,yes,no,may,thu
3,customer_id_31614,married,no,no,may,thu
4,customer_id_34878,single,no,no,may,fri
...,...,...,...,...,...,...
28826,customer_id_6265,married,no,no,may,tue
28827,customer_id_11284,married,no,no,jun,thu
28828,customer_id_38158,married,yes,no,oct,thu
28829,customer_id_860,married,yes,no,may,wed


In [43]:
delete = ['subscribed']
for i in delete:
    num_feat.remove(i)
num_feat

['age',
 'job',
 'education',
 'default',
 'contact',
 'duration',
 'campaign',
 'pdays',
 'previous',
 'poutcome',
 'emp_var_rate',
 'cons_price_idx',
 'cons_conf_idx',
 'euribor3m',
 'nr_employed',
 'id']

In [44]:
X = train[num_feat]
y = train['subscribed']

## Build a base model

In [45]:
random = 314

In [46]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.70, random_state=random)

### Catboost Classifier

52cat 

catboost = CatBoostClassifier(iterations=1500,learning_rate=0.01,random_seed=random,l2_leaf_reg=2.5,od_type='Iter',
                              early_stopping_rounds=500,eval_metric='AUC',depth=6,rsm=0.78)
                              
53 cat

catboost = CatBoostClassifier(iterations=1500,learning_rate=0.01,random_seed=random,l2_leaf_reg=3.5,od_type='Iter',
                              early_stopping_rounds=500,eval_metric='AUC',depth=7,rsm=0.88)
                              
54 CAT
catboost = CatBoostClassifier(iterations=1500,learning_rate=0.01,random_seed=random,l2_leaf_reg=2.5,od_type='Iter',
                              early_stopping_rounds=500,eval_metric='AUC',depth=5,rsm=0.78)
training set
classifier f1 score 0.6383818450912679

testing set
classifier f1 score 0.64501679731243


57 Cat
catboost = CatBoostClassifier(iterations=2000,learning_rate=0.01,random_seed=random,l2_leaf_reg=3.5,od_type='Iter',
                              early_stopping_rounds=500,eval_metric='AUC',depth=5,rsm=0.78)
training set
classifier f1 score 0.6648780487804878

testing set
classifier f1 score 0.6592551417454141

58cat
catboost = CatBoostClassifier(iterations=1200,learning_rate=0.01,random_seed=random,l2_leaf_reg=2.5,od_type='Iter',
                              early_stopping_rounds=500,eval_metric='AUC',depth=5,rsm=0.78)
                              
59cat
catboost = CatBoostClassifier(iterations=1000,learning_rate=0.01,random_seed=random,l2_leaf_reg=.5,od_type='Iter',
                              early_stopping_rounds=500,eval_metric='AUC',depth=5,rsm=0.78)

In [47]:
catboost = CatBoostClassifier(iterations=1500,learning_rate=0.01,random_seed=random,l2_leaf_reg=2.5,od_type='Iter',
                              early_stopping_rounds=500,eval_metric='AUC',depth=5,rsm=0.78)

In [48]:
catboost.fit(X,y,eval_set=(X_test,y_test),use_best_model=True)

0:	test: 0.8884542	best: 0.8884542 (0)	total: 19ms	remaining: 28.5s
1:	test: 0.9139551	best: 0.9139551 (1)	total: 33.8ms	remaining: 25.3s
2:	test: 0.9206711	best: 0.9206711 (2)	total: 49.1ms	remaining: 24.5s
3:	test: 0.9252533	best: 0.9252533 (3)	total: 65ms	remaining: 24.3s
4:	test: 0.9274875	best: 0.9274875 (4)	total: 80.5ms	remaining: 24.1s
5:	test: 0.9287028	best: 0.9287028 (5)	total: 95.8ms	remaining: 23.9s
6:	test: 0.9284829	best: 0.9287028 (5)	total: 110ms	remaining: 23.6s
7:	test: 0.9279717	best: 0.9287028 (5)	total: 126ms	remaining: 23.4s
8:	test: 0.9264129	best: 0.9287028 (5)	total: 141ms	remaining: 23.3s
9:	test: 0.9276389	best: 0.9287028 (5)	total: 156ms	remaining: 23.2s
10:	test: 0.9274247	best: 0.9287028 (5)	total: 171ms	remaining: 23.1s
11:	test: 0.9269568	best: 0.9287028 (5)	total: 184ms	remaining: 22.9s
12:	test: 0.9264659	best: 0.9287028 (5)	total: 196ms	remaining: 22.4s
13:	test: 0.9268113	best: 0.9287028 (5)	total: 217ms	remaining: 23s
14:	test: 0.9266463	best: 0.92

120:	test: 0.9318896	best: 0.9318896 (120)	total: 2.05s	remaining: 23.4s
121:	test: 0.9319572	best: 0.9319572 (121)	total: 2.08s	remaining: 23.4s
122:	test: 0.9320628	best: 0.9320628 (122)	total: 2.09s	remaining: 23.4s
123:	test: 0.9321220	best: 0.9321220 (123)	total: 2.11s	remaining: 23.4s
124:	test: 0.9321271	best: 0.9321271 (124)	total: 2.12s	remaining: 23.4s
125:	test: 0.9322310	best: 0.9322310 (125)	total: 2.14s	remaining: 23.3s
126:	test: 0.9322470	best: 0.9322470 (126)	total: 2.15s	remaining: 23.3s
127:	test: 0.9324402	best: 0.9324402 (127)	total: 2.17s	remaining: 23.2s
128:	test: 0.9325238	best: 0.9325238 (128)	total: 2.18s	remaining: 23.2s
129:	test: 0.9326514	best: 0.9326514 (129)	total: 2.2s	remaining: 23.2s
130:	test: 0.9327923	best: 0.9327923 (130)	total: 2.21s	remaining: 23.2s
131:	test: 0.9328020	best: 0.9328020 (131)	total: 2.23s	remaining: 23.1s
132:	test: 0.9329695	best: 0.9329695 (132)	total: 2.25s	remaining: 23.2s
133:	test: 0.9329940	best: 0.9329940 (133)	total: 2.

234:	test: 0.9393107	best: 0.9393107 (234)	total: 3.9s	remaining: 21s
235:	test: 0.9393354	best: 0.9393354 (235)	total: 3.91s	remaining: 21s
236:	test: 0.9393604	best: 0.9393604 (236)	total: 3.93s	remaining: 20.9s
237:	test: 0.9395218	best: 0.9395218 (237)	total: 3.95s	remaining: 20.9s
238:	test: 0.9395473	best: 0.9395473 (238)	total: 3.96s	remaining: 20.9s
239:	test: 0.9395711	best: 0.9395711 (239)	total: 3.98s	remaining: 20.9s
240:	test: 0.9396233	best: 0.9396233 (240)	total: 3.99s	remaining: 20.9s
241:	test: 0.9396428	best: 0.9396428 (241)	total: 4.01s	remaining: 20.8s
242:	test: 0.9396835	best: 0.9396835 (242)	total: 4.03s	remaining: 20.8s
243:	test: 0.9396897	best: 0.9396897 (243)	total: 4.04s	remaining: 20.8s
244:	test: 0.9397549	best: 0.9397549 (244)	total: 4.06s	remaining: 20.8s
245:	test: 0.9397673	best: 0.9397673 (245)	total: 4.08s	remaining: 20.8s
246:	test: 0.9398083	best: 0.9398083 (246)	total: 4.09s	remaining: 20.8s
247:	test: 0.9398334	best: 0.9398334 (247)	total: 4.11s	

351:	test: 0.9435140	best: 0.9435140 (351)	total: 5.84s	remaining: 19s
352:	test: 0.9435259	best: 0.9435259 (352)	total: 5.86s	remaining: 19s
353:	test: 0.9435760	best: 0.9435760 (353)	total: 5.87s	remaining: 19s
354:	test: 0.9435882	best: 0.9435882 (354)	total: 5.89s	remaining: 19s
355:	test: 0.9436157	best: 0.9436157 (355)	total: 5.9s	remaining: 19s
356:	test: 0.9436294	best: 0.9436294 (356)	total: 5.92s	remaining: 18.9s
357:	test: 0.9436732	best: 0.9436732 (357)	total: 5.93s	remaining: 18.9s
358:	test: 0.9436872	best: 0.9436872 (358)	total: 5.95s	remaining: 18.9s
359:	test: 0.9437132	best: 0.9437132 (359)	total: 5.96s	remaining: 18.9s
360:	test: 0.9437377	best: 0.9437377 (360)	total: 5.98s	remaining: 18.9s
361:	test: 0.9437484	best: 0.9437484 (361)	total: 6s	remaining: 18.9s
362:	test: 0.9437696	best: 0.9437696 (362)	total: 6.01s	remaining: 18.8s
363:	test: 0.9438151	best: 0.9438151 (363)	total: 6.03s	remaining: 18.8s
364:	test: 0.9438590	best: 0.9438590 (364)	total: 6.05s	remaining

471:	test: 0.9462509	best: 0.9462509 (471)	total: 7.83s	remaining: 17.1s
472:	test: 0.9462590	best: 0.9462590 (472)	total: 7.85s	remaining: 17s
473:	test: 0.9462772	best: 0.9462772 (473)	total: 7.86s	remaining: 17s
474:	test: 0.9462914	best: 0.9462914 (474)	total: 7.88s	remaining: 17s
475:	test: 0.9463403	best: 0.9463403 (475)	total: 7.89s	remaining: 17s
476:	test: 0.9463898	best: 0.9463898 (476)	total: 7.91s	remaining: 17s
477:	test: 0.9464063	best: 0.9464063 (477)	total: 7.92s	remaining: 16.9s
478:	test: 0.9464185	best: 0.9464185 (478)	total: 7.94s	remaining: 16.9s
479:	test: 0.9464273	best: 0.9464273 (479)	total: 7.96s	remaining: 16.9s
480:	test: 0.9464803	best: 0.9464803 (480)	total: 7.97s	remaining: 16.9s
481:	test: 0.9464843	best: 0.9464843 (481)	total: 7.99s	remaining: 16.9s
482:	test: 0.9465069	best: 0.9465069 (482)	total: 8.01s	remaining: 16.9s
483:	test: 0.9465225	best: 0.9465225 (483)	total: 8.02s	remaining: 16.8s
484:	test: 0.9465398	best: 0.9465398 (484)	total: 8.04s	remai

591:	test: 0.9482593	best: 0.9482593 (591)	total: 9.82s	remaining: 15.1s
592:	test: 0.9482648	best: 0.9482648 (592)	total: 9.84s	remaining: 15.1s
593:	test: 0.9482737	best: 0.9482737 (593)	total: 9.86s	remaining: 15s
594:	test: 0.9482828	best: 0.9482828 (594)	total: 9.87s	remaining: 15s
595:	test: 0.9482920	best: 0.9482920 (595)	total: 9.89s	remaining: 15s
596:	test: 0.9482992	best: 0.9482992 (596)	total: 9.9s	remaining: 15s
597:	test: 0.9483311	best: 0.9483311 (597)	total: 9.92s	remaining: 15s
598:	test: 0.9483579	best: 0.9483579 (598)	total: 9.94s	remaining: 14.9s
599:	test: 0.9483683	best: 0.9483683 (599)	total: 9.95s	remaining: 14.9s
600:	test: 0.9483740	best: 0.9483740 (600)	total: 9.97s	remaining: 14.9s
601:	test: 0.9484115	best: 0.9484115 (601)	total: 9.98s	remaining: 14.9s
602:	test: 0.9484339	best: 0.9484339 (602)	total: 10s	remaining: 14.9s
603:	test: 0.9484383	best: 0.9484383 (603)	total: 10s	remaining: 14.9s
604:	test: 0.9484506	best: 0.9484506 (604)	total: 10s	remaining: 1

705:	test: 0.9499142	best: 0.9499142 (705)	total: 11.6s	remaining: 13.1s
706:	test: 0.9499213	best: 0.9499213 (706)	total: 11.7s	remaining: 13.1s
707:	test: 0.9499255	best: 0.9499255 (707)	total: 11.7s	remaining: 13.1s
708:	test: 0.9499306	best: 0.9499306 (708)	total: 11.7s	remaining: 13.1s
709:	test: 0.9499439	best: 0.9499439 (709)	total: 11.7s	remaining: 13s
710:	test: 0.9499534	best: 0.9499534 (710)	total: 11.7s	remaining: 13s
711:	test: 0.9499710	best: 0.9499710 (711)	total: 11.8s	remaining: 13s
712:	test: 0.9499765	best: 0.9499765 (712)	total: 11.8s	remaining: 13s
713:	test: 0.9499861	best: 0.9499861 (713)	total: 11.8s	remaining: 13s
714:	test: 0.9499940	best: 0.9499940 (714)	total: 11.8s	remaining: 13s
715:	test: 0.9500003	best: 0.9500003 (715)	total: 11.8s	remaining: 13s
716:	test: 0.9500102	best: 0.9500102 (716)	total: 11.9s	remaining: 12.9s
717:	test: 0.9500188	best: 0.9500188 (717)	total: 11.9s	remaining: 12.9s
718:	test: 0.9500250	best: 0.9500250 (718)	total: 11.9s	remaining

824:	test: 0.9510798	best: 0.9510798 (824)	total: 13.7s	remaining: 11.2s
825:	test: 0.9510901	best: 0.9510901 (825)	total: 13.7s	remaining: 11.2s
826:	test: 0.9510963	best: 0.9510963 (826)	total: 13.7s	remaining: 11.2s
827:	test: 0.9511049	best: 0.9511049 (827)	total: 13.7s	remaining: 11.1s
828:	test: 0.9511171	best: 0.9511171 (828)	total: 13.7s	remaining: 11.1s
829:	test: 0.9511221	best: 0.9511221 (829)	total: 13.8s	remaining: 11.1s
830:	test: 0.9511323	best: 0.9511323 (830)	total: 13.8s	remaining: 11.1s
831:	test: 0.9511406	best: 0.9511406 (831)	total: 13.8s	remaining: 11.1s
832:	test: 0.9511521	best: 0.9511521 (832)	total: 13.8s	remaining: 11.1s
833:	test: 0.9511651	best: 0.9511651 (833)	total: 13.8s	remaining: 11s
834:	test: 0.9511970	best: 0.9511970 (834)	total: 13.8s	remaining: 11s
835:	test: 0.9512054	best: 0.9512054 (835)	total: 13.9s	remaining: 11s
836:	test: 0.9512172	best: 0.9512172 (836)	total: 13.9s	remaining: 11s
837:	test: 0.9512311	best: 0.9512311 (837)	total: 13.9s	rem

944:	test: 0.9521154	best: 0.9521154 (944)	total: 15.7s	remaining: 9.21s
945:	test: 0.9521322	best: 0.9521322 (945)	total: 15.7s	remaining: 9.19s
946:	test: 0.9521458	best: 0.9521458 (946)	total: 15.7s	remaining: 9.17s
947:	test: 0.9521541	best: 0.9521541 (947)	total: 15.7s	remaining: 9.16s
948:	test: 0.9521692	best: 0.9521692 (948)	total: 15.7s	remaining: 9.14s
949:	test: 0.9521813	best: 0.9521813 (949)	total: 15.8s	remaining: 9.12s
950:	test: 0.9521929	best: 0.9521929 (950)	total: 15.8s	remaining: 9.11s
951:	test: 0.9522024	best: 0.9522024 (951)	total: 15.8s	remaining: 9.09s
952:	test: 0.9522114	best: 0.9522114 (952)	total: 15.8s	remaining: 9.07s
953:	test: 0.9522285	best: 0.9522285 (953)	total: 15.8s	remaining: 9.05s
954:	test: 0.9522447	best: 0.9522447 (954)	total: 15.8s	remaining: 9.04s
955:	test: 0.9522502	best: 0.9522502 (955)	total: 15.8s	remaining: 9.02s
956:	test: 0.9522572	best: 0.9522572 (956)	total: 15.9s	remaining: 9.01s
957:	test: 0.9522649	best: 0.9522649 (957)	total: 1

1057:	test: 0.9531073	best: 0.9531073 (1057)	total: 17.5s	remaining: 7.31s
1058:	test: 0.9531131	best: 0.9531131 (1058)	total: 17.5s	remaining: 7.3s
1059:	test: 0.9531292	best: 0.9531292 (1059)	total: 17.5s	remaining: 7.28s
1060:	test: 0.9531446	best: 0.9531446 (1060)	total: 17.6s	remaining: 7.26s
1061:	test: 0.9531559	best: 0.9531559 (1061)	total: 17.6s	remaining: 7.25s
1062:	test: 0.9531678	best: 0.9531678 (1062)	total: 17.6s	remaining: 7.23s
1063:	test: 0.9531764	best: 0.9531764 (1063)	total: 17.6s	remaining: 7.21s
1064:	test: 0.9531838	best: 0.9531838 (1064)	total: 17.6s	remaining: 7.2s
1065:	test: 0.9532016	best: 0.9532016 (1065)	total: 17.6s	remaining: 7.18s
1066:	test: 0.9532094	best: 0.9532094 (1066)	total: 17.6s	remaining: 7.16s
1067:	test: 0.9532186	best: 0.9532186 (1067)	total: 17.7s	remaining: 7.15s
1068:	test: 0.9532353	best: 0.9532353 (1068)	total: 17.7s	remaining: 7.13s
1069:	test: 0.9532386	best: 0.9532386 (1069)	total: 17.7s	remaining: 7.11s
1070:	test: 0.9532479	best:

1169:	test: 0.9540589	best: 0.9540589 (1169)	total: 19.3s	remaining: 5.44s
1170:	test: 0.9540651	best: 0.9540651 (1170)	total: 19.3s	remaining: 5.42s
1171:	test: 0.9540707	best: 0.9540707 (1171)	total: 19.3s	remaining: 5.41s
1172:	test: 0.9540769	best: 0.9540769 (1172)	total: 19.3s	remaining: 5.39s
1173:	test: 0.9540865	best: 0.9540865 (1173)	total: 19.3s	remaining: 5.37s
1174:	test: 0.9540950	best: 0.9540950 (1174)	total: 19.4s	remaining: 5.36s
1175:	test: 0.9541047	best: 0.9541047 (1175)	total: 19.4s	remaining: 5.34s
1176:	test: 0.9541093	best: 0.9541093 (1176)	total: 19.4s	remaining: 5.32s
1177:	test: 0.9541142	best: 0.9541142 (1177)	total: 19.4s	remaining: 5.3s
1178:	test: 0.9541209	best: 0.9541209 (1178)	total: 19.4s	remaining: 5.29s
1179:	test: 0.9541253	best: 0.9541253 (1179)	total: 19.4s	remaining: 5.27s
1180:	test: 0.9541318	best: 0.9541318 (1180)	total: 19.5s	remaining: 5.25s
1181:	test: 0.9541439	best: 0.9541439 (1181)	total: 19.5s	remaining: 5.24s
1182:	test: 0.9541501	best

1283:	test: 0.9548624	best: 0.9548624 (1283)	total: 21.1s	remaining: 3.55s
1284:	test: 0.9548687	best: 0.9548687 (1284)	total: 21.1s	remaining: 3.54s
1285:	test: 0.9548741	best: 0.9548741 (1285)	total: 21.1s	remaining: 3.52s
1286:	test: 0.9548834	best: 0.9548834 (1286)	total: 21.2s	remaining: 3.5s
1287:	test: 0.9548901	best: 0.9548901 (1287)	total: 21.2s	remaining: 3.48s
1288:	test: 0.9548981	best: 0.9548981 (1288)	total: 21.2s	remaining: 3.47s
1289:	test: 0.9549036	best: 0.9549036 (1289)	total: 21.2s	remaining: 3.45s
1290:	test: 0.9549099	best: 0.9549099 (1290)	total: 21.2s	remaining: 3.44s
1291:	test: 0.9549163	best: 0.9549163 (1291)	total: 21.2s	remaining: 3.42s
1292:	test: 0.9549192	best: 0.9549192 (1292)	total: 21.3s	remaining: 3.4s
1293:	test: 0.9549337	best: 0.9549337 (1293)	total: 21.3s	remaining: 3.38s
1294:	test: 0.9549388	best: 0.9549388 (1294)	total: 21.3s	remaining: 3.37s
1295:	test: 0.9549450	best: 0.9549450 (1295)	total: 21.3s	remaining: 3.35s
1296:	test: 0.9549473	best:

1393:	test: 0.9556167	best: 0.9556167 (1393)	total: 22.9s	remaining: 1.74s
1394:	test: 0.9556239	best: 0.9556239 (1394)	total: 22.9s	remaining: 1.72s
1395:	test: 0.9556265	best: 0.9556265 (1395)	total: 22.9s	remaining: 1.71s
1396:	test: 0.9556333	best: 0.9556333 (1396)	total: 22.9s	remaining: 1.69s
1397:	test: 0.9556466	best: 0.9556466 (1397)	total: 22.9s	remaining: 1.67s
1398:	test: 0.9556520	best: 0.9556520 (1398)	total: 22.9s	remaining: 1.66s
1399:	test: 0.9556521	best: 0.9556521 (1399)	total: 22.9s	remaining: 1.64s
1400:	test: 0.9556640	best: 0.9556640 (1400)	total: 23s	remaining: 1.62s
1401:	test: 0.9556742	best: 0.9556742 (1401)	total: 23s	remaining: 1.61s
1402:	test: 0.9556795	best: 0.9556795 (1402)	total: 23s	remaining: 1.59s
1403:	test: 0.9556854	best: 0.9556854 (1403)	total: 23s	remaining: 1.57s
1404:	test: 0.9556927	best: 0.9556927 (1404)	total: 23s	remaining: 1.56s
1405:	test: 0.9557035	best: 0.9557035 (1405)	total: 23s	remaining: 1.54s
1406:	test: 0.9557087	best: 0.9557087

<catboost.core.CatBoostClassifier at 0x201d55d9588>

In [49]:
catboost_pred = catboost.predict(X_test)
catboost_pred2 = catboost.predict(X_train)

In [50]:
print('training set')
print('classifier f1 score {}'. format(f1_score(y_train, catboost_pred2).mean()))
print('')
print('testing set')
print('classifier f1 score {}'. format(f1_score(y_test, catboost_pred).mean()))

training set
classifier f1 score 0.6402298850574714

testing set
classifier f1 score 0.6337926033357506


In [64]:
submission = sampleSubmission.copy()

In [87]:
submission['subscribed'] = catboost.predict(test[num_feat])

In [88]:
submission

Unnamed: 0,customer_id,subscribed
0,customer_id_32884,0
1,customer_id_3169,0
2,customer_id_32206,0
3,customer_id_9403,0
4,customer_id_14020,0
...,...,...
12352,customer_id_15908,0
12353,customer_id_28222,0
12354,customer_id_14194,0
12355,customer_id_19764,0


In [89]:
submission.to_csv('60cat_submission.csv', index=False)