## Import Packages

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")


from sklearn.preprocessing import LabelEncoder
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import f1_score
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import KFold,train_test_split

## Load data

In [2]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
sampleSubmission = pd.read_csv('sample_submission.csv')

In [3]:
train.head()

Unnamed: 0,customer_id,age,job,marital,education,default,housing,loan,contact,month,...,campaign,pdays,previous,poutcome,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed,subscribed
0,customer_id_39075,31,admin.,married,university.degree,no,no,no,cellular,dec,...,3,999,1,failure,-2.97,46.3565,-23.1,1.711,5023.5,0
1,customer_id_34855,31,technician,single,university.degree,no,no,no,telephone,may,...,4,999,0,nonexistent,-1.77,46.4465,-32.34,2.252,5099.1,0
2,customer_id_7107,47,blue-collar,married,basic.6y,unknown,yes,no,telephone,may,...,2,999,0,nonexistent,1.13,46.997,-25.48,5.862,5191.0,0
3,customer_id_31614,36,services,married,university.degree,no,no,no,cellular,may,...,1,999,1,failure,-1.77,46.4465,-32.34,2.329,5099.1,0
4,customer_id_34878,34,admin.,single,high.school,no,no,no,cellular,may,...,9,999,0,nonexistent,-1.77,46.4465,-32.34,2.252,5099.1,0


In [4]:
test.head()

Unnamed: 0,customer_id,age,job,marital,education,default,housing,loan,contact,month,...,duration,campaign,pdays,previous,poutcome,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed
0,customer_id_32884,59,technician,married,high.school,no,no,yes,cellular,may,...,6.183333,1,999,1,failure,-1.77,46.4465,-32.34,2.301,5099.1
1,customer_id_3169,57,unknown,married,unknown,unknown,yes,no,telephone,may,...,4.75,2,999,0,nonexistent,1.13,46.997,-25.48,5.862,5191.0
2,customer_id_32206,35,blue-collar,married,basic.9y,no,no,no,cellular,may,...,0.866667,1,999,1,failure,-1.77,46.4465,-32.34,2.315,5099.1
3,customer_id_9403,38,admin.,married,high.school,no,no,no,telephone,jun,...,5.916667,4,999,0,nonexistent,1.43,47.2325,-29.26,5.969,5228.1
4,customer_id_14020,29,housemaid,married,high.school,no,yes,no,cellular,jul,...,3.15,2,999,0,nonexistent,1.43,46.959,-29.89,5.965,5228.1


In [5]:
sampleSubmission.head()

Unnamed: 0,customer_id,subscribed
0,customer_id_32884,1
1,customer_id_3169,1
2,customer_id_32206,1
3,customer_id_9403,1
4,customer_id_14020,1


## Exploratory Data Analysis

In [6]:
train.shape, test.shape

((28831, 22), (12357, 21))

In [7]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28831 entries, 0 to 28830
Data columns (total 22 columns):
customer_id       28831 non-null object
age               28831 non-null int64
job               28831 non-null object
marital           28831 non-null object
education         28831 non-null object
default           28831 non-null object
housing           28831 non-null object
loan              28831 non-null object
contact           28831 non-null object
month             28831 non-null object
day_of_week       28831 non-null object
duration          28831 non-null float64
campaign          28831 non-null int64
pdays             28831 non-null int64
previous          28831 non-null int64
poutcome          28831 non-null object
emp_var_rate      28831 non-null float64
cons_price_idx    28831 non-null float64
cons_conf_idx     28831 non-null float64
euribor3m         28831 non-null float64
nr_employed       28831 non-null float64
subscribed        28831 non-null int64
dtypes: fl

In [8]:
train.describe()

Unnamed: 0,age,duration,campaign,pdays,previous,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed,subscribed
count,28831.0,28831.0,28831.0,28831.0,28831.0,28831.0,28831.0,28831.0,28831.0,28831.0,28831.0
mean,42.011203,4.297919,2.575769,963.215844,0.172592,0.113202,46.788632,-28.360564,4.623599,5167.01188,0.112761
std,10.450128,4.336882,2.752303,185.077567,0.494338,1.570978,0.289847,3.244405,1.735202,72.542598,0.316305
min,19.0,0.0,1.0,0.0,0.0,-3.37,46.1005,-35.56,1.636,4963.6,0.0
25%,34.0,1.7,1.0,999.0,0.0,-1.77,46.5375,-29.89,2.346,5099.1,0.0
50%,40.0,3.0,2.0,999.0,0.0,1.13,46.8745,-29.26,5.859,5191.0,0.0
75%,49.0,5.3,3.0,999.0,0.0,1.43,46.997,-25.48,5.963,5228.1,0.0
max,100.0,81.966667,43.0,999.0,7.0,1.43,47.3835,-18.83,6.047,5228.1,1.0


In [9]:
target = [col for col in train.columns if col not in test.columns]
target

['subscribed']

In [10]:
# Check for missing values
train.isna().sum().sum(), test.isna().sum().sum(), sampleSubmission.isna().sum().sum()

(0, 0, 0)

## Converting some columns to dummies using label encoder

In [11]:
# Instantiate the model
label_encoder = LabelEncoder()

In [12]:
y=train[['job', 'marital', 'education','month', 'day_of_week']]

In [13]:
y['job'].unique()

array(['admin.', 'technician', 'blue-collar', 'services', 'entrepreneur',
       'unemployed', 'housemaid', 'management', 'unknown',
       'self-employed', 'retired', 'student'], dtype=object)

In [14]:
y['marital'].unique()

array(['married', 'single', 'divorced', 'unknown'], dtype=object)

In [15]:
y['education'].unique()

array(['university.degree', 'basic.6y', 'high.school', 'basic.4y',
       'professional.course', 'basic.9y', 'unknown', 'illiterate'],
      dtype=object)

In [16]:
y['month'].unique()

array(['dec', 'may', 'nov', 'jul', 'jun', 'mar', 'aug', 'oct', 'apr',
       'sep'], dtype=object)

In [17]:
y['day_of_week'].unique()

array(['mon', 'fri', 'thu', 'tue', 'wed'], dtype=object)

In [18]:
# Contact
train['contact'] = label_encoder.fit_transform(train['contact'])
test['contact'] = label_encoder.fit_transform(test['contact'])

# Default
train['default'] = label_encoder.fit_transform(train['default'])
test['default'] = label_encoder.fit_transform(test['default'])

# Hosuing
train['housing'] = label_encoder.fit_transform(train['housing'])
test['housing'] = label_encoder.fit_transform(test['housing'])

# Loan
train['loan'] = label_encoder.fit_transform(train['loan'])
test['loan'] = label_encoder.fit_transform(test['loan'])

# Poutcome
train['poutcome'] = label_encoder.fit_transform(train['poutcome'])
test['poutcome'] = label_encoder.fit_transform(test['poutcome'])

# Day of the week
train['day_of_week'] = label_encoder.fit_transform(train['day_of_week'])
test['day_of_week'] = label_encoder.fit_transform(test['day_of_week'])

# Month
train['month'] = label_encoder.fit_transform(train['month'])
test['month'] = label_encoder.fit_transform(test['month'])

# Marital
train['marital'] = label_encoder.fit_transform(train['marital'])
test['marital'] = label_encoder.fit_transform(test['marital'])

# Job
train['job'] = label_encoder.fit_transform(train['job'])
test['job'] = label_encoder.fit_transform(test['job'])

# Education
train['education'] = label_encoder.fit_transform(train['education'])
test['education'] = label_encoder.fit_transform(test['education'])

## Grouping the features into Categorical and Numerical

In [19]:
categorical_feat = [col for col in train.columns if train[col].dtype=='O']
num_feat = [col for col in train.columns if col not in categorical_feat]

In [20]:
train[categorical_feat]

Unnamed: 0,customer_id
0,customer_id_39075
1,customer_id_34855
2,customer_id_7107
3,customer_id_31614
4,customer_id_34878
...,...
28826,customer_id_6265
28827,customer_id_11284
28828,customer_id_38158
28829,customer_id_860


In [21]:
train[num_feat]

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed,subscribed
0,31,0,1,6,0,0,0,0,2,1,...,3,999,1,0,-2.97,46.3565,-23.10,1.711,5023.5,0
1,31,9,2,6,0,0,0,1,6,0,...,4,999,0,1,-1.77,46.4465,-32.34,2.252,5099.1,0
2,47,1,1,1,1,2,0,1,6,2,...,2,999,0,1,1.13,46.9970,-25.48,5.862,5191.0,0
3,36,7,1,6,0,0,0,0,6,2,...,1,999,1,0,-1.77,46.4465,-32.34,2.329,5099.1,0
4,34,0,2,3,0,0,0,0,6,0,...,9,999,0,1,-1.77,46.4465,-32.34,2.252,5099.1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28826,60,5,1,5,1,0,0,1,6,3,...,2,999,0,1,1.13,46.9970,-25.48,5.859,5191.0,0
28827,39,4,1,6,0,0,0,1,4,2,...,1,999,0,1,1.43,47.2325,-29.26,5.963,5228.1,0
28828,37,0,1,3,0,2,0,0,8,2,...,1,4,1,2,-3.37,46.2155,-18.83,1.756,5017.5,1
28829,42,4,1,6,0,2,0,1,6,4,...,2,999,0,1,1.13,46.9970,-25.48,5.858,5191.0,0


In [22]:
train['pdays'] = train['pdays'].astype('str')
test['pdays'] = test['pdays'].astype('str')

In [23]:
# Train 
strange = []
for each in train['pdays']:
    if '999' not in each:
        strange.append(each)
    
        
df2 = pd.DataFrame(strange,columns=['values'])
df2['values'] = df2['values'].astype('int')
print(np.mean(df2['values']))



# Test
strange = []
for each in test['pdays']:
    if '999' not in each:
        strange.append(each)
    
        
df2 = pd.DataFrame(strange,columns=['values'])
df2['values'] = df2['values'].astype('int')
print(np.mean(df2['values']))

6.0327237728585175
5.974789915966387


In [24]:
train['pdays'] = train['pdays'].astype('int')
test['pdays'] = test['pdays'].astype('int')

In [25]:
train['pdays'].replace(999, 6, inplace=True)
test['pdays'].replace(999, 6, inplace=True)

In [267]:
delete = ['subscribed']
for i in delete:
    num_feat.remove(i)
num_feat

['age',
 'job',
 'marital',
 'education',
 'default',
 'housing',
 'loan',
 'contact',
 'month',
 'day_of_week',
 'duration',
 'campaign',
 'pdays',
 'previous',
 'poutcome',
 'emp_var_rate',
 'cons_price_idx',
 'cons_conf_idx',
 'euribor3m',
 'nr_employed']

## Need to scale the data so that the model trains faster

scaler =  StandardScaler()
cols = num_feat
X = scaler.fit_transform(train[num_feat])
testing = scaler.transform(test[num_feat])
X = pd.DataFrame(data=X, columns=cols)
testing = pd.DataFrame(data=test, columns=cols)

In [268]:
X = train[num_feat]
y = train['subscribed']

## Build a base model

In [269]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.30, random_state=42)

In [291]:
catboost = CatBoostClassifier(iterations=1300,learning_rate=0.01,random_seed=42,l2_leaf_reg=3.5,od_type='Iter',
                              early_stopping_rounds=500,eval_metric='AUC',depth=8,rsm=0.98)

In [292]:
catboost.fit(X,y,eval_set=(X_test,y_test),use_best_model=True)

0:	test: 0.9057212	best: 0.9057212 (0)	total: 96.6ms	remaining: 2m 5s
1:	test: 0.9245753	best: 0.9245753 (1)	total: 146ms	remaining: 1m 34s
2:	test: 0.9251594	best: 0.9251594 (2)	total: 175ms	remaining: 1m 15s
3:	test: 0.9295486	best: 0.9295486 (3)	total: 199ms	remaining: 1m 4s
4:	test: 0.9289012	best: 0.9295486 (3)	total: 223ms	remaining: 57.8s
5:	test: 0.9282582	best: 0.9295486 (3)	total: 248ms	remaining: 53.4s
6:	test: 0.9286252	best: 0.9295486 (3)	total: 278ms	remaining: 51.3s
7:	test: 0.9285295	best: 0.9295486 (3)	total: 314ms	remaining: 50.7s
8:	test: 0.9285930	best: 0.9295486 (3)	total: 362ms	remaining: 51.9s
9:	test: 0.9294539	best: 0.9295486 (3)	total: 399ms	remaining: 51.4s
10:	test: 0.9296752	best: 0.9296752 (10)	total: 425ms	remaining: 49.8s
11:	test: 0.9290478	best: 0.9296752 (10)	total: 452ms	remaining: 48.5s
12:	test: 0.9290162	best: 0.9296752 (10)	total: 477ms	remaining: 47.2s
13:	test: 0.9290777	best: 0.9296752 (10)	total: 505ms	remaining: 46.4s
14:	test: 0.9291741	bes

121:	test: 0.9393065	best: 0.9393065 (121)	total: 4.31s	remaining: 41.6s
122:	test: 0.9394921	best: 0.9394921 (122)	total: 4.34s	remaining: 41.6s
123:	test: 0.9395270	best: 0.9395270 (123)	total: 4.37s	remaining: 41.5s
124:	test: 0.9396439	best: 0.9396439 (124)	total: 4.4s	remaining: 41.4s
125:	test: 0.9396878	best: 0.9396878 (125)	total: 4.44s	remaining: 41.3s
126:	test: 0.9397119	best: 0.9397119 (126)	total: 4.47s	remaining: 41.3s
127:	test: 0.9398101	best: 0.9398101 (127)	total: 4.5s	remaining: 41.2s
128:	test: 0.9398936	best: 0.9398936 (128)	total: 4.53s	remaining: 41.1s
129:	test: 0.9399166	best: 0.9399166 (129)	total: 4.56s	remaining: 41s
130:	test: 0.9400288	best: 0.9400288 (130)	total: 4.59s	remaining: 41s
131:	test: 0.9400973	best: 0.9400973 (131)	total: 4.62s	remaining: 40.9s
132:	test: 0.9402583	best: 0.9402583 (132)	total: 4.66s	remaining: 40.8s
133:	test: 0.9402992	best: 0.9402992 (133)	total: 4.69s	remaining: 40.8s
134:	test: 0.9403787	best: 0.9403787 (134)	total: 4.72s	r

234:	test: 0.9469588	best: 0.9469588 (234)	total: 8.25s	remaining: 37.4s
235:	test: 0.9470362	best: 0.9470362 (235)	total: 8.28s	remaining: 37.3s
236:	test: 0.9470846	best: 0.9470846 (236)	total: 8.3s	remaining: 37.2s
237:	test: 0.9471327	best: 0.9471327 (237)	total: 8.33s	remaining: 37.2s
238:	test: 0.9471453	best: 0.9471453 (238)	total: 8.37s	remaining: 37.2s
239:	test: 0.9471960	best: 0.9471960 (239)	total: 8.41s	remaining: 37.2s
240:	test: 0.9472247	best: 0.9472247 (240)	total: 8.44s	remaining: 37.1s
241:	test: 0.9472957	best: 0.9472957 (241)	total: 8.47s	remaining: 37s
242:	test: 0.9473361	best: 0.9473361 (242)	total: 8.49s	remaining: 37s
243:	test: 0.9473713	best: 0.9473713 (243)	total: 8.52s	remaining: 36.9s
244:	test: 0.9474145	best: 0.9474145 (244)	total: 8.55s	remaining: 36.8s
245:	test: 0.9474548	best: 0.9474548 (245)	total: 8.61s	remaining: 36.9s
246:	test: 0.9474960	best: 0.9474960 (246)	total: 8.66s	remaining: 36.9s
247:	test: 0.9475203	best: 0.9475203 (247)	total: 8.69s	

352:	test: 0.9517726	best: 0.9517726 (352)	total: 12.2s	remaining: 32.6s
353:	test: 0.9518021	best: 0.9518021 (353)	total: 12.2s	remaining: 32.6s
354:	test: 0.9518595	best: 0.9518595 (354)	total: 12.2s	remaining: 32.5s
355:	test: 0.9518879	best: 0.9518879 (355)	total: 12.2s	remaining: 32.5s
356:	test: 0.9519198	best: 0.9519198 (356)	total: 12.3s	remaining: 32.5s
357:	test: 0.9519729	best: 0.9519729 (357)	total: 12.3s	remaining: 32.5s
358:	test: 0.9519997	best: 0.9519997 (358)	total: 12.4s	remaining: 32.4s
359:	test: 0.9520235	best: 0.9520235 (359)	total: 12.4s	remaining: 32.4s
360:	test: 0.9520654	best: 0.9520654 (360)	total: 12.4s	remaining: 32.3s
361:	test: 0.9521033	best: 0.9521033 (361)	total: 12.4s	remaining: 32.2s
362:	test: 0.9521584	best: 0.9521584 (362)	total: 12.5s	remaining: 32.2s
363:	test: 0.9522045	best: 0.9522045 (363)	total: 12.5s	remaining: 32.2s
364:	test: 0.9522333	best: 0.9522333 (364)	total: 12.6s	remaining: 32.2s
365:	test: 0.9522665	best: 0.9522665 (365)	total: 1

467:	test: 0.9551327	best: 0.9551327 (467)	total: 15.8s	remaining: 28s
468:	test: 0.9551366	best: 0.9551366 (468)	total: 15.8s	remaining: 28s
469:	test: 0.9551636	best: 0.9551636 (469)	total: 15.8s	remaining: 28s
470:	test: 0.9552002	best: 0.9552002 (470)	total: 15.9s	remaining: 27.9s
471:	test: 0.9552109	best: 0.9552109 (471)	total: 15.9s	remaining: 27.9s
472:	test: 0.9552514	best: 0.9552514 (472)	total: 15.9s	remaining: 27.8s
473:	test: 0.9552745	best: 0.9552745 (473)	total: 16s	remaining: 27.8s
474:	test: 0.9553291	best: 0.9553291 (474)	total: 16s	remaining: 27.8s
475:	test: 0.9553506	best: 0.9553506 (475)	total: 16s	remaining: 27.7s
476:	test: 0.9553691	best: 0.9553691 (476)	total: 16.1s	remaining: 27.7s
477:	test: 0.9553821	best: 0.9553821 (477)	total: 16.1s	remaining: 27.7s
478:	test: 0.9553887	best: 0.9553887 (478)	total: 16.1s	remaining: 27.6s
479:	test: 0.9554202	best: 0.9554202 (479)	total: 16.2s	remaining: 27.6s
480:	test: 0.9554555	best: 0.9554555 (480)	total: 16.2s	remaini

582:	test: 0.9579566	best: 0.9579566 (582)	total: 19.4s	remaining: 23.8s
583:	test: 0.9579732	best: 0.9579732 (583)	total: 19.4s	remaining: 23.8s
584:	test: 0.9579959	best: 0.9579959 (584)	total: 19.4s	remaining: 23.7s
585:	test: 0.9580352	best: 0.9580352 (585)	total: 19.5s	remaining: 23.7s
586:	test: 0.9580755	best: 0.9580755 (586)	total: 19.5s	remaining: 23.7s
587:	test: 0.9580937	best: 0.9580937 (587)	total: 19.5s	remaining: 23.7s
588:	test: 0.9581216	best: 0.9581216 (588)	total: 19.6s	remaining: 23.6s
589:	test: 0.9581543	best: 0.9581543 (589)	total: 19.6s	remaining: 23.6s
590:	test: 0.9581752	best: 0.9581752 (590)	total: 19.6s	remaining: 23.5s
591:	test: 0.9581916	best: 0.9581916 (591)	total: 19.7s	remaining: 23.5s
592:	test: 0.9582140	best: 0.9582140 (592)	total: 19.7s	remaining: 23.5s
593:	test: 0.9582487	best: 0.9582487 (593)	total: 19.7s	remaining: 23.4s
594:	test: 0.9582668	best: 0.9582668 (594)	total: 19.7s	remaining: 23.4s
595:	test: 0.9582826	best: 0.9582826 (595)	total: 1

697:	test: 0.9603628	best: 0.9603628 (697)	total: 23.2s	remaining: 20s
698:	test: 0.9603669	best: 0.9603669 (698)	total: 23.3s	remaining: 20s
699:	test: 0.9603967	best: 0.9603967 (699)	total: 23.3s	remaining: 20s
700:	test: 0.9604281	best: 0.9604281 (700)	total: 23.4s	remaining: 20s
701:	test: 0.9604460	best: 0.9604460 (701)	total: 23.4s	remaining: 19.9s
702:	test: 0.9604636	best: 0.9604636 (702)	total: 23.4s	remaining: 19.9s
703:	test: 0.9604872	best: 0.9604872 (703)	total: 23.4s	remaining: 19.8s
704:	test: 0.9604981	best: 0.9604981 (704)	total: 23.5s	remaining: 19.8s
705:	test: 0.9605126	best: 0.9605126 (705)	total: 23.5s	remaining: 19.8s
706:	test: 0.9605260	best: 0.9605260 (706)	total: 23.5s	remaining: 19.7s
707:	test: 0.9605449	best: 0.9605449 (707)	total: 23.6s	remaining: 19.7s
708:	test: 0.9605609	best: 0.9605609 (708)	total: 23.6s	remaining: 19.7s
709:	test: 0.9605837	best: 0.9605837 (709)	total: 23.6s	remaining: 19.6s
710:	test: 0.9606260	best: 0.9606260 (710)	total: 23.7s	rem

810:	test: 0.9624874	best: 0.9624874 (810)	total: 26.8s	remaining: 16.1s
811:	test: 0.9625006	best: 0.9625006 (811)	total: 26.8s	remaining: 16.1s
812:	test: 0.9625133	best: 0.9625133 (812)	total: 26.8s	remaining: 16.1s
813:	test: 0.9625329	best: 0.9625329 (813)	total: 26.9s	remaining: 16s
814:	test: 0.9625419	best: 0.9625419 (814)	total: 26.9s	remaining: 16s
815:	test: 0.9625650	best: 0.9625650 (815)	total: 26.9s	remaining: 16s
816:	test: 0.9625677	best: 0.9625677 (816)	total: 26.9s	remaining: 15.9s
817:	test: 0.9625815	best: 0.9625815 (817)	total: 27s	remaining: 15.9s
818:	test: 0.9625973	best: 0.9625973 (818)	total: 27s	remaining: 15.9s
819:	test: 0.9626117	best: 0.9626117 (819)	total: 27s	remaining: 15.8s
820:	test: 0.9626231	best: 0.9626231 (820)	total: 27.1s	remaining: 15.8s
821:	test: 0.9626380	best: 0.9626380 (821)	total: 27.1s	remaining: 15.8s
822:	test: 0.9626484	best: 0.9626484 (822)	total: 27.1s	remaining: 15.7s
823:	test: 0.9626654	best: 0.9626654 (823)	total: 27.2s	remaini

925:	test: 0.9644444	best: 0.9644444 (925)	total: 30.4s	remaining: 12.3s
926:	test: 0.9644545	best: 0.9644545 (926)	total: 30.5s	remaining: 12.3s
927:	test: 0.9644770	best: 0.9644770 (927)	total: 30.5s	remaining: 12.2s
928:	test: 0.9644964	best: 0.9644964 (928)	total: 30.5s	remaining: 12.2s
929:	test: 0.9645048	best: 0.9645048 (929)	total: 30.6s	remaining: 12.2s
930:	test: 0.9645275	best: 0.9645275 (930)	total: 30.6s	remaining: 12.1s
931:	test: 0.9645494	best: 0.9645494 (931)	total: 30.6s	remaining: 12.1s
932:	test: 0.9645812	best: 0.9645812 (932)	total: 30.7s	remaining: 12.1s
933:	test: 0.9645981	best: 0.9645981 (933)	total: 30.7s	remaining: 12s
934:	test: 0.9646050	best: 0.9646050 (934)	total: 30.7s	remaining: 12s
935:	test: 0.9646270	best: 0.9646270 (935)	total: 30.8s	remaining: 12s
936:	test: 0.9646571	best: 0.9646571 (936)	total: 30.8s	remaining: 11.9s
937:	test: 0.9646742	best: 0.9646742 (937)	total: 30.8s	remaining: 11.9s
938:	test: 0.9646867	best: 0.9646867 (938)	total: 30.9s	r

1037:	test: 0.9663399	best: 0.9663399 (1037)	total: 34.1s	remaining: 8.61s
1038:	test: 0.9663615	best: 0.9663615 (1038)	total: 34.1s	remaining: 8.58s
1039:	test: 0.9663769	best: 0.9663769 (1039)	total: 34.2s	remaining: 8.54s
1040:	test: 0.9663795	best: 0.9663795 (1040)	total: 34.2s	remaining: 8.51s
1041:	test: 0.9663997	best: 0.9663997 (1041)	total: 34.2s	remaining: 8.48s
1042:	test: 0.9664128	best: 0.9664128 (1042)	total: 34.3s	remaining: 8.45s
1043:	test: 0.9664120	best: 0.9664128 (1042)	total: 34.3s	remaining: 8.42s
1044:	test: 0.9664334	best: 0.9664334 (1044)	total: 34.4s	remaining: 8.38s
1045:	test: 0.9664507	best: 0.9664507 (1045)	total: 34.4s	remaining: 8.35s
1046:	test: 0.9664780	best: 0.9664780 (1046)	total: 34.4s	remaining: 8.32s
1047:	test: 0.9664998	best: 0.9664998 (1047)	total: 34.4s	remaining: 8.28s
1048:	test: 0.9665168	best: 0.9665168 (1048)	total: 34.5s	remaining: 8.25s
1049:	test: 0.9665311	best: 0.9665311 (1049)	total: 34.5s	remaining: 8.21s
1050:	test: 0.9665569	bes

1147:	test: 0.9680354	best: 0.9680354 (1147)	total: 38.1s	remaining: 5.04s
1148:	test: 0.9680549	best: 0.9680549 (1148)	total: 38.1s	remaining: 5s
1149:	test: 0.9680786	best: 0.9680786 (1149)	total: 38.1s	remaining: 4.97s
1150:	test: 0.9680927	best: 0.9680927 (1150)	total: 38.2s	remaining: 4.94s
1151:	test: 0.9681186	best: 0.9681186 (1151)	total: 38.2s	remaining: 4.91s
1152:	test: 0.9681371	best: 0.9681371 (1152)	total: 38.2s	remaining: 4.87s
1153:	test: 0.9681420	best: 0.9681420 (1153)	total: 38.3s	remaining: 4.84s
1154:	test: 0.9681653	best: 0.9681653 (1154)	total: 38.3s	remaining: 4.81s
1155:	test: 0.9681905	best: 0.9681905 (1155)	total: 38.3s	remaining: 4.77s
1156:	test: 0.9682136	best: 0.9682136 (1156)	total: 38.3s	remaining: 4.74s
1157:	test: 0.9682366	best: 0.9682366 (1157)	total: 38.4s	remaining: 4.71s
1158:	test: 0.9682470	best: 0.9682470 (1158)	total: 38.4s	remaining: 4.67s
1159:	test: 0.9682556	best: 0.9682556 (1159)	total: 38.5s	remaining: 4.64s
1160:	test: 0.9682738	best: 

1260:	test: 0.9697967	best: 0.9697967 (1260)	total: 41.7s	remaining: 1.29s
1261:	test: 0.9697980	best: 0.9697980 (1261)	total: 41.7s	remaining: 1.26s
1262:	test: 0.9698019	best: 0.9698019 (1262)	total: 41.8s	remaining: 1.22s
1263:	test: 0.9698210	best: 0.9698210 (1263)	total: 41.8s	remaining: 1.19s
1264:	test: 0.9698237	best: 0.9698237 (1264)	total: 41.8s	remaining: 1.16s
1265:	test: 0.9698363	best: 0.9698363 (1265)	total: 41.9s	remaining: 1.12s
1266:	test: 0.9698414	best: 0.9698414 (1266)	total: 41.9s	remaining: 1.09s
1267:	test: 0.9698549	best: 0.9698549 (1267)	total: 41.9s	remaining: 1.06s
1268:	test: 0.9698717	best: 0.9698717 (1268)	total: 42s	remaining: 1.02s
1269:	test: 0.9698774	best: 0.9698774 (1269)	total: 42s	remaining: 992ms
1270:	test: 0.9698829	best: 0.9698829 (1270)	total: 42s	remaining: 959ms
1271:	test: 0.9698995	best: 0.9698995 (1271)	total: 42.1s	remaining: 926ms
1272:	test: 0.9699016	best: 0.9699016 (1272)	total: 42.1s	remaining: 893ms
1273:	test: 0.9699021	best: 0.9

<catboost.core.CatBoostClassifier at 0x294d79ca388>

In [293]:
catboost_pred = catboost.predict(X_test)
catboost_pred2 = catboost.predict(X_train)

In [290]:
print('training set')
print('classifier f1 score {}'. format(f1_score(y_train, catboost_pred2)))
print('')
print('testing set')
print('classifier f1 score {}'. format(f1_score(y_test, catboost_pred)))

training set
classifier f1 score 0.7288758265980898

testing set
classifier f1 score 0.7075688073394497


In [286]:
submission = sampleSubmission.copy()

In [287]:
submission['subscribed'] = cat.predict(test[num_feat])

In [288]:
submission

Unnamed: 0,customer_id,subscribed
0,customer_id_32884,0
1,customer_id_3169,0
2,customer_id_32206,0
3,customer_id_9403,0
4,customer_id_14020,0
...,...,...
12352,customer_id_15908,0
12353,customer_id_28222,0
12354,customer_id_14194,0
12355,customer_id_19764,0


In [289]:
submission.to_csv('23_submission.csv', index=False)