In [1]:
from __future__ import division

# import matplotlib.pyplot as plt
from collections import Counter
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.pipeline import Pipeline
from sklearn.svm import OneClassSVM
from sklearn.model_selection import GridSearchCV



In [2]:
df_train = pd.read_csv('new_train.csv')

In [3]:
print (Counter(df_train['TARGET']))

Counter({0: 57665, 1: 2355})


In [4]:
names = df_train.columns.values.tolist()[:-1]

In [5]:
df_test = pd.read_csv('test.csv', names=names)

In [6]:
# process base
def process_base(train, test):
    train.loc[(train['var38']>117310.979) & (train['var38']<117310.98), 'var38'] = -999.0
    test.loc[(test['var38']>117310.979) & (test['var38']<117310.98), 'var38'] = -999.0

    train.loc[train['var3']==-999999, 'var3'] = -999.0
    test.loc[test['var3']==-999999, 'var3'] = -999.0

    for f in ['imp_op_var40_comer_ult1', 'imp_op_var40_efect_ult3', 'imp_op_var41_comer_ult3', 'imp_sal_var16_ult1']:
        train.loc[train[f]==0.0, f] = -999.0
        test.loc[test[f]==0.0, f] = -999.0
        
    return train, test

In [7]:
df_train, df_test = process_base(df_train, df_test)

In [8]:
# remove constant columns
remove = []
for col in df_train.columns:
    if df_train[col].std() == 0:
        remove.append(col)

df_train.drop(remove, axis=1, inplace=True)
df_test.drop(remove, axis=1, inplace=True)

In [9]:
# remove duplicated columns
remove = []
c = df_train.columns
for i in range(len(c)-1):
    v = df_train[c[i]].values
    for j in range(i+1,len(c)):
        if np.array_equal(v,df_train[c[j]].values):
            remove.append(c[j])
df_train.drop(remove, axis=1, inplace=True)
df_test.drop(remove, axis=1, inplace=True)

In [10]:
# insert sum zeros feature
flist = [x for x in df_train.columns if not x in ['ID','TARGET']]
df_train.insert(1, 'SumZeros', (df_train[flist] == 0).astype(int).sum(axis=1))
df_test.insert(1, 'SumZeros', (df_test[flist] == 0).astype(int).sum(axis=1))

In [11]:
def normalize_features(train, test):
    flist = [x for x in train.columns if not x in ['ID','TARGET']]
    for f in flist:
        if train[f].max() == 9999999999.0:
            fmax = train.loc[train[f]<9999999999.0, f].max()
            train.loc[train[f]==9999999999.0, f] = fmax + 1

        if len(train.loc[train[f]<0, f].value_counts()) == 1:
            train.loc[train[f]<0, f] = -1.0
            test.loc[test[f]<0, f] = -1.0
            fmax = max(np.max(train[f]), np.max(test[f]))
            if fmax > 0:
                train.loc[train[f]>0, f] = 1.0*train.loc[train[f]>0, f]/fmax
                test.loc[test[f]>0, f] = 1.0*test.loc[test[f]>0, f]/fmax

        if len(train.loc[train[f]<0, f]) == 0:
            fmax = max(np.max(train[f]), np.max(test[f]))
            if fmax > 0:
                train.loc[train[f]>0, f] = 1.0*train.loc[train[f]>0, f]/fmax
                test.loc[test[f]>0, f] = 1.0*test.loc[test[f]>0, f]/fmax

        if len(train.loc[train[f]<0, f].value_counts()) > 1:
            fmax = max(np.max(train[f]), np.max(test[f]))
            if fmax > 0:
                train[f] = 1.0*train[f]/fmax
                test[f] = 1.0*test[f]/fmax

    return train, test


In [12]:
df_train, df_test = normalize_features(df_train, df_test)

In [13]:
# from sklearn.decomposition import PCA

# pca = PCA(n_components=2)
# x_train_projected = pca.fit_transform(df_train.drop(['TARGET'], axis=1).values)
# x_test_projected = pca.transform(df_test.values)
# df_train.insert(1, 'PCAOne', x_train_projected[:, 0])
# df_train.insert(1, 'PCATwo', x_train_projected[:, 1])
# df_test.insert(1, 'PCAOne', x_test_projected[:, 0])
# df_test.insert(1, 'PCATwo', x_test_projected[:, 1])

In [14]:
# from sklearn.manifold import TSNE
# from sklearn.decomposition import TruncatedSVD
# from sklearn.preprocessing import StandardScaler

# X = df_train.drop(['TARGET'], axis=1).append(df_test).values
# svd = TruncatedSVD(n_components=30)

# len_train = df_train.values.shape[0]

# X_svd = svd.fit_transform(X)
# X_scaled = StandardScaler().fit_transform(X_svd)
# modelTSNE = TSNE(n_components=2,random_state=0)
# feats_tsne = modelTSNE.fit_transform(X_scaled)
# feats_tsne_train = pd.DataFrame(feats_tsne[:len_train], columns=['tsne1', 'tsne2'])
# feats_tsne_test = pd.DataFrame(feats_tsne[len_train:], columns=['tsne1', 'tsne2'])
# # feats_tsne['ID'] = train[['ID']].append(test[['ID']], ignore_index=True)['ID'].values
# df_train = pd.merge(train, feats_tsne, on='ID', how='left')
# df_test = pd.merge(test, feats_tsne, on='ID', how='left')


# # tsne = TSNE(n_components=2)

In [13]:
# add K-means features

from sklearn.cluster import KMeans

# flist = [x for x in train.columns if not x in ['ID','TARGET']]

flist_kmeans = []
for ncl in range(2,11):
    cls = KMeans(n_clusters=ncl)
    cls.fit_predict(df_train[flist].values)
    df_train['kmeans_cluster'+str(ncl)] = cls.predict(df_train[flist].values)
    df_test['kmeans_cluster'+str(ncl)] = cls.predict(df_test[flist].values)
    flist_kmeans.append('kmeans_cluster'+str(ncl))

In [14]:
y_train = df_train['TARGET'].values
X_train = df_train.drop(['TARGET'], axis=1).values

X_test = df_test.values

# length of dataset
len_train = len(X_train)
len_test  = len(X_test)

In [15]:
# from sklearn.ensemble import ExtraTreesClassifier
# from sklearn.feature_selection import SelectFromModel

# clf_ = ExtraTreesClassifier(n_estimators=1000, n_jobs=-1)
# clf_ = clf_.fit(X_train, y_train)
# model = SelectFromModel(clf_, prefit=True)
# X_new_train = model.transform(X_train)
# X_new_test = model.transform(X_test)

In [16]:
# print (X_new_train.shape)

(60020, 39)


In [15]:
# classifier
# clf = xgb.XGBClassifier(missing=-999.0, max_depth=5, min_child_weight=5,
#                         n_estimators=500, learning_rate=0.02, gamma=1.0, 
#                         nthread=-1, subsample=0.8, colsample_bytree=1.0, colsample_bylevel=0.7, seed=123089)

X_fit, X_eval, y_fit, y_eval= train_test_split(X_train, y_train, test_size=0.3)

clfEns = []

for seed in [123089, 21324, 324003, 450453, 120032]:
    clfEns.append( xgb.XGBClassifier(missing=-999.0, max_depth=5, min_child_weight=5,
                        n_estimators=750, learning_rate=0.02, gamma=1.0, 
                        nthread=-1, subsample=0.8, colsample_bytree=1.0, colsample_bylevel=0.7, seed=seed) )
    

In [19]:
# param_test1 = {
# #     'max_depth':range(1,10,1),
# #     'min_child_weight':[1,2],
#     'gamma':[0.0, 0.05, 0.25, 0.75],
# #     'subsample':[0.75, 0.95],
# #     'colsample_bytree':[0.75,0.95],
# #     'reg_alpha':[0, 0.01],
#     'learning_rate':[0.003, 0.01, 0.03, 0.1, 0.3]
# }

In [20]:
# gsearch1 = GridSearchCV(estimator=xgb.sklearn.XGBClassifier(missing=np.nan, max_depth=5,
#                         min_child_weight=1,objective= 'binary:logistic', gamma=0,
#                         scale_pos_weight=1, n_estimators=350, learning_rate=0.03, 
#                         nthread=-1, subsample=0.95, colsample_bytree=0.85, seed=4242), 
# param_grid = param_test1, scoring='roc_auc',iid=False, verbose=30, n_jobs=-1, cv=5)

In [21]:
# gsearch1.fit(X_train, y_train)
# # print 'cv_results', gsearch1.cv_results_
# print ('\n')
# print ('best parameters', gsearch1.best_params_ )
# print ('best_score', gsearch1.best_score_)

In [16]:
# fitting
for clf in clfEns:
    clf.fit(X_train, y_train, early_stopping_rounds=20, eval_metric="auc", eval_set=[(X_eval, y_eval)])
    

[0]	validation_0-auc:0.803313
Will train until validation_0-auc hasn't improved in 20 rounds.
[1]	validation_0-auc:0.809813
[2]	validation_0-auc:0.811366
[3]	validation_0-auc:0.821103
[4]	validation_0-auc:0.823587
[5]	validation_0-auc:0.824871
[6]	validation_0-auc:0.826172
[7]	validation_0-auc:0.827442
[8]	validation_0-auc:0.826984
[9]	validation_0-auc:0.827018
[10]	validation_0-auc:0.828351
[11]	validation_0-auc:0.827926
[12]	validation_0-auc:0.827581
[13]	validation_0-auc:0.828109
[14]	validation_0-auc:0.828038
[15]	validation_0-auc:0.828409
[16]	validation_0-auc:0.828437
[17]	validation_0-auc:0.828051
[18]	validation_0-auc:0.828968
[19]	validation_0-auc:0.82917
[20]	validation_0-auc:0.830112
[21]	validation_0-auc:0.829944
[22]	validation_0-auc:0.830157
[23]	validation_0-auc:0.830641
[24]	validation_0-auc:0.830922
[25]	validation_0-auc:0.831197
[26]	validation_0-auc:0.831235
[27]	validation_0-auc:0.831626
[28]	validation_0-auc:0.83184
[29]	validation_0-auc:0.832297
[30]	validation_0-

[259]	validation_0-auc:0.86127
[260]	validation_0-auc:0.861366
[261]	validation_0-auc:0.861413
[262]	validation_0-auc:0.861514
[263]	validation_0-auc:0.861534
[264]	validation_0-auc:0.861633
[265]	validation_0-auc:0.861766
[266]	validation_0-auc:0.861893
[267]	validation_0-auc:0.86202
[268]	validation_0-auc:0.862093
[269]	validation_0-auc:0.8622
[270]	validation_0-auc:0.862279
[271]	validation_0-auc:0.8624
[272]	validation_0-auc:0.86245
[273]	validation_0-auc:0.862482
[274]	validation_0-auc:0.862477
[275]	validation_0-auc:0.862543
[276]	validation_0-auc:0.862622
[277]	validation_0-auc:0.862742
[278]	validation_0-auc:0.862856
[279]	validation_0-auc:0.8629
[280]	validation_0-auc:0.862971
[281]	validation_0-auc:0.863147
[282]	validation_0-auc:0.863158
[283]	validation_0-auc:0.863173
[284]	validation_0-auc:0.863215
[285]	validation_0-auc:0.863351
[286]	validation_0-auc:0.863421
[287]	validation_0-auc:0.863447
[288]	validation_0-auc:0.863572
[289]	validation_0-auc:0.863663
[290]	validation_

[516]	validation_0-auc:0.876005
[517]	validation_0-auc:0.876026
[518]	validation_0-auc:0.876053
[519]	validation_0-auc:0.876068
[520]	validation_0-auc:0.876107
[521]	validation_0-auc:0.876165
[522]	validation_0-auc:0.876196
[523]	validation_0-auc:0.876276
[524]	validation_0-auc:0.876363
[525]	validation_0-auc:0.876395
[526]	validation_0-auc:0.876459
[527]	validation_0-auc:0.876461
[528]	validation_0-auc:0.876556
[529]	validation_0-auc:0.876628
[530]	validation_0-auc:0.876728
[531]	validation_0-auc:0.876745
[532]	validation_0-auc:0.876764
[533]	validation_0-auc:0.876824
[534]	validation_0-auc:0.876854
[535]	validation_0-auc:0.87695
[536]	validation_0-auc:0.876978
[537]	validation_0-auc:0.877
[538]	validation_0-auc:0.877031
[539]	validation_0-auc:0.877084
[540]	validation_0-auc:0.877159
[541]	validation_0-auc:0.877171
[542]	validation_0-auc:0.877272
[543]	validation_0-auc:0.877321
[544]	validation_0-auc:0.877385
[545]	validation_0-auc:0.877472
[546]	validation_0-auc:0.87748
[547]	validat

[22]	validation_0-auc:0.831421
[23]	validation_0-auc:0.831912
[24]	validation_0-auc:0.831294
[25]	validation_0-auc:0.832032
[26]	validation_0-auc:0.832085
[27]	validation_0-auc:0.83191
[28]	validation_0-auc:0.83229
[29]	validation_0-auc:0.831955
[30]	validation_0-auc:0.832709
[31]	validation_0-auc:0.833069
[32]	validation_0-auc:0.833408
[33]	validation_0-auc:0.833508
[34]	validation_0-auc:0.833719
[35]	validation_0-auc:0.834156
[36]	validation_0-auc:0.834005
[37]	validation_0-auc:0.83392
[38]	validation_0-auc:0.833959
[39]	validation_0-auc:0.83398
[40]	validation_0-auc:0.833957
[41]	validation_0-auc:0.833771
[42]	validation_0-auc:0.834747
[43]	validation_0-auc:0.83494
[44]	validation_0-auc:0.835167
[45]	validation_0-auc:0.835299
[46]	validation_0-auc:0.835245
[47]	validation_0-auc:0.835693
[48]	validation_0-auc:0.835938
[49]	validation_0-auc:0.835973
[50]	validation_0-auc:0.836086
[51]	validation_0-auc:0.835839
[52]	validation_0-auc:0.836267
[53]	validation_0-auc:0.836625
[54]	validati

[282]	validation_0-auc:0.863328
[283]	validation_0-auc:0.863426
[284]	validation_0-auc:0.863548
[285]	validation_0-auc:0.8636
[286]	validation_0-auc:0.863644
[287]	validation_0-auc:0.863708
[288]	validation_0-auc:0.863771
[289]	validation_0-auc:0.863816
[290]	validation_0-auc:0.863878
[291]	validation_0-auc:0.864046
[292]	validation_0-auc:0.864036
[293]	validation_0-auc:0.864088
[294]	validation_0-auc:0.864202
[295]	validation_0-auc:0.86429
[296]	validation_0-auc:0.864451
[297]	validation_0-auc:0.864539
[298]	validation_0-auc:0.86462
[299]	validation_0-auc:0.86463
[300]	validation_0-auc:0.864639
[301]	validation_0-auc:0.864725
[302]	validation_0-auc:0.864762
[303]	validation_0-auc:0.864817
[304]	validation_0-auc:0.864865
[305]	validation_0-auc:0.864914
[306]	validation_0-auc:0.864948
[307]	validation_0-auc:0.864993
[308]	validation_0-auc:0.865066
[309]	validation_0-auc:0.865178
[310]	validation_0-auc:0.865319
[311]	validation_0-auc:0.865316
[312]	validation_0-auc:0.865402
[313]	validat

[540]	validation_0-auc:0.876787
[541]	validation_0-auc:0.876826
[542]	validation_0-auc:0.876973
[543]	validation_0-auc:0.877066
[544]	validation_0-auc:0.877109
[545]	validation_0-auc:0.877172
[546]	validation_0-auc:0.877194
[547]	validation_0-auc:0.877209
[548]	validation_0-auc:0.877237
[549]	validation_0-auc:0.877288
[550]	validation_0-auc:0.877412
[551]	validation_0-auc:0.877479
[552]	validation_0-auc:0.877532
[553]	validation_0-auc:0.877626
[554]	validation_0-auc:0.877709
[555]	validation_0-auc:0.877739
[556]	validation_0-auc:0.87775
[557]	validation_0-auc:0.877804
[558]	validation_0-auc:0.877841
[559]	validation_0-auc:0.877879
[560]	validation_0-auc:0.877981
[561]	validation_0-auc:0.878053
[562]	validation_0-auc:0.878143
[563]	validation_0-auc:0.878172
[564]	validation_0-auc:0.878201
[565]	validation_0-auc:0.87821
[566]	validation_0-auc:0.878232
[567]	validation_0-auc:0.87829
[568]	validation_0-auc:0.878308
[569]	validation_0-auc:0.878365
[570]	validation_0-auc:0.878378
[571]	valid

[47]	validation_0-auc:0.835496
[48]	validation_0-auc:0.835905
[49]	validation_0-auc:0.835861
[50]	validation_0-auc:0.836261
[51]	validation_0-auc:0.836546
[52]	validation_0-auc:0.836581
[53]	validation_0-auc:0.836541
[54]	validation_0-auc:0.836988
[55]	validation_0-auc:0.836929
[56]	validation_0-auc:0.837132
[57]	validation_0-auc:0.837053
[58]	validation_0-auc:0.836931
[59]	validation_0-auc:0.838131
[60]	validation_0-auc:0.838379
[61]	validation_0-auc:0.838366
[62]	validation_0-auc:0.838517
[63]	validation_0-auc:0.839004
[64]	validation_0-auc:0.839297
[65]	validation_0-auc:0.839208
[66]	validation_0-auc:0.839236
[67]	validation_0-auc:0.839417
[68]	validation_0-auc:0.839404
[69]	validation_0-auc:0.839711
[70]	validation_0-auc:0.840155
[71]	validation_0-auc:0.840448
[72]	validation_0-auc:0.840491
[73]	validation_0-auc:0.840866
[74]	validation_0-auc:0.840967
[75]	validation_0-auc:0.840921
[76]	validation_0-auc:0.840902
[77]	validation_0-auc:0.841081
[78]	validation_0-auc:0.841062
[79]	val

[306]	validation_0-auc:0.865547
[307]	validation_0-auc:0.865616
[308]	validation_0-auc:0.865617
[309]	validation_0-auc:0.865695
[310]	validation_0-auc:0.865725
[311]	validation_0-auc:0.865758
[312]	validation_0-auc:0.865791
[313]	validation_0-auc:0.865834
[314]	validation_0-auc:0.865864
[315]	validation_0-auc:0.865953
[316]	validation_0-auc:0.866094
[317]	validation_0-auc:0.866143
[318]	validation_0-auc:0.866345
[319]	validation_0-auc:0.866382
[320]	validation_0-auc:0.866418
[321]	validation_0-auc:0.866467
[322]	validation_0-auc:0.866528
[323]	validation_0-auc:0.866582
[324]	validation_0-auc:0.866749
[325]	validation_0-auc:0.866792
[326]	validation_0-auc:0.86682
[327]	validation_0-auc:0.866915
[328]	validation_0-auc:0.866926
[329]	validation_0-auc:0.866971
[330]	validation_0-auc:0.867006
[331]	validation_0-auc:0.867027
[332]	validation_0-auc:0.86708
[333]	validation_0-auc:0.867163
[334]	validation_0-auc:0.867187
[335]	validation_0-auc:0.867276
[336]	validation_0-auc:0.867282
[337]	vali

[563]	validation_0-auc:0.878238
[564]	validation_0-auc:0.878306
[565]	validation_0-auc:0.878381
[566]	validation_0-auc:0.878396
[567]	validation_0-auc:0.878437
[568]	validation_0-auc:0.87848
[569]	validation_0-auc:0.87852
[570]	validation_0-auc:0.878583
[571]	validation_0-auc:0.878625
[572]	validation_0-auc:0.878667
[573]	validation_0-auc:0.878755
[574]	validation_0-auc:0.878778
[575]	validation_0-auc:0.878792
[576]	validation_0-auc:0.878822
[577]	validation_0-auc:0.878877
[578]	validation_0-auc:0.878916
[579]	validation_0-auc:0.878975
[580]	validation_0-auc:0.87902
[581]	validation_0-auc:0.879029
[582]	validation_0-auc:0.879094
[583]	validation_0-auc:0.879125
[584]	validation_0-auc:0.879139
[585]	validation_0-auc:0.879174
[586]	validation_0-auc:0.879181
[587]	validation_0-auc:0.879219
[588]	validation_0-auc:0.879259
[589]	validation_0-auc:0.879279
[590]	validation_0-auc:0.879314
[591]	validation_0-auc:0.879375
[592]	validation_0-auc:0.879445
[593]	validation_0-auc:0.879474
[594]	valid

[71]	validation_0-auc:0.839637
[72]	validation_0-auc:0.840285
[73]	validation_0-auc:0.84064
[74]	validation_0-auc:0.840755
[75]	validation_0-auc:0.840855
[76]	validation_0-auc:0.840816
[77]	validation_0-auc:0.840672
[78]	validation_0-auc:0.84078
[79]	validation_0-auc:0.840884
[80]	validation_0-auc:0.840932
[81]	validation_0-auc:0.841142
[82]	validation_0-auc:0.841227
[83]	validation_0-auc:0.841462
[84]	validation_0-auc:0.841624
[85]	validation_0-auc:0.841809
[86]	validation_0-auc:0.841875
[87]	validation_0-auc:0.842096
[88]	validation_0-auc:0.842057
[89]	validation_0-auc:0.842155
[90]	validation_0-auc:0.842441
[91]	validation_0-auc:0.843016
[92]	validation_0-auc:0.843052
[93]	validation_0-auc:0.843212
[94]	validation_0-auc:0.843427
[95]	validation_0-auc:0.843622
[96]	validation_0-auc:0.843685
[97]	validation_0-auc:0.843709
[98]	validation_0-auc:0.843802
[99]	validation_0-auc:0.843935
[100]	validation_0-auc:0.84415
[101]	validation_0-auc:0.844194
[102]	validation_0-auc:0.844232
[103]	va

[330]	validation_0-auc:0.86701
[331]	validation_0-auc:0.867029
[332]	validation_0-auc:0.867104
[333]	validation_0-auc:0.867235
[334]	validation_0-auc:0.867264
[335]	validation_0-auc:0.867266
[336]	validation_0-auc:0.867262
[337]	validation_0-auc:0.867305
[338]	validation_0-auc:0.867366
[339]	validation_0-auc:0.867399
[340]	validation_0-auc:0.86744
[341]	validation_0-auc:0.867468
[342]	validation_0-auc:0.867582
[343]	validation_0-auc:0.867616
[344]	validation_0-auc:0.867689
[345]	validation_0-auc:0.867711
[346]	validation_0-auc:0.867777
[347]	validation_0-auc:0.867839
[348]	validation_0-auc:0.867881
[349]	validation_0-auc:0.867907
[350]	validation_0-auc:0.867924
[351]	validation_0-auc:0.86794
[352]	validation_0-auc:0.868035
[353]	validation_0-auc:0.868069
[354]	validation_0-auc:0.868238
[355]	validation_0-auc:0.868369
[356]	validation_0-auc:0.868441
[357]	validation_0-auc:0.868515
[358]	validation_0-auc:0.868658
[359]	validation_0-auc:0.868749
[360]	validation_0-auc:0.868769
[361]	valid

[588]	validation_0-auc:0.880504
[589]	validation_0-auc:0.880526
[590]	validation_0-auc:0.880585
[591]	validation_0-auc:0.880605
[592]	validation_0-auc:0.880633
[593]	validation_0-auc:0.88067
[594]	validation_0-auc:0.880687
[595]	validation_0-auc:0.88077
[596]	validation_0-auc:0.880772
[597]	validation_0-auc:0.880781
[598]	validation_0-auc:0.880797
[599]	validation_0-auc:0.880811
[600]	validation_0-auc:0.880867
[601]	validation_0-auc:0.8809
[602]	validation_0-auc:0.881
[603]	validation_0-auc:0.881011
[604]	validation_0-auc:0.88105
[605]	validation_0-auc:0.881084
[606]	validation_0-auc:0.881101
[607]	validation_0-auc:0.881116
[608]	validation_0-auc:0.881126
[609]	validation_0-auc:0.881152
[610]	validation_0-auc:0.881154
[611]	validation_0-auc:0.881166
[612]	validation_0-auc:0.881193
[613]	validation_0-auc:0.881265
[614]	validation_0-auc:0.881268
[615]	validation_0-auc:0.881276
[616]	validation_0-auc:0.88133
[617]	validation_0-auc:0.881382
[618]	validation_0-auc:0.881414
[619]	validation_

[97]	validation_0-auc:0.843915
[98]	validation_0-auc:0.844036
[99]	validation_0-auc:0.844524
[100]	validation_0-auc:0.844592
[101]	validation_0-auc:0.844672
[102]	validation_0-auc:0.84472
[103]	validation_0-auc:0.844804
[104]	validation_0-auc:0.844812
[105]	validation_0-auc:0.844811
[106]	validation_0-auc:0.844906
[107]	validation_0-auc:0.8449
[108]	validation_0-auc:0.844941
[109]	validation_0-auc:0.845158
[110]	validation_0-auc:0.845382
[111]	validation_0-auc:0.845554
[112]	validation_0-auc:0.845618
[113]	validation_0-auc:0.845562
[114]	validation_0-auc:0.84559
[115]	validation_0-auc:0.845709
[116]	validation_0-auc:0.845904
[117]	validation_0-auc:0.845881
[118]	validation_0-auc:0.846123
[119]	validation_0-auc:0.846227
[120]	validation_0-auc:0.846334
[121]	validation_0-auc:0.84653
[122]	validation_0-auc:0.846788
[123]	validation_0-auc:0.846968
[124]	validation_0-auc:0.847026
[125]	validation_0-auc:0.847014
[126]	validation_0-auc:0.847136
[127]	validation_0-auc:0.847357
[128]	validation

[355]	validation_0-auc:0.867621
[356]	validation_0-auc:0.867725
[357]	validation_0-auc:0.867753
[358]	validation_0-auc:0.867756
[359]	validation_0-auc:0.867867
[360]	validation_0-auc:0.867975
[361]	validation_0-auc:0.868061
[362]	validation_0-auc:0.86816
[363]	validation_0-auc:0.868189
[364]	validation_0-auc:0.868282
[365]	validation_0-auc:0.868351
[366]	validation_0-auc:0.868424
[367]	validation_0-auc:0.868468
[368]	validation_0-auc:0.868522
[369]	validation_0-auc:0.868749
[370]	validation_0-auc:0.868761
[371]	validation_0-auc:0.868832
[372]	validation_0-auc:0.868908
[373]	validation_0-auc:0.868967
[374]	validation_0-auc:0.869002
[375]	validation_0-auc:0.869043
[376]	validation_0-auc:0.869147
[377]	validation_0-auc:0.869179
[378]	validation_0-auc:0.869242
[379]	validation_0-auc:0.869296
[380]	validation_0-auc:0.869414
[381]	validation_0-auc:0.869481
[382]	validation_0-auc:0.869522
[383]	validation_0-auc:0.869549
[384]	validation_0-auc:0.869603
[385]	validation_0-auc:0.869626
[386]	val

[612]	validation_0-auc:0.880845
[613]	validation_0-auc:0.880889
[614]	validation_0-auc:0.880958
[615]	validation_0-auc:0.880951
[616]	validation_0-auc:0.880992
[617]	validation_0-auc:0.881071
[618]	validation_0-auc:0.881075
[619]	validation_0-auc:0.881155
[620]	validation_0-auc:0.881181
[621]	validation_0-auc:0.881223
[622]	validation_0-auc:0.881256
[623]	validation_0-auc:0.881277
[624]	validation_0-auc:0.881299
[625]	validation_0-auc:0.881381
[626]	validation_0-auc:0.881416
[627]	validation_0-auc:0.881486
[628]	validation_0-auc:0.881521
[629]	validation_0-auc:0.881608
[630]	validation_0-auc:0.881674
[631]	validation_0-auc:0.881755
[632]	validation_0-auc:0.881808
[633]	validation_0-auc:0.881853
[634]	validation_0-auc:0.881859
[635]	validation_0-auc:0.881992
[636]	validation_0-auc:0.882083
[637]	validation_0-auc:0.882094
[638]	validation_0-auc:0.882165
[639]	validation_0-auc:0.882185
[640]	validation_0-auc:0.882282
[641]	validation_0-auc:0.882339
[642]	validation_0-auc:0.88235
[643]	val

In [17]:
y_pred = 0

for idx, clf in enumerate(clfEns):
    y_temp = clf.predict_proba(X_train)[:,1]
    print('clf {0} AUC:'.format(idx), roc_auc_score(y_train, y_temp))
    y_pred += y_temp

y_pred = y_pred/5.0
print('Overall AUC:', roc_auc_score(y_train, y_pred))

('clf 0 AUC:', 0.88918026238010273)
('clf 1 AUC:', 0.88958667668867863)
('clf 2 AUC:', 0.88902126879334364)
('clf 3 AUC:', 0.88940578710440987)
('clf 4 AUC:', 0.89005475103934195)
('Overall AUC:', 0.88970673833031144)


In [18]:
# predicting
y_pred = 0
for clf in clfEns:
    y_pred+= clf.predict_proba(X_test)[:,1]
y_pred = y_pred/5.0

In [19]:
submission = pd.DataFrame(data=y_pred)
submission.to_csv("submission.csv", index=False, header=None)