In [1]:
import os
from sklearn.model_selection import KFold, train_test_split, GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from scipy.stats import entropy, kurtosis
import xgboost as xgb
import seaborn as sns
from xgboost import plot_importance
import warnings
import matplotlib.pyplot as plt
import pandas as pd
from math import *
import numpy as np

pd.set_option('display.max_columns', None)
from IPython.display import display
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
pathf = os.path.join("..", "data", "particles")

trainpd = pd.read_csv(os.path.join(pathf, "train.csv"))
print(trainpd.head(1))
trainshape = trainpd.shape
print(trainshape)
eventpd = pd.read_csv(os.path.join(pathf, "event.csv"))
print(eventpd.head(1))
print(eventpd.shape)
testpd = pd.read_csv(os.path.join(pathf, "test.csv"))
testshape = testpd.shape
print(testpd.head(1))
print(testpd.shape)

data = pd.concat([trainpd, testpd], ignore_index=True)
data = pd.merge(data, eventpd, on='event_id', how='left')

       x      y  z        t   terror        q  flag  event_id  hit_id
0 -142.5 -147.5  0  767.879  2.02966  1.05052     0         7       1
(9473201, 9)
   event_id  nhit  nhitreal  energymc  thetamc    phimc   xcmc    ycmc
0         7   426        70   48348.9  63.1686  11.0982 -40.83  114.03
(13315, 8)
       x      y  z        t  terror        q  event_id  hit_id
0 -142.5 -127.5  0  848.061  1.9984  1.15067         9       1
(4086511, 8)


In [3]:
# (k(q,mc)*t0)^2 + dis^2 -dis*cos(phi)*sin(thmc)*t0 = t^2
data['fx'] = data['x'] - data['xcmc']
data['fy'] = data['y'] - data['ycmc']
data['fphi'] = np.arctan2(data['fy'], data['fx'])-(data['phimc'] * np.pi / 180.)
data['fdis'] = np.sqrt(data['fx'] ** 2 + data['fy'] ** 2)
data['fsinthmc'] = np.sin(data['thetamc'] * np.pi / 180.)
data['fcosphi'] = np.cos(data['fphi'] * np.pi / 180.)

data['ft2'] = data['t'] ** 2
data['fdis2'] = data['fdis'] ** 2 
data['fsencond'] = data['fdis'] * data['fcosphi'] * data['fsinthmc']

data['fttrue'] = data['t'] / data['terror']
data['nhitratio'] = data['nhit'] / data['nhitreal']

# del data['fx']
# del data['fy']
del data['x']
del data['y']
del data['z']

In [4]:
info_new = pd.DataFrame()
info_new["event_id"] = data.groupby(["event_id"])["event_id"].mean()
info_new["fdis_mean"] = data.groupby(["event_id"])["fdis"].mean()
info_new["fdis_std"] = data.groupby(["event_id"])["fdis"].std()
info_new["fdis_stdmean"] = info_new["fdis_std"] / info_new["fdis_mean"]
info_new["ft_mean"] = data.groupby(["event_id"])["t"].mean()
info_new["ft_std"] = data.groupby(["event_id"])["t"].std()
info_new["ft_stdmean"] = info_new["ft_std"] / info_new["ft_mean"]
info_new["ft_mean2"] = info_new["ft_mean"] ** 2
info_new.reset_index(drop=True, inplace=True)
data = pd.merge(data, info_new, on='event_id', how='left')

In [5]:
print(trainshape[0])
print(type(trainshape[0]))
print(data.shape)

trainpd = data[:trainshape[0]].reset_index()
testpd = data[trainshape[0]:].reset_index()
del data

9473201
<class 'int'>
(13559712, 27)


In [6]:
print(trainpd.columns)
feature= [x for x in trainpd.columns if x not in ['flag','index','hit_id','event_id']]
labels = trainpd['flag']
del trainpd['flag']
del testpd['flag']

Index(['index', 'event_id', 'flag', 'hit_id', 'q', 't', 'terror', 'nhit',
       'nhitreal', 'energymc', 'thetamc', 'phimc', 'xcmc', 'ycmc', 'fx', 'fy',
       'fphi', 'fdis', 'fsinth', 'fcosphi', 'fttrue', 'nhitratio', 'fdis_mean',
       'fdis_std', 'fdis_stdmean', 'ft_mean', 'ft_std', 'ft_stdmean'],
      dtype='object')


In [None]:
#这里用的gpu版的xgb，cpu的话实在太慢了，建议装个gpu版
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True, random_state=4399)
y_pp_xgb = np.zeros(testshape[0])
y_pp_xgb_stacking = np.zeros(len(labels))
for train_index, test_index in kf.split(trainpd):
    print ( ">>>", train_index )
    clf = xgb.XGBClassifier(tree_method='gpu_hist',max_depth=8,learning_rate=0.1,verbosity=3,
                           eval_metric='auc',n_estimators=2000)
    clf.fit(
        trainpd[feature].iloc[train_index], labels[train_index],
        eval_set=[(trainpd[feature].iloc[train_index], labels[train_index]),
                  (trainpd[feature].iloc[test_index], labels[test_index])],
        early_stopping_rounds=50,
        verbose=True,
    )

    y_pred = clf.predict(trainpd[feature].iloc[test_index]) 
    y_predprob = clf.predict_proba(trainpd[feature].iloc[test_index])[:, 1] 
    
    y_pp_xgb_stacking[test_index] = y_predprob
      
    auc = metrics.roc_auc_score(labels[test_index], y_predprob)
    print("AUC Score (Train): %f" % auc) 
    
    y_pp_xgb += clf.predict_proba(testpd[feature])[:, 1] / n_splits

>>> [      0       2       3 ... 9473198 9473199 9473200]
[18:46:12] DEBUG: C:/Users/Administrator/workspace/xgboost-win64_release_1.0.0/src/tree/updater_gpu_hist.cu:1167: [GPU Hist]: Configure
[18:46:16] Init: 0.010708s, 1 calls @ 10708us

[0]	validation_0-auc:0.96706	validation_1-auc:0.96699
Multiple eval metrics have been passed: 'validation_1-auc' will be used for early stopping.

Will train until validation_1-auc hasn't improved in 50 rounds.
[1]	validation_0-auc:0.97024	validation_1-auc:0.97019
[2]	validation_0-auc:0.97178	validation_1-auc:0.97167
[3]	validation_0-auc:0.97276	validation_1-auc:0.97263
[4]	validation_0-auc:0.97353	validation_1-auc:0.97340
[5]	validation_0-auc:0.97475	validation_1-auc:0.97460
[6]	validation_0-auc:0.97524	validation_1-auc:0.97508
[7]	validation_0-auc:0.97561	validation_1-auc:0.97544
[8]	validation_0-auc:0.97621	validation_1-auc:0.97604
[9]	validation_0-auc:0.97650	validation_1-auc:0.97631
[10]	validation_0-auc:0.97713	validation_1-auc:0.97695
[11]	va

[139]	validation_0-auc:0.99359	validation_1-auc:0.99337
[140]	validation_0-auc:0.99372	validation_1-auc:0.99349
[141]	validation_0-auc:0.99373	validation_1-auc:0.99350
[142]	validation_0-auc:0.99376	validation_1-auc:0.99353
[143]	validation_0-auc:0.99378	validation_1-auc:0.99355
[144]	validation_0-auc:0.99379	validation_1-auc:0.99356
[145]	validation_0-auc:0.99379	validation_1-auc:0.99357
[146]	validation_0-auc:0.99381	validation_1-auc:0.99358
[147]	validation_0-auc:0.99383	validation_1-auc:0.99360
[148]	validation_0-auc:0.99386	validation_1-auc:0.99363
[149]	validation_0-auc:0.99390	validation_1-auc:0.99367
[150]	validation_0-auc:0.99392	validation_1-auc:0.99369
[151]	validation_0-auc:0.99393	validation_1-auc:0.99370
[152]	validation_0-auc:0.99397	validation_1-auc:0.99374
[153]	validation_0-auc:0.99398	validation_1-auc:0.99374
[154]	validation_0-auc:0.99402	validation_1-auc:0.99378
[155]	validation_0-auc:0.99403	validation_1-auc:0.99379
[156]	validation_0-auc:0.99405	validation_1-auc:

[286]	validation_0-auc:0.99602	validation_1-auc:0.99567
[287]	validation_0-auc:0.99603	validation_1-auc:0.99567
[288]	validation_0-auc:0.99603	validation_1-auc:0.99567
[289]	validation_0-auc:0.99603	validation_1-auc:0.99567
[290]	validation_0-auc:0.99604	validation_1-auc:0.99568
[291]	validation_0-auc:0.99604	validation_1-auc:0.99568
[292]	validation_0-auc:0.99605	validation_1-auc:0.99568
[293]	validation_0-auc:0.99605	validation_1-auc:0.99569
[294]	validation_0-auc:0.99606	validation_1-auc:0.99569
[295]	validation_0-auc:0.99606	validation_1-auc:0.99569
[296]	validation_0-auc:0.99607	validation_1-auc:0.99570
[297]	validation_0-auc:0.99608	validation_1-auc:0.99570
[298]	validation_0-auc:0.99608	validation_1-auc:0.99571
[299]	validation_0-auc:0.99609	validation_1-auc:0.99571
[300]	validation_0-auc:0.99609	validation_1-auc:0.99571
[301]	validation_0-auc:0.99609	validation_1-auc:0.99572
[302]	validation_0-auc:0.99610	validation_1-auc:0.99572
[303]	validation_0-auc:0.99610	validation_1-auc:

[433]	validation_0-auc:0.99679	validation_1-auc:0.99630
[434]	validation_0-auc:0.99679	validation_1-auc:0.99630
[435]	validation_0-auc:0.99679	validation_1-auc:0.99630
[436]	validation_0-auc:0.99680	validation_1-auc:0.99630
[437]	validation_0-auc:0.99681	validation_1-auc:0.99632
[438]	validation_0-auc:0.99681	validation_1-auc:0.99632
[439]	validation_0-auc:0.99682	validation_1-auc:0.99632
[440]	validation_0-auc:0.99682	validation_1-auc:0.99633
[441]	validation_0-auc:0.99682	validation_1-auc:0.99633
[442]	validation_0-auc:0.99683	validation_1-auc:0.99633
[443]	validation_0-auc:0.99684	validation_1-auc:0.99633
[444]	validation_0-auc:0.99684	validation_1-auc:0.99634
[445]	validation_0-auc:0.99684	validation_1-auc:0.99634
[446]	validation_0-auc:0.99685	validation_1-auc:0.99635
[447]	validation_0-auc:0.99686	validation_1-auc:0.99635
[448]	validation_0-auc:0.99686	validation_1-auc:0.99635
[449]	validation_0-auc:0.99686	validation_1-auc:0.99635
[450]	validation_0-auc:0.99687	validation_1-auc:

[580]	validation_0-auc:0.99725	validation_1-auc:0.99661
[581]	validation_0-auc:0.99725	validation_1-auc:0.99661
[582]	validation_0-auc:0.99725	validation_1-auc:0.99661
[583]	validation_0-auc:0.99726	validation_1-auc:0.99662
[584]	validation_0-auc:0.99726	validation_1-auc:0.99662
[585]	validation_0-auc:0.99726	validation_1-auc:0.99662
[586]	validation_0-auc:0.99727	validation_1-auc:0.99662
[587]	validation_0-auc:0.99727	validation_1-auc:0.99662
[588]	validation_0-auc:0.99727	validation_1-auc:0.99662
[589]	validation_0-auc:0.99727	validation_1-auc:0.99662
[590]	validation_0-auc:0.99727	validation_1-auc:0.99662
[591]	validation_0-auc:0.99728	validation_1-auc:0.99662
[592]	validation_0-auc:0.99728	validation_1-auc:0.99662
[593]	validation_0-auc:0.99728	validation_1-auc:0.99663
[594]	validation_0-auc:0.99728	validation_1-auc:0.99663
[595]	validation_0-auc:0.99729	validation_1-auc:0.99663
[596]	validation_0-auc:0.99729	validation_1-auc:0.99663
[597]	validation_0-auc:0.99729	validation_1-auc:

[727]	validation_0-auc:0.99758	validation_1-auc:0.99681
[728]	validation_0-auc:0.99758	validation_1-auc:0.99681
[729]	validation_0-auc:0.99758	validation_1-auc:0.99682
[730]	validation_0-auc:0.99759	validation_1-auc:0.99682
[731]	validation_0-auc:0.99759	validation_1-auc:0.99682
[732]	validation_0-auc:0.99759	validation_1-auc:0.99682
[733]	validation_0-auc:0.99759	validation_1-auc:0.99682
[734]	validation_0-auc:0.99759	validation_1-auc:0.99682
[735]	validation_0-auc:0.99759	validation_1-auc:0.99682
[736]	validation_0-auc:0.99760	validation_1-auc:0.99682
[737]	validation_0-auc:0.99760	validation_1-auc:0.99683
[738]	validation_0-auc:0.99760	validation_1-auc:0.99683
[739]	validation_0-auc:0.99760	validation_1-auc:0.99683
[740]	validation_0-auc:0.99761	validation_1-auc:0.99683
[741]	validation_0-auc:0.99761	validation_1-auc:0.99683
[742]	validation_0-auc:0.99761	validation_1-auc:0.99683
[743]	validation_0-auc:0.99762	validation_1-auc:0.99684
[744]	validation_0-auc:0.99762	validation_1-auc:

[874]	validation_0-auc:0.99783	validation_1-auc:0.99693
[875]	validation_0-auc:0.99783	validation_1-auc:0.99693
[876]	validation_0-auc:0.99783	validation_1-auc:0.99693
[877]	validation_0-auc:0.99783	validation_1-auc:0.99693
[878]	validation_0-auc:0.99783	validation_1-auc:0.99694
[879]	validation_0-auc:0.99783	validation_1-auc:0.99694
[880]	validation_0-auc:0.99784	validation_1-auc:0.99694
[881]	validation_0-auc:0.99784	validation_1-auc:0.99694
[882]	validation_0-auc:0.99784	validation_1-auc:0.99694
[883]	validation_0-auc:0.99784	validation_1-auc:0.99694
[884]	validation_0-auc:0.99784	validation_1-auc:0.99694
[885]	validation_0-auc:0.99784	validation_1-auc:0.99694
[886]	validation_0-auc:0.99785	validation_1-auc:0.99694
[887]	validation_0-auc:0.99785	validation_1-auc:0.99694
[888]	validation_0-auc:0.99785	validation_1-auc:0.99694
[889]	validation_0-auc:0.99785	validation_1-auc:0.99694
[890]	validation_0-auc:0.99785	validation_1-auc:0.99694
[891]	validation_0-auc:0.99785	validation_1-auc:

[1020]	validation_0-auc:0.99802	validation_1-auc:0.99701
[1021]	validation_0-auc:0.99802	validation_1-auc:0.99701
[1022]	validation_0-auc:0.99802	validation_1-auc:0.99701
[1023]	validation_0-auc:0.99803	validation_1-auc:0.99701
[1024]	validation_0-auc:0.99803	validation_1-auc:0.99701
[1025]	validation_0-auc:0.99803	validation_1-auc:0.99701
[1026]	validation_0-auc:0.99803	validation_1-auc:0.99701
[1027]	validation_0-auc:0.99803	validation_1-auc:0.99701
[1028]	validation_0-auc:0.99804	validation_1-auc:0.99702
[1029]	validation_0-auc:0.99804	validation_1-auc:0.99702
[1030]	validation_0-auc:0.99804	validation_1-auc:0.99702
[1031]	validation_0-auc:0.99804	validation_1-auc:0.99702
[1032]	validation_0-auc:0.99804	validation_1-auc:0.99702
[1033]	validation_0-auc:0.99804	validation_1-auc:0.99702
[1034]	validation_0-auc:0.99804	validation_1-auc:0.99702
[1035]	validation_0-auc:0.99804	validation_1-auc:0.99702
[1036]	validation_0-auc:0.99805	validation_1-auc:0.99702
[1037]	validation_0-auc:0.99805

[1164]	validation_0-auc:0.99820	validation_1-auc:0.99708
[1165]	validation_0-auc:0.99820	validation_1-auc:0.99708
[1166]	validation_0-auc:0.99820	validation_1-auc:0.99708
[1167]	validation_0-auc:0.99820	validation_1-auc:0.99708
[1168]	validation_0-auc:0.99820	validation_1-auc:0.99708
[1169]	validation_0-auc:0.99820	validation_1-auc:0.99708
[1170]	validation_0-auc:0.99820	validation_1-auc:0.99708
[1171]	validation_0-auc:0.99820	validation_1-auc:0.99708
[1172]	validation_0-auc:0.99820	validation_1-auc:0.99708
[1173]	validation_0-auc:0.99820	validation_1-auc:0.99708
[1174]	validation_0-auc:0.99821	validation_1-auc:0.99708
[1175]	validation_0-auc:0.99821	validation_1-auc:0.99708
[1176]	validation_0-auc:0.99821	validation_1-auc:0.99708
[1177]	validation_0-auc:0.99821	validation_1-auc:0.99708
[1178]	validation_0-auc:0.99821	validation_1-auc:0.99708
[1179]	validation_0-auc:0.99821	validation_1-auc:0.99708
[1180]	validation_0-auc:0.99821	validation_1-auc:0.99708
[1181]	validation_0-auc:0.99821

[1308]	validation_0-auc:0.99835	validation_1-auc:0.99714
[1309]	validation_0-auc:0.99835	validation_1-auc:0.99714
[1310]	validation_0-auc:0.99835	validation_1-auc:0.99714
[1311]	validation_0-auc:0.99835	validation_1-auc:0.99714
[1312]	validation_0-auc:0.99835	validation_1-auc:0.99714
[1313]	validation_0-auc:0.99835	validation_1-auc:0.99714
[1314]	validation_0-auc:0.99835	validation_1-auc:0.99714
[1315]	validation_0-auc:0.99836	validation_1-auc:0.99714
[1316]	validation_0-auc:0.99836	validation_1-auc:0.99714
[1317]	validation_0-auc:0.99836	validation_1-auc:0.99714
[1318]	validation_0-auc:0.99836	validation_1-auc:0.99714
[1319]	validation_0-auc:0.99836	validation_1-auc:0.99714
[1320]	validation_0-auc:0.99836	validation_1-auc:0.99714
[1321]	validation_0-auc:0.99836	validation_1-auc:0.99714
[1322]	validation_0-auc:0.99836	validation_1-auc:0.99714
[1323]	validation_0-auc:0.99836	validation_1-auc:0.99714
[1324]	validation_0-auc:0.99836	validation_1-auc:0.99714
[1325]	validation_0-auc:0.99836

[1452]	validation_0-auc:0.99848	validation_1-auc:0.99716
[1453]	validation_0-auc:0.99848	validation_1-auc:0.99716
[1454]	validation_0-auc:0.99848	validation_1-auc:0.99716
[1455]	validation_0-auc:0.99848	validation_1-auc:0.99717
[1456]	validation_0-auc:0.99848	validation_1-auc:0.99717
[1457]	validation_0-auc:0.99848	validation_1-auc:0.99717
[1458]	validation_0-auc:0.99848	validation_1-auc:0.99717
[1459]	validation_0-auc:0.99848	validation_1-auc:0.99717
[1460]	validation_0-auc:0.99848	validation_1-auc:0.99717
[1461]	validation_0-auc:0.99848	validation_1-auc:0.99717
[1462]	validation_0-auc:0.99848	validation_1-auc:0.99717
[1463]	validation_0-auc:0.99849	validation_1-auc:0.99717
[1464]	validation_0-auc:0.99849	validation_1-auc:0.99717
[1465]	validation_0-auc:0.99849	validation_1-auc:0.99717
[1466]	validation_0-auc:0.99849	validation_1-auc:0.99717
[1467]	validation_0-auc:0.99849	validation_1-auc:0.99717
[1468]	validation_0-auc:0.99849	validation_1-auc:0.99717
[1469]	validation_0-auc:0.99849

[1596]	validation_0-auc:0.99859	validation_1-auc:0.99719
[1597]	validation_0-auc:0.99859	validation_1-auc:0.99719
[1598]	validation_0-auc:0.99859	validation_1-auc:0.99719
[1599]	validation_0-auc:0.99859	validation_1-auc:0.99719
[1600]	validation_0-auc:0.99860	validation_1-auc:0.99719
[1601]	validation_0-auc:0.99860	validation_1-auc:0.99719
[1602]	validation_0-auc:0.99860	validation_1-auc:0.99719
[1603]	validation_0-auc:0.99860	validation_1-auc:0.99719
[1604]	validation_0-auc:0.99860	validation_1-auc:0.99719
[1605]	validation_0-auc:0.99860	validation_1-auc:0.99719
[1606]	validation_0-auc:0.99860	validation_1-auc:0.99719
[1607]	validation_0-auc:0.99860	validation_1-auc:0.99719
[1608]	validation_0-auc:0.99860	validation_1-auc:0.99719
[1609]	validation_0-auc:0.99860	validation_1-auc:0.99719
[1610]	validation_0-auc:0.99860	validation_1-auc:0.99719
[1611]	validation_0-auc:0.99860	validation_1-auc:0.99719
[1612]	validation_0-auc:0.99860	validation_1-auc:0.99719
[1613]	validation_0-auc:0.99860

[1740]	validation_0-auc:0.99869	validation_1-auc:0.99721
[1741]	validation_0-auc:0.99869	validation_1-auc:0.99721
[1742]	validation_0-auc:0.99869	validation_1-auc:0.99721
[1743]	validation_0-auc:0.99869	validation_1-auc:0.99721
[1744]	validation_0-auc:0.99869	validation_1-auc:0.99721
[1745]	validation_0-auc:0.99869	validation_1-auc:0.99721
[1746]	validation_0-auc:0.99869	validation_1-auc:0.99721
[1747]	validation_0-auc:0.99869	validation_1-auc:0.99721
[1748]	validation_0-auc:0.99869	validation_1-auc:0.99721
[1749]	validation_0-auc:0.99869	validation_1-auc:0.99721
[1750]	validation_0-auc:0.99869	validation_1-auc:0.99721
[1751]	validation_0-auc:0.99869	validation_1-auc:0.99721
[1752]	validation_0-auc:0.99869	validation_1-auc:0.99721
[1753]	validation_0-auc:0.99869	validation_1-auc:0.99721
[1754]	validation_0-auc:0.99869	validation_1-auc:0.99721
[1755]	validation_0-auc:0.99870	validation_1-auc:0.99721
[1756]	validation_0-auc:0.99870	validation_1-auc:0.99721
[1757]	validation_0-auc:0.99870

[1884]	validation_0-auc:0.99878	validation_1-auc:0.99723
[1885]	validation_0-auc:0.99878	validation_1-auc:0.99723
[1886]	validation_0-auc:0.99878	validation_1-auc:0.99723
[1887]	validation_0-auc:0.99878	validation_1-auc:0.99723
[1888]	validation_0-auc:0.99878	validation_1-auc:0.99723
[1889]	validation_0-auc:0.99878	validation_1-auc:0.99723
[1890]	validation_0-auc:0.99878	validation_1-auc:0.99723
[1891]	validation_0-auc:0.99878	validation_1-auc:0.99723
[1892]	validation_0-auc:0.99878	validation_1-auc:0.99723
[1893]	validation_0-auc:0.99878	validation_1-auc:0.99723
[1894]	validation_0-auc:0.99878	validation_1-auc:0.99723
[1895]	validation_0-auc:0.99879	validation_1-auc:0.99723
[1896]	validation_0-auc:0.99879	validation_1-auc:0.99723
[1897]	validation_0-auc:0.99879	validation_1-auc:0.99723
[1898]	validation_0-auc:0.99879	validation_1-auc:0.99723
[1899]	validation_0-auc:0.99879	validation_1-auc:0.99723
[1900]	validation_0-auc:0.99879	validation_1-auc:0.99723
[1901]	validation_0-auc:0.99879

[20:08:25] DEBUG: C:/Users/Administrator/workspace/xgboost-win64_release_1.0.0/src/tree/updater_gpu_hist.cu:1167: [GPU Hist]: Configure
[20:08:26] Init: 0.014807s, 1 calls @ 14807us

[0]	validation_0-auc:0.96716	validation_1-auc:0.96664
Multiple eval metrics have been passed: 'validation_1-auc' will be used for early stopping.

Will train until validation_1-auc hasn't improved in 50 rounds.
[1]	validation_0-auc:0.96932	validation_1-auc:0.96886
[2]	validation_0-auc:0.97177	validation_1-auc:0.97127
[3]	validation_0-auc:0.97303	validation_1-auc:0.97252
[4]	validation_0-auc:0.97397	validation_1-auc:0.97343
[5]	validation_0-auc:0.97491	validation_1-auc:0.97434
[6]	validation_0-auc:0.97525	validation_1-auc:0.97472
[7]	validation_0-auc:0.97598	validation_1-auc:0.97542
[8]	validation_0-auc:0.97637	validation_1-auc:0.97581
[9]	validation_0-auc:0.97664	validation_1-auc:0.97609
[10]	validation_0-auc:0.97694	validation_1-auc:0.97639
[11]	validation_0-auc:0.97758	validation_1-auc:0.97702
[12]	valid

[140]	validation_0-auc:0.99374	validation_1-auc:0.99330
[141]	validation_0-auc:0.99379	validation_1-auc:0.99335
[142]	validation_0-auc:0.99380	validation_1-auc:0.99336
[143]	validation_0-auc:0.99382	validation_1-auc:0.99338
[144]	validation_0-auc:0.99386	validation_1-auc:0.99341
[145]	validation_0-auc:0.99389	validation_1-auc:0.99344
[146]	validation_0-auc:0.99392	validation_1-auc:0.99347
[147]	validation_0-auc:0.99400	validation_1-auc:0.99355
[148]	validation_0-auc:0.99403	validation_1-auc:0.99358
[149]	validation_0-auc:0.99405	validation_1-auc:0.99359
[150]	validation_0-auc:0.99407	validation_1-auc:0.99362
[151]	validation_0-auc:0.99410	validation_1-auc:0.99365
[152]	validation_0-auc:0.99412	validation_1-auc:0.99367
[153]	validation_0-auc:0.99413	validation_1-auc:0.99368
[154]	validation_0-auc:0.99415	validation_1-auc:0.99370
[155]	validation_0-auc:0.99418	validation_1-auc:0.99373
[156]	validation_0-auc:0.99421	validation_1-auc:0.99376
[157]	validation_0-auc:0.99425	validation_1-auc:

[287]	validation_0-auc:0.99599	validation_1-auc:0.99544
[288]	validation_0-auc:0.99600	validation_1-auc:0.99545
[289]	validation_0-auc:0.99601	validation_1-auc:0.99546
[290]	validation_0-auc:0.99603	validation_1-auc:0.99548
[291]	validation_0-auc:0.99604	validation_1-auc:0.99549
[292]	validation_0-auc:0.99605	validation_1-auc:0.99550
[293]	validation_0-auc:0.99605	validation_1-auc:0.99550
[294]	validation_0-auc:0.99606	validation_1-auc:0.99551
[295]	validation_0-auc:0.99609	validation_1-auc:0.99554
[296]	validation_0-auc:0.99610	validation_1-auc:0.99555
[297]	validation_0-auc:0.99610	validation_1-auc:0.99555
[298]	validation_0-auc:0.99611	validation_1-auc:0.99556
[299]	validation_0-auc:0.99611	validation_1-auc:0.99556
[300]	validation_0-auc:0.99612	validation_1-auc:0.99556
[301]	validation_0-auc:0.99614	validation_1-auc:0.99558
[302]	validation_0-auc:0.99616	validation_1-auc:0.99560
[303]	validation_0-auc:0.99616	validation_1-auc:0.99561
[304]	validation_0-auc:0.99617	validation_1-auc:

[434]	validation_0-auc:0.99687	validation_1-auc:0.99622
[435]	validation_0-auc:0.99687	validation_1-auc:0.99622
[436]	validation_0-auc:0.99688	validation_1-auc:0.99623
[437]	validation_0-auc:0.99689	validation_1-auc:0.99624
[438]	validation_0-auc:0.99689	validation_1-auc:0.99624
[439]	validation_0-auc:0.99689	validation_1-auc:0.99625
[440]	validation_0-auc:0.99690	validation_1-auc:0.99625
[441]	validation_0-auc:0.99691	validation_1-auc:0.99626
[442]	validation_0-auc:0.99691	validation_1-auc:0.99626
[443]	validation_0-auc:0.99691	validation_1-auc:0.99626
[444]	validation_0-auc:0.99691	validation_1-auc:0.99626
[445]	validation_0-auc:0.99691	validation_1-auc:0.99626
[446]	validation_0-auc:0.99692	validation_1-auc:0.99626
[447]	validation_0-auc:0.99692	validation_1-auc:0.99626
[448]	validation_0-auc:0.99692	validation_1-auc:0.99627
[449]	validation_0-auc:0.99693	validation_1-auc:0.99627
[450]	validation_0-auc:0.99694	validation_1-auc:0.99628
[451]	validation_0-auc:0.99694	validation_1-auc:

[581]	validation_0-auc:0.99731	validation_1-auc:0.99653
[582]	validation_0-auc:0.99731	validation_1-auc:0.99653
[583]	validation_0-auc:0.99731	validation_1-auc:0.99653
[584]	validation_0-auc:0.99731	validation_1-auc:0.99653
[585]	validation_0-auc:0.99731	validation_1-auc:0.99653
[586]	validation_0-auc:0.99732	validation_1-auc:0.99653
[587]	validation_0-auc:0.99732	validation_1-auc:0.99653
[588]	validation_0-auc:0.99732	validation_1-auc:0.99653
[589]	validation_0-auc:0.99732	validation_1-auc:0.99653
[590]	validation_0-auc:0.99732	validation_1-auc:0.99653
[591]	validation_0-auc:0.99732	validation_1-auc:0.99654
[592]	validation_0-auc:0.99732	validation_1-auc:0.99654
[593]	validation_0-auc:0.99733	validation_1-auc:0.99654
[594]	validation_0-auc:0.99733	validation_1-auc:0.99654
[595]	validation_0-auc:0.99734	validation_1-auc:0.99655
[596]	validation_0-auc:0.99734	validation_1-auc:0.99655
[597]	validation_0-auc:0.99735	validation_1-auc:0.99656
[598]	validation_0-auc:0.99735	validation_1-auc:

[728]	validation_0-auc:0.99764	validation_1-auc:0.99675
[729]	validation_0-auc:0.99764	validation_1-auc:0.99675
[730]	validation_0-auc:0.99764	validation_1-auc:0.99675
[731]	validation_0-auc:0.99765	validation_1-auc:0.99675
[732]	validation_0-auc:0.99765	validation_1-auc:0.99675
[733]	validation_0-auc:0.99765	validation_1-auc:0.99675
[734]	validation_0-auc:0.99765	validation_1-auc:0.99675
[735]	validation_0-auc:0.99765	validation_1-auc:0.99676
[736]	validation_0-auc:0.99765	validation_1-auc:0.99675
[737]	validation_0-auc:0.99766	validation_1-auc:0.99676
[738]	validation_0-auc:0.99766	validation_1-auc:0.99676
[739]	validation_0-auc:0.99766	validation_1-auc:0.99676
[740]	validation_0-auc:0.99767	validation_1-auc:0.99676
[741]	validation_0-auc:0.99767	validation_1-auc:0.99676
[742]	validation_0-auc:0.99767	validation_1-auc:0.99676
[743]	validation_0-auc:0.99767	validation_1-auc:0.99676
[744]	validation_0-auc:0.99767	validation_1-auc:0.99676
[745]	validation_0-auc:0.99767	validation_1-auc:

[875]	validation_0-auc:0.99791	validation_1-auc:0.99688
[876]	validation_0-auc:0.99791	validation_1-auc:0.99688
[877]	validation_0-auc:0.99791	validation_1-auc:0.99688
[878]	validation_0-auc:0.99791	validation_1-auc:0.99688
[879]	validation_0-auc:0.99791	validation_1-auc:0.99688
[880]	validation_0-auc:0.99791	validation_1-auc:0.99688
[881]	validation_0-auc:0.99791	validation_1-auc:0.99688
[882]	validation_0-auc:0.99791	validation_1-auc:0.99688
[883]	validation_0-auc:0.99791	validation_1-auc:0.99688
[884]	validation_0-auc:0.99792	validation_1-auc:0.99689
[885]	validation_0-auc:0.99792	validation_1-auc:0.99689
[886]	validation_0-auc:0.99792	validation_1-auc:0.99689
[887]	validation_0-auc:0.99792	validation_1-auc:0.99689
[888]	validation_0-auc:0.99792	validation_1-auc:0.99689
[889]	validation_0-auc:0.99793	validation_1-auc:0.99689
[890]	validation_0-auc:0.99793	validation_1-auc:0.99689
[891]	validation_0-auc:0.99793	validation_1-auc:0.99689
[892]	validation_0-auc:0.99793	validation_1-auc:

[1021]	validation_0-auc:0.99809	validation_1-auc:0.99696
[1022]	validation_0-auc:0.99809	validation_1-auc:0.99696
[1023]	validation_0-auc:0.99810	validation_1-auc:0.99696
[1024]	validation_0-auc:0.99810	validation_1-auc:0.99696
[1025]	validation_0-auc:0.99810	validation_1-auc:0.99696
[1026]	validation_0-auc:0.99810	validation_1-auc:0.99696
[1027]	validation_0-auc:0.99810	validation_1-auc:0.99696
[1028]	validation_0-auc:0.99810	validation_1-auc:0.99696
[1029]	validation_0-auc:0.99810	validation_1-auc:0.99696
[1030]	validation_0-auc:0.99810	validation_1-auc:0.99696
[1031]	validation_0-auc:0.99811	validation_1-auc:0.99697
[1032]	validation_0-auc:0.99811	validation_1-auc:0.99697
[1033]	validation_0-auc:0.99811	validation_1-auc:0.99697
[1034]	validation_0-auc:0.99811	validation_1-auc:0.99697
[1035]	validation_0-auc:0.99811	validation_1-auc:0.99697
[1036]	validation_0-auc:0.99812	validation_1-auc:0.99697
[1037]	validation_0-auc:0.99812	validation_1-auc:0.99697
[1038]	validation_0-auc:0.99812

[1165]	validation_0-auc:0.99825	validation_1-auc:0.99701
[1166]	validation_0-auc:0.99825	validation_1-auc:0.99701
[1167]	validation_0-auc:0.99825	validation_1-auc:0.99701
[1168]	validation_0-auc:0.99825	validation_1-auc:0.99701
[1169]	validation_0-auc:0.99825	validation_1-auc:0.99701
[1170]	validation_0-auc:0.99825	validation_1-auc:0.99701
[1171]	validation_0-auc:0.99825	validation_1-auc:0.99701
[1172]	validation_0-auc:0.99825	validation_1-auc:0.99701
[1173]	validation_0-auc:0.99825	validation_1-auc:0.99701
[1174]	validation_0-auc:0.99825	validation_1-auc:0.99701
[1175]	validation_0-auc:0.99825	validation_1-auc:0.99701
[1176]	validation_0-auc:0.99825	validation_1-auc:0.99701
[1177]	validation_0-auc:0.99826	validation_1-auc:0.99701
[1178]	validation_0-auc:0.99826	validation_1-auc:0.99701
[1179]	validation_0-auc:0.99826	validation_1-auc:0.99701
[1180]	validation_0-auc:0.99826	validation_1-auc:0.99701
[1181]	validation_0-auc:0.99826	validation_1-auc:0.99701
[1182]	validation_0-auc:0.99826

[1309]	validation_0-auc:0.99836	validation_1-auc:0.99705
[1310]	validation_0-auc:0.99837	validation_1-auc:0.99705
[1311]	validation_0-auc:0.99837	validation_1-auc:0.99705
[1312]	validation_0-auc:0.99837	validation_1-auc:0.99705
[1313]	validation_0-auc:0.99837	validation_1-auc:0.99705
[1314]	validation_0-auc:0.99837	validation_1-auc:0.99705
[1315]	validation_0-auc:0.99837	validation_1-auc:0.99705
[1316]	validation_0-auc:0.99838	validation_1-auc:0.99706
[1317]	validation_0-auc:0.99838	validation_1-auc:0.99706
[1318]	validation_0-auc:0.99838	validation_1-auc:0.99705
[1319]	validation_0-auc:0.99838	validation_1-auc:0.99705
[1320]	validation_0-auc:0.99838	validation_1-auc:0.99705
[1321]	validation_0-auc:0.99838	validation_1-auc:0.99705
[1322]	validation_0-auc:0.99838	validation_1-auc:0.99706
[1323]	validation_0-auc:0.99838	validation_1-auc:0.99706
[1324]	validation_0-auc:0.99838	validation_1-auc:0.99706
[1325]	validation_0-auc:0.99838	validation_1-auc:0.99706
[1326]	validation_0-auc:0.99838

[1453]	validation_0-auc:0.99849	validation_1-auc:0.99710
[1454]	validation_0-auc:0.99849	validation_1-auc:0.99710
[1455]	validation_0-auc:0.99850	validation_1-auc:0.99710
[1456]	validation_0-auc:0.99850	validation_1-auc:0.99710
[1457]	validation_0-auc:0.99850	validation_1-auc:0.99710
[1458]	validation_0-auc:0.99850	validation_1-auc:0.99710
[1459]	validation_0-auc:0.99850	validation_1-auc:0.99710
[1460]	validation_0-auc:0.99850	validation_1-auc:0.99710
[1461]	validation_0-auc:0.99850	validation_1-auc:0.99710
[1462]	validation_0-auc:0.99850	validation_1-auc:0.99710
[1463]	validation_0-auc:0.99850	validation_1-auc:0.99710
[1464]	validation_0-auc:0.99850	validation_1-auc:0.99710
[1465]	validation_0-auc:0.99850	validation_1-auc:0.99710
[1466]	validation_0-auc:0.99850	validation_1-auc:0.99710
[1467]	validation_0-auc:0.99850	validation_1-auc:0.99710


In [None]:
#阈值大概在0.2-0.4之间 本题对召回率较敏感，可适当降低一下阈值
thre = 0.25

In [None]:
#生成提交文件
sub = pd.DataFrame()
sub['hit_id']=testpd['hit_id']
sub['flag_pred'] = y_pp_xgb
sub['event_id'] = testpd['event_id']
sub['flag_pred'] = sub['flag_pred'].apply(lambda x: 1 if x >= thre else 0)
sub.to_csv(os.path.join(pathf, "subsample.csv").format(sub['flag_pred'].mean()),index=False)