In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from lightgbm import LGBMClassifier
import logging

In [2]:
log_fmt = "[%(asctime)s] %(levelname)s in %(module)s: %(message)s"
logging.basicConfig(format=log_fmt, level=logging.INFO)

In [3]:
base_path = './data'
feature_path = './feature'

In [4]:
# 加载邀请回答数据
train = pd.read_csv(f'{base_path}/invite_info_0926.txt', sep='\t', header=None)
train.columns = ['qid', 'uid', 'dt', 'label']
del train['dt']
logging.info("invite %s", train.shape)

test = pd.read_csv(f'{base_path}/invite_info_evaluate_0926.txt', sep='\t', header=None)
test.columns = ['qid', 'uid', 'dt']
sub = test.copy()
sub_size = len(sub)
del test['dt']
logging.info("test %s", test.shape)



[2019-11-29 08:04:12,190] INFO in <ipython-input-4-f5fcce844f4e>: invite (9489162, 3)
[2019-11-29 08:04:13,632] INFO in <ipython-input-4-f5fcce844f4e>: test (1141683, 2)


In [5]:
# 加载 kfold feature
t1 = pd.read_csv(f'{feature_path}/train_kfold_feature_2.txt', sep='\t')
train = pd.concat([train, t1], axis=1)

t1 = pd.read_csv(f'{feature_path}/test_kfold_feature_2.txt', sep='\t')
test = pd.concat([test, t1], axis=1)

In [6]:
# 加载用户
user = pd.read_csv(f'{base_path}/member_info_0926.txt', header=None, sep='\t')
user.columns = ['uid', 'gender', 'freq',
                'uf_b1', 'uf_b2','uf_b3', 'uf_b4', 'uf_b5', 
                'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4', 'uf_c5', 
                'score', 'follow_topic', 'inter_topic']

del user['follow_topic'], user['inter_topic']
logging.info("user %s", user.shape)

unq = user.nunique()
logging.info("user unq %s", unq)

for x in unq[unq == 1].index:
    del user[x]
    logging.info('del unq==1 %s', x)

t = user.dtypes
cats = [x for x in t[t == 'object'].index if x not in ['follow_topic', 'inter_topic', 'uid']]
logging.info("user cat %s", cats)

for d in cats:
    lb = LabelEncoder()
    user[d] = lb.fit_transform(user[d])
    logging.info('encode %s', d)
    
q_lb = LabelEncoder()
q_lb.fit(list(train['qid'].astype(str).values) + list(test['qid'].astype(str).values))
train['qid_enc'] = q_lb.transform(train['qid'])
test['qid_enc'] = q_lb.transform(test['qid'])

u_lb = LabelEncoder()
u_lb.fit(user['uid'])
train['uid_enc'] = u_lb.transform(train['uid'])
test['uid_enc'] = u_lb.transform(test['uid'])

# merge user
train = pd.merge(train, user, on='uid', how='left')
test = pd.merge(test, user, on='uid', how='left')
logging.info("train shape %s, test shape %s", train.shape, test.shape)

[2019-11-29 08:06:19,779] INFO in <ipython-input-6-af285af762e4>: user (1931654, 14)
[2019-11-29 08:06:23,189] INFO in <ipython-input-6-af285af762e4>: user unq uid       1931654
gender          3
freq            5
uf_b1           2
uf_b2           2
uf_b3           2
uf_b4           2
uf_b5           2
uf_c1        2561
uf_c2         291
uf_c3         428
uf_c4        1556
uf_c5           2
score         732
dtype: int64
[2019-11-29 08:06:23,194] INFO in <ipython-input-6-af285af762e4>: user cat ['gender', 'freq', 'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4', 'uf_c5']
[2019-11-29 08:06:24,098] INFO in <ipython-input-6-af285af762e4>: encode gender
[2019-11-29 08:06:24,950] INFO in <ipython-input-6-af285af762e4>: encode freq
[2019-11-29 08:06:25,777] INFO in <ipython-input-6-af285af762e4>: encode uf_c1
[2019-11-29 08:06:26,594] INFO in <ipython-input-6-af285af762e4>: encode uf_c2
[2019-11-29 08:06:27,396] INFO in <ipython-input-6-af285af762e4>: encode uf_c3
[2019-11-29 08:06:28,179] INFO in <ipytho

In [7]:
train.columns

Index(['qid', 'uid', 'label', 'day', 'hour', 'q_inv_kfold_mean',
       'q_inv_kfold_sum', 'q_inv_kfold_std', 'q_inv_kfold_count',
       'u_inv_kfold_mean',
       ...
       'uf_b2', 'uf_b3', 'uf_b4', 'uf_b5', 'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4',
       'uf_c5', 'score'],
      dtype='object', length=120)

In [8]:
test.columns

Index(['qid', 'uid', 'day', 'hour', 'q_inv_kfold_mean', 'q_inv_kfold_sum',
       'q_inv_kfold_std', 'q_inv_kfold_count', 'u_inv_kfold_mean',
       'u_inv_kfold_sum',
       ...
       'uf_b2', 'uf_b3', 'uf_b4', 'uf_b5', 'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4',
       'uf_c5', 'score'],
      dtype='object', length=119)

In [9]:
data = pd.concat((train, test), axis=0, sort=True)

In [10]:
# count编码
count_fea = ['uid_enc', 'qid_enc', 'gender', 'freq', 'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4', 'uf_c5']
for feat in count_fea:
    col_name = '{}_count'.format(feat)
    data[col_name] = data[feat].map(data[feat].value_counts().astype(int))
    data.loc[data[col_name] < 2, feat] = -1
    data[feat] += 1
    data[col_name] = data[feat].map(data[feat].value_counts().astype(int))
    data[col_name] = (data[col_name] - data[col_name].min()) / (data[col_name].max() - data[col_name].min())

In [11]:
data[-20:]

Unnamed: 0,day,freq,gender,hour,label,q_ans_kfold_count,q_diff_qa_days_max,q_diff_qa_days_mean,q_diff_qa_days_sum,q_has_img_max,...,uid_enc,uid_enc_count,qid_enc_count,gender_count,freq_count,uf_c1_count,uf_c2_count,uf_c3_count,uf_c4_count,uf_c5_count
1141663,3871,1,3,8,,,,,,,...,942361,7.9e-05,0.0,1.0,1.0,1.0,1.0,0.028967,1.0,1.0
1141664,3872,1,2,18,,,,,,,...,561542,0.000237,2.4e-05,0.027183,1.0,1.0,1.0,0.441988,0.129433,1.0
1141665,3872,2,1,15,,,,,,,...,1442840,5.7e-05,5.9e-05,0.0,0.24599,1.0,1.0,0.432284,0.005611,1.0
1141666,3873,1,3,21,,,,,,,...,245886,3.1e-05,1.2e-05,1.0,1.0,0.042962,0.372573,0.069194,1.0,1.0
1141667,3868,1,3,14,,,,,,,...,930792,5.3e-05,6e-06,1.0,1.0,1.0,1.0,1.0,0.071494,1.0
1141668,3870,3,3,14,,1.0,0.0,0.0,0.0,0.0,...,1285244,0.0,0.000349,1.0,0.023868,1.0,1.0,0.020596,1.0,1.0
1141669,3869,5,3,8,,15.0,5.0,3.533333,53.0,1.0,...,0,1.0,0.002174,1.0,0.934679,0.05124,0.372573,0.612471,0.179357,1.0
1141670,3870,1,3,22,,,,,,,...,1185999,6.6e-05,1.0,1.0,1.0,1.0,1.0,1.0,0.071494,1.0
1141671,3870,2,2,16,,10.0,13.0,8.0,80.0,1.0,...,1071387,0.0,0.000195,0.027183,0.24599,1.0,1.0,0.187605,0.009302,1.0
1141672,3869,1,2,8,,,,,,,...,14280,3.5e-05,2.4e-05,0.027183,1.0,0.057592,0.372573,0.128859,0.011044,1.0


In [12]:
print(list(data.columns))

['day', 'freq', 'gender', 'hour', 'label', 'q_ans_kfold_count', 'q_diff_qa_days_max', 'q_diff_qa_days_mean', 'q_diff_qa_days_sum', 'q_has_img_max', 'q_has_img_mean', 'q_has_img_sum', 'q_has_video_max', 'q_has_video_mean', 'q_has_video_sum', 'q_inv_kfold_count', 'q_inv_kfold_mean', 'q_inv_kfold_std', 'q_inv_kfold_sum', 'q_is_dest_max', 'q_is_dest_mean', 'q_is_dest_sum', 'q_is_good_max', 'q_is_good_mean', 'q_is_good_sum', 'q_is_rec_max', 'q_is_rec_mean', 'q_is_rec_sum', 'q_reci_cheer_max', 'q_reci_cheer_mean', 'q_reci_cheer_sum', 'q_reci_comment_max', 'q_reci_comment_mean', 'q_reci_comment_sum', 'q_reci_dis_max', 'q_reci_dis_mean', 'q_reci_dis_sum', 'q_reci_mark_max', 'q_reci_mark_mean', 'q_reci_mark_sum', 'q_reci_no_help_max', 'q_reci_no_help_mean', 'q_reci_no_help_sum', 'q_reci_tks_max', 'q_reci_tks_mean', 'q_reci_tks_sum', 'q_reci_uncheer_max', 'q_reci_uncheer_mean', 'q_reci_uncheer_sum', 'q_reci_xxx_max', 'q_reci_xxx_mean', 'q_reci_xxx_sum', 'q_word_count_max', 'q_word_count_mean', '

In [13]:
data['wk'] = data['day'] % 7
# 选特征
# feature_cols = [x for x in data.columns if x not in ('label', 'uid', 'qid', 'dt', 'day')]

In [14]:
drop_feat = ('label', 'uid', 'qid', 'dt', 'day') 
# drop_feat += ('q_ans_kfold_count', 'q_diff_qa_days_max', 'q_diff_qa_days_mean', 'q_diff_qa_days_sum', 
#               'q_has_img_max', 'q_has_img_mean', 'q_has_img_sum', 'q_has_video_max', 'q_has_video_mean', 
#               'q_has_video_sum','q_is_dest_max', 'q_is_dest_mean', 'q_is_dest_sum', 'q_is_good_max', 
#               'q_is_good_mean', 'q_is_good_sum', 'q_is_rec_max', 'q_is_rec_mean', 'q_is_rec_sum', 
#               'q_reci_cheer_max', 'q_reci_cheer_mean', 'q_reci_cheer_sum', 'q_reci_comment_max', 
#               'q_reci_comment_mean', 'q_reci_comment_sum', 'q_reci_dis_max', 'q_reci_dis_mean', 
#               'q_reci_dis_sum', 'q_reci_mark_max', 'q_reci_mark_mean', 'q_reci_mark_sum', 'q_reci_no_help_max',
#               'q_reci_no_help_mean', 'q_reci_no_help_sum', 'q_reci_tks_max', 'q_reci_tks_mean', 
#               'q_reci_tks_sum', 'q_reci_uncheer_max', 'q_reci_uncheer_mean', 'q_reci_uncheer_sum', 
#               'q_reci_xxx_max', 'q_reci_xxx_mean', 'q_reci_xxx_sum', 'q_word_count_max', 'q_word_count_mean', 
#               'q_word_count_sum')
# drop_feat += ('u_ans_kfold_count', 'u_diff_qa_days_max', 'u_diff_qa_days_mean', 'u_diff_qa_days_sum', 
#               'u_has_img_max', 'u_has_img_mean', 'u_has_img_sum', 'u_has_video_max', 'u_has_video_mean', 
#               'u_has_video_sum', 'u_is_dest_max', 'u_is_dest_mean', 'u_is_dest_sum', 'u_is_good_max', 
#               'u_is_good_mean', 'u_is_good_sum', 'u_is_rec_max', 'u_is_rec_mean', 'u_is_rec_sum', 'u_reci_cheer_max', 
#               'u_reci_cheer_mean', 'u_reci_cheer_sum', 'u_reci_comment_max', 'u_reci_comment_mean',
#               'u_reci_comment_sum', 'u_reci_dis_max', 'u_reci_dis_mean', 'u_reci_dis_sum', 'u_reci_mark_max', 
#               'u_reci_mark_mean', 'u_reci_mark_sum', 'u_reci_no_help_max', 'u_reci_no_help_mean', 
#               'u_reci_no_help_sum', 'u_reci_tks_max', 'u_reci_tks_mean', 'u_reci_tks_sum', 'u_reci_uncheer_max', 
#               'u_reci_uncheer_mean', 'u_reci_uncheer_sum', 'u_reci_xxx_max', 'u_reci_xxx_mean', 'u_reci_xxx_sum', 
#               'u_word_count_max', 'u_word_count_mean', 'u_word_count_sum')
feature_cols = [x for x in data.columns if x not in drop_feat]
# feature_cols

In [None]:
logging.info("feature size %s", len(feature_cols))

X_train_all = data.iloc[:len(train)][feature_cols]
y_train_all = data.iloc[:len(train)]['label']
X_test = data.iloc[len(train):]
assert len(X_test) == sub_size

logging.info("train shape %s, test shape %s", train.shape, test.shape)

fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for index, (train_idx, val_idx) in enumerate(fold.split(X=X_train_all, y=y_train_all)):
    break

X_train, X_val, y_train, y_val = X_train_all.iloc[train_idx][feature_cols], X_train_all.iloc[val_idx][feature_cols], \
                                 y_train_all.iloc[train_idx], \
                                 y_train_all.iloc[val_idx]
del X_train_all

model_lgb = LGBMClassifier(n_estimators=2000, n_jobs=-1, objective='binary', seed=1000, silent=True)
model_lgb.fit(X_train, y_train,
              eval_metric=['logloss', 'auc'],
              eval_set=[(X_val, y_val)],
              early_stopping_rounds=50)



[2019-11-29 08:09:09,377] INFO in <ipython-input-15-20ad4e7d53e4>: feature size 126
[2019-11-29 08:09:30,003] INFO in <ipython-input-15-20ad4e7d53e4>: train shape (9489162, 120), test shape (1141683, 119)


[1]	valid_0's auc: 0.749298	valid_0's binary_logloss: 0.455412
Training until validation scores don't improve for 50 rounds
[2]	valid_0's auc: 0.757711	valid_0's binary_logloss: 0.446076
[3]	valid_0's auc: 0.763004	valid_0's binary_logloss: 0.438448
[4]	valid_0's auc: 0.766972	valid_0's binary_logloss: 0.431992
[5]	valid_0's auc: 0.768482	valid_0's binary_logloss: 0.426681
[6]	valid_0's auc: 0.77094	valid_0's binary_logloss: 0.421996
[7]	valid_0's auc: 0.772823	valid_0's binary_logloss: 0.417909
[8]	valid_0's auc: 0.774439	valid_0's binary_logloss: 0.414341
[9]	valid_0's auc: 0.775594	valid_0's binary_logloss: 0.411272
[10]	valid_0's auc: 0.776754	valid_0's binary_logloss: 0.408594
[11]	valid_0's auc: 0.778058	valid_0's binary_logloss: 0.406116
[12]	valid_0's auc: 0.778925	valid_0's binary_logloss: 0.403967
[13]	valid_0's auc: 0.779931	valid_0's binary_logloss: 0.401959
[14]	valid_0's auc: 0.780837	valid_0's binary_logloss: 0.400256
[15]	valid_0's auc: 0.781863	valid_0's binary_logloss

[129]	valid_0's auc: 0.806309	valid_0's binary_logloss: 0.372056
[130]	valid_0's auc: 0.806377	valid_0's binary_logloss: 0.372002
[131]	valid_0's auc: 0.80643	valid_0's binary_logloss: 0.371956
[132]	valid_0's auc: 0.806496	valid_0's binary_logloss: 0.371906
[133]	valid_0's auc: 0.806555	valid_0's binary_logloss: 0.371859
[134]	valid_0's auc: 0.806611	valid_0's binary_logloss: 0.371816
[135]	valid_0's auc: 0.806671	valid_0's binary_logloss: 0.371773
[136]	valid_0's auc: 0.806713	valid_0's binary_logloss: 0.371737
[137]	valid_0's auc: 0.806794	valid_0's binary_logloss: 0.371678
[138]	valid_0's auc: 0.806831	valid_0's binary_logloss: 0.371647
[139]	valid_0's auc: 0.806872	valid_0's binary_logloss: 0.371616
[140]	valid_0's auc: 0.806913	valid_0's binary_logloss: 0.371585
[141]	valid_0's auc: 0.806954	valid_0's binary_logloss: 0.371554
[142]	valid_0's auc: 0.806989	valid_0's binary_logloss: 0.371526
[143]	valid_0's auc: 0.807018	valid_0's binary_logloss: 0.371504
[144]	valid_0's auc: 0.807

[256]	valid_0's auc: 0.810734	valid_0's binary_logloss: 0.368579
[257]	valid_0's auc: 0.810743	valid_0's binary_logloss: 0.368572
[258]	valid_0's auc: 0.810769	valid_0's binary_logloss: 0.368555
[259]	valid_0's auc: 0.81079	valid_0's binary_logloss: 0.368537
[260]	valid_0's auc: 0.810809	valid_0's binary_logloss: 0.368523
[261]	valid_0's auc: 0.810828	valid_0's binary_logloss: 0.368508
[262]	valid_0's auc: 0.810834	valid_0's binary_logloss: 0.368503
[263]	valid_0's auc: 0.810866	valid_0's binary_logloss: 0.368481
[264]	valid_0's auc: 0.810877	valid_0's binary_logloss: 0.368473
[265]	valid_0's auc: 0.810898	valid_0's binary_logloss: 0.368458
[266]	valid_0's auc: 0.810919	valid_0's binary_logloss: 0.368445
[267]	valid_0's auc: 0.810934	valid_0's binary_logloss: 0.368432
[268]	valid_0's auc: 0.81094	valid_0's binary_logloss: 0.368424
[269]	valid_0's auc: 0.810953	valid_0's binary_logloss: 0.368414
[270]	valid_0's auc: 0.810999	valid_0's binary_logloss: 0.368375
[271]	valid_0's auc: 0.8110

[383]	valid_0's auc: 0.812583	valid_0's binary_logloss: 0.367135
[384]	valid_0's auc: 0.812598	valid_0's binary_logloss: 0.367122
[385]	valid_0's auc: 0.81261	valid_0's binary_logloss: 0.367113
[386]	valid_0's auc: 0.81263	valid_0's binary_logloss: 0.367098
[387]	valid_0's auc: 0.812636	valid_0's binary_logloss: 0.367094
[388]	valid_0's auc: 0.81264	valid_0's binary_logloss: 0.367091
[389]	valid_0's auc: 0.812646	valid_0's binary_logloss: 0.367087
[390]	valid_0's auc: 0.812648	valid_0's binary_logloss: 0.367086
[391]	valid_0's auc: 0.812652	valid_0's binary_logloss: 0.367082
[392]	valid_0's auc: 0.812658	valid_0's binary_logloss: 0.367078
[393]	valid_0's auc: 0.81268	valid_0's binary_logloss: 0.367061
[394]	valid_0's auc: 0.812696	valid_0's binary_logloss: 0.367046
[395]	valid_0's auc: 0.812702	valid_0's binary_logloss: 0.367043
[396]	valid_0's auc: 0.812715	valid_0's binary_logloss: 0.367033
[397]	valid_0's auc: 0.812735	valid_0's binary_logloss: 0.367016
[398]	valid_0's auc: 0.812747

[510]	valid_0's auc: 0.814124	valid_0's binary_logloss: 0.365892
[511]	valid_0's auc: 0.814132	valid_0's binary_logloss: 0.365886
[512]	valid_0's auc: 0.814133	valid_0's binary_logloss: 0.365884
[513]	valid_0's auc: 0.814145	valid_0's binary_logloss: 0.365874
[514]	valid_0's auc: 0.814151	valid_0's binary_logloss: 0.36587
[515]	valid_0's auc: 0.814153	valid_0's binary_logloss: 0.365869
[516]	valid_0's auc: 0.814158	valid_0's binary_logloss: 0.365866
[517]	valid_0's auc: 0.814168	valid_0's binary_logloss: 0.365857
[518]	valid_0's auc: 0.814169	valid_0's binary_logloss: 0.365857
[519]	valid_0's auc: 0.814173	valid_0's binary_logloss: 0.365854
[520]	valid_0's auc: 0.814176	valid_0's binary_logloss: 0.365852
[521]	valid_0's auc: 0.814179	valid_0's binary_logloss: 0.365849
[522]	valid_0's auc: 0.814182	valid_0's binary_logloss: 0.365846
[523]	valid_0's auc: 0.814183	valid_0's binary_logloss: 0.365846
[524]	valid_0's auc: 0.814193	valid_0's binary_logloss: 0.365838
[525]	valid_0's auc: 0.814

[637]	valid_0's auc: 0.814965	valid_0's binary_logloss: 0.365216
[638]	valid_0's auc: 0.814967	valid_0's binary_logloss: 0.365215
[639]	valid_0's auc: 0.814979	valid_0's binary_logloss: 0.365206
[640]	valid_0's auc: 0.814981	valid_0's binary_logloss: 0.365204
[641]	valid_0's auc: 0.814982	valid_0's binary_logloss: 0.365203
[642]	valid_0's auc: 0.814985	valid_0's binary_logloss: 0.3652
[643]	valid_0's auc: 0.814993	valid_0's binary_logloss: 0.365192
[644]	valid_0's auc: 0.815016	valid_0's binary_logloss: 0.365172
[645]	valid_0's auc: 0.81503	valid_0's binary_logloss: 0.36516
[646]	valid_0's auc: 0.815034	valid_0's binary_logloss: 0.365156
[647]	valid_0's auc: 0.815044	valid_0's binary_logloss: 0.365149
[648]	valid_0's auc: 0.815048	valid_0's binary_logloss: 0.365146
[649]	valid_0's auc: 0.81505	valid_0's binary_logloss: 0.365144
[650]	valid_0's auc: 0.815047	valid_0's binary_logloss: 0.365146
[651]	valid_0's auc: 0.815046	valid_0's binary_logloss: 0.365146
[652]	valid_0's auc: 0.815047	

[764]	valid_0's auc: 0.81568	valid_0's binary_logloss: 0.36464
[765]	valid_0's auc: 0.81568	valid_0's binary_logloss: 0.36464
[766]	valid_0's auc: 0.81568	valid_0's binary_logloss: 0.364639
[767]	valid_0's auc: 0.815689	valid_0's binary_logloss: 0.364633
[768]	valid_0's auc: 0.815697	valid_0's binary_logloss: 0.364628
[769]	valid_0's auc: 0.815699	valid_0's binary_logloss: 0.364626
[770]	valid_0's auc: 0.815699	valid_0's binary_logloss: 0.364626
[771]	valid_0's auc: 0.815703	valid_0's binary_logloss: 0.364622
[772]	valid_0's auc: 0.815709	valid_0's binary_logloss: 0.364618
[773]	valid_0's auc: 0.815713	valid_0's binary_logloss: 0.364615
[774]	valid_0's auc: 0.815714	valid_0's binary_logloss: 0.364614
[775]	valid_0's auc: 0.815726	valid_0's binary_logloss: 0.364605
[776]	valid_0's auc: 0.81574	valid_0's binary_logloss: 0.364594
[777]	valid_0's auc: 0.815744	valid_0's binary_logloss: 0.364589
[778]	valid_0's auc: 0.815754	valid_0's binary_logloss: 0.364582
[779]	valid_0's auc: 0.815758	v

[891]	valid_0's auc: 0.816346	valid_0's binary_logloss: 0.364108
[892]	valid_0's auc: 0.816347	valid_0's binary_logloss: 0.364107
[893]	valid_0's auc: 0.816351	valid_0's binary_logloss: 0.364105
[894]	valid_0's auc: 0.816353	valid_0's binary_logloss: 0.364103
[895]	valid_0's auc: 0.816357	valid_0's binary_logloss: 0.3641
[896]	valid_0's auc: 0.816357	valid_0's binary_logloss: 0.3641
[897]	valid_0's auc: 0.816357	valid_0's binary_logloss: 0.3641
[898]	valid_0's auc: 0.816363	valid_0's binary_logloss: 0.364096
[899]	valid_0's auc: 0.816365	valid_0's binary_logloss: 0.364094
[900]	valid_0's auc: 0.816365	valid_0's binary_logloss: 0.364094
[901]	valid_0's auc: 0.816364	valid_0's binary_logloss: 0.364094
[902]	valid_0's auc: 0.816364	valid_0's binary_logloss: 0.364094
[903]	valid_0's auc: 0.816364	valid_0's binary_logloss: 0.364094
[904]	valid_0's auc: 0.816371	valid_0's binary_logloss: 0.36409
[905]	valid_0's auc: 0.816379	valid_0's binary_logloss: 0.364082
[906]	valid_0's auc: 0.816393	va

[1018]	valid_0's auc: 0.816924	valid_0's binary_logloss: 0.363634
[1019]	valid_0's auc: 0.816929	valid_0's binary_logloss: 0.363631
[1020]	valid_0's auc: 0.816928	valid_0's binary_logloss: 0.363631
[1021]	valid_0's auc: 0.81693	valid_0's binary_logloss: 0.363629
[1022]	valid_0's auc: 0.816941	valid_0's binary_logloss: 0.363621
[1023]	valid_0's auc: 0.816942	valid_0's binary_logloss: 0.36362
[1024]	valid_0's auc: 0.816943	valid_0's binary_logloss: 0.363619
[1025]	valid_0's auc: 0.816944	valid_0's binary_logloss: 0.363618
[1026]	valid_0's auc: 0.816947	valid_0's binary_logloss: 0.363617
[1027]	valid_0's auc: 0.816948	valid_0's binary_logloss: 0.363616
[1028]	valid_0's auc: 0.816953	valid_0's binary_logloss: 0.363612
[1029]	valid_0's auc: 0.816953	valid_0's binary_logloss: 0.363612
[1030]	valid_0's auc: 0.816954	valid_0's binary_logloss: 0.363611
[1031]	valid_0's auc: 0.816956	valid_0's binary_logloss: 0.36361
[1032]	valid_0's auc: 0.816957	valid_0's binary_logloss: 0.363608
[1033]	valid_

[1143]	valid_0's auc: 0.817335	valid_0's binary_logloss: 0.3633
[1144]	valid_0's auc: 0.817339	valid_0's binary_logloss: 0.363297
[1145]	valid_0's auc: 0.817341	valid_0's binary_logloss: 0.363295
[1146]	valid_0's auc: 0.817341	valid_0's binary_logloss: 0.363295
[1147]	valid_0's auc: 0.817341	valid_0's binary_logloss: 0.363295
[1148]	valid_0's auc: 0.817344	valid_0's binary_logloss: 0.363292
[1149]	valid_0's auc: 0.817343	valid_0's binary_logloss: 0.363292
[1150]	valid_0's auc: 0.817344	valid_0's binary_logloss: 0.363292
[1151]	valid_0's auc: 0.817344	valid_0's binary_logloss: 0.363292
[1152]	valid_0's auc: 0.817348	valid_0's binary_logloss: 0.363289
[1153]	valid_0's auc: 0.817349	valid_0's binary_logloss: 0.363289
[1154]	valid_0's auc: 0.817351	valid_0's binary_logloss: 0.363287
[1155]	valid_0's auc: 0.817352	valid_0's binary_logloss: 0.363287
[1156]	valid_0's auc: 0.817353	valid_0's binary_logloss: 0.363286
[1157]	valid_0's auc: 0.817354	valid_0's binary_logloss: 0.363285
[1158]	valid

[1268]	valid_0's auc: 0.817752	valid_0's binary_logloss: 0.362967
[1269]	valid_0's auc: 0.817756	valid_0's binary_logloss: 0.362964
[1270]	valid_0's auc: 0.817771	valid_0's binary_logloss: 0.362952
[1271]	valid_0's auc: 0.817776	valid_0's binary_logloss: 0.362948
[1272]	valid_0's auc: 0.81778	valid_0's binary_logloss: 0.362945
[1273]	valid_0's auc: 0.817781	valid_0's binary_logloss: 0.362944
[1274]	valid_0's auc: 0.817783	valid_0's binary_logloss: 0.362943
[1275]	valid_0's auc: 0.817784	valid_0's binary_logloss: 0.362942
[1276]	valid_0's auc: 0.817786	valid_0's binary_logloss: 0.36294
[1277]	valid_0's auc: 0.817787	valid_0's binary_logloss: 0.362939
[1278]	valid_0's auc: 0.817788	valid_0's binary_logloss: 0.362939
[1279]	valid_0's auc: 0.817789	valid_0's binary_logloss: 0.362937
[1280]	valid_0's auc: 0.817789	valid_0's binary_logloss: 0.362937
[1281]	valid_0's auc: 0.81779	valid_0's binary_logloss: 0.362936
[1282]	valid_0's auc: 0.817806	valid_0's binary_logloss: 0.362924
[1283]	valid_

[1393]	valid_0's auc: 0.81797	valid_0's binary_logloss: 0.362803
[1394]	valid_0's auc: 0.817972	valid_0's binary_logloss: 0.362802
[1395]	valid_0's auc: 0.817974	valid_0's binary_logloss: 0.362801
[1396]	valid_0's auc: 0.817978	valid_0's binary_logloss: 0.362798
[1397]	valid_0's auc: 0.817979	valid_0's binary_logloss: 0.362797
[1398]	valid_0's auc: 0.81798	valid_0's binary_logloss: 0.362796
[1399]	valid_0's auc: 0.817981	valid_0's binary_logloss: 0.362795
[1400]	valid_0's auc: 0.817982	valid_0's binary_logloss: 0.362794
[1401]	valid_0's auc: 0.817984	valid_0's binary_logloss: 0.362793
[1402]	valid_0's auc: 0.817986	valid_0's binary_logloss: 0.362791
[1403]	valid_0's auc: 0.817989	valid_0's binary_logloss: 0.362789
[1404]	valid_0's auc: 0.817991	valid_0's binary_logloss: 0.362787
[1405]	valid_0's auc: 0.817992	valid_0's binary_logloss: 0.362786
[1406]	valid_0's auc: 0.817997	valid_0's binary_logloss: 0.362782
[1407]	valid_0's auc: 0.818	valid_0's binary_logloss: 0.36278
[1408]	valid_0's

[1518]	valid_0's auc: 0.818249	valid_0's binary_logloss: 0.362578
[1519]	valid_0's auc: 0.818251	valid_0's binary_logloss: 0.362577
[1520]	valid_0's auc: 0.818259	valid_0's binary_logloss: 0.362567
[1521]	valid_0's auc: 0.818262	valid_0's binary_logloss: 0.362565


In [None]:
# sub = pd.read_csv(f'{base_path}/invite_info_evaluate_0926.txt', sep='\t', header=None)
# sub.columns = ['qid', 'uid', 'dt']
sub['label'] = model_lgb.predict_proba(X_test[feature_cols])[:, 1]

In [None]:
sub[:20]

In [None]:
sub.to_csv('./result/kfold_2000.txt', index=None, header=None, sep='\t')

In [None]:
fi = pd.DataFrame({'feature': feature_cols, 'imp': model_lgb.feature_importances_})
fi['rate'] = fi['imp'] / fi['imp'].sum()
fi

In [None]:
fi.sort_values(by='rate', ascending=False)[:60]