In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from lightgbm import LGBMClassifier
import logging

In [2]:
# Log line layout: timestamp, severity, originating module, then the message.
log_fmt = (
    "[%(asctime)s] %(levelname)s "
    "in %(module)s: %(message)s"
)
logging.basicConfig(level=logging.INFO, format=log_fmt)

In [3]:
import warnings
# Suppress every warning category for the remainder of the session.
warnings.simplefilter('ignore')

In [4]:
def extract_day(s):
    """Return the integer day encoded in each timestamp string of ``s``.

    Each element is split on ``'-'``; the first piece has its leading
    character removed and the rest is converted with ``int``
    (for example ``'D3865-H22'`` gives ``3865``).
    """
    def _day_of(stamp):
        day_token = stamp.split('-', 1)[0]
        return int(day_token[1:])

    return s.apply(_day_of)

def extract_hour(s):
    """Return the integer hour encoded in each timestamp string of ``s``.

    Each element is split on ``'-'``; the second piece has its leading
    character removed and the rest is converted with ``int``
    (for example ``'D3865-H22'`` gives ``22``).
    """
    def _hour_of(stamp):
        hour_token = stamp.split('-', 2)[1]
        return int(hour_token[1:])

    return s.apply(_hour_of)

In [5]:
# Relative path of the directory that holds the raw input files read below.
base_path = "../data"

In [6]:
# Load the invitation records: the labelled history and the evaluation set.

train = pd.read_csv(f'{base_path}/invite_info_0926.txt', header=None, sep='\t')
train.columns = ['qid', 'uid', 'dt', 'label']
logging.info("invite %s", train.shape)

test = pd.read_csv(f'{base_path}/invite_info_evaluate_1_0926.txt', header=None, sep='\t')
test.columns = ['qid', 'uid', 'dt']
logging.info("test %s", test.shape)

# Keep an untouched copy of the evaluation rows; it becomes the submission frame.
sub = test.copy()
sub_size = sub.shape[0]

# Derive integer day/hour columns from the raw timestamp, then drop the raw column.
train['day'], train['hour'] = extract_day(train['dt']), extract_hour(train['dt'])
test['day'], test['hour'] = extract_day(test['dt']), extract_hour(test['dt'])
del train['dt']
del test['dt']


[2019-12-11 18:40:05,591] INFO in <ipython-input-6-12241a91563a>: invite (9489162, 4)
[2019-12-11 18:40:09,979] INFO in <ipython-input-6-12241a91563a>: test (1141683, 3)


In [7]:
# Load question metadata; the title/description token columns are discarded.
ques = pd.read_csv(f'{base_path}/question_info_0926.txt', sep='\t', header=None)
ques.columns = ['qid', 'q_dt', 'title_t1', 'title_t2', 'desc_t1', 'desc_t2', 'topic']
ques = ques.drop(columns=['title_t1', 'title_t2', 'desc_t1', 'desc_t2'])
logging.info("ques %s", ques.shape)

# Split the creation timestamp into integer day/hour columns.
ques['q_day'], ques['q_hour'] = extract_day(ques['q_dt']), extract_hour(ques['q_dt'])
ques = ques.drop(columns='q_dt')

[2019-12-11 18:41:42,774] INFO in <ipython-input-7-d1d53554f000>: ques (1829900, 3)


In [8]:
# Load answer records and attach the metadata of the question each one answers.
ans = pd.read_csv(f'{base_path}/answer_info_0926.txt', sep='\t', header=None)
ans.columns = [
    'aid', 'qid', 'uid', 'ans_dt', 'ans_t1', 'ans_t2',
    'is_good', 'is_rec', 'is_dest', 'has_img', 'has_video', 'word_count',
    'reci_cheer', 'reci_uncheer', 'reci_comment', 'reci_mark', 'reci_tks',
    'reci_xxx', 'reci_no_help', 'reci_dis',
]
ans = ans.drop(columns=['ans_t1', 'ans_t2'])
logging.info("ans %s", ans.shape)

# Split the answer timestamp into integer day/hour columns.
ans['a_day'], ans['a_hour'] = extract_day(ans['ans_dt']), extract_hour(ans['ans_dt'])
ans = ans.drop(columns='ans_dt')

# Inner join: answers whose qid is absent from `ques` are dropped here.
ans = ans.merge(ques, on='qid')
# `ques` is no longer needed once its columns live on `ans`.
del ques

[2019-12-11 18:44:51,249] INFO in <ipython-input-8-0f04b531e5be>: ans (4513735, 18)


In [9]:
# Show the first and the last day covered by the answer log, one per line.
print(ans['a_day'].min(), ans['a_day'].max(), sep='\n')

3807
3867


In [10]:
# Number of days between the question's day and the answer's day.
ans['diff_qa_days'] = ans['a_day'].sub(ans['q_day'])

[2019-12-11 18:45:31,234] INFO in utils: Note: NumExpr detected 24 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
[2019-12-11 18:45:31,238] INFO in utils: NumExpr defaulting to 8 threads.


In [11]:
# Time-window layout. Every value below is an integer day number.
# Declared spans of the training and validation periods.
train_start, train_end = 3838, 3867
val_start, val_end = 3868, 3874

# The labelled rows used for training are the final 7 days ending at label_end.
label_end = 3867
label_start = label_end - 6

# Feature windows for the training rows end one week before label_end:
# 23 days of invitation history and 51 days of answer history.
train_label_feature_end = label_end - 7
train_label_feature_start = train_label_feature_end - 22

train_ans_feature_end = label_end - 7
train_ans_feature_start = train_ans_feature_end - 50

# Feature windows for the evaluation rows have the same lengths and end the
# day before val_start.
val_label_feature_end = val_start - 1
val_label_feature_start = val_label_feature_end - 22

val_ans_feature_end = val_start - 1
val_ans_feature_start = val_ans_feature_end - 50

# Invitation history (bounds inclusive) feeding each split's features.
train_label_feature = train[train['day'].between(train_label_feature_start, train_label_feature_end)]
logging.info("train_label_feature %s", train_label_feature.shape)

val_label_feature = train[train['day'].between(val_label_feature_start, val_label_feature_end)]
logging.info("val_label_feature %s", val_label_feature.shape)

# Rows strictly after the training feature window become the training samples.
train_label = train[train['day'].gt(train_label_feature_end)]

logging.info(
    "train feature start %s end %s, label start %s end %s",
    train_label_feature['day'].min(),
    train_label_feature['day'].max(),
    train_label['day'].min(),
    train_label['day'].max(),
)

logging.info(
    "test feature start %s end %s, label start %s end %s",
    val_label_feature['day'].min(),
    val_label_feature['day'].max(),
    test['day'].min(),
    test['day'].max(),
)

[2019-12-11 18:45:33,051] INFO in <ipython-input-11-413cbf6f4a7d>: train_label_feature (6895493, 5)
[2019-12-11 18:45:34,058] INFO in <ipython-input-11-413cbf6f4a7d>: val_label_feature (7583553, 5)
[2019-12-11 18:45:34,557] INFO in <ipython-input-11-413cbf6f4a7d>: train feature start 3838 end 3860, label start 3861 end 3867
[2019-12-11 18:45:34,610] INFO in <ipython-input-11-413cbf6f4a7d>: test feature start 3845 end 3867, label start 3868 end 3874


In [12]:
# Restrict the answer log to the answer-feature window of each split
# (both bounds inclusive).
train_ans_feature = ans[ans['a_day'].between(train_ans_feature_start, train_ans_feature_end)]
val_ans_feature = ans[ans['a_day'].between(val_ans_feature_start, val_ans_feature_end)]

logging.info(
    "train ans feature %s, start %s end %s",
    train_ans_feature.shape,
    train_ans_feature['a_day'].min(),
    train_ans_feature['a_day'].max(),
)

logging.info(
    "val ans feature %s, start %s end %s",
    val_ans_feature.shape,
    val_ans_feature['a_day'].min(),
    val_ans_feature['a_day'].max(),
)

# Answer columns that get aggregated per user and per question.
fea_cols = [
    'is_good', 'is_rec', 'is_dest', 'has_img', 'has_video', 'word_count',
    'reci_cheer', 'reci_uncheer', 'reci_comment', 'reci_mark', 'reci_tks',
    'reci_xxx', 'reci_no_help', 'reci_dis',
    'diff_qa_days',
]


def extract_feature1(target, label_feature, ans_feature, cols=None):
    """Left-join historical invitation and answer statistics onto ``target``.

    Parameters
    ----------
    target : DataFrame
        Rows to enrich; must contain ``qid`` and ``uid`` columns.
    label_feature : DataFrame
        Invitation history with ``qid``, ``uid`` and ``label`` columns.
    ans_feature : DataFrame
        Answer history with ``aid``, ``qid``, ``uid`` and every column in ``cols``.
    cols : sequence of str, optional
        Answer columns to aggregate. Defaults to the module-level ``fea_cols``.

    Returns
    -------
    DataFrame
        ``target`` (row order and row count preserved by the left joins)
        followed by, in this order:
        ``q_inv_*`` / ``u_inv_*`` (mean, sum, std, count of ``label``),
        ``q_ans_count`` / ``u_ans_count``, and for each column in ``cols``
        ``u_<col>_{sum,max,mean}`` then ``q_<col>_{sum,max,mean}``.
        Keys with no history get NaN.

    Notes
    -----
    The per-column statistics are computed with one multi-column groupby and
    one merge per key instead of two groupbys and two merges per column, which
    avoids copying the (large) target frame 2 * len(cols) times. The columns
    are then put back into the per-column interleaved order.
    """
    if cols is None:
        cols = fea_cols  # module-level default list of answer columns
    cols = list(cols)

    inv_stats = ['mean', 'sum', 'std', 'count']
    ans_stats = ['sum', 'max', 'mean']

    # Invitation-history statistics: per question first, then per user.
    for key, prefix in (('qid', 'q'), ('uid', 'u')):
        stats = label_feature.groupby(key)['label'].agg(inv_stats).reset_index()
        stats.columns = [key] + [f'{prefix}_inv_{name}' for name in inv_stats]
        target = pd.merge(target, stats, on=key, how='left')

    # Number of answers recorded per question, then per user.
    for key, prefix in (('qid', 'q'), ('uid', 'u')):
        stats = ans_feature.groupby(key)['aid'].count().reset_index()
        stats.columns = [key, f'{prefix}_ans_count']
        target = pd.merge(target, stats, on=key, how='left')

    # Nothing to aggregate: avoid grouping on an empty column selection.
    if not cols:
        return target

    n_base = target.shape[1]

    # One aggregation + one merge per key covers every requested column.
    # The aggregated frame has (column, statistic) MultiIndex columns, ordered
    # column by column, which are flattened to '<prefix>_<column>_<statistic>'.
    for key, prefix in (('uid', 'u'), ('qid', 'q')):
        stats = ans_feature.groupby(key)[cols].agg(ans_stats)
        stats.columns = [f'{prefix}_{col}_{name}' for col, name in stats.columns]
        target = pd.merge(target, stats.reset_index(), on=key, how='left')
    logging.info("extract %s", cols)

    # Each merge appended len(ans_stats) columns per requested column: first the
    # whole user block, then the whole question block. Interleave them by
    # position so the layout is user stats then question stats for each column.
    labels = list(target.columns)
    width = len(ans_stats)
    block = width * len(cols)
    u_labels = labels[n_base:n_base + block]
    q_labels = labels[n_base + block:]
    ordered = labels[:n_base]
    for i in range(len(cols)):
        ordered += u_labels[i * width:(i + 1) * width]
        ordered += q_labels[i * width:(i + 1) * width]
    return target.reindex(columns=ordered)


# Training rows take their statistics from the training windows; the
# evaluation rows take theirs from the validation windows.
train_label = extract_feature1(
    target=train_label,
    label_feature=train_label_feature,
    ans_feature=train_ans_feature,
)
test = extract_feature1(
    target=test,
    label_feature=val_label_feature,
    ans_feature=val_ans_feature,
)

[2019-12-11 18:45:48,611] INFO in <ipython-input-12-0511893e776f>: train ans feature (3700178, 23), start 3810 end 3860
[2019-12-11 18:45:48,626] INFO in <ipython-input-12-0511893e776f>: val ans feature (3992334, 23), start 3817 end 3867
[2019-12-11 18:47:39,190] INFO in <ipython-input-12-0511893e776f>: extract is_good
[2019-12-11 18:48:12,874] INFO in <ipython-input-12-0511893e776f>: extract is_rec
[2019-12-11 18:48:46,838] INFO in <ipython-input-12-0511893e776f>: extract is_dest
[2019-12-11 18:49:19,070] INFO in <ipython-input-12-0511893e776f>: extract has_img
[2019-12-11 18:49:49,925] INFO in <ipython-input-12-0511893e776f>: extract has_video
[2019-12-11 18:50:22,002] INFO in <ipython-input-12-0511893e776f>: extract word_count
[2019-12-11 18:50:53,793] INFO in <ipython-input-12-0511893e776f>: extract reci_cheer
[2019-12-11 18:51:26,527] INFO in <ipython-input-12-0511893e776f>: extract reci_uncheer
[2019-12-11 18:51:58,229] INFO in <ipython-input-12-0511893e776f>: extract reci_commen

In [13]:
# Feature extraction is finished: report shapes and check no test row was lost.
logging.info("train shape %s, test shape %s", train_label.shape, test.shape)
assert test.shape[0] == sub_size

# Load the user (member) table.
user = pd.read_csv(f'{base_path}/member_info_0926.txt', sep='\t', header=None)
user.columns = [
    'uid', 'gender', 'creat_keyword', 'level', 'hot', 'reg_type', 'reg_plat', 'freq',
    'uf_b1', 'uf_b2', 'uf_b3', 'uf_b4', 'uf_b5',
    'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4', 'uf_c5',
    'score', 'follow_topic', 'inter_topic',
]

[2019-12-11 19:03:38,870] INFO in <ipython-input-13-d70daf315b0b>: train shape (2593669, 105), test shape (1141683, 104)


In [16]:
# Ordinal codes for the visit-frequency label: more frequent labels get larger codes.
# The map originally contained only the key 'unknow'. The float values (2.0, 4.0, ...)
# that `freq` shows after this mapping indicate that at least one raw label was not
# covered and became NaN. Presumably the raw label is spelt 'unknown' -- verify against
# the data file. Both spellings are mapped to 0 so the old behaviour is kept as well.
dit = {'daily': 4, 'weekly': 3, 'monthly': 2, 'new': 1, 'unknown': 0, 'unknow': 0}
user['freq'] = user['freq'].map(dit)

In [17]:
# Remove the registration/keyword columns from the user table.
user = user.drop(columns=['creat_keyword', 'level', 'hot', 'reg_type', 'reg_plat'])

In [18]:
# Preview the first five rows of the user table.
user.head(5)

Unnamed: 0,uid,gender,freq,uf_b1,uf_b2,uf_b3,uf_b4,uf_b5,uf_c1,uf_c2,uf_c3,uf_c4,uf_c5,score,follow_topic,inter_topic
0,M1934753188,male,2.0,0,1,0,1,0,MD470265,BR470265,PV929066,CT929066,PF470265,764,T540,"T21107:1.7915097,T405:1.6123838,T4436:1.518003..."
1,M595924114,male,4.0,0,0,0,1,1,MD195122,BR596936,PV002320,CT840234,PF470265,671,"T44126,T15940,T839,T8978,T2934,T1113,T3914,T12...","T18016:2.0650618,T2384:1.2503042,T1142:1.13569..."
2,M1473482940,female,3.0,0,1,0,1,0,MD116493,BR641329,PV170953,CT470265,PF470265,454,"T30874,T2113,T8656,T21,T523,T8,T116,T5727,T68,...","T46:1.330939,T2159:1.1296458,T379:1.1241927,T1..."
3,M578477092,male,4.0,1,1,0,1,0,MD889589,BR803759,PV545833,CT545833,PF470265,588,"T946,T7323,T297,T2660,T36067,T53107,T2654,T507...","T15918:1.9479566,T8106:1.8578106,T4787:1.58486..."
4,M1088794709,male,3.0,0,1,0,0,0,MD825760,BR641329,PV071037,CT470265,PF470265,361,"T582,T558,T28776,T5186,T9081,T2419,T2693,T2299...","T65:1.5992582,T867:1.3179373,T93:1.2095009,T31..."


In [19]:
# Feature extraction is finished: report shapes and check no test row was lost.
logging.info("train shape %s, test shape %s", train_label.shape, test.shape)
assert test.shape[0] == sub_size

# Reload the user (member) table from disk; the two topic-list columns are not used.
user = pd.read_csv(f'{base_path}/member_info_0926.txt', sep='\t', header=None)
user.columns = [
    'uid', 'gender', 'creat_keyword', 'level', 'hot', 'reg_type', 'reg_plat', 'freq',
    'uf_b1', 'uf_b2', 'uf_b3', 'uf_b4', 'uf_b5',
    'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4', 'uf_c5',
    'score', 'follow_topic', 'inter_topic',
]
user = user.drop(columns=['follow_topic', 'inter_topic'])
logging.info("user %s", user.shape)

unq = user.nunique()
logging.info("user unq %s", unq)

# Drop every column that holds a single distinct value.
for x in unq.index[(unq == 1).values]:
    del user[x]
    logging.info('del unq==1 %s', x)

# Remaining string-typed columns (except the user id) are label-encoded in place.
t = user.dtypes
cats = [
    col for col in t.index[(t == 'object').values]
    if col not in {'follow_topic', 'inter_topic', 'uid'}
]
logging.info("user cat %s", cats)

for d in cats:
    lb = LabelEncoder().fit(user[d])
    user[d] = lb.transform(user[d])
    logging.info('encode %s', d)

# Question ids: the encoder is fitted on the ids seen in train and test together.
q_lb = LabelEncoder().fit(
    train_label['qid'].astype(str).tolist() + test['qid'].astype(str).tolist()
)
train_label['qid_enc'] = q_lb.transform(train_label['qid'])
test['qid_enc'] = q_lb.transform(test['qid'])

# User ids: the encoder is fitted on the ids present in the user table.
u_lb = LabelEncoder().fit(user['uid'])
train_label['uid_enc'] = u_lb.transform(train_label['uid'])
test['uid_enc'] = u_lb.transform(test['uid'])

# Attach the user attributes to both splits.
train_label = train_label.merge(user, on='uid', how='left')
test = test.merge(user, on='uid', how='left')
logging.info("train shape %s, test shape %s", train_label.shape, test.shape)

# Stack train on top of test; sort=True orders the columns alphabetically.
data = pd.concat([train_label, test], axis=0, sort=True)
# del train_label, test

[2019-12-11 21:32:52,495] INFO in <ipython-input-19-6f2a9956cd08>: train shape (2593669, 105), test shape (1141683, 104)
[2019-12-11 21:33:02,732] INFO in <ipython-input-19-6f2a9956cd08>: user (1931654, 19)
[2019-12-11 21:33:06,942] INFO in <ipython-input-19-6f2a9956cd08>: user unq uid              1931654
gender                 3
creat_keyword          1
level                  1
hot                    1
reg_type               1
reg_plat               1
freq                   5
uf_b1                  2
uf_b2                  2
uf_b3                  2
uf_b4                  2
uf_b5                  2
uf_c1               2561
uf_c2                291
uf_c3                428
uf_c4               1556
uf_c5                  2
score                732
dtype: int64
[2019-12-11 21:33:06,962] INFO in <ipython-input-19-6f2a9956cd08>: del unq==1 creat_keyword
[2019-12-11 21:33:06,966] INFO in <ipython-input-19-6f2a9956cd08>: del unq==1 level
[2019-12-11 21:33:06,967] INFO in <ipython-input-19-6

In [20]:
# # count encoding
# count_fea = ['uid_enc', 'qid_enc', 'gender', 'freq', 'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4', 'uf_c5']
# for feat in count_fea:
#     col_name = '{}_count'.format(feat)
#     data[col_name] = data[feat].map(data[feat].value_counts().astype(int))
#     data.loc[data[col_name] < 2, feat] = -1
#     data[feat] += 1
#     data[col_name] = data[feat].map(data[feat].value_counts().astype(int))
#     data[col_name] = (data[col_name] - data[col_name].min()) / (data[col_name].max() - data[col_name].min())
#     # 

# # number of times each question has been answered

In [21]:
# Shrink the frame: 64-bit integer and float columns are downcast to 32 bits.
t = data.dtypes
for x in t.index[(t == 'int64').values]:
    data[x] = data[x].astype('int32')

for x in t.index[(t == 'float64').values]:
    data[x] = data[x].astype('float32')

# Day number modulo 7.
data['wk'] = data['day'].mod(7)

# Everything except the label, the raw ids and the day columns is a model input.
feature_cols = [
    name for name in data.columns
    if name not in {'label', 'uid', 'qid', 'dt', 'day'}
]

In [22]:
# Train LightGBM on the labelled rows, using one stratified fold as the
# early-stopping validation set, then score the evaluation rows.
logging.info("feature size %s", len(feature_cols))

# `data` is train_label stacked on top of test, so split it back by position.
X_train_all = data.iloc[:len(train_label)].loc[:, feature_cols]
y_train_all = data['label'].iloc[:len(train_label)]
test = data.iloc[len(train_label):]
# `data` itself is kept (not deleted).
assert test.shape[0] == sub_size

logging.info("train shape %s, test shape %s", train_label.shape, test.shape)

fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Only the first of the five splits is used.
index, (train_idx, val_idx) = next(enumerate(fold.split(X=X_train_all, y=y_train_all)))

X_train = X_train_all.iloc[train_idx][feature_cols]
X_val = X_train_all.iloc[val_idx][feature_cols]
y_train = y_train_all.iloc[train_idx]
y_val = y_train_all.iloc[val_idx]
del X_train_all

model_lgb = LGBMClassifier(
    n_estimators=2000,
    n_jobs=-1,
    objective='binary',
    seed=1000,
    silent=True,
)
model_lgb.fit(
    X_train,
    y_train,
    eval_metric=['logloss', 'auc'],
    eval_set=[(X_val, y_val)],
    early_stopping_rounds=50,
)

# Probability of the positive class for every evaluation row.
sub['label'] = model_lgb.predict_proba(test[feature_cols])[:, 1]


# Tab-separated submission file without header or index.
sub.to_csv('../example/result_1211.txt', index=None, header=None, sep='\t')

[2019-12-11 21:33:50,977] INFO in <ipython-input-22-72ab1911ff15>: feature size 117
[2019-12-11 21:33:51,487] INFO in <ipython-input-22-72ab1911ff15>: train shape (2593669, 120), test shape (1141683, 121)


[1]	valid_0's auc: 0.725572	valid_0's binary_logloss: 0.430041
Training until validation scores don't improve for 50 rounds
[2]	valid_0's auc: 0.73069	valid_0's binary_logloss: 0.422624
[3]	valid_0's auc: 0.733677	valid_0's binary_logloss: 0.416746
[4]	valid_0's auc: 0.734595	valid_0's binary_logloss: 0.411994
[5]	valid_0's auc: 0.736529	valid_0's binary_logloss: 0.408056
[6]	valid_0's auc: 0.738714	valid_0's binary_logloss: 0.404577
[7]	valid_0's auc: 0.740688	valid_0's binary_logloss: 0.401535
[8]	valid_0's auc: 0.74166	valid_0's binary_logloss: 0.399105
[9]	valid_0's auc: 0.74215	valid_0's binary_logloss: 0.397037
[10]	valid_0's auc: 0.743252	valid_0's binary_logloss: 0.395128
[11]	valid_0's auc: 0.744247	valid_0's binary_logloss: 0.393498
[12]	valid_0's auc: 0.744771	valid_0's binary_logloss: 0.392108
[13]	valid_0's auc: 0.745461	valid_0's binary_logloss: 0.390854
[14]	valid_0's auc: 0.746221	valid_0's binary_logloss: 0.389719
[15]	valid_0's auc: 0.746872	valid_0's binary_logloss: 

[129]	valid_0's auc: 0.765336	valid_0's binary_logloss: 0.373732
[130]	valid_0's auc: 0.765417	valid_0's binary_logloss: 0.373691
[131]	valid_0's auc: 0.76543	valid_0's binary_logloss: 0.373683
[132]	valid_0's auc: 0.765448	valid_0's binary_logloss: 0.373672
[133]	valid_0's auc: 0.76546	valid_0's binary_logloss: 0.373667
[134]	valid_0's auc: 0.765474	valid_0's binary_logloss: 0.373661
[135]	valid_0's auc: 0.765509	valid_0's binary_logloss: 0.373647
[136]	valid_0's auc: 0.765532	valid_0's binary_logloss: 0.373632
[137]	valid_0's auc: 0.765556	valid_0's binary_logloss: 0.373619
[138]	valid_0's auc: 0.765564	valid_0's binary_logloss: 0.373612
[139]	valid_0's auc: 0.765578	valid_0's binary_logloss: 0.373604
[140]	valid_0's auc: 0.76562	valid_0's binary_logloss: 0.373583
[141]	valid_0's auc: 0.765684	valid_0's binary_logloss: 0.373547
[142]	valid_0's auc: 0.765692	valid_0's binary_logloss: 0.373553
[143]	valid_0's auc: 0.7657	valid_0's binary_logloss: 0.373544
[144]	valid_0's auc: 0.765718	

[256]	valid_0's auc: 0.76813	valid_0's binary_logloss: 0.372179
[257]	valid_0's auc: 0.768139	valid_0's binary_logloss: 0.372173
[258]	valid_0's auc: 0.768163	valid_0's binary_logloss: 0.37216
[259]	valid_0's auc: 0.768166	valid_0's binary_logloss: 0.372156
[260]	valid_0's auc: 0.768188	valid_0's binary_logloss: 0.372144
[261]	valid_0's auc: 0.768198	valid_0's binary_logloss: 0.372136
[262]	valid_0's auc: 0.768217	valid_0's binary_logloss: 0.372128
[263]	valid_0's auc: 0.768224	valid_0's binary_logloss: 0.372123
[264]	valid_0's auc: 0.768242	valid_0's binary_logloss: 0.372112
[265]	valid_0's auc: 0.768278	valid_0's binary_logloss: 0.372095
[266]	valid_0's auc: 0.768298	valid_0's binary_logloss: 0.372081
[267]	valid_0's auc: 0.768296	valid_0's binary_logloss: 0.372083
[268]	valid_0's auc: 0.768322	valid_0's binary_logloss: 0.372069
[269]	valid_0's auc: 0.768336	valid_0's binary_logloss: 0.372062
[270]	valid_0's auc: 0.76835	valid_0's binary_logloss: 0.372053
[271]	valid_0's auc: 0.76837

[383]	valid_0's auc: 0.770086	valid_0's binary_logloss: 0.371086
[384]	valid_0's auc: 0.770084	valid_0's binary_logloss: 0.371085
[385]	valid_0's auc: 0.770085	valid_0's binary_logloss: 0.371085
[386]	valid_0's auc: 0.770085	valid_0's binary_logloss: 0.371084
[387]	valid_0's auc: 0.7701	valid_0's binary_logloss: 0.371078
[388]	valid_0's auc: 0.770115	valid_0's binary_logloss: 0.37107
[389]	valid_0's auc: 0.770115	valid_0's binary_logloss: 0.37107
[390]	valid_0's auc: 0.77015	valid_0's binary_logloss: 0.371054
[391]	valid_0's auc: 0.770165	valid_0's binary_logloss: 0.371045
[392]	valid_0's auc: 0.770178	valid_0's binary_logloss: 0.371037
[393]	valid_0's auc: 0.770207	valid_0's binary_logloss: 0.37102
[394]	valid_0's auc: 0.770254	valid_0's binary_logloss: 0.370993
[395]	valid_0's auc: 0.770291	valid_0's binary_logloss: 0.370975
[396]	valid_0's auc: 0.770295	valid_0's binary_logloss: 0.370969
[397]	valid_0's auc: 0.770303	valid_0's binary_logloss: 0.370966
[398]	valid_0's auc: 0.770334	v

[510]	valid_0's auc: 0.771494	valid_0's binary_logloss: 0.370304
[511]	valid_0's auc: 0.771496	valid_0's binary_logloss: 0.370303
[512]	valid_0's auc: 0.771504	valid_0's binary_logloss: 0.3703
[513]	valid_0's auc: 0.771524	valid_0's binary_logloss: 0.37029
[514]	valid_0's auc: 0.771529	valid_0's binary_logloss: 0.370287
[515]	valid_0's auc: 0.771543	valid_0's binary_logloss: 0.370277
[516]	valid_0's auc: 0.771546	valid_0's binary_logloss: 0.370275
[517]	valid_0's auc: 0.771546	valid_0's binary_logloss: 0.370275
[518]	valid_0's auc: 0.771565	valid_0's binary_logloss: 0.370265
[519]	valid_0's auc: 0.771582	valid_0's binary_logloss: 0.370257
[520]	valid_0's auc: 0.771599	valid_0's binary_logloss: 0.370249
[521]	valid_0's auc: 0.771611	valid_0's binary_logloss: 0.370243
[522]	valid_0's auc: 0.771613	valid_0's binary_logloss: 0.370243
[523]	valid_0's auc: 0.771622	valid_0's binary_logloss: 0.370237
[524]	valid_0's auc: 0.771624	valid_0's binary_logloss: 0.370236
[525]	valid_0's auc: 0.77162

[637]	valid_0's auc: 0.772635	valid_0's binary_logloss: 0.369627
[638]	valid_0's auc: 0.772637	valid_0's binary_logloss: 0.369626
[639]	valid_0's auc: 0.772656	valid_0's binary_logloss: 0.369614
[640]	valid_0's auc: 0.772675	valid_0's binary_logloss: 0.369598
[641]	valid_0's auc: 0.772673	valid_0's binary_logloss: 0.3696
[642]	valid_0's auc: 0.772668	valid_0's binary_logloss: 0.369602
[643]	valid_0's auc: 0.77269	valid_0's binary_logloss: 0.369592
[644]	valid_0's auc: 0.772694	valid_0's binary_logloss: 0.369585
[645]	valid_0's auc: 0.772728	valid_0's binary_logloss: 0.369566
[646]	valid_0's auc: 0.772744	valid_0's binary_logloss: 0.369559
[647]	valid_0's auc: 0.772751	valid_0's binary_logloss: 0.36955
[648]	valid_0's auc: 0.772762	valid_0's binary_logloss: 0.369544
[649]	valid_0's auc: 0.772792	valid_0's binary_logloss: 0.369528
[650]	valid_0's auc: 0.772789	valid_0's binary_logloss: 0.369529
[651]	valid_0's auc: 0.772795	valid_0's binary_logloss: 0.369525
[652]	valid_0's auc: 0.7728	v

[764]	valid_0's auc: 0.773748	valid_0's binary_logloss: 0.368985
[765]	valid_0's auc: 0.773747	valid_0's binary_logloss: 0.368983
[766]	valid_0's auc: 0.773755	valid_0's binary_logloss: 0.36898
[767]	valid_0's auc: 0.773771	valid_0's binary_logloss: 0.368971
[768]	valid_0's auc: 0.773774	valid_0's binary_logloss: 0.368969
[769]	valid_0's auc: 0.773774	valid_0's binary_logloss: 0.36897
[770]	valid_0's auc: 0.773774	valid_0's binary_logloss: 0.36897
[771]	valid_0's auc: 0.77377	valid_0's binary_logloss: 0.368972
[772]	valid_0's auc: 0.773774	valid_0's binary_logloss: 0.36897
[773]	valid_0's auc: 0.773797	valid_0's binary_logloss: 0.368959
[774]	valid_0's auc: 0.773809	valid_0's binary_logloss: 0.368953
[775]	valid_0's auc: 0.77382	valid_0's binary_logloss: 0.368946
[776]	valid_0's auc: 0.773832	valid_0's binary_logloss: 0.368937
[777]	valid_0's auc: 0.773838	valid_0's binary_logloss: 0.368933
[778]	valid_0's auc: 0.773839	valid_0's binary_logloss: 0.368932
[779]	valid_0's auc: 0.773836	v

[891]	valid_0's auc: 0.774604	valid_0's binary_logloss: 0.368497
[892]	valid_0's auc: 0.774611	valid_0's binary_logloss: 0.368493
[893]	valid_0's auc: 0.77462	valid_0's binary_logloss: 0.368488
[894]	valid_0's auc: 0.774639	valid_0's binary_logloss: 0.368479
[895]	valid_0's auc: 0.77464	valid_0's binary_logloss: 0.368478
[896]	valid_0's auc: 0.774642	valid_0's binary_logloss: 0.368476
[897]	valid_0's auc: 0.774647	valid_0's binary_logloss: 0.368475
[898]	valid_0's auc: 0.774655	valid_0's binary_logloss: 0.36847
[899]	valid_0's auc: 0.774669	valid_0's binary_logloss: 0.368464
[900]	valid_0's auc: 0.774677	valid_0's binary_logloss: 0.368458
[901]	valid_0's auc: 0.774688	valid_0's binary_logloss: 0.368452
[902]	valid_0's auc: 0.774696	valid_0's binary_logloss: 0.368449
[903]	valid_0's auc: 0.774702	valid_0's binary_logloss: 0.368445
[904]	valid_0's auc: 0.774709	valid_0's binary_logloss: 0.368441
[905]	valid_0's auc: 0.774726	valid_0's binary_logloss: 0.368431
[906]	valid_0's auc: 0.77472

[1018]	valid_0's auc: 0.775381	valid_0's binary_logloss: 0.368045
[1019]	valid_0's auc: 0.775383	valid_0's binary_logloss: 0.368045
[1020]	valid_0's auc: 0.775386	valid_0's binary_logloss: 0.368045
[1021]	valid_0's auc: 0.775389	valid_0's binary_logloss: 0.368044
[1022]	valid_0's auc: 0.775385	valid_0's binary_logloss: 0.368046
[1023]	valid_0's auc: 0.775382	valid_0's binary_logloss: 0.368045
[1024]	valid_0's auc: 0.775389	valid_0's binary_logloss: 0.368041
[1025]	valid_0's auc: 0.775388	valid_0's binary_logloss: 0.368042
[1026]	valid_0's auc: 0.775389	valid_0's binary_logloss: 0.368042
[1027]	valid_0's auc: 0.775392	valid_0's binary_logloss: 0.368039
[1028]	valid_0's auc: 0.775393	valid_0's binary_logloss: 0.368039
[1029]	valid_0's auc: 0.775403	valid_0's binary_logloss: 0.368033
[1030]	valid_0's auc: 0.775417	valid_0's binary_logloss: 0.368028
[1031]	valid_0's auc: 0.775424	valid_0's binary_logloss: 0.368024
[1032]	valid_0's auc: 0.775425	valid_0's binary_logloss: 0.368025
[1033]	val

[1144]	valid_0's auc: 0.776124	valid_0's binary_logloss: 0.367636
[1145]	valid_0's auc: 0.776133	valid_0's binary_logloss: 0.367631
[1146]	valid_0's auc: 0.77613	valid_0's binary_logloss: 0.367633
[1147]	valid_0's auc: 0.776134	valid_0's binary_logloss: 0.36763
[1148]	valid_0's auc: 0.776132	valid_0's binary_logloss: 0.367631
[1149]	valid_0's auc: 0.776139	valid_0's binary_logloss: 0.367627
[1150]	valid_0's auc: 0.776142	valid_0's binary_logloss: 0.367627
[1151]	valid_0's auc: 0.776143	valid_0's binary_logloss: 0.367626
[1152]	valid_0's auc: 0.776161	valid_0's binary_logloss: 0.367616
[1153]	valid_0's auc: 0.776172	valid_0's binary_logloss: 0.367608
[1154]	valid_0's auc: 0.776192	valid_0's binary_logloss: 0.3676
[1155]	valid_0's auc: 0.776196	valid_0's binary_logloss: 0.367598
[1156]	valid_0's auc: 0.776201	valid_0's binary_logloss: 0.367595
[1157]	valid_0's auc: 0.776201	valid_0's binary_logloss: 0.367595
[1158]	valid_0's auc: 0.776206	valid_0's binary_logloss: 0.367592
[1159]	valid_0

[1270]	valid_0's auc: 0.776643	valid_0's binary_logloss: 0.367336
[1271]	valid_0's auc: 0.77666	valid_0's binary_logloss: 0.367326
[1272]	valid_0's auc: 0.776668	valid_0's binary_logloss: 0.367321
[1273]	valid_0's auc: 0.776672	valid_0's binary_logloss: 0.367318
[1274]	valid_0's auc: 0.776672	valid_0's binary_logloss: 0.367317
[1275]	valid_0's auc: 0.776688	valid_0's binary_logloss: 0.367308
[1276]	valid_0's auc: 0.776693	valid_0's binary_logloss: 0.367303
[1277]	valid_0's auc: 0.776708	valid_0's binary_logloss: 0.367294
[1278]	valid_0's auc: 0.776709	valid_0's binary_logloss: 0.367293
[1279]	valid_0's auc: 0.776707	valid_0's binary_logloss: 0.367295
[1280]	valid_0's auc: 0.776712	valid_0's binary_logloss: 0.367292
[1281]	valid_0's auc: 0.776716	valid_0's binary_logloss: 0.36729
[1282]	valid_0's auc: 0.776729	valid_0's binary_logloss: 0.367284
[1283]	valid_0's auc: 0.776746	valid_0's binary_logloss: 0.367276
[1284]	valid_0's auc: 0.776755	valid_0's binary_logloss: 0.367271
[1285]	valid

[1396]	valid_0's auc: 0.777327	valid_0's binary_logloss: 0.366948
[1397]	valid_0's auc: 0.777344	valid_0's binary_logloss: 0.366939
[1398]	valid_0's auc: 0.777342	valid_0's binary_logloss: 0.366938
[1399]	valid_0's auc: 0.777348	valid_0's binary_logloss: 0.366935
[1400]	valid_0's auc: 0.777349	valid_0's binary_logloss: 0.366935
[1401]	valid_0's auc: 0.777359	valid_0's binary_logloss: 0.366931
[1402]	valid_0's auc: 0.777358	valid_0's binary_logloss: 0.36693
[1403]	valid_0's auc: 0.777363	valid_0's binary_logloss: 0.366928
[1404]	valid_0's auc: 0.777367	valid_0's binary_logloss: 0.366924
[1405]	valid_0's auc: 0.777369	valid_0's binary_logloss: 0.366923
[1406]	valid_0's auc: 0.777375	valid_0's binary_logloss: 0.36692
[1407]	valid_0's auc: 0.777379	valid_0's binary_logloss: 0.366917
[1408]	valid_0's auc: 0.777399	valid_0's binary_logloss: 0.366907
[1409]	valid_0's auc: 0.777424	valid_0's binary_logloss: 0.366895
[1410]	valid_0's auc: 0.777429	valid_0's binary_logloss: 0.366893
[1411]	valid

[1521]	valid_0's auc: 0.777967	valid_0's binary_logloss: 0.366606
[1522]	valid_0's auc: 0.777969	valid_0's binary_logloss: 0.366606
[1523]	valid_0's auc: 0.777977	valid_0's binary_logloss: 0.366601
[1524]	valid_0's auc: 0.777976	valid_0's binary_logloss: 0.366602
[1525]	valid_0's auc: 0.777984	valid_0's binary_logloss: 0.366598
[1526]	valid_0's auc: 0.777988	valid_0's binary_logloss: 0.366596
[1527]	valid_0's auc: 0.777988	valid_0's binary_logloss: 0.366595
[1528]	valid_0's auc: 0.77799	valid_0's binary_logloss: 0.366595
[1529]	valid_0's auc: 0.777994	valid_0's binary_logloss: 0.366592
[1530]	valid_0's auc: 0.77799	valid_0's binary_logloss: 0.366594
[1531]	valid_0's auc: 0.77799	valid_0's binary_logloss: 0.366595
[1532]	valid_0's auc: 0.777999	valid_0's binary_logloss: 0.366591
[1533]	valid_0's auc: 0.778002	valid_0's binary_logloss: 0.366591
[1534]	valid_0's auc: 0.778007	valid_0's binary_logloss: 0.366586
[1535]	valid_0's auc: 0.778014	valid_0's binary_logloss: 0.366584
[1536]	valid_

[1647]	valid_0's auc: 0.778525	valid_0's binary_logloss: 0.366311
[1648]	valid_0's auc: 0.778543	valid_0's binary_logloss: 0.366302
[1649]	valid_0's auc: 0.778543	valid_0's binary_logloss: 0.366302
[1650]	valid_0's auc: 0.778546	valid_0's binary_logloss: 0.3663
[1651]	valid_0's auc: 0.778547	valid_0's binary_logloss: 0.366299
[1652]	valid_0's auc: 0.778548	valid_0's binary_logloss: 0.366298
[1653]	valid_0's auc: 0.778545	valid_0's binary_logloss: 0.366299
[1654]	valid_0's auc: 0.778547	valid_0's binary_logloss: 0.366298
[1655]	valid_0's auc: 0.778545	valid_0's binary_logloss: 0.366299
[1656]	valid_0's auc: 0.77854	valid_0's binary_logloss: 0.366302
[1657]	valid_0's auc: 0.778539	valid_0's binary_logloss: 0.366302
[1658]	valid_0's auc: 0.778536	valid_0's binary_logloss: 0.366304
[1659]	valid_0's auc: 0.778538	valid_0's binary_logloss: 0.366303
[1660]	valid_0's auc: 0.778545	valid_0's binary_logloss: 0.3663
[1661]	valid_0's auc: 0.778554	valid_0's binary_logloss: 0.366295
[1662]	valid_0'

[1772]	valid_0's auc: 0.779069	valid_0's binary_logloss: 0.366013
[1773]	valid_0's auc: 0.779079	valid_0's binary_logloss: 0.366007
[1774]	valid_0's auc: 0.779081	valid_0's binary_logloss: 0.366005
[1775]	valid_0's auc: 0.779078	valid_0's binary_logloss: 0.366006
[1776]	valid_0's auc: 0.779081	valid_0's binary_logloss: 0.366005
[1777]	valid_0's auc: 0.779076	valid_0's binary_logloss: 0.366006
[1778]	valid_0's auc: 0.779082	valid_0's binary_logloss: 0.366001
[1779]	valid_0's auc: 0.779082	valid_0's binary_logloss: 0.366002
[1780]	valid_0's auc: 0.779082	valid_0's binary_logloss: 0.366
[1781]	valid_0's auc: 0.779084	valid_0's binary_logloss: 0.366
[1782]	valid_0's auc: 0.779089	valid_0's binary_logloss: 0.365996
[1783]	valid_0's auc: 0.779095	valid_0's binary_logloss: 0.365992
[1784]	valid_0's auc: 0.779091	valid_0's binary_logloss: 0.365994
[1785]	valid_0's auc: 0.779092	valid_0's binary_logloss: 0.365993
[1786]	valid_0's auc: 0.779094	valid_0's binary_logloss: 0.365992
[1787]	valid_0's

[1898]	valid_0's auc: 0.779483	valid_0's binary_logloss: 0.36577
[1899]	valid_0's auc: 0.779483	valid_0's binary_logloss: 0.36577
[1900]	valid_0's auc: 0.779487	valid_0's binary_logloss: 0.365767
[1901]	valid_0's auc: 0.77949	valid_0's binary_logloss: 0.365766
[1902]	valid_0's auc: 0.779489	valid_0's binary_logloss: 0.365765
[1903]	valid_0's auc: 0.779488	valid_0's binary_logloss: 0.365766
[1904]	valid_0's auc: 0.779486	valid_0's binary_logloss: 0.365768
[1905]	valid_0's auc: 0.779486	valid_0's binary_logloss: 0.365768
[1906]	valid_0's auc: 0.779488	valid_0's binary_logloss: 0.365767
[1907]	valid_0's auc: 0.77949	valid_0's binary_logloss: 0.365766
[1908]	valid_0's auc: 0.779492	valid_0's binary_logloss: 0.365763
[1909]	valid_0's auc: 0.77949	valid_0's binary_logloss: 0.365764
[1910]	valid_0's auc: 0.779486	valid_0's binary_logloss: 0.365766
[1911]	valid_0's auc: 0.779484	valid_0's binary_logloss: 0.365768
[1912]	valid_0's auc: 0.779485	valid_0's binary_logloss: 0.365766
[1913]	valid_0'