In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from lightgbm import LGBMClassifier
import logging
import pickle

In [2]:
log_fmt = "[%(asctime)s] %(levelname)s in %(module)s: %(message)s"
logging.basicConfig(format=log_fmt, level=logging.INFO)

In [3]:
base_path = './data'
feature_path = './feature'

In [4]:
train = pd.read_csv(f'{base_path}/invite_info_0926.txt', sep='\t', header=None)
train.columns = ['qid', 'uid', 'dt', 'label']

del train['dt']
logging.info("train %s", train.shape)

test = pd.read_csv(f'{base_path}/invite_info_evaluate_0926.txt', sep='\t', header=None)
test.columns = ['qid', 'uid', 'dt']
sub = test.copy()
sub_size = len(sub)

del test['dt']
logging.info("test %s", test.shape)

[2019-12-12 13:31:39,090] INFO in <ipython-input-4-c899d16ad23a>: train (9489162, 3)
[2019-12-12 13:31:41,130] INFO in <ipython-input-4-c899d16ad23a>: test (1141683, 2)


In [5]:
# 加载 ans kfold feature
all_col = ['day', 'hour', 'q_inv_kfold_mean', 'q_inv_kfold_sum', 'q_inv_kfold_std', 'q_inv_kfold_count', 
           'u_inv_kfold_mean', 'u_inv_kfold_sum', 'u_inv_kfold_std', 'u_inv_kfold_count', 'q_ans_kfold_count',
           'u_ans_kfold_count', 'q_is_good_sum', 'q_is_good_max', 'q_is_good_mean', 'u_is_good_sum',
           'u_is_good_max', 'u_is_good_mean', 'q_is_rec_sum', 'q_is_rec_max', 'q_is_rec_mean', 'u_is_rec_sum',
           'u_is_rec_max', 'u_is_rec_mean', 'q_is_dest_sum', 'q_is_dest_max', 'q_is_dest_mean', 
           'u_is_dest_sum', 'u_is_dest_max', 'u_is_dest_mean', 'q_has_img_sum', 'q_has_img_max', 
           'q_has_img_mean', 'u_has_img_sum', 'u_has_img_max', 'u_has_img_mean', 'q_has_video_sum', 
           'q_has_video_max', 'q_has_video_mean', 'u_has_video_sum', 'u_has_video_max', 'u_has_video_mean',
           'q_word_count_sum', 'q_word_count_max', 'q_word_count_mean', 'u_word_count_sum', 'u_word_count_max',
           'u_word_count_mean', 'q_reci_cheer_sum', 'q_reci_cheer_max', 'q_reci_cheer_mean', 'u_reci_cheer_sum',
           'u_reci_cheer_max', 'u_reci_cheer_mean', 'q_reci_uncheer_sum', 'q_reci_uncheer_max', 
           'q_reci_uncheer_mean', 'u_reci_uncheer_sum', 'u_reci_uncheer_max', 'u_reci_uncheer_mean', 
           'q_reci_comment_sum', 'q_reci_comment_max', 'q_reci_comment_mean', 'u_reci_comment_sum', 
           'u_reci_comment_max', 'u_reci_comment_mean', 'q_reci_mark_sum', 'q_reci_mark_max', 
           'q_reci_mark_mean', 'u_reci_mark_sum', 'u_reci_mark_max', 'u_reci_mark_mean', 'q_reci_tks_sum',
           'q_reci_tks_max', 'q_reci_tks_mean', 'u_reci_tks_sum', 'u_reci_tks_max', 'u_reci_tks_mean',
           'q_reci_xxx_sum', 'q_reci_xxx_max', 'q_reci_xxx_mean', 'u_reci_xxx_sum', 'u_reci_xxx_max', 
           'u_reci_xxx_mean', 'q_reci_no_help_sum', 'q_reci_no_help_max', 'q_reci_no_help_mean', 
           'u_reci_no_help_sum', 'u_reci_no_help_max', 'u_reci_no_help_mean', 'q_reci_dis_sum', 
           'q_reci_dis_max', 'q_reci_dis_mean', 'u_reci_dis_sum', 'u_reci_dis_max', 'u_reci_dis_mean', 
           'q_diff_qa_days_sum', 'q_diff_qa_days_max', 'q_diff_qa_days_mean', 'u_diff_qa_days_sum', 
           'u_diff_qa_days_max', 'u_diff_qa_days_mean']
drop_col = ['u_is_rec_mean', 'u_reci_uncheer_mean', 'q_is_dest_sum', 'u_reci_uncheer_sum', 'u_is_rec_max', 
             'u_is_dest_mean','q_reci_uncheer_mean', 'q_reci_uncheer_sum', 'u_is_dest_sum', 'q_is_dest_max',
             'q_reci_uncheer_max', 'u_reci_tks_max', 'q_reci_mark_max','u_reci_dis_max', 'q_has_video_mean',
             'q_reci_no_help_mean', 'count_u_topic', 'u_has_video_mean', 'q_reci_dis_sum', 'q_reci_mark_sum',
             'q_reci_tks_sum','q_reci_tks_max','q_reci_dis_max','u_reci_mark_max','q_is_good_mean',
             'q_reci_no_help_sum', 'q_reci_xxx_max', 'u_reci_xxx_max','u_reci_no_help_sum','u_reci_xxx_sum',
              'u_is_good_mean','q_reci_no_help_max','u_has_img_max','u_is_good_sum','u_reci_no_help_max',
              'u_has_video_sum','uf_b5','q_reci_xxx_sum','q_is_good_sum','q_has_img_max','q_has_video_sum',
              'q_has_video_max','u_has_video_max','q_is_good_max','q_is_rec_max','u_is_good_max',
              'q_is_dest_mean','u_reci_uncheer_max','uf_c5_count','u_is_dest_max','q_is_rec_mean',
              'q_is_rec_sum','u_is_rec_sum', 'q_reci_xxx_mean','u_reci_xxx_mean','u_reci_comment_max',
              'q_reci_comment_sum','u_reci_cheer_max','u_reci_dis_sum','u_reci_tks_sum','q_has_img_sum',
              'q_reci_comment_max','q_reci_cheer_max','u_reci_no_help_mean','u_has_img_sum','u_reci_mark_sum']
use_col = list(set(all_col) - set(drop_col))

t1 = pd.read_csv(f'{feature_path}/train_kfold_feature.txt', sep='\t', usecols=use_col)
train = pd.concat([train, t1], axis=1)
logging.info("train %s", train.shape)


t1 = pd.read_csv(f'{feature_path}/test_kfold_feature.txt', sep='\t', usecols=use_col)
test = pd.concat([test, t1], axis=1)
logging.info("test %s", test.shape)

# t1 = pd.read_csv(f'{feature_path}/train_kfold_feature.txt', sep='\t')
# train = pd.concat([train, t1], axis=1)
# logging.info("train %s", train.shape)


# t1 = pd.read_csv(f'{feature_path}/test_kfold_feature.txt', sep='\t')
# test = pd.concat([test, t1], axis=1)
# logging.info("test %s", test.shape)

[2019-12-12 13:32:57,284] INFO in <ipython-input-5-df91efe901e4>: train (9489162, 42)
[2019-12-12 13:33:05,570] INFO in <ipython-input-5-df91efe901e4>: test (1141683, 41)


In [6]:
train['week'] = train['day']%7
test['week'] = test['day']%7

In [7]:
# 加载 invete feature 1
t1 = pd.read_csv(f'{feature_path}/train_invite_feature.txt', sep='\t')
train = pd.concat([train, t1], axis=1)
logging.info("train %s", train.shape)

t1 = pd.read_csv(f'{feature_path}/test_invite_feature.txt', sep='\t')
test = pd.concat([test, t1], axis=1)
logging.info("test %s", test.shape)

[2019-12-12 13:33:29,162] INFO in <ipython-input-7-25a44eae3c1c>: train (9489162, 61)
[2019-12-12 13:33:31,660] INFO in <ipython-input-7-25a44eae3c1c>: test (1141683, 60)


In [8]:
# 加载 invete feature 2
t1 = pd.read_csv(f'{feature_path}/train_invite_feature_2.txt', sep='\t')
train = pd.concat([train, t1], axis=1)
logging.info("train %s", train.shape)

t1 = pd.read_csv(f'{feature_path}/test_invite_feature_2.txt', sep='\t')
test = pd.concat([test, t1], axis=1)
logging.info("test %s", test.shape)

[2019-12-12 13:34:11,117] INFO in <ipython-input-8-898ae5923687>: train (9489162, 89)
[2019-12-12 13:34:15,540] INFO in <ipython-input-8-898ae5923687>: test (1141683, 88)


In [None]:
# 加载 kfold topic feature, QU
# t1 = pd.read_csv(f'{feature_path}/train_kfold_topic_feature.txt', sep='\t')
# train = pd.concat([train, t1], axis=1)
# logging.info("train %s", train.shape)

# t1 = pd.read_csv(f'{feature_path}/test_kfold_topic_feature.txt', sep='\t')
# test = pd.concat([test, t1], axis=1)
# logging.info("test %s", test.shape)

In [None]:
# 加载 user kfold topic feature，UU
# t1 = pd.read_csv(f'{feature_path}/train_kfold_ut_feature.txt', sep='\t')
# train = pd.concat([train, t1], axis=1)
# logging.info("train %s", train.shape)

# t1 = pd.read_csv(f'{feature_path}/test_kfold_ut_feature.txt', sep='\t')
# test = pd.concat([test, t1], axis=1)
# logging.info("test %s", test.shape)

In [9]:
# 加载 kfold uid2order label 特征
t1 = pickle.load(open(f'{feature_path}/train_kfold_uid_2order_label_feature.pkl', 'rb'))
train = pd.concat([train, t1], axis=1)
logging.info("train %s", train.shape)

t1 = pickle.load(open(f'{feature_path}/test_kfold_uid_2order_label_feature.pkl', 'rb'))
test = pd.concat([test, t1], axis=1)
logging.info("train %s", test.shape)

[2019-12-12 13:34:28,705] INFO in <ipython-input-9-f11254bbb51b>: train (9489162, 133)
[2019-12-12 13:34:29,933] INFO in <ipython-input-9-f11254bbb51b>: train (1141683, 132)


In [10]:
# 加载 kfold qid2order label 特征
t1 = pickle.load(open(f'{feature_path}/train_kfold_qid_2order_label_feature.pkl', 'rb'))
train = pd.concat([train, t1], axis=1)
logging.info("train %s", train.shape)

t1 = pickle.load(open(f'{feature_path}/test_kfold_qid_2order_label_feature.pkl', 'rb'))
test = pd.concat([test, t1], axis=1)
logging.info("train %s", test.shape)

[2019-12-12 13:34:50,309] INFO in <ipython-input-10-681451a90e2f>: train (9489162, 226)
[2019-12-12 13:34:52,392] INFO in <ipython-input-10-681451a90e2f>: train (1141683, 225)


In [None]:

# t1 = pickle.load(open(f'{feature_path}/train_kfold_label_feature.pkl', 'rb'))
# train = pd.concat([train, t1], axis=1)
# logging.info("train %s", train.shape)

# t1 = pickle.load(open(f'{feature_path}/test_kfold_label_feature.pkl', 'rb'))
# test = pd.concat([test, t1], axis=1)
# logging.info("test %s", test.shape)

In [11]:
# 加载用户
user = pd.read_csv(f'{base_path}/member_info_0926.txt', header=None, sep='\t')
user.columns = ['uid', 'gender', 'freq', 'uf_b1', 'uf_b2','uf_b3', 'uf_b4', 'uf_b5', 
                'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4', 'uf_c5',  'score', 'follow_topic', 'inter_topic']

del user['follow_topic'], user['inter_topic']
logging.info("user %s", user.shape)

unq = user.nunique()
logging.info("user unq %s", unq)

for x in unq[unq == 1].index:
    del user[x]
    logging.info('del unq==1 %s', x)

t = user.dtypes
cats = [x for x in t[t == 'object'].index if x not in ['follow_topic', 'inter_topic', 'uid']]
logging.info("user cat %s", cats)

for d in cats:
    lb = LabelEncoder()
    user[d] = lb.fit_transform(user[d])
    logging.info('encode %s', d)
    
logging.info('encoding qid...')    
q_lb = LabelEncoder()
q_lb.fit(list(train['qid'].astype(str).values) + list(test['qid'].astype(str).values))
train['qid_enc'] = q_lb.transform(train['qid'])
test['qid_enc'] = q_lb.transform(test['qid'])
logging.info('add qid_enc')

logging.info('encoding uid...')
u_lb = LabelEncoder()
u_lb.fit(user['uid'])
train['uid_enc'] = u_lb.transform(train['uid'])
test['uid_enc'] = u_lb.transform(test['uid'])
logging.info('add uid_enc')

# merge user
train = pd.merge(train, user, on='uid', how='left')
test = pd.merge(test, user, on='uid', how='left')
logging.info("train shape %s, test shape %s", train.shape, test.shape)

[2019-12-12 13:35:03,270] INFO in <ipython-input-11-9ca2dee00e76>: user (1931654, 14)
[2019-12-12 13:35:07,186] INFO in <ipython-input-11-9ca2dee00e76>: user unq uid       1931654
gender          3
freq            5
uf_b1           2
uf_b2           2
uf_b3           2
uf_b4           2
uf_b5           2
uf_c1        2561
uf_c2         291
uf_c3         428
uf_c4        1556
uf_c5           2
score         732
dtype: int64
[2019-12-12 13:35:07,192] INFO in <ipython-input-11-9ca2dee00e76>: user cat ['gender', 'freq', 'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4', 'uf_c5']
[2019-12-12 13:35:08,177] INFO in <ipython-input-11-9ca2dee00e76>: encode gender
[2019-12-12 13:35:09,140] INFO in <ipython-input-11-9ca2dee00e76>: encode freq
[2019-12-12 13:35:10,007] INFO in <ipython-input-11-9ca2dee00e76>: encode uf_c1
[2019-12-12 13:35:10,904] INFO in <ipython-input-11-9ca2dee00e76>: encode uf_c2
[2019-12-12 13:35:11,734] INFO in <ipython-input-11-9ca2dee00e76>: encode uf_c3
[2019-12-12 13:35:12,541] INFO in

In [12]:
data = pd.concat((train, test), axis=0, sort=True)
len_train = len(train)
del train

In [None]:
# feat_counts_save = []

# other_feats = ['uid_enc', 'qid_enc', 'gender', 'freq', 'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4', 'uf_c5']
# for fj in other_feats:
#     feat_counts_save.append(fj+'_count')

# fi = 'qid_enc'
# other_feats = ['freq', 'gender', 'score', 'uid_enc', 'uf_b1', 'uf_b2', 'uf_b3', 'uf_b4', 'uf_b5', 
#                'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4', 'uf_c5', 'hour', 'q_hour', 'wk', 'q_week', 'diff_iq_day', 
#                'diff_iq_hour', 'day']
# for fj in other_feats:
#     feat_counts_save.append(fi+'_'+fj+'_count')
# fi = 'uid_enc'
# other_feats = ['hour', 'q_hour', 'wk', 'q_week', 'diff_iq_day', 'diff_iq_hour', 'day']
# for fj in other_feats:
#     feat_counts_save.append(fi+'_'+fj+'_count')
# fi = 'day'
# other_feats = ['freq', 'gender', 'score', 'uf_b1', 'uf_b2', 'uf_b3', 'uf_b4', 'uf_b5', 'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4', 'uf_c5', 'q_hour', 'wk', 'q_week', 'diff_iq_day', 'diff_iq_hour', 'hour']
# for fj in other_feats:
#     feat_counts_save.append(fi+'_'+fj+'_count')

# print(feat_counts_save)

# # data[feat_counts_save].to_csv(f'{feature_path}/count_features.txt', index=None, header=None, sep='\t')
# # 加载 invete feature 2
# t1 = pd.read_csv(f'{feature_path}/count_features.txt', sep='\t', header=None)
# t1.columns = feat_counts_save
# print(len(t1), len(data))
# t1 = t1.reset_index(drop= True)
# # t1 = t1.drop(['qid_enc_day_count', 'uid_enc_day_count'], axis=1)
# data = data.reset_index(drop= True)
# data = pd.concat([data, t1], axis=1)

In [None]:
# fi = 'day'
# other_feats = ['freq', 'gender', 'score', 'uf_b1', 'uf_b2', 'uf_b3', 'uf_b4', 'uf_b5', 'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4', 'uf_c5', 'q_hour', 'wk', 'q_week', 'diff_iq_day', 'diff_iq_hour', 'hour']
# times2_feats = []
# for fj in other_feats:
#     times2_feats.append(fi+'_'+fj+'_count')
# print(data.loc[len_train:, times2_feats].head(1))
# data.loc[len_train:, times2_feats] = data.loc[len_train:, times2_feats] * 2
# print(data.loc[len_train:, times2_feats].head(1))

In [13]:
# count 特征
count_feat = ['uid_enc', 'qid_enc', 'gender', 'freq', 'uf_c1', 'uf_c2', 'uf_c3', 'uf_c4', 'uf_c5']
for feat in count_feat:
    logging.info('counting %s', feat)
    col_name = '{}_count'.format(feat)
    data[col_name] = data[feat].map(data[feat].value_counts().astype(int))
    data.loc[data[col_name] < 2, feat] = -1
    data[feat] += 1
    data[col_name] = data[feat].map(data[feat].value_counts().astype(int))
    data[col_name] = (data[col_name] - data[col_name].min()) / (data[col_name].max() - data[col_name].min())

[2019-12-12 13:36:47,388] INFO in <ipython-input-13-1e9177a7eb97>: counting uid_enc
[2019-12-12 13:37:08,421] INFO in <ipython-input-13-1e9177a7eb97>: counting qid_enc
[2019-12-12 13:37:24,964] INFO in <ipython-input-13-1e9177a7eb97>: counting gender
[2019-12-12 13:37:38,519] INFO in <ipython-input-13-1e9177a7eb97>: counting freq
[2019-12-12 13:37:51,728] INFO in <ipython-input-13-1e9177a7eb97>: counting uf_c1
[2019-12-12 13:38:06,112] INFO in <ipython-input-13-1e9177a7eb97>: counting uf_c2
[2019-12-12 13:38:20,138] INFO in <ipython-input-13-1e9177a7eb97>: counting uf_c3
[2019-12-12 13:38:34,552] INFO in <ipython-input-13-1e9177a7eb97>: counting uf_c4
[2019-12-12 13:38:49,414] INFO in <ipython-input-13-1e9177a7eb97>: counting uf_c5


In [14]:
drop_feat = ['label', 'uid', 'qid', 'dt']
# drop_feat += ['u_is_rec_mean', 'u_reci_uncheer_mean', 'q_is_dest_sum', 'u_reci_uncheer_sum', 'u_is_rec_max', 
#              'u_is_dest_mean','q_reci_uncheer_mean', 'q_reci_uncheer_sum', 'u_is_dest_sum', 'q_is_dest_max',
#              'q_reci_uncheer_max', 'u_reci_tks_max', 'q_reci_mark_max','u_reci_dis_max', 'q_has_video_mean',
#              'q_reci_no_help_mean', 'count_u_topic', 'u_has_video_mean', 'q_reci_dis_sum', 'q_reci_mark_sum',
#              'q_reci_tks_sum','q_reci_tks_max','q_reci_dis_max','u_reci_mark_max','q_is_good_mean',
#              'q_reci_no_help_sum', 'q_reci_xxx_max', 'u_reci_xxx_max','u_reci_no_help_sum','u_reci_xxx_sum',
#               'u_is_good_mean','q_reci_no_help_max','u_has_img_max','u_is_good_sum','u_reci_no_help_max',
#               'u_has_video_sum','uf_b5','q_reci_xxx_sum','q_is_good_sum','q_has_img_max','q_has_video_sum',
#               'q_has_video_max','u_has_video_max','q_is_good_max','q_is_rec_max','u_is_good_max',
#               'q_is_dest_mean','u_reci_uncheer_max','uf_c5_count','u_is_dest_max','q_is_rec_mean',
#               'q_is_rec_sum','u_is_rec_sum', 'q_reci_xxx_mean','u_reci_xxx_mean','u_reci_comment_max',
#               'q_reci_comment_sum','u_reci_cheer_max','u_reci_dis_sum','u_reci_tks_sum','q_has_img_sum',
#               'q_reci_comment_max','q_reci_cheer_max','u_reci_no_help_mean','u_has_img_sum','u_reci_mark_sum']
# drop_feat += ['q_is_good_sum', 'q_is_good_max', 'q_is_good_mean', 'u_is_good_sum', 'u_is_good_max', 
#               'u_is_good_mean', 'q_is_rec_sum', 'q_is_rec_max', 'q_is_rec_mean', 'u_is_rec_sum', 
#               'u_is_rec_max', 'u_is_rec_mean', 'q_is_dest_sum', 'q_is_dest_max', 'q_is_dest_mean', 
#               'u_is_dest_sum', 'u_is_dest_max', 'u_is_dest_mean', 'q_has_img_sum', 'q_has_img_max', 
#               'q_has_img_mean', 'u_has_img_sum', 'u_has_img_max', 'u_has_img_mean', 'q_has_video_sum', 
#               'q_has_video_max', 'q_has_video_mean', 'u_has_video_sum', 'u_has_video_max', 'u_has_video_mean', 
#               'q_word_count_sum', 'q_word_count_max', 'q_word_count_mean', 'u_word_count_sum', 
#               'u_word_count_max', 'u_word_count_mean', 'q_reci_cheer_sum', 'q_reci_cheer_max', 
#               'q_reci_cheer_mean', 'u_reci_cheer_sum', 'u_reci_cheer_max', 'u_reci_cheer_mean', 
#               'q_reci_uncheer_sum', 'q_reci_uncheer_max', 'q_reci_uncheer_mean', 'u_reci_uncheer_sum',
#               'u_reci_uncheer_max', 'u_reci_uncheer_mean', 'q_reci_comment_sum', 'q_reci_comment_max', 
#               'q_reci_comment_mean', 'u_reci_comment_sum', 'u_reci_comment_max', 'u_reci_comment_mean', 
#               'q_reci_mark_sum', 'q_reci_mark_max', 'q_reci_mark_mean', 'u_reci_mark_sum', 'u_reci_mark_max', 
#               'u_reci_mark_mean', 'q_reci_tks_sum', 'q_reci_tks_max', 'q_reci_tks_mean', 'u_reci_tks_sum', 
#               'u_reci_tks_max', 'u_reci_tks_mean', 'q_reci_xxx_sum', 'q_reci_xxx_max', 'q_reci_xxx_mean', 
#               'u_reci_xxx_sum', 'u_reci_xxx_max', 'u_reci_xxx_mean', 'q_reci_no_help_sum', 'q_reci_no_help_max',
#               'q_reci_no_help_mean', 'u_reci_no_help_sum', 'u_reci_no_help_max', 'u_reci_no_help_mean', 
#               'q_reci_dis_sum', 'q_reci_dis_max', 'q_reci_dis_mean', 'u_reci_dis_sum', 'u_reci_dis_max', 
#               'u_reci_dis_mean']

feature_with_day = [x for x in data.columns if x not in drop_feat]
feature_cols = [x for x in data.columns if x not in drop_feat+['day']]
# feature_cols

In [15]:
logging.info("feature size %s", len(feature_cols))

X_train_all = data.iloc[:len_train][feature_with_day]
y_train_all = data.iloc[:len_train]['label']
X_test = data.iloc[len_train:]
assert len(X_test) == sub_size

fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for index, (train_idx, val_idx) in enumerate(fold.split(X=X_train_all, y=y_train_all)):
    break

X_train, X_val, y_train, y_val = X_train_all.iloc[train_idx][feature_cols], X_train_all.iloc[val_idx][feature_cols], \
                                 y_train_all.iloc[train_idx], \
                                 y_train_all.iloc[val_idx]
del X_train_all

# X_train = X_train_all.loc[X_train_all['day']<3867, feature_cols]
# X_val = X_train_all.loc[X_train_all['day']==3867, feature_cols]
# y_train = y_train_all[X_train_all['day']<3867]
# y_val = y_train_all[X_train_all['day']==3867]
# del X_train_all

logging.info("train shape %s, val shape %s, test shape %s", X_train.shape, X_val.shape, X_test.shape)


[2019-12-12 13:39:03,637] INFO in <ipython-input-15-3d50b0574904>: feature size 246
[2019-12-12 13:39:46,977] INFO in <ipython-input-15-3d50b0574904>: train shape (7591329, 246), val shape (1897833, 246), test shape (1141683, 250)


In [None]:
X_train.columns

In [16]:
# model_lgb = LGBMClassifier(n_estimators=2000, num_leaves=256, n_jobs=-1, objective='binary', learning_rate=0.01,
#                            seed=1000, silent=True, max_bin=425, subsample_for_bin=50000, min_split_gain=0,
#                           min_child_weight=5, min_child_samples=10, subsample=0.8, subsample_freq=1,
#                           colsample_bytree=1, reg_alpha=3, reg_lambda=5)
model_lgb = LGBMClassifier(n_estimators=2000, n_jobs=-1, objective='binary', seed=1000, silent=True)
model_lgb.fit(X_train, y_train,         
              eval_metric=['logloss', 'auc'],
              eval_set=[(X_val, y_val)],
              early_stopping_rounds=50)

[1]	valid_0's auc: 0.76832	valid_0's binary_logloss: 0.452018
Training until validation scores don't improve for 50 rounds
[2]	valid_0's auc: 0.7825	valid_0's binary_logloss: 0.440156
[3]	valid_0's auc: 0.793541	valid_0's binary_logloss: 0.429953
[4]	valid_0's auc: 0.801204	valid_0's binary_logloss: 0.421472
[5]	valid_0's auc: 0.80476	valid_0's binary_logloss: 0.414123
[6]	valid_0's auc: 0.807786	valid_0's binary_logloss: 0.40804
[7]	valid_0's auc: 0.810297	valid_0's binary_logloss: 0.402718
[8]	valid_0's auc: 0.812994	valid_0's binary_logloss: 0.397905
[9]	valid_0's auc: 0.815288	valid_0's binary_logloss: 0.393477
[10]	valid_0's auc: 0.816925	valid_0's binary_logloss: 0.38966
[11]	valid_0's auc: 0.818509	valid_0's binary_logloss: 0.38636
[12]	valid_0's auc: 0.819951	valid_0's binary_logloss: 0.383167
[13]	valid_0's auc: 0.821471	valid_0's binary_logloss: 0.38041
[14]	valid_0's auc: 0.822615	valid_0's binary_logloss: 0.377905
[15]	valid_0's auc: 0.823934	valid_0's binary_logloss: 0.375

[129]	valid_0's auc: 0.861602	valid_0's binary_logloss: 0.325045
[130]	valid_0's auc: 0.861684	valid_0's binary_logloss: 0.32495
[131]	valid_0's auc: 0.861773	valid_0's binary_logloss: 0.32486
[132]	valid_0's auc: 0.861862	valid_0's binary_logloss: 0.324766
[133]	valid_0's auc: 0.861931	valid_0's binary_logloss: 0.324679
[134]	valid_0's auc: 0.862029	valid_0's binary_logloss: 0.32457
[135]	valid_0's auc: 0.862082	valid_0's binary_logloss: 0.324505
[136]	valid_0's auc: 0.862184	valid_0's binary_logloss: 0.32441
[137]	valid_0's auc: 0.862244	valid_0's binary_logloss: 0.32432
[138]	valid_0's auc: 0.862337	valid_0's binary_logloss: 0.324217
[139]	valid_0's auc: 0.862448	valid_0's binary_logloss: 0.32409
[140]	valid_0's auc: 0.862554	valid_0's binary_logloss: 0.323992
[141]	valid_0's auc: 0.862629	valid_0's binary_logloss: 0.323908
[142]	valid_0's auc: 0.862725	valid_0's binary_logloss: 0.323811
[143]	valid_0's auc: 0.862793	valid_0's binary_logloss: 0.323735
[144]	valid_0's auc: 0.862871	v

[256]	valid_0's auc: 0.869249	valid_0's binary_logloss: 0.316635
[257]	valid_0's auc: 0.869274	valid_0's binary_logloss: 0.316608
[258]	valid_0's auc: 0.869336	valid_0's binary_logloss: 0.316554
[259]	valid_0's auc: 0.869368	valid_0's binary_logloss: 0.316517
[260]	valid_0's auc: 0.869409	valid_0's binary_logloss: 0.316476
[261]	valid_0's auc: 0.869448	valid_0's binary_logloss: 0.316437
[262]	valid_0's auc: 0.869483	valid_0's binary_logloss: 0.316389
[263]	valid_0's auc: 0.86956	valid_0's binary_logloss: 0.316294
[264]	valid_0's auc: 0.869595	valid_0's binary_logloss: 0.316262
[265]	valid_0's auc: 0.869638	valid_0's binary_logloss: 0.316215
[266]	valid_0's auc: 0.869683	valid_0's binary_logloss: 0.316166
[267]	valid_0's auc: 0.869721	valid_0's binary_logloss: 0.316123
[268]	valid_0's auc: 0.869747	valid_0's binary_logloss: 0.316097
[269]	valid_0's auc: 0.869793	valid_0's binary_logloss: 0.316049
[270]	valid_0's auc: 0.86984	valid_0's binary_logloss: 0.315995
[271]	valid_0's auc: 0.8698

[383]	valid_0's auc: 0.872791	valid_0's binary_logloss: 0.312774
[384]	valid_0's auc: 0.872835	valid_0's binary_logloss: 0.312715
[385]	valid_0's auc: 0.872857	valid_0's binary_logloss: 0.312694
[386]	valid_0's auc: 0.872872	valid_0's binary_logloss: 0.312679
[387]	valid_0's auc: 0.87291	valid_0's binary_logloss: 0.312636
[388]	valid_0's auc: 0.872944	valid_0's binary_logloss: 0.312605
[389]	valid_0's auc: 0.872967	valid_0's binary_logloss: 0.312583
[390]	valid_0's auc: 0.872988	valid_0's binary_logloss: 0.312557
[391]	valid_0's auc: 0.873013	valid_0's binary_logloss: 0.312529
[392]	valid_0's auc: 0.873047	valid_0's binary_logloss: 0.312496
[393]	valid_0's auc: 0.87308	valid_0's binary_logloss: 0.312459
[394]	valid_0's auc: 0.873094	valid_0's binary_logloss: 0.312441
[395]	valid_0's auc: 0.873117	valid_0's binary_logloss: 0.312418
[396]	valid_0's auc: 0.873134	valid_0's binary_logloss: 0.312401
[397]	valid_0's auc: 0.873146	valid_0's binary_logloss: 0.312388
[398]	valid_0's auc: 0.8731

[510]	valid_0's auc: 0.874922	valid_0's binary_logloss: 0.310425
[511]	valid_0's auc: 0.874932	valid_0's binary_logloss: 0.310413
[512]	valid_0's auc: 0.87494	valid_0's binary_logloss: 0.310405
[513]	valid_0's auc: 0.874968	valid_0's binary_logloss: 0.310377
[514]	valid_0's auc: 0.874974	valid_0's binary_logloss: 0.310371
[515]	valid_0's auc: 0.874981	valid_0's binary_logloss: 0.310363
[516]	valid_0's auc: 0.874996	valid_0's binary_logloss: 0.310347
[517]	valid_0's auc: 0.875	valid_0's binary_logloss: 0.310343
[518]	valid_0's auc: 0.875011	valid_0's binary_logloss: 0.31033
[519]	valid_0's auc: 0.875018	valid_0's binary_logloss: 0.310321
[520]	valid_0's auc: 0.875024	valid_0's binary_logloss: 0.310312
[521]	valid_0's auc: 0.875049	valid_0's binary_logloss: 0.310284
[522]	valid_0's auc: 0.875071	valid_0's binary_logloss: 0.310255
[523]	valid_0's auc: 0.875084	valid_0's binary_logloss: 0.310241
[524]	valid_0's auc: 0.875102	valid_0's binary_logloss: 0.310222
[525]	valid_0's auc: 0.875113	

[637]	valid_0's auc: 0.876388	valid_0's binary_logloss: 0.3088
[638]	valid_0's auc: 0.876394	valid_0's binary_logloss: 0.308793
[639]	valid_0's auc: 0.876399	valid_0's binary_logloss: 0.308789
[640]	valid_0's auc: 0.876422	valid_0's binary_logloss: 0.308763
[641]	valid_0's auc: 0.876428	valid_0's binary_logloss: 0.308755
[642]	valid_0's auc: 0.876447	valid_0's binary_logloss: 0.308734
[643]	valid_0's auc: 0.876461	valid_0's binary_logloss: 0.308719
[644]	valid_0's auc: 0.876476	valid_0's binary_logloss: 0.308703
[645]	valid_0's auc: 0.876481	valid_0's binary_logloss: 0.308698
[646]	valid_0's auc: 0.876483	valid_0's binary_logloss: 0.308696
[647]	valid_0's auc: 0.876495	valid_0's binary_logloss: 0.308676
[648]	valid_0's auc: 0.876501	valid_0's binary_logloss: 0.308669
[649]	valid_0's auc: 0.876506	valid_0's binary_logloss: 0.308663
[650]	valid_0's auc: 0.876513	valid_0's binary_logloss: 0.308654
[651]	valid_0's auc: 0.876514	valid_0's binary_logloss: 0.308653
[652]	valid_0's auc: 0.8765

[764]	valid_0's auc: 0.877459	valid_0's binary_logloss: 0.307589
[765]	valid_0's auc: 0.877463	valid_0's binary_logloss: 0.307584
[766]	valid_0's auc: 0.877465	valid_0's binary_logloss: 0.307581
[767]	valid_0's auc: 0.877468	valid_0's binary_logloss: 0.307578
[768]	valid_0's auc: 0.877468	valid_0's binary_logloss: 0.307577
[769]	valid_0's auc: 0.877473	valid_0's binary_logloss: 0.307572
[770]	valid_0's auc: 0.877477	valid_0's binary_logloss: 0.307566
[771]	valid_0's auc: 0.877485	valid_0's binary_logloss: 0.307558
[772]	valid_0's auc: 0.877493	valid_0's binary_logloss: 0.307548
[773]	valid_0's auc: 0.877508	valid_0's binary_logloss: 0.307533
[774]	valid_0's auc: 0.877511	valid_0's binary_logloss: 0.30753
[775]	valid_0's auc: 0.877519	valid_0's binary_logloss: 0.307521
[776]	valid_0's auc: 0.877528	valid_0's binary_logloss: 0.307513
[777]	valid_0's auc: 0.87753	valid_0's binary_logloss: 0.30751
[778]	valid_0's auc: 0.877532	valid_0's binary_logloss: 0.307508
[779]	valid_0's auc: 0.87753

[891]	valid_0's auc: 0.878383	valid_0's binary_logloss: 0.306517
[892]	valid_0's auc: 0.878395	valid_0's binary_logloss: 0.306503
[893]	valid_0's auc: 0.878418	valid_0's binary_logloss: 0.306481
[894]	valid_0's auc: 0.87842	valid_0's binary_logloss: 0.306479
[895]	valid_0's auc: 0.878423	valid_0's binary_logloss: 0.306476
[896]	valid_0's auc: 0.878429	valid_0's binary_logloss: 0.306468
[897]	valid_0's auc: 0.878441	valid_0's binary_logloss: 0.306455
[898]	valid_0's auc: 0.878456	valid_0's binary_logloss: 0.306438
[899]	valid_0's auc: 0.878473	valid_0's binary_logloss: 0.306422
[900]	valid_0's auc: 0.878475	valid_0's binary_logloss: 0.30642
[901]	valid_0's auc: 0.878478	valid_0's binary_logloss: 0.306416
[902]	valid_0's auc: 0.878479	valid_0's binary_logloss: 0.306415
[903]	valid_0's auc: 0.878479	valid_0's binary_logloss: 0.306414
[904]	valid_0's auc: 0.878481	valid_0's binary_logloss: 0.306412
[905]	valid_0's auc: 0.878491	valid_0's binary_logloss: 0.306401
[906]	valid_0's auc: 0.8785

[1018]	valid_0's auc: 0.879151	valid_0's binary_logloss: 0.305654
[1019]	valid_0's auc: 0.879161	valid_0's binary_logloss: 0.305643
[1020]	valid_0's auc: 0.879163	valid_0's binary_logloss: 0.305641
[1021]	valid_0's auc: 0.879163	valid_0's binary_logloss: 0.305641
[1022]	valid_0's auc: 0.879164	valid_0's binary_logloss: 0.30564
[1023]	valid_0's auc: 0.87917	valid_0's binary_logloss: 0.305633
[1024]	valid_0's auc: 0.879171	valid_0's binary_logloss: 0.305632
[1025]	valid_0's auc: 0.879177	valid_0's binary_logloss: 0.305625
[1026]	valid_0's auc: 0.87919	valid_0's binary_logloss: 0.30561
[1027]	valid_0's auc: 0.879196	valid_0's binary_logloss: 0.305603
[1028]	valid_0's auc: 0.879203	valid_0's binary_logloss: 0.305595
[1029]	valid_0's auc: 0.879207	valid_0's binary_logloss: 0.305592
[1030]	valid_0's auc: 0.879213	valid_0's binary_logloss: 0.305585
[1031]	valid_0's auc: 0.879213	valid_0's binary_logloss: 0.305584
[1032]	valid_0's auc: 0.879216	valid_0's binary_logloss: 0.305582
[1033]	valid_0

[1143]	valid_0's auc: 0.879848	valid_0's binary_logloss: 0.304852
[1144]	valid_0's auc: 0.879852	valid_0's binary_logloss: 0.304847
[1145]	valid_0's auc: 0.879853	valid_0's binary_logloss: 0.304846
[1146]	valid_0's auc: 0.879854	valid_0's binary_logloss: 0.304845
[1147]	valid_0's auc: 0.879855	valid_0's binary_logloss: 0.304844
[1148]	valid_0's auc: 0.879858	valid_0's binary_logloss: 0.304841
[1149]	valid_0's auc: 0.879863	valid_0's binary_logloss: 0.304835
[1150]	valid_0's auc: 0.879864	valid_0's binary_logloss: 0.304834
[1151]	valid_0's auc: 0.879868	valid_0's binary_logloss: 0.304829
[1152]	valid_0's auc: 0.879881	valid_0's binary_logloss: 0.304814
[1153]	valid_0's auc: 0.879887	valid_0's binary_logloss: 0.304808
[1154]	valid_0's auc: 0.879894	valid_0's binary_logloss: 0.304801
[1155]	valid_0's auc: 0.879901	valid_0's binary_logloss: 0.304794
[1156]	valid_0's auc: 0.879902	valid_0's binary_logloss: 0.304792
[1157]	valid_0's auc: 0.879918	valid_0's binary_logloss: 0.304774
[1158]	val

[1268]	valid_0's auc: 0.880449	valid_0's binary_logloss: 0.304133
[1269]	valid_0's auc: 0.88046	valid_0's binary_logloss: 0.304122
[1270]	valid_0's auc: 0.88046	valid_0's binary_logloss: 0.304121
[1271]	valid_0's auc: 0.880462	valid_0's binary_logloss: 0.30412
[1272]	valid_0's auc: 0.880462	valid_0's binary_logloss: 0.304119
[1273]	valid_0's auc: 0.88047	valid_0's binary_logloss: 0.30411
[1274]	valid_0's auc: 0.880476	valid_0's binary_logloss: 0.304104
[1275]	valid_0's auc: 0.880479	valid_0's binary_logloss: 0.3041
[1276]	valid_0's auc: 0.880491	valid_0's binary_logloss: 0.304088
[1277]	valid_0's auc: 0.880493	valid_0's binary_logloss: 0.304086
[1278]	valid_0's auc: 0.880497	valid_0's binary_logloss: 0.304081
[1279]	valid_0's auc: 0.880504	valid_0's binary_logloss: 0.304074
[1280]	valid_0's auc: 0.880503	valid_0's binary_logloss: 0.304074
[1281]	valid_0's auc: 0.880507	valid_0's binary_logloss: 0.30407
[1282]	valid_0's auc: 0.880512	valid_0's binary_logloss: 0.304065
[1283]	valid_0's a

[1393]	valid_0's auc: 0.881018	valid_0's binary_logloss: 0.303471
[1394]	valid_0's auc: 0.881018	valid_0's binary_logloss: 0.303471
[1395]	valid_0's auc: 0.881027	valid_0's binary_logloss: 0.30346
[1396]	valid_0's auc: 0.88103	valid_0's binary_logloss: 0.303458
[1397]	valid_0's auc: 0.881032	valid_0's binary_logloss: 0.303453
[1398]	valid_0's auc: 0.881045	valid_0's binary_logloss: 0.303441
[1399]	valid_0's auc: 0.881046	valid_0's binary_logloss: 0.303439
[1400]	valid_0's auc: 0.88105	valid_0's binary_logloss: 0.303434
[1401]	valid_0's auc: 0.881051	valid_0's binary_logloss: 0.303433
[1402]	valid_0's auc: 0.881054	valid_0's binary_logloss: 0.303429
[1403]	valid_0's auc: 0.881054	valid_0's binary_logloss: 0.303429
[1404]	valid_0's auc: 0.881055	valid_0's binary_logloss: 0.303428
[1405]	valid_0's auc: 0.881055	valid_0's binary_logloss: 0.303428
[1406]	valid_0's auc: 0.881056	valid_0's binary_logloss: 0.303426
[1407]	valid_0's auc: 0.881057	valid_0's binary_logloss: 0.303425
[1408]	valid_

[1518]	valid_0's auc: 0.881504	valid_0's binary_logloss: 0.302903
[1519]	valid_0's auc: 0.881505	valid_0's binary_logloss: 0.302902
[1520]	valid_0's auc: 0.881506	valid_0's binary_logloss: 0.302901
[1521]	valid_0's auc: 0.881507	valid_0's binary_logloss: 0.3029
[1522]	valid_0's auc: 0.881508	valid_0's binary_logloss: 0.302899
[1523]	valid_0's auc: 0.881511	valid_0's binary_logloss: 0.302896
[1524]	valid_0's auc: 0.881511	valid_0's binary_logloss: 0.302896
[1525]	valid_0's auc: 0.881511	valid_0's binary_logloss: 0.302896
[1526]	valid_0's auc: 0.881511	valid_0's binary_logloss: 0.302896
[1527]	valid_0's auc: 0.881515	valid_0's binary_logloss: 0.302891
[1528]	valid_0's auc: 0.881516	valid_0's binary_logloss: 0.30289
[1529]	valid_0's auc: 0.881515	valid_0's binary_logloss: 0.302891
[1530]	valid_0's auc: 0.881517	valid_0's binary_logloss: 0.302888
[1531]	valid_0's auc: 0.88152	valid_0's binary_logloss: 0.302884
[1532]	valid_0's auc: 0.881523	valid_0's binary_logloss: 0.302879
[1533]	valid_0

[1643]	valid_0's auc: 0.88187	valid_0's binary_logloss: 0.302473
[1644]	valid_0's auc: 0.881873	valid_0's binary_logloss: 0.302471
[1645]	valid_0's auc: 0.881878	valid_0's binary_logloss: 0.302465
[1646]	valid_0's auc: 0.881884	valid_0's binary_logloss: 0.302458
[1647]	valid_0's auc: 0.881884	valid_0's binary_logloss: 0.302458
[1648]	valid_0's auc: 0.881884	valid_0's binary_logloss: 0.302458
[1649]	valid_0's auc: 0.881886	valid_0's binary_logloss: 0.302456
[1650]	valid_0's auc: 0.881887	valid_0's binary_logloss: 0.302455
[1651]	valid_0's auc: 0.881887	valid_0's binary_logloss: 0.302455
[1652]	valid_0's auc: 0.881888	valid_0's binary_logloss: 0.302454
[1653]	valid_0's auc: 0.881896	valid_0's binary_logloss: 0.302441
[1654]	valid_0's auc: 0.881909	valid_0's binary_logloss: 0.302425
[1655]	valid_0's auc: 0.881913	valid_0's binary_logloss: 0.302421
[1656]	valid_0's auc: 0.881915	valid_0's binary_logloss: 0.30242
[1657]	valid_0's auc: 0.881918	valid_0's binary_logloss: 0.302417
[1658]	valid

[1768]	valid_0's auc: 0.88225	valid_0's binary_logloss: 0.302035
[1769]	valid_0's auc: 0.882254	valid_0's binary_logloss: 0.302029
[1770]	valid_0's auc: 0.882257	valid_0's binary_logloss: 0.302027
[1771]	valid_0's auc: 0.882264	valid_0's binary_logloss: 0.302019
[1772]	valid_0's auc: 0.882267	valid_0's binary_logloss: 0.302013
[1773]	valid_0's auc: 0.882268	valid_0's binary_logloss: 0.302012
[1774]	valid_0's auc: 0.882268	valid_0's binary_logloss: 0.302012
[1775]	valid_0's auc: 0.882268	valid_0's binary_logloss: 0.302012
[1776]	valid_0's auc: 0.882268	valid_0's binary_logloss: 0.302011
[1777]	valid_0's auc: 0.88227	valid_0's binary_logloss: 0.30201
[1778]	valid_0's auc: 0.882271	valid_0's binary_logloss: 0.302009
[1779]	valid_0's auc: 0.882274	valid_0's binary_logloss: 0.302004
[1780]	valid_0's auc: 0.882285	valid_0's binary_logloss: 0.301992
[1781]	valid_0's auc: 0.88229	valid_0's binary_logloss: 0.301986
[1782]	valid_0's auc: 0.882298	valid_0's binary_logloss: 0.301975
[1783]	valid_0

[1893]	valid_0's auc: 0.882626	valid_0's binary_logloss: 0.301591
[1894]	valid_0's auc: 0.882627	valid_0's binary_logloss: 0.30159
[1895]	valid_0's auc: 0.882628	valid_0's binary_logloss: 0.301589
[1896]	valid_0's auc: 0.882629	valid_0's binary_logloss: 0.301588
[1897]	valid_0's auc: 0.882635	valid_0's binary_logloss: 0.301582
[1898]	valid_0's auc: 0.882643	valid_0's binary_logloss: 0.301575
[1899]	valid_0's auc: 0.882644	valid_0's binary_logloss: 0.301573
[1900]	valid_0's auc: 0.882646	valid_0's binary_logloss: 0.301567
[1901]	valid_0's auc: 0.882647	valid_0's binary_logloss: 0.301566
[1902]	valid_0's auc: 0.882648	valid_0's binary_logloss: 0.301566
[1903]	valid_0's auc: 0.882647	valid_0's binary_logloss: 0.301567
[1904]	valid_0's auc: 0.882647	valid_0's binary_logloss: 0.301566
[1905]	valid_0's auc: 0.882648	valid_0's binary_logloss: 0.301565
[1906]	valid_0's auc: 0.882648	valid_0's binary_logloss: 0.301565
[1907]	valid_0's auc: 0.882649	valid_0's binary_logloss: 0.301563
[1908]	vali

LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
               importance_type='split', learning_rate=0.1, max_depth=-1,
               min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
               n_estimators=2000, n_jobs=-1, num_leaves=31, objective='binary',
               random_state=None, reg_alpha=0.0, reg_lambda=0.0, seed=1000,
               silent=True, subsample=1.0, subsample_for_bin=200000,
               subsample_freq=0)

In [None]:
pickle.dump(model_lgb, open('./model/model.pkl', 'wb'))

In [17]:
sub['label'] = model_lgb.predict_proba(X_test[feature_cols])[:, 1]
sub.to_csv('./result/2000_0.882887.txt', index=None, header=None, sep='\t')

In [18]:
fi = pd.DataFrame({'feature': feature_cols, 'imp': model_lgb.feature_importances_})
fi['rate'] = fi['imp'] / fi['imp'].sum()
fi_sorted = fi.sort_values(by='rate', ascending=False)

In [19]:
fi_sorted.to_csv('./fi/fi.txt', sep='\t')

In [20]:
# import pickle
# pickle.dump(fi_sorted, open('./feature_importance.pkl', 'wb'))

In [21]:
fi_sorted[:60]

Unnamed: 0,feature,imp,rate
138,score,2050,0.034201
237,uid_enc_count,1608,0.026827
93,qid_hour_count,1588,0.026493
153,u_inv_kfold_mean,1318,0.021989
84,qid_day_count,1156,0.019286
147,u_ans_kfold_count,1149,0.019169
227,uid_hour_std,1067,0.017801
152,u_inv_kfold_count,1061,0.017701
14,diff_iq_hour,1017,0.016967
45,hour,994,0.016583


In [22]:
fi_sorted[-60:]

Unnamed: 0,feature,imp,rate
179,uf_b2_qid_day_labelsum_kfold_mean,32,0.000534
182,uf_b3_qid_day_labelneg_kfold_mean,31,0.000517
62,intersection_it_count_uid_day_labelsum_kfold_mean,31,0.000517
142,score_qid_day_labelneg_kfold_median,28,0.000467
168,uf_b1_qid_day_labelneg_kfold_mean,28,0.000467
184,uf_b3_qid_day_labelrate_kfold_mean,26,0.000434
213,uf_c5_qid_day_labelsum_kfold_mean,25,0.000417
170,uf_b1_qid_day_labelrate_kfold_mean,25,0.000417
186,uf_b3_qid_day_labelsum_kfold_mean,24,0.0004
108,qu_topic_count_qid_day_labelcnt_kfold_median,24,0.0004
