In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
# import seaborn as sns
import pickle
import gc
import time
from tqdm import tqdm

%matplotlib inline

#每次可以输出多个变量
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from pylab import rcParams
rcParams['figure.figsize'] = 14, 6

import warnings
warnings.filterwarnings("ignore")

#中文字体
import matplotlib
matplotlib.use('qt4agg')
#指定默认字体
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['font.family'] = 'sans-serif'
#解决负号'-'显示为方块的问题
matplotlib.rcParams['axes.unicode_minus'] = False

In [2]:
from sklearn.ensemble import GradientBoostingClassifier
import random
from sklearn.model_selection import GridSearchCV
from collections import Counter
from sklearn.model_selection import StratifiedKFold

### Data

In [3]:
origin = [
    'item_price_level', 'item_sales_level', 'item_collected_level',
    'item_pv_level', 'user_gender_id', 'user_age_level', 'user_occupation_id',
    'user_star_level', 'context_page_id', 'shop_review_num_level',
    'shop_review_positive_rate', 'shop_star_level', 'shop_score_service',
    'shop_score_delivery', 'shop_score_description'
]

features_zero = [
    'user_count_first_time_sub_full_time', 'user_item_day_num',
    'user_trade_hot', 'user_hour_query', 'yesterhour',
    'user_minute_count', 'item_occupation_trade_count', 'user_context_count',
    'user_sell_power_mean_item_minus', 'city_trade_hot', 'city_item_count_his',
    'user_count_first_time_sub', 'user_item_next_time_sub_full_time',
    'user_item_next_time_sub', 'item_property_split_count', 'city_item_count',
    'max_click', 'brand_trade_percent', 'user_item_num',
    'user_shop_next_time_sub_full_time', 'city_brand_count', 'item_trade_hot',
    'user_sell_power', 'user_sell_trade_power', 'brand_item_count',
    'item_trade_percent', 'user_hot', 'user_shop_count_first_time_sub',
    'user_item_day_num_full_time', 'is_last_click', 'item_minute_query',
    'user_is_his', 'shop_item_count_his', 'user_item_last_time_sub_full_time',
    'user_shop_next_time_sub', 'max_click_time_sub', 'user_item_count',
    'item_score', 'last_time_sub', 'user_sell_power_mean',
    'user_shop_last_time_sub', 'item_hot', 'user_sell_trade_power_mean',
    'user_sell_trade_power_mean_item_minus', 'last_time_sub_full_time',
    'brand_trade_hot', 'item_gender_trade_count',
    'next_time_sub_full_time', 'max_click_full_time',
    'user_shop_last_time_sub_full_time', 'user_item_hour_query',
    'shop_item_count', 'user_minute_query', 'brand_item_count_his',
    'item_age_trade_count', 'user_day_query', 'max_user_item_click_time_sub',
    'item_is_trade_his', 'item_pred_category_score_item%', 'user_query_day',
    'item_day_query', 'city_hot', 'user_item_count_first_time_sub',
    'item_score2', 'user_query_day_hour', 'hour', 'city_brand_count_his',
    'is_last_user_item_click', 'user_shop_num_full_time',
    'item_pred_category_score', 'user_item_day_query',
    'user_item_last_time_sub', 'user_is_trade_his',
    'user_shop_day_num_full_time', 'brand_hot', 'user_shop_day_num',
    'user_count', 'shop_trade_hot', 'is_last_user_item_click_full_time',
    'next_time_sub', 'user_shop_num', 'shop_hot',
    'user_shop_count_first_time_sub_full_time',
    'item_pred_property_score_item%',
    'user_item_count_first_time_sub_full_time', 'max_user_item_click',
    'max_user_item_click_time_sub_full_time', 'user_trade_percent',
    'item_hour_query', 'user_yesterday_query', 'min_time', 'user_query_minute',
    'max_user_item_click_full_time', 'is_last_click_full_time'
]

features_cjf = ['user_lastquery_timedelta','user_lastquery','item_price_level_shop_id_bayes_rate_2','item_price_level_shop_id_bayes_rate_1','item_price_stair_shop_id_bayes_rate_1','item_id_bayes_rate_1','item_price_stair_shop_id_bayes_rate_2','item_id_shop_id_bayes_rate_1','item_id_bayes_rate_2','item_id_shop_id_bayes_rate_2','item_id_shop_star_level_bayes_rate_1','item_city_id_shop_id_bayes_rate_2','item_id_shop_review_num_level_bayes_rate_2','item_id_shop_star_level_bayes_rate_2','item_sales_stair_shop_id_bayes_rate_2','item_city_id_shop_id_bayes_rate_1','item_sales_stair_shop_id_bayes_rate_1','shop_id_bayes_rate_2','item_id_shop_review_num_level_bayes_rate_1','item_pv_stair_shop_id_bayes_rate_2','item_brand_id_shop_id_bayes_rate_1','item_pv_stair_shop_id_bayes_rate_1','item_brand_id_shop_id_bayes_rate_2','item_id_gender_filled_bayes_rate_1','item_id_gender_filled_bayes_rate_2','shop_id_gender_filled_bayes_rate_1','shop_id_gender_filled_bayes_rate_2','item_id_user_star_stair_bayes_rate_1','shop_id_user_star_stair_bayes_rate_1','shop_id_user_star_stair_bayes_rate_2','item_collected_level_shop_id_bayes_rate_1','item_collected_level_shop_id_bayes_rate_2','user_query_max_score7_percent','item_sales_level_shop_id_bayes_rate_1','item_sales_level_shop_id_bayes_rate_2','item_pv_level_shop_id_bayes_rate_1','user_query_max_score3_percent','user_query_max_score6_percent','user_query_max_score8_percent','shop_id_user_occupation_stair_bayes_rate_2','shop_id_user_occupation_stair_bayes_rate_1','user_query_max_score5_percent','shop_id_user_age_stair_bayes_rate_2','user_query_max_score1_percent','user_query_max_score2_percent','context_page_stair_shop_id_bayes_rate_2','context_page_stair_shop_id_bayes_rate_1','shop_id_user_age_stair_bayes_rate_1','item_id_user_occupation_stair_bayes_rate_2','item_id_user_gender_id_bayes_rate_1','context_page_stair_item_id_bayes_rate_1','context_page_stair_item_id_bayes_rate_2','shop_id_user_gender_id_bayes_rate_2','item_id_user_occupation_stair_bayes_rate_1','item_id_user_gender_id_bayes_rate_2','shop_id_user_gender_id_bayes_rate_1','item_id_user_age_stair_bayes_rate_2','item_id_user_age_stair_bayes_rate_1','user_query_max_score9_percent','shop_id_user_occupation_id_bayes_rate_2','shop_id_user_occupation_id_bayes_rate_1','user_query_max_score4_percent','user_total_query_times','item_brand_id_shop_star_level_bayes_rate_2','item_brand_id_shop_review_num_level_bayes_rate_1','item_brand_id_shop_star_level_bayes_rate_1','item_brand_id_shop_review_num_level_bayes_rate_2','item_id_user_occupation_id_bayes_rate_2','item_id_user_occupation_id_bayes_rate_1','item_brand_id_bayes_rate_1','item_brand_id_bayes_rate_2','user_query_queryitem_c_similarity_rank2','user_query_goodreview_rank','item_brand_id_gender_filled_bayes_rate_2','item_brand_id_gender_filled_bayes_rate_1','user_query_shopstar_rank','item_brand_id_user_star_stair_bayes_rate_1','user_query_service_rank','user_query_queryitem_c_similarity_rank','user_query_review_rank','item_brand_id_user_star_stair_bayes_rate_2','user_query_queryitem_p_similarity_rank2','user_query_delivery_rank','user_query_queryitem_p_similarity_rank','item_brand_id_user_occupation_stair_bayes_rate_2','shop_id_user_age_level_bayes_rate_2','item_brand_id_user_occupation_stair_bayes_rate_1','user_query_pv_rank','user_query_max_score10_rank','user_query_max_score9_rank','context_page_id_shop_id_bayes_rate_2','item_brand_id_user_age_stair_bayes_rate_1','item_brand_id_user_age_stair_bayes_rate_2','item_brand_id_user_gender_id_bayes_rate_1','item_brand_id_user_gender_id_bayes_rate_2','context_page_id_shop_id_bayes_rate_1','context_page_stair_item_brand_id_bayes_rate_2','context_page_id_item_id_bayes_rate_2','context_page_id_item_id_bayes_rate_1','item_brand_id_user_occupation_id_bayes_rate_1','user_query_collect_rank','user_query_price_rank','item_brand_id_user_occupation_id_bayes_rate_2','item_id_user_age_level_bayes_rate_2','item_id_user_age_level_bayes_rate_1','user_current_hour_query','user_query_sales_div_pv_percent','user_query_max_score2','user_query_max_score4_rank','user_query_collect_percent','user_query_price_percent','user_query_max_score3_rank','user_query_max_score8_rank','user_query_max_score6_rank','user_query_queryitem_p_similarity_percent','user_query_queryitem_p_similarity_percent2','shop_id_user_star_level_bayes_rate_1','shop_id_user_star_level_bayes_rate_2','user_today_query','user_today_query_percent','user_query_sale_rank','user_query_queryitem_c_similarity_percent2','user_query_sales_div_pv_rank','user_query_max_score5','user_query_pv_percent','user_query_shopstar_percent','item_id_user_star_level_bayes_rate_2','user_query_max_score7_rank','user_id_bayes_rate_2','item_brand_id_user_age_level_bayes_rate_1','item_id_user_star_level_bayes_rate_1','user_query_max_score1','item_brand_id_user_age_level_bayes_rate_2','user_query_queryitem_c_similarity_percent','user_query_review_percent','user_current_hour_percent','hour_shop_id_bayes_rate_1','user_id_bayes_rate_1','user_query_goodreview_percent','user_query_max_score3','user_query_max_score8','user_query_description_percent','context_page_id_item_brand_id_bayes_rate_1','context_page_id_item_brand_id_bayes_rate_2','item_pv_stair_user_id_bayes_rate_2','item_pv_stair_user_id_bayes_rate_1','user_query_max_score5_rank','user_query_service_percent','time_slice_user_gender_id_bayes_rate_2','user_query_max_score1_rank','user_query_max_score2_rank','time_slice_gender_filled_bayes_rate_1','time_slice_item_price_stair_bayes_rate_2','time_slice_user_gender_id_bayes_rate_1','time_slice_item_price_stair_bayes_rate_1','time_slice','time_slice_gender_filled_bayes_rate_2','hour_gender_filled_bayes_rate_2','hour_user_gender_id_bayes_rate_1','hour_gender_filled_bayes_rate_1','hour_bayes_rate_1','hour_item_pv_stair_bayes_rate_1','time_slice_user_age_level_bayes_rate_1','time_slice_bayes_rate_1','time_slice_bayes_rate_2','time_slice_item_pv_stair_bayes_rate_1','time_slice_item_pv_stair_bayes_rate_2','user_query_max_score6','hour_user_gender_id_bayes_rate_2','time_slice_user_age_stair_bayes_rate_1','hour','time_slice_user_age_stair_bayes_rate_2','time_slice_user_occupation_stair_bayes_rate_2','hour_bayes_rate_2','hour_item_price_stair_bayes_rate_1','hour_item_price_stair_bayes_rate_2','hour_item_pv_stair_bayes_rate_2','hour_user_age_stair_bayes_rate_1','time_slice_user_age_level_bayes_rate_2','time_slice_user_occupation_stair_bayes_rate_1','time_slice_user_star_stair_bayes_rate_2','hour_user_age_level_bayes_rate_1','item_brand_id_user_star_level_bayes_rate_2','item_brand_id_user_star_level_bayes_rate_1','time_slice_user_star_stair_bayes_rate_1','hour_item_brand_id_bayes_rate_2','time_slice_item_sales_stair_bayes_rate_1','hour_user_age_stair_bayes_rate_2','hour_user_star_stair_bayes_rate_2','time_slice_user_occupation_id_bayes_rate_2','hour_user_age_level_bayes_rate_2','hour_user_occupation_id_bayes_rate_2','time_slice_user_occupation_id_bayes_rate_1','hour_user_star_stair_bayes_rate_1','time_slice_item_sales_stair_bayes_rate_2','hour_item_id_bayes_rate_2','hour_item_brand_id_bayes_rate_1','hour_item_city_id_bayes_rate_2','hour_item_sales_stair_bayes_rate_1','hour_user_occupation_id_bayes_rate_1','hour_user_occupation_stair_bayes_rate_1','hour_item_city_id_bayes_rate_1','hour_user_occupation_stair_bayes_rate_2','hour_item_pv_level_bayes_rate_1','hour_item_id_bayes_rate_1','time_slice_item_price_level_bayes_rate_1','time_slice_item_price_level_bayes_rate_2','context_page_stair_user_id_bayes_rate_2','time_slice_item_sales_level_bayes_rate_1','time_slice_item_sales_level_bayes_rate_2','hour_item_sales_level_bayes_rate_1','hour_item_pv_level_bayes_rate_2','context_page_stair_user_id_bayes_rate_1','shop_today_query','time_slice_item_pv_level_bayes_rate_2','hour_user_star_level_bayes_rate_2','hour_user_star_level_bayes_rate_1','item_city_id_shop_star_level_bayes_rate_1','item_price_stair_user_id_bayes_rate_1','time_slice_item_collected_level_bayes_rate_2','time_slice_item_collected_level_bayes_rate_1','item_city_id_shop_star_level_bayes_rate_2','time_slice_item_pv_level_bayes_rate_1','hour_shop_review_num_level_bayes_rate_2','user_query_max_score9','time_slice_user_star_level_bayes_rate_1','hour_item_collected_level_bayes_rate_2','item_price_stair_user_id_bayes_rate_2','hour_shop_star_level_bayes_rate_1','item_city_id_shop_review_num_level_bayes_rate_1','hour_shop_star_level_bayes_rate_2','time_slice_item_city_id_bayes_rate_1','time_slice_shop_id_bayes_rate_2','time_slice_user_star_level_bayes_rate_2','item_city_id_shop_review_num_level_bayes_rate_2','hour_shop_review_num_level_bayes_rate_1','time_slice_item_city_id_bayes_rate_2','user_query_max_score7','shop_today_query_percent','time_slice_shop_id_bayes_rate_1','time_slice_shop_star_level_bayes_rate_2','time_slice_shop_review_num_level_bayes_rate_2','time_slice_shop_star_level_bayes_rate_1','user_query_maxreview','shop_current_hour_query','user_query_best_service','time_slice_shop_review_num_level_bayes_rate_1','user_query_maxshopstar','user_query_best_description','shop_cnt_dayavg_total','time_slice_item_brand_id_bayes_rate_2','user_query_best_delivery','time_slice_item_brand_id_bayes_rate_1','user_cnt_total','user_cnt_dayavg_total','category_1','shop_cnt_total','user_query_maxgoodreview','user_query_max_score4','shop_cnt_day','context_page_id_user_id_bayes_rate_2','item_sales_stair_user_id_bayes_rate_2','item_sales_stair_user_id_bayes_rate_1','context_page_id_user_id_bayes_rate_1','shop_current_hour_percent','user_query_maxcollect','query_category_0','item_price_category_percent','item_city_id_bayes_rate_2','user_query_maxpv','item_city_id_bayes_rate_1','time_slice_item_id_bayes_rate_1','item_city_id_gender_filled_bayes_rate_2','item_city_id_gender_filled_bayes_rate_1','item_city_id_user_star_stair_bayes_rate_2','item_city_id_user_star_stair_bayes_rate_1','brand_today_query','time_slice_item_id_bayes_rate_2','item_city_id_user_gender_id_bayes_rate_1','item_city_id_user_age_stair_bayes_rate_1','item_city_id_user_gender_id_bayes_rate_2','item_city_id_user_age_stair_bayes_rate_2','context_page_stair_item_city_id_bayes_rate_1','item_city_id_user_occupation_id_bayes_rate_1','brand_current_hour_percent','item_price_level_user_id_bayes_rate_2','item_city_id_user_occupation_id_bayes_rate_2','brand_current_hour_query','item_city_id_user_occupation_stair_bayes_rate_2','user_query_max_score10','item_price_category_rank','context_page_stair_item_city_id_bayes_rate_2','item_city_id_user_occupation_stair_bayes_rate_1','user_query_maxsell','item_city_id_user_age_level_bayes_rate_1','item_city_id_user_age_level_bayes_rate_2','item_city_id','user_query_maxsales_div_pv','item_city_sales_category_percent','item_city_id_user_star_level_bayes_rate_1','context_page_id_item_city_id_bayes_rate_1','item_city_id_user_star_level_bayes_rate_2','context_page_id_item_city_id_bayes_rate_2','user_query_maxqueryitem_c_similarity','property_count','category_2','item_brand_price_category_percent','item_brand_pv_category_rank','user_query_maxqueryitem_p_similarity','item_collected_level_user_id_bayes_rate_1','user_query_maxqueryitem_c_similarity2','property_0','item_city_pv_category_percent','item_city_price_category_percent','shop_star_level_user_id_bayes_rate_1','shop_star_level_user_id_bayes_rate_2','item_collected_level_user_id_bayes_rate_2','item_pv_level_user_id_bayes_rate_2','item_city_sales_div_pv_category_percent','item_pv_level_user_id_bayes_rate_1','property_27','item_city_collect_category_rank','property_26','shop_review_num_level_user_id_bayes_rate_1','shop_id_user_gender_id_percent','property_30','user_query_maxqueryitem_p_similarity2','property_29','item_collect_category_rank','item_city_collect_category_percent','property_28','item_brand_sales_category_rank','item_city_sales_category_rank','shop_review_num_level_user_id_bayes_rate_2','item_sales_level_user_id_bayes_rate_2','property_31','item_id_user_gender_id_percent','hour_user_id_bayes_rate_1','item_city_sales_div_pv_category_rank','user_query_cheapest','hour_user_id_bayes_rate_2','item_id_user_id_bayes_rate_1','item_brand_collect_category_rank','shop_avg_age','item_brand_sales_category_percent','item_brand_id_user_gender_id_percent','item_city_pv_category_rank','item_brand_sales_div_pv_category_rank','item_sales_level_user_id_bayes_rate_1','property_33','brand_avg_star','common_category_prob2','item_brand_price_category_rank','user_isnew','item_collect_category_percent','property_32','category_col_47','property_1','property_25','item_id_user_id_bayes_rate_2','item_sales_category_percent','brand_avg_age','property_36','shop_id','property_34','property_38','query_category_1','property_3','shop_id_user_id_bayes_rate_1','shop_avg_star','property_24','item_brand_collect_category_percent','item_sales_div_pv_rank','item_brand_sales_div_pv_category_percent','property_2','item_pv_category_rank','property_35','item_pv_category_percent','category_col_19','shop_id_user_id_bayes_rate_2','query_col_0','property_39','item_price_level_user_age_stair_bayes_rate_1','item_price_level_user_age_stair_bayes_rate_2','item_sales_category_rank','common_category','category_col_78','item_brand_pv_category_percent','common_category_prob','item_current_hour_query','shop_id_user_age_level_percent','property_22','property_6','property_12','item_price_level_bayes_rate_1','item_price_level_bayes_rate_2','shop_id_user_occupation_id_percent','item_avg_age','property_37','category_col_31','item_price_level_user_gender_id_bayes_rate_1','item_price_level_user_gender_id_bayes_rate_2','property_11','property_4','item_avg_star','user_cnt_day','item_price_stair_user_age_level_bayes_rate_2','item_price_level_user_star_stair_bayes_rate_1','item_price_level_user_star_stair_bayes_rate_2','property_19','user_shop_age_dist','item_price_stair_user_age_level_bayes_rate_1','item_price_stair_user_age_stair_bayes_rate_1','item_price_stair_user_age_stair_bayes_rate_2','item_collected_level_shop_review_num_level_bayes_rate_1','user_gender_id_bayes_rate_1','user_gender_id_bayes_rate_2','item_price_level_user_age_level_bayes_rate_1','property_10','context_page_id_user_age_stair_bayes_rate_2','item_collected_level_shop_review_num_level_bayes_rate_2','item_price_stair_user_occupation_id_bayes_rate_1','item_price_stair_user_occupation_id_bayes_rate_2','item_price_stair_gender_filled_bayes_rate_2','property_14','item_price_stair_gender_filled_bayes_rate_1','item_today_query','property_7','category_col_56','item_city_id_user_id_bayes_rate_2','item_price_level_gender_filled_bayes_rate_1','item_price_level_gender_filled_bayes_rate_2','user_brand_age_dist','category_col_84','item_pv_stair_gender_filled_bayes_rate_2','user_brand_star_dist','user_shop_dist_dist','item_brand_id','category_col_45','item_pv_stair_user_star_stair_bayes_rate_1','item_pv_stair_user_star_stair_bayes_rate_2','shop_review_num_level_bayes_rate_1','context_page_id_gender_filled_bayes_rate_1','context_page_id_user_star_stair_bayes_rate_2','item_price_stair_user_star_stair_bayes_rate_1','item_price_stair_user_star_stair_bayes_rate_2','item_collected_level_shop_star_level_bayes_rate_1','item_sales_level_user_gender_id_bayes_rate_2','category_col_59','item_collected_level_shop_star_level_bayes_rate_2','item_price_stair_bayes_rate_1','item_price_stair_bayes_rate_2','item_sales_level_user_gender_id_bayes_rate_1','property_8','time_slice_user_id_bayes_rate_2','item_cnt_dayavg_total','property_9','category_col_83']
features_cjf_new = ['user_query_max1_item_score','user_query_rank1_item_score', 'user_query_percent1_item_score','user_query_max1_item_score2', 'user_query_rank1_item_score2',
       'user_query_percent1_item_score2', 'user_query_max1_item_score3','user_query_rank1_item_score3', 'user_query_percent1_item_score3',
       'user_query_max1_shop_score', 'user_query_rank1_shop_score','user_query_percent1_shop_score', 'user_query_max2_item_score',
       'user_query_rank2_item_score', 'user_query_percent2_item_score','user_query_max2_item_score2', 'user_query_rank2_item_score2',
       'user_query_percent2_item_score2', 'user_query_max2_item_score3','user_query_rank2_item_score3', 'user_query_percent2_item_score3',
       'user_query_max2_shop_score', 'user_query_rank2_shop_score','user_query_percent2_shop_score']
features_cjf = features_cjf + features_cjf_new
features_cjf.remove('hour')
features_cjf.remove('user_cnt_day')
features_cjf.remove('shop_cnt_day')

features_cjy = ['item_cat_vec0', 'item_cat_vec1', 'item_cat_vec2',
       'item_cat_vec3', 'item_cat_vec4', 'item_cat_vec5', 'item_cat_vec6',
       'item_cat_vec7', 'item_cat_vec8', 'item_cat_vec9', 'item_cat_vec10',
       'item_cat_vec11', 'item_cat_vec12', 'item_cat_vec13', 'item_cat_vec14',
       'item_cat_vec15', 'shop_item_cnt', 'shop_brand_cnt',
       'item_pred_cat_cos', 'item_pred_ppt_cos', 'age_category_min',
       'gender_category_mean', 'occupation_category_max', 'star_category_max',
       'gender_category_min', 'star_category_min', 'occupation_category_min',
       '24h_cat_hot_item', '24h_ppt_hot_item', '24h_user_seem_times_item',
       '24h_cat_hot', '24h_ppt_hot', '24h_user_seem_times', 'item_id_pred_1',
       'shop_id_pred_1','item_brand_id_pred_1','user_id_pred_1','context_hour_pred_1',
       'hist_hour_ctr_pred_1','shop_item_cnt_pred_1','gender_category_min_pred_1','gender_category_mean_pred_1',
       'occupation_category_min_pred_1','item_brand_id+item_city_id_pred_1','item_brand_id+item_price_level_pred_1',
       'item_brand_id+item_sales_level_pred_1','item_brand_id+item_collected_level_pred_1','item_brand_id+item_pv_level_pred_1',
       'context_page_id+context_hour_pred_1','shop_review_num_level+shop_item_cnt_pred_1','shop_star_level+shop_item_cnt_pred_1',
       'shop_item_cnt+shop_brand_cnt_pred_1','shop_review_positive_rate+shop_item_cnt_pred_1','shop_review_num_level+shop_review_positive_rate_pred_1',
       'item_city_id+item_pv_level_pred_1','item_sales_level+item_collected_level_pred_1','item_city_id+item_price_level_pred_1',
       'shop_score_description+shop_item_cnt_pred_1','shop_score_delivery+shop_item_cnt_pred_1','shop_review_positive_rate+shop_star_level_pred_1',
       'item_price_level+item_collected_level_pred_1','shop_score_service+shop_score_delivery_pred_1','shop_star_level+shop_brand_cnt_pred_1',
       'item_city_id+item_collected_level_pred_1','item_city_id+item_sales_level_pred_1','shop_star_level+shop_score_description_pred_1',
       'shop_review_positive_rate+shop_score_service_pred_1','user_age_level+user_star_level_pred_1','shop_review_num_level+shop_brand_cnt_pred_1',
       'shop_score_service+shop_score_description_pred_1','shop_review_num_level+shop_score_service_pred_1','shop_star_level+shop_score_delivery_pred_1',
       'user_occupation_id+user_star_level_pred_1', 'next_time_sub_pred_1','max_click_time_sub_pred_1', 'is_last_click_pred_1', 'max_click_pred_1',
       'user_minute_query_pred_1', 'user_query_minute_pred_1','user_item_next_time_sub_pred_1', 'is_last_user_item_click_pred_1',
       'max_user_item_click_time_sub_pred_1', 'user_shop_next_time_sub_pred_1',
       'user_hour_query_pred_1', 'user_query_day_hour_pred_1','user_item_last_time_sub_pred_1', 'user_item_num_pred_1',
       'user_item_day_num_pred_1', 'user_item_count_first_time_sub_pred_1','user_query_day_pred_1', 'user_day_query_pred_1',
       'user_shop_count_first_time_sub_pred_1','user_shop_last_time_sub_pred_1', 'user_shop_num_pred_1',
       'user_shop_day_num_pred_1', 'yesterhour_pred_1', 'user_count_pred_1','user_item_hour_query_pred_1', 'user_item_day_query_pred_1',
       'max_user_item_click_pred_1', 'last_time_sub_pred_1','user_yesterday_query_pred_1', 'item_minute_query_pred_1']

target = ['is_trade']

by = ['instance_id']

In [4]:
predictors = list(set(origin + features_zero + features_cjf + features_cjy))
len(predictors)

691

In [5]:
%%time
train_zero = pd.read_csv('train_zero_model2_b.csv', usecols=origin+features_zero+by+target)
train_cjf = pd.read_csv('feature_full_v1_457_new.csv', usecols=features_cjf+by)
test_cjf = pd.read_csv('../df_test_457_v3.csv', usecols=features_cjf+by)
train_cjy = pd.read_csv('cjy_goodfeas_457th_05_11.csv')
train_cjy2 = pd.read_csv('cjy_45th_0512.csv')

train_cjf = pd.concat([train_cjf, test_cjf])
train_cjy = train_cjy.merge(train_cjy2, on=['instance_id'], how='left')

train_zero.shape
train_zero.head()
train_cjf.shape
train_cjf.head()
train_cjy.shape
train_cjy.head()

CPU times: user 10min 50s, sys: 4min 18s, total: 15min 9s
Wall time: 16min 17s


In [6]:
%%time
train = train_zero.merge(train_cjf, on=['instance_id'], how='left')
train = train.merge(train_cjy, on=['instance_id'], how='left')

train.shape

CPU times: user 2min 20s, sys: 16min 26s, total: 18min 47s
Wall time: 28min 52s


In [7]:
del test_cjf
del train_cjy2
del train_zero
del train_cjf
del train_cjy
gc.collect()

126

In [8]:
test = train[train.is_trade == -1]
train = train[(train.is_trade != -1) & (train.day != 7)]

test.shape
train.shape

(1729656, 642)

(2357860, 642)

In [16]:
train.to_csv('train_model2_mix_final.csv', index=None)
test.to_csv('test_model2_mix_final.csv', index=None)

## Feature Impantance

In [5]:
df = pd.read_csv('df_final_0514_new.csv')

train = df[df.is_trade_x != -1]
test = df[df.is_trade_x == -1]

train.shape
test.shape

(2357860, 697)

(1209768, 697)

In [6]:
import lightgbm as lgb

In [7]:
# lgb_cv
params = {
    'objective':'binary',
        # metric='binary_error',
        'num_leaves':7,
        'max_depth':3,
        'learning_rate':0.05,
        'reg_alpha' : .4,
        'reg_lambda' : .2,
        'random_state':1024,
        'colsample_bytree':0.8,
        'subsample':0.9,
#         'n_estimators':20000,
        'n_jobs': 31,
#         'device':'gpu',
        'histogram_pool_size':30270,
        'max_bin':63,
        'gpu_use_dp':True
}
        
len(predictors)

lgb_train = lgb.Dataset(train[predictors],label = train['is_trade_x'])
print('Start training')
cv = lgb.cv(params,lgb_train,num_boost_round=10000,nfold=3,early_stopping_rounds=50,seed=1024,verbose_eval=100)

print('Baseline->' + str(len(cv['binary_logloss-mean'])) + ':' + str(cv['binary_logloss-mean'][-1]))

691

Start training
[100]	cv_agg's binary_logloss: 0.0529051 + 7.42261e-05
[200]	cv_agg's binary_logloss: 0.0506994 + 0.000105093
[300]	cv_agg's binary_logloss: 0.0504029 + 0.000110148
[400]	cv_agg's binary_logloss: 0.0502717 + 0.000110548
[500]	cv_agg's binary_logloss: 0.0502 + 0.000109658
[600]	cv_agg's binary_logloss: 0.0501559 + 0.000108231
[700]	cv_agg's binary_logloss: 0.0501263 + 0.000108949
[800]	cv_agg's binary_logloss: 0.0501088 + 0.000108988
[900]	cv_agg's binary_logloss: 0.0500918 + 0.000112644
[1000]	cv_agg's binary_logloss: 0.0500808 + 0.000110474
[1100]	cv_agg's binary_logloss: 0.0500728 + 0.000112156
[1200]	cv_agg's binary_logloss: 0.0500635 + 0.000110713
Baseline->1219:0.0500619392475869


In [8]:
del lgb_train
gc.collect()

54

In [7]:
lgb_online = lgb.LGBMClassifier(
        objective='binary',
        metric='binary_logloss',
        num_leaves=7,#35,
        max_depth=3,
        learning_rate=0.05, # small_eta
        reg_alpha = .4,
        reg_lambda = .2,
        colsample_bytree = .8,
        subsample = .9,
        random_state=1024,
        n_estimators=1219,
        n_jobs = 31,
        histogram_pool_size=30270,
        max_bin=63
    )
len(predictors)

submit_model = lgb_online.fit(train[predictors], train['is_trade_x'])
print("train success.")
test['predicted_score'] = submit_model.predict_proba(test[predictors])[:,1]
print("predict success.")
test[['instance_id','predicted_score']].to_csv('resultWithHistory_round2(model2_lgb_final).txt',sep=' ',index=False)
print("save success.")

691

train success.
predict success.
save success.


In [None]:
len(list(submit_model.booster_.feature_name()))
len(list(submit_model.booster_.feature_importance()))

In [8]:
fea_importance = pd.DataFrame(list(zip(submit_model.booster_.feature_name(), submit_model.booster_.feature_importance())))

fea_importance.shape
fea_importance.sort_values(by=[1], ascending=False).head()

fea_importance = fea_importance.sort_values(by=[1], ascending=False)
fea_importance.to_csv('lgb_667_imp.csv', header=['feature_name', 'feature_importance'])

(691, 2)

Unnamed: 0,0,1
402,next_time_sub_pred_1,187
236,user_lastquery_timedelta,149
17,item_sales_level+item_collected_level_pred_1,97
484,max_click_time_sub_pred_1,81
472,user_age_level+user_star_level_pred_1,73


In [10]:
leaf_train = submit_model.booster_.predict(data=train[predictors], pred_leaf=True)
leaf_test = submit_model.booster_.predict(data=test[predictors], pred_leaf=True)

leaf_train.shape
leaf_test.shape

(2357860, 500)

(1729656, 500)

In [11]:
leaf_train = pd.DataFrame(leaf_train)
leaf_test = pd.DataFrame(leaf_test)

leaf_train['instance_id'] = train.instance_id
leaf_train['is_trade'] = train.is_trade
leaf_test['instance_id'] = test.instance_id
leaf_test['is_trade'] = test.is_trade

leaf_train.shape
leaf_train.head()
leaf_test.shape
leaf_test.head()

(2357860, 502)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,instance_id,is_trade
0,6,6,1,6,1,1,3,1,1,4,3,3,5,6,3,2,1,1,4,3,4,4,1,4,5,4,3,3,5,2,5,1,4,4,3,3,5,6,4,2,3,1,1,1,6,1,5,3,4,6,4,6,2,2,4,1,1,2,5,5,5,5,5,2,2,4,5,1,1,4,1,1,6,2,2,5,1,6,1,1,2,1,5,1,3,1,4,3,4,3,1,1,4,5,3,1,0,5,1,1,1,2,2,1,5,2,1,1,6,1,1,1,5,1,4,1,1,1,2,2,1,2,1,3,5,2,1,0,4,0,4,6,1,3,6,2,2,1,3,6,5,5,6,4,4,4,6,0,6,6,1,2,6,5,6,4,3,0,5,4,1,6,0,6,3,0,2,5,3,0,6,5,4,6,5,5,0,3,2,4,5,6,6,6,6,6,5,0,4,6,4,2,4,4,5,3,6,6,0,3,2,0,3,5,0,5,5,0,0,1,0,6,0,3,4,0,1,1,1,2,0,0,6,6,5,5,1,2,5,3,6,6,4,5,4,4,1,3,2,6,6,5,4,2,4,1,2,5,1,1,0,2,4,6,2,5,1,0,4,0,3,6,6,3,6,0,1,0,1,4,6,5,2,0,6,2,3,4,1,5,4,2,3,2,6,2,3,3,3,6,4,6,4,2,3,4,4,4,1,1,6,3,1,3,5,0,1,1,5,3,2,1,6,6,2,1,1,1,1,0,6,1,3,1,0,4,6,1,0,1,0,1,3,1,2,0,5,4,1,2,2,5,4,0,1,6,1,1,1,0,0,4,4,3,0,0,1,0,1,2,0,1,0,2,6,5,1,1,1,1,4,5,3,3,1,4,0,1,0,2,6,0,0,0,6,4,1,0,1,2,2,1,1,2,0,0,6,1,0,1,0,1,5,2,3,6,0,1,1,1,0,3,3,0,0,4,3,0,6,3,0,3,1,1,1,2,0,1,0,5,1,1,0,2,0,1,2,1,0,0,0,2,3,0,0,0,1,1,1,1,3,3,1,1,4,1,0,1,0,6,1,1,1,1,5,2,2,2,2,0,1,1,1,3,1,2,1,1,0,1,2,1,1,1,0,1,1,2,2,1,0,1,4,0,2,1,1,1,3,1,141832830997226270,0
1,0,5,0,5,0,0,0,0,0,0,4,5,0,0,0,0,6,0,0,5,5,0,0,5,5,5,6,0,4,0,3,0,5,4,6,0,0,4,4,3,6,6,0,6,3,5,6,5,6,3,6,5,3,4,3,4,3,5,3,3,4,4,3,5,5,5,5,4,5,6,4,3,5,4,5,3,5,5,5,3,4,3,6,1,5,4,3,6,3,5,4,2,3,6,3,5,2,1,2,6,1,3,1,5,1,2,3,1,3,3,4,5,3,6,4,2,3,2,3,4,1,4,2,2,6,6,1,3,3,3,5,2,6,6,6,5,2,6,2,2,0,5,1,6,4,3,1,4,4,6,1,5,6,2,6,5,3,4,5,3,5,6,6,2,4,6,3,6,4,2,1,5,0,6,5,5,5,3,3,2,1,2,2,6,6,6,3,6,4,6,3,5,0,3,1,3,5,3,2,5,2,6,6,4,0,5,1,3,4,4,1,6,0,2,4,0,1,1,0,4,4,6,2,6,5,3,6,0,5,5,0,3,4,3,6,5,1,2,3,4,2,5,4,2,5,6,5,6,1,1,3,2,1,5,0,6,3,0,5,1,5,3,3,2,6,0,2,5,4,0,4,6,2,0,6,2,3,4,2,4,4,2,2,5,6,2,3,2,3,3,4,0,4,2,3,3,2,5,1,1,6,3,2,5,3,3,3,1,5,1,3,2,6,6,2,1,1,2,1,0,6,1,3,0,0,4,4,1,5,1,4,1,3,1,0,0,3,4,1,2,3,5,6,4,1,5,0,1,1,1,0,4,1,3,6,0,1,0,1,3,2,1,1,2,6,1,0,1,1,1,3,6,3,6,5,4,0,1,0,2,5,2,0,4,4,0,3,0,3,2,3,1,5,2,1,1,6,1,0,1,0,1,4,2,2,1,0,1,1,1,0,3,3,0,0,4,3,1,0,1,0,4,1,5,1,0,0,1,0,4,1,1,0,0,0,1,2,0,1,0,0,0,0,0,3,0,1,0,1,1,2,3,1,1,0,1,0,0,6,6,1,1,1,1,6,2,2,2,2,0,1,1,1,3,1,0,1,4,0,1,2,1,1,1,2,5,1,2,2,5,0,2,0,2,0,3,1,2,2,1,590920614464250290,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,4,0,5,0,4,3,3,0,2,0,0,0,2,0,0,0,0,0,0,0,5,0,0,0,1,0,6,2,0,0,0,5,5,6,0,0,0,0,2,0,2,0,0,0,1,0,0,2,2,0,0,1,0,0,0,0,0,0,2,6,3,2,6,6,0,2,1,0,0,0,0,4,2,0,2,0,4,6,0,0,2,3,1,5,0,0,0,0,0,0,0,3,6,1,2,1,3,0,2,0,0,0,2,1,0,0,2,2,0,1,2,3,5,2,0,0,4,2,5,2,3,1,5,2,1,1,2,0,3,1,0,0,0,1,0,0,2,5,0,0,1,2,2,0,0,3,1,3,1,1,0,1,0,5,5,5,2,0,4,5,6,0,3,5,2,0,1,4,3,1,0,3,3,0,2,0,2,1,0,0,2,0,0,2,6,1,3,6,2,0,3,2,3,2,2,2,2,5,0,6,0,3,3,4,0,0,0,0,5,0,3,0,0,6,0,0,0,1,0,2,1,5,1,1,4,1,5,2,1,1,2,1,0,1,1,3,1,0,5,1,0,4,1,3,3,4,0,2,6,1,0,0,2,0,4,5,0,1,6,0,1,1,1,0,4,1,3,1,0,1,0,0,2,0,1,1,2,6,1,6,2,2,1,1,4,3,2,6,0,0,1,0,2,5,0,0,5,5,3,0,3,1,2,6,1,0,2,1,1,6,1,0,1,0,3,3,1,0,1,0,0,1,1,0,3,3,0,0,4,3,1,0,1,0,3,1,3,5,0,0,1,0,4,0,1,1,0,1,1,4,0,0,0,0,0,0,0,0,0,1,0,0,1,1,3,1,1,0,1,0,0,0,6,1,1,3,1,0,2,4,2,2,0,1,1,1,2,3,2,1,2,1,1,2,1,1,1,0,6,0,2,2,6,0,2,1,2,1,3,1,3,5,1,8274988120765843024,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,5,0,5,0,5,5,5,3,4,3,6,3,5,4,0,4,0,5,0,4,3,3,5,2,2,4,5,2,0,0,0,0,6,0,0,0,0,3,0,0,0,0,2,2,0,5,0,6,0,0,2,2,6,0,1,1,3,1,0,0,1,0,0,0,0,3,0,2,0,3,1,6,4,0,5,1,0,0,6,2,3,0,4,1,0,3,4,0,6,0,1,2,1,6,3,0,0,5,5,4,0,0,2,5,1,5,0,0,0,2,0,2,6,6,4,2,1,0,6,6,0,0,0,3,2,0,3,0,2,1,6,1,1,4,2,1,4,5,2,0,2,1,4,0,2,1,0,4,5,0,4,0,5,2,6,3,0,2,0,3,4,3,5,5,2,1,6,4,0,5,5,0,4,5,5,0,0,1,3,2,0,0,4,4,2,0,3,5,4,5,6,2,6,5,3,4,4,0,2,0,3,3,6,2,3,5,2,5,4,4,2,5,5,1,6,3,3,3,6,0,5,1,3,3,4,5,1,4,6,3,1,1,4,3,2,1,5,2,4,0,5,6,2,1,1,1,1,0,5,1,6,0,0,5,0,1,5,3,0,3,3,1,0,0,3,4,0,2,0,4,5,4,1,1,1,1,1,1,0,4,2,3,6,0,0,0,1,3,2,1,1,2,6,2,0,1,1,0,2,3,2,4,1,4,0,1,0,2,5,2,0,4,4,0,1,0,3,1,5,1,1,2,1,1,6,1,0,1,0,3,0,2,5,1,2,0,1,4,0,3,3,0,0,4,3,1,0,3,0,4,1,3,1,0,0,0,0,5,0,1,0,0,0,1,4,1,0,0,0,0,0,0,0,2,1,0,1,1,2,3,6,1,0,1,1,1,4,6,1,1,3,1,6,0,2,2,2,0,1,1,1,1,1,3,1,1,0,1,2,1,1,5,0,6,1,2,1,6,0,1,2,2,0,3,1,2,2,1,3168805145622878188,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,4,0,5,0,4,3,3,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,2,0,0,0,0,5,0,0,0,0,0,0,0,2,0,2,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,3,0,0,2,0,3,1,2,0,0,3,4,0,0,0,1,2,1,6,0,0,0,1,6,0,4,3,0,0,1,5,1,0,1,5,1,3,2,6,0,2,4,0,1,4,4,0,1,3,1,3,3,3,2,1,6,5,2,4,1,0,2,5,1,0,4,1,0,0,5,0,4,4,5,0,4,0,5,5,0,6,0,2,2,3,3,3,5,6,3,0,0,0,0,5,0,2,4,5,5,0,0,1,0,2,6,0,0,3,3,0,3,6,4,0,0,2,6,0,0,2,4,0,2,6,3,0,6,2,3,5,0,5,2,3,2,1,4,5,6,3,3,3,4,6,4,4,3,3,0,5,1,0,6,2,0,3,1,3,2,1,5,1,3,0,5,6,2,1,1,0,1,0,1,1,3,0,0,4,4,1,0,1,0,3,6,1,0,0,3,4,1,2,2,3,3,4,1,5,0,1,1,1,0,4,0,3,2,0,1,0,1,3,2,1,1,2,6,6,1,1,1,0,2,3,3,4,6,4,0,1,0,2,3,2,0,4,2,0,0,1,3,1,1,1,1,1,1,1,6,1,0,1,0,0,0,1,0,1,0,0,1,1,0,3,3,0,0,4,3,0,0,3,0,4,1,3,5,0,0,4,0,5,3,1,1,0,1,1,1,0,1,0,0,2,2,0,0,0,1,1,1,1,2,2,1,1,2,1,0,0,6,6,1,1,1,6,6,2,2,2,2,0,1,1,1,5,2,3,1,2,3,1,3,1,1,1,2,1,1,2,1,6,0,2,0,2,0,3,1,2,5,1,4811858053894378966,0


(1729656, 502)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,instance_id,is_trade
0,0,5,0,5,0,0,5,0,0,6,4,5,0,0,5,0,6,0,3,5,5,0,0,5,5,5,6,4,4,0,3,5,5,4,6,6,3,4,4,3,6,6,6,6,3,5,6,5,6,3,6,5,5,4,3,4,3,5,3,3,4,4,3,5,5,5,3,4,5,6,4,4,5,6,5,6,5,3,3,3,4,5,6,1,4,4,1,1,3,4,2,3,5,4,1,4,3,3,6,4,2,5,1,3,3,2,2,3,3,4,3,3,6,2,3,5,2,4,4,4,2,2,3,5,2,6,6,1,3,1,3,2,4,4,6,4,6,2,2,3,5,2,4,4,2,3,4,1,4,0,2,4,0,2,2,3,6,5,0,3,1,6,4,2,4,2,4,5,6,6,6,5,4,2,6,4,4,2,1,4,1,4,2,1,1,2,5,4,4,6,3,3,4,3,5,6,2,3,5,4,4,4,2,5,0,2,1,4,0,6,0,6,5,2,3,3,3,1,6,5,6,3,3,1,5,5,1,4,6,5,1,4,1,5,1,4,3,2,3,3,3,5,4,6,4,1,2,5,1,4,0,5,5,6,6,5,5,0,3,0,5,4,3,2,6,5,1,0,1,1,4,6,2,0,1,2,3,1,1,3,4,3,2,6,3,2,6,3,3,4,4,0,4,4,3,2,4,3,1,5,6,3,1,3,0,1,3,1,2,1,2,0,2,6,2,1,1,2,1,5,6,1,2,1,0,4,1,5,0,1,1,5,3,5,2,6,3,4,1,2,2,1,4,1,1,5,1,1,1,0,0,4,1,2,3,3,1,0,0,2,6,1,0,2,6,1,0,1,1,1,2,6,3,1,5,4,0,4,0,2,5,3,3,1,1,1,3,0,4,1,3,1,1,2,0,0,6,1,0,1,0,2,6,2,3,6,0,1,1,1,0,3,3,0,0,4,1,0,0,3,6,3,1,5,1,0,0,1,0,5,1,1,0,0,0,1,4,0,0,0,0,0,0,0,3,0,1,0,1,1,2,3,2,1,0,1,0,3,3,6,1,1,1,4,4,2,2,2,2,0,1,1,1,3,1,3,1,1,0,3,4,1,1,2,0,5,1,1,1,1,0,2,3,0,0,3,1,3,4,1,329735819823914333,-1
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,3,5,3,5,3,0,0,4,4,0,3,4,3,3,3,0,3,3,3,0,3,0,4,0,6,0,0,0,3,0,0,0,4,0,0,3,6,3,5,4,5,3,6,0,5,4,0,0,6,6,1,3,2,0,5,5,6,5,6,2,0,1,5,3,0,2,0,6,5,0,2,6,0,0,0,0,1,0,1,2,0,0,2,0,2,5,0,0,0,5,0,5,0,0,0,6,1,0,0,2,0,3,0,2,0,5,3,6,0,5,0,1,1,1,2,0,0,0,2,1,1,0,4,2,0,1,2,1,0,1,2,0,3,1,3,0,2,0,0,1,0,0,0,0,6,6,6,2,0,2,5,2,1,1,1,1,1,0,4,2,0,0,0,0,2,2,1,0,6,0,0,0,0,2,0,0,3,0,3,0,1,5,0,5,3,3,2,0,5,0,5,1,6,0,0,5,0,1,3,5,0,0,0,4,2,2,0,5,3,5,5,0,2,1,0,1,1,0,0,3,6,3,3,5,2,3,5,0,0,0,3,2,5,5,2,1,0,3,3,4,6,0,0,0,5,4,5,1,5,6,2,0,0,2,1,3,1,2,0,0,0,2,5,2,1,1,0,1,1,6,1,0,0,0,4,0,1,6,0,0,1,3,5,3,0,2,0,1,2,5,4,1,1,1,6,0,1,1,1,0,4,0,0,1,0,1,0,0,1,2,1,1,2,0,1,4,1,1,0,0,1,3,2,5,0,0,1,0,1,1,2,0,5,1,1,0,0,0,0,0,1,0,2,1,0,6,1,0,1,0,2,2,0,0,0,0,1,1,1,0,3,3,0,0,4,1,1,0,3,1,1,1,4,1,0,0,4,0,5,1,1,0,0,0,1,4,0,0,0,0,0,2,0,4,0,0,1,0,1,2,2,6,1,0,1,0,5,4,6,0,1,1,0,6,2,2,2,0,0,0,1,2,3,1,2,1,3,0,6,2,1,0,1,0,6,0,2,2,6,0,2,1,2,6,3,1,3,0,1,5399902518894749584,-1
2,3,4,3,4,6,0,4,3,3,3,5,4,0,3,4,6,3,6,5,4,3,3,3,3,5,3,4,4,4,5,6,3,3,4,4,4,3,4,4,6,4,3,3,3,3,3,0,4,3,6,3,3,3,3,5,3,3,3,4,4,3,3,4,3,3,3,0,3,3,1,1,3,4,4,1,5,1,6,1,1,1,1,5,5,0,1,3,2,3,3,4,3,4,5,0,4,4,0,0,4,6,2,3,4,0,6,5,6,5,6,5,0,2,5,3,0,4,0,6,5,0,2,6,0,0,0,0,0,0,2,2,0,0,2,0,2,5,0,0,0,5,0,6,0,0,0,6,0,0,0,1,0,3,0,0,0,5,4,0,0,1,0,1,1,1,6,0,0,0,0,6,1,2,4,2,0,1,2,6,0,5,4,0,6,6,3,0,2,0,0,4,0,4,0,0,6,6,6,4,3,4,5,0,5,0,4,5,1,0,1,3,0,0,0,2,2,3,1,4,2,0,0,0,0,2,0,0,6,0,0,2,6,1,0,4,1,1,3,0,6,0,5,6,5,0,0,1,0,1,3,1,0,6,0,5,5,5,0,5,3,5,4,6,3,1,0,4,1,0,4,6,5,3,3,1,2,3,4,0,0,4,2,3,6,6,1,1,0,3,4,4,0,0,0,0,4,4,4,1,5,6,3,1,0,5,1,3,1,2,0,0,0,2,5,2,1,1,0,1,3,6,1,0,0,0,4,6,1,1,0,0,1,3,5,3,0,5,0,1,2,2,5,3,1,1,6,1,1,1,1,0,4,0,0,1,0,1,0,0,1,2,1,1,2,0,5,0,1,1,0,0,0,3,0,1,0,0,1,0,1,5,2,0,0,1,1,1,0,1,0,0,1,0,2,1,0,6,1,0,1,0,3,2,0,3,6,0,0,1,1,0,3,3,0,0,4,1,3,0,3,6,1,1,0,1,0,0,1,0,5,3,1,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,1,2,2,2,1,0,1,0,1,0,6,2,1,1,0,6,2,2,2,0,0,0,1,2,3,5,4,1,1,0,1,4,1,0,1,2,1,0,1,2,1,0,1,4,2,6,3,1,1,0,1,1781317115815731318,-1
3,5,5,5,0,5,0,5,5,5,6,0,5,0,5,5,5,6,4,3,5,5,5,5,0,5,5,6,4,0,3,0,5,0,4,6,6,3,4,4,3,6,0,6,6,3,5,6,5,6,3,6,5,5,4,3,4,0,0,3,3,4,4,3,5,5,5,5,4,6,6,4,2,1,4,4,2,1,5,1,4,6,1,5,4,6,5,1,2,1,3,3,3,4,4,3,1,4,1,1,4,2,3,5,3,1,2,3,5,3,1,3,1,2,2,4,1,1,3,5,4,6,2,1,3,1,2,2,3,4,0,2,2,3,3,6,2,4,1,2,3,5,3,4,4,2,2,6,4,4,0,1,5,3,2,6,6,2,0,2,3,5,6,0,1,1,0,4,5,0,2,6,3,4,6,5,5,0,2,2,5,1,3,2,3,0,2,3,0,2,5,3,6,4,3,5,6,4,4,2,4,3,0,3,5,2,4,1,1,0,4,3,6,0,2,0,1,5,1,1,0,4,0,2,1,5,3,1,2,3,5,5,0,3,3,3,6,1,2,3,3,3,5,6,2,4,4,5,3,1,3,4,5,2,6,2,2,3,0,4,3,0,3,3,3,6,5,0,1,1,0,4,5,2,3,6,2,0,3,2,3,2,2,3,5,2,1,6,3,3,6,4,0,4,2,3,2,5,3,0,5,6,2,1,3,3,1,3,1,5,1,2,4,1,6,2,1,1,2,1,0,6,1,6,0,0,4,1,0,1,1,3,1,3,5,2,6,3,4,1,0,2,3,3,0,1,5,1,1,1,1,0,4,1,2,1,0,1,0,0,2,0,1,1,2,6,5,0,2,1,1,4,6,3,1,1,4,0,4,0,2,5,0,2,3,4,1,3,0,1,4,5,1,1,2,1,0,6,1,0,1,0,3,4,2,6,6,0,0,1,1,0,3,3,0,0,4,1,3,0,4,6,3,1,5,1,0,0,1,0,4,3,1,0,0,0,1,1,0,1,0,0,2,0,0,0,0,1,0,1,1,2,3,1,1,3,1,0,2,6,6,1,1,1,1,3,2,6,2,2,0,1,1,1,2,1,3,1,1,0,1,2,1,1,1,2,1,1,2,1,5,0,1,3,2,2,3,1,3,4,1,1275940362205323333,-1
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,3,0,0,4,4,1,5,3,4,3,3,3,2,0,0,1,2,1,0,1,0,1,0,3,4,0,0,0,5,0,6,2,0,0,0,3,4,3,0,0,0,0,2,0,2,0,2,0,1,0,0,5,0,4,0,1,0,0,0,6,0,0,2,1,3,2,5,3,0,6,1,6,0,0,3,4,2,6,5,1,2,1,6,0,5,1,1,6,4,4,6,5,0,1,0,6,6,1,5,1,6,6,6,0,2,4,2,1,5,4,2,4,5,1,3,3,3,2,5,6,5,2,5,1,1,0,5,3,3,4,2,3,0,1,1,4,2,5,0,6,1,2,5,0,6,2,2,5,1,4,3,5,6,1,0,1,1,1,5,6,2,5,4,5,4,0,3,1,5,6,6,0,2,1,0,3,3,4,2,0,2,6,5,0,1,1,0,3,6,2,3,6,2,3,5,2,3,4,2,2,5,6,0,1,2,3,6,4,0,4,2,3,6,0,5,1,0,6,3,2,5,1,1,2,1,5,1,2,4,6,6,2,1,1,2,1,0,6,1,6,0,0,4,4,4,1,1,4,3,6,0,2,0,1,4,1,2,0,6,3,1,1,6,0,1,1,1,0,4,1,2,1,0,1,0,0,1,2,1,1,2,6,1,0,1,1,0,3,2,3,5,6,4,0,1,0,2,5,2,0,0,0,1,0,0,1,4,2,1,5,2,1,0,6,1,0,1,0,3,3,2,4,6,0,0,1,1,0,3,3,0,0,4,1,4,0,3,6,1,1,3,5,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,2,3,1,1,0,1,0,0,0,6,1,1,1,1,6,2,2,2,2,0,1,1,1,3,1,3,1,2,0,3,2,1,1,1,0,6,1,2,1,6,0,2,1,2,0,3,1,3,0,1,7808187725490285582,-1


In [13]:
leaf_train.to_csv('ffm_model2_train.csv', index=None)
leaf_test.to_csv('ffm_model2_test.csv', index=None)

In [58]:
del leaf_train
del leaf_test
gc.collect()

2068

## xgb

In [10]:
import xgboost as xgb

In [13]:
#cv
params = {
    'objective': 'binary:logistic',
    'eta': 0.05,
    'colsample_bytree': 0.886,
    'min_child_weight': 1.6,
    'max_depth': 4,
    'subsample': 0.886,
    'gamma': 0.1,
    'lambda': 10,
    'verbose_eval': True,
    'eval_metric': 'logloss',
    'seed': 201803,
    'missing': -1
}

len(predictors)

xgbtrain = xgb.DMatrix(train[predictors], train['is_trade_x'])

print('Start training')
cv = xgb.cv(
    params,
    dtrain=xgbtrain,
    num_boost_round=5000,
    nfold=3,
    verbose_eval=100,
    early_stopping_rounds=50,
    seed=201803)
print(str(cv.shape[0]) + ':' + str(cv.iloc[-1, :]['test-logloss-mean']))
print('End training')

691

KeyboardInterrupt: 

In [None]:
#train/test
params = {
    'objective': 'binary:logistic',
    'eta': 0.05,
    'colsample_bytree': 0.886,
    'min_child_weight': 1.6,
    'max_depth': 4,
    'subsample': 0.886,
    'gamma': 0.1,
    'lambda': 10,
    'verbose_eval': True,
    'eval_metric': 'logloss',
    'seed': 201803,
    'missing': -1
}

len(predictors)

xgbtrain = xgb.DMatrix(train[predictors], train[target])
model = xgb.train(params, xgbtrain, num_boost_round=cv.shape[0])

xgbtest = xgb.DMatrix(test[predictors])
y_pred = model.predict(xgbtest)

idx = test.instance_id.reset_index()
idx['predicted_score'] = y_pred
idx[['instance_id', 'predicted_score']].to_csv(
    'resultWithHistory_round2(model2_xgb_final).txt',
    sep=' ',
    header=['instance_id', 'predicted_score'],
    index=False)

## cjf新特征

In [3]:
df = pd.read_csv('df_final_0514.csv')

In [4]:
train_new_cjf = pd.read_csv('../cjf_new_fea_457.csv')

train_new_cjf.shape

(5164691, 25)

In [5]:
train_new_cjf.columns

Index(['instance_id', 'user_query_max1_item_score',
       'user_query_rank1_item_score', 'user_query_percent1_item_score',
       'user_query_max1_item_score2', 'user_query_rank1_item_score2',
       'user_query_percent1_item_score2', 'user_query_max1_item_score3',
       'user_query_rank1_item_score3', 'user_query_percent1_item_score3',
       'user_query_max1_shop_score', 'user_query_rank1_shop_score',
       'user_query_percent1_shop_score', 'user_query_max2_item_score',
       'user_query_rank2_item_score', 'user_query_percent2_item_score',
       'user_query_max2_item_score2', 'user_query_rank2_item_score2',
       'user_query_percent2_item_score2', 'user_query_max2_item_score3',
       'user_query_rank2_item_score3', 'user_query_percent2_item_score3',
       'user_query_max2_shop_score', 'user_query_rank2_shop_score',
       'user_query_percent2_shop_score'],
      dtype='object')

In [6]:
df = df.merge(train_new_cjf, on=['instance_id'], how='left')

df.shape

(3567628, 697)

In [9]:
df.to_csv('df_final_0514_new.csv', index=None)

## cjy新特征

In [5]:
train = pd.read_csv('train_model2_mix_final.csv')
test = pd.read_csv('test_model2_mix_final.csv')

train.shape
test.shape

(2357860, 642)

(1729656, 642)

In [7]:
df = pd.concat([train, test])

del train
del test
gc.collect()

df.shape

3046

(4087516, 642)

In [8]:
train_cjy_new = pd.read_csv('../../ijcai/CJY/cjy/input/cjy_goodfeas_457th_05_14.csv')

train_cjy_new.shape

(3567628, 32)

In [10]:
train_cjy_new.columns

Index(['instance_id', 'is_trade', 'next_time_sub_pred_1',
       'max_click_time_sub_pred_1', 'is_last_click_pred_1', 'max_click_pred_1',
       'user_minute_query_pred_1', 'user_query_minute_pred_1',
       'user_item_next_time_sub_pred_1', 'is_last_user_item_click_pred_1',
       'max_user_item_click_time_sub_pred_1', 'user_shop_next_time_sub_pred_1',
       'user_hour_query_pred_1', 'user_query_day_hour_pred_1',
       'user_item_last_time_sub_pred_1', 'user_item_num_pred_1',
       'user_item_day_num_pred_1', 'user_item_count_first_time_sub_pred_1',
       'user_query_day_pred_1', 'user_day_query_pred_1',
       'user_shop_count_first_time_sub_pred_1',
       'user_shop_last_time_sub_pred_1', 'user_shop_num_pred_1',
       'user_shop_day_num_pred_1', 'yesterhour_pred_1', 'user_count_pred_1',
       'user_item_hour_query_pred_1', 'user_item_day_query_pred_1',
       'max_user_item_click_pred_1', 'last_time_sub_pred_1',
       'user_yesterday_query_pred_1', 'item_minute_query_pred_1'

In [12]:
# df = df.merge(train_cjy_new, on=['instance_id'], how='left')
df = df[~df.max_click_time_sub_pred_1.isnull()]

del train_cjy_new
gc.collect()

df.shape

3344

(3567628, 673)

In [13]:
df.to_csv('df_final_0514.csv', index=None)

### Bagging

In [3]:
df = pd.read_csv('df_final_0514_new.csv')

train = df[df.is_trade_x != -1]
test = df[df.is_trade_x == -1]

train.shape
test.shape

(2357860, 697)

(1209768, 697)

In [4]:
import os
import lightgbm as lgb
def lgb_pipeline(iteration, params, dtrain, test, cat_feas, sub_ins_id, rst_path, prefix, cv_fold=3):
    print('='*20, iteration, '='*20)
    if not os.path.exists(rst_path):
        os.makedirs(rst_path)
    
#     通过cv找最佳的nround
#     lgb_log = lgb.cv(params, dtrain, num_boost_round=10000, early_stopping_rounds=50,
#                  nfold=cv_fold, verbose_eval=200, categorical_feature=cat_feas)

#     print(len(lgb_log['binary_logloss-mean']), lgb_log['binary_logloss-mean'][-1], 
#             (lgb_log['binary_logloss-stdv'][-1]))
#     nround = len(lgb_log['binary_logloss-mean']) + 10
    model = lgb.train(params, dtrain, 1400, categorical_feature=cat_feas)

    # predict
    test_y = model.predict(test)
    test_result = pd.DataFrame(sub_ins_id, columns=["instance_id"])
    test_result["predicted_score"] = test_y
    test_result.to_csv(rst_path+"lgb_{0}.csv".format(iteration), index=None, sep=' ')
    print('save to', rst_path + prefix +"{0}.csv".format(iteration))

In [5]:
# 特征重要性排列
imp_feas = pd.read_csv('lgb_667_imp.csv')['feature_name'].values

# must_use_fea_len = int(len(imp_feas)*0.7)
must_use_fea_len = 400
must_use_fea = imp_feas[:must_use_fea_len]
cadi_fea = imp_feas[must_use_fea_len:]

print(len(imp_feas), len(must_use_fea), len(cadi_fea))

691 400 291


In [6]:
bagging_cnt = 30

learning_rate_list = np.random.randint(25, 75, bagging_cnt) /1000.0
num_leaves_list = np.random.randint(2**3-3, 2**4, bagging_cnt)
max_depth_list = np.random.randint(3, 4, bagging_cnt)
feature_fraction_list = np.random.randint(500, 900, bagging_cnt)/1000.0
min_data_in_leaf_list = np.random.randint(20, 40, bagging_cnt)
lambda_l1_list = np.random.randint(2000, 6000, bagging_cnt)/10000.0
lambda_l2_list = np.random.randint(500, 3500, bagging_cnt)/10000.0

n_feature_list = np.random.randint(80, 240, bagging_cnt)

assert(len(learning_rate_list) == bagging_cnt)
assert(len(num_leaves_list) == bagging_cnt)
assert(len(max_depth_list) == bagging_cnt)
assert(len(feature_fraction_list) == bagging_cnt)
assert(len(min_data_in_leaf_list) == bagging_cnt)
assert(len(lambda_l1_list) == bagging_cnt)
assert(len(lambda_l2_list) == bagging_cnt)
assert(len(n_feature_list) == bagging_cnt)

random.shuffle(learning_rate_list)
random.shuffle(num_leaves_list)
random.shuffle(max_depth_list)
random.shuffle(feature_fraction_list)
random.shuffle(min_data_in_leaf_list)
random.shuffle(lambda_l1_list)
random.shuffle(lambda_l2_list)
random.shuffle(n_feature_list)

In [7]:
params = {
    'objective':'binary',
        # metric='binary_error',
        'num_leaves':7,
        'max_depth':3,
        'learning_rate':0.05,
        'reg_alpha' : .4,
        'reg_lambda' : .2,
        'random_state':1024,
        'colsample_bytree':0.8,
        'subsample':0.9,
        'n_jobs': 31,
        'histogram_pool_size':30270,
        'max_bin':63
}

cat_feas_cad = ['user_gender_id', 'user_occupation_id']
rst_path = './bagging2/'
prefix = 'lgb_'

bagging_cnt = 0
for param_item in zip(learning_rate_list, num_leaves_list, max_depth_list, feature_fraction_list, min_data_in_leaf_list, 
                      lambda_l1_list, lambda_l2_list, n_feature_list):
    bagging_cnt += 1
    
    params['learning_rate'] = param_item[0]
    params['num_leaves'] = param_item[1]
    params['max_depth'] = param_item[2]
    params['feature_fraction'] = param_item[3]
    params['min_data_in_leaf'] = param_item[4]
    params['reg_alpha'] = param_item[5]
    params['reg_lambda'] = param_item[6]
    
    use_feas = must_use_fea.tolist() + random.sample(list(cadi_fea), param_item[7])
    cat_feas = []
    for cat_f in cat_feas_cad:
        if cat_f in use_feas:
            cat_feas += [cat_f]
    
    train_data = lgb.Dataset(train[use_feas], label=train['is_trade_x'].astype(int), 
                     feature_name=use_feas, free_raw_data=False)
    
    print('use fea len:', len(use_feas))
    print('params: ', params)
    lgb_pipeline(bagging_cnt, params, dtrain=train_data, test=test[use_feas], cat_feas=[],
                 sub_ins_id =test['instance_id'].values, rst_path = rst_path, cv_fold=3, prefix=prefix)
    
    del train_data
    gc.collect()

use fea len: 621
params:  {'objective': 'binary', 'num_leaves': 5, 'max_depth': 3, 'learning_rate': 0.029, 'reg_alpha': 0.5172, 'reg_lambda': 0.3159, 'random_state': 1024, 'colsample_bytree': 0.8, 'subsample': 0.9, 'n_jobs': 31, 'histogram_pool_size': 30270, 'max_bin': 63, 'feature_fraction': 0.787, 'min_data_in_leaf': 27}
save to ./bagging2/lgb_1.csv


76

use fea len: 620
params:  {'objective': 'binary', 'num_leaves': 10, 'max_depth': 3, 'learning_rate': 0.039, 'reg_alpha': 0.3937, 'reg_lambda': 0.0511, 'random_state': 1024, 'colsample_bytree': 0.8, 'subsample': 0.9, 'n_jobs': 31, 'histogram_pool_size': 30270, 'max_bin': 63, 'feature_fraction': 0.813, 'min_data_in_leaf': 37, 'verbose': 1}
save to ./bagging2/lgb_2.csv


85

KeyboardInterrupt: 

In [8]:
bagging_rst_files = os.listdir(rst_path)

rst_cnt = 0
df_fnl_rst = pd.DataFrame()
for rst_file in bagging_rst_files:
    if rst_file[:len(prefix)] != prefix:
        continue
    else:
        print(rst_file)
        rst_cnt += 1
    if df_fnl_rst.shape[0] == 0:
        df_fnl_rst = pd.read_csv(rst_path + rst_file, sep=' ')
    else:
        df_crt_rst = pd.read_csv(rst_path + rst_file, sep=' ')
        df_fnl_rst['predicted_score'] += df_crt_rst['predicted_score']

print('rst count:', rst_cnt)
df_fnl_rst['predicted_score']/=rst_cnt
df_fnl_rst.head()

lgb_2.csv
lgb_1.csv
rst count: 2


Unnamed: 0,instance_id,predicted_score
0,329735819823914333,0.018575
1,1275940362205323333,0.008366
2,7808187725490285582,0.000362
3,326880572134500699,0.027301
4,1120676844864970839,0.019257


In [9]:
df_fnl_rst.to_csv(rst_path + prefix + 'fnl.csv', index=False, sep=' ')