In [8]:
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import lightgbm as lgbm
from sklearn.metrics import log_loss

In [2]:
def time2cov(time_):
    return time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time_))

In [3]:
def user_check(df, behaviour):
    user_day = df.groupby(['user_id', 'day', behaviour]).size().reset_index().rename(columns={0: 'user_id_query_day_{}'.format(behaviour)})
    df = pd.merge(df, user_day, how = 'left', on=['user_id', 'day',behaviour])
    user_day_hour = df.groupby(['user_id', 'day', 'hour', behaviour]).size().reset_index().rename(columns={0: 'user_id_query_day_hour_{}'.format(behaviour)})
    df = pd.merge(df, user_day_hour, how = 'left', on=['user_id', 'day', 'hour',behaviour])
    n = 0
    check_time_day = np.ones((len(df),1))
    num = {}
    bd = df.day.min()
    for u, i, d in zip(df.user_id, df[behaviour], df.day):
        n += 1
        try:
            num[(u,i)] += 1
        except:
            num[(u,i)] = 0
        check_time_day[n-1] = num[(u,i)]
        if d > bd:
            num = {}
        bd = d
    df['check_{}_time_day'.format(behaviour)] = check_time_day
    df['check_{}_ratio'.format(behaviour)] = df['check_{}_time_day'.format(behaviour)] / df['user_id_query_day_{}'.format(behaviour)]
    return df

In [39]:
def convert_time(df):
    df['hour'] = [int(datetime.datetime.fromtimestamp(i).strftime('%H')) for i in df.context_timestamp]
    df['day'] = [int(datetime.datetime.fromtimestamp(i).strftime('%d')) for i in df.context_timestamp]
    for f in ['user_id', 'item_id', 'shop_id', 'item_category_list', 'item_city_id', 'user_gender_id', 'user_age_level', 'user_occupation_id', 'item_brand_id']:
        user_query_day = df.groupby([f, 'day']).size().reset_index().rename(columns={0: '{}_query_day'.format(f)})
        df = pd.merge(df, user_query_day, how = 'left', on=[f, 'day'])
        query_day_hour = df.groupby([f, 'day', 'hour']).size().reset_index().rename(columns={0: '{}_query_day_hour'.format(f)})
        df = pd.merge(df, query_day_hour, 'left',on=[f, 'day', 'hour'])
    df['context_timestamp'] = df['context_timestamp'].apply(time2cov)
    df.sort_values(by='context_timestamp',inplace=True)

    for f in ['shop_id', 'item_brand_id', 'item_id', 'item_category_list', 'context_page_id','item_pv_level','item_sales_level','item_collected_level','item_price_level']:
        df = user_check(df, f)
    n = 0
    check_time_day = np.ones((len(df),1))
    num = {}
    bd = df.day.min()
    for u, d in zip(df.user_id, df.day):
        n += 1
        try:
            num[(u)] += 1
        except:
            num[(u)] = 0
        check_time_day[n-1] = num[(u)]
        if d > bd:
            num = {}
        bd = d
    df['check_time_day'] = check_time_day
    f1 = 'check_time_day'
    f2 = 'user_id_query_day'
    df['check_ratio_day_all'] = df[f1] / df[f2]
    return df

In [27]:
def run(features, label, df, clf,verbose = True):
    print(features[-1])
    X = df
    y = df[label]
    Loss = []
    T = X.context_timestamp <= '2018-09-23 23:59:59'
    X_train, X_test = X[T], X[~T]
    X_train, X_test = X_train[features], X_test[features]
    #norm = StandardScaler()
    #X_train = norm.fit_transform(X_train[features])
    #X_test = norm.transform(X_test[features])
    y_train, y_test = y[T], y[~T]
    clf.fit(X_train,y_train, eval_set = [(X_train, y_train), (X_test, y_test)], eval_metric='logloss', verbose=verbose,early_stopping_rounds=200)
    predict = clf.predict_proba(X_test)[:,1]
    logloss = log_loss(y_test, predict)
    print(logloss)
    return clf

In [40]:
df = pd.read_csv('data/train/round1_ijcai_18_train_20180301.txt',sep=' ')
df.context_timestamp += 8*60*60
df = convert_time(df)
item_category_list_unique = list(np.unique(df.item_category_list))
df.item_category_list.replace(item_category_list_unique, list(np.arange(len(item_category_list_unique))), inplace=True)
label = 'is_trade'

In [23]:
start_features = [
                  'item_category_list','item_city_id','item_price_level','item_sales_level','item_collected_level','item_pv_level',
                  'user_gender_id','user_age_level','user_occupation_id','user_star_level',
                  'context_page_id',
                  'shop_review_num_level','shop_review_positive_rate','shop_score_service','shop_score_delivery',
                  'hour','day',
                  'user_id_query_day_item_category_list','user_id_query_day_hour','user_id_query_day_hour_item_category_list',
                  'check_item_category_list_ratio','check_ratio_day_all','check_time_day','check_shop_id_ratio','check_item_brand_id_ratio',
                  'shop_id',
                  'item_id_query_day',
                  'user_id_query_day_item_brand_id','user_id_query_day_hour_item_brand_id','user_id_query_day_item_id',
                  #'check_item_pv_level_ratio','user_id_query_day_item_pv_level',
                  #'user_id_query_day_hour_item_pv_level', 'check_item_pv_level_time_day',
                 ]

clf = lgbm.LGBMClassifier(random_state=1, num_leaves = 6,
                          n_estimators=5000,max_depth=3,learning_rate = 0.08, n_jobs=30) #008154
clf = run(start_features, label, df, clf)

user_id_query_day_item_id
[1]	training's binary_logloss: 0.622078	valid_1's binary_logloss: 0.621696
Training until validation scores don't improve for 200 rounds.
[2]	training's binary_logloss: 0.561537	valid_1's binary_logloss: 0.560798
[3]	training's binary_logloss: 0.509396	valid_1's binary_logloss: 0.508335
[4]	training's binary_logloss: 0.464102	valid_1's binary_logloss: 0.462742
[5]	training's binary_logloss: 0.42448	valid_1's binary_logloss: 0.42284
[6]	training's binary_logloss: 0.389621	valid_1's binary_logloss: 0.387714
[7]	training's binary_logloss: 0.358807	valid_1's binary_logloss: 0.356642
[8]	training's binary_logloss: 0.33146	valid_1's binary_logloss: 0.329053
[9]	training's binary_logloss: 0.307112	valid_1's binary_logloss: 0.304476
[10]	training's binary_logloss: 0.285373	valid_1's binary_logloss: 0.282521
[11]	training's binary_logloss: 0.265917	valid_1's binary_logloss: 0.262849
[12]	training's binary_logloss: 0.248472	valid_1's binary_logloss: 0.245188
[13]	traini

[108]	training's binary_logloss: 0.0883088	valid_1's binary_logloss: 0.0806746
[109]	training's binary_logloss: 0.0882882	valid_1's binary_logloss: 0.0806651
[110]	training's binary_logloss: 0.0882634	valid_1's binary_logloss: 0.0806401
[111]	training's binary_logloss: 0.0882385	valid_1's binary_logloss: 0.0806256
[112]	training's binary_logloss: 0.08822	valid_1's binary_logloss: 0.0806123
[113]	training's binary_logloss: 0.0882009	valid_1's binary_logloss: 0.0806026
[114]	training's binary_logloss: 0.0881747	valid_1's binary_logloss: 0.080582
[115]	training's binary_logloss: 0.0881565	valid_1's binary_logloss: 0.0805698
[116]	training's binary_logloss: 0.0881305	valid_1's binary_logloss: 0.0805513
[117]	training's binary_logloss: 0.0881132	valid_1's binary_logloss: 0.0805424
[118]	training's binary_logloss: 0.0880941	valid_1's binary_logloss: 0.0805337
[119]	training's binary_logloss: 0.0880703	valid_1's binary_logloss: 0.0805129
[120]	training's binary_logloss: 0.0880473	valid_1's bi

[212]	training's binary_logloss: 0.0869335	valid_1's binary_logloss: 0.0799465
[213]	training's binary_logloss: 0.0869269	valid_1's binary_logloss: 0.0799445
[214]	training's binary_logloss: 0.0869214	valid_1's binary_logloss: 0.0799457
[215]	training's binary_logloss: 0.0869123	valid_1's binary_logloss: 0.0799425
[216]	training's binary_logloss: 0.0869004	valid_1's binary_logloss: 0.0799323
[217]	training's binary_logloss: 0.0868924	valid_1's binary_logloss: 0.0799253
[218]	training's binary_logloss: 0.0868852	valid_1's binary_logloss: 0.0799202
[219]	training's binary_logloss: 0.0868788	valid_1's binary_logloss: 0.0799214
[220]	training's binary_logloss: 0.086873	valid_1's binary_logloss: 0.0799141
[221]	training's binary_logloss: 0.0868642	valid_1's binary_logloss: 0.0799128
[222]	training's binary_logloss: 0.0868548	valid_1's binary_logloss: 0.0799135
[223]	training's binary_logloss: 0.0868499	valid_1's binary_logloss: 0.0799118
[224]	training's binary_logloss: 0.0868427	valid_1's 

[319]	training's binary_logloss: 0.0862367	valid_1's binary_logloss: 0.0798018
[320]	training's binary_logloss: 0.0862315	valid_1's binary_logloss: 0.0798023
[321]	training's binary_logloss: 0.0862229	valid_1's binary_logloss: 0.0798026
[322]	training's binary_logloss: 0.0862163	valid_1's binary_logloss: 0.0797973
[323]	training's binary_logloss: 0.0862067	valid_1's binary_logloss: 0.0797919
[324]	training's binary_logloss: 0.0862017	valid_1's binary_logloss: 0.0797907
[325]	training's binary_logloss: 0.0861942	valid_1's binary_logloss: 0.0797847
[326]	training's binary_logloss: 0.0861879	valid_1's binary_logloss: 0.0797846
[327]	training's binary_logloss: 0.0861849	valid_1's binary_logloss: 0.0797818
[328]	training's binary_logloss: 0.086178	valid_1's binary_logloss: 0.0797754
[329]	training's binary_logloss: 0.0861729	valid_1's binary_logloss: 0.0797741
[330]	training's binary_logloss: 0.0861645	valid_1's binary_logloss: 0.0797699
[331]	training's binary_logloss: 0.0861587	valid_1's 

[423]	training's binary_logloss: 0.0857435	valid_1's binary_logloss: 0.0797174
[424]	training's binary_logloss: 0.0857387	valid_1's binary_logloss: 0.079719
[425]	training's binary_logloss: 0.0857334	valid_1's binary_logloss: 0.0797199
[426]	training's binary_logloss: 0.0857285	valid_1's binary_logloss: 0.0797216
[427]	training's binary_logloss: 0.0857238	valid_1's binary_logloss: 0.0797162
[428]	training's binary_logloss: 0.0857183	valid_1's binary_logloss: 0.0797154
[429]	training's binary_logloss: 0.0857098	valid_1's binary_logloss: 0.0797172
[430]	training's binary_logloss: 0.0857044	valid_1's binary_logloss: 0.0797184
[431]	training's binary_logloss: 0.0857017	valid_1's binary_logloss: 0.0797178
[432]	training's binary_logloss: 0.0856975	valid_1's binary_logloss: 0.0797174
[433]	training's binary_logloss: 0.0856931	valid_1's binary_logloss: 0.0797162
[434]	training's binary_logloss: 0.0856911	valid_1's binary_logloss: 0.0797144
[435]	training's binary_logloss: 0.0856885	valid_1's 

[527]	training's binary_logloss: 0.0853133	valid_1's binary_logloss: 0.0797171
[528]	training's binary_logloss: 0.0853115	valid_1's binary_logloss: 0.0797167
[529]	training's binary_logloss: 0.0853056	valid_1's binary_logloss: 0.0797166
[530]	training's binary_logloss: 0.0853039	valid_1's binary_logloss: 0.0797159
[531]	training's binary_logloss: 0.085303	valid_1's binary_logloss: 0.0797176
[532]	training's binary_logloss: 0.0852993	valid_1's binary_logloss: 0.0797166
[533]	training's binary_logloss: 0.0852971	valid_1's binary_logloss: 0.0797219
[534]	training's binary_logloss: 0.0852943	valid_1's binary_logloss: 0.0797236
[535]	training's binary_logloss: 0.0852904	valid_1's binary_logloss: 0.0797205
[536]	training's binary_logloss: 0.0852841	valid_1's binary_logloss: 0.0797197
[537]	training's binary_logloss: 0.0852813	valid_1's binary_logloss: 0.0797247
[538]	training's binary_logloss: 0.0852768	valid_1's binary_logloss: 0.0797254
[539]	training's binary_logloss: 0.0852713	valid_1's 

[632]	training's binary_logloss: 0.0849667	valid_1's binary_logloss: 0.0797025
[633]	training's binary_logloss: 0.0849633	valid_1's binary_logloss: 0.0797043
[634]	training's binary_logloss: 0.084961	valid_1's binary_logloss: 0.079702
[635]	training's binary_logloss: 0.0849594	valid_1's binary_logloss: 0.0797039
[636]	training's binary_logloss: 0.0849581	valid_1's binary_logloss: 0.0797082
[637]	training's binary_logloss: 0.0849548	valid_1's binary_logloss: 0.0797031
[638]	training's binary_logloss: 0.0849507	valid_1's binary_logloss: 0.0797023
[639]	training's binary_logloss: 0.0849479	valid_1's binary_logloss: 0.0797003
[640]	training's binary_logloss: 0.0849444	valid_1's binary_logloss: 0.0796984
[641]	training's binary_logloss: 0.0849422	valid_1's binary_logloss: 0.0796991
[642]	training's binary_logloss: 0.0849375	valid_1's binary_logloss: 0.0796979
[643]	training's binary_logloss: 0.0849343	valid_1's binary_logloss: 0.0796986
[644]	training's binary_logloss: 0.0849301	valid_1's b

[736]	training's binary_logloss: 0.0846548	valid_1's binary_logloss: 0.079706
[737]	training's binary_logloss: 0.0846508	valid_1's binary_logloss: 0.0797088
[738]	training's binary_logloss: 0.0846455	valid_1's binary_logloss: 0.0797089
[739]	training's binary_logloss: 0.0846423	valid_1's binary_logloss: 0.0797084
[740]	training's binary_logloss: 0.0846396	valid_1's binary_logloss: 0.0797108
[741]	training's binary_logloss: 0.0846365	valid_1's binary_logloss: 0.079709
[742]	training's binary_logloss: 0.0846322	valid_1's binary_logloss: 0.079711
[743]	training's binary_logloss: 0.0846319	valid_1's binary_logloss: 0.0797108
[744]	training's binary_logloss: 0.0846311	valid_1's binary_logloss: 0.0797121
[745]	training's binary_logloss: 0.0846276	valid_1's binary_logloss: 0.0797088
[746]	training's binary_logloss: 0.0846252	valid_1's binary_logloss: 0.0797092
[747]	training's binary_logloss: 0.084623	valid_1's binary_logloss: 0.079709
[748]	training's binary_logloss: 0.084619	valid_1's binar

[843]	training's binary_logloss: 0.0843309	valid_1's binary_logloss: 0.0797315
[844]	training's binary_logloss: 0.0843277	valid_1's binary_logloss: 0.0797313
[845]	training's binary_logloss: 0.0843261	valid_1's binary_logloss: 0.0797326
[846]	training's binary_logloss: 0.0843229	valid_1's binary_logloss: 0.0797362
[847]	training's binary_logloss: 0.0843196	valid_1's binary_logloss: 0.0797368
[848]	training's binary_logloss: 0.0843153	valid_1's binary_logloss: 0.07974
[849]	training's binary_logloss: 0.0843133	valid_1's binary_logloss: 0.0797424
[850]	training's binary_logloss: 0.0843099	valid_1's binary_logloss: 0.079741
[851]	training's binary_logloss: 0.0843073	valid_1's binary_logloss: 0.079742
[852]	training's binary_logloss: 0.0843042	valid_1's binary_logloss: 0.0797416
[853]	training's binary_logloss: 0.0843013	valid_1's binary_logloss: 0.0797391
[854]	training's binary_logloss: 0.0842951	valid_1's binary_logloss: 0.0797392
[855]	training's binary_logloss: 0.0842916	valid_1's bin

In [29]:
start_features = ['item_category_list', 'item_price_level', 'item_sales_level', 'item_collected_level', 'item_pv_level', 
                  'user_gender_id', 'user_age_level', 'user_occupation_id', 'user_star_level', 
                  'context_page_id', 'shop_review_num_level', 'shop_review_positive_rate', 
                  'shop_score_service', 'shop_score_delivery', 'hour', 'day', 'user_id_query_day_hour', 
                  'check_item_category_list_ratio', 'check_ratio_day_all', 'check_time_day', 'shop_id', 
                  'item_id_query_day', 'check_shop_id_ratio', 'user_id_query_day_item_brand_id', 
                  'user_id_query_day_hour_item_brand_id', 'check_item_brand_id_ratio', 'user_id_query_day_item_id', 
                  'user_id_query_day', 'item_brand_id',
                 ]
print(len(start_features))
clf = lgbm.LGBMClassifier(random_state=1, num_leaves = 6,
                          n_estimators=5000,max_depth=3,learning_rate = 0.08, n_jobs=30) #008154

for j in ['check_item_category_list_time_day',
       'user_id_query_day_item_pv_level',
       'user_id_query_day_hour_item_pv_level', 'check_item_pv_level_time_day',
       'check_item_pv_level_ratio', 'user_id_query_day_item_sales_level',
       'user_id_query_day_hour_item_sales_level',
       'check_item_sales_level_time_day', 'check_item_sales_level_ratio',
       'user_id_query_day_item_collected_level',
       'user_id_query_day_hour_item_collected_level',
       'check_item_collected_level_time_day',
       'check_item_collected_level_ratio',
       'user_id_query_day_item_price_level',
       'user_id_query_day_hour_item_price_level',
       'check_item_price_level_time_day', 'check_item_price_level_ratio']:
    print(j)        
    clf = run(start_features + [j], label, df, clf,False)

29
check_item_category_list_time_day
check_item_category_list_time_day
0.07963547594419575
check_item_category_list_ratio
check_item_category_list_ratio
0.07953425004195597
user_id_query_day_item_pv_level
user_id_query_day_item_pv_level
0.07969346618514171
user_id_query_day_hour_item_pv_level
user_id_query_day_hour_item_pv_level
0.07963122727396559
check_item_pv_level_time_day
check_item_pv_level_time_day
0.07965062144430499
check_item_pv_level_ratio
check_item_pv_level_ratio
0.0795931804800677
user_id_query_day_item_sales_level
user_id_query_day_item_sales_level
0.07959117665153828
user_id_query_day_hour_item_sales_level
user_id_query_day_hour_item_sales_level
0.07959362835500462
check_item_sales_level_time_day
check_item_sales_level_time_day
0.07964441779301137
check_item_sales_level_ratio
check_item_sales_level_ratio
0.07965598563770808
user_id_query_day_item_collected_level
user_id_query_day_item_collected_level
0.07964523998845008
user_id_query_day_hour_item_collected_level
user_i

best so far: 0.07962037953591194 -> 0.08154

In [None]:
start_features = ['item_category_list', 'item_price_level', 'item_sales_level', 'item_collected_level', 'item_pv_level', 
                  'user_gender_id', 'user_age_level', 'user_occupation_id', 'user_star_level', 
                  'context_page_id', 'shop_review_num_level', 'shop_review_positive_rate', 
                  'shop_score_service', 'shop_score_delivery', 'hour', 'day', 'user_id_query_day_hour', 
                  'check_item_category_list_ratio', 'check_ratio_day_all', 'check_time_day', 'shop_id', 
                  'item_id_query_day', 'check_shop_id_ratio', 'user_id_query_day_item_brand_id', 
                  'user_id_query_day_hour_item_brand_id', 'check_item_brand_id_ratio', 'user_id_query_day_item_id', 
                  'user_id_query_day', 'item_brand_id',
                 ]
print(len(start_features))
clf = lgbm.LGBMClassifier(random_state=1, num_leaves = 6,
                          n_estimators=5000,max_depth=3,learning_rate = 0.08, n_jobs=30) #008154
clf = run(start_features, label, df, clf,True)

29
item_brand_id
[1]	training's binary_logloss: 0.622077	valid_1's binary_logloss: 0.621701
Training until validation scores don't improve for 200 rounds.
[2]	training's binary_logloss: 0.561536	valid_1's binary_logloss: 0.560814
[3]	training's binary_logloss: 0.509395	valid_1's binary_logloss: 0.508354
[4]	training's binary_logloss: 0.464099	valid_1's binary_logloss: 0.462754
[5]	training's binary_logloss: 0.424476	valid_1's binary_logloss: 0.422855
[6]	training's binary_logloss: 0.389616	valid_1's binary_logloss: 0.387733
[7]	training's binary_logloss: 0.358798	valid_1's binary_logloss: 0.356665
[8]	training's binary_logloss: 0.331451	valid_1's binary_logloss: 0.329077
[9]	training's binary_logloss: 0.307103	valid_1's binary_logloss: 0.304497
[10]	training's binary_logloss: 0.28536	valid_1's binary_logloss: 0.282525
[11]	training's binary_logloss: 0.265903	valid_1's binary_logloss: 0.262855
[12]	training's binary_logloss: 0.248458	valid_1's binary_logloss: 0.245197
[13]	training's bi

[108]	training's binary_logloss: 0.0880978	valid_1's binary_logloss: 0.0805642
[109]	training's binary_logloss: 0.0880765	valid_1's binary_logloss: 0.0805616
[110]	training's binary_logloss: 0.0880531	valid_1's binary_logloss: 0.0805416
[111]	training's binary_logloss: 0.0880355	valid_1's binary_logloss: 0.080529
[112]	training's binary_logloss: 0.0880085	valid_1's binary_logloss: 0.0805074
[113]	training's binary_logloss: 0.0879843	valid_1's binary_logloss: 0.080489
[114]	training's binary_logloss: 0.0879623	valid_1's binary_logloss: 0.0804816
[115]	training's binary_logloss: 0.0879419	valid_1's binary_logloss: 0.0804627
[116]	training's binary_logloss: 0.0879262	valid_1's binary_logloss: 0.0804583
[117]	training's binary_logloss: 0.0879047	valid_1's binary_logloss: 0.080451
[118]	training's binary_logloss: 0.0878877	valid_1's binary_logloss: 0.0804344
[119]	training's binary_logloss: 0.0878636	valid_1's binary_logloss: 0.0804188
[120]	training's binary_logloss: 0.0878478	valid_1's bi

[213]	training's binary_logloss: 0.0867946	valid_1's binary_logloss: 0.0799074
[214]	training's binary_logloss: 0.0867852	valid_1's binary_logloss: 0.0799057
[215]	training's binary_logloss: 0.0867798	valid_1's binary_logloss: 0.079903
[216]	training's binary_logloss: 0.0867736	valid_1's binary_logloss: 0.0798969
[217]	training's binary_logloss: 0.0867633	valid_1's binary_logloss: 0.079892
[218]	training's binary_logloss: 0.0867555	valid_1's binary_logloss: 0.0798947
[219]	training's binary_logloss: 0.0867493	valid_1's binary_logloss: 0.0798928
[220]	training's binary_logloss: 0.0867424	valid_1's binary_logloss: 0.0798888
[221]	training's binary_logloss: 0.0867369	valid_1's binary_logloss: 0.0798865
[222]	training's binary_logloss: 0.0867283	valid_1's binary_logloss: 0.0798831
[223]	training's binary_logloss: 0.0867192	valid_1's binary_logloss: 0.0798735
[224]	training's binary_logloss: 0.086713	valid_1's binary_logloss: 0.0798671
[225]	training's binary_logloss: 0.0867082	valid_1's bi

[319]	training's binary_logloss: 0.0861046	valid_1's binary_logloss: 0.079714
[320]	training's binary_logloss: 0.0861001	valid_1's binary_logloss: 0.0797124
[321]	training's binary_logloss: 0.086096	valid_1's binary_logloss: 0.0797142
[322]	training's binary_logloss: 0.0860885	valid_1's binary_logloss: 0.0797182
[323]	training's binary_logloss: 0.0860818	valid_1's binary_logloss: 0.0797148
[324]	training's binary_logloss: 0.0860767	valid_1's binary_logloss: 0.0797072
[325]	training's binary_logloss: 0.0860709	valid_1's binary_logloss: 0.0797053
[326]	training's binary_logloss: 0.0860653	valid_1's binary_logloss: 0.0797041
[327]	training's binary_logloss: 0.0860604	valid_1's binary_logloss: 0.0797053
[328]	training's binary_logloss: 0.0860547	valid_1's binary_logloss: 0.0797058
[329]	training's binary_logloss: 0.0860519	valid_1's binary_logloss: 0.0797025
[330]	training's binary_logloss: 0.0860462	valid_1's binary_logloss: 0.0796995
[331]	training's binary_logloss: 0.0860405	valid_1's b

[425]	training's binary_logloss: 0.0855921	valid_1's binary_logloss: 0.0796424
[426]	training's binary_logloss: 0.0855886	valid_1's binary_logloss: 0.0796382
[427]	training's binary_logloss: 0.085584	valid_1's binary_logloss: 0.0796207
[428]	training's binary_logloss: 0.085579	valid_1's binary_logloss: 0.0796161
[429]	training's binary_logloss: 0.0855748	valid_1's binary_logloss: 0.0796175
[430]	training's binary_logloss: 0.0855706	valid_1's binary_logloss: 0.0796212
[431]	training's binary_logloss: 0.0855656	valid_1's binary_logloss: 0.0796261
[432]	training's binary_logloss: 0.0855618	valid_1's binary_logloss: 0.0796291
[433]	training's binary_logloss: 0.0855597	valid_1's binary_logloss: 0.079628
[434]	training's binary_logloss: 0.0855551	valid_1's binary_logloss: 0.0796283
[435]	training's binary_logloss: 0.0855526	valid_1's binary_logloss: 0.0796307
[436]	training's binary_logloss: 0.0855511	valid_1's binary_logloss: 0.0796318
[437]	training's binary_logloss: 0.0855459	valid_1's bi

In [41]:
df.columns

Index(['instance_id', 'item_id', 'item_category_list', 'item_property_list',
       'item_brand_id', 'item_city_id', 'item_price_level', 'item_sales_level',
       'item_collected_level', 'item_pv_level', 'user_id', 'user_gender_id',
       'user_age_level', 'user_occupation_id', 'user_star_level', 'context_id',
       'context_timestamp', 'context_page_id', 'predict_category_property',
       'shop_id', 'shop_review_num_level', 'shop_review_positive_rate',
       'shop_star_level', 'shop_score_service', 'shop_score_delivery',
       'shop_score_description', 'is_trade', 'hour', 'day',
       'user_id_query_day', 'user_id_query_day_hour', 'item_id_query_day',
       'item_id_query_day_hour', 'shop_id_query_day', 'shop_id_query_day_hour',
       'item_category_list_query_day', 'item_category_list_query_day_hour',
       'item_city_id_query_day', 'item_city_id_query_day_hour',
       'user_gender_id_query_day', 'user_gender_id_query_day_hour',
       'user_age_level_query_day', 'user_age

In [36]:
print(len(np.unique(df.context_page_id)))

20


In [35]:
print(len(df))

478138


In [38]:
df.context_id[:3]

0    7943330529831398173
1    8211008059339466280
2     566319850199326447
Name: context_id, dtype: int64