In [1]:
import os
import pandas as pd
import numpy as np
import datetime as date
import datetime as dt

DATA_ROOT = "./data/ml100marathon-02-01"

In [2]:
#Import Data
off_train = pd.read_csv(os.path.join(DATA_ROOT,'train_offline.csv'))
off_train.columns = ['user_id','merchant_id','coupon_id','discount_rate','distance','date_received','date']
off_train = off_train[~off_train.coupon_id.isna()]
off_train.reset_index(drop=True, inplace=True)

off_test = pd.read_csv(os.path.join(DATA_ROOT,'test_offline.csv'))
off_test.columns  = ['user_id','merchant_id','coupon_id','discount_rate','distance','date_received']


In [3]:
off_train.head()

Unnamed: 0,user_id,merchant_id,coupon_id,discount_rate,distance,date_received,date
0,1439408,2632,8591.0,20:1,0.0,20160217.0,
1,1439408,2632,1078.0,20:1,0.0,20160319.0,
2,1832624,3381,7610.0,200:20,0.0,20160429.0,
3,2029232,3381,11951.0,200:20,1.0,20160129.0,
4,2223968,3381,9776.0,10:5,2.0,20160129.0,


In [4]:
off_train.dtypes

user_id            int64
merchant_id        int64
coupon_id        float64
discount_rate     object
distance         float64
date_received    float64
date             float64
dtype: object

In [5]:
# 按照時間劃分訓練集和測試集
# siding windows
# 本賽題提供用戶在2016年1月1日至2016年5月31日之間真實線下消費行為

#2016-1-1~2016-2-29 trian set(feature extract) , 2016-3-1~2016-3-31 test set
#2016-2-1~2016-3-31 train set(feature extract) , 2016-4-1~2016-4-30 test set
#2016-3-1~2016-4-30 train set(feature extract) , 2016-5-1~2016-5-31 test set

#dataset=>test set，feature=>train set


#數據集3 線上數據中領券和用券日期大於3月1日和小於4月30日的
#将3月15日到4月30日作为数据集提取特征，再测试5月1日到6月15日的数据
#使数据集3等于test集

#dataset裡面只有接收優惠券的記錄的，無消費記錄，可用於預測
#feature裡面存的是優惠券使用日期或接收優惠券的時間介於3月15日到6月30之間的記錄

dataset3 = off_test
feature3 = off_train[(off_train.date>=20160301)&(off_train.date<=20160430)|((pd.isnull(off_train.date)&(off_train.date_received>=20160301)&(off_train.date_received<=20160430)))]
 
#提取數據集2的測試集
#將2月1日到3月31日的作為數據集提取特徵，利用4月1日4月30日的作為測試集

#dataset裡面只存放優惠券接收日期介於4月1日到4月30之間的記錄
#feature裡面存的是優惠券使用日期或接收優惠券的時間介於2月1日到3月31之間的記錄
dataset2 = off_train[((off_train.date_received>=20160401)&(off_train.date_received<=20160430))]
feature2 = off_train[(off_train.date>=20160201)&(off_train.date<=20160331)|(pd.isnull(off_train.date)&(off_train.date_received>=20160201)&(off_train.date_received<=20160331))]

#提取數據集1的測試集
#dataset裡面只存放優惠券接收日期介於3月1日到3月31之間的記錄
#feature裡面存的是優惠券使用日期或接收優惠券的時間介於1月1日到2月29之間的記錄
dataset1 = off_train[(off_train.date_received>=20160301)&(off_train.date_received<=20160331)]
feature1 = off_train[(off_train.date>=20160101)&(off_train.date<=20160229)|(pd.isnull(off_train.date)&(off_train.date_received>=20160101)&(off_train.date_received<=20160229))]

In [6]:
# 提取特徵：
    #用戶領取的所有優惠券數目
    #用戶領取的特定優惠券數目
    #用戶此次之後/前領取的所有優惠券數目
    #用戶此次之後/前領取的特定優惠券數目
    #用戶上/下一次領取的時間間隔
    #用戶領取特定商家的優惠券數目
    #用戶領取的不同商家數目
    #用戶當天領取的優惠券數目
    #用戶當天領取的特定優惠券數目
    #用戶領取的所有優惠券種類數目
    #商家被領取的優惠券數目
    #商家被領取的特定優惠券數目
    #商家被多少不同用戶領取的數目
    #商家發行的所有優惠券種類數目

# 對dataset3進行操作
# 用戶收到的優惠券總和
t = dataset3[['user_id']]
t['this_month_user_received_all_coupon_count'] = 1
 #將t按照用戶id進行分組，然後統計所有用戶收取的優惠券數目,並初始化一個索引值
t = t.groupby('user_id').agg('sum').reset_index()
# 用戶收到特定優惠券的總和
t1 = dataset3[['user_id','coupon_id']]
t1['this_month_user_receive_same_coupon_count'] = 1
t1 = t1.groupby(['user_id','coupon_id']).agg('sum').reset_index()
 
# 用戶此次之前或之後領使用優惠券的時間
# lambda x:':'.join(x)  是添加冒號並在後面去加字符
# 將接收時間的一組按著':'分開，這樣就可以計算接受了優惠券的數量,apply是合併
# 最大接受的日期max_date_received/min_date_received
t2 = dataset3[['user_id','coupon_id','date_received']]
t2.date_received = t2.date_received.astype('str')
t2 = t2.groupby(['user_id','coupon_id'])['date_received'].agg(lambda x:':'.join(x)).reset_index()
t2['receive_number'] = t2.date_received.apply(lambda s:len(s.split(':')))
t2 = t2[t2.receive_number>1]
t2['max_date_received'] = t2.date_received.apply(lambda s:max([float(d) for d in s.split(':')]))
t2['min_date_received'] = t2.date_received.apply(lambda s:min([float(d) for d in s.split(':')]))
t2 = t2[['user_id','coupon_id','max_date_received','min_date_received']]
 
# 將表格中接收優惠券日期中為最近和最遠的日期時置為1其餘為0，若只接受了一次優惠券為-1
# 將兩表融合只保留左表數據,這樣得到的表，相當於保留了最近接收時間和最遠接受時間
t3 = dataset3[['user_id','coupon_id','date_received']]
t3 = pd.merge(t3,t2,on=['user_id','coupon_id'],how='left')
t3['this_month_user_receive_same_coupon_lastone'] = t3.max_date_received - t3.date_received.astype(float)
 
t3['this_month_user_receive_same_coupon_firstone'] = t3.date_received.astype(float) - t3.min_date_received
 
def isfirstlastone(x):
     
    if x == 0:
        return 1
    elif x > 0:
        return 0
    else:
        return -1
# 只接受過一次優惠券為者為 -1
 
t3.this_month_user_receive_same_coupon_lastone = t3.this_month_user_receive_same_coupon_lastone.apply(isfirstlastone)
t3.this_month_user_receive_same_coupon_firstone = t3.this_month_user_receive_same_coupon_firstone.apply(isfirstlastone)
 
# 第四個特徵,一個用戶所接收到的所有優惠券的數量
t4 = dataset3[['user_id','date_received']]
t4['this_day_user_receive_all_coupon_count'] = 1
t4 = t4.groupby(['user_id','date_received']).agg('sum').reset_index()
 
# 提取第五個特徵,一個用戶不同時間所接收到不同優惠券的數量
t5 = dataset3[['user_id','coupon_id','date_received']]
t5['this_day_user_receive_same_coupon_count'] = 1
t5 = t5.groupby(['user_id','coupon_id','date_received']).agg('sum').reset_index()
 
# 一個用戶不同優惠券 的接受時間
t6 = dataset3[['user_id','coupon_id','date_received']]
t6.date_received = t6.date_received.astype('str')
t6 = t6.groupby(['user_id','coupon_id'])['date_received'].agg(lambda x:':'.join(x)).reset_index()
t6.rename(columns ={'date_received':'dates'},inplace = True)
 
# 接收優惠券最近的日子天數
def get_day_gap_before(s):
    date_received,dates = s.split('-')
    dates = dates.split(':')
    
    gaps = []
    for d in dates:
        if len(d) > 4:
            this_gap = (dt.date(int(date_received[1:4]),(int(date_received[4:6])),(int(date_received[6:8]))) - dt.date((int(d[1:4])),(int(d[4:6])),(int(d[6:8])))).days
            if this_gap>0:
                gaps.append(this_gap)
    if len(gaps) == 0:
        return -1
    else:
        return min(gaps)
# 接收優惠券最遠的日子天數
def get_day_gap_after(s):
    date_received,dates = s.split('-')
    dates = dates.split(':')
    gaps = []
    for d in dates:
        if len(d) > 4:
            this_gap = (dt.datetime(int(d[0:4]),int(d[4:6]),int(d[6:8])) - dt.datetime(int(date_received[0:4]),int(date_received[4:6]),int(date_received[6:8]))).days
            if this_gap>0:
                gaps.append(this_gap)
    if len(gaps) == 0:
        return -1
    else:
        return min(gaps)
 
t7 = dataset3[['user_id','coupon_id','date_received']]
t7 = pd.merge(t7,t6,on=['user_id','coupon_id'],how='left')
t7['date_received_date'] = t7.date_received.astype('str') + '-' + t7.dates.astype('str')
 
t7['day_gap_before'] = t7.date_received_date.apply(get_day_gap_before)
t7['day_gap_after']  = t7.date_received_date.apply(get_day_gap_after)
t7 = t7[['user_id','coupon_id','date_received','day_gap_before','day_gap_after']]
# feature3 提取的特徵存入CSV中
other_feature3 = pd.merge(t1,t,on='user_id')
other_feature3 = pd.merge(other_feature3,t3,on=['user_id','coupon_id'])
other_feature3 = pd.merge(other_feature3,t4,on=['user_id','date_received'])
other_feature3 = pd.merge(other_feature3,t5,on=['user_id','coupon_id','date_received'])
other_feature3 = pd.merge(other_feature3,t7,on=['user_id','coupon_id','date_received'])
other_feature3.to_csv('data/ml100marathon-02-01/other_feature3.csv',index=None)
 
 
 
# 處理dataset2
t = dataset2[['user_id']]
t['this_month_user_received_all_coupon_count'] = 1
t = t.groupby('user_id').agg('sum').reset_index()
t1 = dataset2[['user_id','coupon_id']]
t1['this_month_user_receive_same_coupon_count'] = 1
t1 = t1.groupby(['user_id','coupon_id']).agg('sum').reset_index()
t2 = dataset3[['user_id','coupon_id','date_received']]
t2.date_received = t2.date_received.astype('str')
t2 = t2.groupby(['user_id','coupon_id'])['date_received'].agg(lambda x:':'.join(x)).reset_index()
t2['receive_number'] = t2.date_received.apply(lambda s:len(s.split(':')))
t2 = t2[t2.receive_number>1]
t2['max_date_received'] = t2.date_received.apply(lambda s:max([float(d) for d in s.split(':')]))
t2['min_date_received'] = t2.date_received.apply(lambda s:max([float(d) for d in s.split(':')]))
t2 = t2[['user_id','coupon_id','max_date_received','min_date_received']]
 
t3 = dataset2[['user_id','coupon_id','date_received']]
t3 = pd.merge(t3,t2,on=['user_id','coupon_id'],how='left')
t3['this_month_user_receive_same_coupon_lastone'] = t3.max_date_received - t3.date_received.astype('float')
t3['this_month_user_receive_same_coupon_firstone']= t3.date_received.astype('float') - t3.min_date_received
t3.this_month_user_receive_same_coupon_lastone = t3.this_month_user_receive_same_coupon_lastone.apply(isfirstlastone)
t3.this_month_user_receive_same_coupon_firstone= t3.this_month_user_receive_same_coupon_firstone.apply(isfirstlastone)
 
t4 = dataset2[['user_id','date_received']]
t4['this_day_user_receive_all_coupon_count'] = 1
t4 = t4.groupby(['user_id','date_received']).agg('sum').reset_index()
t5 = dataset2[['user_id','coupon_id','date_received']]
t5['this_day_user_receive_same_coupon_count'] = 1
t5 = t5.groupby(['user_id','coupon_id','date_received']).agg('sum').reset_index()
 
t6 = dataset2[['user_id','coupon_id','date_received']]
t6.date_received = t6.date_received.astype('str')
t6 = t6.groupby(['user_id','coupon_id'])['date_received'].agg(lambda x:':'.join(x)).reset_index()
t6.rename(columns={'date_received':'dates'},inplace=True)
 
t7 = dataset2[['user_id','coupon_id','date_received']]
t7 = pd.merge(t7,t6,on=['user_id','coupon_id'],how='left')
t7['date_received_date'] = t7.date_received.astype('str') + '-' + t7.dates
t7['day_gap_before'] = t7.date_received_date.apply(get_day_gap_before)
t7['day_gap_after']  = t7.date_received_date.apply(get_day_gap_before)
t7 = t7[['user_id','coupon_id','date_received','day_gap_before','day_gap_after']]
 
other_feature2 = pd.merge(t1,t,on='user_id')
other_feature2 = pd.merge(other_feature2,t3,on=['user_id','coupon_id'])
other_feature2 = pd.merge(other_feature2,t4,on=['user_id','date_received'])
other_feature2 = pd.merge(other_feature2,t5,on=['user_id','coupon_id','date_received'])
other_feature2 = pd.merge(other_feature2,t7,on=['user_id','coupon_id','date_received'])
other_feature2.to_csv('data/ml100marathon-02-01/other_feature2.csv',index=None)
 
# 處理dataset1
t = dataset1[['user_id']]
t['this_month_user_received_all_coupon_count'] = 1
t = t.groupby('user_id').agg('sum').reset_index()
t1 = dataset1[['user_id','coupon_id']]
t1['this_month_user_receive_same_coupon_count'] = 1
t1 = t1.groupby(['user_id','coupon_id']).agg('sum').reset_index()
t2 = dataset1[['user_id','coupon_id','date_received']]
t2.date_received = t2.date_received.astype('str')
t2 = t2.groupby(['user_id','coupon_id'])['date_received'].agg(lambda x:':'.join(x)).reset_index()
t2['receive_number'] = t2.date_received.apply(lambda s:len(s.split(':')))
t2 = t2[t2.receive_number>1]
t2['max_date_received'] = t2.date_received.apply(lambda s:max([float(d) for d in s.split(':')]))
t2['min_date_received'] = t2.date_received.apply(lambda s:max([float(d) for d in s.split(':')]))
t2 = t2[['user_id','coupon_id','max_date_received','min_date_received']]
 
t3 = dataset1[['user_id','coupon_id','date_received']]
t3 = pd.merge(t3,t2,on=['user_id','coupon_id'],how='left')
t3['this_month_user_receive_same_coupon_lastone'] = t3.max_date_received - t3.date_received.astype('float')
t3['this_month_user_receive_same_coupon_firstone']= t3.date_received.astype('float') - t3.min_date_received
t3.this_month_user_receive_same_coupon_lastone = t3.this_month_user_receive_same_coupon_lastone.apply(isfirstlastone)
t3.this_month_user_receive_same_coupon_firstone= t3.this_month_user_receive_same_coupon_firstone.apply(isfirstlastone)
 
t4 = dataset1[['user_id','date_received']]
t4['this_day_user_receive_all_coupon_count'] = 1
t4 = t4.groupby(['user_id','date_received']).agg('sum').reset_index()
t5 = dataset1[['user_id','coupon_id','date_received']]
t5['this_day_user_receive_same_coupon_count'] = 1
t5 = t5.groupby(['user_id','coupon_id','date_received']).agg('sum').reset_index()
 
t6 = dataset1[['user_id','coupon_id','date_received']]
t6.date_received = t6.date_received.astype('str')
t6 = t6.groupby(['user_id','coupon_id'])['date_received'].agg(lambda x:':'.join(x)).reset_index()
t6.rename(columns={'date_received':'dates'},inplace=True)
 
t7 = dataset1[['user_id','coupon_id','date_received']]
t7 = pd.merge(t7,t6,on=['user_id','coupon_id'],how='left')
t7['date_received_date'] = t7.date_received.astype('str') + '-' + t7.dates
t7['day_gap_before'] = t7.date_received_date.apply(get_day_gap_before)
t7['day_gap_after']  = t7.date_received_date.apply(get_day_gap_before)
t7 = t7[['user_id','coupon_id','date_received','day_gap_before','day_gap_after']]
 
other_feature1 = pd.merge(t1,t,on='user_id')
other_feature1 = pd.merge(other_feature1,t3,on=['user_id','coupon_id'])
other_feature1 = pd.merge(other_feature1,t4,on=['user_id','date_received'])
other_feature1 = pd.merge(other_feature1,t5,on=['user_id','coupon_id','date_received'])
other_feature1 = pd.merge(other_feature1,t7,on=['user_id','coupon_id','date_received'])

other_feature1.to_csv('data/ml100marathon-02-01/other_feature1.csv',index=None)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-

In [7]:
import datetime

#提取優惠券相關特徵
# 統一轉化為打折卷
def calc_discount_rate(s):
    s = str(s)
    s = s.split(':')
    if len(s) == 1:
        return float(s[0])
    else:
        return 1.0-float(s[1])/float(s[0])
def get_discount_man(s):
    s = str(s)
    s = s.split(':')
    if len(s) == 1:
        return 'null'
    else:
        return int(s[0])
def get_discount_jian(s):
    s = str(s)
    s = s.split(':')
    if len(s) == 1:
        return 'null'
    else:
        return int(s[1])
def is_man_jian(s):
    s = str(s)
    s = s.split(':')
    if len(s) == 1:
        return 0
    else:
        return 1
    
def getWeekday(row):
    if (np.isnan(row)) or (row==-1):
        return -1
    else:
        return pd.to_datetime(row, format = "%Y%m%d").dayofweek+1

def getMonth(row):
    if (np.isnan(row)) or (row==-1):
        return -1
    else:
        x = pd.to_datetime(row, format = "%Y%m%d")
        return datetime.datetime.strftime(x, '%m')

def getDaysDistance1(row):
    if (np.isnan(row)) or (row==-1):
        return -1
    else:
        d1 = pd.to_datetime(row, format = "%Y%m%d").date()
        d2 = dt.date(2016,4,30)
        delta = d1 - d2
        return delta.days
    
def getDaysDistance2(row):
    if (np.isnan(row)) or (row==-1):
        return -1
    else:
        d1 = pd.to_datetime(row, format = "%Y%m%d").date()
        d2 = dt.date(2016,3,31)
        delta = d1 - d2
        return delta.days
    
def getDaysDistance3(row):
    if (np.isnan(row)) or (row==-1):
        return -1
    else:
        d1 = pd.to_datetime(row, format = "%Y%m%d").date()
        d2 = dt.date(2016,2,29)
        delta = d1 - d2
        return delta.days
    
# 處理數據集3，處理時間屬性，顯示時間是第幾周
dataset3['day_of_week']  = dataset3.date_received.apply(getWeekday)
dataset3['day_of_month'] = dataset3.date_received.apply(getMonth).astype('int64')
dataset3['days_distance']= dataset3.date_received.apply(getDaysDistance1).astype('int64')
dataset3['discount_man'] = dataset3.discount_rate.apply(get_discount_man)
dataset3['discount_jian']= dataset3.discount_rate.apply(get_discount_jian)
dataset3['is_man_jian']  = dataset3.discount_rate.apply(is_man_jian)
dataset3['discount_rate']= dataset3.discount_rate.apply(calc_discount_rate)
d = dataset3[['coupon_id']]
d['coupon_count'] = 1
d = d.groupby('coupon_id').agg('sum').reset_index()
dataset3 = pd.merge(dataset3,d,on='coupon_id',how='left')
dataset3.to_csv('data/ml100marathon-02-01/coupon3_feature.csv',index=None)
 
# 數據集2
dataset2['day_of_week']  = dataset2.date_received.apply(getWeekday)
dataset2['day_of_month'] = dataset2.date_received.apply(getMonth).astype('int64')
dataset2['days_distance']= dataset2.date_received.apply(getDaysDistance2).astype('int64')
dataset2['discount_man'] = dataset2.discount_rate.apply(get_discount_man)
dataset2['discount_jian']= dataset2.discount_rate.apply(get_discount_jian)
dataset2['is_man_jian']  = dataset2.discount_rate.apply(is_man_jian)
dataset2['discount_rate']= dataset2.discount_rate.apply(calc_discount_rate)
d = dataset2[['coupon_id']]
d['coupon_count'] = 1
d = d.groupby('coupon_id').agg('sum').reset_index()
dataset2 = pd.merge(dataset2,d,on='coupon_id',how='left')
dataset2.to_csv('data/ml100marathon-02-01/coupon2_feature.csv',index=None)
 
# 數據集1
dataset1['day_of_week']  = dataset1.date_received.apply(getWeekday)
dataset1['day_of_month'] = dataset1.date_received.apply(getMonth).astype('int64')
dataset1['days_distance']= dataset1.date_received.apply(getDaysDistance3).astype('int64')
dataset1['discount_man'] = dataset1.discount_rate.apply(get_discount_man)
dataset1['discount_jian']= dataset1.discount_rate.apply(get_discount_jian)
dataset1['is_man_jian']  = dataset1.discount_rate.apply(is_man_jian)
dataset1['discount_rate']= dataset1.discount_rate.apply(calc_discount_rate)
d = dataset1[['coupon_id']]
d['coupon_count'] = 1
d = d.groupby('coupon_id').agg('sum').reset_index()
dataset1 = pd.merge(dataset1,d,on='coupon_id',how='left')
dataset1.to_csv('data/ml100marathon-02-01/coupon1_feature.csv',index=None)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

In [8]:
#提取商戶相關特徵
merchant3 = feature3[['merchant_id','coupon_id','distance','date_received','date']]
t = merchant3[['merchant_id']]
# 刪除重復的行數據
t.drop_duplicates(inplace=True)
# 顯示賣出的商品,以及賣出的數量
# []用來強調條件\或者新建一列並賦值   [[]]用來表示取哪個列來使用
t1 = merchant3[pd.notnull(merchant3.date)][['merchant_id']]
t1['total_sales'] = 1
t1 = t1.groupby('merchant_id').agg('sum').reset_index()
# 顯示使用了優惠券消費的商品，正樣本
t2 = merchant3[(pd.notnull(merchant3.date))&(pd.notnull(merchant3.coupon_id))][['merchant_id']]
t2['sales_use_coupon'] = 1
t2 = t2.groupby('merchant_id').agg('sum').reset_index()
# 提取商品優惠券的總數量
t3 = merchant3[pd.notnull(merchant3.coupon_id)][['merchant_id']]
t3['total_coupon'] = 1
t3 = t3.groupby('merchant_id').agg('sum').reset_index()
# 提取銷量與距離的關係
# 把數據中的空值全部替換為 -1
t4 = merchant3[(pd.notnull(merchant3.date))&(pd.notnull(merchant3.coupon_id))][['merchant_id','distance']]
t4.replace('null',-1,inplace=True)
t4.distance = t4.distance.astype('float')
t4.replace(-1,np.nan,inplace=True)
# 提取用戶和商店距離的最小值
t5 = t4.groupby('merchant_id').agg('min').reset_index()
t5.rename(columns={'distance':'merchant_min_distance'},inplace = True)
# 提取用戶和商店距離的最大值
t6 = t4.groupby('merchant_id').agg('max').reset_index()
t5.rename(columns={'distance':'merchant_max_distance'},inplace = True)
# 提取用戶和商品距離的平均值
t7 = t4.groupby('merchant_id').agg('mean').reset_index()
t7.rename(columns={'distance':'merchant_mean_distance'},inplace = True)
# 提取用戶與商品距離的中位數
t8 = t4.groupby('merchant_id').agg('median').reset_index()
# 把特徵集合入一張表裡
merchant3_feature = pd.merge(t,t1,on='merchant_id',how='left')
merchant3_feature = pd.merge(merchant3_feature,t2,on='merchant_id',how='left')
merchant3_feature = pd.merge(merchant3_feature,t3,on='merchant_id',how='left')
merchant3_feature = pd.merge(merchant3_feature,t5,on='merchant_id',how='left')
merchant3_feature = pd.merge(merchant3_feature,t6,on='merchant_id',how='left')
merchant3_feature = pd.merge(merchant3_feature,t7,on='merchant_id',how='left')
merchant3_feature = pd.merge(merchant3_feature,t8,on='merchant_id',how='left')
# merchant3_feature.head()
# 替換數據中的NAN為0，便於計算優惠券的使用率以及其他信息
# 優惠券的使用率、賣出的商品中使用優惠券的佔比
merchant3_feature.sales_use_coupon = merchant3_feature.sales_use_coupon.replace(np.nan,0)
merchant3_feature['merchant_coupon_transfer_rate'] = merchant3_feature.sales_use_coupon.astype('float') / merchant3_feature.total_sales
merchant3_feature['coupon_rate'] = merchant3_feature.sales_use_coupon.astype('float') / merchant3_feature.total_sales
merchant3_feature.total_coupon = merchant3_feature.total_coupon.replace(np.nan,0)
merchant3_feature.to_csv('data/ml100marathon-02-01/merchant3_feature.csv',index=None)
 
 
#對feature2進行操作
merchant2 = feature2[['merchant_id','coupon_id','distance','date_received','date']]
t = merchant2[['merchant_id']]
# 刪除重復的行數據
t.drop_duplicates(inplace=True)
# 顯示賣出的商品,以及賣出的數量
# []用來強調條件\或者新建一列並賦值   [[]]用來表示取哪個列來使用
t1 = merchant2[pd.notnull(merchant2.date)][['merchant_id']]
t1['total_sales'] = 1
t1 = t1.groupby('merchant_id').agg('sum').reset_index()
# 顯示使用了優惠券消費的商品，正樣本
t2 = merchant2[(pd.notnull(merchant2.date))&(pd.notnull(merchant2.coupon_id))][['merchant_id']]
t2['sales_use_coupon'] = 1
t2 = t2.groupby('merchant_id').agg('sum').reset_index()
# 提取商品優惠券的總數量
t3 = merchant2[pd.notnull(merchant2.coupon_id)][['merchant_id']]
t3['total_coupon'] = 1
t3 = t3.groupby('merchant_id').agg('sum').reset_index()
# 提取銷量與距離的關係
# 把數據中的空值全部替換為 -1
t4 = merchant2[(pd.notnull(merchant2.date))&(pd.notnull(merchant2.coupon_id))][['merchant_id','distance']]
t4.replace('null',-1,inplace=True)
t4.distance = t4.distance.astype('float')
t4.replace(-1,np.nan,inplace=True)
# 提取用戶和商店距離的最小值
t5 = t4.groupby('merchant_id').agg('min').reset_index()
t5.rename(columns={'distance':'merchant_min_distance'},inplace = True)
# 提取用戶和商店距離的最大值
t6 = t4.groupby('merchant_id').agg('max').reset_index()
t5.rename(columns={'distance':'merchant_max_distance'},inplace = True)
# 提取用戶和商品距離的平均值
t7 = t4.groupby('merchant_id').agg('mean').reset_index()
t7.rename(columns={'distance':'merchant_mean_distance'},inplace = True)
# 提取用戶與商品距離的中位數
t8 = t4.groupby('merchant_id').agg('median').reset_index()
# 把特徵集合入一張表裡
merchant2_feature = pd.merge(t,t1,on='merchant_id',how='left')
merchant2_feature = pd.merge(merchant2_feature,t2,on='merchant_id',how='left')
merchant2_feature = pd.merge(merchant2_feature,t3,on='merchant_id',how='left')
merchant2_feature = pd.merge(merchant2_feature,t5,on='merchant_id',how='left')
merchant2_feature = pd.merge(merchant2_feature,t6,on='merchant_id',how='left')
merchant2_feature = pd.merge(merchant2_feature,t7,on='merchant_id',how='left')
merchant2_feature = pd.merge(merchant2_feature,t8,on='merchant_id',how='left')
# merchant3_feature.head()
# 替換數據中的NAN為0，便於計算優惠券的使用率以及其他信息
# 優惠券的使用率、賣出的商品中使用優惠券的佔比
merchant2_feature.sales_use_coupon = merchant2_feature.sales_use_coupon.replace(np.nan,0)
merchant2_feature['merchant_coupon_transfer_rate'] = merchant2_feature.sales_use_coupon.astype('float') / merchant2_feature.total_sales
merchant2_feature['coupon_rate'] = merchant2_feature.sales_use_coupon.astype('float') / merchant2_feature.total_sales
merchant2_feature.total_coupon = merchant2_feature.total_coupon.replace(np.nan,0)
merchant2_feature.to_csv('data/ml100marathon-02-01/merchant2_feature.csv',index=None)
 
 
 
# 對feature1進行操作
merchant1 = feature1[['merchant_id','coupon_id','distance','date_received','date']]
t = merchant1[['merchant_id']]
# 刪除重復的行數據
t.drop_duplicates(inplace=True)
# 顯示賣出的商品,以及賣出的數量
# []用來強調條件\或者新建一列並賦值   [[]]用來表示取哪個列來使用
t1 = merchant1[pd.notnull(merchant1.date)][['merchant_id']]
t1['total_sales'] = 1
t1 = t1.groupby('merchant_id').agg('sum').reset_index()
# 顯示使用了優惠券消費的商品，正樣本
t2 = merchant1[(pd.notnull(merchant1.date))&(pd.notnull(merchant1.coupon_id))][['merchant_id']]
t2['sales_use_coupon'] = 1
t2 = t2.groupby('merchant_id').agg('sum').reset_index()
# 提取商品優惠券的總數量
t3 = merchant1[pd.notnull(merchant1.coupon_id)][['merchant_id']]
t3['total_coupon'] = 1
t3 = t3.groupby('merchant_id').agg('sum').reset_index()
# 提取銷量與距離的關係
# 把數據中的空值全部替換為 -1
t4 = merchant1[(pd.notnull(merchant1.date))&(pd.notnull(merchant1.coupon_id))][['merchant_id','distance']]
t4.replace('null',-1,inplace=True)
t4.distance = t4.distance.astype('float')
t4.replace(-1,np.nan,inplace=True)
# 提取用戶和商店距離的最小值
t5 = t4.groupby('merchant_id').agg('min').reset_index()
t5.rename(columns={'distance':'merchant_min_distance'},inplace = True)
# 提取用戶和商店距離的最大值
t6 = t4.groupby('merchant_id').agg('max').reset_index()
t5.rename(columns={'distance':'merchant_max_distance'},inplace = True)
# 提取用戶和商品距離的平均值
t7 = t4.groupby('merchant_id').agg('mean').reset_index()
t7.rename(columns={'distance':'merchant_mean_distance'},inplace = True)
# 提取用戶與商品距離的中位數
t8 = t4.groupby('merchant_id').agg('median').reset_index()
# 把特徵集合入一張表裡
merchant1_feature = pd.merge(t,t1,on='merchant_id',how='left')
merchant1_feature = pd.merge(merchant1_feature,t2,on='merchant_id',how='left')
merchant1_feature = pd.merge(merchant1_feature,t3,on='merchant_id',how='left')
merchant1_feature = pd.merge(merchant1_feature,t5,on='merchant_id',how='left')
merchant1_feature = pd.merge(merchant1_feature,t6,on='merchant_id',how='left')
merchant1_feature = pd.merge(merchant1_feature,t7,on='merchant_id',how='left')
merchant1_feature = pd.merge(merchant1_feature,t8,on='merchant_id',how='left')
# merchant3_feature.head()
# 替換數據中的NAN為0，便於計算優惠券的使用率以及其他信息
# 優惠券的使用率、賣出的商品中使用優惠券的佔比
merchant1_feature.sales_use_coupon = merchant1_feature.sales_use_coupon.replace(np.nan,0)
merchant1_feature['merchant_coupon_transfer_rate'] = merchant1_feature.sales_use_coupon.astype('float') / merchant1_feature.total_sales
merchant1_feature['coupon_rate'] = merchant1_feature.sales_use_coupon.astype('float') / merchant1_feature.total_sales
merchant1_feature.total_coupon = merchant1_feature.total_coupon.replace(np.nan,0)
merchant1_feature.to_csv('data/ml100marathon-02-01/merchant1_feature.csv',index=None)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [9]:
#提取用戶的相關特徵
#用戶的相關信息：
    #count_merchant
    #user_avg_distance,user_min_distance,user_max_distance
    #buy_use_coupon,buy_total,coupon_received
    #buy_use_coupon/coupon_received
    #buy_use_coupon/buy_total
    #user_date_datereceived_gap

def get_user_date_datereceived_gap(s):
    s = s.split(':')
    if s[0]=='null':
        return 0
    if s[0]=='nan' :
        return 0
    if s[1]=='null':
        return 0
    if s[1]=='nan' :
        return 0
    else:
        return 0
        #return(dt.date(int(s[0][0:4]),int(s[0][4:6]),int(s[0][6:8])) - dt.date(int(s[1][0:4]),int(s[1][4:6]),int(s[1][6:8]))).days


# 數據集3的處理
user3 = feature3[['user_id','merchant_id','coupon_id','discount_rate','distance','date_received','date']]
t = user3[['user_id']]
# 去掉數據中重復的用戶ID
t.drop_duplicates(inplace=True)
# 用戶購買商品的種類
t1 = user3[pd.notnull(user3.date)][['user_id','merchant_id']]
t1.drop_duplicates(inplace=True)
t1.merchant_id = 1
t1 = t1.groupby('user_id').agg('sum').reset_index()
t1.rename(columns={'merchant_id':'count_merchant'},inplace=True)
# 使用了優惠券購買商品的用戶id和距離
t2 = user3[(pd.notnull(user3.date))&(pd.notnull(user3.coupon_id))][['user_id','distance']]
t2.replace('null',-1,inplace=True)
t2.distance = t2.distance.astype('float')
t2.replace(-1,np.nan,inplace=True)
# 得到使用優惠券購買商品的用戶距商店的最短距離
t3 = t2.groupby('user_id').agg('min').reset_index()
t3.rename(columns={'distance':'user_min_dsitance'},inplace=True)
# 最大距離
t4 = t2.groupby('user_id').agg('max').reset_index()
t4.rename(columns={'distance':'user_max_distance'},inplace=True)
# 平均距離
t5 = t2.groupby('user_id').agg('mean').reset_index()
t5.rename(columns={'distance':'user_mean_distance'},inplace=True)
# 中位數距離
t6 = t2.groupby('user_id').agg('median').reset_index()
t6.rename(columns={'distance':'user_median_distance'},inplace=True)
# 每個用戶使用優惠券購買的商品數量
t7 = user3[(pd.notnull(user3.date))&(pd.notnull(user3.coupon_id))][['user_id']]
t7['buy_use_coupon'] = 1
t7 = t7.groupby('user_id').agg('sum').reset_index()
# 購買商品的總數
t8 = user3[pd.notnull(user3.date)][['user_id']]
t8['buy_total'] = 1
t8 = t8.groupby('user_id').agg('sum').reset_index()
# 接收優惠券的總數
t9 = user3[pd.notnull(user3.coupon_id)][['user_id']]
t9['coupon_received'] = 1
t9 = t9.groupby('user_id').agg('sum').reset_index()
# 收到優惠券的日期和使用之間的距離
t10 = user3[(pd.notnull(user3.date_received))&(pd.notnull(user3.date))][['user_id','date_received','date']]
t10['user_date_datereceived_gap'] = t10.date.to_string()+':'+t10.date_received.to_string()
t10.user_date_datereceived_gap = t10.user_date_datereceived_gap.apply(get_user_date_datereceived_gap)
t10 = t10[['user_id','user_date_datereceived_gap']]
# 將用戶優惠券使用時間的間隔取平均值
t11 = t10.groupby('user_id').agg('mean').reset_index()
t11.rename(columns={'user_date_datereceived_gap':'avg_user_date_datereceived_gap'},inplace=True)
# 間隔天數的最小值
t12 = t10.groupby('user_id').agg('min').reset_index()
t12.rename(columns={'user_date_datereceived_gap':'min_user_date_datereceived_gap'},inplace=True)
# 間隔天數的最大值
t13 = t10.groupby('user_id').agg('max').reset_index()
t13.rename(columns={'user_date_datereceived_gap':'max_user_date_datereceived_gap'},inplace=True)
# 合併特徵
user3_feature = pd.merge(t,t1,on='user_id',how='left')
user3_feature = pd.merge(user3_feature,t3,on='user_id',how='left')
user3_feature = pd.merge(user3_feature,t4,on='user_id',how='left')
user3_feature = pd.merge(user3_feature,t5,on='user_id',how='left')
user3_feature = pd.merge(user3_feature,t6,on='user_id',how='left')
user3_feature = pd.merge(user3_feature,t7,on='user_id',how='left')
user3_feature = pd.merge(user3_feature,t8,on='user_id',how='left')
user3_feature = pd.merge(user3_feature,t9,on='user_id',how='left')
user3_feature = pd.merge(user3_feature,t11,on='user_id',how='left')
user3_feature = pd.merge(user3_feature,t12,on='user_id',how='left')
user3_feature = pd.merge(user3_feature,t13,on='user_id',how='left')
user3_feature.count_merchant = user3_feature.count_merchant.replace(np.nan,0)
user3_feature.buy_user_coupon = user3_feature.buy_use_coupon.replace(np.nan,0)
user3_feature['buy_use_coupon_rate'] = user3_feature.buy_use_coupon.astype('float')/user3_feature.buy_total.astype('float')
user3_feature['user_coupon_transfer_rate'] = user3_feature.buy_use_coupon.astype('float')/user3_feature.buy_use_coupon.astype('float')
user3_feature.buy_total = user3_feature.buy_total.replace(np.nan,0)
user3_feature.coupon_received = user3_feature.coupon_received.replace(np.nan,0)
user3_feature.to_csv('data/ml100marathon-02-01/user3_feature.csv',index=None)
 
 
# 對數據集faeture2操作
user2 = feature2[['user_id','merchant_id','coupon_id','discount_rate','distance','date_received','date']]
t = user2[['user_id']]
# 去掉數據中重復的用戶ID
t.drop_duplicates(inplace=True)
# 用戶購買商品的種類
t1 = user2[pd.notnull(user2.date)][['user_id','merchant_id']]
t1.drop_duplicates(inplace=True)
t1.merchant_id = 1
t1 = t1.groupby('user_id').agg('sum').reset_index()
t1.rename(columns={'merchant_id':'count_merchant'},inplace=True)
# 使用了優惠券購買商品的用戶id和距離
t2 = user2[(pd.notnull(user2.date))&(pd.notnull(user2.coupon_id))][['user_id','distance']]
t2.replace('null',-1,inplace=True)
t2.distance = t2.distance.astype('float')
t2.replace(-1,np.nan,inplace=True)
# 得到使用優惠券購買商品的用戶距商店的最短距離
t3 = t2.groupby('user_id').agg('min').reset_index()
t3.rename(columns={'distance':'user_min_dsitance'},inplace=True)
# 最大距離
t4 = t2.groupby('user_id').agg('max').reset_index()
t4.rename(columns={'distance':'user_max_distance'},inplace=True)
# 平均距離
t5 = t2.groupby('user_id').agg('mean').reset_index()
t5.rename(columns={'distance':'user_mean_distance'},inplace=True)
# 中位數距離
t6 = t2.groupby('user_id').agg('median').reset_index()
t6.rename(columns={'distance':'user_median_distance'},inplace=True)
# 每個用戶使用優惠券購買的商品數量
t7 = user2[(pd.notnull(user2.date))&(pd.notnull(user2.coupon_id))][['user_id']]
t7['buy_use_coupon'] = 1
t7 = t7.groupby('user_id').agg('sum').reset_index()
# 購買商品的總數
t8 = user2[pd.notnull(user2.date)][['user_id']]
t8['buy_total'] = 1
t8 = t8.groupby('user_id').agg('sum').reset_index()
# 接收優惠券的總數
t9 = user2[pd.notnull(user2.coupon_id)][['user_id']]
t9['coupon_received'] = 1
t9 = t9.groupby('user_id').agg('sum').reset_index()
# 收到優惠券的日期和使用之間的距離
t10 = user2[(pd.notnull(user2.date_received))&(pd.notnull(user2.date))][['user_id','date_received','date']]
t10['user_date_datereceived_gap'] = t10.date.to_string()+':'+t10.date_received.to_string()
t10.user_date_datereceived_gap = t10.user_date_datereceived_gap.apply(get_user_date_datereceived_gap)
t10 = t10[['user_id','user_date_datereceived_gap']]
# 將用戶優惠券使用時間的間隔取平均值
t11 = t10.groupby('user_id').agg('mean').reset_index()
t11.rename(columns={'user_date_datereceived_gap':'avg_user_date_datereceived_gap'},inplace=True)
# 間隔天數的最小值
t12 = t10.groupby('user_id').agg('min').reset_index()
t12.rename(columns={'user_date_datereceived_gap':'min_user_date_datereceived_gap'},inplace=True)
# 間隔天數的最大值
t13 = t10.groupby('user_id').agg('max').reset_index()
t13.rename(columns={'user_date_datereceived_gap':'max_user_date_datereceived_gap'},inplace=True)
# 合併特徵
user2_feature = pd.merge(t,t1,on='user_id',how='left')
user2_feature = pd.merge(user2_feature,t3,on='user_id',how='left')
user2_feature = pd.merge(user2_feature,t4,on='user_id',how='left')
user2_feature = pd.merge(user2_feature,t5,on='user_id',how='left')
user2_feature = pd.merge(user2_feature,t6,on='user_id',how='left')
user2_feature = pd.merge(user2_feature,t7,on='user_id',how='left')
user2_feature = pd.merge(user2_feature,t8,on='user_id',how='left')
user2_feature = pd.merge(user2_feature,t9,on='user_id',how='left')
user2_feature = pd.merge(user2_feature,t11,on='user_id',how='left')
user2_feature = pd.merge(user2_feature,t12,on='user_id',how='left')
user2_feature = pd.merge(user2_feature,t13,on='user_id',how='left')
user2_feature.count_merchant = user2_feature.count_merchant.replace(np.nan,0)
user2_feature.buy_user_coupon = user2_feature.buy_use_coupon.replace(np.nan,0)
user2_feature['buy_use_coupon_rate'] = user2_feature.buy_use_coupon.astype('float')/user2_feature.buy_total.astype('float')
user2_feature['user_coupon_transfer_rate'] = user2_feature.buy_use_coupon.astype('float')/user2_feature.buy_use_coupon.astype('float')
user2_feature.buy_total = user2_feature.buy_total.replace(np.nan,0)
user2_feature.coupon_received = user2_feature.coupon_received.replace(np.nan,0)
user2_feature.to_csv('data/ml100marathon-02-01/user2_feature.csv',index=None)
 
 
# 對數據集dataset1操作
user1 = feature1[['user_id','merchant_id','coupon_id','discount_rate','distance','date_received','date']]
t = user1[['user_id']]
# 去掉數據中重復的用戶ID
t.drop_duplicates(inplace=True)
# 用戶購買商品的種類
t1 = user1[pd.notnull(user1.date)][['user_id','merchant_id']]
t1.drop_duplicates(inplace=True)
t1.merchant_id = 1
t1 = t1.groupby('user_id').agg('sum').reset_index()
t1.rename(columns={'merchant_id':'count_merchant'},inplace=True)
# 使用了優惠券購買商品的用戶id和距離
t2 = user1[(pd.notnull(user1.date))&(pd.notnull(user1.coupon_id))][['user_id','distance']]
t2.replace('null',-1,inplace=True)
t2.distance = t2.distance.astype('float')
t2.replace(-1,np.nan,inplace=True)
# 得到使用優惠券購買商品的用戶距商店的最短距離
t3 = t2.groupby('user_id').agg('min').reset_index()
t3.rename(columns={'distance':'user_min_dsitance'},inplace=True)
# 最大距離
t4 = t2.groupby('user_id').agg('max').reset_index()
t4.rename(columns={'distance':'user_max_distance'},inplace=True)
# 平均距離
t5 = t2.groupby('user_id').agg('mean').reset_index()
t5.rename(columns={'distance':'user_mean_distance'},inplace=True)
# 中位數距離
t6 = t2.groupby('user_id').agg('median').reset_index()
t6.rename(columns={'distance':'user_median_distance'},inplace=True)
# 每個用戶使用優惠券購買的商品數量
t7 = user1[(pd.notnull(user1.date))&(pd.notnull(user1.coupon_id))][['user_id']]
t7['buy_use_coupon'] = 1
t7 = t7.groupby('user_id').agg('sum').reset_index()
# 購買商品的總數
t8 = user1[pd.notnull(user1.date)][['user_id']]
t8['buy_total'] = 1
t8 = t8.groupby('user_id').agg('sum').reset_index()
# 接收優惠券的總數
t9 = user1[pd.notnull(user1.coupon_id)][['user_id']]
t9['coupon_received'] = 1
t9 = t9.groupby('user_id').agg('sum').reset_index()
# 收到優惠券的日期和使用之間的距離
t10 = user1[(pd.notnull(user1.date_received))&(pd.notnull(user1.date))][['user_id','date_received','date']]
t10['user_date_datereceived_gap'] = t10.date.to_string()+':'+t10.date_received.to_string()
t10.user_date_datereceived_gap = t10.user_date_datereceived_gap.apply(get_user_date_datereceived_gap)
t10 = t10[['user_id','user_date_datereceived_gap']]
# 將用戶優惠券使用時間的間隔取平均值
t11 = t10.groupby('user_id').agg('mean').reset_index()
t11.rename(columns={'user_date_datereceived_gap':'avg_user_date_datereceived_gap'},inplace=True)
# 間隔天數的最小值
t12 = t10.groupby('user_id').agg('min').reset_index()
t12.rename(columns={'user_date_datereceived_gap':'min_user_date_datereceived_gap'},inplace=True)
# 間隔天數的最大值
t13 = t10.groupby('user_id').agg('max').reset_index()
t13.rename(columns={'user_date_datereceived_gap':'max_user_date_datereceived_gap'},inplace=True)
# 合併特徵
user1_feature = pd.merge(t,t1,on='user_id',how='left')
user1_feature = pd.merge(user1_feature,t3,on='user_id',how='left')
user1_feature = pd.merge(user1_feature,t4,on='user_id',how='left')
user1_feature = pd.merge(user1_feature,t5,on='user_id',how='left')
user1_feature = pd.merge(user1_feature,t6,on='user_id',how='left')
user1_feature = pd.merge(user1_feature,t7,on='user_id',how='left')
user1_feature = pd.merge(user1_feature,t8,on='user_id',how='left')
user1_feature = pd.merge(user1_feature,t9,on='user_id',how='left')
user1_feature = pd.merge(user1_feature,t11,on='user_id',how='left')
user1_feature = pd.merge(user1_feature,t12,on='user_id',how='left')
user1_feature = pd.merge(user1_feature,t13,on='user_id',how='left')
user1_feature.count_merchant = user1_feature.count_merchant.replace(np.nan,0)
user1_feature.buy_user_coupon = user1_feature.buy_use_coupon.replace(np.nan,0)
user1_feature['buy_use_coupon_rate'] = user1_feature.buy_use_coupon.astype('float')/user1_feature.buy_total.astype('float')
user1_feature['user_coupon_transfer_rate'] = user1_feature.buy_use_coupon.astype('float')/user1_feature.buy_use_coupon.astype('float')
user1_feature.buy_total = user1_feature.buy_total.replace(np.nan,0)
user1_feature.coupon_received = user1_feature.coupon_received.replace(np.nan,0)
user1_feature.to_csv('data/ml100marathon-02-01/user1_feature.csv',index=None)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [10]:
# 用戶和商店之間聯繫的特徵
# 對數據集feature3進行處理
 
# 用戶和商店之間聯繫的特徵
all_user_merchant = feature3[['user_id','merchant_id']]
all_user_merchant.drop_duplicates(inplace=True)
# 只保留銷售了商品的商戶id
t = feature3[['user_id','merchant_id','date']]
t = t[pd.notnull(t.date)][['user_id','merchant_id']]
# 用戶一共買了特定商戶多少商品
t['user_merchant_buy_total'] = 1
t = t.groupby(['user_id','merchant_id']).agg('sum').reset_index()
t.drop_duplicates(inplace=True)
t1 = feature3[['user_id','merchant_id','coupon_id']]
t1 = t1[pd.notnull(t1.coupon_id)][['user_id','merchant_id']]
# 用戶一共收到一個商戶的優惠劵數目
t['user_merchant_received'] = 1
t1 = t1.groupby(['user_id','merchant_id']).agg('sum').reset_index()
t1.drop_duplicates(inplace = True)
 
t2 = feature3[['user_id','merchant_id','date','date_received']]
t2 = t2[(pd.notnull(t2.date))&(pd.notnull(t2.date_received))][['user_id','merchant_id']]
# 用戶在一家商戶使用優惠券購買的商品數目
t2['user_merchant_buy_use_coupon'] = 1
t2 = t2.groupby(['user_id','merchant_id']).agg('sum').reset_index()
t2.drop_duplicates(inplace = True)
# 用戶在一家商家的說有記錄總數
t3 = feature3[['user_id','merchant_id']]
t3['user_merchant_any'] = 1
t3 = t3.groupby(['user_id','merchant_id']).agg('sum').reset_index()
t3.drop_duplicates(inplace=True)
# 用戶未使用優惠券購買的商品數目
t4 = feature3[['user_id','merchant_id','date','coupon_id']]
t4 = t4[(pd.notnull(t4.date))&(pd.isnull(t4.coupon_id))][['user_id','merchant_id']]
t4['user_merchant_buy_common'] = 1
t4 = t4.groupby(['user_id','merchant_id']).agg('sum').reset_index()
t4.drop_duplicates(inplace = True)
user_merchant3 = pd.merge(all_user_merchant,t,on=['user_id','merchant_id'],how='left')
user_merchant3 = pd.merge(user_merchant3,t1,on=['user_id','merchant_id'],how='left')
user_merchant3 = pd.merge(user_merchant3,t2,on=['user_id','merchant_id'],how='left')
user_merchant3 = pd.merge(user_merchant3,t3,on=['user_id','merchant_id'],how='left')
user_merchant3 = pd.merge(user_merchant3,t4,on=['user_id','merchant_id'],how='left')
# 都是針對一個商戶和一個用戶
user_merchant3['user_merchant_coupon_transfer_rate'] = user_merchant3.user_merchant_buy_use_coupon.astype('float') / user_merchant3.user_merchant_received.astype('float')
user_merchant3['user_merchant_coupon_buy_rate'] = user_merchant3.user_merchant_buy_use_coupon.astype('float')/user_merchant3.user_merchant_buy_total.astype('float')
user_merchant3['user_merchant_rate'] = user_merchant3.user_merchant_buy_total.astype('float')/user_merchant3.user_merchant_any.astype('float')
user_merchant3['user_merchant_common_buy_rate'] = user_merchant3.user_merchant_buy_common.astype('float')/user_merchant3.user_merchant_buy_total.astype('float')
user_merchant3.to_csv('data/ml100marathon-02-01/user_merchant3.csv',index=None)
 
 
# 對於數據集feature2
all_user_merchant = feature2[['user_id','merchant_id']]
all_user_merchant.drop_duplicates(inplace=True)
t = feature2[['user_id','merchant_id','date']]
t = t[pd.notnull(t.date)][['user_id','merchant_id']]
t['user_merchant_buy_total'] = 1
t = t.groupby(['user_id','merchant_id']).agg('sum').reset_index()
t.drop_duplicates(inplace=True)
t1 = feature2[['user_id','merchant_id','coupon_id']]
t1 = t1[pd.notnull(t1.coupon_id)][['user_id','merchant_id']]
t1['user_merchant_received'] = 1
t1 = t1.groupby(['user_id','merchant_id']).agg('sum').reset_index()
t1.drop_duplicates(inplace = True)
 
t2 = feature2[['user_id','merchant_id','date','date_received']]
t2 = t2[(pd.notnull(t2.date))&(pd.notnull(t2.date_received))][['user_id','merchant_id']]
t2['user_merchant_buy_use_coupon'] = 1
t2 = t2.groupby(['user_id','merchant_id']).agg('sum').reset_index()
t2.drop_duplicates(inplace=True)
 
t3 = feature2[['user_id','merchant_id']]
t3['user_merchant_any'] = 1
t3 = t3.groupby(['user_id','merchant_id']).agg('sum').reset_index()
t3.drop_duplicates(inplace=True)
 
t4 = feature2[['user_id','merchant_id','date','coupon_id']]
t4 = t4[(pd.notnull(t4.date))&(pd.isnull(t4.coupon_id))][['user_id','merchant_id']]
t4['user_merchant_buy_common'] = 1
t4 = t4.groupby(['user_id','merchant_id']).agg('sum').reset_index()
t4.drop_duplicates(inplace=True)
 
user_merchant2 = pd.merge(all_user_merchant,t,on=['user_id','merchant_id'],how='left')
user_merchant2 = pd.merge(user_merchant2,t1,on=['user_id','merchant_id'],how='left')
user_merchant2 = pd.merge(user_merchant2,t2,on=['user_id','merchant_id'],how='left')
user_merchant2 = pd.merge(user_merchant2,t3,on=['user_id','merchant_id'],how='left')
user_merchant2 = pd.merge(user_merchant2,t4,on=['user_id','merchant_id'],how='left')
 
user_merchant2.user_merchant_buy_use_coupon = user_merchant2.user_merchant_buy_use_coupon.replace(np.nan,0)
user_merchant2.user_merchant_buy_common = user_merchant2.user_merchant_buy_common.replace(np.nan,0)
 
user_merchant2['user_merchant_coupon_transfer_rate'] = user_merchant2.user_merchant_buy_use_coupon.astype('float')/user_merchant2.user_merchant_received.astype('float')
user_merchant2['user_merchant_coupon_buy_rate'] = user_merchant2.user_merchant_buy_use_coupon.astype('float')/user_merchant2.user_merchant_buy_total.astype('float')
user_merchant2['user_merchant_rate'] = user_merchant2.user_merchant_buy_total.astype('float')/user_merchant2.user_merchant_any.astype('float')
user_merchant2['user_merchant_common_buy_rate'] = user_merchant2.user_merchant_buy_common.astype('float')/user_merchant2.user_merchant_buy_total.astype('float')
user_merchant2.to_csv('data/ml100marathon-02-01/user_merchant2.csv',index=None)
 
 
# 對於數據集feature1
all_user_merchant = feature1[['user_id','merchant_id']]
all_user_merchant.drop_duplicates(inplace=True)
t = feature1[['user_id','merchant_id','date']]
t = t[pd.notnull(t.date)][['user_id','merchant_id']]
t['user_merchant_buy_total'] = 1
t = t.groupby(['user_id','merchant_id']).agg('sum').reset_index()
t.drop_duplicates(inplace=True)
t1 = feature1[['user_id','merchant_id','coupon_id']]
t1 = t1[pd.notnull(t1.coupon_id)][['user_id','merchant_id']]
t1['user_merchant_received'] = 1
t1 = t1.groupby(['user_id','merchant_id']).agg('sum').reset_index()
t1.drop_duplicates(inplace = True)
 
t2 = feature1[['user_id','merchant_id','date','date_received']]
t2 = t2[(pd.notnull(t2.date))&(pd.notnull(t2.date_received))][['user_id','merchant_id']]
t2['user_merchant_buy_use_coupon'] = 1
t2 = t2.groupby(['user_id','merchant_id']).agg('sum').reset_index()
t2.drop_duplicates(inplace=True)
 
t3 = feature1[['user_id','merchant_id']]
t3['user_merchant_any'] = 1
t3 = t3.groupby(['user_id','merchant_id']).agg('sum').reset_index()
t3.drop_duplicates(inplace=True)
 
t4 = feature1[['user_id','merchant_id','date','coupon_id']]
t4 = t4[(pd.notnull(t4.date))&(pd.isnull(t4.coupon_id))][['user_id','merchant_id']]
t4['user_merchant_buy_common'] = 1
t4 = t4.groupby(['user_id','merchant_id']).agg('sum').reset_index()
t4.drop_duplicates(inplace=True)
 
user_merchant1 = pd.merge(all_user_merchant,t,on=['user_id','merchant_id'],how='left')
user_merchant1 = pd.merge(user_merchant1,t1,on=['user_id','merchant_id'],how='left')
user_merchant1 = pd.merge(user_merchant1,t2,on=['user_id','merchant_id'],how='left')
user_merchant1 = pd.merge(user_merchant1,t3,on=['user_id','merchant_id'],how='left')
user_merchant1 = pd.merge(user_merchant1,t4,on=['user_id','merchant_id'],how='left')
 
user_merchant1.user_merchant_buy_use_coupon = user_merchant1.user_merchant_buy_use_coupon.replace(np.nan,0)
user_merchant1.user_merchant_buy_common = user_merchant1.user_merchant_buy_common.replace(np.nan,0)
 
user_merchant1['user_merchant_coupon_transfer_rate'] = user_merchant1.user_merchant_buy_use_coupon.astype('float')/user_merchant1.user_merchant_received.astype('float')
user_merchant1['user_merchant_coupon_buy_rate'] = user_merchant1.user_merchant_buy_use_coupon.astype('float')/user_merchant1.user_merchant_buy_total.astype('float')
user_merchant1['user_merchant_rate'] = user_merchant1.user_merchant_buy_total.astype('float')/user_merchant1.user_merchant_any.astype('float')
user_merchant1['user_merchant_common_buy_rate'] = user_merchant1.user_merchant_buy_common.astype('float')/user_merchant1.user_merchant_buy_total.astype('float')
user_merchant1.to_csv('data/ml100marathon-02-01/user_merchant1.csv',index=None)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.o

In [28]:
# dataset1,2,3分別是劃分時間滑窗後的所提取的特徵的組合，方便接下來的劃分訓練集和測試集
# 此次合併後dataset1,2,3的特徵類型是一樣的，然後給dataset1,2添加標籤，標籤是get_label(s)這個函數所生成的
def get_label(row):
    if np.isnan(row['date_received']):
        return -1
    if not np.isnan(row['date']):
        td = pd.to_datetime(row['date'], format='%Y%m%d') -  pd.to_datetime(row['date_received'], format='%Y%m%d')
        if td <= pd.Timedelta(15, 'D'):
            return 1
    return 0   
 
coupon3 = pd.read_csv('data/ml100marathon-02-01/coupon3_feature.csv')
merchant3 = pd.read_csv('data/ml100marathon-02-01/merchant3_feature.csv')
user3 = pd.read_csv('data/ml100marathon-02-01/user3_feature.csv')
other_feature3 = pd.read_csv('data/ml100marathon-02-01/other_feature3.csv')
user_merchant3 = pd.read_csv('data/ml100marathon-02-01/user_merchant3.csv')
dataset3 = pd.merge(coupon3,merchant3,on='merchant_id',how='left')
dataset3 = pd.merge(dataset3,user3,on='user_id',how='left')
dataset3 = pd.merge(dataset3,user_merchant3,on=['user_id','merchant_id'],how='left')
dataset3 = pd.merge(dataset3,other_feature3,on=['user_id','coupon_id','date_received'],how='left')
dataset3.drop_duplicates(inplace=True)

 
dataset3.user_merchant_buy_total = dataset3.user_merchant_buy_total.replace(np.nan,0)
dataset3.user_merchant_any = dataset3.user_merchant_any.replace(np.nan,0)
dataset3.user_merchant_received = dataset3.user_merchant_received.replace(np.nan,0)
dataset3['is_weekend'] = dataset3.day_of_week.apply(lambda x:1 if x in (6,7) else 0)
# get_dummies 进行one-hot编码
weekday_dummies = pd.get_dummies(dataset3.day_of_week)
weekday_dummies.columns = ['weekday'+str(i+1) for i in range(weekday_dummies.shape[1])]
dataset3 = pd.concat([dataset3,weekday_dummies],axis=1)
# dataset3.columns
dataset3.drop(['merchant_id','day_of_week','coupon_count'],axis=1,inplace=True)
dataset3 = dataset3.replace('null',np.nan)
dataset3.to_csv('data/ml100marathon-02-01/dataset3.csv',index=None)
 
coupon2 = pd.read_csv('data/ml100marathon-02-01/coupon2_feature.csv')
merchant2 = pd.read_csv('data/ml100marathon-02-01/merchant2_feature.csv')
user2 = pd.read_csv('data/ml100marathon-02-01/user2_feature.csv')
user_merchant2 = pd.read_csv('data/ml100marathon-02-01/user_merchant2.csv')
other_feature2 = pd.read_csv('data/ml100marathon-02-01/other_Feature2.csv')
dataset2 = pd.merge(coupon2,merchant2,on='merchant_id',how='left')
dataset2 = pd.merge(dataset2,user2,on='user_id',how='left')
dataset2 = pd.merge(dataset2,user_merchant2,on=['user_id','merchant_id'],how='left')
dataset2 = pd.merge(dataset2,other_feature2,on=['user_id','coupon_id','date_received'],how='left')
dataset2.drop_duplicates(inplace=True)
# dataset2.head()
 
dataset2.user_merchant_buy_total = dataset2.user_merchant_buy_total.replace(np.nan,0)
dataset2.user_merchant_any = dataset2.user_merchant_any.replace(np.nan,0)
dataset2.user_merchant_received = dataset2.user_merchant_received.replace(np.nan,0)
dataset2['is_weekend'] = dataset2.day_of_week.apply(lambda x:1 if x in (6,7) else 0)
weekday_dummies = pd.get_dummies(dataset2.day_of_week)
weekday_dummies.columns = ['weekday'+str(i+1) for i in range(weekday_dummies.shape[1])]
dataset2 = pd.concat([dataset2,weekday_dummies],axis=1)
#dataset2['label'] = dataset2.date.astype('str') + ':' +  dataset2.date_received.astype('str')
dataset2['label'] = dataset2.apply(get_label, axis=1)
dataset2.drop(['merchant_id','day_of_week','date','date_received','coupon_id','coupon_count'],axis=1,inplace=True)
dataset2 = dataset2.replace('null',np.nan)
dataset2.to_csv('data/ml100marathon-02-01/dataset2.csv',index=None)
 
 
coupon1 = pd.read_csv('data/ml100marathon-02-01/coupon1_feature.csv')
merchant1 = pd.read_csv('data/ml100marathon-02-01/merchant1_feature.csv')
user1 = pd.read_csv('data/ml100marathon-02-01/user1_feature.csv')
user_merchant1 = pd.read_csv('data/ml100marathon-02-01/user_merchant1.csv')
other_feature1 = pd.read_csv('data/ml100marathon-02-01/other_feature1.csv')
dataset1 = pd.merge(coupon1,merchant1,on='merchant_id',how='left')
dataset1 = pd.merge(dataset1,user1,on='user_id',how='left')
dataset1 = pd.merge(dataset1,user_merchant1,on=['user_id','merchant_id'],how='left')
dataset1 = pd.merge(dataset1,other_feature1,on=['user_id','coupon_id','date_received'],how='left')
dataset1.drop_duplicates(inplace=True)
# print dataset1.shape
 
dataset1.user_merchant_buy_total = dataset1.user_merchant_buy_total.replace(np.nan,0)
dataset1.user_merchant_any = dataset1.user_merchant_any.replace(np.nan,0)
dataset1.user_merchant_received = dataset1.user_merchant_received.replace(np.nan,0)
dataset1['is_weekend'] = dataset1.day_of_week.apply(lambda x:1 if x in (6,7) else 0)
weekday_dummies = pd.get_dummies(dataset1.day_of_week)
weekday_dummies.columns = ['weekday'+str(i+1) for i in range(weekday_dummies.shape[1])]
dataset1 = pd.concat([dataset1,weekday_dummies],axis=1)
#dataset1['label'] = dataset1.date.astype('str') + ':' +  dataset1.date_received.astype('str')
dataset1['label'] = dataset1.apply(get_label, axis=1)
dataset1.drop(['merchant_id','day_of_week','date','date_received','coupon_id','coupon_count'],axis=1,inplace=True)
dataset1 = dataset1.replace('null',np.nan)
dataset1.to_csv('data/ml100marathon-02-01/dataset1.csv',index=None)

In [29]:
import pandas as pd
import xgboost as xgb
from sklearn.preprocessing import MinMaxScaler
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
 
dataset1 = pd.read_csv('data/ml100marathon-02-01/dataset1.csv')
dataset1.label.replace(-1,0,inplace=True)
dataset2 = pd.read_csv('data/ml100marathon-02-01/dataset2.csv')
dataset2.label.replace(-1,0,inplace=True)
dataset3 = pd.read_csv('data/ml100marathon-02-01/dataset3.csv')
# 去重
dataset1.drop_duplicates(inplace=True)
dataset2.drop_duplicates(inplace=True)
dataset3.drop_duplicates(inplace=True)
 
dataset12 = pd.concat([dataset1,dataset2],axis=0)
# 再次組合成訓練集
dataset1_y = dataset1.label
dataset1_x = dataset1.drop(['user_id','label','day_gap_before','day_gap_after'],axis=1)  
# 'day_gap_before','day_gap_after' cause overfitting, 0.77
dataset2_y = dataset2.label
dataset2_x = dataset2.drop(['user_id','label','day_gap_before','day_gap_after'],axis=1)
dataset12_y = dataset12.label
dataset12_x = dataset12.drop(['user_id','label','day_gap_before','day_gap_after'],axis=1)
dataset3_preds = dataset3[['user_id','coupon_id','date_received']]
dataset3_x = dataset3.drop(['user_id','coupon_id','date_received','day_gap_before','day_gap_after','weekday8'],axis=1)
# dataset3_x = dataset3.drop(['user_id','coupon_id','date_received'],axis=1)
 
# print(dataset1_x.shape,dataset2_x.shape,dataset3_x.shape)
 
dataset1 = xgb.DMatrix(dataset1_x,label=dataset1_y)
dataset2 = xgb.DMatrix(dataset2_x,label=dataset2_y)
dataset12= xgb.DMatrix(dataset12_x,label=dataset12_y)
dataset3 = xgb.DMatrix(dataset3_x)
# 在XGBoost中，要將處理的數據存儲在對象DMatrix中，方便下一步處理

In [30]:
params={'booster':'gbtree',
        'objective': 'rank:pairwise',
        'eval_metric':'auc',
        'gamma':0.1,
        'min_child_weight':1.1,
        'max_depth':5,
        'lambda':10,
        'subsample':0.7,
        'colsample_bytree':0.7,
        'colsample_bylevel':0.7,
        'eta': 0.01,
        'tree_method':'exact',
        'seed':0,
        'nthread':12
        }
watchlist = [(dataset12,'train')]

# 模型訓練
model = xgb.train(params,dataset12,num_boost_round=3500,evals=watchlist)
 


[23:19:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[0]	train-auc:0.821032
[23:19:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[1]	train-auc:0.833978
[23:19:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[2]	train-auc:0.838165
[23:19:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[3]	train-auc:0.844852
[23:19:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[4]	train-auc:0.85016
[23:19:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[5]	train-auc:0.851194
[23:19:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[6]	train-auc:0.851044
[23:19:48] src/tree/updater_prune.cc:74: tree pruning en

[23:19:55] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[61]	train-auc:0.856141
[23:19:56] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[62]	train-auc:0.85622
[23:19:56] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[63]	train-auc:0.85628
[23:19:56] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[64]	train-auc:0.856227
[23:19:56] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[65]	train-auc:0.856629
[23:19:56] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[66]	train-auc:0.856818
[23:19:56] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[67]	train-auc:0.856759
[23:19:56] src/tree/updater_prune.cc:74: tree prun

[23:20:04] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[122]	train-auc:0.861071
[23:20:04] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[123]	train-auc:0.861117
[23:20:05] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[124]	train-auc:0.861109
[23:20:05] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[125]	train-auc:0.861104
[23:20:05] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[126]	train-auc:0.861172
[23:20:05] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[127]	train-auc:0.861224
[23:20:05] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[128]	train-auc:0.861238
[23:20:05] src/tree/updater_prune.cc:74: 

[23:20:13] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[182]	train-auc:0.863481
[23:20:13] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[183]	train-auc:0.863556
[23:20:13] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[184]	train-auc:0.863588
[23:20:13] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[185]	train-auc:0.863588
[23:20:13] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[186]	train-auc:0.863598
[23:20:13] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[187]	train-auc:0.863655
[23:20:14] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[188]	train-auc:0.863655
[23:20:14] src/tree/updater_prune.cc:74: 

[23:20:21] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[242]	train-auc:0.865359
[23:20:21] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[243]	train-auc:0.865377
[23:20:21] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[244]	train-auc:0.865374
[23:20:22] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[245]	train-auc:0.865436
[23:20:22] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[246]	train-auc:0.865467
[23:20:22] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[247]	train-auc:0.865566
[23:20:22] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[248]	train-auc:0.865613
[23:20:22] src/tree/updater_prune.cc:74: 

[23:20:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[302]	train-auc:0.866931
[23:20:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[303]	train-auc:0.866954
[23:20:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[304]	train-auc:0.866971
[23:20:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[305]	train-auc:0.866982
[23:20:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[306]	train-auc:0.86701
[23:20:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[307]	train-auc:0.867033
[23:20:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[308]	train-auc:0.867055
[23:20:31] src/tree/updater_prune.cc:74: t

[23:20:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[362]	train-auc:0.868059
[23:20:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[363]	train-auc:0.868112
[23:20:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[364]	train-auc:0.868135
[23:20:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[365]	train-auc:0.868142
[23:20:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[366]	train-auc:0.868165
[23:20:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[367]	train-auc:0.8682
[23:20:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[368]	train-auc:0.868212
[23:20:39] src/tree/updater_prune.cc:74: tr

[23:20:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[422]	train-auc:0.86935
[23:20:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[423]	train-auc:0.869387
[23:20:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[424]	train-auc:0.869412
[23:20:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[425]	train-auc:0.869423
[23:20:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[426]	train-auc:0.869423
[23:20:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[427]	train-auc:0.869426
[23:20:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[428]	train-auc:0.869443
[23:20:48] src/tree/updater_prune.cc:74: t

[23:20:56] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[482]	train-auc:0.870599
[23:20:56] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[483]	train-auc:0.870621
[23:20:56] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[484]	train-auc:0.870631
[23:20:56] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[485]	train-auc:0.87065
[23:20:56] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[486]	train-auc:0.870683
[23:20:56] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[487]	train-auc:0.870698
[23:20:56] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[488]	train-auc:0.87071
[23:20:57] src/tree/updater_prune.cc:74: tr

[23:21:04] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[542]	train-auc:0.87176
[23:21:04] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[543]	train-auc:0.871771
[23:21:05] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[544]	train-auc:0.871799
[23:21:05] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[545]	train-auc:0.871843
[23:21:05] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[546]	train-auc:0.871855
[23:21:05] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[547]	train-auc:0.871859
[23:21:05] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[548]	train-auc:0.871874
[23:21:05] src/tree/updater_prune.cc:74: t

[23:21:13] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[602]	train-auc:0.872847
[23:21:13] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[603]	train-auc:0.872861
[23:21:14] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[604]	train-auc:0.872879
[23:21:14] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[605]	train-auc:0.872907
[23:21:14] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[606]	train-auc:0.872926
[23:21:14] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[607]	train-auc:0.872959
[23:21:14] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[608]	train-auc:0.872976
[23:21:14] src/tree/updater_prune.cc:74: 

[23:21:22] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[662]	train-auc:0.873824
[23:21:23] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[663]	train-auc:0.873843
[23:21:23] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[664]	train-auc:0.873873
[23:21:23] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[665]	train-auc:0.873882
[23:21:23] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[666]	train-auc:0.873895
[23:21:23] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[667]	train-auc:0.873925
[23:21:23] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[668]	train-auc:0.873948
[23:21:23] src/tree/updater_prune.cc:74: 

[23:21:31] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[722]	train-auc:0.874854
[23:21:32] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[723]	train-auc:0.874867
[23:21:32] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[724]	train-auc:0.874879
[23:21:32] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[725]	train-auc:0.874903
[23:21:32] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[726]	train-auc:0.874921
[23:21:32] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 2 pruned nodes, max_depth=5
[727]	train-auc:0.874934
[23:21:32] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[728]	train-auc:0.874963
[23:21:33] src/tree/updater_prune.cc:74: 

[23:21:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 0 pruned nodes, max_depth=5
[782]	train-auc:0.875784
[23:21:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[783]	train-auc:0.875797
[23:21:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[784]	train-auc:0.875812
[23:21:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[785]	train-auc:0.875838
[23:21:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[786]	train-auc:0.875851
[23:21:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[787]	train-auc:0.875864
[23:21:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[788]	train-auc:0.875886
[23:21:42] src/tree/updater_prune.cc:74: 

[23:21:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[842]	train-auc:0.876629
[23:21:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[843]	train-auc:0.87664
[23:21:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[844]	train-auc:0.876659
[23:21:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[845]	train-auc:0.876668
[23:21:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[846]	train-auc:0.876685
[23:21:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[847]	train-auc:0.876701
[23:21:51] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[848]	train-auc:0.876727
[23:21:51] src/tree/updater_prune.cc:74: t

[23:21:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[902]	train-auc:0.87752
[23:21:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[903]	train-auc:0.877534
[23:21:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[904]	train-auc:0.877555
[23:21:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[905]	train-auc:0.877577
[23:21:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[906]	train-auc:0.877596
[23:21:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[907]	train-auc:0.877621
[23:22:00] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 44 extra nodes, 0 pruned nodes, max_depth=5
[908]	train-auc:0.877634
[23:22:00] src/tree/updater_prune.cc:74: t

[23:22:08] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[962]	train-auc:0.878306
[23:22:08] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[963]	train-auc:0.878321
[23:22:08] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[964]	train-auc:0.878329
[23:22:08] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[965]	train-auc:0.878343
[23:22:08] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[966]	train-auc:0.878356
[23:22:08] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[967]	train-auc:0.878371
[23:22:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[968]	train-auc:0.878392
[23:22:09] src/tree/updater_prune.cc:74: 

[1021]	train-auc:0.878963
[23:22:17] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1022]	train-auc:0.878985
[23:22:17] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[1023]	train-auc:0.878992
[23:22:18] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1024]	train-auc:0.879001
[23:22:18] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1025]	train-auc:0.879012
[23:22:18] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1026]	train-auc:0.879026
[23:22:18] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[1027]	train-auc:0.879045
[23:22:18] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1028]	train-auc:0.879058
[23:22:1

[1081]	train-auc:0.879664
[23:22:27] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1082]	train-auc:0.879672
[23:22:27] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[1083]	train-auc:0.879683
[23:22:27] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1084]	train-auc:0.879698
[23:22:27] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1085]	train-auc:0.879709
[23:22:28] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1086]	train-auc:0.879721
[23:22:28] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1087]	train-auc:0.879732
[23:22:28] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 0 pruned nodes, max_depth=5
[1088]	train-auc:0.879742
[23:22:2

[23:22:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[1142]	train-auc:0.880334
[23:22:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 2 pruned nodes, max_depth=5
[1143]	train-auc:0.880341
[23:22:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[1144]	train-auc:0.880354
[23:22:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1145]	train-auc:0.88036
[23:22:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1146]	train-auc:0.880373
[23:22:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1147]	train-auc:0.88039
[23:22:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[1148]	train-auc:0.880408
[23:22:38] src/tree/updater_prune.cc

[23:22:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1202]	train-auc:0.88095
[23:22:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1203]	train-auc:0.880959
[23:22:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1204]	train-auc:0.880965
[23:22:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1205]	train-auc:0.880975
[23:22:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1206]	train-auc:0.880989
[23:22:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1207]	train-auc:0.881
[23:22:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1208]	train-auc:0.881008
[23:22:47] src/tree/updater_prune.cc:7

[23:22:55] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1262]	train-auc:0.881548
[23:22:55] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[1263]	train-auc:0.881556
[23:22:55] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1264]	train-auc:0.881572
[23:22:55] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1265]	train-auc:0.881584
[23:22:55] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[1266]	train-auc:0.881593
[23:22:56] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1267]	train-auc:0.881602
[23:22:56] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 40 extra nodes, 0 pruned nodes, max_depth=5
[1268]	train-auc:0.881606
[23:22:56] src/tree/updater_prune.

[23:23:04] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1322]	train-auc:0.882091
[23:23:04] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1323]	train-auc:0.8821
[23:23:04] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[1324]	train-auc:0.882108
[23:23:04] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[1325]	train-auc:0.882123
[23:23:05] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1326]	train-auc:0.882134
[23:23:05] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1327]	train-auc:0.882146
[23:23:05] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1328]	train-auc:0.882157
[23:23:05] src/tree/updater_prune.cc

[23:23:13] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 2 pruned nodes, max_depth=5
[1382]	train-auc:0.882606
[23:23:13] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1383]	train-auc:0.882616
[23:23:13] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1384]	train-auc:0.882627
[23:23:14] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 38 extra nodes, 0 pruned nodes, max_depth=5
[1385]	train-auc:0.882631
[23:23:14] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1386]	train-auc:0.882639
[23:23:14] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1387]	train-auc:0.882649
[23:23:14] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1388]	train-auc:0.882659
[23:23:14] src/tree/updater_prune.

[1441]	train-auc:0.883077
[23:23:22] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1442]	train-auc:0.883081
[23:23:23] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[1443]	train-auc:0.883087
[23:23:23] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1444]	train-auc:0.8831
[23:23:23] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1445]	train-auc:0.883108
[23:23:23] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 2 pruned nodes, max_depth=5
[1446]	train-auc:0.883113
[23:23:23] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1447]	train-auc:0.88312
[23:23:23] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[1448]	train-auc:0.883128
[23:23:23] 

[23:23:32] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[1502]	train-auc:0.883533
[23:23:32] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1503]	train-auc:0.883541
[23:23:32] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 2 pruned nodes, max_depth=5
[1504]	train-auc:0.883545
[23:23:32] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1505]	train-auc:0.883556
[23:23:32] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[1506]	train-auc:0.883564
[23:23:32] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1507]	train-auc:0.883574
[23:23:33] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1508]	train-auc:0.883589
[23:23:33] src/tree/updater_prune.

[1561]	train-auc:0.883972
[23:23:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1562]	train-auc:0.883978
[23:23:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1563]	train-auc:0.883985
[23:23:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1564]	train-auc:0.883995
[23:23:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[1565]	train-auc:0.884005
[23:23:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 2 pruned nodes, max_depth=5
[1566]	train-auc:0.88401
[23:23:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1567]	train-auc:0.884018
[23:23:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[1568]	train-auc:0.884024
[23:23:42

[23:23:51] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1622]	train-auc:0.884374
[23:23:51] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 2 pruned nodes, max_depth=5
[1623]	train-auc:0.884382
[23:23:51] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1624]	train-auc:0.88439
[23:23:51] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 2 pruned nodes, max_depth=5
[1625]	train-auc:0.884395
[23:23:51] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1626]	train-auc:0.884405
[23:23:51] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[1627]	train-auc:0.884412
[23:23:52] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 50 extra nodes, 0 pruned nodes, max_depth=5
[1628]	train-auc:0.884418
[23:23:52] src/tree/updater_prune.c

[1681]	train-auc:0.884818
[23:24:01] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1682]	train-auc:0.884826
[23:24:01] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 2 pruned nodes, max_depth=5
[1683]	train-auc:0.884831
[23:24:01] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1684]	train-auc:0.884838
[23:24:01] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1685]	train-auc:0.884848
[23:24:01] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1686]	train-auc:0.884856
[23:24:01] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1687]	train-auc:0.884864
[23:24:02] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1688]	train-auc:0.884871
[23:24:0

[1741]	train-auc:0.885181
[23:24:11] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1742]	train-auc:0.885184
[23:24:11] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[1743]	train-auc:0.885187
[23:24:11] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1744]	train-auc:0.885195
[23:24:11] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1745]	train-auc:0.885201
[23:24:11] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[1746]	train-auc:0.885205
[23:24:11] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 2 pruned nodes, max_depth=5
[1747]	train-auc:0.885208
[23:24:12] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 0 pruned nodes, max_depth=5
[1748]	train-auc:0.885215
[23:24:1

[1801]	train-auc:0.885584
[23:24:21] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1802]	train-auc:0.885589
[23:24:21] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[1803]	train-auc:0.885593
[23:24:21] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1804]	train-auc:0.885602
[23:24:21] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[1805]	train-auc:0.885605
[23:24:21] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1806]	train-auc:0.88561
[23:24:22] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1807]	train-auc:0.88562
[23:24:22] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 40 extra nodes, 2 pruned nodes, max_depth=5
[1808]	train-auc:0.885627
[23:24:22]

[23:24:31] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[1862]	train-auc:0.885975
[23:24:31] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1863]	train-auc:0.885981
[23:24:31] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1864]	train-auc:0.885988
[23:24:31] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[1865]	train-auc:0.885991
[23:24:31] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1866]	train-auc:0.885997
[23:24:32] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1867]	train-auc:0.886003
[23:24:32] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1868]	train-auc:0.886008
[23:24:32] src/tree/updater_prune.

[23:24:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[1922]	train-auc:0.886316
[23:24:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 44 extra nodes, 0 pruned nodes, max_depth=5
[1923]	train-auc:0.886318
[23:24:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 2 pruned nodes, max_depth=5
[1924]	train-auc:0.886321
[23:24:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1925]	train-auc:0.886328
[23:24:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1926]	train-auc:0.886338
[23:24:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 2 pruned nodes, max_depth=5
[1927]	train-auc:0.886343
[23:24:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1928]	train-auc:0.88635
[23:24:41] src/tree/updater_prune.c

[23:24:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[1982]	train-auc:0.886648
[23:24:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1983]	train-auc:0.886658
[23:24:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[1984]	train-auc:0.886664
[23:24:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[1985]	train-auc:0.886668
[23:24:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[1986]	train-auc:0.886673
[23:24:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[1987]	train-auc:0.886679
[23:24:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[1988]	train-auc:0.886683
[23:24:50] src/tree/updater_prune.

[23:24:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 50 extra nodes, 2 pruned nodes, max_depth=5
[2042]	train-auc:0.886959
[23:24:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 0 pruned nodes, max_depth=5
[2043]	train-auc:0.886961
[23:24:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2044]	train-auc:0.886964
[23:24:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[2045]	train-auc:0.88697
[23:24:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2046]	train-auc:0.886976
[23:24:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2047]	train-auc:0.886983
[23:25:00] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2048]	train-auc:0.886987
[23:25:00] src/tree/updater_prune.c

[23:25:08] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2102]	train-auc:0.887247
[23:25:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2103]	train-auc:0.887253
[23:25:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[2104]	train-auc:0.887258
[23:25:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[2105]	train-auc:0.887262
[23:25:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[2106]	train-auc:0.887267
[23:25:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2107]	train-auc:0.887272
[23:25:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[2108]	train-auc:0.887276
[23:25:10] src/tree/updater_prune.

[2161]	train-auc:0.887531
[23:25:18] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2162]	train-auc:0.887538
[23:25:18] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 2 pruned nodes, max_depth=5
[2163]	train-auc:0.887542
[23:25:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2164]	train-auc:0.887547
[23:25:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2165]	train-auc:0.887553
[23:25:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2166]	train-auc:0.887559
[23:25:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 2 pruned nodes, max_depth=5
[2167]	train-auc:0.887565
[23:25:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2168]	train-auc:0.887571
[23:25:1

[23:25:28] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 2 pruned nodes, max_depth=5
[2222]	train-auc:0.887819
[23:25:28] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2223]	train-auc:0.887825
[23:25:28] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2224]	train-auc:0.88783
[23:25:29] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2225]	train-auc:0.887835
[23:25:29] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2226]	train-auc:0.887839
[23:25:29] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2227]	train-auc:0.887843
[23:25:29] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 2 pruned nodes, max_depth=5
[2228]	train-auc:0.887847
[23:25:29] src/tree/updater_prune.c

[2281]	train-auc:0.888067
[23:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 34 extra nodes, 0 pruned nodes, max_depth=5
[2282]	train-auc:0.888068
[23:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2283]	train-auc:0.888074
[23:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 2 pruned nodes, max_depth=5
[2284]	train-auc:0.888078
[23:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2285]	train-auc:0.888081
[23:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 46 extra nodes, 0 pruned nodes, max_depth=5
[2286]	train-auc:0.888085
[23:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 46 extra nodes, 0 pruned nodes, max_depth=5
[2287]	train-auc:0.888088
[23:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[2288]	train-auc:0.888091
[23:25:3

[23:25:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 50 extra nodes, 0 pruned nodes, max_depth=5
[2342]	train-auc:0.888346
[23:25:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 2 pruned nodes, max_depth=5
[2343]	train-auc:0.888348
[23:25:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 40 extra nodes, 0 pruned nodes, max_depth=5
[2344]	train-auc:0.888352
[23:25:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 0 pruned nodes, max_depth=5
[2345]	train-auc:0.888357
[23:25:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2346]	train-auc:0.888361
[23:25:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2347]	train-auc:0.888365
[23:25:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 0 pruned nodes, max_depth=5
[2348]	train-auc:0.88837
[23:25:50] src/tree/updater_prune.c

[23:25:58] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2402]	train-auc:0.888612
[23:25:58] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2403]	train-auc:0.888618
[23:25:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2404]	train-auc:0.888622
[23:25:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 46 extra nodes, 0 pruned nodes, max_depth=5
[2405]	train-auc:0.888626
[23:25:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[2406]	train-auc:0.888632
[23:25:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2407]	train-auc:0.888636
[23:25:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 2 pruned nodes, max_depth=5
[2408]	train-auc:0.888639
[23:25:59] src/tree/updater_prune.

[2461]	train-auc:0.888844
[23:26:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2462]	train-auc:0.888846
[23:26:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[2463]	train-auc:0.888848
[23:26:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 2 pruned nodes, max_depth=5
[2464]	train-auc:0.88885
[23:26:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[2465]	train-auc:0.888855
[23:26:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[2466]	train-auc:0.888858
[23:26:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2467]	train-auc:0.888861
[23:26:10] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2468]	train-auc:0.888869
[23:26:10

[2521]	train-auc:0.889087
[23:26:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2522]	train-auc:0.889092
[23:26:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[2523]	train-auc:0.889096
[23:26:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[2524]	train-auc:0.889101
[23:26:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2525]	train-auc:0.889104
[23:26:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 0 pruned nodes, max_depth=5
[2526]	train-auc:0.889108
[23:26:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 46 extra nodes, 0 pruned nodes, max_depth=5
[2527]	train-auc:0.88911
[23:26:20] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 50 extra nodes, 0 pruned nodes, max_depth=5
[2528]	train-auc:0.889115
[23:26:20

[23:26:29] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 2 pruned nodes, max_depth=5
[2582]	train-auc:0.889333
[23:26:29] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 0 pruned nodes, max_depth=5
[2583]	train-auc:0.889337
[23:26:29] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2584]	train-auc:0.88934
[23:26:29] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[2585]	train-auc:0.889345
[23:26:29] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 42 extra nodes, 0 pruned nodes, max_depth=5
[2586]	train-auc:0.889347
[23:26:29] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2587]	train-auc:0.889352
[23:26:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[2588]	train-auc:0.889357
[23:26:30] src/tree/updater_prune.c

[23:26:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[2642]	train-auc:0.889556
[23:26:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 2 pruned nodes, max_depth=5
[2643]	train-auc:0.88956
[23:26:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 2 pruned nodes, max_depth=5
[2644]	train-auc:0.889565
[23:26:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[2645]	train-auc:0.889567
[23:26:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[2646]	train-auc:0.889571
[23:26:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 46 extra nodes, 0 pruned nodes, max_depth=5
[2647]	train-auc:0.889572
[23:26:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2648]	train-auc:0.889576
[23:26:39] src/tree/updater_prune.c

[2701]	train-auc:0.889777
[23:26:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[2702]	train-auc:0.889781
[23:26:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2703]	train-auc:0.889783
[23:26:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2704]	train-auc:0.889785
[23:26:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2705]	train-auc:0.889789
[23:26:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2706]	train-auc:0.889793
[23:26:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2707]	train-auc:0.889796
[23:26:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 2 pruned nodes, max_depth=5
[2708]	train-auc:0.889799
[23:26:4

[23:26:58] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 46 extra nodes, 0 pruned nodes, max_depth=5
[2762]	train-auc:0.890011
[23:26:58] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[2763]	train-auc:0.890015
[23:26:58] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2764]	train-auc:0.890017
[23:26:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2765]	train-auc:0.890022
[23:26:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2766]	train-auc:0.890027
[23:26:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[2767]	train-auc:0.890032
[23:26:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[2768]	train-auc:0.890035
[23:26:59] src/tree/updater_prune.

[23:27:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2822]	train-auc:0.890233
[23:27:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 42 extra nodes, 0 pruned nodes, max_depth=5
[2823]	train-auc:0.890235
[23:27:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2824]	train-auc:0.890239
[23:27:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 40 extra nodes, 0 pruned nodes, max_depth=5
[2825]	train-auc:0.890241
[23:27:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[2826]	train-auc:0.890245
[23:27:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[2827]	train-auc:0.890247
[23:27:10] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 2 pruned nodes, max_depth=5
[2828]	train-auc:0.890249
[23:27:10] src/tree/updater_prune.

[2881]	train-auc:0.890448
[23:27:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 42 extra nodes, 2 pruned nodes, max_depth=5
[2882]	train-auc:0.890451
[23:27:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[2883]	train-auc:0.890456
[23:27:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 2 pruned nodes, max_depth=5
[2884]	train-auc:0.890459
[23:27:20] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2885]	train-auc:0.890462
[23:27:20] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[2886]	train-auc:0.890465
[23:27:20] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2887]	train-auc:0.89047
[23:27:20] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[2888]	train-auc:0.890473
[23:27:20

[2941]	train-auc:0.890658
[23:27:29] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2942]	train-auc:0.890662
[23:27:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[2943]	train-auc:0.890663
[23:27:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 0 pruned nodes, max_depth=5
[2944]	train-auc:0.890666
[23:27:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[2945]	train-auc:0.890669
[23:27:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2946]	train-auc:0.890675
[23:27:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 2 pruned nodes, max_depth=5
[2947]	train-auc:0.890677
[23:27:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[2948]	train-auc:0.890679
[23:27:3

[3001]	train-auc:0.890866
[23:27:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[3002]	train-auc:0.890869
[23:27:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[3003]	train-auc:0.890872
[23:27:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 0 pruned nodes, max_depth=5
[3004]	train-auc:0.890874
[23:27:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 50 extra nodes, 0 pruned nodes, max_depth=5
[3005]	train-auc:0.890878
[23:27:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3006]	train-auc:0.890882
[23:27:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[3007]	train-auc:0.890885
[23:27:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 50 extra nodes, 0 pruned nodes, max_depth=5
[3008]	train-auc:0.890887
[23:27:4

[23:27:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3062]	train-auc:0.891069
[23:27:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3063]	train-auc:0.891072
[23:27:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3064]	train-auc:0.891077
[23:27:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[3065]	train-auc:0.891078
[23:27:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3066]	train-auc:0.891084
[23:27:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[3067]	train-auc:0.891086
[23:27:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3068]	train-auc:0.89109
[23:27:50] src/tree/updater_prune.c

[23:27:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 2 pruned nodes, max_depth=5
[3122]	train-auc:0.891273
[23:28:00] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[3123]	train-auc:0.891278
[23:28:00] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[3124]	train-auc:0.891284
[23:28:00] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[3125]	train-auc:0.891288
[23:28:00] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3126]	train-auc:0.891293
[23:28:00] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 44 extra nodes, 0 pruned nodes, max_depth=5
[3127]	train-auc:0.891295
[23:28:00] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3128]	train-auc:0.891301
[23:28:01] src/tree/updater_prune.

[3181]	train-auc:0.891469
[23:28:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 2 pruned nodes, max_depth=5
[3182]	train-auc:0.891469
[23:28:09] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 2 pruned nodes, max_depth=5
[3183]	train-auc:0.891473
[23:28:10] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[3184]	train-auc:0.891476
[23:28:10] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[3185]	train-auc:0.89148
[23:28:10] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[3186]	train-auc:0.891486
[23:28:10] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[3187]	train-auc:0.891489
[23:28:10] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[3188]	train-auc:0.891493
[23:28:11

[23:28:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[3242]	train-auc:0.89167
[23:28:20] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[3243]	train-auc:0.891673
[23:28:20] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[3244]	train-auc:0.891677
[23:28:20] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[3245]	train-auc:0.89168
[23:28:20] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3246]	train-auc:0.891684
[23:28:20] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[3247]	train-auc:0.891688
[23:28:20] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3248]	train-auc:0.89169
[23:28:21] src/tree/updater_prune.cc:

[23:28:29] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[3302]	train-auc:0.891854
[23:28:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[3303]	train-auc:0.891857
[23:28:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[3304]	train-auc:0.891861
[23:28:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[3305]	train-auc:0.891864
[23:28:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[3306]	train-auc:0.891866
[23:28:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3307]	train-auc:0.89187
[23:28:30] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[3308]	train-auc:0.891873
[23:28:31] src/tree/updater_prune.c

[23:28:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3362]	train-auc:0.892042
[23:28:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3363]	train-auc:0.892045
[23:28:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 2 pruned nodes, max_depth=5
[3364]	train-auc:0.892048
[23:28:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3365]	train-auc:0.892054
[23:28:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[3366]	train-auc:0.892058
[23:28:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 50 extra nodes, 0 pruned nodes, max_depth=5
[3367]	train-auc:0.892061
[23:28:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[3368]	train-auc:0.892064
[23:28:40] src/tree/updater_prune.

[23:28:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 2 pruned nodes, max_depth=5
[3422]	train-auc:0.892222
[23:28:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3423]	train-auc:0.892224
[23:28:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[3424]	train-auc:0.892226
[23:28:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[3425]	train-auc:0.892229
[23:28:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 0 pruned nodes, max_depth=5
[3426]	train-auc:0.892232
[23:28:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 0 pruned nodes, max_depth=5
[3427]	train-auc:0.892237
[23:28:50] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=5
[3428]	train-auc:0.89224
[23:28:50] src/tree/updater_prune.c

[3481]	train-auc:0.892387
[23:28:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=5
[3482]	train-auc:0.892389
[23:28:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=5
[3483]	train-auc:0.892391
[23:28:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=5
[3484]	train-auc:0.892395
[23:28:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 0 pruned nodes, max_depth=5
[3485]	train-auc:0.892399
[23:28:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 2 pruned nodes, max_depth=5
[3486]	train-auc:0.892403
[23:28:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 40 extra nodes, 0 pruned nodes, max_depth=5
[3487]	train-auc:0.892405
[23:28:59] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=5
[3488]	train-auc:0.892407
[23:29:0

In [31]:
# 對dataset3進行預測
dataset3_preds['label'] = model.predict(dataset3)
dataset3_preds.label = MinMaxScaler().fit_transform(dataset3_preds.label.values.reshape(-1, 1))

dataset3_preds = dataset3_preds[~dataset3_preds.coupon_id.isna()]
dataset3_preds.reset_index(drop=True, inplace=True)

dataset3_preds.head()


Unnamed: 0,user_id,coupon_id,date_received,label
0,1439408,11002.0,20160528.0,0.353321
1,1439408,8591.0,20160613.0,0.564775
2,1439408,8591.0,20160516.0,0.614043
3,2029232,1532.0,20160530.0,0.385282
4,2029232,12737.0,20160519.0,0.503747


In [35]:
print(dataset3_preds.shape)

(304096, 5)


In [32]:
def convertingToString(row):
    if (pd.isnull(row)) or (row==-1):
        return ''
    else:
        return str(int(row))

dataset3_preds.loc[:, "user_id"] = dataset3_preds["user_id"].apply(convertingToString)
dataset3_preds.loc[:, "coupon_id"] = dataset3_preds["coupon_id"].apply(convertingToString)
dataset3_preds.loc[:, "date_received"] = dataset3_preds["date_received"].apply(convertingToString)
dataset3_preds["uid"] = dataset3_preds[["user_id", "coupon_id", "date_received"]].apply(lambda x: '_'.join(x.values), axis=1)
dataset3_preds.reset_index(drop=True, inplace=True)

In [33]:
### NOTE: YOUR SUBMITION FILE SHOULD HAVE COLUMN NAME: uid, label
out = dataset3_preds.groupby("uid", as_index=False).mean()
out = out[["uid", "label"]]
out.columns = ["uid", "label"]
out.to_csv("ml100marathon-midterm-exam-ver5.csv", header=["uid", "label"], index=False) # submission format
out.head()

Unnamed: 0,uid,label
0,1000020_2705_20160519,0.474562
1,1000020_8192_20160513,0.452555
2,1000065_1455_20160527,0.416022
3,1000085_8067_20160513,0.37454
4,1000086_2418_20160613,0.440521


In [34]:
# feature_score來保存特徵對標籤的影響因子
feature_score = model.get_fscore()
feature_score = sorted(feature_score.items(), key=lambda x:x[1],reverse=True)
fs = []
for (key,value) in feature_score:
    fs.append("{0},{1}\n".format(key,value))
     
with open('xgb_feature_score.csv','w') as f:
    f.writelines("feature,score\n")
    f.writelines(fs)