In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm, tqdm_notebook, tqdm_pandas
tqdm.pandas(tqdm_notebook)

from sklearn.model_selection import StratifiedKFold, train_test_split, GroupKFold
import lightgbm as lgb
from sklearn.metrics import roc_auc_score
from sklearn import preprocessing

In [2]:
train_transaction = pd.read_csv('../input/train_transaction.csv')
train_identity = pd.read_csv('../input/train_identity.csv')

test_transaction = pd.read_csv('../input/test_transaction.csv')
test_identity = pd.read_csv('../input/test_identity.csv')

In [3]:
train_transaction = train_transaction.merge(train_identity, on=['TransactionID'],how='left')
test_transaction = test_transaction.merge(test_identity, on=['TransactionID'],how='left')

In [4]:
all_data = pd.concat([train_transaction,test_transaction])

In [5]:
id_feature = [ c for c in all_data.columns if c.find('id_') !=-1]
v_feature = [ c for c in all_data.columns if c.find('V') !=-1]
card_feature = [ c for c in all_data.columns if c.find('card') !=-1]
C_feature = [ c for c in all_data.columns if c.find('C') !=-1 and c != 'ProductCD']
D_feature = [ c for c in all_data.columns if c.find('D') !=-1 and c not in ['ProductCD','TransactionID','TransactionDT','DeviceType','DeviceInfo']]
M_feature = [ c for c in all_data.columns if c.find('M') !=-1]

In [6]:
# 청구자와 수납자의 mail domain이 같은지?
all_data['email_check'] = np.where(all_data['P_emaildomain']==all_data['R_emaildomain'],1,0)
# 모두 null인지
all_data['email_check_nan_all'] = np.where((all_data['P_emaildomain'].isna())&(all_data['R_emaildomain'].isna()),1,0)
# 하나만 null인지
all_data['email_check_nan_any'] = np.where((all_data['P_emaildomain'].isna())|(all_data['R_emaildomain'].isna()),1,0)

# 제공안된 email fillna
all_data['P_emaildomain'] = all_data['P_emaildomain'].fillna('email_not_provided')
all_data['R_emaildomain'] = all_data['R_emaildomain'].fillna('email_not_provided')

all_data['P_emaildomain_Country'] = all_data['P_emaildomain'].apply(lambda x: x.split('.')[-1])
all_data['P_emaildomain_Country'] = all_data['P_emaildomain_Country'].apply(lambda x: np.nan if x in ['email_not_provided', 'com', 'net', 'gmail', 'edu'] else x)
all_data['R_emaildomain_Country'] = all_data['R_emaildomain'].apply(lambda x: x.split('.')[-1])
all_data['R_emaildomain_Country'] = all_data['R_emaildomain_Country'].apply(lambda x: np.nan if x in ['email_not_provided', 'com', 'net', 'gmail', 'edu'] else x)

all_data['P_emaildomain_prefix'] = all_data['P_emaildomain'].apply(lambda x: x.split('.')[0]) 
all_data['R_emaildomain_prefix'] = all_data['R_emaildomain'].apply(lambda x: x.split('.')[0])

In [7]:
M_feature_col = M_feature.copy()
M_feature_col.remove('M4')

all_data['M_not_null'] = all_data[M_feature_col].notnull().sum(axis=1)

for m in M_feature_col:
    all_data[m] = all_data[m].map({'T':1,'F':0})
    
all_data['M_sum'] = all_data[M_feature_col].sum(axis=1)
all_data['collect_agree_ratio'] = all_data['M_sum'] / all_data['M_not_null']

In [8]:
all_data['DeviceInfo'] = all_data['DeviceInfo'].fillna('unknown_device').str.lower()

all_data['DeviceInfo_c'] = all_data['DeviceInfo']

device_match_dict = {
    'sm':'samsung',
    'samsung':'samsung',
    'huawei':'huawei',
    'moto':'moto',
    'rv':'rv:',
    'trident':'trident',
    'lg':'lg',
    'htc':'htc',
    'blade':'blade',
    'windows':'windows',
    'lenovo':'lenovo',
    'linux':'linux',
    'f3':'f3',
    'f5':'f5',
    'ios':'apple',
    'mac':'apple'
}
for dev_type_key, dev_type_value in device_match_dict.items():
    print(dev_type_key)
    all_data['DeviceInfo_c'] = all_data['DeviceInfo_c'].apply(lambda x: dev_type_value if x.find(dev_type_key)!=-1  else x)

all_data['DeviceInfo_c'] = all_data['DeviceInfo_c'].apply(lambda x: 'other_d_type' if x not in device_match_dict.values() else x)

sm
samsung
huawei
moto
rv
trident
lg
htc
blade
windows
lenovo
linux
f3
f5
ios
mac


In [9]:
all_data['id_30'] = all_data['id_30'].fillna('unknown').str.lower()
id_30_c_dict={'win':'windows','mac':'mac','ios':'ios','and':'android','linux':'linux'}
all_data['id_30_c'] = all_data['id_30'] 
for k, v in id_30_c_dict.items():
    all_data['id_30_c'] = all_data['id_30_c'].apply(lambda x: v if x.find(k)!=-1 else x)

all_data['id_30_v'] = all_data['id_30'].apply(lambda x: ''.join([i for i in x if i.isdigit()]))
all_data.loc[all_data['id_30']=='unknown','id_30_c'] = np.nan

In [10]:
all_data['id_31'] = all_data['id_31'].fillna('unknown').str.lower()
all_data['id_31_c'] = all_data['id_31']
br_list = ['safari','ie','chrome','edge','firefox','samsung','opera','google','android']
for b in br_list:
    all_data['id_31_c'] = all_data['id_31_c'].apply(lambda x: b if x.find(b)!=-1 else x)

all_data.loc[all_data['id_31']=='unknown','id_31_c'] = np.nan
all_data.loc[(~all_data['id_31_c'].isin(br_list+['other']))&(all_data['id_31_c'].notnull()),'id_31_c'] = 'other'
all_data['is_mobile_browser'] = all_data['id_31'].apply(lambda x: 1 if x.find('mobile')!=-1 else 0)
all_data['id_31_v'] = all_data['id_31'].apply(lambda x: ''.join([i for i in x if i.isdigit()]))

In [11]:
START_DATE = '2017-11-30'
import datetime
startdate = datetime.datetime.strptime(START_DATE, '%Y-%m-%d')
all_data['TransactionDT'] = all_data['TransactionDT'].apply(lambda x: (startdate + datetime.timedelta(seconds = x)))

all_data['dow'] = all_data['TransactionDT'].dt.dayofweek
all_data['day'] = all_data['TransactionDT'].dt.day
all_data['Transaction_hour'] = all_data['TransactionDT'].dt.hour

In [12]:
# encoding 해도 좋은 feature, category수가 많고, 개수에 따라 분포가 변함
category_encoding = ['card1','card2','card3','card5',
          'C1','C2','C3','C4','C5','C6','C7','C8','C9','C10','C11','C12','C13','C14',
          'D1','D2','D3','D4','D5','D6','D7','D8','D9',
          'addr1','addr2',
          'dist1','dist2',
          'P_emaildomain', 'R_emaildomain',
          'id_01','id_02','id_03','id_04','id_05','id_06','id_07','id_08','id_09','id_10',
          'id_11','id_13','id_14','id_17','id_18','id_19','id_20','id_21','id_22','id_24',
          'id_25','id_26','id_30','id_31','id_32','id_33',
          'DeviceInfo','DeviceInfo_c','id_30_c','id_30_v','id_31_v',
         ]
for col in tqdm_notebook(category_encoding):
    fq_encode = all_data[col].value_counts().to_dict()   
    all_data[col+'_fq_enc'] = all_data[col].map(fq_encode)

HBox(children=(IntProgress(value=0, max=64), HTML(value='')))




In [13]:
def fast_groupby(data, col1, col2, agg_type):
    new_col_name = f'{col1}_{col2}_{agg_type}'
    temp = data.groupby([col1])[col2].agg([agg_type]).reset_index().rename(columns={agg_type:new_col_name})
    temp.index = list(temp[col1])
    temp = temp[new_col_name].to_dict()
    data[new_col_name] = data[col1].map(temp)
    return data

In [14]:
########################### TransactionAmt

# Let's add some kind of client uID based on cardID ad addr columns
# The value will be very specific for each client so we need to remove it
# from final feature. But we can use it for aggregations.
all_data['uid'] = all_data['card1'].astype(str)+'_'+all_data['card2'].astype(str)
all_data['uid2'] = all_data['uid'].astype(str)+'_'+all_data['card3'].astype(str)+'_'+all_data['card4'].astype(str)
all_data['uid3'] = all_data['uid2'].astype(str)+'_'+all_data['addr1'].astype(str)+'_'+all_data['addr2'].astype(str)

# For our model current TransactionAmt is a noise
# https://www.kaggle.com/kyakovlev/ieee-check-noise
# (even if features importances are telling contrariwise)
# There are many unique values and model doesn't generalize well
# Lets do some aggregations
i_cols = ['card1','card2','card3','card5','uid','uid2','uid3']

for col in tqdm_notebook(i_cols):
    for agg_type in ['mean','std']:
        all_data = fast_groupby(all_data, col, 'TransactionAmt', agg_type )

HBox(children=(IntProgress(value=0, max=7), HTML(value='')))




In [15]:
all_data['ymd'] = all_data['TransactionDT'].dt.year*10000+all_data['TransactionDT'].dt.month*100 + all_data['TransactionDT'].dt.day

In [16]:
all_data = fast_groupby(all_data,'ymd','TransactionDT','count')
all_data = fast_groupby(all_data,'ymd','TransactionAmt','sum')

In [17]:
uid3_product_cd_agg = all_data.groupby(['uid3','ProductCD'])['dow'].agg({'uid2_dayofweek_mean':'mean','uid2_dayofweek_std':'std'}).reset_index()
all_data = all_data.merge(uid3_product_cd_agg, how='left', on = ['uid3','ProductCD'])

In [18]:
all_data['TransactionAmt_decimal_count'] = ((all_data['TransactionAmt'] - all_data['TransactionAmt'].astype(int))).astype(str).apply(lambda x: len(x.split('.')[1]))
all_data['TransactionAmt_decimal'] = ((all_data['TransactionAmt'] - all_data['TransactionAmt'].astype(int)) * 1000).astype(int)

In [19]:
def account_start_date(val):
    if np.isnan(val) :
        return np.NaN
    else:
        days=  int( str(val).split('.')[0])
        return pd.Timedelta( str(days) +' days')
    
for i in ['D1', 'D2',  'D4', 'D8','D10', 'D15']:
    all_data['account_start_day'] = all_data[i].apply(account_start_date)

    # account_make_date 컴퓨터가 인식할 수 있도록 수치형으로 바꿔 줌. 
    all_data['account_make_date'] = (all_data['TransactionDT'] - all_data['account_start_day']).dt.date
    all_data['account_make_date_{}'.format(i)] = (10000 * pd.to_datetime(all_data['account_make_date']).dt.year) + (100 * pd.to_datetime(all_data['account_make_date']).dt.month) + (1 * pd.to_datetime(all_data['account_make_date']).dt.day)

del all_data['account_make_date']
del all_data['account_start_day']

In [20]:
card1_d1_pcd_mean = all_data.groupby(['card1','account_make_date_D1','ProductCD'])['TransactionAmt'].agg({'card1_make_date_D1_productCD_Amt_mean':'mean','card1_make_date_D1_productCD_Amt_std':'std'}).reset_index()
all_data = all_data.merge(card1_d1_pcd_mean, how='left', on = ['card1','account_make_date_D1','ProductCD'])

In [21]:
from sklearn.decomposition import LatentDirichletAllocation as LDA
from sklearn.feature_extraction.text import CountVectorizer
import gc

def add_lda_feature(all_data, col1, col2):
    n_comp = 5
    print("add_lda_feature", col1, col2, n_comp)
    temp = all_data[[col1,col2]]
    col1col2_dict = {}
    def col1col2(row):
        nonlocal col1, col2
        if np.isnan(row[col1]):
            return
        if np.isnan(row[col2]):
            return
        col1col2_dict.setdefault(row[col1], []).append(str(row[col2]))
    
    temp.apply(lambda row:col1col2(row) , axis=1)
    
    col1_keys = list(col1col2_dict.keys())
    col1col2_dict_as_sentence = [' '.join(col1col2_dict[c]) for c in col1_keys]
    
    _as_matrix = CountVectorizer().fit_transform(col1col2_dict_as_sentence)
    
    topics_of_col1 = LDA(n_components=n_comp, n_jobs=-1,random_state=0).fit_transform(_as_matrix)
    
    col1_frame = pd.DataFrame(col1col2_dict.keys(),columns=[col1])
    topics_of_col1 = pd.DataFrame(topics_of_col1, columns=[f'{col1}_{col2}_LDA_{i}' for i in range(n_comp)])
    col1_frame = pd.concat([col1_frame, topics_of_col1], axis=1)
    
    col1_frame.index = list(col1_frame[col1])
    new_col_name=[f'{col1}_{col2}_LDA_{i}' for i in range(n_comp)]
    for c in new_col_name:
        print(c)
        temp_df = col1_frame[c].to_dict()  
        all_data[c] = all_data[col1].map(temp_df)
    
    #all_data = all_data.merge(col1_frame, on=[col1], how='left')
    del col1col2_dict
    del topics_of_col1
    del _as_matrix
    del col1_frame
    #del topics_of_col1
    gc.collect()
    return all_data, new_col_name

In [22]:
from sklearn.decomposition import NMF

def add_nmf_feature(all_data, col1, col2):
    n_comp = 5
    print("add_nmf_feature", col1, col2, n_comp)
    temp = all_data[[col1,col2]]
    col1col2_dict = {}
    def col1col2(row):
        nonlocal col1, col2
        if pd.isna(row[col1]):
            return
        if pd.isna(row[col2]):
            return
        col1col2_dict.setdefault(row[col1], []).append(str(row[col2]))
    
    temp.apply(lambda row:col1col2(row) , axis=1)
    
    col1_keys = list(col1col2_dict.keys())
    col1col2_dict_as_sentence = [' '.join(col1col2_dict[c]) for c in col1_keys]
    
    _as_matrix = CountVectorizer().fit_transform(col1col2_dict_as_sentence)
    
    topics_of_col1 = LDA(n_components=n_comp, n_jobs=-1,random_state=0).fit_transform(_as_matrix)
    
    col1_frame = pd.DataFrame(col1col2_dict.keys(),columns=[col1])
    topics_of_col1 = pd.DataFrame(topics_of_col1, columns=[f'{col1}_{col2}_NMF_{i}' for i in range(n_comp)])
    col1_frame = pd.concat([col1_frame, topics_of_col1], axis=1)
    
    col1_frame.index = list(col1_frame[col1])
    new_col_name=[f'{col1}_{col2}_NMF_{i}' for i in range(n_comp)]
    for c in new_col_name:
        print(c)
        temp_df = col1_frame[c].to_dict()  
        all_data[c] = all_data[col1].map(temp_df)
    
    #all_data = all_data.merge(col1_frame, on=[col1], how='left')
    del col1col2_dict
    del topics_of_col1
    del _as_matrix
    del col1_frame
    #del topics_of_col1
    gc.collect()
    return all_data, new_col_name

(0.9404788466588169, 0.9353552350432497, 0.9117537267548267, 0.9589567433316727)

In [23]:
all_data,_ = add_lda_feature(all_data, 'card1','addr1')
all_data,_ = add_lda_feature(all_data, 'card1','id_20')

add_lda_feature card1 addr1 5
card1_addr1_LDA_0
card1_addr1_LDA_1
card1_addr1_LDA_2
card1_addr1_LDA_3
card1_addr1_LDA_4
add_lda_feature card1 id_20 5
card1_id_20_LDA_0
card1_id_20_LDA_1
card1_id_20_LDA_2
card1_id_20_LDA_3
card1_id_20_LDA_4


In [24]:
all_data,_ = add_nmf_feature(all_data, 'card4','DeviceInfo_c')

add_nmf_feature card4 DeviceInfo_c 5
card4_DeviceInfo_c_NMF_0
card4_DeviceInfo_c_NMF_1
card4_DeviceInfo_c_NMF_2
card4_DeviceInfo_c_NMF_3
card4_DeviceInfo_c_NMF_4


In [25]:
def category_combine_feature(all_data, c1, c2):
    new_col_list = []
    new_col = f'{c1}_{c2}_combine'
    all_data[new_col] = all_data[c1].astype(str) + '_' + all_data[c2].astype(str)
    all_data[new_col] = pd.factorize(all_data[new_col], sort=True)[0]
    
    new_col_freq = new_col + '_fq_enc_combine'
    fq_encode = all_data[new_col].value_counts().to_dict()   
    all_data[new_col_freq] = all_data[new_col].map(fq_encode)
    
    new_col_list.append(new_col)
    new_col_list.append(new_col_freq)
    
    if new_col not in ['card1_card2_combine']: 
        for agg_type in ['mean','std']:
            new_col_name = new_col+'_TransactionAmt_'+agg_type
            #temp_df['TransactionAmt'] = temp_df['TransactionAmt'].astype(int)
            temp_df = all_data.groupby([new_col])['TransactionAmt'].agg([agg_type]).reset_index().rename(
                                                    columns={agg_type: new_col_name})

            temp_df.index = list(temp_df[new_col])
            temp_df = temp_df[new_col_name].to_dict()   

            all_data[new_col_name] = all_data[new_col].map(temp_df)
            new_col_list.append(new_col_name)
    
    print(new_col_list)
    return all_data, new_col_list

(0.9765178882384341, 0.9750723088222574, 0.9691847487013969, 0.9809598689431178)

In [26]:
for a in ['addr1']:
    for i in ['id_32','id_14','id_19']:
        new_col = f'{a}_{i}_combine'
        if new_col in all_data.columns:
            continue
        print(new_col)
        all_data, new_col_list = category_combine_feature(all_data,a,i)
for c in ['card1']:
    for i in ['card2','addr1']:
        new_col = f'{c}_{i}_combine'
        if new_col in all_data.columns:
            continue
        print(new_col)
        all_data, new_col_list = category_combine_feature(all_data,c,i)
        
#all_data, new_col_list = category_combine_feature(all_data,'id_17','id_33')

addr1_id_32_combine
['addr1_id_32_combine', 'addr1_id_32_combine_fq_enc_combine', 'addr1_id_32_combine_TransactionAmt_mean', 'addr1_id_32_combine_TransactionAmt_std']
addr1_id_14_combine
['addr1_id_14_combine', 'addr1_id_14_combine_fq_enc_combine', 'addr1_id_14_combine_TransactionAmt_mean', 'addr1_id_14_combine_TransactionAmt_std']
addr1_id_19_combine
['addr1_id_19_combine', 'addr1_id_19_combine_fq_enc_combine', 'addr1_id_19_combine_TransactionAmt_mean', 'addr1_id_19_combine_TransactionAmt_std']
card1_card2_combine
['card1_card2_combine', 'card1_card2_combine_fq_enc_combine']
card1_addr1_combine
['card1_addr1_combine', 'card1_addr1_combine_fq_enc_combine', 'card1_addr1_combine_TransactionAmt_mean', 'card1_addr1_combine_TransactionAmt_std']


In [27]:
all_data_temp = pd.concat([train_transaction,test_transaction])

In [28]:
all_data_temp['uid'] = all_data_temp['card1'].astype(str)+'_'+all_data_temp['card2'].astype(str)
all_data_temp['uid2'] = all_data_temp['uid'].astype(str)+'_'+all_data_temp['card3'].astype(str)+'_'+all_data_temp['card4'].astype(str)
all_data_temp['uid3'] = all_data_temp['uid2'].astype(str)+'_'+all_data_temp['addr1'].astype(str)+'_'+all_data_temp['addr2'].astype(str)
all_data_temp['uid3_next_click'] = all_data_temp['TransactionDT'] - all_data_temp.groupby(['uid3'])['TransactionDT'].shift(-1)
all_data_temp = all_data_temp.reset_index(drop=True)

In [29]:
all_data['uid3_next_click'] = all_data_temp['uid3_next_click']

In [30]:
card1_pcd_mean = all_data.groupby(['card1','account_make_date_D2','ProductCD'])['TransactionAmt'].agg({'card1_make_date_D2_productCD_Amt_mean':'mean','card1_make_date_D2_productCD_Amt_std':'std'}).reset_index()
all_data = all_data.merge(card1_pcd_mean, how='left', on = ['card1','account_make_date_D2','ProductCD'])

In [31]:
card1_pcd_mean = all_data.groupby(['card1','addr1','account_make_date_D1','ProductCD'])['TransactionAmt'].agg({'card1_addr1_make_date_D1_ProductCD_Amt_mean':'mean','card1_addr1_make_date_D1_ProductCD_Amt_std':'std'}).reset_index()
all_data = all_data.merge(card1_pcd_mean, how='left', on = ['card1','addr1','ProductCD','account_make_date_D1'])

In [32]:
card1_pcd_mean = all_data.groupby(['card1','id_20','account_make_date_D1','ProductCD'])['TransactionAmt'].agg({'card1_id_20_make_date_D1_ProductCD_Amt_mean':'mean','card1_id_20_make_date_D1_ProductCD_Amt_std':'std'}).reset_index()
all_data = all_data.merge(card1_pcd_mean, how='left', on = ['card1','id_20','ProductCD','account_make_date_D1'])

In [33]:
for p in all_data['ProductCD'].unique():
    all_data.loc[all_data['ProductCD']==p, 'ProductCD_Amt_expandmean'] = all_data.loc[all_data['ProductCD']==p, 'TransactionAmt'].cumsum()/np.arange(1,all_data.loc[all_data['ProductCD']==p, 'TransactionAmt'].shape[0]+1)

In [34]:
from sklearn.preprocessing import StandardScaler,MinMaxScaler
for p in all_data['ProductCD'].unique():
    mm = MinMaxScaler()
    all_data.loc[all_data['ProductCD']==p, 'TransactionAmt'] = mm.fit_transform(all_data.loc[all_data['ProductCD']==p, 'TransactionAmt'].values.reshape(-1,1))

In [35]:
card1_pcd_mean = all_data.groupby(['card1','account_make_date_D1','ProductCD'])['dist1'].agg({'card1_make_date_D1_ProductCD_dist1_mean':'mean','card1_make_date_D1_ProductCD_dist1_std':'std'}).reset_index()

all_data = all_data.merge(card1_pcd_mean, how='left', on = ['card1','ProductCD','account_make_date_D1'])

 

card1_pcd_mean = all_data.groupby(['card1','addr1','account_make_date_D1','ProductCD'])['dist1'].agg({'card1_addr1_make_date_D1_ProductCD_dist1_mean':'mean','card1_addr1_make_date_D1_ProductCD_dist1_std':'std'}).reset_index()

all_data = all_data.merge(card1_pcd_mean, how='left', on = ['card1','addr1','ProductCD','account_make_date_D1'])

 

card1_pcd_mean = all_data.groupby(['card1','card2','account_make_date_D1','ProductCD'])['dist1'].agg({'card1_card2_make_date_D1_ProductCD_dist1_mean':'mean','card1_card2_make_date_D1_ProductCD_dist1_std':'std'}).reset_index()

all_data = all_data.merge(card1_pcd_mean, how='left', on = ['card1','card2','ProductCD','account_make_date_D1'])

In [36]:
card1_pcd_mean = all_data.groupby(['id_02','ProductCD'])['TransactionAmt'].agg({'id_02_ProductCD_amt_mean':'mean'}).reset_index()

all_data = all_data.merge(card1_pcd_mean, how='left', on = ['id_02','ProductCD'])

In [37]:
card1_d1_pcd_mean = all_data.groupby(['card1','account_make_date_D1','ProductCD'])['Transaction_hour'].agg({'card1_make_date_D1_productCD_hour_mean':'mean'}).reset_index()
all_data = all_data.merge(card1_d1_pcd_mean, how='left', on = ['card1','account_make_date_D1','ProductCD'])

In [38]:
all_data['card1_maked1_pcd'] = all_data['card1'].astype(str) + all_data['account_make_date_D1'].astype(str) + all_data['ProductCD'].astype(str)
all_data['card1_maked1_pcd'] = pd.factorize(all_data['card1_maked1_pcd'])[0]
new_col_freq = 'card1_maked1_pcd' + '_fq_enc_combine'
fq_encode = all_data['card1_maked1_pcd'].value_counts().to_dict()   
all_data[new_col_freq] = all_data['card1_maked1_pcd'].map(fq_encode)

In [101]:
for col in ['account_make_date_D1']:
    new_col_freq = col + '_fq_enc_combine'
    fq_encode = all_data[col].value_counts().to_dict()   
    all_data[new_col_freq] = all_data[col].map(fq_encode)

In [145]:
all_data['d1_d2'] = all_data['account_make_date_D1'].astype(str) + '_' + all_data['account_make_date_D2'].astype(str) + '_' + all_data['ProductCD'].astype(str)

all_data['same_d1_d2'] = 0

all_data.loc[all_data['account_make_date_D1'] == all_data['account_make_date_D2'], 'same_d1_d2'] = 1
fq_encode = all_data['d1_d2'].value_counts().to_dict()   
all_data['d1_d2_fq_enc'] = all_data['d1_d2'].map(fq_encode)

In [146]:
object_col = []
for col in all_data.columns:
    if all_data[col].dtypes == 'object':
        object_col.append(col)
        
for col in object_col:
    all_data[col] = pd.factorize(all_data[col], sort=True)[0]

In [147]:
x_train = all_data.loc[all_data['isFraud'].notnull()]#.sample(frac=0.3,random_state=0).reset_index(drop=True)
#x_train = x_train.loc[x_train['TransactionDT']>2500000].reset_index(drop=True) # like lb hist dist
y_train = x_train.isFraud

x_test = all_data.loc[all_data['isFraud'].isnull()].reset_index(drop=True)

In [148]:
def mean_encoding(train, test, col):
    temp_dict = train.groupby([col])['isFraud'].agg(['mean']).reset_index().rename(
                                                        columns={'mean': col+'_target_mean'})
    temp_dict.index = temp_dict[col].values
    temp_dict = temp_dict[col+'_target_mean'].to_dict()

    train[col+'_target_mean'] = train[col].map(temp_dict)
    test[col+'_target_mean']  = test[col].map(temp_dict)
    return train, test

In [149]:
x_train, x_test = mean_encoding(x_train, x_test, 'ProductCD')
x_train, x_test = mean_encoding(x_train, x_test, 'M4')

In [150]:
param = {
        #'bagging_freq': 5,
        #'bagging_fraction': 0.8,
        'boost_from_average':'true',
        'boost': 'gbdt',
        'feature_fraction': 0.7,
        'bagging_fraction': 0.7,
        'learning_rate': 0.01,
        'subsample_freq':1,
        'max_bin':255,
        'max_depth': -1,
        'metric':'auc',
        'num_leaves': 256,
        'num_threads': 32,
        'tree_learner': 'serial',
        'objective': 'binary',
        #'scale_pos_weight':97,
        'verbosity': 1,
        'seed':42
        #'reg_lambda': 0.3,
    }

In [151]:
def get_train_column(train, must_delete_col):
    delete_col = must_delete_col.copy()
    unique_col = []
    for c in train.columns:
        if train[c].fillna(-9999).nunique()<2:
            unique_col.append(c)
            
    delete_col = delete_col + unique_col         
    train_columns = [c for c in train.columns if c not in delete_col]
    return train_columns, delete_col

In [159]:
category_feature_dict = dict()
delete_feature_dict = dict()
w_type = [x_train['ProductCD'].value_counts().index[0]]
other_type = list(x_train['ProductCD'].value_counts().index[1:].values)

category_feature_dict[str(w_type)] = ['addr1','addr2','P_emaildomain']

category_feature_dict[str(other_type)] = ['addr1','addr2','P_emaildomain','R_emaildomain','id_13','id_15', 'id_33', 'id_37', 'id_38','id_30_v','id_31_v']

#import pickle
#delete_v_feature = []
#with open('noise_v_cols.pickle', 'rb') as file:
#    delete_v_feature = pickle.load(file)
#    print(delete_v_feature)
#'same_d1_d2','d1_d2','d1_d2_fq_enc'    
delete_feautre_list = ['DT_M','card1_addr1_maked1_pcd','card1_addr1_maked1_pcd_fq_enc_combine','C13_over_cf_sum','TransactionID','isFraud','TransactionDT','bank_type','uid','uid2','uid3','ymd','ym','is_D_max','addr1_null']
delete_feature_dict[str(w_type)] =  delete_feautre_list +['P_emaildomain_Country','card1_TransactionAmt_std']#+ delete_v_feature #+ M_feature
delete_feature_dict[str(other_type)] = delete_feautre_list

['V206', 'V210', 'V211', 'V212', 'V213', 'V214', 'V215', 'V216', 'V217', 'V218', 'V221', 'V222', 'V223', 'V224', 'V225', 'V227', 'V228', 'V231', 'V233', 'V234', 'V235', 'V236', 'V237', 'V238', 'V307', 'V308', 'V309', 'V310', 'V311', 'V312', 'V313', 'V314', 'V315', 'V317', 'V319', 'V320', 'V321', 'V322', 'V325', 'V326', 'V327', 'V328', 'V329', 'V330', 'V331', 'V333', 'V334', 'V335', 'V336', 'V337', 'V338', 'V171', 'V172', 'V173', 'V174', 'V175', 'V176', 'V177', 'V178', 'V179', 'V180', 'V181', 'V183', 'V184', 'V185', 'V186', 'V187', 'V188', 'V189', 'V191', 'V192', 'V193', 'V194', 'V195', 'V196', 'V197', 'V198', 'V199', 'V200', 'V201', 'V203', 'V69', 'V70', 'V71', 'V72', 'V73', 'V75', 'V76', 'V77', 'V78', 'V79', 'V80', 'V84', 'V85', 'V86', 'V87', 'V88', 'V89', 'V90', 'V91', 'V92', 'V93', 'V97', 'V98', 'V99', 'V100', 'V101', 'V102', 'V1', 'V2', 'V3', 'V4', 'V5', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V18', 'V19', 'V21', 'V23', 'V25', 'V26', 'V29', 'V30', 'V31',

In [162]:
#must_del_col_list = ['DT_M','card1_addr1_maked1_pcd','card1_addr1_maked1_pcd_fq_enc_combine','C13_over_cf_sum','TransactionID','isFraud','TransactionDT','bank_type','uid','uid2','uid3','ymd','ym','is_D_max','addr1_null'] + delete_v_feature #+ M_feature

debug = False
oof_train = np.zeros(len(x_train))
oof_test = np.zeros(len(x_test))
NFOLD =5
SEED = 42

w_type = [x_train['ProductCD'].value_counts().index[0]]
other_type = list(x_train['ProductCD'].value_counts().index[1:].values)
product_type = list()
product_type.append(w_type)
product_type.append(other_type)

feature_importance_df = pd.DataFrame()
cv_list ={}
for p_type in product_type:
    x_train_type = x_train.loc[x_train['ProductCD'].isin(p_type)]
    y_train_type = x_train_type['isFraud']
    x_test_type = x_test.loc[x_test['ProductCD'].isin(p_type)]
    
    category_feature = category_feature_dict[str(p_type)].copy()
    must_del_col_list = delete_feature_dict[str(p_type)].copy()
    #param = org_param.copy()#lgbm_param_dict[str(p_type)]
    
    #must_del_col_list_temp = must_del_col_list.copy()
    #must_del_col_list_temp = must_del_col_list_temp + cat_combinefeature_dict[str(p_type)] 
    train_columns, _ = get_train_column(x_train_type, must_del_col_list)
    print(x_train_type.shape)
    print(len(train_columns), train_columns)
    kfold = StratifiedKFold(n_splits=NFOLD, shuffle=True, random_state=SEED)
    #split_groups = x_train_type['DT_M']
    #kfold = GroupKFold(n_splits=6)
    oof_train_type = np.zeros((x_train_type.shape[0],))
    oof_test_type = np.zeros((x_test_type.shape[0],))
    feature_importance_type = pd.DataFrame()
    type_score = 0
    for i, (train_index, cross_index) in enumerate(kfold.split(x_train_type,y_train_type),1):
        print(f"TYPE {p_type} - {i} FOLD Start")
        x_tr = x_train_type.iloc[train_index][train_columns]
        x_cr = x_train_type.iloc[cross_index][train_columns]
        y_tr = y_train_type.iloc[train_index]
        y_cr = y_train_type.iloc[cross_index]
        dtrain = lgb.Dataset(x_tr, label=y_tr, silent=True)
        dcross = lgb.Dataset(x_cr, label=y_cr, silent=True)

        clf = lgb.train(param, train_set=dtrain, num_boost_round=15000, valid_sets=[dtrain, dcross], 
                         early_stopping_rounds=100, verbose_eval=500, categorical_feature=category_feature)
        
        oof_train_type[cross_index] = clf.predict(x_cr)
        oof_test_type += clf.predict(x_test_type[train_columns])/NFOLD
        
        feature_importance = pd.DataFrame()
        feature_importance["Feature"] = x_tr.columns
        feature_importance["Importance"] = clf.feature_importance()
        feature_importance["FOLD"] = i
        feature_importance_type = pd.concat([feature_importance_type, feature_importance])
        cv_score = roc_auc_score(y_cr, oof_train_type[cross_index])
        type_score += cv_score / NFOLD
        print(f"{i} FOLD Score: ", cv_score)
        if debug is True:
            break
        
    print("Total CV: ", type_score)
    print("Total CV2: ", roc_auc_score(y_train_type, oof_train_type))
    cv_list[str(p_type)] = roc_auc_score(y_train_type, oof_train_type)
    oof_train[x_train_type.index] = oof_train_type
    oof_test[x_test_type.index] = oof_test_type
    feature_importance_type["TYPE"] = str(p_type)
    feature_importance_df = pd.concat([feature_importance_df, feature_importance_type])

(439670, 606)
326 ['C1', 'C11', 'C12', 'C13', 'C14', 'C2', 'C5', 'C6', 'C9', 'D1', 'D10', 'D11', 'D15', 'D2', 'D3', 'D4', 'D5', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'P_emaildomain', 'TransactionAmt', 'V1', 'V10', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V11', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V2', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V279', 'V28', 'V280', 'V281', 'V282', 'V283', 'V284', 'V285', 'V286', 'V287', 'V288', 'V289', 'V29', 'V290', 'V291', 'V292', 'V293', 'V294', 'V295', 'V296', 'V297', 'V298', 'V299', 'V3', 'V30', 'V300', 'V301', 'V302', 'V303', 'V304', 'V305', 'V306', 'V307', 'V308', 'V309', 'V31', 'V310', 'V311', 'V312', 'V313', 'V314', 'V315', 'V316', 'V3

New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999984	valid_1's auc: 0.969856
[1000]	training's auc: 1	valid_1's auc: 0.974137
Early stopping, best iteration is:
[1007]	training's auc: 1	valid_1's auc: 0.974144
1 FOLD Score:  0.9741435372150535
TYPE [4] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999981	valid_1's auc: 0.973296
[1000]	training's auc: 1	valid_1's auc: 0.978074
Early stopping, best iteration is:
[1053]	training's auc: 1	valid_1's auc: 0.978302
2 FOLD Score:  0.9783016434577088
TYPE [4] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999982	valid_1's auc: 0.969343
[1000]	training's auc: 1	valid_1's auc: 0.973303
Early stopping, best iteration is:
[1010]	training's auc: 1	valid_1's auc: 0.973289
3 FOLD Score:  0.9732891401542535
TYPE [4] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999985	valid_1's auc: 0.969103
[1000]	training's auc: 1	valid_1's auc: 0.973299
Early stopping, best iteration is:
[1175]	training's auc: 1	valid_1's auc: 0.974003
4 FOLD Score:  0.9740029971172902
TYPE [4] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999975	valid_1's auc: 0.971296
[1000]	training's auc: 0.999994	valid_1's auc: 0.97629
Early stopping, best iteration is:
[1076]	training's auc: 0.999994	valid_1's auc: 0.976497
5 FOLD Score:  0.9764970150021022
Total CV:  0.9752468665892817
Total CV2:  0.9750778226897338
(150870, 606)
564 ['C1', 'C10', 'C11', 'C12', 'C13', 'C14', 'C2', 'C3', 'C4', 'C6', 'C7', 'C8', 'D1', 'D10', 'D12', 'D13', 'D14', 'D15', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'DeviceInfo', 'DeviceType', 'M4', 'P_emaildomain', 'ProductCD', 'R_emaildomain', 'TransactionAmt', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V138', 'V139', 'V14', 'V140', 'V141', 

New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999823	valid_1's auc: 0.975522
Early stopping, best iteration is:
[879]	training's auc: 1	valid_1's auc: 0.97895
1 FOLD Score:  0.9789497220666014
TYPE [0, 2, 1, 3] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999831	valid_1's auc: 0.981335
[1000]	training's auc: 1	valid_1's auc: 0.984667
Early stopping, best iteration is:
[956]	training's auc: 1	valid_1's auc: 0.9845
2 FOLD Score:  0.9844996775646017
TYPE [0, 2, 1, 3] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999809	valid_1's auc: 0.981434
[1000]	training's auc: 1	valid_1's auc: 0.985122
Early stopping, best iteration is:
[1026]	training's auc: 1	valid_1's auc: 0.985174
3 FOLD Score:  0.9851737929484757
TYPE [0, 2, 1, 3] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999814	valid_1's auc: 0.979077
Early stopping, best iteration is:
[862]	training's auc: 1	valid_1's auc: 0.981684
4 FOLD Score:  0.9816837033539042
TYPE [0, 2, 1, 3] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999806	valid_1's auc: 0.98346
[1000]	training's auc: 1	valid_1's auc: 0.986538
Early stopping, best iteration is:
[926]	training's auc: 1	valid_1's auc: 0.986337
5 FOLD Score:  0.9863365521772796
Total CV:  0.9833286896221725
Total CV2:  0.9833307486712161


In [136]:
pd.DataFrame(oof_train,columns=[f'train_seed_{SEED}']).to_csv(f'../oof/goss_oof_train_seed_{SEED}.csv',index=False)
pd.DataFrame(oof_test,columns=[f'test_seed_{SEED}']).to_csv(f'../oof/goss_oof_test_seed_{SEED}.csv',index=False)

In [160]:
#card1_addr1 addr1_id_32 card1 card2 addr1_id_19 addr1_id_14
print(roc_auc_score(y_train, oof_train))
print(np.mean([c for c in cv_list.values()]))
print(cv_list)

0.9807238125970722
0.9787469534092774
{'[4]': 0.9742083722231305, '[0, 2, 1, 3]': 0.9832855345954242}


In [133]:
#card1_addr1 addr1_id_32 card1 card2 addr1_id_19 addr1_id_14
print(roc_auc_score(y_train, oof_train))
print(np.mean([c for c in cv_list.values()]))
print(cv_list)

0.9822726367443497
0.9802833773631142
{'[4]': 0.9768916615473957, '[0, 2, 1, 3]': 0.9836750931788327}


In [154]:
#card1_addr1 addr1_id_32 card1 card2 addr1_id_19 addr1_id_14
print(roc_auc_score(y_train, oof_train))
print(np.mean([c for c in cv_list.values()]))
print(cv_list)

0.9810973144554898
0.979067892895541
{'[4]': 0.9748050371198658, '[0, 2, 1, 3]': 0.9833307486712161}


In [134]:
fi = feature_importance_df.groupby(['Feature'])['Importance'].mean().reset_index()

fi = fi.sort_values('Importance',ascending=False)
fi.to_csv('../importance/goss.csv',index=False)
fi

Unnamed: 0,Feature,Importance
460,card1_addr1_make_date_D1_ProductCD_dist1_mean,5910.2
477,card1_make_date_D1_productCD_Amt_std,5408.6
478,card1_make_date_D1_productCD_hour_mean,5259.3
426,account_make_date_D1_fq_enc_combine,4829.9
430,addr1,4770.7
474,card1_make_date_D1_ProductCD_dist1_mean,4731.8
476,card1_make_date_D1_productCD_Amt_mean,4581.4
459,card1_addr1_make_date_D1_ProductCD_Amt_std,4230.1
514,id_02,4146.6
80,TransactionAmt,3944.5


In [135]:
sub = pd.read_csv('../input/sample_submission.csv')
sub['isFraud'] = oof_test
sub.to_csv('../output/goss.csv',index=False)

#### Seed Sampling

In [137]:
#must_del_col_list = ['DT_M','card1_addr1_maked1_pcd','card1_addr1_maked1_pcd_fq_enc_combine','C13_over_cf_sum','TransactionID','isFraud','TransactionDT','bank_type','uid','uid2','uid3','ymd','ym','is_D_max','addr1_null'] + delete_v_feature #+ M_feature

debug = False
oof_train = np.zeros(len(x_train))
oof_test = np.zeros(len(x_test))
NFOLD =5
SEED = 42

w_type = [x_train['ProductCD'].value_counts().index[0]]
other_type = list(x_train['ProductCD'].value_counts().index[1:].values)
product_type = list()
product_type.append(w_type)
product_type.append(other_type)

for seed in range(10):
    print(seed)
    param['seed'] = seed
    feature_importance_df = pd.DataFrame()
    cv_list ={}
    for p_type in product_type:
        x_train_type = x_train.loc[x_train['ProductCD'].isin(p_type)]
        y_train_type = x_train_type['isFraud']
        x_test_type = x_test.loc[x_test['ProductCD'].isin(p_type)]

        category_feature = category_feature_dict[str(p_type)].copy()
        must_del_col_list = delete_feature_dict[str(p_type)].copy()
        #param = org_param.copy()#lgbm_param_dict[str(p_type)]

        #must_del_col_list_temp = must_del_col_list.copy()
        #must_del_col_list_temp = must_del_col_list_temp + cat_combinefeature_dict[str(p_type)] 
        train_columns, _ = get_train_column(x_train_type, must_del_col_list)
        print(x_train_type.shape)
        print(len(train_columns), train_columns)
        kfold = StratifiedKFold(n_splits=NFOLD, shuffle=True, random_state=SEED)
        #split_groups = x_train_type['DT_M']
        #kfold = GroupKFold(n_splits=6)
        oof_train_type = np.zeros((x_train_type.shape[0],))
        oof_test_type = np.zeros((x_test_type.shape[0],))
        feature_importance_type = pd.DataFrame()
        type_score = 0
        for i, (train_index, cross_index) in enumerate(kfold.split(x_train_type,y_train_type),1):
            print(f"TYPE {p_type} - {i} FOLD Start")
            x_tr = x_train_type.iloc[train_index][train_columns]
            x_cr = x_train_type.iloc[cross_index][train_columns]
            y_tr = y_train_type.iloc[train_index]
            y_cr = y_train_type.iloc[cross_index]
            dtrain = lgb.Dataset(x_tr, label=y_tr, silent=True)
            dcross = lgb.Dataset(x_cr, label=y_cr, silent=True)

            clf = lgb.train(param, train_set=dtrain, num_boost_round=15000, valid_sets=[dtrain, dcross], 
                             early_stopping_rounds=100, verbose_eval=500, categorical_feature=category_feature)

            oof_train_type[cross_index] = clf.predict(x_cr)
            oof_test_type += clf.predict(x_test_type[train_columns])/NFOLD

            feature_importance = pd.DataFrame()
            feature_importance["Feature"] = x_tr.columns
            feature_importance["Importance"] = clf.feature_importance()
            feature_importance["FOLD"] = i
            feature_importance_type = pd.concat([feature_importance_type, feature_importance])
            cv_score = roc_auc_score(y_cr, oof_train_type[cross_index])
            type_score += cv_score / NFOLD
            print(f"{i} FOLD Score: ", cv_score)
            if debug is True:
                break

        print("Total CV: ", type_score)
        print("Total CV2: ", roc_auc_score(y_train_type, oof_train_type))
        cv_list[str(p_type)] = roc_auc_score(y_train_type, oof_train_type)
        oof_train[x_train_type.index] = oof_train_type
        oof_test[x_test_type.index] = oof_test_type
        feature_importance_type["TYPE"] = str(p_type)
        feature_importance_df = pd.concat([feature_importance_df, feature_importance_type])
    pd.DataFrame(oof_train,columns=[f'train_seed_{seed}']).to_csv(f'../oof/goss_oof_train_seed_{seed}.csv',index=False)
    pd.DataFrame(oof_test,columns=[f'test_seed_{seed}']).to_csv(f'../oof/goss_oof_test_seed_{seed}.csv',index=False)

0
(439670, 603)
325 ['C1', 'C11', 'C12', 'C13', 'C14', 'C2', 'C5', 'C6', 'C9', 'D1', 'D10', 'D11', 'D15', 'D2', 'D3', 'D4', 'D5', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'P_emaildomain', 'TransactionAmt', 'V1', 'V10', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V11', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V2', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V279', 'V28', 'V280', 'V281', 'V282', 'V283', 'V284', 'V285', 'V286', 'V287', 'V288', 'V289', 'V29', 'V290', 'V291', 'V292', 'V293', 'V294', 'V295', 'V296', 'V297', 'V298', 'V299', 'V3', 'V30', 'V300', 'V301', 'V302', 'V303', 'V304', 'V305', 'V306', 'V307', 'V308', 'V309', 'V31', 'V310', 'V311', 'V312', 'V313', 'V314', 'V315', 'V316', '

New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999878	valid_1's auc: 0.970224
[1000]	training's auc: 1	valid_1's auc: 0.975926
Early stopping, best iteration is:
[1120]	training's auc: 1	valid_1's auc: 0.976306
1 FOLD Score:  0.9763061914627875
TYPE [4] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999896	valid_1's auc: 0.973842
[1000]	training's auc: 1	valid_1's auc: 0.979371
Early stopping, best iteration is:
[1025]	training's auc: 1	valid_1's auc: 0.979423
2 FOLD Score:  0.9794231875775066
TYPE [4] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999884	valid_1's auc: 0.971387
[1000]	training's auc: 1	valid_1's auc: 0.975739
Early stopping, best iteration is:
[1083]	training's auc: 1	valid_1's auc: 0.976018
3 FOLD Score:  0.9760177619125641
TYPE [4] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999898	valid_1's auc: 0.970423
[1000]	training's auc: 1	valid_1's auc: 0.976125
Early stopping, best iteration is:
[1099]	training's auc: 1	valid_1's auc: 0.976308
4 FOLD Score:  0.9763081618448514
TYPE [4] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.99988	valid_1's auc: 0.971861
[1000]	training's auc: 1	valid_1's auc: 0.97765
Early stopping, best iteration is:
[1105]	training's auc: 1	valid_1's auc: 0.978092
5 FOLD Score:  0.9780920137919943
Total CV:  0.9772294633179408
Total CV2:  0.9771489918595836
(150870, 603)
561 ['C1', 'C10', 'C11', 'C12', 'C13', 'C14', 'C2', 'C3', 'C4', 'C6', 'C7', 'C8', 'D1', 'D10', 'D12', 'D13', 'D14', 'D15', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'DeviceInfo', 'DeviceType', 'M4', 'P_emaildomain', 'ProductCD', 'R_emaildomain', 'TransactionAmt', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V138', 'V139', 'V14', 'V140', 'V141', 'V142', 'V143',

New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999554	valid_1's auc: 0.975725
[1000]	training's auc: 1	valid_1's auc: 0.979892
Early stopping, best iteration is:
[943]	training's auc: 1	valid_1's auc: 0.979628
1 FOLD Score:  0.9796284516511878
TYPE [0, 2, 1, 3] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999579	valid_1's auc: 0.981407
[1000]	training's auc: 1	valid_1's auc: 0.984512
Early stopping, best iteration is:
[934]	training's auc: 1	valid_1's auc: 0.984355
2 FOLD Score:  0.9843548523474733
TYPE [0, 2, 1, 3] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999566	valid_1's auc: 0.981247
[1000]	training's auc: 1	valid_1's auc: 0.984923
Early stopping, best iteration is:
[949]	training's auc: 1	valid_1's auc: 0.984739
3 FOLD Score:  0.9847390869038083
TYPE [0, 2, 1, 3] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999584	valid_1's auc: 0.979134
[1000]	training's auc: 1	valid_1's auc: 0.982757
Early stopping, best iteration is:
[907]	training's auc: 1	valid_1's auc: 0.98239
4 FOLD Score:  0.9823898741230943
TYPE [0, 2, 1, 3] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999536	valid_1's auc: 0.983554
[1000]	training's auc: 1	valid_1's auc: 0.986658
Early stopping, best iteration is:
[991]	training's auc: 1	valid_1's auc: 0.986634
5 FOLD Score:  0.9866342246861969
Total CV:  0.9835492979423521
Total CV2:  0.9835519754968378
1
(439670, 603)
325 ['C1', 'C11', 'C12', 'C13', 'C14', 'C2', 'C5', 'C6', 'C9', 'D1', 'D10', 'D11', 'D15', 'D2', 'D3', 'D4', 'D5', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'P_emaildomain', 'TransactionAmt', 'V1', 'V10', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V11', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V2', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', '

New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999869	valid_1's auc: 0.970065
[1000]	training's auc: 1	valid_1's auc: 0.975188
Early stopping, best iteration is:
[1120]	training's auc: 1	valid_1's auc: 0.975726
1 FOLD Score:  0.9757260907316705
TYPE [4] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999894	valid_1's auc: 0.973857
[1000]	training's auc: 1	valid_1's auc: 0.97916
Early stopping, best iteration is:
[1012]	training's auc: 1	valid_1's auc: 0.979181
2 FOLD Score:  0.9791812555796364
TYPE [4] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999881	valid_1's auc: 0.971256
[1000]	training's auc: 1	valid_1's auc: 0.975524
Early stopping, best iteration is:
[1072]	training's auc: 1	valid_1's auc: 0.975834
3 FOLD Score:  0.9758341790955534
TYPE [4] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999899	valid_1's auc: 0.970444
[1000]	training's auc: 1	valid_1's auc: 0.976007
Early stopping, best iteration is:
[1075]	training's auc: 1	valid_1's auc: 0.976194
4 FOLD Score:  0.9761942977895774
TYPE [4] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999872	valid_1's auc: 0.971615
[1000]	training's auc: 1	valid_1's auc: 0.977766
Early stopping, best iteration is:
[1111]	training's auc: 1	valid_1's auc: 0.978329
5 FOLD Score:  0.9783294448873809
Total CV:  0.9770530536167638
Total CV2:  0.9769459383579838
(150870, 603)
561 ['C1', 'C10', 'C11', 'C12', 'C13', 'C14', 'C2', 'C3', 'C4', 'C6', 'C7', 'C8', 'D1', 'D10', 'D12', 'D13', 'D14', 'D15', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'DeviceInfo', 'DeviceType', 'M4', 'P_emaildomain', 'ProductCD', 'R_emaildomain', 'TransactionAmt', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V138', 'V139', 'V14', 'V140', 'V141', 'V142', 'V143

New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999573	valid_1's auc: 0.975567
[1000]	training's auc: 1	valid_1's auc: 0.979609
Early stopping, best iteration is:
[931]	training's auc: 1	valid_1's auc: 0.97938
1 FOLD Score:  0.9793803884364912
TYPE [0, 2, 1, 3] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999587	valid_1's auc: 0.981577
[1000]	training's auc: 1	valid_1's auc: 0.984959
Early stopping, best iteration is:
[941]	training's auc: 1	valid_1's auc: 0.984765
2 FOLD Score:  0.9847648755918514
TYPE [0, 2, 1, 3] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999577	valid_1's auc: 0.981519
[1000]	training's auc: 1	valid_1's auc: 0.985176
Early stopping, best iteration is:
[976]	training's auc: 1	valid_1's auc: 0.985077
3 FOLD Score:  0.9850767359385029
TYPE [0, 2, 1, 3] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999588	valid_1's auc: 0.978944
[1000]	training's auc: 1	valid_1's auc: 0.982461
Early stopping, best iteration is:
[932]	training's auc: 1	valid_1's auc: 0.982281
4 FOLD Score:  0.9822810209770779
TYPE [0, 2, 1, 3] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999556	valid_1's auc: 0.983306
[1000]	training's auc: 1	valid_1's auc: 0.986577
Early stopping, best iteration is:
[1002]	training's auc: 1	valid_1's auc: 0.986584
5 FOLD Score:  0.9865844077197552
Total CV:  0.9836174857327358
Total CV2:  0.9836461178790353
2
(439670, 603)
325 ['C1', 'C11', 'C12', 'C13', 'C14', 'C2', 'C5', 'C6', 'C9', 'D1', 'D10', 'D11', 'D15', 'D2', 'D3', 'D4', 'D5', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'P_emaildomain', 'TransactionAmt', 'V1', 'V10', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V11', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V2', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 

New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999874	valid_1's auc: 0.970806
[1000]	training's auc: 1	valid_1's auc: 0.975683
Early stopping, best iteration is:
[1064]	training's auc: 1	valid_1's auc: 0.975979
1 FOLD Score:  0.9759791822996589
TYPE [4] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999883	valid_1's auc: 0.973415
[1000]	training's auc: 1	valid_1's auc: 0.978855
Early stopping, best iteration is:
[1065]	training's auc: 1	valid_1's auc: 0.979052
2 FOLD Score:  0.9790523593465718
TYPE [4] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.99987	valid_1's auc: 0.970687
[1000]	training's auc: 1	valid_1's auc: 0.975169
Early stopping, best iteration is:
[1107]	training's auc: 1	valid_1's auc: 0.975624
3 FOLD Score:  0.9756240909835666
TYPE [4] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999898	valid_1's auc: 0.969659
[1000]	training's auc: 1	valid_1's auc: 0.975508
Early stopping, best iteration is:
[1071]	training's auc: 1	valid_1's auc: 0.975714
4 FOLD Score:  0.9757136951875547
TYPE [4] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999854	valid_1's auc: 0.971689
[1000]	training's auc: 1	valid_1's auc: 0.977134
Early stopping, best iteration is:
[1110]	training's auc: 1	valid_1's auc: 0.977807
5 FOLD Score:  0.9778074279785006
Total CV:  0.9768353511591705
Total CV2:  0.9767963040338773
(150870, 603)
561 ['C1', 'C10', 'C11', 'C12', 'C13', 'C14', 'C2', 'C3', 'C4', 'C6', 'C7', 'C8', 'D1', 'D10', 'D12', 'D13', 'D14', 'D15', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'DeviceInfo', 'DeviceType', 'M4', 'P_emaildomain', 'ProductCD', 'R_emaildomain', 'TransactionAmt', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V138', 'V139', 'V14', 'V140', 'V141', 'V142', 'V143

New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.99956	valid_1's auc: 0.976096
[1000]	training's auc: 1	valid_1's auc: 0.980138
Early stopping, best iteration is:
[947]	training's auc: 1	valid_1's auc: 0.979963
1 FOLD Score:  0.9799629553198692
TYPE [0, 2, 1, 3] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999594	valid_1's auc: 0.981487
[1000]	training's auc: 1	valid_1's auc: 0.984832
Early stopping, best iteration is:
[934]	training's auc: 1	valid_1's auc: 0.984624
2 FOLD Score:  0.984623536993059
TYPE [0, 2, 1, 3] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999576	valid_1's auc: 0.981245
[1000]	training's auc: 1	valid_1's auc: 0.984732
Early stopping, best iteration is:
[981]	training's auc: 1	valid_1's auc: 0.984641
3 FOLD Score:  0.9846413079948849
TYPE [0, 2, 1, 3] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999599	valid_1's auc: 0.979342
[1000]	training's auc: 1	valid_1's auc: 0.982926
Early stopping, best iteration is:
[916]	training's auc: 1	valid_1's auc: 0.982684
4 FOLD Score:  0.9826841785016497
TYPE [0, 2, 1, 3] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999523	valid_1's auc: 0.98366
[1000]	training's auc: 1	valid_1's auc: 0.986778
Early stopping, best iteration is:
[998]	training's auc: 1	valid_1's auc: 0.98678
5 FOLD Score:  0.9867801413775389
Total CV:  0.9837384240374003
Total CV2:  0.9837400562704034
3
(439670, 603)
325 ['C1', 'C11', 'C12', 'C13', 'C14', 'C2', 'C5', 'C6', 'C9', 'D1', 'D10', 'D11', 'D15', 'D2', 'D3', 'D4', 'D5', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'P_emaildomain', 'TransactionAmt', 'V1', 'V10', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V11', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V2', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V2

New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999884	valid_1's auc: 0.970798
[1000]	training's auc: 1	valid_1's auc: 0.976003
Early stopping, best iteration is:
[1062]	training's auc: 1	valid_1's auc: 0.976316
1 FOLD Score:  0.976315716692591
TYPE [4] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999883	valid_1's auc: 0.974466
[1000]	training's auc: 1	valid_1's auc: 0.979116
Early stopping, best iteration is:
[1055]	training's auc: 1	valid_1's auc: 0.979285
2 FOLD Score:  0.979285083084005
TYPE [4] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999885	valid_1's auc: 0.971113
[1000]	training's auc: 1	valid_1's auc: 0.975745
Early stopping, best iteration is:
[1098]	training's auc: 1	valid_1's auc: 0.976073
3 FOLD Score:  0.9760725779168962
TYPE [4] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999901	valid_1's auc: 0.970279
[1000]	training's auc: 1	valid_1's auc: 0.975335
Early stopping, best iteration is:
[1145]	training's auc: 1	valid_1's auc: 0.975773
4 FOLD Score:  0.9757726008760724
TYPE [4] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999862	valid_1's auc: 0.971917
[1000]	training's auc: 1	valid_1's auc: 0.977725
Early stopping, best iteration is:
[1078]	training's auc: 1	valid_1's auc: 0.978054
5 FOLD Score:  0.9780535480251026
Total CV:  0.9770999053189335
Total CV2:  0.9770250435560799
(150870, 603)
561 ['C1', 'C10', 'C11', 'C12', 'C13', 'C14', 'C2', 'C3', 'C4', 'C6', 'C7', 'C8', 'D1', 'D10', 'D12', 'D13', 'D14', 'D15', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'DeviceInfo', 'DeviceType', 'M4', 'P_emaildomain', 'ProductCD', 'R_emaildomain', 'TransactionAmt', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V138', 'V139', 'V14', 'V140', 'V141', 'V142', 'V143

New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999583	valid_1's auc: 0.975784
[1000]	training's auc: 1	valid_1's auc: 0.979907
Early stopping, best iteration is:
[952]	training's auc: 1	valid_1's auc: 0.979732
1 FOLD Score:  0.9797316641335581
TYPE [0, 2, 1, 3] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999592	valid_1's auc: 0.981558
[1000]	training's auc: 1	valid_1's auc: 0.984677
Early stopping, best iteration is:
[953]	training's auc: 1	valid_1's auc: 0.9845
2 FOLD Score:  0.984500476261313
TYPE [0, 2, 1, 3] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999573	valid_1's auc: 0.981593
[1000]	training's auc: 1	valid_1's auc: 0.984972
Early stopping, best iteration is:
[983]	training's auc: 1	valid_1's auc: 0.984879
3 FOLD Score:  0.9848790585024605
TYPE [0, 2, 1, 3] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999612	valid_1's auc: 0.979087
[1000]	training's auc: 1	valid_1's auc: 0.98248
Early stopping, best iteration is:
[919]	training's auc: 1	valid_1's auc: 0.982299
4 FOLD Score:  0.9822993602823331
TYPE [0, 2, 1, 3] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.99953	valid_1's auc: 0.983749
[1000]	training's auc: 1	valid_1's auc: 0.986996
Early stopping, best iteration is:
[982]	training's auc: 1	valid_1's auc: 0.986933
5 FOLD Score:  0.9869333877089159
Total CV:  0.9836687893777162
Total CV2:  0.983666775022663
4
(439670, 603)
325 ['C1', 'C11', 'C12', 'C13', 'C14', 'C2', 'C5', 'C6', 'C9', 'D1', 'D10', 'D11', 'D15', 'D2', 'D3', 'D4', 'D5', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'P_emaildomain', 'TransactionAmt', 'V1', 'V10', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V11', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V2', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V2

New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999874	valid_1's auc: 0.97064
[1000]	training's auc: 1	valid_1's auc: 0.975609
Early stopping, best iteration is:
[1097]	training's auc: 1	valid_1's auc: 0.975933
1 FOLD Score:  0.9759328826941936
TYPE [4] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999904	valid_1's auc: 0.973757
[1000]	training's auc: 1	valid_1's auc: 0.978906
Early stopping, best iteration is:
[1034]	training's auc: 1	valid_1's auc: 0.979
2 FOLD Score:  0.9789998599671427
TYPE [4] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999878	valid_1's auc: 0.970942
[1000]	training's auc: 1	valid_1's auc: 0.975693
Early stopping, best iteration is:
[1052]	training's auc: 1	valid_1's auc: 0.975937
3 FOLD Score:  0.9759370424180491
TYPE [4] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999901	valid_1's auc: 0.969688
[1000]	training's auc: 1	valid_1's auc: 0.975538
Early stopping, best iteration is:
[1099]	training's auc: 1	valid_1's auc: 0.975985
4 FOLD Score:  0.9759854327002346
TYPE [4] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.99986	valid_1's auc: 0.971447
[1000]	training's auc: 1	valid_1's auc: 0.977216
Early stopping, best iteration is:
[1064]	training's auc: 1	valid_1's auc: 0.977559
5 FOLD Score:  0.9775586339103997
Total CV:  0.976882770338004
Total CV2:  0.9768323551704292
(150870, 603)
561 ['C1', 'C10', 'C11', 'C12', 'C13', 'C14', 'C2', 'C3', 'C4', 'C6', 'C7', 'C8', 'D1', 'D10', 'D12', 'D13', 'D14', 'D15', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'DeviceInfo', 'DeviceType', 'M4', 'P_emaildomain', 'ProductCD', 'R_emaildomain', 'TransactionAmt', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V138', 'V139', 'V14', 'V140', 'V141', 'V142', 'V143',

New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999572	valid_1's auc: 0.975695
[1000]	training's auc: 1	valid_1's auc: 0.979705
Early stopping, best iteration is:
[928]	training's auc: 1	valid_1's auc: 0.979426
1 FOLD Score:  0.9794260968215409
TYPE [0, 2, 1, 3] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.99962	valid_1's auc: 0.981498
[1000]	training's auc: 1	valid_1's auc: 0.984853
Early stopping, best iteration is:
[938]	training's auc: 1	valid_1's auc: 0.984638
2 FOLD Score:  0.9846377906574451
TYPE [0, 2, 1, 3] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999591	valid_1's auc: 0.980962
[1000]	training's auc: 1	valid_1's auc: 0.98478
Early stopping, best iteration is:
[982]	training's auc: 1	valid_1's auc: 0.984723
3 FOLD Score:  0.9847232051268956
TYPE [0, 2, 1, 3] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999597	valid_1's auc: 0.978786
[1000]	training's auc: 1	valid_1's auc: 0.982604
Early stopping, best iteration is:
[905]	training's auc: 1	valid_1's auc: 0.982308
4 FOLD Score:  0.9823077773169058
TYPE [0, 2, 1, 3] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999531	valid_1's auc: 0.983501
[1000]	training's auc: 1	valid_1's auc: 0.986655
Early stopping, best iteration is:
[976]	training's auc: 1	valid_1's auc: 0.986592
5 FOLD Score:  0.9865917219936683
Total CV:  0.9835373183832912
Total CV2:  0.9835493100985911
5
(439670, 603)
325 ['C1', 'C11', 'C12', 'C13', 'C14', 'C2', 'C5', 'C6', 'C9', 'D1', 'D10', 'D11', 'D15', 'D2', 'D3', 'D4', 'D5', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'P_emaildomain', 'TransactionAmt', 'V1', 'V10', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V11', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V2', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', '

New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999881	valid_1's auc: 0.970288
[1000]	training's auc: 1	valid_1's auc: 0.975547
Early stopping, best iteration is:
[1047]	training's auc: 1	valid_1's auc: 0.975664
1 FOLD Score:  0.9756644096919369
TYPE [4] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999893	valid_1's auc: 0.973673
[1000]	training's auc: 1	valid_1's auc: 0.979145
Early stopping, best iteration is:
[1018]	training's auc: 1	valid_1's auc: 0.97925
2 FOLD Score:  0.9792502237031365
TYPE [4] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999874	valid_1's auc: 0.970797
[1000]	training's auc: 1	valid_1's auc: 0.975379
Early stopping, best iteration is:
[1051]	training's auc: 1	valid_1's auc: 0.975582
3 FOLD Score:  0.9755820617133344
TYPE [4] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999893	valid_1's auc: 0.970256
[1000]	training's auc: 1	valid_1's auc: 0.975759
Early stopping, best iteration is:
[1128]	training's auc: 1	valid_1's auc: 0.976373
4 FOLD Score:  0.9763730791102815
TYPE [4] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999855	valid_1's auc: 0.971718
[1000]	training's auc: 1	valid_1's auc: 0.977855
Early stopping, best iteration is:
[1065]	training's auc: 1	valid_1's auc: 0.978207
5 FOLD Score:  0.9782071974299351
Total CV:  0.9770153943297248
Total CV2:  0.9769420377194444
(150870, 603)
561 ['C1', 'C10', 'C11', 'C12', 'C13', 'C14', 'C2', 'C3', 'C4', 'C6', 'C7', 'C8', 'D1', 'D10', 'D12', 'D13', 'D14', 'D15', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'DeviceInfo', 'DeviceType', 'M4', 'P_emaildomain', 'ProductCD', 'R_emaildomain', 'TransactionAmt', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V138', 'V139', 'V14', 'V140', 'V141', 'V142', 'V143

New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999538	valid_1's auc: 0.975708
[1000]	training's auc: 1	valid_1's auc: 0.979847
Early stopping, best iteration is:
[935]	training's auc: 1	valid_1's auc: 0.979614
1 FOLD Score:  0.9796137377288499
TYPE [0, 2, 1, 3] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999589	valid_1's auc: 0.981104
[1000]	training's auc: 1	valid_1's auc: 0.984331
Early stopping, best iteration is:
[926]	training's auc: 1	valid_1's auc: 0.984073
2 FOLD Score:  0.984072927767943
TYPE [0, 2, 1, 3] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999584	valid_1's auc: 0.981369
[1000]	training's auc: 1	valid_1's auc: 0.985057
Early stopping, best iteration is:
[962]	training's auc: 1	valid_1's auc: 0.985001
3 FOLD Score:  0.9850007061554096
TYPE [0, 2, 1, 3] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999609	valid_1's auc: 0.979
[1000]	training's auc: 1	valid_1's auc: 0.982413
Early stopping, best iteration is:
[917]	training's auc: 1	valid_1's auc: 0.98218
4 FOLD Score:  0.982179571135193
TYPE [0, 2, 1, 3] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999547	valid_1's auc: 0.983469
[1000]	training's auc: 1	valid_1's auc: 0.986738
Early stopping, best iteration is:
[988]	training's auc: 1	valid_1's auc: 0.986684
5 FOLD Score:  0.9866840570187603
Total CV:  0.9835101999612311
Total CV2:  0.9835234057209783
6
(439670, 603)
325 ['C1', 'C11', 'C12', 'C13', 'C14', 'C2', 'C5', 'C6', 'C9', 'D1', 'D10', 'D11', 'D15', 'D2', 'D3', 'D4', 'D5', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'P_emaildomain', 'TransactionAmt', 'V1', 'V10', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V11', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V2', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', '

New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999874	valid_1's auc: 0.970441
[1000]	training's auc: 1	valid_1's auc: 0.975697
Early stopping, best iteration is:
[1082]	training's auc: 1	valid_1's auc: 0.976017
1 FOLD Score:  0.9760170114392186
TYPE [4] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999887	valid_1's auc: 0.973731
[1000]	training's auc: 1	valid_1's auc: 0.978892
Early stopping, best iteration is:
[1021]	training's auc: 1	valid_1's auc: 0.979021
2 FOLD Score:  0.9790214602295038
TYPE [4] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999881	valid_1's auc: 0.970207
[1000]	training's auc: 1	valid_1's auc: 0.975243
Early stopping, best iteration is:
[1110]	training's auc: 1	valid_1's auc: 0.975788
3 FOLD Score:  0.9757879177787117
TYPE [4] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999909	valid_1's auc: 0.969818
[1000]	training's auc: 1	valid_1's auc: 0.975322
Early stopping, best iteration is:
[1083]	training's auc: 1	valid_1's auc: 0.975657
4 FOLD Score:  0.975656581971378
TYPE [4] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999877	valid_1's auc: 0.971202
[1000]	training's auc: 1	valid_1's auc: 0.977276
Early stopping, best iteration is:
[1093]	training's auc: 1	valid_1's auc: 0.977717
5 FOLD Score:  0.9777165695191852
Total CV:  0.9768399081875995
Total CV2:  0.9767535225614218
(150870, 603)
561 ['C1', 'C10', 'C11', 'C12', 'C13', 'C14', 'C2', 'C3', 'C4', 'C6', 'C7', 'C8', 'D1', 'D10', 'D12', 'D13', 'D14', 'D15', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'DeviceInfo', 'DeviceType', 'M4', 'P_emaildomain', 'ProductCD', 'R_emaildomain', 'TransactionAmt', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V138', 'V139', 'V14', 'V140', 'V141', 'V142', 'V143

New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999548	valid_1's auc: 0.975527
[1000]	training's auc: 1	valid_1's auc: 0.979708
Early stopping, best iteration is:
[918]	training's auc: 1	valid_1's auc: 0.979339
1 FOLD Score:  0.9793387194685343
TYPE [0, 2, 1, 3] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999583	valid_1's auc: 0.981631
[1000]	training's auc: 1	valid_1's auc: 0.984791
Early stopping, best iteration is:
[923]	training's auc: 1	valid_1's auc: 0.984515
2 FOLD Score:  0.9845145456110732
TYPE [0, 2, 1, 3] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999582	valid_1's auc: 0.981267
[1000]	training's auc: 1	valid_1's auc: 0.984926
Early stopping, best iteration is:
[977]	training's auc: 1	valid_1's auc: 0.984869
3 FOLD Score:  0.9848690594340173
TYPE [0, 2, 1, 3] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999599	valid_1's auc: 0.979011
[1000]	training's auc: 1	valid_1's auc: 0.982775
Early stopping, best iteration is:
[917]	training's auc: 1	valid_1's auc: 0.982484
4 FOLD Score:  0.9824835981102529
TYPE [0, 2, 1, 3] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999533	valid_1's auc: 0.983697
[1000]	training's auc: 1	valid_1's auc: 0.986833
Early stopping, best iteration is:
[961]	training's auc: 1	valid_1's auc: 0.986783
5 FOLD Score:  0.9867829226455606
Total CV:  0.9835977690538877
Total CV2:  0.9836026973250236
7
(439670, 603)
325 ['C1', 'C11', 'C12', 'C13', 'C14', 'C2', 'C5', 'C6', 'C9', 'D1', 'D10', 'D11', 'D15', 'D2', 'D3', 'D4', 'D5', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'P_emaildomain', 'TransactionAmt', 'V1', 'V10', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V11', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V2', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', '

New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999894	valid_1's auc: 0.970789
[1000]	training's auc: 1	valid_1's auc: 0.975836
Early stopping, best iteration is:
[1034]	training's auc: 1	valid_1's auc: 0.976026
1 FOLD Score:  0.9760255077889007
TYPE [4] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999891	valid_1's auc: 0.973481
[1000]	training's auc: 1	valid_1's auc: 0.978954
Early stopping, best iteration is:
[1052]	training's auc: 1	valid_1's auc: 0.979242
2 FOLD Score:  0.9792417531388974
TYPE [4] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999884	valid_1's auc: 0.971277
[1000]	training's auc: 1	valid_1's auc: 0.975728
Early stopping, best iteration is:
[1074]	training's auc: 1	valid_1's auc: 0.975972
3 FOLD Score:  0.9759723030021128
TYPE [4] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999901	valid_1's auc: 0.970746
[1000]	training's auc: 1	valid_1's auc: 0.975875
Early stopping, best iteration is:
[1064]	training's auc: 1	valid_1's auc: 0.976172
4 FOLD Score:  0.9761720310122305
TYPE [4] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.99987	valid_1's auc: 0.971343
[1000]	training's auc: 1	valid_1's auc: 0.977469
Early stopping, best iteration is:
[1085]	training's auc: 1	valid_1's auc: 0.977896
5 FOLD Score:  0.9778959361477333
Total CV:  0.977061506217975
Total CV2:  0.9770392388981924
(150870, 603)
561 ['C1', 'C10', 'C11', 'C12', 'C13', 'C14', 'C2', 'C3', 'C4', 'C6', 'C7', 'C8', 'D1', 'D10', 'D12', 'D13', 'D14', 'D15', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'DeviceInfo', 'DeviceType', 'M4', 'P_emaildomain', 'ProductCD', 'R_emaildomain', 'TransactionAmt', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V138', 'V139', 'V14', 'V140', 'V141', 'V142', 'V143',

New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999553	valid_1's auc: 0.975833
[1000]	training's auc: 1	valid_1's auc: 0.979857
Early stopping, best iteration is:
[905]	training's auc: 1	valid_1's auc: 0.979449
1 FOLD Score:  0.9794487206290604
TYPE [0, 2, 1, 3] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999589	valid_1's auc: 0.981492
[1000]	training's auc: 1	valid_1's auc: 0.98462
Early stopping, best iteration is:
[955]	training's auc: 1	valid_1's auc: 0.984493
2 FOLD Score:  0.9844926736088259
TYPE [0, 2, 1, 3] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999589	valid_1's auc: 0.981426
[1000]	training's auc: 1	valid_1's auc: 0.985014
Early stopping, best iteration is:
[969]	training's auc: 1	valid_1's auc: 0.984914
3 FOLD Score:  0.9849141397195484
TYPE [0, 2, 1, 3] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999624	valid_1's auc: 0.978842
[1000]	training's auc: 1	valid_1's auc: 0.982669
Early stopping, best iteration is:
[924]	training's auc: 1	valid_1's auc: 0.982372
4 FOLD Score:  0.9823715348178391
TYPE [0, 2, 1, 3] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.99953	valid_1's auc: 0.983529
[1000]	training's auc: 1	valid_1's auc: 0.986552
Early stopping, best iteration is:
[1007]	training's auc: 1	valid_1's auc: 0.98657
5 FOLD Score:  0.9865697638058073
Total CV:  0.9835593665162163
Total CV2:  0.9836140034553
8
(439670, 603)
325 ['C1', 'C11', 'C12', 'C13', 'C14', 'C2', 'C5', 'C6', 'C9', 'D1', 'D10', 'D11', 'D15', 'D2', 'D3', 'D4', 'D5', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'P_emaildomain', 'TransactionAmt', 'V1', 'V10', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V11', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V2', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27'

New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999899	valid_1's auc: 0.970131
[1000]	training's auc: 1	valid_1's auc: 0.975249
Early stopping, best iteration is:
[1068]	training's auc: 1	valid_1's auc: 0.975616
1 FOLD Score:  0.9756157352499649
TYPE [4] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999903	valid_1's auc: 0.973881
[1000]	training's auc: 1	valid_1's auc: 0.979251
Early stopping, best iteration is:
[990]	training's auc: 1	valid_1's auc: 0.979157
2 FOLD Score:  0.9791570410254857
TYPE [4] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999893	valid_1's auc: 0.971319
[1000]	training's auc: 1	valid_1's auc: 0.975974
Early stopping, best iteration is:
[1116]	training's auc: 1	valid_1's auc: 0.976602
3 FOLD Score:  0.976602230845071
TYPE [4] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999909	valid_1's auc: 0.970017
[1000]	training's auc: 1	valid_1's auc: 0.975332
Early stopping, best iteration is:
[1118]	training's auc: 1	valid_1's auc: 0.975837
4 FOLD Score:  0.9758369616338444
TYPE [4] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999874	valid_1's auc: 0.971578
[1000]	training's auc: 1	valid_1's auc: 0.977253
Early stopping, best iteration is:
[1081]	training's auc: 1	valid_1's auc: 0.977716
5 FOLD Score:  0.9777161680922287
Total CV:  0.9769856273693189
Total CV2:  0.9768367779478018
(150870, 603)
561 ['C1', 'C10', 'C11', 'C12', 'C13', 'C14', 'C2', 'C3', 'C4', 'C6', 'C7', 'C8', 'D1', 'D10', 'D12', 'D13', 'D14', 'D15', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'DeviceInfo', 'DeviceType', 'M4', 'P_emaildomain', 'ProductCD', 'R_emaildomain', 'TransactionAmt', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V138', 'V139', 'V14', 'V140', 'V141', 'V142', 'V143

New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999552	valid_1's auc: 0.975756
[1000]	training's auc: 1	valid_1's auc: 0.979935
Early stopping, best iteration is:
[944]	training's auc: 1	valid_1's auc: 0.97966
1 FOLD Score:  0.9796603522949205
TYPE [0, 2, 1, 3] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999583	valid_1's auc: 0.981504
[1000]	training's auc: 1	valid_1's auc: 0.984597
Early stopping, best iteration is:
[949]	training's auc: 1	valid_1's auc: 0.98448
2 FOLD Score:  0.9844796640681632
TYPE [0, 2, 1, 3] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999579	valid_1's auc: 0.981168
[1000]	training's auc: 1	valid_1's auc: 0.985066
Early stopping, best iteration is:
[965]	training's auc: 1	valid_1's auc: 0.984943
3 FOLD Score:  0.9849425702505596
TYPE [0, 2, 1, 3] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999622	valid_1's auc: 0.979144
Early stopping, best iteration is:
[899]	training's auc: 1	valid_1's auc: 0.98211
4 FOLD Score:  0.9821100538022073
TYPE [0, 2, 1, 3] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999542	valid_1's auc: 0.983454
[1000]	training's auc: 1	valid_1's auc: 0.986625
Early stopping, best iteration is:
[979]	training's auc: 1	valid_1's auc: 0.986586
5 FOLD Score:  0.9865858214029484
Total CV:  0.98355569236376
Total CV2:  0.9835596736929269
9
(439670, 603)
325 ['C1', 'C11', 'C12', 'C13', 'C14', 'C2', 'C5', 'C6', 'C9', 'D1', 'D10', 'D11', 'D15', 'D2', 'D3', 'D4', 'D5', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'P_emaildomain', 'TransactionAmt', 'V1', 'V10', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V11', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V2', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V2

New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999874	valid_1's auc: 0.970336
[1000]	training's auc: 1	valid_1's auc: 0.975696
Early stopping, best iteration is:
[1037]	training's auc: 1	valid_1's auc: 0.975809
1 FOLD Score:  0.9758089188169192
TYPE [4] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999889	valid_1's auc: 0.97379
[1000]	training's auc: 1	valid_1's auc: 0.979543
Early stopping, best iteration is:
[1034]	training's auc: 1	valid_1's auc: 0.979659
2 FOLD Score:  0.9796591468245803
TYPE [4] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999871	valid_1's auc: 0.971222
[1000]	training's auc: 1	valid_1's auc: 0.975866
Early stopping, best iteration is:
[1091]	training's auc: 1	valid_1's auc: 0.976128
3 FOLD Score:  0.9761278857186935
TYPE [4] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.99991	valid_1's auc: 0.970584
[1000]	training's auc: 1	valid_1's auc: 0.976401
Early stopping, best iteration is:
[1054]	training's auc: 1	valid_1's auc: 0.976469
4 FOLD Score:  0.9764688243115678
TYPE [4] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'addr1', 'addr2']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999882	valid_1's auc: 0.971274
[1000]	training's auc: 1	valid_1's auc: 0.977302
Early stopping, best iteration is:
[1090]	training's auc: 1	valid_1's auc: 0.977758
5 FOLD Score:  0.9777578193762575
Total CV:  0.9771645190096037
Total CV2:  0.9771190932632602
(150870, 603)
561 ['C1', 'C10', 'C11', 'C12', 'C13', 'C14', 'C2', 'C3', 'C4', 'C6', 'C7', 'C8', 'D1', 'D10', 'D12', 'D13', 'D14', 'D15', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'DeviceInfo', 'DeviceType', 'M4', 'P_emaildomain', 'ProductCD', 'R_emaildomain', 'TransactionAmt', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V12', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V13', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V138', 'V139', 'V14', 'V140', 'V141', 'V142', 'V143

New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999557	valid_1's auc: 0.975507
[1000]	training's auc: 1	valid_1's auc: 0.979722
Early stopping, best iteration is:
[976]	training's auc: 1	valid_1's auc: 0.979617
1 FOLD Score:  0.9796166252209162
TYPE [0, 2, 1, 3] - 2 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999565	valid_1's auc: 0.981168
[1000]	training's auc: 1	valid_1's auc: 0.984575
Early stopping, best iteration is:
[935]	training's auc: 1	valid_1's auc: 0.984395
2 FOLD Score:  0.9843954015651231
TYPE [0, 2, 1, 3] - 3 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.99958	valid_1's auc: 0.98122
[1000]	training's auc: 1	valid_1's auc: 0.984818
Early stopping, best iteration is:
[981]	training's auc: 1	valid_1's auc: 0.984765
3 FOLD Score:  0.9847653363784157
TYPE [0, 2, 1, 3] - 4 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999602	valid_1's auc: 0.979198
Early stopping, best iteration is:
[891]	training's auc: 1	valid_1's auc: 0.982403
4 FOLD Score:  0.9824032369334561
TYPE [0, 2, 1, 3] - 5 FOLD Start


New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['P_emaildomain', 'R_emaildomain', 'addr1', 'addr2', 'id_13', 'id_15', 'id_30_v', 'id_31_v', 'id_33', 'id_37', 'id_38']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 100 rounds.
[500]	training's auc: 0.999549	valid_1's auc: 0.983447
[1000]	training's auc: 1	valid_1's auc: 0.986567
Early stopping, best iteration is:
[972]	training's auc: 1	valid_1's auc: 0.986501
5 FOLD Score:  0.9865009235807427
Total CV:  0.9835363047357308
Total CV2:  0.9834792404775533


Total CV2:  0.974203181381347, Total CV2:  0.9831092711580688