# exp35

下記のStatedate特徴量を作成
https://www.kaggle.com/code/thedevastator/amex-features-the-best-of-both-worlds



In [1]:
# ====================================================
# Library
# ====================================================
import gc
import warnings
warnings.filterwarnings('ignore')
import scipy as sp
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
from tqdm.auto import tqdm
import itertools


In [3]:

# ====================================================
# Get the difference
# ====================================================
def get_difference(data, num_features):
    """Return, per customer, the last first-order difference of each numeric feature.

    For every `customer_ID` group the rows are differenced in their existing
    order (`diff(1)`) and only the final row of that result is kept, i.e.
    "last row minus the row before it". A customer with a single row therefore
    gets NaN for every feature.

    Parameters
    ----------
    data : pd.DataFrame
        Frame with a 'customer_ID' column and every column in `num_features`.
    num_features : list of str
        Names of the columns to difference.

    Returns
    -------
    pd.DataFrame
        One row per customer with float32 columns named '<feature>_diff1'
        followed by a 'customer_ID' column. Empty (but with the same columns)
        when `data` has no rows.
    """
    num_features = list(num_features)
    # Derive the output names from the argument, not from a variable leaked out of the loop
    diff_columns = [col + '_diff1' for col in num_features]
    last_diffs = []
    customer_ids = []
    # Group by the scalar key: a one-element list key makes pandas >= 2.0 yield
    # 1-tuples such as ('abc',) as group keys, which would break later merges
    for customer_id, customer_df in tqdm(data.groupby('customer_ID')):
        # Get the differences and keep only the last one as a (1, n_features) array
        last_diff = customer_df[num_features].diff(1).iloc[[-1]].values.astype(np.float32)
        # Append to lists
        last_diffs.append(last_diff)
        customer_ids.append(customer_id)
    # Concatenate (np.concatenate raises on an empty list, so handle that case explicitly)
    if last_diffs:
        values = np.concatenate(last_diffs, axis = 0)
    else:
        values = np.empty((0, len(num_features)), dtype = np.float32)
    # Transform to dataframe
    result = pd.DataFrame(values, columns = diff_columns)
    # Add customer id
    result['customer_ID'] = customer_ids
    return result

# ====================================================
# Read & preprocess data and save it to disk
# ====================================================
def _build_customer_features(statements, num_features, cat_features):
    """Collapse statement-level rows into one engineered feature row per customer.

    Parameters
    ----------
    statements : pd.DataFrame
        Frame with a 'customer_ID' column plus every column named in
        `num_features` and `cat_features`.
    num_features : list of str
        Columns aggregated with mean / std / min / max / last and differenced.
    cat_features : list of str
        Columns aggregated with count / last / nunique.

    Returns
    -------
    pd.DataFrame
        Inner join on 'customer_ID' of the numeric aggregates, the categorical
        aggregates and the last first-order differences from `get_difference`.
    """
    num_agg = statements.groupby("customer_ID")[num_features].agg(['mean', 'std', 'min', 'max', 'last'])
    # Flatten the (feature, statistic) column MultiIndex into '<feature>_<statistic>'
    num_agg.columns = ['_'.join(x) for x in num_agg.columns]
    num_agg.reset_index(inplace = True)
    cat_agg = statements.groupby("customer_ID")[cat_features].agg(['count', 'last', 'nunique'])
    cat_agg.columns = ['_'.join(x) for x in cat_agg.columns]
    cat_agg.reset_index(inplace = True)
    # Transform float64 columns to float32
    for col in tqdm(list(num_agg.dtypes[num_agg.dtypes == 'float64'].index)):
        num_agg[col] = num_agg[col].astype(np.float32)
    # Transform int64 columns to int32
    for col in tqdm(list(cat_agg.dtypes[cat_agg.dtypes == 'int64'].index)):
        cat_agg[col] = cat_agg[col].astype(np.int32)
    # Get the difference
    diff = get_difference(statements, num_features)
    return (
        num_agg
        .merge(cat_agg, how = 'inner', on = 'customer_ID')
        .merge(diff, how = 'inner', on = 'customer_ID')
    )

# ====================================================
# Read & preprocess data and save it to disk
# ====================================================
def read_preprocess_data():
    """Build the engineered train/test feature tables and write them to parquet.

    Reads the raw train and test parquet files, derives per-customer features
    with `_build_customer_features` (the same pipeline for both splits), joins
    the training labels onto the train features, and saves both tables under
    '../input/best of both worlds/'. Returns nothing; the result is the two
    files on disk.
    """
    # Local import: `os` is not among the imports of the notebook's first import cell
    import os

    # Create the output directory up front so a missing folder fails (or is
    # fixed) before the long computation rather than at the final write
    os.makedirs('../input/best of both worlds', exist_ok = True)

    train = pd.read_parquet('../input/AMEXdata-integerdtypes-parquetformat/train.parquet')
    features = train.drop(['customer_ID', 'S_2'], axis = 1).columns.to_list()
    cat_features = [
        "B_30",
        "B_38",
        "D_114",
        "D_116",
        "D_117",
        "D_120",
        "D_126",
        "D_63",
        "D_64",
        "D_66",
        "D_68",
    ]
    # Everything that is not an id, the S_2 column or a categorical is treated as numeric
    num_features = [col for col in features if col not in cat_features]
    print('Starting training feature engineer...')
    train_features = _build_customer_features(train, num_features, cat_features)
    train_labels = pd.read_csv('../input/amex-default-prediction/train_labels.csv')
    # Rebinding `train` drops the last reference to the raw statement-level frame
    train = train_features.merge(train_labels, how = 'inner', on = 'customer_ID')
    del train_features
    gc.collect()

    test = pd.read_parquet('../input/AMEXdata-integerdtypes-parquetformat/test.parquet')
    print('Starting test feature engineer...')
    # Test uses the feature lists derived from train so both tables share columns
    test = _build_customer_features(test, num_features, cat_features)
    gc.collect()
    # Save files to disk
    train.to_parquet('../input/best of both worlds/train_fe.parquet')
    test.to_parquet('../input/best of both worlds/test_fe.parquet')

# Read & Preprocess Data
# Runs the full feature-engineering pipeline and writes train_fe / test_fe parquet files to disk.
read_preprocess_data()

Starting training feature engineer...


KeyboardInterrupt: 

# Training & Inference

In [3]:
# ====================================================
# Library
# ====================================================
import os
import gc
import warnings
warnings.filterwarnings('ignore')
import random
import scipy as sp
import numpy as np
import pandas as pd
import joblib
import itertools
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
from tqdm.auto import tqdm
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb
from itertools import combinations

import pickle

# ====================================================
# Configurations
# ====================================================
class CFG:
    """Experiment-wide settings, read as plain class attributes (e.g. `CFG.input_dir`)."""

    # NOTE(review): the notebook title says exp35 while `ver` and `output_dir`
    # say exp33 -- confirm which experiment id is intended.

    # Experiment identity
    ver = "exp33"
    model = "lgb"

    # Directories; both end with a slash because file names are appended by
    # plain string concatenation
    input_dir = '../feature/exp03_amex-fe/'
    output_dir = '../output/exp33_statedate_lgb/'

    # Reproducibility / validation setup
    seed = 42
    n_folds = 5
    target = 'target'

    # Model options -- presumably forwarded to LightGBM; usage is not visible in this chunk
    boosting_type = 'dart'
    metric = 'binary_logloss'

# ====================================================
# Seed everything
# ====================================================
def seed_everything(seed):
    """Seed Python's `random`, NumPy's global RNG and export PYTHONHASHSEED.

    Parameters
    ----------
    seed : int
        Value handed to each seeding function and written, as a string, to the
        PYTHONHASHSEED environment variable.
    """
    # NOTE(review): PYTHONHASHSEED is presumably only honoured by interpreters
    # started after this point, not by the running kernel -- confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_fn in (random.seed, np.random.seed):
        seed_fn(seed)

# ====================================================
# Read data
# ====================================================
def read_data():
    """Load the engineered train and test tables from `CFG.input_dir`.

    Returns
    -------
    tuple
        `(train, test)` as read by `pd.read_parquet` from
        'train_fe_plus_plus.parquet' and 'test_fe_plus_plus.parquet'.
    """
    train_path = CFG.input_dir + 'train_fe_plus_plus.parquet'
    test_path = CFG.input_dir + 'test_fe_plus_plus.parquet'
    # Train is read first, then test, matching the order of the returned pair
    return pd.read_parquet(train_path), pd.read_parquet(test_path)

# ====================================================
# Amex metric
# ====================================================
def amex_metric(y_true, y_pred):
    """Score predictions as the mean of a normalized weighted Gini and a top-4% capture rate.

    Rows whose label is 0 carry weight 20, all others weight 1.

    * Capture rate: rows are ranked by prediction (descending); the rows whose
      cumulative weight stays within 4% of the total weight are kept, and the
      sum of their labels is divided by the sum of all labels.
    * Normalized Gini: the weighted Gini obtained when ranking by prediction,
      divided by the one obtained when ranking by the true label.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted scores, same length as `y_true`.

    Returns
    -------
    float
        0.5 * (normalized Gini + capture rate).
    """
    # Column 0 holds the labels, column 1 the predictions
    pairs = np.transpose(np.array([y_true, y_pred]))

    def _weighted_gini(sort_col):
        # Rank rows by the requested column, largest value first
        ranked = pairs[pairs[:, sort_col].argsort()[::-1]]
        w = np.where(ranked[:, 0] == 0, 20, 1)
        random_share = np.cumsum(w / np.sum(w))
        weighted_pos = ranked[:, 0] * w
        lorentz = np.cumsum(weighted_pos) / np.sum(weighted_pos)
        return np.sum((lorentz - random_share) * w)

    # Share of label mass found in the highest-scored 4% of the total weight
    by_pred = pairs[pairs[:, 1].argsort()[::-1]]
    w_pred = np.where(by_pred[:, 0] == 0, 20, 1)
    cutoff = int(0.04 * np.sum(w_pred))
    captured = by_pred[np.cumsum(w_pred) <= cutoff]
    top_four = np.sum(captured[:, 0]) / np.sum(by_pred[:, 0])

    gini_model = _weighted_gini(1)    # ranking induced by the predictions
    gini_perfect = _weighted_gini(0)  # ranking induced by the true labels
    return 0.5 * (gini_model / gini_perfect + top_four)

# ====================================================
# LGBM amex metric
# ====================================================
def lgb_amex_metric(y_pred, y_true):
    """Adapt `amex_metric` to a `(predictions, dataset)` call signature.

    Parameters
    ----------
    y_pred : array-like
        Predicted scores.
    y_true : object exposing `get_label()`
        Container holding the ground-truth labels.

    Returns
    -------
    tuple
        `('amex_metric', score, True)`; the trailing flag presumably marks the
        metric as higher-is-better for LightGBM -- confirm against the caller.
    """
    labels = y_true.get_label()
    score = amex_metric(labels, y_pred)
    return 'amex_metric', score, True


In [4]:
# seed_everything(CFG.seed)
# train, test = read_data()


# date_train = pd.read_pickle('../feature/exp18_3_tsfresh/train_variation_coefficient.pkl')
# date_test = pd.read_pickle('../feature/exp18_3_tsfresh/test_variation_coefficient.pkl')

# train = train.merge(date_train,on = "customer_ID",how = "left")
# test = test.merge(date_test,on = "customer_ID",how = "left")

# del date_train,date_test
# gc.collect

# Load the pre-computed statement-date feature tables.
train = pd.read_pickle('../feature/exp30_statementdate/statedate_train.pkl')

# The test features are stored as four pickled parts; read them in order and
# stack them row-wise without keeping the individual parts alive afterwards.
test = pd.concat(
    [
        pd.read_pickle('../feature/exp30_statementdate/statedate_test0.pkl'),
        pd.read_pickle('../feature/exp30_statementdate/statedate_test1.pkl'),
        pd.read_pickle('../feature/exp30_statementdate/statedate_test2.pkl'),
        pd.read_pickle('../feature/exp30_statementdate/statedate_test3.pkl'),
    ],
    axis = 0,
)

# Reclaim the memory held by the temporary parts
gc.collect()

5259

In [5]:
# Preview the first rows of the concatenated test feature table
test.head()

Unnamed: 0,S_2,LT,B_11_last,B_11_first,S_12_last,S_12_first,D_46_last,D_46_first,S_27_last,S_27_first,D_53_last,D_53_first,S_20_last,S_20_first,B_39_last,B_39_first,R_19_last,R_19_first,B_13_last,B_13_first,D_45_last,D_45_first,B_12_last,B_12_first,B_28_last,B_28_first,S_16_last,S_16_first,B_15_last,B_15_first,B_3_last,B_3_first,R_26_last,R_26_first,D_140_last,D_140_first,R_2_last,R_2_first,D_125_last,D_125_first,D_127_last,D_127_first,R_11_last,R_11_first,R_12_last,R_12_first,R_4_last,R_4_first,D_132_last,D_132_first,R_8_last,R_8_first,B_1_last,B_1_first,S_6_last,S_6_first,D_47_last,D_47_first,S_8_last,S_8_first,B_32_last,B_32_first,D_83_last,D_83_first,D_71_last,D_71_first,R_9_last,R_9_first,B_22_last,B_22_first,D_75_last,D_75_first,B_16_last,B_16_first,D_133_last,D_133_first,D_73_last,D_73_first,R_3_last,R_3_first,D_96_last,D_96_first,D_119_last,D_119_first,D_122_last,D_122_first,S_26_last,S_26_first,D_106_last,D_106_first,R_6_last,R_6_first,D_138_last,D_138_first,S_11_last,S_11_first,D_72_last,D_72_first,B_14_last,B_14_first,B_33_last,B_33_first,B_2_last,B_2_first,D_76_last,D_76_first,B_37_last,B_37_first,D_81_last,D_81_first,B_20_last,B_20_first,D_70_last,D_70_first,D_121_last,D_121_first,D_86_last,D_86_first,B_21_last,B_21_first,B_5_last,B_5_first,B_19_last,B_19_first,D_65_last,D_65_first,R_7_last,R_7_first,B_17_last,B_17_first,D_124_last,D_124_first,P_4_last,P_4_first,P_3_last,P_3_first,B_42_last,B_42_first,S_13_last,S_13_first,S_22_last,S_22_first,D_59_last,D_59_first,B_41_last,B_41_first,S_24_last,S_24_first,S_5_last,S_5_first,D_49_last,D_49_first,D_78_last,D_78_first,B_26_last,B_26_first,D_142_last,D_142_first,D_69_last,D_69_first,D_54_last,D_54_first,R_10_last,R_10_first,D_56_last,D_56_first,B_24_last,B_24_first,S_19_last,S_19_first,B_8_last,B_8_first,S_7_last,S_7_first,D_50_last,D_50_first,D_112_last,D_112_first,D_79_last,D_79_first,D_42_last,D_42_first,R_1_last,R_1_first,D_60_last,D_60_first,D_134_last,D_134_first,P_2_last,P_2_first,D_131_last,D_131_fir
st,B_23_last,B_23_first,D_80_last,D_80_first,D_82_last,D_82_first,D_41_last,D_41_first,B_6_last,B_6_first,D_77_last,D_77_first,D_105_last,D_105_first,D_91_last,D_91_first,D_58_last,D_58_first,B_9_last,B_9_first,D_55_last,D_55_first,R_15_last,R_15_first,D_48_last,D_48_first,B_4_last,B_4_first,D_39_last,D_39_first,D_62_last,D_62_first,D_43_last,D_43_first,R_13_last,R_13_first,R_14_last,R_14_first,S_3_last,S_3_first,B_10_last,B_10_first,R_20_last,R_20_first,R_5_last,R_5_first,D_44_last,D_44_first,D_52_last,D_52_first,R_27_last,R_27_first,B_18_last,B_18_first,D_61_last,D_61_first,B_36_last,B_36_first,...,B_26_last_first_div,D_142_last_first_div,D_142_last_mean_sub,D_142_last_mean_div,D_142_last_max_div,D_69_last_first_div,D_69_last_mean_sub,D_69_last_mean_div,D_54_last_first_div,D_54_last_mean_sub,D_54_last_mean_div,R_10_last_first_div,R_10_last_mean_sub,R_10_last_mean_div,R_10_last_max_div,D_56_last_first_div,B_24_last_first_div,B_24_last_mean_sub,B_24_last_mean_div,B_24_last_max_div,S_19_last_first_div,B_8_last_first_div,B_8_last_mean_sub,B_8_last_mean_div,B_8_last_max_div,S_7_last_first_div,S_7_last_mean_sub,S_7_last_mean_div,S_7_last_max_div,S_7_last_min_div,B_11-P_2_last_first_div,B_11-P_2_last_mean_sub,B_11-P_2_last_mean_div,B_11-P_2_last_max_div,B_11-P_2_last_min_div,D_50_last_first_div,D_50_last_mean_sub,D_50_last_mean_div,D_50_last_max_div,D_50_last_min_div,D_112_last_first_div,D_112_last_mean_sub,D_112_last_mean_div,D_112_last_max_div,D_112_last_min_div,D_79_last_first_div,D_42_last_first_div,D_42_last_mean_sub,D_42_last_mean_div,D_42_last_max_div,D_42_last_min_div,R_1_last_first_div,R_1_last_mean_sub,R_1_last_mean_div,R_1_last_max_div,R_1_last_min_div,D_60_last_first_div,D_60_last_mean_sub,D_60_last_mean_div,D_60_last_max_div,D_60_last_min_div,D_134_last_first_div,D_134_last_mean_sub,D_134_last_mean_div,D_134_last_max_div,P_2_last_first_div,P_2_last_mean_sub,P_2_last_mean_div,P_2_last_max_div,P_2_last_min_div,D_131_last_first_div,D_131_last_mean_sub,D_131_las
t_mean_div,D_131_last_max_div,B_23_last_first_div,B_23_last_mean_sub,B_23_last_mean_div,B_23_last_max_div,D_80_last_first_div,D_80_last_mean_sub,D_80_last_mean_div,D_80_last_max_div,D_82_last_first_div,D_82_last_mean_sub,D_82_last_mean_div,D_82_last_max_div,D_41_last_first_div,D_41_last_mean_sub,D_41_last_mean_div,D_41_last_max_div,D_41_last_min_div,B_6_last_first_div,B_6_last_mean_sub,B_6_last_mean_div,B_6_last_max_div,D_77_last_first_div,D_77_last_mean_sub,D_77_last_mean_div,D_77_last_max_div,D_105_last_first_div,D_91_last_first_div,D_91_last_mean_sub,D_91_last_mean_div,D_91_last_max_div,D_58_last_first_div,D_58_last_mean_sub,D_58_last_mean_div,D_58_last_max_div,D_58_last_min_div,B_9_last_first_div,B_9_last_mean_sub,B_9_last_mean_div,B_9_last_max_div,B_9_last_min_div,B_17-P_2_last_first_div,B_17-P_2_last_mean_sub,B_17-P_2_last_mean_div,B_17-P_2_last_max_div,B_17-P_2_last_min_div,D_55_last_first_div,D_55_last_mean_sub,D_55_last_mean_div,D_55_last_max_div,D_55_last_min_div,R_15_last_first_div,R_15_last_mean_sub,R_15_last_mean_div,D_48_last_first_div,D_48_last_mean_sub,D_48_last_mean_div,D_48_last_max_div,D_48_last_min_div,B_14-P_2_last_first_div,B_14-P_2_last_mean_sub,B_14-P_2_last_mean_div,B_14-P_2_last_max_div,B_14-P_2_last_min_div,B_4_last_first_div,B_4_last_mean_sub,B_4_last_mean_div,B_4_last_max_div,B_4_last_min_div,D_39_last_first_div,D_39_last_mean_sub,D_39_last_mean_div,D_39_last_max_div,D_39_last_min_div,D_62_last_first_div,D_62_last_mean_sub,D_62_last_mean_div,D_43_last_first_div,D_43_last_mean_sub,D_43_last_mean_div,D_43_last_max_div,R_13_last_first_div,R_14_last_first_div,R_14_last_mean_sub,R_14_last_mean_div,R_14_last_max_div,S_3_last_first_div,S_3_last_mean_sub,S_3_last_mean_div,S_3_last_max_div,S_3_last_min_div,B_10_last_first_div,B_10_last_mean_sub,B_10_last_mean_div,B_10_last_max_div,R_20_last_first_div,R_20_last_mean_sub,R_20_last_mean_div,R_20_last_max_div,R_5_last_first_div,D_44_last_first_div,D_44_last_mean_sub,D_44_last_mean_div,D_44_last_max_d
iv,S_16-P_3_last_first_div,S_16-P_3_last_mean_sub,S_16-P_3_last_mean_div,S_16-P_3_last_max_div,S_16-P_3_last_min_div,D_52_last_first_div,R_27_last_first_div,R_27_last_mean_sub,R_27_last_mean_div,R_27_last_max_div,R_27_last_min_div,B_18_last_first_div,B_18_last_mean_sub,B_18_last_mean_div,B_18_last_max_div,S_23-P_3_last_first_div,S_23-P_3_last_mean_sub,S_23-P_3_last_mean_div,S_23-P_3_last_max_div,S_23-P_3_last_min_div,SDist_last_first_div,SDist_last_mean_sub,SDist_last_mean_div,SDist_last_max_div,SDist_last_min_div,D_61_last_first_div,D_61_last_mean_sub,D_61_last_mean_div,D_61_last_max_div,B_17-P_3_last_first_div,B_17-P_3_last_mean_sub,B_17-P_3_last_mean_div,B_17-P_3_last_max_div,B_17-P_3_last_min_div,B_36_last_first_div,B_40_last_first_div,B_40_last_mean_sub,B_40_last_mean_div,B_40_last_max_div,S_25_last_first_div,S_25_last_mean_sub,S_25_last_mean_div,S_25_last_max_div,S_25_last_min_div,B_7_last_first_div,B_25_last_first_div,B_25_last_mean_sub,B_25_last_mean_div,B_25_last_max_div,S_23_last_first_div,S_23_last_mean_sub,S_23_last_mean_div,S_23_last_max_div,S_23_last_min_div,D_141_last_first_div,D_141_last_mean_sub,D_141_last_mean_div,D_141_last_max_div,D_141_last_min_div,D_51_last_first_div,D_51_last_mean_sub,D_51_last_mean_div,D_145_last_first_div,D_145_last_mean_sub,D_145_last_mean_div,D_145_last_max_div,D_145_last_min_div,D_131-P_3_last_first_div,D_131-P_3_last_mean_sub,D_131-P_3_last_mean_div,D_131-P_3_last_max_div,D_131-P_3_last_min_div,D_130_last_first_div
0,2019-10-12,9.0,0.005188,0.004416,0.18765,0.190516,0.445881,0.442145,0.346436,0.004302,,,0,0,,,0,0,0.063506,0.028347,0.025111,0.004483,0.058696,0.119198,0.140051,0.133804,0.009269,0.002531,0.014091,0.010489,0.003576,0.002347,-1,-1,0,0,0,0,0,-1,0,0,0,0,1.0,1.0,0,0,,,0,0,0.010779,0.010728,0,1,0.489448,0.474383,1454,996,0,0,0,-1,0.281546,0.014684,-1,-1,0,0,3,3,0,0,0.006273,0.007082,0.178798,0.178862,1,1,0,0,0.253783,0.245074,1,-1,0.080587,0.006562,-1,-1,0.0,0.0,-1,-1,49,52,0,0,0.008621,0.010182,1,1,1.009347,0.814497,,,0.007948,0.00797,0,0,0,0,0,-1,0.199254,0.198171,0,0,0.006617,0.00999,0.050187,0.009517,0,0,0,0,0.0,0.0,,,1,-1,0.964865,0.959531,0.560515,0.597524,,,574,415,0.980107,0.981078,25,-1,0,0,0.957597,0.954664,0.011795,0.002771,-1,-1,0,0,0.009328,0.001656,,,0.008136,0.007157,1.0,1.0,0,0,,,0.007062,0.009799,0.003337,0.008999,1.00873,1.003925,0.159703,0.105587,,,1.0,1.0,0,-1,0.103745,0.113189,0.0,0.0,0.013835,0.027472,,,0.56893,0.631315,0.0,0.0,0.147274,1.134792,0,0,1,1,0.0,0.0,0.024945,0.174591,0.002916,0.002916,,,0,-1,0.38132,0.205898,0.005263,0.001013,0.458377,0.114564,0,0,0.517214,0.626467,9,9,4,0,0.049326,0.051811,0.007398,0.006272,0,0,0.0,0.0,0.149413,0.168651,0.033599,-0.002919,0,0,0,0,0,0,0.11593,0.119043,1.002286,1.007666,0.590569,0.592103,0.589184,0.588734,0.001118,0.008146,...,5.631588,,,,,1.136748,0.001903,1.305328,1.0,0.0,1.0,,0.0,,,,0.720673,0.001859,1.357209,0.720673,0.370766,1.004786,0.00209,1.002076,0.999434,1.51253,-0.010627,0.937611,0.760212,1.51253,0.899254,0.031458,0.947147,1.0,0.899254,,,,,,1.0,0.0,1.0,1.0,1.0,-0.0,0.916568,-0.014271,0.879078,0.827852,1.0,,0.0,,,,0.503614,-0.028781,0.324646,0.165215,1.0,,,,,0.901182,-0.032457,0.94603,0.901182,1.0,,0.0,,,0.129781,-0.106992,0.579213,0.129781,,0.0,,,1.0,0.0,1.0,1.0,,0.0,,,,0.142877,-0.193114,0.114396,0.017603,1.0,0.0,1.0,1.0,,-0.0,0.777778,-0.0,,1.851988,0.004103,1.010878,0.916028,1.851988,5.195462,0.001529,1.409699,0.62433,230.311035,,,,,,4.001069,0.141305,1.445657,1.0,4.001069,,0.0,,0.825604,-
0.061065,0.894402,0.825604,1.014391,0.902076,0.02225,0.961807,1.0,0.902076,1.0,-0.888889,0.910112,0.818182,1.0,inf,1.777778,1.8,0.5,inf,0.952046,0.013865,1.390978,1.17949,0.000563,1.082354,1.0,,,0.0,,,0.885932,-0.039227,0.792053,0.55965,1.0,-11.511744,-0.00347,0.906388,0.531734,,0.0,,,,,0.0,,,0.928592,0.034102,0.941741,1.048847,0.874829,0.973849,0.994661,-0.004338,0.995691,0.993126,1.0,0.997409,-9.6e-05,0.999837,0.995286,0.927516,0.030641,0.932712,1.071118,0.848747,0.425811,-16.503333,0.440628,0.295455,1.0,1.000765,-0.008368,0.985997,0.945893,,,,,,0.137268,1.36972,0.007327,1.019898,0.931921,0.999531,-0.003287,0.996625,0.991875,1.000716,0.130682,0.783422,0.000835,1.182794,0.635041,1.00647,-0.000226,0.998336,0.972656,1.033223,,0.0,,,,inf,0.888889,9.0,-0.0,0.111111,-0.0,,-0.0,0.938063,0.031418,0.946923,1.047084,0.883804,
1,2019-04-15,13.0,0.002235,0.010088,0.19071,0.18439,0.445957,0.459191,0.590974,0.473703,,,0,0,,,0,0,0.058585,0.030294,0.099732,0.077769,0.054184,0.024187,0.008613,0.022622,0.005311,0.002202,0.005461,0.005224,0.011386,0.001597,-1,-1,0,0,0,0,0,0,0,0,0,0,1.0,1.0,0,0,,,0,0,0.016562,0.02097,0,0,0.362383,0.340429,2402,2402,0,0,0,0,0.010397,0.009871,-1,-1,0,0,0,0,1,0,0.002767,0.002226,,,0,0,0,0,0.318151,0.285856,4,4,0.020564,0.007747,-1,-1,0.0,0.0,-1,-1,8,9,0,0,0.010847,0.009963,1,1,1.009245,1.001803,,,0.012894,0.023071,0,0,0,0,0,0,0.85485,0.832572,0,0,0.004334,0.007812,0.135907,0.034031,0,0,0,0,0.0,0.0,,,15,15,0.0,0.0,0.525659,0.588535,,,290,290,0.92859,0.919222,34,28,0,0,0.939332,0.914135,0.009336,0.031653,-1,-1,0,0,0.005945,0.002583,,,0.001395,0.003717,1.0,1.0,0,0,0.063766,0.060207,0.008168,0.005298,0.008257,0.006256,1.00864,1.001762,0.081717,0.044619,,,1.0,1.0,0,0,,,0.0,0.0,0.685577,0.246552,,,0.841177,0.894195,0.0,0.0,0.004756,0.013559,0,0,-1,-1,0.0,0.0,0.18272,0.209445,0.302335,0.300189,,,0,0,0.006375,0.002946,0.017454,0.023032,0.053444,0.061875,0,0,0.041712,0.085902,1,2,4,11,0.330806,0.328897,,,0,0,0.0,0.0,0.112195,0.073243,0.298735,0.295178,0,0,0,0,0,0,0.143998,0.150332,1.000501,1.006772,1.0,1.0,0.046726,0.075594,0.007424,0.008936,...,2.301986,,,,,0.375431,-0.002309,0.376635,1.0,0.0,1.0,,0.0,,,1.059109,1.541876,0.002915,1.554773,0.913617,1.319841,1.006866,0.004434,1.004415,0.999267,1.831435,0.003084,1.039222,0.657507,2.533602,0.948915,-0.012492,1.015116,1.240181,0.948915,,,,,,1.0,0.0,1.0,1.0,1.0,,,,,,,,0.0,,,,2.780659,0.177752,1.350027,0.824643,2.806691,,,,,0.940709,-0.020988,0.975656,0.920828,1.058792,,0.0,,,0.350745,-0.018015,0.208854,0.067298,,0.0,,,1.0,0.0,1.0,1.0,,0.0,,,,0.872402,-0.013181,0.932717,0.856449,1.007149,-0.001855,0.993902,0.976727,,,-0.461538,0.0,0.0,2.163655,0.001103,1.209317,0.69139,11.199025,0.757796,-0.064183,0.213798,0.057577,1.0,,,,,,0.863743,-0.010015,0.842178,0.710104,1.048196,,0.0,,0.485576,-0.028433,0.594655,0.357169,9.262001,0.939041,-0
.002403,1.002903,1.079592,0.939041,0.5,-3.384615,0.22807,0.083333,1.0,0.363636,-1.076923,0.787879,0.235294,inf,1.005803,0.002292,1.006977,,,,,,,0.0,,,1.53183,-0.005037,0.957032,0.648563,2.01054,1.012051,0.000685,1.002299,0.984856,,0.0,,,,,0.0,,,0.887461,0.027329,0.9501,1.354771,0.802818,0.957865,0.993772,-0.00406,0.995959,0.991657,1.0,1.0,0.0,1.0,1.0,0.871027,0.027623,0.933855,1.539094,0.748129,0.884376,-2.886923,0.903405,0.5625,3.857143,0.618124,-0.023327,0.667012,0.413069,,,,,,0.83077,0.220514,-0.019753,0.217742,0.137261,1.00134,0.000512,1.000525,0.996615,1.005149,0.495014,1.592462,-0.031538,0.457267,0.127024,0.963573,1.4e-05,1.000101,0.963573,1.035825,,0.0,,,,,-0.461538,0.0,,0.0,,,,0.893165,0.02761,0.950097,1.36303,0.803748,
2,2019-10-16,13.0,0.00338,0.021077,0.185771,0.207518,0.702994,0.568702,0.007752,0.538246,,,0,0,,,0,0,0.048129,0.075478,0.06881,0.04167,0.052914,0.055798,0.284616,0.108532,0.001589,0.002503,0.006497,0.045434,0.015938,0.011387,-1,-1,0,0,0,0,0,0,0,0,0,0,1.0,1.0,0,0,,,0,0,0.001484,0.031007,0,0,0.204331,0.182048,379,1511,0,0,0,0,0.044768,0.034079,-1,-1,0,0,1,0,0,0,0.002045,0.00637,,,0,0,0,0,0.079235,0.051375,2,2,0.00446,0.008197,-1,-1,0.0,0.0,-1,-1,27,26,0,0,0.004427,0.039006,1,1,0.810072,1.009582,,,0.005816,0.038614,0,0,0,0,1,0,0.279283,0.263979,0,0,0.007911,0.002904,0.004851,0.017333,0,0,0,0,0.0,0.0,,,7,7,0.0,0.0,0.566616,0.716934,,,0,711,0.041972,0.97982,12,13,0,0,0.044327,0.976557,0.004455,0.031101,-1,-1,0,0,0.000994,0.001231,0.150203,0.103164,0.009446,0.000674,1.0,1.0,0,0,,,0.009076,0.001805,0.005802,0.000936,1.009375,1.008227,0.170267,0.185183,,,1.0,1.0,0,0,,,0.0,0.0,0.003803,0.165777,,,0.697522,0.774276,0.0,0.0,0.088291,0.034808,0,0,1,1,0.0,0.0,0.058534,0.230487,0.482407,0.401189,0.287646,0.098993,0,0,0.272541,0.003275,0.008629,0.003626,0.418986,0.207001,0,0,0.522954,0.250472,32,13,0,19,0.520753,0.440895,0.105303,0.101805,0,0,0.0,0.0,0.166165,0.165945,0.129189,0.298906,0,0,0,0,0,0,0.235105,0.198149,1.004291,1.003013,0.585974,1.0,0.508047,0.342173,0.004671,0.008249,...,0.807494,1.455969,0.022103,1.172547,1.0,14.0158,0.005579,2.442976,1.0,0.0,1.0,,0.0,,,,5.028291,0.003648,1.672192,0.973044,6.201734,1.001138,0.005212,1.005191,1.0,0.919453,0.003629,1.021779,0.815375,2.05702,0.921592,0.045764,0.938148,1.03929,0.850081,,,,,,1.0,0.150476,1.17713,1.0,58.581104,,,,,,,,0.0,,,,0.02294,-0.088585,0.041162,0.008897,4.898614,,,,,0.900869,-0.051433,0.931328,0.835242,1.036264,,0.0,,,2.536546,0.038129,1.760095,0.875716,,0.0,,,1.0,0.0,1.0,1.0,,0.0,,,,0.253957,-0.082627,0.41466,0.242844,1.202443,0.005558,1.011656,0.984873,2.905735,,0.0,,,83.211845,0.140658,2.06654,0.983492,169.153915,2.379592,0.00303,1.541162,0.927852,6.108582,,,,,,2.024081,0.136941,1.485527,1.0,2.024081,,0.0,,2.0878
72,0.182251,1.534925,0.848282,5.983654,0.94264,0.041629,0.943341,1.032826,0.844776,2.461539,14.692308,1.848889,0.864865,10.666667,0.0,-6.0,0.0,0.0,,1.181127,0.102091,1.243851,1.034365,-0.025204,0.806879,0.508099,,,0.0,,,1.001325,0.004705,1.02914,0.940986,1.253056,0.432207,-0.062899,0.67255,0.432207,,0.0,,,,,0.0,,,0.790877,0.10099,0.848368,1.502905,0.600531,1.186509,1.001275,-0.001195,0.998812,0.994529,1.002841,0.585974,-0.160711,0.784767,0.585974,0.746905,0.100107,0.812416,1.783277,0.533701,1.441205,12.574615,1.400142,0.733333,44.0,1.484767,0.190622,1.600526,0.955562,,,,,,0.566228,2.412502,0.04116,1.739979,0.859505,0.996783,-0.003041,0.996876,0.992125,1.000531,2.042847,0.956397,0.002037,1.463528,0.733396,0.97506,-0.004097,0.970127,0.94598,1.009429,1.010858,0.008377,1.009436,0.999257,1.01869,,0.0,,1.0,0.0,1.0,1.0,1.0,0.790332,0.104204,0.844662,1.484586,0.600117,0.999089
3,2019-04-22,13.0,0.139062,0.276393,0.257537,0.180898,0.414899,0.460239,1.045025,0.328769,,,0,0,,,0,0,0.147453,0.0719,0.23748,0.204183,0.098741,0.079677,0.228848,0.304583,0.003946,0.00565,0.25289,0.319683,0.498516,0.960779,-1,-1,0,0,0,0,0,1,0,0,0,0,1.0,1.0,0,0,,,0,0,0.149511,0.269828,0,0,0.171934,0.142214,1511,1454,0,0,0,0,0.017329,0.012918,-1,-1,1,2,9,11,12,12,0.009377,0.000903,,,1,5,0,0,0.03297,0.210552,1,1,0.037242,0.005672,-1,-1,0.0,0.0,-1,-1,29,16,0,0,0.26962,0.418853,0,0,0.205678,0.033261,,,0.146753,0.270511,0,0,17,17,2,1,0.223874,0.198072,0,0,0.003213,0.007064,0.022947,0.02912,16,16,0,0,0.0,0.0,0.668001,1.007431,4,4,0.0,0.0,0.628619,0.794439,,,711,435,0.960584,0.961166,43,31,0,0,0.976037,0.98245,0.320184,0.083406,-1,-1,0,0,0.002596,0.002732,0.255263,0.202383,0.003026,0.008834,1.0,1.0,0,0,,,0.004525,0.004747,0.008854,0.000301,0.0,0.0,0.552729,0.461584,,,0.018601,0.003437,0,0,,,0.0,0.0,0.641422,0.632045,,,0.513186,0.514222,0.0,0.0,0.19784,0.29614,2,0,2,2,0.0,0.168233,0.023546,0.014307,0.047288,0.047288,0.224398,0.040548,0,0,0.516813,0.733553,0.185255,0.508892,0.5247,0.596307,0,0,0.60252,0.603837,19,29,11,14,0.016577,0.009098,0.211615,0.298828,0,0,0.0,0.0,0.1812,0.2871,0.032124,0.01383,0,0,0,0,2,4,0.078085,0.078031,1.001619,1.002589,0.14979,0.144555,0.845192,0.758866,0.002787,0.009784,...,0.950302,1.261289,0.021318,1.091126,0.989581,0.342554,-0.001522,0.665319,1.0,0.0,1.0,,0.0,,,,0.953199,0.000778,1.207722,0.490035,29.38974,,0.0,,,1.197463,-0.004346,0.992199,0.922948,1.197463,1.57308,-0.192603,2.06105,3.08047,1.0,,,,,,5.412107,0.006057,1.482867,0.880483,5.412107,,,,,,,,0.0,,,,1.014836,-0.031021,0.953868,0.873156,1.014836,,,,,0.997984,0.038458,1.081011,0.997984,1.197753,,0.0,,,0.668064,-0.108627,0.645552,0.57947,inf,1.461538,3.714286,1.0,1.0,0.0,1.0,1.0,0.0,-0.060098,0.0,0.0,,1.645738,0.008896,1.607247,1.0,1.0,0.0,1.0,1.0,5.53407,,0.0,,,0.704535,-0.081684,0.863518,0.704209,1.226093,0.364035,-0.264561,0.411846,0.364035,1.0,0.313895,-0.330399,0.319066,0.280197,1.0,
0.879916,-0.029218,0.947252,0.879916,1.011064,,0.0,,0.997818,0.051342,1.093149,0.997818,1.217171,2.553912,-0.216118,8.873968,-7.653941,1.0,0.655172,-10.230769,0.65,0.575758,1.0,0.785714,-4.846154,0.694175,0.478261,1.571429,1.821983,0.001022,1.0657,0.70815,-0.08954,0.702677,0.515514,,,0.0,,,0.631139,-0.113208,0.615473,0.474046,1.0,2.322831,0.007848,1.323266,1.0,,0.0,,,,0.5,-1.0,0.666667,0.5,0.791938,-0.019269,1.031829,1.370917,0.791938,1.000682,0.999032,-0.002331,0.997679,0.993565,1.000742,1.036212,0.004771,1.032902,0.904522,0.749328,-0.016616,1.035052,1.511124,0.749328,0.720603,-9.733077,0.693283,0.423077,4.4,1.113756,0.120421,1.166151,1.0,0.184903,-0.309791,0.112788,0.074092,1.0,0.284826,0.528046,-0.130716,0.636011,0.466438,1.002049,0.004635,1.004763,1.0,1.008812,0.679042,0.36443,-0.141254,0.420803,0.36443,0.987994,0.001233,1.009017,0.98362,1.044457,1.020655,0.009923,1.010906,1.0,1.020655,,0.0,,1.0,0.0,1.0,1.0,1.0,0.791274,-0.017849,1.029224,1.352012,0.791274,
4,2019-10-22,13.0,0.514632,0.347868,0.193552,0.190245,0.472838,0.455125,0.006672,0.003453,,,0,0,,,0,0,0.006022,0.037534,0.06858,0.048111,0.016391,0.013177,0.093116,0.090952,0.007018,0.002469,0.006982,0.001664,0.830857,0.544705,1,-1,0,0,0,0,0,0,0,0,0,1,1.0,1.0,0,0,,,0,0,0.563603,0.381377,0,0,0.0397,0.021461,772,1898,0,0,0,0,0.009581,0.011987,-1,-1,3,2,5,6,12,12,0.00794,0.000998,,,0,3,0,0,0.214071,0.189683,1,1,0.006477,0.007998,-1,-1,0.0,0.0,-1,-1,16,16,0,0,0.157513,0.140309,0,0,0.038021,0.072926,,,0.558505,0.387497,0,0,17,17,0,2,0.167715,0.143163,0,0,0.005947,0.001145,0.011126,0.024733,75,33,0,0,0.0,0.0,0.977441,1.001244,1,1,0.0,0.0,0.569771,0.543929,,,0,0,0.964983,0.964508,29,28,0,0,0.960863,0.972881,0.064127,0.107583,-1,-1,2,0,1e-05,0.002929,,,0.009732,0.00141,1.0,1.0,1,0,,,0.008543,0.006918,0.001778,0.009142,1.0062,1.009943,0.11501,0.109025,0.039149,0.007209,1.0,1.0,0,0,,,0.503154,0.0,0.800289,0.696405,,,0.254478,0.286302,0.0,0.0,0.223866,0.170675,0,0,-1,-1,0.0,0.0,0.011244,0.021586,0.006478,0.01392,,,0,0,0.182204,0.37313,0.656698,0.591506,0.673749,0.573654,0,0,0.959607,0.747534,18,21,26,6,0.008594,0.009331,0.071884,0.289821,0,0,0.0,0.0,0.168317,0.156751,0.022026,0.031143,0,0,0,0,3,2,0.021903,0.003956,0.01108,1.007664,0.210521,0.210309,0.89365,0.817836,0.009777,0.001487,...,0.003306,,,,,6.900372,0.005556,2.330205,1.0,0.0,1.0,inf,0.384615,1.625,0.333333,,1.234857,0.003013,1.544721,0.912373,0.194428,0.996294,0.000835,1.00083,0.996294,1.05489,-0.008406,0.931891,0.760927,1.093986,4.22563,0.1651,2.736918,1.0,-2.963758,5.43059,0.013378,1.519115,0.685768,17.514296,1.0,0.0,1.0,1.0,1.0,,,,,,,inf,0.34788,3.240411,0.991822,inf,1.149172,0.045195,1.059854,0.932478,1.149172,,,,,0.888842,-0.069622,0.785183,0.597697,1.0,,0.0,,,1.311648,-0.005031,0.97802,0.685608,,0.0,,,1.0,0.0,1.0,1.0,,0.0,,,,0.520888,-0.006943,0.618235,0.422802,0.465396,-0.004071,0.614094,0.465396,,,0.0,,,0.488312,-0.075248,0.707719,0.488312,2.197711,1.110214,0.046227,1.075723,1.0,1.186648,1.01122,0.071844,1.110
34,0.993428,1.50655,1.174486,0.0538,1.086782,1.0,1.174486,,0.0,,1.283698,0.145168,1.178243,1.0,1.762813,0.664172,0.085055,0.532716,1.0,0.324844,0.857143,-0.076923,0.995745,0.857143,1.125,4.333333,14.153846,2.194805,1.0,26.0,0.921037,0.000615,1.077039,0.248028,-0.155061,0.316746,0.193959,,,0.0,,,1.07379,0.00321,1.019443,0.962023,1.082569,0.707241,-0.002394,0.901962,0.566729,,0.0,,,,1.5,1.076923,1.56,1.0,1.039326,0.065857,0.895234,1.223291,0.647571,5.536021,0.010996,-0.613634,0.017736,0.010987,1.021216,1.001009,0.000229,1.00109,0.987359,0.943609,0.015864,0.960004,2.22131,0.512152,1.244677,7.19,1.233366,0.904762,6.333333,1.092701,0.088996,1.110602,0.955862,0.891441,0.06728,1.197656,0.774427,14.378464,6.573663,1.315074,0.012723,1.186173,0.769509,0.347316,-0.200066,0.62936,0.347316,2.102011,1.257427,1.118373,0.018959,1.076143,1.0,1.346153,-0.049193,0.793467,0.543465,1.43482,,0.0,,,,,0.0,,,0.0,,,,1.047511,0.065058,0.89752,1.218899,0.649467,


In [5]:

# date_train = pd.read_pickle('../feature/Statement Dates/train_SDist.pkl')
# date_test = pd.read_pickle('../feature/Statement Dates/test_SDist.pkl')

# train = train.merge(date_train,on = "customer_ID",how = "left")
# test = test.merge(date_test,on = "customer_ID",how = "left")

In [6]:
# Preview the first rows of the train feature table
train.head()

Unnamed: 0,customer_ID,S_2,LT,B_33_last,B_33_first,D_41_last,D_41_first,D_79_last,D_79_first,R_12_last,R_12_first,B_13_last,B_13_first,R_1_last,R_1_first,S_8_last,S_8_first,D_59_last,D_59_first,S_20_last,S_20_first,D_54_last,D_54_first,B_22_last,B_22_first,D_50_last,D_50_first,D_80_last,D_80_first,B_14_last,B_14_first,R_11_last,R_11_first,D_130_last,D_130_first,B_11_last,B_11_first,B_8_last,B_8_first,D_78_last,D_78_first,S_11_last,S_11_first,B_18_last,B_18_first,B_17_last,B_17_first,B_39_last,B_39_first,B_6_last,B_6_first,R_14_last,R_14_first,S_23_last,S_23_first,D_127_last,D_127_first,D_61_last,D_61_first,S_16_last,S_16_first,D_75_last,D_75_first,D_86_last,D_86_first,D_105_last,D_105_first,D_73_last,D_73_first,D_82_last,D_82_first,D_58_last,D_58_first,P_3_last,P_3_first,D_39_last,D_39_first,D_112_last,D_112_first,D_62_last,D_62_first,S_12_last,S_12_first,D_53_last,D_53_first,B_2_last,B_2_first,D_42_last,D_42_first,D_124_last,D_124_first,R_26_last,R_26_first,B_21_last,B_21_first,R_8_last,R_8_first,D_125_last,D_125_first,R_27_last,R_27_first,D_133_last,D_133_first,D_52_last,D_52_first,S_5_last,S_5_first,R_15_last,R_15_first,D_141_last,D_141_first,B_7_last,B_7_first,S_24_last,S_24_first,D_83_last,D_83_first,S_26_last,S_26_first,B_36_last,B_36_first,R_7_last,R_7_first,B_41_last,B_41_first,S_27_last,S_27_first,D_70_last,D_70_first,D_48_last,D_48_first,D_55_last,D_55_first,D_56_last,D_56_first,D_71_last,D_71_first,D_134_last,D_134_first,B_37_last,B_37_first,D_131_last,D_131_first,R_9_last,R_9_first,B_16_last,B_16_first,B_25_last,B_25_first,D_72_last,D_72_first,D_47_last,D_47_first,B_24_last,B_24_first,D_96_last,D_96_first,D_119_last,D_119_first,B_40_last,B_40_first,D_142_last,D_142_first,P_2_last,P_2_first,D_43_last,D_43_first,D_106_last,D_106_first,S_13_last,S_13_first,B_32_last,B_32_first,B_1_last,B_1_first,B_23_last,B_23_first,S_6_last,S_6_first,D_140_last,D_140_first,B_9_last,B_9_first,D_138_last,D_138_first,S_22_last,S_22_first,R_4_last,R_4_first,R_20_last,R_20_firs
t,B_26_last,B_26_first,D_46_last,D_46_first,D_60_last,D_60_first,P_4_last,P_4_first,B_20_last,B_20_first,B_19_last,B_19_first,D_45_last,D_45_first,D_91_last,D_91_first,D_132_last,D_132_first,S_7_last,S_7_first,R_6_last,R_6_first,D_44_last,D_44_first,B_10_last,B_10_first,R_13_last,R_13_first,B_42_last,B_42_first,B_12_last,B_12_first,S_19_last,S_19_first,R_19_last,R_19_first,D_145_last,D_145_first,R_10_last,R_10_first,D_122_last,D_122_first,B_28_last,B_28_first,B_4_last,B_4_first,R_5_last,R_5_first,D_49_last,D_49_first,D_76_last,D_76_first,D_69_last,D_69_first,D_121_last,D_121_first,B_5_last,B_5_first,R_3_last,...,B_24_last_mean_sub,B_24_last_mean_div,B_24_last_max_div,D_96_last_first_div,D_96_last_mean_sub,D_96_last_mean_div,D_119_last_first_div,D_119_last_mean_sub,D_119_last_mean_div,D_119_last_max_div,D_119_last_min_div,B_40_last_first_div,B_40_last_mean_sub,B_40_last_mean_div,B_40_last_max_div,D_142_last_first_div,D_142_last_mean_sub,D_142_last_mean_div,D_142_last_max_div,P_2_last_first_div,P_2_last_mean_sub,P_2_last_mean_div,P_2_last_max_div,P_2_last_min_div,D_43_last_first_div,D_43_last_mean_sub,D_43_last_mean_div,D_43_last_max_div,D_106_last_first_div,S_13_last_first_div,S_13_last_mean_sub,S_13_last_mean_div,S_13_last_max_div,B_32_last_first_div,B_32_last_mean_sub,B_32_last_mean_div,B_1_last_first_div,B_1_last_mean_sub,B_1_last_mean_div,B_1_last_max_div,B_23_last_first_div,B_23_last_mean_sub,B_23_last_mean_div,B_23_last_max_div,S_16-P_3_last_first_div,S_16-P_3_last_mean_sub,S_16-P_3_last_mean_div,S_16-P_3_last_max_div,S_16-P_3_last_min_div,S_6_last_first_div,S_6_last_mean_sub,S_6_last_mean_div,B_17-P_3_last_first_div,B_17-P_3_last_mean_sub,B_17-P_3_last_mean_div,B_17-P_3_last_max_div,B_17-P_3_last_min_div,D_140_last_first_div,D_140_last_mean_sub,D_140_last_mean_div,D_140_last_max_div,D_140_last_min_div,B_9_last_first_div,B_9_last_mean_sub,B_9_last_mean_div,B_9_last_max_div,B_9_last_min_div,D_138_last_first_div,S_22_last_first_div,S_22_last_mean_sub,S_22_last_me
an_div,S_22_last_max_div,R_4_last_first_div,R_20_last_first_div,R_20_last_mean_sub,R_20_last_mean_div,R_20_last_max_div,B_26_last_first_div,D_46_last_first_div,D_46_last_mean_sub,D_46_last_mean_div,D_46_last_max_div,D_46_last_min_div,D_60_last_first_div,D_60_last_mean_sub,D_60_last_mean_div,D_60_last_max_div,D_60_last_min_div,P_4_last_first_div,P_4_last_mean_sub,P_4_last_mean_div,P_4_last_max_div,B_20_last_first_div,B_20_last_mean_sub,B_20_last_mean_div,B_19_last_first_div,B_19_last_mean_sub,B_19_last_mean_div,B_19_last_max_div,B_19_last_min_div,D_45_last_first_div,D_45_last_mean_sub,D_45_last_mean_div,D_45_last_max_div,D_45_last_min_div,D_91_last_first_div,D_91_last_mean_sub,D_91_last_mean_div,D_91_last_max_div,B_14-P_3_last_first_div,B_14-P_3_last_mean_sub,B_14-P_3_last_mean_div,B_14-P_3_last_max_div,B_14-P_3_last_min_div,D_132_last_first_div,D_132_last_mean_sub,D_132_last_mean_div,D_132_last_max_div,D_132_last_min_div,S_7_last_first_div,S_7_last_mean_sub,S_7_last_mean_div,S_7_last_max_div,S_7_last_min_div,R_6_last_first_div,D_44_last_first_div,D_44_last_mean_sub,D_44_last_mean_div,D_44_last_max_div,B_10_last_first_div,B_10_last_mean_sub,B_10_last_mean_div,B_10_last_max_div,R_13_last_first_div,B_42_last_first_div,B_42_last_mean_sub,B_42_last_mean_div,B_42_last_max_div,B_42_last_min_div,B_12_last_first_div,B_12_last_mean_sub,B_12_last_mean_div,B_12_last_max_div,S_19_last_first_div,R_19_last_first_div,D_145_last_first_div,D_145_last_mean_sub,D_145_last_mean_div,D_145_last_max_div,D_145_last_min_div,R_10_last_first_div,R_10_last_mean_sub,R_10_last_mean_div,R_10_last_max_div,D_122_last_first_div,D_122_last_mean_sub,D_122_last_mean_div,D_122_last_max_div,D_122_last_min_div,B_28_last_first_div,B_28_last_mean_sub,B_28_last_mean_div,SDist_last_first_div,SDist_last_mean_sub,SDist_last_mean_div,SDist_last_max_div,SDist_last_min_div,B_4_last_first_div,B_4_last_mean_sub,B_4_last_mean_div,B_4_last_max_div,B_4_last_min_div,R_5_last_first_div,B_11-P_3_last_first_div,B_11-P_3_las
t_mean_sub,B_11-P_3_last_mean_div,B_11-P_3_last_max_div,B_11-P_3_last_min_div,D_49_last_first_div,D_39-P_2_last_first_div,D_39-P_2_last_mean_sub,D_39-P_2_last_mean_div,D_39-P_2_last_max_div,D_39-P_2_last_min_div,D_76_last_first_div,D_76_last_mean_sub,D_76_last_mean_div,D_76_last_max_div,D_69_last_first_div,D_69_last_mean_sub,D_69_last_mean_div,D_121_last_first_div,D_121_last_mean_sub,D_121_last_mean_div,D_121_last_max_div,D_121_last_min_div,B_5_last_first_div,B_5_last_mean_sub,B_5_last_mean_div,B_5_last_max_div,B_5_last_min_div,R_3_last_first_div,R_3_last_mean_sub,R_3_last_mean_div,R_3_last_max_div,S_25_last_first_div,S_25_last_mean_sub,S_25_last_mean_div,S_25_last_max_div,S_25_last_min_div,R_2_last_first_div,R_2_last_mean_sub,R_2_last_mean_div,D_81_last_first_div,B_15_last_first_div,B_15_last_mean_sub,B_15_last_mean_div,B_15_last_max_div,B_15_last_min_div,D_51_last_first_div,D_51_last_mean_sub,D_51_last_mean_div,D_65_last_first_div,D_65_last_mean_sub,D_65_last_mean_div,D_65_last_max_div,S_3_last_first_div,S_3_last_mean_sub,S_3_last_mean_div,S_3_last_max_div,S_3_last_min_div,B_14-P_2_last_first_div,B_14-P_2_last_mean_sub,B_14-P_2_last_mean_div,B_14-P_2_last_max_div,B_14-P_2_last_min_div,B_3_last_first_div,B_3_last_mean_sub,B_3_last_mean_div,B_3_last_max_div,D_77_last_first_div,D_77_last_mean_sub,D_77_last_mean_div,D_77_last_max_div,S_23-P_2_last_first_div,S_23-P_2_last_mean_sub,S_23-P_2_last_mean_div,S_23-P_2_last_max_div,S_23-P_2_last_min_div,target
0,-9223358381327749917,2018-03-12,13.0,0,0,0.0,0.0,3,4,1.0,1.0,0.004285,0.012224,0.500263,0.506896,1454,1021,29,24,0,1,1.0,1.0,0,1,0.066094,0.139606,0,1,0.104651,0.108195,0,1,1.000604,1.004831,0.410181,0.256874,1.003608,1.007159,0,0,18,11,0.199707,0.209696,0.890774,0.938532,,,0.016601,0.031276,0.0,0.0,0.561108,0.240555,0,0,0.819117,0.681185,0.005574,1.095235,5,2,0,0,0.460669,0.294759,,,-1,-1,0.483516,0.075279,0.495279,0.84007,0,9,1.0,1.0,0.021003,0.037017,0.19237,0.193683,0.024546,0.028451,0.073879,0.028331,,,25,28,-1,-1,0.009849,1.892154,1,0,0,0,1.008834,1.006111,0.683394,0.176973,0.064089,0.060464,0.012302,0.022977,0,0,0.0,0.0,0.301308,0.104675,0.944252,0.995073,0,0,0.00511,0.002772,0.003259,0.001315,0.086762,0.0,0,0,0.001064,0.004639,3,0,0.915675,0.231237,0.475399,0.267173,0.104848,0.095473,0.010098,0.003705,,,0.468113,0.302512,0.979774,0.976961,-1,-1,12,12,0.18954,0.183371,0,0,0.277156,0.255671,0.004177,0.003313,0,0,0.077549,0.05189,0.204851,0.036379,,,0.387708,0.342033,0.163717,0.358763,9,8,544,0,0,0,0.460779,0.298571,0.262997,0.084802,1,0,0,0,0.649579,0.654035,-1,-1,0.924151,0.991726,1,0,0,0,0.004165,0.00266,0.473508,0.457726,0.41277,0.695708,0.0,0.0,17,17,91,54,0.089258,0.061079,0,0,0.265461,0.136778,0.123822,0.694688,0.077045,0.0,1,1,0.035681,0.059039,0,0,,,0.006585,0.020512,0.003489,0.004496,0,0,0,0,0,0,1,1,0.122555,0.072914,9,5,1,0,11,23,,,0.00442,0.009524,0.657644,0.636495,0.020017,0.012831,0,...,0.000409,1.108518,0.547321,,0.0,,1.494502,0.009167,1.134053,0.995911,1.494502,5.630954,0.100039,1.954465,1.0,,,,,1.133538,-0.028161,0.932285,0.777395,1.13972,0.456338,-0.079927,0.671952,0.456338,1.125,inf,479.846154,8.479616,1.0,,0.0,,1.543279,0.029717,1.068938,0.993434,3.101304,0.063495,1.318271,0.984195,-1.919166,-0.086766,1.215334,-1.781752,0.484277,inf,0.769231,4.333333,4.01674,0.278981,3.394384,1.0,-8.523811,,0.0,,,,0.993186,0.015151,1.023882,0.985256,1.059132,1.0,0.931861,0.028877,1.032255,0.922905,inf,,0.0,,,1.56559,1.03448,0.014126,1.030751,0.967748,1.078
132,0.593309,-0.120607,0.773881,0.593309,1.010161,,0.0,,,1.0,0.0,1.0,1.685185,11.461538,1.144101,0.91,1.685185,1.461371,0.011817,1.152588,0.997546,1.461371,,0.0,,,0.533735,0.330171,0.541938,1.0,0.427297,1.940815,0.063896,1.317002,1.0,1.940815,0.178242,-0.176493,0.412308,0.176512,1.0,inf,1.0,0.153846,1.181818,1.0,0.604364,0.002283,1.068349,0.573475,,,,,,,0.321015,-0.004129,0.614618,0.321015,0.776048,,,0.0,,,,,0.0,,,1.0,0.0,1.0,1.0,1.0,1.680814,0.029323,1.314524,0.687848,-7.963846,0.725042,0.456522,3.0,1.8,2.153846,1.314607,1.0,1.8,inf,0.145916,0.36199,0.190338,1.0,0.137076,0.478261,-0.04478,-2.587224,-0.17627,-0.024758,0.79196,,,,,0.464104,-0.001225,0.783019,1.033226,0.010078,1.015562,0.998924,1.033226,1.560012,0.004992,1.332197,0.950965,1.93187,0.0,-2.153846,0.0,0.0,0.098692,-0.308292,0.133646,0.048633,-0.033528,inf,0.846154,6.5,,0.475289,-0.002188,0.594167,0.334695,12.328146,inf,0.923077,13.0,,0.0,,,0.195432,-0.215269,0.418873,0.194998,1.0,1.210478,0.027383,0.911791,1.225037,0.7129,0.741279,-0.266956,0.69581,0.656047,0.760346,-0.000126,0.995214,0.74342,-1.708739,0.187205,-12.560626,0.865847,-0.474421,1
1,-9223193039457028513,2018-03-10,13.0,1,1,0.0,0.0,0,0,1.0,1.0,0.003083,0.005226,0.001226,0.00612,0,0,31,21,0,0,1.0,1.0,0,0,0.259233,0.28661,0,0,0.007321,0.008023,0,0,0.0,0.0,0.009093,0.008378,0.0,0.0,0,0,12,12,1.0,1.0,,,,,0.180466,0.168767,0.0,0.0,0.133664,0.131357,0,0,0.044802,0.03741,0.00307,0.009998,0,0,0,0,,,,,-1,0,0.005432,0.008955,0.671073,0.54877,0,0,1.0,1.0,0.326013,0.32992,0.193787,0.194084,,,0.815078,0.81244,,,9,9,-1,-1,0.001351,0.00479,0,0,0,0,1.000597,1.008928,0.008566,0.008639,0.1508,0.147121,0.007493,0.002381,0,0,0.0,0.0,0.02652,0.046161,0.083905,0.003954,0,0,0.007723,0.00469,0.009909,0.007588,0.0,0.0,0,0,,,0,0,0.066709,0.034129,0.04494,0.033918,0.21809,0.218205,0.012934,0.012236,,,0.006634,0.001739,0.0,0.0,-1,-1,0,0,0.007857,0.008252,0,0,0.891728,0.869725,0.005349,0.001463,0,0,0.296103,0.351133,0.018833,0.043152,,,1.001372,0.97931,0.089735,0.131658,-1,-1,0,0,0,0,0.006264,0.005445,0.013315,0.025385,1,1,0,0,0.00048,0.005607,-1,-1,0.301403,0.038341,0,0,0,0,0.008443,0.003896,0.453995,0.423997,0.001221,0.006901,0.0,0.0,0,0,0,0,0.264923,1.026881,0,0,,,,,0.0,0.0,0,0,0.293768,0.3027,0,0,,,0.00813,0.008124,0.008378,0.000117,0,0,0,0,0,0,4,3,0.015224,0.024579,0,2,0,0,-1,-1,0.096607,0.100384,0.004472,0.009743,0.769909,0.750049,0.009026,0.001927,2,...,0.000643,1.136737,0.608273,,0.0,,0.843277,-0.002936,0.990183,0.843277,1.084296,0.436432,-0.008791,0.68177,0.336407,,,,,1.022528,0.027304,1.028031,0.998898,1.038247,0.681575,0.007448,1.090516,0.681575,1.0,,0.0,,,,0.0,,1.150437,0.001972,1.459528,0.78805,0.524518,-0.005047,0.725145,0.312842,1.239862,-0.091235,1.158183,1.239862,1.0,1.0,0.0,1.0,,,,,,,0.0,,,,0.085573,-0.004717,0.09234,0.04974,1.379685,1.0,7.861009,0.088199,1.413685,0.996378,,,0.0,,,2.167173,1.07075,0.003743,1.008313,0.925989,1.07075,0.176961,-0.004024,0.232836,0.129517,1.0,,0.0,,,,0.0,,,0.0,,,,0.257988,-0.408719,0.39327,0.25527,1.043934,,0.0,,,1.227471,-0.086974,1.150794,1.227471,1.0,,,,,,,,,,,,,0.0,,,0.970491,-0.004378,0.985316,0.970491,,,,,,,1.000809,-0
.002306,0.779058,0.523614,71.340797,,,0.0,,,,,0.0,,,1.333333,0.230769,1.061224,1.0,1.333333,0.619407,-0.000346,0.977793,1.015395,1.113077,1.037243,0.815789,1.409091,0.0,-0.692308,0.0,0.0,,,1.224998,-0.084469,1.146264,1.224998,1.0,1.0,1.022528,-0.027304,1.028031,1.038247,0.998898,0.962373,-0.003448,0.965542,0.93636,0.458989,-0.001973,0.693849,1.026479,0.006175,1.008085,0.994881,1.026479,4.683419,0.004647,2.061378,1.0,93.406754,1.0,0.923077,1.857143,1.0,1.004392,0.003108,1.00319,0.998939,1.007934,,0.0,,,1.9378,-0.000543,0.892517,0.468257,15.143852,,-0.230769,0.0,,0.0,,,,,,,,1.023436,-0.025664,1.026502,1.036844,0.996462,0.924755,-0.001068,0.724071,0.365997,0.985036,-0.001703,0.994427,0.983181,1.023298,-0.027953,1.033287,1.051143,0.998317,0
2,-9223189665817919541,2018-03-03,13.0,1,1,0.0,0.0,0,0,1.0,1.0,0.008432,0.00528,0.009023,0.005043,0,0,15,15,0,0,1.0,1.0,0,0,,,0,0,0.003035,0.003296,0,0,0.0,0.0,0.003671,0.001821,0.0,0.0,0,0,12,12,1.0,0.652748,,,,,0.305706,0.054037,0.0,0.0,0.139443,0.135341,0,0,-0.007332,0.176794,0.006144,0.003643,0,1,0,0,,,,,-1,-1,0.000725,0.121281,0.659608,0.60253,0,0,1.0,1.0,0.103692,0.071242,0.193807,0.189382,,,0.810628,0.814741,,,8,8,-1,-1,0.000464,0.00223,0,0,0,0,1.000532,1.00777,0.003724,0.004788,0.23538,0.242339,0.002747,0.00649,0,0,0.0,1.007308,0.00587,0.145962,0.077635,0.081397,0,0,0.006193,0.000624,0.003144,0.00108,0.0,0.0,0,0,,,0,0,-0.008593,0.080044,0.01806,0.216848,0.132955,0.131169,0.014388,0.010688,,,0.000461,0.008653,0.0,0.0,-1,-1,0,0,0.008517,0.006797,0,0,0.753127,0.727517,0.007078,0.003867,0,0,0.395966,0.402733,0.000834,0.055312,0.731493,0.698117,0.694073,0.812751,0.051419,0.051419,-1,-1,0,0,0,0,0.010064,0.007721,0.005932,0.114651,1,1,0,0,0.009033,0.003616,-1,-1,0.299462,0.299635,0,0,0,0,0.001434,0.003133,0.401518,0.407867,1.003075,0.007816,0.0,0.0,0,0,0,0,0.328769,0.303517,1,0,,,,,0.0,0.0,0,0,0.238819,0.0407,0,0,,,0.010445,0.014098,0.003946,0.007307,0,0,0,2,0,0,4,4,0.006428,0.039417,0,4,0,0,-1,-1,0.023075,0.014524,0.000131,0.008769,0.622573,0.603874,0.00472,0.00658,0,...,0.002306,1.483248,0.774868,,0.0,,0.983197,-0.002747,0.993111,0.98274,1.003552,0.015084,-0.021628,0.037144,0.015084,1.047809,0.01033,1.014324,0.995488,0.853979,-0.108374,0.864946,0.837483,1.0,1.0,0.0,1.0,1.0,1.0,,0.0,,,,0.0,,1.303369,0.003452,1.52216,1.0,0.051737,-0.038113,0.134673,0.051737,1.091132,-0.050713,1.084136,1.338939,0.959415,1.0,0.0,1.0,,,,,,,0.0,,,,2.497947,0.003326,1.582891,0.949284,442.948822,1.0,0.999423,-5.8e-05,0.999807,0.983529,,,0.0,,,0.457658,0.984433,0.012212,1.031368,0.903587,1.106982,128.343521,0.691463,3.21899,0.993477,8540.848633,,0.0,,,,0.0,,,0.0,,,,1.083197,0.012595,1.039836,0.999064,1.083197,inf,0.153846,1.181818,1.0,1.095689,-0.053433,1.088591,1.324489,0.963571,,,,,,,,,
,,,,0.0,,,5.867812,0.115355,1.934314,0.976072,,,,,,,0.740891,0.000166,1.016176,0.688699,0.539996,,0.0,-1.615385,0.0,0.0,,,0.0,,,1.0,0.0,1.0,1.0,1.0,0.163064,-0.011815,0.352347,0.917131,-2.348462,0.922617,0.903226,1.0,0.0,-1.230769,0.0,0.0,,,1.09194,-0.054201,1.090075,1.344199,0.968683,1.0,0.853979,0.108374,0.864946,1.0,0.837483,1.588771,0.005904,1.343829,1.0,0.014982,-0.005065,0.025284,1.030967,0.009725,1.015869,1.0,1.030967,0.717219,0.000261,1.058538,0.523762,232.831055,,0.0,,,1.002009,0.003049,1.003133,1.0,1.007336,,0.0,,,0.330319,-0.005,0.100823,0.062721,1.0,2.0,0.153846,1.083333,,0.0,,,,,,,,0.853708,0.107495,0.865385,1.0,0.835581,0.56278,-0.000363,0.921161,0.459804,1.000803,0.00013,1.000401,1.0,0.818749,0.112613,0.831226,1.0,0.796741,0
3,-9223188534444851899,2018-03-27,13.0,1,1,0.0,0.0,0,0,1.0,1.0,0.005492,0.007156,0.009578,0.007458,0,0,28,18,0,0,1.0,1.0,0,0,,,0,0,0.004878,0.003092,0,0,0.0,0.0,0.008681,0.000347,1.002864,1.006723,0,0,12,12,1.0,1.0,,,,,0.177073,0.175636,0.0,0.0,0.131718,0.13708,0,0,,,0.000551,0.006621,0,0,0,0,,,,,-1,-1,0.004778,0.005778,0.607353,0.79062,0,0,1.0,1.0,,,0.191033,0.19076,,,0.819601,0.81118,,,2,2,-1,-1,0.008004,0.003586,0,0,0,0,1.000231,1.003492,0.007206,0.000818,0.018079,0.019451,0.00029,0.000555,0,0,0.0,0.0,0.010106,0.002392,0.077964,0.083044,0,0,0.001967,0.005402,0.002813,0.004349,0.0,0.0,0,0,,,0,0,,,0.003171,0.007771,,,0.007281,0.011974,,,0.00194,0.008216,0.0,0.0,-1,-1,0,0,0.008699,0.002089,0,0,0.121657,0.104661,0.006152,0.008429,0,0,0.17062,0.009778,0.008698,0.003072,,,0.787945,0.794211,,,-1,-1,0,0,0,0,0.002275,0.004192,0.002718,0.006938,1,0,0,0,0.008871,0.008665,-1,-1,0.302844,0.302426,0,0,0,0,0.006211,0.009272,,,1.009329,1.00085,0.0,0.0,0,0,0,0,0.114711,0.087112,0,0,,,0.203349,0.205107,0.0,0.0,0,0,0.237182,0.244036,0,0,,,0.006874,0.002347,0.003722,0.008295,0,0,0,0,0,0,1,1,0.002514,0.002091,0,0,0,0,-1,-1,,,0.00198,0.008637,0.129937,0.112862,0.000729,0.007016,0,...,0.001007,1.195688,0.729782,,0.0,,17.449144,0.044686,1.354838,0.982056,48.864212,2.83103,0.003426,1.649837,0.935211,,,,,0.99211,-0.003258,0.995882,0.991343,1.00165,,,,,1.0,,0.0,,,,0.0,,0.542659,-0.002505,0.475943,0.273558,0.391763,-0.002947,0.479783,0.280726,0.773983,0.083219,0.879396,1.003017,0.773983,inf,0.153846,1.181818,,,,,,,0.0,,,,1.023809,0.002457,1.383029,0.979256,6.677357,1.0,1.001384,0.003124,1.010422,0.996582,,,0.0,,,0.669787,,,,,,1.008471,0.003978,1.003957,1.0,1.008471,,0.0,,,,0.0,,,0.0,,,,1.316817,0.011091,1.10704,1.0,1.316817,,0.0,,,0.76502,0.087503,0.87318,1.0,0.76502,,,,,,0.991429,-0.000879,0.995696,0.991429,1.0,,,0.0,,,0.971911,-0.003205,0.986669,0.96292,,,,,,,2.928774,0.001006,1.171394,0.701117,0.448697,,,0.0,,,,,0.0,,,1.0,0.0,1.0,1.0,1.0,1.202436,-0.001979,0.559518,1.015395,0.574615,1.01
8886,0.96875,1.107143,,0.0,,,,,0.757551,0.091414,0.867532,1.0,0.757551,1.0,0.99211,0.003258,0.995882,1.00165,0.991343,,,,,0.229236,-0.001964,0.501996,1.151291,0.011376,1.095952,1.0,1.164813,0.103978,-0.004885,0.129938,0.075721,1.0,,0.0,,,1.004012,0.002935,1.003013,0.998134,1.007507,,0.0,,,0.835573,-0.002345,0.66704,0.507691,1.0,,0.0,,,0.0,,,0.990958,-0.001374,0.995459,0.990958,1.0,0.989821,0.00334,0.995753,1.004988,0.988306,0.995049,0.003376,1.563492,0.896708,,,,,0.998624,-0.001515,1.002314,1.012519,0.990655,0
4,-9223173911659837606,2018-03-10,13.0,0,0,0.0,0.0,0,0,1.0,1.0,0.012624,0.009117,0.006434,1.005596,0,0,31,21,0,0,1.0,1.0,1,1,0.099705,0.10664,0,0,0.025515,0.026088,0,0,0.0,0.0,0.079096,0.078558,1.000633,1.000358,2,1,15,12,0.124266,0.120605,0.882709,1.007584,,,0.000321,0.003008,0.0,0.0,0.404884,0.131705,0,0,0.91519,0.923156,0.004324,0.008477,7,7,0,0,,,,,-1,-1,0.551308,0.615955,0.471883,0.577471,13,0,1.0,1.0,0.032269,0.030988,0.190687,0.190312,0.12298,0.095178,0.037854,0.036276,,,6,7,-1,-1,0.006157,0.007553,0,1,0,0,1.002624,1.009568,0.007368,0.0044,0.068244,0.070355,0.021315,0.000422,0,0,0.0,0.0,1.002414,0.991645,0.948783,0.897345,0,0,0.00218,0.005288,0.003988,0.00725,0.0,12.931408,0,0,0.009231,0.006533,0,1,0.960002,0.912754,0.95568,0.929,0.079062,0.072311,0.013395,0.010335,0.15625,0.155529,0.117561,0.110493,0.0,0.0,-1,-1,12,12,0.046318,0.049839,0,0,0.211371,0.20546,0.001182,0.003337,0,0,0.252257,0.230853,0.679204,0.777073,,,0.040486,0.252421,0.500996,0.257392,-1,-1,0,0,0,0,0.11304,0.108639,0.954324,0.945057,0,0,0,0,0.642953,0.629239,-1,0,0.961779,0.912247,0,1,0,0,0.004186,0.004023,0.446243,0.47953,0.056928,0.051009,0.0,0.0,17,17,100,100,0.078763,0.05796,0,0,,,0.257634,0.173234,0.0,11.071306,4,3,0.007041,0.004603,0,0,,,0.008329,0.010681,0.00611,0.00947,0,0,0,0,0,2,2,1,0.216837,0.221729,45,48,0,1,-1,-1,,,0.004045,0.001288,0.377113,0.354298,0.011083,0.011143,2,...,-0.002868,0.291879,0.135152,,0.0,,1.092717,0.010248,1.042344,1.0,1.092717,0.874054,-0.033431,0.953088,0.874054,,,,,0.16039,-0.075181,0.350023,0.16039,1.059644,1.946434,0.235707,1.888493,0.942436,1.0,,0.0,,,,0.0,,1.040514,0.001421,1.012735,0.934418,1.009806,0.006169,1.006506,0.998611,0.821728,-0.046849,1.111358,3.510806,0.821728,,0.0,,0.955159,-0.118338,0.776368,0.512144,1.006329,,0.0,,,,1.021795,0.003183,1.004975,0.91046,1.051996,-inf,1.054297,0.028064,1.030056,0.994241,0.0,,0.0,,,1.040298,0.930583,-0.018067,0.961089,0.904905,1.0,1.116042,0.001896,1.034453,0.960041,1.137634,,0.0,,,1.0,0.0,1.0,1.0,5.538462,1.05
8632,1.0,1.098901,1.358914,0.007389,1.10352,0.928957,1.358914,,0.0,,,0.809543,-0.049045,1.12344,3.941387,0.809543,,,,,,1.4872,0.047117,1.223815,0.944046,1.525062,0.0,1.333333,0.076923,1.019608,1.0,1.52962,-0.001043,0.870945,0.564223,,,,,,,0.779727,-0.002385,0.777398,0.589489,0.645267,,,0.0,,,,0.0,-0.769231,0.0,0.0,2.0,0.230769,1.130435,1.0,2.0,0.977936,-0.002434,0.988897,0.884376,-1.963846,0.932197,0.473684,2.076923,0.9375,-0.538462,0.988176,0.9375,1.0,0.0,0.787284,-0.043945,1.125974,5.941692,0.787284,1.0,-51.340927,8.690565,3.035762,0.766263,-51.340927,,,,,3.139485,-0.000581,0.874338,1.064394,0.012185,1.033389,1.0,1.064394,0.994641,0.002325,1.265529,0.802899,3.378705,0.666667,0.692308,1.529412,0.666667,0.342733,-0.408232,0.450832,0.342586,4.327604,1.0,0.230769,1.3,inf,3.147263,0.002993,1.493097,0.906948,7.999972,,0.0,,,0.0,,,1.564005,0.030229,1.119516,0.848843,1.661849,0.066147,0.070879,0.174388,2.176639,0.066147,0.993777,0.01896,1.033272,0.980161,,,,,-3.018643,0.298171,5.502284,1.0,-3.018643,1


In [7]:

##LightGBMのEarlyStopping実装


class DartEarlyStopping(object):
    """Early-stopping callback for LightGBM that keeps a snapshot of the best model.

    An instance is meant to be passed to ``lgb.train(..., callbacks=[...])``.
    On every call it looks up the monitored (dataset, metric) pair in
    ``env.evaluation_result_list``. When the score improves, a copy of the
    booster is stored in ``best_model``; when it has not improved for more than
    ``stopping_round`` iterations, ``lgb.callback.EarlyStopException`` is raised.

    Args:
        data_name: Name of the validation set to monitor (e.g. ``"valid_1"``).
        monitor_metric: Name of the metric to monitor on that set.
        stopping_round: Number of iterations without improvement tolerated
            before training is stopped.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, data_name, monitor_metric, stopping_round):
        self.data_name = data_name
        self.monitor_metric = monitor_metric
        self.stopping_round = stopping_round
        # Best monitored score seen so far (None until the first evaluation).
        self.best_score = None
        # Copy of the booster taken at the best iteration.
        self.best_model = None
        # Full evaluation result list recorded at the best iteration.
        self.best_score_list = []
        # Zero-based iteration index at which the best score was observed.
        self.best_iter = 0

    def _is_higher_score(self, metric_score, is_higher_better):
        """Return True when ``metric_score`` beats the stored best score.

        The very first score always counts as an improvement. Ties are not
        treated as improvements (strict comparison).
        """
        if self.best_score is None:
            return True
        return (self.best_score < metric_score) if is_higher_better else (self.best_score > metric_score)

    def _deepcopy(self, x):
        """Return an independent copy of ``x`` via a pickle round-trip."""
        # copy.deepcopy does not fully copy a LightGBM model, so pickle is used instead.
        # Imported locally so the class does not rely on a notebook-level import.
        import pickle
        return pickle.loads(pickle.dumps(x))

    def __call__(self, env):
        """Callback entry point invoked with the training environment.

        Raises:
            lgb.callback.EarlyStopException: when the monitored score has not
                improved for more than ``stopping_round`` iterations.
            ValueError: when the monitored (dataset, metric) pair is not present
                in ``env.evaluation_result_list``.
        """
        evals = env.evaluation_result_list
        for data, metric, score, is_higher_better in evals:
            if data != self.data_name or metric != self.monitor_metric:
                continue
            if not self._is_higher_score(score, is_higher_better):
                if env.iteration - self.best_iter > self.stopping_round:
                    # Patience exhausted: report the best result and stop training.
                    eval_result_str = '\t'.join([lgb.callback._format_eval_result(x) for x in self.best_score_list])
                    lgb.basic._log_info(f"Early stopping, best iteration is:\n[{self.best_iter+1}]\t{eval_result_str}")
                    lgb.basic._log_info(f"You can get best model by \"DartEarlyStopping.best_model\"")
                    raise lgb.callback.EarlyStopException(self.best_iter, self.best_score_list)
                return
            # In dart mode previously built trees are also modified, so keep a deep copy
            # of the model as it is at the best iteration.
            self.best_model = self._deepcopy(env.model)
            # Remember when the best score occurred; without this the patience window
            # would always be measured from iteration 0.
            self.best_iter = env.iteration
            self.best_score_list = evals
            self.best_score = score
            return
        raise ValueError("monitoring metric not found")

In [None]:



# Categorical columns of the raw data. The engineered frames only keep the
# per-customer "last" value of each, so the "_last" suffix is appended here.
# (The optional "kmeans pred 2/3/4" cluster columns are currently not used.)
cat_features = [
    f"{base_name}_last"
    for base_name in (
        "B_30", "B_38",
        "D_114", "D_116", "D_117", "D_120", "D_126",
        "D_63", "D_64", "D_66", "D_68",
    )
]

# Map every categorical column to integer codes. The encoder is fitted on the
# training values only and then applied unchanged to the test values.
for cat_col in cat_features:
    encoder = LabelEncoder()
    encoder.fit(train[cat_col])
    train[cat_col] = encoder.transform(train[cat_col])
    test[cat_col] = encoder.transform(test[cat_col])


# Every column except the identifier, the statement date and the label is a model input.
non_feature_columns = {'customer_ID', 'S_2', CFG.target}
features = [col for col in train.columns if col not in non_feature_columns]

# LightGBM hyper-parameters shared by all folds.
params = dict(
    objective = 'binary',
    metric = CFG.metric,
    boosting = CFG.boosting_type,
    seed = CFG.seed,
    num_leaves = 100,
    learning_rate = 0.01,
    feature_fraction = 0.20,
    bagging_freq = 10,
    bagging_fraction = 0.50,
    n_jobs = -1,
    lambda_l2 = 2,
    min_data_in_leaf = 40,
)

# Accumulator for the fold-averaged test predictions (one value per test row).
test_predictions = np.zeros(test.shape[0])
# Out-of-fold predictions, stored at the row position of each training sample.
oof_predictions = np.zeros(train.shape[0])

# Stratified K-fold cross-validation: one LightGBM model per fold. Validation
# predictions are written into `oof_predictions` at their original row positions
# and test predictions are averaged over the folds.
kfold = StratifiedKFold(n_splits = CFG.n_folds, shuffle = True, random_state = CFG.seed)
for fold, (trn_ind, val_ind) in enumerate(kfold.split(train, train[CFG.target])):
    print(' ')
    print('-'*50)
    print(f'Training fold {fold} with {len(features)} features...')
    # The split yields positional indices, hence .iloc.
    x_train, x_val = train[features].iloc[trn_ind], train[features].iloc[val_ind]
    y_train, y_val = train[CFG.target].iloc[trn_ind], train[CFG.target].iloc[val_ind]
    lgb_train = lgb.Dataset(x_train, y_train, categorical_feature = cat_features)
    lgb_valid = lgb.Dataset(x_val, y_val, categorical_feature = cat_features)

    # To use the custom DART early stopping instead, create
    #     des = DartEarlyStopping("valid_1", CFG.metric, 1000)
    # and pass `callbacks = [des]` to lgb.train below.
    # NOTE(review): `early_stopping_rounds` / `verbose_eval` are the keyword form
    # used by the LightGBM version this notebook was run with; newer releases
    # expect the equivalent callbacks - confirm against the installed version.
    model = lgb.train(
        params = params,
        train_set = lgb_train,
        num_boost_round = 10500,
        valid_sets = [lgb_train, lgb_valid],
        early_stopping_rounds = 1500,
        verbose_eval = 500,
        feval = lgb_amex_metric
        )

    # Save best model
    joblib.dump(model, f'{CFG.output_dir}lgbm_{CFG.boosting_type}_fold{fold}_seed{CFG.seed}.pkl')
    # Predict validation
    val_pred = model.predict(x_val)
    # Store at the original row positions so the OOF array stays aligned with `train`
    oof_predictions[val_ind] = val_pred

    # Predict the test set and accumulate the fold average
    test_pred = model.predict(test[features])
    test_predictions += test_pred / CFG.n_folds
    # Compute fold metric
    score = amex_metric(y_val, val_pred)
    print(f'Our fold {fold} CV score is {score}')
    del x_train, x_val, y_train, y_val, lgb_train, lgb_valid
    gc.collect()

# Compute out of folds metric
score = amex_metric(train[CFG.target], oof_predictions)
print(f'Our out of folds CV score is {score}')


# Create a dataframe to store test prediction
test_df = pd.DataFrame({'customer_ID': test['customer_ID'], 'prediction': test_predictions})
test_df.to_csv(f'{CFG.output_dir}test_{CFG.model}_{score}_{CFG.n_folds}fold_seed{CFG.seed}.csv', index = False)


# `oof_predictions` is ordered like the rows of `train`, so the IDs and targets
# must be taken in that same row order. Collecting them fold by fold would pair
# each customer with another customer's prediction because the folds are shuffled.
cids = train['customer_ID'].tolist()
tr_target = train[CFG.target].tolist()

dic_oof = {
    "customer_ID":cids,
    "target":tr_target,
    # Column name kept unchanged for files/readers that already rely on it.
    "tabnet_oot":oof_predictions
}

# Create a dataframe to store out of folds predictions
oof_df = pd.DataFrame(dic_oof)
oof_df.to_csv(f'{CFG.output_dir}oof_{CFG.ver}_{CFG.model}_{score}_{CFG.n_folds}fold_seed{CFG.seed}.csv', index = False)


# # Create a dataframe to store out of folds predictions
# oof_df = pd.DataFrame({'customer_ID': train['customer_ID'], 'target': train[CFG.target], 'prediction': oof_predictions})
# oof_df.to_csv(f'../output/Amex LGBM Dart CV 0.7977/oof_lgbm_{CFG.boosting_type}_baseline_{CFG.n_folds}fold_seed{CFG.seed}.csv', index = False)


# # Create a dataframe to store test prediction
# test_df = pd.DataFrame({'customer_ID': test['customer_ID'], 'prediction': test_predictions})
# test_df.to_csv(f'../output/Amex LGBM Dart CV 0.7977/test_lgbm_{CFG.boosting_type}_baseline_{CFG.n_folds}fold_seed{CFG.seed}.csv', index = False)


 
--------------------------------------------------
Training fold 0 with 1581 features...
[LightGBM] [Info] Number of positive: 95062, number of negative: 272068
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 313117
[LightGBM] [Info] Number of data points in the train set: 367130, number of used features: 1580
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.258933 -> initscore=-1.051523
[LightGBM] [Info] Start training from score -1.051523
[500]	training's binary_logloss: 0.334653	training's amex_metric: 0.78057	valid_1's binary_logloss: 0.338362	valid_1's amex_metric: 0.765071
[1000]	training's binary_logloss: 0.244161	training's amex_metric: 0.796699	valid_1's binary_logloss: 0.252348	valid_1's amex_metric: 0.774877
[1500]	training's binary_logloss: 0.22035	training's amex_metric: 0.810691	valid_1's binary_logloss: 0.232759	valid_1's amex_metric: 0.780484
[2000]	training's binary_logloss: 0.206798	training's amex_metric: 0.823436	valid_1's b

In [None]:
# `x_train` is deleted at the end of every CV fold, so it no longer exists once the
# training cell has finished; preview the same feature columns from `train` instead.
train[features].head()