In [1]:
import os
import gc
import warnings
warnings.filterwarnings('ignore')
import random
import scipy as sp
import numpy as np
import pandas as pd
import joblib
import itertools
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
from tqdm.auto import tqdm
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb
from itertools import combinations
from sklearn.model_selection import KFold
import xgboost as xgb

In [2]:
# https://www.kaggle.com/kyakovlev
# https://www.kaggle.com/competitions/amex-default-prediction/discussion/327534
def amex_metric_mod(y_true, y_pred):
    """Compute the AMEX competition metric for binary labels and scores.

    The result is the mean of two components, both computed with rows whose
    label is 0 weighted 20 and all other rows weighted 1:

    * the fraction of all positives found among the highest-scored rows
      whose cumulative weight stays within 4% of the total weight, and
    * the weighted Gini of the score ordering divided by the weighted Gini
      of the ordering obtained by sorting on the true label.

    Args:
        y_true: 1-D array-like of labels (0 or 1).
        y_pred: 1-D array-like of scores, same length as ``y_true``.

    Returns:
        ``0.5 * (gini_ratio + top_four_capture)``.
    """
    # Column 0 is the label, column 1 is the score.
    pairs = np.transpose(np.array([y_true, y_pred]))

    # Capture rate inside the top 4% (by weight) of the score ordering.
    by_score = pairs[pairs[:, 1].argsort()[::-1]]
    row_weights = np.where(by_score[:, 0] == 0, 20, 1)
    weight_budget = int(0.04 * np.sum(row_weights))
    within_budget = by_score[np.cumsum(row_weights) <= weight_budget]
    top_four = np.sum(within_budget[:, 0]) / np.sum(by_score[:, 0])

    def _weighted_gini(sort_col):
        # Area between the weighted Lorentz curve and the diagonal when the
        # rows are ordered (descending) by the given column.
        ordered = pairs[pairs[:, sort_col].argsort()[::-1]]
        w = np.where(ordered[:, 0] == 0, 20, 1)
        random_curve = np.cumsum(w / np.sum(w))
        total_pos = np.sum(ordered[:, 0] * w)
        lorentz = np.cumsum(ordered[:, 0] * w) / total_pos
        return np.sum((lorentz - random_curve) * w)

    gini_by_score = _weighted_gini(1)
    gini_by_label = _weighted_gini(0)

    return 0.5 * (gini_by_score / gini_by_label + top_four)

def xgb_amex_metric(y_pred, y_true):
    """Evaluation callback that scores predictions with the AMEX metric.

    Args:
        y_pred: Predicted scores for the evaluation rows.
        y_true: Object exposing ``get_label()`` (e.g. an ``xgb.DMatrix``)
            from which the ground-truth labels are read.

    Returns:
        The tuple ``('amex_metric', score, True)``.
    """
    labels = y_true.get_label()
    # The metric defined in this notebook is `amex_metric_mod`; the previous
    # call to `amex_metric` raised NameError because no such name exists.
    # NOTE(review): XGBoost's custom-metric hook documents a (name, value)
    # pair; the trailing True looks like LightGBM's is_higher_better flag.
    # The 3-tuple is kept unchanged for compatibility -- confirm before use.
    return 'amex_metric', amex_metric_mod(labels, y_pred), True

# **Train XGBoost model**

In [3]:
# Load the training frame from parquet.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# cell only runs on the author's machine; consider a configurable data dir.
train = pd.read_parquet('C:\\Users\\16122\\AMEX Kaggle Competition\\train_newnn_fe.parquet')

In [4]:
# Columns considered for dtype downcasting: all except 'customer_ID' and 'S_2'.
non_numeric_ids = ('customer_ID', 'S_2')
cols = [name for name in train.columns if name not in non_numeric_ids]

In [5]:
# Shrink int64 columns to the smallest integer dtype that can hold their
# values. A blind astype('int8') silently wraps anything outside [-128, 127];
# pd.to_numeric(downcast='integer') still yields int8 whenever the values fit.
for col in cols:
    if train[col].dtype == 'int64':
        train[col] = pd.to_numeric(train[col], downcast='integer')

In [6]:
# Integer-encode the "_last" column of each categorical feature in place.
cat_features = [
    f"{base}_last"
    for base in ("B_30", "B_38", "D_114", "D_116", "D_117", "D_120",
                 "D_126", "D_63", "D_64", "D_66", "D_68")
]
for cat_col in cat_features:
    # A fresh encoder is fit per column, so codes are local to that column.
    encoder = LabelEncoder()
    train[cat_col] = encoder.fit_transform(train[cat_col])

In [7]:
# Create after-pay features
def after_pay(df):
    """Add ``<base>-<pay>_last`` difference columns to ``df`` in place.

    For each base column in a fixed list of B_/D_/S_ names and each of
    ``P_2`` and ``P_3``, the new column ``'<base>-<pay>_last'`` is set to
    ``df['<base>_last'] - df['<pay>_last']``. A pair is skipped when either
    of its two ``_last`` source columns is absent from ``df``.

    Args:
        df: DataFrame holding the ``*_last`` columns; modified in place.

    Returns:
        None.
    """
    base_cols = (
        [f'B_{i}' for i in [1, 2, 3, 4, 5, 9, 11, 14, 17, 24]]
        + ['D_39', 'D_131']
        + [f'S_{i}' for i in [16, 23]]
    )
    pay_cols = ['P_2', 'P_3']

    for bcol in base_cols:
        base_last = f'{bcol}_last'
        if base_last not in df.columns:
            continue
        for pcol in pay_cols:
            pay_last = f'{pcol}_last'
            # Previously only the base column was checked, so a frame
            # without P_2_last / P_3_last raised KeyError here.
            if pay_last in df.columns:
                df[f'{bcol}-{pcol}_last'] = df[base_last] - df[pay_last]

In [8]:
# Add the after-pay difference columns; `after_pay` mutates `train` in place.
after_pay(train)

In [9]:
# Round every float32 column of the training frame to two decimal places.
float32_cols = [name for name in train.columns if train[name].dtype == 'float32']
for col in float32_cols:
    train[col] = train[col].round(2)

In [10]:
# Preview the first rows of the training frame after the transforms above.
train.head()

Unnamed: 0,customer_ID,P_2_mean,P_2_std,P_2_min,P_2_max,P_2_last,D_39_mean,D_39_std,D_39_min,D_39_max,D_39_last,B_1_mean,B_1_std,B_1_min,B_1_max,B_1_last,B_2_mean,B_2_std,B_2_min,B_2_max,B_2_last,R_1_mean,R_1_std,R_1_min,R_1_max,R_1_last,S_3_mean,S_3_std,S_3_min,S_3_max,S_3_last,D_41_mean,D_41_std,D_41_min,D_41_max,D_41_last,B_3_mean,B_3_std,B_3_min,B_3_max,B_3_last,D_42_mean,D_42_std,D_42_min,D_42_max,D_42_last,D_43_mean,D_43_std,D_43_min,D_43_max,D_43_last,D_44_mean,D_44_std,D_44_min,D_44_max,D_44_last,B_4_mean,B_4_std,B_4_min,B_4_max,B_4_last,D_45_mean,D_45_std,D_45_min,D_45_max,D_45_last,B_5_mean,B_5_std,B_5_min,B_5_max,B_5_last,D_46_mean,D_46_std,D_46_min,D_46_max,D_46_last,D_47_mean,D_47_std,D_47_min,D_47_max,D_47_last,D_48_mean,D_48_std,D_48_min,D_48_max,D_48_last,D_49_mean,D_49_std,D_49_min,D_49_max,D_49_last,B_6_mean,B_6_std,B_6_min,B_6_max,B_6_last,B_7_mean,B_7_std,B_7_min,B_7_max,B_7_last,B_8_mean,B_8_std,B_8_min,B_8_max,B_8_last,D_50_mean,D_50_std,D_50_min,D_50_max,D_50_last,B_9_mean,B_9_std,B_9_min,B_9_max,B_9_last,R_3_mean,R_3_std,R_3_min,R_3_max,R_3_last,D_52_mean,D_52_std,D_52_min,D_52_max,D_52_last,P_3_mean,P_3_std,P_3_min,P_3_max,P_3_last,B_10_mean,B_10_std,B_10_min,B_10_max,B_10_last,D_53_mean,D_53_std,D_53_min,D_53_max,D_53_last,S_5_mean,S_5_std,S_5_min,S_5_max,S_5_last,B_11_mean,B_11_std,B_11_min,B_11_max,B_11_last,D_54_mean,D_54_std,D_54_min,D_54_max,D_54_last,S_7_mean,S_7_std,S_7_min,S_7_max,S_7_last,B_12_mean,B_12_std,B_12_min,B_12_max,B_12_last,S_8_mean,S_8_std,S_8_min,S_8_max,S_8_last,D_55_mean,D_55_std,D_55_min,D_55_max,D_55_last,D_56_mean,D_56_std,D_56_min,D_56_max,D_56_last,B_13_mean,B_13_std,B_13_min,B_13_max,B_13_last,R_5_mean,R_5_std,R_5_min,R_5_max,R_5_last,D_58_mean,D_58_std,D_58_min,D_58_max,D_58_last,S_9_mean,S_9_std,S_9_min,S_9_max,S_9_last,B_14_mean,B_14_std,B_14_min,B_14_max,B_14_last,D_59_mean,D_59_std,D_59_min,D_59_max,D_59_last,D_60_mean,D_60_std,D_60_min,D_60_max,D_60_last,D_61_mean,D_61_std,D_61_min,D_61_max,D_61_last,B_15
_mean,B_15_std,B_15_min,B_15_max,B_15_last,S_11_mean,S_11_std,S_11_min,S_11_max,S_11_last,D_62_mean,D_62_std,D_62_min,D_62_max,D_62_last,D_65_mean,D_65_std,D_65_min,D_65_max,D_65_last,B_16_mean,B_16_std,B_16_min,B_16_max,B_16_last,B_17_mean,B_17_std,B_17_min,B_17_max,...,B_27_last_max_ratio,S_17_last_max_ratio,R_12_last_max_ratio,B_28_last_max_ratio,R_13_last_max_ratio,D_83_last_max_ratio,R_14_last_max_ratio,D_84_last_max_ratio,R_16_last_max_ratio,R_17_last_max_ratio,S_19_last_max_ratio,R_20_last_max_ratio,S_22_last_max_ratio,S_23_last_max_ratio,S_24_last_max_ratio,S_25_last_max_ratio,S_26_last_max_ratio,D_102_last_max_ratio,D_104_last_max_ratio,D_105_last_max_ratio,D_106_last_max_ratio,D_107_last_max_ratio,B_36_last_max_ratio,B_37_last_max_ratio,R_26_last_max_ratio,R_27_last_max_ratio,D_112_last_max_ratio,B_40_last_max_ratio,S_27_last_max_ratio,D_113_last_max_ratio,D_115_last_max_ratio,D_118_last_max_ratio,D_119_last_max_ratio,D_121_last_max_ratio,D_122_last_max_ratio,D_124_last_max_ratio,D_128_last_max_ratio,D_130_last_max_ratio,D_131_last_max_ratio,D_133_last_max_ratio,D_141_last_max_ratio,D_142_last_max_ratio,D_144_last_max_ratio,D_145_last_max_ratio,P_2_diff1,D_39_diff1,B_1_diff1,B_2_diff1,R_1_diff1,S_3_diff1,D_41_diff1,B_3_diff1,D_42_diff1,D_43_diff1,D_44_diff1,B_4_diff1,D_45_diff1,B_5_diff1,R_2_diff1,D_46_diff1,D_47_diff1,D_48_diff1,D_49_diff1,B_6_diff1,B_7_diff1,B_8_diff1,D_50_diff1,D_51_diff1,B_9_diff1,R_3_diff1,D_52_diff1,P_3_diff1,B_10_diff1,D_53_diff1,S_5_diff1,B_11_diff1,S_6_diff1,D_54_diff1,R_4_diff1,S_7_diff1,B_12_diff1,S_8_diff1,D_55_diff1,D_56_diff1,B_13_diff1,R_5_diff1,D_58_diff1,S_9_diff1,B_14_diff1,D_59_diff1,D_60_diff1,D_61_diff1,B_15_diff1,S_11_diff1,D_62_diff1,D_65_diff1,B_16_diff1,B_17_diff1,B_18_diff1,B_19_diff1,B_20_diff1,S_12_diff1,R_6_diff1,S_13_diff1,B_21_diff1,D_69_diff1,B_22_diff1,D_70_diff1,D_71_diff1,D_72_diff1,S_15_diff1,B_23_diff1,D_73_diff1,P_4_diff1,D_74_diff1,D_75_diff1,D_76_diff1,B_24_diff1,R_7_diff1,D_77_diff1,B_25_diff1,B_26_
diff1,D_78_diff1,D_79_diff1,R_8_diff1,R_9_diff1,S_16_diff1,D_80_diff1,R_10_diff1,R_11_diff1,B_27_diff1,D_81_diff1,D_82_diff1,S_17_diff1,R_12_diff1,B_28_diff1,R_13_diff1,D_83_diff1,R_14_diff1,R_15_diff1,D_84_diff1,R_16_diff1,B_29_diff1,S_18_diff1,D_86_diff1,D_87_diff1,R_17_diff1,R_18_diff1,D_88_diff1,B_31_diff1,S_19_diff1,R_19_diff1,B_32_diff1,S_20_diff1,R_20_diff1,R_21_diff1,B_33_diff1,D_89_diff1,R_22_diff1,R_23_diff1,D_91_diff1,D_92_diff1,D_93_diff1,D_94_diff1,R_24_diff1,R_25_diff1,D_96_diff1,S_22_diff1,S_23_diff1,S_24_diff1,S_25_diff1,S_26_diff1,D_102_diff1,D_103_diff1,D_104_diff1,D_105_diff1,D_106_diff1,D_107_diff1,B_36_diff1,B_37_diff1,R_26_diff1,R_27_diff1,D_108_diff1,D_109_diff1,D_110_diff1,D_111_diff1,B_39_diff1,D_112_diff1,B_40_diff1,S_27_diff1,D_113_diff1,D_115_diff1,D_118_diff1,D_119_diff1,D_121_diff1,D_122_diff1,D_123_diff1,D_124_diff1,D_125_diff1,D_127_diff1,D_128_diff1,D_129_diff1,B_41_diff1,B_42_diff1,D_130_diff1,D_131_diff1,D_132_diff1,D_133_diff1,R_28_diff1,D_134_diff1,D_135_diff1,D_136_diff1,D_137_diff1,D_138_diff1,D_139_diff1,D_140_diff1,D_141_diff1,D_142_diff1,D_143_diff1,D_144_diff1,D_145_diff1,target,B_1-P_2_last,B_1-P_3_last,B_2-P_2_last,B_2-P_3_last,B_3-P_2_last,B_3-P_3_last,B_4-P_2_last,B_4-P_3_last,B_5-P_2_last,B_5-P_3_last,B_9-P_2_last,B_9-P_3_last,B_11-P_2_last,B_11-P_3_last,B_14-P_2_last,B_14-P_3_last,B_17-P_2_last,B_17-P_3_last,B_24-P_2_last,B_24-P_3_last,D_39-P_2_last,D_39-P_3_last,D_131-P_2_last,D_131-P_3_last,S_16-P_2_last,S_16-P_3_last,S_23-P_2_last,S_23-P_3_last
0,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,0.93,0.02,0.87,0.96,0.93,0.23,0.83,0,3,0,0.01,0.01,0.0,0.02,0.01,1.01,0.0,1.0,1.01,1.01,0.0,0.0,0.0,0.01,0.01,0.11,0.01,0.1,0.14,0.14,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,,,,,,,,,,,0.0,0.0,0,0,0,2.85,2.44,0,6,5,0.73,0.01,0.71,0.74,0.74,0.15,0.05,0.06,0.23,0.23,0.38,0.09,0.23,0.52,0.42,0.53,0.01,0.52,0.54,0.54,0.24,0.08,0.14,0.4,0.19,-1.0,0.0,-1,-1,-1,0.11,0.05,0.06,0.22,0.15,0.04,0.02,0.0,0.06,0.06,0.0,0.0,0.0,0.0,0.0,0.15,0.0,0.15,0.15,0.15,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0,0,0,0.2,0.0,0.2,0.21,0.2,0.68,0.05,0.58,0.74,0.63,0.27,0.18,0.1,0.74,0.33,,,,,,0.03,0.01,0.01,0.05,0.03,0.01,0.0,0.0,0.01,0.01,1.0,0.0,1.0,1.0,1.0,0.1,0.03,0.07,0.16,0.11,0.13,0.01,0.11,0.15,0.11,2510.0,429.579987,1544,3166,1544,0.22,0.07,0.15,0.35,0.19,0.16,0.0,0.15,0.17,0.17,0.1,0.01,0.07,0.12,0.1,0.0,0.0,0,0,0,0.06,0.07,0.0,0.16,0.01,0.04,0.03,0.01,0.09,0.01,0.02,0.01,0.01,0.06,0.01,7.77,0.44,7,8,8,0.53,0.39,0.14,1.01,0.26,0.23,0.07,0.12,0.38,0.23,0.03,0.02,0.01,0.06,0.01,16.620001,1.66,15,19,15,0.18,0.17,0.09,0.45,0.45,0.0,0.0,0,0,0,0.0,0.0,0,0,0,,,,,...,0.23,0.2,,0.15,,,,,,,0.65,,0.46,0.96,0.48,0.76,1.0,0.0,0.11,0.0,,,0.29,0.8,,0.34,,0.53,0.17,,0.05,0.0,0.0,0.0,,,0.98,,,0.33,,,0.74,,-0.0,0.0,-0.01,-0.0,0.01,0.03,0.0,-0.0,,,0.0,-1.0,0.0,0.1,0.0,-0.01,0.0,-0.04,0.0,-0.0,-0.0,0.0,0.0,0.0,0.01,0.0,-0.0,-0.02,0.0,,-0.02,0.0,0.0,0.0,0.0,0.03,0.0,-836.0,-0.01,0.0,0.01,0.0,0.0,-0.01,-0.02,0.0,0.12,-0.0,-0.03,-4.0,-0.01,0.0,0.0,,0.0,0.0,0.0,-0.0,-0.0,-170.0,0.0,-0.0,0.0,0.0,0.07,0.0,1.0,-0.0,,0.0,0.0,0.0,,-0.01,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,-0.01,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,-0.01,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.02,-0.0,-0.01,-0.01,-0.0,0.02,0.0,-0.0,0.01,0.0,0.0,-0.0,-0.01,0.0,0.0,0.0,0.0,,0.0,,0.0,-0.0,-0.05,0.0,-0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,,0.0,0.0,,0.01,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,-0.0,0.0,0,-0.93,-0.62,0.07,0.38
,-0.93,-0.62,4.07,4.37,-0.7,-0.4,-0.93,-0.62,-0.92,-0.62,-0.92,-0.62,,,-0.93,-0.63,-0.93,-0.63,-0.93,-0.63,-0.93,-0.63,-0.8,-0.5
1,00000fd6641609c6ece5454664794f0340ad84dddce9a2...,0.9,0.02,0.86,0.93,0.88,7.15,6.74,0,19,6,0.03,0.03,0.01,0.11,0.03,0.99,0.05,0.82,1.01,1.0,0.01,0.0,0.0,0.01,0.01,0.12,0.02,0.09,0.17,0.17,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,,,,,,0.14,0.17,0.06,0.53,0.06,0.0,0.0,0,0,0,0.85,0.8,0,3,1,0.26,0.01,0.24,0.27,0.27,0.04,0.04,0.0,0.17,0.03,0.45,0.01,0.43,0.47,0.44,0.39,0.01,0.38,0.4,0.4,0.05,0.03,0.01,0.11,0.01,-1.0,0.0,-1,-1,-1,0.2,0.02,0.17,0.23,0.17,0.03,0.01,0.02,0.07,0.03,0.0,0.0,0.0,0.0,0.0,,,,,,0.01,0.01,0.0,0.05,0.01,0.54,0.52,0,1,1,0.16,0.07,0.1,0.24,0.24,0.57,0.04,0.51,0.62,0.57,0.3,0.0,0.29,0.3,0.3,,,,,,0.02,0.02,0.0,0.05,0.04,0.01,0.02,0.0,0.08,0.01,1.0,0.0,1.0,1.0,1.0,0.1,0.04,0.07,0.21,0.21,0.03,0.0,0.02,0.03,0.02,1286.459961,772.369995,0,2402,1284,0.05,0.01,0.04,0.06,0.04,0.71,0.02,0.68,0.75,0.75,0.05,0.02,0.01,0.07,0.02,0.0,0.0,0,0,0,0.01,0.0,0.0,0.01,0.01,0.03,0.05,0.01,0.13,0.13,0.01,0.01,0.0,0.06,0.02,15.92,0.28,15,16,15,0.33,0.22,0.06,0.86,0.41,0.05,0.03,0.02,0.1,0.05,0.01,0.0,0.0,0.01,0.01,14.23,3.24,10,23,14,0.24,0.0,0.23,0.24,0.23,0.0,0.0,0,0,0,0.85,0.69,0,2,0,,,,,...,0.37,0.44,,0.46,,,,,1.0,,0.46,0.0,0.5,0.99,0.48,0.13,0.66,0.9,,,,,0.28,0.71,,0.22,,0.68,0.14,,0.0,0.0,0.26,0.0,,,1.0,,,0.79,,,0.67,,0.0,-12.0,0.0,-0.0,-0.0,0.01,0.0,-0.0,,-0.0,0.0,0.0,0.0,0.02,0.0,-0.03,0.0,0.0,0.0,-0.04,-0.0,0.0,,0.0,0.0,0.0,0.0,0.05,-0.0,,0.04,-0.0,0.0,0.0,0.0,0.07,-0.0,288.0,-0.01,0.05,0.01,0.0,0.0,,-0.0,-1.0,-0.11,-0.05,0.01,-2.0,-0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.01,0.0,-0.0,0.0,0.0,0.0,-0.0,0.0,-1.0,-0.0,,0.0,0.0,0.0,,-0.01,0.0,0.0,-0.01,-0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.07,-0.01,-0.08,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,,0.0,,0.0,-0.0,0.29,0.0,0.01,0.01,-0.01,0.01,0.0,0.0,0.0,0.0,0.0,-0.01,0.0,0.0,,0.0,0.0,,-0.01,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0,-0.85,-0.54,0.12,0.43,-0.88,-0.57
,0.12,0.43,-0.85,-0.54,-0.87,-0.56,-0.87,-0.56,-0.86,-0.55,,,-0.88,-0.57,5.12,5.43,-0.88,-0.57,-0.87,-0.56,-0.75,-0.44
2,00001b22f846c82c51f6e3958ccd81970162bae8b007e8...,0.88,0.03,0.8,0.9,0.88,0.0,0.0,0,0,0,0.0,0.0,0.0,0.01,0.0,0.82,0.0,0.81,0.82,0.81,0.01,0.0,0.0,0.01,0.01,,,,,,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,,,,,,,,,,,0.08,0.28,0,1,0,2.23,1.69,1,7,2,0.24,0.01,0.22,0.25,0.25,0.0,0.0,0.0,0.01,0.0,0.46,0.06,0.41,0.65,0.43,0.33,0.01,0.32,0.34,0.34,0.09,0.06,0.03,0.26,0.08,-1.0,0.0,-1,-1,-1,0.18,0.02,0.13,0.21,0.18,0.03,0.02,0.02,0.08,0.03,0.0,0.0,0.0,0.0,0.0,,,,,,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0,0,0,0.2,0.0,0.2,0.2,0.2,0.62,0.08,0.38,0.68,0.63,0.27,0.05,0.16,0.3,0.3,,,,,,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,1.0,0.0,1.0,1.0,1.0,,,,,,0.01,0.0,0.01,0.02,0.01,0.0,0.0,0,0,0,0.08,0.02,0.06,0.1,0.1,0.21,0.0,0.2,0.21,0.21,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0,0,0,0.02,0.04,0.0,0.09,0.0,,,,,,0.0,0.0,0.0,0.01,0.01,15.92,0.28,15,16,15,0.0,0.0,0.0,0.01,0.0,0.11,0.06,0.04,0.25,0.14,0.0,0.0,0.0,0.01,0.01,12.0,0.0,12,12,12,0.41,0.08,0.15,0.44,0.44,0.0,0.0,0,0,0,0.08,0.28,0,1,0,,,,,...,0.55,1.0,,0.74,,,,,,,0.3,,0.18,0.86,0.0,0.68,0.54,0.0,,,,,0.42,0.54,,0.77,,0.74,,,0.03,0.0,0.0,0.0,,,,,,0.0,,,0.92,,0.01,0.0,-0.0,-0.01,-0.0,,0.0,0.0,,,0.0,-2.0,0.01,0.0,0.0,-0.05,-0.0,-0.06,0.0,0.01,-0.02,0.0,,0.0,0.01,0.0,0.01,0.04,-0.01,,-0.0,-0.0,0.0,0.0,0.0,,0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.01,,0.0,-1.0,-0.01,0.06,0.0,0.0,-0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.01,0.0,-0.01,0.01,0.0,0.0,0.0,0.0,0.0,-0.01,,0.0,0.0,0.0,,-0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,-0.0,0.0,0.0,-0.01,0.0,-0.02,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0,0.01,0.0,0.0,,0.0,0.0,-0.0,-0.01,0.0,-0.0,0.0,0.0,,0.0,,0.0,-0.01,,0.0,-0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,,0.01,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,-0.01,0.0,0,-0.88,-0.62,-0.07,0.18,-0.87,-0.62,1.12,1.37,-0.88,-0.63,-0.87,-0.62,-0.88,-0.62,-0.87,-0.62,,,-0.88,-0.63,-0.88,-0.63,-0.88,-0.63,-0.87,-0.62,-0.75,-0.5
3,000041bdba6ecadd89a52d11886e8eaaec9325906c9723...,0.6,0.02,0.57,0.62,0.62,1.54,3.02,0,9,0,0.06,0.08,0.01,0.28,0.01,0.96,0.08,0.81,1.01,1.01,0.01,0.0,0.0,0.01,0.01,0.25,0.1,0.15,0.41,0.29,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,,,,,,0.06,0.04,0.01,0.15,0.05,0.0,0.0,0,0,0,2.23,2.83,0,8,0,0.07,0.01,0.06,0.09,0.09,0.09,0.07,0.0,0.28,0.12,0.43,0.03,0.38,0.47,0.41,0.4,0.01,0.39,0.41,0.41,0.08,0.06,0.01,0.18,0.01,-1.0,0.0,-1,-1,-1,0.16,0.03,0.08,0.2,0.17,0.06,0.07,0.0,0.25,0.01,1.0,0.0,1.0,1.01,1.01,0.44,0.04,0.34,0.48,0.43,0.05,0.05,0.0,0.18,0.02,0.62,0.65,0,2,2,0.2,0.0,0.2,0.2,0.2,0.61,0.09,0.35,0.7,0.67,0.31,0.08,0.19,0.43,0.41,0.0,0.0,0.0,0.01,0.0,0.06,0.04,0.0,0.15,0.02,0.04,0.07,0.0,0.24,0.01,1.0,0.0,1.0,1.0,1.0,0.26,0.08,0.15,0.37,0.28,0.05,0.03,0.01,0.08,0.07,961.309998,405.589996,528,1511,528,0.06,0.02,0.02,0.09,0.02,0.56,0.02,0.53,0.58,0.55,0.08,0.04,0.01,0.12,0.06,0.0,0.0,0,0,0,0.02,0.03,0.0,0.09,0.01,0.02,0.01,0.01,0.03,0.01,0.03,0.03,0.01,0.1,0.02,26.540001,2.03,24,29,29,0.67,0.33,0.08,1.01,0.39,0.07,0.05,0.03,0.17,0.03,0.0,0.0,0.0,0.01,0.0,12.46,1.66,10,14,14,0.31,0.18,0.09,0.44,0.43,0.0,0.0,0,0,0,1.08,0.49,0,2,1,0.41,0.5,0.0,1.01,...,0.05,0.2,,0.98,,,,,0.0,,0.51,,1.0,0.79,0.45,0.51,0.0,0.81,,,,,0.51,0.99,,0.55,0.0,0.9,0.0,,0.96,0.05,0.0,0.18,1.0,1.0,0.67,,,0.87,,,0.46,,-0.0,0.0,-0.0,-0.0,0.0,-0.03,0.0,0.01,,-0.01,0.0,0.0,0.01,0.0,0.0,-0.02,0.0,,0.0,-0.02,-0.0,0.0,-0.02,1.0,-0.01,2.0,0.0,-0.01,0.01,-0.01,-0.01,0.0,0.0,0.0,0.0,-0.03,-0.0,0.0,-0.02,0.02,-0.0,0.0,0.0,0.01,0.0,0.0,-0.49,-0.06,-0.0,0.0,-0.0,0.0,-1.0,0.32,0.0,0.0,0.0,-0.3,-0.0,0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,-0.0,-0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.01,0.0,0.0,0.0,0.0,-0.01,0.0,0.0,0.0,0.0,0.0,1.0,,0.0,0.0,0.0,0.0,0.0,,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.01,-0.01,0.01,-0.0,0.04,-0.0,0.0,0.0,,0.0,0.0,-0.0,-0.01,0.0,-0.0,0.0,0.0,,0.0,,0.0,0.03,0.06,0.0,0.01,-0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,,0.0,0.0,,0.0,0.0
,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0,-0.61,-0.66,0.38,0.33,-0.61,-0.66,-0.62,-0.67,-0.5,-0.55,-0.6,-0.65,-0.62,-0.67,-0.6,-0.65,0.39,0.34,-0.61,-0.66,-0.62,-0.67,-0.62,-0.67,-0.62,-0.67,-0.49,-0.54
4,00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8a...,0.89,0.04,0.81,0.94,0.87,0.0,0.0,0,0,0,0.01,0.0,0.0,0.01,0.01,0.81,0.0,0.81,0.82,0.82,0.0,0.0,0.0,0.01,0.0,0.17,0.0,0.17,0.18,0.18,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,,,,,,0.05,0.01,0.04,0.06,0.04,0.0,0.0,0,0,0,11.69,9.38,3,25,21,0.21,0.12,0.06,0.31,0.07,0.0,0.0,0.0,0.01,0.0,0.47,0.08,0.37,0.69,0.47,0.47,0.01,0.46,0.48,0.48,0.25,0.09,0.14,0.49,0.33,-1.0,0.0,-1,-1,-1,0.08,0.05,0.03,0.2,0.05,0.12,0.07,0.04,0.22,0.16,0.39,0.51,0.0,1.01,1.01,0.09,0.02,0.07,0.14,0.1,0.01,0.0,0.0,0.01,0.0,0.15,0.38,0,1,0,0.23,0.03,0.19,0.26,0.25,0.53,0.09,0.25,0.58,0.57,0.1,0.07,0.04,0.26,0.13,,,,,,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.01,0.0,1.0,0.0,1.0,1.0,1.0,0.12,0.01,0.11,0.13,0.12,0.05,0.06,0.01,0.15,0.01,157.080002,383.420013,0,1021,0,0.2,0.04,0.13,0.25,0.25,0.18,0.01,0.16,0.19,0.18,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0,0,0,0.32,0.1,0.09,0.39,0.38,,,,,,0.0,0.0,0.0,0.01,0.01,23.15,3.72,18,28,28,0.0,0.0,0.0,0.01,0.0,0.36,0.26,0.08,0.72,0.6,0.01,0.0,0.0,0.01,0.01,12.54,1.39,12,17,13,0.28,0.16,0.07,0.43,0.42,0.0,0.0,0,0,0,0.0,0.0,0,0,0,,,,,...,1.0,0.18,,0.17,,,,,,,0.39,,1.0,0.95,0.96,0.7,0.79,0.0,0.16,0.0,,,0.77,0.19,,0.34,,0.56,,,0.0,0.0,0.06,0.0,,,0.16,0.39,,0.58,,,0.28,,-0.01,0.0,-0.0,0.0,-0.0,,0.0,0.0,,0.01,0.0,-1.0,-0.0,-0.0,0.0,0.01,-0.0,0.08,0.0,-0.01,-0.0,0.0,-0.0,0.0,-0.01,0.0,-0.0,0.0,0.0,,0.0,-0.01,0.0,0.0,0.0,,0.0,0.0,0.02,-0.01,-0.0,0.0,0.0,,0.0,1.0,-0.0,-0.03,0.0,1.0,0.01,0.0,0.0,,0.0,0.0,0.0,-0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.01,,0.0,0.0,0.0,,0.01,0.0,-0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,-0.0,0.0,0.0,0.01,0.0,-0.01,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,-0.01,-0.01,0.01,0.0,-0.0,0.01,0.0,0.0,-0.01,0.0,0.0,0.0,0.0,0.0,,0.0,,0.0,-0.01,,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,-0.0,0.0,,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0,-0.86,-0.56,-0.06,0.25,-0.87,-0.56,20.129999,20.43,-0.87,-0.
57,-0.87,-0.57,-0.87,-0.57,-0.86,-0.56,,,-0.86,-0.56,-0.87,-0.57,-0.87,-0.57,-0.86,-0.56,-0.74,-0.44


In [11]:
# Model inputs: every training column except 'customer_ID' and 'target'.
non_feature_cols = ('customer_ID', 'target')
FEATURES = [name for name in train.columns if name not in non_feature_cols]

In [13]:
# XGB MODEL PARAMETERS
xgb_parms = {
    'max_depth': 7,
    # 'eta' is an alias of 'learning_rate'; the dict previously set both
    # (learning_rate=0.01 and eta=0.03), which is ambiguous. Only one is kept.
    # NOTE(review): 0.01 is kept because the logged first-round logloss
    # (~0.687) looks consistent with it being the value in effect -- confirm.
    'learning_rate': 0.01,
    'subsample': 0.88,
    'eval_metric': 'logloss',
    'objective': 'binary:logistic',
    'tree_method': 'hist',
    'gamma': 1.5,
    'min_child_weight': 8,
    'lambda': 70,
    'random_state': 42}

importances = []
gc.collect()
oof_predictions = np.zeros(len(train))
VER = 0

# KFold yields positional row indices, so rows are selected with .iloc below;
# this stays correct even if `train` does not carry a default 0..n-1 index.
feature_positions = train.columns.get_indexer(FEATURES)

# NOTE(review): plain KFold ignores the second argument of split(), so the
# folds are not stratified by target even though the splitter is named `skf`.
skf = KFold(n_splits=5, shuffle=True, random_state=42)
for fold, (train_idx, valid_idx) in enumerate(skf.split(train, train.target)):
    print(' ')
    print('-'*50)
    print(f'Training fold {fold} with {len(FEATURES)} features...')

    # TRAIN, VALID FOR FOLD K
    X_train = train.iloc[train_idx, feature_positions]
    y_train = train['target'].iloc[train_idx]
    X_valid = train.iloc[valid_idx, feature_positions]
    y_valid = train['target'].iloc[valid_idx]

    dtrain = xgb.DMatrix(data=X_train, label=y_train)
    dvalid = xgb.DMatrix(data=X_valid, label=y_valid)

    # TRAIN MODEL FOLD K
    # Early stopping monitors the last entry of `evals`, i.e. the validation set.
    model = xgb.train(xgb_parms,
                dtrain=dtrain,
                evals=[(dtrain,'train'),(dvalid,'valid')],
                num_boost_round=9999,
                early_stopping_rounds=500,
                verbose_eval=100)
    model.save_model(f'xgb_v{VER}_fold{fold}_fea{len(FEATURES)}_round2.xgb')

    # GET FEATURE IMPORTANCE FOR FOLD K
    dd = model.get_score(importance_type='weight')
    df = pd.DataFrame({'feature': list(dd.keys()), f'importance_{fold}': list(dd.values())})
    importances.append(df)

    # Store predictions for the validation rows of this fold.
    # `Booster.best_ntree_limit` was removed in XGBoost 2.0. `iteration_range`
    # counts boosting rounds, so `best_iteration + 1` is the matching bound.
    oof_preds = model.predict(dvalid, iteration_range=(0, model.best_iteration + 1))
    oof_predictions[valid_idx] = np.array(oof_preds)
    score = amex_metric_mod(y_valid.values, oof_preds)
    print(f'Our fold {fold} CV score is {score}')

    # Free the per-fold objects (including the training slice) before the
    # next fold allocates its own copies.
    del dtrain, dvalid, dd, df
    del X_train, y_train, X_valid, y_valid, model
    _ = gc.collect()

print('_'*50)
# Create a dataframe to save out of folds predictions
overall_score = amex_metric_mod(train['target'], oof_predictions)
oof_df = pd.DataFrame({'customer_ID': train['customer_ID'], 'target': train['target'], 'prediction': oof_predictions})
oof_df.to_csv(f'oof_xgb_v{VER}_{5}fold_seed{42}_fea{len(FEATURES)}_round2.csv', index = False)
print(f'Our out of folds CV score is {overall_score}')

 
--------------------------------------------------
Training fold 0 with 1285 features...
[0]	train-logloss:0.68665	valid-logloss:0.68664
[100]	train-logloss:0.36251	valid-logloss:0.36503
[200]	train-logloss:0.27240	valid-logloss:0.27729
[300]	train-logloss:0.24059	valid-logloss:0.24741
[400]	train-logloss:0.22718	valid-logloss:0.23562
[500]	train-logloss:0.22029	valid-logloss:0.23024
[600]	train-logloss:0.21566	valid-logloss:0.22722
[700]	train-logloss:0.21202	valid-logloss:0.22524
[800]	train-logloss:0.20909	valid-logloss:0.22380
[900]	train-logloss:0.20664	valid-logloss:0.22276
[1000]	train-logloss:0.20446	valid-logloss:0.22192
[1100]	train-logloss:0.20239	valid-logloss:0.22120
[1200]	train-logloss:0.20051	valid-logloss:0.22057
[1300]	train-logloss:0.19874	valid-logloss:0.22007
[1400]	train-logloss:0.19707	valid-logloss:0.21962
[1500]	train-logloss:0.19551	valid-logloss:0.21926
[1600]	train-logloss:0.19403	valid-logloss:0.21893
[1700]	train-logloss:0.19266	valid-logloss:0.21868
[18

[1500]	train-logloss:0.19649	valid-logloss:0.21589
[1600]	train-logloss:0.19501	valid-logloss:0.21557
[1700]	train-logloss:0.19355	valid-logloss:0.21525
[1800]	train-logloss:0.19215	valid-logloss:0.21500
[1900]	train-logloss:0.19082	valid-logloss:0.21479
[2000]	train-logloss:0.18950	valid-logloss:0.21458
[2100]	train-logloss:0.18824	valid-logloss:0.21441
[2200]	train-logloss:0.18701	valid-logloss:0.21423
[2300]	train-logloss:0.18582	valid-logloss:0.21408
[2400]	train-logloss:0.18461	valid-logloss:0.21392
[2500]	train-logloss:0.18355	valid-logloss:0.21380
[2600]	train-logloss:0.18241	valid-logloss:0.21367
[2700]	train-logloss:0.18131	valid-logloss:0.21355
[2800]	train-logloss:0.18020	valid-logloss:0.21345
[2900]	train-logloss:0.17912	valid-logloss:0.21336
[3000]	train-logloss:0.17811	valid-logloss:0.21328
[3100]	train-logloss:0.17702	valid-logloss:0.21320
[3200]	train-logloss:0.17602	valid-logloss:0.21313
[3300]	train-logloss:0.17507	valid-logloss:0.21304
[3400]	train-logloss:0.17405	va

[2600]	train-logloss:0.18184	valid-logloss:0.21822
[2700]	train-logloss:0.18078	valid-logloss:0.21811
[2800]	train-logloss:0.17969	valid-logloss:0.21800
[2900]	train-logloss:0.17868	valid-logloss:0.21790
[3000]	train-logloss:0.17762	valid-logloss:0.21780
[3100]	train-logloss:0.17658	valid-logloss:0.21771
[3200]	train-logloss:0.17558	valid-logloss:0.21763
[3300]	train-logloss:0.17449	valid-logloss:0.21754
[3400]	train-logloss:0.17350	valid-logloss:0.21747
[3500]	train-logloss:0.17247	valid-logloss:0.21741
[3600]	train-logloss:0.17148	valid-logloss:0.21736
[3700]	train-logloss:0.17053	valid-logloss:0.21730
[3800]	train-logloss:0.16952	valid-logloss:0.21725
[3900]	train-logloss:0.16858	valid-logloss:0.21721
[4000]	train-logloss:0.16755	valid-logloss:0.21717
[4100]	train-logloss:0.16662	valid-logloss:0.21712
[4200]	train-logloss:0.16570	valid-logloss:0.21709
[4300]	train-logloss:0.16481	valid-logloss:0.21705
[4400]	train-logloss:0.16387	valid-logloss:0.21703
[4500]	train-logloss:0.16296	va

In [None]:
# Preview the out-of-fold prediction table written by the training cell.
oof_df.head()

In [None]:
# CLEAN RAM
# Drop the training frame and force a collection; cells below no longer
# have `train` available. Assigning to `_` keeps the cell from echoing the
# return value of gc.collect().
del train
_ = gc.collect()

# **Prediction and submission**

In [None]:
# Load the test frame from parquet.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# cell only runs on the author's machine; consider a configurable data dir.
test = pd.read_parquet('C:\\Users\\16122\\AMEX Kaggle Competition\\test_newnn_fe.parquet')

In [None]:
# Columns considered for dtype downcasting: all except 'customer_ID' and 'S_2'.
non_numeric_ids = ('customer_ID', 'S_2')
cols = [name for name in test.columns if name not in non_numeric_ids]

In [None]:
# Shrink int64 columns to the smallest integer dtype that can hold their
# values. A blind astype('int8') silently wraps anything outside [-128, 127];
# pd.to_numeric(downcast='integer') still yields int8 whenever the values fit.
for col in cols:
    if test[col].dtype == 'int64':
        test[col] = pd.to_numeric(test[col], downcast='integer')

In [None]:
# Integer-encode the "_last" column of each categorical feature in place.
# NOTE(review): the encoder is re-fit on the test frame here, so the integer
# codes line up with those produced for the training frame only if both
# frames contain the same set of values in each column -- verify.
cat_features = [
    f"{base}_last"
    for base in ("B_30", "B_38", "D_114", "D_116", "D_117", "D_120",
                 "D_126", "D_63", "D_64", "D_66", "D_68")
]
for cat_col in cat_features:
    encoder = LabelEncoder()
    test[cat_col] = encoder.fit_transform(test[cat_col])

In [None]:
# Add the after-pay difference columns; `after_pay` mutates `test` in place.
after_pay(test)

In [None]:
# Round every float32 column of the test frame to two decimal places.
float32_cols = [name for name in test.columns if test[name].dtype == 'float32']
for col in float32_cols:
    test[col] = test[col].round(2)

In [None]:
# Model inputs: every test column except 'customer_ID' (and 'target', if present).
# len(FEATURES) is part of the saved model file names, so it must equal the
# feature count used at training time for the models to be found.
non_feature_cols = ('customer_ID', 'target')
FEATURES = [name for name in test.columns if name not in non_feature_cols]

In [None]:
def predict_test(test, features, n_folds=5):
    """Average the saved per-fold XGBoost models over ``test`` and write a CSV.

    For each fold ``k`` in ``range(n_folds)`` the model file
    ``xgb_v{VER}_fold{k}_fea{len(features)}_round2.xgb`` is loaded (``VER`` is
    read from module scope), its predictions are averaged with equal weight,
    and the result is written to
    ``XGB_{n_folds}folds_seed42_fe{len(features)}_round2.csv`` with the
    columns ``customer_ID`` and ``prediction``.

    Args:
        test: DataFrame containing ``customer_ID`` and every column listed
            in ``features``.
        features: Names of the feature columns fed to the models.
        n_folds: Number of fold models to load and average. Defaults to 5,
            the number of folds trained in this notebook.

    Returns:
        None. The predictions are only written to disk.
    """
    # Create a numpy array to store test predictions
    test_predictions = np.zeros(len(test))
    dtest = xgb.DMatrix(test[features])
    for fold in range(n_folds):
        # Predict the test set
        print(f'=========== Fold {fold} is predicting ===========')
        model = xgb.Booster()
        model.load_model(f'xgb_v{VER}_fold{fold}_fea{len(features)}_round2.xgb')
        # The out-of-fold scores were computed with trees up to the
        # early-stopped best round only; apply the same cut-off here when the
        # loaded booster exposes it. (0, 0) means "use every tree".
        best_iteration = getattr(model, 'best_iteration', None)
        if best_iteration is not None:
            iteration_range = (0, int(best_iteration) + 1)
        else:
            iteration_range = (0, 0)
        test_pred = model.predict(dtest, iteration_range=iteration_range)
        test_predictions += test_pred / n_folds
    # Create a dataframe to save test prediction
    test_df = pd.DataFrame({'customer_ID': test['customer_ID'], 'prediction': test_predictions})
    test_df.to_csv(f'XGB_{n_folds}folds_seed{42}_fe{len(features)}_round2.csv', index = False)

In [None]:
# Score the test frame with the saved fold models and write the prediction CSV.
predict_test(test,FEATURES)

In [None]:
# double check the final result
# Re-reads the CSV written by predict_test; the file name is rebuilt from
# len(FEATURES), so FEATURES must be the same list that was passed to it.
pred_df = pd.read_csv(f'XGB_{5}folds_seed{42}_fe{len(FEATURES)}_round2.csv')

In [None]:
# Preview the first rows of the prediction file.
pred_df.head()