# **American Express - Default Prediction**

### **Setup**

In [1]:
import vaex
vaex.multithreading.thread_count_default = 8
import vaex.ml

import pandas as pd
import numpy as np
import os, gc, psutil, glob

### **Utility Functions**

In [2]:
def remove_output_files(file_pattern):
    """Delete every file whose path matches the glob pattern ``file_pattern``.

    Deletion is best-effort: a file that cannot be removed is reported on
    stdout and the remaining matches are still processed.  Nothing is
    returned.
    """
    # Use the parameter name as declared; the misspelling ``file_patter``
    # raised NameError on every call.
    for file_path in glob.glob(file_pattern):
        try:
            os.remove(file_path)
        except OSError:
            # os.remove reports its failures (permissions, path is a
            # directory, file vanished) as OSError; keep going with the rest.
            print("Error while deleting file: ", file_path)

In [3]:
def fill_and_convert_floats(ddf):
    """Fill missing values in float64 columns with 0.0 and cast them to float32.

    Columns of any other dtype are left untouched.  The dataframe is modified
    through column assignment and the same object is returned.
    """
    for name in ddf.columns:
        column = ddf[name]
        is_float64 = column.dtype == 'float64'
        if not is_float64:
            continue
        # Missing values become 0.0 before the precision is reduced.
        ddf[name] = column.fillna(0.0).astype('float32')
    return ddf

def encode_cat_features(df):
    """Label-encode the categorical columns ``D_63`` and ``D_64``.

    A ``vaex.ml.LabelEncoder`` is fitted on the two columns, the original
    columns are dropped and the ``label_encoded_*`` outputs are renamed to
    the original column names.  ``D_64``, ``D_63`` and ``B_31`` are then
    cast to ``float32``.

    Returns the transformed dataframe.
    """
    cat_features = ['D_63', 'D_64']
    encoder = vaex.ml.LabelEncoder(features=cat_features)
    df = encoder.fit_transform(df)
    df.drop(cat_features, inplace=True)

    # Give each encoded column the name of the column it replaces.
    renames = (
        ('label_encoded_D_63', 'D_63'),
        ('label_encoded_D_64', 'D_64'),
    )
    for encoded_name, original_name in renames:
        df.rename(encoded_name, original_name)

    # Cast in this fixed order: D_64, D_63, B_31.
    for name in ('D_64', 'D_63', 'B_31'):
        df[name] = df[name].astype('float32')
    return df

def get_last_statement(df):
    """Group ``df`` by ``customer_ID`` and aggregate every other column with
    ``vaex.agg.last``.

    Returns the aggregated dataframe.
    """
    aggregations = {}
    for col in df.get_column_names():
        if col != "customer_ID":
            aggregations[col] = vaex.agg.last(col)
    return df.groupby(['customer_ID']).agg(aggregations)

In [4]:
def get_last_statement_ex(df_test):
    """Aggregate ``df_test`` per ``customer_ID`` in three passes.

    The columns are split into three groups: the columns that do not start
    with ``D_`` (excluding ``customer_ID``), ``S_2`` plus the ``D_`` columns
    with 4-character names, and ``S_2`` plus the ``D_`` columns with
    5-character names.  Each group is aggregated with ``vaex.agg.last``,
    exported to its own ``./last-statement-pN.hdf5`` file and released before
    the next group is processed.  The three files are then reopened, ``S_2``
    is dropped from each, and they are inner-joined on ``customer_ID``.

    The intermediate files are passed to ``remove_output_files`` before the
    joined dataframe is returned.
    """
    def export_last(selected, path):
        # One pass: aggregate the selected columns per customer, write the
        # result to ``path`` and free it straight away.
        part = df_test.groupby(['customer_ID']).agg(
            {name: vaex.agg.last(name) for name in df_test.get_column_names() if selected(name)}
        )
        part.export_hdf5(path)
        del part
        gc.collect()

    # Pass 1: everything except the D_ columns and the grouping key.
    skipped = [name for name in df_test if name.startswith('D_')] + ["customer_ID"]
    export_last(lambda name: name not in skipped, './last-statement-p1.hdf5')

    # Passes 2 and 3: the D_ columns, split by name length, each with S_2.
    short_names = ['S_2'] + [name for name in df_test if name.startswith('D_') and len(name) == 4]
    long_names = ['S_2'] + [name for name in df_test if name.startswith('D_') and len(name) == 5]
    export_last(lambda name: name in short_names, './last-statement-p2.hdf5')
    export_last(lambda name: name in long_names, './last-statement-p3.hdf5')

    # Reopen the three parts and drop S_2 from each of them.
    part_paths = (
        './last-statement-p1.hdf5',
        './last-statement-p2.hdf5',
        './last-statement-p3.hdf5',
    )
    first, second, third = [vaex.open(path).drop('S_2') for path in part_paths]
    gc.collect()

    first = first.join(second, how="inner", on='customer_ID')
    df_last_statement = first.join(third, how="inner", on='customer_ID')
    del first, second, third
    gc.collect()

    remove_output_files('./last-statement-p*.hdf5')
    return df_last_statement


In [5]:
def process_data(data):
    """Reduce ``../input/amex-default-prediction/{data}.csv`` to ``./{data}.hdf5``.

    The CSV is read in chunks of 500,000 rows.  For every chunk the dashes
    are removed from ``S_2`` and it is cast to ``float32``; then
    ``fill_and_convert_floats``, ``encode_cat_features`` and
    ``get_last_statement`` are applied and the result is written to
    ``./{data}_NN.hdf5``.  The per-chunk files are finally opened together
    through a glob pattern, exported as one file and deleted.

    NOTE(review): the aggregation runs per chunk, so a customer whose rows
    straddle a chunk boundary presumably appears once per chunk in the
    combined file -- confirm that a later step aggregates again.
    """
    source_csv = f'../input/amex-default-prediction/{data}.csv'
    for i, part in enumerate(vaex.from_csv(source_csv, chunk_size=500_000)):
        part['S_2'] = part['S_2'].str.replace('-','').astype('float32')
        # Disabled in the original: part['R_26'] = part['R_26'].astype('int16')
        part = fill_and_convert_floats(part)
        part = encode_cat_features(part)
        part = get_last_statement(part)
        part.export_hdf5(f'./{data}_{i:02}.hdf5')
        # Free the chunk before the next one is read.
        del part
        gc.collect()

    # Stitch the per-chunk files into a single export, then clean them up.
    chunk_pattern = f'./{data}_*.hdf5'
    combined = vaex.open(chunk_pattern)
    combined.export_hdf5(f'./{data}.hdf5')
    del combined
    gc.collect()
    remove_output_files(chunk_pattern)

In [6]:
def process_data_level2(df, data, flag):
    """Aggregate ``df`` per customer and replace ``customer_ID`` by an integer label.

    When ``flag`` equals 1 the aggregation is done by
    ``get_last_statement_ex``; otherwise by ``get_last_statement`` followed
    by dropping ``S_2``.  ``customer_ID`` is then label-encoded with
    ``vaex.ml.LabelEncoder``; the (encoded, original) ID pairs are exported
    to ``./{data}_customer_map.hdf5`` and the table, with the encoded column
    renamed to ``customer_ID``, to ``./{data}v2.hdf5``.

    Nothing is returned.
    """
    if flag == 1:
        df = get_last_statement_ex(df)
    else:
        df = get_last_statement(df)
        df.drop('S_2', inplace=True)

    id_encoder = vaex.ml.LabelEncoder(features=['customer_ID'])
    df = id_encoder.fit_transform(df)

    # Capture the lookup table before the original ID column is dropped.
    df_customer_map = df[['label_encoded_customer_ID', 'customer_ID']]
    df.drop('customer_ID', inplace=True)
    df.rename('label_encoded_customer_ID','customer_ID')

    df_customer_map.export_hdf5(f'./{data}_customer_map.hdf5')
    df.export_hdf5(f'./{data}v2.hdf5')

    del df, df_customer_map
    gc.collect()

In [7]:
def amex_metric(y_true: pd.DataFrame, y_pred: pd.DataFrame) -> float:
    """Return the mean of the normalized weighted Gini and the top-4% capture rate.

    ``y_true`` must provide a ``target`` column and ``y_pred`` a
    ``prediction`` column; the two frames are concatenated column-wise.
    Rows with ``target == 0`` get weight 20, all other rows weight 1.
    """

    def ranked(truth: pd.DataFrame, pred: pd.DataFrame) -> pd.DataFrame:
        # Rows ordered by descending prediction, with their sample weight.
        frame = pd.concat([truth, pred], axis='columns')
        frame = frame.sort_values('prediction', ascending=False)
        frame['weight'] = np.where(frame['target'] == 0, 20, 1)
        return frame

    def top_four_percent_captured(truth: pd.DataFrame, pred: pd.DataFrame) -> float:
        # Share of all positives found within the first 4% of total weight.
        frame = ranked(truth, pred)
        four_pct_cutoff = int(0.04 * frame['weight'].sum())
        within_cutoff = frame['weight'].cumsum() <= four_pct_cutoff
        captured = (frame.loc[within_cutoff, 'target'] == 1).sum()
        return captured / (frame['target'] == 1).sum()

    def weighted_gini(truth: pd.DataFrame, pred: pd.DataFrame) -> float:
        frame = ranked(truth, pred)
        weight = frame['weight']
        random = (weight / weight.sum()).cumsum()
        weighted_pos = frame['target'] * weight
        lorentz = weighted_pos.cumsum() / weighted_pos.sum()
        return ((lorentz - random) * weight).sum()

    def normalized_weighted_gini(truth: pd.DataFrame, pred: pd.DataFrame) -> float:
        # Normalise by the Gini obtained when the labels are the predictions.
        perfect = truth.rename(columns={'target': 'prediction'})
        return weighted_gini(truth, pred) / weighted_gini(truth, perfect)

    g = normalized_weighted_gini(y_true, y_pred)
    d = top_four_percent_captured(y_true, y_pred)
    return 0.5 * (g + d)

In [8]:
def amex_metric_np(y_true, y_pred):
    """NumPy computation of 0.5 * (normalized weighted Gini + top-4% capture).

    ``y_true`` and ``y_pred`` are equally long sequences of labels and
    scores.  Rows whose label is 0 get weight 20, all others weight 1.
    """
    def ranked(sort_column):
        # (label, score) rows ordered by descending value of ``sort_column``.
        pairs = np.transpose(np.array([y_true, y_pred]))
        return pairs[pairs[:, sort_column].argsort()[::-1]]

    def gini(sort_column):
        rows = ranked(sort_column)
        weight = np.where(rows[:, 0] == 0, 20, 1)
        weight_random = np.cumsum(weight / np.sum(weight))
        weighted_pos = rows[:, 0] * weight
        lorentz = np.cumsum(weighted_pos) / np.sum(weighted_pos)
        return np.sum((lorentz - weight_random) * weight)

    # Share of positives found within the first 4% of the total weight.
    by_score = ranked(1)
    weights = np.where(by_score[:, 0] == 0, 20, 1)
    within_cutoff = by_score[np.cumsum(weights) <= int(0.04 * np.sum(weights))]
    top_four = np.sum(within_cutoff[:, 0]) / np.sum(by_score[:, 0])

    # Gini ranked by score (column 1), normalised by the Gini ranked by
    # label (column 0).
    return 0.5 * (gini(1) / gini(0) + top_four)

def lgb_amex_metric(y_pred, y_true):
    """Adapter that lets ``amex_metric_np`` be passed as ``feval`` to ``lgb.train``.

    ``y_true`` must expose ``get_label()``.  Returns the tuple
    ``('amex_metric', score, True)``.
    """
    labels = y_true.get_label()
    score = amex_metric_np(labels, y_pred)
    return 'amex_metric', score, True

### **Preprocessing - Level 1 - Reduce Train & Test Datasets**

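The train and test datasets are processed to reduce their size, so that feature engineering and model building can proceed without running into memory, CPU, or disk constraints. In this notebook the already-reduced HDF5 files are loaded directly; the preprocessing functions defined above are not called here.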

In [9]:
# Open the preprocessed train/test tables and the customer-ID lookup tables
# (HDF5 files from the "amex-prediction-starter-level-2" input) with vaex.
df_train = vaex.open('../input/amex-prediction-starter-level-2/train_datav2.hdf5')
df_test = vaex.open('../input/amex-prediction-starter-level-2/test_datav2.hdf5')
df_train_map = vaex.open('../input/amex-prediction-starter-level-2/train_data_customer_map.hdf5')
df_test_map = vaex.open('../input/amex-prediction-starter-level-2/test_data_customer_map.hdf5')

### **Prepare Data to Train Model**

In [10]:
# Load the labels and attach the customer map to them via customer_ID.
df_train_labels = vaex.open('../input/amex-default-prediction/train_labels.csv')
df_train_labels = df_train_labels.join(df_train_map, how="inner", on="customer_ID")

# Column names of the training table; used below to select the columns.
all_features = [col for col in df_train]

# Join the features with the labels: df_train's customer_ID is matched
# against label_encoded_customer_ID (presumably df_train carries the encoded
# IDs -- confirm), and the helper column is dropped afterwards.
df_customer = df_train[all_features]
df_customer = df_customer.join(df_train_labels, left_on='customer_ID', right_on='label_encoded_customer_ID', how='inner')
df_customer.drop(['label_encoded_customer_ID'], inplace=True)

#,P_2,D_39,B_1,B_2,R_1,S_3,D_41,B_3,D_42,D_43,D_44,B_4,D_45,B_5,R_2,D_46,D_47,D_48,D_49,B_6,B_7,B_8,D_50,D_51,B_9,R_3,D_52,P_3,B_10,D_53,S_5,B_11,S_6,D_54,R_4,S_7,B_12,S_8,D_55,D_56,B_13,R_5,D_58,S_9,B_14,D_59,D_60,D_61,B_15,S_11,D_62,D_65,B_16,B_17,B_18,B_19,D_66,B_20,D_68,S_12,R_6,S_13,B_21,D_69,B_22,D_70,D_71,D_72,S_15,B_23,D_73,P_4,D_74,D_75,D_76,B_24,R_7,D_77,B_25,B_26,D_78,D_79,R_8,R_9,S_16,D_80,R_10,R_11,B_27,D_81,D_82,S_17,R_12,B_28,R_13,D_83,R_14,R_15,D_84,R_16,B_29,B_30,S_18,D_86,D_87,R_17,R_18,D_88,B_31,S_19,R_19,B_32,S_20,R_20,R_21,B_33,D_89,R_22,R_23,D_91,D_92,D_93,D_94,R_24,R_25,D_96,S_22,S_23,S_24,S_25,S_26,D_102,D_103,D_104,D_105,D_106,D_107,B_36,B_37,R_26,R_27,B_38,D_108,D_109,D_110,D_111,B_39,D_112,B_40,S_27,D_113,D_114,D_115,D_116,D_117,D_118,D_119,D_120,D_121,D_122,D_123,D_124,D_125,D_126,D_127,D_128,D_129,B_41,B_42,D_130,D_131,D_132,D_133,R_28,D_134,D_135,D_136,D_137,D_138,D_139,D_140,D_141,D_142,D_143,D_144,D_145,D_63,D_64,customer_ID,target
0,0.7442702,0.0030916873,0.0072427043,1.007496,0.009336631,0.29910848,0.003750894,0.0022871876,0.0,0.0,0.0037418173,0.05836785,0.8853801,0.012688401,0.0045256414,0.3764425,0.63416755,0.07867742,0.0,0.12910235,0.04106728,0.003342006,0.16706242,0.33532014,0.0043627215,0.006509866,0.23279026,0.5838689,0.29861808,0.008751293,0.0030340385,0.0017301164,0.0073325606,1.0094647,0.0068011335,0.25435725,0.010460004,0.0054463046,0.12951766,0.5913745,0.002919833,0.004855534,0.009396881,0.0,0.009415094,0.0694069,0.000982847,0.07935046,0.008964529,0.28673884,0.44811037,0.0024738235,0.0053694546,0.0,1.009645,0.00922647,0.0,0.0021670526,6.0,0.19010633,0.0057525164,0.005201095,0.006982812,0.001905763,0.006499635,0.007607226,0.0074345022,0.0051399865,0.5058207,0.025566528,0.0,0.002268651,0.00041639243,0.0056640273,0.0,0.001640105,0.009689874,0.42343187,0.0028987455,0.0035675801,0.0056366734,0.0007157872,0.009354974,0.0,0.005058291,0.00038713944,0.009863765,0.008546336,0.009050531,0.0009270649,0.5031435,0.006148745,1.0063536,0.07451593,0.0036630714,0.005190503,0.000863827,0.0016088556,0.008536328,0.005483388,0.0,0.0,0.006537585,0.00901568,0.0,0.0071298457,0.0044488194,0.0,1.0,0.0049272077,0.0021550814,0.0028910656,0.005200902,0.006891079,0.000495353,1.0023797,0.0020826089,0.00035686433,0.0030126462,0.003240912,0.008255401,0.006943732,0.0004970511,0.0062739803,0.0049009165,0.0014250554,0.041788224,0.13216524,0.0052063847,0.9776156,0.004089647,0.0042449883,0.0008384559,0.0011774149,0.0,0.0,0.0008451528,0.0018779807,0.004563586,0.0,1.0055594,1.0,0.0,0.009942509,0.0,0.0,0.0,1.0051179,0.031578425,0.0,0.0009948038,0.0,0.81625384,0.0,6.0,0.80700576,0.81417465,0.0,0.7262038,0.57253176,0.0040466255,0.22812885,0.0029846623,1.0,1.0007654,1.0038574,1.0015905,0.009653939,0.0,0.0011981075,0.0061936122,0.0,0.009960352,0.009150347,0.0,0.0,0.0,0.0,0.0,0.004333312,0.009860008,0.0029108082,0.0,0.0022399465,0.0022530325,0.004805641,3.0,3.0,0,0
1,0.8260247,0.23716606,0.0202482,1.0092281,0.0083649205,0.09630558,0.006065481,0.017009411,0.0,0.008889698,0.0036192448,0.048056666,0.28356326,0.019710377,0.005484134,0.5763002,0.5246397,0.0965258,0.0,0.093564026,0.1049171,0.00026890857,0.06650882,0.33795607,0.008917741,0.005071848,0.13925605,0.5259909,0.0565117,0.0,0.012162755,0.0028940304,0.0057552657,1.0015627,0.0060377787,0.06585398,0.023065902,0.5995024,0.09448304,0.19597995,0.027668525,0.0036475766,0.32277966,0.01047724,0.008190892,0.07101729,0.034526795,0.3327187,0.0073206285,0.36197263,0.24421707,0.008175531,0.33755976,0.008135456,0.6533203,0.0025766133,1.0,0.003182337,6.0,0.6563971,0.0016308156,0.007856963,0.0049321796,0.0016360391,0.006218958,0.009333168,0.01503285,0.0044198954,0.10441527,0.09113425,0.0,0.007612308,0.14299467,0.14174816,0.0,0.009908457,0.0022537925,0.22737052,0.013268213,0.009658001,0.008888395,0.004303078,0.0015333936,0.0,0.009798362,0.20292905,0.0010397487,0.0028578353,0.003293858,0.002790434,0.0,0.0018962498,1.0042117,0.06958941,0.008083812,0.0011859015,0.009549554,0.00296493,0.0021200862,0.0014943971,0.0,0.0,0.0044383854,0.0005637958,0.0,0.005755218,0.0038262955,0.0,1.0,0.008536949,0.008333227,0.00410364,0.0076752524,0.006535812,0.009610662,1.00873,0.0052953516,1.8971165e-05,0.003466399,0.0056946897,0.00035293208,0.0056567877,0.0077862367,0.0016733574,0.0015266039,0.0021299543,0.9103333,0.1372186,0.90683293,0.97052664,0.0032554036,0.0048619728,0.0025261415,0.006140534,0.0,0.0,0.0011024832,0.006506171,0.018021498,0.0,1.0061305,3.0,0.0,0.0066791945,0.0,0.0,0.0,1.0044028,0.019601787,0.009156267,0.0028790603,1.0,0.6262326,0.0,4.0,0.6178546,0.61576265,0.0,0.92044514,0.57846636,0.0055552865,0.13699508,0.002524778,1.0,1.0078406,1.0022663,1.0006521,0.004769318,0.0,0.004993957,0.007110466,0.0,0.009519239,0.001907316,0.0,0.0,0.0,0.0,0.0,0.008162384,0.0040216073,0.0041671325,0.0,0.0054446333,0.0053429133,0.007469514,0.0,3.0,1,0
2,0.6269004,0.23927279,0.019966587,1.0097741,0.001267091,0.09117943,0.0009548007,0.0058030146,0.0,0.07804137,0.006081624,0.10871921,0.12296996,0.22967255,0.0038574622,0.4187405,0.3165933,0.39594096,0.0,0.4272424,0.052101452,1.0082899,0.17636989,0.3343741,0.03303262,0.009728598,0.17344941,0.68114316,0.16604973,0.0,0.009444672,0.010056182,0.00097492366,1.0006644,0.0013716635,0.068693966,0.13774078,0.9242712,0.30222243,0.2943106,0.09549309,0.0041094683,0.16953284,0.0057367533,0.050976917,0.5070464,0.17539422,0.31553042,0.0026069835,0.48913622,0.14843681,0.0022381109,0.17638212,0.0,0.654829,0.0030405987,1.0,0.006812318,4.0,0.20630692,0.0058704345,0.50779396,0.0012639662,0.0069057355,0.0049685114,0.002123473,0.010931795,0.006926712,0.10243432,0.037987307,0.0,0.007404053,0.07639486,0.07581526,0.2423246,0.0014200755,7.928205e-05,0.13848315,0.086136386,0.009592745,0.004624207,0.0072751213,0.0037649088,0.0,0.006400917,0.20719963,0.0052712183,0.0055512483,0.002695792,0.0027427622,0.0,0.0014110162,1.0095836,0.13065998,0.009048969,0.005371967,0.005833623,0.0047281156,0.0011174124,0.0066383076,0.0,0.0,0.006315223,0.003113295,0.0,0.0019238798,0.0010252936,0.0,1.0,0.00071749825,0.004201771,0.0036211568,0.0033923183,0.00927377,0.005191739,1.001393,0.002119068,0.00940724,0.0067365356,0.0019949435,1.0019033,0.005334433,0.0022157405,0.002034182,0.001561906,0.007066627,0.94586205,0.13870473,0.95687556,0.97170913,0.012620617,0.003054777,0.005614539,0.0021460506,0.0,0.0,0.0071696793,7.217291e-05,0.018783055,0.0,1.0039651,3.0,0.0,0.0068947813,0.0,0.0,0.0,1.004128,0.022213355,0.3561055,0.40470228,1.0,0.103765376,0.0,4.0,0.0970103,0.09723184,0.0,0.50960654,0.29307255,0.007827951,0.27911335,0.0040189023,1.0,0.0010102473,1.0045854,1.0004623,0.0025914568,0.008295924,0.008881546,0.0019429665,0.0,0.008654135,0.002101059,0.0,0.0,0.0,0.0,0.0,0.009373477,0.0018623354,0.006743652,0.0,0.00042700476,0.003133304,0.006046255,0.0,3.0,2,0
3,0.6792098,0.007839169,0.0040927343,0.81233627,0.006715217,0.25260764,0.0038363836,0.02457634,0.0,0.39777175,0.12917748,0.36144692,0.07655569,0.15428993,0.003976343,0.4503665,0.9556774,0.3165758,0.0,0.06653474,0.07908157,1.0039173,0.060778443,0.3398136,0.0039019089,0.30219504,0.25861785,0.7790374,0.08561676,0.0,0.007734508,0.004820499,0.009577903,1.0053828,0.0073417486,0.17613009,0.18146825,0.32313827,0.55075616,0.17198287,0.38369396,0.008486559,0.2683044,0.03811923,0.0072081983,0.5059212,0.009058783,0.8583658,0.005544481,0.5648141,0.129275,0.0038904191,0.17332599,0.0062485547,0.48313633,0.0012339797,0.0,0.0017226592,6.0,0.18789214,0.003983759,0.00068594655,0.0008608274,0.009662942,0.0038492298,0.0009472437,0.012914489,0.0001193684,0.30102944,0.071516946,0.0,0.0014851968,0.1516837,0.14240015,0.0,0.0038456041,0.0011697252,0.12861063,0.004619265,0.009358505,0.0023182584,0.0011257341,0.008823462,0.0,0.0008431257,0.80261356,0.004902393,0.0015570628,0.0003971421,0.00811505,0.008578307,0.0065300874,1.0056909,0.37772843,0.007559142,0.003504547,0.0062785465,0.0038232745,0.0053847795,0.0027632492,0.0,0.0,0.0069290004,0.0046665487,0.0,0.0050570033,0.0056239385,0.0,1.0,0.0033122313,0.00031258565,0.0019176115,0.005515268,0.0027359298,0.0031280098,1.0010273,0.008440951,0.0006079294,0.008623812,0.0039229463,0.0013070587,0.0077820555,0.0073058354,0.002945974,0.008278423,0.0022673272,0.0,0.13449085,0.0,0.97728467,0.006027571,0.009808462,0.0028340856,0.00040023116,0.0,0.0,0.007815966,0.007385582,0.004992605,0.0,1.0004313,2.0,0.0,0.009925099,0.0,0.0,0.0,1.0020775,0.07933627,0.36774328,0.009474005,1.0,0.487216,0.0,-1.0,0.47914577,0.48101068,0.0,0.7432855,0.29305634,0.006328298,0.371228,0.008127392,1.0,0.0051342603,1.0027992,1.00293,0.009725316,0.0,7.937032e-05,0.0080709,0.0,0.0020307032,0.008603441,0.0,0.0,0.0,0.0,0.0,0.0023288885,0.00010507528,0.0039596446,0.0,0.0041512526,0.005179222,0.0010055145,0.0,3.0,3,1
4,1.0096321,0.26725265,0.04301789,1.0035907,0.0044892863,0.11040554,0.00095809955,0.0026458183,0.0,0.042315133,0.0073012314,0.019754479,0.38974518,0.063283525,0.0014467912,0.0,0.885419,0.0051678047,0.0,0.21596777,0.035979375,0.008214336,0.07764819,0.0010156367,0.05993133,0.0031908415,0.3269368,0.6086247,0.29852292,0.0,0.011627059,0.03393454,0.0015783152,1.0049578,0.0040039113,0.07670955,0.09667688,0.7547799,0.014878835,0.0,0.08808981,0.0010124461,0.0020500189,0.02420924,0.075015165,0.2370751,0.8202244,0.00744493,0.003914249,0.6011876,0.62904125,0.0070811203,0.005127121,0.0,1.0084263,0.0010786873,0.0,0.00716035,6.0,0.18569875,0.0062647965,0.85064644,0.004221163,0.0017321199,0.00033405007,0.005794446,0.011999004,0.007792015,0.2067904,0.027259203,0.0,0.007410509,0.008875878,0.007505536,0.0,0.0058019822,0.008726777,0.5877238,0.12421838,0.0021691045,0.0005318253,0.0068310443,0.0014072579,0.0,0.009977671,0.0065169525,0.003391172,0.0040499256,0.0019055984,0.008262631,0.0,0.0073894006,1.0034883,0.047915503,0.007420272,0.006382664,0.0019425307,9.37557e-05,0.004206323,0.0067985905,0.0,0.0,0.005085758,0.004596859,0.0,0.006598029,0.0053518726,0.0,1.0,0.009308903,0.0062352875,0.0050950097,0.0010395464,0.005917589,0.0017171666,1.000953,0.00061744504,0.008269731,0.004671726,0.006473493,0.008272626,0.0066731717,0.0009461598,0.006116943,0.0021831647,0.0021427933,0.9278721,0.13264877,0.9358823,0.9746796,0.0011540658,0.66044253,1.0027773,0.9701258,0.31796685,0.0,0.33419105,0.0005881481,0.03769961,0.0,1.0024946,2.0,0.0,0.0038823774,0.0,0.0,0.0,1.0071536,0.044800993,0.1667033,0.004984847,1.0,0.4207477,0.0,4.0,0.42007712,0.4134546,0.0,0.7788209,0.86337894,0.008975534,0.32646638,0.0099851,1.0,1.0003191,1.0035373,1.00624,0.0002354508,0.0,0.007086529,0.0012084114,0.0,0.005621218,0.0029517205,0.0,0.0,0.0,0.0,0.0,0.004518582,0.00360763,0.0075500486,0.0,0.0039542834,0.0054610507,0.009722869,0.0,4.0,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
458908,-0.010732798,0.15389529,0.9390215,0.026702344,0.50669587,0.1547671,0.009708461,0.8505409,0.0,0.11457598,0.0,0.8606014,0.6245297,0.028456893,1.0065972,0.43083224,0.8240996,0.88168436,0.0,0.0042294795,1.1106197,0.0035131194,0.0,0.007845853,0.5964901,0.10925264,0.013508147,0.10739652,0.013413096,0.15733635,0.062374197,0.95379806,0.0005659937,1.0096717,1.003239,0.10166101,0.012120413,0.46541685,0.7884152,0.0,0.012353972,0.50074697,0.3427649,0.014887834,0.31515938,0.33642864,0.2562516,0.9103909,0.006170234,0.08962494,0.07000712,0.0005373118,1.0085428,0.90542525,0.06014791,1.0089501,0.0,1.0042062,6.0,0.18471177,0.5897211,0.008761762,0.0011977579,0.00064404955,0.5006278,0.0027615472,0.009341554,0.0068840333,0.20410945,1.1768621,0.0,0.0059073926,0.14307551,0.20724908,0.0,0.0063238367,1.0673037,0.0,0.54631835,0.0037509818,0.0,0.008685234,1.0055007,0.0,0.0062949164,0.005711666,6.6791677e-06,0.0014696338,0.0011911344,1.0033156,0.0,0.004996704,1.0045792,0.5758983,0.0051396913,0.009295323,0.0029289434,0.0064385175,1.0069749,0.0033052193,0.0,1.0,0.0019034843,5.7394223e-05,0.0,0.004815031,0.006101245,0.0,1.0,0.009363952,0.009848565,0.006834963,0.0048868144,0.0040019266,0.00013541763,0.0025148776,0.1119049,0.00850305,0.009129402,0.006793807,0.0037465598,0.00089936465,0.0038004746,0.0011692159,0.0016088255,0.007364907,0.9768855,0.13288598,0.9693152,0.9710005,0.007541281,0.008552285,0.0056918804,0.0052909777,0.0,0.0,0.00637378,0.005507363,0.94291264,0.0,1.00498,7.0,0.0,0.0077835163,0.0,0.0,0.0,1.0003289,0.83395064,0.0010888684,0.0073816604,1.0,0.45546603,0.0,3.0,0.4516368,0.44499934,0.0,0.7467386,0.28694412,0.00452912,0.09525536,0.0059406846,1.0,0.005946864,1.0007541,0.0018127613,0.008881128,0.0,0.0038364262,0.009302833,0.0,0.009111942,0.008096797,0.0,0.0,0.0,0.0,0.0,0.0046884143,0.009099285,0.008349092,0.0,0.009857889,0.0096393945,0.0032499228,0.0,3.0,458908,1
458909,0.98327595,0.0057244147,0.0069068726,0.81337935,0.0053661023,0.0,0.007202984,0.0036768313,0.0,0.0,0.0042304946,0.021021364,0.72695994,0.006313816,0.00078040507,0.44211134,0.64188725,0.014520358,0.0,0.18936434,0.004508961,0.0066469763,0.0,0.34107646,0.001884732,0.0046286383,0.210999,0.57495755,0.29437584,0.0,0.008535922,0.006509571,1.0034624,1.003497,0.0055746264,0.0,0.5813706,0.0018552081,0.022384815,0.21595027,0.0060202843,0.0029598076,0.005065907,0.0,0.0014495626,0.5438629,0.0067420388,0.022679685,0.0013279049,0.28632006,0.46229452,0.003517098,0.0070429025,0.0,1.0032729,0.006004089,0.0,0.003229195,6.0,0.18705148,0.009778741,0.008324422,0.004859333,0.0063076606,0.008112218,0.004325458,0.06333871,0.0038619838,0.50536597,0.002806823,0.0,0.0052142707,0.0071500433,0.0077066766,0.0,0.0007867347,0.0021319492,0.4200109,0.009959736,0.0080520315,0.0033120941,0.0012923555,0.0056371796,0.0,0.004731492,0.0028174208,0.006005862,0.0037107314,0.009702263,0.0011811457,0.0,0.0048326193,1.0008994,0.024872607,0.0023719855,0.0014244387,0.0013194717,0.0075832848,0.0075729387,0.0016030468,0.0,0.0,0.003153448,0.006819042,0.0,0.004704107,0.009782924,0.0,1.0,0.003200642,0.0009819749,0.008973468,0.005373998,0.00497889,9.9949764e-05,1.0055256,0.0058865943,0.0016931843,0.002208185,0.00847473,0.0047746752,0.006722928,0.009123591,0.004475103,0.0026185852,0.0049327975,0.29722178,0.13362838,0.080583155,0.9725188,0.009570276,0.007543205,0.006064608,0.007968623,0.0,0.0,0.007110979,0.0038717967,0.003176388,0.0,1.0055238,2.0,0.0,0.0013386434,0.0,0.0,0.0,1.0077744,0.02661194,0.0,0.008791816,0.0,0.33765554,0.0,4.0,0.33375144,0.33462772,0.0,0.77378225,0.5797364,0.0073569766,0.13973016,0.00038146897,1.0,0.0004022521,1.0012528,0.003790631,0.005593932,0.0,0.002146472,0.009325658,0.0,0.0065709683,0.0003150691,0.0,0.0,0.0,0.0,0.0,0.009706496,0.004429807,0.008342922,0.0,0.0021246686,0.00021609438,0.0048905625,0.0,3.0,458909,0
458910,0.72096866,0.0050894828,0.031302024,1.0032947,0.0052362513,0.17875987,0.009009099,0.008636917,0.1605391,0.0,0.13453595,0.107759416,0.026988968,0.05648632,0.00559852,0.48715222,0.28522098,0.48084235,0.0,0.06031165,0.12387975,1.0016139,0.07184215,0.34159613,0.114279516,0.20246471,0.08516649,0.64755327,0.06312179,0.0,0.020179318,0.026864681,0.002539735,1.0002911,0.0066158883,0.6306356,0.071354434,0.7536176,0.41739935,0.0,0.08911299,0.007215213,0.25378424,0.11959174,0.047049686,0.55062884,0.21166225,0.73064536,0.0061504464,0.2827811,0.040108778,0.0049991896,0.17618072,0.0,0.64584184,0.0007372965,0.0,0.008780347,3.0,0.31612402,0.0073684193,0.42287716,0.00993323,0.6184682,0.0042415927,0.25798663,0.030960212,0.0024784456,0.2081813,0.10692492,0.0,0.9538614,0.0730319,0.07653165,0.0,0.006155196,0.005965474,0.0,0.0775387,0.0057412493,0.004881251,0.009527048,0.0051346654,0.0,0.005473892,0.00967213,0.0034211923,0.007293406,0.008868153,0.00018673309,0.0,0.19493647,1.0032221,0.10226934,0.0030335027,0.0081491135,0.0038439585,0.0071399384,0.008462646,0.0024204797,0.0,0.0,0.0019013052,0.00016902425,0.0,0.0070893248,0.0032546928,0.0,1.0,0.00020685559,0.003709426,0.0068693,0.0044991765,0.0041684434,0.0027598678,1.0056018,0.0016966596,0.0065643885,0.006692035,0.5021517,0.0035709124,0.00042873237,0.0024368956,0.00050979195,0.0034774158,0.008375788,0.95026344,0.13337597,0.9528536,0.97587323,0.0052964343,0.0033583571,0.0048561953,0.004768681,0.0,0.0,0.0024370134,0.006893684,0.040436845,0.18730062,0.029990096,2.0,0.0,0.0061753886,0.0,0.0,0.0,1.0088329,0.091260806,0.4349989,0.60499185,1.0,0.028236743,0.0,4.0,0.03174469,0.027866146,1.0,0.3517497,0.14581326,0.0030970424,0.502884,0.008586285,0.0,0.0011879465,1.0025718,1.003282,0.009644602,0.0,0.0022461286,0.00096346624,0.0,0.0022087744,0.0010606203,0.31421232,0.0019145568,0.007972907,0.004374351,0.50884074,0.009385814,0.005131873,0.0063971537,0.0,0.0019211783,0.0057583563,0.0093413,0.0,1.0,458910,0
458911,0.412607,0.0048325835,0.01934015,0.25289184,0.008848368,0.23793909,0.0066812397,0.0729416,0.0,0.06978209,0.009047604,0.16300212,0.057097506,0.0163966,0.008015047,0.53719634,0.40822434,0.32377964,0.0,0.04527436,0.42366704,1.0018169,0.13633303,0.001185005,0.14379266,0.0066666,0.14073755,0.45614272,0.008067752,0.27553064,0.022607403,0.0032071774,0.0045763147,1.0094582,0.0038632979,0.35350424,0.021550883,0.0022689083,0.3631653,0.08391014,0.041413642,0.0040947385,0.28007466,0.002542364,0.011020344,0.48773792,0.030280551,0.4593793,0.00640152,0.40221202,0.06492504,0.0022487429,0.17115642,1.0030811,0.4255781,0.0037782388,0.0,0.12231941,6.0,0.19288003,0.007383576,0.008579989,0.000834739,0.0008035071,0.006551581,0.25503692,0.009216939,0.003168103,0.60132694,0.377203,0.0,0.0022694736,0.14351217,0.20194525,0.0,0.0059824768,0.006989286,0.0,0.017523821,0.008114461,0.0054706302,0.006034148,0.009815677,0.17040846,0.0076578828,0.40513498,0.009795086,0.00036680463,0.0019753522,5.0032887,0.0,0.00980441,1.0072887,0.11956987,0.007651332,0.0061548124,0.0030824274,0.0034926548,0.0019258346,0.00380627,0.0,0.0,0.0030951244,0.0039204867,0.0,0.0034649686,0.0045098173,0.0,1.0,0.0054495404,0.0066268924,0.0053818026,0.0034462912,0.00022678367,0.0025363707,0.009055791,0.0034335782,0.008225633,0.007449276,0.0069801267,0.007893929,0.0065822517,0.008722734,0.0098865125,0.0001942073,0.007958235,0.9540517,0.13223706,0.93975586,0.96930027,0.06839706,0.1609435,1.0096657,0.9504618,0.05040687,0.0,0.34226346,0.0049209176,0.021087354,0.0,1.000522,3.0,0.0,0.0041951807,0.0,0.0,0.0,1.0055301,0.11889141,0.008503749,0.009802818,1.0,0.30182922,0.0,3.0,0.29960698,0.30401802,0.0,0.5403832,0.716116,0.00616917,0.82409465,0.002509715,1.0,0.0058077667,0.9990853,1.0007961,0.009692136,0.0,1.0058932,0.0065887417,0.0,0.008340216,0.0034220626,0.0,0.0,0.0,0.0,0.0,1.002399,0.0015617184,0.93739474,0.55745935,1.007775,0.0024812697,0.5494584,0.0,3.0,458911,0


### **Convert Vaex DataFrames to Pandas DataFrames**

In [11]:
# Convert every vaex dataframe to a pandas DataFrame; the cells below use
# pandas, scikit-learn and LightGBM.
df_train = df_train.to_pandas_df()
df_test = df_test.to_pandas_df()
df_train_map = df_train_map.to_pandas_df()
df_test_map = df_test_map.to_pandas_df()

df_train_labels = df_train_labels.to_pandas_df()
df_customer = df_customer.to_pandas_df()

### **Train Model**

In [12]:
import lightgbm as lgb
from lightgbm import log_evaluation
from sklearn.model_selection import train_test_split

In [13]:
# Separate the label from the features; every remaining column is a feature.
# NOTE(review): customer_ID is still among these columns (it is merged on
# later for the submission), so the model also trains on the ID -- confirm
# this is intended.
y = df_customer.pop('target')
model_features = list(df_customer)
X = df_customer[model_features]

In [14]:
# Display the (rows, columns) shape of the feature matrix.
X.shape

(458913, 189)

In [15]:
# NOTE(review): an integer test_size is an absolute number of samples in
# scikit-learn, so test_size=1 holds out exactly ONE row for validation.
# If a real validation set is wanted, use a fraction such as 0.2 -- confirm
# whether training on (almost) all rows is intentional.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1, random_state=42)

In [16]:
# Wrap the training split in a LightGBM Dataset.
dtrain = lgb.Dataset(
    data=X_train,
    label=y_train
)

# Validation Dataset, built with dtrain as its reference.
dvalid = lgb.Dataset(
    data=X_test,
    label=y_test,
    reference=dtrain
)

In [17]:
# NOTE(review): categorical_features is not passed to lgb.Dataset or
# lgb.train in any visible cell, and the +1 shifts below are applied to
# df_customer after X / dtrain were already built from it (df_test is not
# shifted either), so this cell appears to have no effect on training or
# prediction -- confirm intent.
categorical_features = ['B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120', 'D_126', 'D_63', 'D_64', 'D_66', 'D_68']
df_customer['D_117'] = df_customer['D_117'] + 1
df_customer['D_126'] = df_customer['D_126'] + 1

In [18]:
# Hyper-parameters handed to lgb.train below.
# "n_estimators" is documented by LightGBM as an alias of num_iterations,
# i.e. the number of boosting rounds (the recorded log ends at round 1200).
lgb_params={
    "objective": "binary",
    "n_estimators": 1200,
    "learning_rate": 0.03,
    "reg_lambda": 50,
    "min_child_samples": 2400,
    'max_depth': 8,
    "num_leaves": 255,
    "colsample_bytree": 0.19,
    'verbose': -1,
    "random_state": 1
}
    
# Train the booster, evaluating on dvalid with the custom AMEX metric and
# logging every 100 rounds.
# NOTE(review): the recorded log reports amex_metric as nan at every
# evaluation. dvalid comes from a one-row split, and amex_metric_np divides
# by the sum of the labels, which is 0 when that row is a negative --
# presumably the cause; confirm.
model = lgb.train(
    params=lgb_params,
    train_set=dtrain,
    valid_sets=[dvalid],
    feval=lgb_amex_metric,
    callbacks=[log_evaluation(100)]
)

  
  from ipykernel import kernelapp as app


[100]	valid_0's binary_logloss: 0.0385432	valid_0's amex_metric: nan
[200]	valid_0's binary_logloss: 0.013433	valid_0's amex_metric: nan
[300]	valid_0's binary_logloss: 0.0104473	valid_0's amex_metric: nan
[400]	valid_0's binary_logloss: 0.0106828	valid_0's amex_metric: nan
[500]	valid_0's binary_logloss: 0.0108432	valid_0's amex_metric: nan
[600]	valid_0's binary_logloss: 0.0116766	valid_0's amex_metric: nan
[700]	valid_0's binary_logloss: 0.0114033	valid_0's amex_metric: nan
[800]	valid_0's binary_logloss: 0.0115505	valid_0's amex_metric: nan
[900]	valid_0's binary_logloss: 0.0116415	valid_0's amex_metric: nan
[1000]	valid_0's binary_logloss: 0.0113862	valid_0's amex_metric: nan
[1100]	valid_0's binary_logloss: 0.0115424	valid_0's amex_metric: nan
[1200]	valid_0's binary_logloss: 0.0129676	valid_0's amex_metric: nan


In [19]:
# Run a garbage-collection pass after training.
gc.collect()

519

In [20]:
# Score the held-out split (a single row, given test_size=1 in the split).
y_pred = model.predict(X_test)

In [21]:
# Display the validation prediction(s).
y_pred

array([0.01288388])

### **Make Predictions**

In [22]:
# Select from the test table the same columns the model was trained on
# (the column names of df_customer, which no longer contains 'target').
model_features = [col for col in df_customer]
df_customer_test = df_test[model_features]

In [23]:
# Predict for every test row; the result is a NumPy array in row order.
df_customer_test_pred = model.predict(df_customer_test)

In [24]:
# Display the raw test predictions.
df_customer_test_pred

array([0.0141366 , 0.00077073, 0.00850981, ..., 0.34175355, 0.70307158,
       0.01402809])

In [25]:
# Display the test feature table.
df_customer_test

Unnamed: 0,P_2,D_39,B_1,B_2,R_1,S_3,D_41,B_3,D_42,D_43,...,D_139,D_140,D_141,D_142,D_143,D_144,D_145,D_63,D_64,customer_ID
0,0.794581,0.000953,0.005632,0.810286,0.004518,0.249819,0.008213,0.004759,0.00000,0.093510,...,0.005481,0.000222,0.009534,0.000000,0.006538,0.001135,0.000323,0.0,2.0,0
1,0.943439,0.003285,0.013842,1.000161,0.002857,0.128938,0.008345,0.008910,0.00412,0.000000,...,1.007859,0.009740,0.924881,0.522326,1.006161,0.648153,0.642873,3.0,3.0,1
2,0.764847,0.001760,0.003702,0.810497,0.008001,0.000000,0.009685,0.001857,0.00000,0.103272,...,0.008729,0.002711,0.005979,0.000000,0.007977,0.003958,0.000977,0.0,1.0,2
3,0.610121,0.771469,0.613606,0.022343,0.003864,0.174504,0.008826,0.733658,0.00000,0.449111,...,1.006573,0.005742,0.907650,0.201316,1.004309,0.139868,0.098931,0.0,2.0,3
4,0.882037,0.005513,0.006477,0.819673,0.002524,0.000000,0.007767,0.000086,0.00000,0.000000,...,0.001783,0.008707,0.008966,0.000000,0.008267,0.005799,0.007622,0.0,2.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
924616,0.679791,0.000315,0.025005,0.496837,0.251907,0.142065,0.003219,0.149840,0.00000,0.626851,...,1.005299,0.000072,1.036161,0.803950,1.001329,0.626038,0.096816,0.0,3.0,924616
924617,0.767409,0.004265,0.053714,0.321444,0.259923,0.336231,0.093263,0.258417,0.00000,0.000000,...,0.000975,0.009754,0.007228,0.000000,0.002002,0.003100,0.004315,3.0,2.0,924617
924618,0.417093,0.008597,0.002355,0.817596,0.006410,0.163120,0.008076,0.005141,0.00000,0.705349,...,1.006942,0.007578,0.865715,0.034273,1.006945,0.450350,0.099008,0.0,2.0,924618
924619,0.454026,0.155667,0.667262,0.030093,0.252782,0.171745,0.007521,0.808719,0.00000,0.039547,...,0.005669,0.006214,0.007119,0.000000,0.006731,0.005600,0.008319,0.0,1.0,924619


In [26]:
# Map the encoded customer_ID back to the original ID via the test lookup
# table. Both frames have a customer_ID column, so pandas suffixes them as
# customer_ID_x (left, encoded) and customer_ID_y (right, original).
df_customer_test = pd.merge(df_customer_test, df_test_map, how="inner", left_on="customer_ID", right_on="label_encoded_customer_ID")

In [27]:
# Remove both encoded-ID columns; only the original ID (customer_ID_y) stays.
encoded_id_columns = ['customer_ID_x', 'label_encoded_customer_ID']
df_customer_test = df_customer_test.drop(columns=encoded_id_columns)

In [28]:
# Wrap the prediction array in a single-column DataFrame (default integer
# index and column label 0).
df_customer_test_pred = pd.DataFrame(df_customer_test_pred.tolist())

In [29]:
# Attach the predictions as a new column.
# NOTE(review): this assumes the merged frame still has one row per
# prediction, in the order of the rows passed to model.predict, with a
# matching default index -- confirm that the inner merge dropped or
# reordered nothing.
df_customer_test['prediction'] = df_customer_test_pred

In [30]:
# Display the test table with original IDs and predictions.
df_customer_test

Unnamed: 0,P_2,D_39,B_1,B_2,R_1,S_3,D_41,B_3,D_42,D_43,...,D_140,D_141,D_142,D_143,D_144,D_145,D_63,D_64,customer_ID_y,prediction
0,0.794581,0.000953,0.005632,0.810286,0.004518,0.249819,0.008213,0.004759,0.00000,0.093510,...,0.000222,0.009534,0.000000,0.006538,0.001135,0.000323,0.0,2.0,43906b6773ec5fb8cf4452ceec799976493b47fc579d4b...,0.014137
1,0.943439,0.003285,0.013842,1.000161,0.002857,0.128938,0.008345,0.008910,0.00412,0.000000,...,0.009740,0.924881,0.522326,1.006161,0.648153,0.642873,3.0,3.0,4390cd51fe9c61555bb344c39ff42fcae135e2bfccadd9...,0.000771
2,0.764847,0.001760,0.003702,0.810497,0.008001,0.000000,0.009685,0.001857,0.00000,0.103272,...,0.002711,0.005979,0.000000,0.007977,0.003958,0.000977,0.0,1.0,43946d3f3e74a62b41a17a43fbccc1457da3bce191658c...,0.008510
3,0.610121,0.771469,0.613606,0.022343,0.003864,0.174504,0.008826,0.733658,0.00000,0.449111,...,0.005742,0.907650,0.201316,1.004309,0.139868,0.098931,0.0,2.0,43958550b73643ac8d309ff22fbcb4ce0a23a0a914f437...,0.411592
4,0.882037,0.005513,0.006477,0.819673,0.002524,0.000000,0.007767,0.000086,0.00000,0.000000,...,0.008707,0.008966,0.000000,0.008267,0.005799,0.007622,0.0,2.0,4396f71c2a23f3ea73802cd79133adf767101ebee636f8...,0.003120
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
924616,0.679791,0.000315,0.025005,0.496837,0.251907,0.142065,0.003219,0.149840,0.00000,0.626851,...,0.000072,1.036161,0.803950,1.001329,0.626038,0.096816,0.0,3.0,fff5ba5c7925c14b17b13cf2a8db0b9221df47d7caabbe...,0.048898
924617,0.767409,0.004265,0.053714,0.321444,0.259923,0.336231,0.093263,0.258417,0.00000,0.000000,...,0.009754,0.007228,0.000000,0.002002,0.003100,0.004315,3.0,2.0,fff60f216e5b5df086502d86a04392050e67ee77d5e162...,0.109117
924618,0.417093,0.008597,0.002355,0.817596,0.006410,0.163120,0.008076,0.005141,0.00000,0.705349,...,0.007578,0.865715,0.034273,1.006945,0.450350,0.099008,0.0,2.0,fff9bb02cbd43adf24d7a5a13ce6c6ce9910c9991f9b4c...,0.341754
924619,0.454026,0.155667,0.667262,0.030093,0.252782,0.171745,0.007521,0.808719,0.00000,0.039547,...,0.006214,0.007119,0.000000,0.006731,0.005600,0.008319,0.0,1.0,fffd9644010686e7e2bf96ca6f3af174bd2b075a887445...,0.703072


In [31]:
# Restore the plain column name for the original customer ID.
df_customer_test = df_customer_test.rename(columns={"customer_ID_y": "customer_ID"})

In [32]:
# Keep only the two columns written to the submission file.
final_prediction = df_customer_test[["customer_ID", "prediction"]]

### **Save File**

In [33]:
# Write the submission CSV without the index column.
final_prediction.to_csv("./submission.csv",index=False)