In [1]:
import pandas as pd
import numpy as np

pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [2]:
train_data = pd.read_parquet('../../data/initial_modelling/train_data_last_month.parquet')
test_data = pd.read_parquet('../../data/initial_modelling/test_data_last_month.parquet')

In [3]:
cat_cols = ['D_63', 'D_126', 'B_30', 'B_38']
train_data[cat_cols] = train_data[cat_cols].astype('category')
test_data[cat_cols] = test_data[cat_cols].astype('category')

In [4]:
train_data.info(show_counts = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 386351 entries, 0 to 386350
Columns: 103 entries, customer_ID to end_of_month
dtypes: category(4), datetime64[ns](2), float32(94), int64(2), string(1)
memory usage: 154.8 MB


In [5]:
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import numpy as np

def one_hot_encode_categories(df, categorical_columns=None, drop_original=True, handle_unknown='error'):
    """
    One-hot encode categorical columns in a DataFrame.
    
    Parameters:
    -----------
    df : pandas.DataFrame
        Input DataFrame containing categorical columns
    categorical_columns : list or None
        List of categorical column names to encode. If None, automatically detects categorical columns.
    drop_original : bool
        Whether to drop the original categorical columns
    handle_unknown : str
        Strategy for handling unknown categories in new data: 'error', 'ignore' or 'infrequent_if_exist'
    
    Returns:
    --------
    pandas.DataFrame
        DataFrame with one-hot encoded columns
    OneHotEncoder
        Fitted encoder for future transformations
    """
    # Make a copy to avoid modifying the original
    result_df = df.copy()
    
    # Automatically detect categorical columns if not specified
    if categorical_columns is None:
        categorical_columns = result_df.select_dtypes(include=['object', 'category']).columns.tolist()
    
    if not categorical_columns:
        print("No categorical columns found to encode.")
        return result_df, None
        
    # Initialize the encoder
    encoder = OneHotEncoder(sparse_output=False, handle_unknown=handle_unknown, drop='if_binary')
    
    # Fit and transform the categorical columns
    encoded_array = encoder.fit_transform(result_df[categorical_columns])
    
    # Get feature names
    feature_names = encoder.get_feature_names_out(categorical_columns)
    
    # Create a DataFrame with the encoded features
    encoded_df = pd.DataFrame(encoded_array, columns=feature_names, index=result_df.index)
    
    # Combine with the original DataFrame
    if drop_original:
        # Drop the original categorical columns
        result_df = result_df.drop(columns=categorical_columns)
    
    # Concatenate the encoded columns with the original DataFrame
    result_df = pd.concat([result_df, encoded_df], axis=1)
    
    print(f"One-hot encoded {len(categorical_columns)} categorical columns into {len(feature_names)} binary features.")
    
    return result_df, encoder

In [6]:
# Apply to your training data
train_data_oh, encoder = one_hot_encode_categories(train_data, handle_unknown='ignore')

One-hot encoded 4 categorical columns into 17 binary features.


In [7]:
train_data_oh.head()

Unnamed: 0,customer_ID,S_2,D_39,B_1,B_2,R_1,D_41,B_3,B_4,D_45,B_5,R_2,D_47,B_6,B_7,D_51,B_9,R_3,B_10,S_5,B_11,S_6,D_54,R_4,B_12,S_8,B_13,R_5,D_58,B_14,D_60,B_15,S_11,D_65,B_16,B_18,B_19,B_20,S_12,R_6,S_13,B_21,B_22,D_71,S_15,B_23,P_4,D_75,B_24,R_7,B_25,B_26,R_8,S_16,R_10,R_11,B_27,S_17,R_12,B_28,R_13,R_14,R_15,R_16,S_18,D_86,R_17,R_18,B_31,S_19,R_19,B_32,S_20,R_20,R_21,B_33,R_22,R_23,D_92,D_93,D_94,R_24,R_25,D_96,S_26,D_102,B_36,B_37,D_109,D_112,B_40,D_127,B_41,D_133,R_28,D_140,D_144,target,end_of_month,D_63_CL,D_63_CO,D_63_CR,D_63_XL,D_63_XM,D_63_XZ,B_30_0.0,B_30_1.0,B_30_2.0,B_38_1.0,B_38_2.0,B_38_3.0,B_38_4.0,B_38_5.0,B_38_6.0,B_38_7.0,D_126_1.0
0,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2017-12-04,0.001082,0.00193,1.007504,0.001772,0.004626,0.006073,0.005182,0.735001,0.189011,0.001795,0.542119,0.221899,0.001681,0.668379,0.007081,0.006369,0.431864,0.007165,0.004027,0.002629,1.009308,0.006144,0.112579,0.759416,0.074886,0.00982,0.004934,0.009725,1.008534,0.007219,0.568667,0.001157,0.000227,1.007369,0.004829,0.004406,0.085943,0.005198,0.852393,0.00433,0.008983,0.430954,0.200863,0.004899,0.007455,0.00454,0.007681,0.006707,0.003785,0.00701,0.000629,0.008428,0.003407,0.003414,0.009385,0.006325,1.008528,0.005743,0.009544,0.005875,0.002376,0.00239,0.004931,0.00348,0.004335,0.0094,1,0.007469,0.009016,0.005824,0.006831,0.008907,0.009985,1.001439,1.4e-05,0.006258,0.001555,0.009442,0.003827,0.002946,0.005545,0.000115,0.022741,0.906699,0.007017,0.008906,0.005981,1.005701,0.001404,1.002499,0.007171,0.003673,0.001212,0.009607,0.004753,0,2017-12-31,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
1,00000fd6641609c6ece5454664794f0340ad84dddce9a2...,2017-12-29,0.302357,0.006711,0.819772,0.007966,0.000728,0.005235,0.001098,0.267228,0.006571,0.000905,0.399547,0.225348,0.017709,0.339235,0.006873,0.100048,0.29657,0.008128,0.000416,0.002475,1.00324,0.001278,0.029998,0.33029,0.068684,0.004056,0.000803,0.010855,0.403688,0.005928,0.449285,0.001572,0.090947,1.000319,0.001873,0.006513,0.192624,0.008571,0.001373,0.005025,0.006626,0.014687,0.300994,0.002473,0.001176,0.007739,0.005513,0.009534,0.016012,0.001688,0.00975,0.008904,0.007372,0.009307,0.006429,0.004212,1.008367,0.008966,0.003675,0.002812,0.003835,0.001464,0.002989,0.000434,0.003204,0.000289,1,0.007946,0.001519,0.005115,0.000802,0.003773,0.004639,1.009193,0.006015,0.009337,0.008599,0.008896,0.00761,0.000912,0.000542,0.008214,0.003408,0.007856,0.009352,0.005969,0.008268,1.006045,0.015016,0.002402,0.00952,0.005307,0.005935,0.009377,0.003133,0,2017-12-31,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
2,00001b22f846c82c51f6e3958ccd81970162bae8b007e8...,2017-12-12,0.007485,0.001891,0.816083,0.006484,0.007051,0.007042,0.092288,0.245794,0.004525,0.003638,0.335034,0.183796,0.079764,0.33479,0.00475,0.000374,0.296902,0.008666,0.002287,1.002712,1.00854,0.002012,0.008421,0.004028,0.007745,0.000842,0.000726,0.000684,0.002947,0.003256,0.283174,0.004786,0.002718,1.000791,0.009395,0.008664,0.193157,0.00238,0.004305,0.004392,0.008046,0.011265,0.504581,0.062451,0.004673,0.0056,0.009369,0.009639,0.003413,0.006417,0.000292,0.004796,0.001615,9.1e-05,0.004148,0.005096,1.005697,0.078434,0.000593,0.00461,0.00204,0.004785,0.001327,0.002912,0.007064,0.008038,1,0.005111,0.00262,0.000463,0.001379,0.006231,0.007522,1.001809,0.006854,0.003513,0.005027,0.009929,0.00896,0.009948,0.00588,0.009871,0.00486,0.002383,0.004557,0.001577,0.006629,1.00207,0.068245,0.004192,0.002416,0.00791,1.2e-05,0.006504,0.001522,0,2017-12-31,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,000041bdba6ecadd89a52d11886e8eaaec9325906c9723...,2017-12-29,0.007425,0.015824,0.96201,0.004479,0.004799,0.009391,0.015474,0.079261,0.072228,0.007786,0.404808,0.176701,0.027479,0.00153,0.028851,0.007758,0.192981,0.058831,0.01014,0.005843,1.007432,0.001348,0.072455,0.32351,0.056811,0.004176,0.081144,0.020386,0.553116,0.008326,0.363006,0.005494,0.090868,0.544162,0.001465,0.008698,0.285365,0.000137,0.422468,0.006275,0.009839,0.010007,0.408334,0.021129,0.000924,0.071955,0.001062,0.002822,0.039492,0.006089,0.000515,0.009825,0.006659,0.004472,0.006847,0.000613,1.007612,0.021147,0.005909,0.009812,0.000335,0.00156,0.006462,0.009857,0.004638,0.004583,1,0.001158,0.005139,0.005533,0.008412,0.001322,0.000592,1.00177,0.009854,0.009813,0.008975,0.005235,0.009285,0.002185,0.000387,0.003448,0.008122,0.009627,0.001459,0.017627,0.000689,1.005776,0.213689,0.006356,0.006112,0.000591,0.009965,0.009082,0.004616,0,2017-12-31,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
4,00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8a...,2017-12-30,0.001762,0.007947,0.81067,0.003196,0.009694,0.004701,0.325619,0.06315,0.002887,0.003517,0.481234,0.058269,0.197958,0.004237,0.006993,0.003773,0.095455,0.002389,0.002799,1.004209,1.0026,0.005219,0.01461,0.00087,0.005653,0.005775,0.389202,0.000752,0.003208,0.008354,0.286313,0.00476,0.00395,0.534853,0.00114,0.000321,0.189436,0.005704,0.000857,0.003726,0.006707,0.007333,0.501014,0.194639,0.003384,0.141611,0.003763,0.009059,0.003173,0.004083,0.00993,0.004551,0.000888,0.000741,0.001511,0.000851,1.00594,0.293034,0.00064,0.004703,0.001742,0.0074,0.001367,0.007988,0.00585,0.003305,1,0.00283,0.009057,0.002063,0.007162,0.00346,0.009699,1.000918,0.005089,0.001218,0.001295,0.008614,0.007277,0.004411,0.002945,0.007626,0.00372,0.53492,0.009628,0.007394,0.002073,1.005385,0.165198,0.00996,0.008637,0.005915,0.006281,0.006562,0.005427,0,2017-12-31,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [8]:
encoder.feature_names_in_

array(['D_63', 'B_30', 'B_38', 'D_126'], dtype=object)

In [9]:
encoder.transform(test_data[encoder.feature_names_in_])

array([[0., 0., 1., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 1.],
       ...,
       [0., 1., 0., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 1.],
       [1., 0., 0., ..., 0., 0., 1.]])

In [10]:
test_data_oh = pd.concat([test_data.drop(columns=encoder.feature_names_in_),pd.DataFrame(encoder.transform(test_data[encoder.feature_names_in_]), columns=encoder.get_feature_names_out(),index=test_data.index)], axis=1)

In [11]:
test_data_oh.shape

(405607, 116)

In [12]:
from sklearn.model_selection import train_test_split
X_train, X_val = train_test_split(train_data_oh, test_size=0.3, random_state=0, stratify = train_data_oh['target'])

In [13]:
y_train = X_train['target']
y_val = X_val['target']

In [14]:
X_train.shape, X_val.shape

((270445, 116), (115906, 116))

In [15]:
X_train[['customer_ID', 'target']].to_parquet('../../data/initial_modelling/y_train.parquet', index = False)
X_val[['customer_ID', 'target']].to_parquet('../../data/initial_modelling/y_val.parquet', index = False)

In [16]:
drop_cols = ['customer_ID', 'target', 'S_2', 'end_of_month']

In [17]:
X_train = X_train.drop(columns = drop_cols)
X_val = X_val.drop(columns = drop_cols)

In [18]:
X_val.head(10)

Unnamed: 0,D_39,B_1,B_2,R_1,D_41,B_3,B_4,D_45,B_5,R_2,D_47,B_6,B_7,D_51,B_9,R_3,B_10,S_5,B_11,S_6,D_54,R_4,B_12,S_8,B_13,R_5,D_58,B_14,D_60,B_15,S_11,D_65,B_16,B_18,B_19,B_20,S_12,R_6,S_13,B_21,B_22,D_71,S_15,B_23,P_4,D_75,B_24,R_7,B_25,B_26,R_8,S_16,R_10,R_11,B_27,S_17,R_12,B_28,R_13,R_14,R_15,R_16,S_18,D_86,R_17,R_18,B_31,S_19,R_19,B_32,S_20,R_20,R_21,B_33,R_22,R_23,D_92,D_93,D_94,R_24,R_25,D_96,S_26,D_102,B_36,B_37,D_109,D_112,B_40,D_127,B_41,D_133,R_28,D_140,D_144,D_63_CL,D_63_CO,D_63_CR,D_63_XL,D_63_XM,D_63_XZ,B_30_0.0,B_30_1.0,B_30_2.0,B_38_1.0,B_38_2.0,B_38_3.0,B_38_4.0,B_38_5.0,B_38_6.0,B_38_7.0,D_126_1.0
18987,0.002942,0.032364,1.009652,0.03038,0.003394,0.003644,0.030278,1.103598,0.170297,0.001116,0.870468,0.198136,0.032464,0.339421,0.002276,0.00511,0.286894,0.058036,0.018683,0.000184,1.009922,0.00417,0.139378,0.753548,0.182328,0.009547,0.003163,0.082787,1.002155,0.096714,0.402411,0.00085,0.007406,1.003898,0.005725,0.00678,0.232633,0.001491,0.683061,0.006359,0.002923,0.101965,0.201421,0.025244,0.00506,0.004411,0.006587,0.001149,0.000354,0.005937,0.004396,0.000977,0.00622,0.007481,0.002995,0.34027,1.000757,0.044601,0.009202,0.005135,0.008831,0.004389,0.002665,0.007052,0.000411,0.00303,1,0.002724,0.006581,0.006841,0.003352,0.001604,0.006436,1.008488,0.00025,0.001507,1.003092,0.003671,0.000721,0.001944,0.006663,0.009705,0.059349,0.0094,0.007266,0.037875,0.007491,1.003468,0.003596,0.004512,0.009682,0.001885,0.001829,0.006862,0.002297,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
148768,0.003151,0.28936,0.112041,0.506558,0.248659,0.219512,0.285715,0.035022,0.015426,1.003749,0.313216,0.010247,0.497624,0.008243,0.847865,0.203579,0.027216,0.051729,0.241159,0.007127,1.008456,0.003981,0.012643,0.409083,0.024011,0.509825,0.82996,0.07006,0.119509,0.002624,0.447048,0.006833,0.925555,0.096094,0.448615,0.652263,0.186399,0.003189,0.290402,0.009004,0.006974,0.009133,0.304376,0.475709,0.009399,0.668169,0.002788,0.005868,0.112694,0.009988,1.000419,0.00454,1.002322,0.001649,0.004278,0.003349,1.004932,0.259825,0.005332,0.00473,0.008157,0.505702,0.005779,0.00485,0.004145,0.000412,1,0.001876,0.008532,0.003677,0.000692,0.007162,0.000507,0.003705,0.004623,0.001002,0.000466,0.005219,0.004939,0.007461,0.005173,0.001373,0.009986,0.320224,0.006207,0.295553,0.003759,0.00243,0.447166,0.000224,0.003176,0.882811,0.003107,0.005478,0.000888,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
5095,0.121072,0.373771,0.073985,0.002255,0.509808,0.553177,0.057456,0.33724,0.010931,1.003704,0.228855,0.011451,0.537469,0.007773,0.613306,0.205537,0.029276,0.018375,0.292809,0.008503,1.003255,0.000687,0.005793,0.009102,0.002595,0.003536,0.161302,0.047139,0.664607,0.004816,0.000815,0.000647,1.003179,0.215519,0.70957,1.00451,0.190505,0.007498,0.003836,0.000253,0.005911,0.008949,0.801601,0.402477,0.000141,0.136083,0.000701,0.00871,0.082875,0.009615,0.000991,0.0071,0.007073,0.006075,0.00914,0.007365,1.006962,0.033155,0.008815,0.007093,0.006679,0.003492,0.001747,0.00366,0.007915,0.003268,1,0.002242,0.007015,0.001018,0.007165,1.00773,0.007276,0.00619,0.006953,0.009629,0.00824,0.001959,0.006258,0.004572,0.000499,0.003579,0.004298,0.064751,0.006173,0.373623,0.004195,0.00941,0.379247,0.007006,0.007949,0.006319,0.007265,0.009457,0.006679,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
143200,0.210048,0.038128,1.005942,0.001762,0.002745,0.005699,0.053901,0.780319,0.06778,0.004955,0.621924,0.12985,0.044468,0.007813,0.00694,0.006647,0.191286,0.031222,0.024544,0.006213,1.006558,0.006776,0.07014,0.318015,0.060706,0.009979,0.087315,0.046872,0.75681,0.060184,0.405121,0.006676,0.087281,0.688696,0.000524,0.00346,0.354833,0.009048,0.513206,0.005232,0.004304,0.050005,0.408997,0.026427,0.00967,0.069179,0.009099,0.00685,0.004513,0.002772,0.00422,0.006567,0.002968,0.005717,0.003777,0.003909,1.007556,0.034514,0.007033,0.002382,0.009694,0.009191,0.009616,0.005307,0.007465,0.009357,1,0.00835,0.001105,0.000783,0.000528,0.006332,0.001145,1.00807,0.005603,0.006914,0.009364,0.003801,0.002751,0.00943,0.005829,0.00623,0.020219,0.009011,0.003612,0.033601,0.00698,1.00174,0.04073,0.006021,0.003636,0.006234,0.003715,0.001921,0.009872,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
266758,0.004366,0.008511,0.811274,0.001845,0.003424,0.006636,0.315805,0.297621,0.007835,0.002328,0.804688,0.039352,0.69371,0.005048,0.006839,0.100918,0.015904,0.001053,0.002636,1.002137,1.009275,0.0039,0.013767,0.002667,0.002773,0.008394,0.175219,0.005111,0.000941,0.004739,0.284391,0.00278,0.004265,0.52615,0.009419,0.008707,0.19082,0.007232,0.008264,0.006747,0.006874,0.01242,0.50561,0.660895,0.005535,0.133571,0.000834,0.004035,0.002883,0.007224,0.004455,0.004058,0.009321,0.007614,0.00033,0.007952,1.009456,0.20142,0.004865,0.007528,0.004814,0.009655,0.007968,0.006684,0.005373,0.003822,1,0.004204,0.004794,0.002486,0.005706,0.004386,0.004594,1.0091,0.001935,0.009387,0.004692,0.003305,0.009131,0.008021,0.008361,0.000159,0.008489,0.002955,0.007191,0.003836,0.007394,1.003045,0.592399,0.000175,0.002615,0.000217,0.000457,0.003494,0.009619,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
94454,0.00408,0.150432,0.086596,0.001722,0.009089,0.179499,0.17107,0.087125,0.023011,0.001244,0.184016,0.053078,0.132586,0.008639,0.611535,0.107663,0.030719,0.449496,0.125934,0.004965,1.006459,0.00599,0.03676,0.167292,0.05572,0.000254,0.121487,0.105347,0.82166,0.003998,0.609701,0.00749,0.503738,0.207323,0.168849,0.296262,0.2824,0.006971,0.53174,0.008274,0.006218,0.009391,0.606878,0.103535,0.005204,0.20748,0.003808,0.006799,0.186274,0.003042,0.000898,0.005132,0.004119,0.004734,0.005436,0.007641,1.009664,0.066775,0.005047,0.008581,0.002364,0.008155,0.009334,0.006727,0.007046,0.003116,1,0.003716,0.00659,0.007771,0.000477,0.003472,0.001987,0.002396,0.002833,0.005833,0.001703,0.000122,0.00414,0.003183,0.009988,0.003802,0.002391,0.006805,0.003981,0.153989,0.00163,1.007437,0.043985,0.001246,0.008107,0.002878,0.008288,0.006494,0.002594,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
128595,0.268338,0.031818,1.009517,0.008695,0.004851,0.008505,0.022745,0.103538,0.007074,0.007042,0.105887,0.184864,0.038018,0.009468,0.025835,0.100251,0.297012,0.010401,0.012034,0.007996,1.003516,0.001259,0.013903,0.744542,0.014665,0.004512,0.009462,0.00945,0.10485,0.007935,0.407137,0.005958,0.005415,1.004889,0.009074,0.0077,0.118936,0.004068,0.001252,0.007631,0.005614,0.013451,0.001795,0.019705,0.009024,0.003428,0.00195,0.008444,0.013631,0.000912,0.006113,0.007556,0.001001,0.00554,0.002381,0.008929,1.002544,0.026013,0.005694,0.008941,0.003822,0.001158,0.000824,0.001079,0.009026,0.001319,1,0.002199,0.001676,0.004038,0.006238,0.001511,0.000853,1.003996,0.005274,0.00056,0.002207,0.004814,0.007081,0.005389,0.001176,0.009417,0.009277,0.914817,0.007604,0.025477,0.001254,1.002192,0.02597,0.002412,0.004932,0.005824,0.004923,0.007975,0.005846,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
15388,0.004989,0.003726,0.818997,0.009297,0.005419,0.003881,0.071827,0.143213,0.001247,0.003521,0.462795,0.084109,0.153874,0.007468,0.330042,0.000285,0.037169,0.006719,0.008959,1.006881,1.000528,0.006334,0.026836,0.004771,0.002743,0.004139,0.109269,0.009437,0.005465,0.003401,0.28953,0.008624,0.008161,0.654105,0.000477,0.001751,0.188954,0.006707,0.008774,0.006002,0.006773,0.011248,0.506876,0.123418,0.004584,0.069259,0.000994,0.006552,0.005881,0.005569,0.003477,0.006314,0.00159,0.004328,0.002655,0.002787,1.004995,0.062817,0.000724,0.00585,0.003226,0.005167,0.000484,0.000343,0.004915,0.001708,1,0.007598,0.008598,0.009288,0.009538,0.00161,0.001381,1.001727,0.004162,0.007847,0.009315,0.003804,0.001601,0.003317,0.000422,0.000106,0.006187,0.470264,0.002827,0.005403,0.003517,1.00977,0.040909,0.009056,0.003491,0.007217,0.006875,0.003299,0.00626,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
77530,0.009476,0.041076,1.000535,0.014434,0.001684,0.004485,0.030723,0.405596,0.04603,0.006277,0.435184,0.200743,0.043654,0.001691,0.059121,0.102033,0.294932,0.109548,0.036023,0.000896,1.000933,0.002517,0.106526,0.493453,0.103943,0.009556,0.003997,0.087424,1.000405,0.003512,0.485892,0.002182,0.006322,1.007794,0.003901,0.005246,0.740012,0.004846,0.6835,0.006022,0.000803,0.344421,0.303497,0.030562,0.002011,0.006131,0.000154,0.009089,0.1492,0.000736,0.001461,0.006111,0.00905,0.001218,0.00172,0.086223,1.00003,0.039684,0.007894,0.00325,0.009379,0.000942,0.007378,0.007793,0.003095,0.009118,1,0.001256,0.003546,0.000826,0.003462,0.007278,0.002161,1.002178,0.001402,0.008408,0.008979,0.000779,0.007952,0.00968,0.001075,0.007395,0.00748,0.544205,0.007249,0.048803,0.006331,1.003559,0.008266,0.006986,0.001231,0.005772,0.005523,0.001081,0.009881,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
141910,0.009133,0.009301,0.810035,0.007793,0.005351,0.007692,0.006151,0.308333,0.00486,0.000432,0.340009,0.230773,0.005482,0.337823,0.000904,0.209172,0.245943,0.008671,0.003911,1.00362,1.002264,0.005387,0.009721,0.003416,0.002022,0.000267,0.00548,0.003513,1.004099,0.005258,0.283827,0.003276,0.003388,1.001222,0.005035,0.000512,0.19228,0.00589,0.009645,0.003641,0.000168,0.016906,0.507252,0.003669,0.005391,0.003401,0.001391,0.005724,0.004224,0.003561,0.006161,0.009649,0.008157,0.007226,0.008003,0.003166,1.002482,0.00501,0.005442,0.008932,0.003003,0.009655,0.006244,0.003859,0.001629,0.003207,1,0.003842,0.005627,0.000405,0.003779,0.001495,0.007742,1.001286,0.003857,0.003477,0.007891,0.007368,0.00753,0.002186,0.005837,0.001052,0.002294,0.009626,0.007903,0.001717,0.005559,1.008924,0.001546,0.009923,0.003686,0.000317,0.009818,0.002139,0.61204,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [19]:
from sklearn.preprocessing import RobustScaler
scaler = RobustScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

In [20]:
X_train_scaled = pd.DataFrame(X_train_scaled, columns = X_train.columns)
X_val_scaled = pd.DataFrame(X_val_scaled, columns = X_train.columns)

In [21]:
X_train_scaled.head(10)

Unnamed: 0,D_39,B_1,B_2,R_1,D_41,B_3,B_4,D_45,B_5,R_2,D_47,B_6,B_7,D_51,B_9,R_3,B_10,S_5,B_11,S_6,D_54,R_4,B_12,S_8,B_13,R_5,D_58,B_14,D_60,B_15,S_11,D_65,B_16,B_18,B_19,B_20,S_12,R_6,S_13,B_21,B_22,D_71,S_15,B_23,P_4,D_75,B_24,R_7,B_25,B_26,R_8,S_16,R_10,R_11,B_27,S_17,R_12,B_28,R_13,R_14,R_15,R_16,S_18,D_86,R_17,R_18,B_31,S_19,R_19,B_32,S_20,R_20,R_21,B_33,R_22,R_23,D_92,D_93,D_94,R_24,R_25,D_96,S_26,D_102,B_36,B_37,D_109,D_112,B_40,D_127,B_41,D_133,R_28,D_140,D_144,D_63_CL,D_63_CO,D_63_CR,D_63_XL,D_63_XM,D_63_XZ,B_30_0.0,B_30_1.0,B_30_2.0,B_38_1.0,B_38_2.0,B_38_3.0,B_38_4.0,B_38_5.0,B_38_6.0,B_38_7.0,D_126_1.0
0,0.720652,-0.066572,0.20863,0.609512,-0.297008,-0.011088,-0.048303,0.42148,12.728937,-0.811324,0.406045,0.682814,-0.173667,0.991528,0.663489,0.879433,0.703248,5.488661,0.044387,-0.745245,0.972543,-0.038405,12.181761,0.342773,8.445914,-0.137736,-0.271297,3.257563,1.061424,0.30209,-1.013085,0.512715,-0.109683,0.448496,0.000606,-0.008712,2.575421,0.863342,0.971101,0.047153,-0.884099,-0.28069,-0.305813,-0.140703,0.595118,-0.250798,0.170017,0.4788,5.512335,-0.144987,0.914214,0.929508,0.033029,0.005267,0.262921,7.792575,0.203855,0.571548,-0.466969,-0.76718,-0.124025,-0.153897,195.865814,-0.493427,-0.778512,-0.070329,0.0,-0.981706,0.666305,-0.085526,-0.486353,-0.945399,0.18171,0.000882,-0.812984,0.329872,182.653893,0.721508,0.570621,-0.275717,0.175758,-0.183385,14.227012,0.21909,0.098792,-0.002888,-0.51061,-0.196261,-0.176032,-0.111306,0.878038,0.397168,0.285756,-0.081433,126.522951,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.114763,6.311282,-0.868704,-0.274477,0.002373,4.573184,1.211365,-0.375803,0.175997,0.392321,-0.051711,-0.407804,4.422931,-0.012218,1.864739,0.96302,-0.209778,0.093657,7.323852,0.143947,-0.195173,-0.000829,-0.162234,-0.645492,-0.14332,0.80663,0.578047,1.612274,0.058673,-0.874756,-0.177837,0.826345,1.09535,-0.68025,6.864859,2.107103,0.022628,-0.925491,-0.006595,0.629871,79.380005,-0.364673,0.361428,4.683537,-0.081127,0.483319,-0.586058,0.136846,2.797631,0.106176,0.574635,0.420349,-0.207775,0.420807,-0.711521,0.322657,0.009362,1.384695,-0.176249,-0.974585,0.363429,0.249572,0.211891,0.17251,0.815717,0.754786,0.0,0.007273,-0.413702,-0.226694,0.642903,0.779045,-0.371885,-0.994087,-0.638108,-0.465351,0.48647,0.212762,-0.718463,0.744261,0.169079,-0.621975,0.649527,1.064372,0.036858,6.504683,-0.753308,-0.452205,3.567435,0.484824,0.513855,-0.761116,-0.717117,-0.352621,0.1466,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.119952,3.845824,-0.852362,-0.269085,-0.231215,1.8625,0.423915,-0.406826,0.283578,0.470427,0.082769,-0.168257,0.57323,-0.014297,1.583444,0.918891,-0.323294,-0.045983,4.505279,-0.363152,-0.78193,0.791859,-0.088516,-0.654071,0.146309,-0.188001,-0.075812,1.558769,1.005683,-0.267648,-0.211872,0.762531,1.089267,-0.54296,5.559654,2.092349,-0.113952,0.227411,-0.008532,0.141297,79.106053,-0.318398,0.363639,0.539701,0.329551,0.250327,0.358466,0.702048,2.838806,-0.404307,0.479017,0.079448,0.699103,0.615336,-0.841506,-0.757869,-0.229058,0.098496,0.810862,-0.972668,-0.07146,0.007562,-0.872689,0.341181,-0.287673,0.889453,0.0,0.939718,0.590563,0.626145,0.532242,0.194203,-0.633079,-0.999124,-0.090383,0.556708,0.098949,-0.463139,0.170315,-0.117296,-0.341138,-0.62646,13.314737,2.053042,-0.236166,3.986624,0.012079,-161.701094,-0.204281,-0.017967,-0.457354,-0.625285,0.62111,-0.811063,-0.767246,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,0.724787,-0.143107,0.000901,-0.160489,-0.990091,-0.014991,-0.28845,0.366301,1.041096,0.640631,-0.127575,0.557697,-0.216758,0.980493,-0.042234,0.891939,1.260962,-0.081387,-0.11883,0.225218,0.720373,-0.542138,2.310355,1.21159,3.574158,0.003165,-0.032224,-0.048534,0.051086,2.582279,1.403307,0.083289,-0.103797,0.061231,0.016168,-0.013344,14.353135,0.816301,1.459997,0.537053,-0.748534,8.210548,-0.998701,-0.186355,-0.838739,-0.004224,-0.034208,0.143216,0.029701,-0.211931,0.308911,0.025549,-0.092174,0.149409,-0.812896,-0.761359,0.595425,-0.221586,0.22347,-0.887804,0.119638,0.456123,0.414435,193.626497,0.378366,-0.535507,0.0,-0.858932,0.367489,-0.556377,-0.115663,0.018245,0.397699,1.1e-05,0.749139,-0.352842,0.322839,-0.2835,-0.900299,-0.534237,-0.368534,-0.875483,1.10584,0.238102,-0.384965,-0.172568,0.251628,0.291773,-0.10976,176.67532,-0.36087,0.512476,0.411731,-0.64599,60.902932,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-0.012869,-0.237586,-0.003951,0.538882,-0.918949,-0.041265,0.134702,1.553184,-0.15074,-0.255593,1.00942,-0.409901,1.018082,0.998863,-0.038617,-0.05853,-0.308852,-0.160182,-0.174378,154.185069,-0.128002,0.490671,-0.073863,-0.644718,-0.255956,0.162839,0.376038,-0.267812,-0.342575,-0.808695,-0.174462,0.643859,-0.102521,0.006348,-0.025356,0.002004,-0.113773,0.239936,-0.003577,-0.736217,-0.142995,-0.043129,0.341033,0.932479,-0.495087,-0.025726,-0.950443,-0.680632,-0.113733,-0.906409,-0.523082,0.31229,-0.81496,-0.137971,-0.226862,0.070453,0.729991,0.059962,0.599625,-0.698051,-0.967535,-0.966247,0.842569,-0.703489,0.903479,0.447247,0.0,-0.808733,0.566457,-0.333013,0.302448,-0.746086,-0.635694,0.008211,0.76748,-0.132453,0.332991,-0.259932,-0.202868,0.085306,-0.983512,0.669221,0.33049,-0.024089,0.483292,-0.243337,0.53079,0.782347,1.310747,0.106627,-0.258627,-0.245476,0.519492,0.766339,0.407527,0.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.089022,2.476033,-0.856144,0.45773,-0.562399,1.601243,1.252164,-0.433279,-0.065026,0.737676,0.076691,-0.354954,1.658036,-0.000814,2.956531,2.884752,-0.378846,1.91044,2.704581,0.174073,0.302477,-0.056775,-0.051013,-0.650687,1.090618,-0.02682,0.661079,0.873049,0.237192,0.426201,-0.212435,-0.763789,0.388641,-0.550909,1.204177,0.120137,0.027672,-0.464836,-0.010317,0.510772,79.200117,-0.067477,0.362353,1.633979,-0.636729,0.514519,0.212539,0.085595,1.622875,-0.064163,-0.629729,-0.385151,0.168932,-0.464317,-0.79162,0.611857,-0.540248,0.332101,-0.67591,-0.687111,-0.331138,-0.31913,0.628893,-0.028938,-0.785955,-0.031847,0.0,-0.144723,-0.485842,0.639225,-0.628637,-0.765082,0.472913,-0.993309,0.25586,0.325836,0.757786,0.248746,0.95386,-0.4047,0.075351,-0.994118,-0.205279,0.573206,0.489306,2.58657,-0.108717,-0.392878,3.07377,-0.523486,-0.596046,0.74221,-0.097462,0.513375,162.113643,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
6,-0.003807,-0.240157,0.006129,-0.104246,-0.375471,-0.024319,-0.065168,-0.226801,-0.28849,0.625336,1.221464,0.671914,-0.187687,0.97938,-0.060226,6.3e-05,0.706339,-0.12098,-0.171573,155.340056,0.872442,0.161646,1.137794,-0.654295,-0.313623,-0.828722,-0.26153,-0.287201,-0.349145,-0.990962,-0.197002,-0.921604,-0.106303,0.442814,0.01415,-0.001558,-0.150235,-0.743088,-0.008607,0.288932,-0.00519,8.071422,0.340339,-0.16491,0.309566,-0.258989,-0.164314,0.454603,-0.13941,-0.260266,0.434294,-0.221261,-0.243481,0.360509,0.531961,-0.519998,0.772856,0.372739,-0.912442,-0.106235,0.32835,-0.138406,0.329791,0.135769,0.654431,0.580295,0.0,-0.601899,-0.590155,0.693793,0.968666,0.455611,0.643986,0.003554,-0.743016,0.28603,-0.271096,-0.124496,0.829558,0.019901,-0.124431,0.048273,0.264306,-0.017221,0.613713,-0.257444,-0.060548,-162.986578,-0.126673,-0.359656,-0.72576,-0.512757,-0.759623,-0.58037,-0.940426,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.112951,6.416031,-0.865721,0.271397,-0.453141,6.068475,0.947614,0.455418,0.27009,-0.232099,-0.273909,-0.394174,2.168416,-0.013854,1.862922,1.860794,-0.397883,-0.092932,8.015169,-0.443503,-0.019576,0.764338,0.067857,-0.444967,-0.18964,0.346205,1.456835,3.39568,0.376358,-0.422006,0.029177,0.222983,1.084272,-0.680873,6.900543,2.089961,-0.104293,0.865197,-0.019166,-0.92765,79.007735,-0.34982,0.352613,2.36002,-0.713381,1.492145,-0.173099,0.363118,5.799527,0.376461,-0.782466,0.17956,0.784254,0.793353,0.397896,0.108029,0.047937,1.664991,0.410455,-0.920256,0.060844,0.578179,0.86302,-0.884279,0.606893,0.216058,0.0,-0.905382,-0.883638,0.412169,-0.004807,-0.281265,0.740113,-1.000731,-0.542614,-0.907891,-0.517943,0.372241,-0.200069,-0.052918,-0.037432,-0.802877,-0.614994,-0.020969,-0.329029,6.598892,0.276023,-0.335224,1.338268,0.514314,-0.581986,0.208535,-0.897095,195.925576,0.542517,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-1.0
8,-0.023546,-0.029704,0.210922,0.066171,0.297963,0.006884,0.027037,1.061237,37.420341,0.643042,1.681746,0.699905,-0.188622,2.005825,-0.047698,3.850877,0.691008,1.668802,0.038628,0.144993,-0.80043,0.529047,21.176826,1.401883,15.046728,0.184787,-0.260252,4.867717,1.102189,102.707484,3.203262,-0.111418,-0.006929,0.442911,-0.024548,-0.003576,2.495974,-0.121718,2.358065,-0.64331,0.064988,8.384851,-1.001061,-0.152422,0.096238,-0.244307,-0.960234,-0.252649,-0.133069,0.354673,-0.185373,-0.319066,-0.392226,93.027935,0.498235,12.642282,0.612142,0.998977,0.134149,0.672789,0.743882,185.576559,-0.767508,0.48767,0.745033,-0.955542,0.0,-0.184638,0.255649,0.132824,-0.155449,0.606948,-0.006507,0.003053,0.835737,0.778928,183.122568,0.38864,0.622146,0.294127,-0.528308,-0.559395,105.181761,-0.00037,0.909653,-0.071687,-0.917321,-0.117651,-0.115885,-0.177572,-0.758744,-0.936724,-0.39423,-0.606264,0.029467,0.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.001476,-0.212506,-0.000588,-0.156845,-0.444236,-0.044533,-0.335846,0.614404,-0.274283,0.586613,0.13306,0.109591,-0.230764,0.983313,-0.055418,-0.052382,0.694485,-0.184009,-0.175387,155.503483,-0.307669,0.041421,-0.211278,-0.648308,-0.24762,0.189426,-0.267595,-0.244627,-0.338126,-0.355725,-0.182439,0.791467,-0.108186,0.449209,-0.041179,-0.003574,-0.021969,0.35589,-0.013248,0.640609,-0.415126,-0.068438,0.362795,-0.232418,-0.599423,-0.261568,-0.96647,0.209173,-0.166728,0.750148,0.003502,-0.426971,0.339877,0.07688,0.349972,-0.198368,-0.784132,-0.395103,0.7697,-0.293992,-0.243498,0.749606,-0.714226,-0.314437,-0.864185,0.437427,0.0,0.842472,-0.775879,0.754773,-0.507172,-0.881183,-0.248148,0.007155,-0.33513,0.079986,0.567742,-0.535435,0.245036,-0.273208,-0.248202,-0.929732,-0.408969,-0.004748,0.052754,-0.199663,0.273248,-0.478993,-0.213085,0.097479,-0.1685,-0.052841,0.295757,-0.163707,0.701428,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
X_val_scaled.head(10)

Unnamed: 0,D_39,B_1,B_2,R_1,D_41,B_3,B_4,D_45,B_5,R_2,D_47,B_6,B_7,D_51,B_9,R_3,B_10,S_5,B_11,S_6,D_54,R_4,B_12,S_8,B_13,R_5,D_58,B_14,D_60,B_15,S_11,D_65,B_16,B_18,B_19,B_20,S_12,R_6,S_13,B_21,B_22,D_71,S_15,B_23,P_4,D_75,B_24,R_7,B_25,B_26,R_8,S_16,R_10,R_11,B_27,S_17,R_12,B_28,R_13,R_14,R_15,R_16,S_18,D_86,R_17,R_18,B_31,S_19,R_19,B_32,S_20,R_20,R_21,B_33,R_22,R_23,D_92,D_93,D_94,R_24,R_25,D_96,S_26,D_102,B_36,B_37,D_109,D_112,B_40,D_127,B_41,D_133,R_28,D_140,D_144,D_63_CL,D_63_CO,D_63_CR,D_63_XL,D_63_XM,D_63_XZ,B_30_0.0,B_30_1.0,B_30_2.0,B_38_1.0,B_38_2.0,B_38_3.0,B_38_4.0,B_38_5.0,B_38_6.0,B_38_7.0,D_126_1.0
0,-0.027043,-0.008303,0.214722,3.999342,-0.415648,-0.032777,-0.236887,2.840724,3.197316,-0.784018,1.493226,0.68527,-0.16282,0.996512,-0.059614,-0.034351,0.649234,0.612988,-0.026307,-0.969406,0.985733,-0.1942,1.975463,0.879301,1.916334,0.844422,-0.273809,0.481736,1.149074,14.472981,0.391334,-0.837397,-0.102355,0.446615,-0.00954,-0.002866,0.946436,-0.71643,1.587599,0.247813,-0.531699,3.459983,-0.67029,-0.1366,-0.034558,-0.264685,0.27076,-0.780762,-0.172318,0.133179,-0.139759,-0.807825,0.179678,0.379877,-0.3979,61.978401,-0.793193,-0.21419,0.817842,0.00728,0.74186,-0.192219,-0.479757,0.361834,-0.916141,-0.391004,0.0,-0.454694,0.291674,0.335841,-0.339278,-0.685906,0.266932,0.006788,-0.950895,-0.699377,182.248177,-0.27025,-0.859973,-0.619104,0.329807,0.885723,5.737749,0.001331,0.442207,0.03738,0.49617,-0.080955,-0.219991,-0.199566,0.891882,-0.652427,-0.637892,0.343819,-0.58449,0.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-0.026152,1.977417,-0.772095,82.579435,41.892905,1.096275,0.873062,-0.502559,-0.010512,190.166273,-0.242722,-0.393526,1.65261,0.002134,2.423644,1.871312,-0.319089,0.524382,2.026683,0.1074,0.698155,-0.230775,-0.122152,0.176148,-0.048577,97.506007,1.90957,0.361359,-0.177215,-0.595869,0.613941,0.302992,0.994659,-0.687369,3.038118,1.355212,-0.11046,-0.390125,0.661469,0.768509,0.112053,-0.148823,-0.324361,1.699715,0.790407,2.254807,-0.46092,0.136186,0.795826,0.905963,195.043822,-0.108815,189.119264,-0.695431,-0.141378,-0.383458,0.014365,0.970107,0.054843,-0.072427,0.6089,92.549985,0.127307,-0.064258,-0.176054,-0.914162,0.0,-0.624397,0.673589,-0.280677,-0.86758,0.408288,-0.901395,-0.998408,-0.079261,-0.80062,-0.915206,0.03595,-0.030048,0.470277,0.032579,-0.736246,0.346679,0.92034,0.231659,2.084902,-0.250509,-165.48336,1.646153,-0.957756,-0.380007,160.534905,-0.38236,0.072491,-0.840385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.476587,2.62963,-0.813934,-0.641837,86.941473,2.841442,-0.118794,0.442998,-0.103613,190.157691,-0.505524,-0.38661,1.808119,0.000722,1.73481,1.890117,-0.311408,0.055798,2.503295,0.320756,-0.322263,-0.869041,-0.235538,-0.64033,-0.314378,-0.316959,0.143797,0.144557,0.641865,-0.244856,-1.611444,-0.876161,1.087405,-0.538188,4.833828,2.09633,-0.016612,0.438119,-0.014428,-0.954103,-0.056911,-0.155965,1.346306,1.401186,-0.96947,0.235117,-0.862971,0.688319,0.538846,0.834905,-0.807092,0.393422,0.341454,0.120713,0.831324,0.359891,0.407134,-0.277173,0.741553,0.393028,0.317078,-0.358253,-0.658687,-0.294562,0.571232,-0.343435,0.0,-0.551123,0.376513,-0.79891,0.418156,197.366734,0.432375,-0.995922,0.385206,0.927804,0.505021,-0.608735,0.229518,-0.100204,-0.90024,-0.306803,-0.274475,0.164984,0.224787,2.705248,-0.163289,-164.330033,1.360412,0.241541,0.553115,0.158898,0.44868,0.852597,0.211339,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,0.855924,0.036233,0.210643,-0.723294,-0.527623,-0.022028,-0.134239,1.829273,1.073892,-0.052971,0.718962,0.293196,-0.11597,0.000844,-0.04592,-0.019589,0.292719,0.236289,0.027784,-0.034289,0.325771,0.31074,0.829484,-0.009748,0.406847,0.927942,-0.051585,0.142035,0.780411,8.622656,0.40485,0.273173,-0.00692,0.05288,-0.045327,-0.009851,3.739888,0.735903,1.186977,0.026007,-0.312287,1.440071,0.027164,-0.131779,0.84177,-0.018837,0.754565,0.32692,-0.136477,-0.470421,-0.174387,0.288808,-0.437133,0.054621,-0.241544,-0.27974,0.521917,-0.269693,0.390125,-0.534951,0.912304,0.696027,0.87536,0.024268,0.482054,0.873398,0.0,0.672401,-0.780661,-0.844714,-0.900248,0.24483,-0.775513,0.00637,0.116168,0.383874,0.710388,-0.244414,-0.460484,0.859012,0.163467,0.209253,1.464231,0.00018,-0.284221,0.003421,0.39391,-0.366365,-0.063765,0.067325,-0.290225,0.143366,-0.260879,-0.624666,0.791279,0.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,-0.02097,-0.192612,-0.003371,-0.709477,-0.41045,-0.01713,1.003811,0.319044,-0.167737,-0.553221,1.288308,-0.226414,2.417898,-0.007459,-0.046215,0.885585,-0.361272,-0.187551,-0.174379,154.432622,0.85883,-0.246505,-0.103555,-0.653465,-0.312172,0.621661,0.180549,-0.252963,-0.355378,-0.257187,-0.197236,-0.469487,-0.106108,-0.150164,0.015882,0.001189,-0.009401,0.386927,-0.003984,0.32419,0.096108,-0.021037,0.351781,2.454625,0.055808,0.225581,-0.837405,-0.220054,-0.150528,0.378838,-0.128349,-0.203401,0.76779,0.404423,-0.931008,0.468452,0.889506,0.648729,-0.037298,0.478649,-0.051074,0.781925,0.553974,0.29059,0.067305,-0.232713,0.0,-0.158124,-0.058391,-0.512916,0.128298,-0.138201,-0.095991,0.0074,-0.615108,0.879434,-0.143133,-0.342497,0.794931,0.580801,0.668844,-0.972473,0.183261,-0.017727,0.427151,-0.233097,0.476779,-0.150879,2.257161,-0.966389,-0.48983,-0.957703,-0.912086,-0.316444,0.745381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,-0.022191,0.903969,-0.800069,-0.729865,0.566762,0.886998,0.374896,-0.339541,0.146589,-0.759631,-0.645207,-0.147605,0.227936,0.003324,1.729607,0.950343,-0.306028,6.112463,0.963395,-0.227939,0.306383,0.158449,0.277006,-0.317417,0.344963,-0.951099,0.038655,0.695113,0.877856,-0.37584,1.425102,0.428274,0.490667,-0.548427,1.112964,0.606197,2.084094,0.336745,1.230691,0.624712,-0.008023,-0.138771,0.69204,0.182551,-0.007062,0.506126,-0.264568,0.317055,1.429941,-0.419036,-0.825274,0.007338,-0.218905,-0.126672,0.090382,0.4109,0.929627,-0.092175,-0.001487,0.686107,-0.534669,0.504433,0.820393,0.298967,0.398935,-0.373794,0.0,-0.255992,0.2933,0.516965,-0.910384,-0.318258,-0.60969,-0.999718,-0.436148,0.16732,-0.68918,-0.972097,-0.187331,-0.374564,0.993359,-0.263303,-0.482697,-0.006342,-0.211037,0.960027,-0.67643,0.574839,-0.050072,-0.776921,0.583919,-0.470769,0.653201,0.271796,-0.530586,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
6,1.10443,-0.012527,0.214574,0.420943,-0.164284,-0.007353,-0.269621,-0.288191,-0.183497,0.344496,-0.888593,0.609065,-0.141143,0.005811,0.00957,0.879174,0.686964,-0.056225,-0.087662,0.242223,-0.271137,-0.758277,-0.101298,0.860918,-0.164571,-0.128409,-0.257175,-0.211917,-0.199243,0.254694,0.414902,0.136222,-0.104734,0.447852,0.013503,-0.000929,-1.652659,-0.221151,-0.020522,0.498237,-0.104089,0.019059,-1.341028,-0.159181,0.718999,-0.268416,-0.622338,0.636612,-0.0579,-0.825369,0.196672,0.482946,-0.810202,0.021986,-0.520798,0.64944,-0.447416,-0.316473,0.126216,0.756998,-0.246796,-0.789937,-0.838471,-0.794037,0.791512,-0.732956,0.0,-0.559721,-0.668811,-0.210389,0.234054,-0.704156,-0.833169,0.002294,0.050591,-0.889162,-0.597046,-0.044086,0.391491,0.061052,-0.765151,0.829561,0.26931,2.678364,0.50923,-0.061134,-0.751821,-0.291674,-0.12586,-0.570732,-0.036775,0.068274,-0.01944,0.562131,0.060132,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-1.0
7,-0.018315,-0.229581,0.00512,0.520223,-0.066308,-0.03154,-0.056346,-0.164057,-0.304206,-0.326045,0.223245,0.030566,0.311019,-0.000192,0.902943,-0.08068,-0.281976,-0.107956,-0.116031,155.168402,-0.857357,0.225046,0.112751,-0.64917,-0.312548,-0.200456,0.006391,-0.212045,-0.348581,-0.471425,-0.171611,0.644472,-0.101453,0.009671,-0.045651,-0.013445,-0.052072,0.285968,-0.002781,0.17761,0.080095,-0.066581,0.356036,0.263604,-0.12497,-0.018533,-0.806541,0.269065,-0.124685,0.063113,-0.319859,0.23919,-0.698483,-0.201484,-0.465912,-0.487527,0.026609,-0.113952,-0.853933,0.148026,-0.364523,-0.0483,-0.90483,-0.93649,-0.023415,-0.655257,0.0,0.521655,0.686584,0.812595,0.8895,-0.684717,-0.729026,2.3e-05,-0.171155,0.570858,0.701477,-0.243866,-0.686813,-0.347964,-0.915579,-0.982833,-0.068193,1.363961,-0.440462,-0.220643,-0.298944,0.96031,-0.06301,0.604046,-0.31854,0.323283,0.370823,-0.35461,0.13528,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.000817,0.059012,0.204699,1.367999,-0.710534,-0.028379,-0.234954,0.656868,0.623385,0.19887,0.137233,0.700239,-0.119147,-0.017538,0.107321,0.896289,0.67921,1.336657,0.133706,-0.859044,-0.777884,-0.514397,1.431726,0.348372,0.943474,0.84625,-0.271609,0.525591,1.146443,-0.453647,0.807658,-0.583533,-0.103651,0.451481,-0.022093,-0.006092,12.544957,-0.071688,1.588634,0.181438,-0.868679,12.885378,-0.327314,-0.114921,-0.613969,-0.258153,-0.968365,0.761933,1.110439,-0.858826,-0.714924,0.199469,0.71644,-0.774953,-0.653036,14.956018,-0.933847,-0.241244,0.559919,-0.364142,0.850017,-0.829949,0.439018,0.505216,-0.384188,0.825817,0.0,-0.748672,-0.302597,-0.836246,-0.317374,0.430955,-0.575485,0.000475,-0.721374,0.683243,0.640138,-0.842206,0.562928,0.908331,-0.78516,0.435944,0.073039,1.582582,0.438844,0.124215,0.264093,-0.065926,-0.200342,0.237981,-0.760424,0.058757,0.100526,-0.789458,0.7929,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
9,-0.000649,-0.186502,-0.004733,0.2721,-0.07812,-0.011607,-0.341727,0.352556,-0.229356,-0.914365,-0.159257,0.872662,-0.268125,0.991714,-0.063644,1.925017,0.496529,-0.080526,-0.162618,154.662665,-0.516641,0.041574,-0.170513,-0.651937,-0.321494,-0.948645,-0.267692,-0.268072,1.151995,-0.174082,-0.200051,-0.37505,-0.107157,0.443272,-0.014287,-0.016053,0.023974,0.129051,-0.000726,-0.287266,-0.969595,0.153344,0.357297,-0.22455,0.028499,-0.268518,-0.730094,0.108158,-0.138968,-0.319954,0.206001,0.893451,0.547125,0.332953,0.603871,-0.41727,-0.459476,-0.432044,0.076359,0.755275,-0.408507,0.78183,0.217958,-0.255961,-0.674656,-0.355637,0.0,-0.230703,0.104734,-0.918294,-0.254326,-0.707383,0.524128,-0.000417,-0.231922,-0.304691,0.441278,0.460996,0.479798,-0.571378,0.164975,-0.79866,-0.493345,0.001998,0.568687,-0.249936,0.109725,0.820658,-0.228618,0.757415,-0.280426,-0.939327,0.959136,-0.581909,110.157484,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
from xgboost import XGBClassifier

In [24]:
model = XGBClassifier(random_state=0)

In [25]:
model.fit(X_train_scaled, y_train)

In [26]:
y_pred = model.predict(X_val_scaled)

In [27]:
from sklearn.metrics import classification_report
print(classification_report(y_val, y_pred, digits = 4))

              precision    recall  f1-score   support

           0     0.9204    0.9286    0.9245     89020
           1     0.7566    0.7342    0.7452     26886

    accuracy                         0.8836    115906
   macro avg     0.8385    0.8314    0.8349    115906
weighted avg     0.8824    0.8836    0.8829    115906



In [28]:
# Test set results

In [29]:
test_data_oh.drop(columns = drop_cols, inplace = True)

In [30]:
test_X_scaled = scaler.transform(test_data_oh)
test_X_scaled = pd.DataFrame(test_X_scaled, columns = test_data_oh.columns)

In [31]:
y_test = test_data['target']

In [32]:
y_pred_test = model.predict(test_X_scaled)

In [33]:
print(classification_report(y_test, y_pred_test, digits = 4))

              precision    recall  f1-score   support

           0     0.9370    0.9274    0.9322    307381
           1     0.7799    0.8048    0.7921     98226

    accuracy                         0.8977    405607
   macro avg     0.8584    0.8661    0.8621    405607
weighted avg     0.8989    0.8977    0.8982    405607

