In [1]:
import pandas as pd
import numpy as np

# Disable pandas' display truncation: render every column and every row.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
# Read the train and test parquet files into DataFrames. The paths are relative,
# so they resolve against the working directory the kernel was started in.
train_data = pd.read_parquet('../../data/initial_modelling/train_data_last_month.parquet')
test_data = pd.read_parquet('../../data/initial_modelling/test_data_last_month.parquet')

In [7]:
# Columns to treat as categorical; both frames get the same cast so the
# dtype-based detection in the encoding step picks them up.
cat_cols = ['B_38', 'D_63', 'B_30', 'D_126']
for frame in (train_data, test_data):
    frame[cat_cols] = frame[cat_cols].astype('category')

In [8]:
# Per-column summary of the training frame: non-null counts and dtypes.
train_data.info(show_counts = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 386034 entries, 0 to 386033
Data columns (total 95 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   customer_ID   386034 non-null  string        
 1   S_2           386034 non-null  datetime64[ns]
 2   D_39          386034 non-null  float32       
 3   B_1           386034 non-null  float32       
 4   B_2           386034 non-null  float32       
 5   R_1           386034 non-null  float32       
 6   D_41          386034 non-null  float32       
 7   B_3           386034 non-null  float32       
 8   B_4           386034 non-null  float32       
 9   D_45          386034 non-null  float32       
 10  B_5           386034 non-null  float32       
 11  R_2           386034 non-null  float32       
 12  D_47          386034 non-null  float32       
 13  B_6           386034 non-null  float32       
 14  B_7           386034 non-null  float32       
 15  D_51          386

In [4]:
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import numpy as np

def one_hot_encode_categories(df, categorical_columns=None, drop_original=True, handle_unknown='error'):
    """
    One-hot encode categorical columns in a DataFrame.

    Parameters:
    -----------
    df : pandas.DataFrame
        Input DataFrame containing categorical columns. It is copied, never modified.
    categorical_columns : str, list-like or None
        Column name(s) to encode. A single name, list, tuple, pandas Index or NumPy
        array is accepted. If None, columns with 'object' or 'category' dtype are
        detected automatically.
    drop_original : bool
        Whether to drop the original categorical columns
    handle_unknown : str
        Strategy for handling unknown categories in new data: 'error', 'ignore' or 'infrequent_if_exist'

    Returns:
    --------
    pandas.DataFrame
        Copy of df with the one-hot encoded columns appended on the right
    OneHotEncoder or None
        Fitted encoder for future transformations; None when there was nothing to encode

    Raises:
    -------
    KeyError
        If a requested column is not present in df.
    """
    # Make a copy to avoid modifying the original
    result_df = df.copy()

    # Normalise the column selection to a plain list. Array-likes such as a pandas
    # Index or encoder.feature_names_in_ cannot be truth-tested with `not`, and a
    # bare string would otherwise select a Series instead of a one-column frame.
    if categorical_columns is None:
        categorical_columns = result_df.select_dtypes(include=['object', 'category']).columns.tolist()
    elif isinstance(categorical_columns, str):
        categorical_columns = [categorical_columns]
    else:
        categorical_columns = list(categorical_columns)

    if not categorical_columns:
        print("No categorical columns found to encode.")
        return result_df, None

    # Fail early, with a readable message, before any encoder is fitted.
    missing_columns = [col for col in categorical_columns if col not in result_df.columns]
    if missing_columns:
        raise KeyError(f"Columns not found in DataFrame: {missing_columns}")

    # drop='if_binary' keeps a single indicator for two-level columns;
    # columns with more levels keep one indicator per level.
    encoder = OneHotEncoder(sparse_output=False, handle_unknown=handle_unknown, drop='if_binary')

    # Fit and transform the categorical columns
    encoded_array = encoder.fit_transform(result_df[categorical_columns])

    # Generated names have the form "<column>_<category>"
    feature_names = encoder.get_feature_names_out(categorical_columns)

    # Reuse the input index so the concat below aligns row by row
    encoded_df = pd.DataFrame(encoded_array, columns=feature_names, index=result_df.index)

    if drop_original:
        # Drop the original categorical columns
        result_df = result_df.drop(columns=categorical_columns)

    # Append the encoded columns to the remaining columns
    result_df = pd.concat([result_df, encoded_df], axis=1)

    print(f"One-hot encoded {len(categorical_columns)} categorical columns into {len(feature_names)} binary features.")

    return result_df, encoder

In [9]:
# Encode the training frame. No column list is passed, so the categorical columns
# are detected by dtype; handle_unknown='ignore' is forwarded to the encoder so
# it can later transform data containing categories it was not fitted on.
train_data_oh, encoder = one_hot_encode_categories(train_data, handle_unknown='ignore')

One-hot encoded 4 categorical columns into 17 binary features.


In [17]:
# Preview the first rows of the encoded training frame.
train_data_oh.head()

Unnamed: 0,customer_ID,S_2,D_39,B_1,B_2,R_1,D_41,B_3,B_4,D_45,B_5,R_2,D_47,B_6,B_7,D_51,B_9,R_3,B_10,S_5,B_11,S_6,D_54,R_4,B_12,S_8,B_13,R_5,D_58,B_14,D_60,S_11,D_65,B_16,B_18,B_19,B_20,S_12,R_6,S_13,B_21,B_22,D_71,S_15,B_23,P_4,D_75,B_24,R_7,B_26,R_8,S_16,R_10,R_11,B_27,S_17,R_12,B_28,R_13,R_14,R_15,R_16,S_18,D_86,R_17,R_18,B_31,S_19,R_19,B_32,S_20,R_20,R_21,B_33,R_22,R_23,D_92,D_93,D_94,R_24,R_25,D_96,D_102,B_36,D_127,D_133,R_28,D_140,D_144,target,end_of_month,D_63_CL,D_63_CO,D_63_CR,D_63_XL,D_63_XM,D_63_XZ,B_30_0.0,B_30_1.0,B_30_2.0,B_38_1.0,B_38_2.0,B_38_3.0,B_38_4.0,B_38_5.0,B_38_6.0,B_38_7.0,D_126_1.0
0,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2018-02-21,0.002425,0.019837,1.008307,0.000607,0.006174,0.007454,0.080204,0.740062,0.132626,0.001436,0.538275,0.150155,0.060502,0.670537,0.001064,0.009852,0.325464,0.050146,0.00726,0.00651,1.001046,0.00444,0.11106,0.755829,0.092181,0.002498,0.00572,0.029953,0.141639,0.561116,0.009596,0.005605,1.005261,0.007416,0.00971,0.187413,0.004807,0.846702,0.003347,0.006082,0.312937,0.200894,0.044315,0.003468,0.009681,0.008714,0.003488,1.3e-05,0.008551,0.005027,0.004827,0.008204,0.002737,0.00709,1.001785,0.098844,0.009521,0.004815,0.009732,0.00846,0.007482,0.003417,0.004134,0.0071,1,0.000869,0.002693,0.008749,0.004063,0.005139,0.007752,1.004296,0.000684,0.000934,0.004344,0.004264,0.007412,0.004308,0.00628,0.005712,0.922532,0.009383,1.007527,0.000122,0.007725,0.001976,0.006346,0,2018-02-28,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
1,00000fd6641609c6ece5454664794f0340ad84dddce9a2...,2018-02-06,0.53629,0.034558,1.005419,0.007248,0.006274,0.008733,0.020876,0.263736,0.010155,0.003805,0.398607,0.211673,0.033179,0.340006,0.011734,0.109444,0.301616,0.006361,0.015018,0.005666,1.005178,0.000279,0.022597,0.319952,0.008499,0.00333,0.008624,0.023292,0.524772,0.444329,0.00066,0.003267,1.001247,0.002385,0.00035,0.188421,0.002118,0.288901,0.008382,0.009769,0.010515,0.406084,0.014946,0.005555,0.009073,0.009167,0.009282,0.007601,0.006927,6.7e-05,0.00467,0.005223,1.5e-05,0.003619,1.002889,0.014986,0.003408,0.009129,0.003361,0.009003,0.00515,0.002534,0.002449,0.007003,1,0.009282,0.000871,0.005917,0.009783,0.001053,0.006891,1.000591,0.005175,0.001321,0.008394,0.005752,0.009342,0.006949,0.005339,0.002806,0.001432,0.004645,0.003569,0.009592,0.006052,0.004627,0.002528,0,2018-02-28,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
2,00001b22f846c82c51f6e3958ccd81970162bae8b007e8...,2018-02-09,0.001526,0.008318,0.818953,0.006756,0.009511,0.004373,0.057312,0.245516,0.000215,0.009075,0.339566,0.178068,0.04298,0.337689,0.000422,0.008294,0.302024,0.004721,0.007528,1.006839,1.006134,0.00138,0.0061,0.009502,0.002143,0.008246,0.009205,0.002675,0.007859,0.288868,0.005108,0.005106,1.007293,0.00192,0.009832,0.187889,0.0008,0.000768,0.006931,0.001579,0.013982,0.508765,0.028005,0.007414,0.009046,0.003384,0.005028,0.00357,0.002032,0.00147,0.006335,0.009125,0.005929,0.006845,1.007555,0.05083,8.5e-05,0.005226,0.009405,0.00076,0.001418,0.007448,0.000301,0.007136,1,0.00878,0.006727,0.004732,0.003573,0.007283,0.006645,1.004516,0.006888,0.001481,0.005212,0.005087,0.001395,0.0025,0.006046,0.009363,0.004414,0.006159,0.002729,0.00198,0.005101,0.009543,0.007325,0,2018-02-28,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,000041bdba6ecadd89a52d11886e8eaaec9325906c9723...,2018-02-03,0.00838,0.014589,1.009999,0.003768,0.008239,0.004804,0.008486,0.074935,0.115955,0.009204,0.410036,0.195544,0.012364,0.008012,0.034357,0.001734,0.401879,0.030321,0.004926,0.009158,1.004057,0.003462,0.075619,0.174018,0.060876,0.003396,0.006757,0.016034,0.885493,0.367468,0.00122,0.169134,1.006742,0.002138,0.001995,0.35061,0.004044,0.423225,0.009143,0.005501,0.01252,0.501547,0.003451,0.008617,0.002992,0.005909,0.002774,0.003175,0.004093,0.00313,0.0041,0.003846,0.000866,0.004267,1.003564,0.017212,0.001522,0.006544,0.0092,0.003933,0.008703,0.002066,0.008195,0.004076,1,0.009488,0.007342,0.00361,0.002357,0.008257,0.006674,1.007992,0.001581,0.004146,0.001289,0.005549,0.001761,0.007192,0.00513,0.005974,0.003777,0.008666,0.001997,0.001157,0.000564,0.006928,0.004819,0,2018-02-28,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
4,00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8a...,2018-02-27,0.004148,0.008522,0.813281,0.005484,0.004855,0.003961,0.284587,0.071558,0.005731,0.002636,0.484715,0.054377,0.164024,0.000174,0.009553,0.007358,0.124556,0.008466,0.009344,1.00693,1.007329,0.005195,0.009397,0.002761,0.007728,0.00735,0.380526,0.00727,0.004304,0.287433,0.003862,0.000983,0.529498,0.005764,0.00688,0.19237,0.000626,0.00128,0.006742,0.006121,0.013919,0.509899,0.153219,0.002815,0.134695,0.002929,0.001414,0.001871,0.000962,0.002861,0.005402,0.007709,0.004798,2.6e-05,1.003287,0.261153,0.00872,0.007174,0.002435,0.009797,0.001105,0.009264,0.008749,0.009143,1,0.000208,0.001196,0.005008,0.004838,0.009211,0.006099,1.004277,0.009683,0.005074,0.009063,0.00923,0.007937,0.009874,0.000453,0.008853,0.578903,0.009517,0.003045,0.002636,0.002415,0.002953,0.006325,0,2018-02-28,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [12]:
# Columns the encoder was fitted on, in fit order.
encoder.feature_names_in_

array(['D_63', 'B_30', 'B_38', 'D_126'], dtype=object)

In [15]:
# Check that the fitted encoder can transform the same columns of the test set.
encoder.transform(test_data[encoder.feature_names_in_])

array([[0., 0., 1., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 1.],
       ...,
       [0., 1., 0., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 1.],
       [1., 0., 0., ..., 0., 0., 1.]])

In [19]:
# Encode the test set with the encoder fitted on the training data: transform
# the fitted columns, wrap the result in a frame aligned on the test index,
# then swap it in for the original categorical columns.
test_encoded = pd.DataFrame(
    encoder.transform(test_data[encoder.feature_names_in_]),
    columns=encoder.get_feature_names_out(),
    index=test_data.index,
)
test_data_oh = pd.concat(
    [test_data.drop(columns=encoder.feature_names_in_), test_encoded],
    axis=1,
)

In [21]:
# Shape of the encoded test frame.
# NOTE(review): the displayed column count (108) equals that of train_data_oh,
# which includes 'target' — confirm which extra column the test frame carries.
test_data_oh.shape

(391349, 108)

In [50]:
from sklearn.model_selection import train_test_split
# 70/30 split, stratified on the label and seeded for reproducibility.
# Both halves still contain 'target' at this point.
X_train, X_val = train_test_split(
    train_data_oh,
    test_size=0.3,
    random_state=0,
    stratify=train_data_oh['target'],
)

In [51]:
# Pull the labels out of each split; they keep the row index of their split.
y_train, y_val = X_train['target'], X_val['target']

In [52]:
# Confirm the split sizes; both frames still include 'customer_ID' and 'target' here.
X_train.shape, X_val.shape

((270223, 108), (115811, 108))

In [29]:
# Persist the (customer_ID, target) pairs of each split. This must run before
# those columns are dropped from X_train / X_val.
for split_frame, label_path in (
    (X_train, '../../data/initial_modelling/y_train.parquet'),
    (X_val, '../../data/initial_modelling/y_val.parquet'),
):
    split_frame[['customer_ID', 'target']].to_parquet(label_path, index=False)

In [53]:
# Non-feature columns: the row identifier, the label and the two date columns.
drop_cols = ['customer_ID', 'target', 'S_2', 'end_of_month']

In [54]:
# Strip the non-feature columns from both splits. The names are rebound, so
# re-running this cell without re-running the split raises a KeyError.
X_train, X_val = (split.drop(columns=drop_cols) for split in (X_train, X_val))

In [55]:
# Preview the validation features after the non-feature columns were dropped.
X_val.head(10)

Unnamed: 0,D_39,B_1,B_2,R_1,D_41,B_3,B_4,D_45,B_5,R_2,D_47,B_6,B_7,D_51,B_9,R_3,B_10,S_5,B_11,S_6,D_54,R_4,B_12,S_8,B_13,R_5,D_58,B_14,D_60,S_11,D_65,B_16,B_18,B_19,B_20,S_12,R_6,S_13,B_21,B_22,D_71,S_15,B_23,P_4,D_75,B_24,R_7,B_26,R_8,S_16,R_10,R_11,B_27,S_17,R_12,B_28,R_13,R_14,R_15,R_16,S_18,D_86,R_17,R_18,B_31,S_19,R_19,B_32,S_20,R_20,R_21,B_33,R_22,R_23,D_92,D_93,D_94,R_24,R_25,D_96,D_102,B_36,D_127,D_133,R_28,D_140,D_144,D_63_CL,D_63_CO,D_63_CR,D_63_XL,D_63_XM,D_63_XZ,B_30_0.0,B_30_1.0,B_30_2.0,B_38_1.0,B_38_2.0,B_38_3.0,B_38_4.0,B_38_5.0,B_38_6.0,B_38_7.0,D_126_1.0
88325,0.120957,0.035195,1.004083,0.004925,0.001032,0.008988,0.004605,0.203044,0.07545,0.006382,0.735497,0.298437,0.02746,0.337887,0.051187,3e-06,0.296798,0.07315,0.017205,0.004084,1.004965,4.3e-05,0.032905,0.665858,0.038356,0.000374,0.00622,0.017933,1.005045,0.243441,0.002926,0.092435,1.007904,0.003588,0.009979,0.329074,0.005426,0.430447,0.005244,0.003521,0.008785,0.206735,0.015719,0.003484,0.003096,0.003422,0.006192,0.002962,0.002545,0.009316,0.009305,0.009885,0.006346,0.009987,1.003134,0.009227,0.008306,0.004441,0.008891,0.003154,0.002763,0.005386,0.008888,0.001875,1,0.004231,0.009065,0.001634,0.004603,0.006467,0.00476,1.003279,0.009976,0.000156,0.002362,0.009334,0.008191,0.000628,0.008596,0.008027,0.005424,0.005984,0.009219,0.001255,0.004179,0.002441,0.007002,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
193017,0.00172,0.001524,0.813657,0.000218,0.001056,0.009691,0.01472,0.056135,0.000175,0.00764,0.98586,0.181085,0.031458,0.002181,0.00956,0.003043,0.296927,0.006165,0.003032,1.005942,1.002414,0.001315,0.013563,0.007538,0.005112,0.002641,0.00717,0.003924,0.005425,0.28123,0.00144,0.00029,1.008139,0.002709,0.009276,0.189697,0.00573,0.007814,0.002408,0.000216,0.010108,0.503985,0.016175,0.008739,0.007404,0.001946,0.008612,0.009489,0.003646,0.002551,0.009739,0.000797,0.009569,0.007202,1.000197,0.014193,0.003337,0.004228,0.009351,0.006876,0.004256,0.004741,0.003319,0.003399,1,0.005198,0.001395,0.000272,0.005588,0.008388,0.001862,1.004562,0.003931,0.007479,0.002614,0.003106,0.007311,0.009697,0.003515,0.001204,0.175911,0.004604,0.006039,0.002105,0.003323,0.004103,0.009339,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
40534,0.47185,0.018508,0.813154,0.007822,0.008074,0.064667,0.233385,0.05048,0.001866,0.008061,0.057846,0.038433,0.271255,0.007484,0.012184,0.102959,0.05598,0.025713,0.010016,0.002763,1.001114,0.008404,0.041913,0.10434,0.090945,0.003674,0.211854,0.016622,0.027233,0.365584,0.006827,0.251002,0.584839,0.258146,0.00079,0.224926,0.007859,0.005775,0.012626,0.008864,0.011425,0.707718,0.257138,0.008765,0.202443,0.002255,0.002568,0.000449,0.005869,0.035506,0.002641,0.009004,0.000636,0.003298,1.007228,0.170887,0.008695,0.000537,0.005625,0.007995,0.003497,0.006232,0.003083,0.00743,1,0.001157,0.000848,0.006349,0.001337,0.001023,0.007651,1.004773,0.006614,0.006662,0.000264,0.004536,0.001058,0.005747,0.001888,0.002615,0.001948,0.004887,0.006033,0.008546,0.006798,0.007247,0.002695,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
176556,0.005953,0.007611,0.819612,0.008702,0.005017,0.0026,0.157042,0.278636,0.000282,0.006101,0.351115,0.033643,0.235353,0.334022,0.005273,0.004867,0.033581,0.002144,0.00407,1.000365,1.003282,0.004653,0.006817,0.007157,0.006367,0.007687,0.635153,0.004225,0.002586,0.287302,0.00102,0.000535,0.532575,0.000974,0.00073,0.187517,0.009375,0.004795,0.00105,0.000376,0.015446,0.505249,0.21749,0.005107,0.402494,0.009637,0.005589,0.007935,0.001054,0.002765,0.008124,0.002988,0.003555,0.00181,1.008538,0.189585,0.008112,0.001808,0.004845,0.006352,0.005411,0.003592,0.005444,0.002983,1,0.006697,0.007942,0.008503,0.008916,0.008992,0.008573,1.003086,0.001257,0.001356,0.009462,0.006382,0.007623,0.006569,0.002384,0.00394,0.439094,0.008016,0.006517,0.006614,0.009997,0.005797,0.672997,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
25107,0.914097,0.372201,0.062589,1.005858,1.281813,0.649139,0.219919,0.185468,0.009233,1.009922,0.241652,0.0036,0.959769,0.006046,0.64495,0.003138,0.009305,0.012424,0.325252,0.000198,0.007834,1.009372,0.015364,0.002465,0.008569,1.009028,0.45939,0.088829,0.249077,0.280814,0.684729,1.005448,0.15555,0.909458,1.006194,0.191248,0.806774,0.009099,0.000724,0.002022,0.007099,0.500211,0.884895,0.004402,0.336563,0.007525,0.339687,0.138273,2.00141,0.006055,0.006824,0.002742,0.003468,0.004348,1.008689,0.157413,0.007467,4.416104,0.003338,0.002107,0.00944,0.004189,0.000269,0.001755,1,0.009489,0.001823,0.009987,0.001813,2.007152,0.00602,0.008621,0.002299,0.008366,0.009758,0.007653,0.000627,1.00252,0.009906,0.002835,0.008352,0.000234,0.000124,0.004336,0.008127,0.00568,1.120396,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
260677,1.060211,0.096243,0.009476,0.009667,1.538073,0.318853,0.035278,0.234094,0.029004,0.008038,0.31166,0.101313,0.080044,0.335096,0.385723,0.007472,0.031862,0.028901,0.075475,0.007148,0.009382,0.005236,0.034555,0.936569,0.07832,0.004824,0.033865,0.078057,0.835826,0.123809,0.246076,1.00604,0.213468,0.186632,1.009557,0.191872,0.001742,0.288649,0.007669,0.001546,0.095923,0.000557,0.060926,0.00354,0.137851,0.003502,0.007438,0.150261,0.004772,0.005953,0.002043,0.004405,0.001948,0.009913,1.005209,0.047072,0.005564,0.002411,0.009316,0.000897,0.003344,0.009065,0.009813,0.001911,1,0.004802,0.004374,0.004292,0.003907,0.003188,0.006338,0.006109,0.000797,0.007907,0.00637,0.005159,0.007783,0.003887,0.003518,0.005056,0.003846,0.006919,1.003078,0.005001,0.009005,0.004683,0.004042,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
311849,0.001049,0.061602,1.00316,0.009076,0.009511,0.141556,0.310965,0.059283,0.534998,0.007962,0.352769,0.2944,0.052143,0.338737,0.544988,0.008216,0.452425,0.064516,0.059917,0.008989,1.009302,0.009471,0.501437,1.002293,0.626746,0.006639,0.000854,0.589767,0.781856,0.406574,0.000433,0.339669,1.007216,0.258093,0.000675,0.384602,0.000183,0.682022,0.003035,0.003728,0.001225,0.105987,0.05102,0.002164,0.000334,0.001739,0.005373,0.007422,0.00621,0.006832,0.006071,0.006232,0.002648,0.634006,1.002644,0.351086,0.005056,0.002069,0.008778,0.003201,0.008244,0.004262,0.004859,0.005592,1,0.001609,0.009767,0.006696,0.009209,0.006363,0.005281,1.006859,0.009497,0.002182,0.00116,0.001801,0.00119,0.003645,0.004654,0.0087,0.003579,0.000605,0.00834,0.003738,0.006391,0.009038,0.000587,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
237114,0.004595,0.00899,0.817466,0.006562,0.006139,0.008025,0.02782,0.145357,0.008742,0.006196,0.488399,0.162101,0.026901,0.342532,0.009863,0.002054,0.294528,0.007662,0.004817,1.00242,1.006739,0.006346,0.008897,0.003736,0.00248,0.001846,0.006606,0.003053,0.004123,0.287707,0.000151,0.002212,1.008177,0.000184,0.006207,0.185666,0.0096,0.00104,0.003827,0.007283,0.012294,0.509632,0.021343,0.004187,0.004302,0.002304,0.005624,0.008701,0.000817,0.009128,0.006849,0.006598,0.004443,0.007796,1.007485,0.027058,0.001749,0.006457,0.006088,0.002638,0.009111,0.001902,0.001482,0.005991,1,0.009185,0.005177,0.003561,0.005822,0.009474,0.00687,1.002316,0.001566,0.003078,0.007619,0.004253,0.008797,0.004448,0.005546,0.004173,0.006486,0.006595,0.008327,0.005835,0.008645,0.004015,0.003548,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
17785,1.064861,0.415193,0.001849,0.007748,0.488336,0.08103,0.07109,0.034467,0.086052,0.00447,0.482694,0.081054,0.069137,0.00016,0.304941,0.308225,0.116424,0.001267,0.399915,0.006289,1.006996,0.002038,0.022959,0.244955,0.321675,0.009798,0.034894,0.234772,0.624913,0.363442,0.062433,0.334949,0.208196,0.007505,0.184167,0.19196,0.007013,0.005051,0.008898,0.004016,0.051976,0.408328,0.061162,0.007355,0.138245,0.009695,0.004983,0.249838,0.002442,0.002698,0.007103,0.508554,0.006714,0.000986,1.009774,0.172584,0.006217,0.00937,0.005054,0.506976,0.009653,0.005734,0.003546,0.005005,1,0.003699,0.005812,0.005613,0.000514,0.003936,0.007145,0.008664,0.00225,0.009486,0.006865,0.000312,0.006007,0.000717,0.00028,0.000835,0.109386,6.6e-05,0.003544,0.008704,0.008144,0.002153,0.005036,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
378326,0.915783,0.054062,0.002374,0.008785,0.007572,0.009657,0.004709,0.060529,0.010042,0.008896,0.65568,0.189627,0.02959,0.336655,0.01527,0.505011,0.180244,0.003235,0.029606,1.008251,1.007788,0.009381,0.012412,0.008757,0.018223,0.00782,2.1e-05,0.016564,0.414445,0.282358,0.083464,0.343281,0.448598,0.004215,0.062155,0.18838,0.003294,0.002773,0.001495,0.00693,0.010638,0.509149,0.019028,0.008368,0.074793,0.008979,0.007647,0.066269,0.00648,0.00128,0.002646,0.008403,0.008833,0.008097,1.00354,0.021116,0.007427,0.007558,0.005902,0.002634,0.005396,0.005826,0.003368,0.001428,1,0.009687,0.005331,0.003918,0.008912,0.001428,0.002202,0.003109,0.009857,0.006399,0.001514,0.006453,0.003707,0.009718,0.004182,0.00925,0.008932,0.001707,1.009332,0.002319,0.005885,0.009986,0.009603,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0


In [56]:
from sklearn.preprocessing import RobustScaler
# Fit the scaler on the training split only and reuse the fitted statistics for
# the validation split, so the validation data does not influence the scaling.
# NOTE(review): X_train still contains the one-hot indicator columns, so they are
# rescaled as well (the displayed output shows indicator values of -1.0, and
# columns such as S_6 reaching ~155). Consider restricting the scaler to the
# continuous columns — confirm against what the downstream model expects.
scaler = RobustScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

In [57]:
# Wrap the scaled NumPy arrays back into DataFrames. The row index is passed
# explicitly: without it pandas assigns a fresh 0..n-1 RangeIndex, which no
# longer lines up with y_train / y_val (they keep the shuffled index produced
# by train_test_split), so any index-aligned operation would pair wrong rows.
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
X_val_scaled = pd.DataFrame(X_val_scaled, columns=X_train.columns, index=X_val.index)

In [58]:
# Preview the scaled training features.
X_train_scaled.head(10)

Unnamed: 0,D_39,B_1,B_2,R_1,D_41,B_3,B_4,D_45,B_5,R_2,D_47,B_6,B_7,D_51,B_9,R_3,B_10,S_5,B_11,S_6,D_54,R_4,B_12,S_8,B_13,R_5,D_58,B_14,D_60,S_11,D_65,B_16,B_18,B_19,B_20,S_12,R_6,S_13,B_21,B_22,D_71,S_15,B_23,P_4,D_75,B_24,R_7,B_26,R_8,S_16,R_10,R_11,B_27,S_17,R_12,B_28,R_13,R_14,R_15,R_16,S_18,D_86,R_17,R_18,B_31,S_19,R_19,B_32,S_20,R_20,R_21,B_33,R_22,R_23,D_92,D_93,D_94,R_24,R_25,D_96,D_102,B_36,D_127,D_133,R_28,D_140,D_144,D_63_CL,D_63_CO,D_63_CR,D_63_XL,D_63_XM,D_63_XZ,B_30_0.0,B_30_1.0,B_30_2.0,B_38_1.0,B_38_2.0,B_38_3.0,B_38_4.0,B_38_5.0,B_38_6.0,B_38_7.0,D_126_1.0
0,-0.09234,-0.179184,0.005511,-0.358213,-0.830086,-0.014621,-0.248679,0.398645,-0.298152,-0.895949,1.743955,0.778834,-0.168891,1.003377,-0.055942,-0.07022,0.701141,-0.075761,-0.117106,154.671111,-0.731415,-0.524039,-0.099336,-0.637713,-0.258112,0.624936,-0.264184,-0.2406,-0.34662,-0.189953,-0.194801,-0.09335,0.453367,0.006936,0.000174,0.348935,-0.363672,-0.018399,0.010298,0.370727,-0.200185,0.361136,-0.176685,-0.343606,-0.249899,0.596609,-0.839411,0.848318,0.415128,-0.495343,0.555486,-0.250483,-0.480877,0.118248,-0.505002,-0.293364,-0.821713,0.768591,-0.630215,-0.541403,-0.847114,0.235637,0.548836,-0.739062,0.0,0.478225,-0.315801,-0.617955,0.876819,-0.494403,0.880331,0.001481,0.140212,-0.403684,0.145079,-0.984856,-0.38701,0.281075,0.634471,-0.58154,2.060366,-0.615877,0.029447,-0.952089,-0.240178,0.522962,-0.580366,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.612272,-0.005333,0.208336,-0.718587,0.444595,-0.022155,-0.014321,-0.389799,-0.10426,-0.59849,-0.180305,1.326962,-0.138426,1.0043,0.134976,-0.078017,0.685506,5.531883,-0.012388,-0.895955,-0.794075,0.055619,2.595322,0.35892,1.595477,0.637655,-0.267043,0.523625,0.584228,-0.564003,-0.085233,-0.001965,0.446397,-0.022646,-0.013002,17.082814,-0.381787,0.990588,0.946708,0.087701,-0.424692,-0.340223,-0.139413,0.109099,-0.252935,0.226885,0.213508,-0.388033,-0.353623,-0.677864,-0.709799,-0.217832,-0.478303,0.531035,0.958592,-0.064985,0.031016,0.751804,0.936601,-0.361093,-0.752776,-0.319844,0.505865,0.389486,0.0,-0.22425,-0.968776,-0.169272,-0.659069,0.705056,0.298055,-0.000253,0.605604,0.225894,181.884884,-0.64992,0.374676,0.343857,-0.624992,0.642741,-0.014972,0.875184,-0.618548,-0.575995,-0.909723,-0.025012,0.717438,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2.137318,0.053736,0.203783,-0.938258,0.201177,-0.012908,-0.36085,-0.204697,0.699555,-0.761914,-0.750451,1.887921,-0.13284,0.992581,0.00403,-0.067945,0.873174,0.308653,0.008755,-0.329931,-0.850768,-0.141533,-0.144128,-0.002332,0.023663,0.568464,-0.283372,-0.122189,1.120974,-0.372662,-0.996431,-0.091692,0.443375,-0.03586,-0.01345,8.788065,-0.709632,0.648917,-0.557632,-0.449543,0.052178,-0.004003,-0.168891,0.587226,-0.265388,-0.610351,0.412703,0.783056,-0.14033,0.75115,-0.962227,-0.416723,0.943866,-0.547002,0.668608,-0.384925,-0.442428,-0.574936,0.775303,-0.974987,0.934835,-0.67984,-0.273421,-0.977952,0.0,0.50221,0.752285,-0.952262,0.86339,0.261182,0.046568,-0.000142,-0.5652,0.356735,0.058313,0.81791,0.196444,0.439304,0.767467,-0.714939,-0.00038,0.305645,0.510251,-0.782966,0.922314,-0.720903,-0.311874,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-1.0
3,-0.096258,-0.207411,0.002801,0.212193,-0.439213,-0.041781,-0.348323,0.38957,-0.230332,0.683254,0.030338,0.59959,-0.224658,-0.016679,-0.045391,1.910875,0.711462,-0.136672,-0.154588,0.520753,0.533645,0.174473,0.101968,-0.645727,0.266643,0.311671,-0.278495,-0.234381,-0.346581,-0.181455,0.237741,0.089456,0.443171,-0.016716,-0.013732,-0.087156,-0.910955,-0.012375,-0.906951,0.013277,-0.193512,0.334293,-0.192001,0.382583,-0.258409,0.107352,-0.240672,-0.662332,0.037259,-0.155389,0.63683,93.643924,-0.937919,0.1623,-0.102432,-0.381439,-0.574382,0.888804,0.845828,0.649736,0.336668,-0.791629,-0.706961,0.439333,0.0,-0.80996,-0.391681,-0.021612,0.789243,-0.517037,-0.525999,0.003355,0.047938,0.285281,0.826397,0.879444,-0.769798,-0.73525,0.224853,-0.08492,-0.011694,0.043794,-0.210563,-0.72511,0.345268,0.395598,145.876066,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-1.0
4,-0.100242,0.634642,-0.830302,85.396077,-0.433001,1.753446,1.174787,-0.482649,-0.183011,0.133301,-1.016579,-0.407764,1.497093,-0.001347,1.796303,-0.047236,-0.330474,0.395357,0.530754,0.402339,-0.79836,-0.164888,-0.0684,-0.638493,-0.254514,0.872808,0.86703,-0.061332,-0.213859,-0.56798,-0.701918,0.996833,-0.568351,3.080227,1.497693,-0.01516,0.598627,-0.007326,393.352596,78.508761,-0.093351,2.367281,1.438318,0.791094,0.99867,-0.059874,0.102436,-0.085126,0.590199,227.772626,-0.113414,0.34419,-0.434059,-0.286948,0.534864,0.281532,0.298263,0.510805,0.189416,-0.21187,0.591172,0.901544,0.702129,-0.085752,0.0,0.528813,-0.031311,0.849004,199.112084,-0.824828,197.067382,-0.993343,0.929812,0.48739,-0.805466,-0.068947,-0.187121,-0.599525,-0.32033,-0.218378,-0.026372,0.346761,0.557356,0.379117,0.416902,-0.30864,-0.984649,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
5,0.517716,2.219104,-0.850628,-0.591949,0.625213,1.734598,1.275753,1.499812,0.325752,0.067877,1.199769,-0.415631,1.796882,0.001543,1.033621,-0.015063,-0.367873,0.603618,2.671934,-0.309069,-0.149255,-0.948479,0.36886,0.303514,-0.225832,-0.375781,0.788578,2.302685,0.25435,-0.577706,-0.505982,0.99376,-0.674063,4.656659,1.703517,35.954062,-0.247874,-0.015382,-0.663866,79.187968,0.274892,-0.662342,1.924643,0.681332,1.012117,0.082559,0.320433,-0.038092,0.51766,0.462689,0.367902,0.476932,-0.682307,0.688698,-0.235348,1.290857,-0.445308,0.744113,0.409801,0.233231,-0.27033,0.417561,0.331479,-0.117184,0.0,0.512637,-0.184314,0.901054,-0.36714,0.000564,-0.377299,-0.997397,0.442758,-0.750616,0.564854,0.266741,0.696269,0.182986,0.869582,0.555779,0.938942,0.684291,0.004949,-0.730182,0.155868,-0.832407,0.278518,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
6,1.434455,0.036966,0.204946,-0.329928,0.039718,-0.045201,-0.078404,-0.4686,-0.20603,-0.402,-0.085012,0.609877,-0.138528,-0.014898,-0.032272,-0.066274,0.634055,2.056649,0.06983,0.335544,0.711235,0.274939,5.077458,0.008067,6.581249,-0.159023,-0.260503,1.681361,1.089209,2.995459,0.590305,-0.091472,0.445818,-0.005128,-0.000679,5.040269,-0.74056,2.351744,0.104206,-0.124382,2.444369,-0.001326,-0.15345,-0.616449,-0.273235,128.849942,0.147092,-0.810483,0.553104,0.616429,-0.637076,-0.519255,-0.289531,-0.893419,-177.670338,0.088776,0.726776,0.400555,-0.479921,0.732155,-0.061354,-0.087222,0.803786,-0.09339,0.0,0.477284,0.045181,0.900475,0.532362,-0.638308,-0.103442,-0.996616,0.643744,0.002953,0.278449,0.367912,-0.937886,0.717508,0.877015,-0.420559,1.474077,-0.397947,0.305826,0.007756,0.811289,0.869147,0.083301,0.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
7,-0.077194,1.079904,-0.873982,171.524888,77.682134,1.321848,0.82805,-0.363185,1.289797,187.461273,-0.509552,-0.411823,1.711933,-0.003602,1.567904,-0.033587,-0.348633,1.000463,1.158825,-0.41856,-0.631182,0.355319,0.325267,-0.146196,0.090353,0.261262,1.127288,0.856547,-0.153244,-0.791851,44.457423,0.997147,-0.794514,1.563132,1.701893,0.239118,10.890988,-0.007036,-0.706387,78.060093,-0.359418,0.019027,1.937473,0.274867,1.238872,-0.254824,-0.398618,56.140608,0.835349,-0.854848,188.398869,0.239238,-0.698104,0.026952,-0.510666,2.395043,0.916748,210.267542,-0.984235,-0.726322,0.617947,-0.977976,0.570719,0.674684,0.0,-0.078099,-0.985338,-0.729594,0.272175,196.207715,0.535032,-0.996068,0.704516,-0.179458,0.135061,-0.926455,-0.219172,197.01837,-0.532394,0.322151,-0.017604,0.033776,0.116272,-0.642556,-0.571077,0.217997,0.249772,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
8,1.32481,0.022725,0.211449,-0.552515,-0.653562,-0.038466,-0.307197,0.637592,-0.046013,-0.293851,0.232084,0.659547,-0.174451,0.999684,0.060557,-0.022697,0.716708,4.177398,0.014856,0.024488,-0.472667,-0.168786,0.852685,0.795626,1.43969,0.151964,-0.272138,0.205932,1.204438,-0.016242,-0.174372,-0.092084,0.444569,-0.037094,-0.012741,-0.09533,0.703572,0.993967,-0.217939,-0.891003,0.199016,-0.672322,-0.140691,-0.080346,-0.281752,0.794057,0.608949,-0.463809,-0.086649,-0.652873,0.434629,0.51554,-0.692308,-0.576578,0.979428,-0.308695,0.302795,0.624466,-0.331831,-0.331698,-0.806799,-0.595482,-0.514037,-0.936888,0.0,-0.329854,-0.292867,-0.408176,0.478281,-0.264991,-0.894149,0.006485,0.188404,-0.259085,0.534119,-0.935069,0.167668,0.025623,0.198548,-0.656864,0.215883,-0.867911,176.852073,0.509671,-0.249616,-0.466539,0.391264,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
9,-0.073348,-0.205609,0.005405,-0.874639,-0.30378,-0.004201,-0.342843,0.322379,-0.257363,0.435415,1.714866,1.628877,-0.251785,-0.012172,-0.031646,0.006435,0.505795,-0.142419,-0.146176,155.147089,-0.374254,0.454959,-0.1322,-0.645159,-0.319912,0.825213,-0.272747,-0.231406,1.216012,-0.187317,-0.184943,-0.098921,0.447783,-0.039707,-0.009782,-0.148407,0.750239,-0.003661,-0.178701,-0.368353,0.133619,0.348361,-0.207952,0.012181,-0.259955,-0.624737,-0.128771,-0.440658,0.396239,0.033114,-0.540621,-0.398889,-0.413368,-0.31946,-0.335455,-0.432979,0.14724,-0.857929,0.117659,-0.923327,-0.783432,0.562987,0.55964,-0.197254,0.0,-0.682285,0.753024,-0.04452,-0.957671,-0.070161,-0.423189,0.007071,-0.64392,-0.97268,-0.434501,-0.50979,-0.958309,0.518155,-0.804195,0.094449,0.000385,0.603414,-0.09818,0.494998,0.286806,0.917193,-0.221321,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [59]:
# Preview the scaled validation features.
X_val_scaled.head(10)

Unnamed: 0,D_39,B_1,B_2,R_1,D_41,B_3,B_4,D_45,B_5,R_2,D_47,B_6,B_7,D_51,B_9,R_3,B_10,S_5,B_11,S_6,D_54,R_4,B_12,S_8,B_13,R_5,D_58,B_14,D_60,S_11,D_65,B_16,B_18,B_19,B_20,S_12,R_6,S_13,B_21,B_22,D_71,S_15,B_23,P_4,D_75,B_24,R_7,B_26,R_8,S_16,R_10,R_11,B_27,S_17,R_12,B_28,R_13,R_14,R_15,R_16,S_18,D_86,R_17,R_18,B_31,S_19,R_19,B_32,S_20,R_20,R_21,B_33,R_22,R_23,D_92,D_93,D_94,R_24,R_25,D_96,D_102,B_36,D_127,D_133,R_28,D_140,D_144,D_63_CL,D_63_CO,D_63_CR,D_63_XL,D_63_XM,D_63_XZ,B_30_0.0,B_30_1.0,B_30_2.0,B_38_1.0,B_38_2.0,B_38_3.0,B_38_4.0,B_38_5.0,B_38_6.0,B_38_7.0,D_126_1.0
0,0.309842,0.035923,0.205886,-0.153049,-0.825474,-0.004669,-0.341864,0.009004,1.201696,0.197299,1.062378,1.260657,-0.176534,0.992158,0.091281,-0.081772,0.69714,0.965958,-0.013762,-0.367047,0.020457,-0.99155,0.218162,0.709964,0.132685,-0.92479,-0.269327,-0.120081,1.211725,-0.396423,-0.456662,-0.000973,0.451345,-0.022345,0.002932,14.238489,0.027615,0.994859,0.029635,-0.449135,-0.165227,-0.658765,-0.170371,-0.319911,-0.270158,-0.342274,0.187452,-0.450444,-0.510801,0.827486,0.754647,0.858613,0.266187,0.846251,-0.325509,-0.403889,0.641107,-0.13259,0.748681,-0.412773,-0.465632,0.042278,0.762465,-0.625089,0.0,-0.153403,0.772195,-0.684007,-0.088177,0.265657,-0.061047,0.001756,0.987343,-0.966865,-0.565628,0.847238,0.608355,-0.879462,0.710649,0.554826,-0.010433,0.185048,0.633481,-0.769676,-0.165345,-0.520131,0.27091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-0.097772,-0.210495,-0.000336,-0.963083,-0.821358,-0.001417,-0.299217,-0.450626,-0.322064,0.433237,1.839608,0.586058,-0.161671,-0.01607,-0.028727,-0.052426,0.697622,-0.09354,-0.136793,155.035181,-0.472506,-0.747581,-0.10002,-0.642644,-0.275228,-0.491564,-0.266868,-0.252959,-0.340394,-0.208973,-0.733784,-0.10175,0.451638,-0.027817,0.00173,-0.090025,0.085247,-0.003527,-0.525894,-0.970069,-0.113388,0.340926,-0.168606,0.704929,-0.253854,-0.626758,0.652034,0.764908,-0.297974,-0.500965,0.836554,-0.851166,0.910891,0.331274,-0.892813,-0.37752,-0.340702,-0.174199,0.839072,0.283127,-0.174887,-0.082612,-0.342411,-0.31993,0.0,0.04004,-0.724616,-0.94942,0.107323,0.642081,-0.631277,0.00304,-0.215389,0.497258,-0.519663,-0.384583,0.435634,0.908686,-0.302536,-0.765975,0.493508,-0.087887,0.070907,-0.614378,-0.336052,-0.193821,0.695384,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.509376,-0.086202,-0.000881,0.345405,0.36637,0.252745,0.622717,-0.468319,-0.287838,0.512103,-1.041334,-0.233976,0.729844,-0.000143,-0.021164,0.912152,-0.198478,0.215638,-0.076166,-0.571977,-0.723652,0.611633,0.366346,-0.443752,0.777989,-0.294187,0.263468,-0.132519,-0.306533,0.209459,0.270666,0.172449,-0.074998,1.562491,-0.012765,3.531686,0.488965,-0.008342,1.475907,0.393145,-0.061738,1.026103,0.76385,0.710068,0.484308,-0.567219,-0.508303,-0.918516,0.131645,5.970287,-0.503524,0.692968,-0.876018,-0.390722,0.465102,0.454449,0.71787,-0.896152,0.106414,0.492312,-0.322767,0.206047,-0.389183,0.487056,0.0,-0.768193,-0.831357,0.234777,-0.735922,-0.801319,0.507541,0.003252,0.31848,0.334011,-0.948387,-0.101696,-0.791603,0.129738,-0.627074,-0.492915,-0.020707,-0.031788,0.069741,0.562563,0.357523,0.423691,-0.511167,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-0.0833,-0.165946,0.006113,0.496752,-0.150923,-0.034202,0.300841,0.245507,-0.319893,0.144577,-0.130903,-0.26151,0.596365,0.980551,-0.041087,-0.034821,-0.281784,-0.157153,-0.127779,154.170097,-0.304752,-0.107481,-0.210999,-0.643428,-0.259835,0.472818,1.360235,-0.250102,-0.344802,-0.178855,-0.811915,-0.101481,-0.14002,-0.038617,-0.012868,-0.314186,0.7766,-0.010659,-0.791888,-0.944863,0.095837,0.345175,0.610422,-0.003408,1.241443,0.855123,0.071584,0.475485,-0.798848,-0.458903,0.531679,-0.439052,-0.292009,-0.665818,0.718247,0.553724,0.6027,-0.647566,-0.046957,0.185244,0.050137,-0.305075,0.079207,-0.403216,0.0,0.339731,0.553075,0.654526,0.76742,0.760343,0.689071,0.001564,-0.7475,-0.726912,0.730135,0.263408,0.496817,0.291887,-0.528104,-0.236428,1.27145,0.586771,0.155391,0.209595,0.996032,0.138995,121.216774,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,3.021207,2.502226,-0.813705,172.078356,215.96082,2.954841,0.565941,-0.045986,-0.138698,188.386964,-0.470725,-0.434215,3.289591,-0.004461,1.80307,-0.051508,-0.372066,0.005453,2.660303,-0.96978,-192.670992,192.539315,-0.07039,-0.653067,-0.232819,191.86028,0.904834,0.552379,0.037926,-0.211041,126.648947,0.997572,-0.609085,5.61744,1.704717,0.069427,152.003632,-0.000491,-0.855714,-0.685399,-0.231317,0.328232,3.193085,-0.140911,0.991913,0.448204,64.209433,24.746469,385.759754,0.187039,0.286138,-0.485323,-0.3094,-0.196449,0.747418,0.382909,0.475284,862.707579,-0.343195,-0.608479,0.834774,-0.189616,-0.947509,-0.649165,0.0,0.89827,-0.64111,0.943761,-0.641383,392.363384,0.18666,-0.993212,-0.540174,0.674628,0.784108,0.514675,-0.87604,196.657094,0.972028,-0.450245,-0.001777,-0.951731,-0.975652,-0.206705,0.622743,0.115888,202.46512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
5,3.520701,0.482686,-0.871224,0.662765,259.335727,1.42788,-0.212541,0.106148,0.261507,0.507786,-0.253391,0.127491,0.018961,0.983778,1.05573,-0.009672,-0.288176,0.266064,0.49206,0.108176,-192.371842,0.004222,0.2453,1.266178,0.623079,-0.074414,-0.197699,0.450201,0.948977,-0.989851,44.872753,0.99822,-0.537028,1.117256,1.710463,0.133561,-0.670959,0.659891,0.50471,-0.760506,3.250017,-1.352168,0.004565,-0.309141,0.239851,-0.326929,0.426598,26.978903,-0.08043,0.16695,-0.61627,-0.172333,-0.613504,0.832549,0.075311,-0.202949,0.099314,-0.529642,0.832104,-0.834689,-0.352544,0.754642,0.946148,-0.617835,0.0,-0.039078,-0.14333,-0.16612,-0.226232,-0.377083,0.249331,-0.995725,-0.838923,0.582928,0.165822,0.021557,0.528244,-0.236982,-0.302001,-0.020343,-0.015097,0.369842,176.465803,-0.085159,0.798037,-0.079739,-0.266602,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
6,-0.100065,0.229178,0.204887,0.561068,0.609689,0.608211,0.949809,-0.440779,10.504178,0.493574,-0.125771,1.23745,-0.08477,0.994713,1.514884,-0.002489,1.275929,0.829402,0.357007,0.393676,0.858532,0.816324,7.925627,1.401217,7.352587,0.272597,-0.283232,5.303818,0.865178,0.412784,-0.921389,0.269423,0.450489,1.562157,-0.012963,19.946926,-0.966681,1.589155,-0.402936,-0.416559,-0.461519,-0.997593,-0.033767,-0.577417,-0.280612,-0.66668,0.030133,0.379918,0.197547,0.339574,0.144079,0.171425,-0.473452,116.234048,-0.420183,1.411214,-0.001139,-0.596554,0.72634,-0.403978,0.6018,-0.175442,-0.03681,0.119017,0.0,-0.677814,0.909287,0.302365,0.825499,0.245189,0.041288,0.005338,0.891955,-0.561844,-0.785009,-0.642689,-0.765544,-0.284657,-0.075459,0.685028,-0.015885,-0.878486,0.477893,-0.315972,0.27631,0.775485,-0.894052,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
7,-0.087944,-0.155851,0.003789,0.12859,0.038852,-0.009118,-0.243985,-0.171481,-0.148637,0.162322,0.295281,0.476928,-0.178613,1.006109,-0.027856,-0.061977,0.688698,-0.069864,-0.1213,154.488845,0.363358,0.217077,-0.176781,-0.650455,-0.307535,-0.643476,-0.268329,-0.261215,-0.342415,-0.176847,-0.973922,-0.099648,0.451685,-0.043535,-0.003511,-0.504479,0.819181,-0.01953,-0.24793,0.143868,-0.027713,0.359916,-0.148608,-0.182901,-0.265594,-0.557808,0.078436,0.618193,-0.844719,0.790445,0.290917,0.2403,-0.11441,0.441125,0.514787,-0.309214,-0.654451,0.261727,0.197518,-0.50922,0.770801,-0.632406,-0.706901,0.198873,0.0,0.837386,0.013445,-0.30852,0.153732,0.854797,0.353937,0.000793,-0.686002,-0.382738,0.393765,-0.157672,0.727278,-0.126281,0.102332,-0.19123,-0.007293,0.305862,0.475637,0.067216,0.726196,-0.210942,-0.35629,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,3.536596,2.816857,-0.879484,0.332595,81.656091,0.328394,-0.06155,-0.518419,1.416309,-0.161328,0.27757,0.011031,-0.021587,-0.022138,0.822838,2.893771,0.026316,-0.171021,3.308428,-0.025001,0.413025,-0.609023,0.054554,-0.154838,3.60918,0.876279,-0.195034,1.936654,0.62149,0.19883,10.636893,0.26426,-0.543587,0.002047,0.300489,0.142612,0.32864,-0.010052,0.745565,-0.371064,1.527604,0.019219,0.005481,0.435039,0.241339,0.866295,-0.044749,45.521642,-0.530619,-0.472169,0.338945,94.680108,0.339803,-0.818248,0.956957,0.463455,0.228388,0.831522,-0.005782,93.778816,0.876389,0.109566,-0.297341,0.001552,0.0,-0.259819,0.137305,0.091356,-0.899,-0.230421,0.408105,-0.99317,-0.549893,0.898439,0.256175,-0.937168,0.179746,-0.861902,-0.947689,-0.83746,0.296869,-0.984987,-0.370545,0.591475,0.626279,-0.576659,-0.086174,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
9,3.026971,0.173994,-0.878915,0.51104,0.2814,-0.001574,-0.341426,-0.43688,-0.122324,0.66872,0.814591,0.635162,-0.168615,0.988459,-0.012268,4.79352,0.263667,-0.139889,0.093891,155.393297,0.565943,0.798976,-0.118961,-0.64014,-0.114349,0.498292,-0.28539,-0.133063,0.294695,-0.203381,14.557611,0.273373,-0.244499,-0.018436,0.092061,-0.225429,-0.376748,-0.015436,-0.704671,0.088278,-0.092589,0.358293,-0.157567,0.632634,0.001194,0.728322,0.466676,11.338145,0.249625,-0.750509,-0.502413,0.579869,0.763711,0.496699,-0.247113,-0.340765,0.467419,0.477035,0.161009,-0.509887,0.047082,0.127455,-0.332722,-0.714553,0.0,0.937734,0.043489,-0.23888,0.766607,-0.722055,-0.564416,-0.998726,0.963593,0.281425,-0.720412,0.27744,-0.271676,0.912663,-0.169515,0.791465,-6.4e-05,-0.660488,177.572203,-0.575131,0.175196,0.96158,0.743279,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [60]:
from xgboost import XGBClassifier

In [61]:
# XGBoost classifier with only the random seed pinned (random_state=0);
# no other hyperparameter is passed, so the library defaults apply.
model = XGBClassifier(random_state=0)

In [62]:
# Train the classifier on the scaled training features and their labels.
# The call stays the last expression so the fitted estimator is still shown.
model.fit(X=X_train_scaled, y=y_train)

In [63]:
# Predict class labels for X_val_scaled (the validation features, judging by
# the name); the result is compared against y_val in the report cell below.
y_pred = model.predict(X_val_scaled)

In [66]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the predictions against y_val.
print(classification_report(y_true=y_val, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.93      0.93      0.93     88967
           1       0.78      0.76      0.77     26844

    accuracy                           0.89    115811
   macro avg       0.85      0.85      0.85    115811
weighted avg       0.89      0.89      0.89    115811



In [67]:
# Test set results

In [68]:
# Remove the columns listed in drop_cols from the encoded test frame.
# Rebinding the name (instead of inplace=True) together with errors="ignore"
# makes this cell safe to re-run: a second execution no longer raises KeyError
# for columns that have already been dropped.
test_data_oh = test_data_oh.drop(columns=drop_cols, errors="ignore")

In [69]:
# Apply the existing scaler (transform only; nothing is refit here) and wrap
# the result back into a DataFrame in a single step, so test_X_scaled is never
# bound to a bare array. Passing index= keeps the row labels of test_data_oh;
# without it the frame is silently renumbered 0..n-1 and can no longer be
# aligned by label with the frame test_data_oh was derived from.
test_X_scaled = pd.DataFrame(
    scaler.transform(test_data_oh),
    columns=test_data_oh.columns,
    index=test_data_oh.index,
)

In [71]:
# Ground-truth labels come from the un-encoded test frame's 'target' column.
y_test = test_data.loc[:, 'target']

In [72]:
# Predict on the scaled test features with the same `model` object that was
# fitted in the training cell above.
y_pred_test = model.predict(test_X_scaled)

In [73]:
# Test-set precision / recall / F1, printed with four decimal places.
print(classification_report(y_true=y_test, y_pred=y_pred_test, digits=4))

              precision    recall  f1-score   support

           0     0.9346    0.9367    0.9356    299141
           1     0.7931    0.7874    0.7902     92208

    accuracy                         0.9015    391349
   macro avg     0.8638    0.8620    0.8629    391349
weighted avg     0.9013    0.9015    0.9014    391349

