##  Data Preprocessing

Step 2: 

Perform various transformations on our data as required for efficient and effective processing by Machine Learning algorithms based on the type and needs of individual features, identified through the data explorations performed in Step 1 - Data Exploration - Application Data.

### Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from IPython.display import display # Allows the use of display() for DataFrames
from sklearn.preprocessing import RobustScaler, StandardScaler, MinMaxScaler, LabelBinarizer, Imputer

# Show all of the data in a dataframe
pd.set_option('display.max_columns', None)

In [2]:
# Training Set
application_train = pd.read_csv('data/tmp/application_train_bureau_data_merged.csv', index_col=0)
application_test = pd.read_csv('data/tmp/application_test_bureau_data_merged.csv', index_col=0)
print("Loaded Training Set: {0} rows {1} cols".format(application_train.shape[0], application_train.shape[1]))
print("Loaded Testing Set: {0} rows {1} cols".format(application_test.shape[0], application_test.shape[1]))

Loaded Training Set: 307511 rows 320 cols
Loaded Testing Set: 48744 rows 319 cols


### Flags

In [3]:
use_imputer = 'Most_Frequent'      # Valid values are 'Mean', 'Median', 'Most_Frequent' and 'None'
use_replace_outliers = True        # Remove Outliers from Dataset
use_scaler = 'Standard'              # Valid values are 'Robust', 'Standard', 'MinMax", 'None'
use_log_transform = None

In [4]:
target_train = application_train['TARGET']
application_train_SK_ID_CURR = application_train['SK_ID_CURR']
application_train = application_train.drop('SK_ID_CURR', axis=1)
application_train = application_train.drop('TARGET', axis=1)

application_test_SK_ID_CURR = application_test['SK_ID_CURR']
application_test = application_test.drop('SK_ID_CURR', axis=1)

In [5]:
display(application_train.head())
display(application_test.head())

Unnamed: 0,AMT_ANNUITY,AMT_CREDIT,AMT_GOODS_PRICE,AMT_INCOME_TOTAL,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_YEAR,APARTMENTS_AVG,APARTMENTS_MEDI,APARTMENTS_MODE,BASEMENTAREA_AVG,BASEMENTAREA_MEDI,BASEMENTAREA_MODE,BOOL_FLAG_EMAIL,BOOL_FLAG_EMP_PHONE,BOOL_FLAG_OWN_CAR,BOOL_FLAG_OWN_REALTY,BOOL_FLAG_PHONE,BOOL_FLAG_WORK_PHONE,CNT_CHILDREN,CNT_FAM_MEMBERS,CODE_GENDER_F,CODE_GENDER_M,CODE_GENDER_XNA,COMMONAREA_AVG,COMMONAREA_MEDI,COMMONAREA_MODE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_ID_PUBLISH,DAYS_LAST_PHONE_CHANGE,DAYS_REGISTRATION,DEF_30_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,ELEVATORS_AVG,ELEVATORS_MEDI,ELEVATORS_MODE,EMERGENCYSTATE_MODE_No,EMERGENCYSTATE_MODE_Yes,ENTRANCES_AVG,ENTRANCES_MEDI,ENTRANCES_MODE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,FLAG_CONT_MOBILE,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_2,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_MOBIL,FLOORSMAX_AVG,FLOORSMAX_MEDI,FLOORSMAX_MODE,FLOORSMIN_AVG,FLOORSMIN_MEDI,FLOORSMIN_MODE,FONDKAPREMONT_MODE_not specified,FONDKAPREMONT_MODE_org spec account,FONDKAPREMONT_MODE_reg oper account,FONDKAPREMONT_MODE_reg oper spec account,HOUR_APPR_PROCESS_START_0,HOUR_APPR_PROCESS_START_1,HOUR_APPR_PROCESS_START_10,HOUR_APPR_PROCESS_START_11,HOUR_APPR_PROCESS_START_12,HOUR_APPR_PROCESS_START_13,HOUR_APPR_PROCESS_START_14,HOUR_APPR_PROCESS_START_15,HOUR_APPR_PROCESS_START_16,HOUR_APPR_PROCESS_START_17,HOUR_APPR_PROCESS_START_18,HOUR_APPR_PROCESS_START_19,HOUR_APPR_PROCESS_START_2,HOUR_APPR_PROCESS_START_20,HOUR_APPR_PROCESS_START_21,HOUR_APPR_PROCESS_START_22,HOUR_APPR_PROCESS_START_23,HOUR_APPR_PROCESS_START_3,HOUR_APPR_PROCESS_START_4,HOUR_APPR_PROCESS_START_5,HOUR_APPR_PROCESS_START_6,HOUR_APPR_PROCESS_START_7,HOUR_APPR_PROCESS_START_8,HOUR_APPR_PROCESS_START_9,HOUSETYPE_MODE_not specified,HOUSETYPE_MODE_org spec account,HOUSETYPE_MODE_reg oper account,HOUSETYPE_MODE_reg oper spec account,LANDAREA_AVG,LANDAREA_MEDI,LANDAREA_MODE,LIVE_CITY_NOT_WORK_CITY,LIVE_REGION_NOT_WORK_REGION,LIVINGAPARTMENTS_AVG,LIVINGAPARTMENTS_MEDI,LIVINGAPARTMENTS_MODE,LIVINGAREA_AVG,LIVINGAREA_MEDI,LIVINGAREA_MODE,NAME_CONTRACT_TYPE_Cash loans,NAME_CONTRACT_TYPE_Revolving loans,NAME_EDUCATION_TYPE_Academic degree,NAME_EDUCATION_TYPE_Higher education,NAME_EDUCATION_TYPE_Incomplete higher,NAME_EDUCATION_TYPE_Lower secondary,NAME_EDUCATION_TYPE_Secondary / secondary special,NAME_FAMILY_STATUS_Civil marriage,NAME_FAMILY_STATUS_Married,NAME_FAMILY_STATUS_Separated,NAME_FAMILY_STATUS_Single / not married,NAME_FAMILY_STATUS_Unknown,NAME_FAMILY_STATUS_Widow,NAME_HOUSING_TYPE_Co-op apartment,NAME_HOUSING_TYPE_House / apartment,NAME_HOUSING_TYPE_Municipal apartment,NAME_HOUSING_TYPE_Office apartment,NAME_HOUSING_TYPE_Rented apartment,NAME_HOUSING_TYPE_With parents,NAME_INCOME_TYPE_Businessman,NAME_INCOME_TYPE_Commercial associate,NAME_INCOME_TYPE_Maternity leave,NAME_INCOME_TYPE_Pensioner,NAME_INCOME_TYPE_State servant,NAME_INCOME_TYPE_Student,NAME_INCOME_TYPE_Unemployed,NAME_INCOME_TYPE_Working,NAME_TYPE_SUITE_Children,NAME_TYPE_SUITE_Family,NAME_TYPE_SUITE_Group of people,NAME_TYPE_SUITE_Other_A,NAME_TYPE_SUITE_Other_B,"NAME_TYPE_SUITE_Spouse, partner",NAME_TYPE_SUITE_Unaccompanied,NONLIVINGAPARTMENTS_AVG,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_AVG,NONLIVINGAREA_MEDI,NONLIVINGAREA_MODE,OBS_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,OCCUPATION_TYPE_Accountants,OCCUPATION_TYPE_Cleaning staff,OCCUPATION_TYPE_Cooking staff,OCCUPATION_TYPE_Core staff,OCCUPATION_TYPE_Drivers,OCCUPATION_TYPE_HR staff,OCCUPATION_TYPE_High skill tech staff,OCCUPATION_TYPE_IT staff,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Realty agents,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Secretaries,OCCUPATION_TYPE_Security staff,OCCUPATION_TYPE_Waiters/barmen staff,ORGANIZATION_TYPE_Advertising,ORGANIZATION_TYPE_Agriculture,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Business Entity Type 1,ORGANIZATION_TYPE_Business Entity Type 2,ORGANIZATION_TYPE_Business Entity Type 3,ORGANIZATION_TYPE_Cleaning,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Culture,ORGANIZATION_TYPE_Electricity,ORGANIZATION_TYPE_Emergency,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Hotel,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Industry: type 1,ORGANIZATION_TYPE_Industry: type 10,ORGANIZATION_TYPE_Industry: type 11,ORGANIZATION_TYPE_Industry: type 12,ORGANIZATION_TYPE_Industry: type 13,ORGANIZATION_TYPE_Industry: type 2,ORGANIZATION_TYPE_Industry: type 3,ORGANIZATION_TYPE_Industry: type 4,ORGANIZATION_TYPE_Industry: type 5,ORGANIZATION_TYPE_Industry: type 6,ORGANIZATION_TYPE_Industry: type 7,ORGANIZATION_TYPE_Industry: type 8,ORGANIZATION_TYPE_Industry: type 9,ORGANIZATION_TYPE_Insurance,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Legal Services,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Mobile,ORGANIZATION_TYPE_Other,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Realtor,ORGANIZATION_TYPE_Religion,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Security Ministries,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Telecom,ORGANIZATION_TYPE_Trade: type 1,ORGANIZATION_TYPE_Trade: type 2,ORGANIZATION_TYPE_Trade: type 3,ORGANIZATION_TYPE_Trade: type 4,ORGANIZATION_TYPE_Trade: type 5,ORGANIZATION_TYPE_Trade: type 6,ORGANIZATION_TYPE_Trade: type 7,ORGANIZATION_TYPE_Transport: type 1,ORGANIZATION_TYPE_Transport: type 2,ORGANIZATION_TYPE_Transport: type 3,ORGANIZATION_TYPE_Transport: type 4,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_XNA,OWN_CAR_AGE,REGION_POPULATION_RELATIVE,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,TOTALAREA_MODE,WALLSMATERIAL_MODE_Block,WALLSMATERIAL_MODE_Mixed,WALLSMATERIAL_MODE_Monolithic,WALLSMATERIAL_MODE_Others,WALLSMATERIAL_MODE_Panel,"WALLSMATERIAL_MODE_Stone, brick",WALLSMATERIAL_MODE_Wooden,WALLSMATERIAL_MODE_not specified,WEEKDAY_APPR_PROCESS_START_FRIDAY,WEEKDAY_APPR_PROCESS_START_MONDAY,WEEKDAY_APPR_PROCESS_START_SATURDAY,WEEKDAY_APPR_PROCESS_START_SUNDAY,WEEKDAY_APPR_PROCESS_START_THURSDAY,WEEKDAY_APPR_PROCESS_START_TUESDAY,WEEKDAY_APPR_PROCESS_START_WEDNESDAY,YEARS_BEGINEXPLUATATION_AVG,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_AVG,YEARS_BUILD_MEDI,YEARS_BUILD_MODE,EXT_SOURCE_2_X_AMT_GOODS_PRICE_X_AMT_ANNUITY,AMT_CREDIT_X_AMT_ANNUITY,EXT_SOURCE_3_X_AMT_GOODS_PRICE,AMT_ANNUITY_X_EXT_SOURCE_1,EXT_SOURCE_2_X_AMT_CREDIT_X_AMT_GOODS_PRICE,PER_CREDIT_INCOME,PER_GOODS_CREDIT,PER_ANNUITY_INCOME,NUM_BUREAU_REPORTS,SUM_BUREAU_BALANCES,SUM_CREDIT_CURRENCY_CODE_0,SUM_CREDIT_CURRENCY_CODE_1,SUM_CREDIT_CURRENCY_CODE_2,SUM_CREDIT_CURRENCY_CODE_3,SUM_CREDIT_ACTIVE_CODE_0,SUM_CREDIT_ACTIVE_CODE_1,SUM_CREDIT_ACTIVE_CODE_2,SUM_CREDIT_ACTIVE_CODE_3,SUM_CREDIT_TYPE_CODE_0,SUM_CREDIT_TYPE_CODE_1,SUM_CREDIT_TYPE_CODE_2,SUM_CREDIT_TYPE_CODE_3,SUM_CREDIT_TYPE_CODE_4,SUM_CREDIT_TYPE_CODE_5,SUM_CREDIT_TYPE_CODE_6,SUM_CREDIT_TYPE_CODE_7,SUM_CREDIT_TYPE_CODE_8,SUM_CREDIT_TYPE_CODE_9,SUM_CREDIT_TYPE_CODE_10,SUM_CREDIT_TYPE_CODE_11,SUM_CREDIT_TYPE_CODE_12,SUM_CREDIT_TYPE_CODE_13,SUM_CREDIT_TYPE_CODE_14,SUM_STATUS_0,SUM_STATUS_1,SUM_STATUS_2,SUM_STATUS_3,SUM_STATUS_4,SUM_STATUS_5,SUM_STATUS_C,SUM_STATUS_X,MED_DAYS_CREDIT,SUM_CREDIT_DAYS_OVERDUE,SUM_DAYS_CREDIT_ENDDATE,SUM_DAYS_ENDDATE_FACT,SUM_AMT_CREDIT_MAX_OVERDUE,SUM_CNT_CREDIT_PROLONG,SUM_AMT_CREDIT_SUM,SUM_AMT_CREDIT_SUM_DEBT,SUM_AMT_CREDIT_SUM_LIMIT,SUM_DAYS_CREDIT_UPDATE
0,-0.100602,-0.466302,-0.475167,0.768915,-0.06795715,-0.08210023,-0.313873,-0.3594746,-0.1805048,-0.518462,-1.220893,-1.212969,-1.175363,-0.9707245,-0.9645809,-0.9068657,0,1,0,1,1,0,-0.577538,-1.333484,0,1,0,-0.726509,-0.7224681,-0.6889908,-1.50688,-0.951317,-0.579154,0.213389,-0.377164,4.163149,5.253007,-0.8583456,-0.8496421,-0.8241475,1,0,-1.145063,-1.134183,-1.070846,-3.006518,-1.317463,-2.129249,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,-1.394668,-1.386822,-1.364761,-1.168163,-1.161247,-1.127794,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,-0.5688307,-0.5665211,-0.5231353,0,0,-1.547211,-1.546294,-1.519122,-1.132894,-1.127239,-1.091746,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,-0.3337948,-0.3300117,-0.3156709,-0.6092616,-0.6010822,-0.5745318,0.774465,0.781455,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,-0.149452,2,2,0,0,0,0,-1.133974,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,-0.1305869,-0.1295235,-0.1052773,-2.032588,-2.026462,-1.969733,-0.065001,0.046911,1.002399,0.232668,-0.234145,0.09241,0.013799,0.052774,8.0,110.0,8.0,0.0,0.0,0.0,2.0,0.0,6.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45.0,27.0,0.0,0.0,0.0,0.0,23.0,15.0,-1042.5,0.0,-2094.0,-4185.0,8405.145,0.0,865055.565,245781.0,31988.565,-3999.0
1,0.809853,2.075169,2.271216,1.848214,-0.06795715,-0.08210023,-0.313873,-0.3594746,-0.1805048,-1.195657,-0.2835724,-0.2749912,-0.2882069,-0.6693872,-0.6622924,-0.6214169,0,1,0,0,1,0,-0.577538,-0.140277,1,0,0,0.3804806,0.3877292,0.1749049,0.166821,-0.483002,-1.790855,-0.159609,-1.084884,-0.321603,-0.276616,0.01150913,0.02091687,0.06760339,1,0,-1.634439,-1.622001,-1.555725,-1.369223,0.565106,3.252436e-16,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0.6380982,0.6399703,0.6811774,1.108199,1.107338,1.151683,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,-1.030726,-1.030527,-1.001022,0,0,-0.4507666,-0.4414522,-0.4839125,-0.6728109,-0.6665319,-0.6407319,1,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,-0.1860087,-0.1812377,-0.3156709,-0.3987103,-0.3882036,-0.5745318,0.029986,0.034878,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,-1.25275,1,1,0,0,0,0,-0.402976,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0.1737698,0.1714084,0.1738538,0.6638757,0.662203,0.696069,0.885205,1.680583,-0.023711,-1.151526,1.876832,-0.331341,0.026271,0.306438,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,-1.586612,-1.244579,-1.237169,-1.389684,-0.06795715,-0.08210023,-0.313873,-0.3594746,-0.1805048,-1.195657,-1.326137e-16,-1.177077e-16,8.460355000000001e-17,-5.619441e-17,1.544375e-16,-2.247735e-16,0,1,1,1,1,1,-0.577538,-1.333484,0,1,0,3.9281590000000004e-17,-3.443572e-18,-1.4210350000000002e-17,0.689509,-1.301491,-0.306869,-0.175455,-0.20124,-0.321603,-0.276616,6.410866e-17,3.2154750000000004e-17,7.070663000000001e-17,0,0,-8.410677e-18,-1.438546e-16,-1.626449e-16,3.134003e-16,0.217545,1.253633,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1.618672e-16,1.710787e-16,1.054501e-16,1.209482e-16,-1.936134e-16,-1.228873e-16,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,-6.129820000000001e-17,-1.008236e-16,-1.149761e-16,0,0,2.121311e-17,1.710325e-16,-9.801555000000001e-17,-1.146487e-16,-2.346175e-16,-1.671853e-16,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,2.7923910000000004e-17,1.1574720000000002e-17,3.688663e-17,9.200822e-17,2.6784270000000002e-17,-1.531646e-17,-0.714493,-0.711699,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,-0.783451,2,2,0,0,0,0,-1.221089e-16,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1.634213e-15,-5.797342e-16,-1.032054e-15,-1.496838e-16,-1.42392e-15,-4.67503e-16,0.357347,1.974664,-1.596684,-0.063979,0.213628,0.019573,0.012649,0.015731,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0.312157,-0.735421,-0.665667,-0.310385,-2.7681290000000003e-17,-2.0331730000000002e-17,-1.1876610000000002e-17,-2.2490330000000002e-17,2.212403e-17,0.090996,-1.326137e-16,-1.177077e-16,8.460355000000001e-17,-5.619441e-17,1.544375e-16,-2.247735e-16,0,1,0,1,0,0,-0.577538,-0.140277,1,0,0,3.9281590000000004e-17,-3.443572e-18,-1.4210350000000002e-17,0.680114,1.090232,-0.369143,-0.416807,1.400759,-0.321603,-0.276616,6.410866e-17,3.2154750000000004e-17,7.070663000000001e-17,0,0,-8.410677e-18,-1.438546e-16,-1.626449e-16,3.134003e-16,0.712841,3.252436e-16,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1.618672e-16,1.710787e-16,1.054501e-16,1.209482e-16,-1.936134e-16,-1.228873e-16,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,-6.129820000000001e-17,-1.008236e-16,-1.149761e-16,0,0,2.121311e-17,1.710325e-16,-9.801555000000001e-17,-1.146487e-16,-2.346175e-16,-1.671853e-16,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,2.7923910000000004e-17,1.1574720000000002e-17,3.688663e-17,9.200822e-17,2.6784270000000002e-17,-1.531646e-17,0.774465,0.781455,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,-0.928991,2,2,0,0,0,0,-1.221089e-16,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1.634213e-15,-5.797342e-16,-1.032054e-15,-1.496838e-16,-1.42392e-15,-4.67503e-16,-0.13839,-0.229567,-0.023711,-0.063979,0.22363,0.034344,0.011105,-0.004603,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,-0.335294,-0.161401,0.096335,-0.526244,-0.06795715,-0.08210023,-0.313873,-0.3594746,-0.1805048,-1.195657,-1.326137e-16,-1.177077e-16,8.460355000000001e-17,-5.619441e-17,1.544375e-16,-2.247735e-16,0,1,0,1,0,0,-0.577538,-1.333484,0,1,0,3.9281590000000004e-17,-3.443572e-18,-1.4210350000000002e-17,0.892535,1.089382,0.307263,0.179258,-0.18658,-0.321603,-0.276616,6.410866e-17,3.2154750000000004e-17,7.070663000000001e-17,0,0,-8.410677e-18,-1.438546e-16,-1.626449e-16,3.134003e-16,-1.00419,3.252436e-16,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1.618672e-16,1.710787e-16,1.054501e-16,1.209482e-16,-1.936134e-16,-1.228873e-16,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,-6.129820000000001e-17,-1.008236e-16,-1.149761e-16,1,0,2.121311e-17,1.710325e-16,-9.801555000000001e-17,-1.146487e-16,-2.346175e-16,-1.671853e-16,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,2.7923910000000004e-17,1.1574720000000002e-17,3.688663e-17,9.200822e-17,2.6784270000000002e-17,-1.531646e-17,-0.714493,-0.711699,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.56357,2,2,0,1,0,0,-1.221089e-16,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1.634213e-15,-5.797342e-16,-1.032054e-15,-1.496838e-16,-1.42392e-15,-4.67503e-16,0.017241,0.054117,-0.023711,-0.063979,-0.014482,0.004306,0.003445,0.007279,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


Unnamed: 0,AMT_ANNUITY,AMT_CREDIT,AMT_GOODS_PRICE,AMT_INCOME_TOTAL,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_YEAR,APARTMENTS_AVG,APARTMENTS_MEDI,APARTMENTS_MODE,BASEMENTAREA_AVG,BASEMENTAREA_MEDI,BASEMENTAREA_MODE,BOOL_FLAG_EMAIL,BOOL_FLAG_EMP_PHONE,BOOL_FLAG_OWN_CAR,BOOL_FLAG_OWN_REALTY,BOOL_FLAG_PHONE,BOOL_FLAG_WORK_PHONE,CNT_CHILDREN,CNT_FAM_MEMBERS,CODE_GENDER_F,CODE_GENDER_M,CODE_GENDER_XNA,COMMONAREA_AVG,COMMONAREA_MEDI,COMMONAREA_MODE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_ID_PUBLISH,DAYS_LAST_PHONE_CHANGE,DAYS_REGISTRATION,DEF_30_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,ELEVATORS_AVG,ELEVATORS_MEDI,ELEVATORS_MODE,EMERGENCYSTATE_MODE_No,EMERGENCYSTATE_MODE_Yes,ENTRANCES_AVG,ENTRANCES_MEDI,ENTRANCES_MODE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,FLAG_CONT_MOBILE,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_2,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_MOBIL,FLOORSMAX_AVG,FLOORSMAX_MEDI,FLOORSMAX_MODE,FLOORSMIN_AVG,FLOORSMIN_MEDI,FLOORSMIN_MODE,FONDKAPREMONT_MODE_not specified,FONDKAPREMONT_MODE_org spec account,FONDKAPREMONT_MODE_reg oper account,FONDKAPREMONT_MODE_reg oper spec account,HOUR_APPR_PROCESS_START_0,HOUR_APPR_PROCESS_START_1,HOUR_APPR_PROCESS_START_10,HOUR_APPR_PROCESS_START_11,HOUR_APPR_PROCESS_START_12,HOUR_APPR_PROCESS_START_13,HOUR_APPR_PROCESS_START_14,HOUR_APPR_PROCESS_START_15,HOUR_APPR_PROCESS_START_16,HOUR_APPR_PROCESS_START_17,HOUR_APPR_PROCESS_START_18,HOUR_APPR_PROCESS_START_19,HOUR_APPR_PROCESS_START_2,HOUR_APPR_PROCESS_START_20,HOUR_APPR_PROCESS_START_21,HOUR_APPR_PROCESS_START_22,HOUR_APPR_PROCESS_START_23,HOUR_APPR_PROCESS_START_3,HOUR_APPR_PROCESS_START_4,HOUR_APPR_PROCESS_START_5,HOUR_APPR_PROCESS_START_6,HOUR_APPR_PROCESS_START_7,HOUR_APPR_PROCESS_START_8,HOUR_APPR_PROCESS_START_9,HOUSETYPE_MODE_not specified,HOUSETYPE_MODE_org spec account,HOUSETYPE_MODE_reg oper account,HOUSETYPE_MODE_reg oper spec account,LANDAREA_AVG,LANDAREA_MEDI,LANDAREA_MODE,LIVE_CITY_NOT_WORK_CITY,LIVE_REGION_NOT_WORK_REGION,LIVINGAPARTMENTS_AVG,LIVINGAPARTMENTS_MEDI,LIVINGAPARTMENTS_MODE,LIVINGAREA_AVG,LIVINGAREA_MEDI,LIVINGAREA_MODE,NAME_CONTRACT_TYPE_Cash loans,NAME_CONTRACT_TYPE_Revolving loans,NAME_EDUCATION_TYPE_Academic degree,NAME_EDUCATION_TYPE_Higher education,NAME_EDUCATION_TYPE_Incomplete higher,NAME_EDUCATION_TYPE_Lower secondary,NAME_EDUCATION_TYPE_Secondary / secondary special,NAME_FAMILY_STATUS_Civil marriage,NAME_FAMILY_STATUS_Married,NAME_FAMILY_STATUS_Separated,NAME_FAMILY_STATUS_Single / not married,NAME_FAMILY_STATUS_Unknown,NAME_FAMILY_STATUS_Widow,NAME_HOUSING_TYPE_Co-op apartment,NAME_HOUSING_TYPE_House / apartment,NAME_HOUSING_TYPE_Municipal apartment,NAME_HOUSING_TYPE_Office apartment,NAME_HOUSING_TYPE_Rented apartment,NAME_HOUSING_TYPE_With parents,NAME_INCOME_TYPE_Businessman,NAME_INCOME_TYPE_Commercial associate,NAME_INCOME_TYPE_Maternity leave,NAME_INCOME_TYPE_Pensioner,NAME_INCOME_TYPE_State servant,NAME_INCOME_TYPE_Student,NAME_INCOME_TYPE_Unemployed,NAME_INCOME_TYPE_Working,NAME_TYPE_SUITE_Children,NAME_TYPE_SUITE_Family,NAME_TYPE_SUITE_Group of people,NAME_TYPE_SUITE_Other_A,NAME_TYPE_SUITE_Other_B,"NAME_TYPE_SUITE_Spouse, partner",NAME_TYPE_SUITE_Unaccompanied,NONLIVINGAPARTMENTS_AVG,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_AVG,NONLIVINGAREA_MEDI,NONLIVINGAREA_MODE,OBS_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,OCCUPATION_TYPE_Accountants,OCCUPATION_TYPE_Cleaning staff,OCCUPATION_TYPE_Cooking staff,OCCUPATION_TYPE_Core staff,OCCUPATION_TYPE_Drivers,OCCUPATION_TYPE_HR staff,OCCUPATION_TYPE_High skill tech staff,OCCUPATION_TYPE_IT staff,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Realty agents,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Secretaries,OCCUPATION_TYPE_Security staff,OCCUPATION_TYPE_Waiters/barmen staff,ORGANIZATION_TYPE_Advertising,ORGANIZATION_TYPE_Agriculture,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Business Entity Type 1,ORGANIZATION_TYPE_Business Entity Type 2,ORGANIZATION_TYPE_Business Entity Type 3,ORGANIZATION_TYPE_Cleaning,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Culture,ORGANIZATION_TYPE_Electricity,ORGANIZATION_TYPE_Emergency,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Hotel,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Industry: type 1,ORGANIZATION_TYPE_Industry: type 10,ORGANIZATION_TYPE_Industry: type 11,ORGANIZATION_TYPE_Industry: type 12,ORGANIZATION_TYPE_Industry: type 13,ORGANIZATION_TYPE_Industry: type 2,ORGANIZATION_TYPE_Industry: type 3,ORGANIZATION_TYPE_Industry: type 4,ORGANIZATION_TYPE_Industry: type 5,ORGANIZATION_TYPE_Industry: type 6,ORGANIZATION_TYPE_Industry: type 7,ORGANIZATION_TYPE_Industry: type 8,ORGANIZATION_TYPE_Industry: type 9,ORGANIZATION_TYPE_Insurance,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Legal Services,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Mobile,ORGANIZATION_TYPE_Other,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Realtor,ORGANIZATION_TYPE_Religion,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Security Ministries,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Telecom,ORGANIZATION_TYPE_Trade: type 1,ORGANIZATION_TYPE_Trade: type 2,ORGANIZATION_TYPE_Trade: type 3,ORGANIZATION_TYPE_Trade: type 4,ORGANIZATION_TYPE_Trade: type 5,ORGANIZATION_TYPE_Trade: type 6,ORGANIZATION_TYPE_Trade: type 7,ORGANIZATION_TYPE_Transport: type 1,ORGANIZATION_TYPE_Transport: type 2,ORGANIZATION_TYPE_Transport: type 3,ORGANIZATION_TYPE_Transport: type 4,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_XNA,OWN_CAR_AGE,REGION_POPULATION_RELATIVE,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,TOTALAREA_MODE,WALLSMATERIAL_MODE_Block,WALLSMATERIAL_MODE_Mixed,WALLSMATERIAL_MODE_Monolithic,WALLSMATERIAL_MODE_Others,WALLSMATERIAL_MODE_Panel,"WALLSMATERIAL_MODE_Stone, brick",WALLSMATERIAL_MODE_Wooden,WALLSMATERIAL_MODE_not specified,WEEKDAY_APPR_PROCESS_START_FRIDAY,WEEKDAY_APPR_PROCESS_START_MONDAY,WEEKDAY_APPR_PROCESS_START_SATURDAY,WEEKDAY_APPR_PROCESS_START_SUNDAY,WEEKDAY_APPR_PROCESS_START_THURSDAY,WEEKDAY_APPR_PROCESS_START_TUESDAY,WEEKDAY_APPR_PROCESS_START_WEDNESDAY,YEARS_BEGINEXPLUATATION_AVG,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_AVG,YEARS_BUILD_MEDI,YEARS_BUILD_MODE,EXT_SOURCE_2_X_AMT_GOODS_PRICE_X_AMT_ANNUITY,AMT_CREDIT_X_AMT_ANNUITY,EXT_SOURCE_3_X_AMT_GOODS_PRICE,AMT_ANNUITY_X_EXT_SOURCE_1,EXT_SOURCE_2_X_AMT_CREDIT_X_AMT_GOODS_PRICE,PER_CREDIT_INCOME,PER_GOODS_CREDIT,PER_ANNUITY_INCOME,NUM_BUREAU_REPORTS,SUM_BUREAU_BALANCES,SUM_CREDIT_CURRENCY_CODE_0,SUM_CREDIT_CURRENCY_CODE_1,SUM_CREDIT_CURRENCY_CODE_2,SUM_CREDIT_CURRENCY_CODE_3,SUM_CREDIT_ACTIVE_CODE_0,SUM_CREDIT_ACTIVE_CODE_1,SUM_CREDIT_ACTIVE_CODE_2,SUM_CREDIT_ACTIVE_CODE_3,SUM_CREDIT_TYPE_CODE_0,SUM_CREDIT_TYPE_CODE_1,SUM_CREDIT_TYPE_CODE_2,SUM_CREDIT_TYPE_CODE_3,SUM_CREDIT_TYPE_CODE_4,SUM_CREDIT_TYPE_CODE_5,SUM_CREDIT_TYPE_CODE_6,SUM_CREDIT_TYPE_CODE_7,SUM_CREDIT_TYPE_CODE_8,SUM_CREDIT_TYPE_CODE_9,SUM_CREDIT_TYPE_CODE_10,SUM_CREDIT_TYPE_CODE_11,SUM_CREDIT_TYPE_CODE_12,SUM_CREDIT_TYPE_CODE_13,SUM_CREDIT_TYPE_CODE_14,SUM_STATUS_0,SUM_STATUS_1,SUM_STATUS_2,SUM_STATUS_3,SUM_STATUS_4,SUM_STATUS_5,SUM_STATUS_C,SUM_STATUS_X,MED_DAYS_CREDIT,SUM_CREDIT_DAYS_OVERDUE,SUM_DAYS_CREDIT_ENDDATE,SUM_DAYS_ENDDATE_FACT,SUM_AMT_CREDIT_MAX_OVERDUE,SUM_CNT_CREDIT_PROLONG,SUM_AMT_CREDIT_SUM,SUM_AMT_CREDIT_SUM_DEBT,SUM_AMT_CREDIT_SUM_LIMIT,SUM_DAYS_CREDIT_UPDATE
0,-0.443327,-0.001503,-0.125916,-0.310385,-0.067957,-0.0821,-0.313873,-0.359475,-0.180505,-1.195657,-0.677194,-0.669517,-0.62089,-0.554502,-0.547045,-0.485138,1,1,0,1,0,0,-0.577538,-0.140277,1,0,0,0.071953,0.0676,0.06534,0.734193,0.486777,-1.445696,0.952072,0.060346,-0.321603,-0.276616,0.067705,0.06584,0.067271,1,0,-0.167731,-0.159959,-0.102494,1.796945,1.442258,-2.013788,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,-0.987919,-0.981269,-0.955377,0.071357,0.067752,0.063421,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.016599,0.017191,0.018356,0,0,0.098132,0.096983,0.094974,-0.7292,-0.722069,-0.676205,1,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0.016022,0.012506,0.010989,0.022119,0.022573,0.02422,-0.714493,-0.711699,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-30969400000000.0,-0.14591,2,2,0,0,0,0,-0.81958,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,-0.106993,-0.106195,-0.083639,-0.020351,-0.021624,-0.020557,0.058677,0.000666,0.233456,-0.845295,-0.02544,-0.065886,-0.060218,-0.052351,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,-0.707449,-0.993075,-1.078418,-0.886011,-0.067957,-0.0821,-0.313873,-0.359475,-0.180505,0.835928,0.065128,0.06478,0.063989,0.030572,0.029742,0.026793,0,1,0,1,0,0,-0.577538,-0.140277,0,1,0,0.071953,0.0676,0.06534,0.464485,2.305643,-0.908413,-1.168898,1.195227,-0.321603,-0.276616,0.067705,0.06584,0.067271,0,0,0.029115,0.028101,0.027659,0.450952,-1.167051,-0.44646,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0.072418,0.070578,0.069458,0.071357,0.067752,0.063421,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0.016599,0.017191,0.018356,0,0,0.098132,0.096983,0.094974,0.062633,0.060094,0.059693,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0.016022,0.012506,0.010989,0.022119,0.022573,0.02422,-0.714493,-0.711699,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-30969400000000.0,1.078997,2,2,0,0,0,0,0.05928,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0.025795,0.024956,0.026549,-0.020351,-0.021624,-0.020557,-0.778163,0.70255,0.464595,-0.37687,-0.918395,-0.065674,-0.060237,-0.052378,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0.146197,0.269189,0.509086,0.768915,-0.067957,-0.0821,-0.313873,0.99461,-0.180505,1.513123,0.065128,0.06478,0.063989,0.030572,0.029742,0.026793,0,1,1,1,0,0,-0.577538,-0.140277,0,1,0,0.071953,0.0676,0.06534,0.916824,2.296293,0.337075,-0.125479,-0.800588,-0.321603,-0.276616,0.067705,0.06584,0.067271,0,0,0.029115,0.028101,0.027659,-0.006815,0.971389,0.573978,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0.072418,0.070578,0.069458,0.071357,0.067752,0.063421,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.016599,0.017191,0.018356,0,0,0.098132,0.096983,0.094974,0.062633,0.060094,0.059693,1,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0.016022,0.012506,0.010989,0.022119,0.022573,0.02422,-0.714493,-0.711699,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,-30969400000000.0,-0.127762,2,2,0,0,0,0,0.05928,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0.025795,0.024956,0.026549,-0.020351,-0.021624,-0.020557,0.051599,0.039354,0.272641,-0.064956,0.069451,-0.065557,-0.060301,-0.052253,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1.912533,-0.290866,-0.205291,2.567746,-0.067957,-0.0821,-0.313873,-0.359475,-0.180505,0.835928,2.471782,2.485382,2.59637,2.052065,2.06775,2.161249,0,1,0,1,1,0,2.192084,2.246138,1,0,0,1.722286,1.739586,1.836619,-0.472274,0.093256,0.804133,1.031303,-0.850893,-0.321603,-0.276616,2.621073,2.632594,2.740643,1,0,1.789771,1.791316,1.83702,0.169333,-0.024708,0.583796,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1.450619,1.450104,1.498964,-2.078489,-2.068462,-2.039365,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2.664438,2.685336,2.762572,0,0,2.681107,2.707917,2.850552,3.330809,3.348558,3.50581,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1.128909,1.150099,1.204761,1.109525,1.138136,1.226307,-0.714493,-0.711699,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-30969400000000.0,0.399377,2,2,0,0,0,0,3.460315,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0.454533,0.449012,0.431347,3.152919,3.144285,3.156327,-0.002356,-0.556291,-0.145261,0.253649,-0.026689,-0.069892,-0.06026,-0.052618,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0.509224,0.160974,0.493211,0.409148,-0.050449,-0.055069,-0.302958,0.381078,-0.165863,0.147741,0.065128,0.06478,0.063989,0.030572,0.029742,0.026793,0,1,1,0,0,1,0.807273,1.052931,0,1,0,0.071953,0.0676,0.06534,-0.686757,0.369485,0.839908,-0.168142,-0.275979,-0.321603,-0.276616,0.067705,0.06584,0.067271,0,0,0.029115,0.028101,0.027659,-2.152052,-0.46478,-0.061602,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0.072418,0.070578,0.069458,0.071357,0.067752,0.063421,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0.016599,0.017191,0.018356,1,0,0.098132,0.096983,0.094974,0.062633,0.060094,0.059693,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0.016022,0.012506,0.010989,0.022119,0.022573,0.02422,-0.714493,-0.711699,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-30969400000000.0,-0.783451,2,2,0,1,0,0,0.05928,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0.025795,0.024956,0.026549,-0.020351,-0.021624,-0.020557,-0.111333,0.081972,-0.054525,-1.138783,-0.051993,-0.065704,-0.060319,-0.052383,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [28]:
# Load the list of features indentified in the exploration step which need preprocessing
#non_numeric_features = pd.read_csv('data/tmp/bureau_non_numeric_features.csv', header=0, index_col=0, names=["feature"])
non_numeric_features = []
bureau_numeric_features = pd.read_csv('data/tmp/bureau_numeric_features.csv', header=0, index_col=0, names=["feature"])
application_numeric_features = pd.read_csv('data/tmp/numeric_features.csv', header=0, index_col=0, names=["feature"])
#string_to_bool_features = pd.read_csv('data/tmp/bureau_string_to_bool.csv', header=0, index_col=0, names=["feature"])
string_to_bool_features = []
log_transform_features = pd.read_csv('data/tmp/bureau_log_transform.csv', header=0, index_col=0, names=["feature"])

In [25]:
display(numeric_features.head)

<bound method NDFrame.head of                        feature
0           NUM_BUREAU_REPORTS
1          SUM_BUREAU_BALANCES
2   SUM_CREDIT_CURRENCY_CODE_0
3   SUM_CREDIT_CURRENCY_CODE_1
4   SUM_CREDIT_CURRENCY_CODE_2
5   SUM_CREDIT_CURRENCY_CODE_3
6     SUM_CREDIT_ACTIVE_CODE_0
7     SUM_CREDIT_ACTIVE_CODE_1
8     SUM_CREDIT_ACTIVE_CODE_2
9     SUM_CREDIT_ACTIVE_CODE_3
10      SUM_CREDIT_TYPE_CODE_0
11      SUM_CREDIT_TYPE_CODE_1
12      SUM_CREDIT_TYPE_CODE_2
13      SUM_CREDIT_TYPE_CODE_3
14      SUM_CREDIT_TYPE_CODE_4
15      SUM_CREDIT_TYPE_CODE_5
16      SUM_CREDIT_TYPE_CODE_6
17      SUM_CREDIT_TYPE_CODE_7
18      SUM_CREDIT_TYPE_CODE_8
19      SUM_CREDIT_TYPE_CODE_9
20     SUM_CREDIT_TYPE_CODE_10
21     SUM_CREDIT_TYPE_CODE_11
22     SUM_CREDIT_TYPE_CODE_12
23     SUM_CREDIT_TYPE_CODE_13
24     SUM_CREDIT_TYPE_CODE_14
25                SUM_STATUS_0
26                SUM_STATUS_1
27                SUM_STATUS_2
28                SUM_STATUS_3
29                SUM_STATUS_4
30       

In [8]:
print("Non-Numeric {0}, Numeric {1}, Bool {2}, Log Transform {3}".format(\
        len(non_numeric_features), len(numeric_features), \
        len(string_to_bool_features), len(log_transform_features)))

Non-Numeric 0, Numeric 42, Bool 0, Log Transform 1


In [9]:
''' 
Applies an in-place transformation that converts a Y/N field to binary 1/0
'''
def make_bool(dataFrame, featureName): 
    
    lb = LabelBinarizer()
    bool_name = "BOOL_{0}".format(featureName)    
    dataFrame[bool_name] = lb.fit_transform(dataFrame[featureName])
    
    # This seems to work more consistently than df.drop
    # From: https://stackoverflow.com/questions/43838198/df-drop-is-not-working
    del(dataFrame[featureName])    
    #result = dataFrame.drop(featureName, axis=1) 
    
    return dataFrame

In [10]:
''' 
Applies an in-place transformation that converts NaN, Inf and -Inf to numeric values
'''
def make_numeric(dataFrame, featureName): 
        
    return np.nan_to_num(dataFrame[featureName])

In [11]:
'''
Applies an in-place abs() to numeric features
'''
def abs_transform(dataFrame, featureName):     
    
    transformed = dataFrame[featureName].apply(lambda x: abs(x))
        
    dataFrame[featureName] = transformed
    
    return dataFrame

In [12]:
'''
Applies an in-place log transformation to numeric features
'''
def log_transform(dataFrame, featureName):     
    
    transformed = dataFrame[featureName].apply(lambda x: np.log(x + 1))
    
    transformed_name = "LOG_{0}".format(featureName)

    print("Transformed Name: {0}".format(transformed_name))
    
    dataFrame[transformed_name] = transformed

    # This seems to work more consistently than df.drop
    # From: https://stackoverflow.com/questions/43838198/df-drop-is-not-working
    del(dataFrame[featureName])
    #result = dataFrame.drop(featureName, axis=1)
    
    return dataFrame

In [13]:
'''
Applies an in-place transformation that offsets all values in a column by the minimum value in the column
'''

def find_offset(dataFrames, featureName): 
    
    offset = 0
    
    for df in dataFrames: 
        if (np.min(df[featureName]) < offset): 
            offset = np.min(df[featureName])

    return abs(offset) + 1
            
            
def offset_negative_values(dataFrame, featureName, offset): 
    print("Offsetting Feature {0} by {1}".format(featureName, offset))
    return dataFrame[featureName].apply(lambda x: (x + offset)), offset

In [14]:
# Tukey's Method for detecting outliers
# Adapted from the customer segments exercise

def return_outliers(dataFrame, featureList): 
    
    outliers = []
    count = 0
    
    frame_stats = []

    for feature_count, feature in enumerate(featureList): 

        feature_stats = {}

        dataFrame[feature] = np.nan_to_num(dataFrame[feature])

        # Calculate Q1 (25th percentile of the data) for the given feature
        Q1 = np.percentile(dataFrame[feature], 25)

        # Calculate Q3 (75th percentile of the data) for the given feature
        Q3 = np.percentile(dataFrame[feature], 75)

        # TODO: Use the interquartile range to calculate an outlier step
        step = (Q3 - Q1) * 1.5

        feature_stats['Q1_percentile'] = Q1
        feature_stats['Q3_percentile'] = Q3
        feature_stats['step'] = step
        feature_stats['feature'] = feature
        feature_stats['feature_min'] = np.min(dataFrame[feature])
        feature_stats['feature_max'] = np.max(dataFrame[feature])
        feature_stats['feature_median'] = np.median(dataFrame[feature])

        # Calculate the median of the interquartile range
        # TODO: This just sounds good intuitively.  It's probably worth researching the actual best practice.
        feature_stats['IQ_median'] = np.median([Q1, Q3])

        # Save a list of the outliers
        feature_stats['outliers'] = dataFrame.index[~((dataFrame[feature] >= Q1 - step) & (dataFrame[feature] <= Q3 + step))]

        # Just keep a count of the number of fields we identified
        count += len(feature_stats['outliers'])

        frame_stats.append(feature_stats)

    outliers.append(frame_stats)   
        
    print("Total Outliers Identified: {0}".format(count))
        
    return outliers

In [15]:
def replace_outliers(dataFrame, outliers):
    
    df = dataFrame.copy()
    
    count = 0
    
    for ol in outliers: 

        for feature_count, feature in enumerate(ol): 

            # Some features have infrequent but meaningful values, and averaging to 0 would be a problem.
            # We'll just skip these when they come up.
            # example: "How many credit applications were made in the past hour?" 
            if(feature['IQ_median'] >= 1): 

                featureName = feature['feature']
                print("Processing feature {0}, max: {1}, min {2}, mean {3}]".format(featureName, \
                        np.max(df[featureName]), np.min(df[featureName]), np.mean(df[featureName])))
            
                try: 
                    df.iloc[feature['outliers'], df.columns.get_loc(featureName)] = feature['IQ_median']
                    count += len(feature['outliers'])
                except Exception as e:
                        print("Error: {0}".format(str(e)))
 
                print("Processing Complete for {0}, max: {1}, min {2}, mean {3}]".format(featureName, \
                        np.max(df[featureName]), np.min(df[featureName]), np.mean(df[featureName])))
 

    print("Total Outliers Modified: {0}".format(count))
    return df

## Preprocessing

### Transform Date Fields to Positive Integers

In [16]:
application_train = abs_transform(application_train, 'SUM_DAYS_ENDDATE_FACT')
application_test = abs_transform(application_test, 'SUM_DAYS_ENDDATE_FACT')

application_train = abs_transform(application_train, 'SUM_DAYS_CREDIT_ENDDATE')
application_test = abs_transform(application_test, 'SUM_DAYS_CREDIT_ENDDATE')

application_train = abs_transform(application_train, 'SUM_DAYS_CREDIT_UPDATE')
application_test = abs_transform(application_test, 'SUM_DAYS_CREDIT_UPDATE')

In [17]:
display(application_train.head())

Unnamed: 0,AMT_ANNUITY,AMT_CREDIT,AMT_GOODS_PRICE,AMT_INCOME_TOTAL,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_YEAR,APARTMENTS_AVG,APARTMENTS_MEDI,APARTMENTS_MODE,BASEMENTAREA_AVG,BASEMENTAREA_MEDI,BASEMENTAREA_MODE,BOOL_FLAG_EMAIL,BOOL_FLAG_EMP_PHONE,BOOL_FLAG_OWN_CAR,BOOL_FLAG_OWN_REALTY,BOOL_FLAG_PHONE,BOOL_FLAG_WORK_PHONE,CNT_CHILDREN,CNT_FAM_MEMBERS,CODE_GENDER_F,CODE_GENDER_M,CODE_GENDER_XNA,COMMONAREA_AVG,COMMONAREA_MEDI,COMMONAREA_MODE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_ID_PUBLISH,DAYS_LAST_PHONE_CHANGE,DAYS_REGISTRATION,DEF_30_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,ELEVATORS_AVG,ELEVATORS_MEDI,ELEVATORS_MODE,EMERGENCYSTATE_MODE_No,EMERGENCYSTATE_MODE_Yes,ENTRANCES_AVG,ENTRANCES_MEDI,ENTRANCES_MODE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,FLAG_CONT_MOBILE,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_2,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_MOBIL,FLOORSMAX_AVG,FLOORSMAX_MEDI,FLOORSMAX_MODE,FLOORSMIN_AVG,FLOORSMIN_MEDI,FLOORSMIN_MODE,FONDKAPREMONT_MODE_not specified,FONDKAPREMONT_MODE_org spec account,FONDKAPREMONT_MODE_reg oper account,FONDKAPREMONT_MODE_reg oper spec account,HOUR_APPR_PROCESS_START_0,HOUR_APPR_PROCESS_START_1,HOUR_APPR_PROCESS_START_10,HOUR_APPR_PROCESS_START_11,HOUR_APPR_PROCESS_START_12,HOUR_APPR_PROCESS_START_13,HOUR_APPR_PROCESS_START_14,HOUR_APPR_PROCESS_START_15,HOUR_APPR_PROCESS_START_16,HOUR_APPR_PROCESS_START_17,HOUR_APPR_PROCESS_START_18,HOUR_APPR_PROCESS_START_19,HOUR_APPR_PROCESS_START_2,HOUR_APPR_PROCESS_START_20,HOUR_APPR_PROCESS_START_21,HOUR_APPR_PROCESS_START_22,HOUR_APPR_PROCESS_START_23,HOUR_APPR_PROCESS_START_3,HOUR_APPR_PROCESS_START_4,HOUR_APPR_PROCESS_START_5,HOUR_APPR_PROCESS_START_6,HOUR_APPR_PROCESS_START_7,HOUR_APPR_PROCESS_START_8,HOUR_APPR_PROCESS_START_9,HOUSETYPE_MODE_not specified,HOUSETYPE_MODE_org spec account,HOUSETYPE_MODE_reg oper account,HOUSETYPE_MODE_reg oper spec account,LANDAREA_AVG,LANDAREA_MEDI,LANDAREA_MODE,LIVE_CITY_NOT_WORK_CITY,LIVE_REGION_NOT_WORK_REGION,LIVINGAPARTMENTS_AVG,LIVINGAPARTMENTS_MEDI,LIVINGAPARTMENTS_MODE,LIVINGAREA_AVG,LIVINGAREA_MEDI,LIVINGAREA_MODE,NAME_CONTRACT_TYPE_Cash loans,NAME_CONTRACT_TYPE_Revolving loans,NAME_EDUCATION_TYPE_Academic degree,NAME_EDUCATION_TYPE_Higher education,NAME_EDUCATION_TYPE_Incomplete higher,NAME_EDUCATION_TYPE_Lower secondary,NAME_EDUCATION_TYPE_Secondary / secondary special,NAME_FAMILY_STATUS_Civil marriage,NAME_FAMILY_STATUS_Married,NAME_FAMILY_STATUS_Separated,NAME_FAMILY_STATUS_Single / not married,NAME_FAMILY_STATUS_Unknown,NAME_FAMILY_STATUS_Widow,NAME_HOUSING_TYPE_Co-op apartment,NAME_HOUSING_TYPE_House / apartment,NAME_HOUSING_TYPE_Municipal apartment,NAME_HOUSING_TYPE_Office apartment,NAME_HOUSING_TYPE_Rented apartment,NAME_HOUSING_TYPE_With parents,NAME_INCOME_TYPE_Businessman,NAME_INCOME_TYPE_Commercial associate,NAME_INCOME_TYPE_Maternity leave,NAME_INCOME_TYPE_Pensioner,NAME_INCOME_TYPE_State servant,NAME_INCOME_TYPE_Student,NAME_INCOME_TYPE_Unemployed,NAME_INCOME_TYPE_Working,NAME_TYPE_SUITE_Children,NAME_TYPE_SUITE_Family,NAME_TYPE_SUITE_Group of people,NAME_TYPE_SUITE_Other_A,NAME_TYPE_SUITE_Other_B,"NAME_TYPE_SUITE_Spouse, partner",NAME_TYPE_SUITE_Unaccompanied,NONLIVINGAPARTMENTS_AVG,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_AVG,NONLIVINGAREA_MEDI,NONLIVINGAREA_MODE,OBS_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,OCCUPATION_TYPE_Accountants,OCCUPATION_TYPE_Cleaning staff,OCCUPATION_TYPE_Cooking staff,OCCUPATION_TYPE_Core staff,OCCUPATION_TYPE_Drivers,OCCUPATION_TYPE_HR staff,OCCUPATION_TYPE_High skill tech staff,OCCUPATION_TYPE_IT staff,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Realty agents,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Secretaries,OCCUPATION_TYPE_Security staff,OCCUPATION_TYPE_Waiters/barmen staff,ORGANIZATION_TYPE_Advertising,ORGANIZATION_TYPE_Agriculture,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Business Entity Type 1,ORGANIZATION_TYPE_Business Entity Type 2,ORGANIZATION_TYPE_Business Entity Type 3,ORGANIZATION_TYPE_Cleaning,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Culture,ORGANIZATION_TYPE_Electricity,ORGANIZATION_TYPE_Emergency,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Hotel,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Industry: type 1,ORGANIZATION_TYPE_Industry: type 10,ORGANIZATION_TYPE_Industry: type 11,ORGANIZATION_TYPE_Industry: type 12,ORGANIZATION_TYPE_Industry: type 13,ORGANIZATION_TYPE_Industry: type 2,ORGANIZATION_TYPE_Industry: type 3,ORGANIZATION_TYPE_Industry: type 4,ORGANIZATION_TYPE_Industry: type 5,ORGANIZATION_TYPE_Industry: type 6,ORGANIZATION_TYPE_Industry: type 7,ORGANIZATION_TYPE_Industry: type 8,ORGANIZATION_TYPE_Industry: type 9,ORGANIZATION_TYPE_Insurance,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Legal Services,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Mobile,ORGANIZATION_TYPE_Other,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Realtor,ORGANIZATION_TYPE_Religion,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Security Ministries,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Telecom,ORGANIZATION_TYPE_Trade: type 1,ORGANIZATION_TYPE_Trade: type 2,ORGANIZATION_TYPE_Trade: type 3,ORGANIZATION_TYPE_Trade: type 4,ORGANIZATION_TYPE_Trade: type 5,ORGANIZATION_TYPE_Trade: type 6,ORGANIZATION_TYPE_Trade: type 7,ORGANIZATION_TYPE_Transport: type 1,ORGANIZATION_TYPE_Transport: type 2,ORGANIZATION_TYPE_Transport: type 3,ORGANIZATION_TYPE_Transport: type 4,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_XNA,OWN_CAR_AGE,REGION_POPULATION_RELATIVE,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,TOTALAREA_MODE,WALLSMATERIAL_MODE_Block,WALLSMATERIAL_MODE_Mixed,WALLSMATERIAL_MODE_Monolithic,WALLSMATERIAL_MODE_Others,WALLSMATERIAL_MODE_Panel,"WALLSMATERIAL_MODE_Stone, brick",WALLSMATERIAL_MODE_Wooden,WALLSMATERIAL_MODE_not specified,WEEKDAY_APPR_PROCESS_START_FRIDAY,WEEKDAY_APPR_PROCESS_START_MONDAY,WEEKDAY_APPR_PROCESS_START_SATURDAY,WEEKDAY_APPR_PROCESS_START_SUNDAY,WEEKDAY_APPR_PROCESS_START_THURSDAY,WEEKDAY_APPR_PROCESS_START_TUESDAY,WEEKDAY_APPR_PROCESS_START_WEDNESDAY,YEARS_BEGINEXPLUATATION_AVG,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_AVG,YEARS_BUILD_MEDI,YEARS_BUILD_MODE,EXT_SOURCE_2_X_AMT_GOODS_PRICE_X_AMT_ANNUITY,AMT_CREDIT_X_AMT_ANNUITY,EXT_SOURCE_3_X_AMT_GOODS_PRICE,AMT_ANNUITY_X_EXT_SOURCE_1,EXT_SOURCE_2_X_AMT_CREDIT_X_AMT_GOODS_PRICE,PER_CREDIT_INCOME,PER_GOODS_CREDIT,PER_ANNUITY_INCOME,NUM_BUREAU_REPORTS,SUM_BUREAU_BALANCES,SUM_CREDIT_CURRENCY_CODE_0,SUM_CREDIT_CURRENCY_CODE_1,SUM_CREDIT_CURRENCY_CODE_2,SUM_CREDIT_CURRENCY_CODE_3,SUM_CREDIT_ACTIVE_CODE_0,SUM_CREDIT_ACTIVE_CODE_1,SUM_CREDIT_ACTIVE_CODE_2,SUM_CREDIT_ACTIVE_CODE_3,SUM_CREDIT_TYPE_CODE_0,SUM_CREDIT_TYPE_CODE_1,SUM_CREDIT_TYPE_CODE_2,SUM_CREDIT_TYPE_CODE_3,SUM_CREDIT_TYPE_CODE_4,SUM_CREDIT_TYPE_CODE_5,SUM_CREDIT_TYPE_CODE_6,SUM_CREDIT_TYPE_CODE_7,SUM_CREDIT_TYPE_CODE_8,SUM_CREDIT_TYPE_CODE_9,SUM_CREDIT_TYPE_CODE_10,SUM_CREDIT_TYPE_CODE_11,SUM_CREDIT_TYPE_CODE_12,SUM_CREDIT_TYPE_CODE_13,SUM_CREDIT_TYPE_CODE_14,SUM_STATUS_0,SUM_STATUS_1,SUM_STATUS_2,SUM_STATUS_3,SUM_STATUS_4,SUM_STATUS_5,SUM_STATUS_C,SUM_STATUS_X,MED_DAYS_CREDIT,SUM_CREDIT_DAYS_OVERDUE,SUM_DAYS_CREDIT_ENDDATE,SUM_DAYS_ENDDATE_FACT,SUM_AMT_CREDIT_MAX_OVERDUE,SUM_CNT_CREDIT_PROLONG,SUM_AMT_CREDIT_SUM,SUM_AMT_CREDIT_SUM_DEBT,SUM_AMT_CREDIT_SUM_LIMIT,SUM_DAYS_CREDIT_UPDATE
0,-0.100602,-0.466302,-0.475167,0.768915,-0.06795715,-0.08210023,-0.313873,-0.3594746,-0.1805048,-0.518462,-1.220893,-1.212969,-1.175363,-0.9707245,-0.9645809,-0.9068657,0,1,0,1,1,0,-0.577538,-1.333484,0,1,0,-0.726509,-0.7224681,-0.6889908,-1.50688,-0.951317,-0.579154,0.213389,-0.377164,4.163149,5.253007,-0.8583456,-0.8496421,-0.8241475,1,0,-1.145063,-1.134183,-1.070846,-3.006518,-1.317463,-2.129249,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,-1.394668,-1.386822,-1.364761,-1.168163,-1.161247,-1.127794,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,-0.5688307,-0.5665211,-0.5231353,0,0,-1.547211,-1.546294,-1.519122,-1.132894,-1.127239,-1.091746,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,-0.3337948,-0.3300117,-0.3156709,-0.6092616,-0.6010822,-0.5745318,0.774465,0.781455,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,-0.149452,2,2,0,0,0,0,-1.133974,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,-0.1305869,-0.1295235,-0.1052773,-2.032588,-2.026462,-1.969733,-0.065001,0.046911,1.002399,0.232668,-0.234145,0.09241,0.013799,0.052774,8.0,110.0,8.0,0.0,0.0,0.0,2.0,0.0,6.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45.0,27.0,0.0,0.0,0.0,0.0,23.0,15.0,-1042.5,0.0,2094.0,4185.0,8405.145,0.0,865055.565,245781.0,31988.565,3999.0
1,0.809853,2.075169,2.271216,1.848214,-0.06795715,-0.08210023,-0.313873,-0.3594746,-0.1805048,-1.195657,-0.2835724,-0.2749912,-0.2882069,-0.6693872,-0.6622924,-0.6214169,0,1,0,0,1,0,-0.577538,-0.140277,1,0,0,0.3804806,0.3877292,0.1749049,0.166821,-0.483002,-1.790855,-0.159609,-1.084884,-0.321603,-0.276616,0.01150913,0.02091687,0.06760339,1,0,-1.634439,-1.622001,-1.555725,-1.369223,0.565106,3.252436e-16,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0.6380982,0.6399703,0.6811774,1.108199,1.107338,1.151683,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,-1.030726,-1.030527,-1.001022,0,0,-0.4507666,-0.4414522,-0.4839125,-0.6728109,-0.6665319,-0.6407319,1,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,-0.1860087,-0.1812377,-0.3156709,-0.3987103,-0.3882036,-0.5745318,0.029986,0.034878,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,-1.25275,1,1,0,0,0,0,-0.402976,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0.1737698,0.1714084,0.1738538,0.6638757,0.662203,0.696069,0.885205,1.680583,-0.023711,-1.151526,1.876832,-0.331341,0.026271,0.306438,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,-1.586612,-1.244579,-1.237169,-1.389684,-0.06795715,-0.08210023,-0.313873,-0.3594746,-0.1805048,-1.195657,-1.326137e-16,-1.177077e-16,8.460355000000001e-17,-5.619441e-17,1.544375e-16,-2.247735e-16,0,1,1,1,1,1,-0.577538,-1.333484,0,1,0,3.9281590000000004e-17,-3.443572e-18,-1.4210350000000002e-17,0.689509,-1.301491,-0.306869,-0.175455,-0.20124,-0.321603,-0.276616,6.410866e-17,3.2154750000000004e-17,7.070663000000001e-17,0,0,-8.410677e-18,-1.438546e-16,-1.626449e-16,3.134003e-16,0.217545,1.253633,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1.618672e-16,1.710787e-16,1.054501e-16,1.209482e-16,-1.936134e-16,-1.228873e-16,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,-6.129820000000001e-17,-1.008236e-16,-1.149761e-16,0,0,2.121311e-17,1.710325e-16,-9.801555000000001e-17,-1.146487e-16,-2.346175e-16,-1.671853e-16,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,2.7923910000000004e-17,1.1574720000000002e-17,3.688663e-17,9.200822e-17,2.6784270000000002e-17,-1.531646e-17,-0.714493,-0.711699,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,-0.783451,2,2,0,0,0,0,-1.221089e-16,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1.634213e-15,-5.797342e-16,-1.032054e-15,-1.496838e-16,-1.42392e-15,-4.67503e-16,0.357347,1.974664,-1.596684,-0.063979,0.213628,0.019573,0.012649,0.015731,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0.312157,-0.735421,-0.665667,-0.310385,-2.7681290000000003e-17,-2.0331730000000002e-17,-1.1876610000000002e-17,-2.2490330000000002e-17,2.212403e-17,0.090996,-1.326137e-16,-1.177077e-16,8.460355000000001e-17,-5.619441e-17,1.544375e-16,-2.247735e-16,0,1,0,1,0,0,-0.577538,-0.140277,1,0,0,3.9281590000000004e-17,-3.443572e-18,-1.4210350000000002e-17,0.680114,1.090232,-0.369143,-0.416807,1.400759,-0.321603,-0.276616,6.410866e-17,3.2154750000000004e-17,7.070663000000001e-17,0,0,-8.410677e-18,-1.438546e-16,-1.626449e-16,3.134003e-16,0.712841,3.252436e-16,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1.618672e-16,1.710787e-16,1.054501e-16,1.209482e-16,-1.936134e-16,-1.228873e-16,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,-6.129820000000001e-17,-1.008236e-16,-1.149761e-16,0,0,2.121311e-17,1.710325e-16,-9.801555000000001e-17,-1.146487e-16,-2.346175e-16,-1.671853e-16,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,2.7923910000000004e-17,1.1574720000000002e-17,3.688663e-17,9.200822e-17,2.6784270000000002e-17,-1.531646e-17,0.774465,0.781455,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,-0.928991,2,2,0,0,0,0,-1.221089e-16,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1.634213e-15,-5.797342e-16,-1.032054e-15,-1.496838e-16,-1.42392e-15,-4.67503e-16,-0.13839,-0.229567,-0.023711,-0.063979,0.22363,0.034344,0.011105,-0.004603,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,-0.335294,-0.161401,0.096335,-0.526244,-0.06795715,-0.08210023,-0.313873,-0.3594746,-0.1805048,-1.195657,-1.326137e-16,-1.177077e-16,8.460355000000001e-17,-5.619441e-17,1.544375e-16,-2.247735e-16,0,1,0,1,0,0,-0.577538,-1.333484,0,1,0,3.9281590000000004e-17,-3.443572e-18,-1.4210350000000002e-17,0.892535,1.089382,0.307263,0.179258,-0.18658,-0.321603,-0.276616,6.410866e-17,3.2154750000000004e-17,7.070663000000001e-17,0,0,-8.410677e-18,-1.438546e-16,-1.626449e-16,3.134003e-16,-1.00419,3.252436e-16,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1.618672e-16,1.710787e-16,1.054501e-16,1.209482e-16,-1.936134e-16,-1.228873e-16,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,-6.129820000000001e-17,-1.008236e-16,-1.149761e-16,1,0,2.121311e-17,1.710325e-16,-9.801555000000001e-17,-1.146487e-16,-2.346175e-16,-1.671853e-16,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,2.7923910000000004e-17,1.1574720000000002e-17,3.688663e-17,9.200822e-17,2.6784270000000002e-17,-1.531646e-17,-0.714493,-0.711699,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.56357,2,2,0,1,0,0,-1.221089e-16,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1.634213e-15,-5.797342e-16,-1.032054e-15,-1.496838e-16,-1.42392e-15,-4.67503e-16,0.017241,0.054117,-0.023711,-0.063979,-0.014482,0.004306,0.003445,0.007279,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### Remove Outliers

In [18]:
numeric = [] 

for feature in log_transform_features['feature']:
    numeric.append("{0}".format(feature))

for feature in numeric_features['feature']: 
    numeric.append("{0}".format(feature))

if use_replace_outliers is True: 
    print("Replacing Outliers")
    print("Before: AMT_INCOME_TOTAL Max: {0}".format(np.max(application_train['AMT_INCOME_TOTAL'])))
    outliers_train = return_outliers(application_train, numeric)
    outliers_test = return_outliers(application_test, numeric)
    application_train = replace_outliers(application_train, outliers_train)
    application_test = replace_outliers(application_test, outliers_test)
    print("After: AMT_INCOME_TOTAL Max: {0}".format(np.max(application_train['AMT_INCOME_TOTAL'])))

Replacing Outliers
Before: AMT_INCOME_TOTAL Max: 2.9275127246996657
Total Outliers Identified: 1045936
Total Outliers Identified: 0
Processing feature NUM_BUREAU_REPORTS, max: 116.0, min 0.0, mean 1.7034805258998864]
Processing Complete for NUM_BUREAU_REPORTS, max: 5.0, min 0.0, mean 0.6243516492092965]
Processing feature SUM_BUREAU_BALANCES, max: 2657.0, min 0.0, mean 47.80841010565476]
Processing Complete for SUM_BUREAU_BALANCES, max: 72.0, min 0.0, mean 6.492499455304038]
Processing feature SUM_CREDIT_CURRENCY_CODE_0, max: 116.0, min 0.0, mean 1.7012204441467134]
Processing Complete for SUM_CREDIT_CURRENCY_CODE_0, max: 5.0, min 0.0, mean 0.6244784739407696]
Processing feature SUM_STATUS_0, max: 696.0, min 0.0, mean 15.009817534982488]
Processing Complete for SUM_STATUS_0, max: 22.0, min 0.0, mean 2.0333532784193085]
Processing feature SUM_DAYS_CREDIT_ENDDATE, max: 214193.0, min 0.0, mean 2098.2946105992955]
Processing Complete for SUM_DAYS_CREDIT_ENDDATE, max: 1370.0, min 0.0, mean 

### Remove Non-Numeric Values from Numeric Fields

In [20]:
# Assemble a list of all numeric fields
numeric = [] 

for feature in log_transform_features['feature']:
    numeric.append("{0}".format(feature))

for feature in numeric_features['feature']: 
    numeric.append("{0}".format(feature))

if str.lower(use_imputer) != 'none': 

    print("Using Imputer {0}".format(use_imputer))
    
    imputer = Imputer(strategy=str.lower(use_imputer))
    application_train[numeric] = imputer.fit_transform(application_train[numeric])
    application_test[numeric] = imputer.fit_transform(application_test[numeric])

else: 
    
    print("Using np.nan_to_num()")
    numeric = list(application_test.select_dtypes(include=[np.number]).columns.values)

    for feature in numeric: 
        application_train[feature] = make_numeric(application_train, feature)
        application_test[feature] = make_numeric(application_test, feature)

Using Imputer Most_Frequent


### Log Transform Skewed Numeric Features

In [19]:
''' 
# Skipping Log transformations for the time being... 

# Ensure all values are numeric, then log transform them
for feature in log_transform_features['feature']: 
        application_train[feature] = make_numeric(application_train, feature)
    
        print("Performing Log Transformations on: {0}".format(feature))
        application_train = log_transform(application_train, feature)
'''

' \n# Skipping Log transformations for the time being... \n\n# Ensure all values are numeric, then log transform them\nfor feature in log_transform_features[\'feature\']: \n        application_train[feature] = make_numeric(application_train, feature)\n    \n        print("Performing Log Transformations on: {0}".format(feature))\n        application_train = log_transform(application_train, feature)\n'

### Scale Numeric Fields

In [29]:
# Initialize a scaler, then apply it to the features

if str.lower(use_scaler) == 'robust': 
    scaler = RobustScaler()

if str.lower(use_scaler) == 'standard': 
    scaler = StandardScaler()
    
if str.lower(use_scaler) == 'minmax': 
    scaler = MinMaxScaler()
    
if str.lower(use_scaler) != 'none':

    # Assemble a list of all numeric fields
    numeric = [] 

    if use_log_transform is True: 
    
        for feature in log_transform_features['feature']:
            numeric.append("LOG_{0}".format(feature))

    else: 

        for feature in log_transform_features['feature']:
            numeric.append("{0}".format(feature))
        
            
    for feature in numeric_features['feature']: 
        numeric.append("{0}".format(feature))
        
    for feature in application_numeric_features['feature']:
        numeric.append("{0}".format(feature))
        
    # Technique for selecting numeric fields from: 
    # https://stackoverflow.com/questions/25039626/how-do-i-find-numeric-columns-in-pandas
    # numeric = list(application_test.select_dtypes(include=[np.number]).columns.values)

    scaler = scaler.fit(application_train[numeric])

print("Using Scaler: {0}".format(use_scaler))

Using Scaler: Standard


In [30]:
application_train[numeric] = scaler.transform(application_train[numeric])
application_test[numeric] = scaler.transform(application_test[numeric])

### Preview
Show our transformed dataset

In [31]:
# Show an example of a record with scaling applied
display(application_train.head(n = 5))

Unnamed: 0,AMT_ANNUITY,AMT_CREDIT,AMT_GOODS_PRICE,AMT_INCOME_TOTAL,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_YEAR,APARTMENTS_AVG,APARTMENTS_MEDI,APARTMENTS_MODE,BASEMENTAREA_AVG,BASEMENTAREA_MEDI,BASEMENTAREA_MODE,BOOL_FLAG_EMAIL,BOOL_FLAG_EMP_PHONE,BOOL_FLAG_OWN_CAR,BOOL_FLAG_OWN_REALTY,BOOL_FLAG_PHONE,BOOL_FLAG_WORK_PHONE,CNT_CHILDREN,CNT_FAM_MEMBERS,CODE_GENDER_F,CODE_GENDER_M,CODE_GENDER_XNA,COMMONAREA_AVG,COMMONAREA_MEDI,COMMONAREA_MODE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_ID_PUBLISH,DAYS_LAST_PHONE_CHANGE,DAYS_REGISTRATION,DEF_30_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,ELEVATORS_AVG,ELEVATORS_MEDI,ELEVATORS_MODE,EMERGENCYSTATE_MODE_No,EMERGENCYSTATE_MODE_Yes,ENTRANCES_AVG,ENTRANCES_MEDI,ENTRANCES_MODE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,FLAG_CONT_MOBILE,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_2,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_MOBIL,FLOORSMAX_AVG,FLOORSMAX_MEDI,FLOORSMAX_MODE,FLOORSMIN_AVG,FLOORSMIN_MEDI,FLOORSMIN_MODE,FONDKAPREMONT_MODE_not specified,FONDKAPREMONT_MODE_org spec account,FONDKAPREMONT_MODE_reg oper account,FONDKAPREMONT_MODE_reg oper spec account,HOUR_APPR_PROCESS_START_0,HOUR_APPR_PROCESS_START_1,HOUR_APPR_PROCESS_START_10,HOUR_APPR_PROCESS_START_11,HOUR_APPR_PROCESS_START_12,HOUR_APPR_PROCESS_START_13,HOUR_APPR_PROCESS_START_14,HOUR_APPR_PROCESS_START_15,HOUR_APPR_PROCESS_START_16,HOUR_APPR_PROCESS_START_17,HOUR_APPR_PROCESS_START_18,HOUR_APPR_PROCESS_START_19,HOUR_APPR_PROCESS_START_2,HOUR_APPR_PROCESS_START_20,HOUR_APPR_PROCESS_START_21,HOUR_APPR_PROCESS_START_22,HOUR_APPR_PROCESS_START_23,HOUR_APPR_PROCESS_START_3,HOUR_APPR_PROCESS_START_4,HOUR_APPR_PROCESS_START_5,HOUR_APPR_PROCESS_START_6,HOUR_APPR_PROCESS_START_7,HOUR_APPR_PROCESS_START_8,HOUR_APPR_PROCESS_START_9,HOUSETYPE_MODE_not specified,HOUSETYPE_MODE_org spec account,HOUSETYPE_MODE_reg oper account,HOUSETYPE_MODE_reg oper spec account,LANDAREA_AVG,LANDAREA_MEDI,LANDAREA_MODE,LIVE_CITY_NOT_WORK_CITY,LIVE_REGION_NOT_WORK_REGION,LIVINGAPARTMENTS_AVG,LIVINGAPARTMENTS_MEDI,LIVINGAPARTMENTS_MODE,LIVINGAREA_AVG,LIVINGAREA_MEDI,LIVINGAREA_MODE,NAME_CONTRACT_TYPE_Cash loans,NAME_CONTRACT_TYPE_Revolving loans,NAME_EDUCATION_TYPE_Academic degree,NAME_EDUCATION_TYPE_Higher education,NAME_EDUCATION_TYPE_Incomplete higher,NAME_EDUCATION_TYPE_Lower secondary,NAME_EDUCATION_TYPE_Secondary / secondary special,NAME_FAMILY_STATUS_Civil marriage,NAME_FAMILY_STATUS_Married,NAME_FAMILY_STATUS_Separated,NAME_FAMILY_STATUS_Single / not married,NAME_FAMILY_STATUS_Unknown,NAME_FAMILY_STATUS_Widow,NAME_HOUSING_TYPE_Co-op apartment,NAME_HOUSING_TYPE_House / apartment,NAME_HOUSING_TYPE_Municipal apartment,NAME_HOUSING_TYPE_Office apartment,NAME_HOUSING_TYPE_Rented apartment,NAME_HOUSING_TYPE_With parents,NAME_INCOME_TYPE_Businessman,NAME_INCOME_TYPE_Commercial associate,NAME_INCOME_TYPE_Maternity leave,NAME_INCOME_TYPE_Pensioner,NAME_INCOME_TYPE_State servant,NAME_INCOME_TYPE_Student,NAME_INCOME_TYPE_Unemployed,NAME_INCOME_TYPE_Working,NAME_TYPE_SUITE_Children,NAME_TYPE_SUITE_Family,NAME_TYPE_SUITE_Group of people,NAME_TYPE_SUITE_Other_A,NAME_TYPE_SUITE_Other_B,"NAME_TYPE_SUITE_Spouse, partner",NAME_TYPE_SUITE_Unaccompanied,NONLIVINGAPARTMENTS_AVG,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_AVG,NONLIVINGAREA_MEDI,NONLIVINGAREA_MODE,OBS_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,OCCUPATION_TYPE_Accountants,OCCUPATION_TYPE_Cleaning staff,OCCUPATION_TYPE_Cooking staff,OCCUPATION_TYPE_Core staff,OCCUPATION_TYPE_Drivers,OCCUPATION_TYPE_HR staff,OCCUPATION_TYPE_High skill tech staff,OCCUPATION_TYPE_IT staff,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Realty agents,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Secretaries,OCCUPATION_TYPE_Security staff,OCCUPATION_TYPE_Waiters/barmen staff,ORGANIZATION_TYPE_Advertising,ORGANIZATION_TYPE_Agriculture,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Business Entity Type 1,ORGANIZATION_TYPE_Business Entity Type 2,ORGANIZATION_TYPE_Business Entity Type 3,ORGANIZATION_TYPE_Cleaning,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Culture,ORGANIZATION_TYPE_Electricity,ORGANIZATION_TYPE_Emergency,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Hotel,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Industry: type 1,ORGANIZATION_TYPE_Industry: type 10,ORGANIZATION_TYPE_Industry: type 11,ORGANIZATION_TYPE_Industry: type 12,ORGANIZATION_TYPE_Industry: type 13,ORGANIZATION_TYPE_Industry: type 2,ORGANIZATION_TYPE_Industry: type 3,ORGANIZATION_TYPE_Industry: type 4,ORGANIZATION_TYPE_Industry: type 5,ORGANIZATION_TYPE_Industry: type 6,ORGANIZATION_TYPE_Industry: type 7,ORGANIZATION_TYPE_Industry: type 8,ORGANIZATION_TYPE_Industry: type 9,ORGANIZATION_TYPE_Insurance,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Legal Services,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Mobile,ORGANIZATION_TYPE_Other,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Realtor,ORGANIZATION_TYPE_Religion,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Security Ministries,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Telecom,ORGANIZATION_TYPE_Trade: type 1,ORGANIZATION_TYPE_Trade: type 2,ORGANIZATION_TYPE_Trade: type 3,ORGANIZATION_TYPE_Trade: type 4,ORGANIZATION_TYPE_Trade: type 5,ORGANIZATION_TYPE_Trade: type 6,ORGANIZATION_TYPE_Trade: type 7,ORGANIZATION_TYPE_Transport: type 1,ORGANIZATION_TYPE_Transport: type 2,ORGANIZATION_TYPE_Transport: type 3,ORGANIZATION_TYPE_Transport: type 4,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_XNA,OWN_CAR_AGE,REGION_POPULATION_RELATIVE,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,TOTALAREA_MODE,WALLSMATERIAL_MODE_Block,WALLSMATERIAL_MODE_Mixed,WALLSMATERIAL_MODE_Monolithic,WALLSMATERIAL_MODE_Others,WALLSMATERIAL_MODE_Panel,"WALLSMATERIAL_MODE_Stone, brick",WALLSMATERIAL_MODE_Wooden,WALLSMATERIAL_MODE_not specified,WEEKDAY_APPR_PROCESS_START_FRIDAY,WEEKDAY_APPR_PROCESS_START_MONDAY,WEEKDAY_APPR_PROCESS_START_SATURDAY,WEEKDAY_APPR_PROCESS_START_SUNDAY,WEEKDAY_APPR_PROCESS_START_THURSDAY,WEEKDAY_APPR_PROCESS_START_TUESDAY,WEEKDAY_APPR_PROCESS_START_WEDNESDAY,YEARS_BEGINEXPLUATATION_AVG,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_AVG,YEARS_BUILD_MEDI,YEARS_BUILD_MODE,EXT_SOURCE_2_X_AMT_GOODS_PRICE_X_AMT_ANNUITY,AMT_CREDIT_X_AMT_ANNUITY,EXT_SOURCE_3_X_AMT_GOODS_PRICE,AMT_ANNUITY_X_EXT_SOURCE_1,EXT_SOURCE_2_X_AMT_CREDIT_X_AMT_GOODS_PRICE,PER_CREDIT_INCOME,PER_GOODS_CREDIT,PER_ANNUITY_INCOME,NUM_BUREAU_REPORTS,SUM_BUREAU_BALANCES,SUM_CREDIT_CURRENCY_CODE_0,SUM_CREDIT_CURRENCY_CODE_1,SUM_CREDIT_CURRENCY_CODE_2,SUM_CREDIT_CURRENCY_CODE_3,SUM_CREDIT_ACTIVE_CODE_0,SUM_CREDIT_ACTIVE_CODE_1,SUM_CREDIT_ACTIVE_CODE_2,SUM_CREDIT_ACTIVE_CODE_3,SUM_CREDIT_TYPE_CODE_0,SUM_CREDIT_TYPE_CODE_1,SUM_CREDIT_TYPE_CODE_2,SUM_CREDIT_TYPE_CODE_3,SUM_CREDIT_TYPE_CODE_4,SUM_CREDIT_TYPE_CODE_5,SUM_CREDIT_TYPE_CODE_6,SUM_CREDIT_TYPE_CODE_7,SUM_CREDIT_TYPE_CODE_8,SUM_CREDIT_TYPE_CODE_9,SUM_CREDIT_TYPE_CODE_10,SUM_CREDIT_TYPE_CODE_11,SUM_CREDIT_TYPE_CODE_12,SUM_CREDIT_TYPE_CODE_13,SUM_CREDIT_TYPE_CODE_14,SUM_STATUS_0,SUM_STATUS_1,SUM_STATUS_2,SUM_STATUS_3,SUM_STATUS_4,SUM_STATUS_5,SUM_STATUS_C,SUM_STATUS_X,MED_DAYS_CREDIT,SUM_CREDIT_DAYS_OVERDUE,SUM_DAYS_CREDIT_ENDDATE,SUM_DAYS_ENDDATE_FACT,SUM_AMT_CREDIT_MAX_OVERDUE,SUM_CNT_CREDIT_PROLONG,SUM_AMT_CREDIT_SUM,SUM_AMT_CREDIT_SUM_DEBT,SUM_AMT_CREDIT_SUM_LIMIT,SUM_DAYS_CREDIT_UPDATE,SK_ID_CURR,TARGET
0,-0.100602,-0.466302,-0.475167,0.768915,-0.06795715,-0.08210023,-0.313873,-0.3594746,-0.1805048,-0.518462,-1.220893,-1.212969,-1.175363,-0.9707245,-0.9645809,-0.9068657,0,1,0,1,1,0,-0.577538,-1.333484,0,1,0,-0.726509,-0.7224681,-0.6889908,-1.50688,-0.951317,-0.579154,0.213389,-0.377164,4.163149,5.253007,-0.8583456,-0.8496421,-0.8241475,1,0,-1.145063,-1.134183,-1.070846,-3.006518,-1.317463,-2.129249,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,-1.394668,-1.386822,-1.364761,-1.168163,-1.161247,-1.127794,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,-0.5688307,-0.5665211,-0.5231353,0,0,-1.547211,-1.546294,-1.519122,-1.132894,-1.127239,-1.091746,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,-0.3337948,-0.3300117,-0.3156709,-0.6092616,-0.6010822,-0.5745318,0.774465,0.781455,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,-0.149452,2,2,0,0,0,0,-1.133974,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,-0.1305869,-0.1295235,-0.1052773,-2.032588,-2.026462,-1.969733,-0.065001,0.046911,1.002399,0.232668,-0.234145,0.09241,0.013799,0.052774,0.307543,0.616593,0.307393,-0.024437,-0.010391,-0.00255,0.996753,-0.00255,1.968236,-0.075635,-0.028102,-0.138733,-0.007702,1.012598,3.520264,0.0,-0.033562,-0.001803,-0.003825,-0.022737,-0.043898,-0.001803,-0.124818,-0.004771,-0.019285,0.599756,9.24113,-0.090937,-0.066203,-0.059805,-0.046781,0.002266,0.169359,-1.286057,-0.024989,0.677852,1.0573,0.021912,-0.089078,0.681318,0.032617,0.525573,0.923069,100002,1
1,0.809853,2.075169,2.271216,1.848214,-0.06795715,-0.08210023,-0.313873,-0.3594746,-0.1805048,-1.195657,-0.2835724,-0.2749912,-0.2882069,-0.6693872,-0.6622924,-0.6214169,0,1,0,0,1,0,-0.577538,-0.140277,1,0,0,0.3804806,0.3877292,0.1749049,0.166821,-0.483002,-1.790855,-0.159609,-1.084884,-0.321603,-0.276616,0.01150913,0.02091687,0.06760339,1,0,-1.634439,-1.622001,-1.555725,-1.369223,0.565106,3.214773e-16,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0.6380982,0.6399703,0.6811774,1.108199,1.107338,1.151683,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,-1.030726,-1.030527,-1.001022,0,0,-0.4507666,-0.4414522,-0.4839125,-0.6728109,-0.6665319,-0.6407319,1,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,-0.1860087,-0.1812377,-0.3156709,-0.3987103,-0.3882036,-0.5745318,0.029986,0.034878,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,-1.25275,1,1,0,0,0,0,-0.402976,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0.1737698,0.1714084,0.1738538,0.6638757,0.662203,0.696069,0.885205,1.680583,-0.023711,-1.151526,1.876832,-0.331341,0.026271,0.306438,-0.511156,-0.499935,-0.511184,-0.024437,-0.010391,-0.00255,-0.457098,-0.00255,-0.425688,-0.075635,-0.028102,-0.138733,-0.007702,-0.455496,-0.386883,0.0,-0.033562,-0.001803,-0.003825,-0.022737,-0.043898,-0.001803,-0.124818,-0.004771,-0.019285,-0.494403,-0.17618,-0.090937,-0.066203,-0.059805,-0.046781,-0.35255,-0.270597,0.5356,-0.024989,-0.493959,-0.487069,-0.00889,-0.089078,-0.499592,-0.193505,-0.119904,-0.497316,100003,0
2,-1.586612,-1.244579,-1.237169,-1.389684,-0.06795715,-0.08210023,-0.313873,-0.3594746,-0.1805048,-1.195657,-1.324288e-16,-1.088118e-16,9.341859e-17,-7.087843e-17,1.565401e-16,-2.282394e-16,0,1,1,1,1,1,-0.577538,-1.333484,0,1,0,3.824181e-17,3.0568830000000004e-17,-3.2267880000000004e-17,0.689509,-1.301491,-0.306869,-0.175455,-0.20124,-0.321603,-0.276616,4.2989540000000006e-17,2.8098080000000002e-18,5.651939e-17,0,0,-1.6405440000000003e-17,-1.469971e-16,-1.738746e-16,3.221345e-16,0.217545,1.253633,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1.660263e-16,1.795818e-16,1.174654e-16,1.200239e-16,-2.119136e-16,-1.488588e-16,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,-5.542921e-17,-1.196783e-16,-1.135435e-16,0,0,3.1287440000000004e-17,1.78103e-16,-1.211218e-16,-1.044588e-16,-1.939274e-16,-1.704433e-16,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,3.21408e-17,1.8506600000000003e-17,3.942831e-17,1.226471e-16,4.342077e-17,1.1440580000000001e-17,-0.714493,-0.711699,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,-0.783451,2,2,0,0,0,0,-1.133285e-16,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1.629268e-15,-5.763607e-16,-1.032193e-15,-2.185751e-16,-1.428593e-15,-4.607559e-16,0.357347,1.974664,-1.596684,-0.063979,0.213628,0.019573,0.012649,0.015731,-0.511156,-0.499935,-0.511184,-0.024437,-0.010391,-0.00255,-0.457098,-0.00255,-0.425688,-0.075635,-0.028102,-0.138733,-0.007702,-0.455496,-0.386883,0.0,-0.033562,-0.001803,-0.003825,-0.022737,-0.043898,-0.001803,-0.124818,-0.004771,-0.019285,-0.494403,-0.17618,-0.090937,-0.066203,-0.059805,-0.046781,-0.35255,-0.270597,0.5356,-0.024989,-0.493959,-0.487069,-0.00889,-0.089078,-0.499592,-0.193505,-0.119904,-0.497316,100004,0
3,0.312157,-0.735421,-0.665667,-0.310385,-3.6970010000000004e-17,2.328355e-19,-2.370702e-17,-6.477698e-18,-8.118403000000001e-17,0.090996,-1.324288e-16,-1.088118e-16,9.341859e-17,-7.087843e-17,1.565401e-16,-2.282394e-16,0,1,0,1,0,0,-0.577538,-0.140277,1,0,0,3.824181e-17,3.0568830000000004e-17,-3.2267880000000004e-17,0.680114,1.090232,-0.369143,-0.416807,1.400759,-0.321603,-0.276616,4.2989540000000006e-17,2.8098080000000002e-18,5.651939e-17,0,0,-1.6405440000000003e-17,-1.469971e-16,-1.738746e-16,3.221345e-16,0.712841,3.214773e-16,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1.660263e-16,1.795818e-16,1.174654e-16,1.200239e-16,-2.119136e-16,-1.488588e-16,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,-5.542921e-17,-1.196783e-16,-1.135435e-16,0,0,3.1287440000000004e-17,1.78103e-16,-1.211218e-16,-1.044588e-16,-1.939274e-16,-1.704433e-16,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,3.21408e-17,1.8506600000000003e-17,3.942831e-17,1.226471e-16,4.342077e-17,1.1440580000000001e-17,0.774465,0.781455,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,-0.928991,2,2,0,0,0,0,-1.133285e-16,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1.629268e-15,-5.763607e-16,-1.032193e-15,-2.185751e-16,-1.428593e-15,-4.607559e-16,-0.13839,-0.229567,-0.023711,-0.063979,0.22363,0.034344,0.011105,-0.004603,-0.511156,-0.499935,-0.511184,-0.024437,-0.010391,-0.00255,-0.457098,-0.00255,-0.425688,-0.075635,-0.028102,-0.138733,-0.007702,-0.455496,-0.386883,0.0,-0.033562,-0.001803,-0.003825,-0.022737,-0.043898,-0.001803,-0.124818,-0.004771,-0.019285,-0.494403,-0.17618,-0.090937,-0.066203,-0.059805,-0.046781,-0.35255,-0.270597,0.5356,-0.024989,-0.493959,-0.487069,-0.00889,-0.089078,-0.499592,-0.193505,-0.119904,-0.497316,100006,0
4,-0.335294,-0.161401,0.096335,-0.526244,-0.06795715,-0.08210023,-0.313873,-0.3594746,-0.1805048,-1.195657,-1.324288e-16,-1.088118e-16,9.341859e-17,-7.087843e-17,1.565401e-16,-2.282394e-16,0,1,0,1,0,0,-0.577538,-1.333484,0,1,0,3.824181e-17,3.0568830000000004e-17,-3.2267880000000004e-17,0.892535,1.089382,0.307263,0.179258,-0.18658,-0.321603,-0.276616,4.2989540000000006e-17,2.8098080000000002e-18,5.651939e-17,0,0,-1.6405440000000003e-17,-1.469971e-16,-1.738746e-16,3.221345e-16,-1.00419,3.214773e-16,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1.660263e-16,1.795818e-16,1.174654e-16,1.200239e-16,-2.119136e-16,-1.488588e-16,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,-5.542921e-17,-1.196783e-16,-1.135435e-16,1,0,3.1287440000000004e-17,1.78103e-16,-1.211218e-16,-1.044588e-16,-1.939274e-16,-1.704433e-16,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,3.21408e-17,1.8506600000000003e-17,3.942831e-17,1.226471e-16,4.342077e-17,1.1440580000000001e-17,-0.714493,-0.711699,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.56357,2,2,0,1,0,0,-1.133285e-16,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1.629268e-15,-5.763607e-16,-1.032193e-15,-2.185751e-16,-1.428593e-15,-4.607559e-16,0.017241,0.054117,-0.023711,-0.063979,-0.014482,0.004306,0.003445,0.007279,-0.511156,-0.499935,-0.511184,-0.024437,-0.010391,-0.00255,-0.457098,-0.00255,-0.425688,-0.075635,-0.028102,-0.138733,-0.007702,-0.455496,-0.386883,0.0,-0.033562,-0.001803,-0.003825,-0.022737,-0.043898,-0.001803,-0.124818,-0.004771,-0.019285,-0.494403,-0.17618,-0.090937,-0.066203,-0.059805,-0.046781,-0.35255,-0.270597,0.5356,-0.024989,-0.493959,-0.487069,-0.00889,-0.089078,-0.499592,-0.193505,-0.119904,-0.497316,100007,0


### Output
Save out our preprocessed data to temporary intermediate files

In [32]:
application_train['SK_ID_CURR'] = application_train_SK_ID_CURR
application_train['TARGET'] = target_train
application_test['SK_ID_CURR'] = application_test_SK_ID_CURR

train_filename = 'data/tmp/bureau_train_preprocessed_'
test_filename = 'data/tmp/bureau_test_preprocessed_'

if str.lower(use_imputer) is not 'none': 
    train_filename += "imputation_{0}_".format(str.lower(use_imputer))
    test_filename += "imputation_{0}_".format(str.lower(use_imputer))

if use_scaler is 'None':
    train_filename += "no_scaling_"
    test_filename += "no_scaling_"
else: 
    train_filename += "{0}_scaler_".format(str.lower(use_scaler))
    test_filename += "{0}_scaler_".format(str.lower(use_scaler))

if use_replace_outliers is True: 
    train_filename += "outliers_replaced_"
    test_filename += "outliers_replaced_"
else: 
    train_filename += "with_outliers_"
    test_filename += "with_outliers_"

if use_log_transform is True: 
    train_filename += "log_transform_monetary_"
    test_filename += "log_transform_monetary_"
else: 
    train_filename += "no_log"
    test_filename += "no_log"
    
train_filename += ".csv"
test_filename += ".csv"

print("Writing {0}".format(train_filename))
application_train.to_csv(train_filename)
print("Writing {0}".format(test_filename))
application_test.to_csv(test_filename)
print("Output Complete")

Writing data/tmp/bureau_train_preprocessed_imputation_most_frequent_standard_scaler_outliers_replaced_no_log.csv
Writing data/tmp/bureau_test_preprocessed_imputation_most_frequent_standard_scaler_outliers_replaced_no_log.csv
Output Complete
