##  Data Preprocessing

Step 2: 

Apply the transformations our data needs for efficient and effective processing by Machine Learning algorithms. Each feature is transformed according to its type and individual needs, as identified during the data exploration performed in Step 1 - Data Exploration - Application Data.

### Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from IPython.display import display # Allows the use of display() for DataFrames
from sklearn.preprocessing import RobustScaler, StandardScaler, MinMaxScaler
from sklearn.preprocessing import LabelBinarizer

# Remove pandas' cap on displayed columns so wide DataFrames render every column
pd.options.display.max_columns = None

In [2]:
# Load the merged training and testing tables; index_col=0 makes the first
# CSV column the row index of each frame.
application_train = pd.read_csv('data/tmp/application_train_bureau_data_merged.csv', index_col=0)
application_test = pd.read_csv('data/tmp/application_test_bureau_data_merged.csv', index_col=0)

# .shape is (rows, cols), so it can be unpacked straight into the message.
print("Loaded Training Set: {0} rows {1} cols".format(*application_train.shape))
print("Loaded Testing Set: {0} rows {1} cols".format(*application_test.shape))

Loaded Training Set: 307511 rows 315 cols
Loaded Testing Set: 48744 rows 314 cols


In [3]:
# Keep the training labels and both sets of SK_ID_CURR values in their own
# Series before removing those columns from the feature frames.
target_train = application_train['TARGET']
application_train_SK_ID_CURR = application_train['SK_ID_CURR']
application_test_SK_ID_CURR = application_test['SK_ID_CURR']

# Only the training frame has a TARGET column to remove; the test frame
# loses just its identifier column.
application_train = application_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)
application_test = application_test.drop(['SK_ID_CURR'], axis=1)

In [4]:
# Preview the first rows of the training frame, then of the testing frame
display(application_train.head(), application_test.head())

Unnamed: 0,AMT_ANNUITY,AMT_CREDIT,AMT_GOODS_PRICE,AMT_INCOME_TOTAL,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_YEAR,APARTMENTS_AVG,APARTMENTS_MEDI,APARTMENTS_MODE,BASEMENTAREA_AVG,BASEMENTAREA_MEDI,BASEMENTAREA_MODE,BOOL_FLAG_EMAIL,BOOL_FLAG_EMP_PHONE,BOOL_FLAG_OWN_CAR,BOOL_FLAG_OWN_REALTY,BOOL_FLAG_PHONE,BOOL_FLAG_WORK_PHONE,CNT_CHILDREN,CNT_FAM_MEMBERS,CODE_GENDER_F,CODE_GENDER_M,CODE_GENDER_XNA,COMMONAREA_AVG,COMMONAREA_MEDI,COMMONAREA_MODE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_ID_PUBLISH,DAYS_LAST_PHONE_CHANGE,DAYS_REGISTRATION,DEF_30_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,ELEVATORS_AVG,ELEVATORS_MEDI,ELEVATORS_MODE,EMERGENCYSTATE_MODE_No,EMERGENCYSTATE_MODE_Yes,ENTRANCES_AVG,ENTRANCES_MEDI,ENTRANCES_MODE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,FLAG_CONT_MOBILE,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_2,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_MOBIL,FLOORSMAX_AVG,FLOORSMAX_MEDI,FLOORSMAX_MODE,FLOORSMIN_AVG,FLOORSMIN_MEDI,FLOORSMIN_MODE,FONDKAPREMONT_MODE_not specified,FONDKAPREMONT_MODE_org spec account,FONDKAPREMONT_MODE_reg oper account,FONDKAPREMONT_MODE_reg oper spec 
account,HOUR_APPR_PROCESS_START_0,HOUR_APPR_PROCESS_START_1,HOUR_APPR_PROCESS_START_10,HOUR_APPR_PROCESS_START_11,HOUR_APPR_PROCESS_START_12,HOUR_APPR_PROCESS_START_13,HOUR_APPR_PROCESS_START_14,HOUR_APPR_PROCESS_START_15,HOUR_APPR_PROCESS_START_16,HOUR_APPR_PROCESS_START_17,HOUR_APPR_PROCESS_START_18,HOUR_APPR_PROCESS_START_19,HOUR_APPR_PROCESS_START_2,HOUR_APPR_PROCESS_START_20,HOUR_APPR_PROCESS_START_21,HOUR_APPR_PROCESS_START_22,HOUR_APPR_PROCESS_START_23,HOUR_APPR_PROCESS_START_3,HOUR_APPR_PROCESS_START_4,HOUR_APPR_PROCESS_START_5,HOUR_APPR_PROCESS_START_6,HOUR_APPR_PROCESS_START_7,HOUR_APPR_PROCESS_START_8,HOUR_APPR_PROCESS_START_9,HOUSETYPE_MODE_not specified,HOUSETYPE_MODE_org spec account,HOUSETYPE_MODE_reg oper account,HOUSETYPE_MODE_reg oper spec account,LANDAREA_AVG,LANDAREA_MEDI,LANDAREA_MODE,LIVE_CITY_NOT_WORK_CITY,LIVE_REGION_NOT_WORK_REGION,LIVINGAPARTMENTS_AVG,LIVINGAPARTMENTS_MEDI,LIVINGAPARTMENTS_MODE,LIVINGAREA_AVG,LIVINGAREA_MEDI,LIVINGAREA_MODE,NAME_CONTRACT_TYPE_Cash loans,NAME_CONTRACT_TYPE_Revolving loans,NAME_EDUCATION_TYPE_Academic degree,NAME_EDUCATION_TYPE_Higher education,NAME_EDUCATION_TYPE_Incomplete higher,NAME_EDUCATION_TYPE_Lower secondary,NAME_EDUCATION_TYPE_Secondary / secondary special,NAME_FAMILY_STATUS_Civil marriage,NAME_FAMILY_STATUS_Married,NAME_FAMILY_STATUS_Separated,NAME_FAMILY_STATUS_Single / not married,NAME_FAMILY_STATUS_Unknown,NAME_FAMILY_STATUS_Widow,NAME_HOUSING_TYPE_Co-op apartment,NAME_HOUSING_TYPE_House / apartment,NAME_HOUSING_TYPE_Municipal apartment,NAME_HOUSING_TYPE_Office apartment,NAME_HOUSING_TYPE_Rented apartment,NAME_HOUSING_TYPE_With parents,NAME_INCOME_TYPE_Businessman,NAME_INCOME_TYPE_Commercial associate,NAME_INCOME_TYPE_Maternity leave,NAME_INCOME_TYPE_Pensioner,NAME_INCOME_TYPE_State servant,NAME_INCOME_TYPE_Student,NAME_INCOME_TYPE_Unemployed,NAME_INCOME_TYPE_Working,NAME_TYPE_SUITE_Children,NAME_TYPE_SUITE_Family,NAME_TYPE_SUITE_Group of 
people,NAME_TYPE_SUITE_Other_A,NAME_TYPE_SUITE_Other_B,"NAME_TYPE_SUITE_Spouse, partner",NAME_TYPE_SUITE_Unaccompanied,NONLIVINGAPARTMENTS_AVG,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_AVG,NONLIVINGAREA_MEDI,NONLIVINGAREA_MODE,OBS_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,OCCUPATION_TYPE_Accountants,OCCUPATION_TYPE_Cleaning staff,OCCUPATION_TYPE_Cooking staff,OCCUPATION_TYPE_Core staff,OCCUPATION_TYPE_Drivers,OCCUPATION_TYPE_HR staff,OCCUPATION_TYPE_High skill tech staff,OCCUPATION_TYPE_IT staff,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Realty agents,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Secretaries,OCCUPATION_TYPE_Security staff,OCCUPATION_TYPE_Waiters/barmen staff,ORGANIZATION_TYPE_Advertising,ORGANIZATION_TYPE_Agriculture,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Business Entity Type 1,ORGANIZATION_TYPE_Business Entity Type 2,ORGANIZATION_TYPE_Business Entity Type 3,ORGANIZATION_TYPE_Cleaning,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Culture,ORGANIZATION_TYPE_Electricity,ORGANIZATION_TYPE_Emergency,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Hotel,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Industry: type 1,ORGANIZATION_TYPE_Industry: type 10,ORGANIZATION_TYPE_Industry: type 11,ORGANIZATION_TYPE_Industry: type 12,ORGANIZATION_TYPE_Industry: type 13,ORGANIZATION_TYPE_Industry: type 2,ORGANIZATION_TYPE_Industry: type 3,ORGANIZATION_TYPE_Industry: type 4,ORGANIZATION_TYPE_Industry: type 5,ORGANIZATION_TYPE_Industry: type 6,ORGANIZATION_TYPE_Industry: type 7,ORGANIZATION_TYPE_Industry: type 8,ORGANIZATION_TYPE_Industry: type 9,ORGANIZATION_TYPE_Insurance,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Legal 
Services,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Mobile,ORGANIZATION_TYPE_Other,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Realtor,ORGANIZATION_TYPE_Religion,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Security Ministries,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Telecom,ORGANIZATION_TYPE_Trade: type 1,ORGANIZATION_TYPE_Trade: type 2,ORGANIZATION_TYPE_Trade: type 3,ORGANIZATION_TYPE_Trade: type 4,ORGANIZATION_TYPE_Trade: type 5,ORGANIZATION_TYPE_Trade: type 6,ORGANIZATION_TYPE_Trade: type 7,ORGANIZATION_TYPE_Transport: type 1,ORGANIZATION_TYPE_Transport: type 2,ORGANIZATION_TYPE_Transport: type 3,ORGANIZATION_TYPE_Transport: type 4,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_XNA,OWN_CAR_AGE,REGION_POPULATION_RELATIVE,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,TOTALAREA_MODE,WALLSMATERIAL_MODE_Block,WALLSMATERIAL_MODE_Mixed,WALLSMATERIAL_MODE_Monolithic,WALLSMATERIAL_MODE_Others,WALLSMATERIAL_MODE_Panel,"WALLSMATERIAL_MODE_Stone, brick",WALLSMATERIAL_MODE_Wooden,WALLSMATERIAL_MODE_not 
specified,WEEKDAY_APPR_PROCESS_START_FRIDAY,WEEKDAY_APPR_PROCESS_START_MONDAY,WEEKDAY_APPR_PROCESS_START_SATURDAY,WEEKDAY_APPR_PROCESS_START_SUNDAY,WEEKDAY_APPR_PROCESS_START_THURSDAY,WEEKDAY_APPR_PROCESS_START_TUESDAY,WEEKDAY_APPR_PROCESS_START_WEDNESDAY,YEARS_BEGINEXPLUATATION_AVG,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_AVG,YEARS_BUILD_MEDI,YEARS_BUILD_MODE,PER_CREDIT_INCOME,PER_GOODS_CREDIT,PER_ANNUITY_INCOME,NUM_BUREAU_REPORTS,SUM_BUREAU_BALANCES,SUM_CREDIT_CURRENCY_CODE_0,SUM_CREDIT_CURRENCY_CODE_1,SUM_CREDIT_CURRENCY_CODE_2,SUM_CREDIT_CURRENCY_CODE_3,SUM_CREDIT_ACTIVE_CODE_0,SUM_CREDIT_ACTIVE_CODE_1,SUM_CREDIT_ACTIVE_CODE_2,SUM_CREDIT_ACTIVE_CODE_3,SUM_CREDIT_TYPE_CODE_0,SUM_CREDIT_TYPE_CODE_1,SUM_CREDIT_TYPE_CODE_2,SUM_CREDIT_TYPE_CODE_3,SUM_CREDIT_TYPE_CODE_4,SUM_CREDIT_TYPE_CODE_5,SUM_CREDIT_TYPE_CODE_6,SUM_CREDIT_TYPE_CODE_7,SUM_CREDIT_TYPE_CODE_8,SUM_CREDIT_TYPE_CODE_9,SUM_CREDIT_TYPE_CODE_10,SUM_CREDIT_TYPE_CODE_11,SUM_CREDIT_TYPE_CODE_12,SUM_CREDIT_TYPE_CODE_13,SUM_CREDIT_TYPE_CODE_14,SUM_STATUS_0,SUM_STATUS_1,SUM_STATUS_2,SUM_STATUS_3,SUM_STATUS_4,SUM_STATUS_5,SUM_STATUS_C,SUM_STATUS_X,MED_DAYS_CREDIT,SUM_CREDIT_DAYS_OVERDUE,SUM_DAYS_CREDIT_ENDDATE,SUM_DAYS_ENDDATE_FACT,SUM_AMT_CREDIT_MAX_OVERDUE,SUM_CNT_CREDIT_PROLONG,SUM_AMT_CREDIT_SUM,SUM_AMT_CREDIT_SUM_DEBT,SUM_AMT_CREDIT_SUM_LIMIT,SUM_DAYS_CREDIT_UPDATE
0,-0.166065,-0.478095,-0.505662,0.142129,-0.058766,-0.070987,-0.269947,-0.30862,-0.155837,-0.346719,-0.34518,-0.342055,-0.327407,0.003066,0.006034,0.028603,-0.245215,0.468697,-0.717914,0.664531,1.599337,-0.499013,-0.577538,-1.265685,-1.388135,1.388176,-0.003607,0.018432,0.020727,0.034883,-1.50688,-0.743059,-0.579154,0.206994,-0.379837,4.163504,5.25326,-0.368513,-0.365348,-0.355991,0.963763,-0.08734,-0.051932,-0.049475,-0.030398,-0.476522,-1.301593,-1.007542,0.043245,-0.004771,-0.062669,-0.00255,-0.059477,-0.054269,-0.034802,-0.100138,-0.016332,-0.090534,-0.024402,-0.006502,-0.022529,-0.018305,0.639065,-0.009017,-0.123882,-0.310738,-0.013853,-0.297632,-0.062538,0.001803,-0.198993,-0.197647,-0.188297,0.355764,0.356104,0.368213,-1.536135,-0.136428,1.779079,-0.202211,-0.011406,-0.016726,2.674329,-0.371135,-0.353932,-0.334584,-0.314523,-0.296432,-0.266452,-0.225656,-0.174123,-0.11257,-0.031509,-0.062486,-0.036315,-0.022091,-0.011548,-0.063371,-0.082723,-0.109417,-0.13916,-0.174934,-0.227457,-0.312659,-1.536135,-0.136428,1.779079,-0.202211,0.162782,0.165022,0.185154,-0.467814,-0.205869,-0.166853,-0.165974,-0.154904,-0.36414,-0.362243,-0.346934,0.324395,-0.324395,-0.0231,-0.567262,-0.185945,-0.112095,0.638808,-0.327423,-1.329812,-0.262121,2.401419,-0.00255,-0.234957,-0.060515,0.356313,-0.194264,-0.092646,-0.126998,-0.225179,-0.005703,-0.550997,-0.004032,-0.468573,-0.275564,-0.007651,-0.008459,0.967876,-0.103625,-0.387514,-0.029699,-0.053142,-0.076087,-0.195944,0.487175,-0.100846,-0.099729,-0.095487,-0.261343,-0.258124,-0.247602,0.242861,0.252132,-0.181557,-0.12395,-0.140418,-0.313823,-0.253753,-0.042827,-0.196033,-0.041394,2.138285,-0.082782,-0.27329,-0.16898,-0.093269,-0.049479,-0.341411,-0.065283,-0.149481,-0.066354,-0.037377,-0.089691,-0.090662,-0.140875,-0.188513,1.8769,-0.02909,-0.149481,-0.035128,-0.055668,-0.042713,-0.18713,-0.056136,-0.098552,-0.058225,-0.01883,-0.094187,-0.034661,-0.014762,-0.038621,-0.103801,-0.05348,-0.044178,-0.019088,-0.065333,-0.008835,-0.105
232,-0.044104,-0.151279,-0.031509,-0.194354,-0.092949,-0.032124,-0.239507,-0.087585,-0.084047,-0.035908,-0.016628,-0.076968,-0.17257,-0.103304,-0.080379,-0.377813,-0.071751,-0.043358,-0.033659,-0.078848,-0.107173,-0.014428,-0.012624,-0.045345,-0.161651,-0.025575,-0.084964,-0.062249,-0.133669,-0.065833,-0.468635,-0.455284,-0.149452,-0.103064,-0.062699,-0.291208,-0.547236,-0.124004,-0.231267,-0.411375,-0.176135,-0.086733,-0.076281,-0.072886,-0.522963,1.935056,-0.133215,-1.016959,-0.442421,-0.444395,-0.351712,-0.235673,-0.443749,-0.461015,2.218375,0.960978,0.960861,0.961652,1.016443,1.023576,1.042389,0.159961,0.018189,0.141793,8.0,110.0,8.0,0.0,0.0,0.0,2.0,0.0,6.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45.0,27.0,0.0,0.0,0.0,0.0,23.0,15.0,-1042.5,0.0,-2094.0,-4185.0,8405.145,0.0,865055.565,245781.0,31988.565,-3999.0
1,0.59271,1.72545,1.60048,0.426792,-0.058766,-0.070987,-0.269947,-0.30862,-0.155837,-0.885565,0.396431,0.401242,0.380977,0.236003,0.23993,0.252119,-0.245215,0.468697,-0.717914,-1.50482,1.599337,-0.499013,-0.577538,-0.167621,0.720391,-0.72037,-0.003607,1.012286,1.017847,0.81437,0.166821,-0.485941,-1.790855,-0.163104,-1.078697,-0.32048,-0.275663,0.431106,0.436172,0.468762,0.963763,-0.08734,-0.387436,-0.385082,-0.369801,0.323239,0.566501,-1.527258,0.043245,-0.004771,-0.062669,-0.00255,-0.059477,-0.054269,-0.034802,-0.100138,-0.016332,-0.090534,-0.024402,-0.006502,-0.022529,-0.018305,0.639065,-0.009017,-0.123882,-0.310738,-0.013853,-0.297632,-0.062538,0.001803,1.165958,1.166763,1.193952,1.824859,1.824092,1.852433,-1.536135,-0.136428,1.779079,-0.202211,-0.011406,-0.016726,-0.373926,2.694436,-0.353932,-0.334584,-0.314523,-0.296432,-0.266452,-0.225656,-0.174123,-0.11257,-0.031509,-0.062486,-0.036315,-0.022091,-0.011548,-0.063371,-0.082723,-0.109417,-0.13916,-0.174934,-0.227457,-0.312659,-1.536135,-0.136428,1.779079,-0.202211,-0.228092,-0.227585,-0.222405,-0.467814,-0.205869,0.648129,0.655182,0.617489,0.014866,0.017768,0.027531,0.324395,-0.324395,-0.0231,1.762853,-0.185945,-0.112095,-1.565416,-0.327423,0.751986,-0.262121,-0.416421,-0.00255,-0.234957,-0.060515,0.356313,-0.194264,-0.092646,-0.126998,-0.225179,-0.005703,-0.550997,-0.004032,-0.468573,3.628919,-0.007651,-0.008459,-1.03319,-0.103625,2.580551,-0.029699,-0.053142,-0.076087,-0.195944,-2.052651,0.045223,0.047355,-0.095487,-0.059838,-0.054163,-0.247602,-0.174085,-0.168527,-0.181557,-0.12395,-0.140418,3.186507,-0.253753,-0.042827,-0.196033,-0.041394,-0.467664,-0.082782,-0.27329,-0.16898,-0.093269,-0.049479,-0.341411,-0.065283,-0.149481,-0.066354,-0.037377,-0.089691,-0.090662,-0.140875,-0.188513,-0.532794,-0.02909,-0.149481,-0.035128,-0.055668,-0.042713,-0.18713,-0.056136,-0.098552,-0.058225,-0.01883,-0.094187,-0.034661,-0.014762,-0.038621,-0.103801,-0.05348,-0.044178,-0.019088,-0.065333,-0.008835,-0.105232,-0.044104,-0
.151279,-0.031509,-0.194354,-0.092949,-0.032124,-0.239507,-0.087585,-0.084047,-0.035908,-0.016628,-0.076968,5.794739,-0.103304,-0.080379,-0.377813,-0.071751,-0.043358,-0.033659,-0.078848,-0.107173,-0.014428,-0.012624,-0.045345,-0.161651,-0.025575,-0.084964,-0.062249,-0.133669,-0.065833,-0.468635,-0.455284,-1.25275,-2.067573,-2.051813,-0.291208,-0.547236,-0.124004,-0.231267,0.197887,5.677469,-0.086733,-0.076281,-0.072886,-0.522963,-0.516781,-0.133215,-1.016959,-0.442421,2.250251,-0.351712,-0.235673,-0.443749,-0.461015,-0.45078,0.987275,0.987155,0.987948,1.505968,1.504599,1.508933,-0.17445,-0.168532,0.276183,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,-1.404507,-1.152888,-1.090025,-0.427196,-0.058766,-0.070987,-0.269947,-0.30862,-0.155837,-0.885565,-0.602452,-0.600864,-0.59305,-0.534146,-0.533388,-0.523697,-0.245215,0.468697,1.392925,0.664531,1.599337,2.003956,-0.577538,-1.265685,-1.388135,1.388176,-0.003607,-0.28919,-0.288724,-0.283095,0.689509,-0.935313,-0.306869,-0.178827,-0.206116,-0.32048,-0.275663,-0.368513,-0.365348,-0.355991,-1.037599,-0.08734,-0.72294,-0.72069,-0.709204,-0.767499,0.221612,1.193214,0.043245,-0.004771,-0.062669,-0.00255,-0.059477,-0.054269,-0.034802,-0.100138,-0.016332,-0.090534,-0.024402,-0.006502,-0.022529,-0.018305,-1.564786,-0.009017,-0.123882,-0.310738,-0.013853,-0.297632,-0.062538,0.001803,-0.744581,-0.743018,-0.740798,-0.525834,-0.524829,-0.522462,0.650984,-0.136428,-0.562089,-0.202211,-0.011406,-0.016726,-0.373926,-0.371135,-0.353932,-0.334584,-0.314523,-0.296432,-0.266452,-0.225656,-0.174123,-0.11257,-0.031509,-0.062486,-0.036315,-0.022091,-0.011548,-0.063371,-0.082723,-0.109417,-0.13916,-0.174934,-0.227457,3.198372,0.650984,-0.136428,-0.562089,-0.202211,-0.440702,-0.440853,-0.431913,-0.467814,-0.205869,-0.455166,-0.455213,-0.45302,-0.564727,-0.56318,-0.555204,-3.082659,3.082659,-0.0231,-0.567262,-0.185945,-0.112095,0.638808,-0.327423,-1.329812,-0.262121,2.401419,-0.00255,-0.234957,-0.060515,0.356313,-0.194264,-0.092646,-0.126998,-0.225179,-0.005703,-0.550997,-0.004032,-0.468573,-0.275564,-0.007651,-0.008459,0.967876,-0.103625,-0.387514,-0.029699,-0.053142,-0.076087,-0.195944,0.487175,-0.100846,-0.099729,-0.095487,-0.261343,-0.258124,-0.247602,-0.591031,-0.589187,-0.181557,-0.12395,-0.140418,-0.313823,-0.253753,-0.042827,-0.196033,-0.041394,2.138285,-0.082782,-0.27329,-0.16898,-0.093269,-0.049479,-0.341411,-0.065283,-0.149481,-0.066354,-0.037377,-0.089691,-0.090662,-0.140875,-0.188513,-0.532794,-0.02909,-0.149481,-0.035128,-0.055668,-0.042713,5.343875,-0.056136,-0.098552,-0.058225,-0.01883,-0.094187,-0.034661,-0.014762,-0.038621,-0.103801,-0.05348,-0.044178,-0.019088,-0.065333,-
0.008835,-0.105232,-0.044104,-0.151279,-0.031509,-0.194354,-0.092949,-0.032124,-0.239507,-0.087585,-0.084047,-0.035908,-0.016628,-0.076968,-0.17257,-0.103304,-0.080379,-0.377813,-0.071751,-0.043358,-0.033659,-0.078848,-0.107173,-0.014428,-0.012624,-0.045345,-0.161651,-0.025575,-0.084964,-0.062249,-0.133669,-0.065833,-0.468635,2.430559,-0.783451,-0.103064,-0.062699,-0.291208,-0.547236,-0.124004,-0.231267,-0.572048,-0.176135,-0.086733,-0.076281,-0.072886,-0.522963,-0.516781,-0.133215,0.983323,-0.442421,2.250251,-0.351712,-0.235673,-0.443749,-0.461015,-0.45078,-1.020852,-1.020765,-1.020127,-0.698001,-0.698343,-0.698841,0.072629,0.003323,0.082723,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0.177929,-0.71143,-0.651753,-0.142533,-0.058766,-0.070987,-0.269947,-0.30862,-0.155837,-0.885565,-0.602452,-0.600864,-0.59305,-0.534146,-0.533388,-0.523697,-0.245215,0.468697,-0.717914,0.664531,-0.625259,-0.499013,-0.577538,-0.167621,0.720391,-0.72037,-0.003607,-0.28919,-0.288724,-0.283095,0.680114,0.377805,-0.369143,-0.418302,1.375829,-0.32048,-0.275663,-0.368513,-0.365348,-0.355991,-1.037599,-0.08734,-0.72294,-0.72069,-0.709204,-0.767499,0.7131,-1.527258,0.043245,-0.004771,-0.062669,-0.00255,-0.059477,-0.054269,-0.034802,-0.100138,-0.016332,-0.090534,-0.024402,-0.006502,-0.022529,-0.018305,0.639065,-0.009017,-0.123882,-0.310738,-0.013853,-0.297632,-0.062538,0.001803,-0.744581,-0.743018,-0.740798,-0.525834,-0.524829,-0.522462,0.650984,-0.136428,-0.562089,-0.202211,-0.011406,-0.016726,-0.373926,-0.371135,-0.353932,-0.334584,-0.314523,-0.296432,-0.266452,4.431515,-0.174123,-0.11257,-0.031509,-0.062486,-0.036315,-0.022091,-0.011548,-0.063371,-0.082723,-0.109417,-0.13916,-0.174934,-0.227457,-0.312659,0.650984,-0.136428,-0.562089,-0.202211,-0.440702,-0.440853,-0.431913,-0.467814,-0.205869,-0.455166,-0.455213,-0.45302,-0.564727,-0.56318,-0.555204,0.324395,-0.324395,-0.0231,-0.567262,-0.185945,-0.112095,0.638808,3.054149,-1.329812,-0.262121,-0.416421,-0.00255,-0.234957,-0.060515,0.356313,-0.194264,-0.092646,-0.126998,-0.225179,-0.005703,-0.550997,-0.004032,-0.468573,-0.275564,-0.007651,-0.008459,0.967876,-0.103625,-0.387514,-0.029699,-0.053142,-0.076087,-0.195944,0.487175,-0.100846,-0.099729,-0.095487,-0.261343,-0.258124,-0.247602,0.242861,0.252132,-0.181557,-0.12395,-0.140418,-0.313823,-0.253753,-0.042827,-0.196033,-0.041394,2.138285,-0.082782,-0.27329,-0.16898,-0.093269,-0.049479,-0.341411,-0.065283,-0.149481,-0.066354,-0.037377,-0.089691,-0.090662,-0.140875,-0.188513,1.8769,-0.02909,-0.149481,-0.035128,-0.055668,-0.042713,-0.18713,-0.056136,-0.098552,-0.058225,-0.01883,-0.094187,-0.034661,-0.014762,-0.038621,-0.103801,-0.05348,-0.044178,-0.019088,-0.065333,-0.008835,
-0.105232,-0.044104,-0.151279,-0.031509,-0.194354,-0.092949,-0.032124,-0.239507,-0.087585,-0.084047,-0.035908,-0.016628,-0.076968,-0.17257,-0.103304,-0.080379,-0.377813,-0.071751,-0.043358,-0.033659,-0.078848,-0.107173,-0.014428,-0.012624,-0.045345,-0.161651,-0.025575,-0.084964,-0.062249,-0.133669,-0.065833,-0.468635,-0.455284,-0.928991,-0.103064,-0.062699,-0.291208,-0.547236,-0.124004,-0.231267,-0.572048,-0.176135,-0.086733,-0.076281,-0.072886,-0.522963,-0.516781,-0.133215,0.983323,-0.442421,-0.444395,-0.351712,-0.235673,-0.443749,-0.461015,2.218375,-1.020852,-1.020765,-1.020127,-0.698001,-0.698343,-0.698841,0.091853,0.004154,0.039259,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,-0.361658,-0.213734,-0.06739,-0.199466,-0.058766,-0.070987,-0.269947,-0.30862,-0.155837,-0.885565,-0.602452,-0.600864,-0.59305,-0.534146,-0.533388,-0.523697,-0.245215,0.468697,-0.717914,0.664531,-0.625259,-0.499013,-0.577538,-1.265685,-1.388135,1.388176,-0.003607,-0.28919,-0.288724,-0.283095,0.892535,0.377339,0.307263,0.173129,-0.191639,-0.32048,-0.275663,-0.368513,-0.365348,-0.355991,-1.037599,-0.08734,-0.72294,-0.72069,-0.709204,-0.767499,-0.990729,-1.527258,0.043245,-0.004771,-0.062669,-0.00255,-0.059477,-0.054269,-0.034802,-0.100138,-0.016332,-0.090534,-0.024402,-0.006502,-0.022529,-0.018305,-1.564786,-0.009017,-0.123882,-0.310738,-0.013853,3.359858,-0.062538,0.001803,-0.744581,-0.743018,-0.740798,-0.525834,-0.524829,-0.522462,0.650984,-0.136428,-0.562089,-0.202211,-0.011406,-0.016726,-0.373926,2.694436,-0.353932,-0.334584,-0.314523,-0.296432,-0.266452,-0.225656,-0.174123,-0.11257,-0.031509,-0.062486,-0.036315,-0.022091,-0.011548,-0.063371,-0.082723,-0.109417,-0.13916,-0.174934,-0.227457,-0.312659,0.650984,-0.136428,-0.562089,-0.202211,-0.440702,-0.440853,-0.431913,2.137601,-0.205869,-0.455166,-0.455213,-0.45302,-0.564727,-0.56318,-0.555204,0.324395,-0.324395,-0.0231,-0.567262,-0.185945,-0.112095,0.638808,-0.327423,-1.329812,-0.262121,2.401419,-0.00255,-0.234957,-0.060515,0.356313,-0.194264,-0.092646,-0.126998,-0.225179,-0.005703,-0.550997,-0.004032,-0.468573,-0.275564,-0.007651,-0.008459,0.967876,-0.103625,-0.387514,-0.029699,-0.053142,-0.076087,-0.195944,0.487175,-0.100846,-0.099729,-0.095487,-0.261343,-0.258124,-0.247602,-0.591031,-0.589187,-0.181557,-0.12395,-0.140418,3.186507,-0.253753,-0.042827,-0.196033,-0.041394,-0.467664,-0.082782,-0.27329,-0.16898,-0.093269,-0.049479,-0.341411,-0.065283,-0.149481,-0.066354,-0.037377,-0.089691,-0.090662,-0.140875,-0.188513,-0.532794,-0.02909,-0.149481,-0.035128,-0.055668,-0.042713,-0.18713,-0.056136,-0.098552,-0.058225,-0.01883,-0.094187,-0.034661,-0.014762,-0.038621,-0.103801,-0.05348,-0.044178,-0.019088,-0.065333,-0
.008835,-0.105232,-0.044104,-0.151279,-0.031509,-0.194354,-0.092949,-0.032124,-0.239507,-0.087585,-0.084047,-0.035908,60.139641,-0.076968,-0.17257,-0.103304,-0.080379,-0.377813,-0.071751,-0.043358,-0.033659,-0.078848,-0.107173,-0.014428,-0.012624,-0.045345,-0.161651,-0.025575,-0.084964,-0.062249,-0.133669,-0.065833,-0.468635,-0.455284,0.56357,-0.103064,-0.062699,-0.291208,1.827367,-0.124004,-0.231267,-0.572048,-0.176135,-0.086733,-0.076281,-0.072886,-0.522963,-0.516781,-0.133215,0.983323,-0.442421,-0.444395,-0.351712,-0.235673,2.253525,-0.461015,-0.45078,-1.020852,-1.020765,-1.020127,-0.698001,-0.698343,-0.698841,0.046191,-0.010885,0.063702,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


Unnamed: 0,AMT_ANNUITY,AMT_CREDIT,AMT_GOODS_PRICE,AMT_INCOME_TOTAL,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_YEAR,APARTMENTS_AVG,APARTMENTS_MEDI,APARTMENTS_MODE,BASEMENTAREA_AVG,BASEMENTAREA_MEDI,BASEMENTAREA_MODE,BOOL_FLAG_EMAIL,BOOL_FLAG_EMP_PHONE,BOOL_FLAG_OWN_CAR,BOOL_FLAG_OWN_REALTY,BOOL_FLAG_PHONE,BOOL_FLAG_WORK_PHONE,CNT_CHILDREN,CNT_FAM_MEMBERS,CODE_GENDER_F,CODE_GENDER_M,CODE_GENDER_XNA,COMMONAREA_AVG,COMMONAREA_MEDI,COMMONAREA_MODE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_ID_PUBLISH,DAYS_LAST_PHONE_CHANGE,DAYS_REGISTRATION,DEF_30_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,ELEVATORS_AVG,ELEVATORS_MEDI,ELEVATORS_MODE,EMERGENCYSTATE_MODE_No,EMERGENCYSTATE_MODE_Yes,ENTRANCES_AVG,ENTRANCES_MEDI,ENTRANCES_MODE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,FLAG_CONT_MOBILE,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_2,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_MOBIL,FLOORSMAX_AVG,FLOORSMAX_MEDI,FLOORSMAX_MODE,FLOORSMIN_AVG,FLOORSMIN_MEDI,FLOORSMIN_MODE,FONDKAPREMONT_MODE_not specified,FONDKAPREMONT_MODE_org spec account,FONDKAPREMONT_MODE_reg oper account,FONDKAPREMONT_MODE_reg oper spec 
account,HOUR_APPR_PROCESS_START_0,HOUR_APPR_PROCESS_START_1,HOUR_APPR_PROCESS_START_10,HOUR_APPR_PROCESS_START_11,HOUR_APPR_PROCESS_START_12,HOUR_APPR_PROCESS_START_13,HOUR_APPR_PROCESS_START_14,HOUR_APPR_PROCESS_START_15,HOUR_APPR_PROCESS_START_16,HOUR_APPR_PROCESS_START_17,HOUR_APPR_PROCESS_START_18,HOUR_APPR_PROCESS_START_19,HOUR_APPR_PROCESS_START_2,HOUR_APPR_PROCESS_START_20,HOUR_APPR_PROCESS_START_21,HOUR_APPR_PROCESS_START_22,HOUR_APPR_PROCESS_START_23,HOUR_APPR_PROCESS_START_3,HOUR_APPR_PROCESS_START_4,HOUR_APPR_PROCESS_START_5,HOUR_APPR_PROCESS_START_6,HOUR_APPR_PROCESS_START_7,HOUR_APPR_PROCESS_START_8,HOUR_APPR_PROCESS_START_9,HOUSETYPE_MODE_not specified,HOUSETYPE_MODE_org spec account,HOUSETYPE_MODE_reg oper account,HOUSETYPE_MODE_reg oper spec account,LANDAREA_AVG,LANDAREA_MEDI,LANDAREA_MODE,LIVE_CITY_NOT_WORK_CITY,LIVE_REGION_NOT_WORK_REGION,LIVINGAPARTMENTS_AVG,LIVINGAPARTMENTS_MEDI,LIVINGAPARTMENTS_MODE,LIVINGAREA_AVG,LIVINGAREA_MEDI,LIVINGAREA_MODE,NAME_CONTRACT_TYPE_Cash loans,NAME_CONTRACT_TYPE_Revolving loans,NAME_EDUCATION_TYPE_Academic degree,NAME_EDUCATION_TYPE_Higher education,NAME_EDUCATION_TYPE_Incomplete higher,NAME_EDUCATION_TYPE_Lower secondary,NAME_EDUCATION_TYPE_Secondary / secondary special,NAME_FAMILY_STATUS_Civil marriage,NAME_FAMILY_STATUS_Married,NAME_FAMILY_STATUS_Separated,NAME_FAMILY_STATUS_Single / not married,NAME_FAMILY_STATUS_Unknown,NAME_FAMILY_STATUS_Widow,NAME_HOUSING_TYPE_Co-op apartment,NAME_HOUSING_TYPE_House / apartment,NAME_HOUSING_TYPE_Municipal apartment,NAME_HOUSING_TYPE_Office apartment,NAME_HOUSING_TYPE_Rented apartment,NAME_HOUSING_TYPE_With parents,NAME_INCOME_TYPE_Businessman,NAME_INCOME_TYPE_Commercial associate,NAME_INCOME_TYPE_Maternity leave,NAME_INCOME_TYPE_Pensioner,NAME_INCOME_TYPE_State servant,NAME_INCOME_TYPE_Student,NAME_INCOME_TYPE_Unemployed,NAME_INCOME_TYPE_Working,NAME_TYPE_SUITE_Children,NAME_TYPE_SUITE_Family,NAME_TYPE_SUITE_Group of 
people,NAME_TYPE_SUITE_Other_A,NAME_TYPE_SUITE_Other_B,"NAME_TYPE_SUITE_Spouse, partner",NAME_TYPE_SUITE_Unaccompanied,NONLIVINGAPARTMENTS_AVG,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_AVG,NONLIVINGAREA_MEDI,NONLIVINGAREA_MODE,OBS_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,OCCUPATION_TYPE_Accountants,OCCUPATION_TYPE_Cleaning staff,OCCUPATION_TYPE_Cooking staff,OCCUPATION_TYPE_Core staff,OCCUPATION_TYPE_Drivers,OCCUPATION_TYPE_HR staff,OCCUPATION_TYPE_High skill tech staff,OCCUPATION_TYPE_IT staff,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Realty agents,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Secretaries,OCCUPATION_TYPE_Security staff,OCCUPATION_TYPE_Waiters/barmen staff,ORGANIZATION_TYPE_Advertising,ORGANIZATION_TYPE_Agriculture,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Business Entity Type 1,ORGANIZATION_TYPE_Business Entity Type 2,ORGANIZATION_TYPE_Business Entity Type 3,ORGANIZATION_TYPE_Cleaning,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Culture,ORGANIZATION_TYPE_Electricity,ORGANIZATION_TYPE_Emergency,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Hotel,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Industry: type 1,ORGANIZATION_TYPE_Industry: type 10,ORGANIZATION_TYPE_Industry: type 11,ORGANIZATION_TYPE_Industry: type 12,ORGANIZATION_TYPE_Industry: type 13,ORGANIZATION_TYPE_Industry: type 2,ORGANIZATION_TYPE_Industry: type 3,ORGANIZATION_TYPE_Industry: type 4,ORGANIZATION_TYPE_Industry: type 5,ORGANIZATION_TYPE_Industry: type 6,ORGANIZATION_TYPE_Industry: type 7,ORGANIZATION_TYPE_Industry: type 8,ORGANIZATION_TYPE_Industry: type 9,ORGANIZATION_TYPE_Insurance,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Legal 
Services,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Mobile,ORGANIZATION_TYPE_Other,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Realtor,ORGANIZATION_TYPE_Religion,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Security Ministries,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Telecom,ORGANIZATION_TYPE_Trade: type 1,ORGANIZATION_TYPE_Trade: type 2,ORGANIZATION_TYPE_Trade: type 3,ORGANIZATION_TYPE_Trade: type 4,ORGANIZATION_TYPE_Trade: type 5,ORGANIZATION_TYPE_Trade: type 6,ORGANIZATION_TYPE_Trade: type 7,ORGANIZATION_TYPE_Transport: type 1,ORGANIZATION_TYPE_Transport: type 2,ORGANIZATION_TYPE_Transport: type 3,ORGANIZATION_TYPE_Transport: type 4,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_XNA,OWN_CAR_AGE,REGION_POPULATION_RELATIVE,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,TOTALAREA_MODE,WALLSMATERIAL_MODE_Block,WALLSMATERIAL_MODE_Mixed,WALLSMATERIAL_MODE_Monolithic,WALLSMATERIAL_MODE_Others,WALLSMATERIAL_MODE_Panel,"WALLSMATERIAL_MODE_Stone, brick",WALLSMATERIAL_MODE_Wooden,WALLSMATERIAL_MODE_not 
specified,WEEKDAY_APPR_PROCESS_START_FRIDAY,WEEKDAY_APPR_PROCESS_START_MONDAY,WEEKDAY_APPR_PROCESS_START_SATURDAY,WEEKDAY_APPR_PROCESS_START_SUNDAY,WEEKDAY_APPR_PROCESS_START_THURSDAY,WEEKDAY_APPR_PROCESS_START_TUESDAY,WEEKDAY_APPR_PROCESS_START_WEDNESDAY,YEARS_BEGINEXPLUATATION_AVG,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_AVG,YEARS_BUILD_MEDI,YEARS_BUILD_MODE,PER_CREDIT_INCOME,PER_GOODS_CREDIT,PER_ANNUITY_INCOME,NUM_BUREAU_REPORTS,SUM_BUREAU_BALANCES,SUM_CREDIT_CURRENCY_CODE_0,SUM_CREDIT_CURRENCY_CODE_1,SUM_CREDIT_CURRENCY_CODE_2,SUM_CREDIT_CURRENCY_CODE_3,SUM_CREDIT_ACTIVE_CODE_0,SUM_CREDIT_ACTIVE_CODE_1,SUM_CREDIT_ACTIVE_CODE_2,SUM_CREDIT_ACTIVE_CODE_3,SUM_CREDIT_TYPE_CODE_0,SUM_CREDIT_TYPE_CODE_1,SUM_CREDIT_TYPE_CODE_2,SUM_CREDIT_TYPE_CODE_3,SUM_CREDIT_TYPE_CODE_4,SUM_CREDIT_TYPE_CODE_5,SUM_CREDIT_TYPE_CODE_6,SUM_CREDIT_TYPE_CODE_7,SUM_CREDIT_TYPE_CODE_8,SUM_CREDIT_TYPE_CODE_9,SUM_CREDIT_TYPE_CODE_10,SUM_CREDIT_TYPE_CODE_11,SUM_CREDIT_TYPE_CODE_12,SUM_CREDIT_TYPE_CODE_13,SUM_CREDIT_TYPE_CODE_14,SUM_STATUS_0,SUM_STATUS_1,SUM_STATUS_2,SUM_STATUS_3,SUM_STATUS_4,SUM_STATUS_5,SUM_STATUS_C,SUM_STATUS_X,MED_DAYS_CREDIT,SUM_CREDIT_DAYS_OVERDUE,SUM_DAYS_CREDIT_ENDDATE,SUM_DAYS_ENDDATE_FACT,SUM_AMT_CREDIT_MAX_OVERDUE,SUM_CNT_CREDIT_PROLONG,SUM_AMT_CREDIT_SUM,SUM_AMT_CREDIT_SUM_DEBT,SUM_AMT_CREDIT_SUM_LIMIT,SUM_DAYS_CREDIT_UPDATE
0,-0.451692,-0.075097,-0.237829,-0.142533,-0.058766,-0.070987,-0.269947,-0.30862,-0.155837,-0.885565,0.084996,0.088601,0.115333,0.324811,0.329103,0.35883,4.07805,0.468697,-0.717914,0.664531,-0.625259,-0.499013,-0.577538,-0.167621,0.720391,-0.72037,-0.003607,-0.28919,-0.288724,-0.283095,0.734193,0.046492,-1.445696,0.939934,0.052196,-0.32048,-0.275663,-0.368513,-0.365348,-0.355991,0.963763,-0.08734,0.618104,0.620766,0.647424,1.8698,1.436908,-0.932428,0.043245,-0.004771,-0.062669,-0.00255,-0.059477,-0.054269,-0.034802,-0.100138,-0.016332,-0.090534,-0.024402,-0.006502,-0.022529,-0.018305,0.639065,-0.009017,-0.123882,-0.310738,-0.013853,-0.297632,-0.062538,0.001803,0.074128,0.075366,0.088286,-0.525834,-0.524829,-0.522462,0.650984,-0.136428,-0.562089,-0.202211,-0.011406,-0.016726,-0.373926,-0.371135,-0.353932,-0.334584,-0.314523,-0.296432,-0.266452,-0.225656,5.743072,-0.11257,-0.031509,-0.062486,-0.036315,-0.022091,-0.011548,-0.063371,-0.082723,-0.109417,-0.13916,-0.174934,-0.227457,-0.312659,0.650984,-0.136428,-0.562089,-0.202211,-0.440702,-0.440853,-0.431913,-0.467814,-0.205869,-0.455166,-0.455213,-0.45302,-0.031586,-0.028042,-0.001921,0.324395,-0.324395,-0.0231,1.762853,-0.185945,-0.112095,-1.565416,-0.327423,0.751986,-0.262121,-0.416421,-0.00255,-0.234957,-0.060515,0.356313,-0.194264,-0.092646,-0.126998,-0.225179,-0.005703,-0.550997,-0.004032,-0.468573,-0.275564,-0.007651,-0.008459,0.967876,-0.103625,-0.387514,-0.029699,-0.053142,-0.076087,-0.195944,0.487175,-0.100846,-0.099729,-0.095487,-0.261343,-0.258124,-0.247602,-0.591031,-0.589187,-0.181557,-0.12395,-0.140418,-0.313823,-0.253753,-0.042827,-0.196033,-0.041394,-0.467664,-0.082782,-0.27329,-0.16898,-0.093269,-0.049479,-0.341411,-0.065283,-0.149481,-0.066354,-0.037377,-0.089691,-0.090662,-0.140875,-0.188513,-0.532794,-0.02909,-0.149481,-0.035128,-0.055668,-0.042713,-0.18713,-0.056136,-0.098552,-0.058225,-0.01883,-0.094187,-0.034661,-0.014762,-0.038621,-0.103801,-0.05348,-0.044178,-0.019088,-0.065333,-0.008835,-0.105
232,-0.044104,6.610323,-0.031509,-0.194354,-0.092949,-0.032124,-0.239507,-0.087585,-0.084047,-0.035908,-0.016628,-0.076968,-0.17257,-0.103304,-0.080379,-0.377813,-0.071751,-0.043358,-0.033659,-0.078848,-0.107173,-0.014428,-0.012624,-0.045345,-0.161651,-0.025575,-0.084964,-0.062249,-0.133669,-0.065833,-0.468635,-0.455284,-0.14591,-0.103064,-0.062699,-0.291208,-0.547236,-0.124004,-0.231267,-0.149338,-0.176135,-0.086733,-0.076281,-0.072886,-0.522963,1.935056,-0.133215,-1.016959,-0.442421,-0.444395,-0.351712,-0.235673,-0.443749,2.169126,-0.45078,0.963017,0.962899,0.963691,-0.698001,-0.698343,-0.698841,1.959564,1.082492,3.134419,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,-0.671811,-0.934825,-0.968283,-0.294354,-0.058766,-0.070987,-0.269947,-0.30862,-0.155837,0.730973,-0.602452,-0.600864,-0.59305,-0.534146,-0.533388,-0.523697,-0.245215,0.468697,-0.717914,0.664531,-0.625259,-0.499013,-0.577538,-0.167621,-1.388135,1.388176,-0.003607,-0.28919,-0.288724,-0.283095,0.464485,1.045097,-0.908413,-1.164546,1.17287,-0.32048,-0.275663,-0.368513,-0.365348,-0.355991,-1.037599,-0.08734,-0.72294,-0.72069,-0.709204,1.21233,-1.152337,0.087207,0.043245,-0.004771,-0.062669,-0.00255,-0.059477,-0.054269,-0.034802,-0.100138,-0.016332,-0.090534,-0.024402,-0.006502,-0.022529,-0.018305,0.639065,-0.009017,-0.123882,-0.310738,-0.013853,-0.297632,-0.062538,0.001803,-0.744581,-0.743018,-0.740798,-0.525834,-0.524829,-0.522462,0.650984,-0.136428,-0.562089,-0.202211,-0.011406,-0.016726,-0.373926,-0.371135,-0.353932,-0.334584,-0.314523,-0.296432,-0.266452,-0.225656,-0.174123,-0.11257,-0.031509,-0.062486,-0.036315,-0.022091,-0.011548,-0.063371,-0.082723,-0.109417,-0.13916,-0.174934,-0.227457,3.198372,0.650984,-0.136428,-0.562089,-0.202211,-0.440702,-0.440853,-0.431913,-0.467814,-0.205869,-0.455166,-0.455213,-0.45302,-0.564727,-0.56318,-0.555204,0.324395,-0.324395,-0.0231,-0.567262,-0.185945,-0.112095,0.638808,-0.327423,0.751986,-0.262121,-0.416421,-0.00255,-0.234957,-0.060515,0.356313,-0.194264,-0.092646,-0.126998,-0.225179,-0.005703,-0.550997,-0.004032,-0.468573,-0.275564,-0.007651,-0.008459,0.967876,-0.103625,-0.387514,-0.029699,-0.053142,-0.076087,-0.195944,0.487175,-0.100846,-0.099729,-0.095487,-0.261343,-0.258124,-0.247602,-0.591031,-0.589187,-0.181557,-0.12395,-0.140418,-0.313823,-0.253753,-0.042827,-0.196033,-0.041394,-0.467664,12.079882,-0.27329,-0.16898,-0.093269,-0.049479,-0.341411,-0.065283,-0.149481,-0.066354,-0.037377,-0.089691,-0.090662,-0.140875,-0.188513,-0.532794,-0.02909,-0.149481,-0.035128,-0.055668,-0.042713,-0.18713,-0.056136,-0.098552,-0.058225,-0.01883,-0.094187,-0.034661,-0.014762,-0.038621,-0.103801,-0.05348,-0.044178,-0.019088,-0.065333,-0.
008835,-0.105232,-0.044104,-0.151279,-0.031509,-0.194354,-0.092949,-0.032124,-0.239507,-0.087585,-0.084047,-0.035908,-0.016628,-0.076968,-0.17257,-0.103304,-0.080379,2.646809,-0.071751,-0.043358,-0.033659,-0.078848,-0.107173,-0.014428,-0.012624,-0.045345,-0.161651,-0.025575,-0.084964,-0.062249,-0.133669,-0.065833,-0.468635,-0.455284,1.078997,-0.103064,-0.062699,-0.291208,-0.547236,-0.124004,-0.231267,-0.572048,-0.176135,-0.086733,-0.076281,-0.072886,-0.522963,-0.516781,-0.133215,0.983323,2.260293,-0.444395,-0.351712,-0.235673,-0.443749,-0.461015,-0.45078,-1.020852,-1.020765,-1.020127,-0.698001,-0.698343,-0.698841,2.589443,0.995877,2.847896,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2.943856,0.159601,0.24914,0.142129,-0.058766,-0.070987,-0.269947,1.035362,-0.155837,1.269819,-0.602452,-0.600864,-0.59305,-0.534146,-0.533388,-0.523697,-0.245215,0.468697,1.392925,0.664531,-0.625259,-0.499013,-0.577538,-0.167621,-1.388135,1.388176,-0.003607,-0.28919,-0.288724,-0.283095,0.916824,1.039964,0.337075,-0.129239,-0.797961,-0.32048,-0.275663,-0.368513,-0.365348,-0.355991,-1.037599,-0.08734,-0.72294,-0.72069,-0.709204,-0.767499,0.96966,0.75106,0.043245,-0.004771,-0.062669,-0.00255,-0.059477,-0.054269,-0.034802,-0.100138,-0.016332,-0.090534,-0.024402,-0.006502,-0.022529,-0.018305,-1.564786,-0.009017,-0.123882,-0.310738,-0.013853,3.359858,-0.062538,0.001803,-0.744581,-0.743018,-0.740798,-0.525834,-0.524829,-0.522462,0.650984,-0.136428,-0.562089,-0.202211,-0.011406,-0.016726,-0.373926,-0.371135,-0.353932,-0.334584,3.179418,-0.296432,-0.266452,-0.225656,-0.174123,-0.11257,-0.031509,-0.062486,-0.036315,-0.022091,-0.011548,-0.063371,-0.082723,-0.109417,-0.13916,-0.174934,-0.227457,-0.312659,0.650984,-0.136428,-0.562089,-0.202211,-0.440702,-0.440853,-0.431913,-0.467814,-0.205869,-0.455166,-0.455213,-0.45302,-0.564727,-0.56318,-0.555204,0.324395,-0.324395,-0.0231,1.762853,-0.185945,-0.112095,-1.565416,-0.327423,0.751986,-0.262121,-0.416421,-0.00255,-0.234957,-0.060515,0.356313,-0.194264,-0.092646,-0.126998,-0.225179,-0.005703,-0.550997,-0.004032,-0.468573,-0.275564,-0.007651,-0.008459,0.967876,-0.103625,-0.387514,-0.029699,-0.053142,-0.076087,-0.195944,-2.052651,-0.100846,-0.099729,-0.095487,-0.261343,-0.258124,-0.247602,-0.591031,-0.589187,-0.181557,-0.12395,-0.140418,-0.313823,3.940835,-0.042827,-0.196033,-0.041394,-0.467664,-0.082782,-0.27329,-0.16898,-0.093269,-0.049479,-0.341411,-0.065283,-0.149481,-0.066354,-0.037377,-0.089691,-0.090662,-0.140875,-0.188513,-0.532794,-0.02909,-0.149481,-0.035128,-0.055668,-0.042713,-0.18713,-0.056136,-0.098552,-0.058225,-0.01883,-0.094187,-0.034661,-0.014762,-0.038621,-0.103801,-0.05348,-0.044178,-0.019088,-0.065333,-0.008835
,-0.105232,-0.044104,-0.151279,-0.031509,-0.194354,-0.092949,-0.032124,-0.239507,-0.087585,-0.084047,-0.035908,-0.016628,-0.076968,-0.17257,-0.103304,-0.080379,-0.377813,-0.071751,-0.043358,-0.033659,-0.078848,-0.107173,-0.014428,-0.012624,-0.045345,-0.161651,-0.025575,-0.084964,16.064424,-0.133669,-0.065833,-0.468635,0.099686,-0.127762,-0.103064,-0.062699,-0.291208,-0.547236,-0.124004,-0.231267,-0.572048,-0.176135,-0.086733,-0.076281,-0.072886,-0.522963,-0.516781,-0.133215,0.983323,-0.442421,2.250251,-0.351712,-0.235673,-0.443749,-0.461015,-0.45078,-1.020852,-1.020765,-1.020127,-0.698001,-0.698343,-0.698841,2.667978,0.891096,-2.353927,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1.511684,2.42484,2.805729,0.616567,-0.058766,-0.070987,-0.269947,-0.30862,-0.155837,0.730973,2.576474,2.588689,2.684276,2.33972,2.352302,2.431038,-0.245215,0.468697,-0.717914,0.664531,1.599337,-0.499013,2.192084,2.028507,0.720391,-0.72037,-0.003607,2.216957,2.232013,2.313723,-0.472274,-0.169561,0.804133,1.018549,-0.847636,-0.32048,-0.275663,2.829962,2.840733,2.940974,0.963763,-0.08734,1.960122,1.963195,2.005034,1.074769,-0.018778,0.757447,0.043245,-0.004771,-0.062669,-0.00255,-0.059477,-0.054269,-0.034802,-0.100138,-0.016332,-0.090534,-0.024402,-0.006502,-0.022529,-0.018305,0.639065,-0.009017,-0.123882,-0.310738,-0.013853,-0.297632,-0.062538,0.001803,1.711545,1.712134,1.746454,-0.231733,-0.23095,-0.225333,-1.536135,-0.136428,1.779079,-0.202211,-0.011406,-0.016726,-0.373926,2.694436,-0.353932,-0.334584,-0.314523,-0.296432,-0.266452,-0.225656,-0.174123,-0.11257,-0.031509,-0.062486,-0.036315,-0.022091,-0.011548,-0.063371,-0.082723,-0.109417,-0.13916,-0.174934,-0.227457,-0.312659,-1.536135,-0.136428,1.779079,-0.202211,2.898903,2.9165,2.987328,-0.467814,-0.205869,2.976039,2.995899,3.105407,3.312953,3.329587,3.470298,0.324395,-0.324395,-0.0231,-0.567262,-0.185945,-0.112095,0.638808,-0.327423,0.751986,-0.262121,-0.416421,-0.00255,-0.234957,-0.060515,0.356313,-0.194264,-0.092646,-0.126998,-0.225179,-0.005703,-0.550997,-0.004032,-0.468573,-0.275564,-0.007651,-0.008459,0.967876,-0.103625,-0.387514,-0.029699,-0.053142,-0.076087,-0.195944,0.487175,1.344864,1.363567,1.409118,1.383595,1.408237,1.483945,-0.591031,-0.589187,-0.181557,-0.12395,-0.140418,-0.313823,-0.253753,-0.042827,-0.196033,-0.041394,-0.467664,-0.082782,-0.27329,-0.16898,-0.093269,-0.049479,2.929025,-0.065283,-0.149481,-0.066354,-0.037377,-0.089691,-0.090662,-0.140875,-0.188513,1.8769,-0.02909,-0.149481,-0.035128,-0.055668,-0.042713,-0.18713,-0.056136,-0.098552,-0.058225,-0.01883,-0.094187,-0.034661,-0.014762,-0.038621,-0.103801,-0.05348,-0.044178,-0.019088,-0.065333,-0.008835,-0.105232,-0.044104,-0.151279,-0.03
1509,-0.194354,-0.092949,-0.032124,-0.239507,-0.087585,-0.084047,-0.035908,-0.016628,-0.076968,-0.17257,-0.103304,-0.080379,-0.377813,-0.071751,-0.043358,-0.033659,-0.078848,-0.107173,-0.014428,-0.012624,-0.045345,-0.161651,-0.025575,-0.084964,-0.062249,-0.133669,-0.065833,-0.468635,-0.455284,0.399377,-0.103064,-0.062699,-0.291208,-0.547236,-0.124004,-0.231267,3.417812,-0.176135,-0.086733,-0.076281,-0.072886,1.912181,-0.516781,-0.133215,-1.016959,-0.442421,-0.444395,-0.351712,-0.235673,-0.443749,-0.461015,2.218375,1.011533,1.011411,1.012206,1.957837,1.948663,1.939503,57.203297,1.404329,-19.56276,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0.342165,0.065776,0.236966,0.047242,-0.058766,-0.070987,-0.269947,-0.30862,-0.155837,-0.885565,-0.602452,-0.600864,-0.59305,-0.534146,-0.533388,-0.523697,-0.245215,0.468697,1.392925,-1.50482,-0.625259,2.003956,0.807273,0.930443,-1.388135,1.388176,-0.003607,-0.28919,-0.288724,-0.283095,-0.686757,-0.017904,0.839908,-0.17157,-0.279919,-0.32048,-0.275663,-0.368513,-0.365348,-0.355991,-1.037599,-0.08734,-0.72294,-0.72069,-0.709204,-0.059146,-0.455466,-1.527258,0.043245,-0.004771,-0.062669,-0.00255,-0.059477,-0.054269,-0.034802,-0.100138,-0.016332,-0.090534,-0.024402,-0.006502,-0.022529,-0.018305,0.639065,-0.009017,-0.123882,-0.310738,-0.013853,-0.297632,-0.062538,0.001803,-0.744581,-0.743018,-0.740798,-0.525834,-0.524829,-0.522462,0.650984,-0.136428,-0.562089,-0.202211,-0.011406,-0.016726,-0.373926,-0.371135,-0.353932,-0.334584,-0.314523,-0.296432,-0.266452,-0.225656,-0.174123,-0.11257,-0.031509,-0.062486,-0.036315,-0.022091,-0.011548,-0.063371,-0.082723,9.139337,-0.13916,-0.174934,-0.227457,-0.312659,0.650984,-0.136428,-0.562089,-0.202211,-0.440702,-0.440853,-0.431913,2.137601,-0.205869,-0.455166,-0.455213,-0.45302,-0.564727,-0.56318,-0.555204,0.324395,-0.324395,-0.0231,-0.567262,-0.185945,-0.112095,0.638808,-0.327423,0.751986,-0.262121,-0.416421,-0.00255,-0.234957,-0.060515,0.356313,-0.194264,-0.092646,-0.126998,-0.225179,-0.005703,-0.550997,-0.004032,-0.468573,-0.275564,-0.007651,-0.008459,0.967876,-0.103625,-0.387514,-0.029699,-0.053142,-0.076087,-0.195944,0.487175,-0.100846,-0.099729,-0.095487,-0.261343,-0.258124,-0.247602,-0.591031,-0.589187,-0.181557,-0.12395,-0.140418,-0.313823,-0.253753,-0.042827,-0.196033,-0.041394,-0.467664,-0.082782,-0.27329,-0.16898,-0.093269,-0.049479,-0.341411,-0.065283,-0.149481,-0.066354,-0.037377,-0.089691,-0.090662,-0.140875,-0.188513,1.8769,-0.02909,-0.149481,-0.035128,-0.055668,-0.042713,-0.18713,-0.056136,-0.098552,-0.058225,-0.01883,-0.094187,-0.034661,-0.014762,-0.038621,-0.103801,-0.05348,-0.044178,-0.019088,-0.065333,-0.008835
,-0.105232,-0.044104,-0.151279,-0.031509,-0.194354,-0.092949,-0.032124,-0.239507,-0.087585,-0.084047,-0.035908,-0.016628,-0.076968,-0.17257,-0.103304,-0.080379,-0.377813,-0.071751,-0.043358,-0.033659,-0.078848,-0.107173,-0.014428,-0.012624,-0.045345,-0.161651,-0.025575,-0.084964,-0.062249,-0.133669,-0.065833,-0.468635,1.32062,-0.783451,-0.103064,-0.062699,-0.291208,1.827367,-0.124004,-0.231267,-0.572048,-0.176135,-0.086733,-0.076281,-0.072886,-0.522963,-0.516781,-0.133215,0.983323,2.260293,-0.444395,-0.351712,-0.235673,-0.443749,-0.461015,-0.45078,-1.020852,-1.020765,-1.020127,-0.698001,-0.698343,-0.698841,2.378808,0.836596,2.773216,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [5]:
# Load the lists of features identified in the exploration step as needing preprocessing.
# Each CSV is read with its first column as the index and a single "feature" column.

# Loading of the non-numeric and string-to-bool lists is disabled; both stay empty.
#non_numeric_features = pd.read_csv('data/tmp/bureau_non_numeric_features.csv', header=0, index_col=0, names=["feature"])
#string_to_bool_features = pd.read_csv('data/tmp/bureau_string_to_bool.csv', header=0, index_col=0, names=["feature"])
non_numeric_features = []
string_to_bool_features = []

numeric_features = pd.read_csv(
    'data/tmp/bureau_numeric_features.csv',
    header=0,
    index_col=0,
    names=["feature"],
)
log_transform_features = pd.read_csv(
    'data/tmp/bureau_log_transform.csv',
    header=0,
    index_col=0,
    names=["feature"],
)

In [6]:
# Preview the first rows of the numeric feature list loaded from CSV.
display(numeric_features.head())

Unnamed: 0,feature
0,NUM_BUREAU_REPORTS
1,SUM_BUREAU_BALANCES
2,SUM_CREDIT_CURRENCY_CODE_0
3,SUM_CREDIT_CURRENCY_CODE_1
4,SUM_CREDIT_CURRENCY_CODE_2


In [7]:
# Report how many features fall into each preprocessing category.
print("Non-Numeric {0}, Numeric {1}, Bool {2}, Log Transform {3}".format(
    *map(len, (non_numeric_features, numeric_features,
               string_to_bool_features, log_transform_features))))

Non-Numeric 0, Numeric 42, Bool 0, Log Transform 1


In [8]:
def scale_features(dataFrame, featureList, feature_scaler=None):
    '''
    Scales a list of numeric features and returns the result as a new DataFrame.

    dataFrame      -- source DataFrame; it is left unmodified.
    featureList    -- list of column names to scale.
    feature_scaler -- optional object exposing fit_transform(), e.g. MinMaxScaler
                      for a [0 .. 1] range.  When omitted, the notebook-level
                      `scaler` variable is used, so that variable must already
                      be defined when this function is called.

    Returns a copy of dataFrame in which the featureList columns hold the
    scaled values.  The resulting range depends on the scaler that is used.
    '''
    active_scaler = scaler if feature_scaler is None else feature_scaler

    # Take an explicit copy: pd.DataFrame(data=dataFrame) can share its data with
    # the input, in which case the assignment below would also alter the caller's frame.
    dataFrame_transform = dataFrame.copy()
    dataFrame_transform[featureList] = active_scaler.fit_transform(dataFrame_transform[featureList])
    return dataFrame_transform

In [9]:
def make_bool(dataFrame, featureName):
    '''
    Replaces a Y/N column with a binary 1/0 column, modifying dataFrame in place.

    The encoded values are stored under "BOOL_<featureName>" and the original
    column is removed.  Returns the same (mutated) dataFrame.
    '''
    encoded_column = "BOOL_{0}".format(featureName)
    binarizer = LabelBinarizer()
    dataFrame[encoded_column] = binarizer.fit_transform(dataFrame[featureName])

    # Remove the source column with `del`, which acts on this very object.
    # This seemed to work more consistently than df.drop, see:
    # https://stackoverflow.com/questions/43838198/df-drop-is-not-working
    del dataFrame[featureName]

    return dataFrame

In [10]:
def make_numeric(dataFrame, featureName):
    '''
    Returns the values of one column with NaN, Inf and -Inf converted to finite
    numbers (via np.nan_to_num).

    dataFrame itself is not modified; callers assign the returned array back to
    the column when they want the cleaned values stored.
    '''
    column = dataFrame[featureName]
    cleaned = np.nan_to_num(column)
    return cleaned

In [11]:
def abs_transform(dataFrame, featureName):
    '''
    Replaces a numeric column with its absolute values, modifying dataFrame in place.

    dataFrame   -- DataFrame holding the column; it is mutated.
    featureName -- name of the numeric column to transform.

    Returns the same (mutated) dataFrame so calls can be written as
    `df = abs_transform(df, name)`.
    '''
    # Vectorised Series.abs() instead of calling abs() once per row through apply().
    dataFrame[featureName] = dataFrame[featureName].abs()

    return dataFrame

In [12]:
def log_transform(dataFrame, featureName):
    '''
    Replaces a numeric column with its log(1 + x) transform, modifying dataFrame in place.

    The transformed values are stored under "LOG_<featureName>" and the original
    column is removed.  The new column name is printed.

    dataFrame   -- DataFrame holding the column; it is mutated.
    featureName -- name of the numeric column to transform.

    Returns the same (mutated) dataFrame.
    '''
    transformed_name = "LOG_{0}".format(featureName)

    print("Transformed Name: {0}".format(transformed_name))

    # np.log1p computes log(1 + x) for the whole column at once and stays accurate
    # for values close to zero, where computing log(x + 1) would round to 0.
    dataFrame[transformed_name] = np.log1p(dataFrame[featureName])

    # Remove the source column with `del`, which acts on this very object.
    # From: https://stackoverflow.com/questions/43838198/df-drop-is-not-working
    del dataFrame[featureName]

    return dataFrame

In [13]:
def find_offset(dataFrames, featureName):
    '''
    Computes the shift that makes every value of a column strictly positive.

    Looks up the smallest value of featureName across all of the given
    dataFrames and returns abs(smallest) + 1.  When no frame holds a negative
    value (or no frames are given) the result is 1.
    '''
    column_minimums = [np.min(frame[featureName]) for frame in dataFrames]

    # Seed the search with 0 so that only negative minimums can win.
    lowest = min([0] + column_minimums)

    return abs(lowest) + 1
            
            
def offset_negative_values(dataFrame, featureName, offset):
    '''
    Returns a column shifted by a constant offset, together with that offset.

    dataFrame   -- DataFrame holding the column; it is not modified.
    featureName -- name of the numeric column to shift.
    offset      -- value added to every entry of the column.

    Returns a tuple (shifted_series, offset).  The feature name and offset are
    printed.
    '''
    print("Offsetting Feature {0} by {1}".format(featureName, offset))

    # Vectorised addition instead of adding the offset row by row through apply().
    shifted = dataFrame[featureName] + offset
    return shifted, offset

In [14]:
# Tukey's Method for detecting outliers
# Adapted from the customer segments exercise

def return_outliers(dataFrame, featureList):
    '''
    Identifies outliers per feature using Tukey's fences (1.5 * IQR).

    dataFrame   -- DataFrame to inspect.  Note: each inspected column is
                   overwritten in place with its np.nan_to_num() version.
    featureList -- iterable of column names to inspect.

    Returns a list holding a single list with one dict per feature.  Each dict
    has the keys 'Q1_percentile', 'Q3_percentile', 'step', 'feature',
    'feature_min', 'feature_max', 'feature_median', 'IQ_median' and 'outliers'
    (the index labels of the rows outside the fences).  The total number of
    outliers found is printed.
    '''
    total_flagged = 0
    per_feature = []

    for feature in featureList:

        # Make the column finite first so the percentiles are well defined.
        dataFrame[feature] = np.nan_to_num(dataFrame[feature])
        values = dataFrame[feature]

        # 25th and 75th percentiles, and the 1.5 * IQR outlier step
        lower_quartile = np.percentile(values, 25)
        upper_quartile = np.percentile(values, 75)
        fence = (upper_quartile - lower_quartile) * 1.5

        # Rows lying below the lower fence or above the upper fence
        too_low = values < lower_quartile - fence
        too_high = values > upper_quartile + fence
        flagged = dataFrame.index[too_low | too_high]

        per_feature.append({
            'Q1_percentile': lower_quartile,
            'Q3_percentile': upper_quartile,
            'step': fence,
            'feature': feature,
            'feature_min': np.min(values),
            'feature_max': np.max(values),
            'feature_median': np.median(values),
            # Midpoint of the interquartile range.
            # TODO: This just sounds good intuitively.  It's probably worth researching the actual best practice.
            'IQ_median': np.median([lower_quartile, upper_quartile]),
            'outliers': flagged,
        })

        # Running total of flagged values across all features
        total_flagged += len(flagged)

    print("Total Outliers Identified: {0}".format(total_flagged))

    return [per_feature]

In [15]:
def replace_outliers(dataFrame, outliers):
    '''
    Replaces previously identified outliers with the median of the interquartile range.

    dataFrame -- DataFrame to correct; it is modified in place and also returned.
    outliers  -- structure produced by return_outliers(): a list of lists of
                 per-feature dicts.  The keys read here are 'feature' (column
                 name), 'IQ_median' (replacement value) and 'outliers' (index
                 labels of the rows to replace).

    Features whose IQ_median is not >= 1 are left untouched.  The number of
    replaced values is printed.

    Returns the same (mutated) dataFrame.
    '''
    count = 0

    for frame_stats in outliers:

        for feature in frame_stats:

            # Some features have infrequent but meaningful values, and averaging to 0 would be a problem.
            # We'll just skip these when they come up.
            # example: "How many credit applications were made in the past hour?"
            if not (feature['IQ_median'] >= 1):
                continue

            featureName = feature['feature']
            median = feature['IQ_median']
            outlier_labels = feature['outliers']
            print("Processing feature {0}".format(featureName))

            # Assigning through .loc to an unknown column would silently create it,
            # so report and skip such features explicitly.
            if featureName not in dataFrame.columns:
                print("Skipped Feature {0} because it is not a column of the DataFrame".format(featureName))
                continue

            try:
                # 'outliers' holds index labels, so rows are selected by label and
                # the column by name.  (Positional iloc with an empty slice, as used
                # previously, selected nothing and therefore replaced nothing.)
                dataFrame.loc[outlier_labels, featureName] = median
            except KeyError as e:
                print("Skipped Feature {0} because {1}".format(featureName, str(e)))
                continue

            # Keep track of the number of values we modified
            count += len(outlier_labels)

    print("Total Outliers Modified: {0}".format(count))
    return dataFrame

## Preprocessing

### Log Transform Skewed Numeric Features

In [19]:
# Take the absolute value of the summed day-count features in both the
# training and the testing set.
for feature in ('SUM_DAYS_ENDDATE_FACT',
                'SUM_DAYS_CREDIT_ENDDATE',
                'SUM_DAYS_CREDIT_UPDATE'):
    application_train = abs_transform(application_train, feature)
    application_test = abs_transform(application_test, feature)

In [20]:
# NOTE(review): the log-transform loop below is disabled by wrapping it in a string
# literal.  Because that literal is the only expression in the cell, Jupyter echoes
# its repr as the cell output.  The disabled loop only touches application_train.
''' 
# Skipping Log transformations for the time being... 

# Ensure all values are numeric, then log transform them
for feature in log_transform_features['feature']: 
        application_train[feature] = make_numeric(application_train, feature)
    
        print("Performing Log Transformations on: {0}".format(feature))
        application_train = log_transform(application_train, feature)
'''

' \n# Skipping Log transformations for the time being... \n\n# Ensure all values are numeric, then log transform them\nfor feature in log_transform_features[\'feature\']: \n        application_train[feature] = make_numeric(application_train, feature)\n    \n        print("Performing Log Transformations on: {0}".format(feature))\n        application_train = log_transform(application_train, feature)\n'

### Remove Non-Numeric Values from Numeric Fields

In [21]:
# The numeric column names are taken from the test set's dtypes; the same
# columns are then cleaned in both frames.
numeric = application_test.select_dtypes(include=[np.number]).columns.tolist()

# Replace NaN / Inf / -Inf with finite numbers (np.nan_to_num via make_numeric)
for feature in numeric:
    application_train[feature] = make_numeric(application_train, feature)
    application_test[feature] = make_numeric(application_test, feature)

### Scale Numeric Fields

In [23]:
# Initialize a scaler, then fit it to the numeric features of the training set
scaler = StandardScaler()

# Technique for selecting numeric fields from:
# https://stackoverflow.com/questions/25039626/how-do-i-find-numeric-columns-in-pandas
numeric = list(application_test.select_dtypes(include=[np.number]).columns.values)

# Fit on exactly the columns (and column order) that are later handed to
# transform().  Fitting on the whole training frame only works while every
# training column is numeric and ordered identically to `numeric`; otherwise the
# learned means/variances are applied to the wrong columns or transform() fails.
scaler = scaler.fit(application_train[numeric])

In [24]:
# Scale the numeric columns of both sets with the already-fitted scaler.
# transform() is used for the test set as well, so it is not re-fitted on test data.
application_train[numeric] = scaler.transform(application_train[numeric])
application_test[numeric] = scaler.transform(application_test[numeric])

### Preview
Show our transformed dataset

In [25]:
# Preview the first five training records after scaling
display(application_train.head(5))

Unnamed: 0,AMT_ANNUITY,AMT_CREDIT,AMT_GOODS_PRICE,AMT_INCOME_TOTAL,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_YEAR,APARTMENTS_AVG,APARTMENTS_MEDI,APARTMENTS_MODE,BASEMENTAREA_AVG,BASEMENTAREA_MEDI,BASEMENTAREA_MODE,BOOL_FLAG_EMAIL,BOOL_FLAG_EMP_PHONE,BOOL_FLAG_OWN_CAR,BOOL_FLAG_OWN_REALTY,BOOL_FLAG_PHONE,BOOL_FLAG_WORK_PHONE,CNT_CHILDREN,CNT_FAM_MEMBERS,CODE_GENDER_F,CODE_GENDER_M,CODE_GENDER_XNA,COMMONAREA_AVG,COMMONAREA_MEDI,COMMONAREA_MODE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_ID_PUBLISH,DAYS_LAST_PHONE_CHANGE,DAYS_REGISTRATION,DEF_30_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,ELEVATORS_AVG,ELEVATORS_MEDI,ELEVATORS_MODE,EMERGENCYSTATE_MODE_No,EMERGENCYSTATE_MODE_Yes,ENTRANCES_AVG,ENTRANCES_MEDI,ENTRANCES_MODE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,FLAG_CONT_MOBILE,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_2,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_MOBIL,FLOORSMAX_AVG,FLOORSMAX_MEDI,FLOORSMAX_MODE,FLOORSMIN_AVG,FLOORSMIN_MEDI,FLOORSMIN_MODE,FONDKAPREMONT_MODE_not specified,FONDKAPREMONT_MODE_org spec account,FONDKAPREMONT_MODE_reg oper account,FONDKAPREMONT_MODE_reg oper spec 
account,HOUR_APPR_PROCESS_START_0,HOUR_APPR_PROCESS_START_1,HOUR_APPR_PROCESS_START_10,HOUR_APPR_PROCESS_START_11,HOUR_APPR_PROCESS_START_12,HOUR_APPR_PROCESS_START_13,HOUR_APPR_PROCESS_START_14,HOUR_APPR_PROCESS_START_15,HOUR_APPR_PROCESS_START_16,HOUR_APPR_PROCESS_START_17,HOUR_APPR_PROCESS_START_18,HOUR_APPR_PROCESS_START_19,HOUR_APPR_PROCESS_START_2,HOUR_APPR_PROCESS_START_20,HOUR_APPR_PROCESS_START_21,HOUR_APPR_PROCESS_START_22,HOUR_APPR_PROCESS_START_23,HOUR_APPR_PROCESS_START_3,HOUR_APPR_PROCESS_START_4,HOUR_APPR_PROCESS_START_5,HOUR_APPR_PROCESS_START_6,HOUR_APPR_PROCESS_START_7,HOUR_APPR_PROCESS_START_8,HOUR_APPR_PROCESS_START_9,HOUSETYPE_MODE_not specified,HOUSETYPE_MODE_org spec account,HOUSETYPE_MODE_reg oper account,HOUSETYPE_MODE_reg oper spec account,LANDAREA_AVG,LANDAREA_MEDI,LANDAREA_MODE,LIVE_CITY_NOT_WORK_CITY,LIVE_REGION_NOT_WORK_REGION,LIVINGAPARTMENTS_AVG,LIVINGAPARTMENTS_MEDI,LIVINGAPARTMENTS_MODE,LIVINGAREA_AVG,LIVINGAREA_MEDI,LIVINGAREA_MODE,NAME_CONTRACT_TYPE_Cash loans,NAME_CONTRACT_TYPE_Revolving loans,NAME_EDUCATION_TYPE_Academic degree,NAME_EDUCATION_TYPE_Higher education,NAME_EDUCATION_TYPE_Incomplete higher,NAME_EDUCATION_TYPE_Lower secondary,NAME_EDUCATION_TYPE_Secondary / secondary special,NAME_FAMILY_STATUS_Civil marriage,NAME_FAMILY_STATUS_Married,NAME_FAMILY_STATUS_Separated,NAME_FAMILY_STATUS_Single / not married,NAME_FAMILY_STATUS_Unknown,NAME_FAMILY_STATUS_Widow,NAME_HOUSING_TYPE_Co-op apartment,NAME_HOUSING_TYPE_House / apartment,NAME_HOUSING_TYPE_Municipal apartment,NAME_HOUSING_TYPE_Office apartment,NAME_HOUSING_TYPE_Rented apartment,NAME_HOUSING_TYPE_With parents,NAME_INCOME_TYPE_Businessman,NAME_INCOME_TYPE_Commercial associate,NAME_INCOME_TYPE_Maternity leave,NAME_INCOME_TYPE_Pensioner,NAME_INCOME_TYPE_State servant,NAME_INCOME_TYPE_Student,NAME_INCOME_TYPE_Unemployed,NAME_INCOME_TYPE_Working,NAME_TYPE_SUITE_Children,NAME_TYPE_SUITE_Family,NAME_TYPE_SUITE_Group of 
people,NAME_TYPE_SUITE_Other_A,NAME_TYPE_SUITE_Other_B,"NAME_TYPE_SUITE_Spouse, partner",NAME_TYPE_SUITE_Unaccompanied,NONLIVINGAPARTMENTS_AVG,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_AVG,NONLIVINGAREA_MEDI,NONLIVINGAREA_MODE,OBS_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,OCCUPATION_TYPE_Accountants,OCCUPATION_TYPE_Cleaning staff,OCCUPATION_TYPE_Cooking staff,OCCUPATION_TYPE_Core staff,OCCUPATION_TYPE_Drivers,OCCUPATION_TYPE_HR staff,OCCUPATION_TYPE_High skill tech staff,OCCUPATION_TYPE_IT staff,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Realty agents,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Secretaries,OCCUPATION_TYPE_Security staff,OCCUPATION_TYPE_Waiters/barmen staff,ORGANIZATION_TYPE_Advertising,ORGANIZATION_TYPE_Agriculture,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Business Entity Type 1,ORGANIZATION_TYPE_Business Entity Type 2,ORGANIZATION_TYPE_Business Entity Type 3,ORGANIZATION_TYPE_Cleaning,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Culture,ORGANIZATION_TYPE_Electricity,ORGANIZATION_TYPE_Emergency,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Hotel,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Industry: type 1,ORGANIZATION_TYPE_Industry: type 10,ORGANIZATION_TYPE_Industry: type 11,ORGANIZATION_TYPE_Industry: type 12,ORGANIZATION_TYPE_Industry: type 13,ORGANIZATION_TYPE_Industry: type 2,ORGANIZATION_TYPE_Industry: type 3,ORGANIZATION_TYPE_Industry: type 4,ORGANIZATION_TYPE_Industry: type 5,ORGANIZATION_TYPE_Industry: type 6,ORGANIZATION_TYPE_Industry: type 7,ORGANIZATION_TYPE_Industry: type 8,ORGANIZATION_TYPE_Industry: type 9,ORGANIZATION_TYPE_Insurance,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Legal 
Services,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Mobile,ORGANIZATION_TYPE_Other,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Realtor,ORGANIZATION_TYPE_Religion,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Security Ministries,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Telecom,ORGANIZATION_TYPE_Trade: type 1,ORGANIZATION_TYPE_Trade: type 2,ORGANIZATION_TYPE_Trade: type 3,ORGANIZATION_TYPE_Trade: type 4,ORGANIZATION_TYPE_Trade: type 5,ORGANIZATION_TYPE_Trade: type 6,ORGANIZATION_TYPE_Trade: type 7,ORGANIZATION_TYPE_Transport: type 1,ORGANIZATION_TYPE_Transport: type 2,ORGANIZATION_TYPE_Transport: type 3,ORGANIZATION_TYPE_Transport: type 4,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_XNA,OWN_CAR_AGE,REGION_POPULATION_RELATIVE,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,TOTALAREA_MODE,WALLSMATERIAL_MODE_Block,WALLSMATERIAL_MODE_Mixed,WALLSMATERIAL_MODE_Monolithic,WALLSMATERIAL_MODE_Others,WALLSMATERIAL_MODE_Panel,"WALLSMATERIAL_MODE_Stone, brick",WALLSMATERIAL_MODE_Wooden,WALLSMATERIAL_MODE_not 
specified,WEEKDAY_APPR_PROCESS_START_FRIDAY,WEEKDAY_APPR_PROCESS_START_MONDAY,WEEKDAY_APPR_PROCESS_START_SATURDAY,WEEKDAY_APPR_PROCESS_START_SUNDAY,WEEKDAY_APPR_PROCESS_START_THURSDAY,WEEKDAY_APPR_PROCESS_START_TUESDAY,WEEKDAY_APPR_PROCESS_START_WEDNESDAY,YEARS_BEGINEXPLUATATION_AVG,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_AVG,YEARS_BUILD_MEDI,YEARS_BUILD_MODE,PER_CREDIT_INCOME,PER_GOODS_CREDIT,PER_ANNUITY_INCOME,NUM_BUREAU_REPORTS,SUM_BUREAU_BALANCES,SUM_CREDIT_CURRENCY_CODE_0,SUM_CREDIT_CURRENCY_CODE_1,SUM_CREDIT_CURRENCY_CODE_2,SUM_CREDIT_CURRENCY_CODE_3,SUM_CREDIT_ACTIVE_CODE_0,SUM_CREDIT_ACTIVE_CODE_1,SUM_CREDIT_ACTIVE_CODE_2,SUM_CREDIT_ACTIVE_CODE_3,SUM_CREDIT_TYPE_CODE_0,SUM_CREDIT_TYPE_CODE_1,SUM_CREDIT_TYPE_CODE_2,SUM_CREDIT_TYPE_CODE_3,SUM_CREDIT_TYPE_CODE_4,SUM_CREDIT_TYPE_CODE_5,SUM_CREDIT_TYPE_CODE_6,SUM_CREDIT_TYPE_CODE_7,SUM_CREDIT_TYPE_CODE_8,SUM_CREDIT_TYPE_CODE_9,SUM_CREDIT_TYPE_CODE_10,SUM_CREDIT_TYPE_CODE_11,SUM_CREDIT_TYPE_CODE_12,SUM_CREDIT_TYPE_CODE_13,SUM_CREDIT_TYPE_CODE_14,SUM_STATUS_0,SUM_STATUS_1,SUM_STATUS_2,SUM_STATUS_3,SUM_STATUS_4,SUM_STATUS_5,SUM_STATUS_C,SUM_STATUS_X,MED_DAYS_CREDIT,SUM_CREDIT_DAYS_OVERDUE,SUM_DAYS_CREDIT_ENDDATE,SUM_DAYS_ENDDATE_FACT,SUM_AMT_CREDIT_MAX_OVERDUE,SUM_CNT_CREDIT_PROLONG,SUM_AMT_CREDIT_SUM,SUM_AMT_CREDIT_SUM_DEBT,SUM_AMT_CREDIT_SUM_LIMIT,SUM_DAYS_CREDIT_UPDATE
0,-0.166065,-0.478095,-0.505662,0.142129,-0.058766,-0.070987,-0.269947,-0.30862,-0.155837,-0.346719,-0.34518,-0.342055,-0.327407,0.003066,0.006034,0.028603,-0.245215,0.468697,-0.717914,0.664531,1.599337,-0.499013,-0.577538,-1.265685,-1.388135,1.388176,-0.003607,0.018432,0.020727,0.034883,-1.50688,-0.743059,-0.579154,0.206994,-0.379837,4.163504,5.25326,-0.368513,-0.365348,-0.355991,0.963763,-0.08734,-0.051932,-0.049475,-0.030398,-0.476522,-1.301593,-1.007542,0.043245,-0.004771,-0.062669,-0.00255,-0.059477,-0.054269,-0.034802,-0.100138,-0.016332,-0.090534,-0.024402,-0.006502,-0.022529,-0.018305,0.639065,-0.009017,-0.123882,-0.310738,-0.013853,-0.297632,-0.062538,0.001803,-0.198993,-0.197647,-0.188297,0.355764,0.356104,0.368213,-1.536135,-0.136428,1.779079,-0.202211,-0.011406,-0.016726,2.674329,-0.371135,-0.353932,-0.334584,-0.314523,-0.296432,-0.266452,-0.225656,-0.174123,-0.11257,-0.031509,-0.062486,-0.036315,-0.022091,-0.011548,-0.063371,-0.082723,-0.109417,-0.13916,-0.174934,-0.227457,-0.312659,-1.536135,-0.136428,1.779079,-0.202211,0.162782,0.165022,0.185154,-0.467814,-0.205869,-0.166853,-0.165974,-0.154904,-0.36414,-0.362243,-0.346934,0.324395,-0.324395,-0.0231,-0.567262,-0.185945,-0.112095,0.638808,-0.327423,-1.329812,-0.262121,2.401419,-0.00255,-0.234957,-0.060515,0.356313,-0.194264,-0.092646,-0.126998,-0.225179,-0.005703,-0.550997,-0.004032,-0.468573,-0.275564,-0.007651,-0.008459,0.967876,-0.103625,-0.387514,-0.029699,-0.053142,-0.076087,-0.195944,0.487175,-0.100846,-0.099729,-0.095487,-0.261343,-0.258124,-0.247602,0.242861,0.252132,-0.181557,-0.12395,-0.140418,-0.313823,-0.253753,-0.042827,-0.196033,-0.041394,2.138285,-0.082782,-0.27329,-0.16898,-0.093269,-0.049479,-0.341411,-0.065283,-0.149481,-0.066354,-0.037377,-0.089691,-0.090662,-0.140875,-0.188513,1.8769,-0.02909,-0.149481,-0.035128,-0.055668,-0.042713,-0.18713,-0.056136,-0.098552,-0.058225,-0.01883,-0.094187,-0.034661,-0.014762,-0.038621,-0.103801,-0.05348,-0.044178,-0.019088,-0.065333,-0.008835,-0.105
232,-0.044104,-0.151279,-0.031509,-0.194354,-0.092949,-0.032124,-0.239507,-0.087585,-0.084047,-0.035908,-0.016628,-0.076968,-0.17257,-0.103304,-0.080379,-0.377813,-0.071751,-0.043358,-0.033659,-0.078848,-0.107173,-0.014428,-0.012624,-0.045345,-0.161651,-0.025575,-0.084964,-0.062249,-0.133669,-0.065833,-0.468635,-0.455284,-0.149452,-0.103064,-0.062699,-0.291208,-0.547236,-0.124004,-0.231267,-0.411375,-0.176135,-0.086733,-0.076281,-0.072886,-0.522963,1.935056,-0.133215,-1.016959,-0.442421,-0.444395,-0.351712,-0.235673,-0.443749,-0.461015,2.218375,0.960978,0.960861,0.961652,1.016443,1.023576,1.042389,0.159961,0.018189,0.141793,1.760329,0.546811,1.763811,-0.024437,-0.010391,-0.00255,0.996753,-0.00255,1.968236,-0.075635,-0.028102,-0.138733,-0.007702,1.012598,3.520264,0.0,-0.033562,-0.001803,-0.003825,-0.022737,-0.043898,-0.001803,-0.124818,-0.004771,-0.019285,0.812763,9.24113,-0.090937,-0.066203,-0.059805,-0.046781,0.002266,0.169359,-1.286057,-0.024989,-0.000598,1.152029,0.021912,-0.089078,0.102418,0.032617,0.525573,1.261514
1,0.59271,1.72545,1.60048,0.426792,-0.058766,-0.070987,-0.269947,-0.30862,-0.155837,-0.885565,0.396431,0.401242,0.380977,0.236003,0.23993,0.252119,-0.245215,0.468697,-0.717914,-1.50482,1.599337,-0.499013,-0.577538,-0.167621,0.720391,-0.72037,-0.003607,1.012286,1.017847,0.81437,0.166821,-0.485941,-1.790855,-0.163104,-1.078697,-0.32048,-0.275663,0.431106,0.436172,0.468762,0.963763,-0.08734,-0.387436,-0.385082,-0.369801,0.323239,0.566501,-1.527258,0.043245,-0.004771,-0.062669,-0.00255,-0.059477,-0.054269,-0.034802,-0.100138,-0.016332,-0.090534,-0.024402,-0.006502,-0.022529,-0.018305,0.639065,-0.009017,-0.123882,-0.310738,-0.013853,-0.297632,-0.062538,0.001803,1.165958,1.166763,1.193952,1.824859,1.824092,1.852433,-1.536135,-0.136428,1.779079,-0.202211,-0.011406,-0.016726,-0.373926,2.694436,-0.353932,-0.334584,-0.314523,-0.296432,-0.266452,-0.225656,-0.174123,-0.11257,-0.031509,-0.062486,-0.036315,-0.022091,-0.011548,-0.063371,-0.082723,-0.109417,-0.13916,-0.174934,-0.227457,-0.312659,-1.536135,-0.136428,1.779079,-0.202211,-0.228092,-0.227585,-0.222405,-0.467814,-0.205869,0.648129,0.655182,0.617489,0.014866,0.017768,0.027531,0.324395,-0.324395,-0.0231,1.762853,-0.185945,-0.112095,-1.565416,-0.327423,0.751986,-0.262121,-0.416421,-0.00255,-0.234957,-0.060515,0.356313,-0.194264,-0.092646,-0.126998,-0.225179,-0.005703,-0.550997,-0.004032,-0.468573,3.628919,-0.007651,-0.008459,-1.03319,-0.103625,2.580551,-0.029699,-0.053142,-0.076087,-0.195944,-2.052651,0.045223,0.047355,-0.095487,-0.059838,-0.054163,-0.247602,-0.174085,-0.168527,-0.181557,-0.12395,-0.140418,3.186507,-0.253753,-0.042827,-0.196033,-0.041394,-0.467664,-0.082782,-0.27329,-0.16898,-0.093269,-0.049479,-0.341411,-0.065283,-0.149481,-0.066354,-0.037377,-0.089691,-0.090662,-0.140875,-0.188513,-0.532794,-0.02909,-0.149481,-0.035128,-0.055668,-0.042713,-0.18713,-0.056136,-0.098552,-0.058225,-0.01883,-0.094187,-0.034661,-0.014762,-0.038621,-0.103801,-0.05348,-0.044178,-0.019088,-0.065333,-0.008835,-0.105232,-0.044104,-0
.151279,-0.031509,-0.194354,-0.092949,-0.032124,-0.239507,-0.087585,-0.084047,-0.035908,-0.016628,-0.076968,5.794739,-0.103304,-0.080379,-0.377813,-0.071751,-0.043358,-0.033659,-0.078848,-0.107173,-0.014428,-0.012624,-0.045345,-0.161651,-0.025575,-0.084964,-0.062249,-0.133669,-0.065833,-0.468635,-0.455284,-1.25275,-2.067573,-2.051813,-0.291208,-0.547236,-0.124004,-0.231267,0.197887,5.677469,-0.086733,-0.076281,-0.072886,-0.522963,-0.516781,-0.133215,-1.016959,-0.442421,2.250251,-0.351712,-0.235673,-0.443749,-0.461015,-0.45078,0.987275,0.987155,0.987948,1.505968,1.504599,1.508933,-0.17445,-0.168532,0.276183,-0.476245,-0.420349,-0.476383,-0.024437,-0.010391,-0.00255,-0.457098,-0.00255,-0.425688,-0.075635,-0.028102,-0.138733,-0.007702,-0.455496,-0.386883,0.0,-0.033562,-0.001803,-0.003825,-0.022737,-0.043898,-0.001803,-0.124818,-0.004771,-0.019285,-0.406781,-0.17618,-0.090937,-0.066203,-0.059805,-0.046781,-0.35255,-0.270597,0.5356,-0.024989,-0.292165,-0.387237,-0.00889,-0.089078,-0.267807,-0.193505,-0.119904,-0.38986
2,-1.404507,-1.152888,-1.090025,-0.427196,-0.058766,-0.070987,-0.269947,-0.30862,-0.155837,-0.885565,-0.602452,-0.600864,-0.59305,-0.534146,-0.533388,-0.523697,-0.245215,0.468697,1.392925,0.664531,1.599337,2.003956,-0.577538,-1.265685,-1.388135,1.388176,-0.003607,-0.28919,-0.288724,-0.283095,0.689509,-0.935313,-0.306869,-0.178827,-0.206116,-0.32048,-0.275663,-0.368513,-0.365348,-0.355991,-1.037599,-0.08734,-0.72294,-0.72069,-0.709204,-0.767499,0.221612,1.193214,0.043245,-0.004771,-0.062669,-0.00255,-0.059477,-0.054269,-0.034802,-0.100138,-0.016332,-0.090534,-0.024402,-0.006502,-0.022529,-0.018305,-1.564786,-0.009017,-0.123882,-0.310738,-0.013853,-0.297632,-0.062538,0.001803,-0.744581,-0.743018,-0.740798,-0.525834,-0.524829,-0.522462,0.650984,-0.136428,-0.562089,-0.202211,-0.011406,-0.016726,-0.373926,-0.371135,-0.353932,-0.334584,-0.314523,-0.296432,-0.266452,-0.225656,-0.174123,-0.11257,-0.031509,-0.062486,-0.036315,-0.022091,-0.011548,-0.063371,-0.082723,-0.109417,-0.13916,-0.174934,-0.227457,3.198372,0.650984,-0.136428,-0.562089,-0.202211,-0.440702,-0.440853,-0.431913,-0.467814,-0.205869,-0.455166,-0.455213,-0.45302,-0.564727,-0.56318,-0.555204,-3.082659,3.082659,-0.0231,-0.567262,-0.185945,-0.112095,0.638808,-0.327423,-1.329812,-0.262121,2.401419,-0.00255,-0.234957,-0.060515,0.356313,-0.194264,-0.092646,-0.126998,-0.225179,-0.005703,-0.550997,-0.004032,-0.468573,-0.275564,-0.007651,-0.008459,0.967876,-0.103625,-0.387514,-0.029699,-0.053142,-0.076087,-0.195944,0.487175,-0.100846,-0.099729,-0.095487,-0.261343,-0.258124,-0.247602,-0.591031,-0.589187,-0.181557,-0.12395,-0.140418,-0.313823,-0.253753,-0.042827,-0.196033,-0.041394,2.138285,-0.082782,-0.27329,-0.16898,-0.093269,-0.049479,-0.341411,-0.065283,-0.149481,-0.066354,-0.037377,-0.089691,-0.090662,-0.140875,-0.188513,-0.532794,-0.02909,-0.149481,-0.035128,-0.055668,-0.042713,5.343875,-0.056136,-0.098552,-0.058225,-0.01883,-0.094187,-0.034661,-0.014762,-0.038621,-0.103801,-0.05348,-0.044178,-0.019088,-0.065333,-
0.008835,-0.105232,-0.044104,-0.151279,-0.031509,-0.194354,-0.092949,-0.032124,-0.239507,-0.087585,-0.084047,-0.035908,-0.016628,-0.076968,-0.17257,-0.103304,-0.080379,-0.377813,-0.071751,-0.043358,-0.033659,-0.078848,-0.107173,-0.014428,-0.012624,-0.045345,-0.161651,-0.025575,-0.084964,-0.062249,-0.133669,-0.065833,-0.468635,2.430559,-0.783451,-0.103064,-0.062699,-0.291208,-0.547236,-0.124004,-0.231267,-0.572048,-0.176135,-0.086733,-0.076281,-0.072886,-0.522963,-0.516781,-0.133215,0.983323,-0.442421,2.250251,-0.351712,-0.235673,-0.443749,-0.461015,-0.45078,-1.020852,-1.020765,-1.020127,-0.698001,-0.698343,-0.698841,0.072629,0.003323,0.082723,-0.476245,-0.420349,-0.476383,-0.024437,-0.010391,-0.00255,-0.457098,-0.00255,-0.425688,-0.075635,-0.028102,-0.138733,-0.007702,-0.455496,-0.386883,0.0,-0.033562,-0.001803,-0.003825,-0.022737,-0.043898,-0.001803,-0.124818,-0.004771,-0.019285,-0.406781,-0.17618,-0.090937,-0.066203,-0.059805,-0.046781,-0.35255,-0.270597,0.5356,-0.024989,-0.292165,-0.387237,-0.00889,-0.089078,-0.267807,-0.193505,-0.119904,-0.38986
3,0.177929,-0.71143,-0.651753,-0.142533,-0.058766,-0.070987,-0.269947,-0.30862,-0.155837,-0.885565,-0.602452,-0.600864,-0.59305,-0.534146,-0.533388,-0.523697,-0.245215,0.468697,-0.717914,0.664531,-0.625259,-0.499013,-0.577538,-0.167621,0.720391,-0.72037,-0.003607,-0.28919,-0.288724,-0.283095,0.680114,0.377805,-0.369143,-0.418302,1.375829,-0.32048,-0.275663,-0.368513,-0.365348,-0.355991,-1.037599,-0.08734,-0.72294,-0.72069,-0.709204,-0.767499,0.7131,-1.527258,0.043245,-0.004771,-0.062669,-0.00255,-0.059477,-0.054269,-0.034802,-0.100138,-0.016332,-0.090534,-0.024402,-0.006502,-0.022529,-0.018305,0.639065,-0.009017,-0.123882,-0.310738,-0.013853,-0.297632,-0.062538,0.001803,-0.744581,-0.743018,-0.740798,-0.525834,-0.524829,-0.522462,0.650984,-0.136428,-0.562089,-0.202211,-0.011406,-0.016726,-0.373926,-0.371135,-0.353932,-0.334584,-0.314523,-0.296432,-0.266452,4.431515,-0.174123,-0.11257,-0.031509,-0.062486,-0.036315,-0.022091,-0.011548,-0.063371,-0.082723,-0.109417,-0.13916,-0.174934,-0.227457,-0.312659,0.650984,-0.136428,-0.562089,-0.202211,-0.440702,-0.440853,-0.431913,-0.467814,-0.205869,-0.455166,-0.455213,-0.45302,-0.564727,-0.56318,-0.555204,0.324395,-0.324395,-0.0231,-0.567262,-0.185945,-0.112095,0.638808,3.054149,-1.329812,-0.262121,-0.416421,-0.00255,-0.234957,-0.060515,0.356313,-0.194264,-0.092646,-0.126998,-0.225179,-0.005703,-0.550997,-0.004032,-0.468573,-0.275564,-0.007651,-0.008459,0.967876,-0.103625,-0.387514,-0.029699,-0.053142,-0.076087,-0.195944,0.487175,-0.100846,-0.099729,-0.095487,-0.261343,-0.258124,-0.247602,0.242861,0.252132,-0.181557,-0.12395,-0.140418,-0.313823,-0.253753,-0.042827,-0.196033,-0.041394,2.138285,-0.082782,-0.27329,-0.16898,-0.093269,-0.049479,-0.341411,-0.065283,-0.149481,-0.066354,-0.037377,-0.089691,-0.090662,-0.140875,-0.188513,1.8769,-0.02909,-0.149481,-0.035128,-0.055668,-0.042713,-0.18713,-0.056136,-0.098552,-0.058225,-0.01883,-0.094187,-0.034661,-0.014762,-0.038621,-0.103801,-0.05348,-0.044178,-0.019088,-0.065333,-0.008835,
-0.105232,-0.044104,-0.151279,-0.031509,-0.194354,-0.092949,-0.032124,-0.239507,-0.087585,-0.084047,-0.035908,-0.016628,-0.076968,-0.17257,-0.103304,-0.080379,-0.377813,-0.071751,-0.043358,-0.033659,-0.078848,-0.107173,-0.014428,-0.012624,-0.045345,-0.161651,-0.025575,-0.084964,-0.062249,-0.133669,-0.065833,-0.468635,-0.455284,-0.928991,-0.103064,-0.062699,-0.291208,-0.547236,-0.124004,-0.231267,-0.572048,-0.176135,-0.086733,-0.076281,-0.072886,-0.522963,-0.516781,-0.133215,0.983323,-0.442421,-0.444395,-0.351712,-0.235673,-0.443749,-0.461015,2.218375,-1.020852,-1.020765,-1.020127,-0.698001,-0.698343,-0.698841,0.091853,0.004154,0.039259,-0.476245,-0.420349,-0.476383,-0.024437,-0.010391,-0.00255,-0.457098,-0.00255,-0.425688,-0.075635,-0.028102,-0.138733,-0.007702,-0.455496,-0.386883,0.0,-0.033562,-0.001803,-0.003825,-0.022737,-0.043898,-0.001803,-0.124818,-0.004771,-0.019285,-0.406781,-0.17618,-0.090937,-0.066203,-0.059805,-0.046781,-0.35255,-0.270597,0.5356,-0.024989,-0.292165,-0.387237,-0.00889,-0.089078,-0.267807,-0.193505,-0.119904,-0.38986
4,-0.361658,-0.213734,-0.06739,-0.199466,-0.058766,-0.070987,-0.269947,-0.30862,-0.155837,-0.885565,-0.602452,-0.600864,-0.59305,-0.534146,-0.533388,-0.523697,-0.245215,0.468697,-0.717914,0.664531,-0.625259,-0.499013,-0.577538,-1.265685,-1.388135,1.388176,-0.003607,-0.28919,-0.288724,-0.283095,0.892535,0.377339,0.307263,0.173129,-0.191639,-0.32048,-0.275663,-0.368513,-0.365348,-0.355991,-1.037599,-0.08734,-0.72294,-0.72069,-0.709204,-0.767499,-0.990729,-1.527258,0.043245,-0.004771,-0.062669,-0.00255,-0.059477,-0.054269,-0.034802,-0.100138,-0.016332,-0.090534,-0.024402,-0.006502,-0.022529,-0.018305,-1.564786,-0.009017,-0.123882,-0.310738,-0.013853,3.359858,-0.062538,0.001803,-0.744581,-0.743018,-0.740798,-0.525834,-0.524829,-0.522462,0.650984,-0.136428,-0.562089,-0.202211,-0.011406,-0.016726,-0.373926,2.694436,-0.353932,-0.334584,-0.314523,-0.296432,-0.266452,-0.225656,-0.174123,-0.11257,-0.031509,-0.062486,-0.036315,-0.022091,-0.011548,-0.063371,-0.082723,-0.109417,-0.13916,-0.174934,-0.227457,-0.312659,0.650984,-0.136428,-0.562089,-0.202211,-0.440702,-0.440853,-0.431913,2.137601,-0.205869,-0.455166,-0.455213,-0.45302,-0.564727,-0.56318,-0.555204,0.324395,-0.324395,-0.0231,-0.567262,-0.185945,-0.112095,0.638808,-0.327423,-1.329812,-0.262121,2.401419,-0.00255,-0.234957,-0.060515,0.356313,-0.194264,-0.092646,-0.126998,-0.225179,-0.005703,-0.550997,-0.004032,-0.468573,-0.275564,-0.007651,-0.008459,0.967876,-0.103625,-0.387514,-0.029699,-0.053142,-0.076087,-0.195944,0.487175,-0.100846,-0.099729,-0.095487,-0.261343,-0.258124,-0.247602,-0.591031,-0.589187,-0.181557,-0.12395,-0.140418,3.186507,-0.253753,-0.042827,-0.196033,-0.041394,-0.467664,-0.082782,-0.27329,-0.16898,-0.093269,-0.049479,-0.341411,-0.065283,-0.149481,-0.066354,-0.037377,-0.089691,-0.090662,-0.140875,-0.188513,-0.532794,-0.02909,-0.149481,-0.035128,-0.055668,-0.042713,-0.18713,-0.056136,-0.098552,-0.058225,-0.01883,-0.094187,-0.034661,-0.014762,-0.038621,-0.103801,-0.05348,-0.044178,-0.019088,-0.065333,-0
.008835,-0.105232,-0.044104,-0.151279,-0.031509,-0.194354,-0.092949,-0.032124,-0.239507,-0.087585,-0.084047,-0.035908,60.139641,-0.076968,-0.17257,-0.103304,-0.080379,-0.377813,-0.071751,-0.043358,-0.033659,-0.078848,-0.107173,-0.014428,-0.012624,-0.045345,-0.161651,-0.025575,-0.084964,-0.062249,-0.133669,-0.065833,-0.468635,-0.455284,0.56357,-0.103064,-0.062699,-0.291208,1.827367,-0.124004,-0.231267,-0.572048,-0.176135,-0.086733,-0.076281,-0.072886,-0.522963,-0.516781,-0.133215,0.983323,-0.442421,-0.444395,-0.351712,-0.235673,2.253525,-0.461015,-0.45078,-1.020852,-1.020765,-1.020127,-0.698001,-0.698343,-0.698841,0.046191,-0.010885,0.063702,-0.476245,-0.420349,-0.476383,-0.024437,-0.010391,-0.00255,-0.457098,-0.00255,-0.425688,-0.075635,-0.028102,-0.138733,-0.007702,-0.455496,-0.386883,0.0,-0.033562,-0.001803,-0.003825,-0.022737,-0.043898,-0.001803,-0.124818,-0.004771,-0.019285,-0.406781,-0.17618,-0.090937,-0.066203,-0.059805,-0.046781,-0.35255,-0.270597,0.5356,-0.024989,-0.292165,-0.387237,-0.00889,-0.089078,-0.267807,-0.193505,-0.119904,-0.38986


### Output
Save out our preprocessed data to temporary intermediate files

In [27]:
# Re-attach the identifier and target columns that were set aside in an
# earlier cell so they were not transformed along with the features.
application_train['SK_ID_CURR'] = application_train_SK_ID_CURR
application_train['TARGET'] = target_train
application_test['SK_ID_CURR'] = application_test_SK_ID_CURR

# Write each set to its own file. The test set must not reuse the training
# path: doing so overwrites the training output and leaves no test file.
application_train.to_csv('data/tmp/bureau_application_train_preprocessed.csv')
application_test.to_csv('data/tmp/bureau_application_test_preprocessed.csv')