# Real Data

In [1]:

#-------------- default module --------------#
import os
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno
import plotly.express as px

# ------------------ side dish -------------- #

from IPython.display import HTML
from matplotlib.colors import ListedColormap

# ---------- layout & optional view ------------ #
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all" # able to run multiple command in 1 line

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', '{:.2f}'.format)  # display whole numbers without +3
pd.set_option('display.float_format', '{:,.1f}'.format) # thousand separated by comma
pd.options.display.float_format = '{:,}'.format # thousand separated by comma

sns.color_palette("PuBu")
sns.set_style("whitegrid")

custom_palette = ["#085a92", "#043657", "#f8f7f3", "#8d1e21", "#ec3237"]
sns.set_palette(custom_palette)
sns.set(style="whitegrid", font_scale=1.2, palette=custom_palette)

pd.set_option('display.max_columns', None)

%matplotlib inline

In [2]:
 def summary(df):
        summary_df = pd.DataFrame(df.dtypes, columns=['dtypes'])
        summary_df['missing#'] = df.isna().sum()
        summary_df['missing%'] = round((df.isna().sum()) / len(df), 2)
        summary_df['duplicated#'] = df.duplicated().sum()
        summary_df['duplicated%'] = (df.duplicated().sum()) / len(df)
        summary_df['uniques'] = df.nunique().values
        summary_df['count'] = df.count().values
        summary_df['unique'] = [df[col].unique()[:4] for col in df.columns]
        summary_df['mode'] = df.mode().transpose().iloc[:, 0].values
 
     # Merge numerical summary information
        numerical_cols = df.select_dtypes(include=['number']).columns
        numerical_summary_df = pd.DataFrame({
            'Mean': df[numerical_cols].mean(),
            'Median': df[numerical_cols].median()
        })
     
        summary_df = summary_df.merge(numerical_summary_df, left_index=True, right_index=True, how='left')
 
     # Apply background gradient to the summary DataFrame
        style = summary_df.style.background_gradient(cmap='Blues')
     
        return style 

In [3]:
folder = '/Users/amarindraardinova/Documents/Data Scientist/Rakamin/Rakamin/04. VIX/VIX 03/week3/tugas_akhir/'

data_apptrain = f"{folder}application_test.csv"
data_apptest  = f"{folder}application_train.csv"
data_bureau_balance = f"{folder}bureau_balance.csv"
data_bureau   = f"{folder}bureau.csv"
data_credit_card_balance = f"{folder}credit_card_balance.csv"


In [4]:
apptrain   = pd.read_csv(data_apptrain)
apptest    = pd.read_csv(data_apptest)
bureau_bal = pd.read_csv(data_bureau_balance)
bureau     = pd.read_csv(data_bureau)
cc_balance = pd.read_csv(data_credit_card_balance)

In [5]:
apptrain.shape
apptest.shape
bureau_bal.shape
bureau.shape
cc_balance.shape

(48744, 121)

(307511, 122)

(27299925, 3)

(1716428, 17)

(3840312, 23)

# Understanding Apptrain & Apptest

In [6]:
summary(apptrain)
summary(apptest)

Unnamed: 0,dtypes,missing#,missing%,duplicated#,duplicated%,uniques,count,unique,mode,Mean,Median
SK_ID_CURR,int64,0,0.0,0,0.0,48744,48744,[100001 100005 100013 100028],100001,277796.67635,277549.0
NAME_CONTRACT_TYPE,object,0,0.0,0,0.0,2,48744,['Cash loans' 'Revolving loans'],Cash loans,,
CODE_GENDER,object,0,0.0,0,0.0,2,48744,['F' 'M'],F,,
FLAG_OWN_CAR,object,0,0.0,0,0.0,2,48744,['N' 'Y'],N,,
FLAG_OWN_REALTY,object,0,0.0,0,0.0,2,48744,['Y' 'N'],Y,,
CNT_CHILDREN,int64,0,0.0,0,0.0,11,48744,[0 2 1 3],0.000000,0.397054,0.0
AMT_INCOME_TOTAL,float64,0,0.0,0,0.0,606,48744,[135000. 99000. 202500. 315000.],135000.000000,178431.805855,157500.0
AMT_CREDIT,float64,0,0.0,0,0.0,2937,48744,[ 568800. 222768. 663264. 1575000.],450000.000000,516740.435561,450000.0
AMT_ANNUITY,float64,24,0.0,0,0.0,7491,48720,[20560.5 17370. 69777. 49018.5],27652.500000,29426.240209,26199.0
AMT_GOODS_PRICE,float64,0,0.0,0,0.0,677,48744,[ 450000. 180000. 630000. 1575000.],450000.000000,462618.840473,396000.0


Unnamed: 0,dtypes,missing#,missing%,duplicated#,duplicated%,uniques,count,unique,mode,Mean,Median
SK_ID_CURR,int64,0,0.0,0,0.0,307511,307511,[100002 100003 100004 100006],100002,278180.518577,278202.0
TARGET,int64,0,0.0,0,0.0,2,307511,[1 0],0.000000,0.080729,0.0
NAME_CONTRACT_TYPE,object,0,0.0,0,0.0,2,307511,['Cash loans' 'Revolving loans'],Cash loans,,
CODE_GENDER,object,0,0.0,0,0.0,3,307511,['M' 'F' 'XNA'],F,,
FLAG_OWN_CAR,object,0,0.0,0,0.0,2,307511,['N' 'Y'],N,,
FLAG_OWN_REALTY,object,0,0.0,0,0.0,2,307511,['Y' 'N'],Y,,
CNT_CHILDREN,int64,0,0.0,0,0.0,15,307511,[0 1 2 3],0.000000,0.417052,0.0
AMT_INCOME_TOTAL,float64,0,0.0,0,0.0,2548,307511,[202500. 270000. 67500. 135000.],135000.000000,168797.919297,147150.0
AMT_CREDIT,float64,0,0.0,0,0.0,5603,307511,[ 406597.5 1293502.5 135000. 312682.5],450000.000000,599025.999706,513531.0
AMT_ANNUITY,float64,12,0.0,0,0.0,13672,307499,[24700.5 35698.5 6750. 29686.5],9000.000000,27108.573909,24903.0


In [7]:
apptrain.describe()
apptest.describe()

Unnamed: 0,SK_ID_CURR,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,TOTALAREA_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
count,48744.0,48744.0,48744.0,48744.0,48720.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,16432.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,28212.0,48736.0,40076.0,24857.0,21103.0,25888.0,16926.0,15249.0,23555.0,25165.0,25423.0,16278.0,20490.0,15964.0,25192.0,15397.0,22660.0,24857.0,21103.0,25888.0,16926.0,15249.0,23555.0,25165.0,25423.0,16278.0,20490.0,15964.0,25192.0,15397.0,22660.0,24857.0,21103.0,25888.0,16926.0,15249.0,23555.0,25165.0,25423.0,16278.0,20490.0,15964.0,25192.0,15397.0,22660.0,26120.0,48715.0,48715.0,48715.0,48715.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,42695.0,42695.0,42695.0,42695.0,42695.0,42695.0
mean,277796.67634990974,0.3970539963892991,178431.80585487446,516740.4355612999,29426.240209359607,462618.84047267353,0.0212256410840308,-16068.084605284754,67485.36632200886,-4967.652716231742,-3051.712949286066,11.786027263875363,0.9999794846545216,0.8097201706876744,0.2047021171836533,0.9983998030526834,0.2631298211061874,0.1626456589528967,2.1467667815526013,2.038158542589857,2.0125964221237487,12.007365009026753,0.0188330871491875,0.0551657639914656,0.0420359428852781,0.0774659445265058,0.2246635483341539,0.1742163138027244,0.501179810639628,0.5180210756705393,0.5001056254095524,0.1223877378605624,0.0900654835805335,0.978828159765142,0.7511370908661232,0.047623660567906,0.0851682954786669,0.1517772382276972,0.2337062187782716,0.2384230863742474,0.0671920937042459,0.105885254322225,0.1122862575420768,0.0092314801584724,0.0293872903795233,0.1190780061954379,0.088998071364261,0.9782923478059332,0.7583271475835992,0.0452230375762345,0.0805699384419443,0.1471606397774686,0.2293901034496322,0.233853986976287,0.0659141093216203,0.1108742295164119,0.1106867815179421,0.0083575436773397,0.0281614783759929,0.1228086776360783,0.0895291001279438,0.97882203723733,0.754343619283942,0.0474203816643714,0.084128210571004,0.151200162924697,0.2331536404043582,0.2378458901584961,0.0680690190336749,0.1070632924079178,0.1133677318196252,0.0089788530233162,0.0292962886142983,0.1071285336906584,1.4476444626911629,0.1436518526121318,1.435738478907934,0.1011392794827055,-1077.7662276382734,0.0,0.7866198916789758,0.000102576727392,0.0147505333989824,0.0874774331199737,4.103069095683571e-05,0.0884621697029378,0.0044928606597735,0.0,0.0011693746922698,0.0,0.0,0.0,0.0,0.0,0.0,0.0015591662563597,0.0,0.0,0.0,0.0021079751727368,0.0018034898700081,0.0027872116172853,0.0092985127064059,0.546902447593395,1.9837685911699263
std,103169.54729627114,0.7090468312730408,101522.59153614278,365397.0041738182,16016.368315258078,336710.215039643,0.0144281944847799,4325.900393031723,144348.5071360403,3552.6120351234963,1569.2767091933626,11.46288905839524,0.0045293868766553,0.3925258931629874,0.40348791847306,0.039970853052941,0.4403367986035462,0.3690458544150365,0.8904228948066035,0.5226939600464158,0.5158041404113929,3.2781720386173405,0.1359366803927346,0.2283058733536803,0.2006732382228069,0.2673320746259048,0.4173648428199108,0.3792992767040749,0.2051423134981523,0.181278116872197,0.1894979946872295,0.1131123895542974,0.0815363190367713,0.0493179821282833,0.1131884074773505,0.082868385018195,0.1391643374088615,0.1006688785538279,0.1473609272382501,0.1649761738359041,0.0819089788783152,0.0982840490803638,0.114859806409869,0.0487491336344512,0.0720074731937994,0.1134646759752031,0.0826549544847549,0.0537824501055472,0.1101169916590286,0.081168603653778,0.1375093743604278,0.1017475981265168,0.1464849258339535,0.1650336652924792,0.082879597258372,0.1039802352742901,0.116698786639595,0.0466572476040076,0.0735040322480206,0.1141841663298828,0.0810221902577918,0.0496628647882828,0.111997676264637,0.0828922047238031,0.1390143759565826,0.1009306765790225,0.1476290126701911,0.1652405974727326,0.0828687142692624,0.0997368426432867,0.116502877950748,0.0481484727382851,0.0729979731434501,0.1114199118013073,3.608053291294409,0.5144127979103412,3.5801246270605107,0.4037911740880358,878.9207400551999,0.0,0.4096980366630701,0.0101276013760116,0.1205539436026075,0.2825363153445389,0.0064054546427842,0.2839687809536897,0.0668787456761704,0.0,0.0341764716971026,0.0,0.0,0.0,0.0,0.0,0.0,0.0394558892253312,0.0,0.0,0.0,0.0463726845599316,0.046132357297717,0.054037423513359,0.1109235568863875,0.693305422913648,1.8388727350274967
min,100001.0,0.0,26941.5,45000.0,2295.0,45000.0,0.0002529999999999,-25195.0,-17463.0,-23722.0,-6348.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0134579104986917,8.09785587553435e-06,0.0005272652387098,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-4361.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,188557.75,0.0,112500.0,260640.0,17973.0,225000.0,0.010006,-19637.0,-2910.0,-7459.25,-4448.0,4.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,2.0,2.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3436950390207541,0.4080661708688298,0.363945238612397,0.0619,0.0467,0.9767,0.6872,0.0081,0.0,0.0745,0.1667,0.1042,0.019,0.0504,0.048575,0.0,0.0,0.0588,0.0425,0.9762,0.6929,0.0076,0.0,0.069,0.1667,0.0833,0.016525,0.0551,0.0456,0.0,0.0,0.0625,0.04615,0.9767,0.6914,0.008,0.0,0.069,0.1667,0.0833,0.019,0.0513,0.049,0.0,0.0,0.0432,0.0,0.0,0.0,0.0,-1766.25,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,277549.0,0.0,157500.0,450000.0,26199.0,396000.0,0.01885,-15785.0,-1293.0,-4490.0,-3234.0,9.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,2.0,2.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5067712615026179,0.5587579430597367,0.5190973382084597,0.0928,0.0781,0.9816,0.7552,0.0227,0.0,0.1379,0.1667,0.2083,0.0483,0.0756,0.077,0.0,0.0038,0.0851,0.077,0.9816,0.7583,0.0203,0.0,0.1379,0.1667,0.2083,0.0462,0.0817,0.0751,0.0,0.0012,0.0926,0.0778,0.9816,0.7585,0.0223,0.0,0.1379,0.1667,0.2083,0.0488,0.077,0.0776,0.0,0.0031,0.0707,0.0,0.0,0.0,0.0,-863.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
75%,367555.5,1.0,225000.0,675000.0,37390.5,630000.0,0.028663,-12496.0,-296.0,-1901.0,-1706.0,15.0,1.0,1.0,0.0,1.0,1.0,0.0,3.0,2.0,2.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6659559434807897,0.6584970137027254,0.6528965519806539,0.1485,0.1134,0.9866,0.8164,0.0539,0.16,0.2069,0.3333,0.375,0.0868,0.1269,0.1376,0.0051,0.029,0.1502,0.11355,0.9866,0.8236,0.0517,0.1208,0.2069,0.3333,0.375,0.0856,0.1322,0.1306,0.0039,0.0245,0.1499,0.113,0.9866,0.8189,0.0538,0.16,0.2069,0.3333,0.375,0.088,0.1266,0.137425,0.0039,0.028025,0.1357,2.0,0.0,2.0,0.0,-363.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0
max,456250.0,20.0,4410000.0,2245500.0,180576.0,2245500.0,0.072508,-7338.0,365243.0,0.0,0.0,74.0,1.0,1.0,1.0,1.0,1.0,1.0,21.0,3.0,3.0,23.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9391445326561508,0.8549996664047012,0.8825303127941461,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,354.0,34.0,351.0,24.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,2.0,2.0,6.0,7.0,17.0


Unnamed: 0,SK_ID_CURR,TARGET,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,TOTALAREA_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
count,307511.0,307511.0,307511.0,307511.0,307511.0,307499.0,307233.0,307511.0,307511.0,307511.0,307511.0,307511.0,104582.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307509.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,134133.0,306851.0,246546.0,151450.0,127568.0,157504.0,103023.0,92646.0,143620.0,152683.0,154491.0,98869.0,124921.0,97312.0,153161.0,93997.0,137829.0,151450.0,127568.0,157504.0,103023.0,92646.0,143620.0,152683.0,154491.0,98869.0,124921.0,97312.0,153161.0,93997.0,137829.0,151450.0,127568.0,157504.0,103023.0,92646.0,143620.0,152683.0,154491.0,98869.0,124921.0,97312.0,153161.0,93997.0,137829.0,159080.0,306490.0,306490.0,306490.0,306490.0,307510.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,307511.0,265992.0,265992.0,265992.0,265992.0,265992.0,265992.0
mean,278180.51857657125,0.0807288194568649,0.4170517477423572,168797.9192969845,599025.9997057016,27108.573909183444,538396.2074288895,0.02086811205778,-16036.995066843136,63815.04590404896,-4986.120327538419,-2994.2023732484367,12.061090818687727,0.9999967480838085,0.8198893698111612,0.1993684778755882,0.9981334001060124,0.2810663683575546,0.0567199222141647,2.152665450442101,2.05246316391934,2.031520823645333,12.063418869568894,0.0151441737043552,0.0507689155835075,0.0406587081437737,0.0781728133302548,0.2304535447512446,0.1795545525200724,0.5021298056566625,0.5143926741308462,0.5108529061799658,0.117440499174645,0.0884422190517998,0.9777348581623324,0.752471432592722,0.0446207154113507,0.0789415123241888,0.1497246700680495,0.2262819070366558,0.2318935004905481,0.0663331841723969,0.1007747749506741,0.1073990193325977,0.0088086726172111,0.0283577570757968,0.1142310069329811,0.0875432122475855,0.9770653729429092,0.7596373227337584,0.0425531377501457,0.074489736109177,0.1451926586456907,0.2223150474784939,0.2280584925507489,0.0649576844565765,0.1056448567494245,0.1059750504371217,0.0080763875442833,0.0270223196859877,0.1178499207659293,0.0879548546657469,0.977752264069484,0.7557462721916463,0.0445951017852902,0.0780778443113772,0.1492127807286993,0.2258965900926267,0.2316249380493379,0.0671687490493992,0.1019544732407103,0.1086067360489941,0.0086510133302126,0.0282359205972618,0.1025466626854412,1.4222454239942577,0.1434206662533851,1.4052921791901856,0.100048941237887,-962.8587883320868,4.227491049100682e-05,0.7100233812774177,8.129790479039775e-05,0.0151149064586307,0.0880553866365756,0.0001918630553053,0.0813759507789965,0.0038957955975558,2.276341334131137e-05,0.0039120551785139,6.503832383231819e-06,0.0035250771517116,0.0029364803210291,0.0012097128232811,0.0099281001330033,0.0002666571277125,0.0081297904790397,0.0005951006630657,0.000507298925892,0.0003349473677364,0.0064024481939306,0.0070002105326475,0.0343619356973142,0.2673952600078197,0.2654741495984841,1.899974435321363
std,102790.1753484246,0.2724186456483939,0.7221213844376252,237123.14627885623,402490.77699585486,14493.737315118333,369446.4605400576,0.0138312801227047,4363.98863178556,141275.76651872724,3522.8863209630717,1509.4504190030234,11.944811582242762,0.0018033070153514,0.3842801989387643,0.3995262281502265,0.0431638941424323,0.4495205468567583,0.2313070397227076,0.9106815691792972,0.50903390281568,0.5027370329147687,3.2658322554378705,0.122126476282152,0.2195258287969606,0.1974986188284236,0.2684437723734045,0.4211238359138972,0.3838166153855957,0.2110622492739249,0.1910601549849365,0.1948443644637488,0.1082402913003223,0.0824381587356851,0.0592233143583628,0.1132799266322471,0.0760357450504093,0.1345760011003441,0.100049120760359,0.144640699548004,0.1613802888001374,0.0811836407017936,0.0925761339604974,0.1105645231837133,0.0477316620503479,0.069523183321236,0.1079360390875328,0.0843071748692455,0.0645754370804801,0.1101110273419482,0.0744445225383915,0.1322561441505065,0.1009769881602466,0.1437094065953157,0.1611597714954759,0.0817502778084354,0.0978804465787937,0.1118452658778338,0.0462762662198356,0.0702538590439445,0.1090759060011531,0.0821787495146341,0.0598973185051196,0.1120663096440438,0.0761442622409146,0.1344671476906749,0.1003683944976322,0.1450670259193514,0.1619335414571561,0.082167010280072,0.0936423327115383,0.1122602586753476,0.0474147279078026,0.0701664815068248,0.1074623241496187,2.4009887461090083,0.4466984293815287,2.37980335197938,0.3622908039755738,826.8084870406566,0.0065017890454897,0.4537519684327393,0.0090161832165508,0.1220102228135412,0.2833758928629898,0.0138501576770174,0.273412048944513,0.0622947108003936,0.0047710553540692,0.0624240632668451,0.0025502570915978,0.059267718073753,0.0541097673764287,0.0347599388276926,0.099144162337849,0.0163274887415966,0.0897982361093955,0.0243874650658622,0.0225176202684461,0.0182985318224376,0.0838491284474772,0.1107574063243545,0.2046848758128244,0.9160023961526176,0.7940556483207578,1.869294998181557
min,100002.0,0.0,0.0,25650.0,45000.0,1615.5,40500.0,0.00029,-25229.0,-17912.0,-24672.0,-7197.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0145681324124455,8.173616518884397e-08,0.0005272652387098,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-4292.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,189145.5,0.0,0.0,112500.0,270000.0,16524.0,238500.0,0.010006,-19682.0,-2760.0,-7479.5,-4299.0,5.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,2.0,2.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3340072886740926,0.3924574161782719,0.3706496323299817,0.0577,0.0442,0.9767,0.6872,0.0078,0.0,0.069,0.1667,0.0833,0.0187,0.0504,0.0453,0.0,0.0,0.0525,0.0407,0.9767,0.6994,0.0072,0.0,0.069,0.1667,0.0833,0.0166,0.0542,0.0427,0.0,0.0,0.0583,0.0437,0.9767,0.6914,0.0079,0.0,0.069,0.1667,0.0833,0.0187,0.0513,0.0457,0.0,0.0,0.0412,0.0,0.0,0.0,0.0,-1570.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,278202.0,0.0,0.0,147150.0,513531.0,24903.0,450000.0,0.01885,-15750.0,-1213.0,-4504.0,-3254.0,9.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,2.0,2.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5059979305057544,0.5659614260608526,0.5352762504724826,0.0876,0.0763,0.9816,0.7552,0.0211,0.0,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0,0.0036,0.084,0.0746,0.9816,0.7648,0.019,0.0,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.0,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0,0.0031,0.0688,0.0,0.0,0.0,0.0,-757.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
75%,367142.5,0.0,1.0,202500.0,808650.0,34596.0,679500.0,0.028663,-12413.0,-289.0,-2010.0,-1720.0,15.0,1.0,1.0,0.0,1.0,1.0,0.0,3.0,2.0,2.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6750525982300696,0.6636170897493769,0.6690566947824041,0.1485,0.1122,0.9866,0.8232,0.0515,0.12,0.2069,0.3333,0.375,0.0856,0.121,0.1299,0.0039,0.0277,0.1439,0.1124,0.9866,0.8236,0.049,0.1208,0.2069,0.3333,0.375,0.0841,0.1313,0.1252,0.0039,0.0231,0.1489,0.1116,0.9866,0.8256,0.0513,0.12,0.2069,0.3333,0.375,0.0868,0.1231,0.1303,0.0039,0.0266,0.1276,2.0,0.0,2.0,0.0,-274.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
max,456255.0,1.0,19.0,117000000.0,4050000.0,258025.5,4050000.0,0.072508,-7489.0,365243.0,0.0,0.0,91.0,1.0,1.0,1.0,1.0,1.0,1.0,20.0,3.0,3.0,23.0,1.0,1.0,1.0,1.0,1.0,1.0,0.962692770561306,0.8549996664047012,0.8960095494948396,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,348.0,34.0,344.0,24.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,9.0,8.0,27.0,261.0,25.0


## Handling Missing Value

### OWN CAR AGE
Merubah nilai FLAG OWN CAR dan fillna OWN CAR AGE menjadi 0

#### Apptrain

In [8]:
apptrain.loc[(apptrain['FLAG_OWN_CAR'] == 'Y') & pd.isna(apptrain['OWN_CAR_AGE']), 'FLAG_OWN_CAR'] = 'N'

In [9]:
apptrain.FLAG_OWN_CAR.value_counts()
apptrain.OWN_CAR_AGE.isna().sum()

FLAG_OWN_CAR
N    32312
Y    16432
Name: count, dtype: int64

32312

In [10]:
apptrain[(apptrain['FLAG_OWN_CAR'] == 'N') & pd.isna(apptrain['OWN_CAR_AGE'])].shape
apptrain[(apptrain['FLAG_OWN_CAR'] == 'Y') & pd.isna(apptrain['OWN_CAR_AGE'])].shape

(32312, 121)

(0, 121)

In [11]:
apptrain.loc[(apptrain['FLAG_OWN_CAR'] == 'N') & pd.isna(apptrain['OWN_CAR_AGE']), 'OWN_CAR_AGE'] = 0

In [12]:
apptrain.FLAG_OWN_CAR.value_counts()
apptrain.OWN_CAR_AGE.isna().sum()

FLAG_OWN_CAR
N    32312
Y    16432
Name: count, dtype: int64

0

#### Apptest

In [13]:
apptest.FLAG_OWN_CAR.value_counts()
apptest.OWN_CAR_AGE.isna().sum()

FLAG_OWN_CAR
N    202924
Y    104587
Name: count, dtype: int64

202929

In [14]:
apptest[(apptest['FLAG_OWN_CAR'] == 'Y') & pd.isna(apptest['OWN_CAR_AGE'])].shape

(5, 122)

In [15]:
apptest.loc[(apptest['FLAG_OWN_CAR'] == 'Y') & pd.isna(apptest['OWN_CAR_AGE']), 'FLAG_OWN_CAR'] = 'N'

In [16]:
apptest.loc[(apptest['FLAG_OWN_CAR'] == 'N') & pd.isna(apptest['OWN_CAR_AGE']), 'OWN_CAR_AGE'] = 0

In [17]:
apptest.FLAG_OWN_CAR.value_counts()
apptest.OWN_CAR_AGE.isna().sum()

FLAG_OWN_CAR
N    202929
Y    104582
Name: count, dtype: int64

0

### AMT REQ

dipilih mode, karena kolom hanya memiliki nilai 0 sebagai yang sering muncul pada semua kolomnya

In [18]:
apptrain['AMT_REQ_CREDIT_BUREAU_HOUR'].fillna(apptrain['AMT_REQ_CREDIT_BUREAU_HOUR'].median(), inplace=True)
apptrain['AMT_REQ_CREDIT_BUREAU_DAY'].fillna(apptrain['AMT_REQ_CREDIT_BUREAU_DAY'].median(), inplace=True)
apptrain['AMT_REQ_CREDIT_BUREAU_WEEK'].fillna(apptrain['AMT_REQ_CREDIT_BUREAU_WEEK'].median(), inplace=True)
apptrain['AMT_REQ_CREDIT_BUREAU_MON'].fillna(apptrain['AMT_REQ_CREDIT_BUREAU_MON'].median(), inplace=True)
apptrain['AMT_REQ_CREDIT_BUREAU_QRT'].fillna(apptrain['AMT_REQ_CREDIT_BUREAU_QRT'].median(), inplace=True)
apptrain['AMT_REQ_CREDIT_BUREAU_YEAR'].fillna(apptrain['AMT_REQ_CREDIT_BUREAU_YEAR'].median(), inplace=True)

apptest['AMT_REQ_CREDIT_BUREAU_HOUR'].fillna(apptest['AMT_REQ_CREDIT_BUREAU_HOUR'].median(), inplace=True)
apptest['AMT_REQ_CREDIT_BUREAU_DAY'].fillna(apptest['AMT_REQ_CREDIT_BUREAU_DAY'].median(), inplace=True)
apptest['AMT_REQ_CREDIT_BUREAU_WEEK'].fillna(apptest['AMT_REQ_CREDIT_BUREAU_WEEK'].median(), inplace=True)
apptest['AMT_REQ_CREDIT_BUREAU_MON'].fillna(apptest['AMT_REQ_CREDIT_BUREAU_MON'].median(), inplace=True)
apptest['AMT_REQ_CREDIT_BUREAU_QRT'].fillna(apptest['AMT_REQ_CREDIT_BUREAU_QRT'].median(), inplace=True)
apptest['AMT_REQ_CREDIT_BUREAU_YEAR'].fillna(apptest['AMT_REQ_CREDIT_BUREAU_YEAR'].median(), inplace=True)

### EXT SOURCE

dipilih median, karena angka Mode dan Median sama.

In [19]:
apptrain['EXT_SOURCE_1'].fillna(apptrain['EXT_SOURCE_1'].median(), inplace=True)
apptrain['EXT_SOURCE_2'].fillna(apptrain['EXT_SOURCE_2'].median(), inplace=True)
apptrain['EXT_SOURCE_3'].fillna(apptrain['EXT_SOURCE_3'].median(), inplace=True)

apptest['EXT_SOURCE_1'].fillna(apptest['EXT_SOURCE_1'].median(), inplace=True)
apptest['EXT_SOURCE_2'].fillna(apptest['EXT_SOURCE_2'].median(), inplace=True)
apptest['EXT_SOURCE_3'].fillna(apptest['EXT_SOURCE_3'].median(), inplace=True)

### PERSONAL BELONGING

karena datanya object, maka dicari yang paling banyak, yaitu menggunakan Mode

In [20]:
apptrain['OCCUPATION_TYPE'].fillna(apptrain['OCCUPATION_TYPE'].mode()[0], inplace=True)
apptrain['NAME_TYPE_SUITE'].fillna(apptrain['NAME_TYPE_SUITE'].mode()[0], inplace=True)
apptrain['AMT_ANNUITY'].fillna(apptrain['AMT_ANNUITY'].mode()[0], inplace=True)

apptest['OCCUPATION_TYPE'].fillna(apptest['OCCUPATION_TYPE'].mode()[0], inplace=True)
apptest['NAME_TYPE_SUITE'].fillna(apptest['NAME_TYPE_SUITE'].mode()[0], inplace=True)
apptest['AMT_ANNUITY'].fillna(apptest['AMT_ANNUITY'].mode()[0], inplace=True)

### CNT SOCIAL CIRCLE

dipilih median karena nilai Mode dan Median sama.

In [21]:
apptrain['OBS_30_CNT_SOCIAL_CIRCLE'].fillna(apptrain['OBS_30_CNT_SOCIAL_CIRCLE'].median(), inplace=True)
apptrain['DEF_30_CNT_SOCIAL_CIRCLE'].fillna(apptrain['DEF_30_CNT_SOCIAL_CIRCLE'].median(), inplace=True)
apptrain['OBS_60_CNT_SOCIAL_CIRCLE'].fillna(apptrain['OBS_60_CNT_SOCIAL_CIRCLE'].median(), inplace=True)
apptrain['DEF_60_CNT_SOCIAL_CIRCLE'].fillna(apptrain['DEF_60_CNT_SOCIAL_CIRCLE'].median(), inplace=True)

apptest['OBS_30_CNT_SOCIAL_CIRCLE'].fillna(apptest['OBS_30_CNT_SOCIAL_CIRCLE'].median(), inplace=True)
apptest['DEF_30_CNT_SOCIAL_CIRCLE'].fillna(apptest['DEF_30_CNT_SOCIAL_CIRCLE'].median(), inplace=True)
apptest['OBS_60_CNT_SOCIAL_CIRCLE'].fillna(apptest['OBS_60_CNT_SOCIAL_CIRCLE'].median(), inplace=True)
apptest['DEF_60_CNT_SOCIAL_CIRCLE'].fillna(apptest['DEF_60_CNT_SOCIAL_CIRCLE'].median(), inplace=True)

### Apptest DAYS_LAST_PHONE_CHANGE

In [22]:
apptest = apptest.dropna(subset=['AMT_GOODS_PRICE', 'CNT_FAM_MEMBERS','DAYS_LAST_PHONE_CHANGE'])

## Dropping Columns Missing Value more than 20%

In [23]:
apptest.columns.tolist()

['SK_ID_CURR',
 'TARGET',
 'NAME_CONTRACT_TYPE',
 'CODE_GENDER',
 'FLAG_OWN_CAR',
 'FLAG_OWN_REALTY',
 'CNT_CHILDREN',
 'AMT_INCOME_TOTAL',
 'AMT_CREDIT',
 'AMT_ANNUITY',
 'AMT_GOODS_PRICE',
 'NAME_TYPE_SUITE',
 'NAME_INCOME_TYPE',
 'NAME_EDUCATION_TYPE',
 'NAME_FAMILY_STATUS',
 'NAME_HOUSING_TYPE',
 'REGION_POPULATION_RELATIVE',
 'DAYS_BIRTH',
 'DAYS_EMPLOYED',
 'DAYS_REGISTRATION',
 'DAYS_ID_PUBLISH',
 'OWN_CAR_AGE',
 'FLAG_MOBIL',
 'FLAG_EMP_PHONE',
 'FLAG_WORK_PHONE',
 'FLAG_CONT_MOBILE',
 'FLAG_PHONE',
 'FLAG_EMAIL',
 'OCCUPATION_TYPE',
 'CNT_FAM_MEMBERS',
 'REGION_RATING_CLIENT',
 'REGION_RATING_CLIENT_W_CITY',
 'WEEKDAY_APPR_PROCESS_START',
 'HOUR_APPR_PROCESS_START',
 'REG_REGION_NOT_LIVE_REGION',
 'REG_REGION_NOT_WORK_REGION',
 'LIVE_REGION_NOT_WORK_REGION',
 'REG_CITY_NOT_LIVE_CITY',
 'REG_CITY_NOT_WORK_CITY',
 'LIVE_CITY_NOT_WORK_CITY',
 'ORGANIZATION_TYPE',
 'EXT_SOURCE_1',
 'EXT_SOURCE_2',
 'EXT_SOURCE_3',
 'APARTMENTS_AVG',
 'BASEMENTAREA_AVG',
 'YEARS_BEGINEXPLUATATION_A

In [24]:
excluded = ['APARTMENTS_AVG',
         'BASEMENTAREA_AVG',
         'YEARS_BEGINEXPLUATATION_AVG',
         'YEARS_BUILD_AVG',
         'COMMONAREA_AVG',
         'ELEVATORS_AVG',
         'ENTRANCES_AVG',
         'FLOORSMAX_AVG',
         'FLOORSMIN_AVG',
         'LANDAREA_AVG',
         'LIVINGAPARTMENTS_AVG',
         'LIVINGAREA_AVG',
         'NONLIVINGAPARTMENTS_AVG',
         'NONLIVINGAREA_AVG',
         'APARTMENTS_MODE',
         'BASEMENTAREA_MODE',
         'YEARS_BEGINEXPLUATATION_MODE',
         'YEARS_BUILD_MODE',
         'COMMONAREA_MODE',
         'ELEVATORS_MODE',
         'ENTRANCES_MODE',
         'FLOORSMAX_MODE',
         'FLOORSMIN_MODE',
         'LANDAREA_MODE',
         'LIVINGAPARTMENTS_MODE',
         'LIVINGAREA_MODE',
         'NONLIVINGAPARTMENTS_MODE',
         'NONLIVINGAREA_MODE',
         'APARTMENTS_MEDI',
         'BASEMENTAREA_MEDI',
         'YEARS_BEGINEXPLUATATION_MEDI',
         'YEARS_BUILD_MEDI',
         'COMMONAREA_MEDI',
         'ELEVATORS_MEDI',
         'ENTRANCES_MEDI',
         'FLOORSMAX_MEDI',
         'FLOORSMIN_MEDI',
         'LANDAREA_MEDI',
         'LIVINGAPARTMENTS_MEDI',
         'LIVINGAREA_MEDI',
         'NONLIVINGAPARTMENTS_MEDI',
         'NONLIVINGAREA_MEDI',
         'FONDKAPREMONT_MODE',
         'HOUSETYPE_MODE',
         'TOTALAREA_MODE',
         'WALLSMATERIAL_MODE',
         'EMERGENCYSTATE_MODE',
         'FLAG_DOCUMENT_2',
         'FLAG_DOCUMENT_10',
         'FLAG_DOCUMENT_12',
         'FLAG_DOCUMENT_13',
         'FLAG_DOCUMENT_14',
         'FLAG_DOCUMENT_15',
         'FLAG_DOCUMENT_16',
         'FLAG_DOCUMENT_17',
         'FLAG_DOCUMENT_19',
         'FLAG_DOCUMENT_20',
         'FLAG_DOCUMENT_21',
            
           ]

### Excluding Apptrain & Apptest

In [25]:
apptrain_clean = apptrain.drop(excluded, axis=1)
apptest_clean  = apptest.drop(excluded, axis=1)


In [26]:
len(excluded)

58

## After Handling Missing Value

In [27]:
summary(apptrain_clean)

Unnamed: 0,dtypes,missing#,missing%,duplicated#,duplicated%,uniques,count,unique,mode,Mean,Median
SK_ID_CURR,int64,0,0.0,0,0.0,48744,48744,[100001 100005 100013 100028],100001,277796.67635,277549.0
NAME_CONTRACT_TYPE,object,0,0.0,0,0.0,2,48744,['Cash loans' 'Revolving loans'],Cash loans,,
CODE_GENDER,object,0,0.0,0,0.0,2,48744,['F' 'M'],F,,
FLAG_OWN_CAR,object,0,0.0,0,0.0,2,48744,['N' 'Y'],N,,
FLAG_OWN_REALTY,object,0,0.0,0,0.0,2,48744,['Y' 'N'],Y,,
CNT_CHILDREN,int64,0,0.0,0,0.0,11,48744,[0 2 1 3],0.000000,0.397054,0.0
AMT_INCOME_TOTAL,float64,0,0.0,0,0.0,606,48744,[135000. 99000. 202500. 315000.],135000.000000,178431.805855,157500.0
AMT_CREDIT,float64,0,0.0,0,0.0,2937,48744,[ 568800. 222768. 663264. 1575000.],450000.000000,516740.435561,450000.0
AMT_ANNUITY,float64,0,0.0,0,0.0,7491,48744,[20560.5 17370. 69777. 49018.5],27652.500000,29425.366876,26221.5
AMT_GOODS_PRICE,float64,0,0.0,0,0.0,677,48744,[ 450000. 180000. 630000. 1575000.],450000.000000,462618.840473,396000.0


In [28]:
apptest_clean.columns

Index(['SK_ID_CURR', 'TARGET', 'NAME_CONTRACT_TYPE', 'CODE_GENDER',
       'FLAG_OWN_CAR', 'FLAG_OWN_REALTY', 'CNT_CHILDREN', 'AMT_INCOME_TOTAL',
       'AMT_CREDIT', 'AMT_ANNUITY', 'AMT_GOODS_PRICE', 'NAME_TYPE_SUITE',
       'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS',
       'NAME_HOUSING_TYPE', 'REGION_POPULATION_RELATIVE', 'DAYS_BIRTH',
       'DAYS_EMPLOYED', 'DAYS_REGISTRATION', 'DAYS_ID_PUBLISH', 'OWN_CAR_AGE',
       'FLAG_MOBIL', 'FLAG_EMP_PHONE', 'FLAG_WORK_PHONE', 'FLAG_CONT_MOBILE',
       'FLAG_PHONE', 'FLAG_EMAIL', 'OCCUPATION_TYPE', 'CNT_FAM_MEMBERS',
       'REGION_RATING_CLIENT', 'REGION_RATING_CLIENT_W_CITY',
       'WEEKDAY_APPR_PROCESS_START', 'HOUR_APPR_PROCESS_START',
       'REG_REGION_NOT_LIVE_REGION', 'REG_REGION_NOT_WORK_REGION',
       'LIVE_REGION_NOT_WORK_REGION', 'REG_CITY_NOT_LIVE_CITY',
       'REG_CITY_NOT_WORK_CITY', 'LIVE_CITY_NOT_WORK_CITY',
       'ORGANIZATION_TYPE', 'EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3',
       'OBS_3

In [29]:
summary(apptest_clean)

Unnamed: 0,dtypes,missing#,missing%,duplicated#,duplicated%,uniques,count,unique,mode,Mean,Median
SK_ID_CURR,int64,0,0.0,0,0.0,307232,307232,[100002 100003 100004 100006],100002,278169.833207,278192.5
TARGET,int64,0,0.0,0,0.0,2,307232,[1 0],0.000000,0.080734,0.0
NAME_CONTRACT_TYPE,object,0,0.0,0,0.0,2,307232,['Cash loans' 'Revolving loans'],Cash loans,,
CODE_GENDER,object,0,0.0,0,0.0,3,307232,['M' 'F' 'XNA'],F,,
FLAG_OWN_CAR,object,0,0.0,0,0.0,2,307232,['N' 'Y'],N,,
FLAG_OWN_REALTY,object,0,0.0,0,0.0,2,307232,['Y' 'N'],Y,,
CNT_CHILDREN,int64,0,0.0,0,0.0,15,307232,[0 1 2 3],0.000000,0.416962,0.0
AMT_INCOME_TOTAL,float64,0,0.0,0,0.0,2547,307232,[202500. 270000. 67500. 135000.],135000.000000,168833.383059,148500.0
AMT_CREDIT,float64,0,0.0,0,0.0,5603,307232,[ 406597.5 1293502.5 135000. 312682.5],450000.000000,599316.048076,514602.0
AMT_ANNUITY,float64,0,0.0,0,0.0,13672,307232,[24700.5 35698.5 6750. 29686.5],9000.000000,27119.748537,24916.5


## Feature Engineering Age, Duration

In [30]:
apptrain_clean.head()

Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_11,FLAG_DOCUMENT_18,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100001,Cash loans,F,N,Y,0,135000.0,568800.0,20560.5,450000.0,Unaccompanied,Working,Higher education,Married,House / apartment,0.01885,-19241,-2329,-5170.0,-812,0.0,1,1,0,1,0,1,Laborers,2.0,2,2,TUESDAY,18,0,0,0,0,0,0,Kindergarten,0.7526144906031748,0.7896543511176771,0.1595195404777181,0.0,0.0,0.0,0.0,-1740.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,100005,Cash loans,M,N,Y,0,99000.0,222768.0,17370.0,180000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.035792,-18064,-4469,-9118.0,-1623,0.0,1,1,0,1,0,0,Low-skill Laborers,2.0,2,2,FRIDAY,9,0,0,0,0,0,0,Self-employed,0.5649902017969249,0.2916555320093651,0.4329616670974407,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
2,100013,Cash loans,M,Y,Y,0,202500.0,663264.0,69777.0,630000.0,Unaccompanied,Working,Higher education,Married,House / apartment,0.019101,-20038,-4458,-2175.0,-3503,5.0,1,1,0,1,0,0,Drivers,2.0,2,2,MONDAY,14,0,0,0,0,0,0,Transport: type 3,0.5067712615026179,0.6997868302051784,0.6109913280868294,0.0,0.0,0.0,0.0,-856.0,0,0,0,0,0,1,0,0,0,0.0,0.0,0.0,0.0,1.0,4.0
3,100028,Cash loans,F,N,Y,2,315000.0,1575000.0,49018.5,1575000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.026392,-13976,-1866,-2000.0,-4208,0.0,1,1,0,1,1,0,Sales staff,4.0,2,2,WEDNESDAY,11,0,0,0,0,0,0,Business Entity Type 3,0.5257339776824489,0.5096770801723647,0.6127042441012546,0.0,0.0,0.0,0.0,-1805.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
4,100038,Cash loans,M,Y,N,1,180000.0,625500.0,32067.0,625500.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.010032,-13040,-2191,-4000.0,-4262,16.0,1,1,1,1,0,0,Laborers,3.0,2,2,FRIDAY,5,0,0,0,0,1,1,Business Entity Type 3,0.202144992067999,0.4256872940912229,0.5190973382084597,0.0,0.0,0.0,0.0,-821.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,2.0


In [31]:
apptrain_clean[['DAYS_BIRTH', 'DAYS_EMPLOYED', 'DAYS_REGISTRATION', 'DAYS_ID_PUBLISH','DAYS_LAST_PHONE_CHANGE']].describe().round()

Unnamed: 0,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,DAYS_LAST_PHONE_CHANGE
count,48744.0,48744.0,48744.0,48744.0,48744.0
mean,-16068.0,67485.0,-4968.0,-3052.0,-1078.0
std,4326.0,144349.0,3553.0,1569.0,879.0
min,-25195.0,-17463.0,-23722.0,-6348.0,-4361.0
25%,-19637.0,-2910.0,-7459.0,-4448.0,-1766.0
50%,-15785.0,-1293.0,-4490.0,-3234.0,-863.0
75%,-12496.0,-296.0,-1901.0,-1706.0,-363.0
max,-7338.0,365243.0,0.0,0.0,0.0


In [32]:
# Convert Negatives Value into Positives

columns_to_convert = ['DAYS_BIRTH', 'DAYS_EMPLOYED', 'DAYS_REGISTRATION', 'DAYS_ID_PUBLISH','DAYS_LAST_PHONE_CHANGE']

apptrain_clean[columns_to_convert] = apptrain_clean[columns_to_convert].abs()
apptest_clean[columns_to_convert]  = apptest_clean[columns_to_convert].abs()

apptrain_clean[columns_to_convert].describe().round()

Unnamed: 0,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,DAYS_LAST_PHONE_CHANGE
count,48744.0,48744.0,48744.0,48744.0,48744.0
mean,16068.0,71496.0,4968.0,3052.0,1078.0
std,4326.0,142404.0,3553.0,1569.0,879.0
min,7338.0,1.0,0.0,0.0,0.0
25%,12496.0,1048.0,1901.0,1706.0,363.0
50%,15785.0,2432.0,4490.0,3234.0,863.0
75%,19637.0,6167.0,7459.0,4448.0,1766.0
max,25195.0,365243.0,23722.0,6348.0,4361.0


In [33]:
apptrain_clean.head()
apptest_clean.head()

Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_11,FLAG_DOCUMENT_18,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100001,Cash loans,F,N,Y,0,135000.0,568800.0,20560.5,450000.0,Unaccompanied,Working,Higher education,Married,House / apartment,0.01885,19241,2329,5170.0,812,0.0,1,1,0,1,0,1,Laborers,2.0,2,2,TUESDAY,18,0,0,0,0,0,0,Kindergarten,0.7526144906031748,0.7896543511176771,0.1595195404777181,0.0,0.0,0.0,0.0,1740.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,100005,Cash loans,M,N,Y,0,99000.0,222768.0,17370.0,180000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.035792,18064,4469,9118.0,1623,0.0,1,1,0,1,0,0,Low-skill Laborers,2.0,2,2,FRIDAY,9,0,0,0,0,0,0,Self-employed,0.5649902017969249,0.2916555320093651,0.4329616670974407,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
2,100013,Cash loans,M,Y,Y,0,202500.0,663264.0,69777.0,630000.0,Unaccompanied,Working,Higher education,Married,House / apartment,0.019101,20038,4458,2175.0,3503,5.0,1,1,0,1,0,0,Drivers,2.0,2,2,MONDAY,14,0,0,0,0,0,0,Transport: type 3,0.5067712615026179,0.6997868302051784,0.6109913280868294,0.0,0.0,0.0,0.0,856.0,0,0,0,0,0,1,0,0,0,0.0,0.0,0.0,0.0,1.0,4.0
3,100028,Cash loans,F,N,Y,2,315000.0,1575000.0,49018.5,1575000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.026392,13976,1866,2000.0,4208,0.0,1,1,0,1,1,0,Sales staff,4.0,2,2,WEDNESDAY,11,0,0,0,0,0,0,Business Entity Type 3,0.5257339776824489,0.5096770801723647,0.6127042441012546,0.0,0.0,0.0,0.0,1805.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
4,100038,Cash loans,M,Y,N,1,180000.0,625500.0,32067.0,625500.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.010032,13040,2191,4000.0,4262,16.0,1,1,1,1,0,0,Laborers,3.0,2,2,FRIDAY,5,0,0,0,0,1,1,Business Entity Type 3,0.202144992067999,0.4256872940912229,0.5190973382084597,0.0,0.0,0.0,0.0,821.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,2.0


Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_11,FLAG_DOCUMENT_18,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100002,1,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,351000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.018801,9461,637,3648.0,2120,0.0,1,1,0,1,1,0,Laborers,1.0,2,2,WEDNESDAY,10,0,0,0,0,0,0,Business Entity Type 3,0.0830369673913225,0.2629485927471776,0.1393757800997895,2.0,2.0,2.0,2.0,1134.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
1,100003,0,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,1129500.0,Family,State servant,Higher education,Married,House / apartment,0.0035409999999999,16765,1188,1186.0,291,0.0,1,1,0,1,1,0,Core staff,2.0,1,1,MONDAY,11,0,0,0,0,0,0,School,0.3112673113812225,0.6222457752555098,0.5352762504724826,1.0,0.0,1.0,0.0,828.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,100004,0,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,135000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.010032,19046,225,4260.0,2531,26.0,1,1,1,1,1,0,Laborers,1.0,2,2,MONDAY,9,0,0,0,0,0,0,Government,0.5059979305057544,0.5559120833904428,0.7295666907060153,0.0,0.0,0.0,0.0,815.0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,100006,0,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,297000.0,Unaccompanied,Working,Secondary / secondary special,Civil marriage,House / apartment,0.008019,19005,3039,9833.0,2437,0.0,1,1,0,1,0,0,Laborers,2.0,2,2,WEDNESDAY,17,0,0,0,0,0,0,Business Entity Type 3,0.5059979305057544,0.6504416904014653,0.5352762504724826,2.0,0.0,2.0,0.0,617.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
4,100007,0,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,513000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.028663,19932,3038,4311.0,3458,0.0,1,1,0,1,0,0,Core staff,1.0,2,2,THURSDAY,11,0,0,0,0,1,1,Religion,0.5059979305057544,0.3227382869704046,0.5352762504724826,0.0,0.0,0.0,0.0,1106.0,0,0,0,0,0,1,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
# ['DAYS_BIRTH', 'DAYS_EMPLOYED', 'DAYS_REGISTRATION', 'DAYS_ID_PUBLISH','DAYS_LAST_PHONE_CHANGE']

apptrain_clean['AGE']                   = apptrain_clean['DAYS_BIRTH'] // 365
apptrain_clean['YEAR_EMPLOYMENT']       = apptrain_clean['DAYS_EMPLOYED'] // 365
apptrain_clean['YEAR_REGISTRATION']     = apptrain_clean['DAYS_REGISTRATION'] // 365
apptrain_clean['YEAR_ID_PUBLISH']       = apptrain_clean['DAYS_ID_PUBLISH'] // 365
apptrain_clean['YEAR_LAST_PHONE_CHANGE']= apptrain_clean['DAYS_LAST_PHONE_CHANGE'] // 365

apptest_clean['AGE']                   = apptest_clean['DAYS_BIRTH'] // 365
apptest_clean['YEAR_EMPLOYMENT']       = apptest_clean['DAYS_EMPLOYED'] // 365
apptest_clean['YEAR_REGISTRATION']     = apptest_clean['DAYS_REGISTRATION'] // 365
apptest_clean['YEAR_ID_PUBLISH']       = apptest_clean['DAYS_ID_PUBLISH'] // 365
apptest_clean['YEAR_LAST_PHONE_CHANGE']= apptest_clean['DAYS_LAST_PHONE_CHANGE'] // 365

In [35]:
apptrain_clean[['AGE']].describe().round(2)
apptest_clean[['AGE']].describe().round(2)

Unnamed: 0,AGE
count,48744.0
mean,43.53
std,11.85
min,20.0
25%,34.0
50%,43.0
75%,53.0
max,69.0


Unnamed: 0,AGE
count,307232.0
mean,43.44
std,11.95
min,20.0
25%,34.0
50%,43.0
75%,53.0
max,69.0


In [36]:
# Age Category

conditions = [
    (apptrain_clean['AGE'] >= 20) & (apptrain_clean['AGE'] <= 29),
    (apptrain_clean['AGE'] >= 30) & (apptrain_clean['AGE'] <= 39),
    (apptrain_clean['AGE'] >= 40) & (apptrain_clean['AGE'] <= 49),
    (apptrain_clean['AGE'] >= 50) & (apptrain_clean['AGE'] <= 59),
    (apptrain_clean['AGE'] >= 60)
]

categories = ['TWENTIES', 'THIRTIES', 'FOURTIES', 'FIFTIES', 'SIXTIES & ABOVE']

apptrain_clean['AGE_CAT'] = np.select(conditions, categories, default='UNKNOWN')


# Age Category

conditions = [
    (apptest_clean['AGE'] >= 20) & (apptest_clean['AGE'] <= 29),
    (apptest_clean['AGE'] >= 30) & (apptest_clean['AGE'] <= 39),
    (apptest_clean['AGE'] >= 40) & (apptest_clean['AGE'] <= 49),
    (apptest_clean['AGE'] >= 50) & (apptest_clean['AGE'] <= 59),
    (apptest_clean['AGE'] >= 60)
]

categories = ['TWENTIES', 'THIRTIES', 'FOURTIES', 'FIFTIES', 'SIXTIES & ABOVE']

apptest_clean['AGE_CAT'] = np.select(conditions, categories, default='UNKNOWN')



## Feature Engineering AMT

In [37]:
apptrain_clean[['AMT_INCOME_TOTAL', 'AMT_CREDIT','AMT_ANNUITY','AMT_GOODS_PRICE']].describe().round(2)
apptest_clean[['AMT_INCOME_TOTAL', 'AMT_CREDIT','AMT_ANNUITY','AMT_GOODS_PRICE']].describe().round(2)

Unnamed: 0,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE
count,48744.0,48744.0,48744.0,48744.0
mean,178431.81,516740.44,29425.37,462618.84
std,101522.59,365397.0,16012.47,336710.22
min,26941.5,45000.0,2295.0,45000.0
25%,112500.0,260640.0,17973.0,225000.0
50%,157500.0,450000.0,26221.5,396000.0
75%,225000.0,675000.0,37390.5,630000.0
max,4410000.0,2245500.0,180576.0,2245500.0


Unnamed: 0,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE
count,307232.0,307232.0,307232.0,307232.0
mean,168833.38,599316.05,27119.75,538397.14
std,237216.07,402517.95,14492.29,369446.7
min,25650.0,45000.0,1615.5,40500.0
25%,112500.0,270000.0,16551.0,238500.0
50%,148500.0,514602.0,24916.5,450000.0
75%,202500.0,808650.0,34596.0,679500.0
max,117000000.0,4050000.0,258025.5,4050000.0


In [38]:
conditions = [
    (apptrain_clean['AMT_INCOME_TOTAL'] >= 25000)  & (apptrain_clean['AMT_INCOME_TOTAL'] <= 50000),
    (apptrain_clean['AMT_INCOME_TOTAL'] >= 50000) & (apptrain_clean['AMT_INCOME_TOTAL'] <= 100000),
    (apptrain_clean['AMT_INCOME_TOTAL'] >= 100000) & (apptrain_clean['AMT_INCOME_TOTAL'] <= 150000),
    (apptrain_clean['AMT_INCOME_TOTAL'] >= 150000) & (apptrain_clean['AMT_INCOME_TOTAL'] <= 250000),
    (apptrain_clean['AMT_INCOME_TOTAL'] >= 250000)
]

categories = ['LOW INCOME', 'LOW MID INCOME', 'UPPER MID INCOME', 'HIGH', 'UPPER HIGH']

apptrain_clean['INCOME_CAT'] = np.select(conditions, categories, default='UNKNOWN')



conditions = [
    (apptest_clean['AMT_INCOME_TOTAL'] >= 25000) & (apptest_clean['AMT_INCOME_TOTAL'] <= 50000),
    (apptest_clean['AMT_INCOME_TOTAL'] >= 50000) & (apptest_clean['AMT_INCOME_TOTAL'] <= 100000),
    (apptest_clean['AMT_INCOME_TOTAL'] >= 100000) & (apptest_clean['AMT_INCOME_TOTAL'] <= 150000),
    (apptest_clean['AMT_INCOME_TOTAL'] >= 150000) & (apptest_clean['AMT_INCOME_TOTAL'] <= 250000),
    (apptest_clean['AMT_INCOME_TOTAL'] >= 250000)
]

categories = ['LOW INCOME', 'LOW MID INCOME', 'UPPER MID INCOME', 'HIGH', 'UPPER HIGH']

apptest_clean['INCOME_CAT'] = np.select(conditions, categories, default='UNKNOWN')

In [39]:
conditions = [
    (apptrain_clean['AMT_CREDIT'] >= 45000)  & (apptrain_clean['AMT_CREDIT'] <= 250000),
    (apptrain_clean['AMT_CREDIT'] >= 250000) & (apptrain_clean['AMT_CREDIT'] <= 500000),
    (apptrain_clean['AMT_CREDIT'] >= 500000) & (apptrain_clean['AMT_CREDIT'] <= 750000),
    (apptrain_clean['AMT_CREDIT'] >= 750000) & (apptrain_clean['AMT_CREDIT'] <= 1000000),
    (apptrain_clean['AMT_CREDIT'] >= 1000000)
]

categories = ['LOW CREDIT', 'MID CREDIT', 'UPPER MID CREDIT', 'HIGH CREDIT', 'UPPER HIGH CREDIT']

apptrain_clean['CREDIT_CAT'] = np.select(conditions, categories, default='UNKNOWN')



conditions = [
    (apptest_clean['AMT_CREDIT'] >= 45000)  & (apptest_clean['AMT_CREDIT'] <= 250000),
    (apptest_clean['AMT_CREDIT'] >= 250000) & (apptest_clean['AMT_CREDIT'] <= 500000),
    (apptest_clean['AMT_CREDIT'] >= 500000) & (apptest_clean['AMT_CREDIT'] <= 750000),
    (apptest_clean['AMT_CREDIT'] >= 750000) & (apptest_clean['AMT_CREDIT'] <= 1000000),
    (apptest_clean['AMT_CREDIT'] >= 1000000)
]

categories = ['LOW CREDIT', 'MID CREDIT', 'UPPER MID CREDIT', 'HIGH CREDIT', 'UPPER HIGH CREDIT']

apptest_clean['CREDIT_CAT'] = np.select(conditions, categories, default='UNKNOWN')

## Feature Engineering Ratio

In [40]:
# Apptrain_clean

# Debt-to-Income Ratio (DTI):
apptrain_clean['DTI%'] = (100 * ( apptrain_clean['AMT_ANNUITY'] / apptrain_clean['AMT_INCOME_TOTAL'])).round(2)

# Credit Utilization Ratio:
apptrain_clean['CUR%'] = (apptrain_clean['AMT_CREDIT'] / apptrain_clean['AMT_INCOME_TOTAL']).round(2)

# Price of Goods to Annual Salary Ratio:
apptrain_clean['PRICE_TO_INCOME%'] = (100 * (apptrain_clean['AMT_GOODS_PRICE'] / apptrain_clean['AMT_INCOME_TOTAL'])).round(2)

# Annuity-to-Total Credit Ratio:
apptrain_clean['ANNUITY_TO_CREDIT%'] = (100 * (apptrain_clean['AMT_ANNUITY'] / apptrain_clean['AMT_CREDIT'])).round(2)


In [41]:
# Apptest_clean

# Debt-to-Income Ratio (DTI):
apptest_clean['DTI%'] = (100 * ( apptest_clean['AMT_ANNUITY'] / apptest_clean['AMT_INCOME_TOTAL'])).round(2)

# Credit Utilization Ratio:
apptest_clean['CUR%'] = (apptest_clean['AMT_CREDIT'] / apptest_clean['AMT_INCOME_TOTAL']).round(2)

# Price of Goods to Annual Salary Ratio:
apptest_clean['PRICE_TO_INCOME%'] = (100 * (apptest_clean['AMT_GOODS_PRICE'] / apptest_clean['AMT_INCOME_TOTAL'])).round(2)

# Annuity-to-Total Credit Ratio:
apptest_clean['ANNUITY_TO_CREDIT%'] = (100 * (apptest_clean['AMT_ANNUITY'] / apptest_clean['AMT_CREDIT'])).round(2)


In [42]:
apptrain_clean['INCOME_CREDIT_RATIO%'] = (100 * (apptrain_clean['AMT_INCOME_TOTAL'] / apptrain_clean['AMT_CREDIT'])).round(2)
apptrain_clean['CREDIT_ANNUITY_RATIO%'] = (100 * (apptrain_clean['AMT_ANNUITY']    / apptrain_clean['AMT_CREDIT'])).round(2)
apptrain_clean['GOODS_CREDIT_RATIO%'] = (100 * (apptrain_clean['AMT_GOODS_PRICE'] / apptrain_clean['AMT_CREDIT'])).round(2)

apptest_clean['INCOME_CREDIT_RATIO%'] = (100 * (apptest_clean['AMT_INCOME_TOTAL'] / apptest_clean['AMT_CREDIT'])).round(2)
apptest_clean['CREDIT_ANNUITY_RATIO%'] = (100 * (apptest_clean['AMT_ANNUITY']    / apptest_clean['AMT_CREDIT'])).round(2)
apptest_clean['GOODS_CREDIT_RATIO%'] = (100 * (apptest_clean['AMT_GOODS_PRICE'] / apptest_clean['AMT_CREDIT'])).round(2)

In [43]:
apptrain_clean.head()
apptest_clean.head()

Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_11,FLAG_DOCUMENT_18,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,AGE,YEAR_EMPLOYMENT,YEAR_REGISTRATION,YEAR_ID_PUBLISH,YEAR_LAST_PHONE_CHANGE,AGE_CAT,INCOME_CAT,CREDIT_CAT,DTI%,CUR%,PRICE_TO_INCOME%,ANNUITY_TO_CREDIT%,INCOME_CREDIT_RATIO%,CREDIT_ANNUITY_RATIO%,GOODS_CREDIT_RATIO%
0,100001,Cash loans,F,N,Y,0,135000.0,568800.0,20560.5,450000.0,Unaccompanied,Working,Higher education,Married,House / apartment,0.01885,19241,2329,5170.0,812,0.0,1,1,0,1,0,1,Laborers,2.0,2,2,TUESDAY,18,0,0,0,0,0,0,Kindergarten,0.7526144906031748,0.7896543511176771,0.1595195404777181,0.0,0.0,0.0,0.0,1740.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,52,6,14.0,2,4.0,FIFTIES,UPPER MID INCOME,UPPER MID CREDIT,15.23,4.21,333.33,3.61,23.73,3.61,79.11
1,100005,Cash loans,M,N,Y,0,99000.0,222768.0,17370.0,180000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.035792,18064,4469,9118.0,1623,0.0,1,1,0,1,0,0,Low-skill Laborers,2.0,2,2,FRIDAY,9,0,0,0,0,0,0,Self-employed,0.5649902017969249,0.2916555320093651,0.4329616670974407,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,49,12,24.0,4,0.0,FOURTIES,LOW MID INCOME,LOW CREDIT,17.55,2.25,181.82,7.8,44.44,7.8,80.8
2,100013,Cash loans,M,Y,Y,0,202500.0,663264.0,69777.0,630000.0,Unaccompanied,Working,Higher education,Married,House / apartment,0.019101,20038,4458,2175.0,3503,5.0,1,1,0,1,0,0,Drivers,2.0,2,2,MONDAY,14,0,0,0,0,0,0,Transport: type 3,0.5067712615026179,0.6997868302051784,0.6109913280868294,0.0,0.0,0.0,0.0,856.0,0,0,0,0,0,1,0,0,0,0.0,0.0,0.0,0.0,1.0,4.0,54,12,5.0,9,2.0,FIFTIES,HIGH,UPPER MID CREDIT,34.46,3.28,311.11,10.52,30.53,10.52,94.98
3,100028,Cash loans,F,N,Y,2,315000.0,1575000.0,49018.5,1575000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.026392,13976,1866,2000.0,4208,0.0,1,1,0,1,1,0,Sales staff,4.0,2,2,WEDNESDAY,11,0,0,0,0,0,0,Business Entity Type 3,0.5257339776824489,0.5096770801723647,0.6127042441012546,0.0,0.0,0.0,0.0,1805.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,38,5,5.0,11,4.0,THIRTIES,UPPER HIGH,UPPER HIGH CREDIT,15.56,5.0,500.0,3.11,20.0,3.11,100.0
4,100038,Cash loans,M,Y,N,1,180000.0,625500.0,32067.0,625500.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.010032,13040,2191,4000.0,4262,16.0,1,1,1,1,0,0,Laborers,3.0,2,2,FRIDAY,5,0,0,0,0,1,1,Business Entity Type 3,0.202144992067999,0.4256872940912229,0.5190973382084597,0.0,0.0,0.0,0.0,821.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,2.0,35,6,10.0,11,2.0,THIRTIES,HIGH,UPPER MID CREDIT,17.82,3.48,347.5,5.13,28.78,5.13,100.0


Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_11,FLAG_DOCUMENT_18,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,AGE,YEAR_EMPLOYMENT,YEAR_REGISTRATION,YEAR_ID_PUBLISH,YEAR_LAST_PHONE_CHANGE,AGE_CAT,INCOME_CAT,CREDIT_CAT,DTI%,CUR%,PRICE_TO_INCOME%,ANNUITY_TO_CREDIT%,INCOME_CREDIT_RATIO%,CREDIT_ANNUITY_RATIO%,GOODS_CREDIT_RATIO%
0,100002,1,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,351000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.018801,9461,637,3648.0,2120,0.0,1,1,0,1,1,0,Laborers,1.0,2,2,WEDNESDAY,10,0,0,0,0,0,0,Business Entity Type 3,0.0830369673913225,0.2629485927471776,0.1393757800997895,2.0,2.0,2.0,2.0,1134.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,25,1,9.0,5,3.0,TWENTIES,HIGH,MID CREDIT,12.2,2.01,173.33,6.07,49.8,6.07,86.33
1,100003,0,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,1129500.0,Family,State servant,Higher education,Married,House / apartment,0.0035409999999999,16765,1188,1186.0,291,0.0,1,1,0,1,1,0,Core staff,2.0,1,1,MONDAY,11,0,0,0,0,0,0,School,0.3112673113812225,0.6222457752555098,0.5352762504724826,1.0,0.0,1.0,0.0,828.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,45,3,3.0,0,2.0,FOURTIES,UPPER HIGH,UPPER HIGH CREDIT,13.22,4.79,418.33,2.76,20.87,2.76,87.32
2,100004,0,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,135000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.010032,19046,225,4260.0,2531,26.0,1,1,1,1,1,0,Laborers,1.0,2,2,MONDAY,9,0,0,0,0,0,0,Government,0.5059979305057544,0.5559120833904428,0.7295666907060153,0.0,0.0,0.0,0.0,815.0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,52,0,11.0,6,2.0,FIFTIES,LOW MID INCOME,LOW CREDIT,10.0,2.0,200.0,5.0,50.0,5.0,100.0
3,100006,0,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,297000.0,Unaccompanied,Working,Secondary / secondary special,Civil marriage,House / apartment,0.008019,19005,3039,9833.0,2437,0.0,1,1,0,1,0,0,Laborers,2.0,2,2,WEDNESDAY,17,0,0,0,0,0,0,Business Entity Type 3,0.5059979305057544,0.6504416904014653,0.5352762504724826,2.0,0.0,2.0,0.0,617.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,52,8,26.0,6,1.0,FIFTIES,UPPER MID INCOME,MID CREDIT,21.99,2.32,220.0,9.49,43.17,9.49,94.98
4,100007,0,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,513000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.028663,19932,3038,4311.0,3458,0.0,1,1,0,1,0,0,Core staff,1.0,2,2,THURSDAY,11,0,0,0,0,1,1,Religion,0.5059979305057544,0.3227382869704046,0.5352762504724826,0.0,0.0,0.0,0.0,1106.0,0,0,0,0,0,1,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,54,8,11.0,9,3.0,FIFTIES,UPPER MID INCOME,UPPER MID CREDIT,18.0,4.22,422.22,4.26,23.68,4.26,100.0


In [44]:
# Feature Engineering : Credit & Interest Amount

apptrain_clean['AMT_INTEREST_PAID']  =        apptrain_clean['AMT_CREDIT'] - apptrain_clean['AMT_GOODS_PRICE']
apptrain_clean['AMT_INTEREST_RATE%'] = (100 * apptrain_clean['AMT_INTEREST_PAID'] / apptrain_clean['AMT_GOODS_PRICE']).round(2)

apptest_clean['AMT_INTEREST_PAID']  =        apptest_clean['AMT_CREDIT'] - apptest_clean['AMT_GOODS_PRICE']
apptest_clean['AMT_INTEREST_RATE%'] = (100 * apptest_clean['AMT_INTEREST_PAID'] / apptest_clean['AMT_GOODS_PRICE']).round(2)

apptrain_clean['YEAR_CREDIT_DURATION'] = apptrain_clean['AMT_CREDIT'] // apptrain_clean['AMT_ANNUITY']
apptest_clean['YEAR_CREDIT_DURATION']  = apptest_clean['AMT_CREDIT'] // apptest_clean['AMT_ANNUITY']

In [45]:
apptest_clean.describe()
apptrain_clean.describe()

Unnamed: 0,SK_ID_CURR,TARGET,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_11,FLAG_DOCUMENT_18,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,AGE,YEAR_EMPLOYMENT,YEAR_REGISTRATION,YEAR_ID_PUBLISH,YEAR_LAST_PHONE_CHANGE,DTI%,CUR%,PRICE_TO_INCOME%,ANNUITY_TO_CREDIT%,INCOME_CREDIT_RATIO%,CREDIT_ANNUITY_RATIO%,GOODS_CREDIT_RATIO%,AMT_INTEREST_PAID,AMT_INTEREST_RATE%,YEAR_CREDIT_DURATION
count,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0,307232.0
mean,278169.8332074784,0.0807337777314863,0.4169617748151234,168833.3830588448,599316.0480760467,27119.74853693625,538397.1396111082,0.020870455776091,16038.752161233206,67758.21018969378,4986.339538334766,2994.380334730757,4.105535881678992,1.0,0.8197974169357358,0.1988725132798667,0.998131705030726,0.2806543589209457,0.0567128424122487,2.1526110561399854,2.0524391990417663,2.0314843505884803,12.063089131340485,0.0151514165191125,0.0507694510988438,0.0406533173627747,0.0781624309967711,0.2304089417768982,0.1795125507759608,0.5043319468164199,0.514503864074453,0.515716055203609,1.4176713363191331,0.1429506041037392,1.400781819602125,0.0997129205291115,962.8821672221644,0.7104175346318092,8.137173211123842e-05,0.014669695865014,0.0881223310071867,0.0001399593792313,0.0814238100197896,0.0038830590563482,0.0039156077491927,0.0081339183418393,0.0055397875221331,0.006060566607645,0.0297364857827309,0.2314146963857931,0.2296831059264659,1.7787404957816897,43.44075486928445,185.11319784397455,13.169298770961358,7.71395883241329,2.225230444745339,18.098483165816067,3.959092314602645,354.43270893005933,5.369747877825227,39.956768044995314,5.369747877825227,90.06857062415374,60918.90846493855,12.299520134621394,21.17886808665764
std,102788.0305045816,0.2724262770518235,0.7220381473454429,237216.06628320875,402517.9527640585,14492.289671928793,369446.7004740707,0.0138324393813117,4363.827260421781,139471.45858125488,3523.1742843417933,1509.377901611859,9.012706598938973,0.0,0.3843567261846384,0.3991525464234927,0.0431834518409109,0.449319648868316,0.2312934716814124,0.9106141679744147,0.5090909154255818,0.502793676591285,3.265935235984056,0.1221552277449059,0.2195269249838318,0.1974860807564074,0.2684274574284808,0.4210952843283818,0.3837815450148025,0.1394035659885144,0.1908802082423494,0.1747427194913464,2.398561331487439,0.4460568867913789,2.3774030597695144,0.3617189800231221,826.9228726765654,0.4535693229572304,0.0090202758043262,0.1202270474389597,0.2834731862747017,0.0118296342331749,0.2734853133662741,0.0621931949535191,0.0624522894702537,0.0898208437603985,0.0780287435897631,0.1030837324475831,0.1907803256766472,0.8570823691844845,0.7442422459703679,1.7657278452557332,11.95416725501949,382.04808672104735,9.647634828477162,4.134328759838493,2.193931980092232,9.45879959610561,2.6903073545865968,242.77101989435695,2.249159093914131,50.80910731457118,2.249159093914131,9.663567155682063,70495.205507147,12.404529479544896,7.857498437009648
min,100002.0,0.0,0.0,25650.0,45000.0,1615.5,40500.0,0.00029,7489.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0145681324124455,8.173616518884397e-08,0.0005272652387098,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.02,0.0,0.39,0.61,1.18,0.61,16.67,-765000.0,-85.0,8.0
25%,189138.75,0.0,0.0,112500.0,270000.0,16551.0,238500.0,0.010006,12415.0,933.0,2009.0,1720.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,2.0,2.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5059979305057544,0.3929450311230969,0.4170996682522097,0.0,0.0,0.0,0.0,274.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,34.0,2.0,5.0,4.0,0.0,11.48,2.02,184.0,3.69,19.36,3.69,83.47,0.0,0.0,15.0
50%,278192.5,0.0,0.0,148500.0,514602.0,24916.5,450000.0,0.01885,15752.5,2219.5,4504.0,3255.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,2.0,2.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5059979305057544,0.5659614260608526,0.5352762504724826,0.0,0.0,0.0,0.0,757.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,43.0,6.0,12.0,8.0,2.0,16.29,3.27,294.12,5.0,30.61,5.0,89.38,39204.0,11.88,20.0
75%,367125.25,0.0,1.0,202500.0,808650.0,34596.0,679500.0,0.028663,19684.0,5710.0,7480.0,4299.0,5.0,1.0,1.0,0.0,1.0,1.0,0.0,3.0,2.0,2.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5059979305057544,0.6634327886788485,0.6363761710860439,2.0,0.0,2.0,0.0,1570.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,53.0,15.0,20.0,11.0,4.0,22.91,5.17,461.54,6.41,49.54,6.41,100.0,99972.0,19.8,27.0
max,456255.0,1.0,19.0,117000000.0,4050000.0,258025.5,4050000.0,0.072508,25229.0,365243.0,24672.0,7197.0,91.0,1.0,1.0,1.0,1.0,1.0,1.0,20.0,3.0,3.0,23.0,1.0,1.0,1.0,1.0,1.0,1.0,0.962692770561306,0.8549996664047012,0.8960095494948396,348.0,34.0,344.0,24.0,4292.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,9.0,8.0,27.0,261.0,25.0,69.0,1000.0,67.0,19.0,11.0,187.6,84.74,8473.68,12.44,20800.33,12.44,666.67,540000.0,500.0,164.0


Unnamed: 0,SK_ID_CURR,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_11,FLAG_DOCUMENT_18,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,AGE,YEAR_EMPLOYMENT,YEAR_REGISTRATION,YEAR_ID_PUBLISH,YEAR_LAST_PHONE_CHANGE,DTI%,CUR%,PRICE_TO_INCOME%,ANNUITY_TO_CREDIT%,INCOME_CREDIT_RATIO%,CREDIT_ANNUITY_RATIO%,GOODS_CREDIT_RATIO%,AMT_INTEREST_PAID,AMT_INTEREST_RATE%,YEAR_CREDIT_DURATION
count,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0,48744.0
mean,277796.67634990974,0.3970539963892991,178431.80585487446,516740.4355612999,29425.36687592319,462618.84047267353,0.0212256410840308,16068.084605284754,71496.39890037748,4967.652716231742,3051.712949286066,3.973165928114229,0.9999794846545216,0.8097201706876744,0.2047021171836533,0.9983998030526834,0.2631298211061874,0.1626456589528967,2.1467667815526013,2.038158542589857,2.0125964221237487,12.007365009026753,0.0188330871491875,0.0551657639914656,0.0420359428852781,0.0774659445265058,0.2246635483341539,0.1742163138027244,0.5035350475737924,0.5180277615178049,0.5034828649988542,1.446783193828984,0.1435663876579681,1.4348842934515018,0.1010791071721647,1077.7662276382734,0.7866198916789758,0.000102576727392,0.0147505333989824,0.0874774331199737,4.103069095683571e-05,0.0884621697029378,0.0044928606597735,0.0011693746922698,0.0015591662563597,0.0018463810930576,0.0015796816018381,0.0024413261119317,0.0081445921549318,0.4790333169210569,1.9857828655834564,43.52664943377646,195.35044313146236,13.113244707040868,7.852104874446086,2.496553421959626,18.29829414902347,3.1675867799113733,282.36989085836206,6.663976079107171,48.924756277695714,6.663976079107171,89.3582564007878,54121.59508862629,13.188784465780405,16.641125061546038
std,103169.54729627114,0.7090468312730408,101522.59153614278,365397.0041738182,16012.473120797484,336710.215039643,0.0144281944847799,4325.900393031723,142404.42299159896,3552.6120351234963,1569.2767091933626,8.679626900283239,0.0045293868766553,0.3925258931629874,0.40348791847306,0.039970853052941,0.4403367986035462,0.3690458544150365,0.8904228948066035,0.5226939600464158,0.5158041404113929,3.2781720386173405,0.1359366803927346,0.2283058733536803,0.2006732382228069,0.2673320746259048,0.4173648428199108,0.3792992767040749,0.1560904402974688,0.181263991198688,0.1719779534746105,3.607152542610668,0.5142716781102135,3.5792306836867334,0.4036785706266914,878.9207400551999,0.4096980366630701,0.0101276013760116,0.1205539436026075,0.2825363153445389,0.0064054546427842,0.2839687809536897,0.0668787456761704,0.0341764716971026,0.0394558892253312,0.0434055447222133,0.0431791534677219,0.0505817219986262,0.1038580987000175,0.6734487859806141,1.7210007847657294,11.85444638223596,390.0832817455533,9.734742692896765,4.300836661387446,2.3355693542473666,9.547596663059618,2.1371478325643345,193.3992811421553,2.46121357892163,40.80668318804308,2.46121357892163,9.27590548980512,61334.86560864147,12.46968123436896,6.29853691056797
min,100001.0,0.0,26941.5,45000.0,2295.0,45000.0,0.0002529999999999,7338.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0134579104986917,8.09785587553435e-06,0.0005272652387098,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.57,0.09,9.25,1.54,3.28,1.54,25.0,-315000.0,-35.0,3.0
25%,188557.75,0.0,112500.0,260640.0,17973.0,225000.0,0.010006,12496.0,1048.0,1901.0,1706.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,2.0,2.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4652868775446804,0.4080820171401921,0.3996756156233169,0.0,0.0,0.0,0.0,363.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,34.0,2.0,5.0,4.0,0.0,11.63,1.7,150.0,4.85,24.82,4.85,82.56,0.0,0.0,12.0
50%,277549.0,0.0,157500.0,450000.0,26221.5,396000.0,0.01885,15785.0,2432.0,4490.0,3234.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,2.0,2.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5067712615026179,0.5587579430597367,0.5190973382084597,0.0,0.0,0.0,0.0,863.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,43.0,6.0,12.0,8.0,2.0,16.59,2.67,236.54,6.14,37.505,6.14,88.34,37422.0,13.2,16.0
75%,367555.5,1.0,225000.0,675000.0,37390.5,630000.0,0.028663,19637.0,6167.0,7459.25,4448.0,5.0,1.0,1.0,0.0,1.0,1.0,0.0,3.0,2.0,2.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.551291470752723,0.6584798020441781,0.6246146584503397,2.0,0.0,2.0,0.0,1766.25,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,53.0,16.0,20.0,12.0,4.0,22.99,4.03,358.97,7.94,58.97,7.94,100.0,85536.0,21.12,20.0
max,456250.0,20.0,4410000.0,2245500.0,180576.0,2245500.0,0.072508,25195.0,365243.0,23722.0,6348.0,74.0,1.0,1.0,1.0,1.0,1.0,1.0,21.0,3.0,3.0,23.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9391445326561508,0.8549996664047012,0.8825303127941461,354.0,34.0,351.0,24.0,4361.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,6.0,7.0,17.0,69.0,1000.0,64.0,17.0,11.0,202.47,30.44,2500.0,26.52,1081.08,26.52,153.85,356400.0,300.0,65.0


In [46]:
apptest_final  = apptest_clean[apptest_clean['AMT_INTEREST_PAID'] >= 0]
apptrain_final = apptrain_clean[apptrain_clean['AMT_INTEREST_PAID'] >= 0]

In [47]:
apptest_final.describe()
apptrain_final.describe()

Unnamed: 0,SK_ID_CURR,TARGET,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_11,FLAG_DOCUMENT_18,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,AGE,YEAR_EMPLOYMENT,YEAR_REGISTRATION,YEAR_ID_PUBLISH,YEAR_LAST_PHONE_CHANGE,DTI%,CUR%,PRICE_TO_INCOME%,ANNUITY_TO_CREDIT%,INCOME_CREDIT_RATIO%,CREDIT_ANNUITY_RATIO%,GOODS_CREDIT_RATIO%,AMT_INTEREST_PAID,AMT_INTEREST_RATE%,YEAR_CREDIT_DURATION
count,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0,306972.0
mean,278167.5299897059,0.0807598087121952,0.416917503876575,168837.94236586726,599608.5329688049,27131.9622229389,538486.3765978656,0.0208703537130422,16039.153671996142,67773.22672751913,4986.289775750449,2994.412966003414,4.105172458725877,1.0,0.8197555477372529,0.1986304939864222,0.9981301226170464,0.280449031181997,0.0566924670654001,2.1525969795290774,2.0524510378796763,2.031484956282658,12.062582255059093,0.015157734255893,0.0507766180628852,0.040658431387879,0.078176511212749,0.2304118942444262,0.1795082287635354,0.504332442950603,0.5144917428874836,0.5157548926140058,1.4175266799577813,0.1429087994996286,1.4006424038674536,0.0996801011167142,962.9886927797976,0.7110029579245013,8.14406525676609e-05,0.0146723479665897,0.088196969104674,0.0001400779224163,0.0814895169592015,0.0038863479405287,0.0039189242015558,0.0081408076306633,0.0055347067484982,0.0060526692988285,0.0297551568221205,0.2314478193450868,0.2296854436235226,1.7785498351641191,43.4418676621972,185.15429420272855,13.16913594725252,7.714013004443402,2.2254798483249285,18.10665044368868,3.961013317175508,354.4890235917282,5.370061047913165,39.93189730659475,5.370061047913165,89.99499684010267,61122.15637093937,12.341397521598063,21.179866567634832
std,102789.52444391049,0.2724663352568501,0.7219796806672449,237302.7531020761,402543.95461321576,14491.033648911338,369526.9774129383,0.0138332878978774,4363.831448627108,139484.16223738214,3523.308975118376,1509.301087991479,9.012037334663594,0.0,0.3843915595169712,0.3989698477129378,0.0432017016044193,0.449219355631082,0.2312544171615748,0.9105371986162596,0.5091603995368739,0.5028583802405597,3.2659381463199,0.122180301104752,0.2195415908972735,0.1974979757114729,0.26844958384911,0.4210971751111661,0.3837779362741131,0.1393972585360758,0.1908865551221931,0.1747240624376841,2.398529583716859,0.4459280137080879,2.3773788013490944,0.3615866239294458,826.9556982328862,0.4532972767610448,0.0090240947064081,0.1202377530878703,0.2835816032800995,0.0118346422358722,0.2735858538297045,0.0622194250320693,0.0624786279543572,0.0898585621167243,0.0779579101938736,0.1030331846185354,0.1908430858505414,0.8572879908461437,0.7443787986182154,1.7656064860007776,11.95417523387222,382.0829035208572,9.648015424943882,4.134129357195075,2.1940241502264617,9.457990488659595,2.690545063353889,242.827703408372,2.250085642186582,50.81700386001928,2.250085642186582,9.081555733839686,70029.62793464589,12.31554915779737,7.8607503926555875
min,100002.0,0.0,0.0,25650.0,45000.0,1615.5,40500.0,0.00029,7489.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0145681324124455,8.173616518884397e-08,0.0005272652387098,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.02,0.0,0.39,0.61,1.18,0.61,16.67,0.0,0.0,8.0
25%,189135.75,0.0,0.0,112500.0,270000.0,16564.5,238500.0,0.010006,12415.0,933.0,2009.0,1720.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,2.0,2.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5059979305057544,0.3928954775909344,0.4170996682522097,0.0,0.0,0.0,0.0,274.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,34.0,2.0,5.0,4.0,0.0,11.49,2.02,184.0,3.69,19.35,3.69,83.47,0.0,0.0,15.0
50%,278191.5,0.0,0.0,148500.0,514867.5,24930.0,450000.0,0.01885,15753.0,2219.0,4504.0,3254.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,2.0,2.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5059979305057544,0.5659614260608526,0.5352762504724826,0.0,0.0,0.0,0.0,757.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,43.0,6.0,12.0,8.0,2.0,16.3,3.27,294.12,5.0,30.56,5.0,89.38,39204.0,11.88,20.0
75%,367123.5,0.0,1.0,202500.0,808650.0,34600.5,679500.0,0.028663,19684.0,5711.25,7480.0,4299.0,5.0,1.0,1.0,0.0,1.0,1.0,0.0,3.0,2.0,2.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5059979305057544,0.6634327886788485,0.6363761710860439,2.0,0.0,2.0,0.0,1570.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,53.0,15.0,20.0,11.0,4.0,22.92,5.17,461.54,6.41,49.5,6.41,100.0,99972.0,19.8,27.0
max,456255.0,1.0,19.0,117000000.0,4050000.0,258025.5,4050000.0,0.072508,25229.0,365243.0,24672.0,7197.0,91.0,1.0,1.0,1.0,1.0,1.0,1.0,20.0,3.0,3.0,23.0,1.0,1.0,1.0,1.0,1.0,1.0,0.962692770561306,0.8549996664047012,0.8960095494948396,348.0,34.0,344.0,24.0,4292.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,9.0,8.0,27.0,261.0,25.0,69.0,1000.0,67.0,19.0,11.0,187.6,84.74,8473.68,12.44,20800.33,12.44,100.0,540000.0,500.0,164.0


Unnamed: 0,SK_ID_CURR,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_11,FLAG_DOCUMENT_18,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,AGE,YEAR_EMPLOYMENT,YEAR_REGISTRATION,YEAR_ID_PUBLISH,YEAR_LAST_PHONE_CHANGE,DTI%,CUR%,PRICE_TO_INCOME%,ANNUITY_TO_CREDIT%,INCOME_CREDIT_RATIO%,CREDIT_ANNUITY_RATIO%,GOODS_CREDIT_RATIO%,AMT_INTEREST_PAID,AMT_INTEREST_RATE%,YEAR_CREDIT_DURATION
count,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0,48740.0
mean,277794.3130693476,0.3970865818629462,178436.20116105868,516762.0699835864,29426.743085761183,462626.3389413213,0.0212264280672958,16068.025194911776,71494.46261797292,4967.779790726303,3051.6926343865407,3.9728970045137455,0.9999794829708658,0.809725071809602,0.2046778826425933,0.998399671727534,0.2631103816167419,0.1626590069757899,2.146799343455068,2.038120640131309,2.012556421830119,12.0072630283135,0.0188346327451785,0.0551702913418137,0.0420393926959376,0.0774517849815346,0.224661469019286,0.1742306114074682,0.5035267621598999,0.5180248196715006,0.5034723591138266,1.4468608945424704,0.1435781698810012,1.434961017644645,0.1010874025441116,1077.7436807550266,0.7866844480919163,0.0001025851456709,0.0147517439474764,0.0874846122281493,4.103405826836274e-05,0.08846942962659,0.0044932293803857,0.0011694706606483,0.0015592942141977,0.0018465326220763,0.0015798112433319,0.0024415264669675,0.00814526056627,0.4790315962248666,1.9857816988100123,43.52648748461223,195.3451374640952,13.113602790315962,7.852031185884284,2.496491588018055,18.299042470250303,3.167696142798522,282.371311243332,6.6641126384899465,48.92350923266312,6.6641126384899465,89.35403118588427,54135.73104226508,13.19222630283135,16.640849405006154
std,103168.49892660863,0.7090668020701549,101525.12175288356,365400.0608170472,16012.173739728192,336711.6159492744,0.0144282839236852,4325.934168304944,142403.09063428856,3552.645568215502,1569.300538059974,8.679186784614572,0.0045295727319672,0.3925220261639681,0.4034701809669607,0.0399724905903513,0.4403263411304667,0.3690580563052057,0.8904435366621518,0.522679035262122,0.5157865164198527,3.2781284099708303,0.13594215135395,0.2283146946619005,0.2006811113137709,0.2673096930868631,0.4173634714073631,0.3793115571531265,0.1560870717155911,0.1812657124373021,0.1719748719440071,3.6072818292036506,0.5142911360712655,3.5793589351258595,0.4036940965714798,878.9469686323401,0.4096528656034602,0.0101280169099622,0.1205588163564629,0.2825467975064013,0.0064057174737979,0.2839793024648878,0.0668814775965772,0.0341778724545166,0.0394575057321984,0.0434073226048031,0.0431809229120506,0.0505837927365905,0.1038623342091391,0.6734611608640757,1.7210237136352644,11.854520402199924,390.0796328277283,9.734831375106754,4.30090348144862,2.335634354627979,9.547556006020365,2.1371879952713004,193.40308862130377,2.4612684067282564,40.80762908951615,2.4612684067282564,9.26423547595097,61308.82565577055,12.464346734470237,6.298721859279069
min,100001.0,0.0,26941.5,45000.0,2295.0,45000.0,0.0002529999999999,7338.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0134579104986917,8.09785587553435e-06,0.0005272652387098,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.57,0.09,9.25,1.54,3.28,1.54,25.0,0.0,0.0,3.0
25%,188557.75,0.0,112500.0,260640.0,17973.0,225000.0,0.010006,12496.0,1048.0,1901.0,1706.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,2.0,2.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4652868775446804,0.4080820171401921,0.3996756156233169,0.0,0.0,0.0,0.0,363.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,34.0,2.0,5.0,4.0,0.0,11.6375,1.7,150.0,4.85,24.82,4.85,82.56,0.0,0.0,12.0
50%,277542.0,0.0,157500.0,450000.0,26221.5,396000.0,0.01885,15785.0,2432.0,4490.0,3234.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,2.0,2.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5067712615026179,0.5587579430597367,0.5190973382084597,0.0,0.0,0.0,0.0,863.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,43.0,6.0,12.0,8.0,2.0,16.59,2.67,236.67,6.14,37.5,6.14,88.34,37422.0,13.2,16.0
75%,367549.0,1.0,225000.0,675000.0,37390.5,630000.0,0.028663,19637.0,6166.25,7459.25,4448.0,5.0,1.0,1.0,0.0,1.0,1.0,0.0,3.0,2.0,2.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5512690577689836,0.6584798020441781,0.6246146584503397,2.0,0.0,2.0,0.0,1766.25,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,53.0,16.0,20.0,12.0,4.0,22.99,4.03,358.97,7.94,58.97,7.94,100.0,85536.0,21.12,20.0
max,456250.0,20.0,4410000.0,2245500.0,180576.0,2245500.0,0.072508,25195.0,365243.0,23722.0,6348.0,74.0,1.0,1.0,1.0,1.0,1.0,1.0,21.0,3.0,3.0,23.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9391445326561508,0.8549996664047012,0.8825303127941461,354.0,34.0,351.0,24.0,4361.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,6.0,7.0,17.0,69.0,1000.0,64.0,17.0,11.0,202.47,30.44,2500.0,26.52,1081.08,26.52,100.0,356400.0,300.0,65.0


# Saving into New Files

In [48]:
apptrain_final.to_csv('data_unsupervised.csv', index=False)
apptest_final.to_csv('data_supervised.csv', index=False)

In [49]:
print(f"Row & Columns oada Apptrain_Clean:",apptrain_clean.shape)
print(f"Row & Columns pada Apptest_Clean :",apptest_clean.shape)

Row & Columns oada Apptrain_Clean: (48744, 81)
Row & Columns pada Apptest_Clean : (307232, 82)
