# Classification Project

# SetUp

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

pd.set_option("display.max_columns", 300)
pd.set_option("display.max_info_columns", 300)

application_train = pd.read_csv("/Users/drewpeterson/Documents/Programming/Python/LFZ_Python/LFZ_Week12/Classification_Project/home-credit-default-risk/application_train.csv")
application_test = pd.read_csv("/Users/drewpeterson/Documents/Programming/Python/LFZ_Python/LFZ_Week12/Classification_Project/home-credit-default-risk/application_test.csv")
credit_card_balance = pd.read_csv("/Users/drewpeterson/Documents/Programming/Python/LFZ_Python/LFZ_Week12/Classification_Project/home-credit-default-risk/credit_card_balance.csv")
installments_payments = pd.read_csv("/Users/drewpeterson/Documents/Programming/Python/LFZ_Python/LFZ_Week12/Classification_Project/home-credit-default-risk/installments_payments.csv")
previous_application = pd.read_csv("/Users/drewpeterson/Documents/Programming/Python/LFZ_Python/LFZ_Week12/Classification_Project/home-credit-default-risk/previous_application.csv")
POS_CASH_balance = pd.read_csv("/Users/drewpeterson/Documents/Programming/Python/LFZ_Python/LFZ_Week12/Classification_Project/home-credit-default-risk/POS_CASH_balance.csv")
bureau = pd.read_csv("/Users/drewpeterson/Documents/Programming/Python/LFZ_Python/LFZ_Week12/Classification_Project/home-credit-default-risk/bureau.csv")
bureau_balance = pd.read_csv("/Users/drewpeterson/Documents/Programming/Python/LFZ_Python/LFZ_Week12/Classification_Project/home-credit-default-risk/bureau_balance.csv")

# ONE at a Time

## Installments Payments

In [2]:
installments_payments["DAYS_DIFFERENCE"] = installments_payments.DAYS_INSTALMENT - installments_payments.DAYS_ENTRY_PAYMENT
installments_payments["AMT_DIFFERENCE"] = installments_payments.AMT_PAYMENT - installments_payments.AMT_INSTALMENT

installments_payments.drop(columns=["NUM_INSTALMENT_NUMBER", "DAYS_INSTALMENT", "DAYS_ENTRY_PAYMENT", "AMT_INSTALMENT", "AMT_PAYMENT"],inplace= True)
installments_payments.dropna(inplace=True)

installments_payments.NUM_INSTALMENT_VERSION = installments_payments.groupby(["SK_ID_PREV","SK_ID_CURR"]).NUM_INSTALMENT_VERSION.transform(lambda x: x.median())
installments_payments.DAYS_DIFFERENCE = installments_payments.groupby(["SK_ID_PREV","SK_ID_CURR"]).DAYS_DIFFERENCE.transform(lambda x: x.mean())
installments_payments.AMT_DIFFERENCE = installments_payments.groupby(["SK_ID_PREV","SK_ID_CURR"]).AMT_DIFFERENCE.transform(lambda x: x.mean())

installments_payments.drop_duplicates(inplace=True)

In [10]:
test_ids = set(application_test.SK_ID_CURR)

In [11]:
credit_ids = set(credit_card_balance.SK_ID_CURR)

In [15]:
test_ids.issubset(credit_ids)

False

In [17]:
matching_ids = []
missing_ids = []
for each_id in test_ids:
    if each_id in credit_ids:
        matching_ids.append(each_id)
    elif each_id not in credit_ids:
        missing_ids.append(each_id)


In [19]:
len(matching_ids)

16653

In [20]:
len(missing_ids)

32091

In [18]:
len(test_ids)

48744

In [21]:
installment_matching_ids = []
installment_missing_ids = []
for each_id in test_ids:
    if each_id in installment_ids:
        installment_matching_ids.append(each_id)
    elif each_id not in installment_ids:
        installment_missing_ids.append(each_id)

In [22]:
len(installment_matching_ids)

47943

In [23]:
len(installment_missing_ids)

801

In [13]:
installment_ids = set(installments_payments.SK_ID_CURR)

In [None]:
prev_ids = set(previous_application.SK_ID_CURR)
prev_matching_ids = []
prev_missing_ids = []
for each_id in test_ids:
    if each_id in prev_ids:
        prev_matching_ids.append(each_id)
    elif each_id not in prev_ids:
        prev_missing_ids.append(each_id)

In [None]:
len(installment_matching_ids)

In [None]:
len(installment_matching_ids)

In [28]:
def count_SK_ID_CURR_in_test(example_list):
    example_set = set(example_list)
    example_matching_ids = []
    example_missing_ids = []
    for each_id in test_ids:
        if each_id in example_set:
            example_matching_ids.append(each_id)
        elif each_id not in example_set:
            example_missing_ids.append(each_id)
    print(f"IDs Matching in Application_Test: {len(example_matching_ids)}")
    print(f"IDs Missing in Application_Test: {len(example_missing_ids)}")

In [29]:
count_SK_ID_CURR_in_test(previous_application.SK_ID_CURR)

IDs Matching in Application_Test: 47800
IDs Missing in Application_Test: 944


Most IDs match in Installment Payments and Previous Application

Next Step: merge Application Train and Application Test with Installment Payments and Previous Application, but not credit card

NOTE: bureau has some information as well, but not for starters

In [31]:
count_SK_ID_CURR_in_test(bureau.SK_ID_CURR)

IDs Matching in Application_Test: 42320
IDs Missing in Application_Test: 6424


In [14]:
test_ids.issubset(installment_ids)

False

There are IDs in Application_Test, that do not have an ID in Cred Card Balance

So when you merge them, there will be nulls in those areas

In [88]:
count_SK_ID_CURR_in_test(POS_CASH_balance.SK_ID_CURR)
POS_CASH_balance.groupby("SK_ID_PREV").MONTHS_BALANCE.count().sort_values()

SK_ID_PREV
2293209     1
2127388     1
2127376     1
2027482     1
1468683     1
           ..
2346240    96
1220271    96
2710334    96
2332615    96
1186496    96
Name: MONTHS_BALANCE, Length: 936325, dtype: int64

In [82]:
count_SK_ID_CURR_in_test(POS_CASH_balance.SK_ID_CURR)

IDs Matching in Application_Test: 47808
IDs Missing in Application_Test: 936


In [81]:
count_SK_ID_CURR_in_test(_first.SK_ID_CURR_y)

IDs Matching in Application_Test: 11051
IDs Missing in Application_Test: 37693


## Credit Card Balance

In [67]:
credit_card_balance = pd.read_csv("/Users/drewpeterson/Documents/Programming/Python/LFZ_Python/LFZ_Week12/Classification_Project/home-credit-default-risk/credit_card_balance.csv")

In [68]:
credit_card_balance = credit_card_balance.fillna(0)

credit_card_balance["AMT_DRAWINGS_TOTAL"] = credit_card_balance.AMT_DRAWINGS_ATM_CURRENT + credit_card_balance.AMT_DRAWINGS_OTHER_CURRENT + credit_card_balance.AMT_DRAWINGS_CURRENT + credit_card_balance.AMT_DRAWINGS_POS_CURRENT
credit_card_balance["CNT_DRAWINGS_TOTAL"] = credit_card_balance.CNT_DRAWINGS_ATM_CURRENT + credit_card_balance.CNT_DRAWINGS_OTHER_CURRENT + credit_card_balance.CNT_DRAWINGS_CURRENT + credit_card_balance.CNT_DRAWINGS_POS_CURRENT
credit_card_balance["COUNT_CREDIT_CARD"] = credit_card_balance.groupby(["SK_ID_PREV","SK_ID_CURR"]).AMT_BALANCE.transform(lambda x: x.count())

credit_card_balance = credit_card_balance.drop(columns = ["SK_DPD", "AMT_DRAWINGS_ATM_CURRENT", "AMT_DRAWINGS_OTHER_CURRENT", "AMT_DRAWINGS_CURRENT", "AMT_DRAWINGS_POS_CURRENT", "CNT_DRAWINGS_ATM_CURRENT", "CNT_DRAWINGS_OTHER_CURRENT", "CNT_DRAWINGS_CURRENT", "CNT_DRAWINGS_POS_CURRENT", "AMT_RECEIVABLE_PRINCIPAL", "AMT_RECIVABLE", "AMT_PAYMENT_CURRENT", "MONTHS_BALANCE"])

contract_status_dict = {"Refused":1, "Demand":2, "Sent proposal":3, "Active": 4 , "Signed":5 ,"Completed": 6,"Approved":7}
credit_card_balance.NAME_CONTRACT_STATUS = credit_card_balance.NAME_CONTRACT_STATUS.map(contract_status_dict)

def compress_median(df_example, list_of_columns):
    for each_column in list_of_columns:
        df_example[each_column] = df_example.groupby(["SK_ID_PREV","SK_ID_CURR"])[each_column].transform(lambda x: x.median())
    return df_example.drop_duplicates()

credit_card_balance = compress_median(credit_card_balance, credit_card_balance.drop(columns = ["SK_ID_PREV","SK_ID_CURR"]).columns)
credit_card_balance.drop_duplicates(inplace=True)

## Previous Application

In [32]:
previous_application.drop(columns =["RATE_INTEREST_PRIMARY", "RATE_INTEREST_PRIVILEGED"], inplace=True)
previous_application.dropna(subset=["AMT_CREDIT", "PRODUCT_COMBINATION"], inplace=True)

previous_application.AMT_DOWN_PAYMENT = previous_application.AMT_DOWN_PAYMENT.interpolate(method='linear', x=previous_application.AMT_APPLICATION)
previous_application.RATE_DOWN_PAYMENT = previous_application.RATE_DOWN_PAYMENT.interpolate(method='linear', x=previous_application.AMT_DOWN_PAYMENT)
previous_application.CNT_PAYMENT = previous_application.CNT_PAYMENT.interpolate(method='linear', x=previous_application.AMT_APPLICATION)
previous_application.AMT_GOODS_PRICE  = previous_application.AMT_GOODS_PRICE.interpolate(method='linear', x=previous_application.AMT_APPLICATION)

previous_application.AMT_ANNUITY =  previous_application.groupby("NAME_CONTRACT_TYPE").AMT_ANNUITY.transform(lambda x: x.fillna(x.median()))
previous_application.NAME_TYPE_SUITE = previous_application.NAME_TYPE_SUITE.fillna("Unaccompanied")
previous_application.drop(columns = ["DAYS_FIRST_DRAWING", "DAYS_FIRST_DUE","DAYS_LAST_DUE_1ST_VERSION","DAYS_LAST_DUE","DAYS_TERMINATION","NFLAG_INSURED_ON_APPROVAL"], inplace=True)

weekday_encoding_map = {'TUESDAY':2, 'WEDNESDAY':3, 'MONDAY':1, 'FRIDAY':5, 'THURSDAY':4, 'SATURDAY':6, 'SUNDAY':7}
previous_application.WEEKDAY_APPR_PROCESS_START = previous_application.WEEKDAY_APPR_PROCESS_START.map(weekday_encoding_map)

yield_encoding_map = {'XNA':0, 'middle':3, 'high':4, 'low_normal':2, 'low_action':1}
previous_application.NAME_YIELD_GROUP = previous_application.NAME_YIELD_GROUP.map(yield_encoding_map)

def categorical_ordinal_encoding(df_example):
    df_example = df_example.copy()
    for each_column in df_example.select_dtypes(include="object").columns:
        map_dict = {}
        categories = list(df_example[each_column].value_counts(ascending=True).index)
        for each_category in categories:
            # for each_index in range(1, len(df_example[each_column].value_counts(ascending=True).index)):
            map_dict[each_category] = categories.index(each_category)
        df_example[each_column] = df_example[each_column].map(map_dict)
    return df_example

previous_application = categorical_ordinal_encoding(previous_application)

## FILL NA Simple

In [36]:
for each_column in application_test.select_dtypes(exclude="object").columns:
    application_test[each_column] = application_test[each_column].fillna(application_train[each_column].median())
    
for each_column in application_test.select_dtypes(include="object").columns:
    application_test[each_column] = application_test[each_column].fillna(application_train[each_column].mode()[0])

for each_column in application_train.select_dtypes(exclude="object").columns:
    application_train[each_column] = application_train[each_column].fillna(application_train[each_column].median())
    
for each_column in application_train.select_dtypes(include="object").columns:
    application_train[each_column] = application_train[each_column].fillna(application_train[each_column].mode()[0])

## Apply Same Encoding

In [37]:
import category_encoders as ce
one = ce.OneHotEncoder(use_cat_names=True)
combined_data = pd.concat([application_train.drop(columns ="TARGET"), application_test])
one.fit(combined_data)

In [38]:
application_train_encoded = one.transform(application_train.drop(columns ="TARGET"))
application_train_encoded["TARGET"] = application_train.TARGET

In [39]:
application_test_encoded = one.transform(application_test)

In [40]:
application_train_encoded

Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE_Cash loans,NAME_CONTRACT_TYPE_Revolving loans,CODE_GENDER_M,CODE_GENDER_F,CODE_GENDER_XNA,FLAG_OWN_CAR_N,FLAG_OWN_CAR_Y,FLAG_OWN_REALTY_Y,FLAG_OWN_REALTY_N,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE_Unaccompanied,NAME_TYPE_SUITE_Family,"NAME_TYPE_SUITE_Spouse, partner",NAME_TYPE_SUITE_Children,NAME_TYPE_SUITE_Other_A,NAME_TYPE_SUITE_Other_B,NAME_TYPE_SUITE_Group of people,NAME_INCOME_TYPE_Working,NAME_INCOME_TYPE_State servant,NAME_INCOME_TYPE_Commercial associate,NAME_INCOME_TYPE_Pensioner,NAME_INCOME_TYPE_Unemployed,NAME_INCOME_TYPE_Student,NAME_INCOME_TYPE_Businessman,NAME_INCOME_TYPE_Maternity leave,NAME_EDUCATION_TYPE_Secondary / secondary special,NAME_EDUCATION_TYPE_Higher education,NAME_EDUCATION_TYPE_Incomplete higher,NAME_EDUCATION_TYPE_Lower secondary,NAME_EDUCATION_TYPE_Academic degree,NAME_FAMILY_STATUS_Single / not married,NAME_FAMILY_STATUS_Married,NAME_FAMILY_STATUS_Civil marriage,NAME_FAMILY_STATUS_Widow,NAME_FAMILY_STATUS_Separated,NAME_FAMILY_STATUS_Unknown,NAME_HOUSING_TYPE_House / apartment,NAME_HOUSING_TYPE_Rented apartment,NAME_HOUSING_TYPE_With parents,NAME_HOUSING_TYPE_Municipal apartment,NAME_HOUSING_TYPE_Office apartment,NAME_HOUSING_TYPE_Co-op apartment,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Core staff,OCCUPATION_TYPE_Accountants,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Drivers,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Cleaning staff,OCCUPATION_TYPE_Cooking staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Security staff,OCCUPATION_TYPE_High skill tech staff,OCCUPATION_TYPE_Waiters/barmen staff,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Realty agents,OCCUPATION_TYPE_Secretaries,OCCUPATION_TYPE_IT staff,OCCUPATION_TYPE_HR staff,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START_WEDNESDAY,WEEKDAY_APPR_PROCESS_START_MONDAY,WEEKDAY_APPR_PROCESS_START_THURSDAY,WEEKDAY_APPR_PROCESS_START_SUNDAY,WEEKDAY_APPR_PROCESS_START_SATURDAY,WEEKDAY_APPR_PROCESS_START_FRIDAY,WEEKDAY_APPR_PROCESS_START_TUESDAY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE_Business Entity Type 3,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Religion,ORGANIZATION_TYPE_Other,ORGANIZATION_TYPE_XNA,ORGANIZATION_TYPE_Electricity,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Business Entity Type 2,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Transport: type 2,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Trade: type 7,ORGANIZATION_TYPE_Industry: type 11,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Security Ministries,ORGANIZATION_TYPE_Transport: type 4,ORGANIZATION_TYPE_Industry: type 1,ORGANIZATION_TYPE_Emergency,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Trade: type 2,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_Transport: type 3,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Business Entity Type 1,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Industry: type 4,ORGANIZATION_TYPE_Agriculture,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_Culture,ORGANIZATION_TYPE_Hotel,ORGANIZATION_TYPE_Industry: type 7,ORGANIZATION_TYPE_Trade: type 3,ORGANIZATION_TYPE_Industry: type 3,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Industry: type 9,ORGANIZATION_TYPE_Insurance,ORGANIZATION_TYPE_Trade: type 6,ORGANIZATION_TYPE_Industry: type 2,ORGANIZATION_TYPE_Transport: type 1,ORGANIZATION_TYPE_Industry: type 12,ORGANIZATION_TYPE_Mobile,ORGANIZATION_TYPE_Trade: type 1,ORGANIZATION_TYPE_Industry: type 5,ORGANIZATION_TYPE_Industry: type 10,ORGANIZATION_TYPE_Legal Services,ORGANIZATION_TYPE_Advertising,ORGANIZATION_TYPE_Trade: type 5,ORGANIZATION_TYPE_Cleaning,ORGANIZATION_TYPE_Industry: type 13,ORGANIZATION_TYPE_Trade: type 4,ORGANIZATION_TYPE_Telecom,ORGANIZATION_TYPE_Industry: type 8,ORGANIZATION_TYPE_Realtor,ORGANIZATION_TYPE_Industry: type 6,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE_reg oper account,FONDKAPREMONT_MODE_org spec account,FONDKAPREMONT_MODE_reg oper spec account,FONDKAPREMONT_MODE_not specified,HOUSETYPE_MODE_block of flats,HOUSETYPE_MODE_terraced house,HOUSETYPE_MODE_specific housing,TOTALAREA_MODE,"WALLSMATERIAL_MODE_Stone, brick",WALLSMATERIAL_MODE_Block,WALLSMATERIAL_MODE_Panel,WALLSMATERIAL_MODE_Mixed,WALLSMATERIAL_MODE_Wooden,WALLSMATERIAL_MODE_Others,WALLSMATERIAL_MODE_Monolithic,EMERGENCYSTATE_MODE_No,EMERGENCYSTATE_MODE_Yes,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,TARGET
0,100002,1,0,1,0,0,1,0,1,0,0,202500.0,406597.5,24700.5,351000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0.018801,-9461,-637,-3648.0,-2120,9.0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.0,2,2,1,0,0,0,0,0,0,10,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.083037,0.262949,0.139376,0.0247,0.0369,0.9722,0.6192,0.0143,0.00,0.0690,0.0833,0.1250,0.0369,0.0202,0.0190,0.0000,0.0000,0.0252,0.0383,0.9722,0.6341,0.0144,0.0000,0.0690,0.0833,0.1250,0.0377,0.0220,0.0198,0.0,0.0000,0.0250,0.0369,0.9722,0.6243,0.0144,0.00,0.0690,0.0833,0.1250,0.0375,0.0205,0.0193,0.0000,0.0000,1,0,0,0,1,0,0,0.0149,1,0,0,0,0,0,0,1,0,2.0,2.0,2.0,2.0,-1134.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,1
1,100003,1,0,0,1,0,1,0,0,1,0,270000.0,1293502.5,35698.5,1129500.0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.003541,-16765,-1188,-1186.0,-291,9.0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,1,1,0,1,0,0,0,0,0,11,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.311267,0.622246,0.535276,0.0959,0.0529,0.9851,0.7960,0.0605,0.08,0.0345,0.2917,0.3333,0.0130,0.0773,0.0549,0.0039,0.0098,0.0924,0.0538,0.9851,0.8040,0.0497,0.0806,0.0345,0.2917,0.3333,0.0128,0.0790,0.0554,0.0,0.0000,0.0968,0.0529,0.9851,0.7987,0.0608,0.08,0.0345,0.2917,0.3333,0.0132,0.0787,0.0558,0.0039,0.0100,1,0,0,0,1,0,0,0.0714,0,1,0,0,0,0,0,1,0,1.0,0.0,1.0,0.0,-828.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,100004,0,1,1,0,0,0,1,1,0,0,67500.0,135000.0,6750.0,135000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0.010032,-19046,-225,-4260.0,-2531,26.0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.0,2,2,0,1,0,0,0,0,0,9,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.505998,0.555912,0.729567,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-815.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,100006,1,0,0,1,0,1,0,1,0,0,135000.0,312682.5,29686.5,297000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0.008019,-19005,-3039,-9833.0,-2437,9.0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,2,2,1,0,0,0,0,0,0,17,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.505998,0.650442,0.535276,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,2.0,0.0,2.0,0.0,-617.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,0
4,100007,1,0,1,0,0,1,0,1,0,0,121500.0,513000.0,21865.5,513000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0.028663,-19932,-3038,-4311.0,-3458,9.0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.0,2,2,0,0,1,0,0,0,0,11,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.505998,0.322738,0.535276,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-1106.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
307506,456251,1,0,1,0,0,1,0,0,1,0,157500.0,254700.0,27558.0,225000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0.032561,-9327,-236,-8456.0,-1982,9.0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1.0,1,1,0,0,1,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.145570,0.681632,0.535276,0.2021,0.0887,0.9876,0.8300,0.0202,0.22,0.1034,0.6042,0.2708,0.0594,0.1484,0.1965,0.0753,0.1095,0.1008,0.0172,0.9782,0.7125,0.0172,0.0806,0.0345,0.4583,0.0417,0.0094,0.0882,0.0853,0.0,0.0125,0.2040,0.0887,0.9876,0.8323,0.0203,0.22,0.1034,0.6042,0.2708,0.0605,0.1509,0.2001,0.0757,0.1118,1,0,0,0,1,0,0,0.2898,1,0,0,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-273.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,0
307507,456252,1,0,0,1,0,1,0,1,0,0,72000.0,269550.0,12001.5,225000.0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0.025164,-20775,365243,-4388.0,-4090,9.0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.0,2,2,0,1,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.505998,0.115992,0.535276,0.0247,0.0435,0.9727,0.6260,0.0022,0.00,0.1034,0.0833,0.1250,0.0579,0.0202,0.0257,0.0000,0.0000,0.0252,0.0451,0.9727,0.6406,0.0022,0.0000,0.1034,0.0833,0.1250,0.0592,0.0220,0.0267,0.0,0.0000,0.0250,0.0435,0.9727,0.6310,0.0022,0.00,0.1034,0.0833,0.1250,0.0589,0.0205,0.0261,0.0000,0.0000,1,0,0,0,1,0,0,0.0214,1,0,0,0,0,0,0,1,0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,0
307508,456253,1,0,0,1,0,1,0,1,0,0,153000.0,677664.0,29979.0,585000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0.005002,-14966,-7921,-6737.0,-5150,9.0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.0,3,3,0,0,1,0,0,0,0,9,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.744026,0.535722,0.218859,0.1031,0.0862,0.9816,0.7484,0.0123,0.00,0.2069,0.1667,0.2083,0.0481,0.0841,0.9279,0.0000,0.0000,0.1050,0.0894,0.9816,0.7583,0.0124,0.0000,0.2069,0.1667,0.2083,0.0458,0.0918,0.9667,0.0,0.0000,0.1041,0.0862,0.9816,0.7518,0.0124,0.00,0.2069,0.1667,0.2083,0.0487,0.0855,0.9445,0.0000,0.0000,1,0,0,0,1,0,0,0.7970,0,0,1,0,0,0,0,1,0,6.0,0.0,6.0,0.0,-1909.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.0,0.0,0.0,1.0,0.0,1.0,0
307509,456254,1,0,0,1,0,1,0,1,0,0,171000.0,370107.0,20205.0,319500.0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.005313,-11961,-4786,-2562.0,-931,9.0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,2,2,1,0,0,0,0,0,0,9,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.505998,0.514163,0.661024,0.0124,0.0763,0.9771,0.7552,0.0211,0.00,0.0690,0.0417,0.2083,0.0481,0.0756,0.0061,0.0000,0.0036,0.0126,0.0746,0.9772,0.7648,0.0190,0.0000,0.0690,0.0417,0.2083,0.0458,0.0771,0.0063,0.0,0.0011,0.0125,0.0758,0.9771,0.7585,0.0208,0.00,0.0690,0.0417,0.2083,0.0487,0.0761,0.0062,0.0000,0.0031,1,0,0,0,1,0,0,0.0086,1,0,0,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-322.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,1


# Initial Merge Data

In [56]:
first_merge_df = installments_payments.merge(credit_card_balance, on=["SK_ID_PREV", "SK_ID_CURR"])
first_merge_df.shape

(72459, 16)

In [115]:
second_merge_df = first_merge_df.merge(previous_application, on=["SK_ID_PREV", "SK_ID_CURR"])
duplicated_SK_ID_CURRs = second_merge_df[second_merge_df.SK_ID_CURR.duplicated()].index
second_merge_df.drop(duplicated_SK_ID_CURRs, inplace=True)
second_merge_df.drop(columns="SK_ID_PREV", inplace=True)
second_merge_df.shape


(62076, 42)

In [122]:
train_merge = second_merge_df.merge(application_train_encoded, on="SK_ID_CURR")
train_merge.shape

(52962, 287)

### Merging with Application_Test
Need to preserve all rows in Application_Test in order to submit an entry.

However, right merge, results in significant NULL values.

In [123]:
test_merge = second_merge_df.merge(application_test_encoded, on="SK_ID_CURR", how="inner")
test_merge

Unnamed: 0,SK_ID_CURR,NUM_INSTALMENT_VERSION,DAYS_DIFFERENCE,AMT_DIFFERENCE,AMT_BALANCE,AMT_CREDIT_LIMIT_ACTUAL,AMT_INST_MIN_REGULARITY,AMT_PAYMENT_TOTAL_CURRENT,AMT_TOTAL_RECEIVABLE,CNT_INSTALMENT_MATURE_CUM,NAME_CONTRACT_STATUS_x,SK_DPD_DEF,AMT_DRAWINGS_TOTAL,CNT_DRAWINGS_TOTAL,COUNT_CREDIT_CARD,NAME_CONTRACT_TYPE,AMT_ANNUITY_x,AMT_APPLICATION,AMT_CREDIT_x,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE_x,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START_x,FLAG_LAST_APPL_PER_CONTRACT,NFLAG_LAST_APPL_IN_DAY,RATE_DOWN_PAYMENT,NAME_CASH_LOAN_PURPOSE,NAME_CONTRACT_STATUS_y,DAYS_DECISION,NAME_PAYMENT_TYPE,CODE_REJECT_REASON,NAME_TYPE_SUITE,NAME_CLIENT_TYPE,NAME_GOODS_CATEGORY,NAME_PORTFOLIO,NAME_PRODUCT_TYPE,CHANNEL_TYPE,SELLERPLACE_AREA,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,NAME_CONTRACT_TYPE_Cash loans,NAME_CONTRACT_TYPE_Revolving loans,CODE_GENDER_M,CODE_GENDER_F,CODE_GENDER_XNA,FLAG_OWN_CAR_N,FLAG_OWN_CAR_Y,FLAG_OWN_REALTY_Y,FLAG_OWN_REALTY_N,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT_y,AMT_ANNUITY_y,AMT_GOODS_PRICE_y,NAME_TYPE_SUITE_Unaccompanied,NAME_TYPE_SUITE_Family,"NAME_TYPE_SUITE_Spouse, partner",NAME_TYPE_SUITE_Children,NAME_TYPE_SUITE_Other_A,NAME_TYPE_SUITE_Other_B,NAME_TYPE_SUITE_Group of people,NAME_INCOME_TYPE_Working,NAME_INCOME_TYPE_State servant,NAME_INCOME_TYPE_Commercial associate,NAME_INCOME_TYPE_Pensioner,NAME_INCOME_TYPE_Unemployed,NAME_INCOME_TYPE_Student,NAME_INCOME_TYPE_Businessman,NAME_INCOME_TYPE_Maternity leave,NAME_EDUCATION_TYPE_Secondary / secondary special,NAME_EDUCATION_TYPE_Higher education,NAME_EDUCATION_TYPE_Incomplete higher,NAME_EDUCATION_TYPE_Lower secondary,NAME_EDUCATION_TYPE_Academic degree,NAME_FAMILY_STATUS_Single / not married,NAME_FAMILY_STATUS_Married,NAME_FAMILY_STATUS_Civil marriage,NAME_FAMILY_STATUS_Widow,NAME_FAMILY_STATUS_Separated,NAME_FAMILY_STATUS_Unknown,NAME_HOUSING_TYPE_House / apartment,NAME_HOUSING_TYPE_Rented apartment,NAME_HOUSING_TYPE_With parents,NAME_HOUSING_TYPE_Municipal apartment,NAME_HOUSING_TYPE_Office apartment,NAME_HOUSING_TYPE_Co-op apartment,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Core staff,OCCUPATION_TYPE_Accountants,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Drivers,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Cleaning staff,OCCUPATION_TYPE_Cooking staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Security staff,OCCUPATION_TYPE_High skill tech staff,OCCUPATION_TYPE_Waiters/barmen staff,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Realty agents,OCCUPATION_TYPE_Secretaries,OCCUPATION_TYPE_IT staff,OCCUPATION_TYPE_HR staff,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START_WEDNESDAY,WEEKDAY_APPR_PROCESS_START_MONDAY,WEEKDAY_APPR_PROCESS_START_THURSDAY,WEEKDAY_APPR_PROCESS_START_SUNDAY,WEEKDAY_APPR_PROCESS_START_SATURDAY,WEEKDAY_APPR_PROCESS_START_FRIDAY,WEEKDAY_APPR_PROCESS_START_TUESDAY,HOUR_APPR_PROCESS_START_y,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE_Business Entity Type 3,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Religion,ORGANIZATION_TYPE_Other,ORGANIZATION_TYPE_XNA,ORGANIZATION_TYPE_Electricity,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Business Entity Type 2,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Transport: type 2,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Trade: type 7,ORGANIZATION_TYPE_Industry: type 11,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Security Ministries,ORGANIZATION_TYPE_Transport: type 4,ORGANIZATION_TYPE_Industry: type 1,ORGANIZATION_TYPE_Emergency,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Trade: type 2,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_Transport: type 3,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Business Entity Type 1,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Industry: type 4,ORGANIZATION_TYPE_Agriculture,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_Culture,ORGANIZATION_TYPE_Hotel,ORGANIZATION_TYPE_Industry: type 7,ORGANIZATION_TYPE_Trade: type 3,ORGANIZATION_TYPE_Industry: type 3,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Industry: type 9,ORGANIZATION_TYPE_Insurance,ORGANIZATION_TYPE_Trade: type 6,ORGANIZATION_TYPE_Industry: type 2,ORGANIZATION_TYPE_Transport: type 1,ORGANIZATION_TYPE_Industry: type 12,ORGANIZATION_TYPE_Mobile,ORGANIZATION_TYPE_Trade: type 1,ORGANIZATION_TYPE_Industry: type 5,ORGANIZATION_TYPE_Industry: type 10,ORGANIZATION_TYPE_Legal Services,ORGANIZATION_TYPE_Advertising,ORGANIZATION_TYPE_Trade: type 5,ORGANIZATION_TYPE_Cleaning,ORGANIZATION_TYPE_Industry: type 13,ORGANIZATION_TYPE_Trade: type 4,ORGANIZATION_TYPE_Telecom,ORGANIZATION_TYPE_Industry: type 8,ORGANIZATION_TYPE_Realtor,ORGANIZATION_TYPE_Industry: type 6,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE_reg oper account,FONDKAPREMONT_MODE_org spec account,FONDKAPREMONT_MODE_reg oper spec account,FONDKAPREMONT_MODE_not specified,HOUSETYPE_MODE_block of flats,HOUSETYPE_MODE_terraced house,HOUSETYPE_MODE_specific housing,TOTALAREA_MODE,"WALLSMATERIAL_MODE_Stone, brick",WALLSMATERIAL_MODE_Block,WALLSMATERIAL_MODE_Panel,WALLSMATERIAL_MODE_Mixed,WALLSMATERIAL_MODE_Wooden,WALLSMATERIAL_MODE_Others,WALLSMATERIAL_MODE_Monolithic,EMERGENCYSTATE_MODE_No,EMERGENCYSTATE_MODE_Yes,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,112468,0.0,2.055556,-7.250000,0.0000,180000.0,0.000,224.145,0.0000,3.0,4.0,0.0,0.0,0.0,12.0,0,9000.0,180000.0,180000.0,0.000000,180000.00,3,9,1,1,0.000000,24,3,-386,2,8,6,3,27,1,1,2,10,10,0.0,0,8,1,0,0,1,0,0,1,0,1,0,112500.0,573408.0,27585.0,495000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0.025164,-9991,-969,-1639.0,-2663,15.0,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2.0,2,2,1,0,0,0,0,0,0,14,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.254623,0.526273,0.483050,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0000,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-641.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,1.0
1,112028,0.0,3.652174,-29.250000,76228.9425,900000.0,4868.640,2344.635,76916.5425,5.0,4.0,0.0,1899.0,1.0,30.0,0,45000.0,0.0,900000.0,13136.538462,351750.00,1,8,1,1,0.092154,24,3,-926,2,8,6,1,27,1,1,2,3,10,0.0,0,8,1,0,1,0,0,0,1,0,1,2,135000.0,285264.0,30852.0,252000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.020713,-14297,-6941,-63.0,-4754,9.0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,4.0,3,3,1,0,0,0,0,0,0,8,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.505998,0.478798,0.631355,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0000,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,1.0,0.0,1.0,0.0,-2321.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,195100,0.0,3.500000,0.000000,403550.5275,450000.0,21560.985,22500.000,402877.7775,12.5,4.0,0.0,0.0,0.0,26.0,0,22500.0,0.0,450000.0,302.884615,369900.00,6,6,1,1,0.009586,24,3,-801,2,8,6,3,27,1,1,7,-1,10,0.0,0,8,1,0,1,0,0,1,0,1,0,1,112500.0,577147.5,29596.5,459000.0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.018029,-12786,-1879,-4663.0,-1436,9.0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.0,3,2,0,0,0,0,0,0,1,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.247220,0.673838,0.431192,0.2082,0.1757,0.9906,0.8708,0.1098,0.20,0.2414,0.3333,0.2500,0.1325,0.1689,0.2566,0.0039,0.0087,0.2122,0.1823,0.9906,0.8759,0.1108,0.2014,0.2414,0.3333,0.2500,0.1356,0.1846,0.2674,0.0039,0.0092,0.2103,0.1757,0.9906,0.8725,0.1105,0.20,0.2414,0.3333,0.2500,0.1348,0.1719,0.2612,0.0039,0.0089,1,0,0,0,1,0,0,0.2637,0,0,1,0,0,0,0,1,0,2.0,0.0,2.0,0.0,-1076.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,134812,0.0,5.306122,-91.836735,189716.0400,270000.0,13500.000,16875.000,189716.0400,38.5,4.0,0.0,0.0,0.0,88.0,0,13500.0,0.0,112500.0,10237.500000,102372.75,2,13,1,1,0.104456,24,3,-2688,2,8,6,3,27,1,1,6,1222,9,0.0,0,11,1,0,0,1,0,1,0,1,0,0,112500.0,568800.0,24039.0,450000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.007305,-21129,-837,-11588.0,-4618,9.0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,2.0,3,3,0,0,0,0,0,0,1,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.505998,0.509644,0.513694,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0000,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,4.0,0.0,4.0,0.0,-284.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,5.0
4,163385,0.0,2.303867,-124.309392,63391.2750,67500.0,3375.000,6750.000,63391.2750,45.5,4.0,0.0,0.0,0.0,94.0,0,3375.0,0.0,67500.0,8032.500000,34803.00,2,14,1,1,0.215627,24,3,-2866,2,8,6,3,27,1,1,6,1006,9,0.0,0,11,1,0,0,1,0,1,0,1,0,0,112500.0,218938.5,16497.0,189000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.024610,-23294,-12111,-15722.0,-4851,9.0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,2,2,0,0,0,0,0,1,0,11,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.505998,0.720499,0.315472,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0000,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,4.0,0.0,4.0,0.0,-1505.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9109,413851,0.0,0.000000,0.000000,0.0000,0.0,0.000,0.000,0.0000,1.0,4.0,0.0,0.0,0.0,16.0,0,0.0,0.0,0.0,0.000000,0.00,1,20,1,1,0.000000,24,3,-525,2,8,6,2,27,1,0,6,36,8,0.0,0,11,1,0,0,1,0,0,1,1,0,1,135000.0,601470.0,32760.0,450000.0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0.030755,-10707,-867,-733.0,-796,31.0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.0,2,2,0,1,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.505998,0.385224,0.382502,0.1856,0.1086,0.9955,0.7552,0.0211,0.16,0.0690,0.6667,0.2083,0.0546,0.0756,0.1785,0.0000,0.0823,0.1891,0.1127,0.9955,0.7648,0.0190,0.1611,0.0690,0.6667,0.2083,0.0558,0.0771,0.1860,0.0000,0.0871,0.1874,0.1086,0.9955,0.7585,0.0208,0.16,0.0690,0.6667,0.2083,0.0555,0.0761,0.1817,0.0000,0.0840,1,0,0,0,1,0,0,0.1583,1,0,0,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-525.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,1.0
9110,448241,0.0,0.000000,0.000000,0.0000,0.0,0.000,0.000,0.0000,0.0,4.0,0.0,0.0,0.0,11.0,0,0.0,0.0,0.0,1418.100000,0.00,3,8,1,1,0.077707,24,3,-331,2,8,6,3,27,1,0,4,240,9,0.0,0,11,1,0,0,1,0,1,0,1,0,0,157500.0,218016.0,17352.0,180000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.007020,-22505,-5346,-4257.0,-4257,9.0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,2,2,0,0,0,0,0,1,0,10,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.505998,0.454729,0.616122,0.0124,0.0763,0.9762,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0106,0.0000,0.0036,0.0126,0.0746,0.9762,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0110,0.0000,0.0011,0.0125,0.0758,0.9762,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0108,0.0000,0.0031,1,0,0,0,1,0,0,0.0124,0,0,1,0,0,0,0,0,1,1.0,0.0,1.0,0.0,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,3.0
9111,423133,0.0,0.000000,0.000000,0.0000,157500.0,0.000,0.000,0.0000,0.0,6.0,0.0,0.0,0.0,12.0,0,7875.0,157500.0,157500.0,4811.625000,157500.00,6,12,1,1,0.095323,24,3,-375,2,8,6,1,27,1,1,7,-1,10,0.0,0,8,1,0,0,1,0,0,1,1,0,0,153000.0,405000.0,32125.5,405000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0.026392,-11190,-1882,-1085.0,-3885,11.0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1.0,2,2,1,0,0,0,0,0,0,16,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.323780,0.692530,0.657784,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0000,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-375.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
9112,445165,0.0,0.000000,0.000000,0.0000,337500.0,0.000,0.000,0.0000,0.0,4.0,0.0,0.0,0.0,15.0,0,16875.0,337500.0,337500.0,7200.000000,337500.00,1,13,1,1,0.065630,24,3,-449,2,8,5,3,27,1,1,7,-1,10,0.0,0,8,1,0,1,0,0,1,0,1,0,2,157500.0,285723.0,22239.0,238500.0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.031329,-12467,-5296,-5592.0,-4439,9.0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4.0,2,2,0,0,0,0,0,0,1,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.217004,0.644071,0.825636,0.0876,0.1469,0.9762,0.7552,0.0211,0.00,0.2759,0.1667,0.2083,0.0481,0.0756,0.1087,0.0000,0.0036,0.0840,0.1525,0.9762,0.7648,0.0190,0.0000,0.2759,0.1667,0.2083,0.0458,0.0771,0.1133,0.0000,0.0011,0.0864,0.1469,0.9762,0.7585,0.0208,0.00,0.2759,0.1667,0.2083,0.0487,0.0761,0.1107,0.0000,0.0031,1,0,0,0,1,0,0,0.1029,0,0,1,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-2723.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0


In [121]:
test_merge = second_merge_df.merge(application_test_encoded, on="SK_ID_CURR", how="right")
test_merge

Unnamed: 0,SK_ID_CURR,NUM_INSTALMENT_VERSION,DAYS_DIFFERENCE,AMT_DIFFERENCE,AMT_BALANCE,AMT_CREDIT_LIMIT_ACTUAL,AMT_INST_MIN_REGULARITY,AMT_PAYMENT_TOTAL_CURRENT,AMT_TOTAL_RECEIVABLE,CNT_INSTALMENT_MATURE_CUM,NAME_CONTRACT_STATUS_x,SK_DPD_DEF,AMT_DRAWINGS_TOTAL,CNT_DRAWINGS_TOTAL,COUNT_CREDIT_CARD,NAME_CONTRACT_TYPE,AMT_ANNUITY_x,AMT_APPLICATION,AMT_CREDIT_x,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE_x,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START_x,FLAG_LAST_APPL_PER_CONTRACT,NFLAG_LAST_APPL_IN_DAY,RATE_DOWN_PAYMENT,NAME_CASH_LOAN_PURPOSE,NAME_CONTRACT_STATUS_y,DAYS_DECISION,NAME_PAYMENT_TYPE,CODE_REJECT_REASON,NAME_TYPE_SUITE,NAME_CLIENT_TYPE,NAME_GOODS_CATEGORY,NAME_PORTFOLIO,NAME_PRODUCT_TYPE,CHANNEL_TYPE,SELLERPLACE_AREA,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,NAME_CONTRACT_TYPE_Cash loans,NAME_CONTRACT_TYPE_Revolving loans,CODE_GENDER_M,CODE_GENDER_F,CODE_GENDER_XNA,FLAG_OWN_CAR_N,FLAG_OWN_CAR_Y,FLAG_OWN_REALTY_Y,FLAG_OWN_REALTY_N,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT_y,AMT_ANNUITY_y,AMT_GOODS_PRICE_y,NAME_TYPE_SUITE_Unaccompanied,NAME_TYPE_SUITE_Family,"NAME_TYPE_SUITE_Spouse, partner",NAME_TYPE_SUITE_Children,NAME_TYPE_SUITE_Other_A,NAME_TYPE_SUITE_Other_B,NAME_TYPE_SUITE_Group of people,NAME_INCOME_TYPE_Working,NAME_INCOME_TYPE_State servant,NAME_INCOME_TYPE_Commercial associate,NAME_INCOME_TYPE_Pensioner,NAME_INCOME_TYPE_Unemployed,NAME_INCOME_TYPE_Student,NAME_INCOME_TYPE_Businessman,NAME_INCOME_TYPE_Maternity leave,NAME_EDUCATION_TYPE_Secondary / secondary special,NAME_EDUCATION_TYPE_Higher education,NAME_EDUCATION_TYPE_Incomplete higher,NAME_EDUCATION_TYPE_Lower secondary,NAME_EDUCATION_TYPE_Academic degree,NAME_FAMILY_STATUS_Single / not married,NAME_FAMILY_STATUS_Married,NAME_FAMILY_STATUS_Civil marriage,NAME_FAMILY_STATUS_Widow,NAME_FAMILY_STATUS_Separated,NAME_FAMILY_STATUS_Unknown,NAME_HOUSING_TYPE_House / apartment,NAME_HOUSING_TYPE_Rented apartment,NAME_HOUSING_TYPE_With parents,NAME_HOUSING_TYPE_Municipal apartment,NAME_HOUSING_TYPE_Office apartment,NAME_HOUSING_TYPE_Co-op apartment,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Core staff,OCCUPATION_TYPE_Accountants,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Drivers,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Cleaning staff,OCCUPATION_TYPE_Cooking staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Security staff,OCCUPATION_TYPE_High skill tech staff,OCCUPATION_TYPE_Waiters/barmen staff,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Realty agents,OCCUPATION_TYPE_Secretaries,OCCUPATION_TYPE_IT staff,OCCUPATION_TYPE_HR staff,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START_WEDNESDAY,WEEKDAY_APPR_PROCESS_START_MONDAY,WEEKDAY_APPR_PROCESS_START_THURSDAY,WEEKDAY_APPR_PROCESS_START_SUNDAY,WEEKDAY_APPR_PROCESS_START_SATURDAY,WEEKDAY_APPR_PROCESS_START_FRIDAY,WEEKDAY_APPR_PROCESS_START_TUESDAY,HOUR_APPR_PROCESS_START_y,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE_Business Entity Type 3,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Religion,ORGANIZATION_TYPE_Other,ORGANIZATION_TYPE_XNA,ORGANIZATION_TYPE_Electricity,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Business Entity Type 2,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Transport: type 2,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Trade: type 7,ORGANIZATION_TYPE_Industry: type 11,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Security Ministries,ORGANIZATION_TYPE_Transport: type 4,ORGANIZATION_TYPE_Industry: type 1,ORGANIZATION_TYPE_Emergency,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Trade: type 2,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_Transport: type 3,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Business Entity Type 1,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Industry: type 4,ORGANIZATION_TYPE_Agriculture,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_Culture,ORGANIZATION_TYPE_Hotel,ORGANIZATION_TYPE_Industry: type 7,ORGANIZATION_TYPE_Trade: type 3,ORGANIZATION_TYPE_Industry: type 3,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Industry: type 9,ORGANIZATION_TYPE_Insurance,ORGANIZATION_TYPE_Trade: type 6,ORGANIZATION_TYPE_Industry: type 2,ORGANIZATION_TYPE_Transport: type 1,ORGANIZATION_TYPE_Industry: type 12,ORGANIZATION_TYPE_Mobile,ORGANIZATION_TYPE_Trade: type 1,ORGANIZATION_TYPE_Industry: type 5,ORGANIZATION_TYPE_Industry: type 10,ORGANIZATION_TYPE_Legal Services,ORGANIZATION_TYPE_Advertising,ORGANIZATION_TYPE_Trade: type 5,ORGANIZATION_TYPE_Cleaning,ORGANIZATION_TYPE_Industry: type 13,ORGANIZATION_TYPE_Trade: type 4,ORGANIZATION_TYPE_Telecom,ORGANIZATION_TYPE_Industry: type 8,ORGANIZATION_TYPE_Realtor,ORGANIZATION_TYPE_Industry: type 6,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE_reg oper account,FONDKAPREMONT_MODE_org spec account,FONDKAPREMONT_MODE_reg oper spec account,FONDKAPREMONT_MODE_not specified,HOUSETYPE_MODE_block of flats,HOUSETYPE_MODE_terraced house,HOUSETYPE_MODE_specific housing,TOTALAREA_MODE,"WALLSMATERIAL_MODE_Stone, brick",WALLSMATERIAL_MODE_Block,WALLSMATERIAL_MODE_Panel,WALLSMATERIAL_MODE_Mixed,WALLSMATERIAL_MODE_Wooden,WALLSMATERIAL_MODE_Others,WALLSMATERIAL_MODE_Monolithic,EMERGENCYSTATE_MODE_No,EMERGENCYSTATE_MODE_Yes,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,0,0,1,0,1,0,1,0,0,135000.0,568800.0,20560.5,450000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.018850,-19241,-2329,-5170.0,-812,9.0,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,2,2,0,0,0,0,0,0,1,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.752614,0.789654,0.159520,0.0660,0.0590,0.9732,0.7552,0.0211,0.00,0.1379,0.1250,0.2083,0.0481,0.0756,0.0505,0.0000,0.0036,0.0672,0.0612,0.9732,0.7648,0.0190,0.0000,0.1379,0.1250,0.2083,0.0458,0.0771,0.0526,0.0000,0.0011,0.0666,0.0590,0.9732,0.7585,0.0208,0.00,0.1379,0.1250,0.2083,0.0487,0.0761,0.0514,0.0000,0.0031,1,0,0,0,1,0,0,0.0392,1,0,0,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-1740.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,100005,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,0,1,0,0,1,0,1,0,0,99000.0,222768.0,17370.0,180000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.035792,-18064,-4469,-9118.0,-1623,9.0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2.0,2,2,0,0,0,0,0,1,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.564990,0.291656,0.432962,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0000,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
2,100013,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,0,1,0,0,0,1,1,0,0,202500.0,663264.0,69777.0,630000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.019101,-20038,-4458,-2175.0,-3503,5.0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,2,2,0,1,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.505998,0.699787,0.610991,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0000,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-856.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,4.0
3,100028,0.0,1.666667,-837.478929,7431.3000,225000.0,6438.3300,6750.0000,7311.33,16.0,4.0,0.0,10042.02,4.0,49.0,0.0,11250.0,0.0,225000.0,0.0,160647.975,4.0,9.0,1.0,1.0,0.000000,24.0,3.0,-1490.0,2.0,8.0,6.0,3.0,27.0,1.0,1.0,3.0,-1.0,10.0,0.0,0.0,8.0,1,0,0,1,0,1,0,1,0,2,315000.0,1575000.0,49018.5,1575000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.026392,-13976,-1866,-2000.0,-4208,9.0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,4.0,2,2,1,0,0,0,0,0,0,11,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.525734,0.509677,0.612704,0.3052,0.1974,0.9970,0.9592,0.1165,0.32,0.2759,0.3750,0.0417,0.2042,0.2404,0.3673,0.0386,0.0800,0.3109,0.2049,0.9970,0.9608,0.1176,0.3222,0.2759,0.3750,0.0417,0.2089,0.2626,0.3827,0.0389,0.0847,0.3081,0.1974,0.9970,0.9597,0.1173,0.32,0.2759,0.3750,0.0417,0.2078,0.2446,0.3739,0.0388,0.0817,1,0,0,0,1,0,0,0.3700,0,0,1,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-1805.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
4,100038,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,0,1,0,0,0,1,0,1,1,180000.0,625500.0,32067.0,625500.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.010032,-13040,-2191,-4000.0,-4262,16.0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.0,2,2,0,0,0,0,0,1,0,5,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.202145,0.425687,0.535276,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0000,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-821.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48739,456221,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,0,0,1,0,1,0,1,0,0,121500.0,412560.0,17473.5,270000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0.002042,-19970,-5169,-9094.0,-3399,9.0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.0,3,3,1,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.505998,0.648575,0.643026,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0000,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,1.0,0.0,1.0,0.0,-684.0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
48740,456222,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,0,0,1,0,1,0,0,1,2,157500.0,622413.0,31909.5,495000.0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.035792,-11186,-1149,-3015.0,-3003,9.0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,4.0,2,2,0,1,0,0,0,0,0,11,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.505998,0.684596,0.535276,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0000,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,2.0,0.0,2.0,0.0,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
48741,456223,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,0,0,1,0,0,1,1,0,1,202500.0,315000.0,33205.5,315000.0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.026392,-15922,-3037,-2681.0,-1504,4.0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.0,2,2,1,0,0,0,0,0,0,12,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.733503,0.632770,0.283712,0.1113,0.1364,0.9955,0.7552,0.0211,0.16,0.1379,0.3333,0.2083,0.0481,0.0756,0.1383,0.0000,0.0542,0.1134,0.1415,0.9955,0.7648,0.0190,0.1611,0.1379,0.3333,0.2083,0.0458,0.0771,0.1441,0.0000,0.0574,0.1124,0.1364,0.9955,0.7585,0.0208,0.16,0.1379,0.3333,0.2083,0.0487,0.0761,0.1408,0.0000,0.0554,1,0,0,0,1,0,0,0.1663,1,0,0,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-838.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,3.0,1.0
48742,456224,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,0,1,0,0,1,0,0,1,0,225000.0,450000.0,25128.0,450000.0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.018850,-13968,-2731,-1461.0,-1364,9.0,1,1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,2,2,0,1,0,0,0,0,0,10,0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.373090,0.445701,0.595456,0.1629,0.0723,0.9896,0.7552,0.0211,0.16,0.0690,0.6250,0.2083,0.0481,0.0756,0.1563,0.0000,0.1490,0.1660,0.0750,0.9896,0.7648,0.0190,0.1611,0.0690,0.6250,0.2083,0.0458,0.0771,0.1204,0.0000,0.1577,0.1645,0.0723,0.9896,0.7585,0.0208,0.16,0.0690,0.6250,0.2083,0.0487,0.0761,0.1591,0.0000,0.1521,1,0,0,0,1,0,0,0.1974,0,0,1,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-2308.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,2.0


In [None]:
# RIGHT MERGING APPLICATION TEST TO PRESERVE SK_ID_CURR
# HOWEVER, THIS RESULTS IN THOUSANDS OF NULLS (80% of Data, 39000 out of 48000)
# COMMENNTED OUT CODE FILLS IN NULLS, BUT NOT TREMENDOUSLY ACCURATE
# for each_column in test_merge.drop(columns = ["SK_ID_CURR","AMT_REQ_CREDIT_BUREAU_YEAR"]).columns:
#      test_merge[each_column] = test_merge.groupby("AMT_REQ_CREDIT_BUREAU_YEAR")[each_column].transform(lambda x: x.fillna(test_merge[each_column].mean()))

# UPDATED MERGE DATA

In [41]:
installments_previous_merge = installments_payments.merge(previous_application, on=["SK_ID_PREV", "SK_ID_CURR"])
installments_previous_merge.shape

(958834, 32)

In [55]:
duplicated_SK_ID_CURRs = installments_previous_merge[installments_previous_merge.SK_ID_CURR.duplicated()].index
installments_previous_merge.drop(duplicated_SK_ID_CURRs, inplace=True)
installments_previous_merge.drop(columns="SK_ID_PREV", inplace=True)

In [56]:
train_merge = installments_previous_merge.merge(application_train_encoded, on="SK_ID_CURR")
train_merge.shape

(289398, 276)

In [47]:
application_test_encoded

Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE_Cash loans,NAME_CONTRACT_TYPE_Revolving loans,CODE_GENDER_M,CODE_GENDER_F,CODE_GENDER_XNA,FLAG_OWN_CAR_N,FLAG_OWN_CAR_Y,FLAG_OWN_REALTY_Y,FLAG_OWN_REALTY_N,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE_Unaccompanied,NAME_TYPE_SUITE_Family,"NAME_TYPE_SUITE_Spouse, partner",NAME_TYPE_SUITE_Children,NAME_TYPE_SUITE_Other_A,NAME_TYPE_SUITE_Other_B,NAME_TYPE_SUITE_Group of people,NAME_INCOME_TYPE_Working,NAME_INCOME_TYPE_State servant,NAME_INCOME_TYPE_Commercial associate,NAME_INCOME_TYPE_Pensioner,NAME_INCOME_TYPE_Unemployed,NAME_INCOME_TYPE_Student,NAME_INCOME_TYPE_Businessman,NAME_INCOME_TYPE_Maternity leave,NAME_EDUCATION_TYPE_Secondary / secondary special,NAME_EDUCATION_TYPE_Higher education,NAME_EDUCATION_TYPE_Incomplete higher,NAME_EDUCATION_TYPE_Lower secondary,NAME_EDUCATION_TYPE_Academic degree,NAME_FAMILY_STATUS_Single / not married,NAME_FAMILY_STATUS_Married,NAME_FAMILY_STATUS_Civil marriage,NAME_FAMILY_STATUS_Widow,NAME_FAMILY_STATUS_Separated,NAME_FAMILY_STATUS_Unknown,NAME_HOUSING_TYPE_House / apartment,NAME_HOUSING_TYPE_Rented apartment,NAME_HOUSING_TYPE_With parents,NAME_HOUSING_TYPE_Municipal apartment,NAME_HOUSING_TYPE_Office apartment,NAME_HOUSING_TYPE_Co-op apartment,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Core staff,OCCUPATION_TYPE_Accountants,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Drivers,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Cleaning staff,OCCUPATION_TYPE_Cooking staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Security staff,OCCUPATION_TYPE_High skill tech staff,OCCUPATION_TYPE_Waiters/barmen staff,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Realty agents,OCCUPATION_TYPE_Secretaries,OCCUPATION_TYPE_IT staff,OCCUPATION_TYPE_HR staff,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START_WEDNESDAY,WEEKDAY_APPR_PROCESS_START_MONDAY,WEEKDAY_APPR_PROCESS_START_THURSDAY,WEEKDAY_APPR_PROCESS_START_SUNDAY,WEEKDAY_APPR_PROCESS_START_SATURDAY,WEEKDAY_APPR_PROCESS_START_FRIDAY,WEEKDAY_APPR_PROCESS_START_TUESDAY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE_Business Entity Type 3,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Religion,ORGANIZATION_TYPE_Other,ORGANIZATION_TYPE_XNA,ORGANIZATION_TYPE_Electricity,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Business Entity Type 2,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Transport: type 2,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Trade: type 7,ORGANIZATION_TYPE_Industry: type 11,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Security Ministries,ORGANIZATION_TYPE_Transport: type 4,ORGANIZATION_TYPE_Industry: type 1,ORGANIZATION_TYPE_Emergency,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Trade: type 2,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_Transport: type 3,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Business Entity Type 1,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Industry: type 4,ORGANIZATION_TYPE_Agriculture,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_Culture,ORGANIZATION_TYPE_Hotel,ORGANIZATION_TYPE_Industry: type 7,ORGANIZATION_TYPE_Trade: type 3,ORGANIZATION_TYPE_Industry: type 3,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Industry: type 9,ORGANIZATION_TYPE_Insurance,ORGANIZATION_TYPE_Trade: type 6,ORGANIZATION_TYPE_Industry: type 2,ORGANIZATION_TYPE_Transport: type 1,ORGANIZATION_TYPE_Industry: type 12,ORGANIZATION_TYPE_Mobile,ORGANIZATION_TYPE_Trade: type 1,ORGANIZATION_TYPE_Industry: type 5,ORGANIZATION_TYPE_Industry: type 10,ORGANIZATION_TYPE_Legal Services,ORGANIZATION_TYPE_Advertising,ORGANIZATION_TYPE_Trade: type 5,ORGANIZATION_TYPE_Cleaning,ORGANIZATION_TYPE_Industry: type 13,ORGANIZATION_TYPE_Trade: type 4,ORGANIZATION_TYPE_Telecom,ORGANIZATION_TYPE_Industry: type 8,ORGANIZATION_TYPE_Realtor,ORGANIZATION_TYPE_Industry: type 6,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE_reg oper account,FONDKAPREMONT_MODE_org spec account,FONDKAPREMONT_MODE_reg oper spec account,FONDKAPREMONT_MODE_not specified,HOUSETYPE_MODE_block of flats,HOUSETYPE_MODE_terraced house,HOUSETYPE_MODE_specific housing,TOTALAREA_MODE,"WALLSMATERIAL_MODE_Stone, brick",WALLSMATERIAL_MODE_Block,WALLSMATERIAL_MODE_Panel,WALLSMATERIAL_MODE_Mixed,WALLSMATERIAL_MODE_Wooden,WALLSMATERIAL_MODE_Others,WALLSMATERIAL_MODE_Monolithic,EMERGENCYSTATE_MODE_No,EMERGENCYSTATE_MODE_Yes,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100001,1,0,0,1,0,1,0,1,0,0,135000.0,568800.0,20560.5,450000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.018850,-19241,-2329,-5170.0,-812,9.0,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,2,2,0,0,0,0,0,0,1,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.752614,0.789654,0.159520,0.0660,0.0590,0.9732,0.7552,0.0211,0.00,0.1379,0.1250,0.2083,0.0481,0.0756,0.0505,0.0000,0.0036,0.0672,0.0612,0.9732,0.7648,0.0190,0.0000,0.1379,0.1250,0.2083,0.0458,0.0771,0.0526,0.0000,0.0011,0.0666,0.0590,0.9732,0.7585,0.0208,0.00,0.1379,0.1250,0.2083,0.0487,0.0761,0.0514,0.0000,0.0031,1,0,0,0,1,0,0,0.0392,1,0,0,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-1740.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,100005,1,0,1,0,0,1,0,1,0,0,99000.0,222768.0,17370.0,180000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.035792,-18064,-4469,-9118.0,-1623,9.0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2.0,2,2,0,0,0,0,0,1,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.564990,0.291656,0.432962,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0000,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
2,100013,1,0,1,0,0,0,1,1,0,0,202500.0,663264.0,69777.0,630000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.019101,-20038,-4458,-2175.0,-3503,5.0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,2,2,0,1,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.505998,0.699787,0.610991,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0000,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-856.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,4.0
3,100028,1,0,0,1,0,1,0,1,0,2,315000.0,1575000.0,49018.5,1575000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.026392,-13976,-1866,-2000.0,-4208,9.0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,4.0,2,2,1,0,0,0,0,0,0,11,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.525734,0.509677,0.612704,0.3052,0.1974,0.9970,0.9592,0.1165,0.32,0.2759,0.3750,0.0417,0.2042,0.2404,0.3673,0.0386,0.0800,0.3109,0.2049,0.9970,0.9608,0.1176,0.3222,0.2759,0.3750,0.0417,0.2089,0.2626,0.3827,0.0389,0.0847,0.3081,0.1974,0.9970,0.9597,0.1173,0.32,0.2759,0.3750,0.0417,0.2078,0.2446,0.3739,0.0388,0.0817,1,0,0,0,1,0,0,0.3700,0,0,1,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-1805.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
4,100038,1,0,1,0,0,0,1,0,1,1,180000.0,625500.0,32067.0,625500.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.010032,-13040,-2191,-4000.0,-4262,16.0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.0,2,2,0,0,0,0,0,1,0,5,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.202145,0.425687,0.535276,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0000,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-821.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48739,456221,1,0,0,1,0,1,0,1,0,0,121500.0,412560.0,17473.5,270000.0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0.002042,-19970,-5169,-9094.0,-3399,9.0,1,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.0,3,3,1,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.505998,0.648575,0.643026,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0000,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,1.0,0.0,1.0,0.0,-684.0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
48740,456222,1,0,0,1,0,1,0,0,1,2,157500.0,622413.0,31909.5,495000.0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.035792,-11186,-1149,-3015.0,-3003,9.0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,4.0,2,2,0,1,0,0,0,0,0,11,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.505998,0.684596,0.535276,0.0876,0.0763,0.9816,0.7552,0.0211,0.00,0.1379,0.1667,0.2083,0.0481,0.0756,0.0745,0.0000,0.0036,0.0840,0.0746,0.9816,0.7648,0.0190,0.0000,0.1379,0.1667,0.2083,0.0458,0.0771,0.0731,0.0000,0.0011,0.0864,0.0758,0.9816,0.7585,0.0208,0.00,0.1379,0.1667,0.2083,0.0487,0.0761,0.0749,0.0000,0.0031,1,0,0,0,1,0,0,0.0688,0,0,1,0,0,0,0,1,0,2.0,0.0,2.0,0.0,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
48741,456223,1,0,0,1,0,0,1,1,0,1,202500.0,315000.0,33205.5,315000.0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.026392,-15922,-3037,-2681.0,-1504,4.0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.0,2,2,1,0,0,0,0,0,0,12,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.733503,0.632770,0.283712,0.1113,0.1364,0.9955,0.7552,0.0211,0.16,0.1379,0.3333,0.2083,0.0481,0.0756,0.1383,0.0000,0.0542,0.1134,0.1415,0.9955,0.7648,0.0190,0.1611,0.1379,0.3333,0.2083,0.0458,0.0771,0.1441,0.0000,0.0574,0.1124,0.1364,0.9955,0.7585,0.0208,0.16,0.1379,0.3333,0.2083,0.0487,0.0761,0.1408,0.0000,0.0554,1,0,0,0,1,0,0,0.1663,1,0,0,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-838.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,3.0,1.0
48742,456224,1,0,1,0,0,1,0,0,1,0,225000.0,450000.0,25128.0,450000.0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.018850,-13968,-2731,-1461.0,-1364,9.0,1,1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,2,2,0,1,0,0,0,0,0,10,0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.373090,0.445701,0.595456,0.1629,0.0723,0.9896,0.7552,0.0211,0.16,0.0690,0.6250,0.2083,0.0481,0.0756,0.1563,0.0000,0.1490,0.1660,0.0750,0.9896,0.7648,0.0190,0.1611,0.0690,0.6250,0.2083,0.0458,0.0771,0.1204,0.0000,0.1577,0.1645,0.0723,0.9896,0.7585,0.0208,0.16,0.0690,0.6250,0.2083,0.0487,0.0761,0.1591,0.0000,0.1521,1,0,0,0,1,0,0,0.1974,0,0,1,0,0,0,0,1,0,0.0,0.0,0.0,0.0,-2308.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,2.0


In [57]:
test_merged.isnull().sum()

SK_ID_PREV                    1216
SK_ID_CURR                       0
NUM_INSTALMENT_VERSION        1216
DAYS_DIFFERENCE               1216
AMT_DIFFERENCE                1216
                              ... 
AMT_REQ_CREDIT_BUREAU_DAY        0
AMT_REQ_CREDIT_BUREAU_WEEK       0
AMT_REQ_CREDIT_BUREAU_MON        0
AMT_REQ_CREDIT_BUREAU_QRT        0
AMT_REQ_CREDIT_BUREAU_YEAR       0
Length: 276, dtype: int64

In [58]:
test_merged = installments_previous_merge.merge(application_test_encoded, on="SK_ID_CURR", how="right")
test_merged.shape

(48744, 275)

# UPDATED EARLY MODEL

In [59]:
# UPDATED
X = train_merge.drop(columns="TARGET")
y = train_merge.TARGET

In [60]:
# XGB BOOST MODEL
xgb_model = XGBClassifier()
xgb_model.fit(X, y)
xgb_model

In [63]:
df_entry = pd.DataFrame([])
df_entry["SK_ID_CURR"] = test_merged.SK_ID_CURR.astype("int")
df_entry["TARGET"] = np.round(xgb_model.predict_proba(test_merged)[:,1],1)
df_entry
df_entry.to_csv("Test_Entry_XGB_8.csv", index =False)

# THIRD PASS MERGE

In [77]:
_first = installments_payments.merge(credit_card_balance, on=["SK_ID_PREV"])
_first.shape

(72459, 17)

In [80]:
_first

Unnamed: 0,SK_ID_PREV,SK_ID_CURR_x,NUM_INSTALMENT_VERSION,DAYS_DIFFERENCE,AMT_DIFFERENCE,SK_ID_CURR_y,AMT_BALANCE,AMT_CREDIT_LIMIT_ACTUAL,AMT_INST_MIN_REGULARITY,AMT_PAYMENT_TOTAL_CURRENT,AMT_TOTAL_RECEIVABLE,CNT_INSTALMENT_MATURE_CUM,NAME_CONTRACT_STATUS,SK_DPD_DEF,AMT_DRAWINGS_TOTAL,CNT_DRAWINGS_TOTAL,COUNT_CREDIT_CARD
0,1330831,151639,0.0,2.388350,-32.766990,151639,0.000,0.0,0.000,0.000,0.000,19.0,4.0,0.0,0.0,0.0,96.0
1,2723183,112102,0.0,5.480000,0.000000,112102,264679.380,270000.0,13406.535,35.640,264679.380,5.0,4.0,0.0,0.0,0.0,13.0
2,1570206,147645,0.0,0.257143,-649.559571,147645,440095.455,450000.0,22124.115,22725.045,438749.955,17.0,4.0,0.0,18000.0,2.0,35.0
3,1594684,100193,0.0,1.064516,0.000000,100193,0.000,0.0,0.000,0.000,0.000,5.0,4.0,0.0,0.0,0.0,85.0
4,2391610,183431,0.0,10.653846,0.000000,183431,237661.200,247500.0,12375.000,12375.000,236785.275,36.5,4.0,0.0,0.0,0.0,76.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72454,1683621,430213,0.0,0.000000,0.000000,430213,0.000,0.0,0.000,0.000,0.000,0.0,4.0,0.0,0.0,0.0,11.0
72455,1982325,437333,0.0,0.000000,0.000000,437333,0.000,0.0,0.000,0.000,0.000,0.0,4.0,0.0,0.0,0.0,9.0
72456,2305961,408738,0.0,0.000000,0.000000,408738,0.000,0.0,0.000,0.000,0.000,0.0,4.0,0.0,0.0,0.0,3.0
72457,1538868,424735,0.0,0.000000,0.000000,424735,0.000,675000.0,0.000,9.000,0.000,0.0,4.0,0.0,18.0,1.0,2.0


In [94]:
_blank = installments_payments.merge(POS_CASH_balance, on=["SK_ID_PREV", "SK_ID_CURR"],how="inner")
_blank.shape

(9894519, 11)

In [101]:
__blank = _blank.merge(credit_card_balance, on=["SK_ID_PREV"], how="inner")
__blank.shape

(0, 23)

In [106]:
application_test.merge(POS_CASH_balance, on=["SK_ID_CURR"], how="inner").SK_ID_CURR.nunique()

47808

In [98]:
POS_CASH_balance

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,CNT_INSTALMENT,CNT_INSTALMENT_FUTURE,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,1803195,182943,-31,48.0,45.0,Active,0,0
1,1715348,367990,-33,36.0,35.0,Active,0,0
2,1784872,397406,-32,12.0,9.0,Active,0,0
3,1903291,269225,-35,48.0,42.0,Active,0,0
4,2341044,334279,-35,36.0,35.0,Active,0,0
...,...,...,...,...,...,...,...,...
10001353,2448283,226558,-20,6.0,0.0,Active,843,0
10001354,1717234,141565,-19,12.0,0.0,Active,602,0
10001355,1283126,315695,-21,10.0,0.0,Active,609,0
10001356,1082516,450255,-22,12.0,0.0,Active,614,0


In [109]:
contract_status_dict = {"Refused":1, "Demand":2, "Sent proposal":3, "Active": 4 , "Signed":5 ,"Completed": 6,"Approved":7}
POS_CASH_balance.NAME_CONTRACT_STATUS = POS_CASH_balance.NAME_CONTRACT_STATUS.map(contract_status_dict)

In [113]:
POS_CASH_balance = POS_CASH_balance.fillna(0)

In [118]:
def compress_mean(df_example, list_of_columns):
    for each_column in list_of_columns:
        df_example[each_column] = df_example.groupby(["SK_ID_PREV","SK_ID_CURR"])[each_column].transform(lambda x: x.mean())
    return df_example.drop_duplicates()

POS_CASH_balance_revised = compress_median(POS_CASH_balance, POS_CASH_balance.drop(columns = ["SK_ID_PREV","SK_ID_CURR"]).columns)

KeyboardInterrupt: 

In [117]:
count_SK_ID_CURR_in_test(POS_CASH_balance.SK_ID_CURR)
POS_CASH_balance.groupby(["SK_ID_CURR", "SK_ID_PREV"]).MONTHS_BALANCE.count().sort_values()

IDs Matching in Application_Test: 47808
IDs Missing in Application_Test: 936


SK_ID_CURR  SK_ID_PREV
103800      1103932        1
395910      1672189        1
434960      2220885        1
176981      2001793        1
434949      2581728        1
                          ..
188537      2230035       96
165493      2332615       96
270252      1455438       96
289119      2551828       96
192017      1252382       96
Name: MONTHS_BALANCE, Length: 936325, dtype: int64

In [73]:
credit_card_balance.shape

(104307, 13)

# Early Model

In [104]:
# SUCCESS WITH SIMPLE
X = application_train_encoded.drop(columns="TARGET")
y = application_train_encoded.TARGET

In [111]:
# MERGING MULTIPLE DATAFRAMES RESULTS IN NULLs FOR test_merge
X = train_merge.drop(columns="TARGET")
y = train_merge.TARGET

In [106]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.2)

In [114]:
# LOGISTIC REGRESSION MODEL
lgr_model = LogisticRegression()
lgr_model.fit(X_train, y_train)
#lgr_model.predict_proba(X_test) # Predicted Probabilities

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [112]:
# XGB BOOST MODEL
xgb_model = XGBClassifier()
xgb_model.fit(X_train, y_train)
xgb_model

# Early Entry

In [110]:
df_entry = pd.DataFrame([])
df_entry["SK_ID_CURR"] = application_test_encoded.SK_ID_CURR.astype("int")
df_entry["TARGET"] = np.round(xgb_model.predict_proba(application_test_encoded)[:,1],1)
df_entry
df_entry.to_csv("Test_Entry_XGB_7.csv", index =False)

# SCRAP

## TIP: More important to remove data than impute data

In [None]:
# NOTE of Potentially Similar Columns

# Previous Application -- AMT_APPLICATION and AMT_CREDIT

# Credit Card Balance -- Several Options

# def application_encoding(df_example):
#     df_example = df_example.copy()
#     for each_column in df_example.select_dtypes(include="object").columns:
#         map_dict = {}
#         categories = list(df_example[each_column].value_counts(ascending=True).index)
#         for each_category in categories:
#             map_dict[each_category] = categories.index(each_category)
#         df_example[each_column] = df_example[each_column].map(map_dict)
        
#     return df_example

## Compress Columns: Engineering

DAYS_DIFFERENCE
* If entry payment has larger abs, then paid on time
* If negative, how many days late

AMT_DIFFERECE 
* If negative, then they did not fully pay the installment they were supposed to



NOTE: Opportuntiies to also merge POS_CASH_balance and Bureau 

In [None]:
# bureau_df = bureau_balance.merge(bureau, on="SK_ID_BUREAU")
# bureau_df.groupby("SK_ID_BUREAU").STATUS.count().reset_index().sort_values(by="STATUS")
# MERGE BREAKS: third_merge_df.merge(bureau_df, on="SK_ID_CURR")

In [57]:
# from sklearn.model_selection import GridSearchCV

# param_grid = {
#     "n_estimators": [100, 200, 300],
#     "max_depth": [5, 10, 15],
#     "learning_rate": [0.01, 0.1, 0.2],
# }

# grid = GridSearchCV(xgb_model, param_grid, cv=3, verbose=1)
# grid.fit(X_train, y_train)
# grid.best_estimator_.predict(X_test)

ORDINAL ENCODE THE DATA IN PREVIOUS APPLICATION (Did it for Previous Application)

Simplify so dont need to one hot encode everything, control the amount of columns


For application_train 

Train encoder on training data

Then use encode for application_test

## IDEAS

Need to right merge with application test in order to preserve all SK_ID_CURR values

As it stands, test_merge has many nulls 

However, that leaves many NULLs where an SKID was not listed with previous information (Credit Card Balance does not include them all)

Tried to fill empty nulls with the mean for that category after the fact 

Aim to find a way to preserve information




More in depth way to handle the NULLs in General

Future: interpolate based on highest correlated column



Remove closely correlated variables

Add information from bureau and POS CASH 





# ACHEIVED TARGET

I achieved the best Score by not even including the other columns

Simple encoding of the application_train and application_test with an XG Boost Model

Tried to string together many other features, but ended up with many Nulls that didnt line up with the SK_ID_CURR of the application_test. THis made me impute many of the values, based on unspecific groupings