In [19]:
import pandas as pd
from sklearn.metrics import auc, roc_curve
import pandasql as pdsql
import sas7bdat as sas
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
pd.set_option('display.max_columns', 500)
%matplotlib inline

In [85]:
def dataframe_with_null_dummies(df):
    '''
    INPUT: dataframe
    OUTPUT: dataframe with dummy columns for IS_NULL for all columns that contain null values'''
    new_df = df.copy()
    for col in new_df.columns:
        new_df[f'{col}_IS_NULL'] = new_df[col].isna()
    return new_df

def get_dummy_dataframe_for_categorical(df, dummy_column_type='O'):
    '''
    INPUT: dataframe
    OUTPUT: dataframe with dummy columns added for all of X type
    columns'''
    new_df = df.copy()
    dtypes = list(df.dtypes.values)
    for i in range(len(dtypes)):
        if dtypes[i] == dummy_column_type:
            dum_col_to_add = pd.get_dummies(df.iloc[:,i],prefix='DUM_')
            new_df = pd.concat([new_df, dum_col_to_add], axis=1)
    return new_df

def get_valid_columns(df, valid_types=['bool', 'uint8', 'int64', 'float64']):
    dtypes = list(df.dtypes.values)
    valid_columns = []
    for i in range(len(dtypes)):
        if dtypes[i] in valid_types:
            valid_columns.append(i)
    return valid_columns

def plot_roc(model, x_columns, y_true, title="model type"):
    '''
    INPUT: fitted model, array of x values, array of target values, optional:
    title of outputted figure
    OUTPUT: ROC curve with AUC value
    '''

    y_pred = model.predict_proba(x_columns)

    fpr, tpr, threshold = roc_curve(y_true, y_pred[:, 1])
    area_under_curve = auc(fpr, tpr)

    # method I: plt
    fig, ax = plt.subplots()
    if title == "model type":
        model_name = str(type(model)).split('.')[-1].strip(">\'")
    else:
        model_name = title

    plt.title(f'{model_name} ROC')
    ax.plot(fpr, tpr, 'k', label='C AUC = %0.3f' % area_under_curve)

    ax.legend(loc='lower right')
    ax.plot([0, 1], [0, 1], 'r--')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()

In [92]:
def all_df_featurization(train_df, test_df, target_name, other_columns_to_ignore):
    x_column_set = list(set(df_application_train).intersection(df_application_test)\
                     - set(other_columns_to_ignore))
    train_df = train_df[x_column_set].copy()
    test_df = test_df[x_column_set].copy()
    
    clean_train_df = dataframe_with_null_dummies(train_df)
    clean_test_df = dataframe_with_null_dummies(test_df)
    
    
    clean_train_df = get_dummy_dataframe_for_categorical(clean_train_df)
    clean_test_df = get_dummy_dataframe_for_categorical(clean_test_df)
    
    val_columns = set(clean_train_df.columns).intersection(clean_test_df.columns)
    clean_train_df = clean_train_df.iloc[:,val_columns].copy()
    clean_test_df = clean_test_df.iloc[:,val_columns].copy()
    
    clean_train_mean = clean_train_df.mean()
    clean_test_mean = clean_test_df.mean()
    
    for col in clean_df.columns:
        clean_train_df[col] = clean_train_df[col].fillna(clean_mean[col])
        clean_test_df[col] = clean_test_df[col].fillna(clean_test_mean[col])
        
    return clean_train_df, clean_test_df

In [91]:
!ls '/Users/ElliottC/.kaggle/competitions/home-credit-default-risk/csvs/'

file_path = '/Users/ElliottC/.kaggle/competitions/home-credit-default-risk/csvs/'

df_application_train = pd.read_csv(file_path + 'application_train.csv')
df_application_test = pd.read_csv(file_path + 'application_test.csv')

HomeCredit_columns_description.csv bureau_balance.csv
POS_CASH_balance.csv               credit_card_balance.csv
application_test.csv               installments_payments.csv
application_train.csv              previous_application.csv
bureau.csv                         sample_submission.csv


In [95]:
train_df, test_df = all_df_featurization(df_application_train, df_application_test, 'TARGET', ['SK_ID_CURR'])

TypeError: len() of unsized object

In [65]:
columns

In [56]:
testing_df = all_df_featurization(df_application_test, 'test')[columns.remove('TARGET')]

AttributeError: 'NoneType' object has no attribute 'remove'

In [33]:
model = RandomForestClassifier()
model.fit(training_df.drop('TARGET', axis=1), training_df['TARGET'])

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [94]:
df_application_train.head()

Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE,HOUSETYPE_MODE,TOTALAREA_MODE,WALLSMATERIAL_MODE,EMERGENCYSTATE_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100002,1,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,351000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.018801,-9461,-637,-3648.0,-2120,,1,1,0,1,1,0,Laborers,1.0,2,2,WEDNESDAY,10,0,0,0,0,0,0,Business Entity Type 3,0.083037,0.262949,0.139376,0.0247,0.0369,0.9722,0.6192,0.0143,0.0,0.069,0.0833,0.125,0.0369,0.0202,0.019,0.0,0.0,0.0252,0.0383,0.9722,0.6341,0.0144,0.0,0.069,0.0833,0.125,0.0377,0.022,0.0198,0.0,0.0,0.025,0.0369,0.9722,0.6243,0.0144,0.0,0.069,0.0833,0.125,0.0375,0.0205,0.0193,0.0,0.0,reg oper account,block of flats,0.0149,"Stone, brick",No,2.0,2.0,2.0,2.0,-1134.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
1,100003,0,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,1129500.0,Family,State servant,Higher education,Married,House / apartment,0.003541,-16765,-1188,-1186.0,-291,,1,1,0,1,1,0,Core staff,2.0,1,1,MONDAY,11,0,0,0,0,0,0,School,0.311267,0.622246,,0.0959,0.0529,0.9851,0.796,0.0605,0.08,0.0345,0.2917,0.3333,0.013,0.0773,0.0549,0.0039,0.0098,0.0924,0.0538,0.9851,0.804,0.0497,0.0806,0.0345,0.2917,0.3333,0.0128,0.079,0.0554,0.0,0.0,0.0968,0.0529,0.9851,0.7987,0.0608,0.08,0.0345,0.2917,0.3333,0.0132,0.0787,0.0558,0.0039,0.01,reg oper account,block of flats,0.0714,Block,No,1.0,0.0,1.0,0.0,-828.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,100004,0,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,135000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.010032,-19046,-225,-4260.0,-2531,26.0,1,1,1,1,1,0,Laborers,1.0,2,2,MONDAY,9,0,0,0,0,0,0,Government,,0.555912,0.729567,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-815.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,100006,0,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,297000.0,Unaccompanied,Working,Secondary / secondary special,Civil marriage,House / apartment,0.008019,-19005,-3039,-9833.0,-2437,,1,1,0,1,0,0,Laborers,2.0,2,2,WEDNESDAY,17,0,0,0,0,0,0,Business Entity Type 3,,0.650442,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,0.0,2.0,0.0,-617.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,
4,100007,0,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,513000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.028663,-19932,-3038,-4311.0,-3458,,1,1,0,1,0,0,Core staff,1.0,2,2,THURSDAY,11,0,0,0,0,1,1,Religion,,0.322738,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1106.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
list(set(training_df.columns) - set(testing_df.columns))

['DUM__Maternity leave', 'DUM__Unknown', 'TARGET']

In [34]:
model.predict(testing_df)

ValueError: Number of features of the model must match the input. Model n_features is 365 and input n_features is 362 

In [127]:
clean_df = dataframe_with_null_dummies(clean_df)

In [128]:
clean_df = get_dummy_dataframe_for_categorical(clean_df)

In [129]:
clean_df.head()

Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE,HOUSETYPE_MODE,TOTALAREA_MODE,WALLSMATERIAL_MODE,EMERGENCYSTATE_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,AMT_ANNUITY_IS_NULL,AMT_GOODS_PRICE_IS_NULL,NAME_TYPE_SUITE_IS_NULL,OWN_CAR_AGE_IS_NULL,OCCUPATION_TYPE_IS_NULL,CNT_FAM_MEMBERS_IS_NULL,EXT_SOURCE_1_IS_NULL,EXT_SOURCE_2_IS_NULL,EXT_SOURCE_3_IS_NULL,APARTMENTS_AVG_IS_NULL,BASEMENTAREA_AVG_IS_NULL,YEARS_BEGINEXPLUATATION_AVG_IS_NULL,YEARS_BUILD_AVG_IS_NULL,COMMONAREA_AVG_IS_NULL,ELEVATORS_AVG_IS_NULL,ENTRANCES_AVG_IS_NULL,FLOORSMAX_AVG_IS_NULL,FLOORSMIN_AVG_IS_NULL,LANDAREA_AVG_IS_NULL,LIVINGAPARTMENTS_AVG_IS_NULL,LIVINGAREA_AVG_IS_NULL,NONLIVINGAPARTMENTS_AVG_IS_NULL,NONLIVINGAREA_AVG_IS_NULL,APARTMENTS_MODE_IS_NULL,BASEMENTAREA_MODE_IS_NULL,YEARS_BEGINEXPLUATATION_MODE_IS_NULL,YEARS_BUILD_MODE_IS_NULL,COMMONAREA_MODE_IS_NULL,ELEVATORS_MODE_IS_NULL,ENTRANCES_MODE_IS_NULL,FLOORSMAX_MODE_IS_NULL,FLOORSMIN_MODE_IS_NULL,LANDAREA_MODE_IS_NULL,LIVINGAPARTMENTS_MODE_IS_NULL,LIVINGAREA_MODE_IS_NULL,NONLIVINGAPARTMENTS_MODE_IS_NULL,NONLIVINGAREA_MODE_IS_NULL,APARTMENTS_MEDI_IS_NULL,BASEMENTAREA_MEDI_IS_NULL,YEARS_BEGINEXPLUATATION_MEDI_IS_NULL,YEARS_BUILD_MEDI_IS_NULL,COMMONAREA_MEDI_IS_NULL,ELEVATORS_MEDI_IS_NULL,ENTRANCES_MEDI_IS_NULL,FLOORSMAX_MEDI_IS_NULL,FLOORSMIN_MEDI_IS_NULL,LANDAREA_MEDI_IS_NULL,LIVINGAPARTMENTS_MEDI_IS_NULL,LIVINGAREA_MEDI_IS_NULL,NONLIVINGAPARTMENTS_MEDI_IS_NULL,NONLIVINGAREA_MEDI_IS_NULL,FONDKAPREMONT_MODE_IS_NULL,HOUSETYPE_MODE_IS_NULL,TOTALAREA_MODE_IS_NULL,WALLSMATERIAL_MODE_IS_NULL,EMERGENCYSTATE_MODE_IS_NULL,OBS_30_CNT_SOCIAL_CIRCLE_IS_NULL,DEF_30_CNT_SOCIAL_CIRCLE_IS_NULL,OBS_60_CNT_SOCIAL_CIRCLE_IS_NULL,DEF_60_CNT_SOCIAL_CIRCLE_IS_NULL,DAYS_LAST_PHONE_CHANGE_IS_NULL,AMT_REQ_CREDIT_BUREAU_HOUR_IS_NULL,AMT_REQ_CREDIT_BUREAU_DAY_IS_NULL,AMT_REQ_CREDIT_BUREAU_WEEK_IS_NULL,AMT_REQ_CREDIT_BUREAU_MON_IS_NULL,AMT_REQ_CREDIT_BUREAU_QRT_IS_NULL,AMT_REQ_CREDIT_BUREAU_YEAR_IS_NULL,DUM__Cash loans,DUM__Revolving loans,DUM__F,DUM__M,DUM__N,DUM__Y,DUM__N.1,DUM__Y.1,DUM__Children,DUM__Family,DUM__Group of people,DUM__Other_A,DUM__Other_B,"DUM__Spouse, partner",DUM__Unaccompanied,DUM__Businessman,DUM__Commercial associate,DUM__Pensioner,DUM__State servant,DUM__Student,DUM__Unemployed,DUM__Working,DUM__Academic degree,DUM__Higher education,DUM__Incomplete higher,DUM__Lower secondary,DUM__Secondary / secondary special,DUM__Civil marriage,DUM__Married,DUM__Separated,DUM__Single / not married,DUM__Widow,DUM__Co-op apartment,DUM__House / apartment,DUM__Municipal apartment,DUM__Office apartment,DUM__Rented apartment,DUM__With parents,DUM__Accountants,DUM__Cleaning staff,DUM__Cooking staff,DUM__Core staff,DUM__Drivers,DUM__HR staff,DUM__High skill tech staff,DUM__IT staff,DUM__Laborers,DUM__Low-skill Laborers,DUM__Managers,DUM__Medicine staff,DUM__Private service staff,DUM__Realty agents,DUM__Sales staff,DUM__Secretaries,DUM__Security staff,DUM__Waiters/barmen staff,DUM__FRIDAY,DUM__MONDAY,DUM__SATURDAY,DUM__SUNDAY,DUM__THURSDAY,DUM__TUESDAY,DUM__WEDNESDAY,DUM__Advertising,DUM__Agriculture,DUM__Bank,DUM__Business Entity Type 1,DUM__Business Entity Type 2,DUM__Business Entity Type 3,DUM__Cleaning,DUM__Construction,DUM__Culture,DUM__Electricity,DUM__Emergency,DUM__Government,DUM__Hotel,DUM__Housing,DUM__Industry: type 1,DUM__Industry: type 10,DUM__Industry: type 11,DUM__Industry: type 12,DUM__Industry: type 13,DUM__Industry: type 2,DUM__Industry: type 3,DUM__Industry: type 4,DUM__Industry: type 5,DUM__Industry: type 6,DUM__Industry: type 7,DUM__Industry: type 8,DUM__Industry: type 9,DUM__Insurance,DUM__Kindergarten,DUM__Legal Services,DUM__Medicine,DUM__Military,DUM__Mobile,DUM__Other,DUM__Police,DUM__Postal,DUM__Realtor,DUM__Religion,DUM__Restaurant,DUM__School,DUM__Security,DUM__Security Ministries,DUM__Self-employed,DUM__Services,DUM__Telecom,DUM__Trade: type 1,DUM__Trade: type 2,DUM__Trade: type 3,DUM__Trade: type 4,DUM__Trade: type 5,DUM__Trade: type 6,DUM__Trade: type 7,DUM__Transport: type 1,DUM__Transport: type 2,DUM__Transport: type 3,DUM__Transport: type 4,DUM__University,DUM__XNA,DUM__not specified,DUM__org spec account,DUM__reg oper account,DUM__reg oper spec account,DUM__block of flats,DUM__specific housing,DUM__terraced house,DUM__Block,DUM__Mixed,DUM__Monolithic,DUM__Others,DUM__Panel,"DUM__Stone, brick",DUM__Wooden,DUM__No,DUM__Yes
0,100001,Cash loans,F,N,Y,0,135000.0,568800.0,20560.5,450000.0,Unaccompanied,Working,Higher education,Married,House / apartment,0.01885,-19241,-2329,-5170.0,-812,,1,1,0,1,0,1,,2.0,2,2,TUESDAY,18,0,0,0,0,0,0,Kindergarten,0.752614,0.789654,0.15952,0.066,0.059,0.9732,,,,0.1379,0.125,,,,0.0505,,,0.0672,0.0612,0.9732,,,,0.1379,0.125,,,,0.0526,,,0.0666,0.059,0.9732,,,,0.1379,0.125,,,,0.0514,,,,block of flats,0.0392,"Stone, brick",No,0.0,0.0,0.0,0.0,-1740.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,False,False,False,True,True,False,False,False,False,False,False,False,True,True,True,False,False,True,True,True,False,True,True,False,False,False,True,True,True,False,False,True,True,True,False,True,True,False,False,False,True,True,True,False,False,True,True,True,False,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0
1,100005,Cash loans,M,N,Y,0,99000.0,222768.0,17370.0,180000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.035792,-18064,-4469,-9118.0,-1623,,1,1,0,1,0,0,Low-skill Laborers,2.0,2,2,FRIDAY,9,0,0,0,0,0,0,Self-employed,0.56499,0.291656,0.432962,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,False,False,False,True,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,100013,Cash loans,M,Y,Y,0,202500.0,663264.0,69777.0,630000.0,,Working,Higher education,Married,House / apartment,0.019101,-20038,-4458,-2175.0,-3503,5.0,1,1,0,1,0,0,Drivers,2.0,2,2,MONDAY,14,0,0,0,0,0,0,Transport: type 3,,0.699787,0.610991,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-856.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,4.0,False,False,True,False,False,False,True,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,100028,Cash loans,F,N,Y,2,315000.0,1575000.0,49018.5,1575000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.026392,-13976,-1866,-2000.0,-4208,,1,1,0,1,1,0,Sales staff,4.0,2,2,WEDNESDAY,11,0,0,0,0,0,0,Business Entity Type 3,0.525734,0.509677,0.612704,0.3052,0.1974,0.997,0.9592,0.1165,0.32,0.2759,0.375,0.0417,0.2042,0.2404,0.3673,0.0386,0.08,0.3109,0.2049,0.997,0.9608,0.1176,0.3222,0.2759,0.375,0.0417,0.2089,0.2626,0.3827,0.0389,0.0847,0.3081,0.1974,0.997,0.9597,0.1173,0.32,0.2759,0.375,0.0417,0.2078,0.2446,0.3739,0.0388,0.0817,reg oper account,block of flats,0.37,Panel,No,0.0,0.0,0.0,0.0,-1805.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0
4,100038,Cash loans,M,Y,N,1,180000.0,625500.0,32067.0,625500.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.010032,-13040,-2191,-4000.0,-4262,16.0,1,1,1,1,0,0,,3.0,2,2,FRIDAY,5,0,0,0,0,1,1,Business Entity Type 3,0.202145,0.425687,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-821.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,False,False,False,False,True,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,True,True,True,True,True,True,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
clean_df = clean_df.fillna(clean_df.mean())
val_columns = get_valid_columns(clean_df)
return clean_df, val_columns

In [121]:
df_application_test.fillna(df_application_test.mean())

Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE,HOUSETYPE_MODE,TOTALAREA_MODE,WALLSMATERIAL_MODE,EMERGENCYSTATE_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100001,Cash loans,F,N,Y,0,135000.0,568800.0,20560.5,450000.0,Unaccompanied,Working,Higher education,Married,House / apartment,0.018850,-19241,-2329,-5170.0,-812,11.786027,1,1,0,1,0,1,,2.0,2,2,TUESDAY,18,0,0,0,0,0,0,Kindergarten,0.752614,0.789654,0.159520,0.066000,0.059000,0.973200,0.751137,0.047624,0.085168,0.137900,0.125000,0.238423,0.067192,0.105885,0.050500,0.009231,0.029387,0.067200,0.061200,0.973200,0.758327,0.045223,0.08057,0.137900,0.12500,0.233854,0.065914,0.110874,0.052600,0.008358,0.028161,0.066600,0.059000,0.973200,0.754344,0.04742,0.084128,0.1379,0.125000,0.237846,0.068069,0.107063,0.051400,0.008979,0.029296,,block of flats,0.039200,"Stone, brick",No,0.0,0.0,0.0,0.0,-1740.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,100005,Cash loans,M,N,Y,0,99000.0,222768.0,17370.0,180000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.035792,-18064,-4469,-9118.0,-1623,11.786027,1,1,0,1,0,0,Low-skill Laborers,2.0,2,2,FRIDAY,9,0,0,0,0,0,0,Self-employed,0.564990,0.291656,0.432962,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,3.000000
2,100013,Cash loans,M,Y,Y,0,202500.0,663264.0,69777.0,630000.0,,Working,Higher education,Married,House / apartment,0.019101,-20038,-4458,-2175.0,-3503,5.000000,1,1,0,1,0,0,Drivers,2.0,2,2,MONDAY,14,0,0,0,0,0,0,Transport: type 3,0.501180,0.699787,0.610991,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,0.0,0.0,0.0,0.0,-856.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,1.000000,4.000000
3,100028,Cash loans,F,N,Y,2,315000.0,1575000.0,49018.5,1575000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.026392,-13976,-1866,-2000.0,-4208,11.786027,1,1,0,1,1,0,Sales staff,4.0,2,2,WEDNESDAY,11,0,0,0,0,0,0,Business Entity Type 3,0.525734,0.509677,0.612704,0.305200,0.197400,0.997000,0.959200,0.116500,0.320000,0.275900,0.375000,0.041700,0.204200,0.240400,0.367300,0.038600,0.080000,0.310900,0.204900,0.997000,0.960800,0.117600,0.32220,0.275900,0.37500,0.041700,0.208900,0.262600,0.382700,0.038900,0.084700,0.308100,0.197400,0.997000,0.959700,0.11730,0.320000,0.2759,0.375000,0.041700,0.207800,0.244600,0.373900,0.038800,0.081700,reg oper account,block of flats,0.370000,Panel,No,0.0,0.0,0.0,0.0,-1805.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,3.000000
4,100038,Cash loans,M,Y,N,1,180000.0,625500.0,32067.0,625500.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.010032,-13040,-2191,-4000.0,-4262,16.000000,1,1,1,1,0,0,,3.0,2,2,FRIDAY,5,0,0,0,0,1,1,Business Entity Type 3,0.202145,0.425687,0.500106,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,0.0,0.0,0.0,0.0,-821.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.002108,0.001803,0.002787,0.009299,0.546902,1.983769
5,100042,Cash loans,F,Y,Y,0,270000.0,959688.0,34600.5,810000.0,Unaccompanied,State servant,Secondary / secondary special,Married,House / apartment,0.025164,-18604,-12009,-6116.0,-2027,10.000000,1,1,0,1,1,0,Drivers,2.0,2,2,MONDAY,15,0,0,0,0,0,0,Government,0.501180,0.628904,0.392774,0.241200,0.008400,0.982100,0.755200,0.045200,0.160000,0.137900,0.333300,0.375000,0.168300,0.194200,0.221800,0.011600,0.073100,0.245800,0.008800,0.982100,0.764800,0.045700,0.16110,0.137900,0.33330,0.375000,0.172100,0.212100,0.231100,0.011700,0.077400,0.243600,0.008400,0.982100,0.758500,0.04550,0.160000,0.1379,0.333300,0.375000,0.171200,0.197500,0.225800,0.011600,0.074600,not specified,block of flats,0.215100,Block,No,0.0,0.0,0.0,0.0,-1705.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,1.000000,2.000000
6,100057,Cash loans,M,Y,Y,2,180000.0,499221.0,22117.5,373500.0,Unaccompanied,Working,Higher education,Married,House / apartment,0.022800,-16685,-2580,-10125.0,-241,3.000000,1,1,0,1,0,0,High skill tech staff,4.0,2,2,THURSDAY,9,0,0,0,0,1,1,Industry: type 9,0.760851,0.571084,0.651260,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,1.0,0.0,1.0,0.0,-1182.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000
7,100065,Cash loans,M,N,Y,0,166500.0,180000.0,14220.0,180000.0,Unaccompanied,Working,Higher education,Single / not married,With parents,0.005144,-9516,-1387,-5063.0,-2055,11.786027,1,1,1,1,1,0,Core staff,1.0,2,2,FRIDAY,7,0,0,0,0,0,0,Self-employed,0.565290,0.613033,0.312365,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,0.0,0.0,0.0,0.0,-1182.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,2.000000
8,100066,Cash loans,F,N,Y,0,315000.0,364896.0,28957.5,315000.0,Unaccompanied,State servant,Higher education,Married,House / apartment,0.046220,-12744,-1013,-1686.0,-3171,11.786027,1,1,0,1,0,0,Core staff,2.0,1,1,THURSDAY,18,0,0,0,0,0,0,School,0.718507,0.808788,0.522697,0.103100,0.111500,0.978100,0.751137,0.047624,0.000000,0.206900,0.166700,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.105000,0.115700,0.978200,0.758327,0.045223,0.00000,0.206900,0.16670,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.104100,0.111500,0.978100,0.754344,0.04742,0.000000,0.2069,0.166700,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,block of flats,0.070200,"Stone, brick",No,0.0,0.0,0.0,0.0,-829.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,5.000000
9,100067,Cash loans,F,Y,Y,1,162000.0,45000.0,5337.0,45000.0,Family,Working,Higher education,Civil marriage,House / apartment,0.018634,-10395,-2625,-8124.0,-3041,5.000000,1,1,1,1,1,0,Sales staff,3.0,2,2,TUESDAY,14,0,0,0,0,0,0,Trade: type 2,0.210562,0.444848,0.194068,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,4.0,0.0,4.0,0.0,-1423.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,2.000000


In [140]:
cleaned_df = all_df_featurization(df_application_test)

In [142]:
model.predict(cleaned_df)

ValueError: Number of features of the model must match the input. Model n_features is 171 and input n_features is 309 

In [138]:
cleaned_df.mean()['OWN_CAR_AGE']

11.786027263875365

In [116]:
model.predict(cleaned_df.iloc[:,valid_columns])

ValueError: Number of features of the model must match the input. Model n_features is 171 and input n_features is 309 

In [114]:
cleaned_df

Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE,HOUSETYPE_MODE,TOTALAREA_MODE,WALLSMATERIAL_MODE,EMERGENCYSTATE_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,AMT_ANNUITY_IS_NULL,AMT_GOODS_PRICE_IS_NULL,NAME_TYPE_SUITE_IS_NULL,OWN_CAR_AGE_IS_NULL,OCCUPATION_TYPE_IS_NULL,CNT_FAM_MEMBERS_IS_NULL,EXT_SOURCE_1_IS_NULL,EXT_SOURCE_2_IS_NULL,EXT_SOURCE_3_IS_NULL,APARTMENTS_AVG_IS_NULL,BASEMENTAREA_AVG_IS_NULL,YEARS_BEGINEXPLUATATION_AVG_IS_NULL,YEARS_BUILD_AVG_IS_NULL,COMMONAREA_AVG_IS_NULL,ELEVATORS_AVG_IS_NULL,ENTRANCES_AVG_IS_NULL,FLOORSMAX_AVG_IS_NULL,FLOORSMIN_AVG_IS_NULL,LANDAREA_AVG_IS_NULL,LIVINGAPARTMENTS_AVG_IS_NULL,LIVINGAREA_AVG_IS_NULL,NONLIVINGAPARTMENTS_AVG_IS_NULL,NONLIVINGAREA_AVG_IS_NULL,APARTMENTS_MODE_IS_NULL,BASEMENTAREA_MODE_IS_NULL,YEARS_BEGINEXPLUATATION_MODE_IS_NULL,YEARS_BUILD_MODE_IS_NULL,COMMONAREA_MODE_IS_NULL,ELEVATORS_MODE_IS_NULL,ENTRANCES_MODE_IS_NULL,FLOORSMAX_MODE_IS_NULL,FLOORSMIN_MODE_IS_NULL,LANDAREA_MODE_IS_NULL,LIVINGAPARTMENTS_MODE_IS_NULL,LIVINGAREA_MODE_IS_NULL,NONLIVINGAPARTMENTS_MODE_IS_NULL,NONLIVINGAREA_MODE_IS_NULL,APARTMENTS_MEDI_IS_NULL,BASEMENTAREA_MEDI_IS_NULL,YEARS_BEGINEXPLUATATION_MEDI_IS_NULL,YEARS_BUILD_MEDI_IS_NULL,COMMONAREA_MEDI_IS_NULL,ELEVATORS_MEDI_IS_NULL,ENTRANCES_MEDI_IS_NULL,FLOORSMAX_MEDI_IS_NULL,FLOORSMIN_MEDI_IS_NULL,LANDAREA_MEDI_IS_NULL,LIVINGAPARTMENTS_MEDI_IS_NULL,LIVINGAREA_MEDI_IS_NULL,NONLIVINGAPARTMENTS_MEDI_IS_NULL,NONLIVINGAREA_MEDI_IS_NULL,FONDKAPREMONT_MODE_IS_NULL,HOUSETYPE_MODE_IS_NULL,TOTALAREA_MODE_IS_NULL,WALLSMATERIAL_MODE_IS_NULL,EMERGENCYSTATE_MODE_IS_NULL,OBS_30_CNT_SOCIAL_CIRCLE_IS_NULL,DEF_30_CNT_SOCIAL_CIRCLE_IS_NULL,OBS_60_CNT_SOCIAL_CIRCLE_IS_NULL,DEF_60_CNT_SOCIAL_CIRCLE_IS_NULL,DAYS_LAST_PHONE_CHANGE_IS_NULL,AMT_REQ_CREDIT_BUREAU_HOUR_IS_NULL,AMT_REQ_CREDIT_BUREAU_DAY_IS_NULL,AMT_REQ_CREDIT_BUREAU_WEEK_IS_NULL,AMT_REQ_CREDIT_BUREAU_MON_IS_NULL,AMT_REQ_CREDIT_BUREAU_QRT_IS_NULL,AMT_REQ_CREDIT_BUREAU_YEAR_IS_NULL,DUM__Cash loans,DUM__Revolving loans,DUM__F,DUM__M,DUM__N,DUM__Y,DUM__N.1,DUM__Y.1,DUM__Children,DUM__Family,DUM__Group of people,DUM__Other_A,DUM__Other_B,"DUM__Spouse, partner",DUM__Unaccompanied,DUM__Businessman,DUM__Commercial associate,DUM__Pensioner,DUM__State servant,DUM__Student,DUM__Unemployed,DUM__Working,DUM__Academic degree,DUM__Higher education,DUM__Incomplete higher,DUM__Lower secondary,DUM__Secondary / secondary special,DUM__Civil marriage,DUM__Married,DUM__Separated,DUM__Single / not married,DUM__Widow,DUM__Co-op apartment,DUM__House / apartment,DUM__Municipal apartment,DUM__Office apartment,DUM__Rented apartment,DUM__With parents,DUM__Accountants,DUM__Cleaning staff,DUM__Cooking staff,DUM__Core staff,DUM__Drivers,DUM__HR staff,DUM__High skill tech staff,DUM__IT staff,DUM__Laborers,DUM__Low-skill Laborers,DUM__Managers,DUM__Medicine staff,DUM__Private service staff,DUM__Realty agents,DUM__Sales staff,DUM__Secretaries,DUM__Security staff,DUM__Waiters/barmen staff,DUM__FRIDAY,DUM__MONDAY,DUM__SATURDAY,DUM__SUNDAY,DUM__THURSDAY,DUM__TUESDAY,DUM__WEDNESDAY,DUM__Advertising,DUM__Agriculture,DUM__Bank,DUM__Business Entity Type 1,DUM__Business Entity Type 2,DUM__Business Entity Type 3,DUM__Cleaning,DUM__Construction,DUM__Culture,DUM__Electricity,DUM__Emergency,DUM__Government,DUM__Hotel,DUM__Housing,DUM__Industry: type 1,DUM__Industry: type 10,DUM__Industry: type 11,DUM__Industry: type 12,DUM__Industry: type 13,DUM__Industry: type 2,DUM__Industry: type 3,DUM__Industry: type 4,DUM__Industry: type 5,DUM__Industry: type 6,DUM__Industry: type 7,DUM__Industry: type 8,DUM__Industry: type 9,DUM__Insurance,DUM__Kindergarten,DUM__Legal Services,DUM__Medicine,DUM__Military,DUM__Mobile,DUM__Other,DUM__Police,DUM__Postal,DUM__Realtor,DUM__Religion,DUM__Restaurant,DUM__School,DUM__Security,DUM__Security Ministries,DUM__Self-employed,DUM__Services,DUM__Telecom,DUM__Trade: type 1,DUM__Trade: type 2,DUM__Trade: type 3,DUM__Trade: type 4,DUM__Trade: type 5,DUM__Trade: type 6,DUM__Trade: type 7,DUM__Transport: type 1,DUM__Transport: type 2,DUM__Transport: type 3,DUM__Transport: type 4,DUM__University,DUM__XNA,DUM__not specified,DUM__org spec account,DUM__reg oper account,DUM__reg oper spec account,DUM__block of flats,DUM__specific housing,DUM__terraced house,DUM__Block,DUM__Mixed,DUM__Monolithic,DUM__Others,DUM__Panel,"DUM__Stone, brick",DUM__Wooden,DUM__No,DUM__Yes
0,100001,Cash loans,F,N,Y,0,135000.0,568800.0,20560.5,450000.0,Unaccompanied,Working,Higher education,Married,House / apartment,0.018850,-19241,-2329,-5170.0,-812,11.786027,1,1,0,1,0,1,,2.0,2,2,TUESDAY,18,0,0,0,0,0,0,Kindergarten,0.752614,0.789654,0.159520,0.066000,0.059000,0.973200,0.751137,0.047624,0.085168,0.137900,0.125000,0.238423,0.067192,0.105885,0.050500,0.009231,0.029387,0.067200,0.061200,0.973200,0.758327,0.045223,0.08057,0.137900,0.12500,0.233854,0.065914,0.110874,0.052600,0.008358,0.028161,0.066600,0.059000,0.973200,0.754344,0.04742,0.084128,0.1379,0.125000,0.237846,0.068069,0.107063,0.051400,0.008979,0.029296,,block of flats,0.039200,"Stone, brick",No,0.0,0.0,0.0,0.0,-1740.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,False,False,False,True,True,False,False,False,False,False,False,False,True,True,True,False,False,True,True,True,False,True,True,False,False,False,True,True,True,False,False,True,True,True,False,True,True,False,False,False,True,True,True,False,False,True,True,True,False,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0
1,100005,Cash loans,M,N,Y,0,99000.0,222768.0,17370.0,180000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.035792,-18064,-4469,-9118.0,-1623,11.786027,1,1,0,1,0,0,Low-skill Laborers,2.0,2,2,FRIDAY,9,0,0,0,0,0,0,Self-employed,0.564990,0.291656,0.432962,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,3.000000,False,False,False,True,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,100013,Cash loans,M,Y,Y,0,202500.0,663264.0,69777.0,630000.0,,Working,Higher education,Married,House / apartment,0.019101,-20038,-4458,-2175.0,-3503,5.000000,1,1,0,1,0,0,Drivers,2.0,2,2,MONDAY,14,0,0,0,0,0,0,Transport: type 3,0.501180,0.699787,0.610991,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,0.0,0.0,0.0,0.0,-856.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,1.000000,4.000000,False,False,True,False,False,False,True,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,100028,Cash loans,F,N,Y,2,315000.0,1575000.0,49018.5,1575000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.026392,-13976,-1866,-2000.0,-4208,11.786027,1,1,0,1,1,0,Sales staff,4.0,2,2,WEDNESDAY,11,0,0,0,0,0,0,Business Entity Type 3,0.525734,0.509677,0.612704,0.305200,0.197400,0.997000,0.959200,0.116500,0.320000,0.275900,0.375000,0.041700,0.204200,0.240400,0.367300,0.038600,0.080000,0.310900,0.204900,0.997000,0.960800,0.117600,0.32220,0.275900,0.37500,0.041700,0.208900,0.262600,0.382700,0.038900,0.084700,0.308100,0.197400,0.997000,0.959700,0.11730,0.320000,0.2759,0.375000,0.041700,0.207800,0.244600,0.373900,0.038800,0.081700,reg oper account,block of flats,0.370000,Panel,No,0.0,0.0,0.0,0.0,-1805.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,3.000000,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0
4,100038,Cash loans,M,Y,N,1,180000.0,625500.0,32067.0,625500.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.010032,-13040,-2191,-4000.0,-4262,16.000000,1,1,1,1,0,0,,3.0,2,2,FRIDAY,5,0,0,0,0,1,1,Business Entity Type 3,0.202145,0.425687,0.500106,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,0.0,0.0,0.0,0.0,-821.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.002108,0.001803,0.002787,0.009299,0.546902,1.983769,False,False,False,False,True,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,True,True,True,True,True,True,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,100042,Cash loans,F,Y,Y,0,270000.0,959688.0,34600.5,810000.0,Unaccompanied,State servant,Secondary / secondary special,Married,House / apartment,0.025164,-18604,-12009,-6116.0,-2027,10.000000,1,1,0,1,1,0,Drivers,2.0,2,2,MONDAY,15,0,0,0,0,0,0,Government,0.501180,0.628904,0.392774,0.241200,0.008400,0.982100,0.755200,0.045200,0.160000,0.137900,0.333300,0.375000,0.168300,0.194200,0.221800,0.011600,0.073100,0.245800,0.008800,0.982100,0.764800,0.045700,0.16110,0.137900,0.33330,0.375000,0.172100,0.212100,0.231100,0.011700,0.077400,0.243600,0.008400,0.982100,0.758500,0.04550,0.160000,0.1379,0.333300,0.375000,0.171200,0.197500,0.225800,0.011600,0.074600,not specified,block of flats,0.215100,Block,No,0.0,0.0,0.0,0.0,-1705.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,1.000000,2.000000,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0
6,100057,Cash loans,M,Y,Y,2,180000.0,499221.0,22117.5,373500.0,Unaccompanied,Working,Higher education,Married,House / apartment,0.022800,-16685,-2580,-10125.0,-241,3.000000,1,1,0,1,0,0,High skill tech staff,4.0,2,2,THURSDAY,9,0,0,0,0,1,1,Industry: type 9,0.760851,0.571084,0.651260,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,1.0,0.0,1.0,0.0,-1182.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,False,False,False,False,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,100065,Cash loans,M,N,Y,0,166500.0,180000.0,14220.0,180000.0,Unaccompanied,Working,Higher education,Single / not married,With parents,0.005144,-9516,-1387,-5063.0,-2055,11.786027,1,1,1,1,1,0,Core staff,1.0,2,2,FRIDAY,7,0,0,0,0,0,0,Self-employed,0.565290,0.613033,0.312365,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,0.0,0.0,0.0,0.0,-1182.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,2.000000,False,False,False,True,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,100066,Cash loans,F,N,Y,0,315000.0,364896.0,28957.5,315000.0,Unaccompanied,State servant,Higher education,Married,House / apartment,0.046220,-12744,-1013,-1686.0,-3171,11.786027,1,1,0,1,0,0,Core staff,2.0,1,1,THURSDAY,18,0,0,0,0,0,0,School,0.718507,0.808788,0.522697,0.103100,0.111500,0.978100,0.751137,0.047624,0.000000,0.206900,0.166700,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.105000,0.115700,0.978200,0.758327,0.045223,0.00000,0.206900,0.16670,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.104100,0.111500,0.978100,0.754344,0.04742,0.000000,0.2069,0.166700,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,block of flats,0.070200,"Stone, brick",No,0.0,0.0,0.0,0.0,-829.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,5.000000,False,False,False,True,False,False,False,False,False,False,False,False,True,True,False,False,False,True,True,True,True,True,True,False,False,False,True,True,False,False,False,True,True,True,True,True,True,False,False,False,True,True,False,False,False,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0
9,100067,Cash loans,F,Y,Y,1,162000.0,45000.0,5337.0,45000.0,Family,Working,Higher education,Civil marriage,House / apartment,0.018634,-10395,-2625,-8124.0,-3041,5.000000,1,1,1,1,1,0,Sales staff,3.0,2,2,TUESDAY,14,0,0,0,0,0,0,Trade: type 2,0.210562,0.444848,0.194068,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,4.0,0.0,4.0,0.0,-1423.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,2.000000,False,False,False,False,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [80]:
df_application_test.fillna(df_application_test.mean())

Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE,HOUSETYPE_MODE,TOTALAREA_MODE,WALLSMATERIAL_MODE,EMERGENCYSTATE_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100001,Cash loans,F,N,Y,0,135000.0,568800.0,20560.5,450000.0,Unaccompanied,Working,Higher education,Married,House / apartment,0.018850,-19241,-2329,-5170.0,-812,11.786027,1,1,0,1,0,1,,2.0,2,2,TUESDAY,18,0,0,0,0,0,0,Kindergarten,0.752614,0.789654,0.159520,0.066000,0.059000,0.973200,0.751137,0.047624,0.085168,0.137900,0.125000,0.238423,0.067192,0.105885,0.050500,0.009231,0.029387,0.067200,0.061200,0.973200,0.758327,0.045223,0.08057,0.137900,0.12500,0.233854,0.065914,0.110874,0.052600,0.008358,0.028161,0.066600,0.059000,0.973200,0.754344,0.04742,0.084128,0.1379,0.125000,0.237846,0.068069,0.107063,0.051400,0.008979,0.029296,,block of flats,0.039200,"Stone, brick",No,0.0,0.0,0.0,0.0,-1740.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,100005,Cash loans,M,N,Y,0,99000.0,222768.0,17370.0,180000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.035792,-18064,-4469,-9118.0,-1623,11.786027,1,1,0,1,0,0,Low-skill Laborers,2.0,2,2,FRIDAY,9,0,0,0,0,0,0,Self-employed,0.564990,0.291656,0.432962,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,3.000000
2,100013,Cash loans,M,Y,Y,0,202500.0,663264.0,69777.0,630000.0,,Working,Higher education,Married,House / apartment,0.019101,-20038,-4458,-2175.0,-3503,5.000000,1,1,0,1,0,0,Drivers,2.0,2,2,MONDAY,14,0,0,0,0,0,0,Transport: type 3,0.501180,0.699787,0.610991,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,0.0,0.0,0.0,0.0,-856.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,1.000000,4.000000
3,100028,Cash loans,F,N,Y,2,315000.0,1575000.0,49018.5,1575000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.026392,-13976,-1866,-2000.0,-4208,11.786027,1,1,0,1,1,0,Sales staff,4.0,2,2,WEDNESDAY,11,0,0,0,0,0,0,Business Entity Type 3,0.525734,0.509677,0.612704,0.305200,0.197400,0.997000,0.959200,0.116500,0.320000,0.275900,0.375000,0.041700,0.204200,0.240400,0.367300,0.038600,0.080000,0.310900,0.204900,0.997000,0.960800,0.117600,0.32220,0.275900,0.37500,0.041700,0.208900,0.262600,0.382700,0.038900,0.084700,0.308100,0.197400,0.997000,0.959700,0.11730,0.320000,0.2759,0.375000,0.041700,0.207800,0.244600,0.373900,0.038800,0.081700,reg oper account,block of flats,0.370000,Panel,No,0.0,0.0,0.0,0.0,-1805.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,3.000000
4,100038,Cash loans,M,Y,N,1,180000.0,625500.0,32067.0,625500.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.010032,-13040,-2191,-4000.0,-4262,16.000000,1,1,1,1,0,0,,3.0,2,2,FRIDAY,5,0,0,0,0,1,1,Business Entity Type 3,0.202145,0.425687,0.500106,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,0.0,0.0,0.0,0.0,-821.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.002108,0.001803,0.002787,0.009299,0.546902,1.983769
5,100042,Cash loans,F,Y,Y,0,270000.0,959688.0,34600.5,810000.0,Unaccompanied,State servant,Secondary / secondary special,Married,House / apartment,0.025164,-18604,-12009,-6116.0,-2027,10.000000,1,1,0,1,1,0,Drivers,2.0,2,2,MONDAY,15,0,0,0,0,0,0,Government,0.501180,0.628904,0.392774,0.241200,0.008400,0.982100,0.755200,0.045200,0.160000,0.137900,0.333300,0.375000,0.168300,0.194200,0.221800,0.011600,0.073100,0.245800,0.008800,0.982100,0.764800,0.045700,0.16110,0.137900,0.33330,0.375000,0.172100,0.212100,0.231100,0.011700,0.077400,0.243600,0.008400,0.982100,0.758500,0.04550,0.160000,0.1379,0.333300,0.375000,0.171200,0.197500,0.225800,0.011600,0.074600,not specified,block of flats,0.215100,Block,No,0.0,0.0,0.0,0.0,-1705.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,1.000000,2.000000
6,100057,Cash loans,M,Y,Y,2,180000.0,499221.0,22117.5,373500.0,Unaccompanied,Working,Higher education,Married,House / apartment,0.022800,-16685,-2580,-10125.0,-241,3.000000,1,1,0,1,0,0,High skill tech staff,4.0,2,2,THURSDAY,9,0,0,0,0,1,1,Industry: type 9,0.760851,0.571084,0.651260,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,1.0,0.0,1.0,0.0,-1182.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000
7,100065,Cash loans,M,N,Y,0,166500.0,180000.0,14220.0,180000.0,Unaccompanied,Working,Higher education,Single / not married,With parents,0.005144,-9516,-1387,-5063.0,-2055,11.786027,1,1,1,1,1,0,Core staff,1.0,2,2,FRIDAY,7,0,0,0,0,0,0,Self-employed,0.565290,0.613033,0.312365,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,0.0,0.0,0.0,0.0,-1182.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,2.000000
8,100066,Cash loans,F,N,Y,0,315000.0,364896.0,28957.5,315000.0,Unaccompanied,State servant,Higher education,Married,House / apartment,0.046220,-12744,-1013,-1686.0,-3171,11.786027,1,1,0,1,0,0,Core staff,2.0,1,1,THURSDAY,18,0,0,0,0,0,0,School,0.718507,0.808788,0.522697,0.103100,0.111500,0.978100,0.751137,0.047624,0.000000,0.206900,0.166700,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.105000,0.115700,0.978200,0.758327,0.045223,0.00000,0.206900,0.16670,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.104100,0.111500,0.978100,0.754344,0.04742,0.000000,0.2069,0.166700,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,block of flats,0.070200,"Stone, brick",No,0.0,0.0,0.0,0.0,-829.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,5.000000
9,100067,Cash loans,F,Y,Y,1,162000.0,45000.0,5337.0,45000.0,Family,Working,Higher education,Civil marriage,House / apartment,0.018634,-10395,-2625,-8124.0,-3041,5.000000,1,1,1,1,1,0,Sales staff,3.0,2,2,TUESDAY,14,0,0,0,0,0,0,Trade: type 2,0.210562,0.444848,0.194068,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,4.0,0.0,4.0,0.0,-1423.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,2.000000


In [88]:
averages = df_application_test.mean()

In [90]:
df_application_test.head().fillna(value=averages)

Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE,HOUSETYPE_MODE,TOTALAREA_MODE,WALLSMATERIAL_MODE,EMERGENCYSTATE_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100001,Cash loans,F,N,Y,0,135000.0,568800.0,20560.5,450000.0,Unaccompanied,Working,Higher education,Married,House / apartment,0.01885,-19241,-2329,-5170.0,-812,11.786027,1,1,0,1,0,1,,2.0,2,2,TUESDAY,18,0,0,0,0,0,0,Kindergarten,0.752614,0.789654,0.15952,0.066,0.059,0.9732,0.751137,0.047624,0.085168,0.1379,0.125,0.238423,0.067192,0.105885,0.0505,0.009231,0.029387,0.0672,0.0612,0.9732,0.758327,0.045223,0.08057,0.1379,0.125,0.233854,0.065914,0.110874,0.0526,0.008358,0.028161,0.0666,0.059,0.9732,0.754344,0.04742,0.084128,0.1379,0.125,0.237846,0.068069,0.107063,0.0514,0.008979,0.029296,,block of flats,0.0392,"Stone, brick",No,0.0,0.0,0.0,0.0,-1740.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,100005,Cash loans,M,N,Y,0,99000.0,222768.0,17370.0,180000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.035792,-18064,-4469,-9118.0,-1623,11.786027,1,1,0,1,0,0,Low-skill Laborers,2.0,2,2,FRIDAY,9,0,0,0,0,0,0,Self-employed,0.56499,0.291656,0.432962,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
2,100013,Cash loans,M,Y,Y,0,202500.0,663264.0,69777.0,630000.0,,Working,Higher education,Married,House / apartment,0.019101,-20038,-4458,-2175.0,-3503,5.0,1,1,0,1,0,0,Drivers,2.0,2,2,MONDAY,14,0,0,0,0,0,0,Transport: type 3,0.50118,0.699787,0.610991,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,0.0,0.0,0.0,0.0,-856.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,4.0
3,100028,Cash loans,F,N,Y,2,315000.0,1575000.0,49018.5,1575000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.026392,-13976,-1866,-2000.0,-4208,11.786027,1,1,0,1,1,0,Sales staff,4.0,2,2,WEDNESDAY,11,0,0,0,0,0,0,Business Entity Type 3,0.525734,0.509677,0.612704,0.3052,0.1974,0.997,0.9592,0.1165,0.32,0.2759,0.375,0.0417,0.2042,0.2404,0.3673,0.0386,0.08,0.3109,0.2049,0.997,0.9608,0.1176,0.3222,0.2759,0.375,0.0417,0.2089,0.2626,0.3827,0.0389,0.0847,0.3081,0.1974,0.997,0.9597,0.1173,0.32,0.2759,0.375,0.0417,0.2078,0.2446,0.3739,0.0388,0.0817,reg oper account,block of flats,0.37,Panel,No,0.0,0.0,0.0,0.0,-1805.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
4,100038,Cash loans,M,Y,N,1,180000.0,625500.0,32067.0,625500.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.010032,-13040,-2191,-4000.0,-4262,16.0,1,1,1,1,0,0,,3.0,2,2,FRIDAY,5,0,0,0,0,1,1,Business Entity Type 3,0.202145,0.425687,0.500106,0.122388,0.090065,0.978828,0.751137,0.047624,0.085168,0.151777,0.233706,0.238423,0.067192,0.105885,0.112286,0.009231,0.029387,0.119078,0.088998,0.978292,0.758327,0.045223,0.08057,0.147161,0.22939,0.233854,0.065914,0.110874,0.110687,0.008358,0.028161,0.122809,0.089529,0.978822,0.754344,0.04742,0.084128,0.1512,0.233154,0.237846,0.068069,0.107063,0.113368,0.008979,0.029296,,,0.107129,,,0.0,0.0,0.0,0.0,-821.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.002108,0.001803,0.002787,0.009299,0.546902,1.983769


In [77]:
cleaned_df.iloc[:,valid_columns]

Unnamed: 0,SK_ID_CURR,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,TOTALAREA_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,AMT_ANNUITY_IS_NULL,AMT_GOODS_PRICE_IS_NULL,NAME_TYPE_SUITE_IS_NULL,OWN_CAR_AGE_IS_NULL,OCCUPATION_TYPE_IS_NULL,CNT_FAM_MEMBERS_IS_NULL,EXT_SOURCE_1_IS_NULL,EXT_SOURCE_2_IS_NULL,EXT_SOURCE_3_IS_NULL,APARTMENTS_AVG_IS_NULL,BASEMENTAREA_AVG_IS_NULL,YEARS_BEGINEXPLUATATION_AVG_IS_NULL,YEARS_BUILD_AVG_IS_NULL,COMMONAREA_AVG_IS_NULL,ELEVATORS_AVG_IS_NULL,ENTRANCES_AVG_IS_NULL,FLOORSMAX_AVG_IS_NULL,FLOORSMIN_AVG_IS_NULL,LANDAREA_AVG_IS_NULL,LIVINGAPARTMENTS_AVG_IS_NULL,LIVINGAREA_AVG_IS_NULL,NONLIVINGAPARTMENTS_AVG_IS_NULL,NONLIVINGAREA_AVG_IS_NULL,APARTMENTS_MODE_IS_NULL,BASEMENTAREA_MODE_IS_NULL,YEARS_BEGINEXPLUATATION_MODE_IS_NULL,YEARS_BUILD_MODE_IS_NULL,COMMONAREA_MODE_IS_NULL,ELEVATORS_MODE_IS_NULL,ENTRANCES_MODE_IS_NULL,FLOORSMAX_MODE_IS_NULL,FLOORSMIN_MODE_IS_NULL,LANDAREA_MODE_IS_NULL,LIVINGAPARTMENTS_MODE_IS_NULL,LIVINGAREA_MODE_IS_NULL,NONLIVINGAPARTMENTS_MODE_IS_NULL,NONLIVINGAREA_MODE_IS_NULL,APARTMENTS_MEDI_IS_NULL,BASEMENTAREA_MEDI_IS_NULL,YEARS_BEGINEXPLUATATION_MEDI_IS_NULL,YEARS_BUILD_MEDI_IS_NULL,COMMONAREA_MEDI_IS_NULL,ELEVATORS_MEDI_IS_NULL,ENTRANCES_MEDI_IS_NULL,FLOORSMAX_MEDI_IS_NULL,FLOORSMIN_MEDI_IS_NULL,LANDAREA_MEDI_IS_NULL,LIVINGAPARTMENTS_MEDI_IS_NULL,LIVINGAREA_MEDI_IS_NULL,NONLIVINGAPARTMENTS_MEDI_IS_NULL,NONLIVINGAREA_MEDI_IS_NULL,FONDKAPREMONT_MODE_IS_NULL,HOUSETYPE_MODE_IS_NULL,TOTALAREA_MODE_IS_NULL,WALLSMATERIAL_MODE_IS_NULL,EMERGENCYSTATE_MODE_IS_NULL,OBS_30_CNT_SOCIAL_CIRCLE_IS_NULL,DEF_30_CNT_SOCIAL_CIRCLE_IS_NULL,OBS_60_CNT_SOCIAL_CIRCLE_IS_NULL,DEF_60_CNT_SOCIAL_CIRCLE_IS_NULL,DAYS_LAST_PHONE_CHANGE_IS_NULL,AMT_REQ_CREDIT_BUREAU_HOUR_IS_NULL,AMT_REQ_CREDIT_BUREAU_DAY_IS_NULL,AMT_REQ_CREDIT_BUREAU_WEEK_IS_NULL,AMT_REQ_CREDIT_BUREAU_MON_IS_NULL,AMT_REQ_CREDIT_BUREAU_QRT_IS_NULL,AMT_REQ_CREDIT_BUREAU_YEAR_IS_NULL,DUM__Cash loans,DUM__Revolving loans,DUM__F,DUM__M,DUM__N,DUM__Y,DUM__N.1,DUM__Y.1,DUM__Children,DUM__Family,DUM__Group of people,DUM__Other_A,DUM__Other_B,"DUM__Spouse, partner",DUM__Unaccompanied,DUM__Businessman,DUM__Commercial associate,DUM__Pensioner,DUM__State servant,DUM__Student,DUM__Unemployed,DUM__Working,DUM__Academic degree,DUM__Higher education,DUM__Incomplete higher,DUM__Lower secondary,DUM__Secondary / secondary special,DUM__Civil marriage,DUM__Married,DUM__Separated,DUM__Single / not married,DUM__Widow,DUM__Co-op apartment,DUM__House / apartment,DUM__Municipal apartment,DUM__Office apartment,DUM__Rented apartment,DUM__With parents,DUM__Accountants,DUM__Cleaning staff,DUM__Cooking staff,DUM__Core staff,DUM__Drivers,DUM__HR staff,DUM__High skill tech staff,DUM__IT staff,DUM__Laborers,DUM__Low-skill Laborers,DUM__Managers,DUM__Medicine staff,DUM__Private service staff,DUM__Realty agents,DUM__Sales staff,DUM__Secretaries,DUM__Security staff,DUM__Waiters/barmen staff,DUM__FRIDAY,DUM__MONDAY,DUM__SATURDAY,DUM__SUNDAY,DUM__THURSDAY,DUM__TUESDAY,DUM__WEDNESDAY,DUM__Advertising,DUM__Agriculture,DUM__Bank,DUM__Business Entity Type 1,DUM__Business Entity Type 2,DUM__Business Entity Type 3,DUM__Cleaning,DUM__Construction,DUM__Culture,DUM__Electricity,DUM__Emergency,DUM__Government,DUM__Hotel,DUM__Housing,DUM__Industry: type 1,DUM__Industry: type 10,DUM__Industry: type 11,DUM__Industry: type 12,DUM__Industry: type 13,DUM__Industry: type 2,DUM__Industry: type 3,DUM__Industry: type 4,DUM__Industry: type 5,DUM__Industry: type 6,DUM__Industry: type 7,DUM__Industry: type 8,DUM__Industry: type 9,DUM__Insurance,DUM__Kindergarten,DUM__Legal Services,DUM__Medicine,DUM__Military,DUM__Mobile,DUM__Other,DUM__Police,DUM__Postal,DUM__Realtor,DUM__Religion,DUM__Restaurant,DUM__School,DUM__Security,DUM__Security Ministries,DUM__Self-employed,DUM__Services,DUM__Telecom,DUM__Trade: type 1,DUM__Trade: type 2,DUM__Trade: type 3,DUM__Trade: type 4,DUM__Trade: type 5,DUM__Trade: type 6,DUM__Trade: type 7,DUM__Transport: type 1,DUM__Transport: type 2,DUM__Transport: type 3,DUM__Transport: type 4,DUM__University,DUM__XNA,DUM__not specified,DUM__org spec account,DUM__reg oper account,DUM__reg oper spec account,DUM__block of flats,DUM__specific housing,DUM__terraced house,DUM__Block,DUM__Mixed,DUM__Monolithic,DUM__Others,DUM__Panel,"DUM__Stone, brick",DUM__Wooden,DUM__No,DUM__Yes
0,100001,0,135000.0,568800.0,20560.5,450000.0,0.018850,-19241,-2329,-5170.0,-812,,1,1,0,1,0,1,2.0,2,2,18,0,0,0,0,0,0,0.752614,0.789654,0.159520,0.0660,0.0590,0.9732,,,,0.1379,0.1250,,,,0.0505,,,0.0672,0.0612,0.9732,,,,0.1379,0.1250,,,,0.0526,,,0.0666,0.0590,0.9732,,,,0.1379,0.1250,,,,0.0514,,,0.0392,0.0,0.0,0.0,0.0,-1740.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,False,False,False,True,True,False,False,False,False,False,False,False,True,True,True,False,False,True,True,True,False,True,True,False,False,False,True,True,True,False,False,True,True,True,False,True,True,False,False,False,True,True,True,False,False,True,True,True,False,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0
1,100005,0,99000.0,222768.0,17370.0,180000.0,0.035792,-18064,-4469,-9118.0,-1623,,1,1,0,1,0,0,2.0,2,2,9,0,0,0,0,0,0,0.564990,0.291656,0.432962,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,False,False,False,True,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,100013,0,202500.0,663264.0,69777.0,630000.0,0.019101,-20038,-4458,-2175.0,-3503,5.0,1,1,0,1,0,0,2.0,2,2,14,0,0,0,0,0,0,,0.699787,0.610991,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-856.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,4.0,False,False,True,False,False,False,True,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,100028,2,315000.0,1575000.0,49018.5,1575000.0,0.026392,-13976,-1866,-2000.0,-4208,,1,1,0,1,1,0,4.0,2,2,11,0,0,0,0,0,0,0.525734,0.509677,0.612704,0.3052,0.1974,0.9970,0.9592,0.1165,0.3200,0.2759,0.3750,0.0417,0.2042,0.2404,0.3673,0.0386,0.0800,0.3109,0.2049,0.9970,0.9608,0.1176,0.3222,0.2759,0.3750,0.0417,0.2089,0.2626,0.3827,0.0389,0.0847,0.3081,0.1974,0.9970,0.9597,0.1173,0.32,0.2759,0.3750,0.0417,0.2078,0.2446,0.3739,0.0388,0.0817,0.3700,0.0,0.0,0.0,0.0,-1805.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,0
4,100038,1,180000.0,625500.0,32067.0,625500.0,0.010032,-13040,-2191,-4000.0,-4262,16.0,1,1,1,1,0,0,3.0,2,2,5,0,0,0,0,1,1,0.202145,0.425687,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-821.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,False,False,False,False,True,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,True,True,True,True,True,True,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,100042,0,270000.0,959688.0,34600.5,810000.0,0.025164,-18604,-12009,-6116.0,-2027,10.0,1,1,0,1,1,0,2.0,2,2,15,0,0,0,0,0,0,,0.628904,0.392774,0.2412,0.0084,0.9821,0.7552,0.0452,0.1600,0.1379,0.3333,0.3750,0.1683,0.1942,0.2218,0.0116,0.0731,0.2458,0.0088,0.9821,0.7648,0.0457,0.1611,0.1379,0.3333,0.3750,0.1721,0.2121,0.2311,0.0117,0.0774,0.2436,0.0084,0.9821,0.7585,0.0455,0.16,0.1379,0.3333,0.3750,0.1712,0.1975,0.2258,0.0116,0.0746,0.2151,0.0,0.0,0.0,0.0,-1705.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,2.0,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,1,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0
6,100057,2,180000.0,499221.0,22117.5,373500.0,0.022800,-16685,-2580,-10125.0,-241,3.0,1,1,0,1,0,0,4.0,2,2,9,0,0,0,0,1,1,0.760851,0.571084,0.651260,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,1.0,0.0,-1182.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,False,False,False,False,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,1,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,100065,0,166500.0,180000.0,14220.0,180000.0,0.005144,-9516,-1387,-5063.0,-2055,,1,1,1,1,1,0,1.0,2,2,7,0,0,0,0,0,0,0.565290,0.613033,0.312365,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1182.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,2.0,False,False,False,True,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,100066,0,315000.0,364896.0,28957.5,315000.0,0.046220,-12744,-1013,-1686.0,-3171,,1,1,0,1,0,0,2.0,1,1,18,0,0,0,0,0,0,0.718507,0.808788,0.522697,0.1031,0.1115,0.9781,,,0.0000,0.2069,0.1667,,,,,,,0.1050,0.1157,0.9782,,,0.0000,0.2069,0.1667,,,,,,,0.1041,0.1115,0.9781,,,0.00,0.2069,0.1667,,,,,,,0.0702,0.0,0.0,0.0,0.0,-829.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,5.0,False,False,False,True,False,False,False,False,False,False,False,False,True,True,False,False,False,True,True,True,True,True,True,False,False,False,True,True,False,False,False,True,True,True,True,True,True,False,False,False,True,True,False,False,False,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0
9,100067,1,162000.0,45000.0,5337.0,45000.0,0.018634,-10395,-2625,-8124.0,-3041,5.0,1,1,1,1,1,0,3.0,2,2,14,0,0,0,0,0,0,0.210562,0.444848,0.194068,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.0,0.0,4.0,0.0,-1423.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,2.0,False,False,False,False,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,1,0,1,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [38]:
cleaned_df.iloc[:,valid_columns]

Unnamed: 0,SK_ID_CURR,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,TOTALAREA_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,AMT_ANNUITY_IS_NULL,AMT_GOODS_PRICE_IS_NULL,NAME_TYPE_SUITE_IS_NULL,OWN_CAR_AGE_IS_NULL,OCCUPATION_TYPE_IS_NULL,CNT_FAM_MEMBERS_IS_NULL,EXT_SOURCE_1_IS_NULL,EXT_SOURCE_2_IS_NULL,EXT_SOURCE_3_IS_NULL,APARTMENTS_AVG_IS_NULL,BASEMENTAREA_AVG_IS_NULL,YEARS_BEGINEXPLUATATION_AVG_IS_NULL,YEARS_BUILD_AVG_IS_NULL,COMMONAREA_AVG_IS_NULL,ELEVATORS_AVG_IS_NULL,ENTRANCES_AVG_IS_NULL,FLOORSMAX_AVG_IS_NULL,FLOORSMIN_AVG_IS_NULL,LANDAREA_AVG_IS_NULL,LIVINGAPARTMENTS_AVG_IS_NULL,LIVINGAREA_AVG_IS_NULL,NONLIVINGAPARTMENTS_AVG_IS_NULL,NONLIVINGAREA_AVG_IS_NULL,APARTMENTS_MODE_IS_NULL,BASEMENTAREA_MODE_IS_NULL,YEARS_BEGINEXPLUATATION_MODE_IS_NULL,YEARS_BUILD_MODE_IS_NULL,COMMONAREA_MODE_IS_NULL,ELEVATORS_MODE_IS_NULL,ENTRANCES_MODE_IS_NULL,FLOORSMAX_MODE_IS_NULL,FLOORSMIN_MODE_IS_NULL,LANDAREA_MODE_IS_NULL,LIVINGAPARTMENTS_MODE_IS_NULL,LIVINGAREA_MODE_IS_NULL,NONLIVINGAPARTMENTS_MODE_IS_NULL,NONLIVINGAREA_MODE_IS_NULL,APARTMENTS_MEDI_IS_NULL,BASEMENTAREA_MEDI_IS_NULL,YEARS_BEGINEXPLUATATION_MEDI_IS_NULL,YEARS_BUILD_MEDI_IS_NULL,COMMONAREA_MEDI_IS_NULL,ELEVATORS_MEDI_IS_NULL,ENTRANCES_MEDI_IS_NULL,FLOORSMAX_MEDI_IS_NULL,FLOORSMIN_MEDI_IS_NULL,LANDAREA_MEDI_IS_NULL,LIVINGAPARTMENTS_MEDI_IS_NULL,LIVINGAREA_MEDI_IS_NULL,NONLIVINGAPARTMENTS_MEDI_IS_NULL,NONLIVINGAREA_MEDI_IS_NULL,FONDKAPREMONT_MODE_IS_NULL,HOUSETYPE_MODE_IS_NULL,TOTALAREA_MODE_IS_NULL,WALLSMATERIAL_MODE_IS_NULL,EMERGENCYSTATE_MODE_IS_NULL,OBS_30_CNT_SOCIAL_CIRCLE_IS_NULL,DEF_30_CNT_SOCIAL_CIRCLE_IS_NULL,OBS_60_CNT_SOCIAL_CIRCLE_IS_NULL,DEF_60_CNT_SOCIAL_CIRCLE_IS_NULL,DAYS_LAST_PHONE_CHANGE_IS_NULL,AMT_REQ_CREDIT_BUREAU_HOUR_IS_NULL,AMT_REQ_CREDIT_BUREAU_DAY_IS_NULL,AMT_REQ_CREDIT_BUREAU_WEEK_IS_NULL,AMT_REQ_CREDIT_BUREAU_MON_IS_NULL,AMT_REQ_CREDIT_BUREAU_QRT_IS_NULL,AMT_REQ_CREDIT_BUREAU_YEAR_IS_NULL
0,100001,0,135000.0,568800.0,20560.5,450000.0,0.018850,-19241,-2329,-5170.0,-812,,1,1,0,1,0,1,2.0,2,2,18,0,0,0,0,0,0,0.752614,0.789654,0.159520,0.0660,0.0590,0.9732,,,,0.1379,0.1250,,,,0.0505,,,0.0672,0.0612,0.9732,,,,0.1379,0.1250,,,,0.0526,,,0.0666,0.0590,0.9732,,,,0.1379,0.1250,,,,0.0514,,,0.0392,0.0,0.0,0.0,0.0,-1740.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,False,False,False,True,True,False,False,False,False,False,False,False,True,True,True,False,False,True,True,True,False,True,True,False,False,False,True,True,True,False,False,True,True,True,False,True,True,False,False,False,True,True,True,False,False,True,True,True,False,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,100005,0,99000.0,222768.0,17370.0,180000.0,0.035792,-18064,-4469,-9118.0,-1623,,1,1,0,1,0,0,2.0,2,2,9,0,0,0,0,0,0,0.564990,0.291656,0.432962,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,False,False,False,True,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False
2,100013,0,202500.0,663264.0,69777.0,630000.0,0.019101,-20038,-4458,-2175.0,-3503,5.0,1,1,0,1,0,0,2.0,2,2,14,0,0,0,0,0,0,,0.699787,0.610991,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-856.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,4.0,False,False,True,False,False,False,True,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False
3,100028,2,315000.0,1575000.0,49018.5,1575000.0,0.026392,-13976,-1866,-2000.0,-4208,,1,1,0,1,1,0,4.0,2,2,11,0,0,0,0,0,0,0.525734,0.509677,0.612704,0.3052,0.1974,0.9970,0.9592,0.1165,0.3200,0.2759,0.3750,0.0417,0.2042,0.2404,0.3673,0.0386,0.0800,0.3109,0.2049,0.9970,0.9608,0.1176,0.3222,0.2759,0.3750,0.0417,0.2089,0.2626,0.3827,0.0389,0.0847,0.3081,0.1974,0.9970,0.9597,0.1173,0.32,0.2759,0.3750,0.0417,0.2078,0.2446,0.3739,0.0388,0.0817,0.3700,0.0,0.0,0.0,0.0,-1805.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,100038,1,180000.0,625500.0,32067.0,625500.0,0.010032,-13040,-2191,-4000.0,-4262,16.0,1,1,1,1,0,0,3.0,2,2,5,0,0,0,0,1,1,0.202145,0.425687,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-821.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,False,False,False,False,True,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,True,True,True,True,True,True
5,100042,0,270000.0,959688.0,34600.5,810000.0,0.025164,-18604,-12009,-6116.0,-2027,10.0,1,1,0,1,1,0,2.0,2,2,15,0,0,0,0,0,0,,0.628904,0.392774,0.2412,0.0084,0.9821,0.7552,0.0452,0.1600,0.1379,0.3333,0.3750,0.1683,0.1942,0.2218,0.0116,0.0731,0.2458,0.0088,0.9821,0.7648,0.0457,0.1611,0.1379,0.3333,0.3750,0.1721,0.2121,0.2311,0.0117,0.0774,0.2436,0.0084,0.9821,0.7585,0.0455,0.16,0.1379,0.3333,0.3750,0.1712,0.1975,0.2258,0.0116,0.0746,0.2151,0.0,0.0,0.0,0.0,-1705.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,2.0,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
6,100057,2,180000.0,499221.0,22117.5,373500.0,0.022800,-16685,-2580,-10125.0,-241,3.0,1,1,0,1,0,0,4.0,2,2,9,0,0,0,0,1,1,0.760851,0.571084,0.651260,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,1.0,0.0,-1182.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,False,False,False,False,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False
7,100065,0,166500.0,180000.0,14220.0,180000.0,0.005144,-9516,-1387,-5063.0,-2055,,1,1,1,1,1,0,1.0,2,2,7,0,0,0,0,0,0,0.565290,0.613033,0.312365,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1182.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,2.0,False,False,False,True,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False
8,100066,0,315000.0,364896.0,28957.5,315000.0,0.046220,-12744,-1013,-1686.0,-3171,,1,1,0,1,0,0,2.0,1,1,18,0,0,0,0,0,0,0.718507,0.808788,0.522697,0.1031,0.1115,0.9781,,,0.0000,0.2069,0.1667,,,,,,,0.1050,0.1157,0.9782,,,0.0000,0.2069,0.1667,,,,,,,0.1041,0.1115,0.9781,,,0.00,0.2069,0.1667,,,,,,,0.0702,0.0,0.0,0.0,0.0,-829.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,5.0,False,False,False,True,False,False,False,False,False,False,False,False,True,True,False,False,False,True,True,True,True,True,True,False,False,False,True,True,False,False,False,True,True,True,True,True,True,False,False,False,True,True,False,False,False,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
9,100067,1,162000.0,45000.0,5337.0,45000.0,0.018634,-10395,-2625,-8124.0,-3041,5.0,1,1,1,1,1,0,3.0,2,2,14,0,0,0,0,0,0,0.210562,0.444848,0.194068,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.0,0.0,4.0,0.0,-1423.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,2.0,False,False,False,False,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False


In [31]:
predictions = model.predict(cleaned_df.iloc[:,valid_columns])

ValueError: Input contains NaN, infinity or a value too large for dtype('float32').

In [29]:
X_train, X_test, y_train, y_test = train_test_split(relevant_x_df, target_y, test_size=0.33, random_state=42)

In [30]:
model = RandomForestClassifier()

model.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [14]:
plot_roc(model, X_test, y_test)