In [1]:
import pandas as pd
import numpy as np
import os
from hyperopt import fmin, tpe, hp, STATUS_OK,Trials
from hyperopt import space_eval
import hyperopt.pyll.stochastic
#import pickle
import time
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
pd.options.display.max_columns = 999
pd.options.display.max_rows = 999

In [6]:
def reduce_mem_usage(df):
    """ iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.        
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df

In [3]:
tmp_app_train = pd.read_csv('home_credit_train.csv')
tmp_app_train = reduce_mem_usage(tmp_app_train)
tmp_app_test = pd.read_csv('home_credit_test.csv')
tmp_app_test = reduce_mem_usage(tmp_app_test)



Memory usage of dataframe is 1389.97 MB
Memory usage after optimization is: 427.19 MB
Decreased by 69.3%
Memory usage of dataframe is 220.16 MB
Memory usage after optimization is: 67.08 MB
Decreased by 69.5%


In [7]:
app_train = pd.read_csv('app_train_new.csv')
app_test = pd.read_csv('app_test_new.csv')

app_train = reduce_mem_usage(app_train)
app_test = reduce_mem_usage(app_test)

Memory usage of dataframe is 18.75 MB
Memory usage after optimization is: 5.86 MB
Decreased by 68.7%
Memory usage of dataframe is 2.98 MB
Memory usage after optimization is: 0.93 MB
Decreased by 68.7%


In [None]:
tmp_app_train.shape

In [5]:
tmp_app_test.shape

(48744, 592)

###### Joining with application new fields

In [8]:
tmp_app_train = pd.merge(tmp_app_train,app_train,on=['SK_ID_CURR'],how='inner')
tmp_app_test = pd.merge(tmp_app_test,app_test,on=['SK_ID_CURR'],how='inner')

In [9]:
prev = pd.read_csv('previous_default_rates.csv')
prev = reduce_mem_usage(prev)

Memory usage of dataframe is 12.93 MB
Memory usage after optimization is: 3.88 MB
Decreased by 70.0%


In [10]:
tmp_app_train = pd.merge(tmp_app_train,prev,on=['SK_ID_CURR'],how='left')
tmp_app_test = pd.merge(tmp_app_test,prev,on=['SK_ID_CURR'],how='left')

In [11]:
tmp_app_train = tmp_app_train.fillna(-1)
tmp_app_test = tmp_app_test.fillna(-1)

In [12]:
tmp_app_train.shape

(307228, 604)

In [13]:
tmp_app_test.shape

(48744, 603)

##### Removing features with unique value =1

In [14]:
def remove_col(df):
    remove_col_list =[]
    for col in df.columns:
        if (len(df[col].unique()))==1:
            remove_col_list.append(col)
    return remove_col_list

In [15]:
# VAR_SK_DPD , VAR_SK_DPD_DEF

In [16]:
remove_col_list = remove_col(tmp_app_train)
remove_col_list

['FLAG_MOBIL']

In [17]:
tmp_app_train = tmp_app_train.drop(columns=remove_col_list)
tmp_app_test = tmp_app_test.drop(columns=remove_col_list)

##### Adding features

In [18]:
tmp_app_train['OBS_TOTAL'] = tmp_app_train['OBS_30_CNT_SOCIAL_CIRCLE'] + tmp_app_train['OBS_60_CNT_SOCIAL_CIRCLE']
tmp_app_train['DEF_TOTAL'] = tmp_app_train['DEF_30_CNT_SOCIAL_CIRCLE'] + tmp_app_train['DEF_60_CNT_SOCIAL_CIRCLE']

tmp_app_test['OBS_TOTAL'] = tmp_app_test['OBS_30_CNT_SOCIAL_CIRCLE'] + tmp_app_test['OBS_60_CNT_SOCIAL_CIRCLE']
tmp_app_test['DEF_TOTAL'] = tmp_app_test['DEF_30_CNT_SOCIAL_CIRCLE'] + tmp_app_test['DEF_60_CNT_SOCIAL_CIRCLE']


In [19]:
tmp_app_train['OBS_DIFF'] = tmp_app_train['OBS_30_CNT_SOCIAL_CIRCLE'] - tmp_app_train['OBS_60_CNT_SOCIAL_CIRCLE']
tmp_app_test['OBS_DIFF'] = tmp_app_test['OBS_30_CNT_SOCIAL_CIRCLE'] - tmp_app_test['OBS_60_CNT_SOCIAL_CIRCLE']

tmp_app_train['DEF_DIFF'] = tmp_app_train['DEF_30_CNT_SOCIAL_CIRCLE'] - tmp_app_train['DEF_60_CNT_SOCIAL_CIRCLE']
tmp_app_test['DEF_DIFF'] = tmp_app_test['DEF_30_CNT_SOCIAL_CIRCLE'] - tmp_app_test['DEF_60_CNT_SOCIAL_CIRCLE']



In [20]:
tmp_app_train['RATIO_SOCIAL'] = tmp_app_train['DEF_TOTAL'] / (1+tmp_app_train['OBS_TOTAL'])
tmp_app_train['RATIO_SOCIAL_DIFF'] = tmp_app_train['DEF_DIFF'] / (1+tmp_app_train['OBS_DIFF'])

tmp_app_test['RATIO_SOCIAL'] = tmp_app_test['DEF_TOTAL'] / (1+tmp_app_test['OBS_TOTAL'])
tmp_app_test['RATIO_SOCIAL_DIFF'] = tmp_app_test['DEF_DIFF'] / (1+tmp_app_test['OBS_DIFF'])

In [21]:
tmp_app_train.head()

Unnamed: 0,SK_ID_CURR,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_11,FLAG_DOCUMENT_18,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,DOCUMENT_IND_MEAN,DOCUMENT_IND_STD,DOCUMENT_IND_KURT,DOCUMENT_IND_SUM,FLAG_IND_SUM,FLAG_IND_KURT,FLAG_IND_STD,FLAG_IND_MEAN,EXT_SOURCE_MISSING,ORG_CORRECT,ORG_DEFAULT,DEFAULT_RATE_ORG_TYPE,NAME_TYPE_CORRECT,NAME_TYPE_DEFAULT,DEFAULT_RATE_NAME_TYPE,OCCUPATION_TYPE_CORRECT,OCCUPATION_TYPE_DEFAULT,DEFAULT_RATE_OCC_TYPE,INCOME_TYPE_CORRECT,INCOME_TYPE_DEFAULT,DEFAULT_RATE_INCOME_TYPE,EDUCATION_TYPE_CORRECT,EDUCATION_TYPE_DEFAULT,DEFAULT_RATE_EDUCATION_TYPE,FAMILY_STATUS_CORRECT,FAMILY_STATUS_DEFAULT,DEFAULT_RATE_FAMILY_STATUS,HOUSING_TYPE_CORRECT,HOUSING_TYPE_DEFAULT,DEFAULT_RATE_HOUSING_TYPE,TOTALAREA_NAN,EXT_SOURCE_3_NAN,EMERGENCYSTATE_NAN,AMT_REQ_CREDIT_BUREAU_NAN,DEF_SOCIAL_CIRCLE_NAN,COMMONAREA_NAN,NONLIVINGAPARTMENTS_NAN,LIVINGAPARTMENTS_NAN,EXT_SOURCE_1_NAN,EXT_SOURCE_2_NAN,NONLIVINGAREA_NAN,YEARS_BEGINEXPLUATATION_NAN,AMT_ANNUITY_NAN,APARTMENT_FLAG,DAYS_EMPLOYED_ANOM,NAME_CONTRACT_TYPE_Cash loans_x,NAME_CONTRACT_TYPE_Revolving loans_x,TARGET,BUREAU_MONTHS_x,LATEST_STATUS_0,LATEST_STATUS_1,LATEST_STATUS_2,LATEST_STATUS_3,LATEST_STATUS_4,LATEST_STATUS_5,LATEST_STATUS_C,LATEST_STATUS_X,DAYS_CREDIT_UPDATE,TOTAL_SUM_AMT_CREDIT_SUM,TOTAL_SUM_AMT_CREDIT_SUM_DEBT,BUREAU_MAX_AMT_CREDIT_MAX_OVERDUE,BUREAU_MAX_AMT_CREDIT_SUM,BUREAU_MAX_AMT_CREDIT_SUM_DEBT,BUREAU_MAX_AMT_CREDIT_SUM_LIMIT,BUREAU_MAX_AMT_CREDIT_SUM_OVERDUE,BADDEBT_SUM_AMT_CREDIT_SUM,BADDEBT_SUM_AMT_CREDIT_SUM_DEBT,CLOSED_SUM_AMT_CREDIT_SUM,CLOSED_SUM_AMT_CREDIT_SUM_DEBT,ACTIVE_SUM_AMT_CREDIT_SUM,ACTIVE_SUM_AMT_CREDIT_SUM_DEBT,BUREAU_AMT_ANNUITY_SUM,DAYS_CREDIT,CREDIT_DAY_OVERDUE,DAYS_CREDIT_ENDDATE,DAYS_ENDDATE_FACT,BUREAU_SUM_CNT_CREDIT_PROLONG,BUREAU_CNT_CREDIT_TYPE_Another type of loan,BUREAU_CNT_CREDIT_TYPE_Car loan,BUREAU_CNT_CREDIT_TYPE_Cash loan (non-earmarked),BUREAU_CNT_CREDIT_TYPE_Consumer credit,BUREAU_CNT_CREDIT_TYPE_Credit card,BUREAU_CNT_CREDIT_TYPE_Interbank credit,BUREAU_CNT_CREDIT_TYPE_Loan for business development,BUREAU_CNT_CREDIT_TYPE_Loan for purchase of shares (margin lending),BUREAU_CNT_CREDIT_TYPE_Loan for the purchase of equipment,BUREAU_CNT_CREDIT_TYPE_Loan for working capital replenishment,BUREAU_CNT_CREDIT_TYPE_Microloan,BUREAU_CNT_CREDIT_TYPE_Mobile operator loan,BUREAU_CNT_CREDIT_TYPE_Mortgage,BUREAU_CNT_CREDIT_TYPE_Real estate loan,BUREAU_CNT_CREDIT_TYPE_Unknown type of loan,BUREAU_CNT_CREDIT_ACTIVE_Active,BUREAU_CNT_CREDIT_ACTIVE_Bad debt,BUREAU_CNT_CREDIT_ACTIVE_Closed,BUREAU_CNT_CREDIT_ACTIVE_Sold,BUREAU_TOTAL_COUNT,STATUS_0,STATUS_1,STATUS_2,STATUS_3,STATUS_4,STATUS_5,STATUS_C,STATUS_X,BUREAU_MONTHS_y,BUREAU_FLAG,LATEST_LATEST_MONTHS_BALANCE_x,LATEST_LATEST_AMT_BALANCE_x,LATEST_LATEST_AMT_CREDIT_LIMIT_ACTUAL_x,LATEST_LATEST_AMT_DRAWINGS_ATM_CURRENT_x,LATEST_LATEST_AMT_DRAWINGS_OTHER_CURRENT_x,LATEST_LATEST_AMT_DRAWINGS_POS_CURRENT_x,LATEST_LATEST_AMT_PAYMENT_CURRENT_x,LATEST_LATEST_CNT_DRAWINGS_ATM_CURRENT_x,LATEST_LATEST_CNT_DRAWINGS_CURRENT_x,LATEST_LATEST_CNT_DRAWINGS_OTHER_CURRENT_x,LATEST_LATEST_CNT_INSTALMENT_MATURE_CUM_x,LATEST_LATEST_SK_DPD_x,LATEST_LATEST_SK_DPD_DEF_x,NAME_CONTRACT_STATUS_Active,NAME_CONTRACT_STATUS_Completed,NAME_CONTRACT_STATUS_Demand,NAME_CONTRACT_STATUS_Sent proposal,NAME_CONTRACT_STATUS_Signed,SK_DPD,SK_DPD_DEF,DEFAULT_RATE_CC_STATUS_x,CC_SK_DPD_DEF_COUNT,CC_SK_DPD_COUNT,CNT_CC_PREV_APP,MEAN_MONTHS_BALANCE,MEAN_AMT_BALANCE,MEAN_AMT_CREDIT_LIMIT_ACTUAL,MEAN_AMT_DRAWINGS_ATM_CURRENT,MEAN_AMT_DRAWINGS_CURRENT,MEAN_AMT_DRAWINGS_OTHER_CURRENT,MEAN_AMT_DRAWINGS_POS_CURRENT,MEAN_AMT_INST_MIN_REGULARITY,MEAN_AMT_PAYMENT_CURRENT,MEAN_AMT_PAYMENT_TOTAL_CURRENT,MEAN_AMT_RECEIVABLE_PRINCIPAL,MEAN_AMT_RECIVABLE,MEAN_AMT_TOTAL_RECEIVABLE,MEAN_CNT_DRAWINGS_ATM_CURRENT,MEAN_CNT_DRAWINGS_CURRENT,MEAN_CNT_DRAWINGS_OTHER_CURRENT,MEAN_CNT_DRAWINGS_POS_CURRENT,MEAN_CNT_INSTALMENT_MATURE_CUM,MAX_AMT_BALANCE,MAX_AMT_CREDIT_LIMIT_ACTUAL,MAX_AMT_DRAWINGS_ATM_CURRENT,MAX_AMT_DRAWINGS_CURRENT,MAX_AMT_DRAWINGS_OTHER_CURRENT,MAX_AMT_DRAWINGS_POS_CURRENT,MAX_AMT_INST_MIN_REGULARITY,MAX_AMT_PAYMENT_CURRENT,MAX_AMT_PAYMENT_TOTAL_CURRENT,MAX_AMT_RECEIVABLE_PRINCIPAL,MAX_AMT_RECIVABLE,MAX_AMT_TOTAL_RECEIVABLE,MAX_CNT_DRAWINGS_ATM_CURRENT,MAX_CNT_DRAWINGS_CURRENT,MAX_CNT_DRAWINGS_OTHER_CURRENT,MAX_CNT_DRAWINGS_POS_CURRENT,MAX_CNT_INSTALMENT_MATURE_CUM,MIN_MONTHS_BALANCE,MIN_AMT_BALANCE,MIN_AMT_CREDIT_LIMIT_ACTUAL,MIN_AMT_DRAWINGS_ATM_CURRENT,MIN_AMT_DRAWINGS_CURRENT,MIN_AMT_DRAWINGS_OTHER_CURRENT,MIN_AMT_DRAWINGS_POS_CURRENT,MIN_AMT_INST_MIN_REGULARITY,MIN_AMT_PAYMENT_CURRENT,MIN_AMT_PAYMENT_TOTAL_CURRENT,MIN_AMT_RECEIVABLE_PRINCIPAL,MIN_AMT_RECIVABLE,MIN_AMT_TOTAL_RECEIVABLE,MIN_CNT_DRAWINGS_ATM_CURRENT,MIN_CNT_DRAWINGS_CURRENT,MIN_CNT_DRAWINGS_POS_CURRENT,MIN_CNT_INSTALMENT_MATURE_CUM,SUM_MONTHS_BALANCE,SUM_AMT_BALANCE,SUM_AMT_CREDIT_LIMIT_ACTUAL,SUM_AMT_DRAWINGS_ATM_CURRENT,SUM_AMT_DRAWINGS_CURRENT,SUM_AMT_DRAWINGS_OTHER_CURRENT,SUM_AMT_DRAWINGS_POS_CURRENT,SUM_AMT_INST_MIN_REGULARITY,SUM_AMT_PAYMENT_CURRENT,SUM_AMT_PAYMENT_TOTAL_CURRENT,SUM_AMT_RECEIVABLE_PRINCIPAL,SUM_AMT_RECIVABLE,SUM_AMT_TOTAL_RECEIVABLE,SUM_CNT_DRAWINGS_ATM_CURRENT,SUM_CNT_DRAWINGS_CURRENT,SUM_CNT_DRAWINGS_OTHER_CURRENT,SUM_CNT_DRAWINGS_POS_CURRENT,SUM_CNT_INSTALMENT_MATURE_CUM,VAR_MONTHS_BALANCE,VAR_AMT_BALANCE,VAR_AMT_CREDIT_LIMIT_ACTUAL,VAR_AMT_DRAWINGS_ATM_CURRENT,VAR_AMT_DRAWINGS_CURRENT,VAR_AMT_DRAWINGS_OTHER_CURRENT,VAR_AMT_DRAWINGS_POS_CURRENT,VAR_AMT_INST_MIN_REGULARITY,VAR_AMT_PAYMENT_CURRENT,VAR_AMT_PAYMENT_TOTAL_CURRENT,VAR_AMT_RECEIVABLE_PRINCIPAL,VAR_AMT_RECIVABLE,VAR_AMT_TOTAL_RECEIVABLE,VAR_CNT_DRAWINGS_ATM_CURRENT,VAR_CNT_DRAWINGS_CURRENT,VAR_CNT_DRAWINGS_OTHER_CURRENT,VAR_CNT_DRAWINGS_POS_CURRENT,VAR_CNT_INSTALMENT_MATURE_CUM,SUM_AMT_DRAWINGS_CURRENT / CNT_DRAWINGS_CURRENT,SUM_AMT_INST_MIN_REGULARITY / AMT_PAYMENT_TOTAL_CURRENT,SUM_AMT_DRAWINGS_CURRENT / AMT_CREDIT_LIMIT_ACTUAL,SUM_AMT_BALANCE / AMT_CREDIT_LIMIT_ACTUAL,SUM_AMT_DRAWINGS_ATM_CURRENT / AMT_DRAWINGS_CURRENT,SUM_CNT_DRAWINGS_ATM_CURRENT / CNT_DRAWINGS_CURRENT,MEAN_AMT_DRAWINGS_CURRENT / CNT_DRAWINGS_CURRENT,MEAN_AMT_INST_MIN_REGULARITY / AMT_PAYMENT_TOTAL_CURRENT,MEAN_AMT_DRAWINGS_CURRENT / AMT_CREDIT_LIMIT_ACTUAL,MEAN_AMT_BALANCE / AMT_CREDIT_LIMIT_ACTUAL,MEAN_AMT_DRAWINGS_ATM_CURRENT / AMT_DRAWINGS_CURRENT,MEAN_CNT_DRAWINGS_ATM_CURRENT / CNT_DRAWINGS_CURRENT,MAX_AMT_DRAWINGS_CURRENT / CNT_DRAWINGS_CURRENT,MAX_AMT_INST_MIN_REGULARITY / AMT_PAYMENT_TOTAL_CURRENT,MAX_AMT_DRAWINGS_CURRENT / AMT_CREDIT_LIMIT_ACTUAL,MAX_AMT_BALANCE / AMT_CREDIT_LIMIT_ACTUAL,MAX_AMT_DRAWINGS_ATM_CURRENT / AMT_DRAWINGS_CURRENT,MAX_CNT_DRAWINGS_ATM_CURRENT / CNT_DRAWINGS_CURRENT,CC_FLAG,MEAN_NUM_INSTALMENT_VERSION,MEAN_NUM_INSTALMENT_NUMBER,MEAN_DAYS_INSTALMENT,MEAN_DAYS_ENTRY_PAYMENT,MEAN_AMT_INSTALMENT,MEAN_AMT_PAYMENT,MEAN_DAYS_DIFF,MEAN_AMT_DIFF,MEAN_AMT_DAYS_DIFF,VAR_NUM_INSTALMENT_VERSION,VAR_NUM_INSTALMENT_NUMBER,VAR_DAYS_INSTALMENT,VAR_DAYS_ENTRY_PAYMENT,VAR_AMT_INSTALMENT,VAR_AMT_PAYMENT,VAR_DAYS_DIFF,VAR_AMT_DIFF,SUM_NUM_INSTALMENT_VERSION,SUM_NUM_INSTALMENT_NUMBER,SUM_DAYS_INSTALMENT,SUM_DAYS_ENTRY_PAYMENT,SUM_AMT_INSTALMENT,SUMAMT_PAYMENT,SUM_DAYS_DIFF,SUM_AMT_DIFF,SUM_AMT_DAYS_DIFF,MIN_NUM_INSTALMENT_VERSION,MIN_NUM_INSTALMENT_NUMBER,MIN_DAYS_INSTALMENT,MIN_DAYS_ENTRY_PAYMENT,MIN_AMT_INSTALMENT,MIN_AMT_PAYMENT,MIN_DAYS_DIFF,MIN_AMT_DIFF,MIN_AMT_DAYS_DIFF,INST_NUM_NULL_INSTS,INST_NUM_NULL_APPS,SUM_DPD,SUM_DBD,INSTALLMENT_FLAG,MEAN_DEFAULT_RATE_POS_STATUS,MAX_DEFAULT_RATE_POS_STATUS,MEAN_SK_DPD,MEAN_SK_DPD_DEF,MAX_CNT_INSTALMENT,MAX_SK_DPD,MAX_SK_DPD_DEF,SUM_SK_DPD,SUM_SK_DPD_DEF,MIN_CNT_INSTALMENT_FUTURE,MEAN_POS_STATUS_Active,MEAN_POS_STATUS_Amortized debt,MEAN_POS_STATUS_Approved,MEAN_POS_STATUS_Canceled,MEAN_POS_STATUS_Completed,MEAN_POS_STATUS_Demand,MEAN_POS_STATUS_Returned to the store,MEAN_POS_STATUS_Signed,SUM_POS_STATUS_Active,SUM_POS_STATUS_Amortized debt,SUM_POS_STATUS_Approved,SUM_POS_STATUS_Canceled,SUM_POS_STATUS_Completed,SUM_POS_STATUS_Demand,SUM_POS_STATUS_Returned to the store,SUM_POS_STATUS_Signed,SIZE_POS_MONTH_BAL,POS_CNT_DIFF_MEAN,POS_CNT_DIFF_MIN,POS_CNT_DIFF_MAX,POS_CNT_DIFF_VAR,POS_FLAG,NAME_CONTRACT_TYPE_Cash loans_y,NAME_CONTRACT_TYPE_Consumer loans,NAME_CONTRACT_TYPE_Revolving loans_y,NAME_CONTRACT_TYPE_XNA,NAME_CASH_LOAN_PURPOSE_Building a house or an annex,NAME_CASH_LOAN_PURPOSE_Business development,NAME_CASH_LOAN_PURPOSE_Buying a garage,NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land,NAME_CASH_LOAN_PURPOSE_Buying a home,NAME_CASH_LOAN_PURPOSE_Buying a new car,NAME_CASH_LOAN_PURPOSE_Buying a used car,NAME_CASH_LOAN_PURPOSE_Car repairs,NAME_CASH_LOAN_PURPOSE_Education,NAME_CASH_LOAN_PURPOSE_Everyday expenses,NAME_CASH_LOAN_PURPOSE_Furniture,NAME_CASH_LOAN_PURPOSE_Gasification / water supply,NAME_CASH_LOAN_PURPOSE_Hobby,NAME_CASH_LOAN_PURPOSE_Journey,NAME_CASH_LOAN_PURPOSE_Medicine,NAME_CASH_LOAN_PURPOSE_Money for a third person,NAME_CASH_LOAN_PURPOSE_Other,NAME_CASH_LOAN_PURPOSE_Payments on other loans,NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment,NAME_CASH_LOAN_PURPOSE_Refusal to name the goal,NAME_CASH_LOAN_PURPOSE_Repairs,NAME_CASH_LOAN_PURPOSE_Urgent needs,NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday,NAME_CASH_LOAN_PURPOSE_XAP,NAME_CASH_LOAN_PURPOSE_XNA,NAME_CONTRACT_STATUS_Approved,NAME_CONTRACT_STATUS_Canceled,NAME_CONTRACT_STATUS_Refused,NAME_CONTRACT_STATUS_Unused offer,NAME_PAYMENT_TYPE_Cash through the bank,NAME_PAYMENT_TYPE_Cashless from the account of the employer,NAME_PAYMENT_TYPE_Non-cash from your account,NAME_PAYMENT_TYPE_XNA,NAME_TYPE_SUITE_Children,NAME_TYPE_SUITE_Family,NAME_TYPE_SUITE_Group of people,NAME_TYPE_SUITE_Other_A,NAME_TYPE_SUITE_Other_B,"NAME_TYPE_SUITE_Spouse, partner",NAME_TYPE_SUITE_Unaccompanied,NAME_CLIENT_TYPE_New,NAME_CLIENT_TYPE_Refreshed,NAME_CLIENT_TYPE_Repeater,NAME_CLIENT_TYPE_XNA,NAME_GOODS_CATEGORY_Additional Service,NAME_GOODS_CATEGORY_Animals,NAME_GOODS_CATEGORY_Audio/Video,NAME_GOODS_CATEGORY_Auto Accessories,NAME_GOODS_CATEGORY_Clothing and Accessories,NAME_GOODS_CATEGORY_Computers,NAME_GOODS_CATEGORY_Construction Materials,NAME_GOODS_CATEGORY_Consumer Electronics,NAME_GOODS_CATEGORY_Direct Sales,NAME_GOODS_CATEGORY_Education,NAME_GOODS_CATEGORY_Fitness,NAME_GOODS_CATEGORY_Furniture,NAME_GOODS_CATEGORY_Gardening,NAME_GOODS_CATEGORY_Homewares,NAME_GOODS_CATEGORY_House Construction,NAME_GOODS_CATEGORY_Insurance,NAME_GOODS_CATEGORY_Jewelry,NAME_GOODS_CATEGORY_Medical Supplies,NAME_GOODS_CATEGORY_Medicine,NAME_GOODS_CATEGORY_Mobile,NAME_GOODS_CATEGORY_Office Appliances,NAME_GOODS_CATEGORY_Other,NAME_GOODS_CATEGORY_Photo / Cinema Equipment,NAME_GOODS_CATEGORY_Sport and Leisure,NAME_GOODS_CATEGORY_Tourism,NAME_GOODS_CATEGORY_Vehicles,NAME_GOODS_CATEGORY_Weapon,NAME_GOODS_CATEGORY_XNA,NAME_PORTFOLIO_Cards,NAME_PORTFOLIO_Cars,NAME_PORTFOLIO_Cash,NAME_PORTFOLIO_POS,NAME_PORTFOLIO_XNA,CHANNEL_TYPE_AP+ (Cash loan),CHANNEL_TYPE_Car dealer,CHANNEL_TYPE_Channel of corporate sales,CHANNEL_TYPE_Contact center,CHANNEL_TYPE_Country-wide,CHANNEL_TYPE_Credit and cash offices,CHANNEL_TYPE_Regional / Local,CHANNEL_TYPE_Stone,NAME_PRODUCT_TYPE_XNA,NAME_PRODUCT_TYPE_walk-in,NAME_PRODUCT_TYPE_x-sell,NAME_SELLER_INDUSTRY_Auto technology,NAME_SELLER_INDUSTRY_Clothing,NAME_SELLER_INDUSTRY_Connectivity,NAME_SELLER_INDUSTRY_Construction,NAME_SELLER_INDUSTRY_Consumer electronics,NAME_SELLER_INDUSTRY_Furniture,NAME_SELLER_INDUSTRY_Industry,NAME_SELLER_INDUSTRY_Jewelry,NAME_SELLER_INDUSTRY_MLM partners,NAME_SELLER_INDUSTRY_Tourism,NAME_SELLER_INDUSTRY_XNA,NAME_YIELD_GROUP_XNA,NAME_YIELD_GROUP_high,NAME_YIELD_GROUP_low_action,NAME_YIELD_GROUP_low_normal,NAME_YIELD_GROUP_middle,PRODUCT_COMBINATION_Card Street,PRODUCT_COMBINATION_Card X-Sell,PRODUCT_COMBINATION_Cash,PRODUCT_COMBINATION_Cash Street: high,PRODUCT_COMBINATION_Cash Street: low,PRODUCT_COMBINATION_Cash Street: middle,PRODUCT_COMBINATION_Cash X-Sell: high,PRODUCT_COMBINATION_Cash X-Sell: low,PRODUCT_COMBINATION_Cash X-Sell: middle,PRODUCT_COMBINATION_POS household with interest,PRODUCT_COMBINATION_POS household without interest,PRODUCT_COMBINATION_POS industry with interest,PRODUCT_COMBINATION_POS industry without interest,PRODUCT_COMBINATION_POS mobile with interest,PRODUCT_COMBINATION_POS mobile without interest,PRODUCT_COMBINATION_POS other with interest,PRODUCT_COMBINATION_POS others without interest,SUM_AMT_ANNUITY,SUM_AMT_APPLICATION,SUM_AMT_CREDIT,SUM_AMT_DOWN_PAYMENT,SUM_SELLERPLACE_AREA,SUM_AMT_GOODS_PRICE,SUM_RATE_DOWN_PAYMENT,SUM_CNT_PAYMENT,SUM_NFLAG_INSURED_ON_APPROVAL,SUM_DEFAULT_RATE_PREV_STATUS,SUM_DEFAULT_RATE_PRODUCT_COMBINATION,SUM_DEFAULT_RATE_CLIENT_TYPE,SUM_DEFAULT_RATE_GOODS_CAT,SUM_DEFAULT_RATE_CHANNEL_TYPE,SUM_AMT_APPLICATION - AMT_CREDIT,SUM_AMT_ANNUITY*CNT_PAYMENT - AMT_CREDIT,MIN_AMT_ANNUITY,MIN_AMT_APPLICATION,MIN_AMT_CREDIT,MIN_AMT_DOWN_PAYMENT,MIN_SELLERPLACE_AREA,MIN_AMT_GOODS_PRICE,MIN_RATE_DOWN_PAYMENT,MIN_CNT_PAYMENT,MIN_NFLAG_INSURED_ON_APPROVAL,MIN_DEFAULT_RATE_PREV_STATUS,MIN_DEFAULT_RATE_PRODUCT_COMBINATION,MIN_DEFAULT_RATE_CLIENT_TYPE,MIN_DEFAULT_RATE_GOODS_CAT,MIN_DEFAULT_RATE_CHANNEL_TYPE,MIN_AMT_APPLICATION - AMT_CREDIT,MIN_AMT_ANNUITY*CNT_PAYMENT - AMT_CREDIT,MEAN_AMT_ANNUITY,MEAN_AMT_APPLICATION,MEAN_AMT_CREDIT,MEAN_AMT_DOWN_PAYMENT,MEAN_SELLERPLACE_AREA,MEAN_AMT_GOODS_PRICE,MEAN_RATE_DOWN_PAYMENT,MEAN_CNT_PAYMENT,MEAN_NFLAG_INSURED_ON_APPROVAL,MEAN_DEFAULT_RATE_PREV_STATUS,MEAN_DEFAULT_RATE_PRODUCT_COMBINATION,MEAN_DEFAULT_RATE_CLIENT_TYPE,MEAN_DEFAULT_RATE_GOODS_CAT,MEAN_DEFAULT_RATE_CHANNEL_TYPE,MEAN_AMT_APPLICATION - AMT_CREDIT,MEAN_AMT_ANNUITY*CNT_PAYMENT - AMT_CREDIT,MAX_AMT_ANNUITY,MAX_AMT_APPLICATION,MAX_AMT_CREDIT,MAX_AMT_DOWN_PAYMENT,MAX_SELLERPLACE_AREA,MAX_AMT_GOODS_PRICE,MAX_RATE_DOWN_PAYMENT,MAX_CNT_PAYMENT,MAX_NFLAG_INSURED_ON_APPROVAL,MAX_DEFAULT_RATE_PREV_STATUS,MAX_DEFAULT_RATE_PRODUCT_COMBINATION,MAX_DEFAULT_RATE_CLIENT_TYPE,MAX_DEFAULT_RATE_GOODS_CAT,MAX_DEFAULT_RATE_CHANNEL_TYPE,MAX_AMT_APPLICATION - AMT_CREDIT,MAX_AMT_ANNUITY*CNT_PAYMENT - AMT_CREDIT,CNT_PREV_APP,DAYS_FIRST_DUE,DAYS_FIRST_DRAWING,DAYS_LAST_DUE_1ST_VERSION,DAYS_LAST_DUE,DAYS_TERMINATION,DAYS_TERMINATION_ANOM,DAYS_LAST_DUE_ANOM,DAYS_LAST_DUE_1ST_VERSION_ANOM,DAYS_FIRST_DUE_ANOM,DAYS_FIRST_DRAWING_ANOM,RATE_DOWN_PAY_NAN,NFLAG_INSURED_ON_APPROVAL_NAN,AMT_GOODS_PRICE_NAN,CNT_PAYMENT_NAN,PREV_APP_FLAG,EXT_SOURCE_1 EXT_SOURCE_3,EXT_SOURCE_2 EXT_SOURCE_3,EXT_SOURCE_1 EXT_SOURCE_2,EXT_SOURCE prod,EXT_SOURCE_1 / DAYS_BIRTH_x,EXT_SOURCE_2 / DAYS_BIRTH_x,EXT_SOURCE_3 / DAYS_BIRTH_x,NEW_CREDIT_TO_ANNUITY_RATIO,NEW_INC_PER_CHLD,NEW_SCORES_STD,NEW_ANNUITY_TO_INCOME_RATIO,NEW_CREDIT_TO_INCOME_RATIO,EXT_SOURCE sum,EXT_SOURCE_mean,EXT_SOURCE - EXT_SOURCE_3,EXT_SOURCE / mean,EXT_SOURCE_1 / EXT_SOURCE_2,EXT_SOURCE_2 / EXT_SOURCE_3,EXT_SOURCE_1 / EXT_SOURCE_3,NEW_CREDIT_TO_GOODS_RATIO,NEW_EMPLOY_TO_BIRTH_RATIO,NEW_PHONE_TO_BIRTH_RATIO,app_INCOME_CHILDREN,app_INCOME_FAM,popular_AMT_GOODS_PRICE,most_popular_AMT_GOODS_PRICE,AMT_CREDIT - AMT_GOODS_PRICE,AMT_INCOME_TOTAL - AMT_GOODS_PRICE,EXT_SOURCE_3 / DAYS_BIRTH_y,EXT_SOURCE_1 / DAYS_BIRTH_y,EXT_SOURCE_2 / DAYS_BIRTH_y,MAX_DEFAULT_RATE_REJECT_REASON,MAX_DEFAULT_RATE_LOAN_PURPOSE,MEAN_DEFAULT_RATE_REJECT_REASON,MEAN_DEFAULT_RATE_LOAN_PURPOSE,OBS_TOTAL,DEF_TOTAL,OBS_DIFF,DEF_DIFF,RATIO_SOCIAL,RATIO_SOCIAL_DIFF
0,100002,1,0,1,0,202500.0,406597.5,24700.5,351000.0,0.018799,-9461,-637,-3648.0,-2120,1,0,1,1,0,1.0,2,2,10,0,0,0,0,0,0,0.083008,0.262939,0.139404,0.024704,2.0,2.0,2.0,2.0,-1134.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,0.049988,0.223633,20.0,1,4,-1.875,0.516602,0.666504,0,61615,6318,0.102539,228185,20337,0.089111,49302,5832,0.118286,143375.0,15208.0,0.106079,198682,19507,0.098206,40934.0,4452.0,0.108765,251375,21255,0.084534,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,16.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-7.0,865055.6,245781.0,5043.64502,450000.0,245781.0,31988.564453,0.0,0.0,0.0,383067.0,0.0,481988.5625,245781.0,0.0,-103.0,0.0,780.0,-36.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,6.0,0.0,8.0,45.0,27.0,0.0,0.0,0.0,0.0,23.0,15.0,110.0,1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,2.0,19.0,565.0,587.0,53093.746094,53093.746094,-12.0,0.0,-0.0,0.052643,31.671875,28500.0,29604.257812,101164100.0,101164100.0,24.257311,0.0,20.0,190.0,5605.0,5993.0,219625.7,219625.7,-388.0,0.0,0.0,1.0,1.0,25.0,49.0,9251.775391,9251.775391,-31.0,0.0,-0.0,0.0,0.0,388.0,0.0,1,0.073425,0.073425,0.0,0.0,24.0,0.0,0.0,0.0,0.0,6.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.0,9.0,9.0,9.0,0.0,1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,9251.775391,179055.0,179055.0,0.0,500.0,179055.0,0.0,24.0,0.0,0.082092,0.088562,0.098816,0.111816,0.07959,0.0,42987.6,9251.775391,179055.0,179055.0,0.0,500.0,179055.0,0.0,24.0,0.0,0.082092,0.088562,0.098816,0.111816,0.07959,0.0,42987.601562,9251.775391,179055.0,179055.0,0.0,500.0,179055.0,0.0,24.0,0.0,0.082092,0.088562,0.098816,0.111816,0.07959,0.0,42987.601562,9251.775391,179055.0,179055.0,0.0,500.0,179055.0,0.0,24.0,0.0,0.082092,0.088562,0.098816,0.111816,0.07959,0.0,42987.601562,1.0,-565.0,-1.0,125.0,-25.0,-17.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1,0.011574,0.036652,0.02182,0.003042,-9e-06,-2.8e-05,-1.5e-05,16.46875,202500.0,0.092041,0.121948,2.007812,0.485352,0.161743,0.206543,0.018814,0.315674,1.885742,0.595215,1.158203,0.067322,0.119873,202500.0,101250.0,0,0,55597.5,-148500.0,-1.5e-05,-9e-06,-2.8e-05,0.086365,0.090271,0.086365,0.090271,4.0,4.0,0.0,0.0,0.799805,0.0
1,100003,0,0,0,0,270000.0,1293502.5,35698.5,1129500.0,0.003542,-16765,-1188,-1186.0,-291,1,0,1,1,0,2.0,1,1,11,0,0,0,0,0,0,0.311279,0.62207,0.535156,0.095886,1.0,0.0,1.0,0.0,-828.0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.049988,0.223633,20.0,1,4,-1.875,0.516602,0.666504,1,8357,526,0.062927,37139,3009,0.080994,25795,1735,0.067261,20433.0,1247.0,0.061035,70791,4006,0.05658,181425.0,14832.0,0.081787,251375,21255,0.084534,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-43.0,1017400.0,0.0,0.0,810000.0,0.0,810000.0,0.0,0.0,0.0,207400.5,0.0,810000.0,0.0,0.0,-606.0,0.0,1216.0,-540.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,2.0,12.0,2310.0,2324.0,560835.375,560835.375,-1.0,0.0,-0.0,0.040009,9.828125,573735.875,573541.8125,12219660000.0,12219660000.0,13.89,0.0,26.0,127.0,34454.0,34633.0,1618865.0,1618865.0,-179.0,0.0,0.0,1.0,1.0,536.0,544.0,6662.970215,6662.970215,-14.0,0.0,-0.0,0.0,0.0,179.0,0.0,1,0.073486,0.073975,0.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,0.928711,0.0,0.0,0.0,0.071411,0.0,0.0,0.0,26.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,28.0,4.125,3.375,5.5,1.421875,1,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,3.0,0.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,169661.96875,1306309.5,1452573.0,6885.0,1599.0,1306309.5,0.100037,30.0,2.0,0.246338,0.221558,0.253906,0.241699,0.268555,-146263.5,195964.7,6737.310059,68809.5,68053.5,0.0,-1.0,68809.5,0.0,6.0,0.0,0.082092,0.067261,0.079163,0.062134,0.07959,-135882.0,12794.219727,56553.988281,435436.5,484191.0,3442.5,533.0,435436.5,0.050018,10.0,0.666504,0.082092,0.073853,0.084595,0.080566,0.089539,-48754.5,65321.550781,98356.992188,900000.0,1035882.0,6885.0,1400.0,900000.0,0.100037,12.0,1.0,0.082092,0.083801,0.095459,0.103577,0.09906,756.0,144401.9375,3.0,-716.0,-1.0,-386.0,-536.0,-527.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1,0.166626,0.333008,0.193604,0.103638,-1.9e-05,-3.7e-05,-3.2e-05,36.21875,270000.0,0.1604,0.132202,4.789062,1.46875,0.489502,0.398438,0.21167,0.500488,1.162109,0.581543,1.145508,0.070862,0.049377,270000.0,90000.0,0,0,164002.5,-859500.0,-3.2e-05,-1.9e-05,-3.7e-05,0.086365,0.09552,0.086365,0.092041,2.0,0.0,0.0,0.0,0.0,0.0
2,100004,1,1,1,0,67500.0,135000.0,6750.0,135000.0,0.010033,-19046,-225,-4260.0,-2531,1,1,1,1,0,1.0,2,2,9,0,0,0,0,0,0,0.505859,0.556152,0.729492,0.060181,0.0,0.0,0.0,0.0,-815.0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,5,6.0,0.408203,0.833496,1,9665,726,0.075134,228185,20337,0.089111,49302,5832,0.118286,143375.0,15208.0,0.106079,198682,19507,0.098206,40934.0,4452.0,0.108765,251375,21255,0.084534,1,0,1,0,0,1,1,1,1,0,1,1,0,0,0,0,1,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-382.0,189037.8,0.0,0.0,94537.796875,0.0,0.0,0.0,0.0,0.0,189037.796875,0.0,0.0,0.0,0.0,-408.0,0.0,-382.0,-382.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,2.0,3.0,784.0,795.0,10573.964844,10573.964844,-3.0,0.0,-0.0,0.333252,1.0,900.0,1157.333374,9071372.0,9071372.0,17.333334,0.0,4.0,6.0,2262.0,2285.0,21288.46,21288.46,-23.0,0.0,0.0,1.0,1.0,724.0,727.0,5357.25,5357.25,-11.0,0.0,-0.0,0.0,0.0,23.0,0.0,1,0.073547,0.073975,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.75,0.0,0.0,0.0,0.25,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,4.0,1.5,1.5,1.5,0.0,1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,5357.25,24282.0,20106.0,4860.0,30.0,24282.0,0.212036,4.0,0.0,0.082092,0.085571,0.098816,0.094666,0.082336,4176.0,1323.0,5357.25,24282.0,20106.0,4860.0,30.0,24282.0,0.212036,4.0,0.0,0.082092,0.085571,0.098816,0.094666,0.082336,4176.0,1323.0,5357.25,24282.0,20106.0,4860.0,30.0,24282.0,0.212036,4.0,0.0,0.082092,0.085571,0.098816,0.094666,0.082336,4176.0,1323.0,5357.25,24282.0,20106.0,4860.0,30.0,24282.0,0.212036,4.0,0.0,0.082092,0.085571,0.098816,0.094666,0.082336,4176.0,1323.0,1.0,-784.0,-1.0,-694.0,-724.0,-714.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1,0.369141,0.405762,0.28125,0.2052,-2.7e-05,-2.9e-05,-3.8e-05,20.0,67500.0,0.117371,0.099976,2.0,1.791992,0.597168,0.333008,0.343506,0.909668,0.762207,0.693359,1.0,0.01181,0.042786,67500.0,33750.0,0,0,0.0,-67500.0,-3.8e-05,-2.7e-05,-2.9e-05,0.086365,0.090271,0.086365,0.090271,0.0,0.0,0.0,0.0,0.0,0.0
3,100006,0,0,1,0,135000.0,312682.5,29686.5,297000.0,0.008018,-19005,-3039,-9832.0,-2437,1,0,1,0,0,2.0,2,2,17,0,0,0,0,0,0,0.505859,0.650391,0.535156,0.048096,2.0,0.0,2.0,0.0,-617.0,1,0,0,0,0,0,0,0,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.049988,0.223633,20.0,1,3,-3.333984,0.547852,0.5,2,61615,6318,0.102539,228185,20337,0.089111,49302,5832,0.118286,143375.0,15208.0,0.106079,198682,19507,0.098206,26791.0,2960.0,0.110474,251375,21255,0.084534,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,1,0,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,-1.0,0.0,270000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076965,0.0,0.0,1.0,-3.5,0.0,270000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,270000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-6.0,0.0,270000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-21.0,0.0,1620000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,2.0,10.0,545.0,575.0,691786.875,691786.875,-1.0,0.0,-0.0,0.116638,8.398438,31817.0,39222.117188,28256810000.0,28256810000.0,645.049988,0.0,18.0,71.0,4036.0,4346.0,1007153.0,1007153.0,-310.0,0.0,0.0,1.0,1.0,11.0,12.0,2482.919922,2482.919922,-77.0,0.0,-0.0,0.0,0.0,310.0,0.0,1,0.07489,0.102905,0.0,0.0,48.0,0.0,0.0,0.0,0.0,0.0,0.856934,0.0,0.0,0.0,0.095215,0.0,0.047607,0.0,18.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,21.0,2.554688,0.666504,4.5,3.675781,1,5.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,5.0,5.0,3.0,1.0,0.0,4.0,0.0,0.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,8.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,3.0,2.0,3.0,0.0,0.0,0.0,0.0,1.0,7.0,0.0,1.0,5.0,0.0,4.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,7.0,4.0,2.0,0.0,2.0,1.0,1.0,1.0,2.0,0.0,0.0,0.0,1.0,2.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,141907.046875,2449829.25,2625259.5,69680.34375,8048.0,2449829.25,0.326904,138.0,0.0,0.850098,0.862793,0.862793,0.882812,0.862793,-175430.15625,1205268.0,2482.919922,0.0,0.0,2693.340088,-1.0,26912.339844,0.109009,0.0,0.0,0.082092,0.067261,0.095459,0.07489,0.07959,-218115.0,-270000.0,23651.175781,272203.25,291695.5,34840.171875,894.222229,408304.875,0.163452,23.0,0.0,0.094421,0.095825,0.095825,0.098083,0.095886,-19492.240234,200877.953125,39954.511719,688500.0,906615.0,66987.0,8025.0,688500.0,0.217773,48.0,0.0,0.136353,0.129883,0.098816,0.103577,0.09906,66987.0,662797.8125,9.0,-151.0,-1.0,1259.0,-151.0,-143.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1,0.270752,0.348145,0.329102,0.176147,-2.7e-05,-3.4e-05,-2.8e-05,10.53125,135000.0,0.076416,0.219849,2.316406,1.691406,0.563965,0.621094,0.312256,0.777832,1.214844,0.945312,1.052734,0.159912,0.032471,135000.0,45000.0,0,0,15682.5,-162000.0,-2.8e-05,-2.7e-05,-3.4e-05,0.143677,0.09552,0.092712,0.093201,4.0,0.0,0.0,0.0,0.0,0.0
4,100007,1,0,1,0,121500.0,513000.0,21865.5,513000.0,0.028656,-19932,-3038,-4312.0,-3458,1,0,1,0,0,1.0,2,2,11,0,0,0,0,1,1,0.505859,0.322754,0.535156,0.171875,0.0,0.0,0.0,0.0,-1106.0,0,0,0,0,0,1,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.049988,0.223633,20.0,1,3,-3.333984,0.547852,0.5,2,80,5,0.0625,228185,20337,0.089111,25795,1735,0.067261,143375.0,15208.0,0.106079,198682,19507,0.098206,40934.0,4452.0,0.108765,251375,21255,0.084534,1,1,1,0,0,1,1,1,1,0,1,1,0,0,0,1,0,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-783.0,146250.0,0.0,0.0,146250.0,0.0,0.0,0.0,0.0,0.0,146250.0,0.0,0.0,0.0,0.0,-1149.0,0.0,-783.0,-783.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,2.0,17.0,2326.0,2318.0,22678.785156,22678.785156,12.0,22655.654297,7.71,0.140991,16.75,465297.96875,461933.96875,61668200.0,64771280.0,63.865734,8084830.0,77.0,465.0,67888.0,68128.0,835985.3,806128.0,-240.0,29857.365234,-1016.352844,1.0,1.0,14.0,14.0,1821.780029,0.18,-31.0,0.0,-781.229492,0.0,0.0,240.0,0.0,1,0.073547,0.078247,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,0.939453,0.0,0.0,0.0,0.045441,0.0,0.0,0.015152,62.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,66.0,6.101562,4.5,8.5,2.175781,1,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,4.0,6.0,0.0,0.0,0.0,5.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,5.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,4.0,2.0,0.0,1.0,0.0,0.0,0.0,3.0,1.0,1.0,0.0,2.0,1.0,3.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,73672.828125,903181.5,999832.5,6781.5,2455.0,903181.5,0.319092,124.0,3.0,0.492676,0.563965,0.576172,0.580078,0.598633,-96651.0,640477.6,1834.290039,17176.5,14616.0,3105.0,-1.0,17176.5,0.100159,10.0,0.0,0.082092,0.083801,0.095459,0.082947,0.082336,-59400.0,3726.899902,12278.804688,150530.25,166638.75,3390.75,409.166656,150530.25,0.159546,20.671875,0.600098,0.082092,0.093933,0.096008,0.096741,0.099731,-16108.5,106746.273438,22678.785156,247500.0,284400.0,3676.5,1200.0,247500.0,0.218872,48.0,1.0,0.082092,0.128174,0.098816,0.103577,0.147095,2560.5,340127.28125,6.0,-344.0,-1.0,346.0,-354.0,-347.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1,0.270752,0.172729,0.163208,0.087341,-2.5e-05,-1.6e-05,-2.7e-05,23.46875,121500.0,0.115112,0.179932,4.222656,1.363281,0.454346,0.293457,0.192261,1.567383,0.603027,0.945312,1.0,0.152466,0.055481,121500.0,60750.0,0,0,0.0,-391500.0,-2.7e-05,-2.5e-05,-1.6e-05,0.086365,0.09552,0.086365,0.09375,0.0,0.0,0.0,0.0,0.0,0.0


In [59]:
tmp_app_test.shape

(48744, 608)

In [60]:
tmp_app_train.shape

(307228, 609)

In [24]:
remove_col_list = remove_col(tmp_app_test)
remove_col_list

[]

In [61]:
train =  tmp_app_train.drop(columns=no_imp_feat)
test = tmp_app_test.drop(columns=no_imp_feat)

In [62]:
#tmp_app_train = tmp_app_train.sample(n=100000)
X = train.drop(columns = ['SK_ID_CURR','TARGET']).values
y = train['TARGET'].values

In [94]:
# train = tmp_app_train.drop(columns=list(f_imp[f_imp==0].index))
test = tmp_app_test.drop(columns=list(f_imp[f_imp==0].index))

In [67]:
train.to_csv('train.csv',index=False)
test.to_csv('test.csv',index=False)

In [97]:
test = pd.read_csv('test.csv')

In [98]:
test.shape

(48744, 503)

##### We need to divide training data to train, validation and test in the ratio 0.7,0.2,0.1

In [82]:
from sklearn.model_selection import train_test_split
def split_data(X,y,size):
    # test_size ideal 0.2 ; random_state can be any value
    X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=size,random_state=42)
    return X_train,X_test,y_train,y_test
############################################

In [83]:
X_train,X_val,y_train,y_val = split_data(X,y,0.3)
X_val,X_test,y_val,y_test = split_data(X_val,y_val,.33)

##### We use this to scale the data

In [66]:
from sklearn.preprocessing import StandardScaler 
sc_X = StandardScaler()


In [69]:
X = sc_X.fit_transform(X)

In [81]:
X.shape

(307228, 502)

In [84]:
X_train = sc_X.transform(X_train)
X_test = sc_X.transform(X_test)
X_val = sc_X.transform(X_val)

In [29]:
# X_train = pd.DataFrame(sc_X.fit_transform(X_train),columns=X_train.columns)

In [30]:
# X_val = pd.DataFrame(sc_X.fit_transform(X_val),columns=X_val.columns)
# X_test = pd.DataFrame(sc_X.fit_transform(X_test),columns=X_test.columns)

In [31]:
# print('training shape: '+ str(X_train.shape))
# print('Cross val shape: '+ str(X_val.shape))
# print('Test shape: '+ str(X_test.shape))

training shape: (276505, 607)
Cross val shape: (20584, 607)
Test shape: (10139, 607)


In [None]:
np.unique(y,return_counts = True)

In [68]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

### Parameter tuning

In [74]:
def objective(params):
    params = {
      'bagging_fraction': '{:.5f}'.format(params['bagging_fraction']),
         'learning_rate': '{:.5f}'.format(params['learning_rate']),
         'colsample_bytree': '{:.4f}'.format(params['colsample_bytree']),        
        'max_bin': int(params['max_bin']),
        'num_leaves': int(params['num_leaves']),
        'n_estimators': int(params['n_estimators']),
        'min_data_in_leaf': int(params['min_data_in_leaf']),
        'feature_fraction': '{:.4f}'.format(params['feature_fraction']), 
        'min_gain_to_split': '{:.4f}'.format(params['min_gain_to_split']),
        'reg_alpha': '{:.4f}'.format(params['reg_alpha']),
        'reg_lambda': '{:.4f}'.format(params['reg_lambda']),
        'max_depth': int(params['max_depth']),
        'seed': 0,
        'min_child_weight' : int(params['min_child_weight']),
        'scale_pos_weight' : 0.087
        }
    clf = LGBMClassifier(
        
        **params
    )
    kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    results = cross_val_score(clf, X, y, cv=kfold,scoring='roc_auc')
  
    #clf.fit(X_train, y_train)    
 
    #y_pred_prob_val = clf.predict_proba(X_val)[:,1]
    #y_pred_prob_train = clf.predict_proba(X_train)[:,1]
    #y_pred_prob_test = clf.predict_proba(X_test)[:,1]
    #auc_score_val = roc_auc_score(y_val, y_pred_prob_val)
    #auc_score_train = roc_auc_score(y_train, y_pred_prob_train)
    #auc_score_test = roc_auc_score(y_test, y_pred_prob_test)
    print(results,'mean auc score: ',results.mean())
    print(params)
    #print("val_score {:.5f} \t train_score {:.5f} \t test_score: {:.5f} \nparams {}".format(auc_score_val,auc_score_train,auc_score_test, params))
    return (1-results.mean())
   #return (1-auc_score_val)

In [75]:
space = {
      'bagging_fraction' : hp.uniform('bagging_fraction', .3, 0.9),
       'learning_rate' : hp.uniform('learning_rate', .01, .1),
     'colsample_bytree': hp.uniform('colsample_bytree', 0.1, 0.9),
    'max_bin': hp.quniform('max_bin', 120, 420, 5),
    'num_leaves': hp.quniform('num_leaves', 20, 140, 2),
    'n_estimators': hp.quniform('n_estimators', 1200, 2100, 20),
    'min_data_in_leaf': hp.quniform('min_data_in_leaf', 10, 280, 10),
        'feature_fraction': hp.uniform('feature_fraction', 0.2, 0.9),
        'min_gain_to_split': hp.uniform('min_gain_to_split', 0.1, 0.9),
           'reg_alpha': hp.uniform('reg_alpha', .01, .1),
        'reg_lambda': hp.uniform('reg_lambda', .01, .1),
        'max_depth': hp.quniform('max_depth', -1, 30, 1),
        'seed': 0,
        'min_child_weight' : hp.quniform('min_child_weight', 50, 150, 10)
}

In [76]:
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=30)

[0.77952877 0.78784819 0.78987475] mean auc score:  0.7857505726128915
{'bagging_fraction': '0.31928', 'learning_rate': '0.07383', 'colsample_bytree': '0.5603', 'max_bin': 280, 'num_leaves': 108, 'n_estimators': 1620, 'min_data_in_leaf': 50, 'feature_fraction': '0.7409', 'min_gain_to_split': '0.4740', 'reg_alpha': '0.0755', 'reg_lambda': '0.0435', 'max_depth': 7, 'seed': 0, 'min_child_weight': 80, 'scale_pos_weight': 0.087}
[0.77666496 0.78521207 0.7878154 ] mean auc score:  0.7832308092458188
{'bagging_fraction': '0.59659', 'learning_rate': '0.08438', 'colsample_bytree': '0.4655', 'max_bin': 240, 'num_leaves': 28, 'n_estimators': 1480, 'min_data_in_leaf': 160, 'feature_fraction': '0.8498', 'min_gain_to_split': '0.5884', 'reg_alpha': '0.0329', 'reg_lambda': '0.0467', 'max_depth': 22, 'seed': 0, 'min_child_weight': 130, 'scale_pos_weight': 0.087}
[0.77823097 0.78665701 0.78924301] mean auc score:  0.7847103268085408
{'bagging_fraction': '0.46840', 'learning_rate': '0.08604', 'colsample_

In [None]:
 best = {'bagging_fraction': '0.68136', 'learning_rate': '0.02325',
         'colsample_bytree': '0.7283', 'max_bin': 180, 'num_leaves': 48, 
         'n_estimators': 1300, 'min_data_in_leaf': 230, 'feature_fraction': '0.2674',
         'min_gain_to_split': '0.5626', 'reg_alpha': '0.0791', 'reg_lambda': '0.0606',
         'max_depth': 16, 'seed': 0, 'min_child_weight': 120}

In [None]:
#val_score 0.79288 	 train_score 0.88460 	 test_score: 0.78729
best = {'bagging_fraction': '0.67174', 'learning_rate': '0.02449',
        'colsample_bytree': '0.6401', 'max_bin': 315, 'num_leaves': 44, 
        'n_estimators': 1500, 'min_data_in_leaf': 130, 'feature_fraction': '0.2692',
        'min_gain_to_split': '0.8799', 'reg_alpha': '0.0351', 'reg_lambda': '0.0508',
        'max_depth': 14, 'seed': 0, 'subsample_for_bin':800000,
        'min_child_weight': 130, 'subsample_frequency': '0.83434'}

In [None]:
# best = {'bagging_fraction': '0.50064', 'learning_rate': '0.02807', 
#         'colsample_bytree': '0.4822', 'max_bin': 390, 'num_leaves': 26, 
#         'n_estimators': 1800, 'min_data_in_leaf': 120, 'feature_fraction': '0.3009',
#         'min_gain_to_split': '0.3432', 'reg_alpha': '0.0547', 
#         'reg_lambda': '0.0410', 'max_depth': 22, 'seed': 0, 'min_child_weight': 90 }
#         #'subsample_frequency': '0.77138'}

In [None]:
# best = {'bagging_fraction': '0.78328', 'learning_rate': '0.03708', 
#         'colsample_bytree': '0.4451', 'max_bin': 385, 'num_leaves': 28,
#         'n_estimators': 1500, 'min_data_in_leaf': 170, 'feature_fraction': '0.2330',
#         'min_gain_to_split': '0.1414', 'reg_alpha': '0.0415', 'reg_lambda': '0.0549',
#         'max_depth': 15, 'seed': 0, 'min_child_weight': 100, 
#         'subsample_frequency': '0.77583'}

In [77]:
best_params = space_eval(space, best)

In [78]:
best_params

{'bagging_fraction': 0.40650893650961334,
 'colsample_bytree': 0.8966358403650636,
 'feature_fraction': 0.21407021825056116,
 'learning_rate': 0.024217122627912854,
 'max_bin': 150.0,
 'max_depth': 19.0,
 'min_child_weight': 50.0,
 'min_data_in_leaf': 210.0,
 'min_gain_to_split': 0.12341280434109966,
 'n_estimators': 1740.0,
 'num_leaves': 98.0,
 'reg_alpha': 0.045042345994512505,
 'reg_lambda': 0.06385424987056496,
 'seed': 0}

In [79]:
best = {'bagging_fraction': 0.40650893650961334,
 'colsample_bytree': 0.8966358403650636,
 'feature_fraction': 0.21407021825056116,
 'learning_rate': 0.024217122627912854,
 'max_bin': 150,
 'max_depth': 19,
 'min_child_weight': 50,
 'min_data_in_leaf': 210,
 'min_gain_to_split': 0.12341280434109966,
 'n_estimators': 1740,
 'num_leaves': 98,
 'reg_alpha': 0.045042345994512505,
 'reg_lambda': 0.06385424987056496,
 'seed': 0}

In [91]:
classifier = LGBMClassifier(
    **best
                           )
classifier.fit(X, y)

LGBMClassifier(bagging_fraction=0.40650893650961334, boosting_type='gbdt',
        class_weight=None, colsample_bytree=0.8966358403650636,
        feature_fraction=0.21407021825056116,
        learning_rate=0.024217122627912854, max_bin=150, max_depth=19,
        min_child_samples=20, min_child_weight=50, min_data_in_leaf=210,
        min_gain_to_split=0.12341280434109966, min_split_gain=0.0,
        n_estimators=1740, n_jobs=-1, num_leaves=98, objective=None,
        random_state=None, reg_alpha=0.045042345994512505,
        reg_lambda=0.06385424987056496, seed=0, silent=True, subsample=1.0,
        subsample_for_bin=200000, subsample_freq=0)

In [90]:
y

array([1, 0, 0, ..., 0, 1, 0], dtype=int8)

#### FEATURE IMPORTANCE

In [44]:
# f_imp_2 = pd.Series(classifier.booster_.feature_importance(importance_type='gain'), index = X_train.columns)

In [45]:
# f_imp_2.sort_values(ascending=False).reset_index().to_csv('feature_importance_2.csv',index=False)

In [46]:
# f_imp_2.sort_values(ascending=False)

EXT_SOURCE_mean                                                        48226.532679
EXT_SOURCE sum                                                         17731.448555
NEW_CREDIT_TO_ANNUITY_RATIO                                            17715.334166
EXT_SOURCE_2  EXT_SOURCE_3                                             17029.879327
EXT_SOURCE_1  EXT_SOURCE_3                                             16460.827658
EXT_SOURCE / mean                                                      14965.259868
EXT_SOURCE prod                                                        11818.507584
EXT_SOURCE_2 / DAYS_BIRTH_x                                             9441.717042
EXT_SOURCE_1  EXT_SOURCE_2                                              9322.067615
EXT_SOURCE_3 / DAYS_BIRTH_x                                             9103.344234
EXT_SOURCE_1 / EXT_SOURCE_2                                             8828.068591
DAYS_BIRTH                                                              7561

In [48]:
# len(list(f_imp[f_imp==0].index))

126

In [None]:
#list2=pd.read_csv('feature_importance_2.csv')

In [None]:
# list(list2[list2['0']==0]['index'])

In [49]:
# len(set(list(f_imp_2[f_imp_2==0].index)).intersection(set(list(f_imp[f_imp==0].index))))

105

In [52]:
# no_imp_feat = list(set(list(f_imp_2[f_imp_2==0].index)).intersection(set(list(f_imp[f_imp==0].index))))

In [86]:
y_pred_prob_test = classifier.predict_proba(X_test)[:,1]
y_pred_prob_train = classifier.predict_proba(X_train)[:,1]
y_pred_prob_val = classifier.predict_proba(X_val)[:,1]
print("cross validation ROC score: "+str(roc_auc_score(y_val, y_pred_prob_val)))
print("train validation ROC score: "+str(roc_auc_score(y_train, y_pred_prob_train)))
print("test validation ROC score: "+str(roc_auc_score(y_test, y_pred_prob_test)))

cross validation ROC score: 0.7920794687487851
train validation ROC score: 0.9800075384508335
test validation ROC score: 0.7863595131478331


In [None]:
cross validation ROC score: 0.9137931034482758
train validation ROC score: 0.8705278275836836

cross validation ROC score: 0.9310344827586207
train validation ROC score: 0.8705024149628342

cross validation ROC score: 0.9482758620689655
train validation ROC score: 0.8704024642319563

In [None]:
cross validation ROC score: 0.794314762557148
train validation ROC score: 0.8779015198010918

    
cross validation ROC score: 0.7945101703751719
train validation ROC score: 0.8934470093237312

cross validation ROC score: 0.7945233962936337
train validation ROC score: 0.8931255026150717
    
cross validation ROC score: 0.7940571775008592
train validation ROC score: 0.8870916546568616

In [None]:
y_val

In [None]:
y_pred_prob_val

In [None]:
cross validation ROC score: 0.7964193387212095
train validation ROC score: 0.9228320299500886
test validation ROC score: 0.7903973805554734  - 0.796

cross validation ROC score: 0.7962487655738522
train validation ROC score: 0.9099766559268216
test validation ROC score: 0.7902121371458123   - 

In [None]:
np.unique(y_train,return_counts=True)

In [None]:
cross validation ROC score: 0.7965079835029962
train validation ROC score: 0.9047996835447147
test validation ROC score: 0.7896592733525682

cross validation ROC score: 0.7959102995391298
train validation ROC score: 0.8701826256905496
test validation ROC score: 0.790035373608547

cross validation ROC score: 0.7958931388679629
train validation ROC score: 0.8669098756140712
test validation ROC score: 0.7893263039968563
    
cross validation ROC score: 0.7964702776754957
train validation ROC score: 0.9065846432128992
test validation ROC score: 0.7895785055538685
    
cross validation ROC score: 0.7958607164810749
train validation ROC score: 0.8948006588412571
test validation ROC score: 0.7892271133767896

cross validation ROC score: 0.7961526581073808
train validation ROC score: 0.9182231590583936
test validation ROC score: 0.7891496300357079



In [100]:
test.shape

(48744, 503)

##### Predicting the test file

In [101]:
# #Predicting TARGET in test data set
X = test.drop(columns = ['SK_ID_CURR'])
X = sc_X.transform(X)
target = classifier.predict(X)
target_prob = classifier.predict_proba(X)[:,1]

  if diff:


##### Saving the submission file

In [102]:
test_sk_id = tmp_app_test['SK_ID_CURR']
submit_df = pd.DataFrame(data = list(zip(test_sk_id,target_prob)),columns = ['SK_ID_CURR','TARGET'])
submit_df.to_csv('submission_lgbm.csv',index = False)