In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from sklearn.metrics import roc_auc_score, precision_recall_curve, roc_curve, average_precision_score
from xgboost import XGBClassifier
from sklearn.model_selection import KFold, train_test_split

In [2]:
import gc
gc.enable()

In [3]:
def train_model(train_, val_x_, val_y_, test_, y_, folds_):
    feats_ = [f_ for f_ in test_.columns if f_ not in ['SK_ID_CURR']]
    oof_preds = np.zeros(train_.shape[0])
    val_preds = np.zeros(val_x_.shape[0])
    sub_preds = np.zeros(test_.shape[0])
    

    
    for n_fold, (trn_idx, val_idx) in enumerate(folds_.split(train_)):
        #print(train_.type)
        trn_x, trn_y = pd.DataFrame(train_).iloc[trn_idx], pd.DataFrame(y_).iloc[trn_idx]
        val_x, val_y = pd.DataFrame(train_).iloc[val_idx], pd.DataFrame(y_).iloc[val_idx]
        
        clf = XGBClassifier(
            min_child_weight = 0.01, 
            learning_rate = 0.3, 
            max_depth = 10, 
            n_estimators = 50, 
            n_jobs = -1,
            gamma = 0.5, 
            subsample = 0.9, 
            colsample_bytree = 0.8, 
            booster = 'gbtree', 
            scale_pos_weight = 2, 
            reg_alpha = 1
            )
        clf.fit(trn_x, trn_y, 
            eval_set = [(trn_x, trn_y), (val_x, val_y)],
            eval_metric = 'auc', verbose =True, early_stopping_rounds = 10)
        
        oof_preds[val_idx] = clf.predict_proba(val_x)[:, 1]
        val_preds += clf.predict_proba(pd.DataFrame(val_x_[feats_]))[:, 1] / folds_.n_splits
        sub_preds += clf.predict_proba(pd.DataFrame(test_[feats_]))[:, 1] / folds_.n_splits
        print('fold %2d validate AUC score %.6f'%(n_fold + 1,roc_auc_score(val_y_, val_preds) * folds_.n_splits))
        print('fold %2d AUC %.6f'%(n_fold+1, roc_auc_score(val_y, oof_preds[val_idx])))
        del clf, trn_x, trn_y, val_x, val_y
        gc.collect()
    print('validate AUC score %.6f'%roc_auc_score(val_y_, val_preds))
    print('full AUC score %.6f'%roc_auc_score(y_, oof_preds))
    test_['TARGET'] = sub_preds
    
    return oof_preds, test_[['SK_ID_CURR', 'TARGET']]

In [4]:
train = pd.read_csv('../data/train_.csv')
test = pd.read_csv('../data/test_.csv')
y = pd.read_csv('../data/y_.csv')
val_x = pd.read_csv('../data/val_x_.csv')
val_y = pd.read_csv('../data/val_y_.csv')

In [5]:
print(train.shape,test.shape,val_x.shape)
#train_x, val_x, train_y, val_y = train_test_split(train, y, test_size=0.1, random_state = 14)
folds = KFold(n_splits=5, shuffle=True, random_state=0)

(508761, 389) (48744, 390) (30752, 390)


In [8]:
y.head()

Unnamed: 0.1,Unnamed: 0,0
0,0,0
1,1,0
2,2,0
3,3,1
4,4,0


In [9]:
val_y.head()

Unnamed: 0.1,Unnamed: 0,TARGET
0,32413,0
1,226702,0
2,106022,0
3,62854,0
4,232481,0


In [10]:
val_x.head()

Unnamed: 0.1,Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE,HOUSETYPE_MODE,TOTALAREA_MODE,WALLSMATERIAL_MODE,EMERGENCYSTATE_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,bur_SK_ID_BUREAU,bur_DAYS_CREDIT,bur_CREDIT_DAY_OVERDUE,bur_DAYS_CREDIT_ENDDATE,bur_DAYS_ENDDATE_FACT,bur_AMT_CREDIT_MAX_OVERDUE,bur_CNT_CREDIT_PROLONG,bur_AMT_CREDIT_SUM,bur_AMT_CREDIT_SUM_DEBT,bur_AMT_CREDIT_SUM_LIMIT,bur_AMT_CREDIT_SUM_OVERDUE,bur_DAYS_CREDIT_UPDATE,bur_AMT_ANNUITY,bur_ca_Active,bur_ca_Bad debt,bur_ca_Closed,bur_ca_Sold,bur_cc_currency 1,bur_cc_currency 2,bur_cc_currency 3,bur_cc_currency 4,bur_ct_Another type of loan,bur_ct_Car loan,bur_ct_Cash loan (non-earmarked),bur_ct_Consumer credit,bur_ct_Credit card,bur_ct_Interbank credit,bur_ct_Loan for business development,bur_ct_Loan for purchase of shares (margin lending),bur_ct_Loan for the purchase of equipment,bur_ct_Loan for working capital replenishment,bur_ct_Microloan,bur_ct_Mobile operator loan,bur_ct_Mortgage,bur_ct_Real estate loan,bur_ct_Unknown type of loan,bur_bur_bal_MONTHS_BALANCE,bur_bur_bal_bur_bal_status_0,bur_bur_bal_bur_bal_status_1,bur_bur_bal_bur_bal_status_2,bur_bur_bal_bur_bal_status_3,bur_bur_bal_bur_bal_status_4,bur_bur_bal_bur_bal_status_5,bur_bur_bal_bur_bal_status_C,bur_bur_bal_bur_bal_status_X,bur_bur_bal_bur_cnt,prev_SK_ID_PREV,prev_AMT_ANNUITY,prev_AMT_APPLICATION,prev_AMT_CREDIT,prev_AMT_DOWN_PAYMENT,prev_AMT_GOODS_PRICE,prev_HOUR_APPR_PROCESS_START,prev_NFLAG_LAST_APPL_IN_DAY,prev_RATE_DOWN_PAYMENT,prev_RATE_INTEREST_PRIMARY,prev_RATE_INTEREST_PRIVILEGED,prev_DAYS_DECISION,prev_SELLERPLACE_AREA,prev_CNT_PAYMENT,prev_DAYS_FIRST_DRAWING,prev_DAYS_FIRST_DUE,prev_DAYS_LAST_DUE_1ST_VERSION,prev_DAYS_LAST_DUE,prev_DAYS_TERMINATION,prev_NFLAG_INSURED_ON_APPROVAL,prev_NAME_CONTRACT_TYPE_Cash loans,prev_NAME_CONTRACT_TYPE_Consumer loans,prev_NAME_CONTRACT_TYPE_Revolving loans,prev_NAME_CONTRACT_TYPE_XNA,prev_WEEKDAY_APPR_PROCESS_START_FRIDAY,prev_WEEKDAY_APPR_PROCESS_START_MONDAY,prev_WEEKDAY_APPR_PROCESS_START_SATURDAY,prev_WEEKDAY_APPR_PROCESS_START_SUNDAY,prev_WEEKDAY_APPR_PROCESS_START_THURSDAY,prev_WEEKDAY_APPR_PROCESS_START_TUESDAY,prev_WEEKDAY_APPR_PROCESS_START_WEDNESDAY,prev_FLAG_LAST_APPL_PER_CONTRACT_N,prev_FLAG_LAST_APPL_PER_CONTRACT_Y,prev_NAME_CASH_LOAN_PURPOSE_Building a house or an annex,prev_NAME_CASH_LOAN_PURPOSE_Business development,prev_NAME_CASH_LOAN_PURPOSE_Buying a garage,prev_NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land,prev_NAME_CASH_LOAN_PURPOSE_Buying a home,prev_NAME_CASH_LOAN_PURPOSE_Buying a new car,prev_NAME_CASH_LOAN_PURPOSE_Buying a used car,prev_NAME_CASH_LOAN_PURPOSE_Car repairs,prev_NAME_CASH_LOAN_PURPOSE_Education,prev_NAME_CASH_LOAN_PURPOSE_Everyday expenses,prev_NAME_CASH_LOAN_PURPOSE_Furniture,prev_NAME_CASH_LOAN_PURPOSE_Gasification / water supply,prev_NAME_CASH_LOAN_PURPOSE_Hobby,prev_NAME_CASH_LOAN_PURPOSE_Journey,prev_NAME_CASH_LOAN_PURPOSE_Medicine,prev_NAME_CASH_LOAN_PURPOSE_Money for a third person,prev_NAME_CASH_LOAN_PURPOSE_Other,prev_NAME_CASH_LOAN_PURPOSE_Payments on other loans,prev_NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment,prev_NAME_CASH_LOAN_PURPOSE_Refusal to name the goal,prev_NAME_CASH_LOAN_PURPOSE_Repairs,prev_NAME_CASH_LOAN_PURPOSE_Urgent needs,prev_NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday,prev_NAME_CASH_LOAN_PURPOSE_XAP,prev_NAME_CASH_LOAN_PURPOSE_XNA,prev_NAME_CONTRACT_STATUS_Approved,prev_NAME_CONTRACT_STATUS_Canceled,prev_NAME_CONTRACT_STATUS_Refused,prev_NAME_CONTRACT_STATUS_Unused offer,prev_NAME_PAYMENT_TYPE_Cash through the bank,prev_NAME_PAYMENT_TYPE_Cashless from the account of the employer,prev_NAME_PAYMENT_TYPE_Non-cash from your account,prev_NAME_PAYMENT_TYPE_XNA,prev_CODE_REJECT_REASON_CLIENT,prev_CODE_REJECT_REASON_HC,prev_CODE_REJECT_REASON_LIMIT,prev_CODE_REJECT_REASON_SCO,prev_CODE_REJECT_REASON_SCOFR,prev_CODE_REJECT_REASON_SYSTEM,prev_CODE_REJECT_REASON_VERIF,prev_CODE_REJECT_REASON_XAP,prev_CODE_REJECT_REASON_XNA,prev_NAME_TYPE_SUITE_Children,prev_NAME_TYPE_SUITE_Family,prev_NAME_TYPE_SUITE_Group of people,prev_NAME_TYPE_SUITE_Other_A,prev_NAME_TYPE_SUITE_Other_B,"prev_NAME_TYPE_SUITE_Spouse, partner",prev_NAME_TYPE_SUITE_Unaccompanied,prev_NAME_CLIENT_TYPE_New,prev_NAME_CLIENT_TYPE_Refreshed,prev_NAME_CLIENT_TYPE_Repeater,prev_NAME_CLIENT_TYPE_XNA,prev_NAME_GOODS_CATEGORY_Additional Service,prev_NAME_GOODS_CATEGORY_Animals,prev_NAME_GOODS_CATEGORY_Audio/Video,prev_NAME_GOODS_CATEGORY_Auto Accessories,prev_NAME_GOODS_CATEGORY_Clothing and Accessories,prev_NAME_GOODS_CATEGORY_Computers,prev_NAME_GOODS_CATEGORY_Construction Materials,prev_NAME_GOODS_CATEGORY_Consumer Electronics,prev_NAME_GOODS_CATEGORY_Direct Sales,prev_NAME_GOODS_CATEGORY_Education,prev_NAME_GOODS_CATEGORY_Fitness,prev_NAME_GOODS_CATEGORY_Furniture,prev_NAME_GOODS_CATEGORY_Gardening,prev_NAME_GOODS_CATEGORY_Homewares,prev_NAME_GOODS_CATEGORY_House Construction,prev_NAME_GOODS_CATEGORY_Insurance,prev_NAME_GOODS_CATEGORY_Jewelry,prev_NAME_GOODS_CATEGORY_Medical Supplies,prev_NAME_GOODS_CATEGORY_Medicine,prev_NAME_GOODS_CATEGORY_Mobile,prev_NAME_GOODS_CATEGORY_Office Appliances,prev_NAME_GOODS_CATEGORY_Other,prev_NAME_GOODS_CATEGORY_Photo / Cinema Equipment,prev_NAME_GOODS_CATEGORY_Sport and Leisure,prev_NAME_GOODS_CATEGORY_Tourism,prev_NAME_GOODS_CATEGORY_Vehicles,prev_NAME_GOODS_CATEGORY_Weapon,prev_NAME_GOODS_CATEGORY_XNA,prev_NAME_PORTFOLIO_Cards,prev_NAME_PORTFOLIO_Cars,prev_NAME_PORTFOLIO_Cash,prev_NAME_PORTFOLIO_POS,prev_NAME_PORTFOLIO_XNA,prev_NAME_PRODUCT_TYPE_XNA,prev_NAME_PRODUCT_TYPE_walk-in,prev_NAME_PRODUCT_TYPE_x-sell,prev_CHANNEL_TYPE_AP+ (Cash loan),prev_CHANNEL_TYPE_Car dealer,prev_CHANNEL_TYPE_Channel of corporate sales,prev_CHANNEL_TYPE_Contact center,prev_CHANNEL_TYPE_Country-wide,prev_CHANNEL_TYPE_Credit and cash offices,prev_CHANNEL_TYPE_Regional / Local,prev_CHANNEL_TYPE_Stone,prev_NAME_SELLER_INDUSTRY_Auto technology,prev_NAME_SELLER_INDUSTRY_Clothing,prev_NAME_SELLER_INDUSTRY_Connectivity,prev_NAME_SELLER_INDUSTRY_Construction,prev_NAME_SELLER_INDUSTRY_Consumer electronics,prev_NAME_SELLER_INDUSTRY_Furniture,prev_NAME_SELLER_INDUSTRY_Industry,prev_NAME_SELLER_INDUSTRY_Jewelry,prev_NAME_SELLER_INDUSTRY_MLM partners,prev_NAME_SELLER_INDUSTRY_Tourism,prev_NAME_SELLER_INDUSTRY_XNA,prev_NAME_YIELD_GROUP_XNA,prev_NAME_YIELD_GROUP_high,prev_NAME_YIELD_GROUP_low_action,prev_NAME_YIELD_GROUP_low_normal,prev_NAME_YIELD_GROUP_middle,prev_PRODUCT_COMBINATION_Card Street,prev_PRODUCT_COMBINATION_Card X-Sell,prev_PRODUCT_COMBINATION_Cash,prev_PRODUCT_COMBINATION_Cash Street: high,prev_PRODUCT_COMBINATION_Cash Street: low,prev_PRODUCT_COMBINATION_Cash Street: middle,prev_PRODUCT_COMBINATION_Cash X-Sell: high,prev_PRODUCT_COMBINATION_Cash X-Sell: low,prev_PRODUCT_COMBINATION_Cash X-Sell: middle,prev_PRODUCT_COMBINATION_POS household with interest,prev_PRODUCT_COMBINATION_POS household without interest,prev_PRODUCT_COMBINATION_POS industry with interest,prev_PRODUCT_COMBINATION_POS industry without interest,prev_PRODUCT_COMBINATION_POS mobile with interest,prev_PRODUCT_COMBINATION_POS mobile without interest,prev_PRODUCT_COMBINATION_POS other with interest,prev_PRODUCT_COMBINATION_POS others without interest,pos_SK_ID_PREV,pos_MONTHS_BALANCE,pos_CNT_INSTALMENT,pos_CNT_INSTALMENT_FUTURE,pos_SK_DPD,pos_SK_DPD_DEF,pos_ncs_Active,pos_ncs_Amortized debt,pos_ncs_Approved,pos_ncs_Canceled,pos_ncs_Completed,pos_ncs_Demand,pos_ncs_Returned to the store,pos_ncs_Signed,pos_ncs_XNA,cc_bal_SK_ID_PREV,cc_bal_MONTHS_BALANCE,cc_bal_AMT_BALANCE,cc_bal_AMT_CREDIT_LIMIT_ACTUAL,cc_bal_AMT_DRAWINGS_ATM_CURRENT,cc_bal_AMT_DRAWINGS_CURRENT,cc_bal_AMT_DRAWINGS_OTHER_CURRENT,cc_bal_AMT_DRAWINGS_POS_CURRENT,cc_bal_AMT_INST_MIN_REGULARITY,cc_bal_AMT_PAYMENT_CURRENT,cc_bal_AMT_PAYMENT_TOTAL_CURRENT,cc_bal_AMT_RECEIVABLE_PRINCIPAL,cc_bal_AMT_RECIVABLE,cc_bal_AMT_TOTAL_RECEIVABLE,cc_bal_CNT_DRAWINGS_ATM_CURRENT,cc_bal_CNT_DRAWINGS_CURRENT,cc_bal_CNT_DRAWINGS_OTHER_CURRENT,cc_bal_CNT_DRAWINGS_POS_CURRENT,cc_bal_CNT_INSTALMENT_MATURE_CUM,cc_bal_SK_DPD,cc_bal_SK_DPD_DEF,cc_bal_ncs_Active,cc_bal_ncs_Approved,cc_bal_ncs_Completed,cc_bal_ncs_Demand,cc_bal_ncs_Refused,cc_bal_ncs_Sent proposal,cc_bal_ncs_Signed,inst_SK_ID_PREV,inst_NUM_INSTALMENT_VERSION,inst_NUM_INSTALMENT_NUMBER,inst_DAYS_INSTALMENT,inst_DAYS_ENTRY_PAYMENT,inst_AMT_INSTALMENT,inst_AMT_PAYMENT,EXT_SOURCE_1over2_NAminus1_Add0.1,EXT_SOURCE_2over1_NAminus1_Add0.1,EXT_SOURCE_1over3_NAminus1_Add0.1,EXT_SOURCE_3over1_NAminus1_Add0.1,EXT_SOURCE_2over3_NAminus1_Add0.1,EXT_SOURCE_3over2_NAminus1_Add0.1,EXT_SOURCE_1_log,EXT_SOURCE_2_log,EXT_SOURCE_3_log
0,32413,137571,0,1,0,0,0,90000.0,337500.0,20394.0,337500.0,0,2,0,0,0,0.01522,-15042,-2271,-6308.0,-4533,-1.0,1,1,0,1,0,0,7,1.0,2,2,6,8,0,0,0,0,0,0,31,-1.0,0.6836,-1.0,0.2412,0.0761,0.982,0.7554,0.0649,0.08,0.0345,0.3333,0.0417,0.0327,0.1967,0.0893,0.0,0.0,0.2458,0.079,0.982,0.7646,0.0655,0.0806,0.0345,0.3333,0.0417,0.0335,0.2148,0.0931,0.0,0.0,0.2437,0.0761,0.982,0.7583,0.0653,0.08,0.0345,0.3333,0.0417,0.0333,0.2001,0.0909,0.0,0.0,3,0,0.1058,0,0,1.0,0.0,0.0,0.0,-1550.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,6.0,8547.412,58948.5,95901.0,52290.0,117897.0,12.164,1.0,0.7236,-1.0,-1.0,-779.5,549.6667,7.5,243243.0,-854.3333,121412.0,243019.0,243021.0,0.0,0.5,0.1666,0.3333,0.0,0.0,0.5,0.3333,0.0,0.1666,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.5,0.3333,0.1666,0.0,0.3333,0.0,0.0,0.6665,0.0,0.0,0.1666,0.0,0.0,0.0,0.0,0.8335,0.0,0.0,0.1666,0.0,0.0,0.0,0.0,0.1666,0.1666,0.0,0.8335,0.0,0.0,0.0,0.1666,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8335,0.3333,0.0,0.1666,0.1666,0.3333,0.5,0.1666,0.3333,0.0,0.0,0.0,0.0,0.1666,0.8335,0.0,0.0,0.0,0.0,0.0,0.0,0.1666,0.0,0.0,0.0,0.0,0.0,0.8335,0.6665,0.1666,0.0,0.1666,0.0,0.1666,0.1666,0.3333,0.0,0.0,0.0,0.0,0.1666,0.0,0.1666,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0,-19.0,18.36,13.65,0.0,0.0,0.9414,0.0,0.0,0.0,0.05884,0.0,0.0,0.0,0.0,25.0,-13.0,31619.598,216540.0,7200.0,13451.04,0.0,6251.04,1994.0,15846.819,13219.137,31088.92,31644.404,31644.404,0.08,0.16,0.0,0.08,9.6,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,56.0,0.2856,16.73,-446.0,-452.8,10200.322,10200.322,-1.148,-0.871,1.0,1.0,-0.871,-1.148,-1.0,0.521,-1.0
1,226702,362592,0,0,1,1,0,216000.0,835380.0,31086.0,675000.0,0,2,1,1,0,0.0188,-18093,-5164,-4930.0,-1639,5.0,1,1,1,1,0,0,5,2.0,2,2,6,13,0,0,0,0,0,0,0,-1.0,0.594,0.2213,0.0258,0.0,0.9844,-1.0,-1.0,0.0,0.0345,0.0417,-1.0,0.0,-1.0,0.0079,-1.0,0.0,0.0263,0.0,0.9844,-1.0,-1.0,0.0,0.0345,0.0417,-1.0,0.0,-1.0,0.0083,-1.0,0.0,0.026,0.0,0.9844,-1.0,-1.0,0.0,0.0345,0.0417,-1.0,0.0,-1.0,0.0081,-1.0,0.0,-1,2,0.0094,0,0,0.0,0.0,0.0,0.0,-9.0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,-264.8,0.0,14170.0,-1.0,0.0,0.0,464400.0,391069.0,45230.76,0.0,-23.67,-1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3333,0.6665,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.297,-0.771,-2.8,-0.357,2.16,0.463,-1.0,0.4663,0.2
2,106022,223011,0,1,1,0,3,90000.0,450000.0,20979.0,450000.0,0,0,0,1,0,0.01522,-8546,-1801,-3188.0,-1221,17.0,1,1,0,1,0,0,11,5.0,2,2,2,14,0,0,0,1,1,1,9,-1.0,0.0997,-1.0,0.0186,0.0633,0.9873,0.823,0.0,0.0,0.1034,0.0417,-1.0,0.0132,0.0151,0.0195,0.0,0.0,0.0189,0.0657,0.9873,0.83,0.0,0.0,0.1034,0.0417,-1.0,0.0135,0.0165,0.0203,0.0,0.0,0.0187,0.0633,0.9873,0.8257,0.0,0.0,0.1034,0.0417,-1.0,0.0135,0.0154,0.0198,0.0,0.0,-1,0,0.0153,0,0,0.0,0.0,0.0,0.0,-433.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,13899.69,68710.5,67599.0,4500.0,68710.5,17.0,1.0,0.068,-1.0,-1.0,-433.0,40.0,6.0,365243.0,-402.0,-252.0,-252.0,-249.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,8.0,-11.5,6.0,2.75,0.0,0.0,0.75,0.0,0.0,0.0,0.125,0.0,0.0,0.125,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,6.0,1.0,3.5,-327.0,-339.5,13899.285,13899.285,-4.508,-0.2219,1.0,1.0,-0.2219,-4.508,-1.0,0.09503,-1.0
3,62854,172904,0,1,0,0,0,247500.0,2290743.0,67108.5,2047500.0,1,3,0,1,0,0.01662,-17284,365243,-8200.0,-837,-1.0,1,0,0,1,0,0,-1,2.0,2,2,3,10,0,0,0,0,0,0,5,-1.0,0.12225,0.2045,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,-1,-1.0,-1,-1,1.0,1.0,1.0,1.0,-890.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,2.0,5.0,6.0,-988.0,0.0,-135.1,-841.0,0.0,0.0,417417.75,330114.38,0.0,0.0,-697.5,-1.0,0.1666,0.0,0.8335,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,14.0,12792.515,122142.86,147456.33,-1.0,171000.0,11.93,1.0,-1.0,-1.0,-1.0,-557.0,-1.0,15.0,365243.0,-667.75,157.25,-517.75,-511.0,1.0,0.643,0.0,0.3572,0.0,0.0,0.2142,0.1428,0.0714,0.2856,0.2142,0.0714,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0714,0.0,0.1428,0.0,0.0,0.0,0.0,0.0,0.0,0.3572,0.4285,0.3572,0.2856,0.3572,0.0,0.3572,0.0,0.0,0.643,0.0,0.2142,0.0,0.0714,0.0714,0.0,0.0,0.643,0.0,0.0,0.2142,0.0,0.0,0.0,0.0,0.0714,0.0714,0.0,0.9287,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.3572,0.0,0.3572,0.0,0.2856,0.2856,0.7144,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.643,0.2856,0.0,0.0,0.0714,0.3572,0.0,0.2856,0.2856,0.0,0.0714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,29.0,-19.72,22.14,18.6,0.0,0.0,0.8623,0.0,0.0,0.0,0.138,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,24.0,1.167,3.916,-569.5,-583.5,71848.914,71848.914,-4.05,-0.247,-2.957,-0.3381,0.73,1.369,-1.0,0.11536,0.186
4,232481,369273,0,0,1,1,0,135000.0,472500.0,44635.5,454500.0,0,3,1,1,0,0.00933,-23224,365243,-3944.0,-4556,8.0,1,0,0,1,1,0,-1,2.0,2,2,3,11,0,0,0,0,0,0,5,0.8057,0.6636,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,-1,-1.0,-1,-1,7.0,0.0,7.0,0.0,-1267.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2.0,16818.436,90459.0,82804.5,11560.5,90459.0,14.5,1.0,0.1324,-1.0,-1.0,-1874.0,38.5,6.0,365243.0,-1843.0,-1693.0,-1693.0,-1684.5,0.5,0.0,1.0,0.0,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.5,0.5,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.0,-60.06,6.0,2.8,0.6,0.0,0.8667,0.0,0.0,0.0,0.1333,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,13.0,1.0,3.615,-1811.0,-1820.0,16783.809,15523.151,1.186,0.8433,-1.006,-0.994,-0.8486,-1.179,0.591,0.5093,-1.0


In [11]:
oof_preds, test_preds = train_model(train, val_x, val_y['TARGET'].values.ravel(), test, y['0'].values.ravel(), folds)
test_preds.to_csv('../data/xgb_submission.csv', index=False)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[0]	validation_0-auc:0.956117	validation_1-auc:0.950894
Multiple eval metrics have been passed: 'validation_1-auc' will be used for early stopping.

Will train until validation_1-auc hasn't improved in 10 rounds.
[1]	validation_0-auc:0.968353	validation_1-auc:0.963516
[2]	validation_0-auc:0.978755	validation_1-auc:0.974154
[3]	validation_0-auc:0.98005	validation_1-auc:0.975088
[4]	validation_0-auc:0.981193	validation_1-auc:0.975582
[5]	validation_0-auc:0.98256	validation_1-auc:0.976394
[6]	validation_0-auc:0.983536	validation_1-auc:0.97678
[7]	validation_0-auc:0.984305	validation_1-auc:0.976992
[8]	validation_0-auc:0.985276	validation_1-auc:0.97718
[9]	validation_0-auc:0.985967	validation_1-auc:0.977402
[10]	validation_0-auc:0.986645	validation_1-auc:0.97756
[11]	validation_0-auc:0.987314	validation_1-auc:0.977713
[12]	validation_0-auc:0.988087	validation_1-auc:0.977833
[13]	validation_0-auc:0.98852	validation_1-auc:0.9779
[14]	validation_0-auc:0.989255	validation_1-auc:0.978022
[15]	v

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[0]	validation_0-auc:0.953291	validation_1-auc:0.95011
Multiple eval metrics have been passed: 'validation_1-auc' will be used for early stopping.

Will train until validation_1-auc hasn't improved in 10 rounds.
[1]	validation_0-auc:0.969663	validation_1-auc:0.966405
[2]	validation_0-auc:0.978609	validation_1-auc:0.975043
[3]	validation_0-auc:0.980612	validation_1-auc:0.976373
[4]	validation_0-auc:0.981706	validation_1-auc:0.976801
[5]	validation_0-auc:0.982939	validation_1-auc:0.977411
[6]	validation_0-auc:0.9837	validation_1-auc:0.977716
[7]	validation_0-auc:0.984878	validation_1-auc:0.977916
[8]	validation_0-auc:0.985707	validation_1-auc:0.978167
[9]	validation_0-auc:0.98644	validation_1-auc:0.978311
[10]	validation_0-auc:0.987099	validation_1-auc:0.978574
[11]	validation_0-auc:0.987821	validation_1-auc:0.978693
[12]	validation_0-auc:0.988405	validation_1-auc:0.97871
[13]	validation_0-auc:0.988924	validation_1-auc:0.978716
[14]	validation_0-auc:0.98947	validation_1-auc:0.978758
[15]

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[0]	validation_0-auc:0.951452	validation_1-auc:0.947173
Multiple eval metrics have been passed: 'validation_1-auc' will be used for early stopping.

Will train until validation_1-auc hasn't improved in 10 rounds.
[1]	validation_0-auc:0.970345	validation_1-auc:0.966262
[2]	validation_0-auc:0.978719	validation_1-auc:0.97493
[3]	validation_0-auc:0.980616	validation_1-auc:0.976352
[4]	validation_0-auc:0.981644	validation_1-auc:0.976923
[5]	validation_0-auc:0.982677	validation_1-auc:0.977421
[6]	validation_0-auc:0.983694	validation_1-auc:0.97779
[7]	validation_0-auc:0.984527	validation_1-auc:0.977985
[8]	validation_0-auc:0.985461	validation_1-auc:0.978123
[9]	validation_0-auc:0.986268	validation_1-auc:0.978253
[10]	validation_0-auc:0.986928	validation_1-auc:0.978392
[11]	validation_0-auc:0.987654	validation_1-auc:0.97849
[12]	validation_0-auc:0.988212	validation_1-auc:0.978596
[13]	validation_0-auc:0.988761	validation_1-auc:0.978664
[14]	validation_0-auc:0.989383	validation_1-auc:0.978724
[

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[0]	validation_0-auc:0.952882	validation_1-auc:0.949958
Multiple eval metrics have been passed: 'validation_1-auc' will be used for early stopping.

Will train until validation_1-auc hasn't improved in 10 rounds.
[1]	validation_0-auc:0.970862	validation_1-auc:0.967949
[2]	validation_0-auc:0.978825	validation_1-auc:0.975426
[3]	validation_0-auc:0.980869	validation_1-auc:0.976669
[4]	validation_0-auc:0.981885	validation_1-auc:0.977077
[5]	validation_0-auc:0.983033	validation_1-auc:0.977495
[6]	validation_0-auc:0.983989	validation_1-auc:0.977826
[7]	validation_0-auc:0.984977	validation_1-auc:0.978007
[8]	validation_0-auc:0.985672	validation_1-auc:0.978077
[9]	validation_0-auc:0.986451	validation_1-auc:0.97824
[10]	validation_0-auc:0.987267	validation_1-auc:0.978308
[11]	validation_0-auc:0.987815	validation_1-auc:0.978406
[12]	validation_0-auc:0.988563	validation_1-auc:0.978545
[13]	validation_0-auc:0.989071	validation_1-auc:0.97867
[14]	validation_0-auc:0.989622	validation_1-auc:0.978693


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[0]	validation_0-auc:0.951332	validation_1-auc:0.94556
Multiple eval metrics have been passed: 'validation_1-auc' will be used for early stopping.

Will train until validation_1-auc hasn't improved in 10 rounds.
[1]	validation_0-auc:0.970203	validation_1-auc:0.965496
[2]	validation_0-auc:0.979124	validation_1-auc:0.974632
[3]	validation_0-auc:0.980999	validation_1-auc:0.976162
[4]	validation_0-auc:0.982012	validation_1-auc:0.976718
[5]	validation_0-auc:0.983108	validation_1-auc:0.977185
[6]	validation_0-auc:0.98411	validation_1-auc:0.977467
[7]	validation_0-auc:0.984821	validation_1-auc:0.977692
[8]	validation_0-auc:0.985633	validation_1-auc:0.977915
[9]	validation_0-auc:0.986357	validation_1-auc:0.978096
[10]	validation_0-auc:0.986995	validation_1-auc:0.978228
[11]	validation_0-auc:0.987732	validation_1-auc:0.978189
[12]	validation_0-auc:0.98855	validation_1-auc:0.978368
[13]	validation_0-auc:0.989143	validation_1-auc:0.978443
[14]	validation_0-auc:0.989781	validation_1-auc:0.9785
[15