In [16]:
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import (
    accuracy_score, confusion_matrix, classification_report,
    roc_auc_score, roc_curve, auc,
    plot_confusion_matrix, plot_roc_curve
)

from xgboost import XGBClassifier

pd.set_option('display.float', '{:.2f}'.format)
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 50)

In [330]:
data = pd.read_csv("./data/train/loan_train.csv")
data_y = pd.read_csv("./data/train/loan_train_label.csv")
data = pd.merge(data,data_y)
data.head()

Unnamed: 0,id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,pymnt_plan,purpose,title,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,recoveries,collection_recovery_fee,policy_code,tot_cur_bal,total_rev_hi_lim,loan_status
0,1,15000,15000,15000.0,60 months,19.99,397.33,E,E4,Sales Consultant,2 years,RENT,46900.0,n,debt_consolidation,Debt consolidation,VA,21.08,0,Jan-2010,2,10,0,11736,39.3,17,w,14401.39,14401.39,0.0,0.0,1,43804.0,29900.0,0
1,2,20000,20000,20000.0,36 months,18.99,733.02,E,E1,TRANSPORTAION MANAGER,10+ years,RENT,80000.0,n,debt_consolidation,Debt consolidation,NJ,22.71,0,Nov-1994,0,10,0,40950,97.7,29,w,11947.7,11947.7,0.0,0.0,1,50777.0,41900.0,0
2,3,10000,10000,10000.0,60 months,16.99,248.48,D,D3,supervisor,10+ years,MORTGAGE,55000.0,n,debt_consolidation,Debt consolidation,AL,15.03,0,Sep-2000,0,10,1,1759,12.0,25,w,9101.23,9101.23,0.0,0.0,1,149114.0,14700.0,0
3,4,12000,12000,12000.0,36 months,15.31,417.81,C,C2,TM AG LLC,2 years,OTHER,40000.0,n,credit_card,Credit card refinancing,SD,14.52,0,Aug-2005,0,10,0,8996,84.1,14,f,0.0,0.0,0.0,0.0,1,75043.0,10700.0,0
4,5,20000,20000,20000.0,36 months,9.99,645.25,B,B3,Instructor,10+ years,OWN,85000.0,n,home_improvement,Home improvement,DE,13.24,1,Mar-1993,0,8,0,7974,47.5,29,w,0.0,0.0,0.0,0.0,1,259060.0,16800.0,0


In [331]:
test = pd.read_csv("./data/test/loan_test.csv")
test

Unnamed: 0,id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,pymnt_plan,purpose,title,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,recoveries,collection_recovery_fee,policy_code,tot_cur_bal,total_rev_hi_lim
0,16001,2400,2400,2400.00,36 months,13.67,81.65,B,B5,Chase,6 years,MORTGAGE,85000.00,n,major_purchase,Apple Computer and Printer,KS,24.62,0,Jun-1998,3,10,1,27331,87.90,34,f,0.00,0.00,0.00,0.00,1,,
1,16002,20000,20000,20000.00,60 months,19.52,524.56,E,E2,Production manager,10+ years,MORTGAGE,74000.00,n,debt_consolidation,Debt consolidation,WI,22.27,0,Jan-2002,2,18,0,36512,68.20,43,w,0.00,0.00,0.00,0.00,1,258440.00,53500.00
2,16003,10625,10625,10625.00,36 months,15.31,369.94,C,C2,smithfield packing co.,10+ years,MORTGAGE,30404.00,n,credit_card,Credit card refinancing,NC,33.98,0,May-1978,2,7,1,12504,82.80,15,f,0.00,0.00,0.00,0.00,1,63443.00,15100.00
3,16004,16800,16800,16800.00,36 months,7.90,525.68,A,A4,L.A.co. Sheriff's Dept,5 years,MORTGAGE,80000.00,n,debt_consolidation,All together,CA,14.15,0,Apr-1985,0,10,0,12195,59.80,17,f,0.00,0.00,0.00,0.00,1,322284.00,20400.00
4,16005,10000,10000,9831.69,60 months,13.98,232.58,C,C3,Military,8 years,MORTGAGE,60000.00,n,debt_consolidation,Debt Consol,CO,18.40,0,Oct-2000,1,9,0,12398,29.70,30,f,0.00,0.00,0.00,0.00,1,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,19996,10500,10500,10500.00,60 months,18.54,269.73,E,E1,material handler,7 years,RENT,33000.00,n,credit_card,Credit card refinancing,TX,38.00,0,Jul-2003,1,9,0,4920,20.80,16,w,9094.47,9094.47,0.00,0.00,1,38056.00,23600.00
3996,19997,20000,20000,20000.00,60 months,16.29,489.45,D,D2,Nurse practitioner,1 year,RENT,100000.00,n,debt_consolidation,Debt consolidation,IN,13.49,4,Mar-1996,5,8,0,3571,57.60,45,f,15863.81,15863.81,0.00,0.00,1,328997.00,6200.00
3997,19998,10000,10000,10000.00,36 months,7.89,312.86,A,A5,City driver,5 years,RENT,70000.00,n,debt_consolidation,Debt consolidation,CA,17.73,0,Jan-1995,0,10,0,9257,68.60,17,w,7716.61,7716.61,0.00,0.00,1,31920.00,13500.00
3998,19999,35000,35000,35000.00,36 months,8.39,1103.09,A,A5,Director of Operations,2 years,MORTGAGE,85000.00,n,debt_consolidation,Debt consolidation,VA,2.88,0,Dec-2000,1,6,0,6715,14.30,26,f,0.00,0.00,0.00,0.00,1,16476.00,47100.00


In [332]:
data.describe()

Unnamed: 0,id,loan_amnt,funded_amnt,funded_amnt_inv,int_rate,installment,annual_inc,dti,delinq_2yrs,inq_last_6mths,open_acc,pub_rec,revol_bal,revol_util,total_acc,out_prncp,out_prncp_inv,recoveries,collection_recovery_fee,policy_code,tot_cur_bal,total_rev_hi_lim,loan_status
count,16000.0,16000.0,16000.0,16000.0,16000.0,16000.0,16000.0,16000.0,16000.0,16000.0,16000.0,16000.0,16000.0,15991.0,16000.0,16000.0,16000.0,16000.0,16000.0,16000.0,14004.0,14004.0,16000.0
mean,8000.5,14691.87,14671.51,14611.22,14.55,441.05,71059.51,18.22,0.31,0.82,11.35,0.18,15662.83,56.86,24.99,5888.97,5886.72,217.28,23.84,1.0,130164.46,29139.61,0.5
std,4618.95,8429.44,8421.27,8435.31,4.54,246.14,44610.86,8.24,0.87,1.06,5.12,0.49,18030.81,23.74,11.8,8062.96,8059.83,840.29,137.55,0.0,143455.34,28559.0,0.5
min,1.0,1000.0,1000.0,0.0,5.32,24.32,4524.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
25%,4000.75,8000.0,8000.0,8000.0,11.53,264.46,44000.0,12.11,0.0,0.0,8.0,0.0,6227.5,39.8,16.0,0.0,0.0,0.0,0.0,1.0,28740.75,13100.0,0.0
50%,8000.5,12800.0,12700.0,12600.0,14.31,386.75,60000.0,17.85,0.0,0.0,11.0,0.0,11472.5,58.5,23.0,0.0,0.0,0.0,0.0,1.0,70114.0,22000.0,0.5
75%,12000.25,20000.0,20000.0,20000.0,17.57,576.53,85000.0,23.93,0.0,1.0,14.0,0.0,19849.0,75.2,32.0,10263.24,10261.1,0.0,0.0,1.0,196170.5,36600.0,1.0
max,16000.0,35000.0,35000.0,35000.0,28.99,1374.63,950000.0,120.66,20.0,8.0,52.0,8.0,867528.0,148.0,91.0,35000.0,35000.0,24862.1,6543.04,1.0,1969261.0,1035000.0,1.0


In [333]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16000 entries, 0 to 15999
Data columns (total 35 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       16000 non-null  int64  
 1   loan_amnt                16000 non-null  int64  
 2   funded_amnt              16000 non-null  int64  
 3   funded_amnt_inv          16000 non-null  float64
 4   term                     16000 non-null  object 
 5   int_rate                 16000 non-null  float64
 6   installment              16000 non-null  float64
 7   grade                    16000 non-null  object 
 8   sub_grade                16000 non-null  object 
 9   emp_title                15011 non-null  object 
 10  emp_length               15191 non-null  object 
 11  home_ownership           16000 non-null  object 
 12  annual_inc               16000 non-null  float64
 13  pymnt_plan               16000 non-null  object 
 14  purpose               

## 변수 추출

In [334]:
data = data[['loan_status','loan_amnt','term','int_rate','sub_grade','emp_length','home_ownership','annual_inc','purpose','dti',
           'delinq_2yrs','earliest_cr_line','inq_last_6mths','open_acc','pub_rec','revol_bal','revol_util','total_acc',
           'initial_list_status','out_prncp','recoveries','tot_cur_bal']]
test = test[['loan_amnt','term','int_rate','sub_grade','emp_length','home_ownership','annual_inc','purpose','dti',
           'delinq_2yrs','earliest_cr_line','inq_last_6mths','open_acc','pub_rec','revol_bal','revol_util','total_acc',
           'initial_list_status','out_prncp','recoveries','tot_cur_bal']]
data.head()

Unnamed: 0,loan_status,loan_amnt,term,int_rate,sub_grade,emp_length,home_ownership,annual_inc,purpose,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,recoveries,tot_cur_bal
0,0,15000,60 months,19.99,E4,2 years,RENT,46900.0,debt_consolidation,21.08,0,Jan-2010,2,10,0,11736,39.3,17,w,14401.39,0.0,43804.0
1,0,20000,36 months,18.99,E1,10+ years,RENT,80000.0,debt_consolidation,22.71,0,Nov-1994,0,10,0,40950,97.7,29,w,11947.7,0.0,50777.0
2,0,10000,60 months,16.99,D3,10+ years,MORTGAGE,55000.0,debt_consolidation,15.03,0,Sep-2000,0,10,1,1759,12.0,25,w,9101.23,0.0,149114.0
3,0,12000,36 months,15.31,C2,2 years,OTHER,40000.0,credit_card,14.52,0,Aug-2005,0,10,0,8996,84.1,14,f,0.0,0.0,75043.0
4,0,20000,36 months,9.99,B3,10+ years,OWN,85000.0,home_improvement,13.24,1,Mar-1993,0,8,0,7974,47.5,29,w,0.0,0.0,259060.0


## 기타 변수 처리

emp_length, home_ownership, earliest_cr_line

In [335]:
data.emp_length.unique()

array(['2 years', '10+ years', '3 years', '6 years', '5 years', '4 years',
       '< 1 year', '7 years', '8 years', '1 year', nan, '9 years'],
      dtype=object)

In [336]:
data = data.replace({'2 years':2, '10+ years':10, '3 years':3, '6 years':6, '5 years':5, '4 years':4,
       '< 1 year':0, '7 years':7, '8 years':8, '1 year':1, '9 years':9})
test = test.replace({'2 years':2, '10+ years':10, '3 years':3, '6 years':6, '5 years':5, '4 years':4,
       '< 1 year':0, '7 years':7, '8 years':8, '1 year':1, '9 years':9})

In [337]:
# 결측치는 최빈값인 10으로 대체
data = data.fillna({'emp_length':10})
test = test.fillna({'emp_length':10})

In [338]:
tmp = data[(data['home_ownership'] == 'OTHER') | (data['home_ownership'] == 'NONE')].index
data = data.drop(tmp)
test.loc[2621,'home_ownership']='RENT'

In [339]:
data['earliest_cr_line'] = data['earliest_cr_line'].str.split("-").str[1]
test['earliest_cr_line'] = test['earliest_cr_line'].str.split("-").str[1]
test['earliest_cr_line']= test['earliest_cr_line'].fillna('2000')
data.head()

Unnamed: 0,loan_status,loan_amnt,term,int_rate,sub_grade,emp_length,home_ownership,annual_inc,purpose,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,recoveries,tot_cur_bal
0,0,15000,60 months,19.99,E4,2.0,RENT,46900.0,debt_consolidation,21.08,0,2010,2,10,0,11736,39.3,17,w,14401.39,0.0,43804.0
1,0,20000,36 months,18.99,E1,10.0,RENT,80000.0,debt_consolidation,22.71,0,1994,0,10,0,40950,97.7,29,w,11947.7,0.0,50777.0
2,0,10000,60 months,16.99,D3,10.0,MORTGAGE,55000.0,debt_consolidation,15.03,0,2000,0,10,1,1759,12.0,25,w,9101.23,0.0,149114.0
4,0,20000,36 months,9.99,B3,10.0,OWN,85000.0,home_improvement,13.24,1,1993,0,8,0,7974,47.5,29,w,0.0,0.0,259060.0
5,0,7200,36 months,20.2,E3,3.0,OWN,53000.0,debt_consolidation,15.17,1,2005,1,9,0,4401,61.1,14,f,3937.98,0.0,14033.0


In [340]:
data = data.astype({'earliest_cr_line' : int})
test = test.astype({'earliest_cr_line' : int})
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15997 entries, 0 to 15999
Data columns (total 22 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   loan_status          15997 non-null  int64  
 1   loan_amnt            15997 non-null  int64  
 2   term                 15997 non-null  object 
 3   int_rate             15997 non-null  float64
 4   sub_grade            15997 non-null  object 
 5   emp_length           15997 non-null  float64
 6   home_ownership       15997 non-null  object 
 7   annual_inc           15997 non-null  float64
 8   purpose              15997 non-null  object 
 9   dti                  15997 non-null  float64
 10  delinq_2yrs          15997 non-null  int64  
 11  earliest_cr_line     15997 non-null  int64  
 12  inq_last_6mths       15997 non-null  int64  
 13  open_acc             15997 non-null  int64  
 14  pub_rec              15997 non-null  int64  
 15  revol_bal            15997 non-null 

## 범주형 변수 처리

In [342]:
data = pd.get_dummies(data)
test = pd.get_dummies(test)
data.head()

Unnamed: 0,loan_status,loan_amnt,int_rate,emp_length,annual_inc,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,open_acc,pub_rec,revol_bal,revol_util,total_acc,out_prncp,recoveries,tot_cur_bal,term_ 36 months,term_ 60 months,sub_grade_A1,sub_grade_A2,sub_grade_A3,sub_grade_A4,sub_grade_A5,sub_grade_B1,...,sub_grade_F5,sub_grade_G1,sub_grade_G2,sub_grade_G3,sub_grade_G4,sub_grade_G5,home_ownership_MORTGAGE,home_ownership_OWN,home_ownership_RENT,purpose_car,purpose_credit_card,purpose_debt_consolidation,purpose_educational,purpose_home_improvement,purpose_house,purpose_major_purchase,purpose_medical,purpose_moving,purpose_other,purpose_renewable_energy,purpose_small_business,purpose_vacation,purpose_wedding,initial_list_status_f,initial_list_status_w
0,0,15000,19.99,2.0,46900.0,21.08,0,2010,2,10,0,11736,39.3,17,14401.39,0.0,43804.0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1
1,0,20000,18.99,10.0,80000.0,22.71,0,1994,0,10,0,40950,97.7,29,11947.7,0.0,50777.0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1
2,0,10000,16.99,10.0,55000.0,15.03,0,2000,0,10,1,1759,12.0,25,9101.23,0.0,149114.0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1
4,0,20000,9.99,10.0,85000.0,13.24,1,1993,0,8,0,7974,47.5,29,0.0,0.0,259060.0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1
5,0,7200,20.2,3.0,53000.0,15.17,1,2005,1,9,0,4401,61.1,14,3937.98,0.0,14033.0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0


## trian data null 처리

In [343]:
for column in data.columns:
    if data[column].isna().sum() != 0:
        missing = data[column].isna().sum()
        portion = (missing / data.shape[0]) * 100
        print(f"'{column}': number of missing values '{missing}' ==> '{portion:.3f}%'")

'revol_util': number of missing values '9' ==> '0.056%'
'tot_cur_bal': number of missing values '1996' ==> '12.477%'


In [344]:
data['tot_cur_bal']=data['tot_cur_bal'].fillna(data['tot_cur_bal'].mean())

In [345]:
data.dropna(inplace=True)

In [346]:
for column in data.columns:
    if data[column].isna().sum() != 0:
        missing = data[column].isna().sum()
        portion = (missing / data.shape[0]) * 100
        print(f"'{column}': number of missing values '{missing}' ==> '{portion:.3f}%'")

In [347]:
for column in test.columns:
    if test[column].isna().sum() != 0:
        missing = test[column].isna().sum()
        portion = (missing / test.shape[0]) * 100
        print(f"'{column}': number of missing values '{missing}' ==> '{portion:.3f}%'")

'revol_util': number of missing values '7' ==> '0.175%'
'tot_cur_bal': number of missing values '482' ==> '12.050%'


In [348]:
data['annual_inc']=np.log1p(data['annual_inc'])
data['delinq_2yrs']=np.log1p(data['delinq_2yrs'])
data['open_acc']=np.log1p(data['open_acc'])
data['pub_rec']=np.log1p(data['pub_rec'])
data['revol_bal']=np.log1p(data['revol_bal'])
data['total_acc']=np.log1p(data['total_acc'])
data['out_prncp']=np.log1p(data['out_prncp'])
data['tot_cur_bal']=np.log1p(data['tot_cur_bal'])


test['annual_inc']=np.log1p(test['annual_inc'])
test['delinq_2yrs']=np.log1p(test['delinq_2yrs'])
test['open_acc']=np.log1p(test['open_acc'])
test['pub_rec']=np.log1p(test['pub_rec'])
test['revol_bal']=np.log1p(test['revol_bal'])
test['total_acc']=np.log1p(test['total_acc'])
test['out_prncp']=np.log1p(test['out_prncp'])
test['tot_cur_bal']=np.log1p(test['tot_cur_bal'])

### validation split

In [349]:
train, val = train_test_split(data, test_size=0.1, random_state=42)

print(train.shape)
print(val.shape)

(14389, 73)
(1599, 73)


In [350]:
X_train, y_train = train.drop('loan_status', axis=1), train.loan_status
X_test, y_test = val.drop('loan_status', axis=1), val.loan_status

### scaling

In [351]:
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
test = scaler.transform(test)

In [352]:
def print_score(true, pred, train=True):
    if train:
        clf_report = pd.DataFrame(classification_report(true, pred, output_dict=True))
        print("Train Result:\n================================================")
        print(f"Accuracy Score: {accuracy_score(true, pred) * 100:.2f}%")
        print("_______________________________________________")
        print(f"CLASSIFICATION REPORT:\n{clf_report}")
        print("_______________________________________________")
        print(f"Confusion Matrix: \n {confusion_matrix(true, pred)}\n")
        
    elif train==False:
        clf_report = pd.DataFrame(classification_report(true, pred, output_dict=True))
        print("Test Result:\n================================================")        
        print(f"Accuracy Score: {accuracy_score(true, pred) * 100:.2f}%")
        print("_______________________________________________")
        print(f"CLASSIFICATION REPORT:\n{clf_report}")
        print("_______________________________________________")
        print(f"Confusion Matrix: \n {confusion_matrix(true, pred)}\n")

In [353]:
X_train = np.array(X_train).astype(np.float32)
X_test = np.array(X_test).astype(np.float32)
y_train = np.array(y_train).astype(np.float32)
y_test = np.array(y_test).astype(np.float32)
test = np.array(test).astype(np.float32)

In [354]:
param_grid = dict(
     n_estimators=stats.randint(100, 500),
     max_depth=stats.randint(2, 8),
     learning_rate=stats.uniform(0, 0.5)
 )

xgb_clf = XGBClassifier(use_label_encoder=False)
xgb_cv = RandomizedSearchCV(
     xgb_clf, param_grid, cv=3, n_iter=10, 
     scoring='roc_auc', n_jobs=-1, verbose=1
)
xgb_cv.fit(X_train, y_train)

best_params = xgb_cv.best_params_
print(f"Best Parameters: {best_params}")
xgb_clf = XGBClassifier(**best_params)
xgb_clf.fit(X_train, y_train)

y_train_pred = xgb_clf.predict(X_train)
y_test_pred = xgb_clf.predict(X_test)

print_score(y_train, y_train_pred, train=True)
print_score(y_test, y_test_pred, train=False)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
Best Parameters: {'learning_rate': 0.05786139179998273, 'max_depth': 5, 'n_estimators': 192}




Train Result:
Accuracy Score: 77.19%
_______________________________________________
CLASSIFICATION REPORT:
              0.0     1.0  accuracy  macro avg  weighted avg
precision    0.75    0.79      0.77       0.77          0.77
recall       0.81    0.73      0.77       0.77          0.77
f1-score     0.78    0.76      0.77       0.77          0.77
support   7191.00 7198.00      0.77   14389.00      14389.00
_______________________________________________
Confusion Matrix: 
 [[5830 1361]
 [1921 5277]]

Test Result:
Accuracy Score: 74.11%
_______________________________________________
CLASSIFICATION REPORT:
             0.0    1.0  accuracy  macro avg  weighted avg
precision   0.73   0.75      0.74       0.74          0.74
recall      0.76   0.72      0.74       0.74          0.74
f1-score    0.75   0.74      0.74       0.74          0.74
support   800.00 799.00      0.74    1599.00       1599.00
_______________________________________________
Confusion Matrix: 
 [[609 191]
 [223 576]

In [355]:
len(test)

4000

### 최종 예측 및 저장

In [356]:
pred = xgb_clf.predict(test)
pred

array([0., 1., 0., ..., 0., 0., 0.], dtype=float32)

In [357]:
submit_df = pd.read_csv("./data/test/loan_test_label.csv")
submit_df['loan_status'] = pred
submit_df.head()

Unnamed: 0,id,loan_status
0,16001,0.0
1,16002,1.0
2,16003,0.0
3,16004,0.0
4,16005,1.0


In [358]:
submit_df.to_csv("./data/test/loan_test_label.csv")