In [1]:
import pandas as pd
import numpy as np
import os
import random
from copy import deepcopy
import math
from itertools import product
import pickle

import joblib
from tqdm import tqdm

import matplotlib.pyplot as plt
import seaborn as sb

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix, ConfusionMatrixDisplay
from supervised.automl import AutoML

In [7]:
tmp = pd.read_csv('./expermData/merged_ver_4_0.csv', index_col=0)
tmp.drop(['Unnamed: 0.1'], inplace=True, axis=1)

In [8]:
cate_list = [
    'income_type',
    'employment_type',
    'houseown_type',
    'purpose',
    'gender'
]

In [9]:
for cate in cate_list:
    tmp = pd.get_dummies\
    (data = tmp, columns=[cate], prefix=cate)

In [10]:
tmp['yearly_income'].replace(0, 1, inplace=True)
tmp['desired_amount'].replace(0, 1, inplace=True)
tmp['ratio_EY'] = tmp['existing_loan_amt']/tmp['yearly_income']
tmp['ratio_DY'] = tmp['desired_amount']/tmp['yearly_income']
tmp['ratio_LY'] = tmp['loan_limit']/tmp['yearly_income']
tmp['ratio_LD'] = tmp['loan_limit']/tmp['desired_amount']
tmp['personal_rehabilitation_c'] = tmp['personal_rehabilitation_complete_yn']
tmp['personal_rehabilitation_n'] = 0
tmp['personal_rehabilitation_y'] = 0
tmp.loc[tmp[tmp["personal_rehabilitation_yn"]==0].index, 'personal_rehabilitation_n']=1
tmp.loc[tmp[(tmp["personal_rehabilitation_yn"]==1)&
           (tmp["personal_rehabilitation_complete_yn"]==0)
           ].index, 'personal_rehabilitation_y']=1
tmp['time_interval'] = pd.to_datetime(tmp['loanapply_insert_time']) - pd.to_datetime(tmp['insert_time'])
tmp['time_interval']=tmp['time_interval'].dt.total_seconds()

In [11]:
tmp['age'] = 2022-tmp['birth_year']

In [12]:
tmp['link_account'] = 0
tmp.loc[tmp[tmp["loan_rate"]==0].index, 'link_account']=1

In [13]:
tmp.columns

Index(['application_id', 'user_id', 'birth_year', 'insert_time',
       'credit_score', 'yearly_income', 'desired_amount',
       'personal_rehabilitation_yn', 'personal_rehabilitation_complete_yn',
       'existing_loan_cnt', 'existing_loan_amt', 'loanapply_insert_time',
       'bank_id', 'product_id', 'loan_limit', 'loan_rate', 'is_applied',
       'work_days', 'income_type_EARNEDINCOME', 'income_type_EARNEDINCOME2',
       'income_type_FREELANCER', 'income_type_OTHERINCOME',
       'income_type_PRACTITIONER', 'income_type_PRIVATEBUSINESS',
       'employment_type_계약직', 'employment_type_기타', 'employment_type_일용직',
       'employment_type_정규직', 'houseown_type_기타가족소유', 'houseown_type_배우자',
       'houseown_type_자가', 'houseown_type_전월세', 'purpose_기타', 'purpose_대환대출',
       'purpose_사업자금', 'purpose_생활비', 'purpose_자동차구입', 'purpose_전월세보증금',
       'purpose_주택구입', 'purpose_투자', 'gender_0.0', 'gender_1.0', 'gender_A',
       'ratio_EY', 'ratio_DY', 'ratio_LY', 'ratio_LD',
       'personal_r

In [14]:
tmp.drop(['houseown_type_배우자',
 'personal_rehabilitation_y',
 'personal_rehabilitation_n',
 'personal_rehabilitation_c',
 'purpose_자동차구입',
 'gender_A',
 'link_account',
 'purpose_투자',
 'income_type_PRACTITIONER',
 'employment_type_일용직',
 'purpose_전월세보증금',
 'income_type_FREELANCER',
 'purpose_기타',
 'purpose_주택구입',
 'purpose_사업자금',
 'income_type_OTHERINCOME'] ,inplace=True,axis=1)

In [15]:
tmp.to_csv('./valiData/merged_5_0.csv')

# 데이터 분리

In [2]:
df=pd.read_csv('./valiData/merged_5_0.csv',index_col=0)

In [3]:
df.columns

Index(['application_id', 'user_id', 'birth_year', 'insert_time',
       'credit_score', 'yearly_income', 'desired_amount',
       'personal_rehabilitation_yn', 'personal_rehabilitation_complete_yn',
       'existing_loan_cnt', 'existing_loan_amt', 'loanapply_insert_time',
       'bank_id', 'product_id', 'loan_limit', 'loan_rate', 'is_applied',
       'work_days', 'income_type_EARNEDINCOME', 'income_type_EARNEDINCOME2',
       'income_type_PRIVATEBUSINESS', 'employment_type_계약직',
       'employment_type_기타', 'employment_type_정규직', 'houseown_type_기타가족소유',
       'houseown_type_자가', 'houseown_type_전월세', 'purpose_대환대출', 'purpose_생활비',
       'gender_0.0', 'gender_1.0', 'ratio_EY', 'ratio_DY', 'ratio_LY',
       'ratio_LD', 'time_interval', 'age'],
      dtype='object')

In [4]:
df.shape

(13527250, 37)

In [5]:
df.drop(['application_id', 'user_id','insert_time',
               'loanapply_insert_time', 'bank_id', 'product_id',
               ], axis=1, inplace=True)

In [6]:
df.columns

Index(['birth_year', 'credit_score', 'yearly_income', 'desired_amount',
       'personal_rehabilitation_yn', 'personal_rehabilitation_complete_yn',
       'existing_loan_cnt', 'existing_loan_amt', 'loan_limit', 'loan_rate',
       'is_applied', 'work_days', 'income_type_EARNEDINCOME',
       'income_type_EARNEDINCOME2', 'income_type_PRIVATEBUSINESS',
       'employment_type_계약직', 'employment_type_기타', 'employment_type_정규직',
       'houseown_type_기타가족소유', 'houseown_type_자가', 'houseown_type_전월세',
       'purpose_대환대출', 'purpose_생활비', 'gender_0.0', 'gender_1.0', 'ratio_EY',
       'ratio_DY', 'ratio_LY', 'ratio_LD', 'time_interval', 'age'],
      dtype='object')

In [7]:
model1_train_X = df[(df['is_applied'].isnull()==False)&((df['credit_score'].isnull()==False))].loc[:, df.columns !='is_applied']
model1_train_y = df[(df['is_applied'].isnull()==False)&((df['credit_score'].isnull()==False))]['is_applied']
model2_train_X = df[(df['is_applied'].isnull()==False)&((df['credit_score'].isnull()==True))].loc[:, df.columns !='is_applied']
model2_train_X.drop(['credit_score'], axis=1,inplace=True)
model2_train_y = df[(df['is_applied'].isnull()==False)&((df['credit_score'].isnull()==True))]['is_applied']

model1_target= df[(df['is_applied'].isnull()==True)&((df['credit_score'].isnull()==False))].loc[:, df.columns !='is_applied']
model2_target= df[(df['is_applied'].isnull()==True)&((df['credit_score'].isnull()==True))].loc[:, df.columns !='is_applied']
model2_target.drop(['credit_score'], axis=1,inplace=True)

In [8]:
model1_train_X = model1_train_X.sample(20_0000, random_state=42)
model1_train_y = model1_train_y.sample(20_0000, random_state=42)
model2_train_X = model2_train_X.sample(10_0000, random_state=42)
model2_train_y = model2_train_y.sample(10_0000, random_state=42)

In [9]:
model1_train_X.shape, model1_train_y.shape

((200000, 30), (200000,))

In [10]:
model2_train_X.shape, model2_train_y.shape

((100000, 29), (100000,))

In [11]:
model1_target.shape, model2_target.shape, 

((3000756, 30), (256483, 29))

In [12]:
model1 = AutoML()
model1.fit(model1_train_X, model1_train_y)

Linear algorithm was disabled.
AutoML directory: AutoML_5
The task is binary_classification with evaluation metric logloss
AutoML will use algorithms: ['Baseline', 'Decision Tree', 'Random Forest', 'Xgboost', 'Neural Network']
AutoML will ensemble available models
AutoML steps: ['simple_algorithms', 'default_algorithms', 'ensemble']
* Step simple_algorithms will try to check up to 2 models
1_Baseline logloss 0.216889 trained in 0.87 seconds


Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 44032 (\N{HANGUL SYLLABLE GA}) missing from current font.
Glyph 51313 (\N{HANGUL SYLLABLE JOG}) missing from current font.
Glyph 49548 (\N{HANGUL SYLLABLE SO}) missing from current font.
Glyph 50976 (\N{HANGUL SYLLABLE YU}) missing from current font.
Glyph 51221 (\N{HANGUL SYLLABLE JEONG}) missing from current font.
Glyph 44508 (\N{HANGUL SYLLABLE GYU}) missing from current font.
Glyph 51649 (\N{HANGUL SYLLABLE JIG}) missing from current font.
Glyph 44228 (\N{HANGUL SYLLABLE GYE}) missing from current font.
Glyph 50557 (\N{HANGUL SYLLABLE YAG}) missing from current font.
Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 44032 (\N{HANGUL SYLLABLE GA}) missing from current font.
Glyph 51313 (\N{HANGUL SYLLABLE JOG}) missing from current font.
Glyph 49548 (\N{HANGUL SYLLABLE

2_DecisionTree logloss 0.191461 trained in 36.52 seconds


Glyph 44228 (\N{HANGUL SYLLABLE GYE}) missing from current font.
Glyph 50557 (\N{HANGUL SYLLABLE YAG}) missing from current font.
Glyph 51649 (\N{HANGUL SYLLABLE JIG}) missing from current font.
Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 51221 (\N{HANGUL SYLLABLE JEONG}) missing from current font.
Glyph 44508 (\N{HANGUL SYLLABLE GYU}) missing from current font.
Glyph 44032 (\N{HANGUL SYLLABLE GA}) missing from current font.
Glyph 51313 (\N{HANGUL SYLLABLE JOG}) missing from current font.
Glyph 49548 (\N{HANGUL SYLLABLE SO}) missing from current font.
Glyph 50976 (\N{HANGUL SYLLABLE YU}) missing from current font.
Glyph 51088 (\N{HANGUL SYLLABLE JA}) missing from current font.
Glyph 51204 (\N{HANGUL SYLLABLE JEON}) missing from current font.
Glyph 50900 (\N{HANGUL SYLLABLE WEOL}) missing from current font.
Glyph 49464 (\N{HANGUL SYLLABLE SE}) missing from current font.
Glyph 45824 (\N{HANGUL SYLLA

* Step default_algorithms will try to check up to 3 models


Glyph 51088 (\N{HANGUL SYLLABLE JA}) missing from current font.
Glyph 44032 (\N{HANGUL SYLLABLE GA}) missing from current font.
Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 49373 (\N{HANGUL SYLLABLE SAENG}) missing from current font.
Glyph 54876 (\N{HANGUL SYLLABLE HWAL}) missing from current font.
Glyph 48708 (\N{HANGUL SYLLABLE BI}) missing from current font.
Glyph 51088 (\N{HANGUL SYLLABLE JA}) missing from current font.
Glyph 44032 (\N{HANGUL SYLLABLE GA}) missing from current font.
Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 49373 (\N{HANGUL SYLLABLE SAENG}) missing from current font.
Glyph 54876 (\N{HANGUL SYLLABLE HWAL}) missing from current font.
Glyph 48708 (\N{HANGUL SYLLABLE BI}) missing from current font.


Exception while producing SHAP explanations. Passing parameters norm and vmin/vmax simultaneously is not supported. Please pass vmin/vmax directly to the norm when creating it.
Continuing ...
3_Default_Xgboost logloss 0.16543 trained in 41.23 seconds


Glyph 49373 (\N{HANGUL SYLLABLE SAENG}) missing from current font.
Glyph 54876 (\N{HANGUL SYLLABLE HWAL}) missing from current font.
Glyph 48708 (\N{HANGUL SYLLABLE BI}) missing from current font.
Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 51088 (\N{HANGUL SYLLABLE JA}) missing from current font.
Glyph 44032 (\N{HANGUL SYLLABLE GA}) missing from current font.
Glyph 51313 (\N{HANGUL SYLLABLE JOG}) missing from current font.
Glyph 49548 (\N{HANGUL SYLLABLE SO}) missing from current font.
Glyph 50976 (\N{HANGUL SYLLABLE YU}) missing from current font.
Glyph 45824 (\N{HANGUL SYLLABLE DAE}) missing from current font.
Glyph 54872 (\N{HANGUL SYLLABLE HWAN}) missing from current font.
Glyph 52636 (\N{HANGUL SYLLABLE CUL}) missing from current font.
Glyph 51221 (\N{HANGUL SYLLABLE JEONG}) missing from current font.
Glyph 44508 (\N{HANGUL SYLLABLE GYU}) missing from current font.
Glyph 51649 (\N{HANGUL SYL

4_Default_NeuralNetwork logloss 0.181073 trained in 36.5 seconds


Glyph 51204 (\N{HANGUL SYLLABLE JEON}) missing from current font.
Glyph 50900 (\N{HANGUL SYLLABLE WEOL}) missing from current font.
Glyph 49464 (\N{HANGUL SYLLABLE SE}) missing from current font.
Glyph 51088 (\N{HANGUL SYLLABLE JA}) missing from current font.
Glyph 44032 (\N{HANGUL SYLLABLE GA}) missing from current font.
Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 51313 (\N{HANGUL SYLLABLE JOG}) missing from current font.
Glyph 49548 (\N{HANGUL SYLLABLE SO}) missing from current font.
Glyph 50976 (\N{HANGUL SYLLABLE YU}) missing from current font.
Glyph 49373 (\N{HANGUL SYLLABLE SAENG}) missing from current font.
Glyph 54876 (\N{HANGUL SYLLABLE HWAL}) missing from current font.
Glyph 48708 (\N{HANGUL SYLLABLE BI}) missing from current font.
Glyph 51221 (\N{HANGUL SYLLABLE JEONG}) missing from current font.
Glyph 44508 (\N{HANGUL SYLLABLE GYU}) missing from current font.
Glyph 51649 (\N{HANGUL SYL

5_Default_RandomForest logloss 0.182692 trained in 20.65 seconds


Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 44228 (\N{HANGUL SYLLABLE GYE}) missing from current font.
Glyph 50557 (\N{HANGUL SYLLABLE YAG}) missing from current font.
Glyph 51649 (\N{HANGUL SYLLABLE JIG}) missing from current font.
Glyph 51221 (\N{HANGUL SYLLABLE JEONG}) missing from current font.
Glyph 44508 (\N{HANGUL SYLLABLE GYU}) missing from current font.
Glyph 44032 (\N{HANGUL SYLLABLE GA}) missing from current font.
Glyph 51313 (\N{HANGUL SYLLABLE JOG}) missing from current font.
Glyph 49548 (\N{HANGUL SYLLABLE SO}) missing from current font.
Glyph 50976 (\N{HANGUL SYLLABLE YU}) missing from current font.
Glyph 51088 (\N{HANGUL SYLLABLE JA}) missing from current font.
Glyph 51204 (\N{HANGUL SYLLABLE JEON}) missing from current font.
Glyph 50900 (\N{HANGUL SYLLABLE WEOL}) missing from current font.
Glyph 49464 (\N{HANGUL SYLLABLE SE}) missing from current font.
Glyph 44592 (\N{HANGUL SYLLA

* Step ensemble will try to check up to 1 model
Ensemble logloss 0.16543 trained in 6.26 seconds


Glyph 51204 (\N{HANGUL SYLLABLE JEON}) missing from current font.
Glyph 50900 (\N{HANGUL SYLLABLE WEOL}) missing from current font.
Glyph 49464 (\N{HANGUL SYLLABLE SE}) missing from current font.
Glyph 51088 (\N{HANGUL SYLLABLE JA}) missing from current font.
Glyph 44032 (\N{HANGUL SYLLABLE GA}) missing from current font.
Glyph 49373 (\N{HANGUL SYLLABLE SAENG}) missing from current font.
Glyph 54876 (\N{HANGUL SYLLABLE HWAL}) missing from current font.
Glyph 48708 (\N{HANGUL SYLLABLE BI}) missing from current font.
Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 51313 (\N{HANGUL SYLLABLE JOG}) missing from current font.
Glyph 49548 (\N{HANGUL SYLLABLE SO}) missing from current font.
Glyph 50976 (\N{HANGUL SYLLABLE YU}) missing from current font.
Glyph 51221 (\N{HANGUL SYLLABLE JEONG}) missing from current font.
Glyph 44508 (\N{HANGUL SYLLABLE GYU}) missing from current font.
Glyph 51649 (\N{HANGUL SYL

AutoML fit time: 148.43 seconds
AutoML best model: 3_Default_Xgboost


AutoML()

In [13]:
model1_target['predict']=model1.predict(model1_target)

In [14]:
model2 = AutoML()
model2.fit(model2_train_X, model2_train_y)

Linear algorithm was disabled.
AutoML directory: AutoML_6
The task is binary_classification with evaluation metric logloss
AutoML will use algorithms: ['Baseline', 'Decision Tree', 'Random Forest', 'Xgboost', 'Neural Network']
AutoML will ensemble available models
AutoML steps: ['simple_algorithms', 'default_algorithms', 'ensemble']
* Step simple_algorithms will try to check up to 2 models
1_Baseline logloss 0.165777 trained in 0.66 seconds


Glyph 51204 (\N{HANGUL SYLLABLE JEON}) missing from current font.
Glyph 50900 (\N{HANGUL SYLLABLE WEOL}) missing from current font.
Glyph 49464 (\N{HANGUL SYLLABLE SE}) missing from current font.
Glyph 51088 (\N{HANGUL SYLLABLE JA}) missing from current font.
Glyph 44032 (\N{HANGUL SYLLABLE GA}) missing from current font.
Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 51313 (\N{HANGUL SYLLABLE JOG}) missing from current font.
Glyph 49548 (\N{HANGUL SYLLABLE SO}) missing from current font.
Glyph 50976 (\N{HANGUL SYLLABLE YU}) missing from current font.
Glyph 51221 (\N{HANGUL SYLLABLE JEONG}) missing from current font.
Glyph 44508 (\N{HANGUL SYLLABLE GYU}) missing from current font.
Glyph 51649 (\N{HANGUL SYLLABLE JIG}) missing from current font.
Glyph 44228 (\N{HANGUL SYLLABLE GYE}) missing from current font.
Glyph 50557 (\N{HANGUL SYLLABLE YAG}) missing from current font.
Glyph 51204 (\N{HANGUL SYLLA

2_DecisionTree logloss 0.149589 trained in 13.38 seconds


Glyph 44228 (\N{HANGUL SYLLABLE GYE}) missing from current font.
Glyph 50557 (\N{HANGUL SYLLABLE YAG}) missing from current font.
Glyph 51649 (\N{HANGUL SYLLABLE JIG}) missing from current font.
Glyph 51221 (\N{HANGUL SYLLABLE JEONG}) missing from current font.
Glyph 44508 (\N{HANGUL SYLLABLE GYU}) missing from current font.
Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 44032 (\N{HANGUL SYLLABLE GA}) missing from current font.
Glyph 51313 (\N{HANGUL SYLLABLE JOG}) missing from current font.
Glyph 49548 (\N{HANGUL SYLLABLE SO}) missing from current font.
Glyph 50976 (\N{HANGUL SYLLABLE YU}) missing from current font.
Glyph 51088 (\N{HANGUL SYLLABLE JA}) missing from current font.
Glyph 51204 (\N{HANGUL SYLLABLE JEON}) missing from current font.
Glyph 50900 (\N{HANGUL SYLLABLE WEOL}) missing from current font.
Glyph 49464 (\N{HANGUL SYLLABLE SE}) missing from current font.
Glyph 45824 (\N{HANGUL SYLLA

* Step default_algorithms will try to check up to 3 models


Glyph 51221 (\N{HANGUL SYLLABLE JEONG}) missing from current font.
Glyph 44508 (\N{HANGUL SYLLABLE GYU}) missing from current font.
Glyph 51649 (\N{HANGUL SYLLABLE JIG}) missing from current font.
Glyph 51088 (\N{HANGUL SYLLABLE JA}) missing from current font.
Glyph 44032 (\N{HANGUL SYLLABLE GA}) missing from current font.
Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 51313 (\N{HANGUL SYLLABLE JOG}) missing from current font.
Glyph 49548 (\N{HANGUL SYLLABLE SO}) missing from current font.
Glyph 50976 (\N{HANGUL SYLLABLE YU}) missing from current font.
Glyph 45824 (\N{HANGUL SYLLABLE DAE}) missing from current font.
Glyph 54872 (\N{HANGUL SYLLABLE HWAN}) missing from current font.
Glyph 52636 (\N{HANGUL SYLLABLE CUL}) missing from current font.
Glyph 49373 (\N{HANGUL SYLLABLE SAENG}) missing from current font.
Glyph 54876 (\N{HANGUL SYLLABLE HWAL}) missing from current font.
Glyph 48708 (\N{HANGUL SY

Exception while producing SHAP explanations. Passing parameters norm and vmin/vmax simultaneously is not supported. Please pass vmin/vmax directly to the norm when creating it.
Continuing ...
3_Default_Xgboost logloss 0.132478 trained in 9.86 seconds


Glyph 49373 (\N{HANGUL SYLLABLE SAENG}) missing from current font.
Glyph 54876 (\N{HANGUL SYLLABLE HWAL}) missing from current font.
Glyph 48708 (\N{HANGUL SYLLABLE BI}) missing from current font.
Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 45824 (\N{HANGUL SYLLABLE DAE}) missing from current font.
Glyph 54872 (\N{HANGUL SYLLABLE HWAN}) missing from current font.
Glyph 52636 (\N{HANGUL SYLLABLE CUL}) missing from current font.
Glyph 51088 (\N{HANGUL SYLLABLE JA}) missing from current font.
Glyph 44032 (\N{HANGUL SYLLABLE GA}) missing from current font.
Glyph 51313 (\N{HANGUL SYLLABLE JOG}) missing from current font.
Glyph 49548 (\N{HANGUL SYLLABLE SO}) missing from current font.
Glyph 50976 (\N{HANGUL SYLLABLE YU}) missing from current font.
Glyph 51221 (\N{HANGUL SYLLABLE JEONG}) missing from current font.
Glyph 44508 (\N{HANGUL SYLLABLE GYU}) missing from current font.
Glyph 51649 (\N{HANGUL SYL

4_Default_NeuralNetwork logloss 0.144856 trained in 14.61 seconds


Glyph 51088 (\N{HANGUL SYLLABLE JA}) missing from current font.
Glyph 44032 (\N{HANGUL SYLLABLE GA}) missing from current font.
Glyph 49373 (\N{HANGUL SYLLABLE SAENG}) missing from current font.
Glyph 54876 (\N{HANGUL SYLLABLE HWAL}) missing from current font.
Glyph 48708 (\N{HANGUL SYLLABLE BI}) missing from current font.
Glyph 51204 (\N{HANGUL SYLLABLE JEON}) missing from current font.
Glyph 50900 (\N{HANGUL SYLLABLE WEOL}) missing from current font.
Glyph 49464 (\N{HANGUL SYLLABLE SE}) missing from current font.
Glyph 51221 (\N{HANGUL SYLLABLE JEONG}) missing from current font.
Glyph 44508 (\N{HANGUL SYLLABLE GYU}) missing from current font.
Glyph 51649 (\N{HANGUL SYLLABLE JIG}) missing from current font.
Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 51313 (\N{HANGUL SYLLABLE JOG}) missing from current font.
Glyph 49548 (\N{HANGUL SYLLABLE SO}) missing from current font.
Glyph 50976 (\N{HANGUL SY

Exception while producing SHAP explanations. Passing parameters norm and vmin/vmax simultaneously is not supported. Please pass vmin/vmax directly to the norm when creating it.
Continuing ...
5_Default_RandomForest logloss 0.142654 trained in 10.05 seconds


Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 49373 (\N{HANGUL SYLLABLE SAENG}) missing from current font.
Glyph 54876 (\N{HANGUL SYLLABLE HWAL}) missing from current font.
Glyph 48708 (\N{HANGUL SYLLABLE BI}) missing from current font.
Glyph 45824 (\N{HANGUL SYLLABLE DAE}) missing from current font.
Glyph 54872 (\N{HANGUL SYLLABLE HWAN}) missing from current font.
Glyph 52636 (\N{HANGUL SYLLABLE CUL}) missing from current font.
Glyph 51204 (\N{HANGUL SYLLABLE JEON}) missing from current font.
Glyph 50900 (\N{HANGUL SYLLABLE WEOL}) missing from current font.
Glyph 49464 (\N{HANGUL SYLLABLE SE}) missing from current font.
Glyph 51221 (\N{HANGUL SYLLABLE JEONG}) missing from current font.
Glyph 44508 (\N{HANGUL SYLLABLE GYU}) missing from current font.
Glyph 51649 (\N{HANGUL SYLLABLE JIG}) missing from current font.
Glyph 44228 (\N{HANGUL SYLLABLE GYE}) missing from current font.
Glyph 50557 (\N{HANGU

* Step ensemble will try to check up to 1 model
Ensemble logloss 0.132395 trained in 2.97 seconds


Glyph 49373 (\N{HANGUL SYLLABLE SAENG}) missing from current font.
Glyph 54876 (\N{HANGUL SYLLABLE HWAL}) missing from current font.
Glyph 48708 (\N{HANGUL SYLLABLE BI}) missing from current font.
Glyph 51088 (\N{HANGUL SYLLABLE JA}) missing from current font.
Glyph 44032 (\N{HANGUL SYLLABLE GA}) missing from current font.
Glyph 51204 (\N{HANGUL SYLLABLE JEON}) missing from current font.
Glyph 50900 (\N{HANGUL SYLLABLE WEOL}) missing from current font.
Glyph 49464 (\N{HANGUL SYLLABLE SE}) missing from current font.
Glyph 51221 (\N{HANGUL SYLLABLE JEONG}) missing from current font.
Glyph 44508 (\N{HANGUL SYLLABLE GYU}) missing from current font.
Glyph 51649 (\N{HANGUL SYLLABLE JIG}) missing from current font.
Glyph 44592 (\N{HANGUL SYLLABLE GI}) missing from current font.
Glyph 53440 (\N{HANGUL SYLLABLE TA}) missing from current font.
Glyph 45824 (\N{HANGUL SYLLABLE DAE}) missing from current font.
Glyph 54872 (\N{HANGUL SYLLABLE HWAN}) missing from current font.
Glyph 52636 (\N{HANGUL 

AutoML fit time: 57.63 seconds
AutoML best model: Ensemble


AutoML()

In [16]:
model2_target['predict']=model2.predict(model2_target)

In [17]:
os.mkdir('./result')

In [18]:
pd.concat([model1_target, model2_target]).to_csv('./result/predict_result.csv')

# loan_result와 결합

In [51]:
df=pd.read_csv('./valiData/merged_5_0.csv',index_col=0)

In [52]:
df.shape

(13527250, 37)

In [53]:
predict_result=pd.read_csv('./result/predict_result.csv',index_col=0)

In [54]:
predict_result.shape

(3257239, 31)

In [55]:
df=df.loc[predict_result.index,['application_id','loanapply_insert_time','bank_id','product_id','loan_limit','loan_rate']]

In [56]:
df.shape

(3257239, 6)

In [57]:
predict_result['myindex'] = df['application_id'].astype(str) + df['loanapply_insert_time']+\
df['bank_id'].astype(str)+ df['product_id'].astype(str)
predict_result.sort_values(by='myindex',inplace=True)
predict_result.reset_index(inplace=True)

In [58]:
loan_df = pd.read_csv('./data/loan_result.csv')
loan_df=loan_df[loan_df['is_applied'].isna()]
loan_df.shape

(3257239, 7)

In [59]:
loan_df['myindex'] = loan_df['application_id'].astype(str) + loan_df['loanapply_insert_time']+\
loan_df['bank_id'].astype(str)+ loan_df['product_id'].astype(str)
loan_df.sort_values(by='myindex',inplace=True)
loan_df.reset_index(inplace=True)

In [60]:
predict_result

Unnamed: 0,index,birth_year,credit_score,yearly_income,desired_amount,personal_rehabilitation_yn,personal_rehabilitation_complete_yn,existing_loan_cnt,existing_loan_amt,loan_limit,...,gender_0.0,gender_1.0,ratio_EY,ratio_DY,ratio_LY,ratio_LD,time_interval,age,predict,myindex
0,2285974,1994.0,930.0,29000000.0,25000000.0,0.0,-1.0,2.0,20000000.0,26000000.0,...,0,1,0.689655,0.862069,0.896552,1.040000,2.0,28.0,0,10000042022-04-08 14:55:2213123
1,2285975,1994.0,930.0,29000000.0,25000000.0,0.0,-1.0,2.0,20000000.0,26000000.0,...,0,1,0.689655,0.862069,0.896552,1.040000,2.0,28.0,0,10000042022-04-08 14:55:2214197
2,2285976,1994.0,930.0,29000000.0,25000000.0,0.0,-1.0,2.0,20000000.0,13000000.0,...,0,1,0.689655,0.862069,0.448276,0.520000,2.0,28.0,0,10000042022-04-08 14:55:242130
3,1794083,1987.0,590.0,23000000.0,7000000.0,0.0,-1.0,4.0,65000000.0,17000000.0,...,1,0,2.826087,0.304348,0.739130,2.428571,20.0,35.0,1,1000012022-05-27 20:32:2058175
4,1794084,1987.0,590.0,23000000.0,7000000.0,0.0,-1.0,4.0,65000000.0,17000000.0,...,1,0,2.826087,0.304348,0.739130,2.428571,24.0,35.0,0,1000012022-05-27 20:32:2419127
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3257234,533850,1992.0,820.0,50000000.0,40000000.0,0.0,-1.0,3.0,33000000.0,50000000.0,...,1,0,0.660000,0.800000,1.000000,1.250000,2.0,30.0,0,9999792022-05-27 16:21:3827176
3257235,533851,1992.0,820.0,50000000.0,40000000.0,0.0,-1.0,3.0,33000000.0,39000000.0,...,1,0,0.660000,0.800000,0.780000,0.975000,2.0,30.0,0,9999792022-05-27 16:21:4622164
3257236,533853,1992.0,820.0,50000000.0,40000000.0,0.0,-1.0,3.0,33000000.0,50000000.0,...,1,0,0.660000,0.800000,1.000000,1.250000,3.0,30.0,0,9999792022-05-27 16:22:2049136
3257237,533852,1992.0,820.0,50000000.0,40000000.0,0.0,-1.0,3.0,33000000.0,31000000.0,...,1,0,0.660000,0.800000,0.620000,0.775000,2.0,30.0,0,9999792022-05-27 16:22:204939


In [61]:
loan_df

Unnamed: 0,index,application_id,loanapply_insert_time,bank_id,product_id,loan_limit,loan_rate,is_applied,myindex
0,9361839,1000000,2022-06-09 05:06:10,63,226,30000000.0,15.6,,10000002022-06-09 05:06:1063226
1,2493135,1000005,2022-06-09 15:54:46,13,262,14000000.0,16.4,,10000052022-06-09 15:54:4613262
2,2493136,1000005,2022-06-09 15:54:46,24,70,3000000.0,13.0,,10000052022-06-09 15:54:462470
3,2493134,1000005,2022-06-09 15:54:46,29,265,23000000.0,6.4,,10000052022-06-09 15:54:4629265
4,2493128,1000005,2022-06-09 15:54:46,35,29,5000000.0,10.6,,10000052022-06-09 15:54:463529
...,...,...,...,...,...,...,...,...,...
3257234,9371589,999999,2022-06-08 00:14:31,59,150,17000000.0,17.7,,9999992022-06-08 00:14:3159150
3257235,9371590,999999,2022-06-08 00:14:32,25,169,22000000.0,19.9,,9999992022-06-08 00:14:3225169
3257236,9371591,999999,2022-06-08 00:14:32,33,110,15000000.0,15.6,,9999992022-06-08 00:14:3233110
3257237,9371588,999999,2022-06-08 00:14:38,18,78,1000000.0,10.5,,9999992022-06-08 00:14:381878


In [66]:
my_df=pd.concat([loan_df,predict_result[['myindex','predict']]],axis=1)

In [67]:
my_df.shape

(3257239, 11)

In [68]:
my_df['is_applied']=my_df['predict']

In [69]:
my_df.drop(['myindex','predict'],axis=1,inplace=True)

In [70]:
my_df.to_csv('./result/androzoo_predict_result.csv')

In [71]:
my_df['is_applied'].value_counts()

0    3160237
1      97002
Name: is_applied, dtype: int64

In [72]:
my_df.head()

Unnamed: 0,index,application_id,loanapply_insert_time,bank_id,product_id,loan_limit,loan_rate,is_applied
0,9361839,1000000,2022-06-09 05:06:10,63,226,30000000.0,15.6,0
1,2493135,1000005,2022-06-09 15:54:46,13,262,14000000.0,16.4,0
2,2493136,1000005,2022-06-09 15:54:46,24,70,3000000.0,13.0,0
3,2493134,1000005,2022-06-09 15:54:46,29,265,23000000.0,6.4,1
4,2493128,1000005,2022-06-09 15:54:46,35,29,5000000.0,10.6,0
