In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

In [17]:
from pingouin import ttest, anova

In [18]:
import warnings
warnings.filterwarnings(action='ignore')

In [19]:
# Raise the column display cap to 500 so wide frames are rendered in full
# rather than being elided in the middle.
pd.options.display.max_columns = 500

---

In [20]:
# Location of the interim loan/borrower CSV, relative to this notebook's
# working directory.
LOAN_BORROWER_CSV = '../../data/interim/loan_borrower.csv'

# Read the whole file into memory with pandas' default parsing settings.
loan_borrowers = pd.read_csv(filepath_or_buffer=LOAN_BORROWER_CSV)

In [21]:
# Print the frame's schema: row count, per-column non-null counts and dtypes.
# Columns whose non-null count is below the row count contain missing values.
loan_borrowers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 28 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   loan_id                     100000 non-null  int64  
 1   member_id                   100000 non-null  int64  
 2   date                        100000 non-null  object 
 3   purpose                     100000 non-null  object 
 4   is_joint_application        99029 non-null   float64
 5   loan_amount                 99983 non-null   float64
 6   term                        99983 non-null   object 
 7   term_in_months              99983 non-null   float64
 8   interest_rate               100000 non-null  float64
 9   monthly_payment             100000 non-null  int64  
 10  grade                       100000 non-null  object 
 11  loan_status                 100000 non-null  object 
 12  residential_state           100000 non-null  object 
 13  years_employmen

In [22]:
# Preview the first five rows (the default for head()) as a sanity check on
# the load; kept as the bare last expression so the notebook renders it richly.
loan_borrowers.head()

Unnamed: 0,loan_id,member_id,date,purpose,is_joint_application,loan_amount,term,term_in_months,interest_rate,monthly_payment,grade,loan_status,residential_state,years_employment,home_ownership,annual_income,income_verified,dti_ratio,length_credit_history,n_total_credit_lines,n_open_credit_lines,n_open_credit_lines_1_year,revolving_balance,revolving_utilization_rate,n_derogatory_record,n_delinquency_2_years,n_charge_off_1_year,n_inquiries_6_months
0,1888978,2305095,2014-12-10,Debt Consolidation,0.0,25190.0,60 months,60.0,6.25,490,E3,Current,NM,10+ years,rent,56471,1,16.8,6,11,9.0,6,14301,49.02,0,19,10,0
1,1299695,2610493,2014-09-15,Debt Consolidation,0.0,21189.0,60 months,60.0,10.49,455,B3,Current,WA,2-5 years,rent,55038,0,19.99,22,8,7.0,4,18262,72.4,1,0,0,0
2,1875016,2491679,2014-09-11,Debt Consolidation,0.0,29908.0,60 months,60.0,9.11,622,B2,Current,MS,< 1 year,rent,56610,1,14.33,5,8,5.0,5,10799,66.27,0,1,1,0
3,1440478,2092798,2016-04-22,Home Improvement,0.0,13053.0,48 months,48.0,11.89,343,B3,Current,TX,6-9 years,own,54887,1,14.8,12,14,7.0,3,15272,61.05,1,0,0,3
4,1124634,2633077,2016-02-03,Debt Consolidation,0.0,24613.0,60 months,60.0,15.13,587,A3,Current,MA,2-5 years,rent,53522,1,10.14,4,21,19.0,10,19316,56.39,2,14,7,1


In [36]:
# Preview the last five rows (the default for tail()) to confirm the file was
# read through to the end without trailing junk rows.
loan_borrowers.tail()

Unnamed: 0,loan_id,member_id,date,purpose,is_joint_application,loan_amount,term,term_in_months,interest_rate,monthly_payment,grade,loan_status,residential_state,years_employment,home_ownership,annual_income,income_verified,dti_ratio,length_credit_history,n_total_credit_lines,n_open_credit_lines,n_open_credit_lines_1_year,revolving_balance,revolving_utilization_rate,n_derogatory_record,n_delinquency_2_years,n_charge_off_1_year,n_inquiries_6_months
99995,1208167,2069462,2016-05-25,Debt Consolidation,0.0,25874.0,60 months,60.0,10.95,562,A2,Current,LA,2-5 years,own,58987,0,21.62,2,16,10.0,8,15113,77.01,0,6,5,18
99996,1097349,2218175,2015-06-05,Debt Consolidation,0.0,18557.0,60 months,60.0,5.31,353,A1,Current,MO,6-9 years,rent,54245,1,15.6,11,11,7.0,4,13504,80.18,0,0,0,1
99997,1873914,2262127,2014-11-13,Debt Consolidation,0.0,15877.0,36 months,36.0,9.87,511,A2,Current,DE,1 year,rent,54923,0,13.96,2,21,13.0,10,18108,77.28,0,0,0,1
99998,1572282,2501174,2015-06-06,Debt Consolidation,0.0,15601.0,60 months,60.0,7.31,311,C3,Current,CA,1 year,own,57809,1,9.07,36,18,12.0,4,17918,61.01,10,0,0,1
99999,1877390,2134068,2015-08-10,Other,0.0,20500.0,48 months,48.0,12.98,550,A2,Current,MN,6-9 years,rent,52150,1,11.35,6,13,13.0,5,17487,89.47,0,0,0,1


----

# <center>**Expectations**</center>

**Loan Nature**
- loan terms &uarr; &rarr; PD &uarr;
- loan grade &darr; &rarr; PD &uarr;
- purpose is debt consolidation &rarr; PD &uarr;
- joint application &rarr; PD &uarr;           

**Employment History**
- annual income &darr; &rarr; PD &uarr;
- debt-to-income ratio &uarr; &rarr; PD &uarr;
- On mortgage or renting &rarr; PD &uarr;
- (income not verified) and (loan amount &uarr;) &rarr; PD &uarr;
- (loan term &uarr;) and (monthly payment &uarr;) and (annual income &darr;) &rarr; PD &uarr;        

**Credit History**
- (credit history length &darr;) and (number of total credit lines &uarr;) and (number of open credit lines &darr;) &rarr; PD &uarr;
- (credit history length &darr;) or (number of delinquencies &uarr;) &rarr; PD &uarr;
- (number of derogatory records &uarr;) or (number of delinquencies &uarr;) or (number of charge-offs &uarr;) &rarr; PD &uarr;
- (total credit lines - open credit lines) &uarr; &rarr; PD &uarr;
- revolving utilization rate &uarr; &rarr; PD &uarr;
- number of hard inquiries within half a year &uarr; &rArr; urgent need to expand credit limit &rarr; PD &uarr;

---

# <center>**Findings**</center>

---

- Healthcare loans show an unusual pattern that warrants further investigation
- Difference due to income verification is basically negligible
- For about the same loan amount, borrowers on the shortest term carry the highest monthly payment burden compared with their longer-term counterparts
- Defaulters are likely to have a wide range of closed credit lines reopened, given that they have a short credit history
- Borrowers can be split into two groups based on revolving utilization rate:
   1. Fully Utilized Group
   2. Non-Fully Utilized Group

---