<h1>Lending Club Data</h1>

<h2>Multivariate Regression</h2>

In [7]:
# import libraries
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.api as sm
import numpy as np
import math
from sklearn.linear_model import LogisticRegression

In [8]:
# read data into dataframe
# skiprows=[0] skips the first line of the file so that the second line is
# used as the column header row
loansData = pd.read_csv("LoanStats3d.csv", skiprows=[0])

# preview the first rows (only the last expression of a cell is displayed)
loansData.head()

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,...,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,collections_12_mths_ex_med,mths_since_last_major_derog,policy_code
0,53624987,57155760,6000,6000,6000,36 months,11.53%,197.95,B,B5,...,0,0,0,,0.0,Aug-2015,Jul-2015,0,,1
1,53584764,57115519,6200,6200,6200,36 months,7.89%,193.98,A,A5,...,0,0,0,Jul-2015,193.98,Aug-2015,Jul-2015,0,,1
2,53222534,56742275,19000,19000,19000,36 months,8.18%,596.97,B,B1,...,0,0,0,Jul-2015,596.97,Aug-2015,Jul-2015,0,47.0,1
3,53564350,57095071,21000,21000,20975,60 months,21.99%,579.88,F,F1,...,0,0,0,,0.0,Aug-2015,Jul-2015,0,,1
4,52038169,55467933,10000,10000,10000,60 months,10.99%,217.38,B,B4,...,0,0,0,Jul-2015,217.38,Aug-2015,Jul-2015,0,,1


In [50]:
# annual income column, keeping only the rows where a value is present
annual_inc = loansData.loc[:, "annual_inc"].dropna()
annual_inc

0          62000
1          80000
2          84000
3         107000
4          45000
5          27000
6          51000
7          36000
8          75000
9          50000
10         39000
11        150000
12         17500
13         70000
14         52000
15        127000
16         43000
17         90000
18         43900
19        106000
20         25000
21         28000
22         44000
23        113000
24        120000
25         48500
26        112750
27         61000
28         77000
29         51563
           ...  
180072     70000
180073     48000
180074     72000
180075     93600
180076     50000
180077     57000
180078     54000
180079     90000
180080     52000
180081     55000
180082    103000
180083     92000
180084     65000
180085     50000
180086     30000
180087     75000
180088     57000
180089     40000
180090     54000
180091     27000
180092     50000
180093     32000
180094     40000
180095     25400
180096     63000
180097     79000
180098     31000
180099     644

In [51]:
# interest rate column (still "NN.NN%" strings), keeping only non-null rows
int_rate = loansData.loc[:, "int_rate"].dropna()
int_rate


0          11.53%
1           7.89%
2           8.18%
3          21.99%
4          10.99%
5          17.57%
6          15.61%
7          17.86%
8          18.55%
9          11.53%
10         20.99%
11         17.86%
12         18.25%
13         10.99%
14          7.89%
15         16.99%
16         20.99%
17         19.99%
18         15.61%
19         12.69%
20         12.29%
21         21.99%
22         19.19%
23         12.29%
24         16.99%
25         19.99%
26         13.99%
27         17.57%
28         19.19%
29         17.86%
           ...   
180072     11.99%
180073     15.99%
180074      6.99%
180075     13.66%
180076     15.59%
180077     11.44%
180078      8.19%
180079     17.86%
180080     17.86%
180081     15.59%
180082     15.99%
180083      8.67%
180084      6.03%
180085      8.19%
180086     14.99%
180087      6.49%
180088     14.31%
180089      9.49%
180090      6.03%
180091     16.49%
180092      8.67%
180093     19.24%
180094      9.49%
180095     15.59%
180096    

In [61]:
# remove % from interest rate values and convert them to floats
# Derived straight from the raw column (not from a previously cleaned
# variable) so this cell can be re-run safely and works on a fresh kernel.
int_rate = (
    loansData["int_rate"]
    .dropna()
    .map(lambda rate: float(rate.rstrip("%")))
)
int_rate


0         11.53
1          7.89
2          8.18
3         21.99
4         10.99
5         17.57
6         15.61
7         17.86
8         18.55
9         11.53
10        20.99
11        17.86
12        18.25
13        10.99
14         7.89
15        16.99
16        20.99
17        19.99
18        15.61
19        12.69
20        12.29
21        21.99
22        19.19
23        12.29
24        16.99
25        19.99
26        13.99
27        17.57
28        19.19
29        17.86
          ...  
180072    11.99
180073    15.99
180074     6.99
180075    13.66
180076    15.59
180077    11.44
180078     8.19
180079    17.86
180080    17.86
180081    15.59
180082    15.99
180083     8.67
180084     6.03
180085     8.19
180086    14.99
180087     6.49
180088    14.31
180089     9.49
180090     6.03
180091    16.49
180092     8.67
180093    19.24
180094     9.49
180095    15.59
180096    11.99
180097    11.99
180098    11.99
180099    19.99
180100    15.99
180101    11.99
Name: int_rate, dtype: float64

In [68]:
# use annual income to model interest rate
# Put both series into one frame so rows are matched on the loan index, then
# drop any row that is missing either value. Cleaning each series separately
# could leave X and y with different lengths or with shifted rows.
model_data = pd.DataFrame({"int_rate": int_rate, "annual_inc": annual_inc}).dropna()

# convert columns (series) into arrays
X = np.asarray(model_data["annual_inc"])
y = np.asarray(model_data["int_rate"])

# fit OLS model with intercept on annual income
X = sm.add_constant(X)  # adds the constant column used for the intercept
model = sm.OLS(y, X)
f = model.fit()


In [70]:
# regression results: display the summary tables (fit statistics,
# coefficient estimates and residual diagnostics) of the fitted OLS model
f.summary()


0,1,2,3
Dep. Variable:,y,R-squared:,0.012
Model:,OLS,Adj. R-squared:,0.012
Method:,Least Squares,F-statistic:,2103.0
Date:,"Sun, 30 Aug 2015",Prob (F-statistic):,0.0
Time:,19:35:36,Log-Likelihood:,-520270.0
No. Observations:,180102,AIC:,1041000.0
Df Residuals:,180100,BIC:,1041000.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5
,coef,std err,t,P>|t|,[95.0% Conf. Int.]
const,13.4117,0.016,814.911,0.000,13.379 13.444
x1,-7.807e-06,1.7e-07,-45.860,0.000,-8.14e-06 -7.47e-06

0,1,2,3
Omnibus:,9973.069,Durbin-Watson:,1.994
Prob(Omnibus):,0.0,Jarque-Bera (JB):,11901.801
Skew:,0.586,Prob(JB):,0.0
Kurtosis:,3.461,Cond. No.,155000.0


In [99]:
# add home ownership to model

# drop null values
home_ownership = loansData["home_ownership"].dropna()

# create dummies for categorical values; dtype=int keeps the indicators as
# 0/1 integers (newer pandas versions default to booleans)
home_ownership_dummies = pd.get_dummies(home_ownership, dtype=int)

# TODO: combine the dummies with annual_inc into the regressor matrix for the
# interest rate model, leaving one category out as the baseline so the
# indicator columns are not collinear with the intercept.
home_ownership_dummies



Unnamed: 0,MORTGAGE,OWN,RENT
0,0,0,1
1,0,0,1
2,1,0,0
3,1,0,0
4,0,0,1
5,0,0,1
6,1,0,0
7,0,0,1
8,1,0,0
9,1,0,0
