# Services Available

1. Transfer funds
2. Summary
3. Cancel transfers
4. Submit order
5. Create portfolio
6. Listed Loans

The API is limited to one request per second.

# Doing it

In [10]:
# Imports that do not depend on the path tweak below.
import os
import sys

import numpy as np

# Add the parent directory to the import path before importing from `scripts`
# (presumably that is where the package lives — TODO confirm).
sys.path.append('..')
from scripts.model import create_matrix

# The API credentials are read from the environment, not stored in the notebook.
credentials = os.environ['LENDING_CLUB_API']
investor_id = 5809260


In [3]:
import requests

In [4]:
# Request headers: the credentials are sent as the `Authorization` header value.
headers = {'Authorization': credentials}

In [5]:
# Request the loan listing (showAll=true) with the credentials header.
# A timeout is set because requests waits indefinitely by default, which would
# leave this cell hanging if the API never answers.
r = requests.get('https://api.lendingclub.com/api/investor/v1/loans/listing?showAll=true',
                 headers=headers,
                 timeout=30)

In [6]:
# Stop here unless the listing request succeeded. The message carries the status
# code and the start of the response body so a failure can be diagnosed from the
# cell output alone.
assert r.status_code == 200, 'Listing request failed: HTTP {} {}'.format(
    r.status_code, r.text[:200])

In [7]:
# NOTE(review): repeats the investor_id assignment from the first code cell with
# the same value.
investor_id = 5809260

In [8]:
# Decode the JSON response body and keep the value stored under its 'loans' key.
loans = r.json()['loans']

In [9]:
import pandas as pd
# Build a DataFrame from the listing payload
# (presumably one record per listed loan — TODO confirm).
df = pd.DataFrame(loans)

In [8]:
# Replace earliest_cr_line with the year of that date, as an int.
# NOTE(review): in the recorded run this cell raised
# AttributeError: 'DataFrame' object has no attribute 'earliest_cr_line'.
# The frame built from the API response apparently has no column of that name at
# this point; presumably it only exists after the column renaming done further
# down — TODO confirm and move or remove this cell.
df.earliest_cr_line = df.earliest_cr_line.astype(np.datetime64).dt.year.astype(int)

AttributeError: 'DataFrame' object has no attribute 'earliest_cr_line'

## Predicting the Values

In [9]:
# Newer scikit-learn releases no longer ship sklearn.externals.joblib. Prefer the
# bundled copy when it exists (unchanged behaviour on old installs) and fall back
# to the standalone joblib package otherwise.
try:
    from sklearn.externals import joblib
except ImportError:
    import joblib
from scripts.model import create_matrix
from app.process_api import rename_columns, process_columns

## Steps

1. Load the Model
2. Load the df
3. turn the df into the same matrix as before
4. Predict the values

In [10]:
# Load the saved model from disk (presumably a random forest, judging by the file
# name — TODO confirm). joblib.load unpickles the file, so only load files from a
# trusted source.
model = joblib.load('../model/rf_model.pkl')

In [11]:
# rename_columns is imported from app.process_api; presumably it maps the API's
# column names onto the names used in the training data — TODO confirm.
# Note that `df` is rebound to the result.
df = rename_columns(df)

In [12]:
# process_columns is imported from app.process_api; its behaviour is not visible
# here. `df` is rebound to the result.
df = process_columns(df)

In [13]:
# create_matrix (from scripts.model) returns a pair; X is the matrix whose columns
# are listed below and which is later passed to model.predict_proba.
y, X = create_matrix(df)

In [15]:
# Show the column labels of X as a plain Python list.
list(X.columns)

['Intercept',
 'C(home_ownership)[T.OWN]',
 'C(home_ownership)[T.RENT]',
 'C(home_ownership)[T.OTHER]',
 'C(grade)[T.B]',
 'C(grade)[T.C]',
 'C(grade)[T.D]',
 'C(grade)[T.E]',
 'C(grade)[T.F]',
 'C(purpose)[T.credit_card]',
 'C(purpose)[T.debt_consolidation]',
 'C(purpose)[T.home_improvement]',
 'C(purpose)[T.major_purchase]',
 'C(purpose)[T.medical]',
 'C(purpose)[T.moving]',
 'C(purpose)[T.other]',
 'C(purpose)[T.small_business]',
 'C(purpose)[T.vacation]',
 'C(purpose)[T.OH]',
 'C(purpose)[T.SD]',
 'C(purpose)[T.AZ]',
 'C(purpose)[T.KY]',
 'C(purpose)[T.AL]',
 'C(purpose)[T.NE]',
 'C(purpose)[T.CA]',
 'C(purpose)[T.MD]',
 'C(purpose)[T.NC]',
 'C(purpose)[T.SC]',
 'C(purpose)[T.MO]',
 'C(purpose)[T.TN]',
 'C(purpose)[T.ME]',
 'C(purpose)[T.ND]',
 'C(purpose)[T.NY]',
 'C(purpose)[T.NH]',
 'C(purpose)[T.AR]',
 'C(purpose)[T.CO]',
 'C(purpose)[T.NJ]',
 'C(purpose)[T.LA]',
 'C(purpose)[T.MA]',
 'C(purpose)[T.WV]',
 'C(purpose)[T.GA]',
 'C(purpose)[T.UT]',
 'C(purpose)[T.MS]',
 'C(purpose

In [38]:
# Print every column label of X on its own line.
for i in X.columns:
    print(i)

Intercept
C(home_ownership)[T.OWN]
C(home_ownership)[T.RENT]
C(home_ownership)[T.OTHER]
C(grade)[T.B]
C(grade)[T.C]
C(grade)[T.D]
C(grade)[T.E]
C(grade)[T.F]
C(purpose)[T.credit_card]
C(purpose)[T.debt_consolidation]
C(purpose)[T.home_improvement]
C(purpose)[T.major_purchase]
C(purpose)[T.medical]
C(purpose)[T.moving]
C(purpose)[T.other]
C(purpose)[T.small_business]
C(purpose)[T.vacation]
C(purpose)[T.ND]
C(purpose)[T.DC]
C(purpose)[T.WI]
C(purpose)[T.WY]
C(purpose)[T.UT]
C(purpose)[T.MS]
C(purpose)[T.NE]
C(purpose)[T.ME]
C(purpose)[T.CA]
C(purpose)[T.TX]
C(purpose)[T.IL]
C(purpose)[T.FL]
C(purpose)[T.TN]
C(purpose)[T.HI]
C(purpose)[T.RI]
C(purpose)[T.NV]
C(purpose)[T.OH]
C(purpose)[T.NJ]
C(purpose)[T.MN]
C(purpose)[T.MD]
C(purpose)[T.KS]
C(purpose)[T.IN]
C(purpose)[T.VA]
C(purpose)[T.OK]
C(purpose)[T.NC]
C(purpose)[T.DE]
C(purpose)[T.MA]
C(purpose)[T.AL]
C(purpose)[T.MT]
C(purpose)[T.WV]
C(purpose)[T.CO]
C(purpose)[T.KY]
C(purpose)[T.AZ]
C(purpose)[T.OR]
C(purpose)[T.MO]
C(purpose)[T.

In [22]:
# Store the predicted class probabilities under their own name. Rebinding `model`
# to the prediction array would discard the loaded estimator and make this cell
# fail on a second run.
# NOTE(review): the recorded run raised ValueError because the model was trained
# on 86 features while X has 737; X must be built with the same columns as the
# training matrix before this call can succeed.
predicted_proba = model.predict_proba(X)

ValueError: Number of features of the model must  match the input. Model n_features is 86 and  input n_features is 737 

## Compatibility Issues

In [None]:
import re
def convert(name):
    """Return `name` converted from camelCase/PascalCase to snake_case.

    Two substitution passes insert underscores, then the result is lowercased:

    1. between any character and a following capital letter that starts a run
       of lowercase letters (``loanAmount`` -> ``loan_Amount``);
    2. between a lowercase letter or digit and a following capital letter,
       which catches boundaries the first pass skipped.

    Digits stay attached to the word before them, e.g. ``delinq2Yrs`` becomes
    ``delinq2_yrs``.
    """
    word_split = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    fully_split = re.sub('([a-z0-9])([A-Z])', r'\1_\2', word_split)
    return fully_split.lower()

In [None]:
# Apply convert() to every column label of df.
df.columns = list(map(convert, df.columns))

In [None]:
import numpy as np

In [None]:
# convert() leaves digits attached to the preceding word, so a few converted
# labels still differ from the column names used in the reference data; map them
# explicitly.
df = df.rename(columns={
    'collections12_mths_ex_med': 'collections_12_mths_ex_med',
    'loan_amount': 'loan_amnt',
    'delinq2_yrs': 'delinq_2yrs',
    'inq_last6_mths': 'inq_last_6mths',
    'addr_zip': 'zip_code',
})

In [None]:
# Monthly obligations over monthly income: the installment plus 2% of the
# revolving balance (presumably an assumed minimum monthly payment — TODO
# confirm), divided by annual income / 12.
df['ratio_mth_inc_all_payments'] = (
    (df['installment'] + 0.02 * df['revol_bal']) / (df['annual_inc'] / 12)
)

In [None]:
# Inspect the current column labels of df.
df.columns

## Processing

In [None]:
# Divide emp_length by 12 (presumably months -> years — TODO confirm the API unit).
# NOTE(review): not idempotent — every re-run of this cell divides by 12 again.
df.emp_length = df.emp_length / 12

In [None]:
import sys
# Display the interpreter's module search path.
sys.path

In [None]:
# NOTE(review): elsewhere in this notebook `model` is bound to the object returned
# by joblib.load, while create_matrix is imported as a function from scripts.model
# and called as create_matrix(df). This call looks like it cannot work as written
# — TODO confirm intent or remove.
model.create_matrix()

In [None]:
from app.process_api import process_columns, rename_columns

In [None]:
# NOTE(review): clean_columns and category_processing are not defined or imported
# anywhere in the visible notebook; the nearby import cell brings in
# process_columns and rename_columns instead. Presumably these are older names for
# those helpers — TODO confirm.
df = clean_columns(df)
df = category_processing(df)

In [None]:
# Preview the first rows only instead of rendering the entire frame.
df.head()

## Converting the Matrix

In [None]:
# Rebuild y and X from the current df using create_matrix (from scripts.model).
y, X = create_matrix(df)

In [None]:
# Count how often each home_ownership value occurs.
df.home_ownership.value_counts()

In [None]:
# Inspect the column labels of X.
X.columns

In [None]:
# Convert home_ownership to pandas' categorical dtype.
df.home_ownership = df.home_ownership.astype('category')

In [None]:
# Print every column label of X on its own line.
for i in X.columns:
    print(i)

In [None]:
# Inspect the zip_code column.
df.zip_code

In [None]:
# Number of entries in the addr_state value counts. If the column is categorical,
# value_counts also lists categories that never occur, so this can exceed the
# number of states actually present.
len(df.addr_state.value_counts())

In [11]:
# Load the pickled reference frame (presumably the cleaned training data, judging
# by the file name — TODO confirm). read_pickle unpickles the file, so only use it
# on trusted files.
reference_df = pd.read_pickle('../cleaned_df.pkl')

In [None]:
# Distinct addr_state values in the reference frame.
unique_states = reference_df.addr_state.unique()

In [17]:
# Distinct purpose values in the reference frame.
purposes = reference_df.purpose.unique()

In [47]:
# Display the distinct purposes (the recorded output shows 10 categories).
purposes

[debt_consolidation, credit_card, medical, home_improvement, car, other, small_business, major_purchase, vacation, moving]
Categories (10, object): [debt_consolidation, credit_card, medical, home_improvement, ..., small_business, major_purchase, vacation, moving]

In [None]:
import pickle
# Save the states seen in the reference data as a plain list.
# list(...) replaces .get_values(), which pandas deprecated and later removed and
# which never existed on a NumPy array; iterating works whether unique() returned
# a Categorical or an ndarray.
with open('state_list.pkl', 'wb') as picklefile:
    pickle.dump(list(unique_states), picklefile)

In [19]:
import pickle
# Save the distinct purposes from the reference data.
# NOTE(review): despite the file name, the object stored is a set, whereas
# state_list.pkl stores a list — confirm which type the consumers expect.
with open('purpose_list.pkl', 'wb') as picklefile:
    pickle.dump(set(purposes), picklefile)

In [None]:
# Read back the state list written by the earlier cell. pickle.load must only be
# used on trusted files.
with open('state_list.pkl', 'rb') as picklefile:
    state_list = pickle.load(picklefile)

In [None]:
# Distinct addr_state values in df. This rebinds unique_states, which an earlier
# cell computed from reference_df.
unique_states = df.addr_state.unique()

In [None]:
# States in the saved state list that do not occur in df.
# set(...) iterates the values directly; .get_values() was deprecated and later
# removed from pandas and does not exist on a NumPy array.
set(state_list) - set(unique_states)

In [None]:
# Number of distinct purpose values in df.
len(df.purpose.unique())

In [None]:
# Make addr_state categorical and register every state from state_list as a
# category, including states that do not occur in df.
# add_categories returns a new Series instead of modifying the column, so its
# result has to be assigned back. It also raises if asked to add a category that
# already exists, so only the missing states are added.
df['addr_state'] = df['addr_state'].astype('category')
missing_states = [state for state in state_list
                  if state not in df['addr_state'].cat.categories]
df['addr_state'] = df['addr_state'].cat.add_categories(missing_states)
df['addr_state'].cat.categories

In [None]:
# Display the loaded state list.
state_list

In [None]:
# Inspect the addr_state column.
df.addr_state

In [None]:
# The original line ended in a dangling attribute access (`.cat.`), which is a
# syntax error. Show the categories registered on addr_state instead.
df.addr_state.cat.categories

# More Advanced Payback Stats

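Here I compute payback statistics for the delinquent loans (`delinq == 1`) in the reference data, grouped by grade: the mean total payment, and its ratio to the mean loan amount.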

In [13]:
# Inspect the column labels of the reference frame.
reference_df.columns

Index(['total_pymnt', 'zip_code', 'member_id', 'id', 'loan_amnt', 'int_rate',
       'installment', 'emp_length', 'home_ownership', 'grade', 'sub_grade',
       'emp_title', 'issue_d', 'loan_status', 'annual_inc',
       'verification_status', 'purpose', 'addr_state', 'inq_last_6mths', 'dti',
       'revol_util', 'mths_since_last_delinq', 'pub_rec', 'revol_bal',
       'open_acc', 'collections_12_mths_ex_med', 'delinq_2yrs',
       'earliest_cr_line', 'fico_range_low', 'last_credit_pull_d',
       'ratio_inc_debt', 'ratio_inc_installment', 'ratio_mth_inc_all_payments',
       'year_issued', 'month_issued', 'delinq'],
      dtype='object')

In [15]:
# Mean total_pymnt per grade, restricted to rows with delinq == 1.
(reference_df
 .loc[reference_df['delinq'] == 1]
 .groupby('grade')['total_pymnt']
 .mean())

grade
A    5720.001445
B    6157.045291
C    6216.491646
D    6474.710493
E    7983.640198
F    8859.668932
G    9144.884202
Name: total_pymnt, dtype: float64

In [16]:
# Rows with delinq == 1, grouped by grade, kept for reuse in the next cell.
grouped_deliq = reference_df.loc[reference_df['delinq'] == 1].groupby('grade')

In [18]:
# Per grade: mean total payment divided by mean loan amount. This is a ratio of
# group means, not the mean of per-loan ratios.
grouped_deliq['total_pymnt'].mean().div(grouped_deliq['loan_amnt'].mean())

grade
A    0.487823
B    0.494928
C    0.458241
D    0.445103
E    0.440516
F    0.452705
G    0.427903
dtype: float64