## Lending Club Analysis

### Prepare datasets for analysis

Import basic libraries

In [1]:
import numpy as np
import pandas as pd

Change pandas print options so we can print all desired rows/columns without truncation

In [2]:
# A limit of None removes pandas' cap, so frames are rendered without truncation.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

Read in the accepted-loans data (gzip-compressed CSV covering 2007 to 2017 Q3)

In [3]:
# Positions of the columns that are forced to `str` while parsing
# (presumably the mixed-type columns — TODO confirm against the raw file).
str_column_positions = (0, 18, 48, 58, 117, 128, 129, 130, 133, 134, 135, 138, 144, 145, 146)

# Load the full accepted-loans file from the gzip-compressed CSV.
acc_df = pd.read_csv(
    '../input/accepted_2007_to_2017Q3.csv.gz',
    compression='gzip',
    dtype={position: str for position in str_column_positions},
    low_memory=True,
)

In [4]:
# Keep only the columns used by the rest of the notebook.
# The explicit .copy() makes the result an independent frame, so the column
# assignment in the following cell does not raise SettingWithCopyWarning.
acc_df = acc_df[['loan_amnt', 'title', 'annual_inc', 'term', 'emp_length']].copy()

In [5]:
# Pull the number of months out of term strings such as "60 months".
term_months = acc_df['term'].str.extract('([0-9]+)', expand=False).astype(float)

# Loan amount spread evenly over the term and scaled to twelve months
# (principal only — no interest term appears in this formula).
yearly_repayment = acc_df['loan_amnt'] / term_months * 12

# Express the yearly repayment as a percentage of annual income.
acc_df['debt_to_income_ratio'] = yearly_repayment / acc_df['annual_inc'] * 100

In [6]:
# Preview the first five rows, including the newly derived ratio column.
acc_df.head(n=5)

Unnamed: 0,loan_amnt,title,annual_inc,term,emp_length,debt_to_income_ratio
0,15000.0,Debt consolidation,78000.0,60 months,10+ years,3.846154
1,10400.0,Credit card refinancing,58000.0,36 months,8 years,5.977011
2,21425.0,Credit card refinancing,63800.0,60 months,6 years,6.716301
3,12800.0,Car financing,125000.0,60 months,10+ years,2.048
4,7650.0,Debt consolidation,50000.0,36 months,< 1 year,5.1


In [7]:
# Drop the raw inputs of the ratio (annual_inc, term) and label every row as an
# accepted loan. .assign() returns a new frame, so the label is never written
# into a column-subset of the previous frame — the pattern that raises
# SettingWithCopyWarning.
acc_df = (
    acc_df[['loan_amnt', 'title', 'debt_to_income_ratio', 'emp_length']]
    .assign(accepted=1)
)
acc_df.head(5)

Unnamed: 0,loan_amnt,title,debt_to_income_ratio,emp_length,accepted
0,15000.0,Debt consolidation,3.846154,10+ years,1
1,10400.0,Credit card refinancing,5.977011,8 years,1
2,21425.0,Credit card refinancing,6.716301,6 years,1
3,12800.0,Car financing,2.048,10+ years,1
4,7650.0,Debt consolidation,5.1,< 1 year,1


In [8]:
# Original column name -> name used from this point on.
column_renames = {
    "loan_amnt": "amountRequested",
    "title": "reason",
    "debt_to_income_ratio": "salaryImpactRatio",
    "emp_length": "termOfService",
}

# index=str additionally converts every row label to its string form.
acc_df = acc_df.rename(index=str, columns=column_renames)
acc_df.head(5)

Unnamed: 0,amountRequested,reason,salaryImpactRatio,termOfService,accepted
0,15000.0,Debt consolidation,3.846154,10+ years,1
1,10400.0,Credit card refinancing,5.977011,8 years,1
2,21425.0,Credit card refinancing,6.716301,6 years,1
3,12800.0,Car financing,2.048,10+ years,1
4,7650.0,Debt consolidation,5.1,< 1 year,1


In [9]:
# Persist the reduced frame as a gzip-compressed CSV. index_label=False writes
# the header row without a field for the index column.
acc_df.to_csv(
    '../input/accepted_small.csv.gz',
    index_label=False,
    compression='gzip',
)