In [26]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import seaborn as sns
%matplotlib inline
pd.set_option('display.max_columns', None)

In [27]:
# Column names from a Kaggle loan dataset, kept as a raw string for reference only.
# NOTE(review): no visible cell reads `old_col`, and these names differ from the
# snake_case columns used in the cells below — confirm whether it is still needed.
old_col= '''Index(['Loan ID', 'Customer ID', 'Loan Status', 'Current Loan Amount', 'Term',
       'Credit Score', 'Annual Income', 'Years in current job',
       'Home Ownership', 'Purpose', 'Monthly Debt', 'Years of Credit History',
       'Months since last delinquent', 'Number of Open Accounts',
       'Number of Credit Problems', 'Current Credit Balance',
       'Maximum Open Credit', 'Bankruptcies', 'Tax Liens'],
      dtype='object')'''

# Read in CSV

In [28]:
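# One-time conversion of the raw CSV export into the zipped pickle that is loaded below.
# Kept (commented out) for provenance; uncomment only to regenerate the pickle.
#df = pd.read_csv('~/Downloads/LoanStats3c.csv',header = 1)
#df.to_pickle('./data/loan_data_2014.zip',compression = 'zip')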

# Drop unnecessary columns:

In [48]:
# Load the zipped pickle of the 2014 loan data and show its (rows, columns) shape.
# NOTE(review): unpickling can execute arbitrary code — only load pickle files
# that were produced locally or come from a trusted source.
df = pd.read_pickle('./data/loan_data_2014.zip',compression = 'zip')
df.shape

(235631, 145)

In [49]:
# Keep only loans whose outcome is final: fully paid or charged off
has_final_outcome = df['loan_status'].isin(['Fully Paid', 'Charged Off'])
df = df.loc[has_final_outcome]

In [50]:
# Columns to remove, grouped by the reason given for removing them.
# Groups whose comment says "no reason recorded" had no explanatory comment
# in the original list.
drop_groups = {
    # Only looking at loans for individuals, not joint
    'joint_application': [
        'annual_inc_joint',
        'application_type',
        'dti_joint',
        'verification_status_joint',
        'revol_bal_joint',
    ],
    # Don't need specific ID info
    'identifiers': [
        'id',
        'member_id',
    ],
    # Only care about information available pre loan
    'not_available_pre_loan': [
        'collection_recovery_fee',
        'collections_12_mths_ex_med',
    ],
    # Hard to classify, may use regex text processing in future
    'free_text_employment': [
        'emp_title',
    ],
    # Keeping the hardship status but looking at pre-loan metrics only
    'hardship_details': [
        'hardship_flag',
        'hardship_type',
        'hardship_reason',
        'deferral_term',
        'hardship_amount',
        'hardship_start_date',
        'hardship_end_date',
        'payment_plan_start_date',
        'hardship_length',
        'hardship_dpd',
        'hardship_loan_status',
        'orig_projected_additional_accrued_interest',
        'hardship_payoff_balance_amount',
        'hardship_last_payment_amount',
    ],
    # Payment fields (no reason recorded)
    'payments': [
        'last_pymnt_amnt',
        'last_pymnt_d',
        'next_pymnt_d',
        'pymnt_plan',
        'total_pymnt',
        'total_pymnt_inv',
        'total_rec_int',
        'total_rec_late_fee',
        'total_rec_prncp',
    ],
    # Trying to extrapolate beyond certain issuing dates of loans
    'issue_date_dependent': [
        'issue_d',
        'mths_since_last_major_derog',
        'mths_since_last_record',
        'mths_since_recent_inq',
        'num_tl_30dpd',
        'num_tl_120dpd_2m',
    ],
    # Debt-settlement flags and free-text description (no reason recorded)
    'settlement_flags_and_description': [
        'debt_settlement_flag',
        'debt_settlement_flag_date',
        'desc',
    ],
    # sub_grade category already contains grade
    'redundant_grade': [
        'grade',
    ],
    # This is for investors / loan funding
    'investor_funding': [
        'initial_list_status',
        'funded_amnt',
        'funded_amnt_inv',
    ],
    # Probably useful metrics, but all NaN in this dataset
    'all_nan_metrics': [
        'open_acc_6m',
        'open_il_12m',
        'open_il_24m',
        'open_act_il',
        'open_rv_12m',
        'open_rv_24m',
    ],
    # Outstanding principal, policy code and recoveries (no reason recorded)
    'outstanding_and_recoveries': [
        'out_prncp',
        'out_prncp_inv',
        'policy_code',
        'recoveries',
    ],
    # Columns related to debt settlement; we only care about pre loan features
    'settlement_details': [
        'settlement_status',
        'settlement_date',
        'settlement_amount',
        'settlement_percentage',
        'settlement_term',
    ],
    # Only looking for one applicant
    'secondary_applicant': [
        #'sec_app_fico_range_low',
        #'sec_app_fico_range_high',
        'sec_app_earliest_cr_line',
        'sec_app_inq_last_6mths',
        'sec_app_mort_acc',
        'sec_app_open_acc',
        'sec_app_revol_util',
        'sec_app_open_act_il',
        'sec_app_num_rev_accts',
        'sec_app_chargeoff_within_12_mths',
        'sec_app_collections_12_mths_ex_med',
        'sec_app_mths_since_last_major_derog',
    ],
    # Remaining columns (no reason recorded)
    'miscellaneous': [
        'title',
        'total_cu_tl',
        'url',
        'zip_code',
    ],
}

# Flatten the groups into one list and drop everything in a single call.
columns_to_drop = [name for group in drop_groups.values() for name in group]
df = df.drop(columns=columns_to_drop)

# Remove rows, then columns, that contain nothing but NaN; the column index is
# captured before and after so the removed columns can be reported afterwards.
print("Before dropping NaN's: ", df.shape)
df = df.dropna(how='all')
print('After dropping NaN Rows: ', df.shape)
pre_drop_col = df.columns
df = df.dropna(axis=1, how='all')
print('After dropping NaN Columns: ', df.shape)
post_drop_col = df.columns

Before dropping NaN's:  (223102, 70)
After dropping NaN Rows:  (223102, 70)
After dropping NaN Columns:  (223102, 63)


In [51]:
# Columns present before the all-NaN column drop but missing afterwards
dropped_nan_columns = {col for col in pre_drop_col if col not in post_drop_col}
print("Dropped NaN Columns: ", dropped_nan_columns)

Dropped NaN Columns:  {'il_util', 'inq_last_12m', 'all_util', 'inq_fi', 'mths_since_rcnt_il', 'total_bal_il', 'max_bal_bc'}


In [52]:

#plt.figure(figsize=(24, 24))
#sns.heatmap(df.corr(),vmin = -1, vmax = 1, cmap = 'seismic')

In [53]:
df.sample(5)

Unnamed: 0,loan_amnt,term,int_rate,installment,sub_grade,emp_length,home_ownership,annual_inc,verification_status,loan_status,purpose,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,open_acc,pub_rec,revol_bal,revol_util,total_acc,last_credit_pull_d,acc_now_delinq,tot_coll_amt,tot_cur_bal,total_rev_hi_lim,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,hardship_status,disbursement_method
167023,8500.0,36 months,8.90%,269.91,A5,10+ years,RENT,39000.0,Verified,Fully Paid,credit_card,TX,5.38,0.0,Mar-1998,1.0,51.0,13.0,0.0,7009.0,32.9%,37.0,Jul-2018,0.0,30673.0,7009.0,21300.0,7.0,539.0,10867.0,37.5,0.0,0.0,172.0,193.0,12.0,12.0,0.0,21.0,51.0,51.0,1.0,4.0,5.0,8.0,23.0,2.0,13.0,35.0,5.0,13.0,0.0,1.0,97.3,25.0,0.0,0.0,21300.0,7009.0,17400.0,0.0,,Cash
176300,4000.0,36 months,12.99%,134.76,B5,2 years,MORTGAGE,40000.0,Not Verified,Fully Paid,credit_card,MD,16.14,0.0,Apr-2007,0.0,,7.0,0.0,5579.0,74.4%,9.0,Mar-2019,0.0,0.0,188905.0,7500.0,7.0,26986.0,706.0,85.9,0.0,0.0,84.0,19.0,8.0,7.0,2.0,18.0,,,0.0,1.0,4.0,1.0,1.0,3.0,4.0,4.0,4.0,7.0,0.0,3.0,100.0,100.0,0.0,0.0,194990.0,17833.0,5000.0,13290.0,,Cash
167508,16500.0,36 months,6.62%,506.62,A2,10+ years,MORTGAGE,61800.0,Not Verified,Fully Paid,credit_card,SD,15.39,0.0,Sep-2001,0.0,,8.0,0.0,13791.0,39.9%,33.0,Aug-2018,0.0,0.0,172794.0,34600.0,5.0,21599.0,20743.0,30.9,0.0,0.0,151.0,147.0,5.0,5.0,1.0,35.0,,,0.0,3.0,5.0,3.0,10.0,14.0,5.0,18.0,5.0,8.0,0.0,2.0,100.0,33.3,0.0,0.0,210399.0,32658.0,30000.0,22799.0,,Cash
3283,20400.0,36 months,8.67%,645.59,B1,10+ years,MORTGAGE,100000.0,Source Verified,Fully Paid,credit_card,MN,16.4,0.0,Jun-1991,2.0,,19.0,0.0,38069.0,79.7%,36.0,Oct-2017,0.0,0.0,205250.0,77100.0,5.0,11403.0,34171.0,85.6,0.0,0.0,121.0,282.0,2.0,2.0,5.0,2.0,,,0.0,7.0,11.0,7.0,15.0,4.0,14.0,27.0,11.0,19.0,0.0,4.0,100.0,57.1,0.0,0.0,335186.0,88559.0,62500.0,54286.0,,Cash
199020,15000.0,36 months,10.99%,491.01,B2,2 years,MORTGAGE,60000.0,Source Verified,Fully Paid,debt_consolidation,TX,9.86,0.0,Sep-2000,1.0,,9.0,0.0,23641.0,34.5%,18.0,Mar-2019,0.0,0.0,154182.0,68600.0,1.0,17131.0,23693.0,33.4,0.0,0.0,94.0,162.0,5.0,5.0,1.0,116.0,,,0.0,2.0,4.0,4.0,8.0,3.0,8.0,14.0,4.0,9.0,0.0,1.0,100.0,50.0,0.0,0.0,204953.0,23641.0,35600.0,0.0,,Cash


In [54]:
#df[df['num_tl_120dpd_2m'] >= 1][['num_tl_30dpd','num_tl_120dpd_2m','num_tl_90g_dpd_24m']]

In [55]:
# df[['int_rate','grade','sub_grade']].sort_values(by = 'int_rate')

# Clean Columns

In [56]:
def drop_percentage_signs(df, column_name):
    """Replace a percent-string column with a float column ``<column_name>_percent``.

    Values such as ``'8.90%'`` become ``8.9``. Surrounding whitespace and an
    optional trailing ``%`` are stripped, so values without a percent sign keep
    all of their digits. Missing values stay ``NaN`` instead of raising.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column_name``. It is not modified.
    column_name : str
        Name of the column holding percentage values.

    Returns
    -------
    pandas.DataFrame
        A new frame without ``column_name`` and with the parsed float64 column
        appended as the last column.

    Raises
    ------
    KeyError
        If ``column_name`` is not a column of ``df``.
    ValueError
        If a non-missing value cannot be parsed as a number.
    """
    new_column_name = column_name + '_percent'
    raw = df[column_name]
    # Only remove a '%' that is actually there; blindly slicing off the last
    # character corrupts values without a sign and turns 'nan' into 'na'.
    text = raw.astype(str).str.strip().str.rstrip('%')
    # Mask the stringified missing values back to NaN before parsing.
    values = pd.to_numeric(text.where(raw.notna())).astype(np.float64)
    # Build the result without adding the new column to the caller's frame.
    return df.drop(columns=[column_name]).assign(**{new_column_name: values})

In [57]:
df.shape

(223102, 63)

In [58]:
#df['revol_util'].fillna('0%',inplace = True)
#df['revol_util'] =df['revol_util'].astype(str).str[:-1]


In [59]:
# Clean columns
# Convert loan terms into two categories 'short' and 'long'
df['term'] = df['term'].str.replace('36 months','short').str.replace('60 months','long').str.strip()

# Convert interest rate from percentage to float.
# Rows without an interest rate are removed from the frame itself first:
# calling dropna(inplace=True) on the selected column never removes rows from df.
df = df.dropna(subset=['int_rate'])
df = drop_percentage_signs(df, 'int_rate')


# Fill Na's with 0.0% in revolving balance (unused revolving balance counts as NaN).
# The filled column is assigned back explicitly; an in-place fillna on the
# selected column is chained assignment and is not guaranteed to update df.
df = df.assign(revol_util=df['revol_util'].fillna('0.0%'))
df = drop_percentage_signs(df, 'revol_util')

In [60]:
df.shape

(223102, 63)

# One hot encoding 'Months Since' Variables

- First use `pd.cut()` to bin the data into categories.
- Name the bins via the `labels` argument in the same call.
- Then fill the NaNs with an additional 'never' category.
- Finally, run `pd.get_dummies` on the resulting columns.

In [61]:
def _bin_months_since(months, bin_labels):
    """Cut a 'months since' column into 5 labelled bins; missing values become 'never'."""
    binned = pd.cut(months, 5, labels=bin_labels)
    return binned.cat.add_categories('never').fillna('never')


# NOTE(review): pd.cut with an integer bin count makes equal-width bins over the
# observed min/max, so these year labels are only accurate if the data really
# spans the implied range — confirm against the actual column maxima.
delinquency_labels = ['0-3 years','3-6 years','6-9 years','9-12 years','12-15 years']
bankcard_labels = ['0-10 years','10-20 years','20-30 years','30-40 years','40-50 years']

# Column -> bin labels, processed in the listed order.
months_since_labels = {
    'mths_since_last_delinq': delinquency_labels,
    'mths_since_recent_bc': bankcard_labels,
    'mths_since_recent_bc_dlq': delinquency_labels,
    'mths_since_recent_revol_delinq': delinquency_labels,
}

for months_col, bin_labels in months_since_labels.items():
    df[months_col] = _bin_months_since(df[months_col], bin_labels)


In [62]:
#pd.get_dummies(df,drop_first = True)

In [63]:
def add_days_since_col(df, column_name, reference_date=None):
    """Replace a ``'Mon-YYYY'`` date column with the number of days elapsed since it.

    The dates are parsed with the ``'%b-%Y'`` format (e.g. ``'Mar-1998'``) and
    subtracted from ``reference_date``. Missing dates produce ``NaN`` instead
    of raising, so columns with gaps can be converted too.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column_name``. It is not modified.
    column_name : str
        Name of the column holding ``'Mon-YYYY'`` strings.
    reference_date : date-like, optional
        Date the elapsed days are measured to. Defaults to today's date; pass a
        fixed date to make the result reproducible across runs.

    Returns
    -------
    pandas.DataFrame
        A new frame without ``column_name`` and with ``days_since_<column_name>``
        appended as the last column.

    Raises
    ------
    KeyError
        If ``column_name`` is not a column of ``df``.
    ValueError
        If a non-missing value does not match the ``'%b-%Y'`` format.
    """
    new_col_name = 'days_since_' + column_name
    if reference_date is None:
        # Resolve "today" once so every row is measured against the same date.
        reference_date = dt.datetime.today().date()
    reference = pd.Timestamp(reference_date)
    # Vectorised parse; missing entries become NaT rather than raising.
    parsed = pd.to_datetime(df[column_name], format='%b-%Y')
    days = (reference - parsed).dt.days
    # Build the result without adding the new column to the caller's frame.
    return df.drop(columns=[column_name]).assign(**{new_col_name: days})
# df['days_since_earliest_cr_line'] = df['earliest_cr_line'].apply(lambda x: (dt.datetime.today().date() - 
#                                                       dt.datetime.strptime(x,'%b-%Y').date()).days)
#df=df.drop(columns = ['earliest_cr_line'])

# Replace earliest_cr_line with a days_since_earliest_cr_line column.
df = add_days_since_col(df,'earliest_cr_line')
# NOTE(review): presumably disabled because last_credit_pull_d has missing
# values (a later cell lists the rows where it is NaN) — confirm before enabling.
#df = add_days_since_col(df,'last_credit_pull_d')

In [65]:
#df = add_days_since_col(df,'last_credit_pull_d')

In [66]:
df.loc[df['last_credit_pull_d'].isna()]

Unnamed: 0,loan_amnt,term,installment,sub_grade,emp_length,home_ownership,annual_inc,verification_status,loan_status,purpose,addr_state,dti,delinq_2yrs,inq_last_6mths,mths_since_last_delinq,open_acc,pub_rec,revol_bal,total_acc,last_credit_pull_d,acc_now_delinq,tot_coll_amt,tot_cur_bal,total_rev_hi_lim,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,hardship_status,disbursement_method,int_rate_percent,revol_util_percent,days_since_earliest_cr_line
15956,1500.0,short,49.43,B4,10+ years,RENT,120000.0,Source Verified,Fully Paid,major_purchase,MS,1.94,0.0,0.0,3-6 years,4.0,1.0,880.0,27.0,,0.0,0.0,4400.0,3250.0,1.0,1100.0,1125.0,19.0,0.0,0.0,142.0,233.0,24.0,23.0,2.0,0-10 years,3-6 years,3-6 years,0.0,2.0,3.0,5.0,10.0,6.0,3.0,19.0,3.0,4.0,0.0,0.0,56.0,0.0,1.0,0.0,19954.0,4400.0,2250.0,16704.0,,Cash,11.44,27.0,8733
26198,12000.0,short,395.37,B4,2 years,RENT,85000.0,Source Verified,Fully Paid,debt_consolidation,NY,9.93,0.0,1.0,never,8.0,0.0,11529.0,9.0,,0.0,0.0,21852.0,18100.0,3.0,3121.0,3280.0,68.0,0.0,0.0,85.0,220.0,10.0,8.0,0.0,0-10 years,never,never,0.0,5.0,6.0,5.0,5.0,2.0,6.0,7.0,5.0,7.0,0.0,2.0,100.0,40.0,0.0,0.0,30100.0,21852.0,16400.0,12000.0,,Cash,11.44,64.0,8337
26368,19750.0,long,449.28,C2,10+ years,RENT,44136.0,Verified,Charged Off,debt_consolidation,CA,27.62,0.0,0.0,never,10.0,0.0,8397.0,20.0,,0.0,0.0,71132.0,24250.0,4.0,7113.0,6375.0,43.0,0.0,0.0,210.0,216.0,7.0,7.0,0.0,0-10 years,6-9 years,3-6 years,0.0,2.0,6.0,4.0,5.0,9.0,6.0,11.0,5.0,10.0,0.0,3.0,95.0,50.0,0.0,0.0,100812.0,71132.0,12750.0,76562.0,,Cash,12.99,35.0,8214
30551,20725.0,short,639.84,A3,,MORTGAGE,60000.0,Verified,Fully Paid,credit_card,GA,19.64,0.0,3.0,never,8.0,0.0,14308.0,20.0,,0.0,0.0,32797.0,22300.0,2.0,4685.0,3000.0,62.0,0.0,0.0,177.0,240.0,16.0,16.0,1.0,0-10 years,never,never,0.0,3.0,6.0,4.0,4.0,7.0,6.0,12.0,6.0,7.0,0.0,0.0,100.0,66.7,0.0,0.0,54959.0,32797.0,9000.0,32659.0,,Cash,6.99,64.0,8976
32506,7550.0,short,259.85,C4,< 1 year,RENT,27000.0,Not Verified,Fully Paid,debt_consolidation,FL,14.09,0.0,0.0,never,7.0,0.0,8106.0,13.0,,0.0,0.0,14498.0,15300.0,2.0,2071.0,3200.0,56.0,0.0,0.0,108.0,148.0,13.0,13.0,0.0,0-10 years,never,never,0.0,4.0,5.0,7.0,7.0,3.0,5.0,10.0,4.0,7.0,0.0,0.0,100.0,25.0,0.0,0.0,26604.0,14498.0,12800.0,11304.0,,Cash,14.49,53.0,6176
38118,25000.0,short,846.57,C2,10+ years,MORTGAGE,68500.0,Source Verified,Fully Paid,debt_consolidation,GA,25.05,1.0,0.0,0-3 years,10.0,1.0,21077.0,35.0,,0.0,0.0,311988.0,22000.0,3.0,31199.0,699.0,95.3,0.0,0.0,124.0,420.0,96.0,7.0,4.0,0-10 years,never,never,0.0,5.0,7.0,5.0,11.0,12.0,7.0,18.0,7.0,10.0,0.0,1.0,94.3,100.0,0.0,1.0,323300.0,59595.0,15000.0,41875.0,,Cash,13.35,95.8,14455
53768,20000.0,long,409.28,A5,10+ years,OWN,90001.0,Source Verified,Fully Paid,debt_consolidation,MT,10.45,0.0,1.0,never,7.0,0.0,16202.0,13.0,,0.0,0.0,241185.0,45200.0,1.0,34455.0,16498.0,49.5,0.0,0.0,152.0,195.0,40.0,17.0,1.0,0-10 years,never,never,0.0,2.0,2.0,2.0,2.0,6.0,4.0,6.0,2.0,7.0,0.0,0.0,100.0,0.0,0.0,0.0,290376.0,23923.0,32700.0,22814.0,,Cash,8.39,35.8,7607
55207,8000.0,short,269.52,C1,10+ years,OWN,110000.0,Not Verified,Fully Paid,debt_consolidation,CA,9.32,3.0,3.0,0-3 years,12.0,0.0,8254.0,40.0,,0.0,0.0,392649.0,12800.0,3.0,35695.0,35.0,88.3,0.0,0.0,159.0,259.0,14.0,11.0,6.0,0-10 years,0-3 years,0-3 years,2.0,1.0,3.0,1.0,6.0,15.0,5.0,19.0,3.0,12.0,0.0,1.0,75.0,100.0,0.0,0.0,480072.0,19887.0,300.0,29196.0,,Cash,12.99,64.5,9555
60638,8000.0,short,267.6,B5,3 years,RENT,70000.0,Verified,Fully Paid,debt_consolidation,TX,25.54,0.0,0.0,3-6 years,9.0,0.0,4526.0,17.0,,0.0,912.0,42169.0,10900.0,6.0,4685.0,469.0,80.5,0.0,0.0,146.0,235.0,8.0,8.0,0.0,0-10 years,3-6 years,3-6 years,1.0,2.0,7.0,2.0,4.0,7.0,7.0,10.0,7.0,9.0,0.0,5.0,93.7,50.0,0.0,0.0,61939.0,42169.0,2400.0,51039.0,,Cash,12.49,41.5,8853
61548,35000.0,long,843.9,D1,10+ years,OWN,98000.0,Verified,Charged Off,debt_consolidation,CA,25.85,1.0,0.0,0-3 years,27.0,0.0,30451.0,63.0,,0.0,95.0,50727.0,60800.0,2.0,1879.0,4967.0,56.4,0.0,0.0,166.0,436.0,11.0,11.0,0.0,0-10 years,never,0-3 years,0.0,3.0,19.0,5.0,15.0,6.0,25.0,57.0,19.0,27.0,0.0,1.0,96.8,0.0,0.0,0.0,98754.0,50727.0,11400.0,37954.0,,Cash,15.61,50.1,14973


In [20]:
df.sample(5)

Unnamed: 0,loan_amnt,term,installment,sub_grade,emp_length,home_ownership,annual_inc,verification_status,loan_status,purpose,addr_state,dti,delinq_2yrs,inq_last_6mths,mths_since_last_delinq,open_acc,pub_rec,revol_bal,total_acc,total_rec_prncp,total_rec_int,total_rec_late_fee,last_credit_pull_d,acc_now_delinq,tot_coll_amt,tot_cur_bal,total_rev_hi_lim,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,hardship_status,disbursement_method,int_rate_percent,revol_util_percent,days_since_earliest_cr_line
177262,3950.0,short,133.08,B5,10+ years,OWN,60000.0,Not Verified,Fully Paid,credit_card,TX,10.88,0.0,1.0,never,7.0,2.0,4062.0,15.0,3950.0,840.53,0.0,May-2017,0.0,0.0,20536.0,7100.0,4.0,2934.0,1438.0,73.9,0.0,0.0,51.0,35.0,11.0,11.0,0.0,0-10 years,never,never,0.0,2.0,2.0,3.0,6.0,6.0,5.0,8.0,2.0,7.0,0.0,1.0,100.0,33.3,0.0,2.0,32365.0,20536.0,5500.0,24565.0,,Cash,12.99,57.2,4776
158049,7250.0,short,249.52,C4,6 years,MORTGAGE,80000.0,Not Verified,Charged Off,debt_consolidation,AZ,30.09,0.0,3.0,never,13.0,0.0,62325.0,32.0,2078.14,916.1,0.0,Feb-2017,0.0,0.0,424565.0,78000.0,3.0,32659.0,5356.0,83.9,0.0,0.0,136.0,423.0,22.0,16.0,4.0,0-10 years,never,never,0.0,5.0,8.0,5.0,12.0,8.0,10.0,20.0,8.0,13.0,0.0,0.0,100.0,80.0,0.0,0.0,537954.0,90418.0,33200.0,39954.0,,Cash,14.49,79.9,14728
104345,16000.0,short,539.03,C1,1 year,MORTGAGE,190000.0,Not Verified,Fully Paid,debt_consolidation,MD,7.14,0.0,1.0,never,10.0,0.0,29894.0,17.0,16000.0,2841.16,0.0,Jul-2017,0.0,0.0,333387.0,39300.0,2.0,37043.0,4906.0,85.9,0.0,0.0,21.0,146.0,29.0,21.0,5.0,0-10 years,never,never,0.0,5.0,5.0,6.0,6.0,1.0,8.0,11.0,5.0,10.0,0.0,0.0,100.0,83.3,0.0,0.0,357865.0,55083.0,34800.0,32290.0,,Cash,12.99,76.1,6207
100072,21000.0,short,655.07,A4,3 years,RENT,90000.0,Source Verified,Fully Paid,debt_consolidation,NY,7.31,1.0,0.0,0-3 years,11.0,0.0,11927.0,21.0,21000.0,2568.99,0.0,Jun-2017,0.0,0.0,22641.0,43100.0,5.0,2058.0,25273.0,32.1,0.0,0.0,171.0,205.0,6.0,6.0,0.0,0-10 years,never,0-3 years,0.0,2.0,2.0,4.0,7.0,7.0,7.0,14.0,2.0,11.0,0.0,5.0,90.5,0.0,0.0,0.0,74972.0,22641.0,37200.0,31872.0,,Cash,7.69,27.7,8002
158849,5600.0,short,187.32,B5,7 years,MORTGAGE,48000.0,Not Verified,Fully Paid,debt_consolidation,SC,8.08,0.0,1.0,3-6 years,7.0,0.0,1017.0,17.0,5600.0,1143.24,0.0,Nov-2018,0.0,0.0,92680.0,10700.0,9.0,13240.0,1483.0,40.7,0.0,0.0,49.0,90.0,4.0,4.0,1.0,0-10 years,6-9 years,3-6 years,0.0,1.0,1.0,1.0,6.0,3.0,5.0,13.0,1.0,7.0,0.0,1.0,94.1,0.0,0.0,0.0,107250.0,6805.0,2500.0,8000.0,,Cash,12.49,9.5,4562


# Patsy `dmatrix` (pass a formula string like in the last project, but without the `y ~` left-hand side)