In [1]:
options nonotes nosource;
/* Summary - BLOG POST

    * Note: rather than signing up for a Tumblr account, I am using an existing
      GitHub account so that my work stays consolidated on one platform.

    The variables chosen for evaluation are listed below.

    Variables:
        1) purpose    - Why the loan was requested; a high-level grouping of the loan types.
        2) int_rate   - The interest rate on the loan.
        3) dti        - Debt-to-income ratio.
        4) revol_util - Revolving credit utilization.

*/

In [2]:
/* Turn off source echo and NOTE lines in the log to keep the notebook short.
   The author reports this may surface an "Expecting page 1 ..." error,
   which can be ignored. */
options nosource nonotes;

/* Import the file.
   - REPLACE allows this cell to be re-run: without it PROC IMPORT will not
     overwrite a WORK.LOANSTATS that already exists from an earlier run.
   - GUESSINGROWS=MAX makes PROC IMPORT examine every row when deciding each
     column's type and length, instead of only the first rows, so long text
     values are not truncated and late non-numeric values are not lost. */
proc import datafile = '/folders/myfolders/sasuser.v94/LoanStats3a.csv'
    out = work.loanstats
    dbms = CSV
    replace;
    guessingrows = max;
run;

In [3]:
/* Begin the DATA step that builds data set NEW from the imported loan data.
   The step is deliberately left open here: the assignment and LABEL
   statements in the following cells belong to this same step. */
data new;
    set work.loanstats;

In [4]:
/* Convert percentage text such as "25%" into a numeric decimal such as 0.25.

   The type of a SAS variable is fixed by its first assignment. Copying the
   character column first (int_rate_d = int_rate;) would make int_rate_d a
   CHARACTER variable, so every later numeric result would be implicitly
   converted back to text. Each derived variable is therefore created by a
   single numeric expression:

     compress(x, '% ')  - strips the percent sign and any blanks
     input(..., 32.)    - reads the remaining digits as a number
     / 100              - rescales from percent to decimal notation

   compress() is also safe when the value has no '%' (for example a missing
   value): substr(x, 1, index(x, '%') - 1) would be called with a length of
   -1 in that case. A blank input simply yields a missing numeric value. */

/* interest rate: "10.37%" -> 0.1037 */
int_rate_d = input(compress(int_rate, '% '), 32.) / 100;

/* same with revolving utilization: "0.10%" -> 0.001 */
revol_util_d = input(compress(revol_util, '% '), 32.) / 100;

In [5]:
/* Subset the data */
/* No subsetting is applied at this point. */

/* Set some labels.
   The derived decimal variables (int_rate_d, revol_util_d) are labeled as
   well so they are readable in procedure output.
   NOTE(review): the *_bin variables are not created anywhere in the visible
   code; their labels are kept as-is - confirm they are defined elsewhere or
   drop them. */
label
    purpose        = "Purpose for Loan"
    int_rate       = "interest rate"
    dti            = "debt to income ratio"
    revol_util     = "revolving utilization"

    int_rate_d     = "interest rate (decimal)"
    revol_util_d   = "revolving utilization (decimal)"

    int_rate_bin   = "interest rate (binned)"
    dti_bin        = "debt to income ratio (binned)"
    revol_util_bin = "revolving utilization (binned)"
;

/* End the DATA step explicitly instead of relying on the next PROC
   statement to act as the step boundary. */
run;


In [6]:
/* Run distribution frequencies on purpose, interest rate, dti and revolving utilization.
   DATA=new names the input explicitly rather than depending on whichever
   data set happened to be created most recently in the session. */
proc freq data = new;
    tables purpose int_rate dti revol_util;
run;

Purpose for Loan,Purpose for Loan,Purpose for Loan,Purpose for Loan,Purpose for Loan
purpose,Frequency,Percent,Cumulative Frequency,Cumulative Percent
car,1615,3.8,1615,3.8
credit_card,5477,12.88,7092,16.67
debt_consolidation,19776,46.49,26868,63.17
educational,422,0.99,27290,64.16
home_improvement,3199,7.52,30489,71.68
house,426,1.0,30915,72.68
major_purchase,2311,5.43,33226,78.11
medical,753,1.77,33979,79.88
moving,629,1.48,34608,81.36
other,4425,10.4,39033,91.77

interest rate,interest rate,interest rate,interest rate,interest rate
int_rate,Frequency,Percent,Cumulative Frequency,Cumulative Percent
10.00%,251,0.59,251,0.59
10.01%,8,0.02,259,0.61
10.08%,55,0.13,314,0.74
10.14%,11,0.03,325,0.76
10.20%,18,0.04,343,0.81
10.25%,228,0.54,571,1.34
10.28%,32,0.08,603,1.42
10.33%,9,0.02,612,1.44
10.36%,256,0.6,868,2.04
10.37%,470,1.1,1338,3.15

debt to income ratio,debt to income ratio,debt to income ratio,debt to income ratio,debt to income ratio
dti,Frequency,Percent,Cumulative Frequency,Cumulative Percent
0.0,206,0.48,206,0.48
0.01,3,0.01,209,0.49
0.02,5,0.01,214,0.5
0.03,2,0.0,216,0.51
0.04,3,0.01,219,0.51
0.05,2,0.0,221,0.52
0.06,1,0.0,222,0.52
0.07,5,0.01,227,0.53
0.08,5,0.01,232,0.55
0.09,4,0.01,236,0.55

revolving utilization,revolving utilization,revolving utilization,revolving utilization,revolving utilization
revol_util,Frequency,Percent,Cumulative Frequency,Cumulative Percent
0%,1070,2.52,1070,2.52
0.01%,1,0.00,1071,2.52
0.03%,1,0.00,1072,2.53
0.04%,1,0.00,1073,2.53
0.05%,1,0.00,1074,2.53
0.10%,61,0.14,1135,2.67
0.12%,1,0.00,1136,2.68
0.16%,1,0.00,1137,2.68
0.20%,64,0.15,1201,2.83
0.30%,43,0.10,1244,2.93
