# Consumer Loan Case Study
Objective : Understand the driving factors (or driver variables) behind loan default thereby cutting down the amount of credit loss.

In [1]:
#Import libraries.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#Importing the loan.csv file in the dataframe for analysis.
loan_master = pd.read_csv("loan.csv", low_memory=False)

### Data Understanding
Analysis of the Dataframe. Count of rows and columns and column names in the dataframe.

In [3]:
#Number of rows and columns.
loan_master.shape

(39717, 111)

In [4]:
#Summary of the loan_master dataframe.
loan_master.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39717 entries, 0 to 39716
Columns: 111 entries, id to total_il_high_credit_limit
dtypes: float64(74), int64(13), object(24)
memory usage: 33.6+ MB


In [13]:
#Column names in the dataset.
pd.set_option('display.max_rows', 60)
pd.set_option('display.max_columns', 60)
loan_master.columns

Index(['id', 'member_id', 'loan_amnt', 'funded_amnt', 'funded_amnt_inv',
       'term', 'int_rate', 'installment', 'grade', 'sub_grade',
       ...
       'num_tl_90g_dpd_24m', 'num_tl_op_past_12m', 'pct_tl_nvr_dlq',
       'percent_bc_gt_75', 'pub_rec_bankruptcies', 'tax_liens',
       'tot_hi_cred_lim', 'total_bal_ex_mort', 'total_bc_limit',
       'total_il_high_credit_limit'],
      dtype='object', length=111)

### Data Cleaning
1. Selecting relevant data.
2. Treating missing values.
3. Removing columns with all 0's.
4. Checking the case of all categorical variables. 

In [10]:
#Checking unique values in Loan Status.
loan_master.loan_status.unique()

array(['Fully Paid', 'Charged Off', 'Current'], dtype=object)

In [11]:
#Subsetting Loan_Status = "Charged Off"for Risk Analytics.
loan_charged_off = loan_master.loc[loan_master['loan_status'] == 'Charged Off']

In [12]:
#Checking rows and columns of new filtered dataframe.
loan_charged_off.shape

(5627, 111)

In [14]:
#Checking the Missing values.
loan_charged_off.isnull().sum()

id                               0
member_id                        0
loan_amnt                        0
funded_amnt                      0
funded_amnt_inv                  0
                              ... 
tax_liens                        1
tot_hi_cred_lim               5627
total_bal_ex_mort             5627
total_bc_limit                5627
total_il_high_credit_limit    5627
Length: 111, dtype: int64

In [16]:
#Checking missing values percentages of the columns.
round(loan_charged_off.isnull().sum()/len(loan_charged_off.index), 2) * 100

id                              0.0
member_id                       0.0
loan_amnt                       0.0
funded_amnt                     0.0
funded_amnt_inv                 0.0
                              ...  
tax_liens                       0.0
tot_hi_cred_lim               100.0
total_bal_ex_mort             100.0
total_bc_limit                100.0
total_il_high_credit_limit    100.0
Length: 111, dtype: float64

In [18]:
#Removing cloumns with missing values greater than 60%.
missing_value_cols = loan_charged_off.columns[100*(loan_charged_off.isnull().sum()/len(loan_charged_off.index)) > 60]

In [20]:
#Dropping missing values columns greater than 60% missing values.
loan_charged_off = loan_charged_off.drop(missing_value_cols, axis=1)

In [21]:
#Checking the Missing values.
loan_charged_off.isnull().sum()

id                               0
member_id                        0
loan_amnt                        0
funded_amnt                      0
funded_amnt_inv                  0
term                             0
int_rate                         0
installment                      0
grade                            0
sub_grade                        0
emp_title                      484
emp_length                     228
home_ownership                   0
annual_inc                       0
verification_status              0
issue_d                          0
loan_status                      0
pymnt_plan                       0
url                              0
desc                          1802
purpose                          0
title                            2
zip_code                         0
addr_state                       0
dti                              0
delinq_2yrs                      0
earliest_cr_line                 0
inq_last_6mths                   0
open_acc            