<h1 align="center">Project - Loan approval analysis</h1>

In [140]:
""" 
Problem Statement
Dream Housing Finance Inc. specializes in home loans across different market segments - rural, urban and semi-urban. 
Thier loan eligibility process is based on customer details provided while filling an online application form. 
To create a targeted marketing campaign for different segments, 
they have asked for a comprehensive analysis of the data collected so far.
"""    
print("Loan Approval Analysis")




In [70]:
# importing the modules
import pandas as pd
import numpy as np
from scipy.stats import mode

In [71]:
bank = pd.read_csv("loan_approval_system.csv")
bank.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [72]:
# checking for categorical data
categorical_var = bank.select_dtypes( include = "object" )
print(categorical_var)

numerical_var = bank.select_dtypes( include = "number" )
print(numerical_var)

      Loan_ID  Gender Married Dependents     Education Self_Employed  \
0    LP001002    Male      No          0      Graduate            No   
1    LP001003    Male     Yes          1      Graduate            No   
2    LP001005    Male     Yes          0      Graduate           Yes   
3    LP001006    Male     Yes          0  Not Graduate            No   
4    LP001008    Male      No          0      Graduate            No   
5    LP001011    Male     Yes          2      Graduate           Yes   
6    LP001013    Male     Yes          0  Not Graduate            No   
7    LP001014    Male     Yes         3+      Graduate            No   
8    LP001018    Male     Yes          2      Graduate            No   
9    LP001020    Male     Yes          1      Graduate            No   
10   LP001024    Male     Yes          2      Graduate            No   
11   LP001027    Male     Yes          2      Graduate           NaN   
12   LP001028    Male     Yes          2      Graduate          

In [96]:
# handling the missing data
"""
Sometimes customers forget to fill in all the details or they don't want to share other details. 
Because of that, some of the fields in the dataset will have missing values. 
Now you have to check which columns have missing values and also check the count of missing values each column has. 
If you get the columns that have missing values, try to fill them.
"""

# dropping the Loan_id column
# del bank['Loan_ID']
banks = bank.drop(['Loan_ID'],axis = 1)

# checking number of null values that are present in each column
print("number of null values".center(100,"-"))
print(banks.isnull().sum())

# calculating modes for dataframe
bank_mode = banks.mode()
print("mode values".center(100,"-"))
print(bank_mode)

# filling the missing values with the respective mode values
print("Cleaned data".center(100,"-"))
print(banks)
# converted the bank_mode.loc[0]
# bank_mode = list(bank_mode.loc[0])

# Taking only the bank_mode data values and not the columns
for x,y in zip(banks.columns,bank_mode.iloc[0]):
    banks[x].fillna(y,inplace = True)


# check again for null values
print("cross check for null values".center(100,'-'))
print(banks.isnull().sum())


---------------------------------------number of null values----------------------------------------
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64
--------------------------------------------mode values---------------------------------------------
  Gender Married Dependents Education Self_Employed  ApplicantIncome  \
0   Male     Yes          0  Graduate            No             2500   

   CoapplicantIncome  LoanAmount  Loan_Amount_Term  Credit_History  \
0                0.0       120.0             360.0             1.0   

  Property_Area Loan_Status  
0     Semiurban           Y  
--------------------------------------------Cleaned data--------------------------------------------
     Gender Married Dependents     Education Self_

In [105]:
# Loan amount vs gender
"""
    Now let's check the loan amount of an average person based on 'Gender', 'Married', 'Self_Employed'. 
    This will give a basic idea of the average loan amount of a person.

"""
avg_loan_amount = banks.pivot_table(values = ['LoanAmount'],index = ['Gender','Married','Self_Employed'],aggfunc="mean")

print(avg_loan_amount)

                              LoanAmount
Gender Married Self_Employed            
Female No      No             114.768116
               Yes            125.272727
       Yes     No             133.714286
               Yes            282.250000
Male   No      No             129.508621
               Yes            180.588235
       Yes     No             152.608150
               Yes            167.420000


In [139]:
# loan approval vs employement

"""
    Now let's check the percentage of loan approved based on a person's employment type.
    
"""

loan_approved_se = banks[(banks['Self_Employed'].values == "Yes") & (banks['Loan_Status'].values == "Y")]
# banks.iloc[:,0].values

print(len(loan_approved_se))

loan_approved_nse = banks[(banks['Self_Employed'].values == "No") & (banks['Loan_Status'].values == "Y")]
print(len(loan_approved_nse))

percentage_se = (len(loan_approved_se)/len(df['Loan_Status'])) * 100

percentage_nse = (len(loan_approved_nse)/len(df['Loan_Status'])) * 100

print(f"The percentage of self employeed's loan approval is : {percentage_se} %")
print(f"\nThe percentage of not self employeed's loan approval is : {percentage_nse} %")

56
366
The percentage of self employeed's loan approval is : 9.120521172638437 %

The percentage of not self employeed's loan approval is : 59.60912052117264 %


In [158]:
# Transform the loan tenure from months to years

"""
    A government audit is happening real soon! 
    So the company wants to find out those applicants with long loan amount term.
    
"""


loan_term = banks['Loan_Amount_Term'].apply(lambda x:x/12)
# type is series
# print(loan_term)
# banks['yearly_term'] = banks['Loan_Amount_Term'].apply(lambda x:x/12)

big_loan_term = len(loan_term[loan_term>=25])
# print(big_loan_term)
# big_loan_term = len(banks[banks['yearly_term']>= 25])



In [163]:
# Income/ Credit History vs Loan Amount

"""
    Now let's check the average income of an applicant and the average loan given to a person based on their income.
    
"""

loan_groupby = banks.groupby('Loan_Status')['ApplicantIncome', 'Credit_History']
mean_values = loan_groupby.mean()