<a href="https://colab.research.google.com/github/farzan2002/Machine_Learning_Project/blob/main/Project_4%5DLoan_Status_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Importing Libraries**

In [35]:
# Data handling (numpy is imported but not referenced in the cells visible in this notebook)
import pandas as pd
import numpy as np
# Plotting: the default plotly renderer is set to 'colab' before any figure is built
import plotly
plotly.io.renderers.default = 'colab'
import plotly.express as px
# Modelling utilities: train/test splitting, the SVM module and the accuracy metric
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

# NOTE(review): this filter silences every warning for the rest of the session,
# including deprecation warnings raised by the libraries above - consider narrowing it.
import warnings
warnings.filterwarnings("ignore")

# **Loading and Exploring the Data**

In [36]:
# Read the loan CSV into a pandas DataFrame.
# NOTE(review): the path points inside /content/drive, so presumably Google Drive
# must already be mounted in the Colab session - confirm before running.
loan_csv_path = '/content/drive/MyDrive/Colab datasets/loan.csv'
loan_dataset = pd.read_csv(loan_csv_path)

In [37]:
# Preview the first five records of the raw table
loan_dataset.head(n=5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [38]:
# Dataset Information
# Prints a per-column summary (non-null counts and dtypes) together with the memory usage

loan_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Loan_ID            614 non-null    object 
 1   Gender             601 non-null    object 
 2   Married            611 non-null    object 
 3   Dependents         599 non-null    object 
 4   Education          614 non-null    object 
 5   Self_Employed      582 non-null    object 
 6   ApplicantIncome    614 non-null    int64  
 7   CoapplicantIncome  614 non-null    float64
 8   LoanAmount         592 non-null    float64
 9   Loan_Amount_Term   600 non-null    float64
 10  Credit_History     564 non-null    float64
 11  Property_Area      614 non-null    object 
 12  Loan_Status        614 non-null    object 
dtypes: float64(4), int64(1), object(8)
memory usage: 62.5+ KB


In [39]:
# Number of rows and columns in the loaded table, shown as a (rows, columns) tuple
row_count, column_count = loan_dataset.shape
(row_count, column_count)

(614, 13)

In [40]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
numeric_summary = loan_dataset.describe()
numeric_summary

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History
count,614.0,614.0,592.0,600.0,564.0
mean,5403.459283,1621.245798,146.412162,342.0,0.842199
std,6109.041673,2926.248369,85.587325,65.12041,0.364878
min,150.0,0.0,9.0,12.0,0.0
25%,2877.5,0.0,100.0,360.0,1.0
50%,3812.5,1188.5,128.0,360.0,1.0
75%,5795.0,2297.25,168.0,360.0,1.0
max,81000.0,41667.0,700.0,480.0,1.0


In [41]:
# Count the missing entries in every column
loan_dataset.isna().sum()

Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [42]:
# Discard every row that has a missing value in any column
loan_dataset = loan_dataset.dropna(axis=0, how='any')

In [43]:
# Re-count missing entries per column to confirm that none are left after the drop
missing_per_column = loan_dataset.isna().sum()
missing_per_column

Loan_ID              0
Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

# **Data Visualization**

In [44]:
# Grouped bar chart: loan status broken down by marital status.
# Loan_Status 'N' is drawn in red and 'Y' in green.
color_map = dict(N='red', Y='green')

fig = px.bar(
    loan_dataset,
    x="Married",
    color="Loan_Status",
    barmode="group",
    color_discrete_map=color_map,
)
fig.show()

In [45]:
# Grouped bar chart: loan status broken down by education level.
# Loan_Status 'N' is drawn in red and 'Y' in green.
color_map = dict(N='red', Y='green')

fig = px.bar(
    loan_dataset,
    x="Education",
    color="Loan_Status",
    barmode="group",
    color_discrete_map=color_map,
)
fig.show()

In [46]:
# Grouped bar chart: loan status broken down by gender.
# Loan_Status 'N' is drawn in red and 'Y' in green.
color_map = dict(N='red', Y='green')

fig = px.bar(
    loan_dataset,
    x="Gender",
    color="Loan_Status",
    barmode='group',
    color_discrete_map=color_map,
)
fig.show()

In [47]:
# Box plot of the LoanAmount column to show its spread and outliers
fig = px.box(
    loan_dataset,
    y='LoanAmount',
    title='Loan Amount Distribution',
)
fig.show()

In [48]:
# Pie chart of the share of each Loan_Status value.
# Loan_Status 'N' is drawn in red and 'Y' in green.
color_map = dict(N='red', Y='green')

fig = px.pie(
    loan_dataset,
    names='Loan_Status',
    color='Loan_Status',
    color_discrete_map=color_map,
    title='Loan Status Proportion',
)
fig.show()

# **Data Preprocessing for Modeling**

## ***Label Encoding for the Model***

In [49]:
# Replacing the 'N' & 'Y' from Loan_Status Column to 0 & 1 Respectively
# The encoded column is assigned back and cast explicitly so the target is
# integer-typed without depending on `replace` silently downcasting an object
# column (deprecated in recent pandas releases) or on frame-wide inplace mutation.
# Re-running the cell is harmless: already-encoded values are left untouched.
loan_dataset['Loan_Status'] = (
    loan_dataset['Loan_Status'].replace({'N': 0, 'Y': 1}).astype('int64')
)

In [50]:
# Replacing the '3+' from Dependents Column to '4'
# The result is assigned back explicitly: calling an inplace method on the
# `loan_dataset['Dependents']` selection is chained assignment, which pandas
# warns about and which does not update the frame under Copy-on-Write.
# The values remain strings (e.g. '4'), like the rest of the column.
loan_dataset['Dependents'] = loan_dataset['Dependents'].replace('3+', '4')

In [51]:
# Checking the Unique Values in Dependents Column for any Errors

loan_dataset['Dependents'].unique()

array(['1', '0', '2', '4'], dtype=object)

In [52]:
# Replacing the 'No' & 'Yes' from Married Column to 0 & 1 Respectively
# Assign the encoded column back and cast it explicitly so the result is
# integer-typed without depending on `replace` silently downcasting an object
# column (deprecated in recent pandas releases) or on frame-wide inplace mutation.
loan_dataset['Married'] = (
    loan_dataset['Married'].replace({'No': 0, 'Yes': 1}).astype('int64')
)

In [53]:
# Replacing the 'Female' & 'Male' from Gender Column to 0 & 1 Respectively
# Assign the encoded column back and cast it explicitly so the result is
# integer-typed without depending on `replace` silently downcasting an object
# column (deprecated in recent pandas releases) or on frame-wide inplace mutation.
loan_dataset['Gender'] = (
    loan_dataset['Gender'].replace({'Female': 0, 'Male': 1}).astype('int64')
)

In [54]:
# Replacing the 'Not Graduate' & 'Graduate' from Education Column to 0 & 1 Respectively
# Assign the encoded column back and cast it explicitly so the result is
# integer-typed without depending on `replace` silently downcasting an object
# column (deprecated in recent pandas releases) or on frame-wide inplace mutation.
loan_dataset['Education'] = (
    loan_dataset['Education'].replace({'Not Graduate': 0, 'Graduate': 1}).astype('int64')
)

In [55]:
# Replacing the 'Rural', 'Semiurban' & 'Urban' from Property_Area Column to 0, 1 & 2 Respectively
# Assign the encoded column back and cast it explicitly so the result is
# integer-typed without depending on `replace` silently downcasting an object
# column (deprecated in recent pandas releases) or on frame-wide inplace mutation.
loan_dataset['Property_Area'] = (
    loan_dataset['Property_Area']
    .replace({'Rural': 0, 'Semiurban': 1, 'Urban': 2})
    .astype('int64')
)

In [56]:
# Replacing the 'No' & 'Yes' from Self_Employed Column to 0 & 1 Respectively
# Assign the encoded column back and cast it explicitly so the result is
# integer-typed without depending on `replace` silently downcasting an object
# column (deprecated in recent pandas releases) or on frame-wide inplace mutation.
loan_dataset['Self_Employed'] = (
    loan_dataset['Self_Employed'].replace({'No': 0, 'Yes': 1}).astype('int64')
)

In [57]:
# Preview the first five rows after encoding the categorical columns
loan_dataset.head(n=5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,LP001003,1,1,1,1,0,4583,1508.0,128.0,360.0,1.0,0,0
2,LP001005,1,1,0,1,1,3000,0.0,66.0,360.0,1.0,2,1
3,LP001006,1,1,0,0,0,2583,2358.0,120.0,360.0,1.0,2,1
4,LP001008,1,0,0,1,0,6000,0.0,141.0,360.0,1.0,2,1
5,LP001011,1,1,2,1,1,5417,4196.0,267.0,360.0,1.0,2,1


# **Splitting the data into features & targets**

In [58]:
# Features: every column except the Loan_ID identifier and the Loan_Status target.
non_feature_columns = ['Loan_ID', 'Loan_Status']
X = loan_dataset.drop(columns=non_feature_columns)

# Target: the Loan_Status column.
Y = loan_dataset['Loan_Status']

In [59]:
# First five rows of the feature matrix
X.head(n=5)

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
1,1,1,1,1,0,4583,1508.0,128.0,360.0,1.0,0
2,1,1,0,1,1,3000,0.0,66.0,360.0,1.0,2
3,1,1,0,0,0,2583,2358.0,120.0,360.0,1.0,2
4,1,0,0,1,0,6000,0.0,141.0,360.0,1.0,2
5,1,1,2,1,1,5417,4196.0,267.0,360.0,1.0,2


In [60]:
# First five values of the target series
Y.head(n=5)

1    0
2    1
3    1
4    1
5    1
Name: Loan_Status, dtype: int64

# **Splitting the Data into Training and Testing Sets**

In [61]:
# Hold out 10% of the rows for testing. The split is stratified on Y and uses a
# fixed random_state so it is repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    stratify=Y,
    random_state=2,
)

# **Training the Model**

## ***Support Vector Machine***

In [62]:
# Support Vector Classifier with a linear kernel (not fitted yet)
classifier = svm.SVC(kernel='linear')

In [63]:
# Fit the classifier on the training features and their labels
classifier.fit(X=X_train, y=Y_train)

# **Model Evaluation**

In [64]:
# Accuracy score using training data

# Labels predicted for the training rows (despite the X_ prefix these are predictions, not features)
X_train_pred = classifier.predict(X_train)
# accuracy_score is documented as (y_true, y_pred): ground truth first, predictions second
training_data_accuracy = accuracy_score(Y_train, X_train_pred)
# Express the fraction as a percentage rounded to two decimals
training_data_accuracy_percent = round(training_data_accuracy*100, 2)
print ('Accuracy Score on training data :', training_data_accuracy_percent)

Accuracy Score on training data : 79.86


In [65]:
# Accuracy score using testing data

# Labels predicted for the held-out rows (despite the X_ prefix these are predictions, not features)
X_test_pred = classifier.predict(X_test)
# accuracy_score is documented as (y_true, y_pred): ground truth first, predictions second
testing_data_accuracy = accuracy_score(Y_test, X_test_pred)
# Express the fraction as a percentage rounded to two decimals
testing_data_accuracy_percent = round(testing_data_accuracy*100, 2)
print ('Accuracy Score on testing data :', testing_data_accuracy_percent)

Accuracy Score on testing data : 83.33


# **Saving the Model**

In [66]:
import pickle

In [67]:
# Serialize the fitted classifier to disk.
filename = 'trained_model.sav'
# The context manager guarantees the file is flushed and closed even if
# pickling raises, instead of leaving the handle to the garbage collector.
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

In [68]:
# Loading the saved model
# NOTE: only unpickle files you created or trust - pickle.load can execute arbitrary code.
# The context manager closes the file handle once the model has been read.
with open('trained_model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)