**Problem Statement**

My home loan prediction model utilizes various parameters such as marital status, number of dependents, and location to assess whether an individual qualifies for a loan facility from a bank. By considering these factors, the model extracts relevant features and analyzes their impact on loan eligibility. This predictive approach enables banks to streamline their loan approval process, providing timely assistance to eligible applicants while managing risk effectively.

In [None]:
# Importing dependencies
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import seaborn as sns

In [None]:
# Load the loan applications CSV into a pandas DataFrame
dataset_path = '/content/loan_dataset.csv'
loan_dataset = pd.read_csv(dataset_path)

In [None]:
# Preview the first 5 rows of the DataFrame
loan_dataset.head(n=5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [None]:
# Dimensions of the DataFrame as a (rows, columns) tuple
loan_dataset.shape

(614, 13)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); the output below
# covers only the numeric columns
loan_dataset.describe()

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History
count,614.0,614.0,592.0,600.0,564.0
mean,5403.459283,1621.245798,146.412162,342.0,0.842199
std,6109.041673,2926.248369,85.587325,65.12041,0.364878
min,150.0,0.0,9.0,12.0,0.0
25%,2877.5,0.0,100.0,360.0,1.0
50%,3812.5,1188.5,128.0,360.0,1.0
75%,5795.0,2297.25,168.0,360.0,1.0
max,81000.0,41667.0,700.0,480.0,1.0


In [None]:
# Count the missing values in each column
loan_dataset.isna().sum()

Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [None]:
# Keep only the rows that have no missing value in any column
loan_dataset = loan_dataset.dropna(axis=0, how='any')

In [None]:
# Confirm that no missing values remain after dropping the incomplete rows
loan_dataset.isna().sum()

Loan_ID              0
Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

In [None]:
# Label encoding: map the target column to integers (Y -> 1, N -> 0)
loan_status_codes = {'Y': 1, 'N': 0}
loan_dataset = loan_dataset.replace({'Loan_Status': loan_status_codes})

In [None]:
# Preview the first 5 rows after encoding Loan_Status
loan_dataset.head(n=5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,0
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,1
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,1
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,1
5,LP001011,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,1


In [None]:
# Replace the open-ended '3+' category in Dependents with the number 4.
# The replacement is limited to the Dependents column so that a '3+' token in
# any other column can never be rewritten by accident. The column is then cast
# to int so that it holds real integers rather than a mix of digit strings and
# the number 4 (presumably the CSV column is read as strings because of '3+').
loan_dataset = loan_dataset.assign(
    Dependents=loan_dataset['Dependents'].replace('3+', '4').astype(int)
)


In [None]:
# Encode each remaining categorical feature as an integer code
category_codes = {
    'Gender': {'Male': 1, 'Female': 0},
    'Married': {'Yes': 1, 'No': 0},
    'Education': {'Graduate': 1, 'Not Graduate': 0},
    'Self_Employed': {'Yes': 1, 'No': 0},
    'Property_Area': {'Rural': 0, 'Semiurban': 1, 'Urban': 2},
}
loan_dataset = loan_dataset.replace(category_codes)

In [None]:
# Preview the first 5 rows now that the categorical features are numeric
loan_dataset.head(n=5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,LP001003,1,1,1,1,0,4583,1508.0,128.0,360.0,1.0,0,0
2,LP001005,1,1,0,1,1,3000,0.0,66.0,360.0,1.0,2,1
3,LP001006,1,1,0,0,0,2583,2358.0,120.0,360.0,1.0,2,1
4,LP001008,1,0,0,1,0,6000,0.0,141.0,360.0,1.0,2,1
5,LP001011,1,1,2,1,1,5417,4196.0,267.0,360.0,1.0,2,1


Separating the features from the labels

In [None]:
# Features: every column except the applicant ID and the target label
x = loan_dataset.drop(columns=['Loan_ID', 'Loan_Status'])
# Labels: the encoded loan decision
y = loan_dataset['Loan_Status']

In [None]:
# Show the feature matrix followed by the label vector
print(x, y, sep='\n')

     Gender  Married Dependents  Education  Self_Employed  ApplicantIncome  \
1         1        1          1          1              0             4583   
2         1        1          0          1              1             3000   
3         1        1          0          0              0             2583   
4         1        0          0          1              0             6000   
5         1        1          2          1              1             5417   
..      ...      ...        ...        ...            ...              ...   
609       0        0          0          1              0             2900   
610       1        1          4          1              0             4106   
611       1        1          1          1              0             8072   
612       1        1          2          1              0             7583   
613       0        0          0          1              1             4583   

     CoapplicantIncome  LoanAmount  Loan_Amount_Term  Credit_Hi

Splitting the data into training and testing data

In [None]:
# Hold out 10% of the rows for testing. Stratifying on the label keeps the
# class proportions the same in both splits; the fixed seed makes the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=2,
    stratify=y,
)

Model Building and Training

In [None]:
# Support vector classifier with a linear kernel
classifier = svm.SVC(kernel="linear")

In [None]:
# Train the classifier on the training split
classifier.fit(X=x_train, y=y_train)

In [None]:
# Accuracy of the fitted model on the data it was trained on.
# accuracy_score is documented as (y_true, y_pred); keyword arguments make it
# explicit which is which, so the call stays correct if the metric is ever
# swapped for an asymmetric one such as precision or recall.
training_prediction = classifier.predict(x_train)
training_accuracy = accuracy_score(y_true=y_train, y_pred=training_prediction)

In [None]:
# Report the accuracy measured on the training split
print("Accuracy on training data is :", training_accuracy, sep=" ")

Accuracy on training data is : 0.7986111111111112


In [None]:
# Accuracy of the fitted model on the held-out test split.
# accuracy_score is documented as (y_true, y_pred); keyword arguments make it
# explicit which is which, so the call stays correct if the metric is ever
# swapped for an asymmetric one such as precision or recall.
testing_prediction = classifier.predict(x_test)
testing_accuracy = accuracy_score(y_true=y_test, y_pred=testing_prediction)

In [None]:
# Report the accuracy measured on the held-out test split
print("Accuracy on testing data is :", testing_accuracy)

Accuracy on training data is : 0.7986111111111112


Making a prediction for a single applicant with the trained model

In [None]:
# Feature values for one applicant, in the same column order as `x`:
# Gender, Married, Dependents, Education, Self_Employed, ApplicantIncome,
# CoapplicantIncome, LoanAmount, Loan_Amount_Term, Credit_History, Property_Area.
# Named `input_data` so that the built-in `input()` is not shadowed.
input_data = [1, 1, 0, 0, 0, 2583, 2358.0, 120.0, 360.0, 1.0, 2]
np_input = np.asarray(input_data)
# predict() expects a 2-D array: one row per sample
np_input_reshaped = np_input.reshape(1, -1)

prediction = classifier.predict(np_input_reshaped)
# Loan_Status was encoded as Y -> 1, N -> 0
if prediction[0] == 1:
    print("The person is eligible for loan")
else:
    print("The person is not eligible for loan")


The person is eligible for loan


