In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter("ignore")

In [3]:
data = pd.read_csv(r"loan_data_set.csv")

In [4]:
data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Loan_ID            614 non-null    object 
 1   Gender             601 non-null    object 
 2   Married            611 non-null    object 
 3   Dependents         599 non-null    object 
 4   Education          614 non-null    object 
 5   Self_Employed      582 non-null    object 
 6   ApplicantIncome    614 non-null    int64  
 7   CoapplicantIncome  614 non-null    float64
 8   LoanAmount         592 non-null    float64
 9   Loan_Amount_Term   600 non-null    float64
 10  Credit_History     564 non-null    float64
 11  Property_Area      614 non-null    object 
 12  Loan_Status        614 non-null    object 
dtypes: float64(4), int64(1), object(8)
memory usage: 62.5+ KB


In [7]:
data.isna().sum()

Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [8]:
data.dropna(inplace=True)

In [10]:
data.isna().sum()

Loan_ID              0
Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

In [12]:
!pip install category_encoders

Defaulting to user installation because normal site-packages is not writeable


In [13]:
from category_encoders import OrdinalEncoder
from category_encoders import BinaryEncoder

In [15]:
ordinal_features = ["Property_Area",'Dependents']
binary_features = ['Self_Employed','Education','Married','Gender']

OE = OrdinalEncoder()
for i in data.columns:
    if i in ordinal_features and data[i].dtype=="object" :
        data[i] = OE.fit_transform(data[i])
    else:
        continue

In [16]:
def binary_encoder(Series):
    unique = Series.unique()
    encode = {
            unique[0]:0,
            unique[1]:1
          }
    Series = Series.map(encode)
    return Series

In [17]:
BE = BinaryEncoder()
for i in data.columns:
    if i in binary_features and data[i].dtype=="object" :
        data[i] = binary_encoder(data[i])
    else:
        continue

In [18]:
data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,LP001003,0,0,1,0,0,4583,1508.0,128.0,360.0,1.0,1,N
2,LP001005,0,0,2,0,1,3000,0.0,66.0,360.0,1.0,2,Y
3,LP001006,0,0,2,1,0,2583,2358.0,120.0,360.0,1.0,2,Y
4,LP001008,0,1,2,0,0,6000,0.0,141.0,360.0,1.0,2,Y
5,LP001011,0,0,3,0,1,5417,4196.0,267.0,360.0,1.0,2,Y


In [19]:
data["Loan_Status"] = binary_encoder(data["Loan_Status"])

In [20]:
X = data.iloc[:,1:].drop("Loan_Status",axis=1)
Y = data.iloc[:,-1]

In [21]:
from sklearn.preprocessing import StandardScaler

scale = StandardScaler()
X = pd.DataFrame(scale.fit_transform(X))


In [22]:
from sklearn.model_selection import train_test_split as split
Xtrain, Xtest, Ytrain, Ytest = split(X, Y, train_size = 0.75, random_state=33)

In [23]:
Xtrain.shape, Ytrain.shape

((360, 11), (360,))

In [24]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [25]:
from sklearn.linear_model import LogisticRegression
LR = LogisticRegression()
LR.fit(Xtrain,Ytrain)
ypred = LR.predict(Xtest)

print("Accuracy : ",round(accuracy_score(Ytest,ypred),2)*100)
print("Confusion Matrix\n",confusion_matrix(Ytest,ypred))

Accuracy :  82.0
Confusion Matrix
 [[ 2 15]
 [ 7 96]]


In [26]:
from sklearn.tree import DecisionTreeClassifier
DT = DecisionTreeClassifier(criterion="gini",random_state=33)
DT.fit(Xtrain,Ytrain)
ypred = DT.predict(Xtest)

print("Accuracy : ",round(accuracy_score(Ytest,ypred),2)*100)
print("Confusion Matrix\n",confusion_matrix(Ytest,ypred))

Accuracy :  88.0
Confusion Matrix
 [[10  7]
 [ 7 96]]


In [27]:
from sklearn.naive_bayes import GaussianNB
GNB = GaussianNB()
GNB.fit(Xtrain,Ytrain)
ypred = GNB.predict(Xtest)

print("Accuracy : ",round(accuracy_score(Ytest,ypred),2)*100)
print("Confusion Matrix\n",confusion_matrix(Ytest,ypred))

Accuracy :  80.0
Confusion Matrix
 [[ 8  9]
 [15 88]]


In [28]:
from sklearn.ensemble import RandomForestClassifier
RFC = RandomForestClassifier(n_estimators=100,criterion="gini",random_state = 33)
RFC.fit(Xtrain,Ytrain)
ypred = RFC.predict(Xtest)

print("Accuracy : ",round(accuracy_score(Ytest,ypred),2)*100)
print("Confusion Matrix\n",confusion_matrix(Ytest,ypred))

Accuracy :  89.0
Confusion Matrix
 [[  7  10]
 [  3 100]]


In [29]:
from sklearn.ensemble import ExtraTreesClassifier
ETC = ExtraTreesClassifier(random_state=33)
ETC.fit(Xtrain,Ytrain)
ypred = ETC.predict(Xtest)

print("Accuracy : ",round(accuracy_score(Ytest,ypred),2)*100)
print("Confusion Matrix\n",confusion_matrix(Ytest,ypred))

Accuracy :  81.0
Confusion Matrix
 [[ 4 13]
 [10 93]]


In [30]:
from sklearn.neighbors import KNeighborsClassifier
KNC = KNeighborsClassifier(n_neighbors=5)
KNC.fit(Xtrain,Ytrain)
ypred = KNC.predict(Xtest)

print("Accuracy : ",round(accuracy_score(Ytest,ypred),2)*100)
print("Confusion Matrix\n",confusion_matrix(Ytest,ypred))

Accuracy :  80.0
Confusion Matrix
 [[ 4 13]
 [11 92]]


<h1 style = "color : red"}> Prediction</h1>

In [31]:
RFC.predict([Xtest.iloc[4]])[0]

1.0