In [58]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score

In [86]:
df= pd.read_csv("/content/loan_data_set.csv")

### Dataset before preprocessing

In [87]:
df.head(10)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
5,LP001011,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y
6,LP001013,Male,Yes,0,Not Graduate,No,2333,1516.0,95.0,360.0,1.0,Urban,Y
7,LP001014,Male,Yes,3+,Graduate,No,3036,2504.0,158.0,360.0,0.0,Semiurban,N
8,LP001018,Male,Yes,2,Graduate,No,4006,1526.0,168.0,360.0,1.0,Urban,Y
9,LP001020,Male,Yes,1,Graduate,No,12841,10968.0,349.0,360.0,1.0,Semiurban,N


###Drop the Loan ID Column

In [88]:
df.drop(['Loan_ID'],axis=1,inplace=True)

###Label Encoding

In [89]:
col = ['Gender','Married','Dependents','Education','Self_Employed','Property_Area','Loan_Status']
le=LabelEncoder()
for c in col:
    df[c]=le.fit_transform(df[c])

### Displaying the number of Null values in each column

In [90]:
df.isnull().sum()

Gender                0
Married               0
Dependents            0
Education             0
Self_Employed         0
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

###Replace the Null values to mean value of the entire column

In [91]:
for column in ["LoanAmount", "Loan_Amount_Term", "Credit_History"]:
    mean = int(df[column].mean(skipna=True))
    df[column] = df[column].replace(np.NaN, mean)

### seperate the features and label and Split the train-test with ratio 20%

In [93]:
X = df.iloc[:, 0:11]
y = df.iloc[:, 11]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

### Scaling dataset

In [94]:
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

### Dataset after preprocessing

In [114]:
df.head(10)

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,1,0,0,0,0,5849,0.0,146.0,360.0,1.0,2,1
1,1,1,1,0,0,4583,1508.0,128.0,360.0,1.0,0,0
2,1,1,0,0,1,3000,0.0,66.0,360.0,1.0,2,1
3,1,1,0,1,0,2583,2358.0,120.0,360.0,1.0,2,1
4,1,0,0,0,0,6000,0.0,141.0,360.0,1.0,2,1
5,1,1,2,0,1,5417,4196.0,267.0,360.0,1.0,2,1
6,1,1,0,1,0,2333,1516.0,95.0,360.0,1.0,2,1
7,1,1,3,0,0,3036,2504.0,158.0,360.0,0.0,1,0
8,1,1,2,0,0,4006,1526.0,168.0,360.0,1.0,2,1
9,1,1,1,0,0,12841,10968.0,349.0,360.0,1.0,1,0


### Initialize models

In [109]:
knn = KNeighborsClassifier(n_neighbors=13,metric='euclidean')
naive = GaussianNB()

### Fitting dataset to the models

In [110]:
knn.fit(X_train, y_train)
naive.fit(X_train, y_train)

### Predicting Phase

In [111]:
knn_y_pred = knn.predict(X_test)
naive_y_pred = naive.predict(X_test)

### Results

In [112]:
print("KNN Evaluation...")
print("Accuracy: ", accuracy_score(y_test, knn_y_pred))
print("F1-Score: ", f1_score(y_test, knn_y_pred))
print("Precision-Score: ", precision_score(y_test, knn_y_pred))
print("Recall-Score: ", recall_score(y_test, knn_y_pred))

KNN Evaluation...
Accuracy:  0.7317073170731707
F1-Score:  0.8156424581005587
Precision-Score:  0.7448979591836735
Recall-Score:  0.9012345679012346


In [113]:
print("Naive Bayes Evaluation...")
print("Accuracy: ", accuracy_score(y_test, naive_y_pred))
print("F1-Score: ", f1_score(y_test, naive_y_pred))
print("Precision-Score: ", precision_score(y_test, naive_y_pred))
print("Recall-Score: ", recall_score(y_test, naive_y_pred))

Naive Bayes Evaluation...
Accuracy:  0.7560975609756098
F1-Score:  0.8275862068965517
Precision-Score:  0.7741935483870968
Recall-Score:  0.8888888888888888
