# Importing the libraries

In [352]:
import numpy as np 
import pandas as pd 
import seaborn as sns 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
import pickle
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder


# Preprocessing

In [353]:
data = pd.read_csv('loan_prediction.csv')
data.head(5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [354]:
data.shape

(614, 13)

In [355]:
data.describe()

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History
count,614.0,614.0,592.0,600.0,564.0
mean,5403.459283,1621.245798,146.412162,342.0,0.842199
std,6109.041673,2926.248369,85.587325,65.12041,0.364878
min,150.0,0.0,9.0,12.0,0.0
25%,2877.5,0.0,100.0,360.0,1.0
50%,3812.5,1188.5,128.0,360.0,1.0
75%,5795.0,2297.25,168.0,360.0,1.0
max,81000.0,41667.0,700.0,480.0,1.0


In [356]:
data.isnull().sum()

Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [357]:
data['Gender'].fillna(data['Gender'].mode()[0],inplace=True)

In [358]:
data['Married'].fillna(data['Married'].mode()[0],inplace=True)

In [359]:
data['Dependents'].fillna(data['Dependents'].mode()[0],inplace=True)

In [360]:
data['Self_Employed'].fillna(data['Self_Employed'].mode()[0],inplace=True)

In [361]:
data['Credit_History'].fillna(data['Credit_History'].mode()[0],inplace=True)

In [362]:
data['LoanAmount'].fillna(data['LoanAmount'].mean(),inplace=True)


In [363]:
data['Loan_Amount_Term'].fillna(data['Loan_Amount_Term'].mean(),inplace=True)

In [364]:
#Encoding
data.replace({"Loan_Status":{'Y':0,'N':1}},inplace=True)
data.replace({"Gender":{'Male':0,'Female':1},"Married":{'Yes':0,'No':1}},inplace=True)
data.replace({"Education":{'Graduate':0,'Not Graduate':1}},inplace=True)
data.replace({"Self_Employed":{'Yes':0,'No':1}},inplace=True)
data.replace({"Dependents":{'3+':4}},inplace=True)
data.replace({"Property_Area":{'Rural':0,'Urban':1,'Semiurban':2}},inplace=True)


In [365]:
data.head(5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,0,1,0,0,1,5849,0.0,146.412162,360.0,1.0,1,0
1,LP001003,0,0,1,0,1,4583,1508.0,128.0,360.0,1.0,0,1
2,LP001005,0,0,0,0,0,3000,0.0,66.0,360.0,1.0,1,0
3,LP001006,0,0,0,1,1,2583,2358.0,120.0,360.0,1.0,1,0
4,LP001008,0,1,0,0,1,6000,0.0,141.0,360.0,1.0,1,0


In [366]:
data.isnull().sum()

Loan_ID              0
Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

In [367]:
data.head(5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,0,1,0,0,1,5849,0.0,146.412162,360.0,1.0,1,0
1,LP001003,0,0,1,0,1,4583,1508.0,128.0,360.0,1.0,0,1
2,LP001005,0,0,0,0,0,3000,0.0,66.0,360.0,1.0,1,0
3,LP001006,0,0,0,1,1,2583,2358.0,120.0,360.0,1.0,1,0
4,LP001008,0,1,0,0,1,6000,0.0,141.0,360.0,1.0,1,0


In [368]:
data=data.drop(columns=['Loan_ID'],axis=1)

# Splitting the data


In [369]:
X = data.drop(columns=['Loan_Status'],axis=1)
Y = data['Loan_Status']

In [370]:
print(X.shape)
print(Y.shape)

(614, 11)
(614,)


# Training the data

In [371]:
X_train, X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.25,stratify=Y)

In [372]:
print(X.shape, X_train.shape,Y_train.shape, X_test.shape,Y_test.shape)

(614, 11) (460, 11) (460,) (154, 11) (154,)


# Taining the model
Decision Tree Classifier

In [373]:
DTClassifier=DecisionTreeClassifier(criterion='entropy', random_state=2)
DTClassifier.fit(X_train,Y_train)

DecisionTreeClassifier(criterion='entropy', random_state=2)

In [374]:
X_train_prediction = DTClassifier.predict(X_train)
training_data_accuray = accuracy_score(X_train_prediction,Y_train)

In [375]:
print('Accuracy on training data : ', training_data_accuray)

Accuracy on training data :  1.0


In [376]:
X_test_prediction = DTClassifier.predict(X_test)
training_data_accuray = accuracy_score(X_test_prediction,Y_test)

In [377]:
print('Accuracy on test data : ', training_data_accuray)

Accuracy on test data :  0.6298701298701299


Random Forest Classifier

In [378]:
classifier = RandomForestClassifier(random_state=0)

In [379]:
classifier.fit(X_train,Y_train)

RandomForestClassifier(random_state=0)

In [380]:
X_train_prediction = classifier.predict(X_train)
training_data_accuray = accuracy_score(X_train_prediction,Y_train)

In [381]:
print('Accuracy on training data : ', training_data_accuray)

Accuracy on training data :  1.0


In [382]:
X_test_prediction = classifier.predict(X_test)
training_data_accuray = accuracy_score(X_test_prediction,Y_test)

In [383]:
print('Accuracy on test data : ', training_data_accuray)

Accuracy on test data :  0.7792207792207793


# Saving the model and trying the prediction system

In [384]:
file = 'trained_model.pkl'
pickle.dump(classifier,open(file,'wb'))

In [385]:
loaded_model = pickle.load(open('trained_model.pkl','rb'))

In [386]:
input_data=(1,1,1,1,0,4583,1508.0,128.0,360.0,1.0,0)

input_np =np.asarray(input_data) 

input_reshape = input_np.reshape(1,-1)

prediction = loaded_model.predict(input_reshape)
print(prediction)

if(prediction[0]==0):
    print("Your Loan is Passed")
else:
    print("Your Loan is Rejected")    

[0]
Your Loan is Passed
