# Loan Status Prediction

### Importing the Dataset and Libraries

In [21]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
from sklearn.metrics import accuracy_score, confusion_matrix

In [22]:
# Read the loan data and leave out the Loan_ID column, which is not used as a feature below.
df = pd.read_csv('/content/Loan_status.csv').drop(columns=['Loan_ID'])
df.shape

(614, 12)

In [23]:
# Show the first two rows to confirm which columns remain after loading.
df.head(n=2)

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N


## Data Preprocessing

### 1. Replacing values and adding columns

In [24]:
# Swap the literal '3+' for the integer 4 wherever it occurs in the frame
# (the call is frame-wide; no single column is targeted).
# NOTE(review): the later tail() output shows 4 under Dependents, so that is
# presumably the only column affected; its other entries may still be strings,
# leaving mixed str/int values -- confirm the resulting dtype.
df.replace(to_replace='3+', value=4, inplace=True)

In [25]:
# Combine applicant and co-applicant income into one feature.
# DataFrame.insert works in place and returns None, so its result must not be
# assigned to a column; pass the computed values to insert() directly instead.
# Position 5 puts the new column right after Self_Employed.
df.insert(5, "Total_Income", df['ApplicantIncome'] + df['CoapplicantIncome'])

# The two source columns are now redundant with Total_Income.
df.drop(columns=['ApplicantIncome', 'CoapplicantIncome'], inplace=True)
df.head(2)

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,Total_Income,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,Male,No,0,Graduate,No,5849.0,,360.0,1.0,Urban,Y
1,Male,Yes,1,Graduate,No,6091.0,128.0,360.0,1.0,Rural,N


### 2. Replacing the null values with the mode

In [26]:
# Count the missing values in each column before imputing.
df.isna().sum()

Gender              13
Married              3
Dependents          15
Education            0
Self_Employed       32
Total_Income         0
LoanAmount          22
Loan_Amount_Term    14
Credit_History      50
Property_Area        0
Loan_Status          0
dtype: int64

In [27]:
null_cols = ['Gender', 'Married', 'Dependents', 'Self_Employed', 'LoanAmount', 'Loan_Amount_Term', 'Credit_History']

# Work out each column's most frequent value (the first one when several tie),
# then fill the gaps of every listed column in a single fillna call.
mode_by_column = {col: df[col].mode()[0] for col in null_cols}
df = df.fillna(value=mode_by_column)

df.isnull().sum()

Gender              0
Married             0
Dependents          0
Education           0
Self_Employed       0
Total_Income        0
LoanAmount          0
Loan_Amount_Term    0
Credit_History      0
Property_Area       0
Loan_Status         0
dtype: int64

### 3. Encoding the categorical variables with LabelEncoder

In [28]:
cat_var = ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area', 'Loan_Status']

# Replace every categorical column with integer codes. The shared encoder `le`
# is refit for each column in turn, so afterwards it only remembers the
# mapping of the last column processed.
df = df.assign(**{col: le.fit_transform(df[col]) for col in cat_var})

In [29]:
# Inspect the last five rows to check the encoded values.
df.tail(n=5)

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,Total_Income,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
609,0,0,0,0,0,2900.0,71.0,360.0,1.0,0,1
610,1,1,4,0,0,4106.0,40.0,180.0,1.0,0,1
611,1,1,1,0,0,8312.0,253.0,360.0,1.0,2,1
612,1,1,2,0,0,7583.0,187.0,360.0,1.0,2,1
613,0,0,0,0,1,4583.0,133.0,360.0,0.0,1,0


## Training machine learning models on the data

### 1. Defining the dependent and independent variables

In [30]:
# Target: the encoded Loan_Status column. Features: every other column.
y = df['Loan_Status'].to_numpy()
x = df.drop(columns='Loan_Status').to_numpy()

# Print both shapes, one per line, as a quick sanity check.
print(x.shape, y.shape, sep='\n')

(614, 10)
(614,)


### 2. Splitting into training and test sets

In [31]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

### 3. Defining a standardized function to fit all the machine learning models

In [32]:
def classify(classifier, x, y, test_size=0.2, random_state=0):
  """Fit ``classifier`` on a train split of ``(x, y)`` and print its results.

  Prints the raw test-set predictions followed by the accuracy on the
  training split and on the test split. Nothing is returned.

  Parameters
  ----------
  classifier : estimator exposing ``fit(X, y)`` and ``predict(X)``
      Fitted in place on the training split.
  x : array-like
      Feature matrix, one row per sample.
  y : array-like
      Target labels aligned with the rows of ``x``.
  test_size : float, default 0.2
      Fraction of rows held out for testing (forwarded to train_test_split).
  random_state : int, default 0
      Seed forwarded to train_test_split so the split is repeatable.
  """
  x_train, x_test, y_train, y_test = train_test_split(
      x, y, test_size=test_size, random_state=random_state
  )
  classifier.fit(x_train, y_train)

  # Predict once per split and reuse the result for printing and scoring.
  train_pred = classifier.predict(x_train)
  test_pred = classifier.predict(x_test)
  print(test_pred)

  # accuracy_score takes (y_true, y_pred) in that order. The .round() is kept
  # from the original so predictions that are not already whole numbers are
  # still rounded before scoring.
  print('Accuracy Score Training Set: ', accuracy_score(y_train, train_pred.round()))
  print('Accuracy Score Test Set: ', accuracy_score(y_test, test_pred.round()))


### 4. (i) Logistic Regression

In [33]:
from sklearn.linear_model import LogisticRegression

# First candidate model: logistic regression with scikit-learn's default settings.
classifier = LogisticRegression()
classify(classifier=classifier, x=x, y=y)

[1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1
 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 0 0 1 1 1 1 1 0 1]
Accuracy Score Training Set:  0.8024439918533605
Accuracy Score Test Set:  0.8292682926829268


### 5. (ii) Decision Tree Classification

In [34]:
from sklearn.tree import DecisionTreeClassifier

# Seed the tree: scikit-learn permutes the candidate features at every split,
# so an unseeded tree can give different predictions and scores on each run.
classifier = DecisionTreeClassifier(random_state=0)
classify(classifier, x, y)

[0 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 0 1 0 0 0 1 1 0 0 1 1 1 0 0 1 1 1 1 1 0 1
 1 1 1 0 0 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 0 1 0 1 1 1 1 0 1 1 1
 1 1 0 1 1 0 1 1 1 0 1 0 1 0 0 1 1 0 0 0 1 0 0 1 0 1 1 1 1 0 1 1 0 0 1 1 1
 1 0 1 0 0 1 1 0 1 1 0 1]
Accuracy Score Training Set:  1.0
Accuracy Score Test Set:  0.7398373983739838


### 6. (iii) Random Forest Classification

In [35]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest: bootstrap sampling and per-split feature sampling are
# random, so without a fixed random_state the reported accuracy is not
# reproducible between runs.
classifier = RandomForestClassifier(random_state=0)
classify(classifier, x, y)

[1 1 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 0 1 1 1 0 0 1 1 1 1 1 0 1
 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1
 1 1 1 0 0 1 1 1 1 1 0 1]
Accuracy Score Training Set:  1.0
Accuracy Score Test Set:  0.7804878048780488


#### Since Logistic Regression achieved the best test-set accuracy, we will move ahead with the Logistic Regression model.

In [36]:
# Show the last two encoded rows as a reference for the feature order used below.
df.tail(n=2)

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,Total_Income,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
612,1,1,2,0,0,7583.0,187.0,360.0,1.0,2,1
613,0,0,0,0,1,4583.0,133.0,360.0,0.0,1,0


## Testing the model with new data points given as input

In [37]:
# Example 1: for the condition on Loan Granted

# Values follow the feature column order: Gender, Married, Dependents,
# Education, Self_Employed, Total_Income, LoanAmount, Loan_Amount_Term,
# Credit_History, Property_Area.
input_data_raw = (1,1,0,0,0,6200,120,180,1,2)
# Wrap the tuple in a list to get the single-row 2-D array predict() expects.
input_data = np.array([input_data_raw])
classifier_1 = LogisticRegression().fit(x_train, y_train)
prediction = classifier_1.predict(input_data)
print(prediction)

# A predicted label of 0 means the loan is refused.
print('No Loan Granted' if prediction == 0 else 'Loan Granted')

[1]
Loan Granted


In [38]:
## Example 2: for the condition of loan not granted
# (same dataset but Loan amount increased a lot)

# Same applicant as Example 1 except for LoanAmount (seventh value).
input_data_raw = (1,1,0,0,0,6200,12000,180,1,2)
# A leading new axis turns the flat vector into one row of ten features.
input_data = np.asarray(input_data_raw)[np.newaxis, :]
classifier_2 = LogisticRegression()
classifier_2.fit(x_train, y_train)
prediction = classifier_2.predict(input_data)
print(prediction)

# Label 0 is reported as a refusal, anything else as an approval.
outcome = 'No Loan Granted' if prediction == 0 else 'Loan Granted'
print(outcome)

[0]
No Loan Granted


# Prediction from user-entered input data

## Example for a granted Loan

In [42]:
# Ask for the ten model inputs in the same order as the feature columns
# (Gender, Married, Dependents, Education, Self_Employed, Total_Income,
# LoanAmount, Loan_Amount_Term, Credit_History, Property_Area).
# input() returns strings; the answers are expected to be the numeric codes the
# model was trained on.
# NOTE(review): the credit-history prompt advertises y/n, but the recorded
# session answers it with 1 -- confirm the intended format before changing the text.
prompts = [
    'Please enter your Gender: ',
    'Are u married: ',
    'Are there any dependents: ',
    'Are your Educated(y-1,N-0): ',
    'Are u selfemployed (y:1,N-0): ',
    'Enter your annual Income: ',
    'What is the LoanAmount: ',
    'How much days will it take u to repay us: ',
    'You have a credit history (y/n): ',
    'Any property area owned: ',
]
G, M, D, E, SE, TI, LA, LT, CH, PA = [input(prompt) for prompt in prompts]

Please enter your Gender: 1
Are u married: 0
Are there any dependents: 0
Are your Educated(y-1,N-0): 0
Are u selfemployed (y:1,N-0): 0
Enter your annual Income: 120
What is the LoanAmount: 130
How much days will it take u to repay us: 360
You have a credit history (y/n): 1
Any property area owned: 2


In [43]:
# Build a single-row feature array from the answers typed by the user.
# input() returns strings, so convert them to floats explicitly: passing an
# array of strings to predict() only worked through a deprecated implicit
# conversion (hence the stray check_array warning) and is rejected by newer
# scikit-learn releases. Non-numeric answers now fail here with a ValueError.
input_data_entered = (G,M,D,E,SE,TI,LA,LT,CH,PA)
input_data_final = np.asarray(input_data_entered, dtype=float).reshape(1,-1)
classifier_inp = LogisticRegression()
classifier_inp.fit(x_train, y_train)
prediction_inp = classifier_inp.predict(input_data_final)
print(prediction_inp)

# predict() returns a one-element array; compare its single label, not the array.
if prediction_inp[0] == 0:
  print('Sorry Loan cannot be granted')
else:
  print('Granted Loan')

[1]
Granted Loan


  X = check_array(X, **check_params)


## Example for a loan that is not granted

In [48]:
# Ask for the ten model inputs in the same order as the feature columns
# (Gender, Married, Dependents, Education, Self_Employed, Total_Income,
# LoanAmount, Loan_Amount_Term, Credit_History, Property_Area).
# input() returns strings; the answers are expected to be the numeric codes the
# model was trained on.
# NOTE(review): the credit-history prompt advertises y/n, but the recorded
# session answers it with 1 -- confirm the intended format before changing the text.
prompts = [
    'Please enter your Gender: ',
    'Are u married: ',
    'Are there any dependents: ',
    'Are your Educated(y-1,N-0): ',
    'Are u selfemployed (y:1,N-0): ',
    'Enter your annual Income: ',
    'What is the LoanAmount: ',
    'How much days will it take u to repay us: ',
    'You have a credit history (y/n): ',
    'Any property area owned: ',
]
G, M, D, E, SE, TI, LA, LT, CH, PA = [input(prompt) for prompt in prompts]

Please enter your Gender: 1
Are u married: 0
Are there any dependents: 1
Are your Educated(y-1,N-0): 0
Are u selfemployed (y:1,N-0): 1
Enter your annual Income: 3400
What is the LoanAmount: 240000
How much days will it take u to repay us: 360
You have a credit history (y/n): 1
Any property area owned: 1


In [49]:
# Build a single-row feature array from the answers typed by the user.
# input() returns strings, so convert them to floats explicitly: passing an
# array of strings to predict() only worked through a deprecated implicit
# conversion (hence the stray check_array warning) and is rejected by newer
# scikit-learn releases. Non-numeric answers now fail here with a ValueError.
input_data_entered = (G,M,D,E,SE,TI,LA,LT,CH,PA)
input_data_final = np.asarray(input_data_entered, dtype=float).reshape(1,-1)
classifier_inp = LogisticRegression()
classifier_inp.fit(x_train, y_train)
prediction_inp = classifier_inp.predict(input_data_final)
print(prediction_inp)

# predict() returns a one-element array; compare its single label, not the array.
if prediction_inp[0] == 0:
  print('Sorry Loan cannot be granted')
else:
  print('Granted Loan')

[0]
Sorry Loan cannot be granted


  X = check_array(X, **check_params)
