In [101]:
#Load libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, export_graphviz

In [102]:
# Load the loan-applicant records from the public CSV.
# Column labels are supplied by hand through `names`; with explicit names pandas
# treats the first line of the file as data rather than as a header row.
names = [
    'Loan_ID',
    'Gender',
    'Married',
    'Dependents',
    'Education',
    'Self_Employed',
    'ApplicantIncome',
    'CoapplicantIncome',
    'LoanAmount',
    'Loan_Amount_Term',
    'Credit_History',
    'Property_Area',
    'Loan_Status',
]
url = "https://raw.githubusercontent.com/callxpert/datasets/master/Loan-applicant-details.csv"
dataset = pd.read_csv(url, header=None, names=names)

In [103]:
# Preview the first five rows to sanity-check the column labels and raw values.
dataset.head(n=5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128,360,1,Rural,N
1,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66,360,1,Urban,Y
2,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120,360,1,Urban,Y
3,LP001008,Male,No,0,Graduate,No,6000,0.0,141,360,1,Urban,Y
4,LP001011,Male,Yes,2,Graduate,Yes,5417,4196.0,267,360,1,Urban,Y


In [104]:
# scikit-learn estimators need numeric inputs, so every categorical column is
# replaced by integer codes. LabelEncoder numbers the labels in sorted order
# (e.g. Loan_Status: N -> 0, Y -> 1).
var_mod = ['Gender','Married','Dependents','Education','Self_Employed','Property_Area','Loan_Status']

# Keep one fitted encoder per column. Re-fitting a single shared encoder would
# discard the label <-> code mapping of every column except the last one, making
# it impossible to decode values later with encoders[col].inverse_transform(...).
encoders = {}
for col in var_mod:
    encoders[col] = LabelEncoder()
    dataset[col] = encoders[col].fit_transform(dataset[col])

# Backward compatibility: `le` used to end up fitted on the last column in
# var_mod (Loan_Status); keep that name bound to the equivalent encoder.
le = encoders[var_mod[-1]]
dataset.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001003,1,1,1,0,0,4583,1508.0,128,360,1,0,0
1,LP001005,1,1,0,0,1,3000,0.0,66,360,1,2,1
2,LP001006,1,1,0,1,0,2583,2358.0,120,360,1,2,1
3,LP001008,1,0,0,0,0,6000,0.0,141,360,1,2,1
4,LP001011,1,1,2,0,1,5417,4196.0,267,360,1,2,1


In [105]:
# Hold out 20% of the records for evaluation and train on the remaining 80%.
# Only five of the columns are used as features: positions 6-10, i.e.
# ApplicantIncome, CoapplicantIncome, LoanAmount, Loan_Amount_Term and
# Credit_History. Position 12 (the encoded Loan_Status) is the target.
array = dataset.to_numpy()
X = array[:, 6:11].astype('int')
Y = array[:, 12].astype('int')
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=7,  # fixed seed: the same rows land in train/test on every run
)

In [106]:
# Logistic regression: a classification algorithm that predicts a binary
# outcome (1 / 0, Yes / No, True / False) from a set of independent variables.
# Fit on the training split, then report accuracy on the held-out split.
model = LogisticRegression(solver='liblinear').fit(x_train, y_train)
predictions = model.predict(x_test)
print(accuracy_score(y_test, predictions))

# Spot-check one hand-written applicant; the five values follow the column
# order of X (the feature columns selected when the split was built).
sample = [[1234, 0, 1, 360, 1]]
print(model.predict(sample))

0.7708333333333334
[1]


In [107]:
# Decision tree: a supervised learning algorithm (it needs a pre-defined target
# variable) that is mostly used for classification. It works with both
# categorical and continuous input and output variables.
#
# random_state is pinned (same seed as the train/test split) because tree
# construction is randomised; without it the accuracy and the sample prediction
# printed below can change from one run of the notebook to the next.
model = DecisionTreeClassifier(random_state=7)
model.fit(x_train, y_train)
predictions = model.predict(x_test)
print(accuracy_score(y_test, predictions))

# Spot-check one hand-written applicant; the five values follow the column
# order of X (the feature columns selected when the split was built).
print(model.predict([[1234, 0, 1, 360, 1]]))

0.6458333333333334
[0]


In [115]:
# Random forest: an ensemble method for classification, regression and other
# tasks. It builds a multitude of decision trees at training time and outputs
# the mode of the individual trees' classes (classification) or their mean
# prediction (regression).
#
# random_state is pinned (same seed as the train/test split) because both the
# bootstrap samples and the per-split feature subsets are drawn at random;
# without it the accuracy and the sample prediction printed below are not
# reproducible across runs.
model = RandomForestClassifier(n_estimators=100, random_state=7)
model.fit(x_train, y_train)
predictions = model.predict(x_test)
print(accuracy_score(y_test, predictions))

# Spot-check one hand-written applicant; the five values follow the column
# order of X (the feature columns selected when the split was built).
print(model.predict([[1234, 0, 1, 360, 1]]))

0.7604166666666666
[0]
