In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
data = pd.read_csv('loan_prediction.csv')

In [4]:
data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [3]:
# Loan_ID : Unique Loan ID

# Gender : Male/ Female

# Married : Applicant married (Y/N)

# Dependents : Number of dependents

# Education : Applicant Education (Graduate/ Under Graduate)

# Self_Employed : Self employed (Y/N)

# ApplicantIncome : Applicant income

# CoapplicantIncome : Coapplicant income

# LoanAmount : Loan amount in thousands of dollars

# Loan_Amount_Term : Term of loan in months

# Credit_History : Credit history meets guidelines yes or no

# Property_Area : Urban/ Semi Urban/ Rural

# Loan_Status : Loan approved (Y/N) this is the target variable

In [5]:
data.tail()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
609,LP002978,Female,No,0,Graduate,No,2900,0.0,71.0,360.0,1.0,Rural,Y
610,LP002979,Male,Yes,3+,Graduate,No,4106,0.0,40.0,180.0,1.0,Rural,Y
611,LP002983,Male,Yes,1,Graduate,No,8072,240.0,253.0,360.0,1.0,Urban,Y
612,LP002984,Male,Yes,2,Graduate,No,7583,0.0,187.0,360.0,1.0,Urban,Y
613,LP002990,Female,No,0,Graduate,Yes,4583,0.0,133.0,360.0,0.0,Semiurban,N


In [6]:
data.shape

(614, 13)

In [7]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Loan_ID            614 non-null    object 
 1   Gender             601 non-null    object 
 2   Married            611 non-null    object 
 3   Dependents         599 non-null    object 
 4   Education          614 non-null    object 
 5   Self_Employed      582 non-null    object 
 6   ApplicantIncome    614 non-null    int64  
 7   CoapplicantIncome  614 non-null    float64
 8   LoanAmount         592 non-null    float64
 9   Loan_Amount_Term   600 non-null    float64
 10  Credit_History     564 non-null    float64
 11  Property_Area      614 non-null    object 
 12  Loan_Status        614 non-null    object 
dtypes: float64(4), int64(1), object(8)
memory usage: 62.5+ KB


### 1. Check Null Values In The Dataset

In [8]:
data.isnull().sum()

Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [9]:
data.isnull().sum()*100 / len(data)

Loan_ID              0.000000
Gender               2.117264
Married              0.488599
Dependents           2.442997
Education            0.000000
Self_Employed        5.211726
ApplicantIncome      0.000000
CoapplicantIncome    0.000000
LoanAmount           3.583062
Loan_Amount_Term     2.280130
Credit_History       8.143322
Property_Area        0.000000
Loan_Status          0.000000
dtype: float64

### 2. Handling The missing Values

In [10]:
data = data.drop('Loan_ID',axis=1)

In [11]:
data.head(1)

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y


In [12]:
columns = ['Gender','Dependents','LoanAmount','Loan_Amount_Term']

In [13]:
data = data.dropna(subset=columns)

In [14]:
data.isnull().sum()*100 / len(data)

Gender               0.000000
Married              0.000000
Dependents           0.000000
Education            0.000000
Self_Employed        5.424955
ApplicantIncome      0.000000
CoapplicantIncome    0.000000
LoanAmount           0.000000
Loan_Amount_Term     0.000000
Credit_History       8.679928
Property_Area        0.000000
Loan_Status          0.000000
dtype: float64

In [15]:
data.shape

(553, 12)

In [16]:
data['Self_Employed'].mode()[0]

'No'

In [17]:
data['Self_Employed'] =data['Self_Employed'].fillna(data['Self_Employed'].mode()[0])

In [18]:
data.isnull().sum()*100 / len(data)

Gender               0.000000
Married              0.000000
Dependents           0.000000
Education            0.000000
Self_Employed        0.000000
ApplicantIncome      0.000000
CoapplicantIncome    0.000000
LoanAmount           0.000000
Loan_Amount_Term     0.000000
Credit_History       8.679928
Property_Area        0.000000
Loan_Status          0.000000
dtype: float64

In [19]:
data['Gender'].unique()

array(['Male', 'Female'], dtype=object)

In [20]:
data['Self_Employed'].unique()

array(['No', 'Yes'], dtype=object)

In [21]:
data['Credit_History'].mode()[0]

1.0

In [22]:
data['Credit_History'] =data['Credit_History'].fillna(data['Credit_History'].mode()[0])

In [23]:
data.isnull().sum()*100 / len(data)

Gender               0.0
Married              0.0
Dependents           0.0
Education            0.0
Self_Employed        0.0
ApplicantIncome      0.0
CoapplicantIncome    0.0
LoanAmount           0.0
Loan_Amount_Term     0.0
Credit_History       0.0
Property_Area        0.0
Loan_Status          0.0
dtype: float64

### 3. Handling Categorical Columns

In [24]:
data.sample(5)

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
339,Female,No,0,Graduate,No,4160,0.0,71.0,360.0,1.0,Semiurban,Y
470,Male,Yes,1,Graduate,Yes,3450,2079.0,162.0,360.0,1.0,Semiurban,Y
428,Male,Yes,0,Graduate,No,2920,16.120001,87.0,360.0,1.0,Rural,Y
510,Male,No,0,Not Graduate,No,3598,1287.0,100.0,360.0,1.0,Rural,N
306,Female,No,0,Graduate,No,3762,1666.0,135.0,360.0,1.0,Rural,Y


In [25]:
data['Gender'].value_counts()

Male      449
Female    104
Name: Gender, dtype: int64

In [26]:
data['Married'].value_counts()

Yes    359
No     194
Name: Married, dtype: int64

In [27]:
data['Education'].value_counts()

Graduate        437
Not Graduate    116
Name: Education, dtype: int64

In [28]:
data['Self_Employed'].value_counts()

No     481
Yes     72
Name: Self_Employed, dtype: int64

In [29]:
data['Dependents'].unique()

array(['1', '0', '2', '3+'], dtype=object)

In [30]:
data['Dependents'] =data['Dependents'].replace(to_replace="3+",value='4')

In [31]:
data['Dependents'].unique()

array(['1', '0', '2', '4'], dtype=object)

In [32]:
data['Loan_Status'].unique()

array(['N', 'Y'], dtype=object)

In [33]:
data

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
5,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y
...,...,...,...,...,...,...,...,...,...,...,...,...
609,Female,No,0,Graduate,No,2900,0.0,71.0,360.0,1.0,Rural,Y
610,Male,Yes,4,Graduate,No,4106,0.0,40.0,180.0,1.0,Rural,Y
611,Male,Yes,1,Graduate,No,8072,240.0,253.0,360.0,1.0,Urban,Y
612,Male,Yes,2,Graduate,No,7583,0.0,187.0,360.0,1.0,Urban,Y


In [34]:
one_hot_encoded = pd.get_dummies(data[['Gender', 'Married', 'Self_Employed']],drop_first=True )

In [35]:
one_hot_encoded

Unnamed: 0,Gender_Male,Married_Yes,Self_Employed_Yes
1,1,1,0
2,1,1,1
3,1,1,0
4,1,0,0
5,1,1,1
...,...,...,...
609,0,0,0
610,1,1,0
611,1,1,0
612,1,1,0


In [36]:
# Concatenate the one-hot encoded columns with the original data
data = pd.concat([data, one_hot_encoded], axis=1)

In [37]:
data

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status,Gender_Male,Married_Yes,Self_Employed_Yes
1,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N,1,1,0
2,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y,1,1,1
3,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y,1,1,0
4,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y,1,0,0
5,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,Female,No,0,Graduate,No,2900,0.0,71.0,360.0,1.0,Rural,Y,0,0,0
610,Male,Yes,4,Graduate,No,4106,0.0,40.0,180.0,1.0,Rural,Y,1,1,0
611,Male,Yes,1,Graduate,No,8072,240.0,253.0,360.0,1.0,Urban,Y,1,1,0
612,Male,Yes,2,Graduate,No,7583,0.0,187.0,360.0,1.0,Urban,Y,1,1,0


In [38]:
from sklearn.preprocessing import OrdinalEncoder
encoder = OrdinalEncoder()

education_mapping = {'Not Graduate': 0, 'Graduate': 1}
property_area_mapping = {'Rural': 0, 'Semiurban': 1, 'Urban': 2}

data['Education'] = data['Education'].replace(education_mapping)
data['Property_Area'] = data['Property_Area'].replace(property_area_mapping)

In [39]:
data

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status,Gender_Male,Married_Yes,Self_Employed_Yes
1,Male,Yes,1,1,No,4583,1508.0,128.0,360.0,1.0,0,N,1,1,0
2,Male,Yes,0,1,Yes,3000,0.0,66.0,360.0,1.0,2,Y,1,1,1
3,Male,Yes,0,0,No,2583,2358.0,120.0,360.0,1.0,2,Y,1,1,0
4,Male,No,0,1,No,6000,0.0,141.0,360.0,1.0,2,Y,1,0,0
5,Male,Yes,2,1,Yes,5417,4196.0,267.0,360.0,1.0,2,Y,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,Female,No,0,1,No,2900,0.0,71.0,360.0,1.0,0,Y,0,0,0
610,Male,Yes,4,1,No,4106,0.0,40.0,180.0,1.0,0,Y,1,1,0
611,Male,Yes,1,1,No,8072,240.0,253.0,360.0,1.0,2,Y,1,1,0
612,Male,Yes,2,1,No,7583,0.0,187.0,360.0,1.0,2,Y,1,1,0


In [40]:
from sklearn.preprocessing import LabelEncoder
encod = LabelEncoder()

data['Loan_Status'] = encod.fit_transform(data['Loan_Status'])

In [41]:
data

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status,Gender_Male,Married_Yes,Self_Employed_Yes
1,Male,Yes,1,1,No,4583,1508.0,128.0,360.0,1.0,0,0,1,1,0
2,Male,Yes,0,1,Yes,3000,0.0,66.0,360.0,1.0,2,1,1,1,1
3,Male,Yes,0,0,No,2583,2358.0,120.0,360.0,1.0,2,1,1,1,0
4,Male,No,0,1,No,6000,0.0,141.0,360.0,1.0,2,1,1,0,0
5,Male,Yes,2,1,Yes,5417,4196.0,267.0,360.0,1.0,2,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,Female,No,0,1,No,2900,0.0,71.0,360.0,1.0,0,1,0,0,0
610,Male,Yes,4,1,No,4106,0.0,40.0,180.0,1.0,0,1,1,1,0
611,Male,Yes,1,1,No,8072,240.0,253.0,360.0,1.0,2,1,1,1,0
612,Male,Yes,2,1,No,7583,0.0,187.0,360.0,1.0,2,1,1,1,0


In [42]:
data = data.drop(['Gender', 'Married', 'Self_Employed'], axis=1)

In [43]:
data

Unnamed: 0,Dependents,Education,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status,Gender_Male,Married_Yes,Self_Employed_Yes
1,1,1,4583,1508.0,128.0,360.0,1.0,0,0,1,1,0
2,0,1,3000,0.0,66.0,360.0,1.0,2,1,1,1,1
3,0,0,2583,2358.0,120.0,360.0,1.0,2,1,1,1,0
4,0,1,6000,0.0,141.0,360.0,1.0,2,1,1,0,0
5,2,1,5417,4196.0,267.0,360.0,1.0,2,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
609,0,1,2900,0.0,71.0,360.0,1.0,0,1,0,0,0
610,4,1,4106,0.0,40.0,180.0,1.0,0,1,1,1,0
611,1,1,8072,240.0,253.0,360.0,1.0,2,1,1,1,0
612,2,1,7583,0.0,187.0,360.0,1.0,2,1,1,1,0


In [44]:
data = data[['Gender_Male', 'Married_Yes', 'Self_Employed_Yes', 'Dependents', 'Education', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Loan_Status']]

In [45]:
data

Unnamed: 0,Gender_Male,Married_Yes,Self_Employed_Yes,Dependents,Education,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,1,1,0,1,1,4583,1508.0,128.0,360.0,1.0,0,0
2,1,1,1,0,1,3000,0.0,66.0,360.0,1.0,2,1
3,1,1,0,0,0,2583,2358.0,120.0,360.0,1.0,2,1
4,1,0,0,0,1,6000,0.0,141.0,360.0,1.0,2,1
5,1,1,1,2,1,5417,4196.0,267.0,360.0,1.0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...
609,0,0,0,0,1,2900,0.0,71.0,360.0,1.0,0,1
610,1,1,0,4,1,4106,0.0,40.0,180.0,1.0,0,1
611,1,1,0,1,1,8072,240.0,253.0,360.0,1.0,2,1
612,1,1,0,2,1,7583,0.0,187.0,360.0,1.0,2,1


### 4. Store Feature Matrix In X And Response (Target) In Vector y

In [46]:
X = data.drop('Loan_Status',axis=1)

In [47]:
y = data['Loan_Status']

In [48]:
X

Unnamed: 0,Gender_Male,Married_Yes,Self_Employed_Yes,Dependents,Education,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
1,1,1,0,1,1,4583,1508.0,128.0,360.0,1.0,0
2,1,1,1,0,1,3000,0.0,66.0,360.0,1.0,2
3,1,1,0,0,0,2583,2358.0,120.0,360.0,1.0,2
4,1,0,0,0,1,6000,0.0,141.0,360.0,1.0,2
5,1,1,1,2,1,5417,4196.0,267.0,360.0,1.0,2
...,...,...,...,...,...,...,...,...,...,...,...
609,0,0,0,0,1,2900,0.0,71.0,360.0,1.0,0
610,1,1,0,4,1,4106,0.0,40.0,180.0,1.0,0
611,1,1,0,1,1,8072,240.0,253.0,360.0,1.0,2
612,1,1,0,2,1,7583,0.0,187.0,360.0,1.0,2


In [49]:
y

1      0
2      1
3      1
4      1
5      1
      ..
609    1
610    1
611    1
612    1
613    0
Name: Loan_Status, Length: 553, dtype: int32

### 5. Feature Scaling

In [50]:
data.head()

Unnamed: 0,Gender_Male,Married_Yes,Self_Employed_Yes,Dependents,Education,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,1,1,0,1,1,4583,1508.0,128.0,360.0,1.0,0,0
2,1,1,1,0,1,3000,0.0,66.0,360.0,1.0,2,1
3,1,1,0,0,0,2583,2358.0,120.0,360.0,1.0,2,1
4,1,0,0,0,1,6000,0.0,141.0,360.0,1.0,2,1
5,1,1,1,2,1,5417,4196.0,267.0,360.0,1.0,2,1


In [51]:
cols = ['ApplicantIncome','CoapplicantIncome','LoanAmount','Loan_Amount_Term']

In [52]:
from sklearn.preprocessing import StandardScaler
st = StandardScaler()
X[cols]=st.fit_transform(X[cols])

In [53]:
X

Unnamed: 0,Gender_Male,Married_Yes,Self_Employed_Yes,Dependents,Education,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
1,1,1,0,1,1,-0.128694,-0.049699,-0.214368,0.279961,1.0,0
2,1,1,1,0,1,-0.394296,-0.545638,-0.952675,0.279961,1.0,2
3,1,1,0,0,0,-0.464262,0.229842,-0.309634,0.279961,1.0,2
4,1,0,0,0,1,0.109057,-0.545638,-0.059562,0.279961,1.0,2
5,1,1,1,2,1,0.011239,0.834309,1.440866,0.279961,1.0,2
...,...,...,...,...,...,...,...,...,...,...,...
609,0,0,0,0,1,-0.411075,-0.545638,-0.893134,0.279961,1.0,0
610,1,1,0,4,1,-0.208727,-0.545638,-1.262287,-2.468292,1.0,0
611,1,1,0,1,1,0.456706,-0.466709,1.274152,0.279961,1.0,2
612,1,1,0,2,1,0.374659,-0.545638,0.488213,0.279961,1.0,2


### 6. Splitting The Dataset Into The Training Set And Test Set & Applying K-Fold Cross Validation 

In [54]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

In [55]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [56]:
from sklearn.linear_model import LogisticRegression

In [57]:
model_li = LogisticRegression()
model_li.fit(X_train,y_train)

LogisticRegression()

### print trainging and testing score

In [58]:
model_li.score(X_train,y_train)

0.8076923076923077

In [59]:
model_li.score(X_test,y_test)

0.7927927927927928

### How try all diffrent classification algorith and find the testing score

In [60]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBRFClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC

In [61]:
model_SVM = SVC()
model_SVM.fit(X_train,y_train)

SVC()

In [62]:
model_SVM.score(X_train,y_train)

0.8257918552036199

In [63]:
model_SVM.score(X_test,y_test)

0.8018018018018018

In [64]:
from sklearn.model_selection import KFold,cross_val_score

In [65]:
print('KNN : ', cross_val_score(KNeighborsClassifier(), X_train,y_train, cv=10))

print('CART : ', cross_val_score(DecisionTreeClassifier(), X_train,y_train, cv=10))

print('RF : ', cross_val_score(RandomForestClassifier(), X_train,y_train, cv=10))

print('SVM : ', cross_val_score(SVC(), X_train,y_train, cv=10))

print('ADABOOST : ', cross_val_score(AdaBoostClassifier(), X_train,y_train, cv=10))

# print('XGB : ', cross_val_score(XGBRFClassifier(), X_train,y_train, cv=10))

KNN :  [0.82222222 0.8        0.70454545 0.68181818 0.70454545 0.68181818
 0.63636364 0.70454545 0.63636364 0.72727273]
CART :  [0.64444444 0.66666667 0.68181818 0.63636364 0.70454545 0.63636364
 0.68181818 0.61363636 0.70454545 0.75      ]
RF :  [0.8        0.82222222 0.79545455 0.75       0.79545455 0.72727273
 0.75       0.75       0.75       0.77272727]
SVM :  [0.82222222 0.8        0.84090909 0.86363636 0.77272727 0.79545455
 0.75       0.79545455 0.72727273 0.77272727]
ADABOOST :  [0.8        0.8        0.86363636 0.77272727 0.72727273 0.70454545
 0.72727273 0.72727273 0.75       0.70454545]


In [66]:
scores1 = cross_val_score(KNeighborsClassifier(), X_train,y_train, cv=10)
print('KNN  : ', np.average(scores1))

scores2 = cross_val_score(DecisionTreeClassifier(), X_train,y_train, cv=10)
print('CART : ', np.average(scores2))

scores3 = cross_val_score(RandomForestClassifier(), X_train,y_train, cv=10)
print('RF   : ', np.average(scores3))

scores4 = cross_val_score(SVC(), X_train,y_train, cv=10)
print('SVM  : ', np.average(scores4))

scores5 = cross_val_score(AdaBoostClassifier(), X_train,y_train, cv=10)
print('ADB  : ', np.average(scores5))

# scores6 = cross_val_score(XGBRFClassifier(), X_train,y_train, cv=10)
# print('XGB  : ', np.average(scores6))

KNN  :  0.7099494949494949
CART :  0.6765656565656565
RF   :  0.784949494949495
SVM  :  0.794040404040404
ADB  :  0.7577272727272728


### Here we see svc gives us best score so we can use svc algorithm

In [67]:
reg_rf = SVC()
reg_rf.fit(X_train, y_train)

SVC()

In [68]:
y_pred = reg_rf.predict(X_test)

In [69]:
reg_rf.score(X_train, y_train)

0.8257918552036199

In [70]:
reg_rf.score(X_test, y_test)

0.8018018018018018

In [71]:
import pickle

In [72]:
pickle.dump(reg_rf, open('Model', 'wb'))

In [74]:
X.head()

Unnamed: 0,Gender_Male,Married_Yes,Self_Employed_Yes,Dependents,Education,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
1,1,1,0,1,1,-0.128694,-0.049699,-0.214368,0.279961,1.0,0
2,1,1,1,0,1,-0.394296,-0.545638,-0.952675,0.279961,1.0,2
3,1,1,0,0,0,-0.464262,0.229842,-0.309634,0.279961,1.0,2
4,1,0,0,0,1,0.109057,-0.545638,-0.059562,0.279961,1.0,2
5,1,1,1,2,1,0.011239,0.834309,1.440866,0.279961,1.0,2


### End