In [1]:
import numpy as np
import pandas as pd
import seaborn

In [2]:
df = pd.read_csv('Bank_Personal_Loan_Modelling.csv')
df.head()

Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,2,45,19,34,90089,3,1.5,1,0,0,1,0,0,0
2,3,39,15,11,94720,1,1.0,1,0,0,0,0,0,0
3,4,35,9,100,94112,1,2.7,2,0,0,0,0,0,0
4,5,35,8,45,91330,4,1.0,2,0,0,0,0,0,1


In [3]:
df['Personal Loan'].value_counts()

0    4520
1     480
Name: Personal Loan, dtype: int64

In [4]:
df = df.drop(['ID', 'ZIP Code', 'CCAvg', 'Mortgage', 'Securities Account', 'CD Account', 'Online', 'CreditCard'], axis=1)
df.head()

Unnamed: 0,Age,Experience,Income,Family,Education,Personal Loan
0,25,1,49,4,1,0
1,45,19,34,3,1,0
2,39,15,11,1,1,0
3,35,9,100,1,2,0
4,35,8,45,4,2,0


In [5]:
df['Education'].value_counts()

1    2096
3    1501
2    1403
Name: Education, dtype: int64

In [6]:
df.shape

(5000, 6)

In [7]:
x = df.iloc[:, 0:5]
y = df.iloc[:, 5]
print(x)
print(y)

      Age  Experience  Income  Family  Education
0      25           1      49       4          1
1      45          19      34       3          1
2      39          15      11       1          1
3      35           9     100       1          2
4      35           8      45       4          2
...   ...         ...     ...     ...        ...
4995   29           3      40       1          3
4996   30           4      15       4          1
4997   63          39      24       2          3
4998   65          40      49       3          2
4999   28           4      83       3          1

[5000 rows x 5 columns]
0       0
1       0
2       0
3       0
4       0
       ..
4995    0
4996    0
4997    0
4998    0
4999    0
Name: Personal Loan, Length: 5000, dtype: int64


In [8]:
df.isnull().sum()

Age              0
Experience       0
Income           0
Family           0
Education        0
Personal Loan    0
dtype: int64

In [9]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
print(x_train.shape)
print(x_test.shape)

(3350, 5)
(1650, 5)


In [10]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train_sc = sc.fit_transform(x_train)
x_test_sc = sc.fit_transform(x_test)
print(x_train_sc.shape)
print(x_test_sc.shape)

(3350, 5)
(1650, 5)


In [11]:
def classify(model, x, y):
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
    model.fit(x_train, y_train)
    score = model.score(x_test, y_test)
    print(f'Accuracy: {score*100}')

In [12]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
classify(lr, x, y)

Accuracy: 34.41162868964144


In [13]:
from sklearn.linear_model import LogisticRegression
lr_log = LogisticRegression()
classify(lr_log, x, y)

Accuracy: 94.60606060606061


In [24]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier()
classify(rf, x, y)
pred = rf.predict(x_test)
print(pred)
pd.DataFrame({'actual': y_test, 'prediction': pred, 'diff': (y_test - pred)}).head()

Accuracy: 98.3030303030303
[0 1 0 ... 0 0 0]


Unnamed: 0,actual,prediction,diff
1501,0,0,0
2586,1,1,0
2653,0,0,0
1055,0,0,0
705,0,0,0


In [27]:
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
classify(dt, x, y)
pred_dt = dt.predict(x_test)
print(pred_dt)
pd.DataFrame({'actual': y_test, 'prediction': pred_dt, 'diff': (y_test - pred_dt)})

Accuracy: 97.27272727272728
[0 1 0 ... 0 0 0]


Unnamed: 0,actual,prediction,diff
1501,0,0,0
2586,1,1,0
2653,0,0,0
1055,0,0,0
705,0,0,0
...,...,...,...
908,0,0,0
2114,0,0,0
3896,0,0,0
1627,0,0,0


In [16]:
from sklearn.naive_bayes import GaussianNB
gs = GaussianNB()
classify(gs, x, y)

Accuracy: 92.48484848484848


In [17]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
classify(knn, x, y)

Accuracy: 91.21212121212122


In [18]:
from sklearn.svm import SVC
sv = SVC()
classify(sv, x, y)
pred_sc = rf.predict(x_test)
print(pred_sc)
pd.DataFrame({'actual': y_test, 'prediction': pred_sc, 'diff': (y_test - pred_sc)}).head()

Accuracy: 90.24242424242425


In [19]:
import joblib
joblib.dump(rf, 'Bank Loan Prediction.pkl')

['Bank Loan Prediction.pkl']

In [20]:
mod = joblib.load('Bank Loan Prediction.pkl')