# Universalbank

In [1]:
# Importing the libraries 

import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 

# Ignore harmless warnings 

import warnings 
warnings.filterwarnings("ignore")

import pandasql as psql

# pip install pandasql

In [31]:
# Load the Universal bank data

bankdata = pd.read_csv(r"D:\00 Datasets\Bank\Universalbank.csv", header=0) 
bankdata.head()

Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0,0,1,0,0,0
1,2,45,19,34,90089,3,1.5,1,0,0,1,0,0,0
2,3,39,15,11,94720,1,1.0,1,0,0,0,0,0,0
3,4,35,9,100,94112,1,2.7,2,0,0,0,0,0,0
4,5,35,8,45,91330,4,1.0,2,0,0,0,0,0,1


In [3]:
# Displaying the dataset information

bankdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   ID                  5000 non-null   int64  
 1   Age                 5000 non-null   int64  
 2   Experience          5000 non-null   int64  
 3   Income              5000 non-null   int64  
 4   ZIP Code            5000 non-null   int64  
 5   Family              5000 non-null   int64  
 6   CCAvg               5000 non-null   float64
 7   Education           5000 non-null   int64  
 8   Mortgage            5000 non-null   int64  
 9   Personal Loan       5000 non-null   int64  
 10  Securities Account  5000 non-null   int64  
 11  CD Account          5000 non-null   int64  
 12  Online              5000 non-null   int64  
 13  CreditCard          5000 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 547.0 KB


In [4]:
# Count the target or dependent variable by '0' & '1' and their proportion 
# (> 10 : 1, then the dataset is imbalance data)

CreditCard_count = bankdata.CreditCard.value_counts()
print('Class 0:', CreditCard_count[0])
print('Class 1:', CreditCard_count[1])
print('Proportion:', round(CreditCard_count[0] / CreditCard_count[1], 2), ': 1')
print('Total Bank records:', len(bankdata))

Class 0: 3530
Class 1: 1470
Proportion: 2.4 : 1
Total Bank records: 5000


In [32]:
# Delete the columns which are not influencing the target variable

del bankdata['ID']
del bankdata['ZIP Code']

In [33]:
# Displaying the dataset columns

bankdata.columns

Index(['Age', 'Experience', 'Income', 'Family', 'CCAvg', 'Education',
       'Mortgage', 'Personal Loan', 'Securities Account', 'CD Account',
       'Online', 'CreditCard'],
      dtype='object')

In [7]:
# cols1 is variables - crating a dummy variables

cols1 = ['Family', 'Education']
print(cols1)

['Family', 'Education']


In [8]:
# cols2 variables - MinMaxScalar function

cols2 = ['Age', 'Experience', 'Income', 'CCAvg', 'Mortgage']
print(cols2)

['Age', 'Experience', 'Income', 'CCAvg', 'Mortgage']


In [34]:
# cols3 variables - Normalization

cols3 = ['Age', 'Experience', 'Income', 'Family', 'CCAvg', 'Education', 'Mortgage', 
         'Personal Loan', 'Securities Account', 'CD Account', 'Online'] 
print(cols3)

['Age', 'Experience', 'Income', 'Family', 'CCAvg', 'Education', 'Mortgage', 'Personal Loan', 'Securities Account', 'CD Account', 'Online']


In [9]:
# Create dummy variable for all range values

bankdata = pd.get_dummies(bankdata, columns=cols1)
bankdata.head().T

Unnamed: 0,0,1,2,3,4
Age,25.0,45.0,39.0,35.0,35.0
Experience,1.0,19.0,15.0,9.0,8.0
Income,49.0,34.0,11.0,100.0,45.0
CCAvg,1.6,1.5,1.0,2.7,1.0
Mortgage,0.0,0.0,0.0,0.0,0.0
Personal Loan,0.0,0.0,0.0,0.0,0.0
Securities Account,1.0,1.0,0.0,0.0,0.0
CD Account,0.0,0.0,0.0,0.0,0.0
Online,0.0,0.0,0.0,0.0,0.0
CreditCard,0.0,0.0,0.0,0.0,1.0


In [35]:
# Identify the dependent and Target variables

IndepVar = []
for col in bankdata.columns:
    if col != 'CreditCard':
        IndepVar.append(col)

TargetVar = 'CreditCard'

x = bankdata[IndepVar]
y = bankdata[TargetVar]

In [36]:
# Splitting the dataset into train and test 

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.30, random_state = 42)
x_test_F1 = x_test.copy()

In [37]:
# Scaling the features by using MinMaxScaler

from sklearn.preprocessing import MinMaxScaler

mmscaler = MinMaxScaler(feature_range=(0, 1))

x_train[cols3] = mmscaler.fit_transform(x_train[cols3])
x_train = pd.DataFrame(x_train)

x_test[cols3] = mmscaler.fit_transform(x_test[cols3])
x_test = pd.DataFrame(x_test)

In [46]:
x_test.head()

Unnamed: 0,Age,Experience,Income,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online
1501,0.159091,0.152174,0.137755,0.333333,0.03,0.5,0.0,0.0,1.0,0.0,0.0
2586,0.545455,0.565217,0.719388,1.0,0.61,0.0,0.0,1.0,0.0,0.0,0.0
2653,0.159091,0.173913,0.576531,0.333333,0.31,0.0,0.64252,0.0,0.0,0.0,1.0
1055,0.181818,0.195652,0.27551,0.0,0.1,0.0,0.0,0.0,1.0,0.0,1.0
705,0.886364,0.847826,0.112245,0.666667,0.07,0.5,0.0,0.0,0.0,0.0,1.0


# SVM - Linear Kernel

In [38]:
# Training the SVM algorithm with train dataset

from sklearn.svm import SVC

bankdataSVM = SVC(C=1.0, kernel='linear', degree=3, gamma='scale', coef0=0.0, shrinking=True, 
                  probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, 
                  max_iter=- 1, decision_function_shape='ovr', break_ties=False, random_state=None)

bankdataSVM = bankdataSVM.fit(x_train, y_train)

In [39]:
# To display the default paramaters

params1 = bankdataSVM.get_params()
print(params1)

{'C': 1.0, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'linear', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}


In [40]:
# Predictions with test data

y_pred = bankdataSVM.predict(x_test)

In [41]:
# Confusion matrix and classification report

from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[1053   20]
 [ 356   71]]
              precision    recall  f1-score   support

           0       0.75      0.98      0.85      1073
           1       0.78      0.17      0.27       427

    accuracy                           0.75      1500
   macro avg       0.76      0.57      0.56      1500
weighted avg       0.76      0.75      0.69      1500



In [42]:
# Evalution metrics

import sklearn.metrics as metrics
from sklearn.metrics import roc_curve, roc_auc_score

print("Accuracy:", (round(metrics.accuracy_score(y_test, y_pred) * 100, 2)), "%")
print("Precision:", (round(metrics.precision_score(y_test, y_pred, average='micro') * 100, 2)), '%')
print("Recall:", (round(metrics.recall_score(y_test, y_pred, average='micro') * 100, 2)), "%")
print("f1-score:", (round(metrics.f1_score(y_test, y_pred, average='micro') * 100, 2)), '%')
print('roc_auc_score:', round(roc_auc_score(y_test, y_pred), 3))

Accuracy: 74.93 %
Precision: 74.93 %
Recall: 74.93 %
f1-score: 74.93 %
roc_auc_score: 0.574


# Polynomial Kernel

In [49]:
# Training the SVM algorithm

from sklearn.svm import SVC

bankdataSVMPoly = SVC(kernel='poly', degree=2)
bankdataSVMPoly.fit(x_train, y_train)

# Predicting the values

y_pred2 = bankdataSVMPoly.predict(x_test)

# Confusion matrix and classification report

from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_test,y_pred2))
print(classification_report(y_test,y_pred2))

# Evalution metrics

import sklearn.metrics as metrics
from sklearn.metrics import roc_curve, roc_auc_score

print("Accuracy:", (round(metrics.accuracy_score(y_test, y_pred2) * 100, 2)), "%")
print("Precision:", (round(metrics.precision_score(y_test, y_pred2, average='micro') * 100, 2)), '%')
print("Recall:", (round(metrics.recall_score(y_test, y_pred2, average='micro') * 100, 2)), "%")
print("f1-score:", (round(metrics.f1_score(y_test, y_pred2, average='micro') * 100, 2)), '%')
print('roc_auc_score:', round(roc_auc_score(y_test, y_pred2), 3))


[[1065    8]
 [ 359   68]]
              precision    recall  f1-score   support

           0       0.75      0.99      0.85      1073
           1       0.89      0.16      0.27       427

    accuracy                           0.76      1500
   macro avg       0.82      0.58      0.56      1500
weighted avg       0.79      0.76      0.69      1500

Accuracy: 75.53 %
Precision: 75.53 %
Recall: 75.53 %
f1-score: 75.53 %
roc_auc_score: 0.576


# Gaussian Kernel

In [44]:
# Training the SVM algorithm

from sklearn.svm import SVC

bankdataSVMGaussian = SVC(kernel='rbf', random_state = 42, class_weight='balanced')
bankdataSVMGaussian.fit(x_train, y_train)

# Predicting the values

y_pred3 = bankdataSVMGaussian.predict(x_test)

# Confusion matrix and classification report

from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_test,y_pred3))
print(classification_report(y_test,y_pred3))

# Evalution metrics

import sklearn.metrics as metrics
from sklearn.metrics import roc_curve, roc_auc_score

print("Accuracy:", (round(metrics.accuracy_score(y_test, y_pred3) * 100, 2)), "%")
print("Precision:", (round(metrics.precision_score(y_test, y_pred3, average='micro') * 100, 2)), '%')
print("Recall:", (round(metrics.recall_score(y_test, y_pred3, average='micro') * 100, 2)), "%")
print("f1-score:", (round(metrics.f1_score(y_test, y_pred3, average='micro') * 100, 2)), '%')
print('roc_auc_score:', round(roc_auc_score(y_test, y_pred3), 3))

[[786 287]
 [265 162]]
              precision    recall  f1-score   support

           0       0.75      0.73      0.74      1073
           1       0.36      0.38      0.37       427

    accuracy                           0.63      1500
   macro avg       0.55      0.56      0.55      1500
weighted avg       0.64      0.63      0.63      1500

Accuracy: 63.2 %
Precision: 63.2 %
Recall: 63.2 %
f1-score: 63.2 %
roc_auc_score: 0.556


# Sigmoid Kernel

In [45]:
# Training the SVM algorithm

from sklearn.svm import SVC

bankdataSVMSig = SVC(kernel='sigmoid', random_state = 42, class_weight='balanced')
bankdataSVMSig.fit(x_train, y_train)

# Predicting the values

y_pred4 = bankdataSVMSig.predict(x_test)

# Confusion matrix and classification report

from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_test,y_pred4))
print(classification_report(y_test,y_pred4))

# Evalution metrics

import sklearn.metrics as metrics
from sklearn.metrics import roc_curve, roc_auc_score

print("Accuracy:", (round(metrics.accuracy_score(y_test, y_pred4) * 100, 2)), "%")
print("Precision:", (round(metrics.precision_score(y_test, y_pred4, average='micro') * 100, 2)), '%')
print("Recall:", (round(metrics.recall_score(y_test, y_pred4, average='micro') * 100, 2)), "%")
print("f1-score:", (round(metrics.f1_score(y_test, y_pred4, average='micro') * 100, 2)), '%')
print('roc_auc_score:', round(roc_auc_score(y_test, y_pred4), 3))

[[590 483]
 [201 226]]
              precision    recall  f1-score   support

           0       0.75      0.55      0.63      1073
           1       0.32      0.53      0.40       427

    accuracy                           0.54      1500
   macro avg       0.53      0.54      0.52      1500
weighted avg       0.62      0.54      0.57      1500

Accuracy: 54.4 %
Precision: 54.4 %
Recall: 54.4 %
f1-score: 54.4 %
roc_auc_score: 0.54
