In [1]:
# import library
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# load the data
df = pd.read_csv('bank.csv',delimiter=';')

In [2]:
df # display the data

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4516,33,services,married,secondary,no,-333,yes,no,cellular,30,jul,329,5,-1,0,unknown,no
4517,57,self-employed,married,tertiary,yes,-3313,yes,yes,unknown,9,may,153,1,-1,0,unknown,no
4518,57,technician,married,secondary,no,295,no,no,cellular,19,aug,151,11,-1,0,unknown,no
4519,28,blue-collar,married,secondary,no,1137,no,no,cellular,6,feb,129,4,211,3,other,no


In [3]:
# get the data with desired columns
df = df[['age','marital','education','housing','loan','duration','campaign','pdays','y']]

In [4]:
# make the necessary replacements. 

df['y'].replace(['no','yes'], [0,1],inplace=True)
df['loan'].replace(['no','yes'], [0,1],inplace=True)
df['housing'].replace(['no','yes'], [0,1],inplace=True)
df = df[df['education']!='unknown']
df['education'].replace(['primary', 'secondary', 'tertiary'], [0,1,2],inplace=True)
df['marital'].replace(['married', 'single', 'divorced'], [0,1,2],inplace=True)
df['pdays'].replace([-1], [0],inplace=True)

In [5]:
df # again display the data

Unnamed: 0,age,marital,education,housing,loan,duration,campaign,pdays,y
0,30,0,0,0,0,79,1,0,0
1,33,0,1,1,1,220,1,339,0
2,35,1,2,1,0,185,1,330,0
3,30,0,2,1,1,199,4,0,0
4,59,0,1,1,0,226,1,0,0
...,...,...,...,...,...,...,...,...,...
4516,33,0,1,1,0,329,5,0,0
4517,57,0,2,1,1,153,1,0,0
4518,57,0,1,0,0,151,11,0,0
4519,28,0,1,0,0,129,4,211,0


In [6]:
# define X and y
X = df.iloc[:,0:8].values
y = df.iloc[:, 8].values

In [7]:
# splitting the data into training and test set
from sklearn.model_selection import train_test_split,cross_val_score
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [8]:
# scale the data
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Linear SVM Model

In [9]:
# build linear svm model

from sklearn.svm import SVC
clf_svm = SVC(kernel = 'linear', random_state = 0)
clf_svm.fit(X_train, y_train)

SVC(kernel='linear', random_state=0)

In [10]:
y_pred = clf_svm.predict(X_test) # prediction on test data

In [11]:
# confusion matrix
from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test, y_pred)

In [12]:
print(cm)

[[981   0]
 [103   0]]


In [13]:
cross_val_score(clf_svm, X, y, cv=3,n_jobs=-1) # cross_val_score

array([0.88442907, 0.88373702, 0.88434903])

In [14]:
accuracy_score(y_test, y_pred) # accuracy

0.9049815498154982

# logistic Model

In [15]:
# fit the Logistic Regression
from sklearn.linear_model import LogisticRegression
clf_logit = LogisticRegression(random_state = 0)
clf_logit.fit(X_train, y_train)

# prediction on test set
y_pred = clf_logit.predict(X_test)

# Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred) # accuracy

[[961  20]
 [ 79  24]]


0.9086715867158671

In [16]:
cross_val_score(clf_logit, X, y, cv=3,n_jobs=-1)  # cross_val_score

array([0.88719723, 0.88719723, 0.89127424])

# KNN

In [17]:
# fit the K-NN model
from sklearn.neighbors import KNeighborsClassifier
clf_knn = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
clf_knn.fit(X_train, y_train)

# prediction on test set
y_pred = clf_knn.predict(X_test)

# Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)

[[947  34]
 [ 75  28]]


In [18]:
accuracy_score(y_test, y_pred) # accuracy

0.8994464944649446

In [19]:
cross_val_score(clf_knn, X, y, cv=3,n_jobs=-1) # cross_val_score

array([0.88442907, 0.88373702, 0.87742382])