In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
import warnings
warnings.filterwarnings("ignore")

In [2]:
df=pd.read_csv(r"bank_Dataset for DT.csv")
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,0,1,1,0,2343,1,0,2,5,8,1042,1,-1,0,3,1
1,56,0,1,1,0,45,0,0,2,5,8,1467,1,-1,0,3,1
2,41,9,1,1,0,1270,1,0,2,5,8,1389,1,-1,0,3,1
3,55,7,1,1,0,2476,1,0,2,5,8,579,1,-1,0,3,1
4,54,0,1,2,0,184,0,0,2,5,8,673,2,-1,0,3,1


In [3]:
x=df.iloc[:,:-1]
y=df.iloc[:,-1]

## Splitting the data

In [4]:
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.30,random_state=1)

In [5]:
ytrain

2506     1
368      1
2827     1
9197     0
9910     0
        ..
7813     0
10955    0
905      1
5192     1
235      1
Name: deposit, Length: 7813, dtype: int64

### Bagging

In [6]:
from sklearn.ensemble import BaggingClassifier

In [7]:
bg=BaggingClassifier(LogisticRegression()) # n_estimators=10 by default
bg.fit(xtrain,ytrain)
ypred=bg.predict(xtest)
print(classification_report(ytest,ypred))

              precision    recall  f1-score   support

           0       0.76      0.81      0.79      1760
           1       0.78      0.72      0.75      1589

    accuracy                           0.77      3349
   macro avg       0.77      0.77      0.77      3349
weighted avg       0.77      0.77      0.77      3349



In [8]:
bg=BaggingClassifier(DecisionTreeClassifier())
bg.fit(xtrain,ytrain)
ypred=bg.predict(xtest)
print(classification_report(ytest,ypred))

              precision    recall  f1-score   support

           0       0.84      0.83      0.83      1760
           1       0.81      0.82      0.82      1589

    accuracy                           0.83      3349
   macro avg       0.83      0.83      0.83      3349
weighted avg       0.83      0.83      0.83      3349



In [9]:
from sklearn.ensemble import RandomForestClassifier

In [10]:
rf=RandomForestClassifier() # n_estimators=100 by default
rf.fit(xtrain,ytrain)
ypred=rf.predict(xtest)
print(classification_report(ytest,ypred))

              precision    recall  f1-score   support

           0       0.88      0.82      0.85      1760
           1       0.82      0.88      0.85      1589

    accuracy                           0.85      3349
   macro avg       0.85      0.85      0.85      3349
weighted avg       0.85      0.85      0.85      3349



In [11]:
from sklearn.ensemble import VotingClassifier

In [12]:
# Creating the list of estimators(models)
models=[]
accuracy=[]
models.append(("LR",LogisticRegression()))
models.append(("DT",DecisionTreeClassifier()))

In [13]:
models

[('LR', LogisticRegression()), ('DT', DecisionTreeClassifier())]

In [14]:
vc_hard=VotingClassifier(estimators=models,voting ='hard') # passing the list of models in the estimators
vc_hard.fit(xtrain,ytrain)
ypred=vc_hard.predict(xtest)
Accuracy = accuracy_scoreore(ytest, ypred)
print("Accuracy of Hard-Voting: ",round(Accuracy*100,2),"%\n")
print(classification_report(ytest,ypred))

Accuracy of Hard-Voting:  74.11 %

              precision    recall  f1-score   support

           0       0.70      0.88      0.78      1760
           1       0.82      0.58      0.68      1589

    accuracy                           0.74      3349
   macro avg       0.76      0.73      0.73      3349
weighted avg       0.76      0.74      0.73      3349



In [15]:
vc_soft=VotingClassifier(estimators=models,voting ='soft')
vc_soft.fit(xtrain,ytrain)
ypred=vc_soft.predict(xtest)
Soft_Accuracy = accuracy_score(ytest, ypred)
print("Accuracy of Soft-Voting: ",round(Soft_Accuracy*100,2),"%\n")
print(classification_report(ytest,ypred))

Accuracy of Soft-Voting:  78.74 %

              precision    recall  f1-score   support

           0       0.80      0.80      0.80      1760
           1       0.78      0.77      0.78      1589

    accuracy                           0.79      3349
   macro avg       0.79      0.79      0.79      3349
weighted avg       0.79      0.79      0.79      3349



## Boosting

In [16]:
from sklearn.ensemble import AdaBoostClassifier

In [17]:
ada=AdaBoostClassifier()
ada.fit(xtrain,ytrain)
ypred=ada.predict(xtest)
print(classification_report(ytest,ypred))

              precision    recall  f1-score   support

           0       0.82      0.85      0.84      1760
           1       0.83      0.80      0.81      1589

    accuracy                           0.83      3349
   macro avg       0.83      0.83      0.83      3349
weighted avg       0.83      0.83      0.83      3349



In [18]:
from sklearn.ensemble import GradientBoostingClassifier

In [19]:
gd=GradientBoostingClassifier()
gd.fit(xtrain,ytrain)
ypred=ada.predict(xtest)
print(classification_report(ytest,ypred))

              precision    recall  f1-score   support

           0       0.82      0.85      0.84      1760
           1       0.83      0.80      0.81      1589

    accuracy                           0.83      3349
   macro avg       0.83      0.83      0.83      3349
weighted avg       0.83      0.83      0.83      3349



In [20]:
#pip install xgboost

In [21]:
from xgboost import XGBClassifier

In [22]:
xgb=XGBClassifier()
xgb.fit(xtrain,ytrain)
ypred=xgb.predict(xtest)
print(classification_report(ytest,ypred))

              precision    recall  f1-score   support

           0       0.88      0.82      0.85      1760
           1       0.82      0.87      0.84      1589

    accuracy                           0.85      3349
   macro avg       0.85      0.85      0.85      3349
weighted avg       0.85      0.85      0.85      3349

