# Ensembling

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('bank.csv')

In [3]:
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,0,1,1,0,2343,1,0,2,5,8,1042,1,-1,0,3,1
1,56,0,1,1,0,45,0,0,2,5,8,1467,1,-1,0,3,1
2,41,9,1,1,0,1270,1,0,2,5,8,1389,1,-1,0,3,1
3,55,7,1,1,0,2476,1,0,2,5,8,579,1,-1,0,3,1
4,54,0,1,2,0,184,0,0,2,5,8,673,2,-1,0,3,1


In [4]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [5]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.40,random_state=1)

In [6]:
def classify(model):
    model.fit(X_train,y_train)
    y_pred = model.predict(X_test)
    print(classification_report(y_test,y_pred))

## Naive Aggregation

### Hard Voting

In [7]:
lr = LogisticRegression(random_state=1)
dt1 = DecisionTreeClassifier(random_state=1)
dt2 = DecisionTreeClassifier(criterion='entropy',random_state=1)
svm1 = LinearSVC(random_state=1)
svm2 = SVC(random_state=1)

In [8]:
model_list = [('lr',lr),('dt1',dt1),('dt2',dt2),('svm1',svm1),('svm2',svm2)]

In [9]:
from sklearn.ensemble import VotingClassifier

In [10]:
vc1 = VotingClassifier(estimators=model_list)

In [11]:
classify(vc1)

              precision    recall  f1-score   support

           0       0.80      0.84      0.82      2361
           1       0.81      0.76      0.78      2104

    accuracy                           0.80      4465
   macro avg       0.80      0.80      0.80      4465
weighted avg       0.80      0.80      0.80      4465



## Soft voting

In [12]:
model_list = [('lr',lr),('dt1',dt1),('dt2',dt2)]
vc2 = VotingClassifier(estimators=model_list,voting='soft')

In [13]:
classify(vc2)

              precision    recall  f1-score   support

           0       0.83      0.81      0.82      2361
           1       0.80      0.82      0.81      2104

    accuracy                           0.82      4465
   macro avg       0.82      0.82      0.82      4465
weighted avg       0.82      0.82      0.82      4465



# Bootstrap aggregation

### Bagging

In [14]:
from sklearn.ensemble import BaggingClassifier

In [15]:
bc1 = BaggingClassifier(LogisticRegression(random_state=1),n_estimators = 10,max_samples=100,random_state=1)  #10 Logistic regression and 100 sample size for each sample

In [16]:
classify(bc1)

              precision    recall  f1-score   support

           0       0.76      0.83      0.79      2361
           1       0.78      0.71      0.74      2104

    accuracy                           0.77      4465
   macro avg       0.77      0.77      0.77      4465
weighted avg       0.77      0.77      0.77      4465



### Pasting

In [17]:
bc2 = BaggingClassifier(LogisticRegression(random_state=1),n_estimators = 10,max_samples=100,random_state=1,bootstrap=False)

In [18]:
classify(bc2)

              precision    recall  f1-score   support

           0       0.76      0.83      0.80      2361
           1       0.79      0.71      0.75      2104

    accuracy                           0.78      4465
   macro avg       0.78      0.77      0.77      4465
weighted avg       0.78      0.78      0.77      4465



In [19]:
# Bagging
# Random forest
bc3 = BaggingClassifier(DecisionTreeClassifier(random_state=1),n_estimators = 10,max_samples=100,random_state=1)

In [20]:
classify(bc3)

              precision    recall  f1-score   support

           0       0.76      0.86      0.81      2361
           1       0.82      0.69      0.75      2104

    accuracy                           0.78      4465
   macro avg       0.79      0.78      0.78      4465
weighted avg       0.79      0.78      0.78      4465



# Random forest

In [21]:
from sklearn.ensemble import RandomForestClassifier

In [22]:
rfc = RandomForestClassifier(n_estimators=10,max_features=10,random_state=1)

In [23]:
classify(rfc)

              precision    recall  f1-score   support

           0       0.84      0.82      0.83      2361
           1       0.81      0.83      0.82      2104

    accuracy                           0.82      4465
   macro avg       0.82      0.82      0.82      4465
weighted avg       0.82      0.82      0.82      4465



# Stacking

In [24]:
#!pip install mlxtend

In [25]:
from mlxtend.classifier import StackingClassifier 

In [26]:
lr = LogisticRegression(random_state=1)
dt1 = DecisionTreeClassifier(random_state=1)
dt2 = DecisionTreeClassifier(criterion='entropy',random_state=1)

In [27]:
model_list = [lr,dt1,dt2]

In [28]:
lr2 = LogisticRegression(random_state=1)

In [29]:
sc = StackingClassifier(classifiers=model_list,meta_classifier=lr2)

In [30]:
classify(sc)

              precision    recall  f1-score   support

           0       0.88      0.72      0.79      2361
           1       0.74      0.89      0.81      2104

    accuracy                           0.80      4465
   macro avg       0.81      0.80      0.80      4465
weighted avg       0.81      0.80      0.80      4465

