<a href="https://colab.research.google.com/github/himanshuarora1703/Tutorial-notebooks/blob/main/Ensemble/Voting_Ensemble.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
import seaborn as sns

In [2]:
# Load the Iris dataset via scikit-learn's load_iris helper.
data = load_iris()

# List the fields available on the returned object.
data.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [3]:
# Feature matrix as a DataFrame labelled with the dataset's feature names,
# and the class labels as a Series named 'target'.
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target, name='target')

X

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [4]:
# Index labels of every row whose target is class 0.
ind = y.loc[y.eq(0)].index

In [5]:
# Remove the class-0 rows from the feature frame, in place.
# NOTE: `ind` holds index labels and a later cell renumbers the index, so
# re-running this cell afterwards would remove different rows.
X.drop(labels=ind, axis='index', inplace=True)

In [6]:
# Renumber the remaining rows from 0; drop=True discards the old index
# instead of keeping it as a column.
X.reset_index(drop=True,inplace=True)

In [7]:
# Display the feature frame after the class-0 rows were dropped and the index reset.
X

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,7.0,3.2,4.7,1.4
1,6.4,3.2,4.5,1.5
2,6.9,3.1,4.9,1.5
3,5.5,2.3,4.0,1.3
4,6.5,2.8,4.6,1.5
...,...,...,...,...
95,6.7,3.0,5.2,2.3
96,6.3,2.5,5.0,1.9
97,6.5,3.0,5.2,2.0
98,6.2,3.4,5.4,2.3


In [8]:
# Remove the class-0 entries from the target as well, so it stays aligned with X.
y.drop(labels=y.loc[y.eq(0)].index, inplace=True)

In [9]:
# Renumber the remaining target entries from 0, matching the reset index of X.
y.reset_index(drop=True,inplace=True)

In [10]:
# Display the target after the class-0 entries were dropped and the index reset.
y

0     1
1     1
2     1
3     1
4     1
     ..
95    2
96    2
97    2
98    2
99    2
Name: target, Length: 100, dtype: int64

In [11]:
# Recode the two remaining classes as a binary target:
# original label 1 -> 0, the other remaining label (2) -> 1.
# The result is a NumPy array, not a Series.
y = np.where(y != 1, 1, 0)

In [12]:
# np.where returned a plain array; wrap it as a Series named 'target' again.
y = pd.Series(y,name='target')

In [13]:
# Human-readable class names for the binary target: 0 -> Versicolor, 1 -> Virginica.
y_temp = pd.Series(np.where(y == 0, 'Versicolor', 'Virginica'))

In [14]:
# Features and binary target side by side in one frame, for inspection.
temp_df = pd.concat(objs=[X, y], axis='columns')
temp_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,7.0,3.2,4.7,1.4,0
1,6.4,3.2,4.5,1.5,0
2,6.9,3.1,4.9,1.5,0
3,5.5,2.3,4.0,1.3,0
4,6.5,2.8,4.6,1.5,0
...,...,...,...,...,...
95,6.7,3.0,5.2,2.3,1
96,6.3,2.5,5.0,1.9,1
97,6.5,3.0,5.2,2.0,1
98,6.2,3.4,5.4,2.3,1


In [15]:
# Keep only the first two feature columns (sepal length and sepal width).
# Presumably this is so decision regions can be drawn in 2-D - TODO confirm.
X = X[X.columns[:2].tolist()]

## Voting classifier for different models

In [16]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Base learners for the voting ensemble.
# random_state is pinned on the stochastic models so the cross-validation
# scores are reproducible from one run to the next:
#   - SVC(probability=True) shuffles the data internally for its probability
#     estimates;
#   - DecisionTreeClassifier permutes the features at every split, so splits
#     with equal improvement can resolve differently between runs.
lr = LogisticRegression()
svm = SVC(probability=True, random_state=42)
dt = DecisionTreeClassifier(random_state=42)

# (name, estimator) pairs, the format VotingClassifier expects.
estimators = [('lr', lr), ('svm', svm), ('dt', dt)]

In [17]:
# Baseline: mean 10-fold cross-validated accuracy of each base learner on its own.
# Iterating over the (name, model) pairs directly avoids hard-coding the
# number of estimators.
for name, model in estimators:
    scores = cross_val_score(model, X, y, cv=10, scoring='accuracy')
    print(f'The accuracy score for {name} is', np.mean(scores))


The accuracy score for lr is 0.75
The accuracy score for svm is 0.74
The accuracy score for dt is 0.53


In [18]:
# Hard-voting ensemble: the predicted class is the majority vote of the estimators.
from sklearn.ensemble import VotingClassifier

vc = VotingClassifier(estimators=estimators, voting='hard')
score = cross_val_score(vc, X, y, cv=10, scoring='accuracy')
print('The accuracy score for vc hard is', np.mean(score))

The accuracy score for vc hard is 0.75


In [19]:
# Soft-voting ensemble: the predicted class comes from the averaged class
# probabilities of the estimators, so each one must provide predict_proba
# (hence SVC(probability=True)).
from sklearn.ensemble import VotingClassifier

# random_state is pinned on the stochastic models so the score is reproducible:
# SVC's probability estimates and the decision tree's split selection both
# draw on a random number generator.
lr = LogisticRegression()
svm = SVC(probability=True, random_state=42)
dt = DecisionTreeClassifier(random_state=42)

estimators = [('lr', lr), ('svm', svm), ('dt', dt)]

vc = VotingClassifier(estimators, voting='soft')
score = cross_val_score(vc, X, y, cv=10, scoring='accuracy')
print('The accuracy score for vc soft is', np.mean(score))

The accuracy score for vc soft is 0.61


## Weighted voting classifier 

In [20]:
# Grid-search integer weights (1-5) for the lr, svm and dt estimators and
# report the best mean 10-fold accuracy. VotingClassifier is left at its
# default voting mode.
# The running best lives in `best_score` rather than `max`, so the built-in
# max() is not shadowed for the rest of the notebook.
best_score = 0
for w_lr in range(1, 6):
    for w_svm in range(1, 6):
        for w_dt in range(1, 6):
            vc = VotingClassifier(estimators, weights=[w_lr, w_svm, w_dt])
            score = cross_val_score(vc, X, y, cv=10, scoring='accuracy')
            mean_score = np.mean(score)  # computed once, reused below
            print(f'The weight of lr is {w_lr}, svm is {w_svm}, dt is {w_dt} and accuracy is {mean_score} ')
            if mean_score > best_score:
                best_score = mean_score
print('The maximum score is', best_score)

The weight of lr is 1, svm is 1, dt is 1 and accuracy is 0.75 
The weight of lr is 1, svm is 1, dt is 2 and accuracy is 0.68 
The weight of lr is 1, svm is 1, dt is 3 and accuracy is 0.54 
The weight of lr is 1, svm is 1, dt is 4 and accuracy is 0.53 
The weight of lr is 1, svm is 1, dt is 5 and accuracy is 0.54 
The weight of lr is 1, svm is 2, dt is 1 and accuracy is 0.74 
The weight of lr is 1, svm is 2, dt is 2 and accuracy is 0.75 
The weight of lr is 1, svm is 2, dt is 3 and accuracy is 0.7 
The weight of lr is 1, svm is 2, dt is 4 and accuracy is 0.54 
The weight of lr is 1, svm is 2, dt is 5 and accuracy is 0.55 
The weight of lr is 1, svm is 3, dt is 1 and accuracy is 0.74 
The weight of lr is 1, svm is 3, dt is 2 and accuracy is 0.74 
The weight of lr is 1, svm is 3, dt is 3 and accuracy is 0.75 
The weight of lr is 1, svm is 3, dt is 4 and accuracy is 0.7 
The weight of lr is 1, svm is 3, dt is 5 and accuracy is 0.55 
The weight of lr is 1, svm is 4, dt is 1 and accuracy is 

## Voting ensemble of Logistic Regression classifiers

In [21]:
# Six logistic-regression models that differ only in C
# (the inverse regularisation strength), from 0.001 up to 100.
lr1, lr2, lr3, lr4, lr5, lr6 = (
    LogisticRegression(C=c) for c in (0.001, 0.01, 0.1, 1, 10, 100)
)

# Estimators are named '1' ... '6' in the order above.
estimators = [
    (str(pos), model)
    for pos, model in enumerate((lr1, lr2, lr3, lr4, lr5, lr6), start=1)
]

# Voting ensemble over the six models (default voting mode), scored with 10-fold CV.
vc = VotingClassifier(estimators=estimators)
score = cross_val_score(vc, X, y, cv=10, scoring='accuracy')
print('The accuracy score for vc is', np.mean(score))

The accuracy score for vc is 0.75


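No change in the final accuracy: the voting ensemble of six logistic-regression models with different `C` values scores 0.75, the same as the single default logistic regression above.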

In [22]:
# Six polynomial-kernel SVMs that differ only in the polynomial degree (1 through 6).
svm1, svm2, svm3, svm4, svm5, svm6 = (
    SVC(kernel='poly', degree=deg, probability=True) for deg in range(1, 7)
)

# Estimators are named '1' ... '6', matching the degree of each model.
estimators = [
    (str(pos), model)
    for pos, model in enumerate((svm1, svm2, svm3, svm4, svm5, svm6), start=1)
]

# Voting ensemble over the six models (default voting mode), scored with 10-fold CV.
vc = VotingClassifier(estimators=estimators)
score = cross_val_score(vc, X, y, cv=10, scoring='accuracy')
print('The accuracy score for vc is', np.mean(score))

The accuracy score for vc is 0.6900000000000002
