In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

## Voting Ensemble - Classification

In [49]:
df = sns.load_dataset('iris').drop(columns=['petal_length','petal_width'])
df.head()

Unnamed: 0,sepal_length,sepal_width,species
0,5.1,3.5,setosa
1,4.9,3.0,setosa
2,4.7,3.2,setosa
3,4.6,3.1,setosa
4,5.0,3.6,setosa


In [50]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

df['species'] = le.fit_transform(df['species'])
df.head()

Unnamed: 0,sepal_length,sepal_width,species
0,5.1,3.5,0
1,4.9,3.0,0
2,4.7,3.2,0
3,4.6,3.1,0
4,5.0,3.6,0


In [51]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [52]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import cross_val_score

In [53]:
lgr = LogisticRegression()
svc = SVC()
knn = KNeighborsClassifier()
rfc = RandomForestClassifier()

estimators = [('lgr',lgr),('svc',lgr),('knn',knn),('rfc',rfc)]

In [54]:
for estimator in estimators:
    _ = cross_val_score(estimator[1],X,y,cv=10,scoring='accuracy')
    print(estimator[0],np.round(np.mean(_),2))

lgr 0.81
svc 0.81
knn 0.76
rfc 0.73


In [55]:
from sklearn.ensemble import VotingClassifier

### Hard Voting

In [56]:
vc_hard = VotingClassifier(estimators)
_ = cross_val_score(vc_hard,X,y,cv=10,scoring='accuracy')
print(np.round(np.mean(_),2))

0.79


### Soft Voting

In [57]:
vc_soft = VotingClassifier(estimators,voting='soft')
_ = cross_val_score(vc_soft,X,y,cv=10,scoring='accuracy')
print(np.round(np.mean(_),2))

0.79


### Weighted Voting

In [58]:
vc_weight = VotingClassifier(estimators,weights=[4,3,2,0])
_ = cross_val_score(vc_weight,X,y,cv=10,scoring='accuracy')
print(np.round(np.mean(_),2))

0.81


### Hyperparameter Voting

In [59]:
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000,n_features=20)

svc1 = SVC(probability=True,kernel='poly',degree=1)
svc2 = SVC(probability=True,kernel='poly',degree=2)
svc3 = SVC(probability=True,kernel='poly',degree=3)
svc4 = SVC(probability=True,kernel='poly',degree=4)
svc5 = SVC(probability=True,kernel='poly',degree=5)

estimators = [('svc1',svc1),('svc2',svc2),('svc3',svc3),
              ('svc4',svc4),('svc5',svc5)]

In [60]:
for estimator in estimators:
    _ = cross_val_score(estimator[1],X,y,cv=10,scoring='accuracy')
    print(estimator[0],np.round(np.mean(_),2))

svc1 0.9
svc2 0.51
svc3 0.9
svc4 0.52
svc5 0.88


In [61]:
vc_tuned = VotingClassifier(estimators,voting='soft')
_ = cross_val_score(vc_tuned,X,y,cv=10,scoring='accuracy')
print(np.round(np.mean(_),2))

0.9


## Voting Ensemble - Regression

In [68]:
from sklearn.datasets import fetch_california_housing
data = fetch_california_housing(as_frame=True)

df = pd.DataFrame(data.frame)
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [114]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [118]:
from sklearn.linear_model import LinearRegression,SGDRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor

In [128]:
lnr = LinearRegression()
knr = KNeighborsRegressor()
dtr = DecisionTreeRegressor()

estimators = [('lnr',lnr),('knr',knr),('dtr',dtr)]

In [120]:
for estimator in estimators:
    _ = cross_val_score(estimator[1],X,y,cv=10,scoring='r2')
    print(estimator[0],np.round(np.mean(_),2))

lnr 0.51
knr -0.14
dtr 0.23


In [121]:
from sklearn.ensemble import VotingRegressor

In [122]:
vr = VotingRegressor(estimators=estimators)
_ = cross_val_score(vr,X,y,cv=10,scoring='r2')
print(np.round(np.mean(_),2))

0.48


### Weighted Voting

In [129]:
vc_weight = VotingRegressor(estimators,weights=[10,1,10])
_ = cross_val_score(vc_weight,X,y,cv=10,scoring='r2')
print(np.round(np.mean(_),2))

0.54


### Hyperparameter Voting

In [124]:
dtr1 = DecisionTreeRegressor(max_depth=1)
dtr2 = DecisionTreeRegressor(max_depth=3)
dtr3 = DecisionTreeRegressor(max_depth=5)
dtr4 = DecisionTreeRegressor(max_depth=7)
dtr5 = DecisionTreeRegressor(max_depth=None)

estimators = [('dtr1',dtr1),('dtr2',dtr2),('dtr3',dtr3),
              ('dtr4',dtr4),('dtr5',dtr5)]

In [125]:
for estimator in estimators:
    _ = cross_val_score(estimator[1],X,y,cv=10,scoring='r2')
    print(estimator[0],np.round(np.mean(_),2))

dtr1 0.13
dtr2 0.36
dtr3 0.43
dtr4 0.47
dtr5 0.23


In [126]:
vc_tuned = VotingRegressor(estimators)
_ = cross_val_score(vc_tuned,X,y,cv=10,scoring='r2')
print(np.round(np.mean(_),2))

0.5
