Documentation Link: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.VotingRegressor.html

Voting Blog: https://vitalflux.com/hard-vs-soft-voting-classifier-python-example/

In [1]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score

from sklearn.ensemble import VotingRegressor

In [2]:
# Load dataset

# return_X_y : bool, default=False
#     If True, returns ``(data, target)`` instead of a Bunch object.
#     See below for more information about the `data` and `target` object.

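# X, y = load_boston(return_X_y=True)
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so the call above no longer works on current versions.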

#### Instead, we can load the data with the code below

In [3]:
# Fetch the raw Boston housing file. The first 22 lines are skipped and the
# rest is parsed as whitespace-separated values with no header row.
data_url = "http://lib.stat.cmu.edu/datasets/boston"
# Raw string: "\s" is not a valid Python escape sequence, so the non-raw form
# triggers an invalid-escape warning on recent interpreters.
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)

# Each record spans two consecutive rows of raw_df: the even row supplies all
# of its columns, the odd row supplies two more feature columns (indices 0-1)
# followed by the target value (index 2).
raw_values = raw_df.values
data = np.hstack([raw_values[::2, :], raw_values[1::2, :2]])
target = raw_values[1::2, 2]

In [4]:
# Dimensions of the feature matrix as (rows, columns).
data.shape

(506, 13)

In [5]:
# Dimensions of the target vector; its length should match the row count of `data`.
target.shape

(506,)

In [6]:
# Three base regressors from different model families for the voting ensemble.
lr = LinearRegression()
# Seed the tree: scikit-learn permutes candidate features at every split, so
# an unseeded tree can differ between runs and the reported cross-validation
# scores would not be repeatable.
dt = DecisionTreeRegressor(random_state=42)
svr = SVR()

In [7]:
# (name, estimator) pairs, in the order lr, dt, svr, as passed to VotingRegressor.
estimators = [
    ('lr', lr),
    ('dt', dt),
    ('svr', svr),
]

In [9]:
# Baseline: mean R^2 over 10 cross-validation folds for each regressor on its own.
for name, model in estimators:
    scores = cross_val_score(model, X=data, y=target, scoring='r2', cv=10)
    print(name, np.round(scores.mean(), 2))

lr 0.2
dt -0.06
svr -0.41


In [10]:
# Ensemble of the three base regressors with no weights passed, scored the
# same way as the individual models (mean R^2 over 10 folds).
vr = VotingRegressor(estimators=estimators)
scores = cross_val_score(vr, X=data, y=target, scoring='r2', cv=10)
mean_r2 = np.round(scores.mean(), 2)
print("Voting Regressor", mean_r2)

Voting Regressor 0.44


In [11]:
# Try every (lr, dt, svr) weight triple drawn from {1, 2, 3} and report the
# mean 10-fold cross-validated R^2 of the weighted ensemble for each.
weight_grid = [
    (i, j, k)
    for i in range(1, 4)
    for j in range(1, 4)
    for k in range(1, 4)
]
for i, j, k in weight_grid:
    vr = VotingRegressor(estimators, weights=[i, j, k])
    scores = cross_val_score(vr, X=data, y=target, scoring='r2', cv=10)
    label = "For i={},j={},k={}".format(i, j, k)
    print(label, np.round(np.mean(scores), 2))

For i=1,j=1,k=1 0.38
For i=1,j=1,k=2 0.31
For i=1,j=1,k=3 0.23
For i=1,j=2,k=1 0.4
For i=1,j=2,k=2 0.35
For i=1,j=2,k=3 0.35
For i=1,j=3,k=1 0.26
For i=1,j=3,k=2 0.36
For i=1,j=3,k=3 0.37
For i=2,j=1,k=1 0.46
For i=2,j=1,k=2 0.41
For i=2,j=1,k=3 0.36
For i=2,j=2,k=1 0.45
For i=2,j=2,k=2 0.39
For i=2,j=2,k=3 0.4
For i=2,j=3,k=1 0.41
For i=2,j=3,k=2 0.45
For i=2,j=3,k=3 0.39
For i=3,j=1,k=1 0.44
For i=3,j=1,k=2 0.43
For i=3,j=1,k=3 0.39
For i=3,j=2,k=1 0.44
For i=3,j=2,k=2 0.45
For i=3,j=2,k=3 0.4
For i=3,j=3,k=1 0.42
For i=3,j=3,k=2 0.39
For i=3,j=3,k=3 0.38


In [12]:
# using the same algorithm: five decision trees that differ only in max_depth
# (the last one passes max_depth=None).
# Each tree is seeded because scikit-learn permutes candidate features at every
# split; without a fixed random_state the scores below can change between runs.

dt1 = DecisionTreeRegressor(max_depth=1, random_state=42)
dt2 = DecisionTreeRegressor(max_depth=3, random_state=42)
dt3 = DecisionTreeRegressor(max_depth=5, random_state=42)
dt4 = DecisionTreeRegressor(max_depth=7, random_state=42)
dt5 = DecisionTreeRegressor(max_depth=None, random_state=42)

In [13]:
# (name, estimator) pairs for the five depth-varied decision trees.
estimators2 = [
    ('dt1', dt1),
    ('dt2', dt2),
    ('dt3', dt3),
    ('dt4', dt4),
    ('dt5', dt5),
]

In [14]:
# Mean R^2 over 10 cross-validation folds for each decision tree on its own.
for name, model in estimators2:
    scores = cross_val_score(model, X=data, y=target, scoring='r2', cv=10)
    print(name, np.round(scores.mean(), 2))

dt1 -0.85
dt2 -0.21
dt3 0.03
dt4 0.03
dt5 -0.15


In [15]:
# Ensemble of the five decision trees with no weights passed, scored by mean
# R^2 over 10 cross-validation folds.
vr = VotingRegressor(estimators2)
scores = cross_val_score(vr, X=data, y=target, scoring='r2', cv=10)
mean_r2 = np.round(scores.mean(), 2)
print("Voting Regressor", mean_r2)

Voting Regressor 0.2
