In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing

In [2]:
# Load the California housing data as a (features, target) pair.
X, y = fetch_california_housing(return_X_y=True)

In [3]:
# Display the feature matrix (NumPy abbreviates large arrays in the repr).
X

array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
          37.88      , -122.23      ],
       [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
          37.86      , -122.22      ],
       [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
          37.85      , -122.24      ],
       ...,
       [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
          39.43      , -121.22      ],
       [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
          39.43      , -121.32      ],
       [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
          39.37      , -121.24      ]])

In [4]:
# Display the target vector returned alongside the features.
y

array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894])

In [5]:
# Dimensions of the feature matrix: (rows, columns).
X.shape

(20640, 8)

In [6]:
# Length of the target vector; should match the number of rows in X.
y.shape

(20640,)

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score

In [11]:
# Base learners: each is scored on its own and later combined by VotingRegressor.
lr = LinearRegression()
# Seed the tree so its cross-validation score is repeatable between runs;
# an unseeded tree may resolve equally good splits differently each time.
dt = DecisionTreeRegressor(random_state=42)
knn = KNeighborsRegressor()

In [12]:
# (name, estimator) pairs for the three base learners.
estimators = [
    ("lr", lr),
    ("dt", dt),
    ("knn", knn),
]

In [13]:
# Baseline: mean 10-fold cross-validated R^2 of every base learner on its own.
for name, model in estimators:
    scores = cross_val_score(model, X, y, cv=10, scoring="r2")
    print(name, np.round(scores.mean(), 2))

lr 0.51
dt 0.23
knn -0.14


## Voting Ensemble Regressor

In [14]:
from sklearn.ensemble import VotingRegressor

## Unweighted Voting (Simple Average of Predictions)

In [15]:
# Ensemble of all base learners with equal weights, scored by
# mean 10-fold cross-validated R^2.
vr = VotingRegressor(estimators=estimators)
scores = cross_val_score(vr, X, y, cv=10, scoring="r2")
print("Voting Regressor", np.round(scores.mean(), 2))

Voting Regressor 0.48


## Weighted Voting

In [16]:
from itertools import product

# Try every integer weight triple in {1, 2, 3}^3 for the three estimators and
# report the mean 10-fold cross-validated R^2 of each weighted ensemble.
# product(...) iterates with the last weight varying fastest, i.e. in the same
# order as three nested loops over i, j, k.
# Note: weights are relative, so proportional triples such as (1,1,1) and
# (2,2,2) describe the same ensemble and yield the same score.
for i, j, k in product(range(1, 4), repeat=3):
    vr = VotingRegressor(estimators, weights=[i, j, k])
    scores = cross_val_score(vr, X, y, scoring='r2', cv=10)
    print("For i={},j={},k={}".format(i, j, k), np.round(np.mean(scores), 2))


For i=1,j=1,k=1 0.48
For i=1,j=1,k=2 0.39
For i=1,j=1,k=3 0.31
For i=1,j=2,k=1 0.48
For i=1,j=2,k=2 0.43
For i=1,j=2,k=3 0.38
For i=1,j=3,k=1 0.46
For i=1,j=3,k=2 0.44
For i=1,j=3,k=3 0.41
For i=2,j=1,k=1 0.52
For i=2,j=1,k=2 0.45
For i=2,j=1,k=3 0.39
For i=2,j=2,k=1 0.52
For i=2,j=2,k=2 0.48
For i=2,j=2,k=3 0.43
For i=2,j=3,k=1 0.51
For i=2,j=3,k=2 0.48
For i=2,j=3,k=3 0.45
For i=3,j=1,k=1 0.53
For i=3,j=1,k=2 0.48
For i=3,j=1,k=3 0.43
For i=3,j=2,k=1 0.54
For i=3,j=2,k=2 0.5
For i=3,j=2,k=3 0.47
For i=3,j=3,k=1 0.54
For i=3,j=3,k=2 0.51
For i=3,j=3,k=3 0.48


In [18]:
## Using The Same Algorithm

In [17]:
# Same algorithm, different capacity: decision trees of increasing depth
# (max_depth=None places no limit on depth).
# Each tree is seeded so the cross-validation scores are repeatable between
# runs; an unseeded tree may resolve equally good splits differently each time.
dt1 = DecisionTreeRegressor(max_depth=1, random_state=42)
dt2 = DecisionTreeRegressor(max_depth=3, random_state=42)
dt3 = DecisionTreeRegressor(max_depth=5, random_state=42)
dt4 = DecisionTreeRegressor(max_depth=7, random_state=42)
dt5 = DecisionTreeRegressor(max_depth=None, random_state=42)

In [19]:
# (name, estimator) pairs for the five depth-varied trees.
estimators = [
    ("dt1", dt1),
    ("dt2", dt2),
    ("dt3", dt3),
    ("dt4", dt4),
    ("dt5", dt5),
]

In [20]:
# Mean 10-fold cross-validated R^2 of every tree on its own.
for name, model in estimators:
    scores = cross_val_score(model, X, y, cv=10, scoring="r2")
    print(name, np.round(scores.mean(), 2))

dt1 0.13
dt2 0.36
dt3 0.43
dt4 0.47
dt5 0.24


In [21]:
# Equal-weight ensemble of the depth-varied trees, scored by
# mean 10-fold cross-validated R^2.
vr = VotingRegressor(estimators=estimators)
scores = cross_val_score(vr, X, y, cv=10, scoring="r2")
print("Voting Regressor", np.round(scores.mean(), 2))

Voting Regressor 0.5
