In [65]:
import pandas as pd
import numpy as np

# Fetch the whitespace-delimited file, skipping its first 22 lines.
# Records are split across consecutive physical rows: rows 0, 2, 4, ... are
# kept whole, the first two columns of rows 1, 3, 5, ... are appended to them,
# and the third column of those odd rows becomes the target.
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(
    data_url,
    sep=r"\s+",  # raw string so the regex backslash is not read as an escape
    skiprows=22,
    header=None,
)
raw_values = raw_df.values
first_lines, second_lines = raw_values[::2, :], raw_values[1::2, :]
data = np.hstack([first_lines, second_lines[:, :2]])
target = second_lines[:, 2]

In [66]:
# Conventional short aliases for the model inputs (X) and the target (y).
X, y = data, target

In [67]:
# Display the dimensions of X as a quick sanity check on the loaded data.
X.shape

(506, 13)

In [68]:
# Display the dimensions of y for comparison with X.
y.shape

(506,)

In [73]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score

In [37]:
# Base models for the ensemble. Each must be an estimator *instance*:
# cross_val_score and VotingRegressor clone and fit the objects they are given,
# which fails for a bare class reference.
lr = LinearRegression()
# random_state fixes the tree's random feature permutation so that repeated
# runs yield the same cross-validation scores.
dt = DecisionTreeRegressor(random_state=42)
svr = SVR()

In [39]:
# (name, estimator) pairs, the format VotingRegressor takes for its estimators.
estimators = [
    ('lr', lr),
    ('dt', dt),
    ('svr', svr),
]

In [41]:
# Score each base model on its own: mean R^2 over 10 cross-validation folds.
for name, model in estimators:
    scores = cross_val_score(model, X, y, scoring='r2', cv=10)
    mean_r2 = np.mean(scores)
    print(name, np.round(mean_r2, 2))

lr 0.2
dt -0.23
svr -0.41


In [43]:
from sklearn.ensemble import VotingRegressor

In [49]:
# Evaluate the unweighted ensemble of all base models with the same
# 10-fold R^2 protocol used for the individual models.
vr = VotingRegressor(estimators=estimators)
scores = cross_val_score(vr, X, y, scoring='r2', cv=10)
ensemble_r2 = np.round(np.mean(scores), 2)
print('Voting Regressor: ', ensemble_r2)

Voting Regressor:  0.46


In [51]:
# Exhaustively try every weight triple with components in {1, 2, 3}; i, j and k
# are the weights of the first, second and third estimator respectively.
weight_grid = [
  (i, j, k)
  for i in range(1, 4)
  for j in range(1, 4)
  for k in range(1, 4)
]
for i, j, k in weight_grid:
  vr = VotingRegressor(estimators, weights=[i, j, k])
  scores = cross_val_score(vr, X, y, scoring='r2', cv=10)
  label = "For i={},j={},k={}".format(i, j, k)
  print(label, np.round(np.mean(scores), 2))

For i=1,j=1,k=1 0.41
For i=1,j=1,k=2 0.36
For i=1,j=1,k=3 0.28
For i=1,j=2,k=1 0.42
For i=1,j=2,k=2 0.4
For i=1,j=2,k=3 0.36
For i=1,j=3,k=1 0.35
For i=1,j=3,k=2 0.38
For i=1,j=3,k=3 0.36
For i=2,j=1,k=1 0.45
For i=2,j=1,k=2 0.42
For i=2,j=1,k=3 0.35
For i=2,j=2,k=1 0.45
For i=2,j=2,k=2 0.46
For i=2,j=2,k=3 0.41
For i=2,j=3,k=1 0.41
For i=2,j=3,k=2 0.44
For i=2,j=3,k=3 0.41
For i=3,j=1,k=1 0.44
For i=3,j=1,k=2 0.43
For i=3,j=1,k=3 0.39
For i=3,j=2,k=1 0.44
For i=3,j=2,k=2 0.45
For i=3,j=2,k=3 0.43
For i=3,j=3,k=1 0.44
For i=3,j=3,k=2 0.43
For i=3,j=3,k=3 0.42


In [53]:
# Decision trees of increasing depth; max_depth=None places no cap on depth.
# A fixed random_state makes each tree's fit, and therefore the
# cross-validation scores reported below, repeatable from run to run.
dt1 = DecisionTreeRegressor(max_depth=1, random_state=42)
dt2 = DecisionTreeRegressor(max_depth=3, random_state=42)
dt3 = DecisionTreeRegressor(max_depth=5, random_state=42)
dt4 = DecisionTreeRegressor(max_depth=7, random_state=42)
dt5 = DecisionTreeRegressor(max_depth=None, random_state=42)

In [56]:
# Rebind `estimators` to the five depth-varied trees as (name, estimator) pairs.
estimators = [
    ('dt1', dt1),
    ('dt2', dt2),
    ('dt3', dt3),
    ('dt4', dt4),
    ('dt5', dt5),
]

In [58]:
# Score each tree individually: mean R^2 over 10 cross-validation folds.
for name, model in estimators:
  scores = cross_val_score(model, X, y, scoring='r2', cv=10)
  mean_r2 = np.mean(scores)
  print(name, np.round(mean_r2, 2))

dt1 -0.85
dt2 -0.11
dt3 0.03
dt4 0.08
dt5 -0.14


In [60]:
# Evaluate the unweighted ensemble of the five trees with the same
# 10-fold R^2 protocol used for the individual trees.
vr = VotingRegressor(estimators=estimators)
scores = cross_val_score(vr, X, y, scoring='r2', cv=10)
ensemble_r2 = np.round(np.mean(scores), 2)
print("Voting Regressor", ensemble_r2)

Voting Regressor 0.18
