In [1]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.datasets import fetch_california_housing


In [2]:
# Load the California housing data via scikit-learn. The cells below read
# three attributes from the returned object: .data, .feature_names and .target.
housing = fetch_california_housing()

In [3]:
# Put the raw feature matrix into a DataFrame for easier slicing and display.
df = pd.DataFrame(housing.data)

In [4]:
# Label the columns with the feature names shipped alongside the data.
df.columns = housing.feature_names

In [5]:
# Append the regression target as a 'price' column so that features and
# target live in the same frame (it becomes the last column).
df['price'] = housing.target

In [6]:
# Sanity check on the assembled frame: (rows, feature columns + 'price').
df.shape

(20640, 9)

In [7]:
# Peek at a random handful of rows instead of only the top of the frame.
# random_state pins the draw so the same rows are shown on every
# Restart & Run All.
df.sample(10, random_state=42)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,price
20124,2.4167,28.0,6.493506,1.337662,545.0,3.538961,34.42,-118.75,1.917
17742,6.234,7.0,6.511551,1.082508,1047.0,3.455446,37.31,-121.78,2.922
18040,4.6065,22.0,5.389535,1.013566,1291.0,2.501938,37.23,-121.97,4.459
19216,5.3409,15.0,6.081081,1.011583,716.0,2.764479,38.48,-122.68,2.446
1577,7.6518,11.0,6.919643,1.012277,2286.0,2.551339,37.8,-122.02,3.593
16542,4.4766,5.0,7.013544,1.076749,1549.0,3.496614,37.79,-121.22,1.694
2544,2.5417,50.0,6.473088,1.141643,837.0,2.371105,40.78,-124.16,0.854
768,4.3897,35.0,6.141304,1.043478,543.0,2.951087,37.65,-122.09,1.906
3384,4.45,32.0,5.156187,1.066937,1278.0,2.592292,34.24,-118.28,2.636
14960,5.9202,9.0,6.115132,1.009868,947.0,3.115132,32.76,-116.92,1.813


In [8]:
# Keep the experiments fast by working on the first 1000 rows only.
# Columns 0-7 are the features; column 8 is the appended 'price' target.
# NOTE(review): the leading rows displayed further down all share very similar
# Latitude/Longitude values, so this slice may not be representative of the
# full dataset -- confirm whether a random sample was intended.
first_rows = df.iloc[:1000]
x = first_rows.iloc[:, :8]
y = first_rows.iloc[:, 8]

In [9]:
# Confirm the feature slice: 1000 rows by 8 feature columns.
x.shape

(1000, 8)

In [10]:
# Confirm the target slice has one value per selected row.
y.shape

(1000,)

In [11]:
# Display the feature frame; pandas elides the middle rows of a long frame.
x

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25
...,...,...,...,...,...,...,...,...
995,4.8624,11.0,5.680000,1.044706,5826.0,2.741647,37.71,-121.75
996,9.1531,25.0,5.811765,0.952941,254.0,2.988235,37.74,-121.77
997,4.7361,22.0,6.080220,1.036264,2474.0,2.718681,37.70,-121.80
998,5.4324,17.0,5.975831,0.965257,2222.0,3.356495,37.69,-121.80


In [12]:
# Display the target Series (named 'price') for the selected rows.
y

0      4.526
1      3.585
2      3.521
3      3.413
4      3.422
       ...  
995    1.924
996    4.188
997    2.168
998    2.155
999    2.277
Name: price, Length: 1000, dtype: float64

In [13]:
# Three base regressors from different model families for the voting ensemble.
# random_state is fixed on the tree because scikit-learn randomly permutes the
# candidate features at each split, so an unseeded tree can give slightly
# different cross-validation scores from one run to the next.
dtr = DecisionTreeRegressor(random_state=42)
lr = LinearRegression()
# NOTE(review): SVR is fitted on unscaled features; SVMs are sensitive to
# feature scale, so a scaling step may be worth adding -- confirm intent.
svr = SVR()

In [14]:
# (label, model) pairs in the form VotingRegressor expects; the order here is
# also the order in which per-model weights are supplied later on.
estimators = [
    ('decision tree regressor', dtr),
    ('linear regressor', lr),
    ('support vector regressor', svr),
]


In [15]:
from sklearn.model_selection import cross_val_score

# Baseline: score every base model on its own with 10-fold cross-validation
# and report the mean R^2 rounded to two decimals.
for label, model in estimators:
    fold_scores = cross_val_score(model, x, y, scoring='r2', cv=10)
    mean_r2 = np.round(np.mean(fold_scores), 2)
    print(f"{label} : {mean_r2}")

decision tree regressor : 0.08
linear regressor : 0.36
support vector regressor : -0.48


In [16]:
from sklearn.ensemble import VotingRegressor

In [17]:
# Combine the base regressors into a single voting ensemble. No weights are
# passed here, so scikit-learn's default weighting applies.
reg = VotingRegressor(estimators=estimators)

In [18]:
# Score the ensemble with 10-fold cross-validation and print its mean R^2,
# rounded to two decimals.
score = cross_val_score(reg, x, y, scoring='r2', cv=10)
print(np.round(score.mean(), 2))

0.35


In [19]:
# Try every combination of integer weights 1..3 for the three base models
# (weights follow the order of `estimators`) and print the mean 10-fold R^2
# of each weighted ensemble. Iteration order: i outermost, k innermost.
weight_grid = [
    (w_first, w_second, w_third)
    for w_first in range(1, 4)
    for w_second in range(1, 4)
    for w_third in range(1, 4)
]
for i, j, k in weight_grid:
    weighted_ensemble = VotingRegressor(estimators, weights=[i, j, k])
    fold_scores = cross_val_score(weighted_ensemble, x, y, scoring='r2', cv=10)
    print("For i={},j={},k={}".format(i, j, k), np.round(np.mean(fold_scores), 2))

For i=1,j=1,k=1 0.36
For i=1,j=1,k=2 0.23
For i=1,j=1,k=3 0.14
For i=1,j=2,k=1 0.4
For i=1,j=2,k=2 0.32
For i=1,j=2,k=3 0.23
For i=1,j=3,k=1 0.42
For i=1,j=3,k=2 0.35
For i=1,j=3,k=3 0.29
For i=2,j=1,k=1 0.35
For i=2,j=1,k=2 0.3
For i=2,j=1,k=3 0.23
For i=2,j=2,k=1 0.41
For i=2,j=2,k=2 0.38
For i=2,j=2,k=3 0.3
For i=2,j=3,k=1 0.43
For i=2,j=3,k=2 0.4
For i=2,j=3,k=3 0.34
For i=3,j=1,k=1 0.34
For i=3,j=1,k=2 0.31
For i=3,j=1,k=3 0.27
For i=3,j=2,k=1 0.39
For i=3,j=2,k=2 0.36
For i=3,j=2,k=3 0.32
For i=3,j=3,k=1 0.45
For i=3,j=3,k=2 0.39
For i=3,j=3,k=3 0.36


In [20]:
from sklearn.datasets import make_regression

In [21]:
# Switch to a synthetic regression problem. Note that this rebinds x and y,
# which previously held the housing features and target.
# n_informative is set equal to n_features: it cannot exceed the number of
# features, and a larger request (e.g. 15) is silently capped by scikit-learn.
# random_state makes the generated data identical on every run.
x, y = make_regression(
    n_samples=1000,
    n_features=4,
    n_informative=4,
    n_targets=1,
    noise=10,
    random_state=42,
)

In [22]:
# Confirm the synthetic feature matrix: 1000 samples by 4 features.
x.shape

(1000, 4)

In [23]:
# Confirm the synthetic target has one value per sample.
y.shape

(1000,)

In [26]:
# Four decision trees that differ only in how deep they may grow; the last
# one is unrestricted (max_depth=None). random_state is fixed on each tree so
# the cross-validation scores below are reproducible across runs.
dtr1 = DecisionTreeRegressor(max_depth=3, random_state=42)
dtr2 = DecisionTreeRegressor(max_depth=4, random_state=42)
dtr3 = DecisionTreeRegressor(max_depth=5, random_state=42)
dtr4 = DecisionTreeRegressor(max_depth=None, random_state=42)

In [27]:
# (label, model) pairs for the tree-only ensemble. This rebinds `estimators`,
# replacing the mixed-model list used for the housing data.
estimators = [
    ('dt1', dtr1),
    ('dt2', dtr2),
    ('dt3', dtr3),
    ('dt4', dtr4),
]

In [28]:
# Score each tree on its own: 10-fold cross-validation, mean R^2 rounded to
# two decimals.
for label, model in estimators:
    fold_scores = cross_val_score(model, x, y, scoring="r2", cv=10)
    mean_r2 = np.round(np.mean(fold_scores), 2)
    print(f"{label}: {mean_r2}")

dt1: 0.5
dt2: 0.56
dt3: 0.65
dt4: 0.81


In [29]:
# Build a voting ensemble from the four trees (no explicit weights) and
# print its mean 10-fold R^2, rounded to two decimals.
reg = VotingRegressor(estimators=estimators)
score = cross_val_score(reg, x, y, scoring="r2", cv=10)
print(np.round(score.mean(), 2))

0.71
