In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import BaggingRegressor

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

In [2]:
data = pd.read_csv('data/boston_housing.csv')
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [3]:
X = data.drop('MEDV',axis=1)
y = data['MEDV']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=2)

In [5]:
scaler = StandardScaler()
X_train2 = scaler.fit_transform(X_train)
X_test2 = scaler.transform(X_test)
X2 = scaler.fit_transform(X)

In [6]:
lr = LinearRegression()
rid = Ridge()
las = Lasso()
dt = DecisionTreeRegressor()
knn = KNeighborsRegressor()
svm = SVR()
rf = RandomForestRegressor()

In [7]:
estimator_list = [('lr',lr),('dt',dt),('knn',knn),('rid',rid),('las',las),('svm',svm)]

In [8]:
vt_reg = VotingRegressor(estimators=estimator_list )

In [11]:
bag = BaggingRegressor(base_estimator=dt,n_estimators=100,max_samples=0.3,bootstrap=True,n_jobs=-1)
past = BaggingRegressor(base_estimator=dt,n_estimators=100,max_samples=1,bootstrap=False,n_jobs=-1)
subspace = BaggingRegressor(base_estimator=dt,n_estimators=100,max_samples=1,bootstrap=False,max_features=0.5,bootstrap_features=True,n_jobs=-1)
patch = BaggingRegressor(base_estimator=dt,n_estimators=100,max_samples=0.3,bootstrap=True,max_features=0.5,bootstrap_features=True,n_jobs=-1)

In [12]:
model_list = [lr,las,rid,dt,knn,svm,vt_reg,bag,past,subspace,patch,rf]

In [13]:
def cross_validate(model,X,y):
    cv_score = cross_val_score(model,X,y,cv=10,scoring='r2')
    print('model ::::::   ', model)
    print('cross validated score === ',cv_score.mean()*100)

In [14]:
for model in model_list:
    cross_validate(model,X2,y)

model ::::::    LinearRegression()
cross validated score ===  20.25289900605654
model ::::::    Lasso()
cross validated score ===  18.08549037728676
model ::::::    Ridge()
cross validated score ===  20.87462813450241
model ::::::    DecisionTreeRegressor()
cross validated score ===  -3.999039741287288
model ::::::    KNeighborsRegressor()
cross validated score ===  38.11080442810903
model ::::::    SVR()
cross validated score ===  24.474341283386664
model ::::::    VotingRegressor(estimators=[('lr', LinearRegression()),
                            ('dt', DecisionTreeRegressor()),
                            ('knn', KNeighborsRegressor()), ('rid', Ridge()),
                            ('las', Lasso()), ('svm', SVR())])
cross validated score ===  49.383764509544946
model ::::::    BaggingRegressor(base_estimator=DecisionTreeRegressor(), max_samples=0.3,
                 n_estimators=100, n_jobs=-1)
cross validated score ===  53.176894554721365
model ::::::    BaggingRegressor(base_estim