In [11]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# Load the Boston housing data set bundled with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs against an older scikit-learn -- confirm the
# pinned version (or switch data sets) before re-running.
dataset = load_boston()

# Wrap the features and the target in DataFrames; the target gets a single
# column named 'y'.
x = pd.DataFrame(dataset.data, columns=dataset.feature_names)
y = pd.DataFrame(dataset.target, columns=['y'])

# Hold out 20% of the rows for evaluation; random_state=0 keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.2, random_state=0)

# Flatten the single-column target frames into 1-D arrays for the estimators.
# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same
# ndarray and is available on both old and current pandas releases.
# y_test itself is left as a DataFrame; the flattened copy is y_true.
y_train, y_true = y_train.values.ravel(), y_test.values.ravel()

# Build one "standardize, then estimate" pipeline per model and fit it on the
# training split straight away (Pipeline.fit returns the pipeline itself).
# Estimators that accept a seed use random_state=1.
pipe_ols = Pipeline(steps=[
    ('scl', StandardScaler()),
    ('est', LinearRegression()),
]).fit(x_train, y_train)

pipe_ridge = Pipeline(steps=[
    ('scl', StandardScaler()),
    ('est', Ridge(random_state=1)),
]).fit(x_train, y_train)

pipe_rf = Pipeline(steps=[
    ('scl', StandardScaler()),
    ('est', RandomForestRegressor(random_state=1)),
]).fit(x_train, y_train)

pipe_gbr = Pipeline(steps=[
    ('scl', StandardScaler()),
    ('est', GradientBoostingRegressor(random_state=1)),
]).fit(x_train, y_train)

# Score every fitted pipeline on the hold-out split with R2.
r2_score_ols = r2_score(y_true=y_true, y_pred=pipe_ols.predict(x_test))
r2_score_ridge = r2_score(y_true=y_true, y_pred=pipe_ridge.predict(x_test))
r2_score_rf = r2_score(y_true=y_true, y_pred=pipe_rf.predict(x_test))
r2_score_gbr = r2_score(y_true=y_true, y_pred=pipe_gbr.predict(x_test))

In [12]:
# Report the hold-out R2 of each model, one line per model, six decimals.
print('\n'.join([
    'R2 score of OLS: %.6f' % r2_score_ols,
    'R2 score of RIDGE: %.6f' % r2_score_ridge,
    'R2 score of RF: %.6f' % r2_score_rf,
    'R2 score of GBR: %.6f' % r2_score_gbr,
]))

R2 score of OLS: 0.589201
R2 score of RIDGE: 0.588120
R2 score of RF: 0.706710
R2 score of GBR: 0.779037
