In [1]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

In [2]:
import pandas as pd
import numpy as np

data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]

In [4]:
# from sklearn.preprocessing import StandardScaler

In [17]:
# sc = StandardScaler()
# X = sc.fit_transform(X)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor

In [6]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [9]:
models_params = [
    {'name': 'LinearRegression', 'model': LinearRegression(), 'params': {'fit_intercept': [True, False]}},
    {'name': 'DecisionTreeRegressor', 'model': DecisionTreeRegressor(), 'params': {'criterion': ['squared_error', 'absolute_error', 'poisson'], 'max_features': ['auto', 'sqrt', 'log2']}},
    {'name': 'SVR', 'model': SVR(), 'params': {'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'shrinking': [True, False]}},
    {'name': 'Lasso', 'model': Lasso(), 'params': {'alpha': range(1,3), 'selection': ['cyclic', 'random']}},
    {'name': 'RandomForestRegressor', 'model': RandomForestRegressor(), 'params': {'criterion': ['squared_error', 'absolute_error', 'poisson'], 'max_features': ['auto', 'sqrt', 'log2']}},
]

In [13]:
import warnings
warnings.filterwarnings('ignore')

In [14]:
res_grid_search = []
for model in models_params:
    res_grid_search.append((model['name'], GridSearchCV(model['model'], model['params'], cv=10).fit(X_train, y_train)))

In [15]:
for r in res_grid_search:
    print(r[0], r[1].best_score_, r[1].best_params_)

LinearRegression 0.722207691884909 {'fit_intercept': True}
DecisionTreeRegressor 0.6527577251361619 {'criterion': 'squared_error', 'max_features': 'auto'}
SVR 0.7157831724329036 {'kernel': 'linear', 'shrinking': True}
Lasso 0.6742316528341445 {'alpha': 1, 'selection': 'random'}
RandomForestRegressor 0.871960097107567 {'criterion': 'squared_error', 'max_features': 'sqrt'}


In [16]:
res_randomize_search = []
for model in models_params:
    res_randomize_search.append((model['name'], RandomizedSearchCV(model['model'], model['params'], cv=10).fit(X_train, y_train)))

In [17]:
for r in res_randomize_search:
    print(r[0], r[1].best_score_, r[1].best_params_)

LinearRegression 0.722207691884909 {'fit_intercept': True}
DecisionTreeRegressor 0.703795498704538 {'max_features': 'auto', 'criterion': 'squared_error'}
SVR 0.7157831724329036 {'shrinking': True, 'kernel': 'linear'}
Lasso 0.6742342160234889 {'selection': 'random', 'alpha': 1}
RandomForestRegressor 0.8646522053159151 {'max_features': 'sqrt', 'criterion': 'absolute_error'}
