In [24]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn import ensemble
from sklearn import datasets
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import linear_model, svm, neural_network, isotonic
from sklearn import preprocessing

In [3]:
# Load data
boston = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, test_size=0.1)
print(X_train.shape, X_test.shape)

(455, 13) (51, 13)


In [28]:
n_estimators = 500
max_depth = 5

# Gradient boosting
gbr = ensemble.GradientBoostingRegressor(n_estimators=n_estimators, max_depth=max_depth)

# Random Forest
rf = ensemble.RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth)

# Linear regression
lr = linear_model.LinearRegression()

# Robust regression
huber = linear_model.HuberRegressor()

# Ridge
ridge = linear_model.Ridge()

# linear SVR
linear_svr = svm.LinearSVR()

# SVR
svr = svm.SVR()

# Neural network
nn = neural_network.MLPRegressor(hidden_layer_sizes=(13, 8), activation='relu')

all_clf = [gbr, rf, lr, huber, ridge, linear_svr, svr, nn]

In [29]:
for clf in all_clf:
    clf.fit(X_train, y_train)
    mse = mean_squared_error(y_test, clf.predict(X_test))
    print("MSE: %.4f" % mse)

MSE: 12.0236
MSE: 12.6120
MSE: 14.5655
MSE: 18.2920
MSE: 15.3630
MSE: 16.3226
MSE: 65.7830
MSE: 39.0526




In [30]:
# Normalize the data and run again
normalizer = preprocessing.Normalizer().fit(X_train)  # fit does nothing
X_normalized = normalizer.transform(X_train)
for clf in all_clf:
    clf.fit(X_normalized, y_train)
    mse = mean_squared_error(y_test, clf.predict(normalizer.transform(X_test)))
    print("MSE: %.4f" % mse)

MSE: 8.5533
MSE: 10.6060
MSE: 14.7588
MSE: 21.8410
MSE: 41.3197
MSE: 53.7500
MSE: 58.7770
MSE: 56.3232


