In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.metrics import mean_absolute_error

In [2]:
# Load the California housing dataset as pandas objects: `data` holds the
# features and `target` the value to predict.
data, target = fetch_california_housing(return_X_y=True, as_frame=True)
# Rescale the target so that it is expressed in k$.
target = target * 100

In [3]:
# Baseline: one decision tree with default hyperparameters (only the seed is
# fixed), evaluated with cross_validate's default splitting and scoring.
tree = DecisionTreeRegressor(random_state=0)
cv_results = cross_validate(estimator=tree, X=data, y=target, n_jobs=2)

# One test score per cross-validation fold.
scores = cv_results["test_score"]

# Summarise the folds as "mean ± standard deviation".
print(
    "R2 score obtained by cross-validation: "
    f"{scores.mean():.3f} ± {scores.std():.3f}"
)

R2 score obtained by cross-validation: 0.354 ± 0.087


In [8]:
%%time
# Inner-loop settings: number of folds used by the grid search, and the
# hyperparameter candidates it chooses among.
cv = 3
param_grid = dict(
    max_depth=[5, 8, None],
    min_samples_split=[2, 10, 30, 50],
    # Floats are fractions of the training samples; the integer 1 is an
    # absolute sample count (scikit-learn's convention for this parameter).
    min_samples_leaf=[0.01, 0.05, 0.1, 1],
)

# The tuned tree: a grid search wrapped around a seeded decision tree.
tree = GridSearchCV(
    estimator=DecisionTreeRegressor(random_state=0),
    param_grid=param_grid,
    cv=cv,
    n_jobs=2,
)

# Outer loop: the whole grid search is re-run on each outer training split, so
# the reported scores come from data the tuning never saw. The fitted search
# object of every outer fold is kept under cv_results["estimator"].
cv_results = cross_validate(
    estimator=tree,
    X=data,
    y=target,
    n_jobs=2,
    return_estimator=True,
)

# One test score per outer fold.
scores = cv_results["test_score"]

print(
    "R2 score obtained by cross-validation: "
    f"{scores.mean():.3f} ± {scores.std():.3f}"
)

R2 score obtained by cross-validation: 0.523 ± 0.107
CPU times: total: 15.6 ms
Wall time: 15 s


In [5]:
%%time
# Base learner: a seeded decision tree with otherwise default hyperparameters.
estimator = DecisionTreeRegressor(random_state=0)

# Ensemble of 20 such trees; the seed on the ensemble itself is fixed as well
# so the run is repeatable.
bagging_regressor = BaggingRegressor(
    estimator=estimator,
    n_estimators=20,
    random_state=0,
)

# Evaluate the ensemble with cross_validate's default splitting and scoring.
cv_results = cross_validate(
    estimator=bagging_regressor, X=data, y=target, n_jobs=2
)

# One test score per cross-validation fold.
scores = cv_results["test_score"]

print(
    "R2 score obtained by cross-validation: "
    f"{scores.mean():.3f} ± {scores.std():.3f}"
)

R2 score obtained by cross-validation: 0.642 ± 0.083
CPU times: total: 46.9 ms
Wall time: 16.9 s
