In [2]:
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error




In [8]:
# load the California housing dataset (this is what fetch_california_housing returns;
# the printed DESCR below confirms it)
housing = fetch_california_housing()
# Backward-compatible alias: this cell used to call the bunch `boston` even though
# it holds California data. Prefer `housing` in new code.
boston = housing
X, y = housing.data, housing.target

# split dataset into train and test sets: 20% held out, seed fixed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# show the dataset description, the training-set size, and the first five rows / targets
print(housing.DESCR)
print(len(X_train))
print(X_train[:5])
print(y_train[:5])

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block group
        - HouseAge      median house age in block group
        - AveRooms      average number of rooms per household
        - AveBedrms     average number of bedrooms per household
        - Population    block group population
        - AveOccup      average number of household members
        - Latitude      block group latitude
        - Longitude     block group longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived

In [5]:
# define Gradient Boosting regressor and set hyperparameters:
# 100 boosting stages, learning rate 0.1, individual trees limited to depth 3.
# random_state is pinned (same seed value as the train/test split) so that a
# Restart & Run All fits the same model; without it, tie-breaking between
# equally good splits can vary from run to run.
gb_reg = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)

In [6]:
# fit on the training split and immediately predict the held-out split
# (fit returns the estimator itself, so the chained predict uses the trained model)
y_pred = gb_reg.fit(X_train, y_train).predict(X_test)

# mean squared error between the true test targets and the predictions
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print('MSE:', mse)

MSE: 0.2940161448268125
