In [15]:
# imports
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [16]:
# Fetch the California housing data, then pull out the feature matrix (X)
# and the regression target (y) as separate variables
california_housing = fetch_california_housing()
X = california_housing.data
y = california_housing.target

In [17]:
# Hold out 20% of the samples as a test set; the fixed random_state keeps
# the partition identical from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [18]:
# Assemble the modelling pipeline; steps run in the order listed
pipeline = Pipeline(
    steps=[
        # 1. standardize every feature
        ('scaler', StandardScaler()),
        # 2. keep the 5 features with the highest f_regression scores
        ('feature_selector', SelectKBest(score_func=f_regression, k=5)),
        # 3. fit a 100-tree random forest, seeded for repeatable results
        ('regressor', RandomForestRegressor(n_estimators=100, random_state=42)),
    ]
)

In [19]:
# Train every pipeline step on the training split only
pipeline.fit(X=X_train, y=y_train)

In [20]:
# Run the held-out test features through the fitted pipeline
y_predict = pipeline.predict(X=X_test)

In [21]:
# Score the test-set predictions with two error metrics and report both,
# rounded to two decimals
mae = mean_absolute_error(y_true=y_test, y_pred=y_predict)
mse = mean_squared_error(y_true=y_test, y_pred=y_predict)

print(f'Mean Squared Error: {mse:.2f}')
print(f'Mean Absolute Error: {mae:.2f}')

Mean Squared Error: 0.45
Mean Absolute Error: 0.47
