In [11]:
import xgboost as xgb
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Hold-out configuration: 20% of the rows are reserved for testing, and the
# seed is fixed so the split (and the scores printed below) are repeatable.
TEST_FRACTION = 0.2
SPLIT_SEED = 42

# Fetch the California housing data as pandas objects, then separate the
# feature table from the target column.
data = fetch_california_housing(as_frame=True)
X = data.data
y = data.target

# Partition features and target into training and test portions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

# Baseline model: an XGBoost regressor with no hyperparameters overridden,
# fitted on the training portion (fit() hands back the fitted estimator).
model = xgb.XGBRegressor().fit(X_train, y_train)

# Predict the target for the held-out rows.
predictions = model.predict(X_test)

# Score the held-out predictions: mean squared error and R-squared.
mse = mean_squared_error(y_true=y_test, y_pred=predictions)
r2 = r2_score(y_true=y_test, y_pred=predictions)

print("Mean Squared Error:", mse)
print("R-squared Score:", r2)

Mean Squared Error: 0.2225899267544737
R-squared Score: 0.8301370561019205


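In the above example, the calculated MSE is around 0.22. Because the target is the median house value expressed in units of $100,000, this corresponds to an RMSE of about 0.47, i.e. a typical prediction error of roughly $47,000, which indicates that the XGBoost regressor's predictions are reasonably accurate.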

The $R^2$ value of 0.830 shows that the XGBoost regressor explains about 83% of the variance in the target variable, indicating a good fit.

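Let's further improve the performance of the XGBoost model with parameter tuning. For example, setting the `max_depth` and `n_estimators` parameters explicitly, as in the cell below, improves the model's performance: the MSE drops from about 0.223 to about 0.205 and $R^2$ rises from 0.830 to 0.844. Note that these values are compared on the test set for illustration; in practice, hyperparameters should be selected with cross-validation on the training data so that the test score remains an unbiased estimate.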

In [12]:

# Hyperparameters overridden for this run; every other setting keeps the
# library default.
tuned_params = {
    "max_depth": 4,
    "n_estimators": 500,
}

# Build the regressor from the settings above.
model = xgb.XGBRegressor(**tuned_params)

# Fit on the same training portion used by the earlier cell.
model.fit(X_train, y_train)

# Predict the target for the held-out rows.
predictions = model.predict(X_test)

# Score the held-out predictions: mean squared error and R-squared.
mse = mean_squared_error(y_true=y_test, y_pred=predictions)
r2 = r2_score(y_true=y_test, y_pred=predictions)

print("Mean Squared Error:", mse)
print("R-squared Score:", r2)

Mean Squared Error: 0.20484014861251143
R-squared Score: 0.8436822762863774
