Import all the required libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
import xgboost as xgb
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import warnings

# Silence DeprecationWarning messages so they do not clutter the cell outputs
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

Load the data and split it into training and test sets

In [None]:
# Fetch the California Housing dataset and pull out the feature matrix and target
california_housing = fetch_california_housing()
X = california_housing.data
y = california_housing.target

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Show a concise summary of the dataset rather than printing the whole Bunch,
# which dumps the raw arrays and the full DESCR text into the cell output.
print("Features:", california_housing.feature_names)
print("Target:", california_housing.target_names)
print("Data shape:", california_housing.data.shape)
print("Target shape:", california_housing.target.shape)

{'data': array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
          37.88      , -122.23      ],
       [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
          37.86      , -122.22      ],
       [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
          37.85      , -122.24      ],
       ...,
       [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
          39.43      , -121.22      ],
       [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
          39.43      , -121.32      ],
       [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
          39.37      , -121.24      ]]), 'target': array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]), 'frame': None, 'target_names': ['MedHouseVal'], 'feature_names': ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude'], 'DESCR': '.. _california_housing_dataset:\n\nCalifornia Housing dataset\n-

In [None]:
# Set to True to run the hyper-parameter search below. It is off by default
# because the grids are large (432 + 108 + 243 parameter combinations, each
# cross-validated 5 times), so a full notebook re-run stays fast.
RUN_GRID_SEARCH = False

# Initialize the models with a fixed seed for reproducibility
xgb_regressor = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
rf_regressor = RandomForestRegressor(random_state=42)
gb_regressor = GradientBoostingRegressor(random_state=42)

# Data Modeling: baseline fit with default hyper-parameters
model_xgb = xgb_regressor.fit(X_train, y_train)
model_rf = rf_regressor.fit(X_train, y_train)
model_gb = gb_regressor.fit(X_train, y_train)

if RUN_GRID_SEARCH:
    # Define parameter grids for each model
    param_grid_xgb = {
        'lambda': [0.01, 0.1, 1, 10],
        'gamma': [0, 0.1, 1, 10],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1, 0.2],
        'n_estimators': [100, 200, 300]
    }

    param_grid_rf = {
        'n_estimators': [100, 200, 300],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }

    param_grid_gb = {
        'n_estimators': [100, 200, 300],
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }

    # Set up GridSearchCV for each model
    grid_search_xgb = GridSearchCV(estimator=xgb_regressor, param_grid=param_grid_xgb, cv=5, scoring='neg_mean_squared_error', verbose=1, n_jobs=-1)
    grid_search_rf = GridSearchCV(estimator=rf_regressor, param_grid=param_grid_rf, cv=5, scoring='neg_mean_squared_error', verbose=1, n_jobs=-1)
    grid_search_gb = GridSearchCV(estimator=gb_regressor, param_grid=param_grid_gb, cv=5, scoring='neg_mean_squared_error', verbose=1, n_jobs=-1)

    # Fit the models
    print("Tuning XGBoost...")
    grid_search_xgb.fit(X_train, y_train)
    print("Tuning RandomForest...")
    grid_search_rf.fit(X_train, y_train)
    print("Tuning GradientBoosting...")
    grid_search_gb.fit(X_train, y_train)

    # Best models
    best_xgb = grid_search_xgb.best_estimator_
    best_rf = grid_search_rf.best_estimator_
    best_gb = grid_search_gb.best_estimator_

    # Rebind the model_* names so the prediction and evaluation cells score the
    # tuned estimators instead of the default-parameter baselines.
    model_xgb, model_rf, model_gb = best_xgb, best_rf, best_gb

'\nparam_grid_xgb = {\n    \'lambda\': [0.01, 0.1, 1, 10],\n    \'gamma\': [0, 0.1, 1, 10],\n    \'max_depth\': [3, 5, 7],\n    \'learning_rate\': [0.01, 0.1, 0.2],\n    \'n_estimators\': [100, 200, 300]\n}\n\nparam_grid_rf = {\n    \'n_estimators\': [100, 200, 300],\n    \'max_depth\': [None, 10, 20, 30],\n    \'min_samples_split\': [2, 5, 10],\n    \'min_samples_leaf\': [1, 2, 4]\n}\n\nparam_grid_gb = {\n    \'n_estimators\': [100, 200, 300],\n    \'learning_rate\': [0.01, 0.1, 0.2],\n    \'max_depth\': [3, 5, 7],\n    \'min_samples_split\': [2, 5, 10],\n    \'min_samples_leaf\': [1, 2, 4]\n}\n\n# Set up GridSearchCV for each model\ngrid_search_xgb = GridSearchCV(estimator=xgb_regressor, param_grid=param_grid_xgb, cv=5, scoring=\'neg_mean_squared_error\', verbose=1, n_jobs=-1)\ngrid_search_rf = GridSearchCV(estimator=rf_regressor, param_grid=param_grid_rf, cv=5, scoring=\'neg_mean_squared_error\', verbose=1, n_jobs=-1)\ngrid_search_gb = GridSearchCV(estimator=gb_regressor, param_grid

Model Prediction

In [None]:
# Predict on the held-out test split with each fitted model, in a fixed order
y_pred_xgb, y_pred_rf, y_pred_gb = (
    fitted_model.predict(X_test)
    for fitted_model in (model_xgb, model_rf, model_gb)
)

Model Evaluation

In [None]:
# Test-set mean squared error, computed and reported one model at a time.
# The commented-out best_params_ prints only apply when the grid searches
# (grid_search_*) have actually been run.

# XGBoost
mse_xgb = mean_squared_error(y_true=y_test, y_pred=y_pred_xgb)
#print("Best parameters for XGBRegressor: ", grid_search_xgb.best_params_)
print("Mean Squared Error for XGBRegressor: ", mse_xgb)

# Random forest
mse_rf = mean_squared_error(y_true=y_test, y_pred=y_pred_rf)
#print("Best parameters for RandomForestRegressor: ", grid_search_rf.best_params_)
print("Mean Squared Error for RandomForestRegressor: ", mse_rf)

# Gradient boosting
mse_gb = mean_squared_error(y_true=y_test, y_pred=y_pred_gb)
#print("Best parameters for GradientBoostingRegressor: ", grid_search_gb.best_params_)
print("Mean Squared Error for GradientBoostingRegressor: ", mse_gb)

Mean Squared Error for XGBRegressor:  0.2225899267544737
Mean Squared Error for RandomForestRegressor:  0.2553684927247781
Mean Squared Error for GradientBoostingRegressor:  0.2939973248643864
