In [11]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Load data
data = fetch_openml(name='boston', version=1, as_frame=True)
X = data.data
y = data.target

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameter grid for GridSearchCV
param_grid_gridsearch = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.1, 0.01, 0.001],
    'n_estimators': [100, 200, 300]
}

# Define parameter grid for RandomizedSearchCV
param_dist_randomsearch = {
    'max_depth': [3, 5, 7, 9, 11, 13, 15],
    'learning_rate': np.linspace(0.001, 0.1, 100),
    'n_estimators': np.arange(100, 1000, 100)
}


# Perform GridSearchCV
grid_search = GridSearchCV(estimator=XGBRegressor(random_state=42, enable_categorical=True),
                           param_grid=param_grid_gridsearch,
                           scoring='neg_mean_squared_error',
                           cv=5,
                           verbose=1)
grid_search.fit(X_train, y_train)
print("Grid Search Best Parameters:", grid_search.best_params_)

best_grid_model = grid_search.best_estimator_
y_pred_grid = best_grid_model.predict(X_test)
mse_grid = mean_squared_error(y_test, y_pred_grid)
print("Grid Search MSE on test set:", mse_grid)

# Perform RandomizedSearchCV
random_search = RandomizedSearchCV(estimator=XGBRegressor(random_state=42, enable_categorical=True),
                                   param_distributions=param_dist_randomsearch,
                                   n_iter=100,
                                   scoring='neg_mean_squared_error',
                                   cv=5,
                                   verbose=1,
                                   random_state=42)
random_search.fit(X_train, y_train)
print("Random Search Best Parameters:", random_search.best_params_)
best_random_model = random_search.best_estimator_
y_pred_random = best_random_model.predict(X_test)
mse_random = mean_squared_error(y_test, y_pred_random)
print("Random Search MSE on test set:", mse_random)


  warn(


Fitting 5 folds for each of 27 candidates, totalling 135 fits
Grid Search Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
Grid Search MSE on test set: 5.868017593238944
Fitting 5 folds for each of 100 candidates, totalling 500 fits
Random Search Best Parameters: {'n_estimators': 700, 'max_depth': 3, 'learning_rate': 0.095}
Random Search MSE on test set: 5.760615621800232


In [20]:
from hyperopt import hp, tpe, fmin, Trials
from hyperopt.pyll import scope


# Define search space for Hyperopt
param_space_hyperopt = {
    'max_depth': hp.choice('max_depth', np.arange(3, 16, dtype=int)),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.001), np.log(0.1)),
    'n_estimators': hp.choice('n_estimators', np.arange(100, 1000, 100, dtype=int))
}

# Define objective function for Hyperopt
def objective(params):
    model = XGBRegressor(**params, random_state=42, enable_categorical=True)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    return mse


# Perform Hyperopt
trials = Trials()
best_hyperopt = fmin(fn=objective,
                     space=param_space_hyperopt,
                     algo=tpe.suggest,
                     max_evals=100,
                     trials=trials,
                     rstate=np.random.default_rng(42),
                     return_argmin=False
                     )
print("Hyperopt Best Parameters:", best_hyperopt)

best_hyperopt_model = XGBRegressor(**best_hyperopt, random_state=42, enable_categorical=True)
best_hyperopt_model.fit(X_train, y_train)
y_pred_hyperopt = best_hyperopt_model.predict(X_test)
mse_hyperopt = mean_squared_error(y_test, y_pred_hyperopt)
print("Hyperopt MSE on test set:", mse_hyperopt)

100%|██████████| 100/100 [01:07<00:00,  1.49trial/s, best loss: 5.174801631248915]
Hyperopt Best Parameters: {'learning_rate': 0.08393931195869206, 'max_depth': 3, 'n_estimators': 500}
Hyperopt MSE on test set: 5.174801631248915
