In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from xgboost import XGBRegressor
import optuna
import mlflow
import mlflow.xgboost

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
train_df = pd.read_csv('../data/processed/feature_engineered_train.csv')
eval_df = pd.read_csv('../data/processed/feature_engineered_eval.csv')

target = 'price'
X_train = train_df.drop(columns=[target])
y_train = train_df[target]

X_eval = eval_df.drop(columns=[target])
y_eval = eval_df[target]

In [3]:
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 200, 1000),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
        "gamma": trial.suggest_float("gamma", 0.0, 5.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
        "random_state": 42,
        "n_jobs": -1,
        "tree_method": "hist",
    }

    with mlflow.start_run(nested=True):
        model = XGBRegressor(**params)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_eval)
        rmse = float(np.sqrt(mean_squared_error(y_eval, y_pred)))
        mae = float(mean_absolute_error(y_eval, y_pred))
        r2 = float(r2_score(y_eval, y_pred))

        # Log hyperparameters + metrics
        mlflow.log_params(params)
        mlflow.log_metrics({"rmse": rmse, "mae": mae, "r2": r2})

    return rmse

In [4]:
mlflow.set_tracking_uri("/home/abi/Documents/mlops/Housing_ML/mlruns")
mlflow.set_experiment("xgboost_optuna_housing")

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=15)

print("Best params:", study.best_trial.params)

  return FileStore(store_uri, store_uri)
2026/01/11 20:57:08 INFO mlflow.tracking.fluent: Experiment with name 'xgboost_optuna_housing' does not exist. Creating a new experiment.
[I 2026-01-11 20:57:08,345] A new study created in memory with name: no-name-f8700e2b-a115-4900-8004-13aace803656
[I 2026-01-11 20:57:34,402] Trial 0 finished with value: 75037.74470339902 and parameters: {'n_estimators': 494, 'max_depth': 4, 'learning_rate': 0.05563101302339174, 'subsample': 0.7742627795278351, 'colsample_bytree': 0.5950185052103487, 'min_child_weight': 6, 'gamma': 2.4940762874737485, 'reg_alpha': 4.8971684235508976e-08, 'reg_lambda': 0.6481834679389016}. Best is trial 0 with value: 75037.74470339902.
[I 2026-01-11 20:58:14,819] Trial 1 finished with value: 74958.36819817258 and parameters: {'n_estimators': 404, 'max_depth': 9, 'learning_rate': 0.20563752011752146, 'subsample': 0.5009108275996911, 'colsample_bytree': 0.7738298325253898, 'min_child_weight': 5, 'gamma': 2.3142471523551396, 'reg

Best params: {'n_estimators': 593, 'max_depth': 10, 'learning_rate': 0.038787552508186364, 'subsample': 0.8606494357968268, 'colsample_bytree': 0.656784383669625, 'min_child_weight': 8, 'gamma': 3.3517033988119564, 'reg_alpha': 0.2233992178821375, 'reg_lambda': 4.23877450741062e-07}


In [5]:
best_params = study.best_trial.params
best_model = XGBRegressor(**best_params)
best_model.fit(X_train, y_train)

y_pred = best_model.predict(X_eval)

mae = mean_absolute_error(y_eval, y_pred)
rmse = np.sqrt(mean_squared_error(y_eval, y_pred))
r2 = r2_score(y_eval, y_pred)

print("Final tuned model performance:")
print("MAE:", mae)
print("RMSE:", rmse)
print("R²:", r2)

# Log final model
with mlflow.start_run(run_name="best_xgboost_model"):
    mlflow.log_params(best_params)
    mlflow.log_metrics({"rmse": rmse, "mae": mae, "r2": r2})
    mlflow.xgboost.log_model(best_model, name="model")

Final tuned model performance:
MAE: 30815.53226373018
RMSE: 69940.0680630262
R²: 0.9621982302583633


