In [1]:
import os
import pickle
import click
import mlflow
import numpy as np
from mlflow.entities import ViewType
from mlflow.tracking import MlflowClient
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [2]:


# Experiment the candidate runs are read from.
HPO_EXPERIMENT_NAME = "random-forest-hyperopt"
# Experiment the retrained models are logged to.
EXPERIMENT_NAME = "random-forest-best-models"
# Hyperparameter names copied from a source run and cast to int before
# being passed to RandomForestRegressor.
RF_PARAMS = [
    "max_depth",
    "n_estimators",
    "min_samples_split",
    "min_samples_leaf",
    "random_state",
]

# Point the client at the tracking server first so the experiment below is
# selected on that server.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment(EXPERIMENT_NAME)
# Enable scikit-learn autologging for everything fitted after this point.
mlflow.sklearn.autolog()

2026/02/18 16:49:44 INFO mlflow.tracking.fluent: Experiment with name 'random-forest-best-models' does not exist. Creating a new experiment.


In [3]:
def load_pickle(filename):
    """Read a pickled object from disk and return it.

    Args:
        filename: Path of the pickle file to open in binary mode.

    Returns:
        The object stored in the file.

    Note:
        Unpickling can execute arbitrary code; only use this on files
        from a trusted source.
    """
    with open(filename, mode="rb") as handle:
        payload = pickle.load(handle)
    return payload

In [4]:
def train_and_log_model(data_path, params):
    """Refit a random forest with the given hyperparameters and log its RMSE.

    Args:
        data_path: Directory holding ``train.pkl``, ``val.pkl`` and
            ``test.pkl``; each file is unpickled into a two-element
            (features, target) pair.
        params: Mapping of hyperparameter name to value. Every name listed
            in ``RF_PARAMS`` must be present and convertible with ``int()``.

    Returns:
        None. The function opens one MLflow run, fits a
        ``RandomForestRegressor`` inside it, and logs the ``val_rmse`` and
        ``test_rmse`` metrics to that run.
    """
    train_file = os.path.join(data_path, "train.pkl")
    X_train, y_train = load_pickle(train_file)
    val_file = os.path.join(data_path, "val.pkl")
    X_val, y_val = load_pickle(val_file)
    test_file = os.path.join(data_path, "test.pkl")
    X_test, y_test = load_pickle(test_file)

    with mlflow.start_run():
        # Keep only the known forest hyperparameters and cast each to int.
        rf_kwargs = {name: int(params[name]) for name in RF_PARAMS}

        rf = RandomForestRegressor(**rf_kwargs)
        rf.fit(X_train, y_train)

        # NOTE(review): sklearn autologging may derive post-training metric
        # names from the variable passed to predict(), so the X_val / X_test
        # names and the direct predict() calls are kept as-is -- confirm
        # before renaming or wrapping them in a helper.
        val_mse = mean_squared_error(y_val, rf.predict(X_val))
        val_rmse = np.sqrt(val_mse)
        mlflow.log_metric("val_rmse", val_rmse)

        test_mse = mean_squared_error(y_test, rf.predict(X_test))
        test_rmse = np.sqrt(test_mse)
        mlflow.log_metric("test_rmse", test_rmse)

In [5]:

# Driver cell: retrain the best hyperparameter-search candidates, then
# register the retrained model with the lowest test RMSE.
client = MlflowClient()
data_path = "./output"  # directory read by train_and_log_model
top_n = 5  # number of candidate runs to retrain

# Retrieve the top_n model runs and log the models
experiment = client.get_experiment_by_name(HPO_EXPERIMENT_NAME)
if experiment is None:
    # get_experiment_by_name returns None for an unknown name; fail with a
    # clear message instead of an AttributeError on the next line.
    raise ValueError(
        f"MLflow experiment '{HPO_EXPERIMENT_NAME}' was not found; "
        "run the hyperparameter search before this notebook."
    )
runs = client.search_runs(
    # search_runs documents a list of experiment IDs, not a bare string.
    experiment_ids=[experiment.experiment_id],
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=top_n,
    order_by=["metrics.rmse ASC"],
)
for run in runs:
    train_and_log_model(data_path=data_path, params=run.data.params)

# Select the model with the lowest test RMSE
experiment = client.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    raise ValueError(f"MLflow experiment '{EXPERIMENT_NAME}' was not found.")
best_runs = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    run_view_type=ViewType.ACTIVE_ONLY,
    # Only the first row of the ascending sort is used, so fetch one run.
    max_results=1,
    order_by=["metrics.test_rmse ASC"],
)
if not best_runs:
    # Nothing was retrained (e.g. the search above returned no runs).
    raise RuntimeError(
        f"No active runs found in experiment '{EXPERIMENT_NAME}'; "
        "there is no model to register."
    )
best_run = best_runs[0]

# Register the best model
run_id = best_run.info.run_id
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri=model_uri, name="rf-best-model")
print(f"Best test RMSE: {best_run.data.metrics['test_rmse']:.3f}")




🏃 View run whimsical-ray-313 at: http://127.0.0.1:5000/#/experiments/3/runs/4047b65d9ce749bb97ddc3e6ef298e21
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/3




🏃 View run charming-duck-427 at: http://127.0.0.1:5000/#/experiments/3/runs/2cd35642d78641569349096c256b8f07
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/3




🏃 View run angry-kit-719 at: http://127.0.0.1:5000/#/experiments/3/runs/f61bb151ed724255a638b4133782eb4e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/3




🏃 View run caring-wasp-118 at: http://127.0.0.1:5000/#/experiments/3/runs/47aaaa5568ef4afd9ac2b8fa2aecb36e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/3


Successfully registered model 'rf-best-model'.
2026/02/18 16:50:28 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: rf-best-model, version 1


🏃 View run ambitious-koi-551 at: http://127.0.0.1:5000/#/experiments/3/runs/27d55651ee4a4e98ae93b23a2749b5cf
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/3
Best test RMSE: 5.555


Created version '1' of model 'rf-best-model'.
