<a href="https://colab.research.google.com/github/deshm084/Distributed-ML-Pipeline-with-Ray-Tune-MLflow/blob/main/Distributed%20ML%20Pipeline%20with%20Ray%20Tune%20%26%20MLflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ray[tune] mlflow

Collecting mlflow
  Downloading mlflow-3.8.1-py3-none-any.whl.metadata (31 kB)
Collecting ray[tune]
  Downloading ray-2.53.0-cp312-cp312-manylinux2014_x86_64.whl.metadata (22 kB)
Collecting tensorboardX>=1.9 (from ray[tune])
  Downloading tensorboardx-2.6.4-py3-none-any.whl.metadata (6.2 kB)
Collecting mlflow-skinny==3.8.1 (from mlflow)
  Downloading mlflow_skinny-3.8.1-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-tracing==3.8.1 (from mlflow)
  Downloading mlflow_tracing-3.8.1-py3-none-any.whl.metadata (19 kB)
Collecting Flask-CORS<7 (from mlflow)
  Downloading flask_cors-6.0.2-py3-none-any.whl.metadata (5.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting huey<3,>=2.5.0 (from mlflow)
  Downloading huey-2.6.

In [None]:
import os
import time
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from ray import tune
from ray.tune.search.optuna import OptunaSearch  # Smart search algorithm
import numpy as np # Import numpy for sqrt

# --- 1. The Training Function (The "Minion" Logic) ---
# This function runs on a separate process (Worker)
def train_model(config):
    """Train and score one RandomForestRegressor trial for Ray Tune.

    Loads the diabetes dataset, fits a random forest with the hyperparameters
    in ``config``, evaluates RMSE on a held-out 20% split, logs the trial to
    MLflow and reports the metric back to Ray Tune.

    Args:
        config: Mapping of hyperparameters for this trial. Must contain the
            keys ``"n_estimators"``, ``"max_depth"`` and
            ``"min_samples_split"``.

    Returns:
        None. The result is delivered through ``tune.report({"rmse": ...})``.
    """
    # One fixed seed for both the split and the forest. Without it every trial
    # is scored on a different random split, so RMSE differences between trials
    # mix hyperparameter effects with sampling noise and the "best" trial may
    # simply be the one that drew the easiest test set.
    seed = 42

    # Ensure MLflow experiment is set for each worker process
    # NOTE(review): the recorded output shows the experiment being created more
    # than once, which suggests workers may not share one tracking store --
    # confirm the tracking URI resolves to the same location for every worker.
    mlflow.set_experiment("Diabetes_Distributed_tuning")

    # 1. Load Data (Each worker loads its own copy or reads from shared storage)
    data = load_diabetes()
    X_train, X_test, y_train, y_test = train_test_split(
        data.data, data.target, test_size=0.2, random_state=seed
    )

    # 2. Build Model using hyperparams passed in 'config'
    model = RandomForestRegressor(
        n_estimators=config["n_estimators"],
        max_depth=config["max_depth"],
        min_samples_split=config["min_samples_split"],
        random_state=seed,
    )

    # 3. Train and evaluate
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    # RMSE is the square root of MSE; cast to a plain float so the loggers
    # receive a built-in type rather than a NumPy scalar.
    mse = mean_squared_error(y_test, predictions)
    rmse = float(np.sqrt(mse))

    # 4. Log to MLflow (The "Registry")
    # We create a new run for every trial. No parent run is started in this
    # function; nested=True is kept from the original call.
    with mlflow.start_run(nested=True):
        mlflow.log_params(config)
        mlflow.log_metric("rmse", rmse)
        # We tag this run so we can find it later
        mlflow.set_tag("mode", "distributed_trial")

    # 5. Report back to Ray (The "Boss")
    tune.report({"rmse": rmse})

# --- 2. The Orchestrator (The "Boss") ---
def run_distributed_pipeline():
    """Run the Ray Tune hyperparameter search and return the best config.

    Launches 10 trials of ``train_model`` with one CPU each, minimising the
    ``"rmse"`` metric each trial reports, then prints and returns the
    hyperparameters of the trial with the lowest final RMSE.

    Returns:
        dict: The configuration (``n_estimators``, ``max_depth``,
        ``min_samples_split``) of the best trial.

    Raises:
        RuntimeError: If no trial reported an ``"rmse"`` metric, so no best
            trial can be selected.
    """
    print("üöÄ Initializing Ray Cluster...")
    # Define the search space. These are random integer distributions, not a
    # grid: each trial draws one value per parameter (upper bound exclusive).
    search_space = {
        "n_estimators": tune.randint(50, 500),
        "max_depth": tune.randint(2, 20),
        "min_samples_split": tune.randint(2, 10)
    }

    print("‚ö° Starting Distributed Tuning (Running parallel trials)...")

    # This executes the training function in parallel across available cores.
    # No search_alg is passed, so Tune samples the distributions above with its
    # default variant generator.
    analysis = tune.run(
        train_model,
        config=search_space,
        metric="rmse",
        mode="min",            # We want to minimize Error
        num_samples=10,        # Run 10 different experiments
        resources_per_trial={"cpu": 1}, # 1 CPU per worker
        verbose=1
    )

    print("\n‚úÖ Tuning Complete.")
    best_trial = analysis.get_best_trial("rmse", "min", "last")
    # get_best_trial can return None when no trial has the metric; fail with a
    # clear message instead of an AttributeError on the lines below.
    if best_trial is None:
        raise RuntimeError(
            "No trial reported an 'rmse' metric; cannot select best hyperparameters."
        )
    print(f"üèÜ Best Hyperparameters found: {best_trial.config}")
    print(f"üìâ Lowest RMSE: {best_trial.last_result['rmse']:.4f}")

    return best_trial.config
# --- 3. Production Deployment Simulation ---
def register_and_serve(best_config, random_state=42):
    """Retrain the best model on all data, log it to MLflow, and run a mock inference.

    Fits a RandomForestRegressor with ``best_config`` on the full diabetes
    dataset, logs its parameters and the model under a run named
    ``PRODUCTION_MODEL``, reloads the model through its ``runs:/`` URI and
    prints a prediction for the first sample. This function does not call
    ``mlflow.set_experiment``, so the run is logged under whichever experiment
    is active in the calling process.

    Args:
        best_config: Mapping with the keys ``"n_estimators"``,
            ``"max_depth"`` and ``"min_samples_split"``.
        random_state: Seed for the final forest so that the logged artifact
            and the printed prediction are reproducible across runs.
            Pass ``None`` to restore unseeded behaviour.

    Returns:
        None.
    """
    print("\nüì¶ Promoting Best Model to Production Registry...")

    # Retrain final model on all data
    data = load_diabetes()
    model = RandomForestRegressor(
        n_estimators=best_config["n_estimators"],
        max_depth=best_config["max_depth"],
        min_samples_split=best_config["min_samples_split"],
        random_state=random_state,
    )
    model.fit(data.data, data.target)

    # Log the final "Production" model
    # We capture the 'run' object to get the ID safely
    with mlflow.start_run(run_name="PRODUCTION_MODEL") as run:
        mlflow.log_params(best_config)
        mlflow.sklearn.log_model(model, "random_forest_model")

        # Instead of guessing the file path, we construct the official URI
        # Format: runs:/<run_id>/<artifact_path>
        run_id = run.info.run_id
        model_uri = f"runs:/{run_id}/random_forest_model"

        print(f"üîí Model Versioned at: {model_uri}")

        # SIMULATE SERVING
        print("\nü§ñ Mock Inference Server Online...")

        # This instructs MLflow to look up the run internally
        loaded_model = mlflow.sklearn.load_model(model_uri)

        # Slice (not index) so the sample keeps its 2-D shape for predict().
        sample_data = data.data[0:1]
        prediction = loaded_model.predict(sample_data)
        # The input line is a fixed illustrative label, not derived from sample_data.
        print("   Input: Patient Data [0.03, 0.05, ...]")
        print(f"   Prediction: Disease Progression = {prediction[0]:.2f}")

if __name__ == "__main__":
    # Entry point: tune first, then retrain/log/serve with the winning config.
    # `best_config` is bound at module level, so after one full run it can be
    # reused to call register_and_serve again without repeating the search.

    # Run the full pipeline: hyperparameter search followed by promotion.
    best_config = run_distributed_pipeline()
    register_and_serve(best_config)

üöÄ Initializing Ray Cluster...
‚ö° Starting Distributed Tuning (Running parallel trials)...
+--------------------------------------------------------------------+
| Configuration for experiment     train_model_2026-01-11_20-29-06   |
+--------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator             |
| Scheduler                        FIFOScheduler                     |
| Number of trials                 10                                |
+--------------------------------------------------------------------+

View detailed results here: /root/ray_results/train_model_2026-01-11_20-29-06
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2026-01-11_20-08-27_551419_858/artifacts/2026-01-11_20-29-06/train_model_2026-01-11_20-29-06/driver_artifacts`

Trial status: 10 PENDING
Current time: 2026-01-11 20:29:06. Total running time: 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+---

[36m(train_model pid=41868)[0m 2026/01/11 20:29:31 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
[36m(train_model pid=41868)[0m 2026/01/11 20:29:31 INFO mlflow.store.db.utils: Updating database tables
[36m(train_model pid=41868)[0m 2026/01/11 20:29:31 INFO alembic.runtime.migration: Context impl SQLiteImpl.
[36m(train_model pid=41868)[0m 2026/01/11 20:29:31 INFO alembic.runtime.migration: Will assume non-transactional DDL.
[36m(train_model pid=41868)[0m 2026/01/11 20:29:31 INFO alembic.runtime.migration: Running upgrade  -> 451aebb31d03, add metric step
[36m(train_model pid=41868)[0m 2026/01/11 20:29:31 INFO alembic.runtime.migration: Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags



Trial train_model_2d03c_00001 started with configuration:
+---------------------------------------------+
| Trial train_model_2d03c_00001 config        |
+---------------------------------------------+
| max_depth                                18 |
| min_samples_split                         8 |
| n_estimators                             89 |
+---------------------------------------------+


[36m(train_model pid=41868)[0m 2026/01/11 20:29:31 INFO alembic.runtime.migration: Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
[36m(train_model pid=41868)[0m 2026/01/11 20:29:31 INFO alembic.runtime.migration: Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
[36m(train_model pid=41868)[0m 2026/01/11 20:29:31 INFO alembic.runtime.migration: Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
[36m(train_model pid=41868)[0m 2026/01/11 20:29:32 INFO alembic.runtime.migration: Running upgrade 7ac759974ad8 -> 89d4b8295536, create latest metrics table
[36m(train_model pid=41868)[0m 2026/01/11 20:29:32 INFO alembic.runtime.migration: Running upgrade 89d4b8295536 -> 2b4d017a5e9b, add model registry tables to db
[36m(train_model pid=41868)[0m 2026/01/11 20:29:32 INFO alembic.runtime.migration: Running upgrade 2b4d017a5e9b -> cfd24bdc0731, Update run status constraint with killed
[36m(train_model pid=4


Trial train_model_2d03c_00000 completed after 1 iterations at 2026-01-11 20:29:36. Total running time: 29s
+--------------------------------------------------+
| Trial train_model_2d03c_00000 result             |
+--------------------------------------------------+
| checkpoint_dir_name                              |
| time_this_iter_s                         7.43821 |
| time_total_s                             7.43821 |
| training_iteration                             1 |
| rmse                                     47.5034 |
+--------------------------------------------------+


[36m(train_model pid=41914)[0m 2026/01/11 20:29:33 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
[36m(train_model pid=41914)[0m 2026/01/11 20:29:33 INFO mlflow.store.db.utils: Updating database tables
[36m(train_model pid=41868)[0m 2026/01/11 20:29:35 INFO alembic.runtime.migration: Context impl SQLiteImpl.[32m [repeated 2x across cluster][0m
[36m(train_model pid=41868)[0m 2026/01/11 20:29:35 INFO alembic.runtime.migration: Will assume non-transactional DDL.[32m [repeated 2x across cluster][0m
[36m(train_model pid=41914)[0m 2026/01/11 20:29:33 INFO alembic.runtime.migration: Running upgrade  -> 451aebb31d03, add metric step
[36m(train_model pid=41914)[0m 2026/01/11 20:29:33 INFO alembic.runtime.migration: Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
[36m(train_model pid=41914)[0m 2026/01/11 20:29:33 INFO alembic.runtime.migration: Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values



Trial status: 1 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2026-01-11 20:29:36. Total running time: 30s
Logical resource usage: 1.0/2 CPUs, 0/0 GPUs
Current best trial: 2d03c_00000 with rmse=47.50343578945332 and params={'n_estimators': 68, 'max_depth': 18, 'min_samples_split': 7}
+---------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status         n_estimators     max_depth     min_samples_split     iter     total time (s)      rmse |
+---------------------------------------------------------------------------------------------------------------------------------+
| train_model_2d03c_00001   RUNNING                  89            18                     8                                       |
| train_model_2d03c_00000   TERMINATED               68            18                     7        1            7.43821   47.5034 |
| train_model_2d03c_00002   PENDING                

[36m(train_model pid=42141)[0m 2026/01/11 20:30:03 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
[36m(train_model pid=41914)[0m 2026/01/11 20:29:34 INFO alembic.runtime.migration: Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
[36m(train_model pid=41914)[0m 2026/01/11 20:29:34 INFO alembic.runtime.migration: Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
[36m(train_model pid=41914)[0m 2026/01/11 20:29:34 INFO alembic.runtime.migration: Running upgrade 7ac759974ad8 -> 89d4b8295536, create latest metrics table
[36m(train_model pid=41914)[0m 2026/01/11 20:29:34 INFO alembic.runtime.migration: Running upgrade 89d4b8295536 -> 2b4d017a5e9b, add model registry tables to db
[36m(train_model pid=41914)[0m 2026/01/11 20:29:34 INFO alembic.runtime.migration: Running upgrade 2b4d017a5e9b -> cfd24bdc0731, Update run status constraint with killed
[36m(train_model pid=41914)[0m 2026/01/11 20:29:34 INFO al


Trial status: 2 TERMINATED | 2 RUNNING | 6 PENDING
Current time: 2026-01-11 20:30:06. Total running time: 1min 0s
Logical resource usage: 2.0/2 CPUs, 0/0 GPUs
Current best trial: 2d03c_00000 with rmse=47.50343578945332 and params={'n_estimators': 68, 'max_depth': 18, 'min_samples_split': 7}
+---------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status         n_estimators     max_depth     min_samples_split     iter     total time (s)      rmse |
+---------------------------------------------------------------------------------------------------------------------------------+
| train_model_2d03c_00002   RUNNING                 126            10                     5                                       |
| train_model_2d03c_00003   RUNNING                 233            17                     2                                       |
| train_model_2d03c_00000   TERMINATED         

[36m(train_model pid=42141)[0m 2026/01/11 20:30:07 INFO alembic.runtime.migration: Running upgrade 1a0cddfcaa16 -> 534353b11cbc, add scorer tables
[36m(train_model pid=42141)[0m 2026/01/11 20:30:07 INFO alembic.runtime.migration: Running upgrade 534353b11cbc -> 71994744cf8e, add evaluation datasets
[36m(train_model pid=42141)[0m 2026/01/11 20:30:07 INFO alembic.runtime.migration: Running upgrade 71994744cf8e -> 3da73c924c2f, add outputs to dataset record
[36m(train_model pid=42141)[0m 2026/01/11 20:30:07 INFO alembic.runtime.migration: Running upgrade 3da73c924c2f -> bf29a5ff90ea, add jobs table
[36m(train_model pid=42141)[0m 2026/01/11 20:30:07 INFO alembic.runtime.migration: Running upgrade bf29a5ff90ea -> 1bd49d398cd23, add secrets tables
[36m(train_model pid=42141)[0m 2026/01/11 20:30:08 INFO mlflow.tracking.fluent: Experiment with name 'Diabetes_Distributed_tuning' does not exist. Creating a new experiment.
[36m(train_model pid=42177)[0m 2026/01/11 20:30:04 INFO mlfl


Trial train_model_2d03c_00002 completed after 1 iterations at 2026-01-11 20:30:09. Total running time: 1min 3s
+--------------------------------------------------+
| Trial train_model_2d03c_00002 result             |
+--------------------------------------------------+
| checkpoint_dir_name                              |
| time_this_iter_s                         8.43205 |
| time_total_s                             8.43205 |
| training_iteration                             1 |
| rmse                                     58.6654 |
+--------------------------------------------------+

Trial train_model_2d03c_00003 completed after 1 iterations at 2026-01-11 20:30:10. Total running time: 1min 4s
+--------------------------------------------------+
| Trial train_model_2d03c_00003 result             |
+--------------------------------------------------+
| checkpoint_dir_name                              |
| time_this_iter_s                         8.62535 |
| time_total_s                    

[36m(train_model pid=42458)[0m 2026/01/11 20:30:35 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
[36m(train_model pid=42177)[0m 2026/01/11 20:30:04 INFO mlflow.store.db.utils: Updating database tables
[36m(train_model pid=42177)[0m 2026/01/11 20:30:08 INFO alembic.runtime.migration: Context impl SQLiteImpl.[32m [repeated 3x across cluster][0m
[36m(train_model pid=42177)[0m 2026/01/11 20:30:08 INFO alembic.runtime.migration: Will assume non-transactional DDL.[32m [repeated 3x across cluster][0m
[36m(train_model pid=42177)[0m 2026/01/11 20:30:04 INFO alembic.runtime.migration: Running upgrade  -> 451aebb31d03, add metric step
[36m(train_model pid=42177)[0m 2026/01/11 20:30:04 INFO alembic.runtime.migration: Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
[36m(train_model pid=42177)[0m 2026/01/11 20:30:04 INFO alembic.runtime.migration: Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
[36m(tra


Trial status: 4 TERMINATED | 2 RUNNING | 4 PENDING
Current time: 2026-01-11 20:30:37. Total running time: 1min 30s
Logical resource usage: 2.0/2 CPUs, 0/0 GPUs
Current best trial: 2d03c_00000 with rmse=47.50343578945332 and params={'n_estimators': 68, 'max_depth': 18, 'min_samples_split': 7}
+---------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status         n_estimators     max_depth     min_samples_split     iter     total time (s)      rmse |
+---------------------------------------------------------------------------------------------------------------------------------+
| train_model_2d03c_00004   RUNNING                 342             6                     9                                       |
| train_model_2d03c_00005   RUNNING                 178            18                     5                                       |
| train_model_2d03c_00000   TERMINATED        

[36m(train_model pid=42458)[0m 2026/01/11 20:30:37 INFO alembic.runtime.migration: Running upgrade 867495a8f9d4 -> 5b0e9adcef9c, add cascade deletion to trace tables foreign keys
[36m(train_model pid=42458)[0m 2026/01/11 20:30:37 INFO alembic.runtime.migration: Running upgrade 5b0e9adcef9c -> 4465047574b1, increase max dataset schema size
[36m(train_model pid=42458)[0m 2026/01/11 20:30:37 INFO alembic.runtime.migration: Running upgrade 4465047574b1 -> f5a4f2784254, increase run tag value limit to 8000
[36m(train_model pid=42458)[0m 2026/01/11 20:30:37 INFO alembic.runtime.migration: Running upgrade f5a4f2784254 -> 0584bdc529eb, add cascading deletion to datasets from experiments
[36m(train_model pid=42458)[0m 2026/01/11 20:30:37 INFO alembic.runtime.migration: Running upgrade 0584bdc529eb -> 400f98739977, add logged model tables
[36m(train_model pid=42458)[0m 2026/01/11 20:30:37 INFO alembic.runtime.migration: Running upgrade 400f98739977 -> 6953534de441, add step to inputs


Trial train_model_2d03c_00005 completed after 1 iterations at 2026-01-11 20:30:40. Total running time: 1min 34s
+--------------------------------------------------+
| Trial train_model_2d03c_00005 result             |
+--------------------------------------------------+
| checkpoint_dir_name                              |
| time_this_iter_s                         6.38232 |
| time_total_s                             6.38232 |
| training_iteration                             1 |
| rmse                                     55.4621 |
+--------------------------------------------------+

Trial train_model_2d03c_00004 completed after 1 iterations at 2026-01-11 20:30:40. Total running time: 1min 34s
+--------------------------------------------------+
| Trial train_model_2d03c_00004 result             |
+--------------------------------------------------+
| checkpoint_dir_name                              |
| time_this_iter_s                         6.97607 |
| time_total_s                  

[36m(train_model pid=42732)[0m 2026/01/11 20:31:05 INFO mlflow.store.db.utils: Creating initial MLflow database tables...[32m [repeated 2x across cluster][0m
[36m(train_model pid=42462)[0m 2026/01/11 20:30:39 INFO alembic.runtime.migration: Context impl SQLiteImpl.[32m [repeated 4x across cluster][0m
[36m(train_model pid=42462)[0m 2026/01/11 20:30:39 INFO alembic.runtime.migration: Will assume non-transactional DDL.[32m [repeated 4x across cluster][0m
[36m(train_model pid=42462)[0m 2026/01/11 20:30:35 INFO mlflow.store.db.utils: Updating database tables
[36m(train_model pid=42462)[0m 2026/01/11 20:30:35 INFO alembic.runtime.migration: Running upgrade  -> 451aebb31d03, add metric step
[36m(train_model pid=42462)[0m 2026/01/11 20:30:35 INFO alembic.runtime.migration: Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
[36m(train_model pid=42462)[0m 2026/01/11 20:30:35 INFO alembic.runtime.migration: Running upgrade 90e64c465722 -> 181f10493468, a


Trial status: 6 TERMINATED | 2 RUNNING | 2 PENDING
Current time: 2026-01-11 20:31:07. Total running time: 2min 0s
Logical resource usage: 2.0/2 CPUs, 0/0 GPUs
Current best trial: 2d03c_00000 with rmse=47.50343578945332 and params={'n_estimators': 68, 'max_depth': 18, 'min_samples_split': 7}
+---------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status         n_estimators     max_depth     min_samples_split     iter     total time (s)      rmse |
+---------------------------------------------------------------------------------------------------------------------------------+
| train_model_2d03c_00006   RUNNING                  91             4                     7                                       |
| train_model_2d03c_00007   RUNNING                 481             8                     9                                       |
| train_model_2d03c_00000   TERMINATED         

[36m(train_model pid=42732)[0m 2026/01/11 20:31:07 INFO alembic.runtime.migration: Running upgrade 867495a8f9d4 -> 5b0e9adcef9c, add cascade deletion to trace tables foreign keys
[36m(train_model pid=42735)[0m 2026/01/11 20:31:07 INFO alembic.runtime.migration: Running upgrade 5b0e9adcef9c -> 4465047574b1, increase max dataset schema size
[36m(train_model pid=42732)[0m 2026/01/11 20:31:07 INFO alembic.runtime.migration: Running upgrade 4465047574b1 -> f5a4f2784254, increase run tag value limit to 8000
[36m(train_model pid=42732)[0m 2026/01/11 20:31:07 INFO alembic.runtime.migration: Running upgrade f5a4f2784254 -> 0584bdc529eb, add cascading deletion to datasets from experiments
[36m(train_model pid=42735)[0m 2026/01/11 20:31:07 INFO alembic.runtime.migration: Running upgrade 0584bdc529eb -> 400f98739977, add logged model tables
[36m(train_model pid=42732)[0m 2026/01/11 20:31:07 INFO alembic.runtime.migration: Running upgrade 400f98739977 -> 6953534de441, add step to inputs


Trial train_model_2d03c_00006 completed after 1 iterations at 2026-01-11 20:31:09. Total running time: 2min 3s
+--------------------------------------------------+
| Trial train_model_2d03c_00006 result             |
+--------------------------------------------------+
| checkpoint_dir_name                              |
| time_this_iter_s                         5.73902 |
| time_total_s                             5.73902 |
| training_iteration                             1 |
| rmse                                     57.1949 |
+--------------------------------------------------+

Trial train_model_2d03c_00007 completed after 1 iterations at 2026-01-11 20:31:14. Total running time: 2min 8s
+--------------------------------------------------+
| Trial train_model_2d03c_00007 result             |
+--------------------------------------------------+
| checkpoint_dir_name                              |
| time_this_iter_s                         10.3617 |
| time_total_s                    

[36m(train_model pid=42965)[0m 2026/01/11 20:31:33 INFO mlflow.store.db.utils: Creating initial MLflow database tables...[32m [repeated 2x across cluster][0m
[36m(train_model pid=42735)[0m 2026/01/11 20:31:09 INFO alembic.runtime.migration: Context impl SQLiteImpl.[32m [repeated 4x across cluster][0m
[36m(train_model pid=42735)[0m 2026/01/11 20:31:09 INFO alembic.runtime.migration: Will assume non-transactional DDL.[32m [repeated 4x across cluster][0m
[36m(train_model pid=42735)[0m 2026/01/11 20:31:05 INFO mlflow.store.db.utils: Updating database tables
[36m(train_model pid=42735)[0m 2026/01/11 20:31:05 INFO alembic.runtime.migration: Running upgrade  -> 451aebb31d03, add metric step
[36m(train_model pid=42732)[0m 2026/01/11 20:31:05 INFO alembic.runtime.migration: Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
[36m(train_model pid=42735)[0m 2026/01/11 20:31:05 INFO alembic.runtime.migration: Running upgrade 90e64c465722 -> 181f10493468, a


Trial status: 8 TERMINATED | 2 RUNNING
Current time: 2026-01-11 20:31:37. Total running time: 2min 30s
Logical resource usage: 2.0/2 CPUs, 0/0 GPUs
Current best trial: 2d03c_00000 with rmse=47.50343578945332 and params={'n_estimators': 68, 'max_depth': 18, 'min_samples_split': 7}
+---------------------------------------------------------------------------------------------------------------------------------+
| Trial name                status         n_estimators     max_depth     min_samples_split     iter     total time (s)      rmse |
+---------------------------------------------------------------------------------------------------------------------------------+
| train_model_2d03c_00008   RUNNING                 155            12                     5                                       |
| train_model_2d03c_00009   RUNNING                 350            10                     6                                       |
| train_model_2d03c_00000   TERMINATED               68   

[36m(train_model pid=42965)[0m 2026/01/11 20:31:37 INFO mlflow.tracking.fluent: Experiment with name 'Diabetes_Distributed_tuning' does not exist. Creating a new experiment.



Trial train_model_2d03c_00008 completed after 1 iterations at 2026-01-11 20:31:38. Total running time: 2min 32s
+--------------------------------------------------+
| Trial train_model_2d03c_00008 result             |
+--------------------------------------------------+
| checkpoint_dir_name                              |
| time_this_iter_s                         6.67394 |
| time_total_s                             6.67394 |
| training_iteration                             1 |
| rmse                                      60.663 |
+--------------------------------------------------+


2026-01-11 20:31:39,521	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/train_model_2026-01-11_20-29-06' in 0.0138s.



Trial train_model_2d03c_00009 completed after 1 iterations at 2026-01-11 20:31:39. Total running time: 2min 33s
+--------------------------------------------------+
| Trial train_model_2d03c_00009 result             |
+--------------------------------------------------+
| checkpoint_dir_name                              |
| time_this_iter_s                         7.20752 |
| time_total_s                             7.20752 |
| training_iteration                             1 |
| rmse                                     58.5662 |
+--------------------------------------------------+

Trial status: 10 TERMINATED
Current time: 2026-01-11 20:31:39. Total running time: 2min 33s
Logical resource usage: 1.0/2 CPUs, 0/0 GPUs
Current best trial: 2d03c_00000 with rmse=47.50343578945332 and params={'n_estimators': 68, 'max_depth': 18, 'min_samples_split': 7}
+---------------------------------------------------------------------------------------------------------------------------------+
| Trial



üîí Model Versioned at: runs:/fc46d25b431242b8a8814e510f45810a/random_forest_model

ü§ñ Mock Inference Server Online...


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

   Input: Patient Data [0.03, 0.05, ...]
   Prediction: Disease Progression = 181.03
