In [1]:
import os
import uuid
import pickle
from typing import Union, Tuple, List, Dict
import numpy as np


import pandas as pd

import mlflow

from sklearn.feature_extraction import DictVectorizer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline

```bash
mlflow server \
  --backend-store-uri sqlite:///mlflow.db \
  --default-artifact-root file:./mlruns/artifacts \
  --host 0.0.0.0 \
  --serve-artifacts
```

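**Note:** when the tracking URI points at the tracking server (`http://127.0.0.1:5000`), the artifact URI is a proxied `mlflow-artifacts:/...` URI rather than a full filesystem path.
When the tracking URI is set to `sqlite:///mlflow.db` instead, the artifact URI is a full local path under `mlruns`.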

In [2]:
# Host of the MLflow tracking server. Loopback is used here; replace it with
# the server's public IP when the server runs on another machine.
TRACKING_SERVER_HOST = "127.0.0.1"

# Point the fluent `mlflow.*` API at that server (port 5000).
mlflow.set_tracking_uri("http://{}:5000".format(TRACKING_SERVER_HOST))

In [5]:
# List the experiments known to the configured tracking server; the returned
# list is displayed as the cell output.
mlflow.search_experiments()

[<Experiment: artifact_location='/home/habeeb/Mlops-proj/02-experimental_tracking/mlruns/2', creation_time=1750195955848, experiment_id='2', last_update_time=1750195955848, lifecycle_stage='active', name='nyc-taxi-exp-weighted-main10', tags={}>,
 <Experiment: artifact_location='mlflow-artifacts:/1', creation_time=1750188577811, experiment_id='1', last_update_time=1750188577811, lifecycle_stage='active', name='nyc-taxi-exp-weighted-main9', tags={}>,
 <Experiment: artifact_location='mlflow-artifacts:/0', creation_time=1750188571745, experiment_id='0', last_update_time=1750188571745, lifecycle_stage='active', name='Default', tags={}>]

In [4]:
from mlflow.tracking import MlflowClient

# Address of the tracking server, built from the host configured earlier.
MLFLOW_TRACKING_URI = "http://{}:5000".format(TRACKING_SERVER_HOST)

# Low-level client used by the cells below to query runs on that server.
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [6]:
from mlflow.entities import ViewType

# Up to five active runs of experiment "2" with rmse below 9, sorted so the
# lowest rmse comes first.
runs = client.search_runs(
    experiment_ids=["2"],
    run_view_type=ViewType.ACTIVE_ONLY,
    filter_string="metrics.rmse < 9",
    order_by=["metrics.rmse ASC"],
    max_results=5,
)
print(runs)

[<Run: data=<RunData: metrics={'rmse': 6.547743727595545}, params={'colsample_bytree': '0.9384114002268024',
 'learning_rate': '0.6983559553082959',
 'max_depth': '25',
 'min_child_weight': '0.9420599625801453',
 'reg_alpha': '0.24874193603521913',
 'reg_lambda': '0.007004825823791956',
 'subsample': '0.8689464633146126'}, tags={'mlflow.parentRunId': 'ee510635d77b4672850c00f20462a362',
 'mlflow.runName': 'capricious-slug-403',
 'mlflow.source.git.commit': '23a313921adf03d4e718b0b7082162f481f0b59d',
 'mlflow.source.name': '/home/habeeb/Mlops-proj/02-experimental_tracking/main.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'habeeb'}>, info=<RunInfo: artifact_uri='/home/habeeb/Mlops-proj/02-experimental_tracking/mlruns/2/2e77c7527ee44d94a9544fa39034731f/artifacts', end_time=1750196419640, experiment_id='2', lifecycle_stage='active', run_id='2e77c7527ee44d94a9544fa39034731f', run_name='capricious-slug-403', run_uuid='2e77c7527ee44d94a9544fa39034731f', start_time=1750196418304, status

In [21]:
# Name of the metric a run must have logged to be kept.
metric = "rmse"

# Fetch up to 200 active runs of experiment "1", with no server-side filter.
all_runs = client.search_runs(
    experiment_ids=["1"],
    filter_string="",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=200,
)

# Keep only runs that carry no "mlflow.parentRunId" tag (i.e. are not nested
# under another run) and that recorded `metric`.
parent_runs = [
    candidate
    for candidate in all_runs
    if "mlflow.parentRunId" not in candidate.data.tags
    and metric in candidate.data.metrics
]

if len(parent_runs) == 0:
    print("No parent runs with the specified metric found.")

In [22]:
# Run id of the first parent run in the filtered list.
parent_runs[0].info.run_id

'f45e23cfb3e042fbbe74b7c38fd532d5'

In [23]:
# Value of `metric` logged by the first parent run.
parent_runs[0].data.metrics[metric]

6.429093486876433

In [24]:
# Re-fetch the first parent run by id and read where its artifacts are stored.
run = client.get_run(parent_runs[0].info.run_id)
artifact_uri = run.info.artifact_uri
artifact_uri

'mlflow-artifacts:/1/f45e23cfb3e042fbbe74b7c38fd532d5/artifacts'

In [28]:
# Display the full run record (metrics, params and tags) fetched above.
run

<Run: data=<RunData: metrics={'mae': 4.242991482376694,
 'mse': 41.33324306299698,
 'r2': 0.7210381205739279,
 'rmse': 6.429093486876433}, params={'categorical_transformer': 'onehot',
 'numerical_transformer': 'none',
 'objective': 'reg:squarederror',
 'seed': '42'}, tags={'developer': 'habeeb',
 'mlflow.log-model.history': '[{"run_id": "f45e23cfb3e042fbbe74b7c38fd532d5", '
                             '"artifact_path": "model", "utc_time_created": '
                             '"2025-06-17 19:42:59.821940", "model_uuid": '
                             '"04bc9e915b9d48bfa04ea448a245351f", "flavors": '
                             '{"python_function": {"model_path": "model.pkl", '
                             '"predict_fn": "predict", "loader_module": '
                             '"mlflow.sklearn", "python_version": "3.12.7", '
                             '"env": {"conda": "conda.yaml", "virtualenv": '
                             '"python_env.yaml"}}, "sklearn": '
                 

In [15]:
from typing import Dict, List, Tuple, Union, Optional, Any
import pandas as pd
import yaml
import urllib.parse

In [17]:
# Reference example: how a bucket-style URI splits into scheme, netloc and path.
urllib.parse.urlparse("gs://mlfflow-bucket-nyc/1")

ParseResult(scheme='gs', netloc='mlfflow-bucket-nyc', path='/1', params='', query='', fragment='')

In [25]:
# Split the run's artifact URI into its components (scheme, netloc, path, ...)
# and display the result.
parsed = urllib.parse.urlparse(artifact_uri)
parsed

ParseResult(scheme='mlflow-artifacts', netloc='', path='/1/f45e23cfb3e042fbbe74b7c38fd532d5/artifacts', params='', query='', fragment='')

In [20]:
# True only when the artifact URI has no scheme, i.e. it is a plain filesystem
# path; a proxied `mlflow-artifacts:` URI yields False.
not parsed.scheme

True

In [27]:
# Location of the logged model's MLmodel file, expressed as a URI.
# URIs always use "/" as the separator, so the pieces are joined explicitly
# instead of with os.path.join, which would insert "\" on Windows.
"/".join([artifact_uri.rstrip("/"), "model", "MLmodel"])

'mlflow-artifacts:/1/f45e23cfb3e042fbbe74b7c38fd532d5/artifacts/model/MLmodel'

In [26]:
# Same MLmodel location, but relative to the path component of the parsed URI.
# The path part of a URI is "/"-separated regardless of the host OS, so it is
# joined explicitly rather than with os.path.join.
"/".join([parsed.path.rstrip("/"), "model", "MLmodel"])

'/1/f45e23cfb3e042fbbe74b7c38fd532d5/artifacts/model/MLmodel'

In [None]:
# Pick the first parent run and read its id.
# `parent_runs[:1]` is a one-element list and has no `.info` attribute, so the
# run itself must be taken by index before its fields are accessed.
run_try = parent_runs[0]
run_id = run_try.info.run_id

[<Run: data=<RunData: metrics={'mae': 4.284408121113809,
  'mse': 42.87294792226681,
  'r2': 0.7106465101056003,
  'rmse': 6.547743727595545}, params={'categorical_transformer': 'onehot',
  'numerical_transformer': 'none',
  'objective': 'reg:squarederror',
  'seed': '42'}, tags={'developer': 'habeeb',
  'mlflow.log-model.history': '[{"run_id": "ee510635d77b4672850c00f20462a362", '
                              '"artifact_path": "model", "utc_time_created": '
                              '"2025-06-17 21:40:22.929820", "model_uuid": '
                              '"94bf43eb0b1744a19b14b5a0e8489dc0", "flavors": '
                              '{"python_function": {"model_path": "model.pkl", '
                              '"predict_fn": "predict", "loader_module": '
                              '"mlflow.sklearn", "python_version": "3.12.7", '
                              '"env": {"conda": "conda.yaml", "virtualenv": '
                              '"python_env.yaml"}}, "sklearn": '
 