In [1]:
import mlflow

mlflow.__version__

'2.21.3'

In [2]:
from src.utils.folder_operations import get_project_root

# set mlflow tracking uri
mlflow.set_tracking_uri(
    # (get_project_root() / 'mlflow_new/mlruns').as_uri()
    "http://localhost:3000"
)

import os
os.environ['AWS_ACCESS_KEY_ID'] = 'mlflow'
os.environ['AWS_SECRET_ACCESS_KEY'] = 'mlflow123'
os.environ['MLFLOW_S3_ENDPOINT_URL'] = 'http://localhost:9000'

# Search runs

In [4]:
import random
from typing import Dict 
from src.utils.run_batch import create_runs_batch

## Generating Demo Runs

In [5]:
experiments = mlflow.search_experiments(view_type=mlflow.tracking.client.ViewType.ACTIVE_ONLY,
                                        max_results=3)

for experiment in experiments:
    create_runs_batch(experiment_id=experiment.experiment_id,n_runs=random.randint(1,5))

In [16]:
experiment_name = experiments[0].name
print(f"Experiment name: {experiment_name}")
runs = mlflow.search_runs(experiment_names=[experiment_name])

Experiment name: Default


In [17]:
runs.head()

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.m1,metrics.m3,metrics.m2,metrics.metric1,...,tags.mlflow.log-model.history,tags.mlflow.source.name,tags.mlflow.source.type,tags.mlflow.user,tags.mlflow.runName,tags.mlflow.parentRunId,tags.my-tag,tags.my-tag2,tags.client-tag,tags.mlflow.note.content
0,f13ecbf26c1146e8ba65dfe3353ce35f,0,FINISHED,s3://mlflow/0/f13ecbf26c1146e8ba65dfe3353ce35f...,2025-05-07 14:39:52.768000+00:00,2025-05-07 14:39:54.454000+00:00,,,,,...,"[{""run_id"": ""f13ecbf26c1146e8ba65dfe3353ce35f""...",/Users/caiosainvallio/projects/mlflow_new/.ven...,LOCAL,caiosainvallio,registering-model,,,,,
1,6398052dc0474903a54057c55683a766,0,FINISHED,s3://mlflow/0/6398052dc0474903a54057c55683a766...,2025-05-07 14:34:32.408000+00:00,2025-05-07 14:34:34.563000+00:00,,,,,...,"[{""run_id"": ""6398052dc0474903a54057c55683a766""...",/Users/caiosainvallio/projects/mlflow_new/.ven...,LOCAL,caiosainvallio,registering-model,,,,,
2,67a9bf456c4b4d7e8a8277ea351ec6eb,0,FINISHED,s3://mlflow/0/67a9bf456c4b4d7e8a8277ea351ec6eb...,2025-05-07 14:31:48.742000+00:00,2025-05-07 14:31:50.277000+00:00,,,,,...,"[{""run_id"": ""67a9bf456c4b4d7e8a8277ea351ec6eb""...",/Users/caiosainvallio/projects/mlflow_new/.ven...,LOCAL,caiosainvallio,registering_model_providing_name,,,,,
3,4ce3cd327eb54eabb8dd4cdb715391cd,0,FINISHED,s3://mlflow/0/4ce3cd327eb54eabb8dd4cdb715391cd...,2025-05-07 14:30:53.103000+00:00,2025-05-07 14:30:55.045000+00:00,,,,,...,"[{""run_id"": ""4ce3cd327eb54eabb8dd4cdb715391cd""...",/Users/caiosainvallio/projects/mlflow_new/.ven...,LOCAL,caiosainvallio,logging_model,,,,,
4,826b6838ae3f4fbd856072f4f0a735fe,0,FINISHED,s3://mlflow/0/826b6838ae3f4fbd856072f4f0a735fe...,2025-05-07 14:29:31.182000+00:00,2025-05-07 14:29:32.599000+00:00,,,,,...,"[{""run_id"": ""826b6838ae3f4fbd856072f4f0a735fe""...",/Users/caiosainvallio/projects/mlflow_new/.ven...,LOCAL,caiosainvallio,class_model_with_parameters,,,,,


In [None]:
# # delete runs 
# for run in runs['run_id']:
#     mlflow.delete_run(run_id=run)

# Using filter strings
In order to filter your MLflow runs, you will need to write search queries, which are pseudo-SQL conditions expressed in a distinct syntax.

In [7]:
runs = mlflow.search_runs(experiment_names=[experiment_name], filter_string="tags.project_type = 'development'")

In [8]:
runs.head()

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time


In [9]:
runs[["metrics.metric_1","metrics.metric_2","params.param_1","params.param_2","tags.algorithm_type"]]

KeyError: "None of [Index(['metrics.metric_1', 'metrics.metric_2', 'params.param_1',\n       'params.param_2', 'tags.algorithm_type'],\n      dtype='object')] are in the [columns]"

In [10]:
runs = mlflow.search_runs(experiment_names=[experiment_name], filter_string="tags.project_type = 'development' AND metrics.metric_1 > 0.8")


In [11]:
runs[["metrics.metric_1","metrics.metric_2","params.param_1","params.param_2","tags.algorithm_type"]]


KeyError: "None of [Index(['metrics.metric_1', 'metrics.metric_2', 'params.param_1',\n       'params.param_2', 'tags.algorithm_type'],\n      dtype='object')] are in the [columns]"

# Returning Run Objects
To return Run objects we have to specify this using the parameter output_format

output_format – The output format to be returned. If pandas, a pandas.DataFrame is returned and, if list, a list of mlflow.entities.Run is returned.

In [12]:
runs = mlflow.search_runs(experiment_names=[experiment_name], filter_string="tags.project_type = 'development'", output_format="list")

In [13]:
for run in runs:
    print(type(run))
    print(f"Name: {run.info.run_name}, ID: {run.info.run_id}, Metrics: {run.data.metrics}")
    print("\n")