**Compute Key Performance Indicators (KPIs) from the MLflow server.**

# IMPORTS

In [1]:
import mlflow
import pandas as pd
import plotly.express as px

# OPTIONS

In [2]:
# Never truncate columns when rendering wide DataFrames.
pd.set_option("display.max_columns", None)

# CONFIGS

In [3]:
# Upper bound on the number of records requested from each MLflow search call.
MAX_RESULTS = 100
# Tracking and registry are both served from the same local MLflow endpoint.
TRACKING_URI = "http://localhost:5000"
REGISTRY_URI = TRACKING_URI

# CLIENTS

In [4]:
# One client for all queries in this notebook; both endpoints are set explicitly.
client = mlflow.tracking.MlflowClient(
    registry_uri=REGISTRY_URI,
    tracking_uri=TRACKING_URI,
)

# INDICATORS

In [5]:
# Experiments in every lifecycle stage, most recently created first,
# flattened into one row per experiment.
experiments = pd.DataFrame(
    [
        dict(entity)
        for entity in client.search_experiments(
            view_type=mlflow.entities.ViewType.ALL,
            max_results=MAX_RESULTS,
            order_by=["creation_time DESC"],
        )
    ]
).pipe(
    # Timestamps are parsed as epoch milliseconds and converted to datetimes.
    lambda frame: frame.assign(
        creation_time=pd.to_datetime(frame["creation_time"], unit="ms"),
        last_update_time=pd.to_datetime(frame["last_update_time"], unit="ms"),
    )
)
print(experiments.shape)
experiments.head()

(1, 7)


Unnamed: 0,artifact_location,creation_time,experiment_id,last_update_time,lifecycle_stage,name,tags
0,file:///home/fmind/mlops-python-package/mlruns...,2024-07-20 12:43:59.721,562757566260573932,2024-07-20 12:43:59.721,active,bikes,{}


In [6]:
# Runs of the experiments listed above (all lifecycle stages), one row per run.
# Each row merges the run's info fields with its data fields (metrics, params, tags).
runs = (
    pd.DataFrame(
        [
            {**dict(found.info), **dict(found.data)}
            for found in client.search_runs(
                experiment_ids=experiments["experiment_id"].unique(),
                run_view_type=mlflow.entities.ViewType.ALL,
                max_results=MAX_RESULTS,
                order_by=["created DESC"],
            )
        ]
    )
    # Timestamps are parsed as epoch milliseconds and converted to datetimes.
    .pipe(
        lambda frame: frame.assign(
            end_time=pd.to_datetime(frame["end_time"], unit="ms"),
            start_time=pd.to_datetime(frame["start_time"], unit="ms"),
        )
    )
    # Expand the `tags` dict into one column per tag, appended to the right.
    .pipe(
        lambda frame: pd.concat(
            [frame, pd.json_normalize(frame['tags'])], axis="columns"
        )
    )
)
print(runs.shape)
runs.head()

(28, 29)


Unnamed: 0,artifact_uri,end_time,experiment_id,lifecycle_stage,run_id,run_name,run_uuid,start_time,status,user_id,metrics,params,tags,mlflow.user,mlflow.source.name,mlflow.source.type,mlflow.project.entryPoint,mlflow.source.git.commit,mlflow.source.git.repoURL,mlflow.gitRepoURL,mlflow.runName,mlflow.project.env,mlflow.project.backend,mlflow.datasets,estimator_name,estimator_class,mlflow.log-model.history,mlflow.autologging,mlflow.parentRunId
0,file:///home/fmind/mlops-python-package/mlruns...,2024-07-21 14:08:59.757,562757566260573932,active,2f6920e2006841d7a72567e9f25df89f,Explanations,2f6920e2006841d7a72567e9f25df89f,2024-07-21 14:08:29.817,FINISHED,fmind,{},{'conf_file': 'confs/explanations.yaml'},"{'mlflow.user': 'fmind', 'mlflow.source.name':...",fmind,file:///home/fmind/mlops-python-package,PROJECT,main,f071dd1397f5c7592fd82a2fe169f5ecaf3af901,git@github.com:fmind/mlops-python-package,git@github.com:fmind/mlops-python-package,Explanations,virtualenv,local,,,,,,
1,file:///home/fmind/mlops-python-package/mlruns...,2024-07-21 14:08:28.634,562757566260573932,active,94fa6a85393745f9b8bb5cac75f865e4,Evaluations,94fa6a85393745f9b8bb5cac75f865e4,2024-07-21 14:08:24.395,FINISHED,fmind,"{'example_count': 13903.0, 'mean_absolute_erro...",{'conf_file': 'confs/evaluations.yaml'},"{'mlflow.user': 'fmind', 'mlflow.source.name':...",fmind,file:///home/fmind/mlops-python-package,PROJECT,main,f071dd1397f5c7592fd82a2fe169f5ecaf3af901,git@github.com:fmind/mlops-python-package,git@github.com:fmind/mlops-python-package,Evaluations,virtualenv,local,"[{""name"":""ce0fe6e33c74e2fa3659d51482be5f27"",""h...",,,,,
2,file:///home/fmind/mlops-python-package/mlruns...,2024-07-21 14:08:23.205,562757566260573932,active,da669581953a4a46ab84efaabc8f3e92,Inference,da669581953a4a46ab84efaabc8f3e92,2024-07-21 14:08:19.775,FINISHED,fmind,{},{'conf_file': 'confs/inference.yaml'},"{'mlflow.user': 'fmind', 'mlflow.source.name':...",fmind,file:///home/fmind/mlops-python-package,PROJECT,main,f071dd1397f5c7592fd82a2fe169f5ecaf3af901,git@github.com:fmind/mlops-python-package,git@github.com:fmind/mlops-python-package,Inference,virtualenv,local,,,,,,
3,file:///home/fmind/mlops-python-package/mlruns...,2024-07-21 14:08:18.412,562757566260573932,active,023a21263eaa43228b324fb432932cf4,Promotion,023a21263eaa43228b324fb432932cf4,2024-07-21 14:08:15.431,FINISHED,fmind,{},{'conf_file': 'confs/promotion.yaml'},"{'mlflow.user': 'fmind', 'mlflow.source.name':...",fmind,file:///home/fmind/mlops-python-package,PROJECT,main,f071dd1397f5c7592fd82a2fe169f5ecaf3af901,git@github.com:fmind/mlops-python-package,git@github.com:fmind/mlops-python-package,Promotion,virtualenv,local,,,,,,
4,file:///home/fmind/mlops-python-package/mlruns...,2024-07-21 14:08:13.567,562757566260573932,active,d37276201a5b4ff98addae88f5ed1a34,Training,d37276201a5b4ff98addae88f5ed1a34,2024-07-21 14:07:54.473,FINISHED,fmind,{'training_mean_squared_error': 124.5105461557...,"{'conf_file': 'confs/training.yaml', 'memory':...","{'mlflow.user': 'fmind', 'mlflow.source.name':...",fmind,file:///home/fmind/mlops-python-package,PROJECT,main,f071dd1397f5c7592fd82a2fe169f5ecaf3af901,git@github.com:fmind/mlops-python-package,git@github.com:fmind/mlops-python-package,Training,virtualenv,local,,Pipeline,sklearn.pipeline.Pipeline,"[{""run_id"": ""d37276201a5b4ff98addae88f5ed1a34""...",,


In [7]:
# Registered models, most recently created first, one row per model.
models = (
    pd.DataFrame(
        [
            dict(registered)
            for registered in client.search_registered_models(
                max_results=MAX_RESULTS,
                order_by=["creation_timestamp DESC"],
            )
        ]
    )
    # Timestamps are parsed as epoch milliseconds and converted to datetimes.
    .pipe(
        lambda frame: frame.assign(
            creation_timestamp=pd.to_datetime(frame["creation_timestamp"], unit="ms"),
            last_updated_timestamp=pd.to_datetime(frame["last_updated_timestamp"], unit="ms"),
        )
    )
    # Model versions are loaded separately in the next cell.
    .drop(columns=['latest_versions'])
)
print(models.shape)
models

(1, 6)


Unnamed: 0,aliases,creation_timestamp,description,last_updated_timestamp,name,tags
0,{'Champion': '3'},2024-07-20 12:46:27.013,,2024-07-21 14:08:17.804,bikes,{}


In [8]:
# Model versions across all registered models, most recently created first,
# one row per version.
versions = client.search_model_versions(
    max_results=MAX_RESULTS, order_by=["creation_timestamp DESC"]
)
versions = [dict(version) for version in versions]
versions = pd.DataFrame(versions).assign(
    # Collapse the alias list to a single display string. Every alias is kept
    # (sorted, comma-separated) rather than only the first one, so a version with
    # several aliases is not under-reported. Versions with no alias stay None.
    aliases=lambda data: data['aliases'].map(
        lambda names: ", ".join(sorted(names)) if names else None
    ),
    # Timestamps are parsed as epoch milliseconds and converted to datetimes.
    creation_timestamp=lambda data: pd.to_datetime(data["creation_timestamp"], unit="ms"),
    last_updated_timestamp=lambda data: pd.to_datetime(data["last_updated_timestamp"], unit="ms"),
)
print(versions.shape)
versions.head()

(3, 14)


Unnamed: 0,aliases,creation_timestamp,current_stage,description,last_updated_timestamp,name,run_id,run_link,source,status,status_message,tags,user_id,version
0,Champion,2024-07-21 14:08:12.799,,,2024-07-21 14:08:12.799,bikes,d37276201a5b4ff98addae88f5ed1a34,,file:///home/fmind/mlops-python-package/mlruns...,READY,,{},,3
1,,2024-07-20 12:52:29.627,,,2024-07-20 12:52:29.627,bikes,b428ae1513a54e16a5bf3959cf3eeb8e,,file:///home/fmind/mlops-python-package/mlruns...,READY,,{},,2
2,,2024-07-20 12:46:27.017,,,2024-07-20 12:46:27.017,bikes,795eace5f76a4fe4af3754460b5e25e4,,file:///home/fmind/mlops-python-package/mlruns...,READY,,{},,1


# DASHBOARDS

In [9]:
# Timeline of experiment creation, colored by lifecycle stage; hover shows every column.
px.strip(
    data_frame=experiments,
    x="creation_time",
    title="Experiment Creation Time",
    color="lifecycle_stage",
    hover_name="name",
    hover_data=experiments.columns,
)

In [10]:
# Timeline of registered-model creation; hover shows every column.
px.strip(
    data_frame=models,
    x="creation_timestamp",
    title="Model Creation Timestamp",
    hover_name="name",
    hover_data=models.columns,
)

In [11]:
# Timeline of when each registered model was last modified
# (the creation-time view is the chart above); hover shows every column.
px.strip(
    models,
    x="last_updated_timestamp",
    hover_name="name",
    hover_data=models.columns,
    title="Model Last Updated Timestamp",
)

In [12]:
# Timeline of model-version creation, colored by registered model name.
px.strip(
    data_frame=versions,
    x="creation_timestamp",
    title="Version Creation Timestamp",
    color="name",
    hover_name="name",
    hover_data=versions.columns,
)

In [13]:
# Timeline of run start times, colored by owning experiment; hover shows every column.
px.strip(
    data_frame=runs,
    x="start_time",
    title="Run Start Time",
    color="experiment_id",
    hover_name="run_name",
    hover_data=runs.columns,
)

In [14]:
# Bar chart of runs along the `estimator_class` column (expanded from the run tags).
px.bar(
    data_frame=runs,
    x="estimator_class",
    title="Run Estimator Class Distribution",
)