**Compute Key Performance Indicators (KPIs) from the MLflow tracking server and model registry.**

# IMPORTS

In [1]:
import os

import mlflow
import pandas as pd
import plotly.express as px

# OPTIONS

In [2]:
# Lift the column cap so wide frames are rendered with every column visible.
pd.set_option("display.max_columns", None)

# CONFIGS

In [3]:
# Upper bound on the number of records requested from each MLflow search call.
MAX_RESULTS = 100
# Server locations: MLflow's standard environment variables take precedence when
# set, so the notebook can target another server without editing this cell.
# The fallback is the local development server.
TRACKING_URI = os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:5000")
# Unless overridden, the model registry lives on the tracking server.
REGISTRY_URI = os.environ.get("MLFLOW_REGISTRY_URI", TRACKING_URI)

# CLIENTS

In [4]:
# One client serves both the tracking queries (experiments, runs) and the
# registry queries (registered models, model versions) issued below.
client = mlflow.tracking.MlflowClient(
    tracking_uri=TRACKING_URI,
    registry_uri=REGISTRY_URI,
)

# INDICATORS

In [5]:
# Load up to MAX_RESULTS experiments of any lifecycle stage (ViewType.ALL),
# newest first, as one row per experiment.
experiments = pd.DataFrame(
    [
        dict(entity)
        for entity in client.search_experiments(
            view_type=mlflow.entities.ViewType.ALL,
            max_results=MAX_RESULTS,
            order_by=["creation_time DESC"],
        )
    ]
)
# The timestamp columns are parsed as epoch milliseconds into datetimes.
experiments = experiments.assign(
    creation_time=pd.to_datetime(experiments["creation_time"], unit="ms"),
    last_update_time=pd.to_datetime(experiments["last_update_time"], unit="ms"),
)
print(experiments.shape)
experiments.head()

(1, 7)


Unnamed: 0,artifact_location,creation_time,experiment_id,last_update_time,lifecycle_stage,name,tags
0,file:///home/lgcorzo/mlops-python-package/mlruns...,2024-07-21 15:02:02.224,598443335336095652,2024-07-21 15:02:02.224,active,regression_model_template,{}


In [6]:
# Load at most MAX_RESULTS runs (any lifecycle stage) belonging to the
# experiments loaded above, most recently created first.
runs = client.search_runs(
    # Pass a plain list of ids (the documented parameter type), not a NumPy array.
    experiment_ids=experiments["experiment_id"].unique().tolist(),
    run_view_type=mlflow.entities.ViewType.ALL,
    max_results=MAX_RESULTS,
    order_by=["created DESC"],
)
# One flat record per run: the run's info fields merged with its data fields
# (metrics, params and tags stay as dict-valued columns).
runs = [dict(run.info) | dict(run.data) for run in runs]
runs = pd.DataFrame(runs).assign(
    # Timestamps are parsed as epoch milliseconds.
    end_time=lambda data: pd.to_datetime(data["end_time"], unit="ms"),
    start_time=lambda data: pd.to_datetime(data["start_time"], unit="ms"),
    # Vectorised duration in seconds; rows with a missing end_time give NaN.
    run_time_secs=lambda data: (data["end_time"] - data["start_time"]).dt.total_seconds(),
)
# Expand the tags dict into one column per tag key, next to the original columns.
runs = pd.concat([runs, pd.json_normalize(runs["tags"])], axis="columns")
print(runs.shape)
runs.head()

(18, 30)


Unnamed: 0,artifact_uri,end_time,experiment_id,lifecycle_stage,run_id,run_name,run_uuid,start_time,status,user_id,metrics,params,tags,run_time_secs,mlflow.user,mlflow.source.name,mlflow.source.type,mlflow.project.entryPoint,mlflow.source.git.commit,mlflow.source.git.repoURL,mlflow.gitRepoURL,mlflow.runName,mlflow.project.env,mlflow.project.backend,mlflow.datasets,estimator_name,estimator_class,mlflow.log-model.history,mlflow.autologging,mlflow.parentRunId
0,file:///home/lgcorzo/mlops-python-package/mlruns...,2024-07-21 15:31:53.829,598443335336095652,active,4df1e5931159491c963f2252add508cd,Explanations,4df1e5931159491c963f2252add508cd,2024-07-21 15:31:20.207,FINISHED,lgcorzo,{},{'conf_file': 'confs/explanations.yaml'},"{'mlflow.user': 'lgcorzo', 'mlflow.source.name':...",33.622,lgcorzo,file:///home/lgcorzo/mlops-python-package,PROJECT,main,ee17d0a9de59efd2eb99d667786ac417ec8b3b63,git@github.com:lgcorzo/mlops-python-package,git@github.com:lgcorzo/mlops-python-package,Explanations,virtualenv,local,,,,,,
1,file:///home/lgcorzo/mlops-python-package/mlruns...,2024-07-21 15:31:13.361,598443335336095652,active,e993a53ee04e4357bad5cec101ab4031,Evaluations,e993a53ee04e4357bad5cec101ab4031,2024-07-21 15:31:07.565,FINISHED,lgcorzo,"{'example_count': 13903.0, 'mean_absolute_erro...",{'conf_file': 'confs/evaluations.yaml'},"{'mlflow.user': 'lgcorzo', 'mlflow.source.name':...",5.796,lgcorzo,file:///home/lgcorzo/mlops-python-package,PROJECT,main,ee17d0a9de59efd2eb99d667786ac417ec8b3b63,git@github.com:lgcorzo/mlops-python-package,git@github.com:lgcorzo/mlops-python-package,Evaluations,virtualenv,local,"[{""name"":""ce0fe6e33c74e2fa3659d51482be5f27"",""h...",,,,,
2,file:///home/lgcorzo/mlops-python-package/mlruns...,2024-07-21 15:31:04.758,598443335336095652,active,c17b893e525b44ed89e349421e72510a,Inference,c17b893e525b44ed89e349421e72510a,2024-07-21 15:30:59.023,FINISHED,lgcorzo,{},{'conf_file': 'confs/inference.yaml'},"{'mlflow.user': 'lgcorzo', 'mlflow.source.name':...",5.735,lgcorzo,file:///home/lgcorzo/mlops-python-package,PROJECT,main,ee17d0a9de59efd2eb99d667786ac417ec8b3b63,git@github.com:lgcorzo/mlops-python-package,git@github.com:lgcorzo/mlops-python-package,Inference,virtualenv,local,,,,,,
3,file:///home/lgcorzo/mlops-python-package/mlruns...,2024-07-21 15:30:56.969,598443335336095652,active,d538cc2f3f644b6e97b611c1d96801ac,Promotion,d538cc2f3f644b6e97b611c1d96801ac,2024-07-21 15:30:51.594,FINISHED,lgcorzo,{},{'conf_file': 'confs/promotion.yaml'},"{'mlflow.user': 'lgcorzo', 'mlflow.source.name':...",5.375,lgcorzo,file:///home/lgcorzo/mlops-python-package,PROJECT,main,ee17d0a9de59efd2eb99d667786ac417ec8b3b63,git@github.com:lgcorzo/mlops-python-package,git@github.com:lgcorzo/mlops-python-package,Promotion,virtualenv,local,,,,,,
4,file:///home/lgcorzo/mlops-python-package/mlruns...,2024-07-21 15:30:46.302,598443335336095652,active,a75ea3e9742c48fd9c34a5b8abf9bd89,Training,a75ea3e9742c48fd9c34a5b8abf9bd89,2024-07-21 15:29:25.495,FINISHED,lgcorzo,{'training_mean_squared_error': 124.5105461557...,"{'conf_file': 'confs/training.yaml', 'memory':...","{'mlflow.user': 'lgcorzo', 'mlflow.source.name':...",80.807,lgcorzo,file:///home/lgcorzo/mlops-python-package,PROJECT,main,ee17d0a9de59efd2eb99d667786ac417ec8b3b63,git@github.com:lgcorzo/mlops-python-package,git@github.com:lgcorzo/mlops-python-package,Training,virtualenv,local,,Pipeline,sklearn.pipeline.Pipeline,"[{""run_id"": ""a75ea3e9742c48fd9c34a5b8abf9bd89""...",,


In [7]:
# Load up to MAX_RESULTS registered models, newest first, one row per model.
models = pd.DataFrame(
    [
        dict(entity)
        for entity in client.search_registered_models(
            max_results=MAX_RESULTS,
            order_by=["creation_timestamp DESC"],
        )
    ]
)
# Parse the epoch-millisecond timestamps, then drop the latest_versions column
# from the frame.
models = models.assign(
    creation_timestamp=pd.to_datetime(models["creation_timestamp"], unit="ms"),
    last_updated_timestamp=pd.to_datetime(models["last_updated_timestamp"], unit="ms"),
).drop(columns=["latest_versions"])
print(models.shape)
models

(1, 6)


Unnamed: 0,aliases,creation_timestamp,description,last_updated_timestamp,name,tags
0,{'Champion': '2'},2024-07-21 15:03:06.208,,2024-07-21 15:30:56.443,regression_model_template,{}


In [8]:
# Load up to MAX_RESULTS model versions, newest first, one row per version.
versions = client.search_model_versions(
    max_results=MAX_RESULTS, order_by=["creation_timestamp DESC"]
)
versions = [dict(version) for version in versions]
versions = pd.DataFrame(versions).assign(
    # Collapse the alias list into one display string. All aliases are joined so
    # that a version carrying several aliases does not silently lose any;
    # versions without an alias get None.
    aliases=lambda data: data["aliases"].map(
        lambda names: ", ".join(names) if names else None
    ),
    # Timestamps are parsed as epoch milliseconds.
    creation_timestamp=lambda data: pd.to_datetime(data["creation_timestamp"], unit="ms"),
    last_updated_timestamp=lambda data: pd.to_datetime(data["last_updated_timestamp"], unit="ms"),
)
print(versions.shape)
versions.head()

(2, 14)


Unnamed: 0,aliases,creation_timestamp,current_stage,description,last_updated_timestamp,name,run_id,run_link,source,status,status_message,tags,user_id,version
0,Champion,2024-07-21 15:30:44.679,,,2024-07-21 15:30:44.679,regression_model_template,a75ea3e9742c48fd9c34a5b8abf9bd89,,file:///home/lgcorzo/mlops-python-package/mlruns...,READY,,{},,2
1,,2024-07-21 15:03:06.212,,,2024-07-21 15:03:06.212,regression_model_template,47302e957d8542198a281aad07b2413b,,file:///home/lgcorzo/mlops-python-package/mlruns...,READY,,{},,1


# DASHBOARDS

In [9]:
# Timeline of experiment creation, one point per experiment, coloured by
# lifecycle stage; hovering shows every column of the experiment row.
px.strip(
    data_frame=experiments,
    title="Experiment Creation Time",
    x="creation_time",
    color="lifecycle_stage",
    hover_name="name",
    hover_data=experiments.columns,
)

In [10]:
# Timeline of registered-model creation, one point per model; hovering shows
# every column of the model row.
px.strip(
    data_frame=models,
    title="Model Creation Timestamp",
    x="creation_timestamp",
    hover_name="name",
    hover_data=models.columns,
)

In [11]:
# The preceding cell already charts model creation time from this same frame.
# Rather than rendering that identical figure a second time, this cell charts
# when each registered model was last updated.
px.strip(
    models,
    x="last_updated_timestamp",
    hover_name="name",
    hover_data=models.columns,
    title="Model Last Updated Timestamp",
)

In [12]:
# Timeline of model-version creation, coloured by the registered model name;
# hovering shows every column of the version row.
px.strip(
    data_frame=versions,
    title="Version Creation Timestamp",
    x="creation_timestamp",
    color="name",
    hover_name="name",
    hover_data=versions.columns,
)

In [13]:
# Timeline of run start times, coloured by owning experiment; hovering shows
# every column of the run row (including the expanded tag columns).
px.strip(
    data_frame=runs,
    title="Run Start Time",
    x="start_time",
    color="experiment_id",
    hover_name="run_name",
    hover_data=runs.columns,
)

In [14]:
# Distribution of run durations in seconds, coloured by run name so that the
# different job types can be compared; hovering shows every run column.
px.strip(
    data_frame=runs,
    title="Run Time",
    x="run_time_secs",
    color="run_name",
    hover_name="run_id",
    hover_data=runs.columns,
)

In [15]:
# Bar chart of runs by the estimator class recorded in their tags.
px.bar(
    data_frame=runs,
    x="estimator_class",
    title="Run Estimator Class Distribution",
)