## Import & Setting

### Import

In [2]:
import numpy as np
import pandas as pd
from mlflow import MlflowClient
from tqdm import tqdm

from cate.plot import LinePlot, Ticks

### Setting

In [3]:
# Address of the MLflow tracking server that stores the runs analysed below.
REMOTE_TRACKING_URI = "http://ec2-44-217-145-52.compute-1.amazonaws.com:5000"

# Single client shared by every cell that reads from or writes to MLflow.
client = MlflowClient(tracking_uri=REMOTE_TRACKING_URI)

### Functions

In [4]:
def get_metric_dfs(
    metrics: dict, columns: list[str]
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Split per-model ``(mean, std)`` pairs into two DataFrames.

    Args:
        metrics: Mapping of model name to a mapping of metric key to a
            two-element ``(mean, std)`` sequence.
        columns: Metric keys to extract; each must exist for every model.

    Returns:
        ``(mean_df, std_df)``. Both frames have one column per model and
        one row per requested metric key.

    Raises:
        KeyError: If a requested metric key is missing for some model.
        ValueError: If a stored entry does not unpack into exactly two values.
    """
    means_by_model = {}
    stds_by_model = {}
    for model, stats in metrics.items():
        model_means, model_stds = {}, {}
        for key in columns:
            # Each entry is a (mean, std) pair; unpack it into both tables.
            model_means[key], model_stds[key] = stats[key]
        means_by_model[model] = model_means
        stds_by_model[model] = model_stds
    return pd.DataFrame(means_by_model), pd.DataFrame(stds_by_model)

## Process

In [5]:
# Parent run that groups the per-model child runs; results are logged back to it.
parent_run_id = "958128ce482042c8aea9d2a5474e3b1c"
parent_run = client.get_run(parent_run_id)
experiment_id = parent_run.info.experiment_id
# Child runs are found through the `mlflow.parentRunId` tag, searched within
# the parent's experiment and ordered by start time, newest first.
child_runs = client.search_runs(
    experiment_ids=[experiment_id],
    filter_string=f"tags.mlflow.parentRunId='{parent_run_id}'",
    order_by=["start_time DESC"],
)
# Only the run ids are kept for the metric-history lookups in the next cell.
run_ids = [run.info.run_id for run in child_runs]

In [7]:
# Summarise every child run as {metric key: [mean, std]} computed over the
# metric's full logged history, keyed by the run's "model" tag.
metrics = {}
for run_id in tqdm(run_ids):
    # Fetch the run once: it provides both the metric keys and the tags,
    # so a second remote round trip per run is unnecessary.
    run = client.get_run(run_id)
    metric = {}
    for metrics_column in run.data.metrics.keys():
        values = [
            metric_history.value
            for metric_history in client.get_metric_history(run_id, metrics_column)
        ]
        # np.std uses its default ddof=0 (population standard deviation).
        metric[metrics_column] = [np.mean(values), np.std(values)]
    # NOTE: runs that share the same "model" tag overwrite each other here.
    model_name = run.data.tags["model"]
    metrics[model_name] = metric

100%|██████████| 5/5 [00:58<00:00, 11.63s/it]


In [8]:
# Metric keys grouped by the table they are reported in:
#   - "qini" / "uplift": one key per decile, `<name>_at_0` ... `<name>_at_90`
#   - "auuc": a single scalar key
#   - "system": the system-level keys listed below
metrics_columns = {
    **{
        curve: [f"{curve}_at_{percentile}" for percentile in range(0, 100, 10)]
        for curve in ("qini", "uplift")
    },
    "auuc": ["auuc"],
    "system": [
        "system/cpu_utilization_percentage",
        "system/disk_available_megabytes",
        "system/disk_usage_megabytes",
        "system/disk_usage_percentage",
        "system/gpu_0_memory_usage_megabytes",
        "system/gpu_0_memory_usage_percentage",
        "system/gpu_0_power_usage_percentage",
        "system/gpu_0_power_usage_watts",
        "system/gpu_0_utilization_percentage",
        "system/network_receive_megabytes",
        "system/network_transmit_megabytes",
        "system/system_memory_usage_megabytes",
        "system/system_memory_usage_percentage",
    ],
}

In [None]:
# One mean table and one std table per metric group (models as columns).
mean_dfs = {}
std_dfs = {}
for name, columns in metrics_columns.items():
    group_mean, group_std = get_metric_dfs(metrics, columns)
    mean_dfs[name] = group_mean
    std_dfs[name] = group_std

In [None]:
# Plot the mean uplift and qini curves and attach each figure to the parent run.
for name in ["uplift", "qini"]:
    # Ten x positions labelled "0", "10", ..., "90".
    x_ticks = Ticks(
        ticks=list(range(0, 10)),
        labels=[str(i * 10) for i in range(0, 10)],
    )
    line_plot = LinePlot(x_ticks=x_ticks)
    # Use the current metric's table, title and y-label. These were previously
    # hard-coded to "uplift", so qini.png was a duplicate of the uplift plot.
    fig = line_plot(mean_dfs[name], f"{name} curve", "percentile", name)
    client.log_figure(parent_run_id, figure=fig, artifact_file=f"{name}.png")

In [None]:
# Store every mean table, then every std table, as JSON artifacts on the
# parent run. reset_index() turns the metric-key index into a regular column.
for name, df in mean_dfs.items():
    client.log_table(
        run_id=parent_run_id,
        data=df.reset_index(),
        artifact_file=f"{name}_mean.json",
    )
for name, df in std_dfs.items():
    client.log_table(
        run_id=parent_run_id,
        data=df.reset_index(),
        artifact_file=f"{name}_std.json",
    )

In [None]:
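# Disabled cleanup utility: when uncommented, the code below deletes the
# artifacts listed at the root of the parent run's artifact location.
# from mlflow.store.artifact.artifact_repository_registry import get_artifact_repository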

# repository = get_artifact_repository(parent_run.info.artifact_uri)
# for file in repository.list_artifacts():
#     repository.delete_artifacts(file.path)