In [None]:
from pathlib import Path

import pandas as pd

from analytics.app.data.load import list_pipelines, load_pipeline_logs
from analytics.app.data.transform import (
    df_aggregate_eval_metric,
    dfs_models_and_evals,
    patch_yearbook_time,
    pipeline_leaf_times_df,
)
from analytics.plotting.common.save import save_plot
from analytics.plotting.common.tradeoff_scatterplot import plot_tradeoff_scatter
from modyn.supervisor.internal.grpc.enums import PipelineStage
from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs
from modyn.supervisor.internal.pipeline_executor.models import StageLog

%load_ext autoreload
%autoreload 2

In [None]:
# Directories containing the pipeline runs to analyse.
# NOTE(review): these are absolute, machine-specific paths; the notebook only
# runs where this exact directory layout exists.
pipelines_dirs = [
    Path("/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/20_datadrift_static"),
    Path("/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/21_datadrift_dynamic"),
]

# Both mappings are keyed by pipeline id and accumulated over all directories;
# an id that occurs in several directories is overwritten by the later one.
pipeline_logs: dict[int, PipelineLogs] = {}
pipelines: dict[int, tuple[str, Path]] = {}

for pipelines_dir in pipelines_dirs:
    # Validate before reading, so a missing directory is reported as such
    # instead of surfacing as an error from inside the loaders.
    assert pipelines_dir.exists(), f"pipeline directory does not exist: {pipelines_dir}"

    dir_pipelines = list_pipelines(pipelines_dir)
    pipelines.update(dir_pipelines)
    pipeline_logs.update({p_id: load_pipeline_logs(p_id, pipelines_dir) for p_id in dir_pipelines})

# Show the collected pipelines once, after all directories were read.
print(pipelines)

In [None]:
# Spot check for one pipeline: read the `train_time_at_trainer` value of the
# TRAIN stage run that has the highest `trigger_idx`.
inspected_pipeline_id = 63

# Index the dict directly: an id that was not loaded then raises KeyError with
# the offending id instead of an AttributeError on None.
df = StageLog.df(
    [
        stage_run
        for stage_run in pipeline_logs[inspected_pipeline_id].supervisor_logs.stage_runs
        if stage_run.id == PipelineStage.TRAIN.name
    ],
    extended=True,
)

# idxmax() yields the index label of the row with the largest trigger index.
max_trigger_idx = df["trigger_idx"].idxmax()
time_at_trainer = df.loc[max_trigger_idx, "train_time_at_trainer"]
time_at_trainer

In [None]:
# --- Evaluation selection -------------------------------------------------
# Only evaluation rows matching all three values below are kept later on.
dataset_id = "yearbook_test"
eval_handler = "periodic-delta+-1y"
metric = "Accuracy"

# --- Yearbook specific handling ---------------------------------------------
# When set, the time columns are passed through `patch_yearbook_time`.
patch_yearbook = True

# --- Composite model --------------------------------------------------------
# Name of the boolean column used to reduce the evaluations to composite models.
composite_model_variant = "currently_active_model"
# NOTE(review): the variant above is documented as having an effect only when
# this flag is True, but no visible cell reads the flag - confirm.
include_composite_model = True

In [None]:
# Every loaded pipeline takes part in the analysis; narrow `pipeline_ids` here
# to restrict the notebook to a subset.
pipeline_ids = list(pipelines)

# (id, name) overview of the pipelines that will be processed.
[(p_id, pname) for p_id, (pname, _) in pipelines.items()]

# Wrangle data

In [None]:
# Per pipeline: collect the leaf-stage timing frame and the evaluation frame,
# each tagged with the pipeline id, then concatenate them across pipelines.
list_df_eval_single: list[pd.DataFrame] = []
df_leaf_list: list[pd.DataFrame] = []

for pipeline_id in pipeline_ids:
    logs = pipeline_logs[pipeline_id]
    pipeline_name = pipelines[pipeline_id][0]

    df_leaf_single = pipeline_leaf_times_df(logs, use_traintime_patch_at_trainer=True, pipeline_id=pipeline_id)
    df_leaf_single["pipeline_id"] = pipeline_id
    df_leaf_list.append(df_leaf_single)

    # Only the third returned frame is needed; the latest sample time of this
    # pipeline is handed to the helper as its second argument.
    _, _, df_eval_single = dfs_models_and_evals(logs, df_leaf_single["sample_time"].max(), pipeline_name)
    df_eval_single["pipeline_id"] = pipeline_id
    list_df_eval_single.append(df_eval_single)

df_adjusted = pd.concat(list_df_eval_single)
df_leaf = pd.concat(df_leaf_list)

# Only the last expression of a cell is rendered, so show the leaf frame here.
df_leaf

In [None]:
# Guard: the leaf frame must contain exactly these stage ids - no more, no less.
expected_stage_ids = {
    "INIT_CLUSTER_CONNECTION",
    "EVALUATE_TRIGGER_POLICY",
    "INFORM_SELECTOR_ABOUT_TRIGGER",
    "INFORM_SELECTOR_REMAINING_DATA",
    "TRAIN",
    "TRAINING_COMPLETED",
    "STORE_TRAINED_MODEL",
    "EVALUATE",
    "DONE",
}

# Print the observed ids first so they are visible even if the check fails.
print(df_leaf["id"].unique())
assert set(df_leaf["id"].unique()) == expected_stage_ids

In [None]:
# Keep only the evaluations for the configured dataset, eval handler and metric.
eval_selection = (
    (df_adjusted["dataset_id"] == dataset_id)
    & (df_adjusted["eval_handler"] == eval_handler)
    & (df_adjusted["metric"] == metric)
)
# .copy() detaches the selection from the frame it was taken from, so the column
# assignment below does not trigger pandas' SettingWithCopyWarning.
df_adjusted = df_adjusted[eval_selection].copy()

# in percent (0-100)
# NOTE: this cell rebinds `df_adjusted`; running it twice scales the values twice.
df_adjusted["value"] = df_adjusted["value"] * 100
df_adjusted

In [None]:
if patch_yearbook:
    # (frame, column) pairs handed to `patch_yearbook_time`; its return value is
    # not used, so it presumably patches the frame in place - TODO confirm.
    time_columns_to_patch = [
        (df_adjusted, "interval_start"),
        (df_adjusted, "interval_center"),
        (df_adjusted, "interval_end"),
        (df_leaf, "sample_time"),
    ]
    for frame, column in time_columns_to_patch:
        patch_yearbook_time(frame, column)

In [None]:
# Order the evaluations by the center of their evaluation interval.
df_adjusted = df_adjusted.sort_values("interval_center")

In [None]:
# Keep only the rows flagged in the `composite_model_variant` column.
composite_rows = df_adjusted[composite_model_variant]
df_adjusted = df_adjusted[composite_rows]

# Sanity output: the remaining distinct flag values.
df_adjusted[composite_model_variant].unique()

In [None]:
# Restrict the evaluations to the range in which every pipeline has results:
# take each pipeline's earliest interval center and cut at the latest of those.
active_rows = df_adjusted[df_adjusted[composite_model_variant]]
min_active_eval_center_per_pipeline = active_rows.groupby("pipeline_ref")["interval_center"].min()
maximum_min = min_active_eval_center_per_pipeline.max()
print(maximum_min, min_active_eval_center_per_pipeline)

# Guard: the common start must lie before 1940.
assert maximum_min < pd.Timestamp("1940-01-01")

is_in_common_window = df_adjusted["interval_center"] >= maximum_min
df_adjusted = df_adjusted[is_in_common_window]
df_adjusted["interval_center"].unique()

In [None]:
# Replace `interval_center` by the text before the first "-" of its string form
# (presumably the year of a date-like value - TODO confirm).
# .assign() returns a new frame, so nothing is written into a filtered slice and
# pandas' SettingWithCopyWarning is avoided.
df_adjusted = df_adjusted.assign(
    interval_center=df_adjusted["interval_center"].astype(str).str.split("-").str[0]
)

In [None]:
# Display the evaluation frame in its current state for visual inspection.
df_adjusted

In [None]:
# One scalar per pipeline: `value` is aggregated with "mean" into `metric_value`
# for every combination of the grouping keys below.
accuracy_group_keys = ["pipeline_id", "pipeline_ref", "metric"]
mean_accuracies = df_aggregate_eval_metric(
    df_adjusted,
    aggregate_func="mean",
    group_by=accuracy_group_keys,
    in_col="value",
    out_col="metric_value",
)
mean_accuracies

In [None]:
# Leaf-stage rows that belong to TRAIN stage runs.
# NOTE(review): the following cell recomputes `df_triggers` from `df_leaf`.
is_train_stage = df_leaf["id"] == PipelineStage.TRAIN.name
df_triggers = df_leaf[is_train_stage]

In [None]:
# TRAIN stage runs whose sample time lies strictly after the common cutoff.
train_runs = df_leaf[df_leaf["id"] == PipelineStage.TRAIN.name]
df_triggers = train_runs[train_runs["sample_time"] > maximum_min]
df_triggers

In [None]:
# Per pipeline: number of TRAIN runs after `maximum_min` and their summed duration.
num_triggers = df_triggers.groupby("pipeline_id").agg(
    count=("id", "count"),
    sum_duration=("duration", "sum"),
)

# One trigger happened before the cutoff (it corresponds to the start of the
# reduced dataset), so it is added to every pipeline's count.
num_triggers["count"] = num_triggers["count"] + 1
num_triggers

In [None]:
# Combine trigger counts/durations with the mean accuracies per pipeline.
merged = pd.merge(num_triggers, mean_accuracies, on="pipeline_id")

# Guard: the join must not lose or duplicate any row of the accuracy frame.
assert len(mean_accuracies) == len(merged)
merged

In [None]:
def create_type(x: str) -> str:
    """Classify a pipeline reference string by the first keyword it contains.

    The keywords are checked in a fixed order - "year", "samples", "d" - and the
    first one found as a substring of ``x`` decides the result.

    Args:
        x: The pipeline reference string to classify.

    Returns:
        "time", "amount" or "drift" for the matching keyword, "unknown" if none
        of the keywords occurs in ``x``.
    """
    # Order matters: "d" is a very permissive match and must be tested last.
    keyword_to_type = (
        ("year", "time"),
        ("samples", "amount"),
        ("d", "drift"),
    )
    return next((kind for keyword, kind in keyword_to_type if keyword in x), "unknown")


# Derive the coarse trigger category of each pipeline from its reference string.
# The function is passed directly; wrapping it in a lambda adds nothing.
merged["type"] = merged["pipeline_ref"].apply(create_type)
merged

In [None]:
# Pipelines that appear in the trade-off plots; the trailing comments give the
# parameter value of each run as noted by the author.
selected_pipeline_ids = [
    # static thresholds
    113,  # 0.03
    112,  # 0.05
    107,  # 0.07
    109,  # 0.09
    85,  # 0.12
    # dyn quantile
    353,  # % 0.05
    345,  # % 0.10
    357,  # % 0.15
    # dyn roll. avg
    372,  # Δ 2.0
    370,  # Δ 1.0
    369,  # Δ 0.5
    363,  # Δ 0.05
]


def trigger_type_label(pipeline_ref: str) -> str:
    """Return the legend label for the trigger family of a pipeline reference.

    References containing "quant" or "roll" get the corresponding dynamic
    label; everything else is labelled as a static MMD threshold.
    """
    if "quant" in pipeline_ref:
        return "Dyn. Quantile % [0.05/0.1/0.15]"
    if "roll" in pipeline_ref:
        return "Roll. Avg Δ [2.0/1.0/0.05/0.5]"
    return "static MMD threshold\n[0.03/0.05/0.07/0.09/0.12]"


# .copy() so that adding the label column does not write into a slice of `merged`.
renamed = merged[merged["pipeline_id"].isin(selected_pipeline_ids)].copy()
renamed["Trigger Type"] = renamed["pipeline_ref"].apply(trigger_type_label)
renamed

In [None]:
# Trade-off plot: number of triggers (x) against mean accuracy (y).
triggers_vs_accuracy_options = dict(
    x="count",
    x_label="Number of Triggers",
    y="metric_value",
    y_label="Mean Accuracy %",
    hue="Trigger Type",
    style="Trigger Type",
    height_factor=0.6,
    width_factor=0.7,
)
fig = plot_tradeoff_scatter(renamed, **triggers_vs_accuracy_options)

save_plot(fig, "tradeoff_drift_yearbook_triggers_performance")

In [None]:
# Same data with the summed duration divided by 60 (labelled as minutes below);
# .assign() returns a new frame and leaves `renamed` untouched.
in_minutes = renamed.assign(sum_duration=renamed["sum_duration"] / 60)

# Trade-off plot: total cost (x) against mean accuracy (y).
cost_vs_accuracy_options = dict(
    x="sum_duration",
    x_label="Total Cost (Minutes)",
    y="metric_value",
    y_label="Mean Accuracy %",
    hue="Trigger Type",
    style="Trigger Type",
    height_factor=0.6,
    width_factor=0.7,
)
fig = plot_tradeoff_scatter(in_minutes, **cost_vs_accuracy_options)

save_plot(fig, "tradeoff_drift_yearbook_cost_performance")

In [None]:
# Trade-off plot: number of triggers (x) against the summed duration (y).
triggers_vs_cost_options = dict(
    x="count",
    x_label="Number of Triggers",
    y="sum_duration",
    y_label="Total Cost (seconds)",
    hue="Trigger Type",
    style="Trigger Type",
    height_factor=0.6,
    width_factor=0.8,
)
fig = plot_tradeoff_scatter(renamed, **triggers_vs_cost_options)

save_plot(fig, "tradeoff_drift_yearbook_triggers_cost")