In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from analytics.app.data.load import list_pipelines
from analytics.app.data.transform import (
    df_aggregate_eval_metric,
    dfs_models_and_evals,
    logs_dataframe,
    patch_yearbook_time,
)
from modyn.supervisor.internal.grpc.enums import PipelineStage

%load_ext autoreload
%autoreload 2

In [None]:
# INPUTS
# NOTE(review): both paths are absolute and machine-specific; adjust them to the local
# checkout before running the notebook.

# directory that is scanned for pipeline logs
pipelines_dir = Path("/Users/robinholzinger/robin/dev/eth/modyn-sigmod-data/yearbook/triggering/logs_agg")
# directory the rendered figures are written to
output_dir = Path("/Users/robinholzinger/robin/dev/eth/modyn-2/.analytics.log/.data/_plots")

# fail early, and say which path is missing
assert pipelines_dir.exists(), f"pipelines_dir does not exist: {pipelines_dir}"
assert output_dir.exists(), f"output_dir does not exist: {output_dir}"

In [None]:
# Discover the pipelines stored below `pipelines_dir`; the mapping is keyed by pipeline id.
pipelines = list_pipelines(pipelines_dir)
max_pipeline_id = max(pipelines)
pipelines

In [None]:
from analytics.app.data.load import load_pipeline_logs

# Load the logs of every discovered pipeline, keyed by pipeline id.
pipeline_logs = {pid: load_pipeline_logs(pid, pipelines_dir) for pid in pipelines}

In [None]:
# Inspect the type of a loaded log object. Uses an id that is known to exist
# (the largest discovered one) instead of a hard-coded pipeline id.
type(pipeline_logs[max_pipeline_id])

In [None]:
# Pipeline selection. The three groups are concatenated in this order, which is
# also the order in which the per-pipeline frames are collected further down:
#   1. yearly time triggers (`_1y`, `_3y`, `_5y`)
#   2. data amount triggers whose name contains 500 or 2000
#   3. an explicit set of drift (mmdalibi) pipelines
pipeline_ids = (
    [
        pid
        for pid, (pname, _) in pipelines.items()
        if "timetrigger" in pname and any(tag in pname for tag in ("_1y", "_3y", "_5y"))
    ]
    + [
        pid
        for pid, (pname, _) in pipelines.items()
        if "amount" in pname and any(tag in pname for tag in ("500", "2000"))
    ]
    + [
        pid
        for pid, (pname, _) in pipelines.items()
        if pname
        in {
            "yearbook_mmdalibi_250_0.05_5d",
            "yearbook_mmdalibi_250_0.07_1d",
            "yearbook_mmdalibi_250_0.07_5d",
            "yearbook_mmdalibi_250_0.05_1d",
            "yearbook_mmdalibi_500_0.05_1d",
            "yearbook_mmdalibi_100_0.05_1d",
        }
    ]
)

# Evaluation settings used by the cells below
composite_model_variant = "currently_active_model"  # currently_trained_model
patch_yearbook = True
dataset_id = "yearbook_test"
eval_handler = "slidingmatrix"
metric = "Accuracy"
include_composite_model = False


def pipeline_name_mapper(name: str) -> str:
    """Turn a raw pipeline name into the short label shown in the plot.

    The substrings ``yearbook_``, ``timetrigger_``, ``amounttrigger_`` and
    ``mmdalibi_`` are removed first. The remainder is then formatted by suffix:

    * ends with ``y`` (time trigger): ``"<n> year"`` when the trailing number is
      exactly 1, otherwise ``"<n> years"``
    * ends with ``d`` (drift): underscores are replaced by ``/``
    * anything else (data amount): ``" samples"`` is appended

    Args:
        name: raw pipeline name, e.g. ``"yearbook_timetrigger_3y"``.

    Returns:
        The human readable label, e.g. ``"3 years"``.
    """
    for prefix in ("yearbook_", "timetrigger_", "amounttrigger_", "mmdalibi_"):
        name = name.replace(prefix, "")

    if name.endswith("y"):
        amount = name[:-1]
        # Look at the complete trailing number: a plain `endswith("1y")` check
        # would also treat "11y" or "21y" as singular.
        trailing_digits = amount[len(amount.rstrip("0123456789")) :]
        unit = " year" if trailing_digits == "1" else " years"
        return amount + unit

    if name.endswith("d"):  # drift
        return name.replace("_", "/")

    return name + " samples"  # dataamount


# Swap the raw pipeline names for their plot labels; the paths are kept as they are.
# NOTE: this rebinds `pipelines`, so the name based selection of `pipeline_ids`
# above has to run on the raw names first.
pipelines = {
    pid: (pipeline_name_mapper(raw_name), log_path)
    for pid, (raw_name, log_path) in pipelines.items()
}

[(pid, label) for pid, (label, _) in pipelines.items() if pid in pipeline_ids]

# Wrangle data

In [None]:
# Collect one log frame and one evaluation frame per selected pipeline,
# then stack them into `df_all` and `df_adjusted`.
list_df_eval_single: list[pd.DataFrame] = []
list_df_all: list[pd.DataFrame] = []

for pipeline_id in pipeline_ids:
    logs = pipeline_logs[pipeline_id]
    pipeline_label = pipelines[pipeline_id][0]

    df_all = logs_dataframe(logs, pipeline_label)
    list_df_all.append(df_all)

    # only the third frame returned by dfs_models_and_evals is used here
    _, _, df_eval_single = dfs_models_and_evals(logs, df_all["sample_time"].max(), pipeline_label)
    list_df_eval_single.append(df_eval_single)

df_adjusted = pd.concat(list_df_eval_single)
df_all = pd.concat(list_df_all)

In [None]:
# Keep only the evaluations for the configured dataset, eval handler and metric.
# `.copy()` yields an independent frame, so the column assignment below does not
# write into a slice of the unfiltered frame (avoids SettingWithCopyWarning).
df_adjusted = df_adjusted[
    (df_adjusted["dataset_id"] == dataset_id)
    & (df_adjusted["eval_handler"] == eval_handler)
    & (df_adjusted["metric"] == metric)
].copy()

# in percent (0-100)
# NOTE: `df_adjusted` is rebound here, so re-running this cell scales the values again.
df_adjusted["value"] = df_adjusted["value"] * 100
df_adjusted

In [None]:
if patch_yearbook:
    # The return values are ignored, i.e. patch_yearbook_time is relied on to
    # modify the given column of the frame in place.
    for column in ("interval_start", "interval_center", "interval_end"):
        patch_yearbook_time(df_adjusted, column)
    patch_yearbook_time(df_all, "sample_time")

In [None]:
# order the evaluations chronologically by the center of their interval
df_adjusted = df_adjusted.sort_values("interval_center")

In [None]:
# Reduce to composite models: keep the rows whose `composite_model_variant` flag is set
df_adjusted = df_adjusted.loc[df_adjusted[composite_model_variant]]
df_adjusted[composite_model_variant].unique()

# Dump Data backup

# Create Plot

In [None]:
# Restrict the evaluation interval to the range in which every policy has evaluations:
# take the first evaluated interval center per pipeline and cut at the latest of them.
min_active_eval_center_per_pipeline = (
    df_adjusted.loc[df_adjusted[composite_model_variant]]
    .groupby("pipeline_ref")["interval_center"]
    .min()
)
maximum_min = min_active_eval_center_per_pipeline.max()
print(maximum_min, min_active_eval_center_per_pipeline)

df_adjusted = df_adjusted.loc[df_adjusted["interval_center"] >= maximum_min]
df_adjusted["interval_center"].unique()

In [None]:
# Keep only the text before the first "-" of the stringified interval center
# (presumably the year of a date-like value; confirm against the data).
# `assign` builds a new frame, so this does not write into the filtered slice
# produced by the previous cell (avoids SettingWithCopyWarning).
df_adjusted = df_adjusted.assign(
    interval_center=df_adjusted["interval_center"].astype(str).str.split("-").str[0]
)

In [None]:
# Aggregate metrics to a scalar value per pipeline
# Presumably: mean of `value` per (pipeline_ref, metric), written to `metric_value`;
# the exact semantics live in df_aggregate_eval_metric, so confirm there.
mean_accuracies = df_aggregate_eval_metric(
    df_adjusted,
    group_by=["pipeline_ref", "metric"],
    in_col="value",
    out_col="metric_value",
    aggregate_func="mean",
)

In [None]:
# Log rows of the HANDLE_SINGLE_TRIGGER stage.
# NOTE(review): the next cell starts with this exact assignment again, so this cell is redundant.
df_triggers = df_all[df_all["id"] == PipelineStage.HANDLE_SINGLE_TRIGGER.name]

In [None]:
# HANDLE_SINGLE_TRIGGER rows whose sample time lies strictly after `maximum_min`
df_triggers = df_all.loc[df_all["id"] == PipelineStage.HANDLE_SINGLE_TRIGGER.name]
df_triggers = df_triggers.loc[df_triggers["sample_time"] > maximum_min]
df_triggers

In [None]:
# Number of triggers per pipeline that happened after maximum_min.
# One extra trigger is added per pipeline: the trigger before the cutoff, which is
# equivalent to the start of the reduced dataset.
num_triggers = df_triggers.groupby("pipeline_ref").agg(count=("id", "count")).add(1)
num_triggers

In [None]:
# Join trigger counts and mean accuracies; every pipeline with an accuracy must survive the join
merged = pd.merge(num_triggers, mean_accuracies, on="pipeline_ref")
assert len(mean_accuracies) == len(merged)
merged

In [None]:
def create_type(x: str):
    """Classify a pipeline label by the first matching substring.

    The checks run in order: ``"year"`` -> ``"yearly"``, ``"samples"`` ->
    ``"amount"``, ``"d"`` -> ``"drift"``. Labels matching none of them are
    reported as ``"unknown"``.
    """
    for needle, kind in (("year", "yearly"), ("samples", "amount"), ("d", "drift")):
        if needle in x:
            return kind
    return "unknown"


# tag every pipeline with its trigger type (used as hue in the plot)
merged["type"] = merged["pipeline_ref"].map(create_type)
merged

In [None]:
# 10-color diverging palette; entries 1 and -2 color the "amount" and "drift" markers
palette = sns.color_palette("RdBu", n_colors=10)
palette

In [None]:
# 10-color colorblind palette; entry 1 colors the "yearly" markers
palette2 = sns.color_palette("colorblind", n_colors=10)
palette2

In [None]:
# Create the scatter plot
from collections import defaultdict

from analytics.plotting.common.common import INIT_PLOT

# Project-wide plot initialisation, followed by the seaborn style for this figure
INIT_PLOT()
# sns.set_theme(style="ticks")
# plt.rcParams['svg.fonttype'] = 'none'
sns.set_style("whitegrid")

# Figure geometry; the figure is 1.5x the base height.
# NOTE(review): FONTSIZE is not referenced by the statements of this cell.
FONTSIZE = 20
DOUBLE_FIG_WIDTH = 10
DOUBLE_FIG_HEIGHT = 3.5
DOUBLE_FIG_SIZE = (DOUBLE_FIG_WIDTH, 1.5 * DOUBLE_FIG_HEIGHT)

fig = plt.figure(
    figsize=DOUBLE_FIG_SIZE,
    dpi=300,
    edgecolor="black",
    frameon=True,
)

# One "X" marker per pipeline: number of triggers vs. mean accuracy, colored by trigger type
ax = sns.scatterplot(
    data=merged,
    x="count",
    y="metric_value",
    hue="type",
    palette={"drift": palette[-2], "yearly": palette2[1], "amount": palette[1]},
    marker="X",
    s=200,
    legend=False,
)
# fixed viewport; the label offsets used for the annotations are tuned to it
ax.set(xlim=(-4, 85), ylim=(85, 94.5))

# Per-label (x, y) offsets of the annotation text relative to its marker, in data
# coordinates. Labels without an entry use the default offset. Built once, as the
# table does not change between points.
offsets = defaultdict(lambda: (+1.5, -0.25))
offsets.update(
    {
        # x, y
        "3 years": (-3, +0.5),
        "1 year": (-2, -0.85),
        "5 years": (-3, +0.5),
        "500 samples": (-5, +0.5),
        "2000 samples": (+1.7, -0.25),
        "250/0.05/5d": (-2, +0.5),
        "100/0.05/1d": (+1.5, -0.7),
        "500/0.05/1d": (+1.5, 0.15),
        "250/0.07/1d": (+1.5, -0.55),
        "250/0.05/1d": (-10, +0.4),
    }
)

# The labels use LaTeX (\textbf) for the bold face. usetex is switched on for the
# annotations only and switched off again even if drawing a label fails.
plt.rc("text", usetex=True)
try:
    # Iterate over the rows positionally, so the labels do not depend on `merged`
    # carrying a 0..n-1 integer index.
    for pipeline_ref, n_triggers, metric_value in zip(
        merged["pipeline_ref"], merged["count"], merged["metric_value"]
    ):
        dx, dy = offsets[pipeline_ref]
        plt.text(
            x=n_triggers + dx,
            y=metric_value + dy,
            s=r"\textbf{" + pipeline_ref + "}",
            fontdict=dict(color="black", fontsize=17),
        )
finally:
    plt.rc("text", usetex=False)


# x-axis: a tick every 20 triggers from 0 to 80
plt.xlabel("Number of triggers", labelpad=10)
plt.xticks(
    ticks=list(range(0, 81, 20)),
    labels=list(range(0, 81, 20)),
    rotation=0,
)

# y-axis: equally spaced ticks at 86, 89, 92 and 95
plt.ylabel("Mean Accuracy %", labelpad=15)
plt.yticks(
    ticks=list(range(86, 96, 3)),
    labels=list(range(86, 96, 3)),
    rotation=0,
)

# Display the plot
plt.tight_layout()
plt.show()

# Save plot as PNG and SVG

In [None]:
# write the figure once per image format into `output_dir`
for img_type in ("png", "svg"):
    img_path = (output_dir / "scatter_yb").with_suffix(f".{img_type}")
    fig.savefig(img_path, bbox_inches="tight", transparent=True)