In [None]:
from pathlib import Path

import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.figure import Figure

from analytics.app.data.load import list_pipelines, load_pipeline_logs
from analytics.app.data.transform import dfs_models_and_evals, logs_dataframe, patch_yearbook_time
from analytics.plotting.common.common import init_plot
from analytics.plotting.common.font import setup_font
from analytics.plotting.common.heatmap import build_heatmap
from analytics.plotting.common.save import save_plot

%load_ext autoreload
%autoreload 2

In [None]:
# Directory that is scanned for pipelines and from which their logs are loaded.
# NOTE(review): machine-specific absolute path — point this at your local copy of the data before running.
pipelines_dir = Path(
    "/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/00_varying_periodic_intervals/baselines_time"
)
# Fail early and name the offending path instead of raising a bare AssertionError.
assert pipelines_dir.exists(), f"pipelines_dir does not exist: {pipelines_dir}"

In [None]:
# Discover the pipelines stored in `pipelines_dir`; the mapping is keyed by pipeline id.
pipelines = list_pipelines(pipelines_dir)

# Largest pipeline id; later used to zero-pad ids to a common width.
max_pipeline_id = max(pipelines)

pipelines

In [None]:
# Load the logs of every discovered pipeline, keyed by pipeline id.
pipeline_logs = {p_id: load_pipeline_logs(p_id, pipelines_dir) for p_id in pipelines}

In [None]:
# --- which pipeline to analyse ---
pipeline_id = 16

# --- which evaluation rows to keep ---
dataset_id = "yearbook_test"
eval_handler = "periodic-delta+-1y"
metrics = ["Accuracy", "F1-weighted", "F1-macro", "F1-micro", "ROC-AUC"]

# --- yearbook specific timestamp patching (applied via `patch_yearbook_time` when True) ---
patch_yearbook = True

# --- composite model ---
# When True, the rows flagged by `composite_model_variant` are added to the data as an extra model.
include_composite_model = False
# Name of the boolean column used to select the composite rows;
# it only ends up in the plotted data when include_composite_model = True
composite_model_variant = "currently_active_model"

# Wrangle data

In [None]:
# Select the configured pipeline and build its display reference:
# the id left-padded with zeros to the width of the largest id, followed by the first entry
# of the pipeline's tuple (presumably its name — confirm against `list_pipelines`).
pipeline_log = pipeline_logs[pipeline_id]
pipeline_ref = f"{pipeline_id}".zfill(len(str(max_pipeline_id))) + f" - {pipelines[pipeline_id][0]}"

df_all = logs_dataframe(pipeline_log, pipeline_ref)

# Only the model log frame and the third returned frame (single evaluations) are needed here.
df_logs_models, _, df_eval_single = dfs_models_and_evals(
    # subtracting would interfere with yearbook patching
    pipeline_log,
    df_all["sample_time"].max(),
    pipeline_ref,
)

# Keep only the configured dataset, evaluation handler and metrics.
# `.copy()` detaches the selection from `df_eval_single`, so the column assignment below writes to an
# independent frame rather than to a slice (avoids pandas' SettingWithCopyWarning / chained assignment).
df_adjusted = df_eval_single[
    (df_eval_single["dataset_id"] == dataset_id)
    & (df_eval_single["eval_handler"] == eval_handler)
    & (df_eval_single["metric"].isin(metrics))
].copy()

# in percent (0-100)
df_adjusted["value"] = df_adjusted["value"] * 100
df_adjusted

In [None]:
if patch_yearbook:
    # (frame, time columns) pairs handed to `patch_yearbook_time`, evaluation frame first
    frames_and_time_columns = [
        (df_adjusted, ["interval_start", "interval_center", "interval_end"]),
        (df_logs_models, ["train_start", "train_end", "real_train_end", "usage_start", "usage_end"]),
    ]
    for frame, time_columns in frames_and_time_columns:
        for time_column in time_columns:
            patch_yearbook_time(frame, time_column)

    # correction for -1 second in timestamp format before patching:
    # december (because of -1 second in timestamp format) -> start of year
    usage_end_month = df_logs_models["usage_end"].dt.to_period("M")
    df_logs_models["usage_end"] = (usage_end_month + 1).dt.to_timestamp()

df_logs_models

In [None]:
# Order the evaluations chronologically, then reduce the interval centers to monthly periods.
df_adjusted = df_adjusted.sort_values(by=["interval_center"]).assign(
    interval_center=lambda frame: frame["interval_center"].dt.to_period("M")
)
df_adjusted

In [None]:
# Add composite model

# Everything below assumes that the data belongs to (at most) one pipeline.
assert df_adjusted["pipeline_ref"].nunique() <= 1

# add the pipeline time series which is the performance of different models stitched together dep.
# w.r.t which model was active
# `.copy()` makes the selection an independent frame, so that overwriting its columns below does not
# operate on a slice of `df_adjusted` (avoids pandas' SettingWithCopyWarning).
pipeline_composite_model = df_adjusted[df_adjusted[composite_model_variant]].copy()
# The composite series is registered under the reserved model index / id 0.
pipeline_composite_model["model_idx"] = 0
pipeline_composite_model["id_model"] = 0

# One label per model index (the index itself as text); index 0 is the composite model.
label_map = {k: f"{k}" for k, _ in df_adjusted[["model_idx", "id_model"]].values}
label_map[0] = "Pipeline composite model"

# Without the composite model `df_adjusted` is used unchanged.
if include_composite_model:
    df_adjusted = pd.concat([pipeline_composite_model, df_adjusted])

# Create Plot

In [None]:
def plot_heatmap_grid(
    data: dict[tuple[int, str], dict[tuple[int, str], tuple[str, pd.DataFrame]]],
    cbar_label: str,
    # Define vmin and vmax for the color scale to be consistent across heatmaps
    vmin: float = 0,
    vmax: float = 0.3,
    nrows: int = 4,
    ncols: int = 4,
    cbar: bool = False,
    single_cbar: bool = False,
    height_factor: float = 1.2,
    width_factor: float = 1.0,
    x_space_factor: float = 1,
    y_space_factor: float = 1,
    grid_alpha: float = 0.0,
) -> Figure:
    """Draw a grid of heatmaps, one subplot per entry of ``data``.

    Args:
        data: Nested mapping ``{(row_id, row_title): {(col_id, col_title): (title, frame)}}``.
            Entries are placed in the grid in iteration order (the ids are not used for placement).
            ``row_title`` / ``col_title`` become the y / x axis labels, ``title`` the subplot title and
            ``frame`` is handed to ``build_heatmap``.
        cbar_label: Label of the figure-level colorbar (only used when ``cbar`` is True).
        vmin: Lower bound of the color scale, forwarded to ``build_heatmap``.
        vmax: Upper bound of the color scale, forwarded to ``build_heatmap``.
        nrows: Number of subplot rows; must be at least the number of row keys in ``data``.
        ncols: Number of subplot columns; must be at least the number of column keys per row.
        cbar: If True, add one colorbar at the right edge of the figure, based on the last drawn heatmap.
        single_cbar: Forwarded to ``build_heatmap`` as its ``cbar`` argument
            (presumably one colorbar per heatmap — confirm against ``build_heatmap``).
        height_factor: Scales the figure height; also forwarded to ``build_heatmap``.
        width_factor: Scales the figure width; also forwarded to ``build_heatmap``.
        x_space_factor: Scales the horizontal spacing between subplots.
        y_space_factor: Scales the vertical spacing between subplots.
        grid_alpha: Forwarded to ``build_heatmap``.

    Returns:
        The created figure.

    Raises:
        ValueError: If ``cbar`` is True but ``data`` contains no heatmap to take the colors from.
    """
    init_plot()
    setup_font(small_label=True, small_title=True)

    double_fig_width = 10
    double_fig_height = 3.5
    fig, axs = plt.subplots(
        nrows=nrows,
        ncols=ncols,
        edgecolor="black",
        frameon=True,
        figsize=(double_fig_width * width_factor, 2.2 * double_fig_height * height_factor),
        dpi=450,
        # Always get a 2D array of axes so that `axs[row, col]` works for every grid shape
        # (including 1xN, Nx1 and 1x1, which would otherwise be squeezed to 1D or a single Axes).
        squeeze=False,
    )

    last_ax = None  # most recently drawn heatmap axis; source of the shared colorbar
    for row_idx, ((_, row_title), row_cells) in enumerate(data.items()):
        for col_idx, ((_, col_title), (title, cell_data)) in enumerate(row_cells.items()):
            ax = axs[row_idx, col_idx]

            _ = build_heatmap(
                cell_data,
                # note that for some years we have two interval centers
                # This is because the evaluation epochs are yearly and the interval offsets are bound by the dataset
                # start, therefore the right interval end is asymmetrically far to the right compared to the left bound.
                # We can still act as if we have a value for every year
                x_ticks=[1950, 1975, 2000],
                y_ticks=[1950, 1975, 2000],
                x_label=col_title,
                y_label=row_title,
                reverse_col=True,
                # color_label = "MMD",  # TODO
                title_label=title,
                target_ax=ax,
                square=False,
                width_factor=width_factor,
                height_factor=height_factor,
                cbar=single_cbar,
                vmin=vmin,
                vmax=vmax,
                grid_alpha=grid_alpha,
            )
            ax.label_outer()  # Remove labels for inner plots, keep only on outer
            last_ax = ax

    if cbar:
        if last_ax is None:
            raise ValueError("cbar=True requires at least one heatmap in `data`")
        cbar_ax = fig.add_axes([1, 0.25, 0.02, 0.6])  # Adjust the colorbar position
        custom_cbar = fig.colorbar(last_ax.collections[0], cax=cbar_ax)  # use last printed axis
        custom_cbar.set_label(cbar_label)  # Set your custom label here

    # Adjust the spacing on this figure explicitly instead of relying on pyplot's "current figure".
    fig.subplots_adjust(wspace=0.1 * x_space_factor, hspace=0.1 * y_space_factor)
    return fig

In [None]:
# Per model: the calendar year of `real_train_end`.
# `.assign` builds a new frame, so the column is not written into a slice of `df_logs_models`
# (avoids pandas' SettingWithCopyWarning) and `df_logs_models` keeps its full timestamps.
df_train_end_years_per_model = df_logs_models[["model_idx", "real_train_end"]].assign(
    real_train_end=lambda frame: frame["real_train_end"].dt.year
)
df_train_end_years_per_model

In [None]:
# Attach each model's training end year to its evaluation rows (rows without a match are kept).
df_merged = pd.merge(
    df_adjusted,
    df_train_end_years_per_model,
    how="left",
    on="model_idx",
)
df_merged

In [None]:
def generate_heatmap_data_for_handler(data: pd.DataFrame, metric: str) -> pd.DataFrame:
    """Pivot the rows of a single metric into a heatmap matrix.

    Args:
        data: Long-format frame providing the columns ``metric``, ``real_train_end``,
            ``interval_center`` and ``value``.
        metric: Value of the ``metric`` column whose rows are kept.

    Returns:
        Frame indexed by ``real_train_end`` with one column per ``interval_center``,
        filled with the corresponding ``value``.
    """
    rows_for_metric = data.loc[data["metric"] == metric]
    return rows_for_metric.pivot(index=["real_train_end"], columns="interval_center", values="value")

In [None]:
# Single-row figure with one heatmap per metric: Accuracy and ROC-AUC.
plot_content = {
    (0, "Trained up to"): {
        (panel_idx, "Evaluation Year"): (metric_name, generate_heatmap_data_for_handler(df_merged, metric_name))
        for panel_idx, metric_name in enumerate(("Accuracy", "ROC-AUC"))
    }
}

# Shared color range for all panels.
vmin, vmax = 40, 100
print(vmin, vmax)

fig = plot_heatmap_grid(
    data=plot_content,
    cbar_label="[Metric] %",
    # grid layout
    nrows=1,
    ncols=2,
    width_factor=0.7,
    height_factor=0.53,
    grid_alpha=0.5,
    # color scale: one figure-level colorbar, none per heatmap
    vmin=vmin,
    vmax=vmax,
    cbar=True,
    single_cbar=False,
)

save_plot(fig, "evaluation_metrics_yb_one")

In [None]:
# Single-row figure with one heatmap per F1 variant.
plot_content = {
    (0, "Trained up to"): {
        (panel_idx, "Evaluation Year"): (metric_name, generate_heatmap_data_for_handler(df_merged, metric_name))
        for panel_idx, metric_name in enumerate(("F1-micro", "F1-macro", "F1-weighted"))
    }
}

# Shared color range for all panels.
vmin, vmax = 40, 100
print(vmin, vmax)

fig = plot_heatmap_grid(
    data=plot_content,
    cbar_label="[Metric] %",
    # grid layout
    nrows=1,
    ncols=3,
    width_factor=1,
    height_factor=0.53,
    grid_alpha=0.5,
    # color scale: one figure-level colorbar, none per heatmap
    vmin=vmin,
    vmax=vmax,
    cbar=True,
    single_cbar=False,
)

save_plot(fig, "evaluation_metrics_yb_two")