In [None]:
import os
from pathlib import Path

import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.figure import Figure

from analytics.app.data.load import list_pipelines, load_pipeline_logs
from analytics.app.data.transform import dfs_models_and_evals, logs_dataframe
from analytics.plotting.common.common import init_plot
from analytics.plotting.common.font import setup_font
from analytics.plotting.common.heatmap import build_heatmap
from analytics.plotting.common.save import save_plot

%load_ext autoreload
%autoreload 2

In [None]:
# Directory containing the pipeline logs to analyse.
# Set the MODYN_PIPELINES_DIR environment variable to point the notebook at another checkout;
# the fallback below is the original author's local path.
pipelines_dir = Path(
    os.environ.get(
        "MODYN_PIPELINES_DIR",
        "/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/huffpost/10_baselines_time",
    )
)
assert pipelines_dir.exists(), f"Pipelines directory not found: {pipelines_dir}"

In [None]:
# Discover the pipelines stored under `pipelines_dir`; the result is keyed by pipeline id.
pipelines = list_pipelines(pipelines_dir)

# Largest id, used further down to zero-pad pipeline references to a common width.
max_pipeline_id = max(pipelines)

pipelines

In [None]:
# Load the logs of every discovered pipeline, keyed by pipeline id.
# Only the ids are needed here, so iterate over the keys instead of unpacking unused values.
pipeline_logs = {p_id: load_pipeline_logs(p_id, pipelines_dir) for p_id in pipelines}

In [None]:
# --- Notebook configuration -------------------------------------------------

# Id of the pipeline to analyse (must be a key of `pipelines`).
pipeline_id = 273

# Evaluation results to keep: one dataset, one evaluation handler, and the listed metrics.
dataset_id = "huffpost_kaggle_test"
eval_handler = "periodic-current"
metrics = [
    "Accuracy",
    "F1-micro",
    "F1-macro",
    "F1-weighted",
    "Top-10-Accuracy",
    "Top-5-Accuracy",
    "Top-2-Accuracy",
]

# Composite model: `composite_model_variant` names the boolean column that selects the
# composite-model rows; those rows are only merged into the data when
# `include_composite_model` is True.
include_composite_model = False
composite_model_variant = "currently_active_model"

# Wrangle data

In [None]:
pipeline_log = pipeline_logs[pipeline_id]
# Human-readable reference: zero-padded pipeline id followed by the first element of the pipeline entry.
pipeline_ref = f"{pipeline_id}".zfill(len(str(max_pipeline_id))) + f" - {pipelines[pipeline_id][0]}"

df_all = logs_dataframe(pipeline_log, pipeline_ref)

df_logs_models, _, df_eval_single = dfs_models_and_evals(
    # subtracting would interfere with yearbook patching
    pipeline_log,
    df_all["sample_time"].max(),
    pipeline_ref,
)

# Keep only the configured dataset, evaluation handler and metrics.
selected_rows = (
    (df_eval_single["dataset_id"] == dataset_id)
    & (df_eval_single["eval_handler"] == eval_handler)
    & (df_eval_single["metric"].isin(metrics))
)

# `.assign` builds a new frame, so the scaling neither writes into a slice of
# `df_eval_single` nor triggers a SettingWithCopyWarning.
# in percent (0-100)
df_adjusted = df_eval_single.loc[selected_rows].assign(value=lambda df: df["value"] * 100)
df_adjusted

In [None]:
# Order the evaluations chronologically, then reduce the interval centers to monthly periods.
df_adjusted = df_adjusted.sort_values(by=["interval_center"]).assign(
    interval_center=lambda df: df["interval_center"].dt.to_period("M")
)
df_adjusted

In [None]:
# Add composite model

assert df_adjusted["pipeline_ref"].nunique() <= 1
# add the pipeline time series which is the performance of different models stitched together dep.
# w.r.t which model was active
# `.assign` returns a new frame, so the ids are overwritten on a copy rather than on a
# slice of `df_adjusted` (which would raise a SettingWithCopyWarning).
pipeline_composite_model = df_adjusted[df_adjusted[composite_model_variant]].assign(model_idx=0, id_model=0)

# Display labels per model index; index 0 is reserved for the composite model.
label_map = {model_idx: f"{model_idx}" for model_idx, _ in df_adjusted[["model_idx", "id_model"]].values}
label_map[0] = "Pipeline composite model"

if include_composite_model:
    df_adjusted = pd.concat([pipeline_composite_model, df_adjusted])

# Create Plot

In [None]:
def plot_heatmap_grid(
    data: dict[tuple[int, str], dict[tuple[int, str], tuple[str, pd.DataFrame]]],
    cbar_label: str,
    # Define vmin and vmax for the color scale to be consistent across heatmaps
    vmin: float = 0,
    vmax: float = 0.3,
    nrows: int = 4,
    ncols: int = 4,
    cbar: bool = False,
    single_cbar: bool = False,
    height_factor: float = 1.2,
    width_factor: float = 1.0,
    x_space_factor: float = 1,
    y_space_factor: float = 1,
    grid_alpha: float = 0.0,
) -> Figure:
    """Draw a grid of heatmaps that share one color scale.

    Args:
        data: Nested mapping describing the grid. Outer keys are ``(row index, row title)``,
            inner keys are ``(column index, column title)`` and each value is a
            ``(heatmap title, matrix)`` pair. The position of a heatmap in the grid follows the
            iteration order of the mappings; the integer in the key is not used for placement.
        cbar_label: Label of the shared colorbar (only used when ``cbar`` is True).
        vmin: Lower bound of the color scale, forwarded to every heatmap.
        vmax: Upper bound of the color scale, forwarded to every heatmap.
        nrows: Number of subplot rows.
        ncols: Number of subplot columns.
        cbar: Whether to add one shared colorbar to the right of the grid.
        single_cbar: Forwarded to ``build_heatmap`` as its ``cbar`` argument
            (presumably a colorbar per heatmap - confirm against ``build_heatmap``).
        height_factor: Scales the figure height; also forwarded to ``build_heatmap``.
        width_factor: Scales the figure width; also forwarded to ``build_heatmap``.
        x_space_factor: Scales the horizontal spacing between subplots.
        y_space_factor: Scales the vertical spacing between subplots.
        grid_alpha: Forwarded to ``build_heatmap``.

    Returns:
        The figure containing the heatmap grid.
    """
    init_plot()
    setup_font(small_label=True, small_title=True)

    double_fig_width = 10
    double_fig_height = 3.5
    fig, axs = plt.subplots(
        nrows=nrows,
        ncols=ncols,
        edgecolor="black",
        frameon=True,
        figsize=(double_fig_width * width_factor, 2.2 * double_fig_height * height_factor),
        dpi=450,
        # Always get a 2D array of axes so that `axs[row, col]` is valid for every grid shape,
        # including single-row, single-column and 1x1 grids.
        squeeze=False,
    )

    # Only these periods get a tick label. In the run these values were picked from, the
    # matrix columns were quarterly periods (Jan 2012 - Jul 2022) and the index half-yearly
    # periods (Jul 2012 - Jul 2022).
    x_tick_periods = (pd.Period("Apr 2014"), pd.Period("Jul 2018"), pd.Period("Jan 2022"))
    y_tick_periods = (pd.Period("Jul 2014"), pd.Period("Jul 2018"), pd.Period("Jan 2022"))

    def period_ticks(periods, selected):
        """Return (position, "Mon\\nYYYY") pairs for the periods contained in `selected`."""
        return [
            (position, period.to_timestamp().strftime("%b %Y").replace(" ", "\n"))
            for position, period in enumerate(periods)
            if period in selected
        ]

    last_ax = None
    for x, (row_key, row_cells) in enumerate(data.items()):
        (_, row_title) = row_key
        for y, (col_key, cell) in enumerate(row_cells.items()):
            (_, col_title) = col_key
            title, cell_data = cell
            ax = axs[x, y]

            _ = build_heatmap(
                cell_data,
                x_custom_ticks=period_ticks(cell_data.columns, x_tick_periods),
                y_custom_ticks=period_ticks(cell_data.index, y_tick_periods),
                x_label=col_title,
                y_label=row_title,
                reverse_col=True,
                # x_label,
                # color_label = "MMD",  # TODO
                title_label=title,
                target_ax=ax,
                square=False,
                width_factor=width_factor,
                height_factor=height_factor,
                cbar=single_cbar,
                vmin=vmin,
                vmax=vmax,
                grid_alpha=grid_alpha,
            )
            ax.label_outer()  # Remove labels for inner plots, keep only on outer
            last_ax = ax

    # The shared colorbar is taken from the last drawn heatmap; all heatmaps use the same
    # vmin/vmax. Skipped when nothing was drawn.
    if cbar and last_ax is not None:
        cbar_ax = fig.add_axes([1, 0.25, 0.02, 0.6])  # Adjust the colorbar position
        custom_cbar = fig.colorbar(last_ax.collections[0], cax=cbar_ax)
        custom_cbar.set_label(cbar_label)

    plt.subplots_adjust(wspace=0.1 * x_space_factor, hspace=0.1 * y_space_factor)
    return fig

In [None]:
# Training end per model, reduced to monthly periods.
# `.assign` creates a new frame instead of writing into a column slice of `df_logs_models`,
# which would raise a SettingWithCopyWarning.
df_train_end_years_per_model = df_logs_models[["model_idx", "real_train_end"]].assign(
    real_train_end=lambda df: df["real_train_end"].dt.to_period("M")
)
df_train_end_years_per_model

In [None]:
# Attach each model's training end to its evaluation rows; the left join keeps every evaluation row.
df_merged = pd.merge(
    df_adjusted,
    df_train_end_years_per_model,
    how="left",
    on="model_idx",
)
df_merged

In [None]:
def generate_heatmap_data_for_handler(data: pd.DataFrame, metric: str) -> pd.DataFrame:
    """Pivot the rows of a single metric into a heatmap matrix.

    Args:
        data: Long-format frame with the columns ``metric``, ``real_train_end``,
            ``interval_center`` and ``value``.
        metric: Name of the metric whose rows are kept.

    Returns:
        A frame indexed by ``real_train_end`` with one column per ``interval_center``
        holding the corresponding ``value``.
    """
    is_requested_metric = data["metric"] == metric
    return data.loc[is_requested_metric].pivot(
        index=["real_train_end"],
        columns="interval_center",
        values="value",
    )

In [None]:
# Metrics shown side by side, one heatmap per metric.
# Accuracy is almost identical to F1-micro and F1-weighted; macro is broken.
heatmap_metrics = ["Accuracy", "Top-2-Accuracy", "Top-5-Accuracy", "Top-10-Accuracy"]

# NOTE(review): the pivoted matrices have `real_train_end` as index and `interval_center` as
# columns, while "Trained up to" is passed on as x label and "Evaluation Year" as y label -
# confirm against `build_heatmap` that the axis titles are not swapped.
plot_content = {
    (0, "Evaluation Year"): {
        (col_idx, "Trained up to"): (metric, generate_heatmap_data_for_handler(df_merged, metric))
        for col_idx, metric in enumerate(heatmap_metrics)
    }
}


# find vmin and vmax
# Values were scaled to percent (0-100) earlier in the notebook, so the sentinels must not
# assume a 0-1 range; start from +/- infinity to obtain the true extrema.
vmin = float("inf")
vmax = float("-inf")
for row_cells in plot_content.values():
    for _, cell_data in row_cells.values():
        vmin = min(vmin, cell_data.min().min())
        vmax = max(vmax, cell_data.max().max())
print(vmin, vmax)

fig = plot_heatmap_grid(
    data=plot_content,
    cbar_label="[Metric] %",
    nrows=1,
    ncols=len(heatmap_metrics),
    vmin=vmin,
    vmax=vmax,
    cbar=True,
    single_cbar=False,
    width_factor=1,
    height_factor=0.48,
    grid_alpha=0.5,
)

# TODO: we have already seen the Accuracy plot in Offline eval chapter (with another color scale though)
save_plot(fig, "evaluation_metrics_hp")