In [None]:
from pathlib import Path

import matplotlib.dates as mdates
import pandas as pd
from matplotlib.ticker import FixedFormatter, FixedLocator

from analytics.app.data.load import list_pipelines
from analytics.app.data.transform import dfs_models_and_evals, logs_dataframe, patch_yearbook_time
from analytics.plotting.common.metric_over_time import plot_metric_over_time
from analytics.plotting.common.save import save_plot

%load_ext autoreload
%autoreload 2

In [None]:
# Directory that holds the logs of the pipelines analysed in this notebook.
# NOTE(review): machine-specific absolute path; adjust it to the local copy of the pipeline logs.
pipelines_dir = Path(
    "/Users/robinholzinger/robin/dev/eth/modyn-robinholzi-data/data/triggering/yearbook/10_baselines_time"
)
# Fail early with a readable message; a plain file at this location is not usable either.
assert pipelines_dir.is_dir(), f"pipeline log directory not found: {pipelines_dir}"

In [None]:
# Discover the pipelines stored below `pipelines_dir`; the result is keyed by pipeline id.
pipelines = list_pipelines(pipelines_dir)

# Largest pipeline id; its digit count is used further down to zero-pad pipeline references.
max_pipeline_id = max(pipelines)

pipelines

In [None]:
from analytics.app.data.load import load_pipeline_logs

# Load the logs of every discovered pipeline, keyed by pipeline id.
# Only the ids are needed for loading, so iterate over the keys instead of unpacking unused values.
pipeline_logs = {p_id: load_pipeline_logs(p_id, pipelines_dir) for p_id in pipelines}

In [None]:
# --- Selection of pipelines and evaluation results ---------------------------

# Ids of the pipelines whose evaluation results are compared in the plot
pipeline_ids = [20, 24, 26, 33]

# Evaluation rows are filtered down to exactly this dataset / handler / metric
dataset_id = "yearbook_test"
eval_handler = "periodic-delta+-1y"
metric = "Accuracy"

# Whether the interval columns are passed through `patch_yearbook_time` before plotting
patch_yearbook = True

# --- Composite model ----------------------------------------------------------

# Name of the boolean column that selects the rows forming the composite model
composite_model_variant = "currently_active_model"

# NOTE(review): no cell visible in this notebook reads this flag; the composite
# rows are currently added regardless of its value.
include_composite_model = True

# Wrangle data

In [None]:
# Collect the evaluation frame of every selected pipeline.
all_eval_dfs = []
for pipeline_id in pipeline_ids:
    pipeline_log = pipeline_logs[pipeline_id]
    # Label: pipeline id zero-padded to the width of the largest id, followed by
    # the first element of the pipeline's entry in `pipelines`.
    pipeline_ref = f"{pipeline_id}".zfill(len(str(max_pipeline_id))) + f" - {pipelines[pipeline_id][0]}"

    df_all = logs_dataframe(pipeline_log, pipeline_ref)

    # Only the third returned frame (the evaluation results) is used below.
    df_logs_models, _, df_eval_single = dfs_models_and_evals(
        # subtracting would interfere with yearbook patching
        pipeline_log,
        df_all["sample_time"].max(),
        pipeline_ref,
    )
    all_eval_dfs.append(df_eval_single)

df_adjusted = pd.concat(all_eval_dfs)

# Keep only the configured dataset / evaluation handler / metric.
# `.copy()` makes the result an independent frame, so the column assignment
# below does not write into a slice of the concatenated frame
# (which would raise pandas' SettingWithCopyWarning).
df_adjusted = df_adjusted[
    (df_adjusted["dataset_id"] == dataset_id)
    & (df_adjusted["eval_handler"] == eval_handler)
    & (df_adjusted["metric"] == metric)
].copy()

# in percent (0-100)
df_adjusted["value"] = df_adjusted["value"] * 100
df_adjusted

In [None]:
# Name transformer
import re


def name_transformer(name):
    """Turn a raw pipeline reference into a short legend label.

    Everything up to and including ``yearbook_timetrigger_<N>y`` (or
    ``yearbook_timetriggers_<N>y``) is replaced by ``trigger every <N> years``.
    The singular ``year`` is used only when ``<N>`` equals 1. Text following the
    match is kept, and names that do not match are returned unchanged.

    Args:
        name: Raw pipeline reference string.

    Returns:
        The shortened label.
    """

    def _label(match):
        interval = match.group(1)
        # Decide on the parsed number: a textual "1 years" -> "1 year" replacement
        # would also hit 11, 21, 31, ... and produce e.g. "11 year".
        unit = "year" if int(interval) == 1 else "years"
        return f"trigger every {interval} {unit}"

    return re.sub(r".*yearbook_timetriggers?_(\d+)y", _label, name)


# Swap the raw pipeline references for the short labels produced by `name_transformer`.
short_pipeline_refs = df_adjusted["pipeline_ref"].apply(name_transformer)
df_adjusted["pipeline_ref"] = short_pipeline_refs
df_adjusted

In [None]:
# Distinct pipeline labels present in the frame (these become the legend entries of the plot).
df_adjusted.loc[:, "pipeline_ref"].unique()

In [None]:
# Sanity check: pipeline labels that have rows for the configured dataset.
# Compare against `dataset_id` instead of a retyped literal; the previous
# literal used a hyphen ("yearbook-test") and therefore never matched the
# configured "yearbook_test".
df_adjusted.loc[df_adjusted["dataset_id"] == dataset_id, "pipeline_ref"].unique()

In [None]:
# Pass each interval column through `patch_yearbook_time` when patching is enabled.
# NOTE(review): the helper is called without using a return value, so it
# presumably modifies `df_adjusted` in place; if so, re-running this cell may
# patch the columns a second time. Confirm against the helper.
if patch_yearbook:
    for column in ("interval_start", "interval_center", "interval_end"):
        patch_yearbook_time(df_adjusted, column)

In [None]:
# Order the rows chronologically by the centre of their evaluation interval.
df_adjusted = df_adjusted.sort_values("interval_center")

In [None]:
# Add composite model
#
# The composite model consists of the rows flagged by the boolean column named
# in `composite_model_variant`. They are duplicated and tagged with the
# sentinel `model_idx` -1 so that they can be selected as one series.

# Drop composite rows left over from an earlier run of this cell, so that
# re-running it does not append them a second time.
# NOTE(review): assumes regular models never use model_idx -1.
df_adjusted = df_adjusted[df_adjusted["model_idx"] != -1]

# `.assign` returns a new frame; the sentinel is therefore not written into a
# slice of `df_adjusted` (which would raise pandas' SettingWithCopyWarning).
pipeline_composite_model = df_adjusted[df_adjusted[composite_model_variant]].assign(model_idx=-1)

df_adjusted = pd.concat([df_adjusted, pipeline_composite_model])

In [None]:
# Keep just the composite-model rows (model_idx == -1) as an independent frame for plotting.
reduced = df_adjusted.loc[df_adjusted["model_idx"].eq(-1)].copy()
reduced

# Create Plot

In [None]:
# Years that get a labelled tick on the x axis. Tick positions and tick labels
# are both derived from this one list so they cannot drift apart.
x_tick_years = [1940, 1970, 2000]
x_tick_positions = [mdates.date2num(pd.Timestamp(f"{tick_year}-01-01")) for tick_year in x_tick_years]
x_tick_labels = [str(tick_year) for tick_year in x_tick_years]

# One line per pipeline: metric value of the composite model over the evaluation interval centre.
fig = plot_metric_over_time(
    reduced,
    x="interval_center",
    y="value",
    hue="pipeline_ref",
    style="pipeline_ref",
    markers=False,
    # figure size and legend
    width_factor=0.85,
    height_factor=0.75,
    legend_label="TimeTrigger Pipeline",
    small_legend_fonts=True,
    # x axis: fixed ticks at the selected years
    x_date_locator=FixedLocator(x_tick_positions),
    x_date_formatter=FixedFormatter(x_tick_labels),
    x_label="Evaluation Year",
    # y axis: values were scaled to percent earlier in the notebook
    y_ticks=[50, 70, 90],
    y_label="Accuracy (%)",
    ylim=(45, 103),
)

save_plot(fig, "simple_yb_composite_models_over_time")