In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import matplotlib as mpl

# Global matplotlib defaults for every figure produced by this notebook.
# Note: "text.usetex" makes matplotlib render text through an external LaTeX
# installation, so LaTeX must be available on the machine running the notebook.
PAPER_RC_PARAMS = {
    # Render all text with LaTeX, using a serif (Times) face.
    "text.usetex": True,
    "font.family": "serif",
    "font.serif": ["Times"],
    # 14 pt for axis labels and for general text (chosen to match the
    # ECAI paper font size).
    "axes.labelsize": 14,
    "font.size": 14,
}
mpl.rcParams.update(PAPER_RC_PARAMS)

In [None]:
# Name of the benchmark collection; also used later as a prefix for figure files.
dataset_name = "Complex"

# Load the raw results table from the Datasets directory one level up.
dataset = pd.read_csv(f"../Datasets/{dataset_name}.csv")

# Distinct instance identifiers, in sorted order.
instances = sorted(dataset["instance"].unique())
print(instances)

# Arm bookkeeping: only non-negative arm indices count as "valid" arms.
all_arm_index_list = dataset["arm_index"].unique()
valid_arm_index_list = [arm_idx for arm_idx in all_arm_index_list if arm_idx >= 0]
number_of_arms = len(valid_arm_index_list)

# Number of distinct repetitions and iterations found in the table.
# NOTE(review): "trails" looks like a typo for "trials"; the name is kept
# unchanged because other cells may reference it.
number_of_trails = len(dataset["repetition"].unique())
max_horizon_time = len(dataset["iteration"].unique())

# Optimizers recorded on rows with a negative arm index.
combined_search_algorithms = dataset.loc[
    dataset["arm_index"] < 0, "optimizer"
].unique()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import interp1d

# Columns of `dataset` needed for the per-instance performance curves.
RUN_COLUMNS = [
    "instance",
    "arm_name",
    "arm_index",
    "repetition",
    "iteration",
    "loss",
    "eval_time",
]
# Keys identifying one run: a single repetition of a single arm on an instance.
RUN_KEYS = ["instance", "arm_index", "repetition"]

# Arms drawn in each figure (values of the "arm_name" column) and their
# display labels, in plotting order.
ARM_NAMES = ["xtab", "flaml", "realmlp", "tabforestpfn", "tabpfn_v2_phe"]
ARM_LABELS = ["XTab", "FLAML", "RealMLP", "TabForestPFN", "TabPFN(v2)"]

# The x-axis spans two hours with one tick per hour; the tick arithmetic below
# treats `cum_eval_time` as seconds.
X_AXIS_MAX_SECONDS = 2 * 3600
X_TICK_STEP_SECONDS = 3600

# Number of points on the common time grid each run is interpolated onto.
N_INTERPOLATION_POINTS = 100


def _add_time_and_performance(instance_df):
    """Return a sorted copy of `instance_df` with derived per-run columns.

    Parameters
    ----------
    instance_df : pandas.DataFrame
        Rows of one instance containing at least the `RUN_COLUMNS` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified), sorted by
        (instance, arm_index, repetition, iteration) with a fresh 0..n-1 index
        and these extra columns:

        - ``incumbent_loss``: running minimum of ``loss`` within each run,
          taken in iteration order.
        - ``max_iteration``: largest ``iteration`` value within each run.
        - ``iteration_ratio``: ``(iteration + 1) / max_iteration``.
        - ``avg_eval_time``: mean ``eval_time`` over all rows sharing the same
          (instance, arm_index, iteration), i.e. averaged across repetitions.
        - ``cum_eval_time``: the run's summed ``avg_eval_time`` scaled by
          ``iteration_ratio``.
        - ``performance``: ``1 - incumbent_loss``.
    """
    # Sorting first yields a new frame, so the column assignments below never
    # write into a view of the caller's data.
    runs = instance_df.sort_values(RUN_KEYS + ["iteration"]).reset_index(drop=True)

    runs["incumbent_loss"] = runs.groupby(RUN_KEYS)["loss"].cummin()

    # Max iteration per (instance, arm_index, repetition).
    runs["max_iteration"] = runs.groupby(RUN_KEYS)["iteration"].transform("max")

    # NOTE(review): if `iteration` is 0-based, the last row of a run gets a
    # ratio of n / (n - 1) > 1, and a run whose max iteration is 0 divides by
    # zero. Confirm the indexing convention before changing this formula; it is
    # kept as-is so the plotted curves do not change.
    runs["iteration_ratio"] = (runs["iteration"] + 1) / runs["max_iteration"]

    runs["avg_eval_time"] = runs.groupby(["instance", "arm_index", "iteration"])[
        "eval_time"
    ].transform("mean")

    # Total (repetition-averaged) eval time per (instance, arm_index, repetition),
    # broadcast back onto every row of the run.
    total_eval_time = runs.groupby(RUN_KEYS)["avg_eval_time"].transform("sum")

    # Spread the total evenly over the iterations of the run.
    runs["cum_eval_time"] = total_eval_time * runs["iteration_ratio"]

    runs["performance"] = 1 - runs["incumbent_loss"]
    return runs


def _align_runs_on_time_grid(runs, n_points=N_INTERPOLATION_POINTS):
    """Interpolate every (arm_name, repetition) run onto a per-arm time grid.

    For each arm the grid is `n_points` evenly spaced values between the
    smallest and largest ``cum_eval_time`` observed for that arm (over all its
    repetitions), so all repetitions of an arm share the same x-values.

    Parameters
    ----------
    runs : pandas.DataFrame
        Frame with ``arm_name``, ``repetition``, ``cum_eval_time`` and
        ``performance`` columns.
    n_points : int, optional
        Number of grid points per arm.

    Returns
    -------
    pandas.DataFrame
        Concatenation of one frame per run with columns ``cum_eval_time``,
        ``performance``, ``arm_name`` and ``repetition``.
    """
    arm_time_ranges = runs.groupby("arm_name")["cum_eval_time"].agg(["min", "max"])

    aligned_runs = []
    for (arm, rep), run in runs.groupby(["arm_name", "repetition"]):
        # interp1d needs its x-values in increasing order.
        run_sorted = run.sort_values("cum_eval_time")

        # Common interpolation time grid for this arm.
        interp_times = np.linspace(
            arm_time_ranges.at[arm, "min"],
            arm_time_ranges.at[arm, "max"],
            n_points,
        )

        # NOTE(review): grid points outside this run's own time range are
        # filled by linear extrapolation, which can produce values beyond the
        # best performance actually observed in the run.
        interp_func = interp1d(
            run_sorted["cum_eval_time"],
            run_sorted["performance"],
            bounds_error=False,
            fill_value="extrapolate",
        )

        aligned_runs.append(
            pd.DataFrame(
                {
                    "cum_eval_time": interp_times,
                    "performance": interp_func(interp_times),
                    "arm_name": arm,
                    "repetition": rep,
                }
            )
        )

    return pd.concat(aligned_runs)


def _summarize_performance(aligned):
    """Aggregate aligned runs into mean, std, count and a 95% CI half-width.

    Groups by (arm_name, cum_eval_time) and adds ``ci95`` computed as
    ``1.96 * std / sqrt(count)``. ``ci95`` is NaN wherever ``std`` is NaN
    (for example when a group holds a single repetition).
    """
    summary = (
        aligned.groupby(["arm_name", "cum_eval_time"])["performance"]
        .agg(["mean", "std", "count"])
        .reset_index()
    )
    summary["ci95"] = 1.96 * summary["std"] / np.sqrt(summary["count"])
    return summary


def _plot_instance_performance(summary, instance, output_path):
    """Draw mean performance with a CI band per arm and save it to `output_path`.

    Only the arms listed in `ARM_NAMES` are drawn. The figure is displayed and
    then closed, so looping over many instances does not accumulate open
    figures.
    """
    fig, ax = plt.subplots(figsize=(3.5, 2.5))

    # No legend is drawn; the `label=` values are kept so one can be added
    # with `ax.legend()` if needed.
    for arm, label in zip(ARM_NAMES, ARM_LABELS):
        arm_data = summary[summary["arm_name"] == arm]
        ax.plot(
            arm_data["cum_eval_time"], arm_data["mean"], label=label, linewidth=2.5
        )
        ax.fill_between(
            arm_data["cum_eval_time"],
            arm_data["mean"] - arm_data["ci95"],
            arm_data["mean"] + arm_data["ci95"],
            alpha=0.1,
        )

    ax.set_title("OpenML-" + str(instance), fontsize=14)
    ax.set_xlabel("Wall-clock time")
    ax.set_ylabel("Performance")
    ax.grid(False)

    # One x-axis tick per hour, labelled "0h", "1h", ...
    xticks = np.arange(0, X_AXIS_MAX_SECONDS + 1, X_TICK_STEP_SECONDS)
    ax.set_xticks(xticks)
    ax.set_xticklabels([f"{int(t // 3600)}h" for t in xticks])
    ax.set_xlim(0, X_AXIS_MAX_SECONDS)

    fig.tight_layout()
    fig.savefig(output_path, bbox_inches="tight")

    # Show the figure in the notebook, then release it.
    plt.show()
    plt.close(fig)


for instance in instances:
    print(instance)

    # Take an explicit copy of the instance's rows so that nothing downstream
    # writes into a slice of `dataset` (avoids SettingWithCopyWarning).
    df = dataset.loc[dataset["instance"] == instance, RUN_COLUMNS].copy()

    filtered_df = _add_time_and_performance(df)
    aligned_df = _align_runs_on_time_grid(filtered_df)
    summary = _summarize_performance(aligned_df)

    _plot_instance_performance(
        summary,
        instance,
        "./figures/" + dataset_name + "_" + str(instance) + "_performance.pdf",
    )