In [None]:
import pickle
import pandas as pd
import seaborn as sns
import os
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator

In [None]:
# Notebook-wide plot styling: white grid background, Arial font and a compact
# 5x4 default figure size passed through matplotlib's rc parameters.
figure_rc = {"figure.figsize": (5, 4)}
sns.set_theme(style="whitegrid", font="Arial", rc=figure_rc)

In [None]:
# Name of the results sub-folder to analyse; also reused later to build the
# output path of the saved figure.
log_folder_name = "hospital_filtered"
folder_path = os.path.join(".", "results", log_folder_name)
# os.listdir returns entries in arbitrary order, so sort the names to make the
# file order (and everything derived from it) reproducible across runs.
pickle_files = sorted(
    file for file in os.listdir(folder_path) if file.endswith(".pickle")
)
pickle_files

In [None]:
# Build one column of per-iteration durations for every result file, named
# after the strategy encoded in the file name (<log>__<strategy>__<ending>).
#
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this cell at result files produced by trusted runs.
duration_series = []
for file_name in pickle_files:
    # Parse the bare file name rather than the joined path so that the result
    # does not depend on the directory names.
    name_parts = file_name.split("__")
    if len(name_parts) != 3:
        # Without this guard the file would be stored under the previous
        # file's strategy (or raise NameError on the first iteration).
        print(f"Skipping {file_name!r}: expected <log>__<strategy>__<ending>")
        continue
    log_name, strategy, ending = name_parts

    file_path = os.path.join(folder_path, file_name)
    with open(file_path, "rb") as file:
        data = pickle.load(file)

    # Each element of `data` is indexed by "duration"; keep only that value.
    duration_ds = pd.Series(
        [iteration["duration"] for iteration in data],
        name=strategy,
    )
    duration_series.append(duration_ds)

# Concatenate once at the end: the column-wise concat aligns the series on the
# union of their positional indices, so shorter runs are padded with NaN.
duration_df = (
    pd.concat(duration_series, axis=1) if duration_series else pd.DataFrame()
)

In [None]:
# Exclude the "F_1.0" column from the comparison. errors="ignore" keeps the
# cell idempotent: re-running it (or running it on a results folder without an
# "F_1.0" file) no longer raises KeyError.
duration_df = duration_df.drop(columns="F_1.0", errors="ignore")

In [None]:
# Total duration per column, then split each "<strategy>_<filter_rate>" column
# name into a two-level index so both parts become regular columns.
summed = duration_df.sum()
summed.index = summed.index.str.split("_", expand=True)

column_names = {"level_0": "strategy", "level_1": "filter_rate", 0: "duration"}
durations = (
    summed.reset_index()
    .rename(columns=column_names)
    .sort_values(by=["strategy", "filter_rate"], ascending=[False, True])
    .assign(
        # Divide by 60 (presumably seconds -> minutes, matching the "(m)"
        # unit on the plot's y-axis label; confirm against the producer).
        duration=lambda frame: frame["duration"] / 60,
        # Re-joined key in the same "<strategy>_<filter_rate>" format.
        combined=lambda frame: (
            frame["strategy"] + "_" + frame["filter_rate"].astype(str)
        ),
    )
)
durations

In [None]:
# Bar colours keyed by "<strategy>_<filter_rate>". Every ordering strategy has
# its own hue, with one shade per filter rate listed in the same order as
# `_filter_rates`; the "B" entry only exists at rate 1.0.
_filter_rates = ("0.1", "0.2", "0.4", "0.7")
_strategy_shades = {
    "LHCMDB": ("#f6a120", "#d88d05", "#b27407", "#8c5c02"),
    "CHLDMB": ("#0f8112", "#0e6f0e", "#0c5c0a", "#064906"),
    "HCDMLB": ("#ece232", "#d9d909", "#b2b303", "#8c8c08"),
    "CDLHMB": ("#d583c9", "#c459b2", "#b93ca7", "#aa21a2"),
}

legend_mapping = {
    f"{strategy_name}_{rate}": shade
    for strategy_name, shades in _strategy_shades.items()
    for rate, shade in zip(_filter_rates, shades)
}
legend_mapping["B_1.0"] = "#848484"
# legend_mapping["F_1.0"] = "#000000"

In [None]:
# Grouped bar chart of total computation time: one cluster of bars per
# strategy, one bar per filter rate inside the cluster.
bar_width = 0.1
strategy_spacing = 0.1
ticks_fontsize = 9

# x_offsets[i] is the left edge of the i-th strategy cluster; the final entry
# is where a further cluster would start.
x_offsets = [0]
groupby = durations.groupby("strategy")
# Materialise the groups once so the bars and the tick labels below are
# guaranteed to come from the same sequence.
strategy_groups = list(groupby)
for strategy, group in strategy_groups:
    # Bars of one cluster sit edge to edge, starting at the cluster's offset.
    x_positions = [x_offsets[-1] + bar_width * j for j in range(len(group))]
    # Next cluster starts after the last bar plus the inter-cluster gap.
    x_offsets.append(x_positions[-1] + bar_width + strategy_spacing)
    for x, filter_rate, duration in zip(
        x_positions, group["filter_rate"], group["duration"]
    ):
        plt.bar(
            x,
            duration,
            width=bar_width,
            color=legend_mapping[f"{strategy}_{filter_rate}"],
            label=strategy,
            align="edge",
        )
        # Vertical annotation inside the bar, slightly right of its centre
        # and starting at 80% of the bar height.
        plt.text(
            x + bar_width / 2 + 0.02,
            duration * 0.8,
            f"F-Rate: {filter_rate}",
            ha="center",
            va="top",
            rotation=90,
            fontsize=8,
        )

plt.ylabel("Computation Time (m)")
# Logarithmic y-axis with fixed limits from 1 to 10**5.
plt.yscale("log")
plt.ylim(1, 10**5)
plt.yticks(fontsize=ticks_fontsize)

plt.xlabel("Trace Ordering Strategies")
# Place each tick at the centre of its cluster: midway between the cluster's
# left edge and the right edge of its last bar (next offset minus the gap).
tick_positions = [
    (left + right - strategy_spacing) / 2
    for left, right in zip(x_offsets[:-1], x_offsets[1:])
]
# Each strategy name is rendered with its characters separated by dashes.
tick_labels = ["-".join(strategy_name) for strategy_name, _ in strategy_groups]
plt.xticks(tick_positions, tick_labels, fontsize=ticks_fontsize)

# Create the output folder first; savefig does not create missing directories
# and would otherwise raise FileNotFoundError on a fresh checkout.
figure_dir = os.path.join("figures", log_folder_name)
os.makedirs(figure_dir, exist_ok=True)
plt.savefig(
    os.path.join(
        figure_dir, f"{log_folder_name}_strategy_performance_comparison.png"
    ),
    bbox_inches="tight",
)