In [None]:
import pickle
import pandas as pd
import seaborn as sns
import os
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator

In [None]:
# Notebook-wide plot styling: white-grid theme, Arial font and a default
# figure size of (5, 4) for every figure created below.
sns.set_theme(
    style="whitegrid",
    font="Arial",
    rc={"figure.figsize": (5, 4)},
)

In [None]:
# Name of the results sub-folder to analyse; it is also reused when building
# the output figure paths.
log_folder_name = "rtfm"
folder_path = os.path.join(".", "results", log_folder_name)
# os.listdir returns entries in arbitrary order; sort them so downstream cells
# see the result files in the same order on every machine and every run.
pickle_files = sorted(
    file for file in os.listdir(folder_path) if file.endswith(".pickle")
)
pickle_files

In [None]:
# Load each strategy run exactly once. Every pickle is read as a sequence of
# per-iteration records exposing an "f-measure" entry.
# NOTE(security): pickle.load can execute arbitrary code -- only load trusted
# result files.
strategy_runs = []
for file_name in pickle_files:
    # Split the bare file name rather than the joined path, so a "__" in a
    # directory name cannot shift the <log>__<strategy>__<ending> fields.
    name_parts = file_name.split("__")
    if len(name_parts) != 3:
        # Without this guard the run would be stored under the previous file's
        # strategy name (or fail with NameError when it is the first file).
        print(f"Skipping {file_name!r}: name is not <log>__<strategy>__<ending>")
        continue
    log_name, strategy, ending = name_parts
    with open(os.path.join(folder_path, file_name), "rb") as file:
        data = pickle.load(file)
    strategy_runs.append((strategy, [iteration["f-measure"] for iteration in data]))

if not strategy_runs:
    raise FileNotFoundError(
        f"No '<log>__<strategy>__<ending>' pickle files found in {folder_path!r}"
    )

# The longest run defines the shared x-axis. Taking the length from whichever
# file happens to be listed first would misalign the frame whenever that file
# is not the longest one.
n_iterations = max(len(values) for _, values in strategy_runs)
percentages = [100 * i / n_iterations for i in range(n_iterations)]

# Shorter runs are left-padded with missing values so that all runs end on the
# last row. The columns are assembled with a single concat instead of one
# concat per file.
f_measure_df = pd.concat(
    [pd.DataFrame({"% processed variants": percentages})]
    + [
        pd.Series([None] * (n_iterations - len(values)) + values, name=column_name)
        for column_name, values in strategy_runs
    ],
    axis=1,
)

In [None]:
# Results of the random-order baseline runs for the same log.
# Note: this reassigns folder_path, pickle_files and percentages.
folder_path = os.path.join("..", "ipda", "results", log_folder_name, "random_order")
# os.listdir order is arbitrary; sort for a reproducible column order.
pickle_files = sorted(
    file for file in os.listdir(folder_path) if file.endswith(".pickle")
)

# Load each random-order run exactly once. Every pickle is read as a sequence
# of per-iteration records exposing an "f-measure" entry.
# NOTE(security): pickle.load can execute arbitrary code -- only load trusted
# result files.
random_runs = []
for file_name in pickle_files:
    # Split the bare file name rather than the joined path, so a "__" in a
    # directory name cannot shift the four expected fields.
    name_parts = file_name.split("__")
    if len(name_parts) != 4:
        # Without this guard the run would be stored under the previous file's
        # column name (or fail with NameError when it is the first file).
        print(
            f"Skipping {file_name!r}: name is not "
            "<log>__<approach>__<initial_method>__<ending>"
        )
        continue
    log_name, approach, initial_method, ending = name_parts
    # Keep only the last dot-separated component of the initial-method field.
    initial_method = initial_method.split(".")[-1]
    with open(os.path.join(folder_path, file_name), "rb") as file:
        data = pickle.load(file)
    random_runs.append(
        (
            f"{approach}__{initial_method}__{ending}",
            [iteration["f-measure"] for iteration in data],
        )
    )

if not random_runs:
    raise FileNotFoundError(
        "No '<log>__<approach>__<initial_method>__<ending>' pickle files "
        f"found in {folder_path!r}"
    )

# The longest run defines the shared x-axis. Taking the length from whichever
# file happens to be listed first would misalign the frame whenever that file
# is not the longest one.
n_iterations = max(len(values) for _, values in random_runs)
percentages = [100 * i / n_iterations for i in range(n_iterations)]

# Shorter runs are left-padded with missing values so that all runs end on the
# last row. The columns are assembled with a single concat instead of one
# concat per file.
random_runs_df = pd.concat(
    [pd.DataFrame({"% processed variants": percentages})]
    + [
        pd.Series([None] * (n_iterations - len(values)) + values, name=column_name)
        for column_name, values in random_runs
    ],
    axis=1,
)

In [None]:
# Line style per strategy column: "<strategy>_<suffix>" -> (hex colour, marker).
# Each strategy code has its own colour family, and every numeric suffix uses
# the same marker shape across strategies. The "B_1.0" and "F_1.0" entries
# carry no marker.
legend_mapping = {
    **{
        f"{strategy_code}_{suffix}": (colour, marker)
        for strategy_code, shades in (
            ("LHCMDB", ("#f6a120", "#d88d05", "#b27407", "#8c5c02")),
            ("CHLDMB", ("#0f8112", "#0e6f0e", "#0c5c0a", "#064906")),
            ("HCDMLB", ("#ece232", "#d9d909", "#b2b303", "#8c8c08")),
            ("CDLHMB", ("#d583c9", "#c459b2", "#b93ca7", "#aa21a2")),
        )
        for (suffix, marker), colour in zip(
            (("0.1", "s"), ("0.2", "v"), ("0.4", "o"), ("0.7", "^")),
            shades,
        )
    },
    "B_1.0": ("#848484", None),
    "F_1.0": ("#000000", None),
}


def plot_strategies(strategy=None):
    """Plot strategy F-measure curves over the random-order runs and save the figure.

    Every run column of ``random_runs_df`` is drawn as a faint dark-blue line
    and their row-wise mean as a red line. On top of that, the columns of
    ``f_measure_df`` selected from ``legend_mapping`` are drawn with their
    mapped colour and marker. The figure is saved as a PNG under
    ``figures/<log_folder_name>/``; that directory is created if missing.

    Args:
        strategy: Strategy code, i.e. the part of a ``legend_mapping`` key
            before the first "_". Only entries with that code are shown, plus
            the "B" and "F" entries, which are always shown. ``None`` (or any
            falsy value) shows every entry.

    Raises:
        KeyError: If a selected entry has no column in ``f_measure_df``.
    """
    x_axis = "% processed variants"

    shown_strategies = list(legend_mapping.keys())
    if strategy:
        shown_strategies = [
            strat
            for strat in shown_strategies
            if strat.split("_")[0] in [strategy, "B", "F"]
        ]
    print(shown_strategies)

    fig, ax = plt.subplots()

    # Background: one faint line per random-order run.
    random_run_values = random_runs_df.drop(x_axis, axis=1)
    for col in random_run_values.columns:
        sns.lineplot(
            data=random_runs_df,
            x=x_axis,
            y=col,
            dashes=False,
            alpha=0.075,
            color="darkblue",
            ax=ax,
        )
    # Take x from the frame itself rather than the global `percentages`, so the
    # mean curve stays aligned even if the loading cells were run out of order.
    sns.lineplot(
        x=random_runs_df[x_axis],
        y=random_run_values.mean(axis=1),
        color="red",
        ax=ax,
    )

    # markevery must be >= 1: an integer division result of 0 (frames shorter
    # than 15 rows) makes matplotlib fail with a zero slice step.
    marker_step = max(1, len(f_measure_df) // 15)
    for col in f_measure_df[shown_strategies].columns:
        color, marker = legend_mapping[col]
        sns.lineplot(
            data=f_measure_df,
            x=x_axis,
            y=col,
            dashes=False,
            color=color,
            marker=marker,
            markevery=marker_step,
            markersize=6,
            ax=ax,
        )

    # seaborn labels the axes after the last plotted column; override both.
    ax.set_xlabel(x_axis)
    ax.set_ylabel("F-measure")
    ax.set_xlim(-1, 101)
    ax.set_ylim(top=1)
    ax.yaxis.set_major_locator(MultipleLocator(0.1))

    output_path = f"figures/{log_folder_name}/{log_folder_name}_strategy_f-measure_comparison_{strategy if strategy else 'all'}_variants.png"
    # savefig does not create missing directories.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    fig.savefig(output_path, bbox_inches="tight")

In [None]:
# First one figure containing every strategy, then one figure per strategy code.
for strategy_filter in (None, "HCDMLB", "LHCMDB", "CDLHMB", "CHLDMB"):
    plot_strategies(strategy_filter)