In [None]:
import yaml
from glob import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Collect the aggregated benchmark file of every finder run.
# NOTE(review): the pattern is an absolute, machine-specific path.
benchmark_pattern = "/home/jonas/experiments/finder-benchmark.finder=*/all.benchmark.yaml"
paths = glob(benchmark_pattern)

In [None]:
# paths = glob("../experiments/*/finder-benchmark.finder=*/bio/*")

In [None]:
def convert_to_df(paths, meta_path):
    """Load benchmark YAML documents and join them with instance metadata.

    Parameters
    ----------
    paths : iterable of paths
        Files that each hold one or more YAML documents. Every document must
        provide the keys ``instance`` (a mapping with ``name`` and
        ``permutation``), ``time`` (a sequence of samples), ``finder``,
        ``type``, ``forbidden_subgraphs`` and ``commit_hash``.
    meta_path : path
        YAML file with one record per instance. Each record must provide
        ``name``, ``graph`` (a mapping, expanded into one column per key),
        ``costs`` and ``connected_components``.

    Returns
    -------
    pandas.DataFrame
        One row per benchmark document with the columns ``name`` (last
        ``/``-separated component of the instance name), ``permutation``,
        ``time_mean`` and ``time_std`` (sample statistics divided by 10**9),
        the categorical columns ``finder``, ``type`` and
        ``forbidden_subgraphs``, and the metadata columns matched on ``name``.
        The ``time``, ``instance`` and ``commit_hash`` columns are removed.
    """
    NS_IN_S = 10**9

    all_docs = []
    for path in paths:
        with open(path) as file:
            # Empty documents (e.g. after a trailing "---") load as None and
            # would break DataFrame construction, so they are skipped.
            all_docs.extend(
                doc for doc in yaml.safe_load_all(file) if doc is not None
            )

    df = pd.DataFrame(all_docs)

    df["name"] = df["instance"].apply(lambda x: x["name"].split("/")[-1])
    df["permutation"] = df["instance"].apply(lambda x: x["permutation"]).astype(int)

    def _skip_first(stat):
        # The first sample is always excluded (presumably a warm-up run --
        # TODO confirm). With fewer than two samples nothing remains, so NaN
        # is returned directly instead of letting numpy warn about an empty
        # slice.
        return lambda samples: np.nan if len(samples) < 2 else stat(samples[1:])

    df["time_mean"] = df["time"].apply(_skip_first(np.mean)).astype(float) / NS_IN_S
    df["time_std"] = df["time"].apply(_skip_first(np.std)).astype(float) / NS_IN_S

    for column in ("finder", "type", "forbidden_subgraphs"):
        df[column] = df[column].astype("category")

    df = df.drop(columns=["time", "instance", "commit_hash"])

    with open(meta_path) as meta:
        meta_df = pd.DataFrame(yaml.safe_load(meta)).set_index("name")

    # Expand the nested "graph" mapping into top-level metadata columns.
    meta_df = pd.concat([meta_df, meta_df["graph"].apply(pd.Series)], axis=1)
    meta_df = meta_df.drop(columns=["costs", "graph", "connected_components"])

    return df.join(meta_df, on="name")

In [None]:
# Parse all benchmark files and attach the bio instance metadata.
df = convert_to_df(paths=paths, meta_path="../data/bio/bio.metadata.yaml")

In [None]:
# Scatter markers, one per finder category; the integers 4 and 5 are
# matplotlib's caret-left / caret-right markers.
fmts = list(".1234+x_") + [4, 5]

# One figure per (forbidden_subgraphs, type) combination that actually occurs.
# `bench_type` is used instead of `type` so the builtin is not shadowed in
# later cells.
for (fsg, bench_type), fsg_type_group in df.groupby(
    ["forbidden_subgraphs", "type"], observed=True
):
    fig, ax = plt.subplots(figsize=(6, 4))
    # observed=False keeps unobserved finder categories in the iteration, so a
    # finder's marker depends on its category position and stays the same in
    # every figure.
    finder_groups = fsg_type_group.groupby("finder", observed=False)
    for idx, (finder, finder_group) in enumerate(finder_groups):
        if len(finder_group) == 0:
            continue
        # Wrap around instead of silently dropping finders once the marker
        # list is exhausted.
        marker = fmts[idx % len(fmts)]
        ax.scatter(
            finder_group["number_of_vertices"],
            finder_group["time_mean"],
            label=finder,
            marker=marker,
        )
    ax.set_title(bench_type)
    ax.set_xlabel("Number of vertices")
    ax.set_ylabel("Mean Time [s]")
    ax.set_ylim((0, None))
    ax.grid(True)
    fig.suptitle(fsg)
    fig.legend()
    plt.show()
    # Release the figure; otherwise every iteration keeps one alive.
    plt.close(fig)

In [None]:
# Persist the assembled frame as a pickle in the experiments directory.
output_path = "../experiments/finder-benchmark.df"
df.to_pickle(output_path)