In [None]:
import subprocess
import yaml
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

In [None]:
# Load the per-instance metadata. The file is opened in a `with` block so the
# handle is closed deterministically instead of being left to the garbage collector.
with open("../data/bio/bio.metadata.yaml") as metadata_file:
    meta_df = pd.DataFrame(yaml.safe_load(metadata_file))

# Expand every entry of the "graph" column into its own columns (presumably each
# entry is a mapping of graph statistics -- TODO confirm against the YAML file)
# and discard the "costs" column along with the original nested "graph" column.
meta_df = pd.concat(
    [
        meta_df.drop(columns=["costs", "graph"]),
        meta_df["graph"].apply(pd.Series),
    ],
    axis=1,
)
meta_df = meta_df.drop(columns=["connected_components"])

In [None]:
# Collect one pickled benchmark DataFrame per finder run. `glob` yields paths in
# filesystem order, so they are sorted to keep the row order of `df` reproducible.
paths = sorted(Path("../experiments").glob("finder-benchmark.finder=*/bio.benchmarks.df.gzip"))
if not paths:
    # Fail with an actionable message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError(
        "no benchmark results match "
        "../experiments/finder-benchmark.finder=*/bio.benchmarks.df.gzip"
    )

# NOTE(security): unpickling can execute arbitrary code -- only load result
# files produced by trusted benchmark runs.
df = pd.concat(map(pd.read_pickle, paths), ignore_index=True)

In [None]:
# Rows that carry no forbidden-subgraph label are assigned the label "C5P5".
df.loc[df["forbidden_subgraphs"].isna(), "forbidden_subgraphs"] = "C5P5"

In [None]:
# Attach the metadata columns to every benchmark row: the metadata "name" column
# is renamed to "instance" and used as the lookup key for df["instance"].
df = df.join(
    meta_df.rename(columns={"name": "instance"}).set_index("instance"),
    on="instance",
)

In [None]:
MAX_VERTICES = 80   # only instances with fewer vertices than this are plotted
MARKER_SIZE = 10    # scatter marker size
N_COLORS = 10       # the colour aliases "C0" .. "C9" are reused cyclically


def _fit_monomial(x, y, degree):
    """Return the least-squares coefficient ``a`` of the model ``y ~= a * x**degree``.

    Rows in which ``x`` or ``y`` is missing are ignored. Returns ``None`` when no
    usable row remains, so the caller can skip drawing a trend line.
    """
    valid = x.notna() & y.notna()
    if not valid.any():
        return None
    # Cast to float before exponentiating so integer columns cannot overflow.
    design = (x[valid].astype(float) ** degree).to_numpy()[:, np.newaxis]
    target = y[valid].astype(float).to_numpy()
    solution, *_ = np.linalg.lstsq(design, target, rcond=None)
    return solution[0]


# Candidate styles for experimentation; only "default" is rendered by the loop below.
styles = list(set(["default"] + plt.style.available) - set(["dark_background", "seaborn-poster", "classic", "Solarize_Light2", "_classic_test", "fivethirtyeight", "tableau-colorblind10"]))

small_df = df[df["number_of_vertices"] < MAX_VERTICES]

for style in ["default"]:
    print(style)
    plt.style.use(style)

    # One figure per (benchmark type, forbidden subgraph) combination.
    # `benchmark_type` deliberately avoids the name `type`, which would shadow
    # the builtin for every cell executed afterwards.
    for (benchmark_type, fsg), fsg_df in small_df.groupby(["finder_benchmark_type", "forbidden_subgraphs"]):
        if len(fsg_df) == 0:
            continue

        fig, (ax1, ax2) = plt.subplots(ncols=2, sharey=True, figsize=(8, 4))
        # Identify the figure; otherwise the rendered plots are indistinguishable.
        fig.suptitle(f"{benchmark_type} / {fsg}")

        ax1.grid(True)
        ax2.grid(True)
        ax2.ticklabel_format(axis="x", scilimits=(3, 5))

        finder_groups = [
            (finder, group_df)
            for finder, group_df in fsg_df.groupby("finder")
            if len(group_df) != 0
        ]
        for idx, (finder, group_df) in enumerate(finder_groups):
            # Cycle through the colours instead of zipping against a fixed list,
            # which silently dropped every finder beyond the tenth.
            color = f"C{idx % N_COLORS}"

            # Other x-axes that were tried: "complexity", "number_of_edges".
            x1 = group_df["number_of_vertices"]
            x2 = group_df["count"]
            y = group_df["time_mean"]

            ax1.scatter(x1, y, label=finder, s=MARKER_SIZE, color=color, alpha=0.5)
            ax2.scatter(x2, y, label=finder, s=MARKER_SIZE, color=color, alpha=0.5)

            if benchmark_type != "find_all_subgraphs":
                continue

            # Trend lines: against the vertex count the exponent is the trailing
            # digit of the subgraph label (e.g. 4 for "C4P4"); against the
            # number of found subgraphs the model is linear.
            vertex_degree = int(fsg[-1])
            for ax, x, degree in zip((ax1, ax2), (x1, x2), (vertex_degree, 1)):
                coef = _fit_monomial(x, y, degree)
                if coef is None:
                    continue
                x_ = np.linspace(x.min(), 2 * x.max(), 20)
                # The colour is given only via the keyword; combining it with a
                # colour in the format string ("k--") is redundant and triggers
                # a warning in recent matplotlib versions.
                ax.plot(x_, coef * x_**degree, "--", alpha=0.25, color=color)

        ax1.set_xlabel("Number of vertices")
        ax2.set_xlabel("Number of forbidden subgraphs")
        ax1.set_ylabel("Time [s]")

        # Axis limits with 5% padding. Non-finite limits (e.g. a column that is
        # entirely NaN) are skipped because matplotlib rejects them.
        y_max = fsg_df["time_mean"].max()
        if np.isfinite(y_max):
            # The y-axis is shared, so setting it on ax1 applies to ax2 as well.
            ax1.set_ylim((0, y_max + y_max / 20))
        for ax, col in zip((ax1, ax2), ("number_of_vertices", "count")):
            lo, hi = fsg_df[col].min(), fsg_df[col].max()
            if not (np.isfinite(lo) and np.isfinite(hi)):
                continue
            pad = (hi - lo) / 20
            ax.set_xlim((lo - pad, hi + pad))

        ax1.legend(loc="upper left", frameon=True)
        fig.tight_layout()
        plt.show()
        # Release the figure; many figures are created in this loop.
        plt.close(fig)

In [None]:
# Expression plotted on the x-axis for each forbidden-subgraph label, in terms of
# the number of vertices n and the number of edges m. Stored as callables so
# only the expression for the current label is evaluated.
PATH_COUNT_BOUNDS = {
    "C4P4": lambda n, m: m**2,
    "P3": lambda n, m: n * m,
    "C5P5": lambda n, m: m**2 * n,
}

for fsg, fsg_df in df.groupby("forbidden_subgraphs"):
    # Only the "find_all_subgraphs" benchmark rows are plotted; skip labels
    # without any such rows instead of drawing an empty log-log figure.
    group_df = fsg_df[fsg_df["finder_benchmark_type"] == "find_all_subgraphs"]
    if len(group_df) == 0:
        continue

    # Float arithmetic: m**2 * n can exceed the int64 range for large graphs.
    n = group_df["number_of_vertices"].astype(float)
    m = group_df["number_of_edges"].astype(float)
    y_pred = PATH_COUNT_BOUNDS[fsg](n, m)  # raises KeyError for an unknown label
    y = group_df["count"]

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.grid(True)

    # loglog() switches both axes to logarithmic scale itself.
    ax.loglog(y_pred, y, ".", c="C0")

    ax.set_title(fsg)
    ax.set_xlabel("Upper bound on number of paths")
    ax.set_ylabel("Actual number of forbidden subgraphs")

    fig.tight_layout()
    plt.show()
    # Release the figure; one is created per forbidden-subgraph label.
    plt.close(fig)

In [None]:
# Show the metadata rows of all instances with more than 1000 vertices.
meta_df.query("number_of_vertices > 1000")