In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from typing import List

# Notebook-wide matplotlib setting: draw grid lines and ticks beneath the plotted
# data so that the grids enabled via `ax.grid(True)` below do not cover the curves.
plt.rcParams['axes.axisbelow'] = True

In [None]:
def read_data(ilp_paths, fpt_paths, *, timeout: float = 100) -> pd.DataFrame:
    """Load the pickled ILP and FPT result frames and merge them into one frame.

    Parameters
    ----------
    ilp_paths, fpt_paths
        Iterables of paths to pickled DataFrames with the ILP / FPT results.
        Both must contain at least one path.
    timeout
        Rows whose ``total_time`` (after the division by 10**9 below) exceeds
        this value are marked as not solved. Defaults to 100, the value that
        was previously hard-coded.

    Returns
    -------
    pd.DataFrame
        One row per run with a fresh, unique ``RangeIndex``. Only the columns
        present in both inputs are kept (in the order of the ILP frame), plus
        the derived ``solution_size`` column.

        * ``name``: ``"Basic"`` / ``"Single"`` / ``"Sparse"`` for ILP rows
          (``"Sparse"`` wins if both flags are set) and
          ``"<selector> <lower_bound> <search_strategy>"`` for FPT rows.
        * ``total_calls``, ``last_time``, ``last_k``, ``last_calls``: derived
          from the per-row ``calls`` / ``time`` / ``k`` sequences of the FPT
          rows; NaN for ILP rows.
        * ``total_time``: divided by 10**9 (presumably nanoseconds to
          seconds); negative values become NaN.
        * ``solution_size``: number of edits of the first solution, NaN if a
          row has no solutions.

    Raises
    ------
    ValueError
        If either path collection is empty.
    """
    ilp_paths = list(ilp_paths)
    fpt_paths = list(fpt_paths)
    if not ilp_paths or not fpt_paths:
        # pd.concat would raise the same exception type, but with a message
        # that does not tell which glob came back empty.
        raise ValueError(
            "read_data needs at least one ILP and one FPT result file "
            f"(got {len(ilp_paths)} ILP and {len(fpt_paths)} FPT paths)")

    # NOTE: unpickling executes arbitrary code; only load trusted result files.
    ilp_df = pd.concat(map(pd.read_pickle, ilp_paths), ignore_index=True)
    fpt_df = pd.concat(map(pd.read_pickle, fpt_paths), ignore_index=True)

    # Later assignments override earlier ones, so "Sparse" has priority over "Single".
    ilp_df["name"] = "Basic"
    ilp_df.loc[ilp_df["single_constraints"], "name"] = "Single"
    ilp_df.loc[ilp_df["sparse_constraints"], "name"] = "Sparse"

    fpt_df["name"] = fpt_df.apply(lambda row: f"{row['selector']} {row['lower_bound']} {row['search_strategy']}", axis=1)

    fpt_df["total_calls"] = fpt_df["calls"].apply(sum)
    ilp_df["total_calls"] = np.nan

    # `.str[-1]` picks the last element of each per-row sequence (NaN if it is empty).
    fpt_df["last_time"] = fpt_df["time"].str[-1].astype(float) / 10**9
    fpt_df["last_k"] = fpt_df["k"].str[-1].astype(float)
    fpt_df["last_calls"] = fpt_df["calls"].str[-1].astype(float)
    ilp_df["last_time"] = np.nan
    ilp_df["last_k"] = np.nan
    ilp_df["last_calls"] = np.nan

    # Keep the ILP column order instead of a set intersection, whose order
    # changes from one interpreter run to the next.
    headers = [column for column in ilp_df.columns if column in fpt_df.columns]

    # ignore_index: both inputs carry their own 0..n index, which would
    # otherwise give duplicate row labels in the combined frame.
    df = pd.concat([ilp_df[headers], fpt_df[headers]], ignore_index=True)

    df["total_time"] = df["total_time"] / 10**9
    df.loc[df["total_time"] < 0, "total_time"] = np.nan
    df["solution_size"] = df.solutions.apply(lambda x: len(x[0]["edits"]) if len(x) > 0 else np.nan)
    df.loc[df["total_time"] > timeout, "solved"] = False
    return df

In [None]:
def plot_solved_by_time_curve(df, output_path: Path, *, names : List[str] = None, labels : List[str] = None,
                              min_number_of_solutions: int = None, y: str = "total_time"):
    """Plot, per algorithm, how many instances are solved up to a given value of ``y``.

    For every entry of ``names`` the values of column ``y`` of the solved rows
    are sorted and drawn against the running count 1, 2, ..., so the curve at
    x = v shows the number of instances solved with a ``y`` value of at most v.

    Parameters
    ----------
    df
        Frame with at least the columns ``name``, ``solutions``, ``solved``
        and the column named by ``y``.
    output_path
        File the figure is written to. Pass ``None`` to only display it.
    names
        Values of the ``name`` column to draw, one curve each. Defaults to all
        names occurring in ``df``.
    labels
        Legend labels, parallel to ``names``. Defaults to ``names``.
    min_number_of_solutions
        Rows whose first solution has fewer edits than this are dropped; rows
        without any solution are always kept. ``None`` means 0.
    y
        Column used for the (logarithmic) x axis. The default used to be
        ``"time"``, which is not a known column and always raised ``KeyError``.

    Raises
    ------
    ValueError
        If ``labels`` and ``names`` differ in length.
    """
    if min_number_of_solutions is None:
        min_number_of_solutions = 0
    if names is None:
        names = list(df["name"].unique())
    if labels is None:
        labels = names
    if len(labels) != len(names):
        # zip() below would otherwise silently drop curves.
        raise ValueError(f"got {len(names)} names but {len(labels)} labels")

    known_labels = dict(total_time="Total Time [s]",
                        total_calls="Total Calls",
                        last_time="Time of last search step [s]",
                        last_calls="Number of calls of last search step",
                        solution_cost="Solution cost",
                        solution_size="Solution size")
    # Fall back to the raw column name so that other columns can be plotted too.
    y_label = known_labels.get(y, y)

    def has_enough_edits(solutions) -> bool:
        # Rows without a solution are kept; they count towards the instance total.
        return len(solutions) == 0 or len(solutions[0]["edits"]) >= min_number_of_solutions

    curves = dict()
    for name in names:
        g = df.loc[df["name"] == name]
        g = g.loc[g["solutions"].apply(has_enough_edits).to_numpy(dtype=bool)]
        # Unsolved runs become NaN. `where` returns a new, upcast Series instead
        # of writing NaN into a (possibly integer) slice of `df`.
        values = g[y].where(g["solved"].to_numpy(dtype=bool))
        # np.sort moves the NaNs to the end, where matplotlib skips them.
        curves[name] = np.sort(values.to_numpy())

    fig, ax = plt.subplots(figsize=(8, 4))
    ax.set_xscale("log")
    ax.grid(True)

    for name, label in zip(names, labels):
        sorted_values = curves[name]
        # The i-th smallest value means i instances are solved, so count from 1.
        ax.plot(sorted_values, np.arange(1, len(sorted_values) + 1), label=label)

    if len(names) > 0:
        # Reference lines at zero and at the number of instances of the first algorithm.
        for level in (0, len(curves[names[0]])):
            ax.axhline(y=level, c="darkgrey")
    ax.set_ylim((-50, None))

    if "time" in y:
        ax.set_xlim((10**-3, 10**2))
    ax.set_ylabel("Number of solved instances")
    ax.set_xlabel(y_label)

    ax.legend(loc="upper left")

    if output_path is not None:
        # Previously `output_path` was accepted but the figure was never written.
        fig.tight_layout()
        fig.savefig(output_path)
    plt.show()
    # Release the figure; this function is called many times in a loop.
    plt.close(fig)

In [None]:
# Directory with the experiment results, relative to the notebook's working directory.
EXPERIMENTS_DIR = Path.cwd() / "../experiments/C4P4/"

# glob() yields files in filesystem order, which differs between machines;
# sorting makes the row order of `df` reproducible.
ilp_paths = sorted(EXPERIMENTS_DIR.glob("ilp*/*.solutions.df.gzip"))
fpt_paths = sorted(EXPERIMENTS_DIR.glob("fpt*/*.solutions.df.gzip"))

df = read_data(ilp_paths, fpt_paths)

# Per-dataset views used by the plotting cells below.
subset_df = df[df["dataset"] == "bio-C4P4-subset"]
bio_df = df[df["dataset"] == "bio"]

In [None]:
# One entry per figure family: (data, output file stem, values of the "name" column, legend labels).
# Every family is drawn once per column in CURVE_Y_COLUMNS and written to "<stem>-<column>.pdf".
CURVE_FIGURES = [
    (bio_df, "solved-curve-ilp-vs-fpt-bio",
     ["Sparse", "MostAdjacentSubgraphs SortedGreedy Fixed"],
     ["ILP Sparse", "FPT, known $k^*$"]),
    (bio_df, "solved-curve-ilp-vs-fpt",
     ["Basic", "Single", "Sparse",
      "MostAdjacentSubgraphs SortedGreedy Exponential",
      "MostAdjacentSubgraphs SortedGreedy Fixed"],
     ["ILP", "ILP Single", "ILP Sparse", "FPT, estimated exponential growth", "FPT, known $k^*$"]),
    (bio_df, "solved-curve-search-strategies",
     ["MostAdjacentSubgraphs SortedGreedy Exponential",
      "MostAdjacentSubgraphs SortedGreedy PrunedDelta",
      "MostAdjacentSubgraphs SortedGreedy IncrementByMinCost",
      "MostAdjacentSubgraphs SortedGreedy IncrementByMultiplier",
      "MostAdjacentSubgraphs SortedGreedy Fixed"],
     # Legend typo fixed: was "Prune preventention".
     ["Exponential growth estimation", "Prune prevention", "Increment by minimum cost",
      "Increment by 1", "Known $k^*$"]),
    (subset_df, "solved-curve-lower-bounds-exponential",
     ["MostAdjacentSubgraphs Greedy Exponential",
      "MostAdjacentSubgraphs LocalSearch Exponential",
      "MostAdjacentSubgraphs SortedGreedy Exponential",
      "MostAdjacentSubgraphs Trivial Exponential"],
     ["Simple packing", "Local search", "Greedy lower bound", "No lower bound"]),
    (subset_df, "solved-curve-selectors-exponential",
     ["MostAdjacentSubgraphs SortedGreedy Exponential",
      "FirstFound SortedGreedy Exponential",
      "MostMarkedPairs SortedGreedy Exponential"],
     ["Most adjacent subgraphs", "First subgraph found", "Most marked vertex pairs"]),
]

CURVE_Y_COLUMNS = ["solution_cost", "solution_size", "total_time", "total_calls", "last_time", "last_calls"]

for y in CURVE_Y_COLUMNS:
    for curve_df, file_stem, curve_names, curve_labels in CURVE_FIGURES:
        plot_solved_by_time_curve(curve_df, Path(f"{file_stem}-{y}.pdf"),
                                  names=curve_names, labels=curve_labels,
                                  min_number_of_solutions=10, y=y)


In [None]:

# Histograms of instance properties over the solved "bio" runs with a solution
# size of at least 10, for the sparse ILP and the FPT run with fixed k.
for column in ["n", "solution_size", "solution_cost"]:
    fig, ax = plt.subplots(figsize=(8, 3))

    for name, label in [("Sparse", "ILP Sparse"),
                        ("MostAdjacentSubgraphs SortedGreedy Fixed", "FPT, known $k^*$")]:
        runs = bio_df[(bio_df["name"] == name) & (bio_df["solution_size"] >= 10)]

        # Semi-transparent bars: with alpha=1 the second histogram hid the first.
        ax.hist(runs.loc[runs["solved"], column], alpha=0.6, bins=50, label=label)

    ax.set_xlabel(column)
    ax.set_ylabel("Number of solved instances")
    ax.legend()
    plt.show()

In [None]:
# Pairwise total-time comparison on the subset data: one point per instance,
# x = time of the first algorithm, y = time of the second algorithm.
comparisons = [
    ("Basic", "ILP", "MostAdjacentSubgraphs SortedGreedy Exponential", "FPT"),
    ("Sparse", "ILP Sparse", "MostAdjacentSubgraphs SortedGreedy Exponential", "FPT"),
]

for a_name, a_label, b_name, b_label in comparisons:
    fig, ax = plt.subplots(figsize=(4.2, 4))
    ax.set_aspect("equal")
    ax.set_xlim((10**-5, 10**3))
    ax.set_ylim((10**-5, 10**3))
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.grid(True)

    ax.set_xlabel(f"{a_label} Total Time [s]")
    ax.set_ylabel(f"{b_label} Total Time [s]")

    runs_a = subset_df[subset_df["name"] == a_name].copy().set_index("instance")
    runs_b = subset_df[subset_df["name"] == b_name].copy().set_index("instance")
    # Bring the second algorithm's rows into the instance order of the first.
    runs_b = runs_b.loc[runs_a.index, :]

    # Unsolved runs and runs slower than 10**2 are drawn at 10**2.5.
    for runs in (runs_a, runs_b):
        unsolved_or_slow = (~runs["solved"]) | (runs["total_time"] > 10**2)
        runs.loc[unsolved_or_slow, "total_time"] = 10**2.5

    ax.scatter(runs_a["total_time"], runs_b["total_time"], s=15, c="C0", alpha=0.5)

    # Diagonal: points on it took equally long with both algorithms.
    diagonal = np.logspace(-5, 3, 10)
    ax.plot(diagonal, diagonal, "k")

    fig.tight_layout()
    fig.savefig(f"solved-scatter-{a_name.replace(' ', '-')}-vs-{b_name.replace(' ', '-')}.pdf")
    plt.show()

In [None]:
# Number of "Sparse" runs on the subset data that are marked as solved and
# have a total_time of at most 100.
is_sparse = subset_df["name"] == "Sparse"
within_limit = subset_df["total_time"] <= 100
subset_df.loc[is_sparse & within_limit, "solved"].sum()

In [None]:
# All distinct values of the "name" column in the combined results.
df.loc[:, "name"].unique()

In [None]:
# For every search strategy: k of the last search step against the ratio of
# that k to the solution cost, over all solved runs.
names = ["MostAdjacentSubgraphs SortedGreedy Exponential", "MostAdjacentSubgraphs SortedGreedy PrunedDelta", "MostAdjacentSubgraphs SortedGreedy IncrementByMinCost", "MostAdjacentSubgraphs SortedGreedy IncrementByMultiplier", "MostAdjacentSubgraphs SortedGreedy Fixed"]

fig, ax = plt.subplots()
ax.grid(True)
for name in names:
    a = df[(df["name"] == name) & df["solved"]]
    ax.scatter(a["last_k"], a["last_k"] / a["solution_cost"], label=name)
# Axis labels so the figure can be read without looking at the code.
ax.set_xlabel("k of last search step")
ax.set_ylabel("k of last search step / solution cost")
ax.legend()
plt.show()