In [None]:
from glob import glob
import yaml
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

In [None]:
# Gather every pickled result frame named "bio.solutions.df.gzip" below the
# fpt* experiment directories and stack them into one frame with a fresh index.
experiment_dir = Path.cwd() / ".." / "experiments" / "C4P4"
paths = list(experiment_dir.glob("fpt*/bio.solutions.df.gzip"))
dfs = [pd.read_pickle(path) for path in paths]
df = pd.concat(dfs, ignore_index=True, sort=True)

In [None]:
# Order the runs by ascending total running time.
df = df.sort_values(by="total_time")

In [None]:
# For every (selector, lower_bound) pair, scatter the total time of each solved
# run against its position within the group, on a logarithmic time axis.
plt.xscale("log")
solved_runs = df[df["solved"]]
for _config, runs in solved_runs.groupby(["selector", "lower_bound"]):
    positions = range(len(runs["time"]))
    plt.scatter(runs["total_time"], positions)

In [None]:
# Collect, per (selector, lower_bound, search_strategy) configuration, the total
# running times in seconds. A recorded time of -1 is replaced by 150 s.
d = {}
for config, runs in df.groupby(["selector", "lower_bound", "search_strategy"]):
    # Keep rows without any solution, or whose first solution has >= 0 edits.
    keep = runs["solutions"].apply(
        lambda sols: True if len(sols) == 0 else len(sols[0]["edits"]) >= 0
    )
    times_ns = pd.Series(runs.loc[keep, "total_time"])
    times_ns[times_ns == -1] = 150 * 10**9
    d[config] = times_ns.values / 10**9

fig, ax = plt.subplots(figsize=(8, 6))
ax.set_xscale("log")
ax.grid(True)

# One curve per configuration: sorted times on x, running index on y.
for config, seconds in d.items():
    if config[2] == "Fixed":
        continue
    ax.plot(np.sort(seconds), range(len(seconds)), label="{0} {1} {2}".format(*config))

# Horizontal reference line at the number of runs of the first configuration.
first_group_size = len(list(d.values())[0])
ax.axhline(y=first_group_size, c="black")
ax.set_xlim((10**-5, 100))
ax.set_ylim((-50, None))
ax.set_xlabel("Total Time [s]")
ax.set_ylabel("Solved")

fig.legend(loc="upper left")
plt.show()

In [None]:
from collections import Counter

# Log-log scatter: for each non-"Fixed" configuration, how many runs needed a
# given number of evaluation steps (the length of the run's "k" entry).
fig, ax = plt.subplots(figsize=(6, 6))
ax.set_xscale("log")
ax.set_yscale("log")
ax.grid(True)

for config, runs in df.groupby(["selector", "lower_bound", "search_strategy"]):
    if config[2] == "Fixed":
        continue
    step_counts = Counter(runs["k"].apply(len))
    steps = list(step_counts.keys())
    occurrences = list(step_counts.values())
    ax.scatter(steps, occurrences, label=config)

ax.set_ylim((10**-0.5, 10**3))
ax.set_xlabel("Number of evaluation steps")
ax.set_ylabel("Count")
fig.legend()
plt.show()

In [None]:
# Normalised histogram of the length of each run's "calls" entry, one
# semi-transparent histogram per search strategy.
fig, ax = plt.subplots()
# Group by the bare column name rather than a one-element list: with a list key
# pandas >= 2.0 yields 1-tuples, which would turn the legend labels into
# "('Exponential',)" instead of "Exponential".
for strategy, runs in df.groupby("search_strategy"):
    total_work = runs["calls"].apply(len)
    # Skip strategies that never recorded any call.
    if total_work.max() <= 0:
        continue

    ax.hist(total_work, density=True, bins=range(10), label=str(strategy), alpha=0.5)
fig.legend()
plt.show()

In [None]:
import yaml
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from hashlib import sha1

# Global matplotlib setting: draw grid lines and ticks underneath the plotted
# data for axes created after this point.
plt.rcParams['axes.axisbelow'] = True


def read_data(ilp_paths, fpt_paths) -> pd.DataFrame:
    """Load ILP and FPT result frames and merge them on their shared columns.

    Every path is read with ``pd.read_pickle``. Each row gets a ``name`` column
    that identifies its configuration:

    * ILP rows: ``"Basic"``, overridden by ``"Single"`` where
      ``single_constraints`` is set, and then by ``"Sparse"`` where
      ``sparse_constraints`` is set (so ``"Sparse"`` wins if both are set).
    * FPT rows: ``"<selector> <lower_bound> <search_strategy>"``.

    Args:
        ilp_paths: Iterable of paths to pickled ILP result frames.
        fpt_paths: Iterable of paths to pickled FPT result frames.

    Returns:
        The ILP rows followed by the FPT rows, restricted to the columns present
        in both frames. Columns keep the order they have in the ILP frame. The
        original row indices are kept, so index labels may repeat.

    Raises:
        ValueError: Raised by ``pd.concat`` if either path collection is empty.
    """
    # NOTE: unpickling executes arbitrary code; only load files that were
    # produced by trusted experiment runs.
    ilp_df = pd.concat(map(pd.read_pickle, ilp_paths))
    fpt_df = pd.concat(map(pd.read_pickle, fpt_paths))

    ilp_df["name"] = "Basic"
    ilp_df.loc[ilp_df["single_constraints"], "name"] = "Single"
    ilp_df.loc[ilp_df["sparse_constraints"], "name"] = "Sparse"

    fpt_df["name"] = fpt_df.apply(lambda row: f"{row['selector']} {row['lower_bound']} {row['search_strategy']}", axis=1)

    # Walk the ILP columns in order instead of intersecting two sets: set
    # iteration order of strings varies between interpreter runs, which made
    # the column order of the result nondeterministic.
    headers = [column for column in ilp_df.columns if column in fpt_df.columns]

    df = pd.concat([ilp_df[headers], fpt_df[headers]])
    return df


def plot_solved_by_time_curve(df, *, names=None, labels=None, min_number_of_solutions=10):
    """Plot, per configuration, the sorted total running times as a cumulative curve.

    For every configuration name the rows of ``df`` with that ``name`` are
    selected and their ``total_time`` values (divided by ``10**9`` and shown as
    seconds) are sorted and plotted against their running index. Rows whose
    ``solution_cost`` is ``-1`` are treated as unsolved and placed at 1.5 times
    the largest total time of that configuration.

    Args:
        df: Frame with the columns ``name``, ``solutions``, ``solution_cost``
            and ``total_time``.
        names: Configuration names to plot. ``None`` plots every name that
            occurs in ``df``.
        labels: Legend labels, parallel to ``names``. Defaults to ``names``.
        min_number_of_solutions: Despite its name, this is a threshold on the
            number of edits in the first solution of a row: rows that have at
            least one solution are kept only if that solution has at least this
            many edits. Rows without any solution are always kept. ``None`` is
            treated as ``0``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    if min_number_of_solutions is None:
        min_number_of_solutions = 0
    if names is None:
        # Iterating over the default of None used to raise a TypeError.
        names = list(df["name"].unique())
    if labels is None:
        labels = names

    def _has_enough_edits(solutions):
        # Rows without solutions pass; otherwise inspect the first solution.
        return len(solutions) == 0 or len(solutions[0]["edits"]) >= min_number_of_solutions

    times_by_name = dict()
    for name in names:
        runs = df.loc[df["name"] == name]
        runs = runs.loc[runs["solutions"].apply(_has_enough_edits).astype(bool)]
        solved = runs["solution_cost"] != -1
        times = runs["total_time"]
        # Build a new array instead of assigning into the series in place: the
        # penalty is a float and must not be written into an integer column.
        penalty = times.max() * 1.5
        times_by_name[name] = times.where(solved.to_numpy(), penalty).to_numpy()

    fig, ax = plt.subplots(figsize=(6, 4))
    ax.set_xscale("log")
    ax.grid(True)

    for name, label in zip(names, labels):
        seconds = np.sort(times_by_name[name]) / 10**9
        ax.plot(seconds, range(len(seconds)), label=label)

    # Reference lines at zero and at the number of rows of the first
    # configuration (zero when no configuration was requested).
    first_group_size = len(next(iter(times_by_name.values()), ()))
    for y in (0, first_group_size):
        ax.axhline(y=y, c="darkgrey")
    ax.set_ylim((-50, None))
    ax.set_xlim((10**-3, 10**2))
    ax.set_ylabel("Number of solved instances")
    ax.set_xlabel("Total Time [s]")

    fig.legend(loc="upper left", bbox_to_anchor=(0.9, 0.9))
    plt.show()


ilp_paths = list((Path.cwd() / "../experiments/C4P4/").glob("ilp*/*.solutions.df.gzip"))
fpt_paths = list((Path.cwd() / "../experiments/C4P4/").glob("fpt*/*.solutions.df.gzip"))

# Build the combined frame here. With this line commented out the plots below
# depended on whatever `df` an earlier cell left behind, which has no "name"
# column on a freshly started kernel.
df = read_data(ilp_paths, fpt_paths)

# ILP variants against the FPT algorithm.
plot_solved_by_time_curve(
    df[df["dataset"] == "bio-C4P4-subset"],
    names=["Basic", "Single", "Sparse", "MostAdjacentSubgraphs SortedGreedy Exponential"],
    labels=["ILP", "ILP Single", "ILP Sparse", "FPT"],
    min_number_of_solutions=0,
)

# Search strategies.
plot_solved_by_time_curve(
    df[df["dataset"] == "bio"],
    names=[
        "MostAdjacentSubgraphs SortedGreedy Exponential",
        "MostAdjacentSubgraphs SortedGreedy PrunedDelta",
        "MostAdjacentSubgraphs SortedGreedy IncrementByMinCost",
        "MostAdjacentSubgraphs SortedGreedy IncrementByMultiplier",
    ],
    labels=["Exponential", "PrunedDelta", "IncrementByMinCost", "IncrementByMultiplier"],
    min_number_of_solutions=10,
)

# Lower-bound algorithms.
plot_solved_by_time_curve(
    df[df["dataset"] == "bio-C4P4-subset"],
    names=[
        "MostAdjacentSubgraphs Greedy Exponential",
        "MostAdjacentSubgraphs LocalSearch Exponential",
        "MostAdjacentSubgraphs SortedGreedy Exponential",
        "MostAdjacentSubgraphs Trivial Exponential",
    ],
    labels=["Greedy", "LocalSearch", "SortedGreedy", "No lower bound"],
    min_number_of_solutions=0,
)

# Subgraph selectors.
plot_solved_by_time_curve(
    df[df["dataset"] == "bio-C4P4-subset"],
    names=[
        "MostAdjacentSubgraphs SortedGreedy Exponential",
        "FirstFound SortedGreedy Exponential",
        "MostMarkedPairs SortedGreedy Exponential",
    ],
    labels=["MostAdjacentSubgraphs", "FirstFound", "MostMarkedPairs"],
    min_number_of_solutions=0,
)

In [None]:
# Show the distinct configuration names present in the combined frame.
pd.unique(df["name"])

In [None]:
# Reload only the result files named "bio-C4P4-subset.solutions.df.gzip" and keep
# the separate ILP / FPT frames around; later cells use `fpt_df` directly.
ilp_paths = list((Path.cwd() / "../experiments/C4P4/").glob("ilp*/bio-C4P4-subset.solutions.df.gzip"))
fpt_paths = list((Path.cwd() / "../experiments/C4P4/").glob("fpt*/bio-C4P4-subset.solutions.df.gzip"))

# NOTE: unpickling executes arbitrary code; only load trusted experiment output.
ilp_df = pd.concat(map(pd.read_pickle, ilp_paths))
fpt_df = pd.concat(map(pd.read_pickle, fpt_paths))

# ILP configuration name: "Basic" unless one of the constraint flags is set
# ("Sparse" wins when both are set because it is assigned last).
ilp_df["name"] = "Basic"
ilp_df.loc[ilp_df["single_constraints"], "name"] = "Single"
ilp_df.loc[ilp_df["sparse_constraints"], "name"] = "Sparse"

# FPT configuration name: "<selector> <lower_bound> <search_strategy>".
fpt_df["name"] = fpt_df.apply(lambda row: f"{row['selector']} {row['lower_bound']} {row['search_strategy']}", axis=1)

# Shared columns in ILP column order. Intersecting two sets gave a column order
# that changed between interpreter runs.
headers = [column for column in ilp_df.columns if column in fpt_df.columns]

df = pd.concat([ilp_df[headers], fpt_df[headers]])

In [None]:
# Log-log scatter of total running time against the number encoded in the
# instance name, one colour per configuration.
fig, ax = plt.subplots(figsize=(8, 5))
ax.set_yscale("log")
ax.set_xscale("log")
ax.grid(True)

# Configurations to compare; uncomment one of the alternative groups to switch.
names = []
names += ["Basic", "Single", "Sparse", "MostAdjacentSubgraphs SortedGreedy Exponential"]
#names += ["MostAdjacentSubgraphs SortedGreedy Exponential", "MostAdjacentSubgraphs SortedGreedy PrunedDelta", "MostAdjacentSubgraphs SortedGreedy IncrementByMinCost", "MostAdjacentSubgraphs SortedGreedy IncrementByMultiplier"]
#names += ["MostAdjacentSubgraphs Greedy Exponential", "MostAdjacentSubgraphs LocalSearch Exponential", "MostAdjacentSubgraphs SortedGreedy Exponential", "MostAdjacentSubgraphs Trivial Exponential"]
#names += ["MostAdjacentSubgraphs SortedGreedy Exponential", "FirstFound SortedGreedy Exponential", "MostMarkedPairs SortedGreedy Exponential"]

# Seeded generator so the jitter of unsolved points is the same on every run.
rng = np.random.default_rng(0)

for name in names:
    # NOTE(review): rows are filtered on dataset == "bio". If `df` was built only
    # from the "bio-C4P4-subset" files this selects nothing - confirm the intent.
    g = df[(df["name"] == name) & (df["dataset"] == "bio")]
    # Integer taken from the last "-"-separated token of the instance name,
    # after dropping its final six characters (presumably a file extension).
    n = g["instance"].str.split("-").str[-1].str[:-6].astype(int)
    unsolved = ~g["solved"]
    t = g["total_time"].copy() / 10**9
    # Unsolved runs are drawn in a jittered band around 10**3 seconds.
    t[unsolved] = 10**(3 + rng.uniform(-0.25, 0.25, size=unsolved.sum()))

    ax.scatter(n, t, label=name, s=5)

ax.set_ylim((10**-5, 10**3.5))
ax.set_xlabel("n (number parsed from instance name)")
ax.set_ylabel("Total Time [s]")
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
plt.show()

In [None]:
# Per-instance summary table: one column per lower-bound algorithm, plus "k"
# (largest solution cost), "k_max" (row maximum) and "n" (number parsed from
# the instance name).
a = pd.DataFrame(columns=list(fpt_df["lower_bound"].unique()) + ["k"], index=fpt_df["instance"].unique())

# Largest first entry of "k" over all runs of a (lower bound, instance) pair
# (presumably the initial lower bound reported by that algorithm - confirm).
for (lb, instance), g in fpt_df.groupby(["lower_bound", "instance"]):
    a.loc[instance, lb] = g["k"].str[0].max()
# Group by the bare column name: with a one-element list pandas >= 2.0 yields
# 1-tuples, which must not be used as row labels of `a`.
for instance, g in fpt_df.groupby("instance"):
    a.loc[instance, "k"] = g["solution_cost"].max()
a["k_max"] = a.max(axis=1)
a["n"] = a.index.str.split("-").str[-1].str[:-6].astype(int)

In [None]:
# Scatter each lower-bound column of `a` against n, restricted to the rows
# whose "k" value is non-negative.
fig, ax = plt.subplots()

for lb in ("Greedy", "LocalSearch", "SortedGreedy"):
    x_values = a.loc[a["k"] >= 0, "n"]
    y_values = a.loc[a["k"] >= 0, lb]
    ax.scatter(x_values, y_values, label=lb)

ax.legend()
plt.show()

In [None]:
# First row per (lower_bound, instance) among the runs that use the
# MostAdjacentSubgraphs selector with the Exponential search strategy.
uses_most_adjacent = fpt_df["selector"] == "MostAdjacentSubgraphs"
uses_exponential = fpt_df["search_strategy"] == "Exponential"
b = fpt_df[uses_most_adjacent & uses_exponential].groupby(["lower_bound", "instance"]).first()

In [None]:
# Compare LocalSearch (index 1) with SortedGreedy (index 2) per instance using
# the first entries of the "k" and "time" columns.
local_search = b.xs("LocalSearch", level="lower_bound")
sorted_greedy = b.xs("SortedGreedy", level="lower_bound")

k_1 = local_search["k"].str[0]
k_2 = sorted_greedy["k"].str[0]
t_1 = local_search["time"].str[0]
t_2 = sorted_greedy["time"].str[0]

fig, ax = plt.subplots()
#ax.set_yscale("log")

# x: time ratio LocalSearch / SortedGreedy, y: ratio of the first "k" entries.
time_ratio = t_1 / t_2
k_ratio = k_1 / k_2
ax.scatter(time_ratio, k_ratio, s=5)

ax.set_xlim((0, 10))
ax.set_ylim((10**0, 10**0.1))
plt.show()