In [None]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
# Collect every pickled result frame matching fpt*/bio.*.df.gzip under ../experiments/C4P4.
paths = list(Path("..", "experiments", "C4P4").glob("fpt*/bio.*.df.gzip"))

In [None]:
# Load each result file and stack them into one frame.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df = pd.concat([pd.read_pickle(path) for path in paths])

In [None]:
# Same as above, but for the result files matching ilp*/bio.*.df.gzip.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
ilp_paths = Path("..", "experiments", "C4P4").glob("ilp*/bio.*.df.gzip")
ilp_df = pd.concat([pd.read_pickle(path) for path in ilp_paths])

In [None]:
def _num_edits(solutions):
    """Return the number of edits in the first solution, or 0 if there is none."""
    if len(solutions) > 0:
        return len(solutions[0]["edits"])
    return 0


# Keep only runs that used the "Fixed" search strategy.
df = df[df["search_strategy"] == "Fixed"].copy()

# First entries of the per-run "k" and "time" sequences.
df["initial_k"] = df["k"].str.get(0)
df["initial_time"] = df["time"].str.get(0)
# Drop the last six characters of the name, then parse the text after the
# final "-" as an integer (plotted below as the number of vertices).
df["n"] = df["name"].str.slice(stop=-6).str.split("-").str.get(-1).astype(int)
df["total_calls"] = df["calls"].apply(sum)
df["solution_size"] = df["solutions"].apply(_num_edits)

# Runs with a recorded time but a cost of -1 are marked as solved and their
# cost is blanked out.
# NOTE(review): presumably these are instances that needed no edits - confirm.
# Every remaining row already has search_strategy == "Fixed".
finished_without_cost = (df["total_time"] != -1) & (df["solution_cost"] == -1)
df.loc[finished_without_cost, "solved"] = True
df.loc[finished_without_cost, "solution_cost"] = np.nan

# Unsolved runs carry no meaningful cost either.
df.loc[~df["solved"], "solution_cost"] = np.nan

ilp_df["solution_size"] = ilp_df["solutions"].apply(_num_edits)

In [None]:
# total_calls against n, one scatter series per lower bound.
fig, ax = plt.subplots()

for bound_name, bound_runs in df.groupby("lower_bound"):
    ax.scatter(bound_runs["n"], bound_runs["total_calls"], s=10, label=bound_name)

ax.set_xlim(0, 200)
ax.legend()
plt.show()

In [None]:
# Ratio of total running time, SortedGreedy over LocalSearch, per instance
# (both with the MostAdjacentSubgraphs selector).
b = df[(df["lower_bound"] == "SortedGreedy") & (df["selector"] == "MostAdjacentSubgraphs")]
c = df[(df["lower_bound"] == "LocalSearch") & (df["selector"] == "MostAdjacentSubgraphs")]

# b and c are different rows of df, so dividing their columns directly would
# align on row labels that never describe the same instance. Pair the two
# configurations explicitly by instance name instead.
paired = b[["name", "n", "total_time"]].merge(
    c[["name", "total_time"]], on="name", suffixes=("_greedy", "_local")
)

fig, ax = plt.subplots()
ax.set_yscale("log")

ax.scatter(paired["n"], paired["total_time_greedy"] / paired["total_time_local"])
ax.set_xlim((3, 100))
ax.set_ylim((10**-2, 10**1))
ax.set_xlabel("Number of vertices")
ax.set_ylabel("Time SortedGreedy / time LocalSearch")
plt.show()

In [None]:
# Histogram of n for all SortedGreedy / MostAdjacentSubgraphs runs, with the
# unsolved ones drawn on top using the same bin edges.
a = df[(df["lower_bound"] == "SortedGreedy") & (df["selector"] == "MostAdjacentSubgraphs")]
size_bins = np.linspace(0, 100, 20)

fig, ax = plt.subplots()

ax.hist(a["n"], bins=size_bins)
unsolved_n = a.loc[~a["solved"], "n"]
ax.hist(unsolved_n, bins=size_bins)
plt.show()

In [None]:
# solution_size against n for the SortedGreedy / MostAdjacentSubgraphs runs.
uses_sorted_greedy = df["lower_bound"] == "SortedGreedy"
uses_most_adjacent = df["selector"] == "MostAdjacentSubgraphs"
a = df[uses_sorted_greedy & uses_most_adjacent]

fig, ax = plt.subplots()

ax.scatter(a["n"], a["solution_size"])


In [None]:
# Figure: number of edits against solution cost (top) and a histogram of the
# costs split by solution size (bottom), for Fixed / SortedGreedy /
# MostAdjacentSubgraphs runs.
a = df[(df["search_strategy"] == "Fixed") & (df["lower_bound"] == "SortedGreedy") & (df["selector"] == "MostAdjacentSubgraphs")].copy()
print(len(a))

fig, axes = plt.subplots(nrows=2, sharex=True, figsize=(8, 5), gridspec_kw=dict(height_ratios=[4, 1]))
axes[0].grid(True)
axes[0].set_ylim((-20, 250))

axes[1].set_xscale("log")
axes[1].set_xlim((10**-2, 10**3))

# solution_cost is divided by 100 before plotting.
# NOTE(review): presumably costs are stored scaled by 100 - confirm.
solved = a["solved"]
axes[0].scatter(a.loc[solved, "solution_cost"] / 100, a.loc[solved, "solution_size"], s=5)

bins = np.logspace(-2, 3, 50)
few_edits = solved & (a["solution_size"] < 10)
many_edits = solved & (a["solution_size"] >= 10)
# Raw strings: "\g" is not a valid escape sequence in a normal string literal.
axes[1].hist(a.loc[few_edits, "solution_cost"] / 100, bins=bins, alpha=0.5, label=r"Solved, $k > 0, |L| < 10$")
axes[1].hist(a.loc[many_edits, "solution_cost"] / 100, bins=bins, alpha=0.5, label=r"Solved, $k > 0, |L| \geq 10$")

axes[0].set_ylabel("Number of edits in optimal solution")
axes[1].set_xlabel("Cost of optimal solution")
axes[1].set_ylabel("Number of graphs")
axes[1].legend(loc="center left", frameon=False)

fig.tight_layout()
plt.savefig("cost-vs-size_C4P4-Fixed-SortedGreedy-MostAdjacentSubgraphs-bio.pdf")
plt.show()

In [None]:
def _plot_cost_vs_time(scatter_ax, hist_ax, runs, bins):
    """Draw one column of the figure for the result frame ``runs``.

    The top axes (``scatter_ax``) shows total_time / 10**9 against
    solution_cost / 100 for solved rows; the bottom axes (``hist_ax``) shows a
    histogram of the same costs over ``bins``. Solved rows are split into two
    series by ``solution_size`` (< 10 and >= 10).
    """
    scatter_ax.grid(True)
    scatter_ax.set_yscale("log")
    scatter_ax.set_ylim((10**-5, 10**2.5))

    hist_ax.set_xscale("log")
    hist_ax.set_xlim((10**-2, 10**3.5))

    # Raw strings: "\g" is not a valid escape sequence in a normal string literal.
    series = (
        (runs["solution_size"] < 10, r"Solved, $k > 0, |L| < 10$"),
        (runs["solution_size"] >= 10, r"Solved, $k > 0, |L| \geq 10$"),
    )
    for size_mask, label in series:
        chosen = runs[runs["solved"] & size_mask]
        cost = chosen["solution_cost"] / 100
        scatter_ax.scatter(cost, chosen["total_time"] / 10**9, s=5, label=label)
        hist_ax.hist(cost, bins=bins, alpha=0.5, label=label)

    hist_ax.set_xlabel("Cost of optimal solution")
    hist_ax.legend(loc="center left", frameon=False)


# Left column: Fixed / SortedGreedy / MostAdjacentSubgraphs runs from df.
a = df[(df["search_strategy"] == "Fixed") & (df["lower_bound"] == "SortedGreedy") & (df["selector"] == "MostAdjacentSubgraphs")].copy()
print(len(a))

fig, axes = plt.subplots(nrows=2, ncols=2, sharex=True, sharey="row", figsize=(10, 5), gridspec_kw=dict(height_ratios=[4, 1]))
bins = np.logspace(-2, 3.5, 50)

_plot_cost_vs_time(axes[0][0], axes[1][0], a, bins)
print(f"Solved: {a['solved'].sum()}, Unsolved: {(~a['solved']).sum()}")

# The y axes are shared per row, so only the left column gets y labels.
axes[0][0].set_ylabel("Time [s]")
axes[1][0].set_ylabel("Number of graphs")

# Right column: ILP runs with sparse_constraints set.
b = ilp_df[(ilp_df["sparse_constraints"])].copy()
print(len(b))

_plot_cost_vs_time(axes[0][1], axes[1][1], b, bins)
print(f"Solved: {b['solved'].sum()}, Unsolved: {(~b['solved']).sum()}")

fig.tight_layout()
plt.savefig("cost-vs-time_C4P4-FPT-vs-ILP-bio.pdf")
plt.show()

In [None]:
# Figure: number of edits against number of vertices (top) and a histogram of
# instance sizes split into solved without edits / solved with edits /
# unsolved (bottom).
a = df[(df["search_strategy"] == "Fixed") & (df["lower_bound"] == "SortedGreedy") & (df["selector"] == "MostAdjacentSubgraphs")].copy()
print(len(a))

fig, axes = plt.subplots(nrows=2, sharex=True, figsize=(8, 5), gridspec_kw=dict(height_ratios=[4, 1]))
axes[0].grid(True)
axes[0].set_ylim((-20, 250))

axes[1].set_xscale("log")
axes[1].set_xlim((2, 500))

solved = a["solved"]
axes[0].scatter(a.loc[solved, "n"], a.loc[solved, "solution_size"], s=5)

# Log-spaced bin edges to match the logarithmic x axis. The earlier mixed
# linear/log edges were overwritten before use and have been dropped.
bins = np.logspace(0, 3, 30)

axes[1].hist(a.loc[solved & (a["solution_size"] == 0), "n"], bins=bins, alpha=0.5, label="Solved, $k = 0$")
axes[1].hist(a.loc[solved & (a["solution_size"] != 0), "n"], bins=bins, alpha=0.5, label="Solved, $k > 0$")
axes[1].hist(a.loc[~solved, "n"], bins=bins, alpha=0.5, label="Unsolved")

axes[0].set_ylabel("Number of edits in optimal solution")
axes[1].set_xlabel("Number of vertices")
axes[1].set_ylabel("Number of graphs")
axes[1].legend(loc="center right", frameon=False)

fig.tight_layout()
plt.savefig("n-vs-size_C4P4-Fixed-SortedGreedy-MostAdjacentSubgraphs-bio.pdf")
plt.show()

In [None]:
# Pair of counts for the frame `a` built in the previous cell:
# (solved rows with at least one edit, unsolved rows).
is_solved = a["solved"]
((is_solved & (a["solution_size"] > 0)).sum(), (~is_solved).sum())

In [None]:
# Figure: solution cost against number of vertices (top) and a histogram of
# instance sizes split into solved without edits / solved with edits /
# unsolved (bottom).
a = df[(df["search_strategy"] == "Fixed") & (df["lower_bound"] == "SortedGreedy") & (df["selector"] == "MostAdjacentSubgraphs")].copy()
print(len(a))

fig, axes = plt.subplots(nrows=2, sharex=True, figsize=(8, 5), gridspec_kw=dict(height_ratios=[4, 1]))
axes[0].grid(True)
axes[0].set_ylim((-50, 10**3))

axes[1].set_xscale("log")
axes[1].set_xlim((2, 500))

# solution_cost is divided by 100 before plotting.
# NOTE(review): presumably costs are stored scaled by 100 - confirm.
solved = a["solved"]
axes[0].scatter(a.loc[solved, "n"], a.loc[solved, "solution_cost"] / 100, s=5)

bins = np.logspace(0, 3, 30)
axes[1].hist(a.loc[solved & (a["solution_size"] == 0), "n"], bins=bins, alpha=0.5, label="Solved, $k = 0$")
axes[1].hist(a.loc[solved & (a["solution_size"] > 0), "n"], bins=bins, alpha=0.5, label="Solved, $k > 0$")
axes[1].hist(a.loc[~solved, "n"], bins=bins, alpha=0.5, label="Unsolved")

axes[0].set_ylabel("Cost of optimal solution")
axes[1].set_xlabel("Number of vertices")
axes[1].set_ylabel("Number of graphs")
axes[1].legend(loc="center right", frameon=False)

fig.tight_layout()
plt.savefig("n-vs-cost_C4P4-Fixed-SortedGreedy-MostAdjacentSubgraphs-bio.pdf")
plt.show()

In [None]:
# Figure: running time against number of vertices for three lower bounds
# (bottom row), with a histogram of the unsolved instances above each panel.
b = df[(df["search_strategy"] == "Fixed") & (df["selector"] == "MostAdjacentSubgraphs")].copy()

n_max = 100
bins = np.linspace(0, n_max, 20)
lower_bounds = ["Trivial", "SortedGreedy", "LocalSearch"]
labels = ["No lower bound", "Greedy", "Local search"]

fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(10, 4), sharey="row", sharex=True, gridspec_kw=dict(height_ratios=[1, 9]))

axes[1][0].set_ylabel("Time [s]")
axes[0][0].set_ylabel("Graphs")

for hist_ax, time_ax, lower_bound, label in zip(axes[0], axes[1], lower_bounds, labels):
    hist_ax.grid(True)

    time_ax.grid(True)
    time_ax.set_yscale("log")
    time_ax.set_xlim((0, n_max))
    time_ax.set_ylim((10**-5, 10**2.5))
    time_ax.set_xlabel("Number of vertices")
    # Horizontal marker at 10**2 s.
    # NOTE(review): presumably the time limit of the runs - confirm.
    time_ax.axhline(y=10**2, ls="--", c="k", alpha=0.8)

    a = b[b["lower_bound"] == lower_bound]
    print(len(a))

    n = a.loc[a["solved"], "n"].values
    t = a.loc[a["solved"], "total_time"].values / 10**9
    ind = np.digitize(n, bins)
    # Only sizes below n_max are mapped to a bin edge; n >= n_max would index
    # past the end of `bins`.
    below_max = n < n_max

    time_ax.scatter(n, t, s=5, c="k", alpha=0.25)
    # x and y must be keyword arguments: seaborn >= 0.12 rejects them positionally.
    sns.lineplot(x=bins[ind[below_max]].astype(int), y=t[below_max], ax=time_ax, label=label)

    # Sizes of the unsolved instances for this lower bound.
    n2 = a.loc[~a["solved"], "n"].values
    hist_ax.hist(n2, bins, color="C1", alpha=0.5)

fig.tight_layout()
plt.savefig("n-vs-time_C4P4-Fixed-MostAdjacentSubgraphs-bio.pdf")
plt.show()

In [None]:
# Number of solved runs per (lower_bound, selector) among rows with at least
# 10 edits and total_time <= 100 * 10**9. The "solved" column is selected
# before summing, so the object columns (lists, strings) are never aggregated.
df[(df["solution_size"] >= 10) & (df["total_time"] <= 100 * 10**9)].groupby(["lower_bound", "selector"])["solved"].sum()

In [None]:
# Heat map of a hard-coded 3x3 table of counts.
# NOTE(review): presumably copied from the groupby output above
# (lower bound x selector) - confirm and derive it from df instead.
solved_count_grid = [
    [331, 482, 337],
    [564, 648, 677],
    [534, 623, 658],
]
plt.imshow(solved_count_grid)