In [None]:
import numpy as np
import matplotlib.pyplot as plt
import subprocess
import yaml
from glob import glob
from pathlib import Path
from itertools import product
import pandas as pd

$$log(calls) = a + b \cdot k$$
$$calls = exp(a) \cdot exp(b \cdot k) = exp(a) \cdot exp(b)^{k}$$

$$log(next\_calls) = a + b \cdot next\_k$$
$$next\_k = \frac{log(next\_calls) - a}{b}$$

In [None]:
# Collect every result frame below ../experiments/C4P4/fpt*/ and stack them
# into a single DataFrame with a fresh 0..n-1 index.
# Sorting makes the row order independent of the file system's listing order.
paths = sorted(Path().glob("../experiments/C4P4/fpt*/*.df.gzip"))
if not paths:
    # Without this guard pd.concat fails with the much less helpful
    # "No objects to concatenate".
    raise FileNotFoundError("no result files match ../experiments/C4P4/fpt*/*.df.gzip")
# NOTE(review): unpickling can execute arbitrary code; only load result files
# that were produced by trusted runs.
df = pd.concat(map(pd.read_pickle, paths), ignore_index=True)

In [None]:
# Empirical distribution of total_time per search strategy, restricted to the
# bio dataset with the MostAdjacentSubgraphs selector and SortedGreedy bound.
fig, ax = plt.subplots()
ax.set_xscale("log")

ecdf_rows = df[
    (df["selector"] == "MostAdjacentSubgraphs")
    & (df["lower_bound"] == "SortedGreedy")
    & (df["dataset"] == "bio")
]
for s, g in ecdf_rows.groupby("search_strategy"):
    # Unsolved runs get the sentinel 10**13 so they sort behind every solved run.
    times = g["total_time"].where(g["solved"], 10**13).sort_values()
    # x: sorted times divided by 10**9; y: evenly spaced fractions from 0 to 1.
    ax.plot(times / 10**9, np.linspace(0, 1, len(times)), label=s)

ax.set_xlim((10**-5, df["timelimit"].max()))
fig.legend()
plt.show()

In [None]:
# Share of solved runs per search strategy on the bio dataset.
bio_runs = df[df["dataset"] == "bio"]
solved = bio_runs.groupby("search_strategy")["solved"]
solved.sum() / solved.count()

In [None]:
# Head-to-head comparison of two search strategies on the bio dataset:
# one point per instance, x = total_time under `a`, y = total_time under `b`.
a = "PrunedDelta"
b = "IncrementByMinCost"


def _bio_runs_with_capped_time(strategy):
    """Return the bio rows of `df` for one search strategy, indexed by instance.

    Rows whose total_time is -1 or larger than 10**11 are set to 10**11.5, so
    they all end up on one common line of the log-log scatter plot.
    (Presumably -1 marks a run without a measured time -- TODO confirm.)
    """
    rows = (
        df[(df["dataset"] == "bio") & (df["search_strategy"] == strategy)]
        .set_index("instance")
        # Cast up front: writing the float 10**11.5 into an integer column
        # relies on silent upcasting, which pandas >= 2.1 deprecates.
        .astype({"total_time": float})
    )
    capped = (rows["total_time"] == -1) | (rows["total_time"] > 10**11)
    rows.loc[capped, "total_time"] = 10**11.5
    return rows


a_df = _bio_runs_with_capped_time(a)
b_df = _bio_runs_with_capped_time(b)

# NOTE(review): unlike the other plots in this notebook, no selector /
# lower_bound filter is applied here. If an instance occurs several times per
# strategy, the index join pairs every combination -- confirm this is intended.
j = a_df.join(b_df, lsuffix="_a", rsuffix="_b")[["total_time_a", "total_time_b"]]

axis_range = (10**-5, 10**3)

fig, ax = plt.subplots(figsize=(6, 6))
ax.grid(True)
ax.set_yscale("log")
ax.set_xscale("log")
ax.scatter(j["total_time_a"] / 10**9, j["total_time_b"] / 10**9, alpha=0.25, s=4)
# Diagonal: points on it took equally long under both strategies.
ax.plot(axis_range, axis_range, "k--")
ax.set_xlim(axis_range)
ax.set_ylim(axis_range)
ax.set_xlabel(a)
ax.set_ylabel(b)
plt.show()

In [None]:

# One figure per search strategy (except "Fixed"): ratio of the last k to the
# solution cost, plotted against the solution cost, over all solved runs with
# a positive solution cost.
solved_nonzero = df[df["solved"] & (df["solution_cost"] > 0)]
for search_strategy, g in solved_nonzero.groupby("search_strategy"):
    if search_strategy == "Fixed":
        continue

    fig, ax = plt.subplots()
    ax.grid(True)
    ax.set_yscale("log")

    cost = g["solution_cost"]
    final_k = g["k"].str[-1]  # last element of each entry in the k column
    ax.scatter(cost, final_k / cost, label=search_strategy, s=10, alpha=0.2)

    ax.set_ylim((1, 10))
    ax.set_xlim((0, 20000))
    fig.legend()
    plt.show()

# Analysis of how the number of calls grows with the number of search steps

In [None]:
# Scratch selection: IncrementByMultiplier runs with a positive solution cost.
a = df.loc[
    (df["solution_cost"] > 0) & (df["search_strategy"] == "IncrementByMultiplier")
]

In [None]:
# Scratch selection: Exponential runs with a positive solution cost.
a = df.loc[
    (df["solution_cost"] > 0) & (df["search_strategy"] == "Exponential")
]

In [None]:
# Scratch selection: every PrunedDelta run, regardless of solution cost.
a = df.loc[df["search_strategy"] == "PrunedDelta"]

In [None]:
# Growth of the number of calls between consecutive search steps: one figure
# per search strategy (bio dataset, SortedGreedy bound, MostAdjacentSubgraphs
# selector). Every figure is also saved as a PDF in the working directory.
def _step_growth_ratios(calls):
    """Return calls[i + 1] / calls[i] for one run, evaluated in log space."""
    return pd.Series(np.exp(np.diff(np.log(calls))))


growth_rows = df[
    (df["dataset"] == "bio")
    & (df["lower_bound"] == "SortedGreedy")
    & (df["selector"] == "MostAdjacentSubgraphs")
]
for search_strategy, a in growth_rows.groupby("search_strategy"):
    # No ratio exists unless at least one run recorded more than one step.
    if len(a) == 0 or a["calls"].apply(len).max() <= 1:
        continue

    fig, (ax1, ax2) = plt.subplots(
        figsize=(8, 4),
        nrows=2,
        sharex=True,
        gridspec_kw=dict(height_ratios=[4/5, 1/5]),
    )
    ax1.set_yscale("log")
    ax1.grid(True)

    # Rows are runs, columns are step transitions (NaN once a run has ended).
    ratios = a["calls"].apply(_step_growth_ratios)
    # Alternative input: a["stats"].apply(lambda x: pd.Series(x["calls"]))

    # Deciles of the ratio per transition. The median is drawn bolder and the
    # lines fade towards the extreme quantiles.
    for q in np.linspace(0, 1, 11):
        c, lw = ("C1", 4) if q == 0.5 else ("C0", 2)
        # Plot np.cumprod(...) of the quantile to look at values and not ratios.
        ax1.plot(ratios.quantile(q), alpha=1 - 1.8 * np.abs(q - 0.5), c=c, lw=lw, label=f"{q:.1f}")

    ax1.axhline(y=2, ls="--", c="black", label="$2^x$ growth")

    # First transition reached by fewer than 1% of the runs. idxmax() also
    # yields 0 when no column is that sparse, hence the fallback to the last one.
    runs_per_step = ratios.count()
    max_step_cutoff = (runs_per_step / len(ratios) < 0.01).idxmax()
    if max_step_cutoff == 0:
        max_step_cutoff = ratios.shape[1] - 1
    print(max_step_cutoff)
    for ax in (ax1, ax2):
        ax.set_xlim((0, max_step_cutoff))

    # Lower panel: percentage of runs that still contribute at each transition.
    ax2.grid(True)
    ax2.plot(100 * runs_per_step / len(ratios), c="black", alpha=0.8)

    ax1.set_ylim((1, max(2 * ratios.quantile(0.99).mean(), 2.5)))
    ax1.set_ylabel("Ratio")
    ax2.set_xlabel("Search step")
    ax2.set_ylabel("Graphs [%]")
    # Optional title: ax1.set_title(search_strategy + "\n", fontsize=12)
    legend = fig.legend(loc="center left", bbox_to_anchor=(1, 0.5))
    fig.tight_layout()

    # The file name records the strategy, the number of transitions and the
    # mean of the per-transition medians up to the cutoff.
    median_ratio = ratios.median()[:max_step_cutoff].mean()
    plt.savefig(
        f"growth-ratio-{search_strategy}-step_max-{ratios.shape[1]}-median-{median_ratio:.3}.pdf",
        bbox_extra_artists=(legend,),
        bbox_inches="tight",
    )
    plt.show()

In [None]:
# Number of runs (y) finished within a given total work (x), per search
# strategy, where total work is the sum of a run's entries in "calls".
fig, ax = plt.subplots()
ax.set_xscale("log")
#ax.set_xlim((10, 10**6))


def _first_solution_edit_count(solutions):
    """Return the number of edits in the first solution, or 0 if there is none."""
    return len(solutions[0]["edits"]) if len(solutions) > 0 else 0


work_rows = df[
    (df["solutions"].apply(_first_solution_edit_count) >= 10)
    & (df["dataset"] == "bio")
    & (df["lower_bound"] == "SortedGreedy")
    & (df["selector"] == "MostAdjacentSubgraphs")
]
for search_strategy, a in work_rows.groupby("search_strategy"):
    if search_strategy == "Fixed":
        continue
    work = a["calls"].apply(sum)
    # Unsolved runs are pushed to the largest total work seen in the group.
    work = work.mask(~a["solved"], work.max()).sort_values()
    ax.plot(work.values, range(len(work)), label=search_strategy)

ax.legend()
plt.show()