In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yaml
from pathlib import Path
from collections import defaultdict
from pandas.api.types import CategoricalDtype


In [None]:
# Root directory of the experiment outputs, resolved relative to the user's home directory.
EXPERIMENTS_PATH = Path.home() / "ba" / "experiments"
# Every "*.benchmarks.yaml" file found in a directory named "lb.*" below the C4P4 folder.
benchmarks_paths = list((EXPERIMENTS_PATH / "C4P4").glob("lb.*/*.benchmarks.yaml"))

In [None]:
# Show the collected benchmark file paths as the cell output.
benchmarks_paths

In [None]:
# Fallback dtype for columns without an explicit category list: the generic "category" dtype.
DEFAULT_CATEGORY = lambda: "category"
# Explicit category lists per column; any other key falls back to DEFAULT_CATEGORY().
# Values that are not listed here become NaN when a column is cast with astype, so each
# entry must match the strings in the data exactly.
CATEGORIES = defaultdict(DEFAULT_CATEGORY,
    forbidden_subgraphs=CategoricalDtype([
     "P3", "P4", "P5", "P6", "C4P4", "C5P5", "C6P6", "C4_C5_2K2", "C4_C5_P5_Bowtie_Necktie"]),
    lower_bound_algorithm=CategoricalDtype([
     "Trivial", "Greedy", "SortedGreedy", "LocalSearch", "LPRelaxation", "NPS_MWIS_Solver",
     "LSSWZ_MWIS_Solver", "fpt-editing-LocalSearch", "GreedyWeightedPacking"]),
    dataset=CategoricalDtype([
     "barabasi-albert", "bio", "bio-C4P4-subset", "bio-subset-A", "duplication-divergence",
     "misc", "powerlaw-cluster", "bio-subset-B", "bio-unweighted"])
    )

def load_raw_df(paths):
    """Read all YAML documents from the given files into a single DataFrame.

    Every file may contain several YAML documents; each document becomes one
    row, in the order in which the files (and the documents within them) are read.

    Parameters
    ----------
    paths : iterable of path objects providing ``open()``

    Returns
    -------
    pandas.DataFrame built from the list of loaded documents.
    """
    records = []
    for yaml_path in paths:
        with yaml_path.open() as handle:
            # safe_load_all is lazy, so it must be consumed while the file is open.
            records.extend(yaml.safe_load_all(handle))
    return pd.DataFrame(records)

def load_data_unweighted_fpt_editing(paths):
    """Load benchmark documents and tag them as "fpt-editing-LocalSearch" results.

    The "instance" column is split on "/"; component 1 becomes the "dataset"
    column and component 2 replaces "instance".
    """
    frame = load_raw_df(paths)
    path_parts = frame["instance"].str.split("/", expand=True)
    frame[["dataset", "instance"]] = path_parts[[1, 2]]
    frame["lower_bound_algorithm"] = "fpt-editing-LocalSearch"
    return frame

def load_data_weighted_fpt_editing(paths):
    """Load benchmark documents that carry a "values" list and a "lower_bound_name".

    * "value" is set to the first element of each entry in "values".
    * "lower_bound_name" is renamed to "lower_bound_algorithm".
    * "instance" is split on "/"; component 1 becomes "dataset" and
      component 2 replaces "instance".
    """
    frame = load_raw_df(paths)
    frame["value"] = frame["values"].str[0]
    frame = frame.rename(columns={"lower_bound_name": "lower_bound_algorithm"})

    path_parts = frame["instance"].str.split("/", expand=True)
    frame[["dataset", "instance"]] = path_parts[[1, 2]]

    return frame

def load_data(paths):
    """Load all benchmark files into one DataFrame with a fixed set of columns.

    Files whose parent directory name contains "fpt-editing" are read with
    load_data_unweighted_fpt_editing, all other files with
    load_data_weighted_fpt_editing. The results are reduced to the columns
    forbidden_subgraphs, dataset, instance, lower_bound_algorithm and value,
    concatenated, and the three label columns are cast to the dtypes in CATEGORIES.

    Post-processing of "value":
    * negative values are replaced by NaN;
    * values of "fpt-editing-LocalSearch" rows are divided by 100
      (NOTE(review): presumably a unit/scale conversion — confirm against the producer).

    Parameters
    ----------
    paths : iterable of pathlib.Path-like objects (``.parent.name`` is inspected)

    Returns
    -------
    pandas.DataFrame; an empty frame with the expected columns if ``paths`` is empty.
    """
    columns = ["forbidden_subgraphs", "dataset", "instance", "lower_bound_algorithm", "value"]
    weighted_paths = [p for p in paths if "fpt-editing" not in p.parent.name]
    unweighted_paths = [p for p in paths if "fpt-editing" in p.parent.name]

    # Only call a loader when it has files to read: with no files the raw frame has
    # no columns and the loaders' column lookups would raise KeyError.
    frames = []
    if weighted_paths:
        frames.append(load_data_weighted_fpt_editing(weighted_paths)[columns])
    if unweighted_paths:
        frames.append(load_data_unweighted_fpt_editing(unweighted_paths)[columns])

    if frames:
        df = pd.concat(frames, ignore_index=True)
    else:
        df = pd.DataFrame(columns=columns).astype({"value": float})

    df = df.astype({k: CATEGORIES[k] for k in
                    ["forbidden_subgraphs", "lower_bound_algorithm", "dataset"]})
    df.loc[df["value"] < 0, "value"] = np.nan

    m = df["lower_bound_algorithm"] == "fpt-editing-LocalSearch"
    df.loc[m, "value"] = df.loc[m, "value"] / 100
    return df

# Load every collected benchmark file and preview the first rows as the cell output.
df = load_data(benchmarks_paths)
df.head()

In [None]:
# Print the number of rows in each (lower_bound_algorithm, dataset) group.
for lb, df_lb in df.groupby(["lower_bound_algorithm", "dataset"]):
    print(lb, len(df_lb))

In [None]:
# df = df[df["dataset"] == "bio"]

In [None]:
def plot_line_scatter(x, y, xlabel, ylabel, path=None):
    """Scatter y against x on log-log axes together with a reference line.

    Both axes are limited to [1e-1, 5e5] and drawn with equal aspect ratio.
    If ``path`` is given the figure is saved there before it is shown.
    """
    axis_range = [1e-1, 5e5]
    line_ends = [0, 5e5]

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.set_aspect("equal")
    ax.scatter(x, y, alpha=0.2)
    ax.plot(line_ends, line_ends)

    ax.set_yscale("log")
    ax.set_xscale("log")
    ax.set_ylim(axis_range)
    ax.set_xlim(axis_range)
    ax.set_ylabel(ylabel)
    ax.set_xlabel(xlabel)

    if path is not None:
        fig.savefig(path)
    plt.show()

In [None]:
def plot_ratio_scatter(x, y, xlabel, ylabel):
    """Scatter the element-wise ratio x / y against x (log-scaled x-axis).

    Entries where x equals y are plotted with ratio 1, which also covers
    the 0 / 0 case. The x-axis is limited to [1e0, 5e5].
    """
    quotient = x / y
    quotient[x == y] = 1

    fig, ax = plt.subplots(figsize=(6, 4))
    ax.scatter(x, quotient, alpha=0.2)
    ax.set_xscale("log")
    ax.set_xlim((1e0, 5e5))
    ax.set_xlabel(xlabel)
    ax.set_ylabel(f"{xlabel} / {ylabel}")
    plt.show()

In [None]:
def plot_ratio(x, y, xlabel, ylabel, path=None):
    """Print summary statistics of the ratio x / y and plot its histogram.

    Entries where x equals y count as ratio 1 (this also covers 0 / 0). The
    function prints how many entries are both zero, how many ratios equal 1,
    how many are NaN, the quantiles of the non-NaN ratios expressed as
    ``100 / ratio`` percent, and a LaTeX table with the same quantiles. The
    histogram shows all finite ratios different from 1 in 30 bins.

    Parameters
    ----------
    x, y : pandas Series paired by index (Series methods such as ``isnull`` are used)
    xlabel, ylabel : str
        Names used in the printed text and the axis label.
    path : optional
        If given, the histogram is saved to this path before it is shown.
    """
    ratio = x / y
    ratio[x == y] = 1

    both_zero = (x == 0) & (y == 0)
    is_one = ratio == 1

    print("-" * 10)
    print(f"path: {path}")
    print(f"{both_zero.sum()} or {100*both_zero.mean():.4}% where x = y = 0")
    print(f"{is_one.sum()} / {ratio.shape[0]} or {100*is_one.mean():.4}% where ratio = 1")
    print(f"{ratio.isnull().sum()} / {ratio.shape[0]} where ratio = NaN")

    q = np.array([0, 0.05, 0.1, 0.5, 0.9, 0.95, 1])
    # Kept in its own name so the parameter `x` is not shadowed.
    quantiles = np.quantile(ratio[~ratio.isnull()], q)
    for q_i, r_i in zip(q, quantiles):
        print(f"{100*q_i:>6.2f}% {ylabel} / {xlabel} > {100 / r_i:>7.2f}%")
    
    # The header is scaled by 100 so it matches its "%" sign and the console output above.
    q_line = " & ".join([f"{100*q_i:.2f}\\%" for q_i in q])
    x_line = " & ".join([f"{100 / r_i:.2f}\\%" for r_i in quantiles])
    print(f"""\\begin{{table}}[h]
	\\begin{{tabular}}{{lllllll}}
		{q_line} \\\\ \\hline
		{x_line}
	\\end{{tabular}}
\\end{{table}}""")
    
    # Infinite ratios (y == 0 with x > 0) would give non-finite bin edges and make
    # the histogram fail, so both the bin range and the data use finite ratios only.
    finite = ratio[np.isfinite(ratio)]
    bins = np.linspace(min([0, finite.min()]), max([0, finite.max()]), 31)

    fig, ax = plt.subplots(figsize=(6, 4))
    ax.hist(finite[finite != 1], bins=bins)
    ax.set_xlabel(f"{xlabel} / {ylabel}"); ax.set_ylabel("count")
    
    if path is not None:
        plt.savefig(path)
    plt.show()

In [None]:
def draw_plots(df, dataset=""):
    """Run plot_ratio for selected pairs of lower-bound algorithms.

    Currently compares LocalSearch with LPRelaxation and SortedGreedy with
    LocalSearch; each histogram is saved as
    "ratio-histogram-<first>-<second>-<dataset>.pdf".

    Rows of the two algorithms are paired by position after reset_index.
    NOTE(review): this assumes every algorithm lists the same instances in
    the same order — confirm against the data.

    Parameters
    ----------
    df : DataFrame with the columns "lower_bound_algorithm" and "value"
    dataset : str
        Suffix used in the output file names.
    """
    def rows_for(algorithm):
        # Fresh 0..n-1 index so that two selections line up row by row.
        selected = df[df["lower_bound_algorithm"] == algorithm]
        return selected.reset_index()

    greedy_rows = rows_for("SortedGreedy")
    lp_rows = rows_for("LPRelaxation")
    search_rows = rows_for("LocalSearch")

    # Negative LPRelaxation values are treated as missing.
    lp_rows.loc[lp_rows["value"] < 0, "value"] = np.nan

    # Further pairs (e.g. against NPS_MWIS_Solver or fpt-editing-LocalSearch)
    # can be enabled by extending this list.
    comparisons = [
        (search_rows, lp_rows, "LocalSearch", "LPRelaxation",
         f"ratio-histogram-LocalSearch-LPRelaxation-{dataset}.pdf"),
        (greedy_rows, search_rows, "SortedGreedy", "LocalSearch",
         f"ratio-histogram-SortedGreedy-LocalSearch-{dataset}.pdf"),
    ]
    for first, second, first_name, second_name, out_path in comparisons:
        plot_ratio(first["value"], second["value"], first_name, second_name,
                   path=out_path)


#draw_plots(df[df["dataset"] == "bio"], dataset="bio")
#draw_plots(df[df["dataset"] == "bio-unweighted"], dataset="bio-unweighted")

In [None]:
# (algorithm, values) pairs for the "bio-unweighted" dataset; every value series gets a
# fresh 0..n-1 index so that series of different algorithms pair up by position.
X_unweighted = [
    (algorithm, group.reset_index()["value"])
    for (algorithm, dataset), group in df.groupby(["lower_bound_algorithm", "dataset"])
    if dataset == "bio-unweighted"
]

In [None]:
# (algorithm, values) pairs for the "bio" dataset; every value series gets a
# fresh 0..n-1 index so that series of different algorithms pair up by position.
X_weighted = [
    (algorithm, group.reset_index()["value"])
    for (algorithm, dataset), group in df.groupby(["lower_bound_algorithm", "dataset"])
    if dataset == "bio"
]

In [None]:
def plot_matrix_histogram(X, ignore_zero_lb=False, ignore_equality=False, xmin=0, xmax=None, path=None):
    """Plot an n-by-n grid of histograms of pairwise ratios between value series.

    Cell (i, j) with i != j shows the histogram of ``X[i] / X[j]``; the
    diagonal stays empty. All cells share both axes and the same bin edges,
    and a dashed vertical line marks ratio 1.

    Parameters
    ----------
    X : list of (label, values) pairs
        ``values`` are series that are divided element-wise.
    ignore_zero_lb : bool
        If False, entries where both values are 0 (0 / 0) count as ratio 1.
        If True they stay NaN and do not appear in the histogram.
    ignore_equality : bool
        If True, ratios equal to 1 are dropped.
    xmin, xmax : float
        Range of the 25 bin edges. If ``xmax`` is None the largest finite
        ratio over all pairs is used (``xmin + 1`` if there is none).
    path : optional
        If given, the figure is saved to this path before it is shown.
    """
    n = len(X)
    # squeeze=False keeps `axes` two-dimensional even when X holds a single series.
    fig, axes = plt.subplots(nrows=n, ncols=n, figsize=(2*n, 2*n), sharex=True, sharey=True,
                             squeeze=False)

    # Compute all ratios first so that a shared bin range can be derived from them.
    ratios = {}
    for i, (lb_i, x_i) in enumerate(X):
        axes[i, 0].set_ylabel(lb_i)
        axes[-1, i].set_xlabel(lb_i)

        for j, (lb_j, x_j) in enumerate(X):
            if i == j:
                continue
            r = x_i / x_j

            if not ignore_zero_lb:
                # 0 / 0 yields NaN; two zero bounds are equal, so record ratio 1.
                r[(x_i == 0) & (x_j == 0)] = 1
            if ignore_equality:
                r[r == 1] = np.nan

            ratios[i, j] = r

    if xmax is None:
        finite_maxima = [r[np.isfinite(r)].max() for r in ratios.values()]
        finite_maxima = [v for v in finite_maxima if np.isfinite(v)]
        xmax = max(finite_maxima) if finite_maxima else xmin + 1

    bins = np.linspace(xmin, xmax, 25)
    for (i, j), r in ratios.items():
        axes[i, j].axvline(1, c="k", ls="--", alpha=0.5)
        axes[i, j].hist(r, bins=bins)

    fig.tight_layout()
    if path is not None:
        plt.savefig(path)
    plt.show()

# Ratio-histogram matrices for both datasets: first with all ratios, then with
# equal ratios dropped and 0 / 0 entries left out ("-filtered" files).
plot_matrix_histogram(X_unweighted, xmax=2, path="lb-ratio-bio-unweighted.pdf")
plot_matrix_histogram(X_weighted,   xmax=5, path="lb-ratio-bio.pdf")
plot_matrix_histogram(X_unweighted, xmax=2, ignore_equality=True, ignore_zero_lb=True, path="lb-ratio-bio-unweighted-filtered.pdf")
plot_matrix_histogram(X_weighted,   xmax=5, ignore_equality=True, ignore_zero_lb=True, path="lb-ratio-bio-filtered.pdf")

In [None]:
def plot_matrix_scatter(X, ignore_zero_lb=False, ignore_equality=False, xmin=0, xmax=None, path=None):
    """Plot an n-by-n grid of scatter plots comparing value series pairwise.

    Cell (i, j) with i != j shows X[j] on the x-axis against X[i] on the
    y-axis, matching the labels: rows are labelled on the y-axis of the first
    column, columns on the x-axis of the last row. A dashed diagonal spans the
    common value range of the pair. Diagonal cells stay empty.

    Parameters
    ----------
    X : list of (label, values) pairs
    ignore_zero_lb, ignore_equality, xmin, xmax
        Not used by this function.
    path : optional
        If given, the figure is saved to this path before it is shown.
    """
    n = len(X)
    # squeeze=False keeps `axes` two-dimensional even when X holds a single series.
    fig, axes = plt.subplots(nrows=n, ncols=n, figsize=(2*n, 2*n), squeeze=False)
    
    for ax in axes.flatten():
        ax.set_aspect("equal")

    for i, (lb_i, x_i) in enumerate(X):
        axes[i, 0].set_ylabel(lb_i)
        axes[-1, i].set_xlabel(lb_i)

        for j, (lb_j, x_j) in enumerate(X):
            if i == j:
                continue
            # Range of the diagonal: only entries that are present in both series.
            m = ~np.isnan(x_i) & ~np.isnan(x_j)
            l, u = min([x_i[m].min(), x_j[m].min()]), max([x_i[m].max(), x_j[m].max()])
            axes[i, j].plot([l, u], [l, u], c="k", ls="--", alpha=0.5)
            # Column series on x, row series on y, so the data agrees with the axis labels.
            axes[i, j].scatter(x_j, x_i)

    fig.tight_layout()
    if path is not None:
        plt.savefig(path)
    plt.show()

# Scatter matrix for the "bio" dataset; shown only, not saved (no path given).
plot_matrix_scatter(X_weighted)

In [None]:


# A stray `plt.scatter()` call was removed here: scatter requires x and y, so the
# bare call raised TypeError and stopped a full run of the notebook.

In [None]:
# Inspect the second (algorithm, values) pair of the "bio" data as the cell output.
X_weighted[1]