In [None]:
import numpy as np
import seaborn as sns
import pandas as pd
from pathlib import Path
import json
import matplotlib.pyplot as plt

In [None]:
# Apply seaborn's "paper" context and plain white style to every figure drawn below.
sns.set_theme(context="paper", style="white")

In [None]:
# Run the project's combine script in a shell. The data cell below reads
# ./data/experiments.csv (presumably written by this script — TODO confirm).
!python3 scripts/combine_data.py

In [None]:
# Make sure the output directory targeted by the plt.savefig calls below exists.
!mkdir -p figures

In [None]:
# Load the raw measurements and derive the size and time columns used by every plot.
df = pd.read_csv("./data/experiments.csv")
df["n+m"] = df["n"] + df["m"]
# Drop instances with n + m == 0: their logarithm and per-size times are undefined.
# The explicit copy makes the filtered frame independent, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
df = df[df["n+m"] != 0].copy()
df["log(n+m)"] = np.log(df["n+m"])
# Normalized running times. Rows with m == 0 (or n == 0) produce non-finite
# values in the corresponding column.
df["time/m"] = df["time"] / df["m"]
df["time/n"] = df["time"] / df["n"]
df["time/(n+m)"] = df["time"] / df["n+m"]
# Add one scaled copy of each time column per unit. The labels treat the
# unscaled value as seconds (factor 10**0 is labelled "s").
for variable in ("time", "time/m", "time/n", "time/(n+m)"):
    for unit, factor in zip(("s", "ms", "μs", "ns"), (10**0, 10**3, 10**6, 10**9)):
        df[f"{variable} [{unit}]"] = factor * df[variable]
# Order rows by instance size, then by instance name.
df = df.sort_values(["n+m", "name"])
def map_algo(algo):
    """Translate a raw algorithm identifier into the name shown in the figures.

    Identifiers without a known translation are returned unchanged.
    """
    display_names = {
        "kar19-rust": "fracture",
        "ms00": "skeleton",
        "miz23-rust": "linear",
        "miz23-cpp": "linear (ref)",
        "linear-ref": "linear (ref)",
    }
    if algo in display_names:
        return display_names[algo]
    return algo
# Swap raw identifiers for display names and fix the algorithm order as an
# ordered categorical; names outside the listed categories become NaN.
df["algo"] = pd.Categorical(
    df["algo"].map(map_algo),
    categories=["linear (ref)", "linear", "skeleton", "fracture"],
    ordered=True,
)

def map_dataset(dataset):
    """Translate a raw dataset identifier into the name shown in the figures.

    Identifiers without a known translation are returned unchanged.
    """
    renames = {"cograph-uni-deg": "cograph"}
    try:
        return renames[dataset]
    except KeyError:
        return dataset
# Rename datasets element-wise to their display names.
df["dataset"] = df["dataset"].map(map_dataset)

In [None]:
# Keep the first row of every instance name, then report per dataset how many
# instances remain and their mean normalized time.
data = df.copy().drop_duplicates(subset="name")
data.groupby("dataset")["time/(n+m) [ns]"].agg(["count", "mean"])

In [None]:
# Mean normalized running time for every (dataset, algorithm) pair that occurs.
data = df.copy()
data.groupby(by=["dataset", "algo"], observed=True)["time/(n+m) [ns]"].agg("mean")

In [None]:
# Materialise the per-algorithm groups as (algorithm, sub-frame) pairs so the
# cells below can address them by position.
grouped_dfs = [(algo, frame) for algo, frame in df.groupby("algo", observed=False)]

In [None]:
# Scatter of normalized running time t/(n+m): the first three entries of grouped_dfs
# (one panel each, y axis) against the fourth entry (x axis), drawn once with
# log-log axes and once with linear axes.
# NOTE(review): x, y and the colors c come from different per-algorithm frames and
# are paired purely by position — presumably every algorithm was measured on the
# same instances in the same order; confirm.
for scale in ('log', 'linear'):
    fig, axes = plt.subplots(ncols=3, figsize=(9,3.5), sharey=True)
    names = [a for a, b in grouped_dfs]
    x = grouped_dfs[3][1]["time/(n+m) [ns]"].copy()
    # Point color: log(n+m) taken from the rows of the first group.
    c = (grouped_dfs[0][1]["log(n+m)"]).copy()
    # xm = t.max() * 0.11
    # Axis maxima in ns; on linear axes the y range is ten times the x range.
    xm = 1.6 * 10**3
    ym = xm * 10
    if scale == 'log':
        # Log plots use one common maximum for both axes (drawn with equal aspect below).
        ym = xm = xm * 10
    # Sample points for the reference lines y = x, y = 2x and y = 10x.
    l = np.linspace(0, xm, 10000)
    for (i, ax) in zip((0, 1, 2), axes):
        if scale == 'log':
            ax.set(aspect='equal')
        ax.set(xlabel=f"{names[3]} $t/(n+m)$ [ns]", title=names[i])
        ax.grid(True)
        ax.set_axisbelow(True)
        ax.set(xscale=scale, yscale=scale)
        y = grouped_dfs[i][1]["time/(n+m) [ns]"].copy()
        # Clamp values beyond the axis maxima to just inside the limits so that
        # outliers are drawn at the border instead of being cut off.
        x[x > xm] = xm * 0.99
        y[y > ym] = ym * 0.99
        ax.set(xlim=(0.5, xm), ylim=(0.5, ym))
        ax.scatter(x, y, c=c, s=3, label="instance", rasterized=True)
        ax.plot(l, l, c='k', lw=1, label=r"$y = x$")
        ax.plot(l, 2*l, c='k', lw=1, ls='-.', label=r"$y = 2 x$")
        ax.plot(l, 10*l, c='k', lw=1, ls='--', label=r"$y = 10 x$")
    axes[0].set(ylabel="algo $t/(n+m)$ [ns]")
    axes[0].legend(loc='upper left')
    fig.tight_layout(w_pad=1.12)
    # NOTE(review): the file name ends in "-log" for both scales — confirm this is intended.
    plt.savefig(f"figures/scatter-all-normalized-{scale}-log.pdf", dpi=300)
    plt.show()

In [None]:
# Relative running time: each of the first three entries of grouped_dfs divided by
# the fourth entry (y axis, log scale), plotted against the fourth entry's absolute
# time in seconds (x axis, once log and once linear).
# This cell is self-contained: it reads only grouped_dfs and does not rely on
# variables left behind by other plotting cells.
# NOTE(review): x, y and the colors c are paired purely by position — presumably
# every algorithm was measured on the same instances in the same order; confirm.
for xscale in ('log', 'linear'):
    fig, axes = plt.subplots(ncols=3, figsize=(9,3.5), sharey=True)
    names = [a for a, b in grouped_dfs]
    x = grouped_dfs[3][1]["time [s]"].copy()
    # Point color: log(n+m) taken from the rows of the first group.
    c = (grouped_dfs[0][1]["log(n+m)"]).copy()
    for (i, ax) in zip((0, 1, 2), axes):
        ax.set(xlabel=f"{names[3]} [s]")
        ax.grid(True)
        ax.set_axisbelow(True)
        ax.set(xscale=xscale)
        ax.set(yscale="log")
        ax.set(title=names[i])
        y = grouped_dfs[i][1]["time [s]"].copy()
        # Divide the underlying arrays so the ratio is taken position by position
        # rather than aligned on the frames' index labels.
        ax.scatter(x, y.values / x.values, c=c, s=3, rasterized=True)
        # Horizontal guides at ratios 1, 3 and 10.
        ax.axhline(1, ls="-", c="k", lw=1, label=r"$y = x$")
        ax.axhline(3, ls="-.", c="k", lw=1, label=r"$y = 3x$")
        ax.axhline(10, ls="--", c="k", lw=1, label=r"$y = 10x$")
    axes[0].set(ylabel=f"algo / {names[3]}")
    axes[-1].legend()
    fig.tight_layout(w_pad=1.12)
    plt.savefig(f"figures/scatter-all-relative-{xscale}-log.png", dpi=300)
    plt.show()

## Overview, dataset scatter

In [None]:
# Overview scatter: normalized time [μs] against instance size n+m, one panel per
# algorithm, colored by dataset. The following cell reuses `data` from this cell.
data = df.copy()
# Restrict to the seven datasets shown in this figure.
data = data[(data["dataset"].isin(("real", "pace2023-exact", "pace2023-heuristic", "cograph", "gnm", "girg", "girg-deg-scaling")))]
# Sort rows by dataset name in descending order (presumably to control the
# legend/draw order of the hue levels — confirm).
data = data.sort_values("dataset", ascending=False)

# Take 8 colors from "tab20c" and skip the one at index 3, leaving 7 colors.
palette = [c for i, c in enumerate(sns.color_palette("tab20c", 8)) if i != 3]
grid = sns.FacetGrid(data, col="algo", hue="dataset", palette=palette, col_wrap=2, margin_titles=True, height=2.5, legend_out=True)
grid.map(sns.scatterplot, "n+m", "time/(n+m) [μs]", s=4, edgecolor=None, rasterized=True)
# Points above the upper y-limit are not visible; the next cell counts those above 2.5.
grid.set(xscale="log", ylim=(-0.1, 2.6))
grid.set_titles("{col_name}")
grid.add_legend(title="Dataset", markerscale=3, fontsize="xx-small")
for ax in grid.axes:
    ax.grid(True)
grid.tight_layout()
sns.despine(left=True, bottom=True)
# Saving is currently disabled; the figure is only displayed.
#plt.savefig("figures/all-scatter.pdf", dpi=300)
plt.show()

In [None]:
# Mark measurements above 2.5 μs per unit of n+m, then summarise per algorithm
# how many there are (sum), their share (mean) and the number of rows (count).
data["out_of_figure"] = data["time/(n+m) [μs]"].gt(2.5)
data.groupby(["algo"], observed=False)["out_of_figure"].agg(["sum", "mean", "count"])

# Overview, dataset boxenplot

In [None]:
# Boxen plots of the slowdown relative to the fastest algorithm on each instance
# ("time/best"). Two figures are produced: the first dataset group on a linear
# y axis (width 6), the second group on a log y axis (width 4).
datasets_ = [
    ["real", "pace2023\nexact", "pace2023\nheuristic", "cograph", "gnm", "girg", "girg\ndeg-scaling"],
    ["empty", "path", "cycle", "complete"]
]
for yscale, width, i, datasets in zip(("linear", "log"), (6, 4), (0, 1), datasets_):
    data = df.copy()
    # Insert line breaks into the long dataset names so they match the labels above.
    data.loc[data["dataset"] == "pace2023-exact", "dataset"] = "pace2023\nexact"
    data.loc[data["dataset"] == "pace2023-heuristic", "dataset"] = "pace2023\nheuristic"
    data.loc[data["dataset"] == "girg-deg-scaling", "dataset"] = "girg\ndeg-scaling"
    # Copy the filtered frame so the column assignment below works on an
    # independent object (avoids pandas' SettingWithCopyWarning).
    data = data[data["dataset"].isin(datasets)].copy()
    # The ordered categorical fixes the left-to-right order of the boxes.
    data["dataset"] = pd.Categorical(data["dataset"], categories=datasets, ordered=True)

    # Mean time per (instance, dataset, algorithm), then the fastest of those
    # means per (instance, dataset) as the reference value.
    data = data.groupby(["name", "dataset", "algo"], observed=False)[["time"]].mean()
    best = data.groupby(["name", "dataset"], observed=False)["time"].min().rename("best")
    data = pd.merge(data, best, left_on=["name", "dataset"], right_index=True, how="left")
    data["time/best"] = data["time"] / data["best"]
    data = data.reset_index()

    fig, ax = plt.subplots(figsize=(width, 2.5))
    ax.set(yscale=yscale)
    if yscale == "linear":
        ax.set(ylim=(0, 15))
    ax.tick_params(axis='x', which='major', labelsize=8)
    ax.grid(True)
    ax.set_axisbelow(True)
    # Faint guide at ratio 1, i.e. "as fast as the best algorithm".
    ax.axhline(1, ls="--", c="k", lw=1, alpha=0.2)
    sns.boxenplot(x="dataset", y="time/best", hue="algo", data=data, ax=ax)
    ax.set(xlabel=None)
    sns.despine(left=True, trim=True)
    ax.legend(bbox_to_anchor=(1, 1), loc="upper left", title="Algorithm", frameon=False)
    fig.tight_layout()
    plt.savefig(f"figures/datasets-{i}.pdf", dpi=300)
    plt.show()

In [None]:
# Mean slowdown relative to the fastest algorithm per instance, reported for each
# (dataset, algorithm) pair of the datasets real, pace2023-exact and pace2023-heuristic.
data = df.copy()
data = data.loc[data["dataset"].isin(["real", "pace2023-exact", "pace2023-heuristic"])]
# Average repeated measurements per (instance, dataset, algorithm).
data = data.groupby(["name", "dataset", "algo"], observed=False)[["time"]].agg("mean")
# Fastest averaged time per (instance, dataset) serves as the reference.
best = (
    data.groupby(["name", "dataset"], observed=False)["time"]
    .agg("min")
    .rename("best")
)
data = pd.merge(data, best, left_on=["name", "dataset"], right_index=True, how="left")
data["time/best"] = data["time"].div(data["best"])
data.groupby(["dataset", "algo"], observed=False)["time/best"].agg("mean")

# Cograph scaling

In [None]:
# Cograph scaling: normalized time [ns] against n+m for one (a, b) parameter
# choice, one row of panels per value of r ("series", "parallel") and one
# column per algorithm.
# Filtering and copying in one step gives an independent frame, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
data = df[df["dataset"] == "cograph"].copy()

data["inner nodes"] = data["num_inner"]
# The part of the instance name after the first "_" is split on "-"; fields 1, 2
# and 3 carry a, b and r, each behind a two-character prefix that is stripped.
data["cograph_params"] = data["name"].str.split('_').str[1].str.split('-')
data["a"] = data["cograph_params"].str[1].str[2:].astype(int)
data["b"] = data["cograph_params"].str[2].str[2:].astype(int)
data["r"] = data["cograph_params"].str[3].str[2:]

data["p"] = data["cograph_params"].str[1:4].str.join(' ')

# Parameter combination shown in the figure.
a = 2
b = 8
data = data[(data["a"] == a) & (data["b"] == b)]

fig, axes = plt.subplots(figsize=(6, 3.5), nrows=2, ncols=4, sharey=True, sharex=True)
for axes_row, r in zip(axes, ("series", "parallel")):
    for ax, (algo, grouped_data) in zip(axes_row, data[data["r"] == r].groupby("algo", observed=True)):
        ax.set(xscale="log")
        ax.set(ylim=(0.0, 400))
        ax.set(title=algo)
        ax.grid(True)
        ax.set_axisbelow(True)
        sns.scatterplot(x="n+m", y="time/(n+m) [ns]", hue="n", data=grouped_data, edgecolor=None, palette="viridis", s=6, ax=ax, rasterized=True)
        ax.set(ylabel=f"{r}\ntime/(n+m) [ns]")
# Algorithm names are shown as titles on the top row only.
for ax in axes[1]:
    ax.set(title=None)
# Clear the per-panel legends, then draw a single one next to the top-right panel.
for ax in axes.flatten():
    ax.legend([],[], frameon=False)
axes[0][-1].legend(bbox_to_anchor=(1, 1), loc="upper left", markerscale=2, frameon=False, title="n")
sns.despine(left=True)
fig.tight_layout()
plt.savefig("figures/cographs-scaling.pdf", dpi=300)
plt.show()

# Girg deg scaling

In [None]:
# GIRG degree scaling: normalized time [μs] against n+m as raw points, overlaid
# with a per-bucket line and a 50% percentile-interval band.
# Filtering and copying in one step gives an independent frame, so the bucket
# columns below are not assigned on a filtered slice (avoids pandas'
# SettingWithCopyWarning).
data = df[df["dataset"] == "girg-deg-scaling"].copy()

# Snap every instance to a target edge count (150k .. 600k in steps of 50k) when
# its m lies within 10k of that target; rows matching no target keep bucket 0.
data["m_bucket"] = 0
data["n+m_bucket"] = 0
for m in range(150000, 600000+1, 50000):
    near_m = (data["m"] - m).abs() < 10000
    data.loc[near_m, "m_bucket"] = m
    # NOTE(review): the offset 50000 is presumably the fixed vertex count n — confirm.
    data.loc[near_m, "n+m_bucket"] = 50000 + m

fig, axes = plt.subplots(figsize=(6, 2.5), ncols=2)
axes[0].set(ylim=(0, 9))
axes[1].set(ylim=(0, 1.25))
# NOTE(review): the loop walks over the groups of "n" but draws the full `data` on
# every axis (grouped_data is unused), so the panels differ only in their y-limits,
# and only as many panels are filled as there are distinct n — confirm whether
# per-n panels were intended.
for ax, (n, grouped_data) in zip(axes, data.groupby("n")):
    ax.grid(True)
    ax.tick_params(axis='x', which='major', bottom=True, labelsize=6)
    sns.scatterplot(x="n+m", y="time/(n+m) [μs]", hue="algo", style="algo", data=data, ax=ax, s=10, rasterized=True)
    sns.lineplot(x="n+m_bucket", y="time/(n+m) [μs]", errorbar=("pi", 50), hue="algo", style="algo", data=data, ax=ax)#, s=10, rasterized=True)
    ax.legend([], [], frameon=False)
axes[1].set(ylabel=None)
# `ax` is the last axis visited by the loop above; it receives the only legend.
ax.legend(bbox_to_anchor=(1.0, 1.0), frameon=False, markerscale=2, title="Algorithm")
sns.despine(left=True, trim=True)
fig.tight_layout()
plt.savefig("figures/girg-deg-scaling.pdf", dpi=300)
plt.show()

In [None]:
# GIRG scaling: normalized time [μs] against n+m as raw points, overlaid with a
# per-bucket line and a 50% percentile-interval band. The figure is displayed only.
# Filtering and copying in one step gives an independent frame, so the bucket
# column below is not assigned on a filtered slice (avoids pandas'
# SettingWithCopyWarning).
data = df[df["dataset"] == "girg"].copy()


# 21 quantile points of n+m delimit 20 buckets; every row is labelled with the
# lower edge of its bucket. Both interval ends are inclusive and buckets are
# visited in ascending order, so a value on a boundary ends up in the upper bucket.
qs = np.quantile(data["n+m"].values, np.linspace(0, 1, 21))
data["n+m_bucket"] = 0.0
for i in range(qs.shape[0] - 1):
    data.loc[(qs[i] <= data["n+m"]) & (data["n+m"] <= qs[i+1]), "n+m_bucket"] = qs[i] #(qs[i+1] + qs[i]) / 2

fig, axes = plt.subplots(figsize=(6, 2.5), ncols=2)
axes[0].set(ylim=(0, 6))
axes[1].set(ylim=(0, 2))
# NOTE(review): the loop walks over the groups of "n" but draws the full `data` on
# every axis (grouped_data is unused), so the panels differ only in their y-limits,
# and only as many panels are filled as there are distinct n — confirm whether
# per-n panels were intended.
for ax, (n, grouped_data) in zip(axes, data.groupby("n")):
    ax.grid(True)
    ax.tick_params(axis='x', which='major', bottom=True, labelsize=6)
    sns.scatterplot(x="n+m", y="time/(n+m) [μs]", hue="algo", style="algo", data=data, ax=ax, s=10, rasterized=True)
    sns.lineplot(x="n+m_bucket", y="time/(n+m) [μs]", errorbar=("pi", 50), hue="algo", style="algo", data=data, ax=ax)#, s=10, rasterized=True)
    ax.legend([], [], frameon=False)
axes[1].set(ylabel=None)
# `ax` is the last axis visited by the loop above; it receives the only legend.
ax.legend(bbox_to_anchor=(1.0, 1.0), frameon=False, markerscale=2, title="Algorithm")
sns.despine(left=True, trim=True)
fig.tight_layout()
plt.show()

## $G(n, m)$ scaling for fixed n
$G(n, m), n=c, O(n+m\log n) = O(m)$

In [None]:
# G(n, m) with fixed n: normalized time [μs] against the number of edges m,
# one panel per value of n.
data = df[(df["dataset"].isin(("gnm",))) & (df["m"] > 0)]
fig, axes = plt.subplots(figsize=(6, 3), ncols=2, sharey=True)
for ax, (n, grouped_data) in zip(axes, data.groupby("n")):
    # Smallest m whose instance has exactly one prime node and no series/parallel
    # nodes (presumably the point from which the whole graph is a single prime
    # module — confirm); NaN if there is no such instance.
    l = grouped_data.loc[(grouped_data["num_prime"] == 1) & (grouped_data["num_parallel"] == 0) & (grouped_data["num_series"] == 0), "m"].min()
    min_m = grouped_data["m"].min()
    max_m = grouped_data["m"].max()
    low = f"2^{{{int(np.log2(min_m))}}}" if min_m > 0 else "0"
    # "\\dots" yields the LaTeX command \dots; a bare "\d" in a non-raw string is
    # an invalid escape sequence and triggers a warning on current Python versions.
    ax.set(title=f"$n=2^{{{int(np.log2(n))}}}$, $m = {low} \\dots 2^{{{int(np.log2(max_m))}}}$")
    ax.grid(True)
    ax.set(ylim=(0, 3))
    ax.tick_params(axis='x', which='major', bottom=True, labelsize=10)
    sns.scatterplot(x="m", y="time/(n+m) [μs]", hue="algo", data=grouped_data, ax=ax, rasterized=True, style="algo")#s=5, edgecolor=None)
    # Solid guide at m = n, dashed guide at the threshold l computed above.
    ax.axvline(n, ls="-", c="k", alpha=0.5, zorder=0)
    ax.axvline(l, ls="--", c="k", alpha=0.5, zorder=0)
# Only the last panel keeps a legend, placed outside the axes.
axes[0].legend([], [], frameon=False)
axes[-1].legend(bbox_to_anchor=(1, 1), loc="upper left", frameon=False, title="Algorithm")
sns.despine(left=True, trim=True)
fig.tight_layout()
fig.subplots_adjust(hspace=0.05, wspace=0.1)
plt.savefig("figures/gnm-scaling-m.pdf", dpi=300)
plt.show()

## $G(n, m)$ scaling with the number of edges a fixed multiple of $n$

$G(n, m), m=8n, O(n + m\log n) = O(n \log n)$

In [None]:
# G(n, m) with m = 8n: normalized time [μs] against n+m, drawn with a broken
# y axis. The thin top row holds the values above 3.25, the tall bottom row the
# values up to 3.25. Both columns show the same data.
data  = df[df["dataset"] == "gnm-m=8n"].copy()

fig, axes = plt.subplots(figsize=(6, 3), nrows=2, ncols=2, height_ratios=[0.5, 4], sharey="row", sharex="col")

y = "time/(n+m) [μs]"
# axes.T iterates column by column: ax_col[0] is the top axis, ax_col[1] the bottom one.
for ax_col in axes.T:
    sns.scatterplot(x="n+m", y=y, hue="algo", data=data[data[y] > 3.25], ax=ax_col[0], rasterized=True, style="algo")#s=5, edgecolor=None)
    sns.scatterplot(x="n+m", y=y, hue="algo", data=data[data[y] <= 3.25], ax=ax_col[1], rasterized=True, style="algo")#s=5, edgecolor=None)
    #sns.lineplot(x="n+m", y=y, hue="algo", data=data, ax=ax_col[0], rasterized=True, style="algo")
    #sns.lineplot(x="n+m", y=y, hue="algo", data=data, ax=ax_col[1], rasterized=True, style="algo")

# Slanted tick markers drawn at the axis corners to indicate the cut in the y axis.
d = 0.75
kwargs = dict(marker=[(-1, -d), (1, d)], markersize=12,
              linestyle="none", color='k', mec='k', mew=1, clip_on=False)

# Top row: the range above the cut, with a dashed bottom spine and no tick labels.
for ax in axes[0]:
    ax.set_ylim(3.25, 26)
    ax.set(ylabel=None)
    ax.spines.bottom.set(alpha=0.5, ls="--")
    ax.xaxis.tick_top()
    ax.tick_params(labeltop=False)
    # Cut markers at the two lower corners (axes coordinates).
    ax.plot([0, 1], [0, 0], transform=ax.transAxes, **kwargs)

# Bottom row: the range below the cut, with a dashed top spine.
for ax in axes[1]:
    ax.set_ylim(0, 3.25)
    ax.spines.top.set(alpha=0.5, ls="--")
    ax.xaxis.tick_bottom()
    # Cut markers at the two upper corners (axes coordinates).
    ax.plot([0, 1], [1, 1], transform=ax.transAxes, **kwargs)


for ax in axes.flatten():
    ax.set(xlim=(10**5, 10**7))
    ax.grid(True)
    ax.set_axisbelow(True)
    ax.legend([], [], frameon=False)

# Only the right column is switched to a log x axis (x is shared per column,
# so presumably the top-right axis follows — confirm); the left column stays linear.
axes[1][1].set(xscale="log")
axes[0][1].legend(bbox_to_anchor=(1, 1), loc="upper left", frameon=False, title="Algorithm")

fig.tight_layout()
fig.subplots_adjust(hspace=0.05, wspace=0.15)
plt.savefig("figures/gnm-scaling-n-m.pdf", dpi=300)
plt.show()

# Simple graphs

In [None]:
# Simple graph families: normalized time [μs] against n+m, one panel each for the
# "empty", "path" and "cycle" datasets, sharing one y axis.
fig, axes = plt.subplots(figsize=(6, 3), ncols=3, sharey=True)
for ax, dataset, title in zip(axes, ("empty", "path", "cycle"), ("Empty graphs $E_n$", "Path graphs $P_n$", "Cycle graphs $C_n$")):
    data = df[df["dataset"] == dataset]
    ax.set(title=title)
    ax.set(ylim=(0, 3.5), axisbelow=True)
    ax.tick_params(axis='x', which='major', bottom=True, labelsize=8)
    ax.grid(True)
    sns.scatterplot(x="n+m", y="time/(n+m) [μs]", hue="algo", edgecolor=None, s=2, data=data, ax=ax, rasterized=True)
    # Clear the per-panel legend; a single legend is added after the loop.
    ax.legend([],[], frameon=False)
# NOTE(review): the legend is attached to the middle panel (axes[1]), not the last
# one — confirm the placement is intended.
axes[1].legend(bbox_to_anchor=(1.0, 1.0), markerscale=4, title="Algorithm")
sns.despine(left=True)
fig.tight_layout()
plt.savefig("figures/empty-path-cycle.pdf", dpi=300)
plt.show()

# Analysis of the $G(n, m)$ model

In [None]:
import networkx as nx
from tqdm.contrib.concurrent import process_map
from itertools import product
from subprocess import run
from tempfile import NamedTemporaryFile, TemporaryDirectory

import sys  
sys.path.insert(1, '../scripts')
import analyze

def write_metis_to_file(f, graph: nx.Graph):
    """Write ``graph`` as an adjacency listing to the binary file object ``f``.

    The nodes are relabelled to consecutive integers first. The first line
    holds the node and edge counts; each following line lists the neighbours
    of one node as 1-based indices separated by spaces. Afterwards the file is
    flushed and rewound to position 0.
    """
    relabelled = nx.convert_node_labels_to_integers(graph)
    header = f"{relabelled.number_of_nodes()} {relabelled.number_of_edges()}\n"
    f.write(header.encode())
    for node in relabelled.nodes:
        # Shift the 0-based integer labels to the 1-based indices of the output.
        neighbours = [str(neighbour + 1) for neighbour in relabelled[node]]
        f.write((" ".join(neighbours) + "\n").encode())
    f.flush()
    f.seek(0)

def modular_decomposition(graph: nx.Graph):
    """Run the external ``md`` binary on ``graph`` and analyse its output.

    The graph is written to a temporary input file, the binary is invoked with
    the "fracture" algorithm, and the file it writes into a temporary
    directory is handed to ``analyze.analyze_tree``, whose result is returned.

    Raises ``subprocess.CalledProcessError`` if the binary exits with a
    non-zero status.
    """
    with NamedTemporaryFile() as input_file, TemporaryDirectory() as tmp_dir:
        write_metis_to_file(input_file, graph)
        result_path = Path(tmp_dir) / "out"
        command = [
            "../target/release/md",
            "--input-type", "metis",
            "--input", input_file.name,
            "--algo", "fracture",
            "--output", result_path,
        ]
        # Fail loudly when the tool reports an error instead of analysing stale output.
        run(command, capture_output=True).check_returncode()
        return analyze.analyze_tree(result_path, only_header=False, timeout=10)

def generate_data(params):
    """Decompose one random G(n, m) graph and report its module counts.

    ``params`` is an ``(n, m, seed)`` triple; ``m`` is truncated to an integer.
    Returns three dicts, one per kind ("prime", "series", "parallel"), each
    holding n, m, seed, the kind and the count read from fields 5 to 7 of the
    comma-separated analysis result.
    """
    n, m, seed = params
    m = int(m)
    random_graph = nx.gnm_random_graph(n, m, seed=seed)
    fields = modular_decomposition(random_graph).split(",")
    counts = [int(field) for field in fields[5:8]]
    records = []
    for kind, num in zip(("prime", "series", "parallel"), counts):
        records.append({"n": n, "m": m, "seed": seed, "kind": kind, "num": num})
    return records

In [None]:
# Generating the module counts calls the external binary once per (n, m, seed)
# combination, so the branch is switched off; change the condition to True to
# rebuild `rows`. With the branch disabled, `rows` is an empty list.
# NOTE(review): the m values span 0 .. 3 * 2**10 for both values of n, so for
# n = 2**16 the ratio m/n stays below 0.05 — confirm m was not meant to scale with n.
if False:
    # 40 seeds for n = 2**10 and 10 seeds for n = 2**16, each combined with 81 values of m.
    params = [(n, m, seed) for n, repeats in [(2**10, 40), (2**16, 10)] for m, seed in product(np.linspace(0, 3 * 2**10, 81), range(repeats))]
    rows = process_map(generate_data, params, chunksize=16)
else:
    rows = []

In [None]:
# Module distribution in G(n, m): number of prime/series/parallel modules per
# vertex (num/n) against the edge density m/n, one line style per n.
# `rows` is empty unless the generation cell above is enabled. A frame built from
# an empty list has no "n" column, so the figure is skipped in that case instead
# of failing with a KeyError.
if rows:
    # Each entry of `rows` is a list of per-kind records; flatten them into one frame.
    data = pd.DataFrame([row for group in rows for row in group])
    data = data[data["n"].isin([2**10, 2**16])].copy()
    data["m/n"] = data["m"] / data["n"]
    data["num/n"] = data["num"] / data["n"]

    fig, ax = plt.subplots(figsize=(6, 2.5))
    sns.lineplot(x="m/n", y="num/n", hue="kind", style="n", rasterized=True, data=data, ax=ax)
    ax.legend(bbox_to_anchor=(1, 1), loc="upper left", frameon=False)
    sns.despine()
    plt.savefig("figures/gnm-module-distribution.pdf", dpi=300)
    plt.show()
else:
    print("No G(n, m) module data available; enable the generation cell above to draw figures/gnm-module-distribution.pdf.")