# PP-plot builder

In [None]:
import os

import numpy as np
from numpy import ndarray

from matplotlib import pyplot as plt

The pp-curve drawing procedure. We compare distributions using the pp-curve, which is analogous to the ROC curve: the pp-curve compares two independent distributions, while the ROC curve compares the true-positive and false-positive distributions.

In [None]:
from toybnb.scip.ecole.il.plotting import pp_curve

Shifted geometric mean for $\varepsilon > 0$ (used in Gasse et al. 2019):
    $$
    \operatorname{sgm}(x)
        = \exp\bigl\{
            \frac1m \sum_j \log \max\{1, x_j + \varepsilon\}
        \bigr\} - \varepsilon
    \,. $$

In [None]:
def geommean(arr: ndarray, axis: int = None, *, eps: float = 1.0) -> ndarray:
    r"""Shifted geometric mean of `arr` for $\varepsilon > 0$ (`eps`), as
    used in Gasse et al. 2019:
    $$
    \operatorname{sgm}(x)
        = \exp\bigl\{
            \frac1m \sum_j \log \max\{1, x_j + \varepsilon\}
        \bigr\} - \varepsilon
    \,. $$

    Parameters
    ----------
    arr : array-like
        The values to average. Anything accepted by `np.asarray` works,
        including plain lists and tuples.
    axis : int, optional
        The axis to average over, forwarded to `np.mean`. By default all
        elements are averaged.
    eps : float
        The shift $\varepsilon$ added before taking the logarithm and
        subtracted from the result.

    Returns
    -------
    ndarray
        The shifted geometric mean (a scalar when `axis` is None).
    """

    # coerce first, so that `+ eps` is an elementwise shift even for lists
    x = np.log(np.maximum(np.asarray(arr) + eps, 1.0))
    return np.exp(np.mean(x, axis=axis)) - eps

We keep experiment data in CSV form

In [None]:
import csv
from typing import Iterable


def read_csv(filename: str) -> Iterable[dict]:
    """Lazily read a comma-separated file as an iterable of dicts.

    The first row is the header. Every following row is yielded as a dict
    that maps the header names to the row's cells, which are kept as strings.
    Cells are paired with the header by `zip`, so whatever is surplus on
    either side is dropped. An empty file yields nothing.
    """
    # `newline=""` leaves the newline handling to the csv module
    with open(filename, "rt", newline="") as f:
        it = csv.reader(f, delimiter=",")

        # a bare `next(it)` on an empty file raises StopIteration inside this
        #  generator, which python turns into a RuntimeError (PEP 479)
        header = next(it, None)
        if header is None:
            return

        # represent each row as a dict keyed by the header
        for row in it:
            yield dict(zip(header, row))

A simple proc to collate a list of identically structured dicts into a dict of lists.

In [None]:
def collate(records: list[dict]) -> dict[..., ndarray]:
    """Turn records with identical fields into one array per field.

    The values of each field are gathered in the order of `records` and then
    converted with `np.array`. No attempt is made to fill in missing fields.
    """
    columns = {}
    for record in records:
        for name in record:
            if name not in columns:
                columns[name] = []
            columns[name].append(record[name])

    return dict((name, np.array(values)) for name, values in columns.items())

The pp-plots are a comprehensive way to decide which methods produce a superior number-of-nodes distribution. However, to track progress during training we use point statistics, which are less comprehensive and do not show the bigger picture.

In [None]:
def get_stats(metrics: dict, series: str) -> dict[str, dict[str, float]]:
    """Point statistics of `series`, averaged over the replications.

    `metrics` is keyed by two-element tuples. Its entries are grouped by the
    first element of the key, and the second one is ignored. For every entry
    we compute the size (`tot`), `median`, `mean`, `std` and the shifted
    geometric mean (`sgm`) of `series`, and then report the plain average of
    each statistic within its group.
    """
    per_method = {}
    for (method, _), data in metrics.items():
        values = data[series]
        summary = {
            "tot": len(values),
            "median": np.median(values),
            "mean": np.mean(values),
            "std": np.std(values),
            "sgm": geommean(values, eps=1.0),
        }
        per_method.setdefault(method, []).append(summary)

    # collate the per-replication summaries and average each statistic
    return {
        method: {stat: np.mean(vals) for stat, vals in collate(summaries).items()}
        for method, summaries in per_method.items()
    }

Load the stats from the csv in the format of Scavuzzo et al. 2022:
- `policy` -- the identifier of the branching policy
- `seed` -- the opaque id of the replication (fixed randomness of SCIP)
- `type` -- the kind of evaluation: `test` and `transfer` -- in their original code, `custom` -- in our patches
- `instance` -- the path to the instance used for evaluation
- `nnodes` -- the total number of nodes after solving
- `nlps` -- the number of LP solver iterations
- `stime` -- the solution time as measured by SCIP using CPU seconds (`clocktype=1`)
- `gap` -- the primal-dual gap achieved at the end of the bnb search
- `status` -- SCIP's reported solution status
- `walltime` -- the wall time of the solution process as measured by python
- `proctime` -- the cpu time of the solution process as measured by python

In [None]:
def load_metrics_scavuzzo(
    filename: str, *, keep: Iterable[str] = ("test", "custom")
) -> dict[tuple, dict[str, ndarray]]:
    """Load the metrics data in Scavuzzo et al. 2022 format.

    Parameters
    ----------
    filename : str
        The csv file with the `policy`, `seed`, `type`, `nnodes`, `nlps`,
        `stime` and `gap` columns.
    keep : str, or iterable of str
        The evaluation types (values of the `type` column) to load. Records
        of any other type are skipped.

    Returns
    -------
    dict
        Maps each `(policy, seed)` pair to a dict with the `n_nodes`,
        `n_lps`, `f_soltime` and `f_gap` arrays, which have one entry per
        kept record.
    """
    # a lone string names a single evaluation type, while any other iterable
    #  (tuple, list, set) is a collection of types. Wrapping a list in a tuple
    #  would make the membership test below reject every record.
    keep = (keep,) if isinstance(keep, str) else tuple(keep)

    metrics = {}
    # make sure NOT to pool values from different seeds,
    #  as they are independent runs on the same instance
    for rec in read_csv(filename):
        if rec["type"] not in keep:
            continue

        key = rec["policy"], int(rec["seed"])
        metrics.setdefault(key, []).append(
            {
                "n_nodes": int(rec["nnodes"]),
                "n_lps": int(rec["nlps"]),
                "f_soltime": float(rec["stime"]),
                "f_gap": float(rec["gap"]),
            }
        )

    return {k: collate(v) for k, v in metrics.items()}

Load the evaluation metrics data folder in rlbnb format

In [None]:
def cast_rlbnb(rec: dict) -> dict:
    """Convert a raw rlbnb record into the standardized numeric fields.

    Only the node count and the solving time are kept. Every other column,
    e.g. the unnamed '' index or the lp iterations, is dropped.
    """
    # go through `float`, so that counts written like "12.0" are accepted
    nodes = int(float(rec["num_nodes"]))
    seconds = float(rec["solving_time"])
    return {"n_nodes": nodes, "f_soltime": seconds}


def rename_rlbnb(filename: str) -> str:
    """Translate the base name of an rlbnb results file into a method name.

    `strong` becomes `internal:vanillafullstrong`. Names starting with
    `bipartite`, `tripartite` or `masked` are cut at their first underscore.
    Any other name is returned as is.
    """
    if filename == "strong":
        return "internal:vanillafullstrong"

    has_known_prefix = filename.startswith(("bipartite", "tripartite", "masked"))
    if has_known_prefix and "_" in filename:
        # keep only the part before the first underscore
        return filename.split("_", 1)[0]

    return filename


def load_metrics_rlbnb(path: str) -> dict[tuple, ndarray]:
    """Load the evaluation results stored in rlbnb format.

    `path` is either a single csv file, or a folder, in which case every
    `.csv` file directly inside it is loaded (subfolders are not visited).
    Each file is stored under the key `(name, 0)`, where `name` is its
    base name passed through `rename_rlbnb`, i.e. as a single-seed result.
    Raises NotImplementedError if `path` is neither a file nor a folder.
    """
    location = os.path.abspath(path)
    if os.path.isdir(path):
        # the first item yielded by `os.walk` lists the top-level folder
        folder, _, names = next(os.walk(location))

    elif os.path.isfile(path):
        folder, single = os.path.split(location)
        names = [single]

    else:
        raise NotImplementedError

    loaded = {}
    for name in names:
        stem, suffix = os.path.splitext(name)
        if suffix == ".csv":
            # standardize the records with `cast_*` and the name with `rename_*`
            rows = map(cast_rlbnb, read_csv(os.path.join(folder, name)))
            loaded[rename_rlbnb(stem), 0] = collate(rows)

    return loaded

Try to assign a unique fixed color to each method.

In [None]:
# colors of the policies as they are named in the Scavuzzo-format tables
scavuzzo_colors = {
    "internal:relpscost": "fuchsia",
    "internal:vanillafullstrong": "k",
    "gcnn:il": "C2",
    "gcnn:mdp": "C3",
    "gcnn:tmdp+DFS": "C4",
    "gcnn:tmdp+ObjLim": "C8",
}
# currently not plotted:
#   "internal:emulated-vanillafullstrong": "C1"
#   "gcnn:2022-12-26--00.53.24--best_params--tmdp+DFS.pkl": "C6"

# rlbnb and Scavuzzo have different naming conventions
rlbnb_colors = {
    # full strong branching, under the same name as in the Scavuzzo tables
    "internal:vanillafullstrong": "k",
    # random branching baseline
    "random": "C9",
    # RETRO by Parsonson et al. 2022
    "dqn": "C0",
    # DQN with head ensemble (50 atoms)
    "dqn_atoms": "C1",
    # IL with bipartite observations
    "bipartite": "C5",
    # IL with tripartite observations
    "tripartite": "C6",
    # IL with bipartite observations with a mask on input
    "masked": "C7",
}

Pick the table to plot

In [None]:
# The available evaluation tables. Exactly one `filename` should be active.
#
# * eval on Scavuzzo's 100 test instances
# filename = "/Users/ivannazarov/Github/repos_with_rl/copt/rl2branch/cauctions_20221221-200625.csv"
#
# * eval on OUR 1k instances w/o baseline heuristic
# filename = "/Users/ivannazarov/Github/repos_with_rl/copt/rl2branch/custom_20221222-021818.csv"
#
# * eval on OUR 1k instances with baseline heuristic
# filename = "/Users/ivannazarov/Github/repos_with_rl/copt/rl2branch/indset_20221223-112911.csv"
# filename = "/Users/ivannazarov/Github/repos_with_rl/copt/rl2branch/setcover_20221223-125129.csv"
# filename = "/Users/ivannazarov/Github/repos_with_rl/copt/rl2branch/ufacilities_20221223-141643.csv"
# filename = "/Users/ivannazarov/Github/repos_with_rl/copt/rl2branch/indset_from_cauc_20221225-215555.csv"
#   XXX it looks like the cauc-trained model works just as well as the indset one
# filename = "/Users/ivannazarov/Github/repos_with_rl/copt/rl2branch/cauc-retrained.csv"
#   XXX retrained seems to have replicated the original tmdp+dfs
filename = "/Users/ivannazarov/Github/repos_with_rl/copt/rl2branch/cauc_20221222-222137.csv"

# only the default evaluation types are kept by the loader
metrics_scavuzzo = load_metrics_scavuzzo(filename)

Load the data from rlbnb

In [None]:
# every csv in the results folder becomes a single-seed evaluation result
metrics_rlbnb = load_metrics_rlbnb(
    "/Users/ivannazarov/Github/repos_with_rl/copt/rlbnb/"
    "results/combinatorial_auction_n_items_100_n_bids_500/"
)

# merge the two sources: on a key clash the Scavuzzo entry takes precedence
metrics = dict(metrics_rlbnb)
metrics.update(metrics_scavuzzo)

colors = dict(rlbnb_colors)
colors.update(scavuzzo_colors)
# an extra color that may be added to `colors`:
#   "gcnn:2022-12-26--00.53.24--best_params--tmdp+DFS.pkl": "C4"

# to look at the rlbnb results only use
# metrics = metrics_rlbnb
# colors = rlbnb_colors

Pick the series to build the pp-plot for (`n_lps` and `f_gap` are only loaded from the Scavuzzo-format tables):
* `n_nodes`
* `n_lps`
* `f_soltime`
* `f_gap`

In [None]:
# the reference method, against which all the other methods are compared
# base = "internal:relpscost"  # very strong
# base = "gcnn:il"  # "internal:vanillafullstrong"
base = "internal:vanillafullstrong"

# the name of the collated metric to compare
series = "n_nodes"

Build the plot

In [None]:
# gather the chosen series by method and then by replication, leaving out
#  the methods, which have no color assigned to them
metric = {}
for (method, seed), data in metrics.items():
    if method not in colors:
        continue

    by_seed = metric.setdefault(method, {})
    by_seed[seed] = data[series]

# pool the replications of each method by taking the median along the first
#  axis (assumes that all replications of a method have the same length)
pooled = {
    method: np.median(list(by_seed.values()), axis=0)
    for method, by_seed in metric.items()
}

fig, ax = plt.subplots(1, 1, figsize=(5, 5), dpi=200)
for name, repl in metric.items():
    if name != base:
        # the pp curve of the pooled (median) data goes first
        p, q = pp_curve(x=pooled[base], y=pooled[name], num=None)
        ax.plot(p, q, label=name, c=colors[name])

        # then come the faint curves of the individual replications
        for data in repl.values():
            p, q = pp_curve(x=pooled[base], y=data, num=None)
            ax.plot(p, q, c=colors[name], alpha=0.15, zorder=-10)

# the base method is represented by the diagonal
ax.plot((0, 1), (0, 1), c=colors[base], zorder=10, alpha=0.25, label=base)
ax.set_xlim(-0.025, 1.025)
ax.set_ylim(-0.025, 1.025)
ax.set_aspect(1.0)
ax.legend(loc="best", fontsize="xx-small")

# fig.savefig(f"dump/tmdp__{os.path.basename(filename)}.pdf")

Now print the stats

In [None]:
# a fixed-width table: the left-aligned name and five right-aligned columns
header = f'{"name":<26} {"tot":>8} {"median":>8} {"sgm":>8} {"mean":>8} {"std":>8}'
row = "{nom:<26} {tot:>8.0f} {median:>8.0f} {sgm:>8.0f} {mean:>8.0f} {std:>8.0f}"

# the header followed by a rule of the same width
print(header, "\n" + len(header) * "-")

# one line per method with its statistics rounded to whole numbers
for name, stat in get_stats(metrics, series).items():
    line = row.format(nom=name, **stat)
    print(line)

<br>