# PP-plot builder

In [None]:
import os

import numpy as np
from numpy import ndarray

from matplotlib import pyplot as plt

We keep experiment data in CSV form

In [None]:
import csv
from typing import Iterable


def read_csv(filename: str) -> Iterable[dict]:
    """Lazily read a comma-separated file as an iterable of dicts.

    The first row of the file is taken as the header. Every following row is
    yielded as a dict that maps the header names to the raw string cells of
    that row (no type conversion is done here).

    Rows are paired with the header by `zip`, so surplus cells in a row, or
    header names with no matching cell, are silently dropped. An empty file
    yields no records.
    """
    # `newline=""` lets the stdlib's csv reader deal with the line endings
    with open(filename, "rt", newline="") as f:
        it = csv.reader(f, delimiter=",")

        # get the header; an empty file has none, in which case we produce
        # nothing rather than let `StopIteration` leak out of the generator,
        # where it would be re-raised as a `RuntimeError` (PEP 479)
        header = next(it, None)
        if header is None:
            return

        # represent the rows as dicts keyed by the header names
        for row in it:
            yield dict(zip(header, row))

A simple procedure to collate a list of identically structured dicts into a dict of arrays.

In [None]:
def collate(records: list[dict]) -> dict[..., list]:
    """Turn a list of records into a dict of per-field numpy arrays.

    Every field met in the records gets one array, which holds the values of
    that field in the order of the records. The fields keep the order in
    which they are first seen. No check is made that each record has every
    field: the records are assumed to be identically structured.
    """
    columns = {}
    for record in records:
        for name in record:
            # start a new column the first time we see the field
            if name not in columns:
                columns[name] = []

            columns[name].append(record[name])

    # convert the gathered columns into arrays
    collated = {}
    for name, values in columns.items():
        collated[name] = np.array(values)

    return collated

Pick the table to plot

In [None]:
# The candidate result tables. Several assignments may be left uncommented:
# the LAST uncommented one is the table that gets plotted.

# evaluation on the 100 test instances of Scavuzzo
filename = "/Users/ivannazarov/Github/repos_with_rl/copt/rl2branch/cauctions_20221221-200625.csv"

# evaluation on OUR 1k instances, without the baseline heuristic
# filename = "/Users/ivannazarov/Github/repos_with_rl/copt/rl2branch/custom_20221222-021818.csv"

# evaluation on OUR 1k instances, with the baseline heuristic
# (the table is incomplete due to a signalling issue)
filename = "/Users/ivannazarov/Github/repos_with_rl/copt/rl2branch/custom_20221222-120628.csv"

We use the table format as in the code repo of Scavuzzo et al. 2022:
- `policy` -- the identifier of the branching policy
- `seed` -- the opaque id of the replication (fixed randomness of SCIP)
- `type` -- the kind of evaluation: `test` and `transfer` -- in their original code, `custom` -- in our patches
- `instance` -- the path to the instance used for evaluation
- `nnodes` -- the total number of nodes after solving
- `nlps` -- the number of LP solver iterations
- `stime` -- the solution time as measured by SCIP using CPU seconds (`clocktype=1`)
- `gap` -- the primal-dual gap achieved at the end of the bnb search
- `status` -- SCIP's reported solution status
- `walltime` -- the wall time of the solution process as measured by python
- `proctime` -- the cpu time of the solution process as measured by python

Load the stats

In [None]:
# Gather the per-instance solver statistics of every run, where a run is
# a (policy, seed) pair
metrics = {}
for row in read_csv(filename):
    # rows of any evaluation type other than `test` and `custom` are ignored
    if row["type"] in ("test", "custom"):
        # make sure not to pool values from different seeds
        run = row["policy"], int(row["seed"])

        # the csv cells are strings, so convert them to numbers here
        stats = {
            "n_nodes": int(row["nnodes"]),
            "n_lps": int(row["nlps"]),
            "f_soltime": float(row["stime"]),
            "f_gap": float(row["gap"]),
        }

        if run not in metrics:
            metrics[run] = []
        metrics[run].append(stats)

# Keep at most the first 996 records of each run and collate them by field.
# NOTE(review): 996 is presumably the number of instances that every run of
#  the incomplete table managed to finish -- confirm before changing tables
metrics = {run: collate(records[:996]) for run, records in metrics.items()}

The pp-curve drawing procedure. We compare distributions using the pp-curve, which is analogous to the ROC curve: the pp-curve compares two independent distributions, while the ROC curve compares the true-positive and false-positive distributions.

In [None]:
from toybnb.scip.ecole.il.plotting import pp_curve

Pick the series to build the pp-curve for:
* `n_nodes`
* `n_lps`
* `f_soltime`
* `f_gap`

In [None]:
# the statistic to compare the policies on: a key of the loaded metrics
series = "n_nodes"

# the reference policy, against which the other policies are compared
# base = "internal:relpscost"  # very strong
# base = "gcnn:il"  # "internal:vanillafullstrong"
base = "internal:vanillafullstrong"

# the matplotlib colour of each policy: the policies that are not listed
# here are left out of the plot
colors = {
    "internal:relpscost": "C0",
    "internal:vanillafullstrong": "k",
    # "internal:emulated-vanillafullstrong": "C1",
    "gcnn:il": "C2",
    "gcnn:mdp": "C3",
    "gcnn:tmdp+DFS": "C4",
    "gcnn:tmdp+ObjLim": "C5",
}
# base = "gcnn:il"  # "internal:vanillafullstrong"

Build the plot

In [None]:
# regroup the chosen series as policy -> seed -> values, and skip the
# policies, which have no colour assigned
metric = {}
for (policy, seed), stats in metrics.items():
    if policy not in colors:
        continue

    metric.setdefault(policy, {})[seed] = stats[series]

# pool the replications of each policy by taking the median over the seeds
# (axis 0 runs over the seeds, so all seeds must have equally many values)
pooled = {}
for policy, per_seed in metric.items():
    pooled[policy] = np.median(list(per_seed.values()), axis=0)

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 5), dpi=200)
for name, repl in metric.items():
    # the base policy itself is represented by the diagonal drawn below
    if name != base:
        # plot the pp-curve of the pooled (median) values first, and then
        # the faint pp-curves of the individual seeds behind it
        p, q = pp_curve(x=pooled[base], y=pooled[name], num=None)
        ax.plot(p, q, label=name, c=colors[name])
        for data in repl.values():
            p, q = pp_curve(x=pooled[base], y=data, num=None)
            ax.plot(p, q, c=colors[name], alpha=0.15, zorder=-10)

# the diagonal, in the colour and under the name of the base policy
ax.plot((0, 1), (0, 1), c=colors[base], zorder=10, alpha=0.25, label=base)

# the unit square with a small margin and equally scaled axes
lo, hi = -0.025, 1.025
ax.set_xlim(lo, hi)
ax.set_ylim(lo, hi)
ax.set_aspect(1.0)
ax.legend(loc="best", fontsize="xx-small")

# fig.savefig(f"dump/tmdp__{os.path.basename(filename)}.pdf")

<br>