# Report and plots `fig:threshold__tradeoff__fine-tune`

In [None]:
import os
import tqdm

import numpy as np
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt

import warnings
warnings.simplefilter("ignore")

For this experiment we take the specified bunch of experiments,
and re-run their fine-tune stages for varying thresholds.

In [None]:
from cplxpaper.auto.utils import load_manifest
from cplxpaper.auto.utils import get_stage_snapshot
from cplxpaper.auto.parameter_grid import get_params, set_params


def prepare_manifest(base):
    """Derive the manifest of a threshold study from the experiment `base`.

    The manifest loaded from `base` is renamed, has its `device` and
    `threshold` cleared, loses the `dense` stage, gets `sparsify` cold-started
    from the snapshot of the base experiment with zero epochs, and has
    `fine-tune` set to restart without a reset and without its own snapshot.
    The stage order is reduced to `sparsify` followed by `fine-tune`.
    """
    manifest = load_manifest(base)

    # rename, and leave the device and the threshold unspecified
    manifest.update(
        __name__="Impact of the treshold on fine-tune on MusicNet",
        device=None,
        threshold=None,
    )

    stages = manifest["stages"]

    # the `dense` stage plays no part in this study
    del stages["dense"]

    # `sparsify` only restores the base snapshot: zero epochs means no training
    stages["sparsify"].update(
        snapshot=get_stage_snapshot("sparsify", base),
        n_epochs=0,
    )

    # `fine-tune` restarts the optimizer, yet inherits the weights
    stages["fine-tune"].update(reset=False, restart=True, snapshot=None)

    # the order is set by hand, since one stage has been removed
    manifest["stage-order"] = ["sparsify", "fine-tune"]

    return manifest

This generator exists in many places, and should eventually be imported from a single place:
* cplxpaper.auto.reports.\_\_main\_\_
* cplxpaper.auto.reports.utils

In [None]:
from cplxpaper.auto.utils import verify_experiment

def enumerate_experiments(manifests):
    """Yield the experiment path behind every usable manifest in `manifests`.

    A manifest is usable if it has the `.json` extension, its file name does
    not begin with a dot, and the path without the extension passes
    `verify_experiment`. The yielded value is that extension-less path.
    """
    for filename in manifests:
        root, extension = os.path.splitext(filename)
        head, name = os.path.split(root)

        # hidden files and non-json files are not manifests
        if extension == ".json" and not name.startswith("."):
            experiment = os.path.join(head, name)
            if verify_experiment(experiment):
                yield experiment

<br>

## Prepare the report

In [None]:
# `dry_run` disables writing of the generated manifests further below
# NOTE(review): `PREFIX` is not used by any of the cells visible here
PREFIX, dry_run = "addendum__", False

# the pickle with the compiled report (made absolute later on)
report = "./grids/musicnet__threshold__fine-tune.pk"

# Manifests of the base experiments, five per group.
# NOTE(review): `1/200` and `1/20` presumably are the `kl_div` coefficient,
#  and ARD / VD the sparsification method of each group -- confirm
experiments = [
# 1/200
    './grids/grid-fast/musicnet-fast__00/musicnet[003]-097.json',  # ARD
    './grids/grid-fast/musicnet-fast__01/musicnet[001]-045.json',
    './grids/grid-fast/musicnet-fast__02/musicnet[004]-123.json',
    './grids/grid-fast/musicnet-fast__03/musicnet[002]-071.json',
    './grids/grid-fast/musicnet-fast__04/musicnet[000]-019.json',

    './grids/grid-fast/musicnet-fast__00/musicnet[003]-084.json',  # VD
    './grids/grid-fast/musicnet-fast__01/musicnet[001]-032.json',
    './grids/grid-fast/musicnet-fast__02/musicnet[004]-110.json',
    './grids/grid-fast/musicnet-fast__03/musicnet[002]-058.json',
    './grids/grid-fast/musicnet-fast__04/musicnet[000]-006.json',

# 1/20
    './grids/grid-fast/musicnet-fast__00/musicnet[003]-101.json',  # ARD
    './grids/grid-fast/musicnet-fast__01/musicnet[001]-049.json',
    './grids/grid-fast/musicnet-fast__02/musicnet[004]-127.json',
    './grids/grid-fast/musicnet-fast__03/musicnet[002]-075.json',
    './grids/grid-fast/musicnet-fast__04/musicnet[000]-023.json',

    './grids/grid-fast/musicnet-fast__00/musicnet[003]-088.json',  # VD
    './grids/grid-fast/musicnet-fast__01/musicnet[001]-036.json',
    './grids/grid-fast/musicnet-fast__02/musicnet[004]-114.json',
    './grids/grid-fast/musicnet-fast__03/musicnet[002]-062.json',
    './grids/grid-fast/musicnet-fast__04/musicnet[000]-010.json',
]

<br>

In [None]:
# Deliberate stop for `Run All`: the cells below write the experiment grid
#  to disk and are meant to be run by hand.
assert False, '''Run cells below to create a grid for this experiment.'''

<br>

Enumerate the base experiments, the final sparsity of which to test.

In [None]:
# anchor the report pickle at an absolute, normalized path
report = os.path.normpath(os.path.abspath(report))

# the folder that receives the generated manifests
folder = os.path.abspath(os.path.join(
    ".", "grids", "musicnet__threshold__fine-tune"
))

# the launcher script sits next to the folder and shares its name
bash = f"{folder}.sh"

# `exist_ok=False` raises `FileExistsError` if the folder is already there,
#  so re-running this cell cannot silently reuse an existing grid
os.makedirs(folder, exist_ok=False)

For each base experiment we create a small grid of varying thresholds:
* we take $\tau$ from ~the same array as the `kind = "threshold"` report~ a coarser array
  * $\{\pm\tfrac{k}4\colon k=0..24\}$ is too fine and too wide
    - 980 experiments ~ 20 days 4x2
* but eventually use `kind = "trade-off"` report builder

In [None]:
# Half-integer thresholds from -4 to +4 inclusive, 17 values in total.
#  The finer and wider alternative is `i / 4` for `i` in -24..24.
taus = [0.5 * step for step in range(-8, 9)]

Create a pseudo-grid: cold start from a snapshot, drop irrelevant
parameters, and then `fine-tune`

In [None]:
import json
import copy
from cplxpaper.auto.parameter_grid import set_params


# Write one manifest per (base experiment, threshold) pair into `folder`.
for experiment in enumerate_experiments(map(os.path.abspath, experiments)):
    name = os.path.basename(experiment)

    # the template is shared by all thresholds of this base experiment
    config = prepare_manifest(experiment)
    for i, tau in enumerate(taus):
        # deep-copy, so that setting the threshold never touches the template
        local = set_params(copy.deepcopy(config), threshold=tau)

        # the manifests are numbered by the position of the threshold in `taus`
        manifest = os.path.join(folder, f"{name}__tau{i:02d}.json")
        if not dry_run:
            # the context manager flushes and closes the file right away,
            #  instead of leaving an open handle to the garbage collector
            with open(manifest, "w") as fout:
                json.dump(local, fout, indent=2)

Pick a name for the report pickle and compile a **bash** script for
building the threshold figure for each experiment in the list above.

In [None]:
import stat

devspec = """--devices "cuda:0" "cuda:1" "cuda:2" "cuda:3" --per-device 2"""

# the script runs the experiments first, and then compiles the report
commands = [
    f"""python -m cplxpaper.auto {devspec} "{folder}"\n""",
    "\n",
    f"""python -m cplxpaper.auto.reports {devspec} "trade-off" "{report}" "{folder}"\n""",
]
with open(bash, "w") as fout:
    fout.writelines(commands)

# the owner may read, write and execute the script, nobody else may
os.chmod(bash, stat.S_IRWXU)

bash

<br>

In [None]:
# Deliberate stop for `Run All`: the cells below need the report produced
#  by the generated bash script.
assert False, '''Run all below to make the figure.'''

<br>

## Build the table

Load the report constructed on the selected experiments.

In [None]:
from cplxpaper.auto.reports.utils import restore
from cplxpaper.auto.parameter_grid import reconstruct_grid

def build_report(filename):
    """Load the report `filename` and rebuild the grid of its experiments.

    Returns a pair: the full grid given by `reconstruct_grid`, and a list of
    `(experiment, flat_options, *results)` tuples, one per record of the
    report. An empty report yields `({}, [])`.
    """
    # Materialize the records first: transposing an empty report with
    #  `zip(*...)` leaves nothing to unpack and raises a `ValueError`,
    #  hence emptiness has to be checked on the records themselves.
    records = list(tqdm.tqdm(restore(filename), desc="analyzing report data"))
    if not records:
        return {}, []

    # each record is a `(worker, result)` pair, and the workers are not needed
    _, results = zip(*records)

    # compute the grid and flatten the manifests
    experiments, options, *results = zip(*results)
    full_grid, flat_options = reconstruct_grid(options)

    return full_grid, [*zip(experiments, flat_options, *results)]

Extract the score from the scorers' output.

In [None]:
from cplxpaper.auto.reports.utils import dict_get_one

def get_score(score):
    """Reduce the scorer output `score` to the metrics and the compression.

    Every entry of `score` is passed through `dict_get_one` with the names
    `pooled_average_precision` and `accuracy`. The compression is the total
    of the second components of the `pre-fine-tune` sparsity values over the
    difference between that total and the total of the first components.
    NOTE(review): the components presumably are the numbers of zeroed and of
    all parameters per layer -- confirm against the scorer.
    """
    before, after = score["pre-fine-tune"], score["post-fine-tune"]

    # something is horribly wrong if the fine-tuning changed the sparsity
    assert before["sparsity"] == after["sparsity"]

    result = {}
    for stage, values in score.items():
        result[stage] = dict_get_one(values, "pooled_average_precision", "accuracy")

    # column-wise totals over all entries of the sparsity record
    n_zer, n_par = (sum(column) for column in zip(*before["sparsity"].values()))
    result["compression"] = n_par / (n_par - n_zer)

    return result

Evaluate several grids and join them

In [None]:
# `raw_grid` is the reconstructed grid, `output` holds one tuple per experiment
raw_grid, output = build_report(report)

Alter the recovered grid

In [None]:
# Fields are dropped if they contain any of these as a substring:
ignored = (
    # service fields
    "__name__", "__timestamp__", "__version__", "device",

    # global model class settings
    "model__cls",

    # upcast is a service variable, which only complex models have
    #  and it is usually mirrored in `features` settings.
    "__upcast",
)

grid = {field for field in raw_grid
        if not any(token in field for token in ignored)}

# these are needed regardless of what the recovered grid contains
grid |= {
    "stages__sparsify__model__cls",
    "stages__sparsify__objective__kl_div",
    "threshold",  # ensure threshold is included
}

Index by the experiment **grid--folder** and prepare fields

In [None]:
# split the per-experiment tuples into columns
experiments, options, *rest = zip(*output)

# experiment paths are absolute!
df = pd.DataFrame(experiments, columns=["experiment",])

# The common folder is replaced literally (`regex=False`): a path is not a
#  regular expression, and the default of `regex` depends on the version of
#  pandas. The remainder is then split into the grid and the experiment name.
df = df["experiment"].str.replace(os.path.commonpath(experiments), "*", regex=False)\
                     .str.extract("^(?P<grid>.*)/(?P<experiment>[^/]*)$", expand=True)

# the index shared by all the tables built below
master_index = df.set_index(["grid", "experiment"]).index

Gradually construct the table of options

In [None]:
# start with an empty table over the master index; the columns are joined in below
parameters = pd.DataFrame(index=master_index)

Assign proper tags to models

In [None]:
from cplxpaper.auto.reports.utils import get_model_tag

def patched_get_model_tag(opt):
    """Return the tag of `get_model_tag` with legacy models marked by ` k3`.

    The `model` entry of the tag gets the suffix ` k3` if it mentions
    `DeepConvNet` and the `model__legacy` option is truthy. The option counts
    as `True` when it is missing (see `musicnet.models.base`).
    """
    tag = get_model_tag(opt)
    model = tag["model"]

    # the legacy flag is looked up for the `DeepConvNet` models only
    if "DeepConvNet" in model:
        if opt.get("model__legacy", True):
            model = model + " k3"

    return dict(tag, model=model)

# Model settings are dropped from the grid, since the tags joined below are
#  used in their place. The result of the comprehension is a list.
grid = [k for k in grid if not k.startswith((
    "model__",
    "stages__sparsify__model__"
))]

# one row of model tags per experiment, aligned with the master index
parameters = parameters.join(pd.DataFrame([
    *map(patched_get_model_tag, options)
], index=master_index))

Other fields' preprocessing.

In [None]:
# neither the dataset nor the features may be among the remaining grid fields
assert 'dataset' not in grid
assert 'features' not in grid

Only the essential experiment parameters should have remained by now.

In [None]:
# one column per remaining grid field, one row per experiment
parameters = parameters.join(pd.DataFrame([
    {g: opt[g] for g in grid} for opt in options
], index=master_index))

# display the fields that made it into the table
grid

Now collect the metrics. We need:
* **accuracy** performance on `dense`, `pre-fine-tune` and `post-fine-tune`
* **compression rate** from a `fine-tune` stage

In [None]:
# besides the experiment and its options a record must hold the scores only
scores, *tail = rest
assert not tail

# The scores are looked up under "test" or "test-256".
# NOTE(review): presumably `dict_get_one` picks whichever is present -- confirm
metrics = pd.DataFrame([
    get_score(dict_get_one(score, "test", "test-256")) for score in scores
], index=master_index)

Join the tables and rename unfortunate columns.

In [None]:
# parameters and metrics share the master index, so the join is row-aligned;
#  the long stage-qualified column names are shortened
df_main = parameters.join(metrics).rename(columns={
    "stages__sparsify__objective__kl_div": "kl_div",
    "stages__sparsify__snapshot": "snapshot"
})

In [None]:
# Shorten the snapshot paths by their common folder. The folder is matched
#  literally (`regex=False`): a path is not a regular expression, and if this
#  cell is run again the common path is "*", which is not even a valid pattern.
df_main["snapshot"] = df_main["snapshot"].str.replace(
    os.path.commonpath(df_main["snapshot"].to_list()), "*", regex=False)

In [None]:
# index by the base snapshot and the threshold; the threshold is the last
#  level, which the plotting code below aggregates over
df_main = df_main.set_index(["snapshot", "threshold"])

<br>

## Create the threshold plot

Decide on the target folder and computation cache.

In [None]:
# NOTE(review): both names are assigned anew further below, before any figure is saved
report_name = "figure__musicnet__threshold__fine-tune"

# absolute path inside the assets folder, two levels up
report_target = os.path.normpath(os.path.abspath(os.path.join(
    "../../assets", report_name
)))

A service plotting function to darken the specified colour

In [None]:
from matplotlib.ticker import FormatStrFormatter, FuncFormatter


def darker(color, a=0.5):
    """Scale the lightness of `color` by the factor `a`.

    The colour is taken to the HLS space, its lightness is multiplied by `a`
    and clipped to the unit interval, and the result is returned as an RGB
    triple. Adapted from this stackoverflow question_.

    .. _question: https://stackoverflow.com/questions/37765197/
    """
    from colorsys import hls_to_rgb, rgb_to_hls
    from matplotlib.colors import to_rgb

    hue, lightness, saturation = rgb_to_hls(*to_rgb(color))

    # clip from above first, and from below second
    scaled = min(a * lightness, 1)
    scaled = max(0, scaled)

    return hls_to_rgb(hue, scaled, saturation)

Group by all fields except for `threshold`:
* `model`, `kind`, `method`, `dataset`, `features` and `kl_div`

In [None]:
# show the parameter columns other than the `kl_div` ones, for reference
print([f for f in parameters.columns if "kl_div" not in f])

# the grouping key of the plots below: one curve per combination of these
fields = [
    'method',
    'model',
    'kind',
    'kl_div'
]

Handle colours

In [None]:
def kind_model_method_color(kind, model, method, kl_div):
    """Return the colour-cycle name for the given experiment setting.

    Only the complex (`"C"`) `DeepConvNet` settings are known: the colours
    `C0` to `C5` are handed out in pairs, VD first and ARD second, for the
    `kl_div` coefficients 1/200, 1/20 and 1/2000 in this order. Any other
    setting raises a `KeyError`.
    """
    palette = {}
    for row, coefficient in enumerate((1/200, 1/20, 1/2000)):
        # adjacent colours go to the two methods of the same coefficient
        for col, name in enumerate(("VD", "ARD")):
            palette["C", "DeepConvNet", name, coefficient] = f"C{2 * row + col}"

    return palette[kind, model, method, kl_div]

The threshold-compression map $\tau \mapsto c(\tau)$ is monotonically decreasing,
hence it can be inverted and used to parameterize the performance curve.

So we plot below
$$
    (c(\tau), p(\tau))_{\tau \in T}
        = (x, p\circ c^{-1}(x))_{x \in c(T)}
    \,, $$

In [None]:
def make_plot(ax, df):
    """Draw the compression-performance curves of `df` on the axes `ax`.

    The frame is grouped by the global `fields`. For every group the
    `pre-fine-tune` and `post-fine-tune` scores are averaged over the last
    index level and plotted against the harmonic mean of `compression`,
    together with min-max bands, a square marker at the threshold -0.5,
    annotations at the thresholds -3 and +3, and arrows from the pre- to
    the post-fine-tune score.

    NOTE(review): the last index level presumably is the threshold, since
    the frame is indexed by `snapshot` and `threshold` -- confirm.
    NOTE(review): reductions with `level=` were removed in pandas 2.0, where
    `groupby(level=-1)` takes their place.
    """
    # fixed view: logarithmic compression axis with a shaded band over [50, 500]
    ax.set_xscale('log')
    ax.set_xlim(41, 2000)
    ax.set_ylim(0.55, 0.75)
    ax.axvspan(50, 500, color="k", alpha=0.05, zorder=-10)

    ax.set_ylabel('average precision')
    # ticks read as a multiplier
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, p: f'$\\times${int(x):d}'))

    # group by tau and experiment spec and plot
    grouper, legend = df.groupby(fields), []
    for key, df in grouper:
        # NOTE: `df` is the group from here on, it shadows the argument
        df = df[["post-fine-tune", "pre-fine-tune", "compression"]].sort_index()
        label = dict(zip(fields, key))

        # harmonic mean for compression
        compression = 1. / (1. / df['compression']).mean(level=-1)
        m, min_, max_ = df.mean(level=-1), df.min(level=-1), df.max(level=-1)

        for field, marker in zip(['pre-fine-tune', 'post-fine-tune'], ['o', '*']):
            perf = m[field]
            line, = ax.plot(compression, perf, alpha=1.0, zorder=20,
                            marker=marker, lw=2, markersize=4,
                            markeredgecolor='k', markeredgewidth=0.5)

            # mark the point of the threshold -0.5, which has to be in the index
            tau = ax.scatter(compression[[-0.5]], perf[[-0.5]], s=20, zorder=30, lw=2,
                              marker='s', c=['k'])

            # min-max band in a lightened shade of the colour of the line
            ax.fill_between(compression, min_[field], max_[field], lw=0, 
                              color=darker(line.get_color(), 1.4), alpha=0.25, zorder=20)

            legend.append((line, "{field} ($C\!\!=\!\!{kl_div}$)".format(field=field, **label)))
    
        # annotate the curve at the thresholds -3 and +3 only
        for i, t in enumerate(compression.index):
            if t not in (-3., +3.):
                continue
            
            # put the text next to the higher of the two scores
            p = max(m.loc[t, 'pre-fine-tune'], m.loc[t, 'post-fine-tune'])
            ax.annotate(fr'$\tau\!\!=\!\!\!{t:+.0f}$', (compression[t], p), size='small',
                        xytext=(-5, 5), zorder=20, textcoords='offset points')

        # vertical arrows from the pre- to the post-fine-tune score
        z, a = m['post-fine-tune'], m['pre-fine-tune']
        ax.quiver(compression, a, compression * 0, z - a,
                  angles='xy', scale_units='xy', scale=1.,
                  width=0.005, alpha=0.125)

        # NOTE(review): the reference line and its label are inside the group
        #  loop, hence are drawn once per group on top of each other
        ax.axhline(0.729, color="k", alpha=0.125, zorder=-10, lw=2)
        ax.annotate("Trabelsi et al. (2018)", xy=(300, 0.729),  xycoords='data',
                    xytext=(300, 0.73), textcoords='data', alpha=0.75)

    # NOTE(review): `tau` is the marker of the last group, and is undefined
    #  if `df` has no groups at all
    legend.append((tau, r'$\tau\!\!=\!\!-\frac{1}{2}$'))

    handles, labels = zip(*legend)
    ax.legend(handles, labels, ncol=1, fontsize='small')
    ax.grid(axis='y', alpha=0.15)


Decide on the target folder and computation cache.

In [None]:
# a template: `{kind}` and `{model}` are filled in for each figure when saving
report_name = "figure__musicnet__threshold__{kind}__{model}.pdf"

# absolute path inside the assets folder, two levels up
report_target = os.path.normpath(os.path.abspath(os.path.join(
    "../../assets", report_name
)))

In [None]:
# One figure per (kind, model) pair, saved to the target filled with the pair.
for (kind, model), df in df_main.groupby(['kind', 'model']):
    # one panel per method, stacked vertically with a shared compression axis
    fig, axes = plt.subplots(2, 1, figsize=(6, 6), dpi=300, sharex=True)

    # A scalar key keeps `method` a plain label: with a one-element list
    #  recent versions of pandas yield 1-tuples, which would end up in the title.
    for ax, (method, df_method) in zip(axes, df.groupby('method')):
        ax.set_title(f"{kind}-{method} for {model} (MusicNet)")

        make_plot(ax, df_method)

    # the x-axis is shared, so only the bottom panel is labelled
    axes[-1].set_xlabel('compression')
    fig.tight_layout()

    # fill the template explicitly, instead of passing the whole namespace
    fig.savefig(report_target.format(kind=kind, model=model), dpi=300)

    # close this very figure, rather than whichever one pyplot deems current
    plt.close(fig)


In [None]:
# NOTE(review): presumably a deliberate stop for `Run All`, since the cells
#  below rebuild an older plot -- confirm
assert False

<br>

Older plot

In [None]:
# the older plot goes into a single file
report_name = "figure__musicnet__threshold.pdf"

# absolute path inside the assets folder, two levels up
report_target = os.path.normpath(os.path.abspath(os.path.join(
    "../../assets", report_name
)))

Make a crude plot

In [None]:
# Two stacked panels over a shared threshold axis: despite the names `ax_l`
#  is the top panel (performance) and `ax_r` the bottom one (compression).
fig, (ax_l, ax_r) = plt.subplots(2, 1, figsize=(8, 5), dpi=300, sharex=True)
fig.patch.set_alpha(1.0)
# ax_r = ax_l.twinx()

ax_l.set_title("The effect of $\\tau$ on performance and compression (MusicNet)")

# set up limits and axis labels
ax_l.set_ylabel("Average Precision")
ax_r.set_ylabel("$\\times$ compression")
ax_r.set_yscale("log")

ax_r.set_xlabel("Threshold $\\tau$")

ax_l.set_xlim(-3.6125, 3.6125)
ax_l.set_ylim(0.55, 0.75)
ax_r.set_ylim(40, 2000)

# Trabelsi et al. (2018)
ax_l.axhline(0.729, color="k", alpha=0.25, zorder=-10, lw=1)
ax_l.annotate("Trabelsi et al. (2018)", xy=(0, 0.75),  xycoords='data',
              xytext=(0.05, 0.935), textcoords='axes fraction', alpha=0.75)


# group by tau and experiment spec and plot
grouper = df_main.groupby(fields)
for key, df in tqdm.tqdm(grouper, desc="populating plots"):
    df = df[["post-fine-tune", "pre-fine-tune", "compression"]].sort_index()
    label = dict(zip(fields, key))

    # NOTE(review): reductions with `level=` were removed in pandas 2.0
    m, min_, max_ = df.mean(level=-1), df.min(level=-1), df.max(level=-1)
    # raises a `KeyError` for a setting without an assigned colour
    color = kind_model_method_color(**label)

    # the post-fine-tune score goes to the top panel, the compression to the bottom
    for ax, field, marker in zip([ax_l, ax_r], ["post-fine-tune", "compression"], ["", "o"]):
        ax.fill_between(m.index, min_[field], max_[field],
                          color=darker(color, 1.4), alpha=0.25, zorder=20)
        ax.plot(m[field], c=color, alpha=1.0, marker=marker, markersize=4,
                label="{kind} {model} {method} ($C={kl_div}$)".format(**label),
                zorder=25)
    
    # the pre-fine-tune score is drawn beneath, and is left out of the legend
    ax_l.fill_between(m.index, min_["pre-fine-tune"], max_["pre-fine-tune"],
                      color=darker(color, 1.4), alpha=0.25, zorder=10)
    ax_l.plot(m["pre-fine-tune"], c=color, alpha=1.0, marker="x", markersize=4,
#             label="{kind} {model} {method} ($C={kl_div}$)".format(**label),
              zorder=15)

ax_l.legend(ncol=1, loc=(0.55, .05))  # loc="center right")

# mark the threshold -0.5 on both panels
ax_l.axvline(-0.5, c="k", lw=2, zorder=2)
ax_r.axvline(-0.5, c="k", lw=2, zorder=2)
# ax_r.grid(axis='y', which='both')
# ax_l.grid(axis='y', which='both')

# a negative pad pulls the two panels together
plt.tight_layout(h_pad=-0.55)
fig.savefig(report_target, dpi=300)

# plt.show()
plt.close()

<br>