# Experiment Analysis: MusicNet

In [1]:
import os
import tqdm
import json
import copy

In [None]:
import numpy as np

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

<br>

Import the helper that fully flattens a nested options dictionary.

In [None]:
from cplxpaper.auto.parameter_grid import flatten

Load performance results from each snapshot in the experiment.

In [None]:
from cplxpaper.auto.utils import load_snapshot

def from_snapshots(*snapshots):
    """Collect the per-stage performance stored in a set of snapshot files.

    Parameters
    ----------
    *snapshots : str
        Paths of the snapshot files. They are processed in sorted order.

    Returns
    -------
    results : dict
        Maps the base name of every snapshot file to a pair
        ``(stage, performance)`` read from that snapshot.
    options : dict
        The ``'options'`` entry of the last snapshot in sorted order, or an
        empty dict when no snapshots were given.
    """
    performance, experiment_options = {}, {}
    for path in sorted(snapshots):
        key = os.path.basename(path)
        payload = load_snapshot(path)

        # every snapshot carries the options; the last one read wins
        experiment_options = payload['options']

        # the 'stage' entry is a pair, of which only the first item is kept
        stage_name, _ = payload['stage']
        performance[key] = stage_name, payload['performance']

    return performance, experiment_options

Load an experiment from its snapshots, or from the cache when the cache covers all of them.

In [None]:
import re
import pickle


def load_experiment(folder, cache="cache.pk", reload=False):
    """Load the scorer results of one experiment from its snapshots or a cache.

    Parameters
    ----------
    folder : str
        Directory of the experiment. Files directly inside it whose names
        start with a digit and end in ``.gz`` are treated as snapshots.
    cache : str or None, default="cache.pk"
        File name of the pickle cache, relative to `folder`. ``None``
        disables both reading and writing of the cache.
    reload : bool, default=False
        If True, an existing cache is ignored and the snapshots are read
        again (and the cache is rewritten).

    Returns
    -------
    scores : dict
        Results keyed by snapshot file name, as built by `from_snapshots`.
    options : dict
        Experiment options, as returned by `from_snapshots`.
        Both are empty when the folder holds no snapshots and there is no
        usable cache.

    Raises
    ------
    AssertionError
        If `cache` is neither a string nor None.
    StopIteration
        If `os.walk` yields nothing for `folder` (e.g. it does not exist).

    Notes
    -----
    A cache that cannot be unpickled (for instance a file truncated by an
    interrupted write) is treated as missing instead of aborting the load.
    """
    assert cache is None or isinstance(cache, str)
    if cache is not None:
        cache = os.path.join(folder, cache)

    # only the top level of the folder is inspected
    folder, _, filenames = next(os.walk(folder))
    is_snapshot = re.compile(r"^\d+.*\.gz$").match
    snapshots = [name for name in sorted(filenames)
                 if is_snapshot(name) is not None]

    # load scorer results from the cache, if allowed and available
    scores, options = {}, {}
    if cache is not None and not reload and os.path.exists(cache):
        # NOTE: unpickling can execute arbitrary code, so the cache file
        #  must only ever come from a trusted location.
        try:
            with open(cache, "rb") as fin:
                scores, options = pickle.load(fin)

        except (pickle.UnpicklingError, EOFError, AttributeError,
                ImportError, IndexError, ValueError, TypeError):
            # corrupt or incompatible cache: fall back to the snapshots
            scores, options = {}, {}

    # reload everything from the originals if any snapshot is missing from
    #  the cached scores. Staleness is detected by file name only, so a
    #  snapshot that was overwritten in place is NOT picked up.
    if any(name not in scores for name in snapshots):
        paths = [os.path.join(folder, name) for name in snapshots]
        scores, options = from_snapshots(*paths)
        if cache is not None:
            with open(cache, "wb") as fout:
                pickle.dump((scores, options), fout)

    return scores, options

<br>

Set the dataset name, the reported metric, the experiment sources, and the scorer to use.

In [None]:
from cplxpaper.auto.utils import get_class

# Name of the dataset and of the metric that is read from the scorer results.
dataset_name, metric_name = "MusicNet", "pooled_average_precision"

# Scorer whose results are reported, and the folders with the experiments.
# Earlier setups, kept for reference:
#   sources, scorer_name = ["./runs/grid_trabelsi_legacy/"], "test_256"  # obsolete
#   "./runs/grid_cplx_fine_kl_div/"
#   "./runs/grid_cplx_fine_kl_div_v2/"
scorer_name = "test"
sources = ["./runs/grid_cplx_fine_kl_div_v3_fast/"]


Gather model performance summary.

In [None]:
import pandas as pd

def performance_summary(scores, *, scorer=None, metric=None):
    """Tabulate the score and the aggregated sparsity counts of every stage.

    Parameters
    ----------
    scores : dict
        Maps snapshot names to ``(stage, results)`` pairs, where
        ``results[scorer]`` is a dict holding the metric values and a
        ``"sparsity"`` mapping whose values are pairs of counts.
        (presumably ``(n_zeros, n_parameters)`` per entry -- TODO confirm)
    scorer : str, optional
        Key of the scorer to report. Defaults to the notebook-level
        `scorer_name`.
    metric : str, optional
        Key of the metric to report. Defaults to the notebook-level
        `metric_name`.

    Returns
    -------
    pandas.DataFrame
        One row per stage with the columns ``score``, ``n_zer`` and
        ``n_par``. If several snapshots share a stage, the last one wins.
    """
    # fall back to the notebook-level settings only when not given explicitly
    scorer = scorer_name if scorer is None else scorer
    metric = metric_name if metric is None else metric

    out = {}
    for stage, results in scores.values():
        # Collect performance metrics..
        score = results[scorer]

        # ... aggregate sparsity and metrics: sum the pairs component-wise.
        #  An empty sparsity mapping counts as zero parameters instead of
        #  failing to unpack.
        counts = list(score["sparsity"].values())
        n_zer, n_par = map(sum, zip(*counts)) if counts else (0, 0)
        out[stage] = {
            "score": score[metric],
            "n_zer": int(n_zer), "n_par": int(n_par)
        }

    return pd.DataFrame.from_dict(out, orient='index')

<br>

Collect results and reconstruct the grid

In [None]:
from collections import defaultdict

# Distinct values seen for every flattened option key, over all experiments.
grid_options = defaultdict(set)

# Option keys which are never recorded in `grid_options`.
ignore = {"__name__", "__timestamp__", "__version__", "device"}

# experiment folders are named like "<anything>[<replication>]-<number>"
experiment_pattern = re.compile(r"^.*?\[(\d+)\]-(\d+)$")

results = []
for source in sources:
    # only the immediate subfolders of a source are considered
    source, subfolders, _ = next(os.walk(source))
    for experiment in tqdm.tqdm(subfolders):
        match = experiment_pattern.match(experiment)
        if match is None:
            continue

        # parsed from the folder name, but not used further in this cell
        replication, exp_no = map(int, match.groups())

        # load scorer results from the snapshots
        scores, options = load_experiment(
            os.path.join(source, experiment),
            cache='cache.pk', reload=False)

        # an experiment without options has nothing to report
        if not options:
            continue

        # record every value taken by each option, in the order of the keys
        flat = flatten(options)
        for key, value in flat.items():
            if key in ignore:
                continue
            grid_options[key].add(value)

        summary_table = performance_summary(scores)
        results.append((experiment, summary_table, flat))

<br>

In [None]:
# Transpose the collected (experiment, summary, flat options) triples into
#  three parallel tuples. The unpacking fails if `results` is empty.
experiments, scores, manifests = zip(*results)

Finalize the grid variables

In [None]:
# The grid variables are the option keys which took more than one unique
#  value. Keys with "model__cls" in their name are skipped here, because
#  the model spec is added manually right below.
full_grid = []
for key, values in grid_options.items():
    if "model__cls" in key or len(values) <= 1:
        continue
    full_grid.append(key)

manual = ["stages__sparsify__model__cls"]
full_grid.extend(field for field in manual if len(grid_options[field]) > 0)

# upcast is a service variable, which only complex models have,
#  and it is usually mirrored in the `features` settings.
full_grid = [name for name in full_grid if not name.endswith("__upcast")]

# the main grid is the full grid without the keys ending in `__kl_div`
main_grid = [name for name in full_grid if not name.endswith('__kl_div')]

Compile the report spreadsheet.

In [None]:
# One row per experiment with the value of every grid variable
#  (None where the experiment does not have the key).
params = pd.DataFrame.from_dict(
    {
        name: {key: manifest.get(key, None) for key in full_grid}
        for name, manifest in zip(experiments, manifests)
    },
    orient="index",
)

# Stack the per-experiment summaries on top of each other; the outer level
#  of the resulting index holds the experiment and is named "expno".
scores = pd.concat(dict(zip(experiments, scores)), axis=0, names=["expno"])

In [None]:
# Move the innermost index level (the stage) into the columns, then collapse
#  the two-level column labels into single strings such as "score-dense".
wide_scores = scores.unstack(-1)
wide_scores.columns = wide_scores.columns.to_flat_index().map('-'.join)

# Attach the scores to the grid parameters of each experiment; the experiment
#  name ends up in a regular column called "index".
df = params.join(wide_scores).reset_index()

In [None]:
# Shorten the "<class '....models.X'>" and "<class '....feeds.Y'>" strings to
#  just "X" / "Y". The patterns are raw strings, since `\.` is not a valid
#  escape sequence in an ordinary string literal.
df = df.replace({
        # identify models by the sparsify stage model
        "stages__sparsify__model__cls": {r"^<class '.*?\.models\.(.*?)'>$": r"\1"},
        "features__cls": {r"^<class '.*?\.feeds\.(.*?)'>$": r"\1"}
    }, regex=True)

In [None]:
# Index the report by the main grid variables, then by the `kl_div` setting
#  of the sparsify stage and by the experiment name, and sort the rows.
# `axis` is passed by keyword: recent pandas no longer accepts it positionally.
df = df.set_index(
    [*main_grid, "stages__sparsify__objective__kl_div", "index"],
    append=False, drop=True,
).sort_index(axis=0)

In [None]:
# Show the rows whose first index level equals "complex.DeepConvNetVD".
# NOTE(review): assumes the first grid variable is the shortened model class -- confirm.
df.loc["complex.DeepConvNetVD"]

<br>

In [None]:
# Stage whose scores are related to sparsity: "sparsify" (or "fine-tune").
summary, stage = {}, "sparsify"

if main_grid:
    # one group per combination of the main grid variables; `.loc[k]` selects
    #  the group's rows by that key
    groups = ((k, g.loc[k]) for k, g in df.groupby(level=main_grid))

else:
    # no grid variables: everything is a single group named after the dataset
    groups = [(
        (dataset_name,), df
    )]

for k, g in groups:
    # mean and std of the dense-stage score over the group
    score_before = g["score-dense"].mean(), g["score-dense"].std()
    f_score, n_par, n_zer = g[f"score-{stage}"], g[f"n_par-{stage}"], g[f"n_zer-{stage}"]

    # (sparsity, score) pairs, ordered by increasing sparsity
    curve = pd.concat([n_zer / n_par, f_score], axis=1).to_numpy()
    order = curve[:, 0].argsort()

    summary[k] = score_before, curve[order]

In [None]:
import time
from matplotlib.ticker import FormatStrFormatter, FuncFormatter

# dttm = time.strftime("%Y%m%d-%H%M%S")

# Name the figure after the last folder in `sources`, taken from the list
#  itself rather than from a loop variable left over by another cell.
figurename = os.path.basename(os.path.normpath(sources[-1]))

# the figure is saved as "<figurename>__<stage>.pdf" under the assets folder
filename = os.path.join(
    "../../assets", f"{figurename}__{stage}.pdf")

Produce a plot (bad)

In [None]:
def _compression_tick(value, pos):
    """Label an x-axis tick with the compression rate as a whole number."""
    return f"{int(value):d}"


def _precision_tick(value, pos):
    """Label a y-axis tick with the score as a whole percentage."""
    return f"{value:.0%}"


fig, ax = plt.subplots(1, 1, figsize=(14, 5))
fig.patch.set_alpha(1.0)

for label, ((dense_mean, dense_std), points) in summary.items():
    sparsity, precision = points.T

    # a sparsity of `p` is shown as a compression rate of 1 / (1 - p)
    dots = ax.scatter(1 / (1 - sparsity), precision, label=label, s=15)

    # band of +/- 1.96 std around the mean dense score, in the group's colour
    half_width = 1.96 * dense_std
    ax.axhspan(dense_mean - half_width, dense_mean + half_width,
               alpha=0.1, color=dots.get_facecolor()[0], zorder=-99)

ax.legend(loc="lower left", ncol=1)

ax.set_title(f"Average Precision - Compression trade-off on MusicNet")

ax.set_ylabel("Average Precision")
ax.set_xlabel(r"$\times$ compressed")

ax.set_xscale("log")
ax.set_xlim(0.9, 2e3)
ax.xaxis.set_major_formatter(FuncFormatter(_compression_tick))
ax.yaxis.set_major_formatter(FuncFormatter(_precision_tick))

# ax.axvspan(50, 500, color="k", alpha=0.05, zorder=-10)  # for Trabelsi et al. with k=3
ax.axvspan(40, 300, color="k", alpha=0.05, zorder=-10)  # for Trabelsi et al. with k=6
# NOTE(review): 0.726 is presumably a reference score -- confirm its origin
ax.axhline(0.726, color="k", alpha=0.5, lw=1, zorder=-11)

fig.savefig(filename, dpi=300, transparent=False)

plt.show()
filename

In [None]:
# NOTE(review): always fails -- apparently a deliberate stop, so that running
#  all cells does not reach the cells below; confirm before removing.
assert False

<br>

In [None]:
def _mean_and_std(series):
    """Mean and std of `series` within each value of its first index level."""
    grouped = series.groupby(level=0)
    return grouped.mean(), grouped.std()


# `n_zer`, `n_par` and `g` are the ones left over from the last iteration
#  of the summary loop, i.e. they describe the last group only.
# Per-level statistics go through `groupby`, since the `level=` argument
#  of `mean` / `std` is not available in recent pandas.
m, s = _mean_and_std(n_zer / n_par)
m.plot(label="sparsity")
plt.fill_between(m.index, m-1.96*s, m+1.96*s, alpha=0.25)
plt.gca().set_xscale("log")
plt.legend(loc="lower left")

# the scores are drawn against a second y-axis
plt.twinx()
m, s = _mean_and_std(g["score-dense"])
m.plot(c="C1", label="dense")
plt.fill_between(m.index, m-1.96*s, m+1.96*s, alpha=0.25, color="C1")

m, s = _mean_and_std(g["score-fine-tune"])
m.plot(c="C2", label="fine-tune")
plt.fill_between(m.index, m-1.96*s, m+1.96*s, alpha=0.25, color="C2")
plt.legend(loc="lower right")

<br>