# Manifest generator for MNIST-like datasets

Train for $40$-$75$-$40$ epochs (dense, sparsify, fine-tune stages) with various kl-div coefficients and the following weighting schemes:
* sum of size-average kl-divs, i.e. average penalty from each layer
* sum of sums of kl-divs, i.e. consistent with theory

**NB** we use batch averaged target loss and **do not** multiply
by the number of samples in the dataset (for fp arithmetic stability)
-- hence in the sum kl-div case the largest coefficient must be
$\tfrac1{\text{n_samples}}$

$$
\frac1N \widehat{\mathcal{L}}_{\text{sgvb}}
    = \widehat{\mathbb{E}}_{z\sim B}
    \mathbb{E}_{\omega \sim q_\omega} \log p(z\mid \omega)
    - \frac1N \mathbb{E}_{\omega \sim q_\omega} \log\frac{q_\omega(\omega)}{\pi(\omega)}
\,. $$

* Mnist-like dataset:
  * MNIST, KMNIST, Fashion MNIST, EMNIST-Letters
    * emnist has 26 outputs, others - 10

* Feature-model
  * real-real: Raw, `.real.*` with `n_inputs=1`
  * cplx-real: Fourier, `.real.*` with `n_inputs=2`

  * real-cplx: Raw, `.cplx.*` with `n_inputs=1` needs to `upcast real to Cplx`
  * cplx-cplx: Fourier, `.cplx.*` with `n_inputs=1`


In [None]:
import os
import copy
import numpy as np

from itertools import product

Load default config

In [None]:
import json
from pkg_resources import resource_stream

# Read the default experiment configuration shipped as a resource of the
# `cplxpaper.mnist` package and parse it into `options`.
with resource_stream("cplxpaper.mnist", "template.json") as fin:
    options = json.loads(fin.read())

Prepare the main template:
* reset roots
* clear model definitions
* specify restarts and grad clips

In [None]:
from cplxpaper.auto.parameter_grid import get_params, set_params, special_params

# Root folder handed to both the train and the test dataset.
experiment_folder = os.path.expanduser(
    "~/Github/complex_paper/experiments/mnist/data"
)

# The three training stages, in the order their settings are listed below.
stage_names = ("dense", "sparsify", "fine-tune")

# Overrides for the loaded template. Insertion order is the order in which
# the keyword arguments reach `set_params`.
overrides = {
    # entirely reset datasets (a separate dict for each split)
    "datasets": {
        split: {"root": experiment_folder} for split in ("train", "test")
    },
    # by default use 10k train sample size
    "datasets__train__train_size": 10000,

    # attach feeds to the newly defined datasets
    "feeds__train__dataset": "train",
    "feeds__train__batch_size": 128,
    "feeds__train__pin_memory": False,
    "feeds__test__dataset": "test",
    "feeds__test__pin_memory": False,

    # specify state inheritance
    "stages__sparsify__restart": False,
    "stages__sparsify__reset": False,
    "stages__fine-tune__restart": True,
    "stages__fine-tune__reset": False,  # 2020-01-03 used to be True
}

# L2 clip gradients: seems to be always better to do so.
overrides.update({
    f"stages__{stage}__grad_clip": 0.5 for stage in stage_names
})

# train 40-75-40
overrides.update({
    f"stages__{stage}__n_epochs": n_epochs
    for stage, n_epochs in zip(stage_names, (40, 75, 40))
})

# clean models: the global definition first, then every per-stage one
overrides["model"] = {}
overrides.update({f"stages__{stage}__model": {} for stage in stage_names})

options = set_params(options, **overrides)

Sample some random seeds for train splits.

In [None]:
# Draw 13 candidate seeds from [0, 2**31 - 1); the notebook displays them
# so that seeds can be picked from the output.
np.random.randint(2**31 - 1, size=13)

<br>

## Datasets and splits

No `123`, `0xdeadc0de` or `42` bullshit!
Pick opaque random seeds from the `np.random.randint` output above.

In [None]:
from cplxpaper.mnist import dataset as mnist_like

# Per-dataset overrides, keyed by dataset name. Each row of the table is
# (name, train split seed, train class, test class, number of outputs).
datasets = {
    name: {
        "datasets__train__random_state": seed,
        "datasets__train__cls": str(train_cls),
        "datasets__test__cls": str(test_cls),
        "model__n_outputs": n_outputs,
    }
    for name, seed, train_cls, test_cls, n_outputs in (
        ("mnist", 1_641_730_374,
         mnist_like.MNIST_Train, mnist_like.MNIST_Test, 10),
        ("kmnist", 102_048_205,
         mnist_like.KMNIST_Train, mnist_like.KMNIST_Test, 10),
        ("fashion-mnist", 1_526_761_432,
         mnist_like.FashionMNIST_Train, mnist_like.FashionMNIST_Test, 10),
        ("emnist", 605_446_338,
         mnist_like.EMNIST_Letters_Train, mnist_like.EMNIST_Letters_Test, 26),
    )
}

<br>

## KL divergence term structure

Specify how the kl-div terms within each layer are reduced, and the base multiplier $C$ of the term.

In [None]:
# Settings of the kl-div objective term, keyed by the reduction mode.
# The "sum" coefficient is 1 / n_samples (10k train sample size).
objective_kl_div = {
    reduction: {
        "objective_terms__kl_div__reduction": reduction,
        "objective_terms__kl_div__coef": coef,
    }
    for reduction, coef in (("mean", 1.0), ("sum", 1e-4))
}

Mean yields ELBO
$$
    \sum_i \mathbb{E}_{W \sim q_W} \log p(z_i\,\mid W)
    - \lambda \sum_k \frac{C}{\lvert W_k\rvert}
        \sum_{w\in W_k} \mathbb{E}_{w\sim q_w} \log \frac{q_w(w)}{\pi_w(w)}
\,.$$

Sum yields
$$
    \sum_i \mathbb{E}_{W \sim q_W} \log p(z_i\,\mid W)
    - \lambda C \sum_k \sum_{w\in W_k} \mathbb{E}_{w\sim q_w} \log \frac{q_w(w)}{\pi_w(w)}
\,.$$

<br>

## $\mathbb{R}$-Model stages

In [None]:
from cplxpaper.mnist.models import real

# Model classes of the real-valued networks, keyed by architecture name.
# Each row is (name, default class, class for the "sparsify" stage,
# class for the "fine-tune" stage).
real_model_stages = {
    name: {
        "model__cls": [str(base)],
        "stages__sparsify__model__cls": [str(ard)],
        "stages__fine-tune__model__cls": [str(masked)],
    }
    for name, base, ard, masked in (
        ("twolayerdense", real.TwoLayerDenseModel,
         real.TwoLayerDenseModelARD, real.TwoLayerDenseModelMasked),
        # SKIP: "simpledense" -- real.SimpleDenseModel,
        #       real.SimpleDenseModelARD, real.SimpleDenseModelMasked
        ("simpleconv", real.SimpleConvModel,
         real.SimpleConvModelARD, real.SimpleConvModelMasked),
    )
}

<br>

## $\mathbb{C}$-model stages

In [None]:
import cplxpaper.mnist.models.complex as cplx

# Model classes of the complex-valued networks, keyed by architecture name.
# Each row is (name, default class, class for the "sparsify" stage,
# class for the "fine-tune" stage).
cplx_model_stages = {
    name: {
        "model__cls": [str(base)],
        "stages__sparsify__model__cls": [str(ard)],
        "stages__fine-tune__model__cls": [str(masked)],
    }
    for name, base, ard, masked in (
        ("twolayerdense", cplx.TwoLayerDenseModel,
         cplx.TwoLayerDenseModelARD, cplx.TwoLayerDenseModelMasked),
        # SKIP: "simpledense" -- cplx.SimpleDenseModel,
        #       cplx.SimpleDenseModelARD, cplx.SimpleDenseModelMasked
        ("simpleconv", cplx.SimpleConvModel,
         cplx.SimpleConvModelARD, cplx.SimpleConvModelMasked),
    )
}

<br>

## Admissible feature-model pairings

In [None]:
from cplxpaper.auto import feeds

### Pairings for $\mathbb{R}$-model

In [None]:
# Feature settings that can be paired with a real-valued model. Every value
# is wrapped in a one-element list. `model__upcast` is never set here: it is
# not applicable to real models.
features_real = {
    # raw features on a single input
    "real": dict(
        features=[dict(cls=str(feeds.FeedRawFeatures))],
        model__n_inputs=[1],
    ),
    # SKIP: "cplx-fft-abs" -- FeedFourierFeatures with signal_ndim=2,
    #       shift=True, cplx=False and model__n_inputs=[1]
    # Fourier features with `cplx=True`, fed to the model as two inputs
    "cplx-fft-raw": dict(
        features=[dict(
            cls=str(feeds.FeedFourierFeatures),
            signal_ndim=2,
            shift=True,
            cplx=True,
        )],
        model__n_inputs=[2],
    ),
}

<br>

### Pairings for $\mathbb{C}$-model

In [None]:
# Feature settings that can be paired with a complex-valued model. Every
# value is wrapped in a one-element list.
features_cplx = {
    # raw real features: single input, with `model__upcast` switched on
    "real": dict(
        features=[dict(cls=str(feeds.FeedRawFeatures))],
        model__n_inputs=[1],
        model__upcast=[True],
    ),
    # SKIP: "cplx-fft-abs" -- FeedFourierFeatures with signal_ndim=2,
    #       shift=True, cplx=False, model__n_inputs=[1], model__upcast=[True]
    # Fourier features with `cplx=True`: single input, no upcast
    "cplx-fft-raw": dict(
        features=[dict(
            cls=str(feeds.FeedFourierFeatures),
            signal_ndim=2,
            shift=True,
            cplx=True,
        )],
        model__n_inputs=[1],
        model__upcast=[False],
    ),
}

In [None]:
# Merge every (feature, model) pairing into one parameter dict, separately
# for the real and the complex families. Inner keys are (feature, model)
# tuples; on a key clash the model-stage entry overrides the feature entry.
features_stages = {
    "real": {
        (fea, mdl): {**features_real[fea], **real_model_stages[mdl]}
        for fea, mdl in product(features_real, real_model_stages)
    },
    "cplx": {
        (fea, mdl): {**features_cplx[fea], **cplx_model_stages[mdl]}
        for fea, mdl in product(features_cplx, cplx_model_stages)
    },
}

<br>

## Plan of experiments: mnist-like

KL-divergence term $\lambda$ settings -- directly affects sparsification.

In [None]:
# Grid of kl-div coefficients: three log-spaced sweeps over 1e-7 .. 1e-2
# (scaled by 1.0, 3.3 and 6.6) followed by a linear sweep over 0.1 .. 1.0.
# `np.unique` sorts the values and drops duplicates.
log_sweeps = [scale * np.logspace(-7, -2, 6) for scale in (1.0, 3.3, 6.6)]
linear_sweep = 1.0 * np.linspace(0.1, 1, 10)
kl_divs = np.unique(np.concatenate([*log_sweeps, linear_sweep]))

# displayed by the notebook: size of the grid and the grid itself
len(kl_divs), kl_divs

In [None]:
# Grid axes shared by every experiment setting.
base_grid = dict(
    # 5 full replications of the same experiment
    n_copy=list(range(5)),

    # kl-div weights come from a moderately sized grid
    stages__sparsify__objective__kl_div=kl_divs,
)

5 × 28 = 140 base experiments per setting (5 replications × 28 kl-div coefficients)

In [None]:
from itertools import product

# One fully specified base config per (kl-div reduction, dataset) pair.
# Each config starts from its own deep copy of `options`.
grids = {
    (reduction, dataset): set_params(
        copy.deepcopy(options),
        **datasets[dataset],
        **objective_kl_div[reduction],
    )
    for reduction, dataset in product(objective_kl_div, datasets)
}

<br>

In [None]:
import tqdm
from sklearn.model_selection import ParameterGrid
from cplxpaper.auto.utils import get_class

# Write one JSON manifest per experiment into
# ./grids/<reduction>__<dataset>/.
base_folder = os.path.abspath("./grids")
# The output root is never created here: it must already exist.
assert os.path.isdir(base_folder)


for key, settings in grids.items():
    # one sub-folder per key of `grids`, its parts joined by "__"
    folder = os.path.join(base_folder, "__".join(key))
    # no `exist_ok`: raises FileExistsError if the folder is already there,
    # so an earlier batch of manifests is not silently overwritten
    os.makedirs(folder)

    for kind, details in features_stages.items():
        for (fea, mdl), param in details.items():
            # full cartesian product of the shared axes and this pairing
            pargrid = ParameterGrid([{**base_grid, **param}])
            for i, par in enumerate(tqdm.tqdm(pargrid)):
                # NOTE(review): `special_params` presumably splits off
                # specially handled keys; none are expected -- confirm.
                par, special = special_params(**par)
                assert not special

                local = set_params(copy.deepcopy(settings), **par, device=None)
                # the replication index goes into the file name only
                n_copy = local.pop("n_copy")

                # format the name
                manifest = f"{kind}__{mdl}__{fea}__{n_copy}__{i:04d}.json"
                filename = os.path.join(folder, manifest)
                # close the file deterministically so the manifest is
                # flushed to disk before the next one is opened
                with open(filename, "w") as fout:
                    json.dump(local, fout, indent=2)

In [None]:
# Always raises AssertionError when this cell is executed.
# NOTE(review): presumably a guard that stops a full notebook run before the
# cells below -- confirm.
assert False

<br>

In [None]:
import tqdm
from cplxpaper.auto.utils import get_class

# Write one JSON manifest per grid point, naming each file after the
# dataset, feature kind, model class and replication index.
# NOTE(review): `grid` is not defined in the visible part of the notebook,
# and `folder` is whatever the manifest loop above left behind -- confirm
# both before running this cell.
for i, par in enumerate(tqdm.tqdm(grid)):
    par, special = special_params(**par)
    assert not special

    local = set_params(copy.deepcopy(options), **par, device=None)
    n_copy = local.pop("n_copy")

    # format the name
    # class name of the test dataset with its last "_"-separated part dropped
    dataset = get_class(local["datasets"]["test"]["cls"]).__name__
    dataset = dataset.rsplit("_", 1)[0]
    # features with `signal_ndim == 2` are labelled "cplx", the rest "real"
    kind = "cplx" if local["features"].get("signal_ndim", 1) == 2 else "real"
    model = get_class(local["model"]["cls"]).__name__

    manifest = f"{dataset}-{kind}-{model}-{n_copy} {i:04d}.json"
    filename = os.path.join(folder, manifest)
    # close the file deterministically instead of leaking the handle
    with open(filename, "w") as fout:
        json.dump(local, fout)

<br>