# Manifest generator for CIFAR10 : real vs complex

In [None]:
import os
import copy
import numpy as np

from itertools import product

Load default config

In [None]:
import json
from pkg_resources import resource_stream

# Read the JSON template shipped inside the `cplxpaper.cifar.models.vgg`
# package; the parsed result is the base `options` the notebook specializes.
with resource_stream("cplxpaper.cifar.models.vgg", "template.json") as template:
    options = json.load(template)

Define datasets

In [None]:
from cplxpaper.cifar import dataset

# Per-dataset grid fragment: each double-underscore key maps to a list of
# candidate values (here every list has exactly one element).
dataset_variety = {
    "cifar10": dict(
        datasets__train__cls=[str(dataset.AugmentedCIFAR10_Train)],
        # NOTE(review): `disable` is False, which presumably leaves the random
        # crop and horizontal flip augmentation switched on -- confirm
        datasets__train__disable=[False],
        datasets__test__cls=[str(dataset.AugmentedCIFAR10_Test)],
        model__n_outputs=[10],
    ),
}

Enumerate all possible model `combinations`

In [None]:
from cplxpaper.cifar import models
from itertools import product, chain

# For each number field build every (first, second, third) triple of model
# classes: the plain VGG, one of the two variants VGGARD / VGGVD, and the
# masked VGG. With one choice in the first and third slot this gives two
# triples per field.
model_combinations = {
    name: list(product(
        [family.VGG],
        [family.VGGARD, family.VGGVD],
        [family.VGGMasked],
    ))
    for name, family in (
        ("real", models.vgg.real),
        ("complex", models.vgg.complex),
    )
}

In [None]:
# Turn every triple of model classes into a grid fragment that assigns the
# stringified class to the `dense`, `sparsify` and `fine-tune` stages.
model_variety = {}

for name, combinations in model_combinations.items():
    # The loop variable must not be called `models`: that would rebind the
    # imported `cplxpaper.cifar.models` module to a tuple and break any later
    # re-run of the cell that builds `model_combinations`.
    for stage_classes in combinations:
        m_dense, m_sparsify, m_masked = map(str, stage_classes)
        model_variety.setdefault(name, []).append({
            "stages__dense__model__cls": [m_dense],
            "stages__sparsify__model__cls": [m_sparsify],
            "stages__fine-tune__model__cls": [m_masked],
        })

Update the template with correct data specification

In [None]:
# Replace the data-related sections of the template wholesale. Dataset and
# feature classes stay None here because the grid entries provide them.
# NOTE(review): the dataset root is an absolute, machine-specific path.
options.update(
    datasets={
        "train": dict(
            cls=None,
            root="/home/ivan.nazarov/Github/complex_paper/experiments/cifar/data",
        ),
        "test": dict(
            cls=None,
            root="/home/ivan.nazarov/Github/complex_paper/experiments/cifar/data",
        ),
    },
    features=dict(cls=None),
    feeds={
        # shuffled batches of 128 drawn from the `train` dataset
        "train": dict(
            cls="<class 'torch.utils.data.dataloader.DataLoader'>",
            dataset="train",
            batch_size=128,
            shuffle=True,
            pin_memory=False,
            n_batches=-1,
        ),
        # same loader settings for the `test` dataset, but without shuffling
        "test": dict(
            cls="<class 'torch.utils.data.dataloader.DataLoader'>",
            dataset="test",
            batch_size=128,
            shuffle=False,
            pin_memory=False,
            n_batches=-1,
        ),
    },
    scorers={},  # we shall score models when building a report
)

Prepare the main template:
* reset roots
* clear model definitions
* specify restarts and grad clips

In [None]:
from cplxpaper.auto.parameter_grid import get_params, set_params, special_params

# The three training stages, in the order they appear in the settings below.
stage_names = ("dense", "sparsify", "fine-tune")
lr_schedule = "<class 'cplxpaper.musicnet.lr_scheduler.Trabelsi2017LRSchedule'>"

# Settings shared by every experiment; keys are double-underscore paths into
# `options`, and the order of the entries is kept as in the original template
# preparation.
options = set_params(options, **{
    "threshold": -0.5,  # we use -1/2 for reasons outlined in the text

    "objective_terms__kl_div__reduction": "sum",
    # 5 fold over 1 / 50k = 2e-5 for higher compression
    "objective_terms__kl_div__coef": 1e-4,

    # state inheritance between consecutive stages
    "stages__sparsify__restart": False,
    "stages__sparsify__reset": False,

    "stages__fine-tune__restart": True,
    "stages__fine-tune__reset": False,

    # L2 clip gradients in every stage: seems to be always better to do so.
    **{f"stages__{stage}__grad_clip": 0.5 for stage in stage_names},

    # train 20-40-20 epochs
    "stages__dense__n_epochs": 20,  # ~40 min for C-half, should be faster for R
    "stages__sparsify__n_epochs": 40,  # ~100 min
    "stages__fine-tune__n_epochs": 20,  # ~40 min

    # the same learning-rate schedule in every stage
    # (author's note: 10 epoch of 1e-3, then 1e-4)
    **{f"stages__{stage}__lr_scheduler__cls": lr_schedule for stage in stage_names},

    # clean models: keep only the architecture request, and give every
    # stage its own empty model specification
    "model": {
        "vgg_name": "VGG16"  # request VGG16 architecture
    },
    **{f"stages__{stage}__model": {} for stage in stage_names},
})

<br>

KL-divergence coefficient $C$ settings -- directly affects sparsification.

In [None]:
# Grid fragment shared by all experiments.
base_grid = {
    # short grid: nine coefficients 1.5 * 2**e for e = -7.5, -7.0, ..., -3.5
    # (the full grid was: 1.5 * np.logspace(-13, -1, base=2, num=13))
    "stages__sparsify__objective__kl_div": (
        np.logspace(start=-7.5, stop=-3.5, num=9, base=2) * 1.5
    ),

    # use only one threshold for higher sparsity at the cost of worse performance
    "threshold": [-0.5],  # [-0.5, 1.5]
}

The grid

In [None]:
# Accumulator for parameter-grid entries, and the tag embedded in folder names.
grid = []
tag = "real-vs-cplx"

Use raw features and compare $\mathbb{R}$ against $\tfrac12 \mathbb{C}$

In [None]:
from cplxpaper.auto import feeds

# Raw (untransformed) input features with three input channels.
features = {
    "features__cls": [str(feeds.FeedRawFeatures),],
    "model__n_channels": [3],
}

# One grid entry per dataset and per stage-model combination: first all the
# real-valued variants, then all the complex-valued ones.
# The "Halved complex-valued VGG16 on raw {data}" variant (`model__half` True,
# `model__upcast` True) is intentionally skipped.
for data, data_options in dataset_variety.items():
    variants = (
        ("real", f"Full real-valued VGG16 on raw {data}", {
            "model__double": False,
        }),
        ("complex", f"Full complex-valued VGG16 on raw {data}", {
            "model__half": False,
            "model__upcast": True,
        }),
    )
    for kind, title, flags in variants:
        for model_stages in model_variety[kind]:
            grid.append({
                "____name__": [title],
                **data_options,
                **model_stages,
                **features,
                # each model flag becomes a single-candidate list
                **{key: [value] for key, value in flags.items()},
                **base_grid
            })

Use Fourier features and compare $2 \mathbb{R}$ against $\mathbb{C}$

<br>

## Create the grid

Create a master folder to house all grid replications.

In [None]:
SUFFIX = "-short"

# Absolute path of the master folder; `abspath` already normalizes the path.
base_folder = os.path.abspath(f"./grids{SUFFIX}/")

# Refuse to reuse an existing folder: `exist_ok=False` raises if it is there.
os.makedirs(base_folder, exist_ok=False)

# sanity check that the master folder is now a directory
assert os.path.isdir(base_folder)

Repeat the experiment 5 times

In [None]:
# Number of replication folders to generate; each one receives the full grid.
n_replications = 5

Write experiment manifests en masse: put each replication in a separate folder

In [None]:
import tqdm
from sklearn.model_selection import ParameterGrid

# Write one JSON manifest per grid point into a dedicated folder for each
# replication, and remember the folders for the launch script.
folders = []
for replication in range(n_replications):
    folder = os.path.join(base_folder, f"cifar10__{tag}__{replication:02d}")
    # `exist_ok=False` raises if the replication folder is already present
    os.makedirs(folder, exist_ok=False)

    for exp_no, par in enumerate(tqdm.tqdm(ParameterGrid(grid))):
        # `special_params` returns the remaining parameters together with a
        # second dict that is merged into the manifest at the top level
        par, special = special_params(**par)

        # work on a deep copy so that the shared template stays untouched
        local = set_params(copy.deepcopy(options), **par, device=None)
        local.update(special)

        # format the name
        filename = os.path.join(folder, f"experiment__{exp_no:05d}.json")

        # close the file deterministically so that every manifest is flushed
        # to disk and no handles leak across the experiments
        with open(filename, "w") as fout:
            json.dump(local, fout, indent=2)

    folders.append(folder)

Create a bash script for this grid

In [None]:
import stat

# Device arguments inserted verbatim into every command line.
devspec = """--devices "cuda:0" "cuda:1" "cuda:2" "cuda:3" --per-device 2"""

bash = os.path.join(base_folder, "cifar.sh")
output = os.path.join(base_folder, "report__trade-off.pk")

# absolute folder paths, used both per-experiment and in the report command
abs_folders = [os.path.abspath(folder) for folder in folders]
paths = '" "'.join(abs_folders)

with open(bash, "w") as fout:
    # experiment execution: one command per replication folder
    fout.writelines(
        f"""python -m cplxpaper.auto {devspec} "{folder}"\n"""
        for folder in abs_folders
    )

    # report analysis over all replication folders at once
    fout.write(f"""python -m cplxpaper.auto.reports {devspec} "trade-off" "{output}" "{paths}"\n""")

# owner-only read / write / execute permissions
os.chmod(bash, stat.S_IRWXU)

bash

In [None]:
# Raises AssertionError whenever this cell is executed.
# NOTE(review): presumably a deliberate stop marker so that running all cells
# halts here -- confirm
assert False

<br>