# Edge-Popup

Paper: [https://arxiv.org/abs/1911.13299](https://arxiv.org/abs/1911.13299)

Code adapted from: [train_imagenette.py](https://github.com/fastai/fastai2/blob/master/nbs/examples/train_imagenette.py)

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
# Default resolution for every figure created in this notebook.
mpl.rcParams.update({'figure.dpi': 300})

In [None]:
import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F

from fastai2.basics import *
from fastai2.vision.all import *
from fastai2.callback.all import *
from fastai2.distributed import *
from fastprogress import fastprogress
from torchvision.models import *
from fastai2.vision.models.xresnet import *
from fastai2.callback.mixup import *
from fastscript import *

from lib.layers import Conv2dSubnet, LinearSubnet
from lib.utils import adapt_model

In [None]:
def get_dbunch(size, woof, bs, sh=0., workers=None):
    """Download Imagenette/Imagewoof and wrap it in a data bunch.

    size:    crop size passed to `RandomResizedCrop`; sizes up to 224 use the
             `_320` dataset URLs, larger sizes use the plain dataset URLs.
    woof:    truthy selects Imagewoof, falsy selects Imagenette.
    bs:      batch size.
    sh:      `sh` argument of `RandomErasing`; a falsy value disables it.
    workers: number of loader workers; defaults to `min(8, num_cpus())`.
    """
    small = size <= 224
    if woof: path = URLs.IMAGEWOOF_320  if small else URLs.IMAGEWOOF
    else:    path = URLs.IMAGENETTE_320 if small else URLs.IMAGENETTE
    source = untar_data(path)

    workers = min(8, num_cpus()) if workers is None else workers

    # Items are image files, labelled by their parent folder name and split by
    # grandparent folder with 'val' as the validation folder name.
    dblock = DataBlock(
        blocks=(ImageBlock, CategoryBlock),
        splitter=GrandparentSplitter(valid_name='val'),
        get_items=get_image_files,
        get_y=parent_label,
    )

    item_tfms = [RandomResizedCrop(size, min_scale=0.35), FlipItem(0.5)]
    batch_tfms = None
    if sh: batch_tfms = RandomErasing(p=0.3, max_count=3, sh=sh)

    return dblock.databunch(
        source, path=source, bs=bs, num_workers=workers,
        item_tfms=item_tfms, batch_tfms=batch_tfms,
    )

In [None]:
def adapt_model_all(m, k=0.3):
    """Swap the conv and linear layers of `m` for their subnet counterparts.

    Calls `adapt_model` once for `nn.Conv2d` -> `Conv2dSubnet` and once for
    `nn.Linear` -> `LinearSubnet`, forwarding `k` to the new layers through
    `extra_args`. Return values of `adapt_model` are discarded, so `m` is
    presumably modified in place (confirm in `lib.utils`).
    """
    subnet_kwargs = {'k': k}  # same dict object is handed to both calls
    replacements = ((nn.Conv2d, Conv2dSubnet), (nn.Linear, LinearSubnet))
    for orig_cls, subnet_cls in replacements:
        adapt_model(m, orig=orig_cls, new=subnet_cls, extra_args=subnet_kwargs)

In [None]:
def main(
    gpu:   Param("GPU to run on", int)=None,
    woof:  Param("Use imagewoof (otherwise imagenette)", int)=0,
    lr:    Param("Learning rate", float)=1e-2,
    size:  Param("Size (px: 128,192,256)", int)=128,
    sqrmom:Param("sqr_mom", float)=0.99,
    mom:   Param("Momentum", float)=0.9,
    eps:   Param("epsilon", float)=1e-6,
    epochs:Param("Number of epochs", int)=20,
    bs:    Param("Batch size", int)=64,
    mixup: Param("Mixup", float)=0.,
    opt:   Param("Optimizer (adam,rms,sgd,ranger)", str)='ranger',
    arch:  Param("Architecture", str)='xresnet50',
    sh:    Param("Random erase max proportion", float)=0.,
    sa:    Param("Self-attention", int)=0,
    sym:   Param("Symmetry for self-attention", int)=0,
    beta:  Param("SAdam softplus beta", float)=0.,
    act_fn:Param("Activation function", str)='MishJit',
    fp16:  Param("Use mixed precision training", int)=0,
    pool:  Param("Pooling method", str)='AvgPool',
    dump:  Param("Print model; don't train", int)=0,
    runs:  Param("Number of times to repeat training", int)=1,
    sub:   Param("Use edge-popup subnets", bool)=True,
    subk:  Param("The k parameter for edge-popup", float)=0.3,
    meta:  Param("Metadata (ignored)", str)=''
):
    """Training of Imagenette.

    Builds the data once, then for each of `runs` repetitions creates a fresh
    model (converted with `adapt_model_all` when `sub` is truthy) and trains it
    with `fit_flat_cos`. Parameters are described by their `Param` annotations.

    Returns:
        An `L` with one entry per run: that run's `learn.recorder.values`.

    Raises:
        ValueError: if `opt` is not one of 'adam', 'rms', 'sgd', 'ranger'.
        KeyError:   if `arch`, `act_fn` or `pool` is not a name in this
                    module's globals.
    """

    #gpu = setup_distrib(gpu)
    if gpu is not None: torch.cuda.set_device(gpu)
    if   opt=='adam'  : opt_func = partial(Adam, mom=mom, sqr_mom=sqrmom, eps=eps)
    elif opt=='rms'   : opt_func = partial(RMSprop, sqr_mom=sqrmom)
    elif opt=='sgd'   : opt_func = partial(SGD, mom=mom)
    elif opt=='ranger': opt_func = partial(ranger, mom=mom, sqr_mom=sqrmom, eps=eps, beta=beta)
    else:
        # Fail before any data is fetched or a model is built, instead of
        # hitting an unbound `opt_func` when the Learner is created.
        raise ValueError(f"Unknown optimizer {opt!r}; expected one of: adam, rms, sgd, ranger")

    dbunch = get_dbunch(size, woof, bs, sh=sh)
    # Printed when `gpu` is None or 0.
    if not gpu: print(f'epochs: {epochs}; lr: {lr}; size: {size}; sqrmom: {sqrmom}; mom: {mom}; eps: {eps}')

    # Resolve the architecture, activation and pooling names to the objects
    # of the same name available in this module.
    mk_m,act_fn,pool = [globals()[o] for o in (arch,act_fn,pool)]

    results = L()
    for run in range(runs):
        print(f'Run: {run}')

        # create and adapt model
        m = mk_m(c_out=10, act_cls=act_fn, sa=sa, sym=sym, pool=pool)
        if sub:
            adapt_model_all(m, k=subk)

        learn = Learner(dbunch, m, opt_func=opt_func,
                        metrics=[accuracy,top_k_accuracy], loss_func=LabelSmoothingCrossEntropy())
        # Dump mode: show the model and leave via exit() without training.
        if dump: print(learn.model); exit()
        if fp16: learn = learn.to_fp16()
        cbs = MixUp(mixup) if mixup else []
        #n_gpu = torch.cuda.device_count()
        #if gpu is None and n_gpu: learn.to_parallel()
        if num_distrib()>1: learn.to_distributed(gpu) # Requires `-m fastai.launch`
        learn.fit_flat_cos(epochs, lr, wd=1e-2, cbs=cbs)
        results.append(learn.recorder.values)
    return results

In [None]:
# Values of the edge-popup `k` parameter to sweep: nominally 0.05, 0.10, ..., 0.95
# (`np.arange` stops before 1). These exact float values are also used as the
# dictionary keys of the sweep results, so they must be generated the same way
# wherever results are looked up.
ks = np.arange(0.05, 1, 0.05)

In [None]:
# Sweep over k: for every value in `ks`, train 5 subnet runs of 10 epochs on
# Imagewoof and store the per-run results under that k.
d = dict()
for k in ks:
    res = main(
        arch="xresnet50",
        woof=1,
        sub=True,
        subk=k,
        runs=5,
        epochs=10,
    )
    d[k] = res

In [None]:
# Baseline without edge-popup subnets (`sub=False`): 3 runs of 10 epochs on Imagewoof.
res = main(arch="xresnet50", woof=1, sub=False, runs=3, epochs=10)

In [None]:
# Baseline summary: take column 2 of the last recorded row of every run in
# `res`, then store (mean, std) over the runs, both scaled by 100.
accs = res.map(lambda run_values: run_values[-1][2])
acc_arr = np.array(accs)
one = (acc_arr.mean() * 100, acc_arr.std() * 100)

In [None]:
# Load previously saved sweep results into `d`, replacing any in-memory value.
# NOTE: unpickling can execute arbitrary code; only load files you created yourself.
with open('./save/ks5r10ep.pkl', 'rb') as f:  # `with` guarantees the handle is closed
    d = pickle.load(f)

In [None]:
# Column of the accuracy value within one epoch's recorded values
# (presumably [train_loss, valid_loss, accuracy, top_k_accuracy] -- confirm
# against the Recorder and the metrics passed to the Learner).
i_acc = 2
i_last = -1  # use the last recorded epoch of each run
out = {}

# all ks
for k in ks:

    that_k = d[k]

    # mean & std over runs
    accs = np.array(that_k.map(lambda that_run: that_run[i_last][i_acc]))
    out[k] = (accs.mean(), accs.std())

# x values in percent. Round before casting: float error in k*100 could
# otherwise truncate to one less. `np.int` no longer exists in current NumPy,
# so the builtin `int` is used.
p_ks = np.rint(np.array(L(out.keys())) * 100).astype(int)
p_means = np.array(L(out.values()).map(lambda el: el[0])) * 100
p_stds = np.array(L(out.values()).map(lambda el: el[1])) * 100
p_lo = p_means - p_stds
p_hi = p_means + p_stds
plt.xlabel("% of Weights")
plt.ylabel("Accuracy")
plt.xlim(5, 95)
# NOTE(review): the title says 5 runs; confirm this also holds for the runs behind `one`.
plt.title("Mean over 5 runs à 10 epochs")

# trials
# NOTE(review): the y-data is reversed relative to `p_ks`, i.e. the result for
# k is drawn at x = 100 - 100*k. Presumably k is the fraction of weights
# removed rather than kept -- confirm against the subnet layers.
plt.plot(p_ks, p_means[::-1], label="Weights ~ signed kaiming constant")
plt.fill_between(p_ks, p_lo[::-1], p_hi[::-1], alpha=0.2)

# one
# `one` is the (mean, std) of the baseline; it is drawn as a horizontal band.
# NOTE(review): the label says Adam, while `main` defaults to opt='ranger' --
# confirm which optimizer produced `one`.
plt.plot(p_ks, [one[0]]*len(p_ks), label="Learned Dense Weights (Adam)")
lo, hi = [one[0]-one[1]]*len(p_ks), [one[0]+one[1]]*len(p_ks)
plt.fill_between(p_ks, lo, hi, alpha=0.2)

plt.legend()
plt.savefig("img/experiments/imagewoof_xresnet50_ks_10ep_5r.png", dpi=200)