In [None]:
%load_ext autoreload
%autoreload 2

import os
from datetime import datetime

from fastai.script import *
from fastai.vision import *
from fastai.callbacks import *
from fastai.distributed import *
from fastprogress import fastprogress
from torchvision.models import *
from fastai.vision.models import *
from pathlib import Path

from mmdet.models.backbones import *
from mmdet.models.backbones.base_backbone import ClassifierPretrainWrapper


# Turn on PyTorch's cuDNN autotuner flag (`torch.backends.cudnn.benchmark`),
# which lets cuDNN benchmark convolution algorithms and reuse the fastest one.
torch.backends.cudnn.benchmark = True
# Override fastprogress's MAX_COLS setting; presumably the width (in columns)
# of the progress output — TODO confirm against the installed fastprogress.
fastprogress.MAX_COLS = 80

In [None]:
def get_data(size, woof, bs, workers=None):
    """Build the Imagenette / Imagewoof databunch for the requested image size.

    Args:
        size: Target image side in px. It also decides which archive is
            fetched: ``<= 128`` uses the ``_160`` variant, ``<= 224`` uses the
            ``_320`` variant, anything larger uses the full-size dataset.
        woof: Truthy selects Imagewoof, falsy selects Imagenette.
        bs: Batch size forwarded to ``.databunch``.
        workers: Number of loader workers. When ``None`` it becomes
            ``min(8, num_cpus() // n_gpus)``, where ``n_gpus`` is
            ``num_distrib()`` or 1 if that value is falsy.

    Returns:
        Whatever ``.presize(size, scale=(0.35, 1))`` returns for the built
        databunch. Note that no ``.normalize(...)`` step is applied here.
    """
    # Resolve the archive variant from the size, then the dataset family.
    if size <= 128:
        variant = '_160'
    elif size <= 224:
        variant = '_320'
    else:
        variant = ''
    family = 'IMAGEWOOF' if woof else 'IMAGENETTE'
    dataset_dir = untar_data(getattr(URLs, family + variant))

    # Split the CPU budget between processes; fall back to a single process
    # when num_distrib() reports a falsy value.
    world_size = num_distrib() or 1
    if workers is None:
        workers = min(8, num_cpus() // world_size)

    labelled = (ImageList
                .from_folder(dataset_dir)
                .split_by_folder(valid='val')
                .label_from_folder())

    # Random flip + cutout for the training set, no transforms for validation.
    train_tfms = [flip_lr(p=0.5), cutout(n_holes=(1, 4), length=(10, 160), p=0.5)]
    valid_tfms = []
    bunch = (labelled
             .transform((train_tfms, valid_tfms), size=size)
             .databunch(bs=bs, num_workers=workers))
    return bunch.presize(size, scale=(0.35, 1))

In [None]:
# Number of output classes handed to the classifier wrapper below.
num_classes: Param("Class count", int) = 10
# Backbone identifier; per its Param description it is used when dumping.
arch_name: Param("Backbone name, used for dumping", str) = 'scarlet_c'

# Model description
# A ScarletC backbone (built with out_indices=None) wrapped by
# ClassifierPretrainWrapper to form the model that is trained.
# NOTE(review): input_channels=1280 presumably matches the width of the
# backbone's final feature map — confirm against ScarletC.
backbone = ScarletC(out_indices=None)
model = ClassifierPretrainWrapper(backbone, input_channels=1280, num_classes=num_classes)

In [None]:
# Run configuration. Each value is a plain module-level variable; the Param
# annotation only carries a description and a type.
gpu: Param("GPU to run on", str) = None
woof: Param("Use imagewoof (otherwise imagenette)", int) = True
size: Param("Size (px: 128, 192, 224)", int) = 320
mom: Param("Momentum", float) = 0.9
eps: Param("epsilon", float) = 1e-6
epochs: Param("Number of epochs", int) = 20
bs: Param("Batch size", int) = 64
mixup: Param("Mixup", float) = 0.
opt: Param("Optimizer (adam, rms, sgd)", str) = 'adam'
dump: Param("Path to pretrained backbones", Path) = Path('/mnt/nfs/Other/pytorch_pretrained_backbones/')

gpu = setup_distrib(gpu)
if gpu is None:
    # No specific GPU requested: the model is wrapped with to_parallel() below,
    # so `bs` is scaled up by the number of visible CUDA devices.
    bs *= torch.cuda.device_count()

# NOTE(review): `opt` and `eps` are not consulted here — the optimizer is
# always SGD with momentum `mom`, regardless of the value of `opt`.
opt_func = partial(optim.SGD, momentum=mom)

data = get_data(size, woof, bs)

# Ratio of the effective batch size to a reference batch of 256; a later cell
# multiplies the learning rate by it.
bs_rat = bs / 256
if gpu is not None:
    # num_distrib() can be falsy when a GPU id is given without a distributed
    # launch; multiplying by it directly would zero bs_rat (and therefore the
    # learning rate). Fall back to 1, the same guard get_data uses.
    bs_rat *= num_distrib() or 1

learn = Learner(data, model, wd=1e-2, opt_func=opt_func,
                metrics=[accuracy, top_k_accuracy],
                bn_wd=False, true_wd=True,
                loss_func=LabelSmoothingCrossEntropy())
if mixup:
    # A non-zero `mixup` value is used as the mixup alpha.
    learn = learn.mixup(alpha=mixup)
learn = learn.to_fp16(dynamic=True)
if gpu is None:
    learn.to_parallel()
elif num_distrib() > 1:
    learn.to_distributed(gpu) # Requires `-m fastai.launch`

In [None]:
# learn.lr_find(num_it=200, end_lr=10.)
# learn.recorder.plot(skip_start=2, suggestion=True)

In [None]:
# Base learning rate; it is scaled by the batch-size ratio `bs_rat` before use.
lr: Param("Learning rate", float) = 1e-3
# `not gpu` is true for gpu=None and gpu=0, so the summary is printed by a
# non-distributed run or by the process on GPU 0 only.
if not gpu:
    print(f'lr: {lr}; eff_lr: {lr * bs_rat}; size: {size}; mom: {mom}; eps: {eps}')
lr *= bs_rat

learn.fit_one_cycle(epochs, lr, div_factor=10, pct_start=0.3)

# Create the dump directory without a separate exists() check: exist_ok=True
# avoids the check-then-create race when several processes reach this point
# at the same time.
out_dir = dump / arch_name
os.makedirs(str(out_dir), exist_ok=True)

# File name carries a day_month_year__hour_minute_second timestamp.
# NOTE(review): save_backbone presumably writes only the backbone weights —
# confirm against ClassifierPretrainWrapper.
stamp = datetime.now().strftime('%d_%m_%y__%H_%M_%S')
learn.model.save_backbone(str(out_dir / f"{arch_name}__{stamp}.pth"))