In [1]:
from fastai2.basics import *
from fastai2.vision.all import *
from fastai2.callback.all import *
from fastai2.distributed import *
from fastprogress import fastprogress
from torchvision.models import *
from fastai2.vision.models.xresnet import *
from fastai2.callback.mixup import *
from fastscript import *

# NOTE(review): presumably limits how wide fastprogress renders its text output — confirm.
fastprogress.MAX_COLS = 80
# Turn on cuDNN's benchmark mode for the whole session.
torch.backends.cudnn.benchmark = True

def get_dbunch(size, woof, bs, sh=0., workers=None):
    """Download Imagenette/Imagewoof and return the data object built from it.

    size:    target size given to `RandomResizedCrop`; sizes up to 224 use the
             `_320` dataset URL, larger sizes use the plain one
    woof:    truthy selects the Imagewoof URLs, falsy the Imagenette URLs
    bs:      batch size
    sh:      forwarded to `RandomErasing(sh=...)`; a falsy value disables random erasing
    workers: number of loader workers; None means `min(8, num_cpus())`
    """
    small = size<=224
    if woof: url = URLs.IMAGEWOOF_320  if small else URLs.IMAGEWOOF
    else:    url = URLs.IMAGENETTE_320 if small else URLs.IMAGENETTE
    data_dir = untar_data(url)

    n_workers = min(8, num_cpus()) if workers is None else workers

    # Items are image files labelled by their parent folder; files under a
    # 'val' grandparent folder form the validation split.
    dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),
                       get_items=get_image_files,
                       get_y=parent_label,
                       splitter=GrandparentSplitter(valid_name='val'))

    per_item = [RandomResizedCrop(size, min_scale=0.35), FlipItem(0.5)]
    per_batch = None
    if sh: per_batch = RandomErasing(p=0.9, max_count=3, sh=sh)

    return dblock.databunch(data_dir, path=data_dir, bs=bs, num_workers=n_workers,
                            item_tfms=per_item, batch_tfms=per_batch)

In [2]:
# Adapted from
# https://discuss.pytorch.org/t/how-can-i-replace-an-intermediate-layer-in-a-pre-trained-network/3586/7
import kornia
def convert_MP_to_blurMP(model, layer_type_old):
    """Recursively replace every `layer_type_old` layer in `model` with a MaxBlurPool2d.

    Walks `model._modules` depth-first. Any sub-module whose exact type is
    `layer_type_old` is replaced in place by `kornia.contrib.MaxBlurPool2d(3, True)`.
    The replacement is always built with those fixed arguments; the settings of
    the layer being replaced are not carried over.

    Args:
        model: module whose `_modules` mapping is rewritten in place.
        layer_type_old: layer class to replace. Compared with `type(...) ==`,
            so subclasses of it are not matched.

    Returns:
        The same `model` object, after modification.
    """
    # Iterate over a snapshot so assigning into `_modules` below cannot disturb the walk.
    for name, module in reversed(list(model._modules.items())):
        # `add_module(name, None)` leaves a None placeholder; there is nothing to convert.
        if module is None:
            continue

        if len(list(module.children())) > 0:
            # recurse; the child container is modified in place
            convert_MP_to_blurMP(module, layer_type_old)

        if type(module) == layer_type_old:
            # NOTE(review): positional args are presumably (kernel_size, ceil_mode) —
            # confirm against the installed kornia version.
            model._modules[name] = kornia.contrib.MaxBlurPool2d(3, True)

    return model

In [3]:
@call_parse
def main(
        gpu:   Param("GPU to run on", int)=None,
        woof:  Param("Use imagewoof (otherwise imagenette)", int)=0,
        lr:    Param("Learning rate", float)=1e-2,
        size:  Param("Size (px: 128,192,256)", int)=128,
        sqrmom:Param("sqr_mom", float)=0.99,
        mom:   Param("Momentum", float)=0.9,
        eps:   Param("epsilon", float)=1e-6,
        epochs:Param("Number of epochs", int)=5,
        bs:    Param("Batch size", int)=64,
        mixup: Param("Mixup", float)=0.,
        opt:   Param("Optimizer (adam,rms,sgd,ranger)", str)='ranger',
        arch:  Param("Architecture", str)='xresnet50',
        sh:    Param("Random erase max proportion", float)=0.,
        sa:    Param("Self-attention", int)=0,
        sym:   Param("Symmetry for self-attention", int)=0,
        beta:  Param("SAdam softplus beta", float)=0.,
        act_fn:Param("Activation function", str)='MishJit',
        fp16:  Param("Use mixed precision training", int)=0,
        pool:  Param("Pooling method", str)='AvgPool',
        dump:  Param("Print model; don't train", int)=0,
        runs:  Param("Number of times to repeat training", int)=1,
        meta:  Param("Metadata (ignored)", str)='',
        blurpool: Param("Convert MaxPool to MaxPoolBlur", bool)=False,
        ):
    """Distributed training of Imagenette.

    Trains `arch` `runs` times, building a fresh model and `Learner` for each
    run and fitting it with `fit_flat_cos` for `epochs` epochs. When `blurpool`
    is set, every `nn.MaxPool2d` in the model is replaced through
    `convert_MP_to_blurMP` before training.

    Returns the first run's untrained `Learner` when `dump` is set, otherwise None.
    Raises ValueError when `opt` is not one of adam, rms, sgd, ranger.
    """
    # NOTE(review): `blurpool` is the only flag declared as bool; the other on/off
    # flags use int 0/1. Confirm fastscript parses bool values from the CLI as intended.

    #gpu = setup_distrib(gpu)
    if gpu is not None: torch.cuda.set_device(gpu)

    if   opt=='adam'  : opt_func = partial(Adam, mom=mom, sqr_mom=sqrmom, eps=eps)
    elif opt=='rms'   : opt_func = partial(RMSprop, sqr_mom=sqrmom)
    elif opt=='sgd'   : opt_func = partial(SGD, mom=mom)
    elif opt=='ranger': opt_func = partial(ranger, mom=mom, sqr_mom=sqrmom, eps=eps, beta=beta)
    else:
        # Without this branch `opt_func` stays unbound and the failure only
        # surfaces later, after the dataset has already been fetched.
        raise ValueError(f"Unknown optimizer {opt!r}; expected one of: adam, rms, sgd, ranger")

    # Resolve the architecture, activation and pooling names to the objects of
    # the same name in this module's globals. Done before loading data so that a
    # misspelled name fails (KeyError) immediately.
    m,act_fn,pool = [globals()[o] for o in (arch,act_fn,pool)]

    dbunch = get_dbunch(size, woof, bs, sh=sh)
    # Printed when `gpu` is None or 0.
    if not gpu: print(f'lr: {lr}; size: {size}; sqrmom: {sqrmom}; mom: {mom}; eps: {eps}')

    for run in range(runs):
        print(f'Run: {run}')
        model = m(c_out=10, act_cls=act_fn, sa=sa, sym=sym, pool=pool)
        if blurpool:
            model = convert_MP_to_blurMP(model, nn.MaxPool2d)
        learn = Learner(dbunch, model, opt_func=opt_func,
                        metrics=[accuracy,top_k_accuracy], loss_func=LabelSmoothingCrossEntropy())

        # `dump` hands the untrained Learner back to the caller instead of training.
        if dump: return learn
        if fp16: learn = learn.to_fp16()
        cbs = MixUp(mixup) if mixup else []
        #n_gpu = torch.cuda.device_count()
        #if gpu is None and n_gpu: learn.to_parallel()
        if num_distrib()>1: learn.to_distributed(gpu) # Requires `-m fastai.launch`
        learn.fit_flat_cos(epochs, lr, wd=1e-2, cbs=cbs)

In [5]:
# Training parameters follow https://github.com/fastai/imagenette/blob/master/2020-01-train.md:
#   --lr 8e-3 --sqrmom 0.99 --mom 0.95 --eps 1e-6 --bs 64 --opt ranger --sa 1
#   --fp16 1 --arch xse_resnext50 --pool MaxPool
# except that training is repeated 5 times (runs=5).
# The values are positional and follow the order of main()'s parameters.
args = (
    0,                # gpu
    1,                # woof
    8e-3,             # lr
    128,              # size
    0.99,             # sqrmom
    0.95,             # mom
    1e-6,             # eps
    5,                # epochs
    64,               # bs
    0,                # mixup
    'ranger',         # opt
    'xse_resnext50',  # arch
    0,                # sh
    1,                # sa
    0,                # sym
    0,                # beta
    'MishJit',        # act_fn
    1,                # fp16
    'MaxPool',        # pool
    0,                # dump
    5,                # runs
    '',               # meta
)

In [6]:
# Baseline run: blurpool disabled, so the model's MaxPool layers are left unchanged.
main(*args, blurpool=False)

lr: 0.008; size: 128; sqrmom: 0.99; mom: 0.95; eps: 1e-06
Run: 0


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.994467,2.328049,0.282006,0.77068,00:44
1,1.731662,1.863557,0.391448,0.860015,00:40
2,1.584344,1.840207,0.392721,0.870705,00:40
3,1.447863,1.465186,0.586918,0.937134,00:40
4,1.266306,1.189855,0.714686,0.965131,00:40


Run: 1


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.981347,2.145555,0.321456,0.789514,00:40
1,1.72869,2.049455,0.374905,0.854416,00:40
2,1.552165,1.724983,0.473912,0.892848,00:40
3,1.432839,1.469613,0.586918,0.931026,00:40
4,1.248347,1.195553,0.712904,0.964113,00:41


Run: 2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,2.004928,2.069448,0.328073,0.839145,00:40
1,1.783823,2.040909,0.36956,0.871723,00:40
2,1.577019,2.090141,0.369305,0.873505,00:41
3,1.451208,1.494441,0.550776,0.943497,00:41
4,1.241533,1.185659,0.719012,0.966149,00:41


Run: 3


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.992879,2.545146,0.294222,0.784678,00:40
1,1.734758,3.561966,0.235683,0.695342,00:41
2,1.57407,2.056845,0.385594,0.899975,00:41
3,1.437058,1.458323,0.598626,0.939934,00:41
4,1.229054,1.168625,0.729193,0.96844,00:41


Run: 4


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.991899,1.918463,0.342581,0.84169,00:40
1,1.758539,1.891298,0.398829,0.837109,00:41
2,1.60471,1.625394,0.494528,0.908628,00:41
3,1.47732,1.394247,0.60957,0.950115,00:41
4,1.274549,1.211914,0.701451,0.96055,00:41


In [11]:
# Final-epoch accuracy of each of the five baseline (MaxPool) Imagewoof runs,
# copied by hand from the training logs.
results = [0.714686, 0.712904, 0.719012, 0.729193, 0.701451]
print(*(stat(results) for stat in (np.mean, np.median)))

0.7154491999999999 0.714686


In [7]:
# Same settings, but with blurpool enabled.
main(*args, blurpool=True)

lr: 0.008; size: 128; sqrmom: 0.99; mom: 0.95; eps: 1e-06
Run: 0


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,2.003904,2.223005,0.299822,0.81013,01:01
1,1.764573,2.072203,0.338254,0.850853,00:59
2,1.574507,1.853626,0.443879,0.873759,00:59
3,1.417001,1.321541,0.644184,0.955968,00:59
4,1.216009,1.15548,0.731738,0.964877,00:59


Run: 1


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,2.02447,1.968835,0.36676,0.850344,00:59
1,1.75863,1.911492,0.400865,0.862815,00:59
2,1.578867,2.05731,0.357343,0.815984,00:59
3,1.432039,1.354007,0.639857,0.947569,00:59
4,1.233604,1.156523,0.729193,0.965386,00:59


Run: 2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,2.005394,2.010665,0.344362,0.836091,00:59
1,1.738645,1.870527,0.42199,0.877832,00:59
2,1.55427,1.482105,0.569102,0.937389,01:00
3,1.42462,1.54249,0.543141,0.924663,00:59
4,1.199454,1.148754,0.730466,0.968694,00:59


Run: 3


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,2.005426,2.111513,0.340036,0.832527,00:59
1,1.735521,1.896309,0.393484,0.851871,00:59
2,1.579385,1.692503,0.455841,0.913973,00:59
3,1.405025,1.343927,0.638585,0.943497,00:59
4,1.192734,1.138111,0.740392,0.966404,00:59


Run: 4


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,2.008575,2.133754,0.254518,0.756427,00:59
1,1.720407,1.742496,0.439298,0.898447,00:59
2,1.543136,1.828868,0.433189,0.840672,00:59
3,1.400129,1.577766,0.576228,0.924408,01:00
4,1.187301,1.140684,0.737083,0.967167,00:59


In [10]:
# Final-epoch accuracy of each of the five blur-pool Imagewoof runs,
# copied by hand from the training logs.
results_mbp = [0.731738, 0.729193, 0.730466, 0.740392, 0.737083]
print(*(stat(results_mbp) for stat in (np.mean, np.median)))

0.7337743999999999 0.731738


In [12]:
# Same settings as before, now on Imagenette (woof=0).
# The values are positional and follow the order of main()'s parameters.
args = (
    0,                # gpu
    0,                # woof
    8e-3,             # lr
    128,              # size
    0.99,             # sqrmom
    0.95,             # mom
    1e-6,             # eps
    5,                # epochs
    64,               # bs
    0,                # mixup
    'ranger',         # opt
    'xse_resnext50',  # arch
    0,                # sh
    1,                # sa
    0,                # sym
    0,                # beta
    'MishJit',        # act_fn
    1,                # fp16
    'MaxPool',        # pool
    0,                # dump
    5,                # runs
    '',               # meta
)

In [13]:
# Baseline run: blurpool disabled, so the model's MaxPool layers are left unchanged.
main(*args, blurpool=False)

lr: 0.008; size: 128; sqrmom: 0.99; mom: 0.95; eps: 1e-06
Run: 0


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.55337,1.467474,0.610191,0.929427,00:43
1,1.302615,1.273466,0.684841,0.953376,00:42
2,1.154172,1.793088,0.506242,0.873121,00:42
3,1.085731,1.125555,0.760764,0.97172,00:42
4,0.938625,0.905465,0.852739,0.986497,00:42


Run: 1


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.560764,1.48307,0.586242,0.941656,00:42
1,1.29404,1.302575,0.671592,0.955159,00:42
2,1.152244,1.770406,0.552357,0.907261,00:42
3,1.076044,1.039984,0.787771,0.974268,00:42
4,0.939874,0.910965,0.848917,0.983949,00:43


Run: 2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.558743,1.762441,0.505987,0.885605,00:42
1,1.298764,1.516917,0.566624,0.940127,00:43
2,1.171672,1.471202,0.620127,0.952611,00:43
3,1.07931,1.089393,0.777325,0.967898,00:43
4,0.943211,0.913316,0.848917,0.981401,00:43


Run: 3


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.556514,1.83305,0.524076,0.917452,00:42
1,1.294437,1.419953,0.637197,0.94242,00:42
2,1.156018,1.481858,0.623694,0.949554,00:43
3,1.071758,1.115519,0.76051,0.974522,00:42
4,0.934624,0.911172,0.841529,0.98293,00:42


Run: 4


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.533444,1.45893,0.620127,0.944204,00:42
1,1.287585,1.248199,0.701656,0.958981,00:42
2,1.145923,1.633679,0.55949,0.944713,00:42
3,1.072981,1.021157,0.794395,0.979873,00:42
4,0.929677,0.905917,0.842548,0.985478,00:42


In [15]:
# Final-epoch accuracy of each of the five baseline (MaxPool) Imagenette runs,
# copied by hand from the training logs.
results_nette = [0.852739, 0.848917, 0.848917, 0.841529, 0.842548]
print(*(stat(results_nette) for stat in (np.mean, np.median)))

0.8469300000000001 0.848917


In [16]:
# Same settings, but with blurpool enabled.
main(*args, blurpool=True)

lr: 0.008; size: 128; sqrmom: 0.99; mom: 0.95; eps: 1e-06
Run: 0


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.510369,1.334672,0.654013,0.951338,01:01
1,1.280501,1.323934,0.684076,0.942166,01:01
2,1.140033,1.654492,0.540382,0.903185,01:01
3,1.076823,1.069148,0.772994,0.980382,01:01
4,0.914368,0.887269,0.846879,0.988025,01:01


Run: 1


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.525621,1.484924,0.611975,0.927134,01:01
1,1.253216,1.373257,0.665733,0.943694,01:01
2,1.155353,1.370966,0.656815,0.917707,01:01
3,1.069552,1.025684,0.789809,0.974777,01:02
4,0.907935,0.88914,0.849682,0.985732,01:01


Run: 2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.568489,1.83959,0.488408,0.900892,01:01
1,1.294772,1.221399,0.713885,0.961019,01:01
2,1.134202,1.433192,0.610191,0.933248,01:01
3,1.049995,1.031356,0.784968,0.979618,01:01
4,0.909276,0.870828,0.857834,0.985478,01:01


Run: 3


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.505874,1.651442,0.563057,0.938854,01:01
1,1.279572,1.346696,0.658599,0.953631,01:01
2,1.15077,1.077656,0.766369,0.974013,01:01
3,1.058478,1.081523,0.766115,0.977834,01:01
4,0.910908,0.890739,0.847898,0.984968,01:02


Run: 4


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.514718,1.462456,0.60051,0.946497,01:01
1,1.269807,1.702702,0.560255,0.934522,01:01
2,1.125594,1.196752,0.725605,0.969427,01:02
3,1.045998,1.039236,0.781656,0.975796,01:01
4,0.898163,0.886169,0.852994,0.987261,01:01


In [17]:
# Final-epoch accuracy of each of the five blur-pool Imagenette runs,
# copied by hand from the training logs.
results_nette_mbp = [0.846879, 0.849682, 0.857834, 0.847898, 0.852994]
print(*(stat(results_nette_mbp) for stat in (np.mean, np.median)))

0.8510574 0.849682
