## Image网 Submission `128x128`

This contains a submission for the Image网 leaderboard in the `128x128` category.

In this notebook we:
1. Train on 1 pretext task: 
  - Train a network to do image inpatining on Image网's `/train`, `/unsup` and `/val` images. 
2. Train on 4 downstream tasks:
  - We load the pretext weights and train for `5` epochs.
  - We load the pretext weights and train for `20` epochs.
  - We load the pretext weights and train for `80` epochs.
  - We load the pretext weights and train for `200` epochs.
  
Our leaderboard submissions are the accuracies we get on each of the downstream tasks.

In [1]:
import json
import torch

import numpy as np

from functools import partial
from fastai2.basics import *
from fastai2.vision.all import *

In [3]:
torch.cuda.set_device(2)

## Pretext Task: Contrastive Learning

In [4]:
# Chosen parameters
lr=2e-2
sqrmom=0.99
mom=0.95
beta=0.
eps=1e-6
bs=64 
sa=1

m = xresnet34
act_fn = Mish
pool = MaxPool

nc=20

In [5]:
source = untar_data(URLs.IMAGEWANG_160)
len(get_image_files(source/'unsup')), len(get_image_files(source/'train')), len(get_image_files(source/'val'))

(7750, 14669, 3929)

In [6]:
def get_dbunch(size, bs, workers=8):
    path = URLs.IMAGEWANG_160 if size <= 160 else URLs.IMAGEWANG
    source = untar_data(path)
    
    files = get_image_files(source, folders=['unsup', 'val'])
    
    tfms = [[PILImage.create, ToTensor, Resize(size)], 
            [lambda x: x.parent.name, Categorize()]]
    
    dsets = Datasets(files, tfms=tfms, splits=RandomSplitter(valid_pct=0.1)(files))
    
    batch_tfms = [IntToFloatTensor]
    dls = dsets.dataloaders(bs=bs, num_workers=workers, after_batch=batch_tfms)
    dls.path = source
    return dls

In [159]:
def get_dbunch(size, bs, workers=8):
    path = URLs.IMAGEWANG_160 if size <= 160 else URLs.IMAGEWANG
    source = untar_data(path)
    
    files = get_image_files(source, folders=['unsup', 'val'])
    
    tfms = [[PILImage.create, ToTensor, Resize(size)], 
            [lambda x: x.parent.name, Categorize()]]
    
    dsets = Datasets(files, tfms=tfms, splits=RandomSplitter(valid_pct=0.1)(files))
    

    batch_tfms = [IntToFloatTensor, Normalize.from_stats(*imagenet_stats), 
                  *aug_transforms(size=size, min_scale=0.2, max_lighting=0.3, mult=2, batch=False)]
    dls = dsets.dataloaders(bs=bs, num_workers=workers, after_batch=batch_tfms)
    dls.path = source
    return dls

In [160]:
# Use the Ranger optimizer
opt_func = partial(ranger, mom=mom, sqr_mom=sqrmom, eps=eps, beta=beta)

In [161]:
size = 128
bs = 256

dbunch = get_dbunch(160, bs)
# dbunch.c = nc
dbunch.c = 128

In [116]:
len(dbunch.train.dataset)

10512

In [91]:
#export
from pytorch_metric_learning import losses
class XentLoss(losses.NTXentLoss):
    def forward(self, output1, output2):
        stacked = torch.cat((output1, output2), dim=0)
        labels = torch.arange(output1.shape[0]).repeat(2)
        return super().forward(stacked, labels, None)
    
class ContrastCallback(Callback):
    run_before=Recorder
    def __init__(self, size=256, aug_targ=None, aug_pos=None, temperature=0.1):
        self.aug_targ = ifnone(aug_targ, get_aug_pipe(size, min_scale=0.7))
        self.aug_pos = ifnone(aug_pos, get_aug_pipe(size, min_scale=0.4))
        self.temperature = temperature
        
    def update_size(self, size):
        pipe_update_size(self.aug_targ, size)
        pipe_update_size(self.aug_pos, size)
        
    def begin_fit(self): 
        self.old_lf = self.learn.loss_func
        self.old_met = self.learn.metrics
        self.learn.metrics = []
        self.learn.loss_func = losses.NTXentLoss(self.temperature)
        
    def after_fit(self):
        self.learn.loss_fun = self.old_lf
        self.learn.metrics = self.old_met
        
    def begin_batch(self):
        xb, = self.learn.xb
        xb_targ = self.aug_targ(xb)
        xb_pos = self.aug_pos(xb)
        self.learn.xb = torch.cat((xb_targ, xb_pos), dim=0),
        self.learn.yb = torch.arange(xb_targ.shape[0]).repeat(2),
        

In [92]:
#export
def pipe_update_size(pipe, size):
    for tf in pipe.fs:
        if isinstance(tf, RandomResizedCropGPU):
            tf.size = size

In [93]:
#export
def get_aug_pipe(size, min_scale=0.4, stats=imagenet_stats, erase=True, **kwargs):
    tfms = [Normalize.from_stats(*stats), *aug_transforms(size=size, min_scale=min_scale, **kwargs)]
    if erase: tfms.append(RandomErasing(p=0.5, max_count=1, sh=0.2))
    return Pipeline(tfms)

In [94]:
m_part = partial(m, c_out=nc, act_cls=torch.nn.ReLU, sa=sa, pool=pool)

In [95]:
save_name = 'imagewang_contrast_simple_stem'

In [96]:
from fastai2.vision.models.xresnet import _xresnet

In [97]:
m_part2 = partial(_xresnet, expansion=1, layers=[1,1], c_out=nc, act_cls=torch.nn.ReLU, sa=sa, pool=pool)

In [104]:
aug = get_aug_pipe(size, min_scale=0.3, mult=1, max_lighting=0.4, stats=imagenet_stats)
aug2 = get_aug_pipe(size, min_scale=0.25, mult=2, stats=imagenet_stats)
cbs = ContrastCallback(size=size, aug_targ=aug, aug_pos=aug2, temperature=0.25)
dbunch.c = 64
learn = cnn_learner(dbunch, m_part2, opt_func=opt_func,
                    metrics=[], loss_func=CrossEntropyLossFlat(), cbs=cbs, pretrained=False,
                    config={'ps':0.0, 'concat_pool':False}
                   )

In [105]:
learn.unfreeze()
learn.fit_flat_cos(15, 2e-2, wd=1e-2)

epoch,train_loss,valid_loss,time
0,3.387634,3.659662,00:14
1,3.232777,3.621425,00:14
2,3.103615,3.391924,00:14
3,3.071557,3.283654,00:14
4,3.012472,3.113512,00:14
5,2.988152,3.305422,00:14
6,2.974991,3.645974,00:14
7,2.953123,3.383167,00:14
8,2.919331,3.267648,00:14
9,2.90802,3.22866,00:14


In [106]:
torch.save(learn.model[0].state_dict(), f'{save_name}.pth')

In [107]:
# learn.save(save_name)

## Downstream Task: Image Classification

In [108]:
def get_dbunch(size, bs, workers=8, dogs_only=True):
    path = URLs.IMAGEWANG_160 if size <= 160 else URLs.IMAGEWANG
    source = untar_data(path)
    
    if dogs_only:
        dog_categories = [f.name for f in (source/'val').ls()]
        dog_train = get_image_files(source/'train', folders=dog_categories)
        valid = get_image_files(source/'val')
        files = dog_train + valid
        splits = [range(len(dog_train)), range(len(dog_train), len(dog_train)+len(valid))]
    else:
        files = get_image_files(source)
        splits = GrandparentSplitter(valid_name='val')(files)
        
    
    item_aug = [RandomResizedCrop(size, min_scale=0.35), FlipItem(0.5)]
    tfms = [[PILImage.create, ToTensor, *item_aug], 
            [lambda x: x.parent.name, Categorize()]]
    
    dsets = Datasets(files, tfms=tfms, splits=splits)
    
    batch_tfms = [IntToFloatTensor, Normalize.from_stats(*imagenet_stats)]
    dls = dsets.dataloaders(bs=bs, num_workers=workers, after_batch=batch_tfms)
    dls.path = source
    return dls

In [109]:
def do_train(size=128, bs=64, epochs=5, runs=5, dogs_only=False, save_name=None):
    dbunch = get_dbunch(size, bs, dogs_only=dogs_only)
    for run in range(runs):
        print(f'Run: {run}')
        ch = nn.Sequential(nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(512, 20))
        learn = cnn_learner(dbunch, m_part, opt_func=opt_func,
                metrics=[accuracy,top_k_accuracy], loss_func=CrossEntropyLossFlat(),
                pretrained=False,
                config={'custom_head':ch})

        if save_name is not None:
            state_dict = torch.load(f'{save_name}.pth')
            learn.model[0].load_state_dict(state_dict, strict=False)

        learn.unfreeze()
        learn.fit_flat_cos(epochs, 1e-2, wd=1e-2)

### 5 Epochs

In [110]:
epochs = 5
runs = 2

In [111]:
do_train(epochs=epochs, runs=runs, dogs_only=False, save_name=save_name)

Run: 0


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.503062,3.1737,0.068974,0.483838,00:20
1,1.175924,3.336261,0.136931,0.537032,00:23
2,1.033601,3.190902,0.148384,0.57521,00:20
3,0.894012,3.337077,0.153729,0.679053,00:19
4,0.675724,2.204824,0.329346,0.807585,00:20


Run: 1


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,2.086357,00:05,,,


KeyboardInterrupt: 

In [50]:
do_train(epochs=epochs, runs=runs, dogs_only=False, save_name=save_name)

Run: 0


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.491335,3.657266,0.043013,0.455587,00:20
1,1.196516,2.647952,0.168745,0.669381,00:20
2,1.062373,2.66408,0.214304,0.660982,00:20
3,0.904396,2.874358,0.180199,0.694833,00:20
4,0.707809,2.275755,0.31662,0.779333,00:20


Run: 1


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.507454,3.399617,0.07941,0.434207,00:20
1,1.163295,3.723319,0.049122,0.510562,00:20
2,1.034284,3.253925,0.12904,0.527615,00:20
3,0.920781,3.292387,0.1662,0.57521,00:20
4,0.695897,2.256835,0.309748,0.796131,00:20


## Dogs only

In [51]:
do_train(epochs=epochs, runs=runs, dogs_only=True, save_name=save_name)

Run: 0


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,2.46802,2.201355,0.231357,0.721303,00:04
1,2.167516,2.44277,0.238738,0.75999,00:04
2,2.007626,3.546451,0.167727,0.695597,00:04
3,1.909244,2.580137,0.262408,0.774752,00:04
4,1.772702,1.849108,0.354034,0.847544,00:04


Run: 1


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,,00:00,,,


KeyboardInterrupt: 

## Random weights - ACC = 0.337999	

In [112]:
do_train(epochs=epochs, runs=1, dogs_only=False, save_name=None)

Run: 0


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.406883,3.782342,0.032578,0.475439,00:19
1,1.092562,3.793924,0.056757,0.392975,00:19
2,0.942774,3.296282,0.167473,0.600916,00:19
3,0.861684,2.880444,0.195724,0.627641,00:20
4,0.639393,2.099574,0.362688,0.813184,00:19


### 20 Epochs

In [47]:
epochs = 20
runs = 3

In [None]:
do_train(epochs=epochs, runs=runs, dogs_only=False, save_name=save_name)

Run: 0


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.257433,3.583709,0.055994,0.364469,00:20
1,1.004941,2.540476,0.226266,0.740137,00:20
2,0.918966,2.997299,0.198015,0.653856,00:20
3,0.836182,2.604789,0.273097,0.694833,00:20
4,0.748276,2.963373,0.20336,0.616951,00:20
5,0.69971,2.427061,0.329091,0.763808,00:20
6,0.6559,2.179317,0.365996,0.777806,00:20
7,0.591791,2.329697,0.374905,0.804021,00:20
8,0.565931,2.494817,0.358106,0.809875,00:21
9,0.509965,3.094292,0.2479,0.684907,00:21


Run: 1


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.269204,3.301069,0.13184,0.571901,00:20
1,1.017692,3.388632,0.125732,0.501909,00:19
2,0.912226,2.442005,0.233138,0.70705,00:20
3,0.789521,2.61266,0.275134,0.776024,00:20
4,0.749869,2.76661,0.218631,0.6648,00:21
5,0.693362,2.47666,0.308221,0.733774,00:21
6,0.627887,1.994382,0.402392,0.825146,00:20
7,0.610023,2.50879,0.275897,0.775515,00:20
8,0.557153,2.561898,0.327055,0.795368,00:19
9,0.534639,2.271221,0.400611,0.792059,00:20


Run: 2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.24068,3.091946,0.109952,0.579791,00:19
1,1.009205,3.817732,0.069483,0.404938,00:19
2,0.899343,2.559685,0.273097,0.717485,00:19
3,0.810252,2.767231,0.257317,0.658946,00:20
4,0.736861,3.026284,0.189361,0.641894,00:19


In [None]:
do_train(epochs=epochs, runs=runs, dogs_only=True, save_name=save_name)

## 80 epochs

In [None]:
epochs = 80
runs = 1

In [None]:
for run in range(runs):
    print(f'Run: {run}')
    ch = nn.Sequential(nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(512, 20))
    learn = cnn_learner(dbunch, m_part, opt_func=opt_func,
            metrics=[accuracy,top_k_accuracy], loss_func=CrossEntropyLossFlat(),
            pretrained=False,
            config={'custom_head':ch})
    learn.unfreeze()
    learn.fit_flat_cos(epochs, 2e-2, wd=1e-3)

Accuracy: **62.18%**

### 200 epochs

In [None]:
epochs = 200
runs = 1

In [None]:
for run in range(runs):
        print(f'Run: {run}')
        ch = nn.Sequential(nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(512, 20))
        learn = cnn_learner(dbunch, m_part, opt_func=opt_func,
                metrics=[accuracy,top_k_accuracy], loss_func=CrossEntropyLossFlat(),
                config={'custom_head':ch})#, cbs=cbs)
        
        if dump: print(learn.model); exit()
#         if fp16: learn = learn.to_fp16()
        cbs = MixUp(mixup) if mixup else []
        learn.load(ss_name, strict=True)
        
        learn.freeze()
        learn.fit_flat_cos(epochs, lr, wd=1e-2, cbs=cbs)

Accuracy: **62.03%**