## Old fastai

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# (Re)load the autoreload extension and reload all modules before each cell runs,
# so edits to imported library code are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
from fastai.conv_learner import *
from fastai.models.cifar10.wideresnet import wrn_22
# Let cuDNN auto-tune its convolution algorithms.
torch.backends.cudnn.benchmark = True
# Root folder of the CIFAR-10 images; created up front if it does not exist yet.
PATH = Path("../data/cifar10/")
os.makedirs(PATH,exist_ok=True)

In [3]:
# The ten class labels, in alphabetical order.
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# Per-channel (mean, std) used to normalise the images
# (presumably computed on the CIFAR-10 training set — TODO confirm).
stats = (np.array([ 0.4914 ,  0.48216,  0.44653]), np.array([ 0.24703,  0.24349,  0.26159]))

bs=128  # batch size handed to every data object below
sz=32   # image size; not referenced by the cells visible here, which hard-code 32

In [4]:
# Baseline fastai augmentation: 4 pixels of padding, random 32x32 crop, random flip;
# `stats` is handed over for normalisation.
tfms = tfms_from_stats(stats, 32, aug_tfms=[RandomCrop(32), RandomFlip()], pad=4)
# fastai data object built from the folder layout under PATH; the 'test' folder serves as validation set.
data1 = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=bs, val_name='test')

In [5]:
# Model from fastai.models.cifar10.wideresnet, freshly initialised for the baseline run.
m = wrn_22()
# Adam with betas=(0.95, 0.99) instead of the optimiser's defaults.
opt_fn = partial(optim.Adam, betas=(0.95,0.99))
learn = ConvLearner.from_model_data(m, data1, opt_fn=opt_fn)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=0.1  # weight decay, passed to fit() as `wds`

In [6]:
# Baseline timing: the exact same fit() call is reused in every section below,
# so wall-clock time and final accuracy can be compared across data pipelines.
%time learn.fit(3e-3, 1, cycle_len=30, use_clr_beta=(10,7.5,0.95,0.85), wds=wd, use_wd_sched=True)

HBox(children=(IntProgress(value=0, description='Epoch', max=30), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                   
    0      1.070115   1.196505   0.5752    
    1      0.825004   0.973065   0.6554                      
    2      0.653242   0.771395   0.7448                      
    3      0.584217   0.681873   0.7707                      
    4      0.532069   0.591126   0.8027                      
    5      0.489163   0.569924   0.8081                      
    6      0.45033    0.489344   0.8385                      
    7      0.418057   0.505793   0.8317                      
    8      0.394136   0.514943   0.8263                      
    9      0.390115   0.622177   0.7885                      
    10     0.384691   0.473768   0.8427                      
    11     0.356405   0.432304   0.8512                      
    12     0.344455   0.414629   0.8612                      
    13     0.335668   0.428897   0.8523                      
    14     0.302873   0.376021   0.8756                      
    15     0.284885   0.366

[0.20729929401874542, 0.9451]

Standard DawnBench result with one GPU: 94% accuracy in 22min47s.

## New pipeline + openCV (like in old fastai)

In [16]:
# Device used by DataBunch whenever no explicit `device` is passed: CUDA device 0.
default_device = torch.device('cuda', 0)

In [11]:
def find_classes(folder):
    "Return the non-hidden sub-directories of `folder` as a list sorted by directory name."
    subdirs = []
    for entry in folder.iterdir():
        # Skip plain files and dot-directories (e.g. `.ipynb_checkpoints`).
        if not entry.is_dir() or entry.name.startswith('.'): continue
        subdirs.append(entry)
    return sorted(subdirs, key=lambda entry: entry.name)

def get_image_files(c):
    "Return the entries directly inside folder `c` that are neither hidden nor directories."
    files = []
    for entry in c.iterdir():
        hidden = entry.name.startswith('.')
        if hidden or entry.is_dir(): continue
        files.append(entry)
    return files

In [35]:
from PIL import Image
from torch.utils.data import Dataset as Dataset1

class FilesDataset1(Dataset1):#Renamed to avoid conflict with fastai FilesDataset
    """Dataset over a folder that contains one sub-folder per class.

    Images are read with `open_image` and passed through every transform in
    `tfms`, in order. Each transform is called as `tfm(x, None)` and must
    return a pair whose first element is the transformed image.
    """
    def __init__(self, folder, tfms):
        cls_dirs = find_classes(folder)
        self.classes = [d.name for d in cls_dirs]
        self.fns, self.y = [], []
        # The label of an image is the index of its class folder in sorted order.
        for label, cls_dir in enumerate(cls_dirs):
            files = get_image_files(cls_dir)
            self.fns.extend(files)
            self.y.extend([label] * len(files))
        self.tfms = tfms

    def __len__(self):
        return len(self.fns)

    def __getitem__(self, i):
        img = open_image(self.fns[i])
        for tfm in self.tfms:
            # The second slot of the pair (the transformed target) is not used here.
            img, _unused = tfm(img, None)
        return img, self.y[i]

In [12]:
class DeviceDataLoader():
    """Wrap a dataloader so that each batch is moved to `device` and normalised there.

    `stats` is a (mean, std) pair with one value per channel; both are stored
    as float32 tensors on `device` (attributes `m` and `s`).
    """
    def __init__(self, dl, device, stats):
        self.dl = dl
        self.device = device
        mean, std = (torch.tensor(stat, dtype=torch.float32, device=device) for stat in stats)
        self.m, self.s = mean, std

    def __iter__(self):
        for batch in self.dl:
            xb = batch[0].to(self.device)
            yb = batch[1].to(self.device)
            # Reshape the stats to (1, C, 1, 1) so they broadcast over (N, C, H, W) inputs.
            mean = self.m[None,:,None,None]
            std = self.s[None,:,None,None]
            yield (xb - mean) / std, yb

    def __len__(self):
        return len(self.dl)

Import PyTorch's own `DataLoader` under an alias, since fastai's star import replaced the name with its own definition.

In [13]:
from torch.utils.data.dataloader import DataLoader as DataLoader1
def get_dataloader(ds, bs, shuffle, device, stats, num_workers=8):
    """Build a PyTorch dataloader over `ds` and wrap it in a `DeviceDataLoader`.

    Args:
        ds: dataset to iterate over.
        bs: batch size.
        shuffle: whether the dataloader reshuffles the dataset.
        device: device each batch is moved to.
        stats: (mean, std) pair forwarded to `DeviceDataLoader` for normalisation.
        num_workers: number of loader worker processes. Defaults to 8, the value
            that used to be hard-coded; pass 0 to load in the main process.
    """
    dl = DataLoader1(ds, batch_size=bs, shuffle=shuffle, num_workers=num_workers)
    return DeviceDataLoader(dl, device, stats)

In [14]:
class DataBunch():
    """Hold the training and validation dataloaders built from two datasets.

    The training loader shuffles and uses batch size `bs`; the validation
    loader does not shuffle and uses `2*bs`. When `device` is None the
    module-level `default_device` is used. `classes` is only set when the
    training dataset exposes a `classes` attribute.
    """
    def __init__(self, trn_ds, val_ds, stats, bs=64, device=None):
        if device is None: device = default_device
        self.device = device
        if hasattr(trn_ds, 'classes'):
            self.classes = trn_ds.classes
        self.trn_dl = get_dataloader(trn_ds, bs, shuffle=True, device=self.device, stats=stats)
        self.val_dl = get_dataloader(val_ds, bs*2, shuffle=False, device=self.device, stats=stats)

    @classmethod
    def from_files(cls, Path, trn_tfms, val_tfms, stats, trn_name='train', val_name='valid', bs=64, device=None):
        "Build the two datasets from the `trn_name` and `val_name` sub-folders of `Path`."
        trn_ds = FilesDataset1(Path/trn_name, trn_tfms)
        val_ds = FilesDataset1(Path/val_name, val_tfms)
        return cls(trn_ds, val_ds, stats, bs, device)

In [39]:
# Same fastai (OpenCV-based) augmentation as the baseline.
tfms = tfms_from_stats(stats, 32, aug_tfms=[RandomCrop(32), RandomFlip()], pad=4)
# Drop the second-to-last transform of both pipelines — presumably the normalisation
# step, since DeviceDataLoader already normalises on the device; TODO confirm against
# the order used by fastai's transform list.
tfms[0].tfms.pop(-2)
tfms[1].tfms.pop(-2)
# Feed the raw transform lists to the new dataset/dataloader pipeline.
data = DataBunch.from_files(PATH, tfms[0].tfms, tfms[1].tfms, stats, bs=bs, val_name='test')

In [40]:
# Placeholder fastai data object: it is only passed to ConvLearner.from_model_data below;
# the learner's train/validation loaders are then replaced by the ones from `data`.
tfms1 = tfms_from_stats(stats, 28, aug_tfms=[RandomCrop(28)], pad=0)
data1 = ImageClassifierData.from_paths(PATH, tfms=tfms1, bs=bs, val_name='test')

In [41]:
# Fresh model and learner, configured exactly like the baseline run.
m = wrn_22()
opt_fn = partial(optim.Adam, betas=(0.95,0.99))
learn = ConvLearner.from_model_data(m, data1, opt_fn=opt_fn)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=0.1  # weight decay, passed to fit() as `wds`
# Swap in the loaders of the new pipeline; `data1` only served to construct the learner.
learn.data.trn_dl, learn.data.val_dl = data.trn_dl, data.val_dl

In [42]:
# Same fit() call as the baseline, now timed on the PyTorch dataloader with OpenCV transforms.
%time learn.fit(3e-3, 1, cycle_len=30, use_clr_beta=(10,7.5,0.95,0.85), wds=wd, use_wd_sched=True)

HBox(children=(IntProgress(value=0, description='Epoch', max=30), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                   
    0      1.095266   1.14763    0.5863    
    1      0.824504   0.937468   0.6746                      
    2      0.698435   0.730168   0.7521                      
    3      0.609296   0.611863   0.7971                      
    4      0.532544   0.606716   0.7964                      
    5      0.493457   0.503308   0.83                        
    6      0.455336   0.611184   0.8036                      
    7      0.419502   0.50613    0.8269                      
    8      0.407886   0.544323   0.8193                      
    9      0.383891   0.484687   0.8349                      
    10     0.37561    0.524561   0.8323                      
    11     0.346088   0.507868   0.8352                      
    12     0.343822   0.496445   0.8368                      
    13     0.335781   0.614847   0.8121                      
    14     0.307608   0.473802   0.8476                      
    15     0.278937   0.445

[0.22224570198059082, 0.9447]

The new PyTorch dataloader is fast! It only takes 13min47s to reach 94% accuracy!

## New pipeline + torchvision

In [43]:
from PIL import Image
from torch.utils.data import Dataset as Dataset1

class FilesDataset1(Dataset1):#Renamed to avoid conflict with fastai FilesDataset
    """Image-folder dataset decoded with PIL and augmented with torchvision transforms.

    `folder` contains one sub-folder per class; the label of an image is the
    index of its class folder in sorted order.

    Args:
        folder: root folder of the split (one sub-folder per class).
        tfms: sequence of torchvision transforms applied to the PIL image.
            `None` or an empty sequence means "no transform".

    Items are `(image, label)` where `image` is a float32 numpy array of shape
    (3, H, W) scaled by 1/255.
    """
    def __init__(self, folder, tfms):
        cls_dirs = find_classes(folder)
        self.fns, self.y = [], []
        self.classes = [cls.name for cls in cls_dirs]
        for i, cls_dir in enumerate(cls_dirs):
            fnames = get_image_files(cls_dir)
            self.fns += fnames
            self.y += [i] * len(fnames)
        # Truthiness covers both [] and None; Compose(None) would only fail later, on first use.
        self.tfms = torchvision.transforms.Compose(tfms) if tfms else None

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        # The context manager releases the file handle once the pixels are in memory.
        with Image.open(self.fns[i]) as img:
            # Greyscale/palette/RGBA files would break the (H, W, 3) layout assumed
            # by the transpose below and by the 3-channel normalisation stats.
            if img.mode != 'RGB': img = img.convert('RGB')
            if self.tfms is not None: img = self.tfms(img)
            arr = np.array(img, dtype=np.float32)
        # HWC -> CHW, scaled by 1/255
        return arr.transpose(2,0,1)/255,self.y[i]

In [45]:
# torchvision counterpart of the earlier augmentation: symmetric 4-pixel padding,
# random 32x32 crop, random horizontal flip.
trn_tfms = [torchvision.transforms.Pad(4, padding_mode='symmetric'),
            torchvision.transforms.RandomCrop(32),
            torchvision.transforms.RandomHorizontalFlip()]
# No transforms at validation time.
val_tfms = []
data = DataBunch.from_files(PATH, trn_tfms, val_tfms, stats, bs=bs, val_name='test')

In [46]:
# Placeholder fastai data object, only used to construct the learner below;
# its loaders are replaced by the ones from `data`.
tfms = tfms_from_stats(stats, 28, aug_tfms=[RandomCrop(28)], pad=0)
data1 = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=bs, val_name='test')

In [48]:
# Fresh model and learner with the same configuration as the previous runs.
m = wrn_22()
opt_fn = partial(optim.Adam, betas=(0.95,0.99))
learn = ConvLearner.from_model_data(m, data1, opt_fn=opt_fn)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=0.1  # weight decay, passed to fit() as `wds`
# Train and validate on the torchvision-based loaders instead of the placeholder ones.
learn.data.trn_dl, learn.data.val_dl = data.trn_dl, data.val_dl

In [49]:
# Same fit() call as before, timed on the torchvision-based pipeline.
%time learn.fit(3e-3, 1, cycle_len=30, use_clr_beta=(10,7.5,0.95,0.85), wds=wd, use_wd_sched=True)

HBox(children=(IntProgress(value=0, description='Epoch', max=30), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                   
    0      1.106797   1.20077    0.5803    
    1      0.841253   1.137082   0.6228                      
    2      0.67957    0.898744   0.6969                      
    3      0.604853   0.590829   0.7994                      
    4      0.538944   0.692684   0.7748                      
    5      0.496735   0.55795    0.8137                      
    6      0.468676   0.520246   0.8232                      
    7      0.432442   0.53066    0.8241                      
    8      0.409816   0.496635   0.8319                      
    9      0.384066   0.454232   0.846                       
    10     0.365821   0.537091   0.8284                      
    11     0.352648   0.431229   0.8565                      
    12     0.338223   0.44415    0.8516                      
    13     0.333257   0.459053   0.8375                      
    14     0.317837   0.388367   0.8719                      
    15     0.287709   0.377

[0.22315136890411377, 0.9445]

Just a tiny bit slower but nothing remarkable.

## New pipeline + data aug on tensors

Here we do all the data aug on the torch tensors.

In [10]:
from PIL import Image
from torch.utils.data import Dataset as Dataset1

class FilesDataset1(Dataset1):#Renamed to avoid conflict with fastai FilesDataset
    """Image-folder dataset that returns torch tensors and augments on the tensor.

    `folder` contains one sub-folder per class; the label of an image is the
    index of its class folder in sorted order.

    Args:
        folder: root folder of the split (one sub-folder per class).
        tfms: optional callable applied to the (3, H, W) float tensor. It must
            return a result with a leading batch dimension; only element 0 is kept.

    Items are `(image, label)` where `image` is a float32 tensor scaled by 1/255.
    """
    def __init__(self, folder, tfms=None):
        cls_dirs = find_classes(folder)
        self.fns, self.y = [], []
        self.classes = [cls.name for cls in cls_dirs]
        for i, cls_dir in enumerate(cls_dirs):
            fnames = get_image_files(cls_dir)
            self.fns += fnames
            self.y += [i] * len(fnames)
        self.tfms = tfms

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        # The context manager releases the file handle once the pixels are in memory.
        with Image.open(self.fns[i]) as img:
            # Greyscale/palette/RGBA files would break the (H, W, 3) layout assumed
            # by the transpose below and by the 3-channel normalisation stats.
            if img.mode != 'RGB': img = img.convert('RGB')
            arr = np.array(img, dtype=np.float32)
        # HWC -> CHW, scaled by 1/255
        x = torch.tensor(arr.transpose(2,0,1)/255)
        # The transform works on a batch of one; [0] drops that batch dimension again.
        if self.tfms is not None: x = self.tfms(x)[0]
        return x,self.y[i]

In [113]:
class CustomTfm():
    """Reflect-pad, randomly crop to `size` and randomly flip a (C, H, W) tensor.

    The result keeps a leading batch dimension of size 1.
    """
    def __init__(self, p_flip, pad, size):
        self.p_flip = p_flip
        self.pad = pad
        self.size = size

    def __call__(self, x):
        _, h, w = x.size()
        p, size = self.pad, self.size
        padded = F.pad(x[None], (p, p, p, p), 'reflect') #Symmetric not implemented in F.pad
        # Largest valid top/left offset of the crop window inside the padded image.
        max_top, max_left = h + 2*p - size, w + 2*p - size
        top = random.randint(0, max_top) if max_top >= 0 else 0
        left = random.randint(0, max_left) if max_left >= 0 else 0
        crop = padded[:, :, top:top+size, left:left+size]
        return do_random_flip(crop, self.p_flip)

In [9]:
def do_random_flip(x, prob):
    """Flip a batch of images horizontally with probability `prob`.

    Args:
        x: 4-d tensor; the last dimension (index 3) is the one that is reversed.
        prob: probability of flipping; 0 never flips, 1 always flips.

    Returns:
        `x` itself when no flip happens, otherwise a new tensor reversed along dim 3.
    """
    # Draw from Python's `random`, like the crop offsets do. Depending on the PyTorch
    # version, numpy's global RNG state may be duplicated across DataLoader workers,
    # in which case every worker would take the same flip decisions.
    import random
    if random.random() >= prob: return x
    # Build the reversed index on x's own device so CUDA inputs work too.
    idx = torch.arange(x.size(3) - 1, -1, -1, dtype=torch.long, device=x.device)
    return x.index_select(3, idx)

In [115]:
# Training augmentation on tensors: flip probability 0.5, 4 pixels of padding, 32x32 crop.
trn_tfms = CustomTfm(0.5, 4, 32)
# No augmentation on the validation set.
val_tfms = None
data = DataBunch.from_files(PATH, trn_tfms, val_tfms, stats, bs=bs, val_name='test')

In [116]:
# Placeholder fastai data object, only used to construct the learner below;
# its loaders are replaced by the ones from `data`.
tfms = tfms_from_stats(stats, 28, aug_tfms=[RandomCrop(28)], pad=0)
data1 = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=bs, val_name='test')

In [117]:
# Fresh model and learner with the same configuration as the previous runs.
m = wrn_22()
opt_fn = partial(optim.Adam, betas=(0.95,0.99))
learn = ConvLearner.from_model_data(m, data1, opt_fn=opt_fn)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=0.1  # weight decay, passed to fit() as `wds`
# Train and validate on the tensor-augmentation loaders instead of the placeholder ones.
learn.data.trn_dl, learn.data.val_dl = data.trn_dl, data.val_dl

In [118]:
# Same fit() call as before, timed with the augmentation done on torch tensors.
%time learn.fit(3e-3, 1, cycle_len=30, use_clr_beta=(10,7.5,0.95,0.85), wds=wd, use_wd_sched=True)

HBox(children=(IntProgress(value=0, description='Epoch', max=30), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                   
    0      1.103617   1.091981   0.6097    
    1      0.831675   0.844796   0.7023                      
    2      0.691814   0.839972   0.7116                      
    3      0.593204   0.780473   0.741                       
    4      0.559518   0.632671   0.7869                      
    5      0.492613   0.471881   0.8396                      
    6      0.456501   0.492801   0.8319                      
    7      0.43044    0.437216   0.8522                      
    8      0.407928   0.604274   0.7974                      
    9      0.379541   0.569592   0.816                       
    10     0.359945   0.457494   0.8454                      
    11     0.353988   0.499036   0.8367                      
    12     0.335698   0.494482   0.8376                      
    13     0.338825   0.537266   0.831                       
    14     0.305629   0.375439   0.8749                      
    15     0.273885   0.438

[0.22309259892702102, 0.9437]

A bit faster than OpenCV, and using reflect padding instead of symmetric padding doesn't seem to hurt accuracy.

## Same but with an interpolation

In [7]:
def interpolate(x, coords, padding='reflect'):
    """Sample `x` at the locations in `coords` with `F.grid_sample`.

    Args:
        x: batch of images, passed to `F.grid_sample` as its input.
        coords: sampling grid in normalised [-1, 1] coordinates. It is never
            modified; the reflection works on a copy.
        padding: 'reflect' folds out-of-range coordinates back once around the
            nearest border (-1 or 1) and then samples with zero padding. Any other
            value is forwarded unchanged as `padding_mode`.

    Returns:
        The tensor produced by `F.grid_sample`.
    """
    if padding=='reflect':#Reflect padding isn't implemented in grid_sample yet
        # Copy first: the caller's grid (often a view of a larger grid) must stay intact.
        coords = coords.clone()
        below = coords < -1
        coords[below] = -2 - coords[below]   # mirror around -1
        # Evaluated after the first fold, so values pushed above 1 by it are folded too.
        above = coords > 1
        coords[above] = 2 - coords[above]    # mirror around +1
        padding='zeros'
    return F.grid_sample(x, coords, padding_mode=padding)

In [29]:
class CustomTfm():
    """Reflect-pad, randomly crop and randomly flip a (C, H, W) tensor.

    The crop is done by slicing an identity sampling grid and interpolating the
    padded image at those coordinates. The result keeps a leading batch dimension
    of size 1.
    """
    def __init__(self, p_flip, pad, size):
        self.p_flip = p_flip
        self.pad = pad
        self.size = size

    def __call__(self, x):
        _, h, w = x.size()
        p, size = self.pad, self.size
        padded = F.pad(x[None], (p, p, p, p), 'reflect') #Symmetric not implemented in F.pad
        full_h, full_w = h + 2*p, w + 2*p
        # First two rows of the 3x3 identity: the (A|b) form expected by affine_grid.
        theta = torch.eye(3)[:2,:]
        grid = F.affine_grid(theta[None], torch.Size([1,3,full_h,full_w]))
        top = random.randint(0, full_h - size) if full_h >= size else 0
        left = random.randint(0, full_w - size) if full_w >= size else 0
        # Cropping the grid instead of the image selects the window to sample.
        window = grid[:, top:top+size, left:left+size, :]
        return do_random_flip(interpolate(padded, window), self.p_flip)

In [30]:
# Grid-based augmentation: flip probability 0.5, 4 pixels of padding, 32x32 crop.
trn_tfms = CustomTfm(0.5, 4, 32)
# No augmentation on the validation set.
val_tfms = None
data = DataBunch.from_files(PATH, trn_tfms, val_tfms, stats, bs=bs, val_name='test')

 41%|████      | 159/391 [00:30<00:43,  5.30it/s, loss=0.291]

In [31]:
# Placeholder fastai data object, only used to construct the learner below;
# its loaders are replaced by the ones from `data`.
tfms = tfms_from_stats(stats, 28, aug_tfms=[RandomCrop(28)], pad=0)
data1 = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=bs, val_name='test')

In [32]:
# Fresh model and learner with the same configuration as the previous runs.
m = wrn_22()
opt_fn = partial(optim.Adam, betas=(0.95,0.99))
learn = ConvLearner.from_model_data(m, data1, opt_fn=opt_fn)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=0.1  # weight decay, passed to fit() as `wds`
# Train and validate on the interpolation-based loaders instead of the placeholder ones.
learn.data.trn_dl, learn.data.val_dl = data.trn_dl, data.val_dl

In [33]:
# Same fit() call as before, timed with the crop done through grid interpolation.
%time learn.fit(3e-3, 1, cycle_len=30, use_clr_beta=(10,7.5,0.95,0.85), wds=wd, use_wd_sched=True)

HBox(children=(IntProgress(value=0, description='Epoch', max=30), HTML(value='')))

 37%|███▋      | 145/391 [00:09<00:16, 14.60it/s, loss=1.5] 
epoch      trn_loss   val_loss   accuracy                   
    0      1.096365   1.221584   0.5711    
    1      0.828955   0.880446   0.6967                      
    2      0.704505   0.847956   0.7117                      
    3      0.612732   0.632858   0.7827                      
    4      0.530568   0.601534   0.8001                      
    5      0.489203   0.567136   0.8061                      
    6      0.449854   0.480488   0.8403                      
    7      0.435406   0.54834    0.8205                      
    8      0.409922   0.456254   0.8476                      
    9      0.407223   0.480855   0.8425                      
    10     0.388129   0.561264   0.8225                      
    11     0.363649   0.694288   0.7856                      
    12     0.344785   0.47669    0.8455                      
    13     0.332264   0.409744   0.8596                      
    14     0.33043    0.3986

[0.22742701032161713, 0.9434]

We don't lose time and it's still as accurate.

## Same with random flip as an affine transform

In [38]:
def affine_transform(img, matrix, interpol=True, padding='reflect'):
    """
    Applies an affine transformation to a single image.

    Optionally only computes the sampling coordinates, without interpolating
    the transformed image.
    Args:
    img: one image without batch dimension; a batch dimension of size 1 is added here
    matrix: a matrix of size 2 by 3 describing the transformation.
            if the transformation is Ax + b, the matrix is (A|b)
    interpol: if False, returns only the new coordinates
    padding: padding to apply during the interpolation, forwarded to `interpolate`

    """
    batch = img[None]
    grid = F.affine_grid(matrix[None], batch.size())
    if not interpol:
        return grid
    return interpolate(batch, grid, padding)

In [39]:
def get_random_rot_matrix(degrees):
    "Return a 3x3 rotation matrix for an angle drawn uniformly from [-degrees, degrees]."
    theta = random.uniform(-degrees,degrees) * math.pi / 180
    cos_t, sin_t = math.cos(theta), math.sin(theta)
    rows = [[cos_t, -sin_t, 0],
            [sin_t,  cos_t, 0],
            [0,      0,     1]]
    return torch.tensor(rows)

In [40]:
def get_random_scale_matrix(zoom_range):
    "Return a 3x3 matrix scaling both axes by one factor drawn uniformly from `zoom_range`."
    factor = random.uniform(*zoom_range)
    rows = [[factor, 0,      0],
            [0,      factor, 0],
            [0,      0,      1]]
    return torch.tensor(rows)

In [41]:
def get_random_flip(prob):
    """Return a 3x3 float matrix: a horizontal flip with probability `prob`, else the identity.

    The flip matrix is the identity with its top-left entry set to -1, i.e. the
    first coordinate is negated.
    """
    # Draw from Python's `random`, like the crop offsets do. Depending on the PyTorch
    # version, numpy's global RNG state may be duplicated across DataLoader workers,
    # in which case every worker would take the same flip decisions.
    import random
    matrix = torch.eye(3)
    if random.random() < prob:
        matrix[0, 0] = -1
    return matrix

In [69]:
class CustomTfm():
    """Reflect-pad, randomly flip and randomly crop a (C, H, W) tensor.

    The flip is expressed as an affine matrix and the crop as a slice of the
    resulting sampling grid, so a single interpolation produces the output.
    The result keeps a leading batch dimension of size 1.
    """
    def __init__(self, p_flip, pad, size):
        self.p_flip = p_flip
        self.pad = pad
        self.size = size

    def __call__(self, x):
        _, h, w = x.size()
        p, size = self.pad, self.size
        padded = F.pad(x[None], (p, p, p, p), 'reflect') #Symmetric not implemented in F.pad
        # Keep the first two rows: the (A|b) form expected by affine_grid.
        theta = get_random_flip(self.p_flip)[:2,:]
        full_h, full_w = h + 2*p, w + 2*p
        grid = F.affine_grid(theta[None], torch.Size([1,3,full_h,full_w]))
        top = random.randint(0, full_h - size) if full_h >= size else 0
        left = random.randint(0, full_w - size) if full_w >= size else 0
        # Cropping the grid instead of the image selects the window to sample.
        return interpolate(padded, grid[:, top:top+size, left:left+size, :])

In [43]:
# Affine-flip augmentation: flip probability 0.5, 4 pixels of padding, 32x32 crop.
trn_tfms = CustomTfm(0.5, 4, 32)
# No augmentation on the validation set.
val_tfms = None
data = DataBunch.from_files(PATH, trn_tfms, val_tfms, stats, bs=bs, val_name='test')

In [44]:
# Placeholder fastai data object, only used to construct the learner below;
# its loaders are replaced by the ones from `data`.
tfms = tfms_from_stats(stats, 28, aug_tfms=[RandomCrop(28)], pad=0)
data1 = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=bs, val_name='test')

In [45]:
# Fresh model and learner with the same configuration as the previous runs.
m = wrn_22()
opt_fn = partial(optim.Adam, betas=(0.95,0.99))
learn = ConvLearner.from_model_data(m, data1, opt_fn=opt_fn)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=0.1  # weight decay, passed to fit() as `wds`
# Train and validate on the affine-flip loaders instead of the placeholder ones.
learn.data.trn_dl, learn.data.val_dl = data.trn_dl, data.val_dl

In [46]:
# Same fit() call as before, timed with the flip folded into the affine transform.
%time learn.fit(3e-3, 1, cycle_len=30, use_clr_beta=(10,7.5,0.95,0.85), wds=wd, use_wd_sched=True)

HBox(children=(IntProgress(value=0, description='Epoch', max=30), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                   
    0      1.090638   1.110851   0.6083    
    1      0.868387   0.895112   0.6872                      
    2      0.717526   0.729544   0.7502                      
    3      0.607997   0.657253   0.7799                      
    4      0.560143   0.625082   0.7863                      
    5      0.491189   0.487409   0.8326                      
    6      0.472838   0.578546   0.812                       
    7      0.43529    0.514291   0.8297                      
    8      0.41678    0.454186   0.8473                      
    9      0.400172   0.439173   0.8569                      
    10     0.372496   0.507483   0.8303                      
    11     0.385859   0.419538   0.8588                      
    12     0.359593   0.440407   0.8571                      
    13     0.350565   0.524155   0.8191                      
    14     0.321838   0.432839   0.853                       
    15     0.297971   0.442

[0.24224200434684753, 0.942]

Still seems fine.

## Final pipeline

In [95]:
class CustomTfm():
    """Resize, reflect-pad, randomly flip and randomly crop a (C, H, W) tensor.

    The target size (smaller side = `size * size_mult`), the flip and the crop
    are all expressed through one sampling grid, so a single interpolation
    produces the output. The result keeps a leading batch dimension of size 1.
    """
    def __init__(self, p_flip, pad, size, size_mult):
        self.p_flip = p_flip
        self.pad = pad
        self.size = size
        self.size_mult = size_mult

    def __call__(self, x):
        _, h, w = x.size()
        p, size = self.pad, self.size
        #Target dimensions: the lower dimension becomes size * size_mult
        scale = (size * self.size_mult) / min(h,w)
        h, w = int(h * scale), int(w * scale)
        #Pads
        # NOTE(review): the image itself is padded at its original resolution; the
        # target size only enters through the grid below, so the resize happens
        # inside the interpolation — confirm this is the intended behaviour.
        padded = F.pad(x[None], (p, p, p, p), 'reflect') #Symmetric not implemented in F.pad
        #Affine transforms, kept as the first two rows (A|b)
        theta = get_random_flip(self.p_flip)[:2,:]
        full_h, full_w = h + 2*p, w + 2*p
        grid = F.affine_grid(theta[None], torch.Size([1,3,full_h,full_w]))
        #Random crop window, taken on the coordinates rather than on the image
        top = random.randint(0, full_h - size) if full_h >= size else 0
        left = random.randint(0, full_w - size) if full_w >= size else 0
        window = grid[:, top:top+size, left:left+size, :]
        #Interpolation
        return interpolate(padded, window)

In [97]:
# Final augmentation: flip probability 0.5, 4 pixels of padding, 32x32 crop, size multiplier 1.
trn_tfms = CustomTfm(0.5, 4, 32, 1)
# No augmentation on the validation set.
val_tfms = None
data = DataBunch.from_files(PATH, trn_tfms, val_tfms, stats, bs=bs, val_name='test')

In [98]:
# Placeholder fastai data object, only used to construct the learner below;
# its loaders are replaced by the ones from `data`.
tfms = tfms_from_stats(stats, 28, aug_tfms=[RandomCrop(28)], pad=0)
data1 = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=bs, val_name='test')

In [99]:
# Fresh model and learner with the same configuration as the previous runs.
m = wrn_22()
opt_fn = partial(optim.Adam, betas=(0.95,0.99))
learn = ConvLearner.from_model_data(m, data1, opt_fn=opt_fn)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=0.1  # weight decay, passed to fit() as `wds`
# Train and validate on the final-pipeline loaders instead of the placeholder ones.
learn.data.trn_dl, learn.data.val_dl = data.trn_dl, data.val_dl

In [100]:
# Same fit() call as before, timed on the final pipeline.
%time learn.fit(3e-3, 1, cycle_len=30, use_clr_beta=(10,7.5,0.95,0.85), wds=wd, use_wd_sched=True)

HBox(children=(IntProgress(value=0, description='Epoch', max=30), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                   
    0      1.120263   1.128053   0.599     
    1      0.870778   0.898527   0.6884                      
    2      0.709498   0.687441   0.7642                      
    3      0.614241   0.77149    0.7289                      
    4      0.539517   0.730055   0.756                       
    5      0.510432   0.524482   0.8198                      
    6      0.477814   0.474085   0.8392                      
    7      0.428758   0.543345   0.8174                      
    8      0.431321   0.560641   0.8089                      
    9      0.378077   0.436421   0.8542                      
    10     0.378665   0.503413   0.8335                      
    11     0.371681   0.47223    0.8381                      
    12     0.354042   0.475374   0.8439                      
    13     0.343062   0.415416   0.8606                      
    14     0.314198   0.395102   0.8664                      
    15     0.286189   0.411

[0.23895504400730133, 0.9407]