In [1]:
%load_ext autoreload
%autoreload 2

from nb_002 import *

In [2]:
DATA_PATH = Path('../data')
PATH = DATA_PATH/'imagenet'

Test of all the different possiblities for a pipeline on imagenet including:
- resizing the image so that the lower dimension is 224
- random rotate -10 to 10 degrees
- random scale 0.9 to 1.1
- random flip
- random crop

Test on the first 100 batches of imagenet (with shuffle=False)

# Torchvision

In [3]:
import torchvision

In [20]:
class TVFilesDataset(Dataset):
    def __init__(self, folder, tfms):
        cls_dirs = find_classes(folder)
        self.fns, self.y = [], []
        self.classes = [cls.name for cls in cls_dirs]
        for i, cls_dir in enumerate(cls_dirs):
            fnames = get_image_files(cls_dir)
            self.fns += fnames
            self.y += [i] * len(fnames)
        self.tfms = torchvision.transforms.Compose(tfms)
        
    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        x = Image.open(self.fns[i]).convert('RGB')
        x = self.tfms(x)
        return x,self.y[i]

In [21]:
class DeviceDataLoader():
    def __init__(self, dl, device, stats):
        self.dl,self.device = dl,device
        self.m, self.s = map(lambda x:torch.tensor(x, dtype=torch.float32, device=device), stats)
        
    def __iter__(self):
        for b in self.dl:
            x, y = b[0].to(self.device),b[1].to(self.device)
            x = (x - self.m[None,:,None,None]) / self.s[None,:,None,None]
            yield x,y
    
    def __len__(self): return (len(self.dl))

In [22]:
def get_dataloader(ds, bs, shuffle, stats, device = None, sampler=None):
    if device is None: device = default_device
    dl = DataLoader(ds, batch_size=bs, shuffle=shuffle,num_workers=8, sampler=sampler, pin_memory=True)
    return DeviceDataLoader(dl, device, stats)

In [82]:
sz, bs = 224, 192
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [torchvision.transforms.RandomRotation(10),
              torchvision.transforms.RandomResizedCrop(sz, scale=(0.5, 1.0), ratio=(1.,1.)),
              torchvision.transforms.RandomHorizontalFlip(),
              torchvision.transforms.ToTensor()]

In [83]:
train_ds = TVFilesDataset(PATH/'train', train_tfms)

In [84]:
default_device = default_device = torch.device('cuda', 0)

In [85]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [86]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

100%|██████████| 100/100 [00:37<00:00,  2.68it/s]

CPU times: user 6.02 s, sys: 3.12 s, total: 9.15 s
Wall time: 37.3 s





37.3s

# Pipeline with grid_sampler

Needs the PR https://github.com/pytorch/pytorch/pull/9961/files to make grid_sample fast.

In [87]:
def dict_groupby(iterable, key=None):
    return {k:list(v) for k,v in itertools.groupby(sorted(iterable, key=key), key=key)}

def resolve_pipeline(tfms, **kwargs):
    tfms = listify(tfms)
    if len(tfms)==0: return noop
    grouped_tfms = dict_groupby(tfms, lambda o: o.__annotations__['return'])
    lighting_tfms,coord_tfms,affine_tfms,pixel_tfms,final_tfms = map(grouped_tfms.get, TfmType)
    lighting_tfm = apply_lighting_tfms(lighting_tfms)
    affine_tfm = compose_affine_tfms(affine_tfms, funcs=coord_tfms, **kwargs)
    pixel_tfm = compose_tfms(pixel_tfms)
    final_tfm = compose_tfms(final_tfms)
    return lambda x,**k: final_tfm(affine_tfm(lighting_tfm(pixel_tfm(x)), **k))

In [88]:
class TransformedFilesDataset(Dataset):
    def __init__(self, folder, sz, tfms=None, classes=None):
        self.fns, self.y = [], []
        if classes is None: classes = [cls.name for cls in find_classes(folder)]
        self.classes = classes
        for i, cls in enumerate(classes):
            fnames = get_image_files(folder/cls)
            self.fns += fnames
            self.y += [i] * len(fnames)
        self.sz, self.tfms = sz, tfms

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        x = PIL.Image.open(self.fns[i]).convert('RGB')
        x = pil2tensor(x)
        if self.tfms is not None:
            x = resolve_pipeline(self.tfms, size=self.sz)(x) 
        return x,self.y[i]

In [89]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5),
              zoom_tfm(scale=(0.9,1.1),p=0.75),
              rotate_tfm(degrees=(-10,10.),p=0.75),
              crop_tfm(size=sz)]

In [90]:
train_ds = TransformedFilesDataset(PATH/'train', sz, train_tfms)

In [91]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [92]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

100%|██████████| 100/100 [00:31<00:00,  3.18it/s]

CPU times: user 2.14 s, sys: 1.31 s, total: 3.45 s
Wall time: 31.5 s





31.5s

Now without affine augmentation

In [35]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5),
              #zoom_tfm(scale=(0.9,1.1),p=0.75),
              #rotate_tfm(degrees=(-10,10.),p=0.75),
              crop_tfm(size=sz)]

In [36]:
train_ds = TransformedFilesDataset(PATH/'train', sz, train_tfms)

In [37]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [38]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

100%|██████████| 100/100 [00:28<00:00,  3.57it/s]

CPU times: user 2.15 s, sys: 1.31 s, total: 3.46 s
Wall time: 28 s





28s

# With PIL

In [66]:
from PIL import Image

In [76]:
class TransformedFilesDataset(Dataset):
    def __init__(self, folder, sz, tfms=None, classes=None):
        self.fns, self.y = [], []
        if classes is None: classes = [cls.name for cls in find_classes(folder)]
        self.classes = classes
        for i, cls in enumerate(classes):
            fnames = get_image_files(folder/cls)
            self.fns += fnames
            self.y += [i] * len(fnames)
        self.sz, self.tfms = sz, tfms

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        x = PIL.Image.open(self.fns[i]).convert('RGB')
        w,h = x.size
        if w < h: w,h = self.sz,int(self.sz * h / w)
        else:     w,h = int(self.sz * w / h),self.sz
        theta = random.uniform(-10,10) * math.pi / 180 if random.random() < 0.75 else 0
        scale = random.uniform(0.9,1.1) if random.random() < 0.75 else 1
        x = x.transform((w,h), Image.AFFINE, (cos(theta)/scale, -sin(theta), 0, sin(theta), cos(theta)/scale, 0), Image.BILINEAR)
        x = pil2tensor(x)
        if self.tfms is not None:
            x = resolve_pipeline(self.tfms)(x) 
        return x,self.y[i]

In [103]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5),
              #zoom_tfm(scale=(0.9,1.1),p=0.75), Those are done in the dataset
              #rotate_tfm(degrees=(-10,10.),p=0.75),
              crop_tfm(size=sz)]

In [78]:
train_ds = TransformedFilesDataset(PATH/'train', sz, train_tfms)

In [79]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [80]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

100%|██████████| 100/100 [00:15<00:00,  6.35it/s]

CPU times: user 2.1 s, sys: 1.22 s, total: 3.33 s
Wall time: 15.8 s





15.8s ^^

Just PIL resize

In [98]:
class TransformedFilesDataset(Dataset):
    def __init__(self, folder, sz, tfms=None, classes=None):
        self.fns, self.y = [], []
        if classes is None: classes = [cls.name for cls in find_classes(folder)]
        self.classes = classes
        for i, cls in enumerate(classes):
            fnames = get_image_files(folder/cls)
            self.fns += fnames
            self.y += [i] * len(fnames)
        self.sz, self.tfms = sz, tfms

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        x = PIL.Image.open(self.fns[i]).convert('RGB')
        w,h = x.size
        if w < h: w,h = self.sz,int(self.sz * h / w)
        else:     w,h = int(self.sz * w / h),self.sz
        x = x.resize((w,h))
        x = pil2tensor(x)
        x = F.interpolate(x[None], size=(self.sz,self.sz),mode='bilinear')
        if self.tfms is not None:
            x = resolve_pipeline(self.tfms)(x[0]) 
        return x,self.y[i]

In [99]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5),
              #zoom_tfm(scale=(0.9,1.1),p=0.75),
              #rotate_tfm(degrees=(-10,10.),p=0.75),
              crop_tfm(size=sz)]

In [100]:
train_ds = TransformedFilesDataset(PATH/'train', sz, train_tfms)

In [101]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [102]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
100%|██████████| 100/100 [00:11<00:00,  8.65it/s]

CPU times: user 2.1 s, sys: 1.39 s, total: 3.49 s
Wall time: 11.6 s





11.6s

# Just F.interpolate

In [93]:
class TransformedFilesDataset(Dataset):
    def __init__(self, folder, sz, tfms=None, classes=None):
        self.fns, self.y = [], []
        if classes is None: classes = [cls.name for cls in find_classes(folder)]
        self.classes = classes
        for i, cls in enumerate(classes):
            fnames = get_image_files(folder/cls)
            self.fns += fnames
            self.y += [i] * len(fnames)
        self.sz, self.tfms = sz, tfms

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        x = PIL.Image.open(self.fns[i]).convert('RGB')
        x = pil2tensor(x)
        x = F.interpolate(x[None], size=(self.sz,self.sz),mode='bilinear')
        if self.tfms is not None:
            x = resolve_pipeline(self.tfms)(x[0]) 
        return x,self.y[i]

In [94]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5),
              #zoom_tfm(scale=(0.9,1.1),p=0.75),
              #rotate_tfm(degrees=(-10,10.),p=0.75),
              crop_tfm(size=sz)]

In [95]:
train_ds = TransformedFilesDataset(PATH/'train', sz, train_tfms)

In [96]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [97]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
100%|██████████| 100/100 [00:13<00:00,  7.52it/s]

CPU times: user 2.14 s, sys: 1.36 s, total: 3.5 s
Wall time: 13.3 s





13.3s

# On the GPU

grid_sample used to be faster on the GPU so let's try to do that step there by moving the image on the GPU during the affine transformation.

In [54]:
def do_affine(img, m=None, func=None, size=None, **kwargs):
    img = img.cuda(non_blocking=True)
    if size is None: size = img.size()
    elif isinstance(size, int):
        if img.size(1) < img.size(2): size = (img.size(0),size,int(img.size(2)*size/img.size(1)))
        else: size = (img.size(0),int(img.size(1)*size/img.size(2)),size)
    if m is None:
        if img.shape==size: return img
        else: m=eye_new(img, 3)
    m = m.cuda(non_blocking=True)
    c = affine_grid(img,  img.new_tensor(m), size=size)
    if func is not None: c = func(c)
    return grid_sample(img, c, **kwargs)

In [55]:
def dict_groupby(iterable, key=None):
    return {k:list(v) for k,v in itertools.groupby(sorted(iterable, key=key), key=key)}

def resolve_pipeline(tfms, **kwargs):
    tfms = listify(tfms)
    if len(tfms)==0: return noop
    grouped_tfms = dict_groupby(tfms, lambda o: o.__annotations__['return'])
    lighting_tfms,coord_tfms,affine_tfms,pixel_tfms,final_tfms = map(grouped_tfms.get, TfmType)
    lighting_tfm = apply_lighting_tfms(lighting_tfms)
    affine_tfm = compose_affine_tfms(affine_tfms, funcs=coord_tfms, **kwargs)
    pixel_tfm = compose_tfms(pixel_tfms)
    final_tfm = compose_tfms(final_tfms)
    return lambda x,**k: final_tfm(affine_tfm(lighting_tfm(pixel_tfm(x)), **k))

In [56]:
class TransformedFilesDataset(Dataset):
    def __init__(self, folder, sz, tfms=None, classes=None):
        self.fns, self.y = [], []
        if classes is None: classes = [cls.name for cls in find_classes(folder)]
        self.classes = classes
        for i, cls in enumerate(classes):
            fnames = get_image_files(folder/cls)
            self.fns += fnames
            self.y += [i] * len(fnames)
        self.sz, self.tfms = sz, tfms

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        x = PIL.Image.open(self.fns[i]).convert('RGB')
        x = pil2tensor(x)
        if self.tfms is not None:
            x = resolve_pipeline(self.tfms, size=self.sz)(x) 
        return x,self.y[i]

In [57]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5),
              zoom_tfm(scale=(0.9,1.1),p=0.75),
              rotate_tfm(degrees=(-10,10.),p=0.75),
              crop_tfm(size=sz)]

In [58]:
class DeviceDataLoader():
    def __init__(self, dl, device, stats):
        self.dl,self.device = dl,device
        self.m, self.s = map(lambda x:torch.tensor(x, dtype=torch.float32, device=device), stats)
        
    def __iter__(self):
        for b in self.dl:
            #x, y = b[0].to(self.device),b[1].to(self.device)
            x = (x - self.m[None,:,None,None]) / self.s[None,:,None,None]
            yield x,y
    
    def __len__(self): return (len(self.dl))

In [59]:
def get_dataloader(ds, bs, shuffle, stats, device = None, sampler=None):
    if device is None: device = default_device
    dl = DataLoader(ds, batch_size=bs, shuffle=shuffle,num_workers=8, sampler=sampler, pin_memory=True)
    return DeviceDataLoader(dl, device, stats)

In [61]:
class DeviceDataLoader():
    def __init__(self, dl, device, stats):
        self.dl,self.device = dl,device
        self.m, self.s = map(lambda x:torch.tensor(x, dtype=torch.float32, device=device), stats)
        
    def __iter__(self):
        for b in self.dl:
            x, y = b[0].to(self.device),b[1].to(self.device)
            x = (x - self.m[None,:,None,None]) / self.s[None,:,None,None]
            yield x,y
    
    def __len__(self): return (len(self.dl))

def get_dataloader(ds, bs, shuffle, stats, device = None, sampler=None):
    if device is None: device = default_device
    dl = DataLoader(ds, batch_size=bs, shuffle=shuffle,num_workers=8, sampler=sampler, pin_memory=True)
    return DeviceDataLoader(dl, device, stats)

In [62]:
train_ds = TransformedFilesDataset(PATH/'train', sz, train_tfms)

In [63]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [64]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

100%|██████████| 100/100 [00:31<00:00,  3.18it/s]

CPU times: user 2.17 s, sys: 1.22 s, total: 3.39 s
Wall time: 31.4 s





No significant change from the CPU.