Uncomment to create the random sample

In [92]:
#import os, shutil

In [87]:
#DATA_PATH = Path('../data')
#PATH = DATA_PATH/'imagenet'
#list_classes = find_classes(PATH/'train')

In [88]:
#random_select = np.random.permutation(list_classes)[:20]

In [93]:
#def get_val_folders(class_folders):
#    names = [f.name for f in class_folders]
#    return [PATH/'val'/name for name in names]

In [94]:
#def create_new(class_folders):
#    path = Path('../data/sample_imagenet/')
#    os.makedirs(path, exist_ok=True)
#    for mode in ['train', 'val']:
#        p = path/mode
#        os.makedirs(p, exist_ok=True)
#        for f in class_folders:
#            os.makedirs(p/f.name, exist_ok=True)
#            list_images = (PATH/mode/f.name).glob('*')
#            for img in list_images:
#                shutil.copy(img, p/f.name/img.name)

In [95]:
#create_new(random_select)

In [96]:
%load_ext autoreload
%autoreload 2

from nb_002 import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [97]:
DATA_PATH = Path('../data')
PATH = DATA_PATH/'sample_imagenet'

Test of all the different possiblities for a pipeline on imagenet including:
- resizing the image so that the lower dimension is 224
- random rotate -10 to 10 degrees
- random scale 0.9 to 1.1
- random flip
- random crop

Test on the first 100 batches of imagenet (with shuffle=False)

# Torchvision

In [98]:
import torchvision

In [99]:
class TVFilesDataset(Dataset):
    def __init__(self, folder, tfms):
        cls_dirs = find_classes(folder)
        self.fns, self.y = [], []
        self.classes = [cls.name for cls in cls_dirs]
        for i, cls_dir in enumerate(cls_dirs):
            fnames = get_image_files(cls_dir)
            self.fns += fnames
            self.y += [i] * len(fnames)
        self.tfms = torchvision.transforms.Compose(tfms)
        
    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        x = Image.open(self.fns[i]).convert('RGB')
        x = self.tfms(x)
        return x,self.y[i]

In [100]:
class DeviceDataLoader():
    def __init__(self, dl, device, stats):
        self.dl,self.device = dl,device
        self.m, self.s = map(lambda x:torch.tensor(x, dtype=torch.float32, device=device), stats)
        
    def __iter__(self):
        for b in self.dl:
            x, y = b[0].to(self.device),b[1].to(self.device)
            x = (x - self.m[None,:,None,None]) / self.s[None,:,None,None]
            yield x,y
    
    def __len__(self): return (len(self.dl))

In [101]:
def get_dataloader(ds, bs, shuffle, stats, device = None, sampler=None):
    if device is None: device = default_device
    dl = DataLoader(ds, batch_size=bs, shuffle=shuffle,num_workers=8, sampler=sampler, pin_memory=True)
    return DeviceDataLoader(dl, device, stats)

In [102]:
sz, bs = 224, 192
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [torchvision.transforms.RandomResizedCrop(sz),
              torchvision.transforms.RandomHorizontalFlip(),
              torchvision.transforms.ToTensor()]

In [103]:
train_ds = TVFilesDataset(PATH/'train', train_tfms)

In [105]:
default_device = default_device = torch.device('cuda', 0)

In [106]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [107]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

100%|██████████| 100/100 [00:18<00:00,  5.38it/s]

CPU times: user 6.23 s, sys: 3.43 s, total: 9.66 s
Wall time: 18.6 s





18.6s

# Pipeline with grid_sampler

Needs the PR https://github.com/pytorch/pytorch/pull/10051/files to make grid_sample fast and support reflect padding/nearest interpolation.

In [108]:
def dict_groupby(iterable, key=None):
    return {k:list(v) for k,v in itertools.groupby(sorted(iterable, key=key), key=key)}

def resolve_pipeline(tfms, **kwargs):
    tfms = listify(tfms)
    if len(tfms)==0: return noop
    grouped_tfms = dict_groupby(tfms, lambda o: o.__annotations__['return'])
    lighting_tfms,coord_tfms,affine_tfms,pixel_tfms,final_tfms = map(grouped_tfms.get, TfmType)
    lighting_tfm = apply_lighting_tfms(lighting_tfms)
    affine_tfm = compose_affine_tfms(affine_tfms, funcs=coord_tfms, **kwargs)
    pixel_tfm = compose_tfms(pixel_tfms)
    final_tfm = compose_tfms(final_tfms)
    return lambda x,**k: final_tfm(affine_tfm(lighting_tfm(pixel_tfm(x)), **k))

In [109]:
def get_rrc_params(img, scale, ratio):
    for attempt in range(10):
        area = img.size[0] * img.size[1]
        target_area = random.uniform(*scale) * area
        aspect_ratio = random.uniform(*ratio)

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))

        if random.random() < 0.5:
            w, h = h, w

        if w <= img.size[0] and h <= img.size[1]:
            i = random.randint(0, img.size[1] - h)
            j = random.randint(0, img.size[0] - w)
            return i, j, h, w

    w = min(img.size[0], img.size[1])
    i = (img.size[1] - w) // 2
    j = (img.size[0] - w) // 2
    return i, j, w, w

In [110]:
class TransformedFilesDataset(Dataset):
    def __init__(self, folder, sz, tfms=None, classes=None):
        self.fns, self.y = [], []
        if classes is None: classes = [cls.name for cls in find_classes(folder)]
        self.classes = classes
        for i, cls in enumerate(classes):
            fnames = get_image_files(folder/cls)
            self.fns += fnames
            self.y += [i] * len(fnames)
        self.sz, self.tfms = sz, tfms

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        x = PIL.Image.open(self.fns[i]).convert('RGB')
        i,j,h,w = get_rrc_params(x, (0.8,1), (3/4,4/3))
        x = pil2tensor(x)
        x = x[:,i:i+h,j:j+w]
        if self.tfms is not None:
            x = resolve_pipeline(self.tfms, size=self.sz)(x) 
        return x,self.y[i]

In [111]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5)]

In [112]:
train_ds = TransformedFilesDataset(PATH/'train', sz, train_tfms)

In [115]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [116]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

100%|██████████| 100/100 [00:30<00:00,  3.28it/s]

CPU times: user 2.15 s, sys: 1.11 s, total: 3.26 s
Wall time: 30.5 s





31.2s

Let's check if ToTensor() is faster then our pil2tensor

In [117]:
class TransformedFilesDataset(Dataset):
    def __init__(self, folder, sz, tfms=None, classes=None):
        self.fns, self.y = [], []
        if classes is None: classes = [cls.name for cls in find_classes(folder)]
        self.classes = classes
        for i, cls in enumerate(classes):
            fnames = get_image_files(folder/cls)
            self.fns += fnames
            self.y += [i] * len(fnames)
        self.sz, self.tfms = sz, tfms

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        x = PIL.Image.open(self.fns[i]).convert('RGB')
        i,j,h,w = get_rrc_params(x, (0.8,1), (3/4,4/3))
        x = torchvision.transforms.ToTensor()(x)
        x = x[:,i:i+h,j:j+w]
        if self.tfms is not None:
            x = resolve_pipeline(self.tfms, size=self.sz)(x) 
        return x,self.y[i]

In [118]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5)]

In [119]:
train_ds = TransformedFilesDataset(PATH/'train', sz, train_tfms)

In [120]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [121]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

100%|██████████| 100/100 [00:32<00:00,  3.10it/s]

CPU times: user 2.09 s, sys: 1.08 s, total: 3.18 s
Wall time: 32.2 s





32.2s, no it's not.

# With PIL

In [122]:
from PIL import Image

In [123]:
class TransformedFilesDataset(Dataset):
    def __init__(self, folder, sz, tfms=None, classes=None):
        self.fns, self.y = [], []
        if classes is None: classes = [cls.name for cls in find_classes(folder)]
        self.classes = classes
        for i, cls in enumerate(classes):
            fnames = get_image_files(folder/cls)
            self.fns += fnames
            self.y += [i] * len(fnames)
        self.sz, self.tfms = sz, tfms

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        x = PIL.Image.open(self.fns[i]).convert('RGB')
        i,j,h,w = get_rrc_params(x, (0.8,1), (3/4,4/3))
        x = x.crop((j,i,j+w,i+h))
        x = x.resize((self.sz,self.sz), Image.BILINEAR)
        x = pil2tensor(x)
        if self.tfms is not None:
            x = resolve_pipeline(self.tfms)(x)
        return x,self.y[i]

In [124]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5)]

In [125]:
train_ds = TransformedFilesDataset(PATH/'train', sz, train_tfms)

In [126]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [127]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

100%|██████████| 100/100 [00:07<00:00, 13.07it/s]

CPU times: user 2.11 s, sys: 1.31 s, total: 3.42 s
Wall time: 7.68 s





7.7s

# Just F.interpolate

In [128]:
class TransformedFilesDataset(Dataset):
    def __init__(self, folder, sz, tfms=None, classes=None):
        self.fns, self.y = [], []
        if classes is None: classes = [cls.name for cls in find_classes(folder)]
        self.classes = classes
        for i, cls in enumerate(classes):
            fnames = get_image_files(folder/cls)
            self.fns += fnames
            self.y += [i] * len(fnames)
        self.sz, self.tfms = sz, tfms

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        x = PIL.Image.open(self.fns[i]).convert('RGB')
        i,j,h,w = get_rrc_params(x, (0.8,1), (3/4,4/3))
        x = pil2tensor(x)
        x = x[:,i:i+h,j:j+w]
        x = F.interpolate(x[None], size=(self.sz,self.sz),mode='bilinear')
        if self.tfms is not None:
            x = resolve_pipeline(self.tfms)(x)
        return x,self.y[i]

In [129]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5)]

In [130]:
train_ds = TransformedFilesDataset(PATH/'train', sz, train_tfms)

In [131]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [132]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
  "See the documentation of nn.Upsample for details.".format(mode))
100%|██████████| 100/100 [00:11<00:00,  8.96it/s]

CPU times: user 2.21 s, sys: 1.24 s, total: 3.45 s
Wall time: 11.2 s





11.2s