In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
#export
from nb_002b import *

import operator
from random import sample
from torch.utils.data.sampler import Sampler

In [None]:
DATA_PATH = Path('data')
PATH = DATA_PATH/'caltech101'

# Caltech 101

## Create validation set

In [None]:
#export
class FilesDataset(Dataset):
    """Image-classification dataset built from parallel lists of file names and labels.

    Attributes:
        classes:   ordered list of class labels; position in the list is the class index.
        class2idx: mapping from class label to its integer index.
        fns:       NumPy array of the file names.
        y:         list of integer class indices, one per file.
    """
    def __init__(self, fns, labels, classes=None):
        """Store the file names and encode `labels` as indices into `classes`.

        When `classes` is None it is inferred from the distinct labels, sorted so the
        label -> index mapping is reproducible instead of depending on set ordering.
        """
        if classes is None: classes = sorted(set(labels))
        self.classes = classes
        self.class2idx = {v:k for k,v in enumerate(classes)}
        self.fns = np.array(fns)
        self.y = [self.class2idx[o] for o in labels]

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        """Return `(pil2tensor(image), class_index)` for item `i`, loading the image as RGB."""
        # Use a context manager so the underlying file handle is closed right away.
        with PIL.Image.open(self.fns[i]) as img:
            x = img.convert('RGB')
        return pil2tensor(x),self.y[i]

    @classmethod
    def from_folder(cls, folder, classes=None, test_pct=0.):
        """Build a dataset from `folder`, which holds one sub-folder per class.

        Args:
            folder:   path-like supporting `/` whose sub-folders are named after the classes.
            classes:  class names to include; defaults to the names given by `find_classes(folder)`.
            test_pct: probability with which each file is assigned to the held-out dataset.

        Returns:
            A single dataset when `test_pct` is 0, otherwise a `(train, test)` pair.
            Every returned dataset uses the same `classes`, so their label indices agree.
        """
        if classes is None: classes = [c.name for c in find_classes(folder)]

        fns,labels = [],[]
        for cl in classes:
            fnames = get_image_files(folder/cl)
            fns += fnames
            labels += [cl] * len(fnames)

        if test_pct==0.: return cls(fns, labels, classes=classes)

        fns,labels = np.array(fns),np.array(labels)
        # Independent draw per file: the split sizes are only approximately test_pct.
        is_test = np.random.uniform(size=(len(fns),)) < test_pct
        # Pass the shared class list explicitly; otherwise each split would build its own
        # (possibly different) label -> index mapping.
        return (cls(fns[~is_test], labels[~is_test], classes=classes),
                cls(fns[is_test], labels[is_test], classes=classes))

In [None]:
# Hand-written subset of Caltech 101 category names.
# NOTE(review): this list is not passed to `from_folder` below, and `classes` is reassigned
# from `train_ds.classes` a few lines later, so it currently has no effect - confirm whether
# `classes=classes` was intended.
classes = ["airplanes", "Motorbikes", "BACKGROUND_Google", "Faces", "watch", "Leopards", "bonsai",
    "car_side", "ketch", "chandelier", "hawksbill", "grand_piano", "brain", "butterfly", "helicopter", "menorah",
    "trilobite", "starfish", "kangaroo", "sunflower", "ewer", "buddha", "scorpion", "revolver", "laptop", "ibis", "llama",
    "minaret", "umbrella", "electric_guitar", "crab", "crayfish",]

# Seed NumPy first: `from_folder` draws the train/validation split with `np.random.uniform`.
np.random.seed(42)
train_ds,valid_ds = FilesDataset.from_folder(PATH, test_pct=0.2)

# Keep one sample image around; the cells below use `x` for every demonstration.
x = train_ds[15][0]
classes = train_ds.classes
c = len(classes)

len(train_ds),len(valid_ds),c

## Rectangular affine fix

In [None]:
show_image(x, figsize=(6,3), hide_axis=False)
print(x.shape)

In [None]:
rot_m = np.array(rotate(40.)); rot_m

In [None]:
show_image(apply_affine(rot_m)(x), figsize=(6,3))

In [None]:
#export
def affine_grid(x, matrix, size=None):
    """Build a sampling grid for `matrix`, corrected for non-square images.

    Args:
        x:      3-d tensor whose last two dimensions are read as (height, width).
        matrix: 3x3 affine matrix as a tensor; only its first two rows are used.
        size:   output size without the batch dimension; defaults to `x.shape`.

    Returns:
        The result of `F.affine_grid` for a batch of one.
    """
    h,w = x.shape[1:]
    if size is None: size = x.shape
    # Work on a copy: rescaling the caller's tensor in place would compound the
    # correction every time the same matrix is reused.
    matrix = matrix.clone()
    # Rescale the off-diagonal terms by the aspect ratio of `x`.
    matrix[0,1] *= h/w; matrix[1,0] *= w/h
    return F.affine_grid(matrix[None,:2], torch.Size((1,)+tuple(size)))

import nb_002
nb_002.affine_grid = affine_grid

In [None]:
show_image(apply_affine(rot_m)(x), figsize=(6,3))

## RandomResizedCrop

The goal is to replicate the RandomResizedCrop function from torchvision. First we take a crop of the picture that has a certain size and a certain ratio, then we resize it to the desired output size. This is their code to pick the crop area.

In [None]:
def get_crop(img, scale, ratio):
    """Choose a random crop box for an image whose `size` is (width, height).

    `scale` and `ratio` are (low, high) ranges for the fraction of the image area
    to keep and for the aspect ratio. Up to ten candidates are drawn; if none fits
    inside the image a centred square of the shorter side is used instead.

    Returns `(i, j, h, w)`: top row, left column, height and width of the crop.
    """
    full_w, full_h = img.size[0], img.size[1]
    for _ in range(10):
        target_area = random.uniform(*scale) * (full_w * full_h)
        aspect_ratio = random.uniform(*ratio)

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))

        # Half of the time use the transposed box.
        if random.random() < 0.5: w, h = h, w

        # Candidate sticks out of the image: draw again.
        if w > full_w or h > full_h: continue

        i = random.randint(0, full_h - h)
        j = random.randint(0, full_w - w)
        return i, j, h, w

    # No candidate fitted: centred square crop.
    side = min(full_w, full_h)
    return (full_h - side) // 2, (full_w - side) // 2, side, side

Rewriting it to take tensors in channel, height, width order.

In [None]:
def get_crop(t, scale, ratio):
    """Choose a random crop box for a tensor laid out as (channel, height, width).

    `scale` and `ratio` are (low, high) ranges for the kept area fraction and the
    aspect ratio. After ten candidates that do not fit, a centred square of the
    shorter side is returned.

    Returns `(i, j, h, w)`: top row, left column, height and width of the crop.
    """
    n_rows, n_cols = t.size(1), t.size(2)
    area = n_rows * n_cols
    attempts_left = 10
    while attempts_left > 0:
        attempts_left -= 1
        target_area = random.uniform(*scale) * area
        aspect_ratio = random.uniform(*ratio)

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))

        # Coin flip between the box and its transpose.
        swap = random.random() < 0.5
        if swap: w, h = h, w

        fits = (w <= n_cols) and (h <= n_rows)
        if fits:
            i = random.randint(0, n_rows - h)
            j = random.randint(0, n_cols - w)
            return i, j, h, w

    # Fallback: centred square.
    side = min(n_rows, n_cols)
    i = (n_rows - side) // 2
    j = (n_cols - side) // 2
    return i, j, side, side

In [None]:
fig, axs = plt.subplots(4,4,figsize=(8,12))
for ax in axs.flatten():
    i,j,h,w = get_crop(x, (0.08,1.), (3./4.,4./3.))
    #Crop
    y = x[:,i:i+h,j:j+w]
    #Then resize to the output size.
    y = F.interpolate(y[None], size=(224,224), mode='bilinear')
    show_image(y[0], ax)

## 1. With a start tfm

One way to do this is to create a transform of type start and then go through the pipeline with the target size.

In [None]:
@reg_transform
def crop_with_ratio(x, scale:uniform, ratio:uniform, invert:rand_bool, row_pct:uniform, col_pct:uniform) -> TfmType.Start:
    """Crop `x` (channel, rows, cols) to a random area and aspect ratio.

    Args:
        scale:   sequence of area fractions to try, one per attempt.
        ratio:   sequence of cols/rows aspect ratios to try, one per attempt.
        invert:  sequence of booleans; when true the candidate box is transposed.
        row_pct: vertical position of the crop, 0 = top, 1 = bottom.
        col_pct: horizontal position of the crop, 0 = left, 1 = right.

    `scale`, `ratio` and `invert` are iterated in parallel; their length is the number
    of attempts made before falling back to a centred square crop.

    Returns:
        A contiguous copy of the selected region of `x`.
    """
    height, width = x.size(1), x.size(2)
    area = height * width
    for s,r,inv in zip(scale, ratio, invert):
        target_area = area * s
        cols = int(round(math.sqrt(target_area * r)))
        rows = int(round(math.sqrt(target_area / r)))

        if inv: cols,rows = rows,cols

        if cols <= width and rows <= height:
            # There are (height-rows+1) valid offsets. Clamp so that a pct of exactly 1.0
            # selects the last one instead of starting one past it, which would silently
            # return a crop one row/column short.
            row = min(int((height-rows+1)*row_pct), height-rows)
            col = min(int((width-cols+1)*col_pct), width-cols)
            return x[:, row:row+rows, col:col+cols].contiguous()
    # Fallback: centred square of the shorter side.
    side = min(height, width)
    row = (height - side) // 2
    col = (width - side) // 2
    return x[:, row:row+side, col:col+side].contiguous()

In [None]:
random_resized_crop = crop_with_ratio_tfm(scale=(0.08,1.,10), ratio=(0.75,1.33,10),invert=(0.5,10),
                                          row_pct=(0,1.), col_pct=(0,1.))

In [None]:
fig, axs = plt.subplots(4,4,figsize=(8,12))
for ax in axs.flatten():
    #Crop
    y = random_resized_crop()(x)
    #Then resize to the output size.
    y = F.interpolate(y[None], size=(224,224), mode='bilinear')
    show_image(y[0], ax)

In [None]:
fig, axs = plt.subplots(4,4,figsize=(8,12))
for ax in axs.flatten():
    y = apply_tfms([random_resized_crop])(x, size=(3,224,224))
    show_image(y, ax)

## 2. The affine way

The scale and ratio are just an affine transformation that zooms in and squishes the picture in a given direction. The random crop then corresponds to a center different from (0,0). So all of this can be done as a single affine transformation (which can then be combined with others, like a rotation).

In [None]:
@reg_affine
def zoom_squish(scale: uniform = 1.0, squish: uniform=1.0, invert: rand_bool = False,
                row_pct:uniform = 0.5, col_pct:uniform = 0.5) -> TfmType.Affine:
    """Affine matrix that zooms into a sub-window of given area and aspect ratio.

    Args:
        scale:   area fraction(s) of the window to try, in order.
        squish:  width/height ratio(s) of the window to try, in order.
        invert:  boolean(s); when true the window's width and height are swapped.
        row_pct: vertical placement of the window, 0.5 = centred.
        col_pct: horizontal placement of the window, 0.5 = centred.

    `scale`, `squish` and `invert` are iterated in parallel; the first candidate whose
    width and height factors are both at most 1 is used. If none qualifies the identity
    matrix is returned.
    """
    # The scalar defaults are not iterable; wrap them so `zip` below works for
    # a plain `zoom_squish()` call as well as for sequences of attempts.
    scale,squish,invert = (v if hasattr(v, '__iter__') else [v] for v in (scale, squish, invert))
    for s,r,i in zip(scale, squish, invert):
        s,r = math.sqrt(s),math.sqrt(r)
        # s*r and s/r are the two side factors of the window; both must be <= 1
        # (a window exactly as wide or as tall as the picture still fits).
        if s * r <= 1 and s / r <= 1:
            w,h = (s/r, s*r) if i else (s*r,s/r)
            # Offsets are limited to the slack (1-w), (1-h) left around the window.
            col_c = (1-w) * (2*col_pct - 1)
            row_c = (1-h) * (2*row_pct - 1)
            return [[w, 0, col_c],
                    [0, h, row_c],
                    [0, 0, 1.   ]]
    return [[1, 0, 0.],
            [0, 1, 0.],
            [0, 0, 1.]]

In [None]:
random_resized_crop = zoom_squish_tfm(scale=(0.08,1.,10), squish=(0.75,1.33, 10), invert=(0.5,10), row_pct=(0,1.), col_pct=(0,1.))

In [None]:
fig, axs = plt.subplots(4,4,figsize=(8,12))
for ax in axs.flatten():
    #Crop
    y = apply_tfms([random_resized_crop])(x, size=(3,224,224))
    show_image(y, ax)

## Deterministic RandomResizedCrop

In [None]:
x = train_ds[15][0]

In [None]:
x.size()

In [None]:
area = x.size(1) * x.size(2)
target_area = 0.5 * area
aspect_ratio = 0.8
w = int(round(math.sqrt(target_area * aspect_ratio)))
h = int(round(math.sqrt(target_area / aspect_ratio)))
w,h

In [None]:
def crop_v1(img):
    """Reference crop with fixed parameters, resized to 224x224.

    Uses half of the image area, aspect ratio 0.8 with the box transposed, placed at
    20% of the vertical slack and 40% of the horizontal slack. `img` is indexed as
    (channel, rows, cols); the result keeps a leading batch dimension of 1.
    """
    n_rows, n_cols = img.size(1), img.size(2)
    target_area = 0.5 * (n_rows * n_cols)
    aspect_ratio = 0.8

    # Sides are assigned already transposed (height takes the "* ratio" value).
    crop_h = int(round(math.sqrt(target_area * aspect_ratio)))
    crop_w = int(round(math.sqrt(target_area / aspect_ratio)))

    top = int(0.2 * (n_rows - crop_h))
    left = int(0.4 * (n_cols - crop_w))
    patch = img[:, top:top+crop_h, left:left+crop_w]
    return F.interpolate(patch[None], size=(224,224), mode='bilinear')

In [None]:
show_image(crop_v1(x)[0])

In [None]:
def crop_v2(img):
    """Same fixed crop as the reference, obtained through `crop_with_ratio`.

    Calls it with a single attempt (scale 0.5, ratio 0.8, inverted) positioned at
    row_pct 0.2 / col_pct 0.4, then resizes to 224x224 and drops the batch dimension.
    """
    cropped = crop_with_ratio(img, [0.5], [0.8], [True], 0.2, 0.4)
    resized = F.interpolate(cropped[None], size=(224,224), mode='bilinear')
    return resized[0]

In [None]:
show_image(crop_v2(x))

In [None]:
orig_ratio = math.sqrt(x.size(2)/x.size(1))

In [None]:
@reg_affine
def zoom_squish1(scale: uniform = 1.0, squish: uniform=1.0, invert: rand_bool = False,
                row_pct:uniform = 0.5, col_pct:uniform = 0.5) -> TfmType.Affine:
    """Variant of the zoom/squish affine that compensates for a non-square source image.

    Reads the module-level `orig_ratio` (set in an earlier cell to sqrt(width/height) of
    the sample image), so that cell must have been run first.

    Args:
        scale:   area fraction(s) of the window to try, in order.
        squish:  width/height ratio(s) of the window to try, in order.
        invert:  boolean(s); when true the window's width and height are swapped.
        row_pct: vertical placement of the window, 0.5 = centred.
        col_pct: horizontal placement of the window, 0.5 = centred.

    The first candidate whose side factors are both at most 1 is used; otherwise a
    matrix that only shrinks the longer axis by `orig_ratio**2` is returned.
    """
    # The scalar defaults are not iterable; wrap them so `zip` below works for
    # a plain call with no arguments as well as for sequences of attempts.
    scale,squish,invert = (v if hasattr(v, '__iter__') else [v] for v in (scale, squish, invert))
    for s,r,i in zip(scale, squish, invert):
        s,r = math.sqrt(s),math.sqrt(r)
        # Both side factors must be <= 1 (a window exactly filling one axis still fits).
        if s * r <= 1 and s / r <= 1:
            w,h = (s/r, s*r) if i else (s*r,s/r)
            # NOTE(review): the fit test above runs before this correction, so the
            # corrected w or h can exceed 1 - confirm that this is intended.
            w /= orig_ratio
            h *= orig_ratio
            col_c = (1-w) * (2*col_pct - 1)
            row_c = (1-h) * (2*row_pct - 1)
            return [[w, 0, col_c],
                    [0, h, row_c],
                    [0, 0, 1.   ]]
    # Fallback: no candidate fitted; only shrink the longer axis.
    if orig_ratio > 1:
        return [[1/orig_ratio**2, 0, 0.],
                [0, 1, 0.],
                [0, 0, 1.]]
    else:
        return [[1, 0, 0.],
                [0, orig_ratio**2, 0.],
                [0, 0, 1.]]

In [None]:
def crop_v3(img):
    """Same fixed crop as the reference, expressed as an affine transform.

    Builds the matrix with `zoom_squish1` (scale 0.5, squish 0.8, inverted, row_pct 0.2,
    col_pct 0.4) and applies it to `img` with an output size of (3, 224, 224).
    """
    matrix = zoom_squish1([0.5], [0.8], [True], 0.2, 0.4)
    return apply_affine(matrix)(img, size=(3,224,224))

In [None]:
show_image(crop_v3(x))