In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from nb_006a import *

# Pascal

## Setup

In [None]:
# Dataset root and, beneath it, the folder holding the VOC2007 JPEG images.
PATH = Path('data/pascal')
JPEG_PATH = PATH.joinpath('VOCdevkit', 'VOC2007', 'JPEGImages')

In [None]:
import json
# Parse the training annotations inside a `with` block so the file handle is
# closed as soon as the JSON has been read instead of lingering in the kernel.
with (PATH / 'pascal_train2007.json').open() as annot_file:
    trn_j = json.load(annot_file)

In [None]:
# Lookup tables built from the annotation JSON: category id -> name, image id -> JPEG path.
classes = dict((cat['id'], cat['name']) for cat in trn_j['categories'])
filenames = dict((img['id'], JPEG_PATH/img['file_name']) for img in trn_j['images'])
# One flat record per annotated object; entries whose 'ignore' flag is truthy are dropped.
annotations = [
    {'img_id': ann['image_id'], 'class': classes[ann['category_id']], 'bbox': ann['bbox']}
    for ann in trn_j['annotations']
    if not ann['ignore']
]

In [None]:
# Number of annotations kept once those flagged 'ignore' are filtered out.
len(annotations)

In [None]:
# Group the flat records by image id; each stored entry keeps only 'class' and 'bbox'.
annot_by_img = collections.defaultdict(list)
for annot in annotations:
    annot_by_img[annot['img_id']].append({key: annot[key] for key in ('class', 'bbox')})

In [None]:
# Number of image ids that received at least one kept annotation.
len(annot_by_img)

First, let's build a model that finds the biggest bbox.

In [None]:
# For every image keep the annotation whose box covers the largest area
# (bbox[2] * bbox[3]); on ties the first such annotation wins.
biggest_bb = {}
for img_id in filenames:  # `img_id`, not `id`, so the builtin `id` is not overwritten
    # `.get` instead of `[]`: indexing the defaultdict would silently insert an
    # empty list for images without annotations and change len(annot_by_img).
    img_annots = annot_by_img.get(img_id)
    if not img_annots:
        raise ValueError(f'No usable annotation for image id {img_id}')
    biggest_bb[img_id] = max(img_annots, key=lambda o: o['bbox'][2] * o['bbox'][3])

In [None]:
# Fraction of the images held out for validation, and the seed that fixes which ones.
VALID_PCT = 0.2
SPLIT_SEED = 42

ids = np.array(list(filenames.keys()))
# A private RandomState makes the shuffle identical after every kernel restart
# without touching NumPy's global random state.
ids = np.random.RandomState(SPLIT_SEED).permutation(ids)
# The first `split` shuffled ids form the validation set, the remainder the training set.
split = int(len(filenames) * VALID_PCT)
train_fns = [filenames[i] for i in ids[split:]]
valid_fns = [filenames[i] for i in ids[:split]]

In [None]:
def _corners(bb):
    "Turn a 4-item box `[x, y, w, h]` into `[[y, x], [h+y, w+x]]`, i.e. two (row, col) corners."
    x, y, w, h = bb
    return [[y, x], [h + y, w + x]]

# Corner form of the largest box of every image, then the per-split lists
# in the same shuffled order as the file-name lists.
bboxes = {img_id: _corners(biggest_bb[img_id]['bbox']) for img_id in filenames}

train_bbs = list(map(bboxes.__getitem__, ids[split:]))
valid_bbs = list(map(bboxes.__getitem__, ids[:split]))

In [None]:
# NOTE: bb2hw, draw_outline and draw_rect are defined again in a later cell, next to the
# matplotlib import of `patches` and `patheffects`; the later definitions are the ones in effect.

def bb2hw(a):
    "Reorder a corner box `[top, left, bottom, right]` into `[left, top, width, height]`."
    top, left, bottom, right = a[0], a[1], a[2], a[3]
    return np.array([left, top, right - left, bottom - top])

def draw_outline(o, lw):
    "Set the path effects of `o` to a black stroke of width `lw` followed by the normal rendering."
    stroke = patheffects.Stroke(linewidth=lw, foreground='black')
    o.set_path_effects([stroke, patheffects.Normal()])

def draw_rect(ax, b, color='white'):
    "Add an unfilled, outlined rectangle to `ax`; `b[:2]` is its anchor point, `b[-2:]` its width and height."
    width, height = b[-2:]
    rect = patches.Rectangle(b[:2], width, height, fill=False, edgecolor=color, lw=2)
    draw_outline(ax.add_patch(rect), 4)

In [None]:
class ImageBBox(ImageMask):
    "A bounding box stored as a binary pixel mask; `data` reads the box back as corner coordinates."

    def clone(self):
        "Return a new object of the same class wrapping a copy of the mask pixels."
        return type(self)(self.px.clone())

    @classmethod
    def create(cls, bbox, h, w):
        "Build an `h` x `w` mask set to 1 inside `bbox = [[top, left], [bottom, right]]`, both corners inclusive."
        (top, left), (bottom, right) = bbox
        mask = torch.zeros(h, w, dtype=torch.long)
        # +1 on the upper bounds because the slice end is exclusive but the corner is inclusive.
        mask[top:bottom+1, left:right+1] = 1
        return cls(mask.unsqueeze(0))

    @property
    def data(self):
        "Tensor `[min_row, min_col, max_row, max_col]` of the nonzero pixels in the first channel of `px`."
        nz = torch.nonzero(self.px[0])
        rows, cols = nz[:,0], nz[:,1]
        return torch.tensor([rows.min(), cols.min(), rows.max(), cols.max()])

In [None]:
from matplotlib import patches, patheffects

def bb2hw(a):
    "Reorder a corner box `[top, left, bottom, right]` into `[left, top, width, height]`."
    top, left, bottom, right = a[0], a[1], a[2], a[3]
    return np.array([left, top, right - left, bottom - top])

def draw_outline(o, lw):
    "Set the path effects of `o` to a black stroke of width `lw` followed by the normal rendering."
    stroke = patheffects.Stroke(linewidth=lw, foreground='black')
    o.set_path_effects([stroke, patheffects.Normal()])

def draw_rect(ax, b, color='white'):
    "Add an unfilled, outlined rectangle to `ax`; `b[:2]` is its anchor point, `b[-2:]` its width and height."
    width, height = b[-2:]
    rect = patches.Rectangle(b[:2], width, height, fill=False, edgecolor=color, lw=2)
    draw_outline(ax.add_patch(rect), 4)

def _show_image(img, ax=None, figsize=(3,3), hide_axis=True, cmap='binary', alpha=None):
    "Draw `img` on `ax` (a new `figsize` figure is created when `ax` is None) and return the axes."
    if ax is None:
        ax = plt.subplots(figsize=figsize)[1]
    pixels = image2np(img)
    ax.imshow(pixels, cmap=cmap, alpha=alpha)
    if hide_axis:
        ax.axis('off')
    return ax

def show_image(x, y=None, ax=None, figsize=(3,3), alpha=0.5, hide_axis=True, cmap='viridis'):
    """Draw `x` and, when given, overlay `y` on the same axes with transparency `alpha`.

    `ax` is the axes to draw on; when it is None a new figure of size `figsize` is created.
    `hide_axis` and `cmap` are passed through to both drawing calls. Returns None.
    """
    # `figsize` has to be forwarded here: this is the only call that may create the figure.
    ax1 = _show_image(x, ax=ax, figsize=figsize, hide_axis=hide_axis, cmap=cmap)
    if y is not None: _show_image(y, ax=ax1, alpha=alpha, hide_axis=hide_axis, cmap=cmap)
    if hide_axis: ax1.axis('off')
        
def _show(self, ax=None, y=None, **kwargs):
    """Show this image, optionally together with a target `y`.

    An `ImageBBox` target is drawn as an outlined rectangle; any other target
    (or none at all) goes through `show_image`, which receives `**kwargs`.
    """
    # Computed up front so the flag is defined even when no target is passed.
    is_bb = isinstance(y, ImageBBox)
    if y is not None: y = y.data
    if not is_bb: return show_image(self.data, ax=ax, y=y, **kwargs)
    ax = _show_image(self.data, ax=ax)
    # `y` now holds the box corners; bb2hw reorders them for draw_rect.
    draw_rect(ax, bb2hw(y))

# Install the function as the `show` method of `Image`.
Image.show = _show

In [None]:
@dataclass
class CoordTargetDataset(Dataset):
    "Dataset pairing each image file with a bounding-box target built through `ImageBBox.create`."
    # Image paths and, index-aligned with them, one box per image.
    x_fns:List[Path]
    bbs:List[List[int]]

    def __post_init__(self):
        "Check that there are as many boxes as image files."
        assert len(self.x_fns)==len(self.bbs)

    def __repr__(self):
        return f'{type(self).__name__} of len {len(self.x_fns)}'

    def __len__(self):
        return len(self.x_fns)

    def __getitem__(self, i):
        "Open image `i` and build its box target from the image's `size`."
        img = open_image(self.x_fns[i])
        target = ImageBBox.create(self.bbs[i], *img.size)
        return img, target

In [None]:
# One dataset per split, pairing the file paths with their largest-object boxes.
train_ds = CoordTargetDataset(train_fns, train_bbs)
valid_ds = CoordTargetDataset(valid_fns, valid_bbs)

In [None]:
# First validation sample: x is the opened image, y its ImageBBox target.
x,y = valid_ds[0]

In [None]:
# y is an ImageBBox, so the patched Image.show outlines the box with draw_rect.
x.show(y=y)

In [None]:
# Wrapped as a plain ImageMask, the same pixels go through show_image and are overlaid instead of outlined.
x.show(y=ImageMask(y.px))

In [None]:
# Side-by-side check: corners read back from the mask vs. the corners stored for this sample.
y.data, valid_bbs[0]

In [None]:
# Augmentation settings. Presumably a (train, valid) pair of transform lists — only tfms[0] is used below; TODO confirm.
tfms = get_transforms(do_flip=True, max_rotate=4, max_lighting=0.2)

In [None]:
# NOTE(review): tfm_y=True presumably applies the transforms to the box mask as well so it stays aligned — confirm against DatasetTfm.
train_tds = DatasetTfm(train_ds, tfms=tfms[0], tfm_y=True, size=128, padding_mode='border')

In [None]:
# Fetch the same training item once per panel (8 panels) and draw it with its box.
# Assumes DatasetTfm re-samples its random transforms on each access, so panels differ — TODO confirm.
fig,axs = plt.subplots(2,4, figsize=(10,5))
for ax in axs.flatten():
    x,y = train_tds[2]
    x.show(ax=ax,y=y)

In [None]:
# NOTE(review): bs, train_tfms, valid_tfms and sz are not defined in any visible cell of this notebook.
# Unless `from nb_006a import *` provides them, this cell fails on a fresh kernel — define them above or confirm their source.
data = DataBunch.create(train_ds, valid_ds, bs, num_workers=0, train_tfm=train_tfms, valid_tfm=valid_tfms, size=sz)

## Model

We take a pretrained resnet34 with a custom head.

In [None]:
from torchvision.models import resnet34

In [None]:
# Keep the constructor itself (not an instance); it is called when the model is built below.
arch = resnet34

In [None]:
def create_skeleton(model, cut):
    "Return an `nn.Sequential` of `model`'s direct children minus the last `cut` (`cut == 0` keeps them all)."
    children = list(model.children())
    kept = children[:-cut] if cut != 0 else children
    return nn.Sequential(*kept)

In [None]:
# Load the pretrained weights the text above promises: a bare `arch()` builds the
# network with random initialisation. The last two children are cut off so a custom
# head can be attached to the remaining body.
model = create_skeleton(arch(pretrained=True), 2)

In [None]:
# Display the truncated network's layer listing.
model

In [None]:
# NOTE(review): `ne` is not defined in any visible cell; this looks like a truncated statement
# and raises NameError as written — complete it or remove the cell.
x,y = ne

Once 5a works...