In [23]:
%matplotlib inline
import matplotlib.pyplot as plt
from fastai.vision import *
from fastai.metrics import accuracy
from fastai.basic_data import *
from skimage.util import montage
from fastai.callbacks.hooks import num_features_model
from torch.nn import L1Loss
import pandas as pd
from torch import optim
import re
import json
#import cv2
import types

from utils import *

In [24]:
root_dir = Path('../input')
train_dir = root_dir/'train'
annot_dir = root_dir/'annotation'

In [25]:
j = json.load(open(f'{annot_dir}/annotations.json'))

In [26]:
j[0]

{'annotations': [{'class': 'fluke',
   'height': 372.0,
   'type': 'rect',
   'width': 1017.0,
   'x': 14.0,
   'y': 97.0},
  {'class': 'left',
   'type': 'point',
   'x': 50.802273527488566,
   'y': 98.58659021176},
  {'class': 'notch',
   'type': 'point',
   'x': 516.2391276137811,
   'y': 269.48861474128864},
  {'class': 'right',
   'type': 'point',
   'x': 1013.5305065138045,
   'y': 102.10753986218477}],
 'class': 'image',
 'filename': '6138dce83.jpg'}

The above annotations are in coco dataset format (x, y, width, height). The fastai library expects bbox coordinates to be in the format of (y_upper_left, x_upper_left, y_lower_right, x_lower_right) with the origin being in the upper left hand corner of the image.

In [27]:
SZ = 224
BS = 64
NUM_WORKERS = 6

In [28]:
j[0]['annotations'][0]

{'class': 'fluke',
 'height': 372.0,
 'type': 'rect',
 'width': 1017.0,
 'x': 14.0,
 'y': 97.0}

In [29]:
def anno2bbox(anno):
    im_width, im_height = PIL.Image.open(f"../input/train/{anno['filename']}").size
    file = anno['filename']
    for anno in anno['annotations']:
        if anno['class'] == 'fluke':
            break
    #anno = anno['annotations'][0]
    #print(file, anno)
    return [
        np.clip(anno['y'], 0, im_height) / im_height * SZ,
        np.clip(anno['x'], 0, im_width) / im_width * SZ,
        np.clip(anno['y']+anno['height'], 0, im_height) / im_height * SZ,
        np.clip(anno['x']+anno['width'], 0, im_width) / im_width * SZ
    ]

Just some small functions that will be helpful as we construct our dataset.

In [30]:
fn2bbox = {jj['filename']: [[anno2bbox(jj)], [anno['class'] for anno in jj['annotations']]] for jj in j}
path2fn = lambda path: re.search('\w*\.jpg$', path).group(0)
get_y_func = lambda o: fn2bbox[path2fn(o)]

In [31]:
get_y_func

<function __main__.<lambda>(o)>

In [32]:
fn2bbox['6138dce83.jpg']

[[[36.21333333333333,
   2.986666666666667,
   175.09333333333333,
   219.9466666666667]],
 ['fluke', 'left', 'notch', 'right']]

In [33]:
idxs = np.arange(len(j))
np.random.seed(0)
np.random.shuffle(idxs)
val_idxs = idxs[:100]
# val_idxs = pd.to_pickle(val_idxs, 'data/val_idxs_detection.pkl')

In [34]:
val_j = [anno for i, anno in enumerate(j) if i in val_idxs]
trn_j = [anno for i, anno in enumerate(j) if i not in val_idxs]
len(trn_j), len(val_j)

(300, 100)

In [35]:
pd.to_pickle([anno['filename'] for anno in val_j], f'{annot_dir}/val_fns_detection.pkl') # this will allow me to use the same validation set across NBs

In [36]:
val_fns = pd.read_pickle(f'{annot_dir}/val_fns_detection.pkl') # I create this file in fluke_detection.ipynb

In [37]:
val_fns[0]

'0dcfd05bf.jpg'

ObjectCategoryList is designed to be used in a full object recognition scenario (multiple bounding boxes per image, objects of multiple classes). Since that is more functionality than we require (and more than our model will predict - our objects will always be of the same class, fluke, and we will be predicting just a single bounding box per image) I make minor changes to ObjectCategoryList.

In [38]:
class StubbedObjectCategoryList(ObjectCategoryList):
    def analyze_pred(self, pred): return [pred.unsqueeze(0), torch.ones(1).long()]

In [39]:
data = (ObjectItemList.from_df(pd.DataFrame(data=list(fn2bbox.keys())), path=train_dir)
        .split_by_valid_func(lambda path: path2fn(path) in val_fns)                         
        .label_from_func(get_y_func, label_cls=StubbedObjectCategoryList)
        .transform(get_transforms(max_zoom=1, max_warp=0.05, max_rotate=0.05, max_lighting=0.2), tfm_y=True, size=(SZ,SZ), resize_method=ResizeMethod.SQUISH)
        .databunch(bs=BS, num_workers=NUM_WORKERS)
        .normalize(imagenet_stats))



  warn(message)


In [40]:
data.show_batch(rows=3, ds_type=DatasetType.Valid, figsize=(12,12))

IndexError: Traceback (most recent call last):
  File "/home/wb/.pyenv/versions/anaconda3-5.3.1/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 138, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/home/wb/.pyenv/versions/anaconda3-5.3.1/lib/python3.7/site-packages/fastai/torch_core.py", line 105, in data_collate
    return torch.utils.data.dataloader.default_collate(to_data(batch))
  File "/home/wb/.pyenv/versions/anaconda3-5.3.1/lib/python3.7/site-packages/fastai/torch_core.py", line 89, in to_data
    if is_listy(b): return [to_data(o) for o in b]
  File "/home/wb/.pyenv/versions/anaconda3-5.3.1/lib/python3.7/site-packages/fastai/torch_core.py", line 89, in <listcomp>
    if is_listy(b): return [to_data(o) for o in b]
  File "/home/wb/.pyenv/versions/anaconda3-5.3.1/lib/python3.7/site-packages/fastai/torch_core.py", line 89, in to_data
    if is_listy(b): return [to_data(o) for o in b]
  File "/home/wb/.pyenv/versions/anaconda3-5.3.1/lib/python3.7/site-packages/fastai/torch_core.py", line 89, in <listcomp>
    if is_listy(b): return [to_data(o) for o in b]
  File "/home/wb/.pyenv/versions/anaconda3-5.3.1/lib/python3.7/site-packages/fastai/torch_core.py", line 90, in to_data
    return b.data if isinstance(b,ItemBase) else b
  File "/home/wb/.pyenv/versions/anaconda3-5.3.1/lib/python3.7/site-packages/fastai/vision/image.py", line 356, in data
    bboxes,lbls = self._compute_boxes()
  File "/home/wb/.pyenv/versions/anaconda3-5.3.1/lib/python3.7/site-packages/fastai/vision/image.py", line 352, in _compute_boxes
    return res, self.labels[to_np(mask).astype(bool)]
IndexError: boolean index did not match indexed array along dimension 0; dimension is 4 but corresponding boolean dimension is 1


In [None]:
class FlukeDetector(nn.Module):
    def __init__(self, arch=models.resnet18):
        super().__init__() 
        self.cnn = create_body(arch)
        self.head = create_head(num_features_model(self.cnn) * 2, 4)
        
    def forward(self, im):
        x = self.cnn(im)
        x = self.head(x)
        return 2 * (x.sigmoid_() - 0.5)

In [None]:
def loss_fn(preds, targs, class_idxs):
    return L1Loss()(preds, targs.squeeze())

In [None]:
learn = Learner(data, FlukeDetector(arch=models.resnet50), loss_func=loss_fn)
learn.metrics = [lambda preds, targs, _: IoU(preds, targs.squeeze()).mean()]

In [None]:
learn.split([learn.model.cnn[:6], learn.model.cnn[6:], learn.model.head])

In [None]:
learn.freeze_to(-1)

In [None]:
learn.fit_one_cycle(10, 1e-2)

In [None]:
learn.recorder.plot_losses()

In [None]:
learn.unfreeze()

In [None]:
max_lr = 2e-3
lrs = [max_lr/100, max_lr/10, max_lr]

In [None]:
learn.fit_one_cycle(40, lrs, div_factor=20)

In [None]:
learn.recorder.plot_losses()

In [None]:
learn.show_results(rows=6)

In [None]:
# preds, targs = learn.get_preds()

# predicted_bboxes = ((preds + 1) / 2 * SZ).numpy()
# targets = ((targs + 1) / 2 * SZ).numpy().squeeze()

# def draw_bbox(img, bbox, target=None, color=(255, 0, 0), thickness=2):
#     y_min, x_min, y_max, x_max = map(int, bbox)
#     cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=color, thickness=thickness)
#     if target is not None:
#         y_min, x_min, y_max, x_max = map(int, target)
#         cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=(0,255,0), thickness=thickness)
#     return img

# def cv_read(path):    
#     im = cv2.imread(path, cv2.IMREAD_COLOR)
#     return cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

# # Targets are in green, predictions in red.
# ims = np.stack([draw_bbox(cv_read(data.valid_ds.items[i]), predicted_bboxes[i], targets[i]) for i in range(9)])
# plt.figure(figsize=(12,12))
# plt.axis('off')
# plt.imshow(montage(np.stack(ims), multichannel=True))