In [1]:
%matplotlib inline
from fastai2.vision.all import *
import typing
from sklearn.model_selection import train_test_split
import pycocotools.mask as mask_util


In [2]:
dataset_root = Path("../dataset")
annotations_path = dataset_root / "via_region_data_fish_type.json"

In [3]:
with open(annotations_path) as f:
    annotations_data = list(json.load(f).values())
annotations_data_train, annotations_data_test = train_test_split(annotations_data, test_size=0.25)
len(annotations_data_train), len(annotations_data_test)

(40, 14)

In [4]:
def polygons_to_bitmask(polygons: typing.List[np.ndarray], height: int, width: int) -> np.ndarray:
    assert len(polygons) > 0, "COCOAPI does not support empty polygons"
    rles = mask_util.frPyObjects(polygons, height, width)
    rle = mask_util.merge(rles)
    return mask_util.decode(rle).astype(np.bool)

In [67]:
class MaskRCCNImage(Tuple):
    def show(self, ctx=None, **kwargs):
        image_tensor, boxes, *_ = self
       
        print(boxes.shape)
        return show_image(image_tensor, ctx=ctx, **kwargs)
    
@typedispatch
def show_batch(x: MaskRCCNImage, y, samples, ctxs=None, max_n=6, nrows=None, ncols=2, figsize=None, **kwargs):
    if figsize is None: 
        figsize = (ncols*6, max_n // ncols * 3)
    if ctxs is None: 
        ctxs = get_grid(
            min(x[0].shape[0], max_n), 
            nrows=None, 
            ncols=ncols, 
            figsize=figsize
        )
        
    for i,ctx in enumerate(ctxs):
        MaskRCCNImage([x[0][i], x[1][i]]).show(ctx=ctx)

In [69]:
class MaskRCCNTransform(Transform):
    def __init__(self, root):
        self.root = root
    
    def encodes(self, data):
        file_path = self.root / data["filename"]
        image = PILImage.create(file_path)
        
        boxes = []
        for region in data["regions"]:
            shape_attributes = region["shape_attributes"]
            px = shape_attributes["all_points_x"]
            py = shape_attributes["all_points_y"]
            
            box = [min(px), min(py), max(px), max(py)]
            boxes.append(box)
            
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        
        return MaskRCCNImage([image, boxes])
    
    
    
splits = RandomSplitter()(annotations_data)
tfm = MaskRCCNTransform(dataset_root)
tls = TfmdLists(annotations_data, tfm, splits=splits)


dls = tls.dataloaders(
    after_item=[Resize(224), ToTensor], 
    after_batch=[IntToFloatTensor, Normalize.from_stats(*imagenet_stats)],
    bs=2
)
dls.show_batch()

RuntimeError: stack expects each tensor to be equal size, but got [14, 4] at entry 0 and [12, 4] at entry 1