In [1]:
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
COCO dataset which returns image_id for evaluation.

Mostly copy-paste from https://github.com/pytorch/vision/blob/13b35ff/references/detection/coco_utils.py
"""
from pathlib import Path

import torch
import torch.utils.data
import torchvision
from pycocotools import mask as coco_mask

# import datasets.transforms as T


class CocoDetection(torchvision.datasets.CocoDetection):
    def __init__(self, img_folder, ann_file, transforms, return_masks):
        super(CocoDetection, self).__init__(img_folder, ann_file)
        self._transforms = transforms
        self.prepare = ConvertCocoPolysToMask(return_masks)

    def __getitem__(self, idx):
        img, target = super(CocoDetection, self).__getitem__(idx)
        image_id = self.ids[idx]
        target = {'image_id': image_id, 'annotations': target}
        img, target = self.prepare(img, target)
        if self._transforms is not None:
            img, target = self._transforms(img, target)
        return img, target


def convert_coco_poly_to_mask(segmentations, height, width):
    masks = []
    for polygons in segmentations:
        rles = coco_mask.frPyObjects(polygons, height, width)
        mask = coco_mask.decode(rles)
        if len(mask.shape) < 3:
            mask = mask[..., None]
        mask = torch.as_tensor(mask, dtype=torch.uint8)
        mask = mask.any(dim=2)
        masks.append(mask)
    if masks:
        masks = torch.stack(masks, dim=0)
    else:
        masks = torch.zeros((0, height, width), dtype=torch.uint8)
    return masks


class ConvertCocoPolysToMask(object):
    def __init__(self, return_masks=False):
        self.return_masks = return_masks

    def __call__(self, image, target):
        w, h = image.size

        image_id = target["image_id"]
        image_id = torch.tensor([image_id])

        anno = target["annotations"]

        anno = [obj for obj in anno if 'iscrowd' not in obj or obj['iscrowd'] == 0]

        boxes = [obj["bbox"] for obj in anno]

        # guard against no boxes via resizing
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        boxes[:, 2:] += boxes[:, :2]
        boxes[:, 0::2].clamp_(min=0, max=w)
        boxes[:, 1::2].clamp_(min=0, max=h)

        classes = [obj["category_id"] for obj in anno]
        classes = torch.tensor(classes, dtype=torch.int64)

        if self.return_masks:
            segmentations = [obj["segmentation"] for obj in anno]
            masks = convert_coco_poly_to_mask(segmentations, h, w)

        keypoints = None
        if anno and "keypoints" in anno[0]:
            keypoints = [obj["keypoints"] for obj in anno]
            keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
            num_keypoints = keypoints.shape[0]
            if num_keypoints:
                keypoints = keypoints.view(num_keypoints, -1, 3)

        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
        boxes = boxes[keep]
        classes = classes[keep]
        if self.return_masks:
            masks = masks[keep]
        if keypoints is not None:
            keypoints = keypoints[keep]

        target = {}
        target["boxes"] = boxes
        target["labels"] = classes
        if self.return_masks:
            target["masks"] = masks
        target["image_id"] = image_id
        if keypoints is not None:
            target["keypoints"] = keypoints

        # for conversion to coco api
        area = torch.tensor([obj["area"] for obj in anno])
        iscrowd = torch.tensor([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno])
        target["area"] = area[keep]
        target["iscrowd"] = iscrowd[keep]

        target["orig_size"] = torch.as_tensor([int(h), int(w)])
        target["size"] = torch.as_tensor([int(h), int(w)])

        return image, target


# def make_coco_transforms(image_set):

#     normalize = T.Compose([
#         T.ToTensor(),
#         T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
#     ])

#     scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]

#     if image_set == 'train':
#         return T.Compose([
#             T.RandomHorizontalFlip(),
#             T.RandomSelect(
#                 T.RandomResize(scales, max_size=1333),
#                 T.Compose([
#                     T.RandomResize([400, 500, 600]),
#                     T.RandomSizeCrop(384, 600),
#                     T.RandomResize(scales, max_size=1333),
#                 ])
#             ),
#             normalize,
#         ])

#     if image_set == 'val':
#         return T.Compose([
#             T.RandomResize([800], max_size=1333),
#             normalize,
#         ])

#     raise ValueError(f'unknown {image_set}')


def build(image_set, args):
    root = Path(args.coco_path)
    assert root.exists(), f'provided COCO path {root} does not exist'
    mode = 'coco'
    PATHS = {
        "train": (root / "images", root / f'{mode}.json'),
        "val": (root / "images", root / f'val_{mode}.json'),
    }

    img_folder, ann_file = PATHS[image_set]
    dataset = CocoDetection(img_folder, ann_file, transforms=None, return_masks=args.masks)
    return dataset

In [430]:
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
COCO dataset which returns image_id for evaluation.

Mostly copy-paste from https://github.com/pytorch/vision/blob/13b35ff/references/detection/coco_utils.py
"""
from pathlib import Path

import torch
import torch.utils.data
import torchvision
from pycocotools import mask as coco_mask

import datasets.transforms as T
import albumentations as A
import random
import numpy as np
from PIL import Image

import logging


class ConstructionDetection(torchvision.datasets.CocoDetection):
    def __init__(self, img_folder, ann_file, transforms, return_masks, dataset_type):
        super(ConstructionDetection, self).__init__(img_folder, ann_file)
        self._transforms = transforms
        self.prepare = ConvertConstructionPolysToMask(return_masks)
        self.dataset_type = dataset_type
        
    def get_random_set(self, idx):
        collage_images = list(random.sample(range(len(self.ids)), 3))
        
        while idx in collage_images:
            collage_images = list(random.sample(range(len(self.ids)), 3))
        
        collage_images.append(idx)
        
        targets = {i: [] for i in collage_images}
        images = {i: [] for i in collage_images}
        
        return collage_images, targets, images
        
    def get_images(self, idx):
        collage_images, targets, images = self.get_random_set(idx)
        
        for imid in collage_images:
            image, target = super(ConstructionDetection, self).__getitem__(imid)
            target = {'image_id': imid, 'annotations': target}
            image, target = self.prepare(image, target)
            targets[imid] = target
            images[imid] = image
            
        return images, targets

    def __getitem__(self, idx):
        _flip = flip_coin()
        image_id = self.ids[idx]
        
        if _flip or self.dataset_type == 'val':
            img, target = super(ConstructionDetection, self).__getitem__(idx)
        else:
            images, targets = self.get_images(idx)
            img, target = prepare_collage(images, targets)
        
        target = {'image_id': image_id, 'annotations': target}
        img, target = self.prepare(img, target)
        if self._transforms is not None:
            if self.dataset_type == 'val':
                img, target = self._transforms(img, target)
            elif _flip:
                img, target = self._transforms[0](img, target)
            else:
                img, target = self._transforms[1](img, target)
        return img, target

    
def flip_coin():
    if torch.rand(1) > 1.0:
        return True
    else:
        return False
    

def prepare_collage(imgs, targets):
    idxs = imgs.keys()
    
    bbs = {i: [] for i in idxs}
    cats = {i: [] for i in idxs}

    collage_target = []

    for i in idxs:
        targets[i]["boxes"][:, 2:] -= targets[i]["boxes"][:, :2]
        bbs[i] = targets[i]["boxes"].int().tolist()
        cats[i]= targets[i]["labels"].int().tolist()

    trans_imgs = []
    trans_bbs = torch.tensor([])
    trans_cats = []

    transform = A.Compose(
        [A.SmallestMaxSize(max_size=400), A.RandomCrop(width=300, height=300)],
        bbox_params=A.BboxParams(format='coco', label_fields=['category_ids'], min_visibility=0.2),
    )

    for i in idxs:
        image = np.array(imgs[i])

        transformed = transform(image=image, bboxes=bbs[i], category_ids=cats[i])

        trans_imgs.append(transformed)
        bb_tensor = torch.tensor(transformed['bboxes'])

        if len(bb_tensor) > 0:

            if i == 1:
                bb_tensor[:, 1]+=300
            if i == 2:
                bb_tensor[:, 0]+=300
            if i == 3:
                bb_tensor[:, 0]+=300
                bb_tensor[:, 1]+=300

            trans_bbs = torch.cat([trans_bbs, bb_tensor], dim=0)
            trans_cats += transformed['category_ids']


    collage_image = Image.fromarray(torch.cat([
        torch.cat([
            torch.tensor(trans_imgs[0]['image']), 
            torch.tensor(trans_imgs[1]['image'])
        ], dim=0),
        torch.cat([
            torch.tensor(trans_imgs[2]['image']), 
            torch.tensor(trans_imgs[3]['image'])
        ], dim=0)
    ], dim=1).detach().numpy())

    for bb, cid, ar in zip(trans_bbs, trans_cats, trans_bbs[:, 2] * trans_bbs[:, 3]):
        collage_target.append({
        'bbox': bb.tolist(),
        'category_id': cid,
        'area': ar
    })
    
    return collage_image, collage_target


def convert_construction_poly_to_mask(segmentations, height, width):
    masks = []
    for polygons in segmentations:
        rles = coco_mask.frPyObjects(polygons, height, width)
        mask = coco_mask.decode(rles)
        if len(mask.shape) < 3:
            mask = mask[..., None]
        mask = torch.as_tensor(mask, dtype=torch.uint8)
        mask = mask.any(dim=2)
        masks.append(mask)
    if masks:
        masks = torch.stack(masks, dim=0)
    else:
        masks = torch.zeros((0, height, width), dtype=torch.uint8)
    return masks


class ConvertConstructionPolysToMask(object):
    def __init__(self, return_masks=False):
        self.return_masks = return_masks

    def __call__(self, image, target):
        w, h = image.size

        image_id = target["image_id"]
        image_id = torch.tensor([image_id])

        anno = target["annotations"]

        anno = [obj for obj in anno if 'iscrowd' not in obj or obj['iscrowd'] == 0]

        boxes = [obj["bbox"] for obj in anno]
        # guard against no boxes via resizing
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        boxes[:, 2:] += boxes[:, :2]
        boxes[:, 0::2].clamp_(min=0, max=w)
        boxes[:, 1::2].clamp_(min=0, max=h)

        classes = [obj["category_id"] for obj in anno]
        classes = torch.tensor(classes, dtype=torch.int64)

        if self.return_masks:
            segmentations = [obj["segmentation"] for obj in anno]
            masks = convert_construction_poly_to_mask(segmentations, h, w)

        keypoints = None
        if anno and "keypoints" in anno[0]:
            keypoints = [obj["keypoints"] for obj in anno]
            keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
            num_keypoints = keypoints.shape[0]
            if num_keypoints:
                keypoints = keypoints.view(num_keypoints, -1, 3)

        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
        boxes = boxes[keep]
        classes = classes[keep]
        if self.return_masks:
            masks = masks[keep]
        if keypoints is not None:
            keypoints = keypoints[keep]

        target = {}
        target["boxes"] = boxes
        target["labels"] = classes
        if self.return_masks:
            target["masks"] = masks
        target["image_id"] = image_id
        if keypoints is not None:
            target["keypoints"] = keypoints

        # for conversion to coco api
        area = torch.tensor([obj["area"] for obj in anno])
        iscrowd = torch.tensor([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno])
        target["area"] = area[keep]
        target["iscrowd"] = iscrowd[keep]

        target["orig_size"] = torch.as_tensor([int(h), int(w)])
        target["size"] = torch.as_tensor([int(h), int(w)])

        return image, target


def make_construction_transforms(image_set):

    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]

    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomSelect(
                T.RandomResize(scales, max_size=1333),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=1333),
                ])
            ),
            normalize,
        ])

    if image_set == 'val':
        return T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')


def build(image_set, args):
    root = Path(args.data_path)
    assert root.exists(), f'provided Construction path {root} does not exist'
    mode = 'coco'
    PATHS = {
        "train": (root / "images", root / f'aac_{mode}.json'),
        "val": (root / "images", root / f'aac_val_{mode}.json'),
    }

    img_folder, ann_file = PATHS[image_set]
    if image_set == 'train':
        dataset = ConstructionDetection(img_folder, ann_file, transforms=(make_construction_transforms('train'), make_construction_transforms('val')), return_masks=args.masks, dataset_type=image_set)
    else:
        dataset = ConstructionDetection(img_folder, ann_file, transforms=make_construction_transforms(image_set), return_masks=args.masks, dataset_type=image_set)
    return dataset


In [2]:
import sys    
import os

if os.path.join(os.getcwd(), "detr/") not in sys.path:
    sys.path.append(os.path.join(os.getcwd(), "detr/"))

print(sys.path)

['/usr/lib/python36.zip', '/usr/lib/python3.6', '/usr/lib/python3.6/lib-dynload', '', '/home/ammar/projects/.venv/lib/python3.6/site-packages', '/home/ammar/projects/.venv/lib/python3.6/site-packages/IPython/extensions', '/home/ammar/.ipython', '/home/ammar/projects/construction/detr-fine/detr/']


In [44]:
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import json
from pathlib import Path

import numpy as np
import torch
from PIL import Image

from panopticapi.utils import rgb2id
# from util.box_ops import masks_to_boxes

from datasets.construction import make_construction_transforms

import logging


def box_xywh_to_xyxy(x):
    xs, ys, w, h = x.unbind(-1)
    b = [xs, ys, (xs + w), (ys + h)]
    return torch.stack(b, dim=-1)


def masks_to_boxes(segments):
    boxes = []
    labels = []
    iscrowd = []
    area = []
    ids = []

    for ann in segments:
        if len(ann["bbox"]) == 4:
            boxes.append(ann["bbox"])
            area.append(ann['area'])
        else:
            boxes.append([0, 0, 2, 2])
            area.append(4)
        labels.append(ann["category_id"])
        iscrowd.append(ann['iscrowd'])
        ids.append(ann['id'])
    
    if len(boxes) == 0 and len(labels) == 0:
        boxes.append([0, 0, 2, 2])
        labels.append(1)
        area.append(4)
        iscrowd.append(0)

    boxes = torch.tensor(boxes, dtype=torch.int64)
    labels = torch.tensor(labels, dtype=torch.int64)
    iscrowd = torch.tensor(iscrowd)
    area = torch.tensor(area)
    boxes = box_xywh_to_xyxy(boxes)
    return boxes, labels, iscrowd, area, ids

class ConstructionPanoptic:
    def __init__(self, img_folder, ann_folder, ann_file, transforms=None, return_masks=True):
        with open(ann_file, "r") as f:
            self.coco = json.load(f)

        # sort 'images' field so that they are aligned with 'annotations'
        # i.e., in alphabetical order
        self.coco["images"] = sorted(self.coco["images"], key=lambda x: x["id"])
        # sanity check
        if "annotations" in self.coco:
            for img, ann in zip(self.coco["images"], self.coco["annotations"]):
                assert img["file_name"][:-4] == ann["file_name"][:-4]

        self.img_folder = img_folder
        self.ann_folder = ann_folder
        self.ann_file = ann_file
        self.transforms = transforms
        self.return_masks = return_masks

    def __getitem__(self, idx):
        try:
            ann_info = (
                self.coco["annotations"][idx]
                if "annotations" in self.coco
                else self.coco["images"][idx]
            )
            img_path = Path(self.img_folder) / ann_info["file_name"].replace(".png", ".jpg")
            ann_path = Path(self.ann_folder) / ann_info["file_name"]
            
            print(ann_info["file_name"])

            img = Image.open(img_path).convert("RGB")
            w, h = img.size
            if "segments_info" in ann_info:
                masks = np.asarray(Image.open(ann_path), dtype=np.uint32)
                masks = rgb2id(masks)

                ids = np.array([ann["id"] for ann in ann_info["segments_info"]])
                print(ids)
                masks = masks == ids[:, None, None]

                masks = torch.as_tensor(masks, dtype=torch.uint8)
                
                # labels = torch.tensor(
                #     [ann["category_id"] for ann in ann_info["segments_info"]],
                #     dtype=torch.int64,
                # )


            target = {}
            target['image_id'] = torch.tensor([ann_info['image_id'] if "image_id" in ann_info else ann_info["id"]])
            if self.return_masks:
                target['masks'] = masks

            boxes, labels, iscrowd, area, ids = masks_to_boxes(ann_info["segments_info"])
            
            print(ids)

            target['labels'] = labels

            # Instead of finding boxes, just take the one from json info available 
            # target["boxes"] = masks_to_boxes(ann_info["segments_info"])
            target["boxes"] = boxes


            target['size'] = torch.as_tensor([int(h), int(w)])
            target['orig_size'] = torch.as_tensor([int(h), int(w)])

            target['iscrowd'] = iscrowd
            target['area'] = area
            # if "segments_info" in ann_info:
            #     for name in ['iscrowd', 'area']:
            #         target[name] = torch.tensor([ann[name] for ann in ann_info['segments_info']])

            if self.transforms is not None:
                img, target = self.transforms(img, target)

            return img, target

        except Exception as e:
            logging.error(ann_info)
            raise e


    def __len__(self):
        return len(self.coco['images'])

    def get_height_and_width(self, idx):
        img_info = self.coco['images'][idx]
        height = img_info['height']
        width = img_info['width']
        return height, width


def build(image_set, args):
    root = Path(args.data_path)
    assert (
        root.exists()
    ), f"provided Panoptic path {root} does not exist"

    mode = "panoptic"

    PATHS = {
        "train": ("images", f"{mode}", f"{mode}.json"),
        "val": ("images", f"val_{mode}", f"val_{mode}.json"),
    }

    img_folder, ann_folder, ann_file = PATHS[image_set]
    img_folder_path = root / img_folder
    ann_folder_path = root / ann_folder
    ann_file = root / ann_file

    dataset = ConstructionPanoptic(
        img_folder_path,
        ann_folder_path,
        ann_file,
        transforms=make_construction_transforms(image_set),
        return_masks=args.masks,
    )

    return dataset


In [45]:
class Args():
    pass

args = Args()

args.data_path = '../data'
args.masks = False
args.batch_size = 5
args.num_workers = 4
args.masks = True

In [46]:
from torch.utils.data import DataLoader
import util.misc as utils

train_dataset = build('train', args)
sampler_train = torch.utils.data.RandomSampler(train_dataset)

batch_sampler_train = torch.utils.data.BatchSampler(sampler_train, args.batch_size, drop_last=True)

data_loader_train = DataLoader(train_dataset, batch_sampler=batch_sampler_train, collate_fn=utils.collate_fn, num_workers=args.num_workers)

In [47]:
# for data in data_loader_train:
#     pass

img, target = train_dataset.__getitem__(2173)

print(target['masks'].size())

target

aac_blocks_2176.png
[   33265 11881084  2100087  1120607]
[33265, 11881084, 2100087, 1120607]
torch.Size([4, 672, 895])


{'image_id': tensor([2176]),
 'masks': tensor([[[ True,  True,  True,  ...,  True,  True,  True],
          [ True,  True,  True,  ...,  True,  True,  True],
          [ True,  True,  True,  ...,  True,  True,  True],
          ...,
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False]],
 
         [[False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          ...,
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False]],
 
         [[False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          ...,
          [

In [43]:
logging.warning(222)



In [345]:
1032.0000 * 308.0000

317856.0

In [1]:
import os
import json
import random
import numpy as np

random.seed(10)

def convert(o):
    if isinstance(o, np.generic): return o.item()  
    raise TypeError

In [2]:
# Global Files

ROOT_DIR = './data'

COCO_FILE = 'coco.json'
PANOPTIC_FILE = 'panoptic.json'

AAC_COCO_FILE = 'aac_coco.json'
AAC_PANOPTIC_FILE = 'aac_panoptic.json'

VAL_COCO_FILE = 'val_coco.json'
VAL_PANOPTIC_FILE = 'val_panoptic.json'

AAC_VAL_COCO_FILE = 'aac_val_coco.json'
AAC_VAL_PANOPTIC_FILE = 'aac_val_panoptic.json'

In [3]:
# ## Prepare only one class DS

# with open(os.path.join(ROOT_DIR, COCO_FILE), "r") as coco_file:
#     coco_data = json.load(coco_file)
    
# annotations = []
# images_ids = []
# images = []

# # Remove those images where we dont have any segmentations
# for img in coco_data["images"]:
#     if "aac_blocks" in img["file_name"]:
#         images.append(img)
#         images_ids.append(img['id'])

# for ann in coco_data["annotations"]:
#     if ann['image_id'] in images_ids:
#         annotations.append(ann)

# coco_data["images"] = images
# coco_data["annotations"] = annotations

# with open(os.path.join(ROOT_DIR, AAC_COCO_FILE), "w") as aac_coco_file:
#     json.dump(coco_data, aac_coco_file) # , default=convert)

In [4]:
# ## Prepare only one class DS

# with open(os.path.join(ROOT_DIR, VAL_COCO_FILE), "r") as val_coco_file:
#     val_coco_data = json.load(val_coco_file)
    
# annotations = []
# images_ids = []
# images = []

# # Remove those images where we dont have any segmentations
# for img in val_coco_data["images"]:
#     if "aac_blocks" in img["file_name"]:
#         images.append(img)
#         images_ids.append(img['id'])

# for ann in val_coco_data["annotations"]:
#     if ann['image_id'] in images_ids:
#         annotations.append(ann)

# val_coco_data["images"] = images
# val_coco_data["annotations"] = annotations

# with open(os.path.join(ROOT_DIR, AAC_VAL_COCO_FILE), "w") as aac_val_coco_file:
#     json.dump(val_coco_data, aac_val_coco_file) #, default=convert)

In [6]:
# # Rename iscroud to iscrowd

# with open(os.path.join(ROOT_DIR, AAC_VAL_COCO_FILE), "r") as aac_val_coco_file:
#     aac_val_coco_data = json.load(aac_val_coco_file)
    
# for ann in aac_val_coco_data['annotations']:
#     if 'iscroud' in ann:
#         ann['iscrowd'] = ann['iscroud']
        
# with open(os.path.join(ROOT_DIR, AAC_VAL_COCO_FILE), "w") as aac_val_coco_file:
#     json.dump(aac_val_coco_data, aac_val_coco_file) #, default=convert)

In [7]:
# # Rename iscroud to iscrowd

# with open(os.path.join(ROOT_DIR, AAC_COCO_FILE), "r") as aac_coco_file:
#     aac_coco_data = json.load(aac_coco_file)
    
# for ann in aac_coco_data['annotations']:
#     if 'iscroud' in ann:
#         ann['iscrowd'] = ann['iscroud']
        
# with open(os.path.join(ROOT_DIR, AAC_COCO_FILE), "w") as aac_coco_file:
#     json.dump(aac_coco_data, aac_coco_file) #, default=convert)