## Specify dataset name

In [None]:
# Specify dataset name. Available options:
# dataset_name = "paco_lvis_v1_train"
# dataset_name = "paco_lvis_v1_val"
# dataset_name = "paco_lvis_v1_test"
# dataset_name = "paco_ego4d_v1_train"
# dataset_name = "paco_ego4d_v1_val"
# dataset_name = "paco_ego4d_v1_test"
# The selected name is used by the loading cell as the lookup key into
# _PREDEFINED_PACO, so it has to be one of the names registered there.
dataset_name = "paco_lvis_v1_test"


## Load dataset and extract maps

In [None]:
import json
from paco.data.datasets.builtin import _PREDEFINED_PACO

# Derived parameters: the dataset JSON file and the image root directory
# registered for the selected dataset name.
dataset_file_name, image_root_dir = _PREDEFINED_PACO[dataset_name]

# Load dataset. The encoding is pinned to UTF-8 (the standard JSON encoding)
# so that decoding does not depend on the platform's locale default.
with open(dataset_file_name, encoding="utf-8") as f:
    dataset = json.load(f)


In [None]:
import os
from collections import defaultdict

def get_obj_and_part_anns(annotations):
    """
    Groups part annotations under the object annotation they belong to.
    Args:
        annotations: Re-iterable collection of annotation dictionaries with
                     "id" and "obj_ann_id" keys. An annotation whose "id"
                     equals its "obj_ann_id" is treated as an object
                     annotation, any other one as a part annotation.

    Returns:
        Dictionary mapping an object annotation ID to an
        (object annotation, list of part annotations) pair.
    """
    grouped = {}

    # First pass: register every object annotation with an empty part list.
    for candidate in annotations:
        if candidate["id"] == candidate["obj_ann_id"]:
            grouped[candidate["id"]] = (candidate, [])

    # Second pass: attach each part annotation to its parent object. This is
    # done separately so that parts may precede their object in the input.
    for candidate in annotations:
        parent_id = candidate["obj_ann_id"]
        if parent_id != candidate["id"]:
            grouped[parent_id][1].append(candidate)

    return grouped

# Extract maps from dataset.
# ID -> name lookups for categories and attributes, and image ID -> full image
# path (image root directory joined with the image's "file_name").
cat_id_to_name = {d["id"]: d["name"] for d in dataset["categories"]}
attr_id_to_name = {d["id"]: d["name"] for d in dataset["attributes"]}
image_id_to_image_file_name = {d["id"]: os.path.join(image_root_dir, d["file_name"]) for d in dataset["images"]}
# Object annotation ID -> (object annotation, list of part annotations).
obj_ann_id_to_anns = get_obj_and_part_anns(dataset["annotations"])
# Buckets of (object annotation, part annotations) pairs keyed by category
# name, by attribute name, and by category name then attribute name.
cat_name_to_anns = defaultdict(list)
attr_name_to_anns = defaultdict(list)
cat_name_to_attr_name_to_anns = defaultdict(lambda: defaultdict(list))
# Every annotation is visited (objects and parts alike); what gets stored is
# always the pair of the object the annotation belongs to, looked up through
# "obj_ann_id", while the bucket key comes from the annotation's own category.
# NOTE(review): the same pair is appended once per matching annotation, so an
# object can appear several times in one bucket (e.g. when several of its
# annotations share an attribute) - confirm that duplicates are intended.
for ann in dataset["annotations"]:
    anns = obj_ann_id_to_anns[ann["obj_ann_id"]]
    attr_ids = ann["attribute_ids"]
    cat_name = cat_id_to_name[ann["category_id"]]
    # Only annotations carrying at least one attribute are listed per category.
    if len(attr_ids) > 0:
        cat_name_to_anns[cat_name].append(anns)
    for attr_id in attr_ids:
        attr_name = attr_id_to_name[attr_id]
        attr_name_to_anns[attr_name].append(anns)
        cat_name_to_attr_name_to_anns[cat_name][attr_name].append(anns)
# Convert to plain dictionaries (including the nested level); presumably so
# that later lookups of unknown names do not silently create empty entries.
cat_name_to_anns = dict(cat_name_to_anns)
attr_name_to_anns = dict(attr_name_to_anns)
cat_name_to_attr_name_to_anns = {k: dict(v) for k, v in cat_name_to_attr_name_to_anns.items()}
print("Available categories:", sorted(cat_name_to_anns.keys()))
print("Available attributes:", sorted(attr_name_to_anns.keys()))


## Visualization functions

In [None]:
import cv2
import numpy as np
from PIL import Image
import pycocotools.mask as mask_util
from detectron2.data.detection_utils import read_image
from IPython.display import Markdown

def expand_bounding_box(box, factor, im_height, im_width):
    """
    Builds a box around the center of the given one whose side is the longer
    side of the input multiplied by the factor, shrinking each dimension
    symmetrically wherever that box would cross the image border.
    Args:
        box:        Bounding box in (left, top, width, height) format
        factor:     Expansion factor (e.g., 1.5)
        im_height:  Image height
        im_width:   Image width

    Returns:
        expanded_box: (4, ) NumPy array with the expanded bounding box in
                      (left, top, width, height) format
    """
    left, top, box_w, box_h = box

    # Center of the original box; the expanded box keeps this center.
    center_x = left + 0.5 * box_w
    center_y = top + 0.5 * box_h

    # Target side length: longer side scaled by the factor.
    side = factor * np.maximum(box_h, box_w)
    half_side = side / 2

    # How far the target box sticks out of the image on its worst side
    # (a non-positive number; 0 when it fits). Trimming twice that amount
    # from the side keeps the box centered.
    overflow_x = min(0, center_x - half_side, im_width - (center_x + half_side))
    overflow_y = min(0, center_y - half_side, im_height - (center_y + half_side))
    out_w = side + 2 * overflow_x
    out_h = side + 2 * overflow_y

    # Corner coordinates of the resulting box.
    out_left, out_right = center_x - out_w / 2, center_x + out_w / 2
    out_top, out_bottom = center_y - out_h / 2, center_y + out_h / 2

    return np.array([out_left, out_top, out_right - out_left, out_bottom - out_top])

def image_grid(imgs, rows, cols):
    """
    Pastes images onto a white canvas laid out as a rows x cols grid,
    filling it row by row.
    Args:
        imgs:  List of PIL images; its length must equal rows * cols. The
               cell size is taken from the first image.
        rows:  Number of grid rows
        cols:  Number of grid columns

    Returns:
        grid: RGB PIL image of size (cols * width, rows * height)
    """
    assert len(imgs) == rows*cols

    cell_w, cell_h = imgs[0].size
    canvas = Image.new('RGB', size=(cols * cell_w, rows * cell_h), color=(255, 255, 255))

    for idx, tile in enumerate(imgs):
        # Row-major placement: idx -> (row, column) within the grid.
        row, col = divmod(idx, cols)
        canvas.paste(tile, box=(col * cell_w, row * cell_h))
    return canvas

def crop_and_resize(img, bbox, crop_size):
    """
    Cuts the bounding box out of an image and rescales the crop so that the
    longer box side becomes crop_size, keeping the box aspect ratio.
    Args:
        img:        Array indexed as [row, column, ...]
        bbox:       Bounding box in (left, top, width, height) format
        crop_size:  Target length of the longer side

    Returns:
        crop_img: Resized crop as returned by cv2.resize
    """
    left, top, box_w, box_h = bbox

    # Crop with coordinates truncated to integers.
    row_span = slice(int(top), int(top + box_h))
    col_span = slice(int(left), int(left + box_w))
    cropped = img[row_span, col_span]

    # Scale so that the longer side of the box maps to crop_size.
    scale = crop_size / max(box_w, box_h)
    target_size = (round(scale * box_w), round(scale * box_h))  # (width, height)
    return cv2.resize(cropped, target_size)

def mask_and_paste(img, background, mask):
    """
    Blends an image with a background using a mask as the per-element weight:
    the image is weighted by mask and the background by (1 - mask).
    Args:
        img:         Foreground values
        background:  Background values
        mask:        Weights of the foreground

    Returns:
        mask * img + (1 - mask) * background
    """
    foreground_part = mask * img
    background_part = (1 - mask) * background
    return foreground_part + background_part

def visualize_segments(vis_info_list, vis_image_width=1920):
    """
    Displays, for every entry, one row made of the object crop followed by a
    masked crop per part, with the entry's display fields rendered below it.
    Entries whose image cannot be read or whose expanded box is smaller than
    one pixel are skipped with a printed message.
    Args:
        vis_info_list:    List of dictionaries with keys "im_fn" (image file
                          name), "bbox" (box in (left, top, width, height)
                          format), "part_mask_rles" (list of RLE-encoded part
                          masks) and "display_fields" (name -> value
                          dictionary rendered as Markdown)
        vis_image_width:  Width budget of a row; the cell size is this width
                          divided by the number of cells, capped at 256
    """
    for vis_info in vis_info_list:
        # Extract info.
        im_fn = vis_info["im_fn"]
        bbox = vis_info["bbox"]
        part_mask_rles = vis_info["part_mask_rles"]
        display_fields = vis_info["display_fields"]
        # Derive parameters: one cell for the object plus one per part.
        cell_size = min(int(round(vis_image_width / (len(part_mask_rles) + 1))), 256)
        # Read the image.
        img = read_image(im_fn, format="RGB")
        if img is None:
            # Nothing can be drawn for this entry; skip it instead of falling
            # through to the display calls with missing or stale images.
            print(f"Could not read image {im_fn}, skipping...")
            continue
        # Square the box and expand it by a factor of 1.1.
        bbox = expand_bounding_box(bbox, 1.1, img.shape[0], img.shape[1])
        if bbox[2] < 1 or bbox[3] < 1:
            print("Mask too small, skipping...")
            continue
        # Crop and resize to fixed max side both the image and the mask.
        img_crop = crop_and_resize(img, bbox, cell_size)
        imgs = [Image.fromarray(img_crop)]
        # White background shared by all part cells of this entry.
        white_background = 255 * np.ones(img_crop.shape, dtype=np.uint8)
        for rle in part_mask_rles:
            # Decode the mask.
            mask = mask_util.decode(rle)
            if img.shape[:2] != mask.shape:
                print(f"Image and mask shapes are different: {img.shape[:2]} != {mask.shape}")
            # Trailing axis added so the mask broadcasts over color channels.
            mask_crop = crop_and_resize(mask, bbox, cell_size)[..., None]
            # Display masked crop on white backgrounds.
            img_masked = mask_and_paste(img_crop, white_background, mask_crop)
            # Draw a one-pixel black frame around the cell.
            img_masked[0, :, :] = 0
            img_masked[-1, :, :] = 0
            img_masked[:, 0, :] = 0
            img_masked[:, -1, :] = 0
            imgs.append(Image.fromarray(img_masked))
        display(image_grid(imgs, 1, len(imgs)))
        display(Markdown("<br>".join([f"**{f_name}**: {f_val}" for f_name, f_val in display_fields.items()])))

def get_vis_info_list(anns, cat_id_to_name, attr_id_to_name, im_id_to_im_fn):
    """
    Converts (object annotation, part annotations) pairs into the
    dictionaries consumed by the visualization code.
    Args:
        anns:             Iterable of (object annotation, list of part
                          annotations) pairs
        cat_id_to_name:   Map from category ID to category name
        attr_id_to_name:  Map from attribute ID to attribute name
        im_id_to_im_fn:   Map from image ID to image file name

    Returns:
        List with one dictionary per pair, holding "im_fn", "bbox",
        "part_mask_rles" and "display_fields". The display fields map the
        object name and each part name (with ":" replaced by "-") to a
        comma-separated list of attribute names, followed by "image_id" and
        "ann_id" of the object annotation.
    """
    def joined_attr_names(annotation):
        # Comma-separated attribute names of a single annotation.
        return ", ".join([attr_id_to_name[attr_id] for attr_id in annotation["attribute_ids"]])

    infos = []
    for obj_ann, parts in anns:
        fields = {cat_id_to_name[obj_ann["category_id"]]: joined_attr_names(obj_ann)}
        for part in parts:
            part_label = cat_id_to_name[part["category_id"]].replace(":", "-")
            fields[part_label] = joined_attr_names(part)
        fields["image_id"] = obj_ann["image_id"]
        fields["ann_id"] = obj_ann["id"]

        # Build the visualization info dictionary.
        info = {
            "im_fn": im_id_to_im_fn[obj_ann["image_id"]],
            "bbox": obj_ann["bbox"],
            "part_mask_rles": [part["segmentation"] for part in parts],
            "display_fields": fields,
        }
        infos.append(info)
    return infos

def sample_vis_info(info_list, num_samples, method="random", offset=0):
    """
    Selects at most num_samples entries from a list.
    Args:
        info_list:    List to sample from
        num_samples:  Maximum number of entries to return
        method:       "random" for a random subset in random order,
                      "equidistant" for entries taken at a constant stride
                      starting from the first one, or "range" for a
                      contiguous slice starting at offset
        offset:       Start index of the slice; only used by "range"

    Returns:
        List with the selected entries

    Raises:
        ValueError: If method is not one of the supported values
    """
    if method == "random":
        indices = np.random.permutation(len(info_list))
        return [info_list[i] for i in indices[:num_samples]]
    elif method == "equidistant":
        if num_samples <= 0:
            # Nothing requested; also avoids dividing by zero below.
            return info_list[:0]
        step = max(len(info_list) // num_samples, 1)
        # The stride is rounded down, so striding alone can yield more than
        # num_samples entries; cap the result at the requested count.
        return info_list[::step][:num_samples]
    elif method == "range":
        return info_list[offset:offset+num_samples]
    else:
        raise ValueError(f"Method {method} not supported.")

def get_description(cat_name, attr_name=None, part_name=None):
    """
    Builds a short English phrase describing a category, optionally
    qualified by an attribute and a part, prefixed with "A" or "An".
    Args:
        cat_name:   Category name or None for the generic "object"; anything
                    from "_(" onwards is dropped and underscores become
                    spaces
        attr_name:  Attribute name or None. "text" and "logo" are phrased as
                    something located on the object or part instead of being
                    used as an adjective.
        part_name:  Part name or None; underscores become spaces

    Returns:
        The description string, e.g. "A checkered object"
    """
    noun = ("object" if cat_name is None else cat_name).split("_(")[0].replace("_", " ")
    is_surface_attr = attr_name in {"text", "logo"}

    if part_name is not None:
        # "<object> with [<attribute>] [on its] <part>"
        words = [noun, "with"]
        if attr_name is not None:
            words.append(attr_name)
        if is_surface_attr:
            words.append("on its")
        words.append(part_name.replace("_", " "))
    elif is_surface_attr:
        # "<object> with <attribute> on it"
        words = [noun, "with", attr_name, "on it"]
    elif attr_name is not None:
        # "<attribute> <object>"
        words = [attr_name, noun]
    else:
        words = [noun]

    phrase = ' '.join(words)
    # Pick the article from the first letter of the phrase.
    article = 'An ' if phrase[0] in {'a', 'e', 'i', 'o', 'u'} else 'A '
    return article + phrase


## Visualize

In [None]:
# Set visualization parameters.
vis_cat = None         # One of the available categories or None for all
vis_attr = "checkered" # One of the available attributes or None for all
vis_num_samples = 20   # Number of samples to show
vis_offset = 0         # Starting offset to the list of all samples

# Select the (object annotation, part annotations) pairs to visualize.
# With no filter, every object is taken in ascending annotation ID order;
# otherwise the matching bucket is used, falling back to an empty list when
# the requested category or attribute is not present in the maps.
if vis_cat is None and vis_attr is None:
    anns = [anns for ann_id, anns in sorted(obj_ann_id_to_anns.items(), key=lambda x: x[0])]
elif vis_attr is None:
    anns = cat_name_to_anns.get(vis_cat, [])
elif vis_cat is None:
    anns = attr_name_to_anns.get(vis_attr, [])
else:
    anns = cat_name_to_attr_name_to_anns.get(vis_cat, {}).get(vis_attr, [])
print(f"Number of boxes with {get_description(vis_cat, vis_attr).lower()}:", len(anns))
# Build the visualization entries, keep the window of vis_num_samples entries
# starting at vis_offset ("range" sampling), and display them.
vis_info_list = get_vis_info_list(anns, cat_id_to_name, attr_id_to_name, image_id_to_image_file_name)
vis_info_list = sample_vis_info(vis_info_list, vis_num_samples, "range", vis_offset)
visualize_segments(vis_info_list)
