## Specify dataset name

In [None]:
# Specify the dataset split to visualize. Available options:
# dataset_name = "paco_ego4d_v1_train"
# dataset_name = "paco_ego4d_v1_val"
# dataset_name = "paco_ego4d_v1_test"
# The chosen name is used later as a key into `_PREDEFINED_PACO` to find the
# annotation file and the image root directory.
dataset_name = "paco_ego4d_v1_test"


## Load dataset and extract maps

In [None]:
import json
from paco.data.datasets.builtin import _PREDEFINED_PACO

# Derived parameters: the registry entry for the selected dataset unpacks into
# the annotation JSON path and the root directory of its images.
dataset_file_name, image_root_dir = _PREDEFINED_PACO[dataset_name]

# Load dataset. JSON is UTF-8 by specification, so decode it explicitly rather
# than relying on the platform's default text encoding.
with open(dataset_file_name, encoding="utf-8") as f:
    dataset = json.load(f)


In [None]:
import os
from collections import defaultdict

# Build lookup tables from the loaded dataset.

# Image id -> full path of the image file under the image root directory.
image_id_to_fn = {}
for image_info in dataset["images"]:
    image_id_to_fn[image_info["id"]] = os.path.join(image_root_dir, image_info["file_name"])

# Category id -> name, keeping only categories whose name contains no ":"
# (presumably part categories are named "object:part" -- confirm against the
# dataset documentation).
cat_id_to_name = {}
for cat_info in dataset["categories"]:
    if ":" in cat_info["name"]:
        continue
    cat_id_to_name[cat_info["id"]] = cat_info["name"]

# Category name -> instance id -> list of annotations. Only annotations whose
# id equals their own "obj_ann_id" are grouped.
cat_name_to_inst_to_anns = {}
for ann in dataset["annotations"]:
    if ann["id"] != ann["obj_ann_id"]:
        continue
    cat_name = cat_id_to_name[ann["category_id"]]
    inst_to_anns = cat_name_to_inst_to_anns.setdefault(cat_name, {})
    inst_to_anns.setdefault(ann["instance_id"], []).append(ann)

print("Available categories:", sorted(cat_name_to_inst_to_anns.keys()))


## Visualization functions

In [None]:
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import json
from detectron2.utils.file_io import PathManager
from detectron2.data.detection_utils import read_image

def expand_bounding_box(box, factor, im_height, im_width):
    """
    Turns a bounding box into an expanded, centered box clipped to the image.

    The box is first grown to a square whose side is ``factor`` times the
    longer side of the input box. Each side is then shrunk symmetrically, if
    needed, so the result stays inside the image while keeping the original
    box center.

    Args:
        box:        Sequence of 4 numbers (e.g. a (4, ) NumPy array) holding
                    the bounding box in (left, top, width, height) format
        factor:     Expansion factor (e.g., 1.5)
        im_height:  Image height
        im_width:   Image width

    Returns:
        expanded_box: (4, ) NumPy array with the expanded bounding box in
                      (left, top, width, height) format
    """
    left, top, width, height = box

    # Center of the original box; it is preserved by the expansion.
    center_x = left + 0.5 * width
    center_y = top + 0.5 * height

    # Side of the expanded square.
    side = factor * np.maximum(height, width)
    half_side = side / 2

    # Overshoot is zero when the square fits, otherwise the (negative) amount
    # by which the worst side leaves the image. Shrinking by twice that amount
    # keeps the box centered on the original center.
    overshoot_x = min(0, center_x - half_side, im_width - (center_x + half_side))
    overshoot_y = min(0, center_y - half_side, im_height - (center_y + half_side))
    out_w = side + 2 * overshoot_x
    out_h = side + 2 * overshoot_y

    # Corners of the final box around the preserved center.
    out_left = center_x - out_w / 2
    out_top = center_y - out_h / 2
    out_right = center_x + out_w / 2
    out_bottom = center_y + out_h / 2

    return np.array([out_left, out_top, (out_right - out_left), (out_bottom - out_top)])

def image_grid(imgs, rows, cols):
    """
    Pastes equally sized PIL images into a single grid image, row by row.

    Args:
        imgs:   List of PIL images; its length must equal rows * cols. The
                size of the first image is used as the cell size.
        rows:   Number of grid rows
        cols:   Number of grid columns

    Returns:
        grid: RGB PIL image of size (cols * cell width, rows * cell height)
              with a white background
    """
    assert len(imgs) == rows * cols

    cell_w, cell_h = imgs[0].size
    canvas = Image.new('RGB', size=(cols * cell_w, rows * cell_h), color=(255, 255, 255))

    for idx, tile in enumerate(imgs):
        # Row-major placement: idx -> (row, column).
        row, col = divmod(idx, cols)
        canvas.paste(tile, box=(col * cell_w, row * cell_h))
    return canvas

def _load_label_font(size):
    """Loads the DejaVu Serif label font, falling back to Pillow's default font."""
    try:
        return ImageFont.truetype(
            "/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf", size
        )
    except OSError:
        # The hard-coded path is not present on every system; a plain built-in
        # font is preferable to failing the whole visualization.
        return ImageFont.load_default()


def visualize_instances(anns, rows, cols, image_id_to_fn, vis_image_width=1920):
    """
    Prepares image grid with one occurrence in each cell for a given list
    of occurrences for one instance.

    Each occurrence is cropped around its (squared and 1.2x expanded) bounding
    box, resized to a square cell and labeled with its index in ``anns``.

    Args:
        anns:             Iterable of annotation dicts; each must provide
                          "image_id" and "bbox" (left, top, width, height)
        rows:             Maximum number of grid rows
        cols:             Number of grid columns
        image_id_to_fn:   Mapping from image id to image file name
        vis_image_width:  Width of the full grid in pixels; the cell size is
                          this width divided by ``cols``

    Returns:
        grid: PIL image with at most rows * cols occurrences. When there are
              fewer occurrences, only as many rows as needed are produced and
              the last row is padded with white cells.
    """
    # Derive parameters.
    cell_size = int(round(vis_image_width / cols))
    max_num_images = rows * cols

    # Label fonts and offsets depend only on the cell size, so load them once
    # instead of on every occurrence. Sizes are clamped to at least 1 because
    # a non-positive font size is rejected by the font loader.
    font_size = int(round(0.1667 * cell_size))
    back_font = _load_label_font(max(1, font_size))
    front_font = _load_label_font(max(1, font_size - 4))
    text_x = int(round(0.02 * cell_size))
    y_offset = 2 if cols < 7 else 3

    imgs = []
    for item_id, ann in enumerate(anns):
        # Read the image the occurrence belongs to.
        img = read_image(image_id_to_fn[ann["image_id"]], format="RGB")
        # Square the box and expand it (image height and width come from the
        # first two array dimensions).
        x, y, w, h = expand_bounding_box(ann["bbox"], 1.2, img.shape[0], img.shape[1])
        # Crop and resize the image.
        crop_img = img[int(y) : int(y + h), int(x) : int(x + w)]
        crop_img = cv2.resize(crop_img, (cell_size, cell_size))
        crop_img = Image.fromarray(crop_img)
        # Add occurrence index: a yellow label with a slightly smaller black
        # label drawn over it.
        draw = ImageDraw.Draw(crop_img)
        draw.text((text_x, 0), str(item_id), (255, 255, 0), font=back_font)
        draw.text((text_x + 1, y_offset), str(item_id), (0, 0, 0), font=front_font)
        # Append the occurrence crop to the output.
        imgs.append(crop_img)
        if len(imgs) == max_num_images:
            break

    # White filler cell used for an empty input and for padding the last row.
    blank = Image.new("RGB", (cell_size, cell_size), (255, 255, 255))
    if len(imgs) == 0:
        imgs = [blank]
    if len(imgs) < max_num_images:
        # Use only as many rows as needed and pad the last one.
        rows = (len(imgs) - 1) // cols + 1
        imgs += (rows * cols - len(imgs)) * [blank]
    return image_grid(imgs, rows, cols)


## Visualize

In [None]:
from collections import defaultdict

# Parameters.
num_rows = 12               # Number of image grid rows
num_cols = 6                # Number of image grid columns
vis_cat_names = ["wrench"]  # Object categories to visualize, set to None for all
vis_min_num_occ = 5         # Show instances with at least this many occurrences, set to None to disable
vis_inst_per_cat = 10       # Maximum number of instances shown per category, set to None for all

# Get visualization categories.
if vis_cat_names is None:
    vis_cat_names = sorted(cat_name_to_inst_to_anns.keys())

# Visualize instances for desired categories.
for cat_name in vis_cat_names:
    inst_id_to_anns = cat_name_to_inst_to_anns[cat_name]
    if vis_min_num_occ is not None:
        # Skip instances with less than minimum number of occurrences.
        inst_id_to_anns = {inst_id: anns for inst_id, anns in inst_id_to_anns.items() if len(anns) >= vis_min_num_occ}
    # Visit instances in increasing instance id order, honoring the
    # per-category limit (previously defined but never applied).
    inst_items = sorted(inst_id_to_anns.items(), key=lambda item: item[0])
    if vis_inst_per_cat is not None:
        inst_items = inst_items[:vis_inst_per_cat]
    for inst_id, anns in inst_items:
        # Get visualization image.
        vis_im = visualize_instances(anns, num_rows, num_cols, image_id_to_fn)
        # Display the image.
        print(f"Instance {inst_id} for category {cat_name}")
        display(vis_im)
