In [8]:
import json
import os

# This cell sits above the cell that normally defines `root_path`, so on a
# fresh kernel (Restart & Run All) both `os` and `root_path` would be undefined
# here. Define them locally so the cell is self-contained; the value mirrors the
# dataset root used when constructing `EgoObjects` further down.
root_path = "/data/projects/multiexpert/EgoObjects"

# Quick look at the structure of the re-exported training annotations.
with open(os.path.join(root_path, "new_train.json")) as f:
    train_data = json.load(f)
print(train_data.keys())
print(len(train_data["categories"]))
print(train_data['images'][0])
print(train_data['annotations'][0])

dict_keys(['info', 'categories', 'images', 'annotations'])
277
{'id': 1665702843794501, 'height': 1080, 'width': 1920, 'url': 'manifold://ego_objects_v1/tree/images_and_annotations/images/00ED3B9E9100528CCFDCB958B489A3BD_01_25.jpg', 'gaia_id': 334937101979014, 'timestamp': [1645099834.815261], 'stream_ids': ['1201-2'], 'group_id': '00ED3B9E9100528CCFDCB958B489A3BD', 'video_id': '01', 'frame_id': 25, 'main_category': 'Banana', 'main_category_instance_ids': [1]}
{'id': 1, 'image_id': 1665702843794501, 'bbox': [752.29, 527.23, 371.66, 307.4], 'category_id': 22, 'instance_id': '00ED3B9E9100528CCFDCB958B489A3BD_22_0', 'area': 114248.284}


In [3]:
def category_names_to_snake_case(category_name: str) -> str:
    """Normalize a human-readable category name into a snake_case identifier.

    The name is lower-cased, then every character is mapped in a single pass:
    spaces, slashes and hyphens become underscores, parentheses are dropped
    and ``&`` is spelled out as ``and``.

    Args:
        category_name (str): Category name as it appears in the metadata.

    Returns:
        str: The normalized category name.
    """
    # None of the replacement strings contains a character that is itself a
    # key of the table, so one translate() pass gives the same result as
    # applying the substitutions one after another.
    substitutions = str.maketrans(
        {
            " ": "_",
            "/": "_",
            "(": "",
            ")": "",
            "&": "and",
            "-": "_",
        }
    )
    lowered = category_name.lower()
    return lowered.translate(substitutions)

def xulyulwh_to_clipped_xcycwh(xulyulwh: list[float], width: int, height: int) -> list[float]:
    """Convert bounding box format and clip it to image boundary.
    Input format is [x, y, w, h] where x, y is the top-left corner of the box.
    Output format is [xc, yc, w, h] where xc, yc is the center of the box.
    All four vertices of the box are clipped to the image boundary.

    Args:
        xulyulwh (list[float]): Input bounding box in [x, y, w, h] format.
        width (int): Image width.
        height (int): Image height.

    Raises:
        ValueError: The box has no positive extent inside the image, either
            because it lies outside the image boundary or because its width
            or height is not positive to begin with. ``ValueError`` is a
            subclass of ``Exception``, so ``except Exception`` handlers keep
            working.

    Returns:
        list[float]: Bounding box in [xc, yc, w, h] format.
    """
    x, y, w, h = xulyulwh

    # Clip the left/top edges: move the corner onto the border and shrink the
    # extent by the amount that was cut off.
    if x < 0:
        w += x
        x = 0
    if y < 0:
        h += y
        y = 0

    # Clip the right/bottom edges. The extent is only rewritten when clipping
    # is actually needed so that untouched boxes keep their exact values.
    if x + w > width:
        w = width - x
    if y + h > height:
        h = height - y

    # A single check after clipping also rejects boxes that were degenerate on
    # input (zero or negative size), which previously slipped through whenever
    # no clipping branch was taken.
    if w <= 0 or h <= 0:
        raise ValueError("Invalid box")

    return [x + w / 2, y + h / 2, w, h]

In [4]:
from abc import ABC
import pickle
from collections import defaultdict
from pathlib import Path
from typing import Sequence
from statistics import mean, stdev
import ujson as json

import numpy as np


class DetectionDataset(ABC):
    """Format of a detection dataset.
    Category dictionary should have the following keys:
        'category_id' (int): Unique identifier for the category.
        'category' (str): Name of the category.
        '{split}_instance_count' (int): Number of instances in the {split}.
        '{split}_annotation_count' (int): Number of annotations in the {split}.
        '{split}_area_mean' (float): Mean area of the instances in the {split}.
        '{split}_area_stdev' (float): Standard deviation of the area of the instances in the {split}.
        '{split}_area_ratio_mean' (float): Mean area ratio of the instances in the {split}.
        '{split}_area_ratio_stdev' (float): Standard deviation of the area ratio of the instances in the {split}.

    Image dictionary should have the following keys:
        'img_id' (int): Unique identifier for the image.
        'url' (str): Absolute path to the actual image file.
        'width' (int): Width of the image.
        'height' (int): Height of the image.
        'annotation_ids' (list[int]): List of annotation ids.
        'split' (str): Split of the dataset (i.e., 'train', 'val', and 'test').

    Annotation dictionary should have the following keys:
        "annotation_id" (int): Unique identifier for the annotation.
        'category_id' (int): Category id of the annotation.
        'img_id' (int): Unique identifier for the image.
        'bbox' (list[float]): Bounding box of the annotation in [x, y, w, h] format.
        'area' (float): Area of the annotation.
        'area_ratio' (float): Area ratio of the annotation to the image.

    Attributes:
        categories (dict[int, dict]): Dictionary of category id to category dictionary.
        imgs (dict[int, dict]): Image data in the dataset.
            The key is img_id and the value is an image dictionary described above.
        annotations (dict[int, dict]): Annotations in the dataset.
        imgid_orders (dict[str, list[int]]): Ordered image ids per each split.
    """

    categories: dict[int, dict]
    imgs: dict[int, dict]
    annotations: dict[int, dict]
    imgid_orders: dict[str, list[int]]

    def group_imgids_by_key(self, key: str) -> dict[str, list[int]]:
        """Group the imgids based on the key string of img dictionary.

        Returns:
            dict[str, list[int]]: Key to list of imgids mapping.
        """
        key_imgs = defaultdict(list)
        for imgid, img in self.imgs.items():
            key_imgs[img[key]].append(imgid)
        return dict(key_imgs)

    def get_catid_split_imgids(self) -> dict[int, dict[str, list[int]]]:
        """Get the mapping of category id to split and image ids.

        Returns:
            dict[int, dict[str, list[int]]]: Category id to split and image ids mapping.
        """
        split_imgids = self.group_imgids_by_key("split")
        catid_split_imgids = {
            catid: {split: [] for split in split_imgids.keys()}
            for catid in self.categories.keys()
        }
        for split, imgids in split_imgids.items():
            for imgid in imgids:
                img = self.imgs[imgid]
                for annotation_id in img["annotation_ids"]:
                    catid = self.annotations[annotation_id]["category_id"]
                    catid_split_imgids[catid][split].append(imgid)
        return catid_split_imgids

    def create_ut_annotations(
        self,
        target_categories: Sequence[int | str],
        annotation_root: str | Path,
        stratify: bool = False,
        seed: int = 22,
    ) -> None:
        """Create ultralytics detection annotations from the dataset.
        Detailed format of the ultralytics detection annotation can be found here:
            https://docs.ultralytics.com/datasets/detect/
        The numberings of the categories in the ultralytics annotation will be based on the order of the target_categories.

        Args:
            target_categories (Sequence[int | str]): Annotation target categories. Both category id and category name are acceptable.
            annotation_root (str | Path): Root to create the annotation directory.
            stratify (bool): Whether to stratify the number of images per category in the training set.
            seed (int): Random seed for stratification.

        Raises:
            Exception: target_categories contain invalid element.
            NoInstanceException: No instance for the target categories.
        """
        assert len(target_categories) > 0
        category_catid = {
            category["category"]: catid for catid, category in self.categories.items()
        }
        target_catids = [
            category if isinstance(category, int) else category_catid[category]
            for category in target_categories
        ]
        annotation_root = (
            Path(annotation_root)
            if not isinstance(annotation_root, Path)
            else annotation_root
        )
        image_dir = annotation_root / "images"
        label_dir = annotation_root / "labels"
        catid_split_imgids = self.get_catid_split_imgids()

        split_imgids = {
            split: set() for split in catid_split_imgids[target_catids[0]].keys()
        }

        cat_split_imgids = {
            catid: {split: set() for split in split_imgids.keys()}
            for catid in target_catids
        }
        min_train_count = 999999999999999999999999999999999999999999

        for catid in target_catids:
            if not isinstance(catid, int):
                raise Exception(f"Invalid category id: {catid}")
            for split, imgids in catid_split_imgids[catid].items():
                if len(imgids) == 0:
                    raise NoInstanceException(
                        f"No image for category {catid} in split {split}"
                    )
                split_imgids[split] = split_imgids[split].union(set(imgids))

                cat_split_imgids[catid][split] = set(imgids)
                if split == "train":
                    min_train_count = min(len(set(imgids)), min_train_count)

        if stratify:
            np.random.seed(seed)
            for catid, split_imgids_dict in cat_split_imgids.items():
                for split, imgids in split_imgids_dict.items():
                    if split == "train":
                        sampled_imgids = set(
                            np.random.choice(
                                list(imgids), min_train_count, replace=False
                            )
                        )
                    else:
                        sampled_imgids = imgids
                    split_imgids[split] = split_imgids[split].union(sampled_imgids)

        for split, imgids_set in split_imgids.items():
            split_img_dir = image_dir / split
            split_lab_dir = label_dir / split
            split_img_dir.mkdir(parents=True, exist_ok=True)
            split_lab_dir.mkdir(parents=True, exist_ok=True)
            for imgid in list(imgids_set):
                img = self.imgs[imgid]
                img_src = Path(img["url"])
                img_dst = split_img_dir / img_src.name
                assert img_src.is_file()

                label = ""
                for annid in img["annotation_ids"]:
                    annotation = self.annotations[annid]
                    catid = annotation["category_id"]
                    if catid not in target_catids:
                        continue
                    try:
                        bbox = xulyulwh_to_clipped_xcycwh(
                            annotation["bbox"], img["width"], img["height"]
                        )
                        bbox = [
                            bbox[0] / img["width"],
                            bbox[1] / img["height"],
                            bbox[2] / img["width"],
                            bbox[3] / img["height"],
                        ]
                    except Exception:
                        continue
                    label += " ".join(
                        [
                            str(target_catids.index(catid)),
                            *[str(coordinate) for coordinate in bbox],
                        ]
                    )
                    label += "\n"
                if len(label) < 0:
                    continue
                with open(split_lab_dir / (img_src.with_suffix(".txt").name), "w") as f:
                    f.write(label)

                if not img_dst.exists():
                    img_dst.symlink_to(img_src)

    def create_ut_yaml(self, target_categories, annotation_root, yaml_path):
        content = f"path: {Path(annotation_root).resolve()}"
        for split in self.group_imgids_by_key("split").keys():
            content += f"\n{split}: images/{split}"
        content += "\n\nnames:\n"
        for i, category in enumerate(target_categories):
            assert isinstance(category, str)
            content += f"  {i}: {category}\n"
        with open(yaml_path, "w") as f:
            f.write(content)


class NoInstanceException(Exception):
    """Exception raised when there is no instance for current condition.

    Raised by ``DetectionDataset.create_ut_annotations`` when one of the
    requested categories has no image in some split of the dataset.
    """


# Version tag of the on-disk cache. EgoObjects keeps its pickled index under
# `<root>/.cache/<VERSION>/`, so changing this value makes it use a different
# cache directory.
VERSION = "0.1.0"


class EgoObjects(DetectionDataset):
    """DetectionDataset for EgoObjects dataset (https://ai.meta.com/datasets/egoobjects-downloads/).
    The minimal directory structure is as follows:
    root/
    ├── train.json
    ├── eval.json
    ├── metadata.json
    ├── images/
    │   ├── ...
    Since EgoObjects dataset is a piecewise video dataset, each image has a group_id, video_id, and frame_id.

    The parsed index is pickled under ``root/.cache/<VERSION>/`` and loaded
    from there on later constructions when all four cache files exist.

    Args:
        root (str | Path): Root directory of the dataset.
    """

    @staticmethod
    def _mean_and_stdev(values: list) -> tuple:
        """Return (mean, stdev) of ``values``; stdev is 0 for fewer than two values and both are 0 for none."""
        if len(values) > 1:
            return mean(values), stdev(values)
        if len(values) == 1:
            return values[0], 0
        return 0, 0

    def __init__(self, root: str | Path) -> None:
        self.root = root if isinstance(root, Path) else Path(root)
        assert self.root.is_dir()
        self.imgs = {}
        self.annotations = {}
        self.imgid_orders = defaultdict(list)
        self.cache_dir = self.root / ".cache" / VERSION
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        with open(self.root / "metadata.json") as fp:
            metadata = json.load(fp)

        # One pickle file per cached attribute, named after the attribute.
        cached_attributes = ("categories", "imgs", "annotations", "imgid_orders")
        cache_paths = {
            name: self.cache_dir / f"{name}.pkl" for name in cached_attributes
        }

        # Fast path: restore everything from the cache when it is complete.
        # NOTE(review): pickle.load executes arbitrary code from the file; this
        # is only safe as long as the cache directory is trusted.
        if all(path.exists() for path in cache_paths.values()):
            for name, path in cache_paths.items():
                with open(path, "rb") as fp:
                    setattr(self, name, pickle.load(fp))
            return

        self.categories = {}
        for catobj in metadata["categories"]:
            self.categories[catobj["id"]] = {
                "category_id": catobj["id"],
                "category": category_names_to_snake_case(catobj["name"]),
            }

        for label_file in ("train", "eval"):
            with open(self.root / f"{label_file}.json") as fp:
                data = json.load(fp)

            # The "eval" label file is exposed as the "val" split.
            split = "train" if label_file == "train" else "val"

            # group_id -> video_id -> frame_id -> img_id for this label file.
            group_video_frames = {}
            for img in data["images"]:
                url = str((self.root / "images" / img["url"]).resolve())
                record = {
                    "img_id": img["id"],
                    "url": url,
                    "width": img["width"],
                    "height": img["height"],
                    "annotation_ids": [],
                    "split": split,
                    "group_id": img["group_id"],
                    "video_id": img["video_id"],
                    "frame_id": int(img["frame_id"]),
                }
                self.imgs[img["id"]] = record

                frames = group_video_frames.setdefault(
                    img["group_id"], {}
                ).setdefault(img["video_id"], {})
                frames[int(img["frame_id"])] = img["id"]

            # Groups keep their first-seen order; videos within a group and
            # frames within a video are visited in sorted key order.
            for videos in group_video_frames.values():
                for _video_id, frames in sorted(videos.items()):
                    self.imgid_orders[split].extend(
                        imgid for _frame_id, imgid in sorted(frames.items())
                    )

            # Per-category accumulators for this label file only.
            cat_annotations = {}
            for catid in self.categories.keys():
                cat_annotations[catid] = {
                    "instances": set(),
                    "areas": [],
                    "area_ratios": [],
                }

            for ann in data["annotations"]:
                img = self.imgs[ann["image_id"]]
                img["annotation_ids"].append(ann["id"])

                catid = ann["_category_id"]
                area = ann["area"]
                area_ratio = area / (img["width"] * img["height"])
                self.annotations[ann["id"]] = {
                    "annotation_id": ann["id"],
                    "category_id": catid,
                    "img_id": ann["image_id"],
                    "bbox": ann["bbox"],
                    "area": area,
                    "area_ratio": area_ratio,
                }

                accumulator = cat_annotations[catid]
                accumulator["instances"].add(ann["instance_tag"])
                accumulator["areas"].append(area)
                accumulator["area_ratios"].append(area_ratio)

            # Attach the per-split statistics to every category.
            for catid, accumulator in cat_annotations.items():
                category = self.categories[catid]
                areas = accumulator["areas"]
                area_ratios = accumulator["area_ratios"]

                category[f"{split}_instance_count"] = len(accumulator["instances"])
                category[f"{split}_annotation_count"] = len(areas)

                area_mean, area_stdev = self._mean_and_stdev(areas)
                ratio_mean, ratio_stdev = self._mean_and_stdev(area_ratios)
                category[f"{split}_area_mean"] = area_mean
                category[f"{split}_area_stdev"] = area_stdev
                category[f"{split}_area_ratio_mean"] = ratio_mean
                category[f"{split}_area_ratio_stdev"] = ratio_stdev

        # Freeze the defaultdict into a plain dict before it is pickled.
        self.imgid_orders = dict(self.imgid_orders)

        for name, path in cache_paths.items():
            with open(path, "wb") as fp:
                pickle.dump(getattr(self, name), fp)

In [5]:
# Root directory of the EgoObjects download.
root_path = "/data/projects/multiexpert/EgoObjects"

# Parse the dataset, or load it from its cache when one is available.
egoobject_dataset = EgoObjects(root=root_path)

In [8]:
# Sanity check: number of categories plus the first image and annotation record.
print(len(egoobject_dataset.categories))

first_image = next(iter(egoobject_dataset.imgs.items()), None)
if first_image is not None:
    key, value = first_image
    print(key, value)
    # Last path component of the image location.
    file_name = value["url"].split("/")[-1]
    print(file_name)

first_annotation = next(iter(egoobject_dataset.annotations.items()), None)
if first_annotation is not None:
    key, value = first_annotation
    print(key, value)

638
1665813890465793 {'img_id': 1665813890465793, 'url': '/data/projects/multiexpert/EgoObjects/images/600FAAD73B7D73AB5A896F9253377DBC_04_18.jpg', 'width': 1280, 'height': 720, 'annotation_ids': [252828, 252829, 252830, 252831, 252832], 'split': 'train', 'group_id': '600FAAD73B7D73AB5A896F9253377DBC', 'video_id': '04', 'frame_id': 18}
600FAAD73B7D73AB5A896F9253377DBC_04_18.jpg
1 {'annotation_id': 1, 'category_id': 235, 'img_id': 2336139953203985, 'bbox': [394.25, 600.07, 599.31, 275.62], 'area': 165181.8222, 'area_ratio': 0.10338751689933504}


In [9]:
import os


def build_coco_split(dataset, split: str, categories: list) -> dict:
    """Assemble the COCO-style dictionary for one split of ``dataset``.

    Replaces the previously copy-pasted train/val templates and filtering
    loops with a single parameterized implementation.

    Args:
        dataset: Object exposing ``imgs`` and ``annotations`` dictionaries in
            the ``DetectionDataset`` layout.
        split (str): Split to export (value of the image ``'split'`` field).
        categories (list): Category entries shared by all splits.

    Returns:
        dict: Dictionary with ``info``, ``licenses``, ``images``,
        ``annotations`` and ``categories`` keys, in that order.
    """
    images = [
        {
            "id": img["img_id"],
            "url": img["url"],
            # basename instead of split("/") so the name is also correct for
            # paths that use the platform's own separator.
            "file_name": os.path.basename(img["url"]),
            "width": img["width"],
            "height": img["height"],
            "split": img["split"],
        }
        for img in dataset.imgs.values()
        if img["split"] == split
    ]
    # An annotation belongs to the split of the image it is attached to.
    annotations = [
        {
            "id": ann["annotation_id"],
            "category_id": ann["category_id"],
            "image_id": ann["img_id"],
            "bbox": ann["bbox"],
            "area": ann["area"],
            "area_ratio": ann["area_ratio"],
        }
        for ann in dataset.annotations.values()
        if dataset.imgs[ann["img_id"]]["split"] == split
    ]
    return {
        "info": {
            "description": "EgoObjects dataset"
        },
        "licenses": [
            {
                "url": "https://ai.meta.com/datasets/egoobjects-downloads/",
                "id": 1,
                "name": "Meta AI"
            }
        ],
        "images": images,
        "annotations": annotations,
        "categories": categories,
    }


## Categories
new_categories = [
    {"id": category["category_id"], "name": category["category"]}
    for category in egoobject_dataset.categories.values()
]

## Images and annotations, one COCO dictionary per split
train_json_format = build_coco_split(egoobject_dataset, "train", new_categories)
val_json_format = build_coco_split(egoobject_dataset, "val", new_categories)

# Keep the per-split lists available under their previous names.
new_images_train = train_json_format["images"]
new_images_val = val_json_format["images"]
new_annotations_train = train_json_format["annotations"]
new_annotations_val = val_json_format["annotations"]

annotations_path = "data/egoobjects/annotations"
os.makedirs(annotations_path, exist_ok=True)

with open(os.path.join(annotations_path, "train.json"), "w") as f:
    json.dump(train_json_format, f)

with open(os.path.join(annotations_path, "val.json"), "w") as f:
    json.dump(val_json_format, f)

In [7]:
## Linking images
import os

train_image_path = "data/egoobjects/images/train"
val_image_path = "data/egoobjects/images/val"

# Destination directory for each exported split; other splits are not linked.
split_image_dirs = {"train": train_image_path, "val": val_image_path}
for split_dir in split_image_dirs.values():
    os.makedirs(split_dir, exist_ok=True)

for key, value in egoobject_dataset.imgs.items():
    target_dir = split_image_dirs.get(value["split"])
    if target_dir is None:
        continue
    file_name = os.path.basename(value["url"])
    link_path = os.path.join(target_dir, file_name)
    # os.symlink raises FileExistsError when the link is already there, which
    # made this cell fail on every re-run. lexists() is also true for dangling
    # links, so those are left alone instead of crashing.
    if not os.path.lexists(link_path):
        os.symlink(value["url"], link_path)

In [15]:
import random

# Fixed seed so the generated palette is identical on every run; a private
# Random instance leaves the global `random` state untouched.
PALETTE_SEED = 0
palette_rng = random.Random(PALETTE_SEED)

# Class names in category order, and one random RGB color per class.
CLASSES = tuple(category["name"] for category in new_categories)
PALETTE = [
    tuple(palette_rng.randint(0, 255) for _channel in range(3))
    for _name in CLASSES
]

# Print both as Python literals that can be pasted elsewhere. Every element is
# followed by ", " (so a single class still yields a tuple), and repr() quotes
# the names so that a name containing a quote stays valid Python.
CLASSES_str = "CLASSES = (" + "".join(f"{name!r}, " for name in CLASSES) + ")"
print(CLASSES_str)

PALETTE_str = "PALETTE = [" + "".join(f"{color}, " for color in PALETTE) + "]"
print(PALETTE_str)

CLASSES = ('accordion', 'adhesive_tape', 'air_conditioner', 'air_fryer', 'air_purifier', 'airplane', 'alarm_clock', 'almond', 'alpaca', 'aluminium_foil', 'ambulance', 'ant', 'antelope', 'apple', 'apricot', 'armadillo', 'artichoke', 'arugula', 'avocado', 'axe', 'baby_monitor', 'backpack', 'bacon', 'badminton_birdie', 'badminton_racket', 'bagel', 'balance_beam', 'balloon', 'banana', 'band_aid', 'banjo', 'barge', 'barrel', 'baseball_bat', 'baseball_glove', 'basketball', 'bat_animal', 'bathroom_cabinet', 'bathtub', 'beaker', 'beans', 'bee', 'beef', 'beehive', 'beer', 'bell_pepper', 'belt', 'bench', 'bicycle', 'bicycle_helmet', 'bicycle_wheel', 'bidet', 'billboard', 'billiard_table', 'binoculars', 'blackberry', 'blanket', 'blender', 'blue_jay', 'blueberry', 'bok_choy', 'bomb', 'bonsai', 'book', 'bookcase', 'boot', 'bottle', 'bottle_opener', 'bow_and_arrow', 'bowl', 'bowling_equipment', 'box', 'box_of_macaroni_and_cheese', 'boxing_gloves', 'brassiere', 'bread', 'bridges', 'briefcase', 'brocc

In [None]:
###
# TODO: unfinished scratch cell. It held only the bare, undefined name
# `annotation_fiel`, which raises NameError under Restart & Run All.


In [None]:
# TODO: leftover scratch cell. It evaluated the undefined name `a`, which
# raises NameError under Restart & Run All.