<a href="https://colab.research.google.com/github/ankesh86/ASPNETMVCStoreApp/blob/main/HICOEmbedding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-fcycwjmr
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-fcycwjmr
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting ftfy (from clip==1.0)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l[?25hdone
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369489 sha256=210201d84dfcbab5b632aa19377913416247c65ebfeee506db29191c0b826760
  Stored in directory: /tmp/pip-ephem-wheel-cache-nzjy3wgv/wheels/da/2b/4c/d6691fa9597aac8bb

In [4]:
import torch
from transformers import CLIPProcessor, CLIPModel
import json
from tqdm import tqdm
from torch.utils.data import DataLoader
import clip
from PIL import Image
from torchvision import transforms
import multiprocessing
from zipfile import ZipFile
from io import BytesIO
import os
import math
from hoi_label import hico_text_label, coco_class_dict, valid_obj_ids
import sys
#from hico_det.hico_categories import HICO_ACTIONS, HICO_INTERACTIONS, NON_INTERACTION_IDS, HICO_OBJECTS, VERB_MAPPER


In [5]:
hico_verb_dict = {}
for k,v in hico_text_label.items():
    verb_idx, obj_idx = k[0], k[1]
    if v.split(' ')[6] == 'a' or v.split(' ')[6] == 'an':
        verb = v.split(' ')[5]
    else:
        verb = ' '.join(v.split(' ')[5:7])
    hico_verb_dict[verb_idx+1] = verb

hico_verb_dict_text = dict(zip(hico_verb_dict.keys(), hico_verb_dict.values()))
coco_class_dict_text = dict(zip(coco_class_dict.keys(), coco_class_dict.values()))

In [6]:
def get_categoty_name(id):
    return coco_class_dict_text[id]

In [7]:
def get_action_name(id):
    try:
        action = HICO_INTERACTIONS[id]['action']
    except:
        print(id)
    if action == "and":
        return "with"
    action_name = action.replace('_', ' ')
    new_action_name = []
    for str in action_name.split(' '):
        if str in VERB_MAPPER:
            str = VERB_MAPPER[str]
        new_action_name.append(str)
    return " ".join(new_action_name)

In [8]:
def xyxy_to_xywh(bbox):
    return [bbox[0], bbox[1], bbox[2]-bbox[0], bbox[3]-bbox[1]]

In [9]:
def max_bbox(box1, box2):
    return [min(box1[0], box2[0]),
            min(box1[1], box2[1]),
            max(box1[2], box2[2]),
            max(box1[3], box2[3])]

In [10]:
class Base():
    def __init__(self, image_root):
        self.image_root = image_root
        self.use_zip = True if image_root[-4:] == ".zip" else False
        if self.use_zip:
            self.zip_dict = {}

        # This is CLIP mean and std
        # Since our image is cropped from bounding box, thus we directly resize to 224*224 without center_crop to keep obj whole information.
        self.preprocess = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
        ])

    def fetch_zipfile(self, ziproot):
        pid = multiprocessing.current_process().pid  # get pid of this process.
        if pid not in self.zip_dict:
            self.zip_dict[pid] = ZipFile(ziproot)
        zip_file = self.zip_dict[pid]
        return zip_file

    def fetch_image(self, file_name):
        if self.use_zip:
            zip_file = self.fetch_zipfile(self.image_root)
            image = Image.open(BytesIO(zip_file.read(file_name))).convert('RGB')
        else:
            image = Image.open(os.path.join(self.image_root, file_name)).convert('RGB')
        return image

In [18]:
class HICODetDataset_Detection(Base):
    def __init__(self, instances_json_path, image_root, chunk_idx, total_chunk):
        super().__init__(image_root)

        self.image_root = image_root
        self.instances_json_path = instances_json_path

        # Load all jsons
        with open(instances_json_path, 'r') as f:
            annos = json.load(f)

        version = "openai/clip-vit-large-patch14"
        self.model = CLIPModel.from_pretrained(version).cuda()
        self.processor = CLIPProcessor.from_pretrained(version)

        # clean_annotations(instances_data["annotations"])
        self.annotations = annos

        # Misc
        self.image_ids = []  # main list for selecting images
        self.image_id_to_filename = {}  # file names used to read image
        for anno in self.annotations:
            image_id = int(anno['file_name'].split('.jpg')[0].split('_')[-1])
            filename = anno['file_name']
            self.image_ids.append(image_id)
            self.image_id_to_filename[image_id] = filename

    def __getitem__(self, index):
        anno = self.annotations[index]
        anno_id = int(anno['file_name'].split('.jpg')[0].split('_')[-1])
        filename = self.image_id_to_filename[anno_id]
        image = self.fetch_image(filename)

        classes = [obj["category_id"] for obj in anno["annotations"]]

        prompts = []
        hois = []

        for hoi in anno['hoi_annotation']:
            print(hoi)
            subject_annotation = anno['annotations'][hoi['subject_id']]
            subject_category_name = get_categoty_name(subject_annotation['category_id'])
            print('category ann : ',subject_category_name)
            object_annotation = anno['annotations'][hoi['object_id']]
            object_category_name = get_categoty_name(object_annotation['category_id'])
            print('object ann : ',object_annotation)

            action_id = hoi["category_id"] - 1  # Starting from 1
            target_id = hoi["object_id"]

            sub = hoi['subject_id']
            obj = hoi['object_id'] # annotation idx
            verb_id = hoi['category_id']
            object_id = anno['annotations'][obj]['category_id'] # obj bbox annotations idx

            text_hoi = hico_text_label[(verb_id-1, valid_obj_ids.index(object_id))] # 'a photo of ~ '
            action_name = hico_verb_dict_text[verb_id]

            subject_image_crop = self.preprocess(image.crop(subject_annotation['bbox']).resize((224, 224), Image.BICUBIC))
            object_image_crop = self.preprocess(image.crop(object_annotation['bbox']).resize((224, 224), Image.BICUBIC))
            #max_bbox
            action_image_crop = self.preprocess(image.crop(max_bbox(subject_annotation['bbox'], object_annotation['bbox'])).resize((224, 224), Image.BICUBIC))  # not using
            prompts.append(f"a {subject_category_name} is {action_name} a {object_category_name}")


            with torch.no_grad():
                inputs = self.processor(text=[subject_category_name, object_category_name, action_name], return_tensors="pt", padding=True)
                inputs['input_ids'] = inputs['input_ids'].cuda()
                inputs['pixel_values'] = torch.stack([subject_image_crop, object_image_crop, action_image_crop]).cuda()  # we use our own preprocessing without center_crop
                inputs['attention_mask'] = inputs['attention_mask'].cuda()
                outputs = self.model(**inputs)

            text_before_features = outputs.text_model_output.pooler_output  # before projection feature
            text_after_features = outputs.text_embeds  # normalized after projection feature (CLIP aligned space)

            image_before_features = outputs.vision_model_output.pooler_output  # before projection feature
            image_after_features = outputs.image_embeds  # normalized after projection feature (CLIP aligned space)

            hois.append({
                'subject_xywh': xyxy_to_xywh(subject_annotation['bbox']),
                'object_xywh': xyxy_to_xywh(object_annotation['bbox']),
                'action': action_name,
                'subject': subject_category_name,
                'object': object_category_name,

                'subject_text_embedding_before': text_before_features[0].cpu(),
                'subject_text_embedding_after': text_after_features[0].cpu(),
                'subject_image_embedding_before': image_before_features[0].cpu(),  # not using
                'subject_image_embedding_after': image_after_features[0].cpu(),  # not using
                'object_text_embedding_before': text_before_features[1].cpu(),
                'object_text_embedding_after': text_after_features[1].cpu(),
                'object_image_embedding_before': image_before_features[1].cpu(),  # not using
                'object_image_embedding_after': image_after_features[1].cpu(),  # not using
                'action_text_embedding_before': text_before_features[2].cpu(),
                'action_text_embedding_after': text_after_features[2].cpu(),
                'action_image_embedding_before': image_before_features[2].cpu(),  # not using
                'action_image_embedding_after': image_after_features[2].cpu()  # not using
            })
            del image_before_features, image_after_features, text_before_features, text_after_features, outputs, inputs
        return {'file_name': anno['file_name'],
                'anno_id': anno_id,
                'image': image,
                'data_id': anno_id,
                'caption': ", ".join(prompts),
                'hois': hois
               }

    def __len__(self):
        return len(self.annotations)


In [12]:
dataset_root = "hico"
annotation_filename = "annotations/test_hico.json"
annotation_path = os.path.join(dataset_root, annotation_filename)
image_root = os.path.join(dataset_root, "images", "test2015")
print(annotation_path)
print(image_root)

hico/annotations/test_hico.json
hico/images/test2015


In [19]:
dataset = HICODetDataset_Detection(annotation_path, image_root, None, None)

save_root = "hico_det_clip_test"
os.makedirs(save_root, exist_ok=True)
for d in tqdm(dataset):
    torch.save(d, os.path.join(save_root, f"embed_{d['anno_id']}.clip.pt"))

  0%|          | 0/100 [00:00<?, ?it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [148, 345, 376, 414], 'category_id': 15}


  1%|          | 1/100 [00:00<00:42,  2.33it/s]

{'subject_id': 2, 'object_id': 1, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [148, 345, 376, 414], 'category_id': 15}
{'subject_id': 0, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [174, 65, 393, 440], 'category_id': 19}
{'subject_id': 2, 'object_id': 3, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [186, 62, 392, 438], 'category_id': 19}
{'subject_id': 4, 'object_id': 5, 'category_id': 111}
category ann :  person
object ann :  {'bbox': [179, 61, 433, 374], 'category_id': 19}


  2%|▏         | 2/100 [00:00<00:49,  1.99it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [234, 85, 444, 298], 'category_id': 9}


  3%|▎         | 3/100 [00:01<00:35,  2.71it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [338, 109, 574, 247], 'category_id': 4}
{'subject_id': 2, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [338, 109, 574, 247], 'category_id': 4}
{'subject_id': 3, 'object_id': 4, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [110, 183, 501, 364], 'category_id': 4}
{'subject_id': 5, 'object_id': 4, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [110, 183, 501, 364], 'category_id': 4}
{'subject_id': 6, 'object_id': 7, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [350, 117, 591, 242], 'category_id': 4}
{'subject_id': 8, 'object_id': 9, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [116, 151, 456, 384], 'category_id': 4}


  4%|▍         | 4/100 [00:02<01:05,  1.47it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [197, 167, 466, 638], 'category_id': 27}
{'subject_id': 2, 'object_id': 3, 'category_id': 115}
category ann :  person
object ann :  {'bbox': [327, 165, 470, 524], 'category_id': 27}


  6%|▌         | 6/100 [00:02<00:42,  2.21it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 50}
category ann :  person
object ann :  {'bbox': [28, 213, 636, 477], 'category_id': 15}
{'subject_id': 0, 'object_id': 1, 'category_id': 25}
category ann :  person
object ann :  {'bbox': [16, 247, 521, 422], 'category_id': 67}
{'subject_id': 2, 'object_id': 1, 'category_id': 25}
category ann :  person
object ann :  {'bbox': [16, 247, 521, 422], 'category_id': 67}
{'subject_id': 3, 'object_id': 1, 'category_id': 25}
category ann :  person
object ann :  {'bbox': [16, 247, 521, 422], 'category_id': 67}
{'subject_id': 4, 'object_id': 1, 'category_id': 25}
category ann :  person
object ann :  {'bbox': [16, 247, 521, 422], 'category_id': 67}
{'subject_id': 5, 'object_id': 6, 'category_id': 87}
category ann :  person
object ann :  {'bbox': [2, 239, 522, 423], 'category_id': 67}


  7%|▋         | 7/100 [00:03<00:58,  1.59it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [347, 106, 541, 267], 'category_id': 4}
{'subject_id': 0, 'object_id': 2, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [118, 72, 520, 478], 'category_id': 4}


  9%|▉         | 9/100 [00:04<00:40,  2.27it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 4}
category ann :  person
object ann :  {'bbox': [65, 474, 334, 627], 'category_id': 61}
{'subject_id': 0, 'object_id': 1, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [378, 114, 398, 131], 'category_id': 62}
{'subject_id': 0, 'object_id': 2, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [446, 106, 475, 169], 'category_id': 62}
{'subject_id': 0, 'object_id': 3, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [404, 110, 433, 182], 'category_id': 62}
{'subject_id': 0, 'object_id': 4, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [282, 117, 318, 190], 'category_id': 62}
{'subject_id': 5, 'object_id': 1, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [378, 114, 398, 131], 'category_id': 62}
{'subject_id': 5, 'object_id': 2, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [446, 106, 475, 169], 'category_id': 62}
{'subject_id': 5, 'obj

 10%|█         | 10/100 [00:07<01:53,  1.26s/it]

{'subject_id': 0, 'object_id': 1, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [130, 232, 291, 417], 'category_id': 38}
{'subject_id': 2, 'object_id': 3, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [122, 210, 291, 413], 'category_id': 38}


 11%|█         | 11/100 [00:08<01:28,  1.01it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [147, 158, 211, 228], 'category_id': 37}
{'subject_id': 2, 'object_id': 3, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [150, 148, 225, 230], 'category_id': 37}
{'subject_id': 4, 'object_id': 5, 'category_id': 105}
category ann :  person
object ann :  {'bbox': [143, 156, 221, 235], 'category_id': 37}


 12%|█▏        | 12/100 [00:08<01:16,  1.14it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 29}
category ann :  person
object ann :  {'bbox': [134, 140, 190, 241], 'category_id': 41}
{'subject_id': 2, 'object_id': 3, 'category_id': 44}
category ann :  person
object ann :  {'bbox': [136, 138, 179, 242], 'category_id': 41}


 13%|█▎        | 13/100 [00:09<01:03,  1.37it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [274, 80, 447, 173], 'category_id': 22}
{'subject_id': 0, 'object_id': 2, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [313, 109, 638, 240], 'category_id': 22}
{'subject_id': 0, 'object_id': 3, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [10, 107, 631, 442], 'category_id': 22}
{'subject_id': 4, 'object_id': 5, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [197, 85, 432, 168], 'category_id': 22}
{'subject_id': 6, 'object_id': 7, 'category_id': 113}
category ann :  person
object ann :  {'bbox': [150, 93, 638, 298], 'category_id': 22}
{'subject_id': 6, 'object_id': 8, 'category_id': 113}
category ann :  person
object ann :  {'bbox': [1, 130, 638, 435], 'category_id': 22}


 15%|█▌        | 15/100 [00:10<00:56,  1.51it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [427, 119, 491, 173], 'category_id': 64}
{'subject_id': 0, 'object_id': 1, 'category_id': 115}
category ann :  person
object ann :  {'bbox': [357, 106, 408, 281], 'category_id': 32}


 16%|█▌        | 16/100 [00:10<00:44,  1.91it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [392, 128, 633, 340], 'category_id': 40}
{'subject_id': 2, 'object_id': 3, 'category_id': 115}
category ann :  person
object ann :  {'bbox': [394, 134, 632, 342], 'category_id': 40}


 17%|█▋        | 17/100 [00:10<00:40,  2.06it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 5}
category ann :  person
object ann :  {'bbox': [110, 190, 426, 398], 'category_id': 7}
{'subject_id': 2, 'object_id': 1, 'category_id': 5}
category ann :  person
object ann :  {'bbox': [110, 190, 426, 398], 'category_id': 7}
{'subject_id': 3, 'object_id': 1, 'category_id': 5}
category ann :  person
object ann :  {'bbox': [110, 190, 426, 398], 'category_id': 7}
{'subject_id': 4, 'object_id': 1, 'category_id': 5}
category ann :  person
object ann :  {'bbox': [110, 190, 426, 398], 'category_id': 7}
{'subject_id': 5, 'object_id': 6, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [110, 189, 424, 396], 'category_id': 7}


 19%|█▉        | 19/100 [00:12<00:41,  1.97it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 112}
category ann :  person
object ann :  {'bbox': [177, 18, 605, 361], 'category_id': 6}


 20%|██        | 20/100 [00:12<00:33,  2.40it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 50}
category ann :  person
object ann :  {'bbox': [1, 129, 636, 477], 'category_id': 65}


 21%|██        | 21/100 [00:12<00:27,  2.83it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 103}
category ann :  person
object ann :  {'bbox': [99, 24, 378, 389], 'category_id': 1}
{'subject_id': 0, 'object_id': 1, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [58, 82, 409, 220], 'category_id': 28}
{'subject_id': 2, 'object_id': 3, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [58, 75, 407, 226], 'category_id': 28}
{'subject_id': 4, 'object_id': 5, 'category_id': 95}
category ann :  person
object ann :  {'bbox': [56, 72, 415, 226], 'category_id': 28}


 22%|██▏       | 22/100 [00:13<00:33,  2.34it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 31}
category ann :  person
object ann :  {'bbox': [331, 31, 423, 162], 'category_id': 38}
{'subject_id': 2, 'object_id': 3, 'category_id': 31}
category ann :  person
object ann :  {'bbox': [38, 94, 130, 149], 'category_id': 38}
{'subject_id': 4, 'object_id': 5, 'category_id': 31}
category ann :  person
object ann :  {'bbox': [1, 120, 23, 179], 'category_id': 38}
{'subject_id': 6, 'object_id': 7, 'category_id': 31}
category ann :  person
object ann :  {'bbox': [89, 26, 349, 227], 'category_id': 38}
{'subject_id': 8, 'object_id': 9, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [90, 54, 348, 311], 'category_id': 38}
{'subject_id': 10, 'object_id': 11, 'category_id': 48}
category ann :  person
object ann :  {'bbox': [93, 48, 355, 303], 'category_id': 38}
{'subject_id': 12, 'object_id': 13, 'category_id': 71}
category ann :  person
object ann :  {'bbox': [98, 56, 343, 219], 'category_id': 38}


 23%|██▎       | 23/100 [00:14<00:55,  1.38it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [99, 41, 207, 241], 'category_id': 39}
{'subject_id': 2, 'object_id': 3, 'category_id': 116}
category ann :  person
object ann :  {'bbox': [120, 43, 181, 235], 'category_id': 39}


 24%|██▍       | 24/100 [00:15<00:47,  1.59it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 66}
category ann :  person
object ann :  {'bbox': [103, 1, 424, 472], 'category_id': 25}


 25%|██▌       | 25/100 [00:15<00:37,  1.98it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [3, 4, 637, 372], 'category_id': 9}
{'subject_id': 2, 'object_id': 3, 'category_id': 78}
category ann :  person
object ann :  {'bbox': [1, 190, 638, 361], 'category_id': 9}
{'subject_id': 4, 'object_id': 3, 'category_id': 78}
category ann :  person
object ann :  {'bbox': [1, 190, 638, 361], 'category_id': 9}
{'subject_id': 5, 'object_id': 3, 'category_id': 78}
category ann :  person
object ann :  {'bbox': [1, 190, 638, 361], 'category_id': 9}
{'subject_id': 6, 'object_id': 3, 'category_id': 78}
category ann :  person
object ann :  {'bbox': [1, 190, 638, 361], 'category_id': 9}
{'subject_id': 7, 'object_id': 3, 'category_id': 78}
category ann :  person
object ann :  {'bbox': [1, 190, 638, 361], 'category_id': 9}
{'subject_id': 8, 'object_id': 9, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [4, 216, 631, 352], 'category_id': 9}
{'subject_id': 10, 'object_id': 9, 'categor

 27%|██▋       | 27/100 [00:18<01:01,  1.18it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [70, 38, 559, 435], 'category_id': 4}
{'subject_id': 0, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [215, 165, 594, 376], 'category_id': 19}
{'subject_id': 2, 'object_id': 3, 'category_id': 99}
category ann :  person
object ann :  {'bbox': [217, 174, 543, 378], 'category_id': 19}


 28%|██▊       | 28/100 [00:18<00:51,  1.40it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [5, 208, 54, 302], 'category_id': 73}
{'subject_id': 2, 'object_id': 3, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [120, 273, 221, 325], 'category_id': 73}
{'subject_id': 4, 'object_id': 5, 'category_id': 74}
category ann :  person
object ann :  {'bbox': [2, 211, 85, 310], 'category_id': 73}
{'subject_id': 6, 'object_id': 7, 'category_id': 74}
category ann :  person
object ann :  {'bbox': [109, 263, 238, 338], 'category_id': 73}


 29%|██▉       | 29/100 [00:19<00:52,  1.36it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 25}
category ann :  person
object ann :  {'bbox': [62, 207, 591, 428], 'category_id': 67}
{'subject_id': 2, 'object_id': 1, 'category_id': 25}
category ann :  person
object ann :  {'bbox': [62, 207, 591, 428], 'category_id': 67}
{'subject_id': 3, 'object_id': 1, 'category_id': 25}
category ann :  person
object ann :  {'bbox': [62, 207, 591, 428], 'category_id': 67}
{'subject_id': 4, 'object_id': 1, 'category_id': 25}
category ann :  person
object ann :  {'bbox': [62, 207, 591, 428], 'category_id': 67}
{'subject_id': 5, 'object_id': 1, 'category_id': 25}
category ann :  person
object ann :  {'bbox': [62, 207, 591, 428], 'category_id': 67}
{'subject_id': 6, 'object_id': 7, 'category_id': 87}
category ann :  person
object ann :  {'bbox': [58, 227, 615, 429], 'category_id': 67}
{'subject_id': 8, 'object_id': 7, 'category_id': 87}
category ann :  person
object ann :  {'bbox': [58, 227, 615, 429], 'category_id': 67}
{'subject_id': 9, 'object_i

 30%|███       | 30/100 [00:21<01:17,  1.10s/it]

{'subject_id': 0, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [230, 243, 298, 254], 'category_id': 9}
{'subject_id': 2, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [230, 243, 298, 254], 'category_id': 9}
{'subject_id': 3, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [230, 243, 298, 254], 'category_id': 9}
{'subject_id': 4, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [230, 243, 298, 254], 'category_id': 9}
{'subject_id': 5, 'object_id': 6, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [386, 230, 475, 253], 'category_id': 9}
{'subject_id': 7, 'object_id': 6, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [386, 230, 475, 253], 'category_id': 9}
{'subject_id': 8, 'object_id': 9, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [98, 271, 564, 394], 'category_id': 9}
{'subject_id': 10, 'object_i

 31%|███       | 31/100 [00:24<01:52,  1.63s/it]

{'subject_id': 19, 'object_id': 16, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [101, 256, 599, 413], 'category_id': 9}


 33%|███▎      | 33/100 [00:24<00:59,  1.12it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [1, 257, 133, 305], 'category_id': 9}
{'subject_id': 0, 'object_id': 1, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [244, 125, 384, 586], 'category_id': 46}
{'subject_id': 0, 'object_id': 1, 'category_id': 44}
category ann :  person
object ann :  {'bbox': [170, 69, 203, 111], 'category_id': 36}
{'subject_id': 2, 'object_id': 3, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [171, 66, 199, 106], 'category_id': 36}
{'subject_id': 4, 'object_id': 5, 'category_id': 94}
category ann :  person
object ann :  {'bbox': [170, 69, 202, 112], 'category_id': 36}


 34%|███▍      | 34/100 [00:25<00:56,  1.17it/s]

{'subject_id': 6, 'object_id': 7, 'category_id': 115}
category ann :  person
object ann :  {'bbox': [172, 69, 203, 112], 'category_id': 36}


 35%|███▌      | 35/100 [00:25<00:42,  1.52it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 74}
category ann :  person
object ann :  {'bbox': [169, 235, 521, 401], 'category_id': 73}
{'subject_id': 0, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [35, 163, 274, 398], 'category_id': 75}


 36%|███▌      | 36/100 [00:25<00:36,  1.74it/s]

{'subject_id': 2, 'object_id': 3, 'category_id': 69}
category ann :  person
object ann :  {'bbox': [58, 176, 258, 378], 'category_id': 75}
{'subject_id': 0, 'object_id': 1, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [3, 117, 633, 589], 'category_id': 2}
{'subject_id': 2, 'object_id': 3, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [1, 12, 637, 628], 'category_id': 2}
{'subject_id': 4, 'object_id': 5, 'category_id': 42}
category ann :  person
object ann :  {'bbox': [2, 3, 637, 628], 'category_id': 2}
{'subject_id': 6, 'object_id': 7, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [40, 319, 172, 406], 'category_id': 2}


 37%|███▋      | 37/100 [00:26<00:41,  1.53it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [348, 128, 638, 423], 'category_id': 63}


 39%|███▉      | 39/100 [00:26<00:26,  2.34it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [157, 98, 343, 617], 'category_id': 46}
{'subject_id': 0, 'object_id': 1, 'category_id': 58}

 40%|████      | 40/100 [00:27<00:21,  2.78it/s]


category ann :  person
object ann :  {'bbox': [252, 157, 287, 188], 'category_id': 85}
{'subject_id': 0, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [229, 201, 423, 381], 'category_id': 33}
{'subject_id': 2, 'object_id': 3, 'category_id': 53}
category ann :  person
object ann :  {'bbox': [227, 209, 410, 346], 'category_id': 33}


 42%|████▏     | 42/100 [00:27<00:18,  3.13it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [217, 66, 415, 348], 'category_id': 84}
{'subject_id': 0, 'object_id': 1, 'category_id': 33}
category ann :  person
object ann :  {'bbox': [250, 326, 347, 353], 'category_id': 41}
{'subject_id': 2, 'object_id': 3, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [241, 321, 391, 358], 'category_id': 41}


 43%|████▎     | 43/100 [00:28<00:22,  2.51it/s]

{'subject_id': 4, 'object_id': 5, 'category_id': 94}
category ann :  person
object ann :  {'bbox': [238, 327, 390, 358], 'category_id': 41}
{'subject_id': 0, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [183, 200, 507, 399], 'category_id': 2}
{'subject_id': 2, 'object_id': 3, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [170, 206, 506, 399], 'category_id': 2}


 44%|████▍     | 44/100 [00:28<00:25,  2.22it/s]

{'subject_id': 4, 'object_id': 5, 'category_id': 99}
category ann :  person
object ann :  {'bbox': [182, 199, 506, 401], 'category_id': 2}
{'subject_id': 0, 'object_id': 1, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [19, 14, 629, 407], 'category_id': 3}
{'subject_id': 2, 'object_id': 1, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [19, 14, 629, 407], 'category_id': 3}
{'subject_id': 3, 'object_id': 1, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [19, 14, 629, 407], 'category_id': 3}
{'subject_id': 4, 'object_id': 1, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [19, 14, 629, 407], 'category_id': 3}


 45%|████▌     | 45/100 [00:29<00:29,  1.85it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 33}
category ann :  person
object ann :  {'bbox': [202, 318, 293, 393], 'category_id': 41}
{'subject_id': 2, 'object_id': 3, 'category_id': 44}
category ann :  person
object ann :  {'bbox': [203, 326, 291, 391], 'category_id': 41}
{'subject_id': 4, 'object_id': 5, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [198, 324, 289, 394], 'category_id': 41}
{'subject_id': 6, 'object_id': 7, 'category_id': 94}
category ann :  person
object ann :  {'bbox': [200, 324, 289, 404], 'category_id': 41}


 46%|████▌     | 46/100 [00:30<00:32,  1.67it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [132, 145, 289, 350], 'category_id': 27}
{'subject_id': 2, 'object_id': 3, 'category_id': 115}
category ann :  person
object ann :  {'bbox': [135, 142, 291, 352], 'category_id': 27}


 47%|████▋     | 47/100 [00:30<00:28,  1.87it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [142, 168, 176, 197], 'category_id': 40}
{'subject_id': 2, 'object_id': 3, 'category_id': 115}
category ann :  person
object ann :  {'bbox': [438, 57, 510, 159], 'category_id': 40}


 48%|████▊     | 48/100 [00:31<00:25,  2.05it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [119, 171, 506, 558], 'category_id': 4}
{'subject_id': 2, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [119, 171, 506, 558], 'category_id': 4}
{'subject_id': 3, 'object_id': 4, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [118, 162, 517, 559], 'category_id': 4}
{'subject_id': 5, 'object_id': 4, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [118, 162, 517, 559], 'category_id': 4}
{'subject_id': 6, 'object_id': 7, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [108, 162, 514, 558], 'category_id': 4}
{'subject_id': 8, 'object_id': 7, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [108, 162, 514, 558], 'category_id': 4}
{'subject_id': 9, 'object_id': 10, 'category_id': 99}
category ann :  person
object ann :  {'bbox': [178, 214, 504, 555], 'category_id': 4}
{'subject_id': 11, 'object

 50%|█████     | 50/100 [00:32<00:30,  1.64it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [326, 179, 493, 252], 'category_id': 15}
{'subject_id': 0, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [279, 442, 364, 491], 'category_id': 36}
{'subject_id': 2, 'object_id': 3, 'category_id': 94}
category ann :  person
object ann :  {'bbox': [285, 443, 359, 487], 'category_id': 36}
{'subject_id': 4, 'object_id': 5, 'category_id': 115}
category ann :  person
object ann :  {'bbox': [280, 453, 359, 482], 'category_id': 36}


 51%|█████     | 51/100 [00:33<00:29,  1.68it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 31}
category ann :  person
object ann :  {'bbox': [113, 174, 164, 234], 'category_id': 38}
{'subject_id': 2, 'object_id': 1, 'category_id': 31}
category ann :  person
object ann :  {'bbox': [113, 174, 164, 234], 'category_id': 38}
{'subject_id': 3, 'object_id': 1, 'category_id': 31}
category ann :  person
object ann :  {'bbox': [113, 174, 164, 234], 'category_id': 38}
{'subject_id': 4, 'object_id': 5, 'category_id': 71}
category ann :  person
object ann :  {'bbox': [122, 174, 167, 234], 'category_id': 38}


 52%|█████▏    | 52/100 [00:34<00:30,  1.56it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 44}
category ann :  person
object ann :  {'bbox': [279, 167, 438, 236], 'category_id': 35}
{'subject_id': 2, 'object_id': 3, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [279, 167, 425, 239], 'category_id': 35}


 53%|█████▎    | 53/100 [00:34<00:28,  1.63it/s]

{'subject_id': 4, 'object_id': 5, 'category_id': 115}
category ann :  person
object ann :  {'bbox': [278, 157, 423, 247], 'category_id': 35}
{'subject_id': 0, 'object_id': 1, 'category_id': 31}
category ann :  person
object ann :  {'bbox': [77, 105, 426, 267], 'category_id': 5}
{'subject_id': 2, 'object_id': 3, 'category_id': 31}
category ann :  person
object ann :  {'bbox': [110, 202, 610, 426], 'category_id': 5}
{'subject_id': 4, 'object_id': 5, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [81, 90, 422, 260], 'category_id': 5}
{'subject_id': 6, 'object_id': 7, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [104, 203, 612, 430], 'category_id': 5}
{'subject_id': 8, 'object_id': 9, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [66, 101, 423, 254], 'category_id': 5}
{'subject_id': 10, 'object_id': 11, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [100, 198, 618, 426], 'category_id': 5}


 55%|█████▌    | 55/100 [00:36<00:26,  1.70it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [621, 307, 638, 394], 'category_id': 70}
{'subject_id': 0, 'object_id': 1, 'category_id': 76}
category ann :  person
object ann :  {'bbox': [133, 97, 273, 202], 'category_id': 72}


 56%|█████▌    | 56/100 [00:36<00:20,  2.13it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 36}
category ann :  person
object ann :  {'bbox': [118, 360, 167, 411], 'category_id': 37}
{'subject_id': 2, 'object_id': 3, 'category_id': 45}
category ann :  person
object ann :  {'bbox': [115, 361, 171, 407], 'category_id': 37}


 57%|█████▋    | 57/100 [00:36<00:18,  2.27it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 5}
category ann :  person
object ann :  {'bbox': [2, 39, 628, 343], 'category_id': 5}
{'subject_id': 2, 'object_id': 1, 'category_id': 5}
category ann :  person
object ann :  {'bbox': [2, 39, 628, 343], 'category_id': 5}


 58%|█████▊    | 58/100 [00:36<00:17,  2.37it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 31}
category ann :  person
object ann :  {'bbox': [255, 29, 315, 77], 'category_id': 38}
{'subject_id': 2, 'object_id': 3, 'category_id': 71}
category ann :  person
object ann :  {'bbox': [261, 28, 313, 86], 'category_id': 38}


 59%|█████▉    | 59/100 [00:37<00:16,  2.46it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [137, 176, 201, 220], 'category_id': 41}
{'subject_id': 2, 'object_id': 3, 'category_id': 94}
category ann :  person
object ann :  {'bbox': [134, 185, 199, 232], 'category_id': 41}


 60%|██████    | 60/100 [00:37<00:15,  2.53it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [216, 150, 448, 413], 'category_id': 2}
{'subject_id': 2, 'object_id': 3, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [208, 150, 457, 420], 'category_id': 2}
{'subject_id': 4, 'object_id': 5, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [198, 155, 497, 416], 'category_id': 2}
{'subject_id': 6, 'object_id': 7, 'category_id': 99}
category ann :  person
object ann :  {'bbox': [217, 152, 422, 416], 'category_id': 2}


 62%|██████▏   | 62/100 [00:38<00:15,  2.45it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [264, 183, 481, 399], 'category_id': 73}
{'subject_id': 0, 'object_id': 1, 'category_id': 31}
category ann :  person
object ann :  {'bbox': [40, 82, 238, 201], 'category_id': 38}


 63%|██████▎   | 63/100 [00:39<00:14,  2.50it/s]

{'subject_id': 2, 'object_id': 3, 'category_id': 71}
category ann :  person
object ann :  {'bbox': [40, 74, 242, 206], 'category_id': 38}
{'subject_id': 0, 'object_id': 1, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [282, 82, 543, 423], 'category_id': 38}
{'subject_id': 2, 'object_id': 3, 'category_id': 31}
category ann :  person
object ann :  {'bbox': [295, 84, 535, 184], 'category_id': 38}


 64%|██████▍   | 64/100 [00:39<00:16,  2.21it/s]

{'subject_id': 4, 'object_id': 5, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [303, 78, 539, 181], 'category_id': 38}


 65%|██████▌   | 65/100 [00:39<00:13,  2.63it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 16}
category ann :  person
object ann :  {'bbox': [1, 270, 244, 475], 'category_id': 61}


 66%|██████▌   | 66/100 [00:40<00:11,  3.04it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [336, 59, 638, 475], 'category_id': 46}
{'subject_id': 0, 'object_id': 1, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [218, 388, 384, 590], 'category_id': 47}
{'subject_id': 2, 'object_id': 3, 'category_id': 21}
category ann :  person
object ann :  {'bbox': [243, 391, 348, 584], 'category_id': 47}
{'subject_id': 4, 'object_id': 5, 'category_id': 42}
category ann :  person
object ann :  {'bbox': [239, 390, 373, 615], 'category_id': 47}


 67%|██████▋   | 67/100 [00:40<00:13,  2.48it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 113}
category ann :  person
object ann :  {'bbox': [223, 83, 373, 373], 'category_id': 25}
{'subject_id': 2, 'object_id': 1, 'category_id': 113}
category ann :  person
object ann :  {'bbox': [223, 83, 373, 373], 'category_id': 25}
{'subject_id': 3, 'object_id': 1, 'category_id': 113}
category ann :  person
object ann :  {'bbox': [223, 83, 373, 373], 'category_id': 25}
{'subject_id': 4, 'object_id': 1, 'category_id': 113}
category ann :  person
object ann :  {'bbox': [223, 83, 373, 373], 'category_id': 25}
{'subject_id': 5, 'object_id': 1, 'category_id': 113}
category ann :  person
object ann :  {'bbox': [223, 83, 373, 373], 'category_id': 25}
{'subject_id': 6, 'object_id': 1, 'category_id': 113}
category ann :  person
object ann :  {'bbox': [223, 83, 373, 373], 'category_id': 25}
{'subject_id': 7, 'object_id': 1, 'category_id': 113}
category ann :  person
object ann :  {'bbox': [223, 83, 373, 373], 'category_id': 25}
{'subject_id': 8, 'o

 69%|██████▉   | 69/100 [00:42<00:17,  1.79it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 76}
category ann :  person
object ann :  {'bbox': [46, 450, 451, 638], 'category_id': 73}
{'subject_id': 0, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [190, 168, 494, 254], 'category_id': 9}
{'subject_id': 2, 'object_id': 3, 'category_id': 78}
category ann :  person
object ann :  {'bbox': [192, 188, 480, 254], 'category_id': 9}
{'subject_id': 4, 'object_id': 5, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [179, 188, 482, 255], 'category_id': 9}


 70%|███████   | 70/100 [00:42<00:16,  1.81it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [175, 471, 361, 504], 'category_id': 42}
{'subject_id': 2, 'object_id': 3, 'category_id': 94}
category ann :  person
object ann :  {'bbox': [155, 474, 403, 532], 'category_id': 42}


 71%|███████   | 71/100 [00:43<00:14,  2.00it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [227, 117, 413, 365], 'category_id': 62}
{'subject_id': 2, 'object_id': 3, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [14, 100, 194, 337], 'category_id': 62}


 72%|███████▏  | 72/100 [00:43<00:14,  1.95it/s]

{'subject_id': 4, 'object_id': 5, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [425, 112, 629, 346], 'category_id': 62}
{'subject_id': 0, 'object_id': 1, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [87, 178, 202, 303], 'category_id': 47}


 73%|███████▎  | 73/100 [00:44<00:12,  2.12it/s]

{'subject_id': 2, 'object_id': 3, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [81, 159, 211, 322], 'category_id': 47}
{'subject_id': 0, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [38, 16, 262, 212], 'category_id': 4}
{'subject_id': 2, 'object_id': 3, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [78, 69, 589, 377], 'category_id': 4}
{'subject_id': 4, 'object_id': 5, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [36, 20, 255, 175], 'category_id': 4}
{'subject_id': 6, 'object_id': 7, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [80, 68, 560, 404], 'category_id': 4}
{'subject_id': 8, 'object_id': 9, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [45, 26, 264, 181], 'category_id': 4}
{'subject_id': 10, 'object_id': 11, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [83, 83, 571, 401], 'category_id': 4}
{'subject_id': 12, 'object_id': 13, '

 74%|███████▍  | 74/100 [00:45<00:21,  1.23it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [244, 374, 269, 388], 'category_id': 35}
{'subject_id': 2, 'object_id': 3, 'category_id': 94}
category ann :  person
object ann :  {'bbox': [158, 311, 165, 317], 'category_id': 35}
{'subject_id': 4, 'object_id': 5, 'category_id': 94}
category ann :  person
object ann :  {'bbox': [117, 321, 130, 326], 'category_id': 35}
{'subject_id': 6, 'object_id': 7, 'category_id': 94}
category ann :  person
object ann :  {'bbox': [273, 304, 282, 309], 'category_id': 35}
{'subject_id': 8, 'object_id': 9, 'category_id': 94}
category ann :  person
object ann :  {'bbox': [34, 325, 46, 336], 'category_id': 35}
{'subject_id': 10, 'object_id': 11, 'category_id': 94}
category ann :  person
object ann :  {'bbox': [347, 334, 360, 346], 'category_id': 35}
{'subject_id': 12, 'object_id': 13, 'category_id': 94}
category ann :  person
object ann :  {'bbox': [252, 373, 269, 383], 'category_id': 35}
{'subject_id': 14,

 75%|███████▌  | 75/100 [00:47<00:25,  1.02s/it]

{'subject_id': 0, 'object_id': 1, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [180, 134, 320, 237], 'category_id': 42}
{'subject_id': 2, 'object_id': 3, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [154, 127, 335, 243], 'category_id': 42}


 76%|███████▌  | 76/100 [00:47<00:19,  1.22it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [49, 191, 383, 633], 'category_id': 18}
{'subject_id': 2, 'object_id': 3, 'category_id': 40}
category ann :  person
object ann :  {'bbox': [69, 169, 358, 596], 'category_id': 18}
{'subject_id': 4, 'object_id': 5, 'category_id': 46}
category ann :  person
object ann :  {'bbox': [44, 189, 387, 637], 'category_id': 18}


 77%|███████▋  | 77/100 [00:48<00:17,  1.31it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [94, 84, 570, 411], 'category_id': 4}
{'subject_id': 2, 'object_id': 3, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [111, 100, 567, 405], 'category_id': 4}


 78%|███████▊  | 78/100 [00:48<00:15,  1.43it/s]

{'subject_id': 4, 'object_id': 5, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [110, 103, 573, 401], 'category_id': 4}
{'subject_id': 0, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [96, 157, 521, 379], 'category_id': 19}
{'subject_id': 2, 'object_id': 3, 'category_id': 79}
category ann :  person
object ann :  {'bbox': [112, 169, 530, 375], 'category_id': 19}


 79%|███████▉  | 79/100 [00:49<00:12,  1.63it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 10}
category ann :  person
object ann :  {'bbox': [363, 72, 423, 111], 'category_id': 34}
{'subject_id': 2, 'object_id': 1, 'category_id': 10}
category ann :  person
object ann :  {'bbox': [363, 72, 423, 111], 'category_id': 34}


 80%|████████  | 80/100 [00:49<00:11,  1.67it/s]

{'subject_id': 3, 'object_id': 4, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [371, 80, 407, 102], 'category_id': 34}
{'subject_id': 0, 'object_id': 1, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [135, 105, 451, 247], 'category_id': 59}


 81%|████████  | 81/100 [00:50<00:10,  1.88it/s]

{'subject_id': 0, 'object_id': 2, 'category_id': 58}
category ann :  person
object ann :  {'bbox': [8, 251, 476, 638], 'category_id': 59}
{'subject_id': 0, 'object_id': 1, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [99, 65, 323, 104], 'category_id': 42}
{'subject_id': 2, 'object_id': 3, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [99, 63, 331, 121], 'category_id': 42}
{'subject_id': 4, 'object_id': 5, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [346, 256, 640, 424], 'category_id': 42}


 82%|████████▏ | 82/100 [00:50<00:09,  1.87it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [177, 278, 241, 363], 'category_id': 27}
{'subject_id': 2, 'object_id': 3, 'category_id': 115}
category ann :  person
object ann :  {'bbox': [179, 276, 235, 360], 'category_id': 27}


 83%|████████▎ | 83/100 [00:50<00:08,  2.07it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 33}
category ann :  person
object ann :  {'bbox': [294, 174, 340, 229], 'category_id': 41}
{'subject_id': 2, 'object_id': 3, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [294, 189, 336, 233], 'category_id': 41}


 84%|████████▍ | 84/100 [00:51<00:07,  2.23it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 24}
category ann :  person
object ann :  {'bbox': [487, 145, 531, 209], 'category_id': 54}
{'subject_id': 2, 'object_id': 3, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [470, 140, 537, 215], 'category_id': 54}


 85%|████████▌ | 85/100 [00:51<00:07,  2.08it/s]

{'subject_id': 4, 'object_id': 5, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [205, 282, 263, 352], 'category_id': 54}
{'subject_id': 0, 'object_id': 1, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [104, 321, 434, 425], 'category_id': 15}


 86%|████████▌ | 86/100 [00:52<00:05,  2.54it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [1, 185, 463, 475], 'category_id': 9}
{'subject_id': 2, 'object_id': 3, 'category_id': 78}
category ann :  person
object ann :  {'bbox': [2, 214, 466, 477], 'category_id': 9}


 87%|████████▋ | 87/100 [00:52<00:05,  2.24it/s]

{'subject_id': 4, 'object_id': 5, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [3, 212, 462, 474], 'category_id': 9}
{'subject_id': 0, 'object_id': 1, 'category_id': 39}
category ann :  person
object ann :  {'bbox': [98, 186, 635, 452], 'category_id': 3}
{'subject_id': 2, 'object_id': 3, 'category_id': 42}
category ann :  person
object ann :  {'bbox': [95, 192, 637, 445], 'category_id': 3}
{'subject_id': 4, 'object_id': 3, 'category_id': 42}
category ann :  person
object ann :  {'bbox': [95, 192, 637, 445], 'category_id': 3}
{'subject_id': 5, 'object_id': 6, 'category_id': 112}
category ann :  person
object ann :  {'bbox': [88, 170, 638, 443], 'category_id': 3}
{'subject_id': 7, 'object_id': 6, 'category_id': 112}
category ann :  person
object ann :  {'bbox': [88, 170, 638, 443], 'category_id': 3}
{'subject_id': 8, 'object_id': 6, 'category_id': 112}
category ann :  person
object ann :  {'bbox': [88, 170, 638, 443], 'category_id': 3}
{'subject_id': 9, 'object_id': 6

 88%|████████▊ | 88/100 [00:54<00:09,  1.25it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [487, 52, 638, 167], 'category_id': 28}
{'subject_id': 2, 'object_id': 3, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [539, 96, 636, 270], 'category_id': 28}
{'subject_id': 4, 'object_id': 5, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [2, 85, 228, 309], 'category_id': 28}
{'subject_id': 6, 'object_id': 7, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [84, 65, 243, 291], 'category_id': 28}
{'subject_id': 8, 'object_id': 9, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [78, 54, 242, 258], 'category_id': 28}
{'subject_id': 10, 'object_id': 11, 'category_id': 37}
category ann :  person
object ann :  {'bbox': [533, 96, 634, 228], 'category_id': 28}


 89%|████████▉ | 89/100 [00:55<00:09,  1.13it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [106, 222, 312, 618], 'category_id': 22}
{'subject_id': 2, 'object_id': 1, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [106, 222, 312, 618], 'category_id': 22}


 91%|█████████ | 91/100 [00:55<00:05,  1.77it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [2, 170, 425, 631], 'category_id': 63}
{'subject_id': 0, 'object_id': 1, 'category_id': 50}
category ann :  person
object ann :  {'bbox': [2, 155, 348, 389], 'category_id': 63}


 92%|█████████▏| 92/100 [00:56<00:04,  1.97it/s]

{'subject_id': 2, 'object_id': 3, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [317, 227, 635, 423], 'category_id': 63}
{'subject_id': 0, 'object_id': 1, 'category_id': 22}
category ann :  person
object ann :  {'bbox': [114, 204, 438, 336], 'category_id': 9}
{'subject_id': 2, 'object_id': 3, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [120, 211, 439, 331], 'category_id': 9}
{'subject_id': 4, 'object_id': 3, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [120, 211, 439, 331], 'category_id': 9}
{'subject_id': 5, 'object_id': 6, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [118, 209, 439, 323], 'category_id': 9}
{'subject_id': 7, 'object_id': 6, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [118, 209, 439, 323], 'category_id': 9}


 93%|█████████▎| 93/100 [00:57<00:04,  1.61it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 59}
category ann :  person
object ann :  {'bbox': [259, 62, 638, 255], 'category_id': 84}
{'subject_id': 2, 'object_id': 3, 'category_id': 74}
category ann :  person
object ann :  {'bbox': [259, 74, 638, 267], 'category_id': 84}


 95%|█████████▌| 95/100 [00:57<00:02,  2.28it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 28}
category ann :  person
object ann :  {'bbox': [178, 228, 314, 333], 'category_id': 47}
{'subject_id': 0, 'object_id': 1, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [45, 517, 221, 585], 'category_id': 15}


 96%|█████████▌| 96/100 [00:58<00:01,  2.41it/s]

{'subject_id': 2, 'object_id': 1, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [45, 517, 221, 585], 'category_id': 15}
{'subject_id': 0, 'object_id': 1, 'category_id': 44}
category ann :  person
object ann :  {'bbox': [262, 169, 349, 200], 'category_id': 41}


 97%|█████████▋| 97/100 [00:58<00:01,  2.51it/s]

{'subject_id': 2, 'object_id': 3, 'category_id': 77}
category ann :  person
object ann :  {'bbox': [264, 171, 342, 204], 'category_id': 41}
{'subject_id': 0, 'object_id': 1, 'category_id': 115}
category ann :  person
object ann :  {'bbox': [40, 343, 275, 394], 'category_id': 36}


 98%|█████████▊| 98/100 [00:58<00:00,  2.99it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 9}
category ann :  person
object ann :  {'bbox': [119, 241, 190, 381], 'category_id': 27}
{'subject_id': 2, 'object_id': 3, 'category_id': 115}
category ann :  person
object ann :  {'bbox': [119, 250, 232, 377], 'category_id': 27}


100%|██████████| 100/100 [00:59<00:00,  1.69it/s]

{'subject_id': 0, 'object_id': 1, 'category_id': 88}
category ann :  person
object ann :  {'bbox': [9, 85, 635, 413], 'category_id': 15}





In [15]:
from google.colab import files

# Compress the folder into a ZIP file
!zip -r hico.zip hico

# Download the ZIP file
files.download('hico.zip')


  adding: hico/ (stored 0%)
  adding: hico/images/ (stored 0%)
  adding: hico/images/.ipynb_checkpoints/ (stored 0%)
  adding: hico/images/test2015/ (stored 0%)
  adding: hico/images/test2015/HICO_test2015_00000074.jpg (deflated 12%)
  adding: hico/images/test2015/HICO_test2015_00000096.jpg (deflated 0%)
  adding: hico/images/test2015/HICO_test2015_00000094.jpg (deflated 7%)
  adding: hico/images/test2015/HICO_test2015_00000033.jpg (deflated 0%)
  adding: hico/images/test2015/HICO_test2015_00000084.jpg (deflated 11%)
  adding: hico/images/test2015/HICO_test2015_00000029.jpg (deflated 2%)
  adding: hico/images/test2015/HICO_test2015_00000056.jpg (deflated 7%)
  adding: hico/images/test2015/HICO_test2015_00000036.jpg (deflated 1%)
  adding: hico/images/test2015/HICO_test2015_00000022.jpg (deflated 5%)
  adding: hico/images/test2015/HICO_test2015_00000034.jpg (deflated 8%)
  adding: hico/images/test2015/HICO_test2015_00000060.jpg (deflated 2%)
  adding: hico/images/test2015/HICO_test2015_

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>