### Creating protocol files for the paper &darr;
### The Overlooked Elephant of Object Detection: Open Set

We first map the COCO category IDs of the 20 classes shared with PASCAL VOC to their corresponding PASCAL VOC class IDs

In [1]:
# COCO category ID -> PASCAL VOC class ID.
# The tuple lists the COCO IDs in PASCAL VOC order: the i-th entry (counting
# from 1) is the COCO ID that maps to PASCAL VOC ID i.
COCOID_to_pascalID = dict(zip(
    (5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72),
    range(1, 21),
))

In [2]:
# Keys and values of the forward mapping, kept as parallel tuples.
coco_ids = tuple(COCOID_to_pascalID.keys())
pascal_ids = tuple(COCOID_to_pascalID.values())
# Inverse lookup: PASCAL VOC class ID -> COCO category ID.
PascalID_to_COCOID = {
    pascal_id: coco_id for coco_id, pascal_id in COCOID_to_pascalID.items()
}

In [3]:
# Necessary Imports
import json
from pycocotools.coco import COCO

The protocol in this paper is based on the PASCAL VOC and MSCOCO datasets

Please define their paths below, along with the file names of the open-set protocol files that will be generated

In [4]:
# Input annotation files; both are loaded through pycocotools' COCO class below.
coco_2017_training_file = "instances_train2017.json"
pascal_2007_test_file = "pascal_test2007.json"

# Output protocol files written by this notebook.
only_knowns_file_name = "onlyKnown_instances.json"
mixed_unknowns_file_name = "Mixed_Unknowns.json"

Loading the files into coco objects

In [5]:
# Parse each annotation file into a COCO helper object; this builds the
# lookup indexes (imgs, cats, annotation queries) used by the cells below.
coco_2017_train = COCO(annotation_file=coco_2017_training_file)
pascal_2007_test = COCO(annotation_file=pascal_2007_test_file)

loading annotations into memory...
Done (t=56.70s)
creating index...
index created!
loading annotations into memory...
Done (t=0.50s)
creating index...
index created!


## Making the protocol for mixed unknowns

Finding images that do not contain any of the known objects

In [6]:
# IDs of every training image that contains at least one known category.
# One getImgIds query is issued per known COCO category and the results are
# unioned directly into a set. A named loop variable is used instead of `_`
# so IPython's last-output variable is not clobbered.
images_with_knowns = {
    img_id
    for known_coco_id in COCOID_to_pascalID
    for img_id in coco_2017_train.getImgIds(catIds=[known_coco_id])
}
# Every remaining training image contains only unknown categories (or no
# annotations at all).
images_without_knowns = set(coco_2017_train.imgs) - images_with_knowns

In [7]:
# Copy of the loaded training dataset; no cell visible in this notebook reads
# it afterwards.
json_data = coco_2017_train.dataset.copy()
# Empty skeleton of the output annotation file; the next cells fill in the
# three lists.
new_json = {'images': [], 'annotations': [], 'categories': []}

Let's create the categories

In [8]:
# Renumber the known COCO categories to contiguous IDs 1..N, ordered by their
# original COCO ID, and record the old -> new mapping.
new_class_id_mapping = {}
for new_class_id, known_class_id in enumerate(sorted(COCOID_to_pascalID), start=1):
    new_class_id_mapping[known_class_id] = new_class_id
    # Append a *copy* of the category entry carrying the new ID. Writing 'id'
    # into coco_2017_train.cats[...] directly would modify the loaded dataset
    # itself, which later cells still query.
    new_json['categories'].append(
        dict(coco_2017_train.cats[known_class_id], id=new_class_id)
    )

Let's create the entries for images and annotations

In [9]:
# Add every image without known objects, together with its annotations, to the
# output structure.
for img_id in images_without_knowns:
    new_json['images'].append(coco_2017_train.imgs[img_id])
    ann_ids = coco_2017_train.getAnnIds(imgIds=[img_id])
    for source_annotation in coco_2017_train.loadAnns(ids=ann_ids):
        # Work on a copy: the dicts returned by loadAnns belong to
        # coco_2017_train, and editing them in place would overwrite the
        # loaded dataset's category IDs and segmentations.
        annotation = dict(source_annotation)
        if annotation['category_id'] not in COCOID_to_pascalID:
            # Any category outside the known set is labelled as unknown (-1).
            annotation['category_id'] = -1
        else:
            annotation['category_id'] = new_class_id_mapping[annotation['category_id']]
        # Segmentation masks are dropped from the protocol file.
        annotation['segmentation'] = [[]]
        new_json['annotations'].append(annotation)

Saving the file with mixed unknowns

In [10]:
# Write the mixed-unknowns protocol file. The context manager guarantees the
# handle is flushed and closed even if serialization fails.
with open(mixed_unknowns_file_name, "w") as mixed_unknowns_file:
    json.dump(new_json, mixed_unknowns_file)

## Making the protocol for only knowns

In [11]:
# Copy of the loaded training dataset; no cell visible in this notebook reads
# it afterwards.
json_data = coco_2017_train.dataset.copy()
# Start a fresh, empty output structure for the only-knowns protocol file.
new_json = {'images': [], 'annotations': [], 'categories': []}

In [12]:
# Renumber the known COCO categories to contiguous IDs 1..N, ordered by their
# original COCO ID, and record the old -> new mapping.
new_class_id_mapping = {}
for new_class_id, known_class_id in enumerate(sorted(COCOID_to_pascalID), start=1):
    new_class_id_mapping[known_class_id] = new_class_id
    # Append a *copy* of the category entry carrying the new ID so the
    # category dicts held by coco_2017_train are left unmodified.
    new_json['categories'].append(
        dict(coco_2017_train.cats[known_class_id], id=new_class_id)
    )

In [13]:
# Add every PASCAL VOC test image, together with its annotations, to the
# output structure, translating category IDs PASCAL -> COCO -> new class ID.
for img_id in pascal_2007_test.imgs:
    new_json['images'].append(pascal_2007_test.imgs[img_id])
    ann_ids = pascal_2007_test.getAnnIds(imgIds=[img_id])
    for source_annotation in pascal_2007_test.loadAnns(ids=ann_ids):
        # Work on a copy: remapping the dict owned by pascal_2007_test in
        # place would make this cell non-idempotent, because a second run
        # would treat the already-remapped IDs as PASCAL IDs and silently
        # produce wrong labels.
        annotation = dict(source_annotation)
        coco_category_id = PascalID_to_COCOID[annotation['category_id']]
        annotation['category_id'] = new_class_id_mapping[coco_category_id]
        # Segmentation masks are dropped from the protocol file.
        annotation['segmentation'] = [[]]
        new_json['annotations'].append(annotation)

In [14]:
# Write the only-knowns protocol file. The context manager guarantees the
# handle is flushed and closed even if serialization fails.
with open(only_knowns_file_name, "w") as only_knowns_file:
    json.dump(new_json, only_knowns_file)