In [9]:
import argparse
import csv
import cv2
import json
import os
from detection.core.datasets import metadata
from matplotlib import pyplot as plt
from tqdm import tqdm
import imagesize

## Open Images

In [10]:
def get_fruits_dicts(stage, root_ann="./data/openim", root_img="./data/openim", output_dir=None, dataset='openim'):
    """Convert a fruits annotation file into ID and OOD COCO-format JSON files.

    The file ``{root_ann}/annotation/fruits_{stage}.txt`` is read as a sequence
    of records: one line with an image path, one line with the number of boxes,
    then one line per box made of four space-separated floats followed by the
    category name (which may itself contain spaces). Reading stops at the first
    empty image-path line or at end of file.

    Images that do not exist under ``root_img`` are skipped and do not consume
    an image id. Every remaining image is classified as:

    * ID  - it has at least one box and every box's category is in
      ``metadata.OPENIM_FRUITS_ID_THING_CLASSES``;
    * OOD - none of its boxes has a category in that list (this includes
      images with zero boxes).

    Images that mix ID and non-ID boxes are written to neither set. Annotation
    ids are still consumed by the ID boxes of such images.

    Two files are written:
    ``./data/{dataset}_id/COCO-Format/{stage}_coco_format.json`` and
    ``./data/{dataset}_ood/COCO-Format/{stage}_coco_format.json``. The OOD file
    always has an empty ``annotations`` list.

    Args:
        stage: Split name used in the annotation and output file names.
        root_ann: Directory that contains ``annotation/fruits_{stage}.txt``.
        root_img: Directory the image paths in the annotation file are joined to.
        output_dir: Accepted for backward compatibility but not used; output
            locations are derived from ``dataset``.
        dataset: Prefix of the output directories under ``./data``.

    Returns:
        Always ``0``. The number of ID annotations seen and the sizes of the
        ID and OOD image lists are printed, in that order.
    """
    id_classes = metadata.OPENIM_FRUITS_ID_THING_CLASSES
    category_mapper = dict(zip(id_classes, range(len(id_classes))))
    images_list_id = []
    annotations_list_id = []
    images_list_ood = []
    annotations_list_ood = []  # OOD images are written without annotations
    idx = 0    # id given to the next image record
    count = 0  # id given to the next annotation

    # The context manager guarantees the annotation file is closed, even if a
    # malformed record raises part-way through.
    with open(f'{root_ann}/annotation/fruits_{stage}.txt') as f:
        filename = f.readline().rstrip()
        while filename != '':
            full_filename = os.path.join(root_img, filename)
            num = int(f.readline())
            if not os.path.exists(full_filename):
                # Consume this record's box lines so the reader stays aligned.
                for _ in range(num):
                    f.readline()
                filename = f.readline().rstrip()
                continue
            width, height = imagesize.get(full_filename)

            annotation_list_file_id = []
            has_ood = False
            for _ in range(num):
                # Strip only the line terminator: slicing off the last
                # character would truncate the category name on a final line
                # that has no trailing newline.
                fields = f.readline().rstrip('\r\n').split(' ')
                bbox = list(map(float, fields[:4]))
                category_name = ' '.join(fields[4:])
                if category_name not in category_mapper:
                    has_ood = True
                    continue
                annotation_list_file_id.append({'image_id': idx,
                                                'id': count,
                                                'category_id': category_mapper[category_name],
                                                'bbox': bbox,
                                                # presumably bbox is [x, y, w, h] - TODO confirm
                                                'area': bbox[2] * bbox[3],
                                                'iscrowd': 0,
                                                'is_truncated': 0,
                                                'is_occluded': 0})
                count += 1

            image_record = {'id': idx,
                            'width': width,
                            'height': height,
                            'file_name': filename,
                            'license': 1}
            if annotation_list_file_id and not has_ood:
                annotations_list_id += annotation_list_file_id
                images_list_id.append(image_record)
            elif not annotation_list_file_id:
                images_list_ood.append(image_record)
            filename = f.readline().rstrip()
            idx += 1

    categories = [{"supercategory": "food", "id": i, "name": classname} for classname, i in category_mapper.items()]
    licenses = [{'id': 1,
                 'name': 'none',
                 'url': 'none'}]
    outputs = [
        (images_list_id, annotations_list_id, f'./data/{dataset}_id/COCO-Format'),
        (images_list_ood, annotations_list_ood, f'./data/{dataset}_ood/COCO-Format'),
    ]
    # A distinct loop name keeps the (unused) ``output_dir`` parameter from
    # being silently overwritten.
    for images_list, annotations_list, split_dir in outputs:
        json_dict = {'info': {'year': 2020},
                     'licenses': licenses,
                     'categories': categories,
                     'images': images_list,
                     'annotations': annotations_list}
        os.makedirs(split_dir, exist_ok=True)
        file_name = f'{split_dir}/{stage}_coco_format.json'
        with open(file_name, 'w') as outfile:
            json.dump(json_dict, outfile)
    print(count)
    print(len(images_list_id))
    print(len(images_list_ood))
    return 0

# for stage in ['test', 'val', 'train']:
# for stage in ['test']:
#     print(stage)
#     get_fruits_dicts(stage, root_ann="./data/openim", root_img="./data/openim", dataset='openim')

## COCO neg

In [18]:
# COCO val2017 image directory. Set the COCO_VAL2017_DIR environment variable
# to point elsewhere instead of editing this machine-specific default.
coco_val_img_dir = os.environ.get("COCO_VAL2017_DIR", "/ssd/l.lemikhova/data/animals/coco/full/val2017")
get_fruits_dicts(stage='test', root_ann="./data/coco_neg", root_img=coco_val_img_dir, output_dir=None, dataset='coco_neg')

0
0
500


0

## Deep fruits

In [4]:
# Write the ID / OOD COCO-format test files for the Deep Fruits data.
get_fruits_dicts(
    stage='test',
    root_ann="./data/deep_fruits",
    root_img="./data/deep_fruits",
    dataset='deep_fruits',
)

981
163
166


0

### Check that no ID classes appear in the OOD images

In [None]:
# Visual sanity check: walk the annotation file and, for the first 10 images
# whose boxes are all outside the ID class list, print their category names
# and show the image.
stage = 'test'
root = "./data/deep_fruits"
openim_classes_need = metadata.OPENIM_FRUITS_ID_THING_CLASSES
category_mapper = dict(zip(openim_classes_need, range(len(openim_classes_need))))
idx = 0      # number of records processed
count = 0    # number of ID boxes seen
ood_num = 0  # number of OOD images reported so far
# The context manager closes the annotation file even when the loop stops
# early or a malformed record raises.
with open(f'{root}/annotation/fruits_{stage}.txt') as f:
    filename = f.readline().rstrip()
    while filename != '' and ood_num < 10:
        full_filename = os.path.join(root, filename)
        num = int(f.readline())
        num_ID_boxes = 0
        categories = []
        has_ood = False
        for _ in range(num):
            # Strip only the line terminator so a final line without a
            # trailing newline keeps its full category name.
            fields = f.readline().rstrip('\r\n').split(' ')
            category_name = ' '.join(fields[4:])
            categories.append(category_name)
            if category_name in category_mapper:
                count += 1
                num_ID_boxes += 1
            else:
                has_ood = True
        condition_ood = num_ID_boxes == 0 and has_ood
        if condition_ood:
            print(categories)
            ood_num += 1
            # Decode the image only when it is actually displayed.
            img = cv2.imread(full_filename)
            if img is None:
                # cv2.imread returns None instead of raising when the file is
                # missing or cannot be decoded.
                print(f'Could not read image: {full_filename}')
            else:
                plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                plt.title(filename)
                plt.show()
        filename = f.readline().rstrip()
        idx += 1

## OOD similar and different

In [15]:
def get_sim_diff_dicts(stage, root = "./data/openim", dataset='openim'):
    """Split OOD images into "similar" and "different" COCO-format JSON files.

    The file ``{root}/annotation/fruits_{stage}.txt`` is read as a sequence of
    records: one line with an image path, one line with the number of boxes,
    then one line per box made of four space-separated floats followed by the
    category name (which may itself contain spaces). Reading stops at the first
    empty image-path line or at end of file. Images that do not exist under
    ``root`` are skipped and do not consume an image id.

    An image is OOD when it has at least one box and none of its boxes has a
    category in ``metadata.OPENIM_FRUITS_ID_THING_CLASSES``. An OOD image goes
    to the "similar" set when all of its categories are in ``ood_sim``, to the
    "different" set when all of them are in ``ood_diff``, and to neither when
    it mixes the two groups or contains some other category.

    Two files are written:
    ``./data/{dataset}_ood_sim/COCO-Format/{stage}_coco_format.json`` and
    ``./data/{dataset}_ood_diff/COCO-Format/{stage}_coco_format.json``, both
    with an empty ``annotations`` list. The ID and overall OOD image lists are
    only counted and printed, not written.

    Args:
        stage: Split name used in the annotation and output file names.
        root: Directory that contains both ``annotation/fruits_{stage}.txt``
            and the images referenced by it.
        dataset: Prefix of the output directories under ``./data``.

    Returns:
        Always ``0``. The number of ID annotations seen and the sizes of the
        four image lists are printed.

    Raises:
        OSError: If an existing image file cannot be decoded by OpenCV.
    """
    id_classes = metadata.OPENIM_FRUITS_ID_THING_CLASSES
    # ood = 'grape', 'watermelon', 'pineapple', 'pomegranate', 'grapefruit', 'peach', 'mango', 'common fig', 'cantaloupe'
    ood_sim = ['grape', 'grapefruit', 'peach', 'mango']
    ood_diff = ['watermelon', 'pineapple', 'pomegranate','common fig', 'cantaloupe']
    category_mapper = dict(zip(id_classes, range(len(id_classes))))
    images_list_id = []
    images_list_ood = []
    images_list_ood_sim = []
    images_list_ood_diff = []
    annotations_list_id = []
    annotations_list_ood = []  # OOD images are written without annotations
    idx = 0    # id given to the next image record
    count = 0  # id given to the next annotation

    # The context manager guarantees the annotation file is closed, even if a
    # malformed record raises part-way through.
    with open(f'{root}/annotation/fruits_{stage}.txt') as f:
        filename = f.readline().rstrip()
        while filename != '':
            full_filename = os.path.join(root, filename)
            num = int(f.readline())
            if not os.path.exists(full_filename):
                # Consume this record's box lines so the reader stays aligned.
                for _ in range(num):
                    f.readline()
                filename = f.readline().rstrip()
                continue
            img = cv2.imread(full_filename)
            if img is None:
                # cv2.imread signals failure by returning None; fail with the
                # file name rather than an AttributeError on ``None.shape``.
                raise OSError(f'Could not read image: {full_filename}')
            height, width = img.shape[:2]

            annotation_list_file_id = []
            ood_categories = []
            for _ in range(num):
                # Strip only the line terminator: slicing off the last
                # character would truncate the category name on a final line
                # that has no trailing newline.
                fields = f.readline().rstrip('\r\n').split(' ')
                bbox = list(map(float, fields[:4]))
                category_name = ' '.join(fields[4:])
                if category_name not in category_mapper:
                    ood_categories.append(category_name)
                    continue
                annotation_list_file_id.append({'image_id': idx,
                                                'id': count,
                                                'category_id': category_mapper[category_name],
                                                'bbox': bbox,
                                                # presumably bbox is [x, y, w, h] - TODO confirm
                                                'area': bbox[2] * bbox[3],
                                                'iscrowd': 0,
                                                'is_truncated': 0,
                                                'is_occluded': 0})
                count += 1

            image_record = {'id': idx,
                            'width': width,
                            'height': height,
                            'file_name': filename,
                            'license': 1}
            if annotation_list_file_id and not ood_categories:
                annotations_list_id += annotation_list_file_id
                images_list_id.append(image_record)
            elif ood_categories and not annotation_list_file_id:
                images_list_ood.append(image_record)
                # ood_categories is non-empty here, so all() cannot pass
                # vacuously.
                if all(category in ood_sim for category in ood_categories):
                    images_list_ood_sim.append(dict(image_record))
                elif all(category in ood_diff for category in ood_categories):
                    images_list_ood_diff.append(dict(image_record))
            filename = f.readline().rstrip()
            idx += 1

    categories = [{"supercategory": "food", "id": i, "name": classname} for classname, i in category_mapper.items()]
    licenses = [{'id': 1,
                 'name': 'none',
                 'url': 'none'}]
    # Only the similar / different subsets are written by this function.
    outputs = [
        (images_list_ood_sim, annotations_list_ood, f'./data/{dataset}_ood_sim/COCO-Format'),
        (images_list_ood_diff, annotations_list_ood, f'./data/{dataset}_ood_diff/COCO-Format'),
    ]
    for images_list, annotations_list, split_dir in outputs:
        json_dict = {'info': {'year': 2020},
                     'licenses': licenses,
                     'categories': categories,
                     'images': images_list,
                     'annotations': annotations_list}
        os.makedirs(split_dir, exist_ok=True)
        file_name = f'{split_dir}/{stage}_coco_format.json'
        with open(file_name, 'w') as outfile:
            json.dump(json_dict, outfile)
    print(count)
    print('images_list_id', len(images_list_id))
    print('images_list_ood', len(images_list_ood))
    print('images_list_ood_sim', len(images_list_ood_sim))
    print('images_list_ood_diff', len(images_list_ood_diff))
    return 0

In [16]:
# Write the OOD-similar / OOD-different image lists for each Open Images split.
for stage in ('test', 'val', 'train'):
    print(stage)
    get_sim_diff_dicts(stage=stage, root="./data/openim", dataset='openim')


test
1103
images_list_id 246
images_list_ood 100
images_list_ood_sim 43
images_list_ood_diff 57
val
3461
images_list_id 808
images_list_ood 216
images_list_ood_sim 86
images_list_ood_diff 129
train
22120
images_list_id 4608
images_list_ood 1218
images_list_ood_sim 424
images_list_ood_diff 791


## COCO negative