In [1]:
from copy import deepcopy
from utils import convert2coco, dataset_split, dataset_analysis
from utils import coco_to_img2annots, img2annots_to_coco

In [2]:
def split_by_num_objects(annotations, num_objects_split=120):
    """Partition annotations into two sets by per-image object count.

    The input is first re-indexed per image with ``coco_to_img2annots``.
    For every image the values of its ``'num_objects'`` mapping are summed;
    images whose total is at least ``num_objects_split`` go to the first
    result, all remaining images go to the second.

    Args:
        annotations: annotations object accepted by ``coco_to_img2annots``.
        num_objects_split: inclusive lower bound on the total object count
            for an image to land in the first result.

    Returns:
        A pair ``(at_or_above, below)``; each element is the output of
        ``img2annots_to_coco`` for the corresponding group of images.
    """
    indexed = coco_to_img2annots(annotations)

    # Both results carry the same 'type' and 'categories' as the input.
    at_or_above = {
        'type': indexed['type'],
        'categories': indexed['categories'],
        'img2annots': {},
    }
    # Deep copy so the two containers never share the (still empty) image map.
    below = deepcopy(at_or_above)

    for image_key, record in indexed['img2annots'].items():
        total_objects = sum(record['num_objects'].values())
        target = at_or_above if total_objects >= num_objects_split else below
        target['img2annots'][image_key] = record

    return img2annots_to_coco(at_or_above), img2annots_to_coco(below)

def concatenate_2_coco_annotations(annotations_1, annotations_2):
    """Merge two annotation dicts into a new, fully independent one.

    The result starts as a deep copy of ``annotations_1`` (so its 'type',
    'categories' and any other keys come from the first input), and the
    'images' and 'annotations' lists of ``annotations_2`` are appended to it.

    The entries taken from ``annotations_2`` are deep-copied as well, so
    mutating the returned structure never changes either input.

    Args:
        annotations_1: dict with at least 'images' and 'annotations' lists.
        annotations_2: dict with at least 'images' and 'annotations' lists.

    Returns:
        A new dict holding the entries of both inputs, first input first.

    Raises:
        KeyError: if either input lacks 'images' or 'annotations'.
    """
    ########## IMPORTANT ##########
    # Assumption: inputs have exactly the same type and categories.
    # This is NOT checked here; ids are not re-numbered or de-duplicated either.

    merged = deepcopy(annotations_1)
    for key in ['images', 'annotations']:
        # Copy the whole list in one call so entries that share objects inside
        # annotations_2 keep sharing them in the copy.
        merged[key].extend(deepcopy(annotations_2[key]))

    return merged

In [3]:
# Path handed to convert2coco; presumably the directory holding the original
# (unsplit) dataset — TODO confirm against utils.convert2coco.
base = './data/ori'
# Annotations for the whole dataset; every later cell derives from this object.
annotations = convert2coco(base)

In [4]:
# Uses the default threshold (num_objects_split=120): split_1 receives images
# with at least 120 objects in total, split_2 receives the remaining images.
annotations_split_1, annotations_split_2 = split_by_num_objects(annotations)

In [5]:
# Target fraction for each subset; the three values add up to 1.0.
split_dictionary = dict(train=0.60, val=0.20, test=0.20)

# Split each object-count group on its own, with identical settings, so the
# groups can be merged per subset afterwards.
# NOTE(review): the meaning of the third argument (10000) is defined by
# utils.dataset_split and is not visible here — confirm and name it.
ann_split_1, ann_split_2 = (
    dataset_split(group, split_dictionary, 10000)
    for group in (annotations_split_1, annotations_split_2)
)

The best error: 0.0018010042419455393
The best error: 0.001405063577319903
The best error: 0.00036472866103810173
The best error: 0.0002759004374251383
The best error: 0.00013706750286514443
The best error: 1.4932601176013415e-05
The best error: 1.2536952442713507e-05
The best error: 0.003601535811706012
The best error: 0.0015403511613274802
The best error: 0.00034642956022512767
The best error: 0.00011191001750778452
The best error: 9.586982352836716e-05
The best error: 2.676592449898526e-05
The best error: 1.63736935978789e-05


In [6]:
# Print the per-set statistics for split 1 (images with at least 120 objects)
# and keep the value returned by dataset_analysis for later inspection.
results_split_1 = dataset_analysis(ann_split_1)

-----------------------------------
num_images          89
num_objects      16228
-----------------------------------
num_images on each set

train          54    0.607
val            18    0.202
test           17    0.191
-----------------------------------
num_objects on each set

train        9757    0.601
val          3217    0.198
test         3254    0.201
-----------------------------------
Category: 1

train        4471    0.602
val          1478    0.199
test         1482    0.199
-----------------------------------
Category: 2

train        5286    0.601
val          1739    0.198
test         1772    0.201
-----------------------------------


In [7]:
# Print the per-set statistics for split 2 (images with fewer than 120 objects)
# and keep the value returned by dataset_analysis for later inspection.
results_split_2 = dataset_analysis(ann_split_2)

-----------------------------------
num_images          91
num_objects       6911
-----------------------------------
num_images on each set

train          55    0.604
val            19    0.209
test           17    0.187
-----------------------------------
num_objects on each set

train        4149    0.600
val          1382    0.200
test         1380    0.200
-----------------------------------
Category: 1

train        2347    0.601
val           774    0.198
test          786    0.201
-----------------------------------
Category: 2

train        1802    0.600
val           608    0.202
test          594    0.198
-----------------------------------


In [8]:
# Recombine the two object-count groups subset by subset: the final 'train'
# is split 1's train plus split 2's train, and likewise for 'val' and 'test'.
anns = {}
for set_name in ('train', 'val', 'test'):
    anns[set_name] = concatenate_2_coco_annotations(
        ann_split_1[set_name],
        ann_split_2[set_name],
    )

In [9]:
# Statistics for the merged train/val/test sets. The call is the cell's last
# expression and its result is not assigned, so the returned summary dict is
# displayed after the printed report.
dataset_analysis(anns)

-----------------------------------
num_images         180
num_objects      23139
-----------------------------------
num_images on each set

train         109    0.606
val            37    0.206
test           34    0.189
-----------------------------------
num_objects on each set

train       13906    0.601
val          4599    0.199
test         4634    0.200
-----------------------------------
Category: 1

train        6818    0.601
val          2252    0.199
test         2268    0.200
-----------------------------------
Category: 2

train        7088    0.601
val          2347    0.199
test         2366    0.200
-----------------------------------


{'train': {'num_images': 109,
  'num_objects': 13906,
  'objects': {1: 6818, 2: 7088}},
 'val': {'num_images': 37, 'num_objects': 4599, 'objects': {1: 2252, 2: 2347}},
 'test': {'num_images': 34,
  'num_objects': 4634,
  'objects': {1: 2268, 2: 2366}}}