In [30]:
import os.path as osp
import mmcv

def convert_balloon_to_coco(ann_file, out_file, image_prefix):
    """Convert a balloon-style region annotation file to a COCO-format JSON.

    The annotation file is loaded with ``mmcv.load`` and must yield a mapping
    whose values each carry a ``'filename'`` and a ``'regions'`` entry. Every
    region contributes one polygon annotation, all under category id 0
    (``'balloon'``). The result is written with ``mmcv.dump``.

    Args:
        ann_file: Path of the source annotation file.
        out_file: Path of the COCO-format file to write.
        image_prefix: Directory joined with each ``'filename'``; the image is
            read from there to obtain its height and width.

    Raises:
        AssertionError: If a region has non-empty ``'region_attributes'``.
    """
    data_infos = mmcv.load(ann_file)
    annotations = []
    images = []
    obj_count = 0  # running annotation id, unique across all images

    for idx, v in enumerate(mmcv.track_iter_progress(data_infos.values())):
        filename = v['filename']
        img_path = osp.join(image_prefix, filename)
        height, width = mmcv.imread(img_path).shape[:2]

        images.append(dict(
            id=idx,
            file_name=filename,
            height=height,
            width=width))

        # 'regions' is a dict keyed by region index in the balloon data; a
        # plain list of regions is accepted as well.
        # NOTE(review): newer VIA exports reportedly store a list - confirm.
        regions = v['regions']
        if isinstance(regions, dict):
            regions = regions.values()

        for region in regions:
            assert not region['region_attributes']
            shape = region['shape_attributes']
            px = shape['all_points_x']  # x coordinates of the polygon vertices
            py = shape['all_points_y']  # y coordinates of the polygon vertices

            # Flatten to [x0, y0, x1, y1, ...] with every coordinate shifted
            # by half a pixel.
            # NOTE(review): presumably the shift moves integer pixel indices
            # to pixel centres; the bbox below uses the unshifted values.
            poly = [coord + 0.5 for point in zip(px, py) for coord in point]

            x_min, y_min, x_max, y_max = (
                min(px), min(py), max(px), max(py))

            # image_id    -> id of the image this object belongs to
            # id          -> id assigned to the object itself
            # category_id -> class label
            data_anno = dict(
                image_id=idx,
                id=obj_count,
                category_id=0,
                bbox=[x_min, y_min, x_max - x_min, y_max - y_min],
                # area of the bounding box, not of the polygon
                area=(x_max - x_min) * (y_max - y_min),
                segmentation=[poly],
                iscrowd=0)
            annotations.append(data_anno)
            obj_count += 1

    coco_format_json = dict(
        images=images,
        annotations=annotations,
        categories=[{'id':0, 'name': 'balloon'}])
    mmcv.dump(coco_format_json, out_file)
    
# Convert the balloon training annotations into a COCO-format JSON file.
convert_balloon_to_coco(
    ann_file='./balloon/train/via_region_data.json',
    out_file='./balloon/coco_anno.json',
    image_prefix='./balloon/train',
)

[                                                  ] 0/61, elapsed: 0s, ETA:0 0
[                                  ] 1/61, 6.5 task/s, elapsed: 0s, ETA:     9s1 1
[>                                 ] 2/61, 9.6 task/s, elapsed: 0s, ETA:     6s2 2
[>                                ] 3/61, 11.4 task/s, elapsed: 0s, ETA:     5s3 3
3 4
3 5
3 6
3 7
3 8
3 9
3 10
3 11
3 12
[>>                               ] 4/61, 12.6 task/s, elapsed: 0s, ETA:     5s4 13
4 14
4 15
4 16
4 17
4 18
4 19
[>>                               ] 5/61, 10.0 task/s, elapsed: 1s, ETA:     6s5 20
[>>>                               ] 6/61, 7.0 task/s, elapsed: 1s, ETA:     8s6 21
[>>>                               ] 7/61, 6.4 task/s, elapsed: 1s, ETA:     8s7 22
7 23
7 24
7 25
7 26
7 27
[>>>>                              ] 8/61, 6.7 task/s, elapsed: 1s, ETA:     8s8 28
[>>>>>                             ] 9/61, 7.2 task/s, elapsed: 1s, ETA:     7s9 29
9 30
9 31
[>>>>>                            ] 10/61, 7.6 task/s, elapsed:

In [None]:
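# Layout of the balloon annotation JSON (via_region_data.json), as read by convert_balloon_to_coco:
# key     >> '34020010494_e5cb88e1c4_k.jpg1115004'
# value   >> 'fileref', 'size', 'filename', 'base64_img_data', 'file_attributes', 'regions'
# regions >> '0' >> 'shape_attributes' ('all_points_x', 'all_points_y'), 'region_attributes'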

In [None]:
import pandas as pd

def convert_143_to_coco(ann_file, out_file, image_prefix, category_name='balloon'):
    """Convert a CSV of four-point polygon labels to a COCO-format JSON.

    The CSV must provide a ``File`` column (image file name) and the corner
    columns ``X1``..``X4`` and ``Y1``..``Y4``. Each row becomes one
    annotation; rows sharing a ``File`` value are attached to the same image.
    The result is written with ``mmcv.dump``.

    Args:
        ann_file: Path of the CSV label file.
        out_file: Path of the COCO-format file to write.
        image_prefix: Directory joined with each ``File`` value; the image is
            read from there to obtain its height and width.
        category_name: Name stored for the single category (id 0). Defaults
            to ``'balloon'``, the value that used to be hard-coded.

    Raises:
        ValueError: If any row has no ``File`` value.
    """
    with open(ann_file, 'r') as csv_file:
        data_infos = pd.read_csv(csv_file)

    # groupby drops missing keys by default, so reject them explicitly rather
    # than losing those rows without notice.
    if data_infos['File'].isna().any():
        raise ValueError("every row needs a 'File' value")

    annotations = []
    images = []
    obj_count = 0  # running annotation id, unique across all images

    # sort=False keeps the files in order of first appearance and hands over
    # each file's rows in one pass, instead of re-filtering the whole table
    # for every file.
    grouped = data_infos.groupby('File', sort=False)
    for idx, (filename, rows) in enumerate(grouped):
        img_path = osp.join(image_prefix, filename)
        height, width = mmcv.imread(img_path).shape[:2]

        images.append(dict(
            id=idx,
            file_name=filename,
            height=height,
            width=width))

        for _, obj in rows.iterrows():
            px = [obj['X1'], obj['X2'], obj['X3'], obj['X4']]
            py = [obj['Y1'], obj['Y2'], obj['Y3'], obj['Y4']]

            # Flatten to [x1, y1, ..., x4, y4] with every coordinate shifted
            # by half a pixel.
            # NOTE(review): presumably the shift moves integer pixel indices
            # to pixel centres; the bbox below uses the unshifted values.
            poly = [coord + 0.5 for point in zip(px, py) for coord in point]

            x_min, y_min, x_max, y_max = (
                min(px), min(py), max(px), max(py))

            # image_id    -> id of the image this object belongs to
            # id          -> id assigned to the object itself
            # category_id -> class label
            data_anno = dict(
                image_id=idx,
                id=obj_count,
                category_id=0,
                bbox=[x_min, y_min, x_max - x_min, y_max - y_min],
                # area of the axis-aligned bounding box, not of the polygon
                area=(x_max - x_min) * (y_max - y_min),
                segmentation=[poly],
                iscrowd=0)
            annotations.append(data_anno)
            obj_count += 1

    coco_format_json = dict(
        images=images,
        annotations=annotations,
        categories=[{'id': 0, 'name': category_name}])
    mmcv.dump(coco_format_json, out_file)
    
# Convert the CSV training labels into a COCO-format JSON file.
convert_143_to_coco(
    ann_file='../train_label.csv',
    out_file='../train_label.json',
    image_prefix='../train_image',
)

[106.5, 558.5, 175.5, 606.5, 169.5, 614.5, 100.5, 567.5]
[278.5, 150.5, 345.5, 136.5, 347.5, 146.5, 280.5, 160.5]
[236.5, 143.5, 295.5, 136.5, 296.5, 146.5, 237.5, 153.5]
[358.5, 94.5, 382.5, 140.5, 373.5, 145.5, 350.5, 99.5]
[194.5, 203.5, 217.5, 230.5, 210.5, 236.5, 187.5, 209.5]
[218.5, 230.5, 254.5, 297.5, 245.5, 302.5, 209.5, 235.5]
[366.5, 149.5, 444.5, 180.5, 440.5, 190.5, 362.5, 159.5]
[508.5, 248.5, 576.5, 199.5, 582.5, 207.5, 514.5, 256.5]
[505.5, 145.5, 576.5, 190.5, 570.5, 199.5, 499.5, 154.5]
[413.5, 201.5, 471.5, 261.5, 463.5, 269.5, 405.5, 209.5]
[420.5, 190.5, 475.5, 242.5, 468.5, 249.5, 413.5, 197.5]
[472.5, 193.5, 484.5, 236.5, 474.5, 238.5, 462.5, 196.5]
[497.5, 232.5, 516.5, 272.5, 507.5, 276.5, 488.5, 236.5]
[601.5, 218.5, 643.5, 260.5, 636.5, 267.5, 594.5, 225.5]
[526.5, 280.5, 546.5, 310.5, 537.5, 315.5, 518.5, 285.5]
[476.5, 249.5, 514.5, 305.5, 506.5, 311.5, 468.5, 255.5]
[515.5, 308.5, 554.5, 364.5, 546.5, 369.5, 507.5, 314.5]
[543.5, 310.5, 587.5, 372.5, 578.