In [30]:
import os.path as osp
import mmcv

def convert_balloon_to_coco(ann_file, out_file, image_prefix):
    data_infos = mmcv.load(ann_file) #그냥 load
    annotations = []
    images = []
    obj_count = 0
    
    for idx, v in enumerate(mmcv.track_iter_progress(data_infos.values())):
        filename = v['filename']
        img_path = osp.join(image_prefix, filename)
        height, width = mmcv.imread(img_path).shape[:2]

        images.append(dict(
            id=idx,
            file_name=filename,
            height=height,
            width=width))

        bboxes = []
        labels = []
        masks = []
        for _, obj in v['regions'].items():  #index, obj
            assert not obj['region_attributes']
            obj = obj['shape_attributes']
            px = obj['all_points_x'] # list들
            py = obj['all_points_y']
            poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)] #0.5를 왜 더해주지?
            poly = [p for x in poly for p in x] # list로 한 번에 쭉 나열
            
            x_min, y_min, x_max, y_max = (
                min(px), min(py), max(px), max(py))
            
            # image_id는 이미지
            # id는 말 그대로 object에 할당된 id
            
            data_anno = dict(
                image_id=idx,
                id=obj_count,
                category_id=0,
                bbox=[x_min, y_min, x_max - x_min, y_max - y_min],
                area=(x_max - x_min) * (y_max - y_min),
                segmentation=[poly],
                iscrowd=0)
            annotations.append(data_anno)
            obj_count += 1

    coco_format_json = dict(
        images=images,
        annotations=annotations,
        categories=[{'id':0, 'name': 'balloon'}])
    mmcv.dump(coco_format_json, out_file)
    
convert_balloon_to_coco('./balloon/train/via_region_data.json', './balloon/coco_anno.json', './balloon/train')

[                                                  ] 0/61, elapsed: 0s, ETA:0 0
[                                  ] 1/61, 6.5 task/s, elapsed: 0s, ETA:     9s1 1
[>                                 ] 2/61, 9.6 task/s, elapsed: 0s, ETA:     6s2 2
[>                                ] 3/61, 11.4 task/s, elapsed: 0s, ETA:     5s3 3
3 4
3 5
3 6
3 7
3 8
3 9
3 10
3 11
3 12
[>>                               ] 4/61, 12.6 task/s, elapsed: 0s, ETA:     5s4 13
4 14
4 15
4 16
4 17
4 18
4 19
[>>                               ] 5/61, 10.0 task/s, elapsed: 1s, ETA:     6s5 20
[>>>                               ] 6/61, 7.0 task/s, elapsed: 1s, ETA:     8s6 21
[>>>                               ] 7/61, 6.4 task/s, elapsed: 1s, ETA:     8s7 22
7 23
7 24
7 25
7 26
7 27
[>>>>                              ] 8/61, 6.7 task/s, elapsed: 1s, ETA:     8s8 28
[>>>>>                             ] 9/61, 7.2 task/s, elapsed: 1s, ETA:     7s9 29
9 30
9 31
[>>>>>                            ] 10/61, 7.6 task/s, elapsed:

In [None]:
# key >> '34020010494_e5cb88e1c4_k.jpg1115004'
#value >> 'fileref', 'size', 'filename', 'base64_img_data', 'file_attributes', 'regions'
# regions >> '0' >> 

In [None]:
import pandas as pd

def convert_143_to_coco(ann_file, out_file, image_prefix, fold):
    
    with open(ann_file, 'r') as l:
        data_infos = pd.read_csv(l)
        

    
    for i in range(len(data_infos['File'])):
        if data_infos['File'][i] == '378.tif':
            data_infos['File'][i] = '328.tif'
            if data_infos['File'][i+1] != '378.tif':
                break
    
    image_name = (data_infos['File'].unique())
    image_num = (len(image_name))
    
    train_idx = [(fold-1)%5,(fold)%5,(fold+1)%5,(fold+2)%5] 
    val_idx = [(fold+3)%5]
    
    
    
    annotations_train = []
    images_train = []
    obj_count_train = 0
    
    annotations_val = []
    images_val = []
    obj_count_val = 0
    
    for idx, filename in enumerate((image_name)):
        if idx %50 ==0:
            print(f"{idx}/{image_num}")
        
        if idx%5 in train_idx:
            
            img_path = osp.join(image_prefix, filename)
            height, width = mmcv.imread(img_path).shape[:2]

            images_train.append(dict(
                id=idx,
                file_name=filename,
                height=height,
                width=width))
        
            data_per_filename = data_infos[data_infos.File == filename]
            for _,obj in data_per_filename.iterrows():  #index, obj
                px = [obj['X1'],obj['X2'],obj['X3'],obj['X4']]
                py = [obj['Y1'],obj['Y2'],obj['Y3'],obj['Y4']]
                poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)] #0.5를 왜 더해주지?
                poly = [p for x in poly for p in x]
                x_min, y_min, x_max, y_max = (
                    min(px), min(py), max(px), max(py))

                # image_id는 이미지
                # id는 말 그대로 object에 할당된 id
                # category_id >> label
                data_anno = dict(
                    image_id=idx,
                    id=obj_count_train,
                    category_id=0,
                    bbox=[x_min, y_min, x_max - x_min, y_max - y_min],
                    area=(x_max - x_min) * (y_max - y_min),
                    segmentation=[poly],
                    iscrowd=0) # category_id >> obj['Class']

                annotations_train.append(data_anno)
                obj_count_train += 1
        
        elif idx%5 in val_idx:
            img_path = osp.join(image_prefix, filename)
            height, width = mmcv.imread(img_path).shape[:2]

            images_val.append(dict(
                id=idx,
                file_name=filename,
                height=height,
                width=width))
        
            data_per_filename = data_infos[data_infos.File == filename]
            for _,obj in data_per_filename.iterrows():  #index, obj
                px = [obj['X1'],obj['X2'],obj['X3'],obj['X4']]
                py = [obj['Y1'],obj['Y2'],obj['Y3'],obj['Y4']]
                poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)] #0.5를 왜 더해주지?
                poly = [p for x in poly for p in x]
                x_min, y_min, x_max, y_max = (
                    min(px), min(py), max(px), max(py))

                # image_id는 이미지
                # id는 말 그대로 object에 할당된 id
                # category_id >> label
                data_anno = dict(
                    image_id=idx,
                    id=obj_count_val,
                    category_id=0,
                    bbox=[x_min, y_min, x_max - x_min, y_max - y_min],
                    area=(x_max - x_min) * (y_max - y_min),
                    segmentation=[poly],
                    iscrowd=0) # category_id >> obj['Class']

                annotations_val.append(data_anno)
                obj_count_val += 1
            
           

    coco_format_json_train = dict(
        images=images_train,
        annotations=annotations_train,
        categories=[{'id':0, 'name': '0'}]) #  [{'id':1, 'name': '1'}, ......]
    mmcv.dump(coco_format_json_train, out_file+'_'+str(fold)+'.json')
    
    coco_format_json_val = dict(
        images=images_val,
        annotations=annotations_val,
        categories=[{'id':0, 'name': '0'}]) #  [{'id':1, 'name': '1'}, ......]
    mmcv.dump(coco_format_json_val, out_file.replace('train','val')+'_'+str(fold)+'.json')
    
convert_143_to_coco('../train_label.csv', '../train_label', '../train_image', fold = 5)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_infos['File'][i] = '328.tif'


0/400
50/400
100/400
150/400
200/400
