In [3]:
from typing import Dict
import json
import datetime
import os

# Timestamp of this notebook run, rendered once as 'YYYY-MM-DD HH:MM:SS'.
now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

# Source UFO annotation file and destination of the converted COCO file.
input_path = '../../data/medical/ufo/train.json'
output_path = '../../data/medical/ufo/train_coco.json'

In [4]:
# Dataset-level metadata written under the COCO "info" key.
info = {
    'year': 2024,
    'version': '1.0',
    'description': 'OCR Competition Data',
    'contributor': 'Naver Boostcamp',
    'url': 'https://aistages-api-public-prod.s3.amazonaws.com/app/Competitions/000273/data/data.tar.gz',
    'date_created': now
}
# COCO stores "licenses" as a list of license records. The id is an int so it
# matches the integer license reference (1) written on each image record.
licenses = [{
    'id': 1,
    'name': 'For Naver Boostcamp Competition',
    'url': None
}]
# Single category: every annotated region is a word.
categories = [{
    'id': 1,
    'name': 'word'
}]

In [5]:
def ufo_to_coco(file: Dict, output_path: str) -> None:
    """Convert UFO-format annotations into a COCO-format JSON file.

    Reads the module-level names `info`, `licenses`, `categories` and `now`
    for the dataset metadata and timestamps.

    Args:
        file: Mapping of image file name to its UFO record. Each record is
            read for 'img_w', 'img_h' and 'words'; each word is read for
            'points' (a sequence of [x, y] pairs) and 'illegibility'.
        output_path: Path of the COCO JSON file to write.

    Words flagged as illegible, and words without any points, are not
    exported. Image ids and annotation ids are assigned sequentially
    starting at 1.
    """
    images = []
    annotations = []
    annotation_id = 1  # COCO ids start at 1
    for img_id, (fname, data) in enumerate(file.items(), start=1):  # COCO ids start at 1
        images.append({
            "id": img_id,
            "width": data['img_w'],
            "height": data['img_h'],
            "file_name": fname,
            "license": 1,
            "flickr_url": None,
            "coco_url": None,
            "date_captured": now
        })
        for annotation in data['words'].values():
            if annotation['illegibility']:
                continue
            points = annotation['points']
            if not points:
                # min()/max() raise on an empty sequence; a word without
                # vertices has no box to export, so skip it instead of
                # aborting the whole conversion.
                continue
            xs = [point[0] for point in points]
            ys = [point[1] for point in points]
            min_x, min_y = min(xs), min(ys)
            width = max(xs) - min_x
            height = max(ys) - min_y
            annotations.append({
                "id": annotation_id,
                "image_id": img_id,
                "category_id": 1,
                # COCO polygons are flat: [x1, y1, x2, y2, ...]
                "segmentation": [[value for sublist in points for value in sublist]],
                # Area of the axis-aligned bounding box, not of the polygon.
                "area": width * height,
                "bbox": [min_x, min_y, width, height],
                "iscrowd": 0
            })
            annotation_id += 1
    coco = {
        'info': info,
        'images': images,
        'annotations': annotations,
        # COCO expects a list of license records; wrap a single record.
        'licenses': licenses if isinstance(licenses, list) else [licenses],
        'categories': categories
    }
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(coco, f, indent=4)

In [6]:
# Load the UFO annotations and convert the per-image records to COCO.
# JSON text is UTF-8, so read it explicitly as such rather than relying on the
# platform's default encoding.
with open(input_path, 'r', encoding='utf-8') as f:
    file = json.load(f)
ufo_to_coco(file['images'], output_path)

## COCO to LabelMe

In [7]:
def convert_coco_to_labelme(coco_json_path, output_dir):
    """Split a COCO annotation file into one LabelMe JSON file per image.

    Args:
        coco_json_path: Path of the COCO JSON file to read. Its 'images' and
            'annotations' lists are used.
        output_dir: Directory that receives the LabelMe files; created when
            it does not exist.

    For each image a file named after `file_name` with the extension replaced
    by '.json' is written under `output_dir`. Every polygon of every
    annotation of that image becomes one "polygon" shape whose label is the
    annotation's category id as a string.
    """
    # Load the COCO data
    with open(coco_json_path, 'r', encoding='utf-8') as f:
        coco = json.load(f)

    # Create the output directory if it does not exist
    os.makedirs(output_dir, exist_ok=True)

    # Group annotations by image once, instead of rescanning the full
    # annotation list for every image.
    annotations_by_image = {}
    for ann in coco['annotations']:
        annotations_by_image.setdefault(ann['image_id'], []).append(ann)

    # Process the annotations image by image
    for img in coco['images']:
        shapes = []
        for ann in annotations_by_image.get(img['id'], []):
            segmentation = ann['segmentation']
            if not isinstance(segmentation, list):
                # RLE masks are stored as a dict and carry no polygon
                # vertices that could be exported as a LabelMe polygon.
                continue
            for polygon in segmentation:
                shapes.append({
                    "label": str(ann['category_id']),
                    # COCO polygons are flat [x1, y1, x2, y2, ...];
                    # LabelMe expects a list of [x, y] pairs.
                    "points": [[x, y] for x, y in zip(polygon[0::2], polygon[1::2])],
                    "group_id": None,
                    "shape_type": "polygon",
                    "flags": {}
                })

        # Build the LabelMe JSON structure
        labelme_data = {
            "version": "4.5.0",
            "flags": {},
            "shapes": shapes,
            "imagePath": img['file_name'],
            "imageData": None,
            "imageHeight": img['height'],
            "imageWidth": img['width']
        }

        # Save the LabelMe-format JSON file
        labelme_json_path = os.path.join(output_dir, os.path.splitext(img['file_name'])[0] + '.json')
        # file_name may contain sub-directories; make sure they exist.
        os.makedirs(os.path.dirname(labelme_json_path), exist_ok=True)
        with open(labelme_json_path, 'w', encoding='utf-8') as f:
            json.dump(labelme_data, f, indent=2)


In [9]:
# Usage example: export the converted COCO file as per-image LabelMe JSON files.
convert_coco_to_labelme(
    coco_json_path='../../data/medical/ufo/train_coco.json',
    output_dir='../../data/medical/ufo/directory',
)