In [8]:
import json
import os
import re

In [9]:
folder = "annotations/"
# dirname() strips the trailing slash, leaving the directory the output is written into.
output_dir = os.path.dirname(folder)
# exist_ok=True already makes this a no-op when the directory exists, so a
# separate os.path.exists() check is redundant (and racy).
os.makedirs(output_dir, exist_ok=True)

In [10]:
def read_json(filename):
    """Load and return the parsed content of a JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The deserialized JSON value (whatever ``json.load`` produces).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 so non-ASCII text is read the same way
    # regardless of the platform's locale default encoding.
    with open(filename, encoding="utf-8") as f:
        return json.load(f)

In [11]:
def write_json(data, filename):
    """Serialize ``data`` as indented JSON and write it to ``filename``.

    Args:
        data: Any JSON-serializable object.
        filename: Destination path; an existing file is overwritten.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If ``data`` contains values ``json.dump`` cannot serialize.
    """
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file must
    # be opened as UTF-8 rather than with the locale default encoding.
    with open(filename, 'w', encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

In [12]:
# Path of the source annotation file. Kept under its own name so that
# `ufo_in` only ever refers to the loaded data, never to a path string.
# NOTE(review): this is an absolute, machine-specific path — adjust per environment.
ufo_in_path = "/data/ephemeral/home/원본trainjson/japanese_receipt/train.json"
ufo_in = read_json(ufo_in_path)

# Components of the per-item id prefix: "<task_name>/images/<split>/".
task_name = "japanese"
split = "train"
id_prefix = task_name + "/images/" + split + "/"

# Destination of the converted annotation file.
out_name = "annotations/default.json"

In [13]:
def wrap_point_items(items):
    """Wrap a list of item dicts in the top-level output document.

    Args:
        items: The per-image item dicts to store under ``"items"``.

    Returns:
        A new dict with an empty ``"info"``, a fixed ``"categories"`` section
        (a single label named ``"1"`` with an ``"occluded"`` attribute, and an
        empty points category), and ``items`` stored as given.
    """
    return {
        "info": {},
        "categories": {
            "label": {
                "labels": [{"name": "1", "parent": "", "attributes": []}],
                "attributes": ["occluded"],
            },
            "points": {"items": []},
        },
        "items": items,
    }

def get_image_node(img_name):
    """Return the entry for ``img_name`` from the loaded ``ufo_in["images"]`` mapping.

    Raises:
        KeyError: If ``img_name`` is not present in ``ufo_in["images"]``.
    """
    return ufo_in["images"][img_name]

def node_dimensions(img_node):
    """Extract the image size fields from an image node.

    Args:
        img_node: Mapping that contains ``"img_w"`` and ``"img_h"`` keys.

    Returns:
        A new dict holding only ``"img_w"`` and ``"img_h"``.

    Raises:
        KeyError: If either key is missing from ``img_node``.
    """
    return {
        "img_w": img_node["img_w"],
        "img_h": img_node["img_h"],
    }

def wrap_annotations(idx: int, img_name: str, annotations: list[dict]):
    """Build the output item dict for a single image.

    Args:
        idx: Value stored as the item's ``"frame"`` attribute.
        img_name: Image key; appended to the module-level ``id_prefix`` to form
            the item id and used to look up the image's dimensions.
        annotations: Annotation dicts to attach to the item, stored as given.

    Returns:
        A dict with ``"id"``, ``"annotations"``, ``"attr"``, ``"point_cloud"``
        and ``"info"`` keys, in that order.
    """
    item = {"id": id_prefix + img_name}
    item["annotations"] = annotations
    item["attr"] = {"frame": idx}
    item["point_cloud"] = {"path": ""}
    # Image width/height come from the source annotation's image node.
    source_node = get_image_node(img_name)
    item["info"] = node_dimensions(source_node)
    return item

def wrap_vertices(vertices: list[float]):
    """Wrap a flat coordinate list in a polygon annotation dict.

    Args:
        vertices: Coordinate list stored unchanged under ``"points"``.

    Returns:
        A dict describing a ``"polygon"`` annotation with id, group, label id
        and z-order all set to 0 and ``"occluded"`` set to False.
    """
    polygon = dict(
        id=0,
        type="polygon",
        attributes=dict(occluded=False),
        group=0,
        label_id=0,
        points=vertices,
        z_order=0,
    )
    return polygon

def boxify_polygon(pgn: list[list[float | int]]) -> list[list[list[float | int]]]:
    num_vertices = len(pgn)
    if num_vertices == 4:
        return [[pgn]]
    try:
        return [
            [[pgn[i], pgn[i + 1], pgn[-i - 2], pgn[-i - 1]]]
            for i in range(num_vertices // 2 - 1)
        ]
    except:
        pass
    return []

def flatten_points(boxes: list[list[float | int]]) -> list[float | int]:
    return [coordinate for box in boxes for point in box for coordinate in point]

def extract_flat_points(image: dict):
    """Return one flat coordinate list per box found in ``image``.

    Args:
        image: Mapping whose values each carry a ``"points"`` polygon.

    Returns:
        A list of flat coordinate lists: every polygon is split with
        ``boxify_polygon`` and each resulting box is flattened with
        ``flatten_points``, in the mapping's iteration order.
    """
    flat_box_list = []
    for word in image.values():
        for boxed in boxify_polygon(word["points"]):
            flat_box_list.append(flatten_points(boxed))
    return flat_box_list

# Iterate images in sorted key order so the frame index assigned to each image
# is deterministic across runs.
image_keys = sorted(ufo_in["images"].keys())
# Map each image key to its "words" entry from the source annotations.
image_map = {k: ufo_in["images"][k]["words"] for k in image_keys}
# Per image: list of flat coordinate lists, one per extracted box.
flat_points = {fname: extract_flat_points(image) for fname, image in image_map.items()}

# Assemble the final document: one item per image, one polygon per box.
annotation = wrap_point_items(
    [
        wrap_annotations(
            idx_, img_name, [wrap_vertices(vertices) for vertices in points]
        )
        for idx_, (img_name, points) in enumerate(flat_points.items())
    ]
)

In [14]:
# Persist the assembled annotation document to the configured output path.
write_json(data=annotation, filename=out_name)