# Преобразование данных в нужный формат (COCO format)

Используемые библиотеки:

In [1]:
import json
import math
import os
from typing import Dict, List, Tuple

import cv2
import numpy as np
from tqdm.auto import tqdm

from cropper.utils import get_full_boxes

DATA_PATH = r'data'

Основные функции:

In [2]:
def is_point_in_image(point: List[int], image_bbox: List) -> bool:
    """ Tell whether a point falls inside the image rectangle.

    `image_bbox` is read as [x_min, y_min, x_max, y_max]; the minimum edges are
    inclusive and the maximum edges are exclusive.
    """
    x_inside = image_bbox[0] <= point[0] < image_bbox[2]
    if not x_inside:
        # Short-circuit: the y coordinate is not inspected when x is already outside.
        return x_inside
    return image_bbox[1] <= point[1] < image_bbox[3]


def get_bbox(quad: List, image_bbox: List) -> List:
    """ Compute the axis-aligned bounding box of a quad, clipped to the image.

    Args:
        quad: sequence of [x, y] points.
        image_bbox: image rectangle as [x_min, y_min, x_max, y_max].

    Returns:
        [x, y, width, height] of the clipped box. Width and height are never
        negative: a quad lying completely outside the image collapses to a
        zero-sized box on the nearest image edge.

    Raises:
        ValueError: if `quad` is empty.
    """
    xs = [pos[0] for pos in quad]
    ys = [pos[1] for pos in quad]
    left, top, right, bottom = image_bbox[0], image_bbox[1], image_bbox[2], image_bbox[3]

    # Clamp both corners into the image range. Clamping only one side of each
    # corner (as a plain max/min would) yields a negative size when the quad
    # does not intersect the image at all.
    xA = max(min(min(xs), right), left)
    yA = max(min(min(ys), bottom), top)
    xB = min(max(max(xs), left), right)
    yB = min(max(max(ys), top), bottom)
    return [xA, yA, xB - xA, yB - yA]


def get_keypoints(quad: List, image_bbox: List) -> List:
    """ Flatten the quad corners into a COCO-style keypoint list.

    Each corner contributes its coordinates followed by a flag: 2 when the
    corner lies inside `image_bbox`, 1 otherwise. The corner order is checked
    first: corner 0 must be left of corner 1, corner 1 above corner 2,
    corner 2 right of corner 3, and corner 3 below corner 0 (image coordinates,
    y grows downwards).
    """
    assert (
        quad[0][0] < quad[1][0]
        and quad[1][1] < quad[2][1]
        and quad[2][0] > quad[3][0]
        and quad[3][1] > quad[0][1]
    )
    flat = []
    for vertex in quad:
        visibility = 2 if is_point_in_image(vertex, image_bbox) else 1
        flat += vertex + [visibility]
    return flat


def _strip_parent_dir(path: str, parent_dir: str) -> str:
    """ Return `path` relative to `parent_dir` if it lies inside it, otherwise `path` unchanged. """
    if not parent_dir:
        # An empty prefix matches every string; slicing would chop off the first character.
        return path
    separators = tuple(sep for sep in (os.sep, os.altsep) if sep)
    if parent_dir.endswith(separators):
        prefixes = (parent_dir,)
    else:
        # Require a separator after the directory name so that 'data' does not match 'data2/...'.
        prefixes = tuple(parent_dir + sep for sep in separators)
    for prefix in prefixes:
        if path.startswith(prefix):
            return path[len(prefix):]
    return path


def write_coco_format(img_paths: List[str], ann_paths: List[str], target_json: str) -> None:
    """ Convert image/annotation pairs into a single COCO-format JSON file.

    Args:
        img_paths: image files; the i-th image is paired with the i-th annotation.
        ann_paths: annotation JSON files, each holding a 'quad' entry with the
            four corner points of the document.
        target_json: output file. Image paths located under its directory are
            stored relative to that directory.

    Raises:
        OSError: if an image cannot be read by OpenCV.
    """
    json_content = {
        'images': [],
        'annotations': [],
        'categories': [
            {'id': 1, 'name': 'document'}
        ]
    }

    parent_dir = os.path.dirname(target_json)
    # Give tqdm the number of pairs so that it can render real progress.
    total = min(len(img_paths), len(ann_paths))
    pairs = tqdm(zip(img_paths, ann_paths), total=total)
    for image_id, (img_path, ann_path) in enumerate(pairs):
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError('Cannot read image (missing or undecodable): {}'.format(img_path))
        height, width = image.shape[0], image.shape[1]

        json_content['images'].append({
            'file_name': _strip_parent_dir(img_path, parent_dir),
            'height': height,
            'width': width,
            'id': image_id
        })

        with open(ann_path, 'r', encoding='utf-8') as ann_file:
            quad = json.load(ann_file)['quad']
        image_bbox = [0, 0, width, height]
        label_bbox = get_bbox(quad, image_bbox)
        keypoints = get_keypoints(quad, image_bbox)

        ann_data = {
            'segmentation': [single_coord for coord_pair in quad for single_coord in coord_pair],
            'keypoints': keypoints,
            'num_keypoints': 4,
            # NOTE(review): this is the area of the whole image, not of the quad;
            # presumably intentional because full-image boxes are used downstream - confirm.
            'area': height * width,
            'iscrowd': 0,
            'image_id': image_id,
            'bbox': label_bbox,
            'category_id': 1,
            # One annotation per image, so the image index doubles as the annotation id.
            'id': image_id
        }
        json_content['annotations'].append(ann_data)

    with open(target_json, 'w', encoding='utf-8') as json_file:
        json.dump(json_content, json_file, indent=None)


def get_filenames(data_path: str, test_gt_path: str) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]]:
    """ Split the dataset into train/test subsets and collect image and annotation paths.

    Annotation files are the '.json' files found in leaf directories (those
    without subdirectories) under `data_path`. A file whose name appears in the
    test ground truth goes to 'test'. Every other file goes to 'train' and, in
    addition, to 'train_det' when both numbers parsed from its name are greater
    than 10, or to 'train_crop' otherwise.

    Args:
        data_path: root directory to walk.
        test_gt_path: JSON file whose keys identify the test samples; the part
            after the last '|' of each key is compared with the file name.

    Returns:
        (img_paths, ann_paths): two dicts keyed by 'train', 'train_det',
        'train_crop' and 'test', holding parallel lists of paths.

    Raises:
        ValueError: if a non-test file name does not carry two-digit numbers at
            the expected positions (names are assumed to end in 'NN_NN.json').
    """
    img_paths = {'train': [], 'train_det': [], 'train_crop': [], 'test': []}
    ann_paths = {'train': [], 'train_det': [], 'train_crop': [], 'test': []}

    with open(test_gt_path, 'r', encoding='utf-8') as f:
        json_contents = json.load(f)
    # A set keeps the per-file membership test O(1).
    test_samples = {name.split('|')[-1] for name in json_contents}

    for root, dirnames, filenames in os.walk(data_path):
        if dirnames:
            # Only leaf directories hold per-image annotations.
            continue
        for filename in filenames:
            if not filename.endswith('.json'):
                continue
            if filename in test_samples:
                cur_sets = ['test']
            else:
                doc_num = int(filename[-10:-8])
                internal_num = int(filename[-7:-5])
                if doc_num > 10 and internal_num > 10:
                    cur_sets = ['train', 'train_det']
                else:
                    cur_sets = ['train', 'train_crop']

            ann_path = os.path.join(root, filename)
            # Images mirror the annotation tree: 'ground_truth' -> 'images', '.json' -> '.png'.
            img_path = ann_path.replace('ground_truth', 'images').replace('.json', '.png')
            for cur_set in cur_sets:
                ann_paths[cur_set].append(ann_path)
                img_paths[cur_set].append(img_path)

    return img_paths, ann_paths

Преобразование данных:

In [3]:
# Collect image and annotation paths per subset; 'gt.json' (resolved against the
# current working directory) lists the samples that belong to the test subset.
img_paths, ann_paths = get_filenames(os.path.join(DATA_PATH, 'midv500_compressed'), 'gt.json')

In [4]:
# Write the COCO annotations for the whole 'train' subset to DATA_PATH/train_gt.json.
write_coco_format(img_paths['train'], ann_paths['train'], os.path.join(DATA_PATH, 'train_gt.json'))

0it [00:00, ?it/s]

In [5]:
# Write the COCO annotations for the 'train_det' subset to DATA_PATH/train_det_gt.json.
write_coco_format(img_paths['train_det'], ann_paths['train_det'], os.path.join(DATA_PATH, 'train_det_gt.json'))

0it [00:00, ?it/s]

In [6]:
# Write the COCO annotations for the 'train_crop' subset to DATA_PATH/train_crop_gt.json.
write_coco_format(img_paths['train_crop'], ann_paths['train_crop'], os.path.join(DATA_PATH, 'train_crop_gt.json'))

0it [00:00, ?it/s]

In [7]:
# Write the COCO annotations for the 'test' subset to DATA_PATH/test_gt.json.
write_coco_format(img_paths['test'], ann_paths['test'], os.path.join(DATA_PATH, r'test_gt.json'))

0it [00:00, ?it/s]

Создадим файлы с боксами (для обучающей и тестовой выборок), необходимыми для обучения и использования HRNet; в качестве боксов возьмём изображения целиком:

In [None]:
# Generate the box files from the train and test annotation files. Paths are
# built with os.path.join instead of hard-coded backslashes so that the cell
# also works on non-Windows systems.
get_full_boxes(os.path.join(DATA_PATH, 'train_gt.json'), os.path.join('cropper', 'train_boxes.json'))
get_full_boxes(os.path.join(DATA_PATH, 'test_gt.json'), os.path.join('cropper', 'test_boxes.json'))