In [None]:
import os
import json
import numpy as np
from PIL import Image
from pycocotools import mask

def rle_to_serializable(rle):
    """Return a copy of an RLE dictionary whose values can be JSON-encoded.

    ``bytes`` values are decoded as UTF-8 text and NumPy integer scalars are
    converted to built-in ``int``; every other value is passed through
    unchanged. The input dictionary is not modified.
    """
    converted = {}
    for name, item in rle.items():
        if isinstance(item, bytes):
            converted[name] = item.decode('utf-8')
        elif isinstance(item, np.integer):
            converted[name] = int(item)
        else:
            converted[name] = item
    return converted

def create_coco_annotations(image_dir, mask_dir):
    """Build a COCO-style annotation dictionary from images and class masks.

    Every file in ``image_dir`` whose name ends with ``.jpg`` is paired with
    the ``.png`` file of the same base name in ``mask_dir``; images without
    such a mask are skipped. Each distinct non-zero value found in a mask
    yields one annotation (RLE segmentation, area and bounding box) for the
    corresponding image.

    Args:
        image_dir: Directory containing the ``.jpg`` images.
        mask_dir: Directory containing the ``.png`` masks.

    Returns:
        A dict with the keys ``"images"``, ``"annotations"`` and
        ``"categories"``. Category ids are handed out as 1, 2, ... in the
        order mask values are first encountered; the original mask value is
        kept in the category name (``class_<value>``).
    """
    jpg_suffix = ".jpg"
    coco = {
        "images": [],
        "annotations": [],
        "categories": []
    }
    category_ids = {}
    # Annotation ids start at 1: pycocotools' COCOeval records matched
    # ground-truth ids in zero-initialised arrays and tests them for
    # truthiness, so an annotation with id 0 would count as "no match".
    annotation_id = 1
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # generated image, category and annotation ids reproducible.
    for idx, file_name in enumerate(sorted(os.listdir(image_dir))):
        if not file_name.endswith(jpg_suffix):
            continue
        image_id = idx + 1
        image_path = os.path.join(image_dir, file_name)
        # Swap only the trailing extension; str.replace() would also rewrite
        # any ".jpg" that happens to occur earlier in the file name.
        mask_name = file_name[:-len(jpg_suffix)] + ".png"
        mask_path = os.path.join(mask_dir, mask_name)
        if not os.path.exists(mask_path):
            continue

        # Record the image entry; the context manager closes the file handle.
        with Image.open(image_path) as image:
            width, height = image.size
        coco["images"].append({
            "id": image_id,
            "file_name": file_name,
            "height": height,
            "width": width,
        })

        # Load the mask fully into memory so its file can be closed as well.
        with Image.open(mask_path) as mask_file:
            mask_image = np.array(mask_file)

        # One annotation per distinct non-zero mask value.
        for category_id in np.unique(mask_image):
            if category_id == 0:  # skip the background
                continue
            if category_id not in category_ids:
                category_ids[category_id] = len(category_ids) + 1
                coco["categories"].append({
                    "id": category_ids[category_id],
                    "name": f"class_{category_id}"
                })

            # Binary mask of the current value, in the Fortran-ordered uint8
            # layout that pycocotools' encoder requires.
            binary_mask = (mask_image == category_id).astype(np.uint8)
            rle = mask.encode(np.asfortranarray(binary_mask))

            # encode() may hand back a list of RLEs (it does so for 3-D
            # input); only the first one is used.
            if isinstance(rle, list):
                rle = rle[0]

            area = mask.area(rle)
            bbox = mask.toBbox(rle).tolist()

            coco["annotations"].append({
                "id": annotation_id,
                "image_id": image_id,
                "category_id": category_ids[category_id],
                # Convert bytes / NumPy scalars so the RLE can be JSON-encoded.
                "segmentation": rle_to_serializable(rle),
                "area": int(area) if isinstance(area, np.integer) else area,
                "bbox": [int(b) if isinstance(b, np.integer) else b for b in bbox],
                "iscrowd": 0
            })
            annotation_id += 1
    return coco

# Locations of the source dataset: images and their per-pixel class masks.
image_dir, mask_dir = "cv_open_dataset/open_img", "cv_open_dataset/open_msk"
# Destination of the generated COCO annotation file.
output_path = "annotations.json"

# Build the annotation dictionary, then write it out as JSON.
coco_annotations = create_coco_annotations(image_dir, mask_dir)
with open(output_path, "w") as f:
    json.dump(coco_annotations, fp=f)
