In [5]:
import json
import numpy as np
import cv2
from pycocotools import mask

def convert_to_coco_format(input_file, output_file, *, category_id_map=None):
    """Convert an exported instance-segmentation JSON file to COCO format.

    The input file is read, its categories, images and annotations are copied
    into a fresh COCO dictionary (with fixed ``info``/``licenses`` metadata),
    RLE segmentations are converted to polygon lists, and the result is
    written to ``output_file`` as compact JSON.

    Args:
        input_file: Path of the source JSON file. It must contain the
            ``categories``, ``images`` and ``annotations`` sections; the
            ``info`` section is optional.
        output_file: Path the COCO JSON is written to (overwritten if it
            exists).
        category_id_map: Optional mapping ``{old_id: new_id}`` applied to
            category ids and annotation ``category_id`` values. Defaults to
            ``{3: 2}``, the remapping this converter has always applied.

    Raises:
        KeyError: If a required section or a required per-item field is
            missing from the input data.
    """
    if category_id_map is None:
        category_id_map = {3: 2}

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # "info" is optional in many exports, so fall back to empty strings
    # instead of failing the whole conversion on a missing header.
    source_info = data.get("info") or {}

    coco_format = {
        "info": {
            "description": source_info.get("description", ""),
            "version": source_info.get("version", ""),
            "year": 2024,
            "contributor": "Alex Davis",
            "date_created": "2024-07-26"
        },
        "licenses": [
            {
                "id": 1,
                "name": "CC BY 4.0",
                "url": "http://creativecommons.org/licenses/by/4.0/"
            }
        ],
        "categories": [],
        "images": [],
        "annotations": []
    }

    # Remap category ids. When a remapped id collides with one that was
    # already emitted, keep the first entry only: COCO category ids must be
    # unique, and the annotations below are remapped onto the surviving id.
    seen_category_ids = set()
    for category in data["categories"]:
        category_id = category_id_map.get(category["id"], category["id"])
        if category_id in seen_category_ids:
            continue
        seen_category_ids.add(category_id)
        coco_format["categories"].append({
            "id": category_id,
            "name": category["name"],
            "supercategory": "none"
        })

    # Every image gets the single license above and a fixed capture date.
    coco_format["images"] = [
        {
            "id": image["id"],
            "file_name": image["file_name"],
            "height": image["height"],
            "width": image["width"],
            "license": 1,
            "date_captured": "2024-07-26 00:00:00"
        }
        for image in data["images"]
    ]

    for annotation in data["annotations"]:
        segmentation = annotation["segmentation"]
        # Only RLE dicts are converted; polygon lists pass through untouched.
        if (isinstance(segmentation, dict)
                and 'counts' in segmentation and 'size' in segmentation):
            segmentation = _rle_to_polygons(segmentation)
        coco_format["annotations"].append({
            "id": annotation["id"],
            "image_id": annotation["image_id"],
            "category_id": category_id_map.get(
                annotation["category_id"], annotation["category_id"]),
            "segmentation": segmentation,
            "area": annotation["area"],
            "bbox": annotation["bbox"],
            "iscrowd": annotation["iscrowd"]
        })

    # Compact separators keep the output file small.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(coco_format, f, separators=(',', ':'))


def _rle_to_polygons(rle):
    """Decode a COCO RLE dict and return its outer contours as polygons.

    Each polygon is a flat ``[x0, y0, x1, y1, ...]`` list of ints. Contours
    with fewer than three points are dropped because they cannot form a
    polygon, so the returned list may be empty.
    """
    # Uncompressed RLE stores "counts" as a list of ints; pycocotools can only
    # decode the compressed form, so convert it first.
    if isinstance(rle["counts"], list):
        height, width = rle["size"]
        rle = mask.frPyObjects(rle, height, width)

    # pycocotools builds its masks in column-major order; hand OpenCV a
    # C-contiguous array so the input layout is never an issue.
    binary_mask = np.ascontiguousarray(mask.decode(rle))

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x and
    # (contours, hierarchy) on 4.x; the contours are second-to-last in both.
    contours = cv2.findContours(
        binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    return [
        contour.flatten().tolist()
        for contour in contours
        if contour.size >= 6  # at least three (x, y) points
    ]

# Source export and destination COCO file, both relative to the working directory
input_file = './export_coco-instance_davis_alexander_TEM_Project4_TEM_V01.json'
output_file = './coco_format_dataset.json'

# Run the conversion, then report where the result was written
convert_to_coco_format(input_file=input_file, output_file=output_file)

print(f"Converted dataset saved to {output_file}")



Converted dataset saved to ./coco_format_dataset.json
