## COCO JSON to CSV Conversion

This notebook shows how to generate a CSV for gold standard upload from a COCO JSON file. The example uses:

- `json` and `csv` libraries for file handling
- `pandas` to prepare data for CSV conversion

In [None]:
# # Imports
import json
import csv
import pandas as pd

In [None]:
# # Variables

# Input: location of the COCO JSON file to convert
coco_json_path = './example/input/example.json'

# Output: location where the generated CSV file is written
csv_path = './example/output/example.csv'

# String placed in front of every image file name in the CSV output
origin_prefix = 'example_prefix/'

# COCO category id used to restrict which annotations are exported;
# leave as None to export annotations of every category
category_id = None

In [None]:
# # Convert bbox data to WKT polygon

def bbox_to_wkt(bbox, width, height):
    """Return a WKT POLYGON string for ``bbox`` in percent of the image size.

    ``bbox`` is unpacked as ``(x_min, y_min, bbox_width, bbox_height)``.
    Horizontal values are divided by ``width`` and vertical values by
    ``height``, then multiplied by 100.
    """
    left, top, box_w, box_h = bbox

    def as_percent(value, total):
        # Position expressed as a percentage of the full image dimension
        return (value / total) * 100

    x0 = as_percent(left, width)
    y0 = as_percent(top, height)
    x1 = as_percent(left + box_w, width)
    y1 = as_percent(top + box_h, height)

    # Closed ring: the starting corner is repeated as the final point
    ring = [(x0, y0), (x0, y1), (x1, y1), (x1, y0), (x0, y0)]
    points = ", ".join(f"{x} {y}" for x, y in ring)
    return f"POLYGON(({points}))"


In [None]:
# # Extract images and annotations data from COCO JSON file

def parse_coco_json_data():
    """Load the COCO JSON file and group its bounding boxes by image.

    Reads the module-level settings ``coco_json_path`` (file to load) and
    ``category_id`` (optional category filter; ``None`` keeps everything).

    Returns:
        A tuple ``(images_dict, annotations_dict)``:

        * ``images_dict`` maps image id -> ``(file_name, width, height)`` and
          only contains images that kept at least one annotation.
        * ``annotations_dict`` maps image id -> list of ``bbox`` values.

    Annotations without a ``bbox``, and annotations whose ``image_id`` is not
    listed under ``images``, are reported with ``print`` and skipped.
    """
    # Read as UTF-8 explicitly rather than relying on the platform default
    with open(coco_json_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Map image_id to file_name, width, and height
    images_dict = {
        image['id']: (image['file_name'], image['width'], image['height'])
        for image in data['images']
    }

    annotations_dict = {}
    for annotation in data['annotations']:
        image_id = annotation['image_id']

        # Skip annotations that don't match the category_id if it's specified
        if category_id is not None and annotation.get('category_id') != category_id:
            continue

        # Without the image's size the bbox cannot be converted later, and
        # keeping the annotation would make the final filter raise KeyError
        if image_id not in images_dict:
            print(f"Unknown image ID: {image_id} for annotation ID: {annotation.get('id')}")
            continue

        # Skip annotations that are missing the bbox; .get keeps the warning
        # itself from failing when the annotation has no id
        if 'bbox' not in annotation:
            print(f"No bbox for annotation ID: {annotation.get('id')} in image ID: {image_id}")
            continue

        annotations_dict.setdefault(image_id, []).append(annotation['bbox'])

    # Keep only images with matching annotations, in first-annotation order
    images_dict = {image_id: images_dict[image_id] for image_id in annotations_dict}

    return images_dict, annotations_dict


In [None]:
# # Prepare the data to be loaded into the CSV file

def prepare_csv_data(images_dict, annotations_dict):
    """Build the list of CSV rows, one dict per entry of ``images_dict``.

    Each row carries the prefixed file name in ``customer_origin`` and, in
    ``answer``, a JSON array string of the WKT polygons for that image. The
    remaining columns are left as ``None``.
    """
    rows = []

    for image_id, (file_name, width, height) in images_dict.items():
        # One WKT polygon per bbox recorded for this image (none -> empty list)
        polygons = [
            bbox_to_wkt(bbox, width, height)
            for bbox in annotations_dict.get(image_id, [])
        ]

        # Serialize the polygons as a JSON array string
        answer = json.dumps(polygons)

        row = {
            "case_id": None,
            "content_id": None,
            "labeling_state": None,
            "customer_origin": origin_prefix + file_name,
            "answer": answer,
            "notes": None,
        }
        rows.append(row)

    return rows

In [None]:
# # Create the CSV

def write_to_csv(data):
    """Write the prepared rows to the CSV file at the module-level ``csv_path``.

    Args:
        data: list of row dictionaries; it is converted to a pandas
            DataFrame, so the dictionary keys become the CSV columns.

    The parent directory of ``csv_path`` is created if it does not exist.
    """
    # Function-local import: the notebook's import cell does not provide pathlib
    from pathlib import Path

    # to_csv does not create missing folders, so make sure the target exists
    Path(csv_path).parent.mkdir(parents=True, exist_ok=True)

    # Convert the list of dictionaries to a pandas DataFrame
    df = pd.DataFrame(data)
    # Write the DataFrame to a CSV file without the index column
    df.to_csv(csv_path, index=False, quoting=csv.QUOTE_MINIMAL)

In [None]:
# Run the conversion: parse the COCO JSON, build the CSV rows, then write the CSV
images_dict, annotations_dict = parse_coco_json_data()
output_data = prepare_csv_data(images_dict, annotations_dict)
write_to_csv(output_data)