# CSV to COCO

## Import packages and set the input paths

In [1]:
import csv
import json
import os
import random

In [2]:
# Path of the annotation CSV that Step 1 reads with csv.DictReader
# (presumably a VIA export, judging by the file name -- confirm).
csv_file_path = 'annotations/tiles_VIA_annotations_800_120.csv'
# Folder that holds the source images; Step 4 copies files out of it.
images_folder = 'tiles'

## Step 1: Parse the CSV file

In [3]:
# Read every CSV row into a list of dicts keyed by the header names.
# newline='' hands line-ending handling to the csv module, which is what the
# csv documentation requires for quoted fields containing embedded newlines
# to be parsed correctly.
with open(csv_file_path, 'r', newline='') as csvfile:
    annotations = list(csv.DictReader(csvfile))

## Step 2: Create COCO-format annotations

In [13]:
# Bucket the CSV rows by the image file they belong to
annotations_by_image = {}
for row in annotations:
    annotations_by_image.setdefault(row['filename'], []).append(row)

# Materialise (file_name, rows) pairs so they can be shuffled in place
image_list = list(annotations_by_image.items())

# A fixed seed keeps the shuffle (and therefore the split) reproducible
random.seed(42)
random.shuffle(image_list)

# Split boundaries: first 60% -> train, next 20% -> validation, rest -> test
image_count = len(image_list)
train_idx = int(image_count * 0.6)
val_idx = int(image_count * 0.8)

train_images, val_images, test_images = (
    image_list[:train_idx],
    image_list[train_idx:val_idx],
    image_list[val_idx:],
)

# Collect the distinct category values used across all annotation rows.
# The category of a row is the first value of its region_attributes object.
category_set = set()
for annotation in annotations:
    region_attributes_str = annotation['region_attributes'].strip()
    if not region_attributes_str:
        continue

    try:
        region_attributes = json.loads(region_attributes_str)
    except json.JSONDecodeError:
        print(f"Error decoding JSON in annotation: {annotation}")
        continue

    # An empty object ("{}") or a non-object value has no first attribute
    # value to read, so there is no category to record for this row.
    if not isinstance(region_attributes, dict) or not region_attributes:
        continue

    category = next(iter(region_attributes.values()))
    category_set.add(category)

# Function to create a COCO-format dictionary for a given set of images
def _load_json_dict(text):
    """Return *text* parsed as a JSON object, or None if blank, invalid, or not an object."""
    text = (text or "").strip()
    if not text:
        return None
    try:
        value = json.loads(text)
    except json.JSONDecodeError:
        return None
    return value if isinstance(value, dict) else None


def create_coco_annotations(images, categories=None):
    """Build a COCO-format dictionary for one set of images.

    Args:
        images: Iterable of ``(file_name, image_annotations)`` pairs, where
            ``image_annotations`` is a list of CSV row dicts carrying the
            JSON strings ``region_shape_attributes`` and ``region_attributes``.
        categories: Optional iterable of category names. When omitted, the
            module-level ``category_set`` is used (the original behaviour).

    Returns:
        A dict with the keys ``info``, ``licenses``, ``categories``,
        ``images`` and ``annotations``.

    Raises:
        ValueError: If a row names a category that is not in ``categories``.

    Images whose first row has no usable ``width``/``height`` are skipped.
    Rows without a category or without a complete ``x``/``y``/``width``/
    ``height`` box are skipped instead of raising.
    """
    if categories is None:
        categories = category_set

    coco_annotations = {
        "info": {},
        "licenses": [],
        "categories": [],
        "images": [],
        "annotations": [],
    }

    # Sort before numbering: set iteration order depends on string hashing,
    # so unsorted ids would not be reproducible from one run to the next.
    category_ids = {}
    for category_id, name in enumerate(sorted(categories, key=str), start=1):
        category_ids[name] = category_id
        coco_annotations['categories'].append({
            "id": category_id,
            "name": name,
            "supercategory": None,
        })

    bbox_keys = ('x', 'y', 'width', 'height')
    image_id = 0
    annotation_id = 0

    for file_name, image_annotations in images:
        if not image_annotations:
            continue

        first_shape = _load_json_dict(image_annotations[0]['region_shape_attributes'])
        if first_shape is None or 'width' not in first_shape or 'height' not in first_shape:
            continue

        # NOTE(review): width/height are read from the first region's shape,
        # i.e. the size of that box rather than of the image file. COCO
        # expects image dimensions here -- confirm and replace with the real
        # tile size if downstream tools rely on it.
        coco_annotations['images'].append({
            "id": image_id,
            "width": first_shape['width'],
            "height": first_shape['height'],
            "file_name": file_name,
            "license": None,
            "flickr_url": None,
            "coco_url": None,
            "date_captured": None,
        })

        for annotation in image_annotations:
            region_shape_attributes = _load_json_dict(annotation['region_shape_attributes'])
            region_attributes = _load_json_dict(annotation['region_attributes'])

            # Skip rows that carry no category or no complete bounding box.
            if not region_shape_attributes or not region_attributes:
                continue
            if any(key not in region_shape_attributes for key in bbox_keys):
                continue

            category = next(iter(region_attributes.values()))
            if category not in category_ids:
                raise ValueError(
                    f"Unknown category {category!r} in annotation for {file_name}"
                )

            box_width = region_shape_attributes['width']
            box_height = region_shape_attributes['height']

            coco_annotations['annotations'].append({
                "id": annotation_id,
                "image_id": image_id,
                "category_id": category_ids[category],
                "segmentation": [],
                "area": box_width * box_height,
                "bbox": [
                    region_shape_attributes['x'],
                    region_shape_attributes['y'],
                    box_width,
                    box_height,
                ],
                "iscrowd": 0,
            })
            annotation_id += 1

        image_id += 1

    return coco_annotations

# Build one COCO dictionary per split, in train / validation / test order
train_coco_annotations, val_coco_annotations, test_coco_annotations = (
    create_coco_annotations(split_images)
    for split_images in (train_images, val_images, test_images)
)

## Step 3: Save the annotations to JSON files

In [15]:
# Write each split's COCO dictionary to its own JSON file
for output_name, coco_split in (
    ('train_annotations_random.json', train_coco_annotations),
    ('val_annotations_random.json', val_coco_annotations),
    ('test_annotations_random.json', test_coco_annotations),
):
    with open(output_name, 'w') as outfile:
        json.dump(coco_split, outfile)

## Step 4: Copy the images into three subfolders that match the JSON files

In [10]:
import shutil

# Function to copy images to their respective folders
def copy_images(images, output_folder):
    """Copy the images of one split from ``images_folder`` into ``output_folder``.

    ``images`` is an iterable of ``(file_name, annotations)`` pairs; only the
    file name is used. The destination folder is created if it is missing.
    """
    os.makedirs(output_folder, exist_ok=True)

    image_names = (name for name, _ in images)
    for name in image_names:
        shutil.copyfile(
            os.path.join(images_folder, name),
            os.path.join(output_folder, name),
        )

# Populate the train, validation, and test folders
for split_images, split_folder in (
    (train_images, 'data/palms/train_random'),
    (val_images, 'data/palms/val_random'),
    (test_images, 'data/palms/test_random'),
):
    copy_images(split_images, split_folder)