In [6]:
import os
import shutil
import random
import json
from pathlib import Path

COCO to YOLO: Class mapping

In [7]:
# COCO category_id -> YOLO class index.
# YOLO indices are assigned consecutively (0, 1, 2, ...) in the order the
# COCO category ids are listed below.
class_id_mapping = {
    coco_id: yolo_id
    for yolo_id, coco_id in enumerate([
        1,  # adult
        4,  # bicycle
        5,  # motorcycle
        6,  # car
        7,  # bus
    ])
}

# Usage when converting an annotation:
# json_info["class_id"] = class_id_mapping[item["category_id"]]


COCO to YOLO: Coordinates mapping

In [8]:
def convert_coco_to_yolo(bbox, image_width, image_height):
    """
    Translate a COCO bounding box into normalised YOLO coordinates.

    COCO gives the box as a corner (x, y) plus its size; YOLO gives the box
    centre plus its size, each divided by the corresponding image dimension.

    Parameters:
    bbox (list): COCO box as [x, y, width, height].
    image_width (int): Image width used to normalise the horizontal values.
    image_height (int): Image height used to normalise the vertical values.

    Returns:
    list: YOLO box as [x_center, y_center, width, height].
    """
    # Name the four COCO fields once instead of indexing repeatedly.
    left, top = bbox[0], bbox[1]
    box_width, box_height = bbox[2], bbox[3]

    # Centre = corner + half the size; everything is scaled by the image size.
    return [
        (left + box_width / 2) / image_width,
        (top + box_height / 2) / image_height,
        box_width / image_width,
        box_height / image_height,
    ]

COCO to YOLO: Annotations mapping

In [9]:
def process_images(images, data_dir, label_dir, image_output_dir, label_output_dir):
    """
    Copy images into an output split and write one YOLO label file per image.

    For every image name, the matching "<stem>.json" file is read from label_dir,
    each of its annotations is converted to a line
    "<class_id> <x_center> <y_center> <width> <height>", and the lines are written
    to "<stem>.txt" in label_output_dir. The image is copied to image_output_dir.

    Parameters:
    images (list): File names (not full paths) of the images inside data_dir.
    data_dir (Path): Directory containing the source images.
    label_dir (Path): Directory containing one JSON annotation file per image.
    image_output_dir (Path): Destination directory for the copied images.
    label_output_dir (Path): Destination directory for the generated .txt files.

    Raises:
    FileNotFoundError: If an image or its JSON annotation file does not exist.
    KeyError: If an annotation's category_id is missing from class_id_mapping,
        or the JSON lacks one of the keys read below.
    ValueError: If an annotation's bbox does not hold exactly four values.
    """
    # Create the destinations up front: shutil.copy() to a missing directory
    # would otherwise write a single file carrying the directory's name.
    os.makedirs(image_output_dir, exist_ok=True)
    os.makedirs(label_output_dir, exist_ok=True)

    for image in images:
        # The annotation file shares the image's name, with a .json extension
        json_file = label_dir / (os.path.splitext(image)[0] + '.json')

        # Parse the annotation before copying anything, so a missing or broken
        # JSON file does not leave an unlabelled image in the output split.
        with open(json_file, encoding="utf-8") as f:
            ground_truth = json.load(f)

        image_height = ground_truth["image"]["height"]
        image_width = ground_truth["image"]["width"]

        # Build one YOLO line per annotation
        label_lines = []
        for item in ground_truth["annotations"]:
            class_id = class_id_mapping[item["category_id"]]

            # Unpacking enforces that the COCO bbox has exactly four values
            x_top, y_top, width, height = item["bbox"]
            yolo_bbox = convert_coco_to_yolo((x_top, y_top, width, height), image_width, image_height)

            # No trailing space before the newline
            label_lines.append(" ".join(map(str, [class_id, *yolo_bbox])) + "\n")

        # Copy the image to the output directory
        shutil.copy(data_dir / image, image_output_dir)

        # Write the label file in "w" mode: re-running the notebook must replace
        # the labels, not append duplicate annotations to an existing file.
        output_file = (label_output_dir / image).with_suffix(".txt")
        with open(output_file, "w") as file:
            file.writelines(label_lines)

Dataset Split

In [10]:
# Define the source directory and the destination directories.
# NOTE: these are machine-specific absolute paths; adjust them before running elsewhere.
source_dir = Path(r"C:\Users\samco\Downloads\New folder")
image_train_dir = Path(r"C:\Dk\Projects\Team Project\YOLO detection\Training Setup_augmented\images\train")
label_train_dir = Path(r"C:\Dk\Projects\Team Project\YOLO detection\Training Setup_augmented\labels\train")
image_val_dir = Path(r"C:\Dk\Projects\Team Project\YOLO detection\Training Setup_augmented\images\val")
label_val_dir = Path(r"C:\Dk\Projects\Team Project\YOLO detection\Training Setup_augmented\labels\val")

# Define the split ratio for training and validation
split_ratio = 0.8

# Fixed seed so that re-running the notebook reproduces the same split.
# Without it, every run reshuffles, and because earlier outputs stay on disk the
# same image can end up in both the train and the val directories.
# A dedicated Random instance keeps the global random state untouched.
split_seed = 42
split_rng = random.Random(split_seed)

# Get the list of subfolders in the source directory.
# Sorted, because os.scandir() yields entries in arbitrary order and the
# shuffles below consume the generator scene by scene.
scenes = sorted(f.path for f in os.scandir(source_dir) if f.is_dir())

# Iterate over each scene
for scene in scenes:
    # Get the path to the 'camera_01__data' and 'camera_01__annotation' directory
    data_dir = Path(scene) / 'camera_01' / 'camera_01__data'
    label_dir = Path(scene) / 'camera_01' / 'camera_01__annotation'

    # Check if the 'camera_01__data' directory exists
    if data_dir.exists():
        # Get the list of images in the 'camera_01__data' directory.
        # Sorted first so the seeded shuffle starts from a deterministic order.
        images = sorted(os.listdir(data_dir))

        # Shuffle the images to ensure a random (but reproducible) split
        split_rng.shuffle(images)

        # Calculate the number of images for training
        train_count = int(len(images) * split_ratio)

        # Split the images into training and validation sets
        train_images = images[:train_count]
        val_images = images[train_count:]

        # Process the training and validation images
        process_images(train_images, data_dir, label_dir, image_train_dir, label_train_dir)
        process_images(val_images, data_dir, label_dir, image_val_dir, label_val_dir)

print("Images have been successfully sampled to the training and validation directories.")


Images have been successfully sampled to the training and validation directories.
