In [11]:
import os
import xml.etree.ElementTree as ET

# Paths
annotations_dir = r'C:\Users\Leemart\ClearPath_ML_Development\evaluation\Annotations'
images_dir = r'C:\Users\Leemart\ClearPath_ML_Development\evaluation\JPEGImages'
labels_output_dir = r'C:\Users\Leemart\ClearPath_ML_Development\evaluation\YOLOLabels'

# Class names (ensure these match your model's class order in dataset.yaml)
classes = [
    "person", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "dog", "horse", "motorbike", "potted plant", "sheep",
    "sofa", "train", "tvmonitor"
]

# Create output directory for YOLO labels
os.makedirs(labels_output_dir, exist_ok=True)

def convert_to_yolo_format(size, box):
    """Converts Pascal VOC bounding box to YOLO format."""
    dw = 1.0 / size[0]
    dh = 1.0 / size[1]
    x_center = (box[0] + box[1]) / 2.0
    y_center = (box[2] + box[3]) / 2.0
    width = box[1] - box[0]
    height = box[3] - box[2]
    return x_center * dw, y_center * dh, width * dw, height * dh

# Process each XML file
for xml_file in os.listdir(annotations_dir):
    if not xml_file.endswith(".xml"):
        continue

    # Parse XML
    tree = ET.parse(os.path.join(annotations_dir, xml_file))
    root = tree.getroot()

    # Image dimensions
    size = root.find("size")
    width = int(size.find("width").text)
    height = int(size.find("height").text)

    # Create a corresponding .txt file for YOLO labels
    txt_file_path = os.path.join(labels_output_dir, xml_file.replace(".xml", ".txt"))
    with open(txt_file_path, "w") as txt_file:
        for obj in root.findall("object"):
            class_name = obj.find("name").text
            if class_name not in classes:
                continue  # Skip classes not in the predefined list
            class_id = classes.index(class_name)

            # Get bounding box coordinates
            bbox = obj.find("bndbox")
            xmin = float(bbox.find("xmin").text)
            xmax = float(bbox.find("xmax").text)
            ymin = float(bbox.find("ymin").text)
            ymax = float(bbox.find("ymax").text)

            # Convert to YOLO format
            x_center, y_center, width, height = convert_to_yolo_format(
                (width, height), (xmin, xmax, ymin, ymax)
            )
            # Write to the .txt file
            txt_file.write(f"{class_id} {x_center} {y_center} {width} {height}\n")

print("Conversion completed! YOLO labels are saved in:", labels_output_dir)


Conversion completed! YOLO labels are saved in: C:\Users\Leemart\ClearPath_ML_Development\evaluation\YOLOLabels


In [10]:
import os
import shutil
import random

# Paths
image_dir = r'C:\Users\Leemart\ClearPath_ML_Development\dataset\VOC2012_train_val\JPEGImages'
label_dir = r'C:\Users\Leemart\ClearPath_ML_Development\dataset\VOC2012_train_val\YOLOLabels'
output_train_dir = r'C:\Users\Leemart\ClearPath_ML_Development\dataset\train'
output_val_dir = r'C:\Users\Leemart\ClearPath_ML_Development\dataset\val'

# Create output folders
os.makedirs(os.path.join(output_train_dir, "images"), exist_ok=True)
os.makedirs(os.path.join(output_train_dir, "labels"), exist_ok=True)
os.makedirs(os.path.join(output_val_dir, "images"), exist_ok=True)
os.makedirs(os.path.join(output_val_dir, "labels"), exist_ok=True)

# Get all image filenames
images = [f for f in os.listdir(image_dir) if f.endswith(".jpg")]
random.shuffle(images)

# Split into train (80%) and val (20%)
split_idx = int(len(images) * 0.8)
train_images = images[:split_idx]
val_images = images[split_idx:]

# Copy images and labels
def copy_files(image_list, output_image_dir, output_label_dir):
    for img_file in image_list:
        # Copy image
        shutil.copy(os.path.join(image_dir, img_file),
                    os.path.join(output_image_dir, img_file))
        
        # Copy corresponding label, if it exists
        label_file = img_file.replace(".jpg", ".txt")
        label_path = os.path.join(label_dir, label_file)
        
        print(f"Checking for label: {label_path}")  # Debugging statement
        if os.path.exists(label_path):
            shutil.copy(label_path, os.path.join(output_label_dir, label_file))
        else:
            print(f"Warning: Label file not found for image {img_file}, skipping.")

copy_files(train_images, os.path.join(output_train_dir, "images"), os.path.join(output_train_dir, "labels"))
copy_files(val_images, os.path.join(output_val_dir, "images"), os.path.join(output_val_dir, "labels"))

print(f"Split completed: {len(train_images)} train, {len(val_images)} val")

Checking for label: C:\Users\Leemart\ClearPath_ML_Development\dataset\VOC2012_train_val\YOLOLabels\2008_008446.txt
Checking for label: C:\Users\Leemart\ClearPath_ML_Development\dataset\VOC2012_train_val\YOLOLabels\2010_006566.txt
Checking for label: C:\Users\Leemart\ClearPath_ML_Development\dataset\VOC2012_train_val\YOLOLabels\2012_004329.txt
Checking for label: C:\Users\Leemart\ClearPath_ML_Development\dataset\VOC2012_train_val\YOLOLabels\2012_001499.txt
Checking for label: C:\Users\Leemart\ClearPath_ML_Development\dataset\VOC2012_train_val\YOLOLabels\2012_002246.txt
Checking for label: C:\Users\Leemart\ClearPath_ML_Development\dataset\VOC2012_train_val\YOLOLabels\2008_008724.txt
Checking for label: C:\Users\Leemart\ClearPath_ML_Development\dataset\VOC2012_train_val\YOLOLabels\2011_004656.txt
Checking for label: C:\Users\Leemart\ClearPath_ML_Development\dataset\VOC2012_train_val\YOLOLabels\2008_007356.txt
Checking for label: C:\Users\Leemart\ClearPath_ML_Development\dataset\VOC2012_tr