In [1]:
import os

# Root folder of the dataset (absolute path on the author's machine)
DATASET_DIR = r"C:\dev_projects_pc\uu-insect-project\object detection\insects\dataset_yolo_final"

# Dataset splits; each is expected to hold an "images" and a "labels" subfolder
splits = ["train", "val", "test"]

# File extensions (lower-case, with the dot) that count as images
img_exts = [".jpg", ".jpeg", ".png", ".bmp"]

# For every split, make sure each image has a same-named .txt label file,
# creating an empty one where it is absent, then report the counts.
for split in splits:
    split_root = os.path.join(DATASET_DIR, split)
    images_dir = os.path.join(split_root, "images")
    labels_dir = os.path.join(split_root, "labels")

    if os.path.exists(images_dir):
        os.makedirs(labels_dir, exist_ok=True)

        # Base names (extension stripped) of every entry recognised as an image
        image_stems = [
            stem
            for stem, ext in map(os.path.splitext, os.listdir(images_dir))
            if ext.lower() in img_exts
        ]
        total_images = len(image_stems)

        missing_labels = 0
        for stem in image_stems:
            label_file = os.path.join(labels_dir, stem + ".txt")
            if os.path.exists(label_file):
                continue
            # Opening in write mode and closing immediately leaves an empty file
            open(label_file, "w").close()
            missing_labels += 1

        print(f"[{split}] Total images: {total_images}, Missing labels created: {missing_labels}")
    else:
        # Nothing to scan for this split; move on to the next one
        print(f"[WARNING] Images folder missing: {images_dir}")


[train] Total images: 498, Missing labels created: 0
[val] Total images: 174, Missing labels created: 0
[test] Total images: 94, Missing labels created: 0


In [2]:
import os

DATASET_DIR = r"C:\dev_projects_pc\uu-insect-project\object detection\insects\dataset_yolo_final"
splits = ["train", "val", "test"]


def check_label_line(line):
    """Validate one line of a label file in the ``class x y w h`` layout.

    Args:
        line: Raw text of a single line (a trailing newline is fine).

    Returns:
        None when the line is blank or valid, otherwise a short problem
        description: "Wrong format" (not exactly five whitespace-separated
        fields), "Non-numeric value" (a field cannot be parsed as a number),
        "Invalid class id" (class is negative or not a whole number), or
        "Out-of-range coordinates" (x, y, w or h outside [0, 1]).
    """
    parts = line.split()
    if not parts:
        # Blank lines carry no annotation, so they are skipped, not reported
        return None
    if len(parts) != 5:
        return "Wrong format"

    try:
        cls_value, x, y, w, h = (float(token) for token in parts)
    except ValueError:
        # Report the bad token instead of letting the exception abort the scan
        return "Non-numeric value"

    # "0" and "0.0" are both accepted; negatives, fractions, nan and inf are not
    if cls_value < 0 or not cls_value.is_integer():
        return "Invalid class id"
    if not all(0 <= value <= 1 for value in (x, y, w, h)):
        return "Out-of-range coordinates"
    return None


# Scan every .txt file in each split's labels folder and print one [ERROR]
# line per problematic entry, followed by a per-split summary.
for split in splits:
    labels_dir = os.path.join(DATASET_DIR, split, "labels")
    if not os.path.isdir(labels_dir):
        print(f"[WARNING] Labels folder missing: {labels_dir}")
        continue

    files_checked = 0
    problems_found = 0
    for label_file in sorted(os.listdir(labels_dir)):
        if not label_file.endswith(".txt"):
            continue
        files_checked += 1
        path = os.path.join(labels_dir, label_file)
        with open(path, "r") as f:
            for line in f:
                problem = check_label_line(line)
                if problem is not None:
                    problems_found += 1
                    print(f"[ERROR] {problem} in {path}: {line.strip()}")

    # Printed even when everything is fine, so a clean run is not silent
    print(f"[{split}] Label files checked: {files_checked}, Problems found: {problems_found}")


In [5]:
import os

DATASET_DIR = r"C:\dev_projects_pc\uu-insect-project\object detection\insects\dataset_yolo_final"
split = "val"

# Extensions counted as images. Kept identical to the list used by the
# label-creation cell so that .jpeg and .bmp files are checked here as well.
img_exts = [".jpg", ".jpeg", ".png", ".bmp"]

images_dir = os.path.join(DATASET_DIR, split, "images")
labels_dir = os.path.join(DATASET_DIR, split, "labels")


def is_image_file(name):
    """Return True when the lower-cased extension of ``name`` is in ``img_exts``."""
    return os.path.splitext(name)[1].lower() in img_exts


def find_images_without_labels(image_names, label_names):
    """Return the image names that have no same-stem ``.txt`` label.

    Args:
        image_names: Image file names (no directory part).
        label_names: Label file names (no directory part).

    Returns:
        The entries of ``image_names``, in their original order, for which
        ``<stem>.txt`` does not appear in ``label_names``.
    """
    # A set makes each lookup constant-time instead of scanning a list per image
    known_labels = set(label_names)
    return [
        name
        for name in image_names
        if os.path.splitext(name)[0] + ".txt" not in known_labels
    ]


# A missing folder is reported and treated as empty rather than raising
if os.path.isdir(images_dir):
    images = sorted(f for f in os.listdir(images_dir) if is_image_file(f))
else:
    print(f"[WARNING] Images folder missing: {images_dir}")
    images = []

if os.path.isdir(labels_dir):
    labels = sorted(f for f in os.listdir(labels_dir) if f.endswith(".txt"))
else:
    print(f"[WARNING] Labels folder missing: {labels_dir}")
    labels = []

unlabeled_images = find_images_without_labels(images, labels)
for img in unlabeled_images:
    print(f"Missing label for image: {img}")

# Printed even when nothing is missing, so a clean run is not silent
print(f"[{split}] Total images: {len(images)}, Missing labels: {len(unlabeled_images)}")
