In [5]:
from pathlib import Path

CLASS_COUNT = 19  # change to your number of classes

def validation(images_dir: Path, label_dir: Path):
    image_list = images_dir.glob("*.*") # get me all the files under this path and set it inside list

    if not image_list:
        print(f"No images found in {images_dir}")
        return

    ok = 0
    bad = 0

    for image_path in image_list:
        label = label_dir / (image_path.stem + '.txt')

        if not label.exists():
            print(f"Label {label} not found in {label_dir}")
            bad += 1
            continue

        lines =label.read_text().strip().splitlines()

        for i, line in enumerate(lines, start=1):
            parts = line.strip().split()
            if parts[0] == 5:
                print(f"[BAD FORMAT] {label.name}:{i} -> {line}")
                bad += 1
                break

            cls = int(float(parts[0]))
            vals = list(map(float, parts[1:]))

            if cls < 0 or cls >= CLASS_COUNT:
                print(f"[BAD CLASS] {label.name}:{i} -> class={cls}")
                bad += 1
                break

            if any(v < 0 or v > 1 for v in vals):
                print(f"[BAD RANGE] {label.name}:{i} -> {vals}")
                bad += 1
                break
            else:
                ok+=1

    print(f"[{images_dir.name}] OK: {ok}, BAD: {bad}")


def main():
    base = Path("datasets")
    for split in ["train", "valid", "test"]:
        validation(
            base / split  / "images",
            base / split / "labels"
        )

if __name__ == "__main__":
    main()

[images] OK: 11823, BAD: 0
[images] OK: 3423, BAD: 0
[images] OK: 1768, BAD: 0
