In [None]:
import os, glob

import yaml

# Root of the YOLO dataset; every split keeps its annotations in <split>/labels.
BASE_DIR = "./Dataset"
LABEL_DIRS = [
    os.path.join(BASE_DIR, "train", "labels"),
    os.path.join(BASE_DIR, "valid", "labels"),
    os.path.join(BASE_DIR, "test", "labels"),
]
# Upper bound on how many offending entries are echoed at the end of the cell.
MAX_BAD_SHOWN = 300

# The class count comes from data.yaml: the explicit "nc" key when present,
# otherwise the number of entries under "names".
with open(os.path.join(BASE_DIR, "data.yaml"), "r") as f:
    data = yaml.safe_load(f) or {}  # an empty YAML file loads as None
names = data.get("names", [])
nc = data.get("nc", len(names))

bad = []   # (label_path, index among the non-blank lines, reason)
total = 0  # number of label files visited

for ld in LABEL_DIRS:
    for lp in glob.glob(os.path.join(ld, "*.txt")):
        total += 1
        with open(lp, "r") as f:
            lines = [x.strip() for x in f if x.strip()]
        for i, line in enumerate(lines):
            parts = line.split()
            if len(parts) != 5:
                bad.append((lp, i, "format!=5"))
                continue
            try:
                cls = int(float(parts[0]))
                x, y, w, h = map(float, parts[1:])
            except (ValueError, OverflowError):
                # Text tokens raise ValueError; "nan"/"inf" class ids raise
                # ValueError/OverflowError inside int(). Record the entry and
                # keep scanning instead of aborting the whole report.
                bad.append((lp, i, "non_numeric"))
                continue
            if cls < 0 or cls >= nc:
                bad.append((lp, i, f"class_out_of_range({cls})"))
            if not (0 <= x <= 1 and 0 <= y <= 1 and 0 <= w <= 1 and 0 <= h <= 1):
                bad.append((lp, i, "bbox_not_normalized"))
            if w <= 0 or h <= 0:
                bad.append((lp, i, "bbox_nonpositive"))

print("nc:", nc)
print("Total label files scanned:", total)
print("Bad entries:", len(bad))
# The header is derived from the same constant as the slice so they cannot drift apart.
print(f"First {MAX_BAD_SHOWN} bad examples:")
for b in bad[:MAX_BAD_SHOWN]:
    print(b)


In [None]:
import glob
import math
import os
import shutil


# Dataset root; each split's annotations are read from <DATASET_DIR>/<split>/labels.
DATASET_DIR = "./Dataset"  
# Splits visited by the backup loop and the cleaning loop in this cell.
SPLITS = ["train", "valid","test"] 
# Number of classes: annotation rows whose class id is outside [0, NC) are dropped.
# NOTE(review): hard-coded here — presumably meant to match "nc" in data.yaml; confirm.
NC = 120
# Folder that receives a copy of every split's original labels directory.
BACKUP_DIR = os.path.join(DATASET_DIR, "_labels_backup")

def is_float(x: str) -> bool:
    """Tell whether ``float(x)`` succeeds.

    Args:
        x: Token to probe, normally one whitespace-separated field of a label line.

    Returns:
        True when the token converts to a float, False when the conversion fails.
        Note that "nan" and "inf" convert successfully and therefore return True.
    """
    try:
        float(x)
    except (TypeError, ValueError, OverflowError):
        # Only genuine conversion failures mean "not a float"; anything else
        # (KeyboardInterrupt, SystemExit, ...) must propagate to the caller.
        return False
    return True

def clamp01(v: float) -> float:
    """Limit ``v`` to the closed interval [0.0, 1.0]."""
    # The upper bound is applied first, then the lower bound; this order is
    # kept on purpose because it decides the result for NaN and -0.0.
    capped = min(1.0, v)
    return max(0.0, capped)

def polygon_to_bbox(nums):
    """Convert one polygon annotation into an axis-aligned box.

    Args:
        nums: Sequence of numbers laid out as ``[cls, x1, y1, x2, y2, ...]``.

    Returns:
        ``[cls, xc, yc, w, h]`` with ``cls`` as an int and the four box values
        inside [0, 1], or None when the coordinate list is empty or does not
        consist of complete (x, y) pairs.
    """
    cls = int(nums[0])
    pts = nums[1:]
    # A dangling coordinate means the row is malformed; refuse it rather than
    # building a box from mismatched x/y lists.
    if len(pts) < 2 or len(pts) % 2:
        return None

    # Clamp the vertices BEFORE deriving the box. Clamping centre and size
    # independently afterwards (as was done previously) yields a box that no
    # longer matches the in-image part of the polygon whenever a vertex lies
    # outside [0, 1].
    xs = [clamp01(v) for v in pts[0::2]]
    ys = [clamp01(v) for v in pts[1::2]]

    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)

    xc = (x_min + x_max) / 2.0
    yc = (y_min + y_max) / 2.0
    w = x_max - x_min
    h = y_max - y_min

    # Every extreme is already inside [0, 1], so centre and size are too.
    return [cls, xc, yc, w, h]

def _clean_label_line(raw_line, nc):
    """Normalise one annotation row to ``cls xc yc w h``.

    Returns a ``(text, status)`` pair. ``status`` is "ok" when the row was
    already a valid 5-column box, "fixed" when it had to be repaired
    (clamped, or converted from a longer row), and "removed" when it cannot
    be salvaged, in which case ``text`` is None.
    """
    parts = raw_line.replace(",", " ").split()
    if len(parts) < 5 or not all(is_float(p) for p in parts):
        return None, "removed"

    nums = [float(p) for p in parts]
    # "nan" and "inf" parse as floats but would crash int() below and can
    # never describe a usable box, so such rows are dropped.
    if not all(math.isfinite(v) for v in nums):
        return None, "removed"

    status = "ok"
    if len(nums) == 5:
        # Plain box row.
        cls = int(nums[0])
        x, y, w, h = nums[1:]
        if not (0 <= x <= 1 and 0 <= y <= 1 and 0 <= w <= 1 and 0 <= h <= 1):
            x, y, w, h = clamp01(x), clamp01(y), clamp01(w), clamp01(h)
            status = "fixed"
    elif len(nums) >= 7 and (len(nums) - 1) % 2 == 0:
        # Class id followed by at least three (x, y) pairs: polygon row.
        bbox = polygon_to_bbox(nums)
        if bbox is None:
            return None, "removed"
        cls, x, y, w, h = int(bbox[0]), bbox[1], bbox[2], bbox[3], bbox[4]
        status = "fixed"
    else:
        # Any other length: best effort, read the first five values as a box.
        cls = int(nums[0])
        x, y, w, h = (clamp01(v) for v in nums[1:5])
        status = "fixed"

    if cls < 0 or cls >= nc:
        return None, "removed"
    # A box with no width or height (e.g. a negative size clamped to 0) is the
    # same condition the validation cells report as "bbox_nonpositive".
    if w <= 0 or h <= 0:
        return None, "removed"

    return f"{cls} {x:.6f} {y:.6f} {w:.6f} {h:.6f}", status


def clean_labels_for_split(split, nc=None):
    """Write a cleaned copy of one split's label files to ``<split>/labels_clean``.

    Each ``*.txt`` file in ``<DATASET_DIR>/<split>/labels`` is read row by
    row; rows are kept, repaired or dropped by ``_clean_label_line``. Files
    that end up with no rows are not written. The originals are never
    modified. A summary of the counters is printed at the end.

    Args:
        split: Name of the split directory, e.g. "train".
        nc: Number of classes; class ids outside ``[0, nc)`` are dropped.
            Defaults to the module-level ``NC``.
    """
    if nc is None:
        nc = NC

    label_dir = os.path.join(DATASET_DIR, split, "labels")
    if not os.path.isdir(label_dir):
        print(f"[SKIP] No labels dir: {label_dir}")
        return

    out_dir = os.path.join(DATASET_DIR, split, "labels_clean")
    os.makedirs(out_dir, exist_ok=True)

    fixed_files = 0
    removed_files = 0
    fixed_lines = 0
    removed_lines = 0

    for path in glob.glob(os.path.join(label_dir, "*.txt")):
        with open(path, "r", encoding="utf-8") as f:
            raw_lines = [ln.strip() for ln in f.read().splitlines() if ln.strip()]

        new_lines = []
        file_changed = False

        for ln in raw_lines:
            text, status = _clean_label_line(ln, nc)
            if status == "removed":
                removed_lines += 1
                file_changed = True
                continue
            if status == "fixed":
                # Clamped 5-column rows are counted here as well, so the
                # report agrees with the other repair paths.
                fixed_lines += 1
                file_changed = True
            new_lines.append(text)

        if len(new_lines) == 0:
            # Nothing usable survived (or the file was empty): emit no output.
            removed_files += 1
            continue

        out_path = os.path.join(out_dir, os.path.basename(path))
        with open(out_path, "w", encoding="utf-8") as f:
            f.write("\n".join(new_lines) + "\n")

        if file_changed:
            fixed_files += 1

    print(f"\n=== {split.upper()} CLEAN REPORT ===")
    print("fixed_files:", fixed_files)
    print("removed_files:", removed_files)
    print("fixed_lines:", fixed_lines)
    print("removed_lines:", removed_lines)
    print("output labels:", out_dir)

# Take a one-time snapshot of every split's original labels folder. A split
# is skipped when it has no labels directory or when its snapshot already
# exists, so re-running the cell never overwrites an earlier backup.
os.makedirs(BACKUP_DIR, exist_ok=True)
for split in SPLITS:
    labels_src = os.path.join(DATASET_DIR, split, "labels")
    if not os.path.isdir(labels_src):
        continue
    backup_dst = os.path.join(BACKUP_DIR, f"{split}_labels")
    if os.path.exists(backup_dst):
        continue
    shutil.copytree(labels_src, backup_dst)
    print(f"[BACKUP] {labels_src} -> {backup_dst}")

# Produce <split>/labels_clean for each split; the originals stay untouched.
for split in SPLITS:
    clean_labels_for_split(split)

print("\nresult saved in labels_clean.")


In [None]:
import os, glob

import yaml

BASE_DIR = "./Dataset"
# NOTE(review): this re-scans <split>/labels, while the cleaning cell writes its
# result to <split>/labels_clean. Point these paths at labels_clean unless the
# cleaned files have already been copied back over labels — confirm.
LABEL_DIRS = [
    os.path.join(BASE_DIR, "train", "labels"),
    os.path.join(BASE_DIR, "valid", "labels"),
    os.path.join(BASE_DIR, "test", "labels"),
]

# Class count: the "nc" key of data.yaml, falling back to len(names).
with open(os.path.join(BASE_DIR, "data.yaml"), "r") as f:
    data = yaml.safe_load(f) or {}  # an empty YAML file loads as None
names = data.get("names", [])
nc = data.get("nc", len(names))

bad = []   # (label_path, index among the non-blank lines, reason)
total = 0  # number of label files visited

for ld in LABEL_DIRS:
    for lp in glob.glob(os.path.join(ld, "*.txt")):
        total += 1
        with open(lp, "r") as f:
            lines = [x.strip() for x in f if x.strip()]
        for i, line in enumerate(lines):
            parts = line.split()
            if len(parts) != 5:
                bad.append((lp, i, "format!=5"))
                continue
            try:
                cls = int(float(parts[0]))
                x, y, w, h = (float(p) for p in parts[1:])
            except (ValueError, OverflowError):
                # A stray text token or a nan/inf class id used to abort the
                # whole scan; report the row and move on to the next one.
                bad.append((lp, i, "non_numeric"))
                continue
            if cls < 0 or cls >= nc:
                bad.append((lp, i, f"class_out_of_range({cls})"))
            if not all(0 <= v <= 1 for v in (x, y, w, h)):
                bad.append((lp, i, "bbox_not_normalized"))
            if w <= 0 or h <= 0:
                bad.append((lp, i, "bbox_nonpositive"))

print("nc:", nc)
print("Total label files scanned:", total)
print("Bad entries:", len(bad))
print("First 20 bad examples:")
for b in bad[:20]:
    print(b)


In [None]:
from ultralytics import YOLO
import torch
DATA_YAML = "./Dataset/data.yaml"


def main():
    """Train from the ``yolo11m.pt`` checkpoint on DATA_YAML, then run validation.

    Uses CUDA device 0 with batch size 16 when CUDA is available, otherwise
    the CPU with batch size 8.
    """
    model = YOLO("yolo11m.pt")

    use_gpu = torch.cuda.is_available()
    device = 0 if use_gpu else "cpu"

    # Run setup: data, duration, input size and hardware.
    run_args = dict(
        data=DATA_YAML,
        epochs=80,
        imgsz=640,
        batch=16 if use_gpu else 8,
        device=device,
        workers=2,
    )
    # Optimiser and schedule settings.
    optim_args = dict(
        optimizer="AdamW",
        lr0=0.001,
        warmup_epochs=3,
        weight_decay=0.01,
        patience=15,
        label_smoothing=0.0,
    )
    # Augmentation settings.
    augment_args = dict(
        fliplr=0.2,
        mosaic=0.2,
        mixup=0.0,
        hsv_h=0.01,
        hsv_s=0.4,
        hsv_v=0.3,
        translate=0.05,
        scale=0.2,
        degrees=0.0,
    )

    results = model.train(**run_args, **optim_args, **augment_args)
    model.val(data=DATA_YAML, device=device)


if __name__ == "__main__":
    main()

[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt to 'yolo11m.pt': 100% ━━━━━━━━━━━━ 38.8MB 3.2MB/s 12.0s1.9s<0.1s9s
New https://pypi.org/project/ultralytics/8.3.240 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.233  Python-3.12.1 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=./Dataset/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=80, erasing=0.4, exist_ok=False, fliplr=0.2, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.01, hsv_s=0.4, hsv_v=0.3, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None

In [1]:
from ultralytics import YOLO

# Load the best.pt checkpoint stored under runs/detect/train and score it on
# the test split declared in Dataset/data.yaml.
model = YOLO("runs/detect/train/weights/best.pt")

metrics_test = model.val(
    data="Dataset/data.yaml",
    split="test",
    imgsz=640,
    device=0
)

# The metrics above were computed with split="test", so the label says TEST
# (it previously read "TRAIN metrics:", which misreported what was measured).
print("TEST metrics:", metrics_test)


Ultralytics 8.3.233  Python-3.12.1 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
YOLO11m summary (fused): 125 layers, 20,046,223 parameters, 0 gradients, 67.7 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.60.7 ms, read: 8.32.2 MB/s, size: 44.2 KB)
[K[34m[1mval: [0mScanning D:\Sem5IS\Deep Learning\New_test\test3_Yolo_8_11\Yolo11m\Dataset\test\labels... 289 images, 2 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 291/291 575.0it/s 0.5s0.1s
[34m[1mval: [0mNew cache created: D:\Sem5IS\Deep Learning\New_test\test3_Yolo_8_11\Yolo11m\Dataset\test\labels.cache
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 19/19 2.3it/s 8.2s0.4s
                   all        291       1084      0.813      0.757      0.806      0.638
                  Beef         16         22      0.783      0.955      0.888      0.765
           Bell Pepper         18         27      0.907      0.722       0.87      0.745
      