## Weed Detection learning

## Converting the dataset annotations from XML to YOLO TXT labels

In [3]:
import os
from pathlib import Path
from PIL import Image
import xml.etree.ElementTree as ET
import shutil

# --- Paths ---
# Source folders; the conversion step looks for <stem>.jpg / <stem>.xml pairs in each one.
src_dirs = [
    r"D:\python projects\MachineLearning\WeedDetection\data\Year2021_Part1",
    r"D:\python projects\MachineLearning\WeedDetection\data\Year2021_Part2",
    r"D:\python projects\MachineLearning\WeedDetection\data\Year2022",
]

# Destination of the converted dataset: images/ and labels/ live side by side.
output_dir = Path("D:\\python projects\\MachineLearning\\WeedDetection\\data\\yolo_dataset")
images_out, labels_out = output_dir / "images", output_dir / "labels"
for _out_dir in (images_out, labels_out):
    # exist_ok keeps the cell re-runnable when the folders are already there.
    _out_dir.mkdir(parents=True, exist_ok=True)

# --- Classes ---
# A name's position in this list is the integer class id written to the label files.
CLASS_NAMES = [
    "Waterhemp",
    "Carpetweed",
    "Morninglory",
    "Goosegrass",
    "Spotted Spurge",
    "Palmer Amaranth",
    "Purslane",
    "Ragweed",
]
# Lower-cased class name -> class id, so lookups ignore letter case.
CLASS_MAP = {name.lower(): idx for idx, name in enumerate(CLASS_NAMES)}

def xml_to_yolo(xml_file, img_w, img_h, class_map=None):
    """Convert the objects of one annotation XML file into YOLO label lines.

    Every ``<object>`` element is expected to hold a ``<name>`` and a
    ``<bndbox>`` with ``xmin``/``ymin``/``xmax``/``ymax`` pixel coordinates.
    Objects that cannot be turned into a valid label are skipped instead of
    aborting the whole file: unknown or missing class name, missing or
    non-numeric box, or a box with no area once limited to the image.

    Args:
        xml_file: Path (or file object) of the annotation XML to parse.
        img_w: Width of the matching image in pixels; must be positive.
        img_h: Height of the matching image in pixels; must be positive.
        class_map: Optional mapping of lower-cased class name to class id.
            Defaults to the notebook-level ``CLASS_MAP``.

    Returns:
        A list of strings ``"<class_id> <x_center> <y_center> <width> <height>"``
        with the four box values normalised by the image size and formatted
        with six decimals. The list is empty when no object qualifies.

    Raises:
        ValueError: If ``img_w`` or ``img_h`` is not positive.
    """
    if img_w <= 0 or img_h <= 0:
        raise ValueError(f"Image size must be positive, got {img_w}x{img_h}")
    if class_map is None:
        class_map = CLASS_MAP

    root = ET.parse(xml_file).getroot()
    yolo_obj = []

    for obj in root.findall('object'):
        # findtext() returns None when <name> is absent; strip() tolerates
        # padded names such as " Waterhemp ".
        cls_name = (obj.findtext('name') or '').strip().lower()
        cls_idx = class_map.get(cls_name)
        if cls_idx is None:
            continue

        bnd = obj.find('bndbox')
        if bnd is None:
            continue
        try:
            # Kept as floats: truncating to int would throw away sub-pixel precision.
            x1, y1, x2, y2 = (
                float(bnd.findtext(tag)) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
        except (TypeError, ValueError):
            # A coordinate tag is missing (None) or does not contain a number.
            continue

        # Tolerate inverted corners, then limit the box to the image so the
        # normalised values always stay inside [0, 1].
        xmin, xmax = sorted((x1, x2))
        ymin, ymax = sorted((y1, y2))
        xmin, xmax = min(max(xmin, 0.0), img_w), min(max(xmax, 0.0), img_w)
        ymin, ymax = min(max(ymin, 0.0), img_h), min(max(ymax, 0.0), img_h)

        # Nothing left to label if the box has no area inside the image.
        if xmax <= xmin or ymax <= ymin:
            continue

        x_center = ((xmin + xmax) / 2) / img_w
        y_center = ((ymin + ymax) / 2) / img_h
        w = (xmax - xmin) / img_w
        h = (ymax - ymin) / img_h

        yolo_obj.append(f"{cls_idx} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}")
    return yolo_obj

# Convert every annotated photo: write its YOLO label file, then copy the image.
# Names are tracked so the summary reports what THIS run produced rather than
# whatever happens to be in the output folder from earlier runs.
converted_names = set()
name_clashes = 0

for src in src_dirs:

    src_path = Path(src)

    for img_file in src_path.glob('*.jpg'):

        base = img_file.stem
        xml_file = src_path / f"{base}.xml"

        # Photos without an annotation file cannot be labelled; leave them out.
        if not xml_file.exists():
            continue

        # Only the pixel size is needed, so the image is closed again right away.
        with Image.open(img_file) as im:
            img_w, img_h = im.size
        yolo_obj = xml_to_yolo(xml_file, img_w, img_h)

        # No usable object in the annotation -> skip both label and image.
        if not yolo_obj:
            continue

        # All source folders share one output folder, so an identical file name
        # in two of them overwrites the earlier copy; count it so it is visible.
        if img_file.name in converted_names:
            name_clashes += 1
        converted_names.add(img_file.name)

        label_path = labels_out / f"{base}.txt"
        with open(label_path, mode='w') as f:
            f.write('\n'.join(yolo_obj))

        shutil.copy(img_file, images_out / img_file.name)

if name_clashes:
    print(f"Warning: {name_clashes} images shared a file name with an earlier one and replaced it")
print(f"Done! Converted {len(converted_names)} images and labels")



Done! Converted 4223 images and labels


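Now that the images and labels are ready, the next step is to split them into training and validation sets and create the `data.yaml` file, which tells YOLOv8:
<br>
1. Where the train and validation images are <br>
<br>
2. Where the labels are (YOLO locates them by replacing `images` with `labels` in each image path, so they are not listed separately) <br>
<br>
3. How many classes exist <br>
<br>
4. What the class names are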

In [9]:
import yaml 
import random

# Fixed seed and fraction so Restart & Run All reproduces exactly the same split.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.8

# NOTE(review): raw string with doubled backslashes; the output recorded for this
# cell shows the path with single separators, so the literal is left unchanged.
output_dir = Path(r"D:\\python projects\\MachineLearning\\WeedDetection\\data\\yolo_dataset")
images_dir = output_dir / 'images'
labels_dir = output_dir / 'labels'

# glob() order depends on the filesystem; sort first so the seeded shuffle
# always starts from the same sequence.
images = sorted(images_dir.glob('*.jpg'))
if not images:
    raise FileNotFoundError(f"No .jpg images found in {images_dir}")

# A private Random instance keeps the global random state untouched.
random.Random(SPLIT_SEED).shuffle(images)
split_idx = int(len(images) * TRAIN_FRACTION)

train_imgs = images[:split_idx]
val_imgs = images[split_idx:]


# ======= Folders for Training and Validation sets of images and labels =======

train_dir = output_dir / "train/images"
val_dir = output_dir / "val/images"
train_labels_dir = output_dir / "train/labels"
val_labels_dir = output_dir / "val/labels"

for split_dir in (train_dir, val_dir, train_labels_dir, val_labels_dir):
    # Start from empty folders: files left by an earlier run (with a different
    # split) would otherwise put the same image in both train and val.
    if split_dir.exists():
        shutil.rmtree(split_dir)
    split_dir.mkdir(parents=True, exist_ok=True)

# ======= Cp of images and labels =======

def _copy_split(split_imgs, img_dest, label_dest):
    """Copy each image and its same-stem label file into one split's folders."""
    for img in split_imgs:
        shutil.copy2(img, img_dest / img.name)
        shutil.copy2(labels_dir / f"{img.stem}.txt", label_dest / f"{img.stem}.txt")

_copy_split(train_imgs, train_dir, train_labels_dir)
_copy_split(val_imgs, val_dir, val_labels_dir)

data_yaml_path = output_dir / "data.yaml"

# Only the image folders are listed, together with the class count and names.
data_dict = {
    "train": str(train_dir),
    "val": str(val_dir),
    "nc": len(CLASS_NAMES),
    "names": CLASS_NAMES
}

try:
    with open(data_yaml_path, mode='w') as f:
        yaml.dump(data_dict, f)
except Exception as e:
    print(f"Error occured while saving the Yaml file: {e}")
else:
    # Report success only when the file was actually written.
    print(f"data.yaml created at {data_yaml_path}")


data.yaml created at D:\python projects\MachineLearning\WeedDetection\data\yolo_dataset\data.yaml


In [None]:
# Sanity check of the split sizes: the validation count is printed, and the
# training count is the cell's final expression, shown as its output.
n_val = len(val_imgs)
print(n_val)
len(train_imgs)

845


3378

## Training with YOLO

In [16]:
from ultralytics import YOLO # type: ignore

# Load the yolov8n.pt checkpoint; it is trained on the dataset described by data.yaml.
model = YOLO("yolov8n.pt")

# All training settings in one mapping so they are easy to scan and adjust.
train_settings = dict(
    data=str(data_yaml_path),
    epochs=50,
    imgsz=640,
    batch=8,
    workers=8,       # data-loading workers; author's note: can increase to 12-14
    name="weed_detection",
    project="runs",
)

results = model.train(**train_settings)



Ultralytics 8.3.228  Python-3.12.6 torch-2.9.1+cpu CPU (13th Gen Intel Core i7-13620H)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=D:\python projects\MachineLearning\WeedDetection\data\yolo_dataset\data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=weed_detection2, nbs=64, nms=False, opset=None, optimize=False, optimizer=a

KeyboardInterrupt: 