In [None]:
!pip install ultralytics --quiet
!pip install tqdm --quiet

#### This notebook is designed to be used with the following folders/files:
- yolo_dataset/images/
- yolo_dataset/labels/test/
- yolo_dataset/labels/train/
- yolo_dataset/labels/validation/
- yolo_dataset/data.yaml
- <del>ValidData/valid_df.csv</del>

which can be found here --> Capstone Drive [link](https://drive.google.com/drive/folders/1PDhYnGtgJ2uIekZMpIevll753sodh9Th)

In [2]:
import torch
import os
from tqdm import tqdm
from ultralytics import YOLO

<del>TODO: Add code that converts `valid_df.csv` into YOLO formatted validation labels</del>

### Check if your labels are formatted correctly for YOLO

In [None]:
def check_and_normalize_yolo_labels(label_dir, image_width=640, image_height=640):
    """Validate the YOLO label files in ``label_dir`` and normalize pixel coordinates in place.

    Every ``.txt`` file is read line by line; each non-blank line must have five
    whitespace-separated fields: ``<class> <x_center> <y_center> <width> <height>``.
    A line with any coordinate above 1.0 is treated as being in pixels and is
    divided by the image dimensions. A file is rewritten only if at least one of
    its lines was normalized; on rewrite, malformed lines (already reported) are
    left out and all coordinates are formatted with six decimals.

    Args:
        label_dir: Directory containing the ``.txt`` label files.
        image_width: Pixel width used to normalize ``x_center`` and ``width``.
        image_height: Pixel height used to normalize ``y_center`` and ``height``.

    Returns:
        None. Progress and any problems found are printed.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``open`` (e.g. the directory
            does not exist).
    """
    print(f"Checking and normalizing: {label_dir}")

    # os.listdir order is arbitrary; sort so progress and messages are reproducible.
    txt_files = sorted(f for f in os.listdir(label_dir) if f.endswith(".txt"))

    for file in tqdm(txt_files, desc=f"Processing {os.path.basename(label_dir)}", unit="file"):
        path = os.path.join(label_dir, file)
        with open(path, "r") as f:
            lines = f.readlines()

        fixed_lines = []
        needs_fix = False

        for line in lines:
            parts = line.strip().split()
            if not parts:
                # Blank lines (e.g. a trailing empty line) are not malformed
                # annotations, so skip them without reporting an error.
                continue
            if len(parts) != 5:
                print(f"Invalid line in {file}: {line.strip()}")
                continue

            try:
                cls = int(parts[0])
                coords = list(map(float, parts[1:]))
            except ValueError:
                print(f"Non-numeric values in {file}: {line.strip()}")
                continue

            # Any value above 1.0 means the line is in pixel units, not normalized.
            if any(val > 1.0 for val in coords):
                needs_fix = True
                x, y, w, h = coords
                coords = [x / image_width, y / image_height, w / image_width, h / image_height]

            # Values still outside [0, 1] (wrong image size, negative input, NaN)
            # cannot be repaired here; report them instead of accepting them silently.
            if any(not 0.0 <= val <= 1.0 for val in coords):
                print(f"Out-of-range values in {file}: {line.strip()}")

            fixed_line = f"{cls} {' '.join(f'{v:.6f}' for v in coords)}"
            fixed_lines.append(fixed_line)

        # Leave files untouched unless at least one line actually changed.
        if needs_fix:
            with open(path, "w") as f:
                f.write("\n".join(fixed_lines) + "\n")

    print(f"Completed: {label_dir}\n")

In [None]:
# Run the label check on every split folder, in the same order as before:
# test, then validation, then train.
label_dirs = (
    "yolo_dataset/labels/test",
    "yolo_dataset/labels/validation",
    "yolo_dataset/labels/train",
)
for label_dir in label_dirs:
    check_and_normalize_yolo_labels(label_dir)

### Training the YOLO Model

In [None]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using: {device}')

# Load the pretrained weights to start from
# (yolov8s.pt is a slightly larger alternative).
model = YOLO('models/yolov8n.pt')

In [None]:
# Number of parallel data-loading workers. Raising it speeds up data loading,
# but per the author's note 2 workers already need about 32GB of RAM;
# with too little memory the training run will crash.
num_workers = 2

# Every training option in one place; passed unchanged to model.train() below.
train_settings = dict(
    data="yolo_dataset/data.yaml",
    epochs=25,                  # raise for better results
    imgsz=640,                  # match your image size if you prefer (or 128 for speed)
    batch=16,
    lr0=0.01,                   # base learning rate
    optimizer='SGD',            # 'Adam' is the alternative
    project="dominic_yolo_runs",
    name="finetuned_model",
    exist_ok=True,
    device=device,
    workers=num_workers,        # parallel data loading
    cache=True,                 # speeds up subsequent epochs
    plots=False,                # skip image generation to save training time
    verbose=False,              # turn off logging
)

model.train(**train_settings)

In [None]:
# Announce the end of training, followed by one empty line.
print("\nTraining complete. Here's a quick look at results:", end="\n\n")

In [None]:
# Evaluate the model; as the last expression of the cell, whatever val()
# returns is shown as the cell output.
# NOTE(review): with no arguments this presumably validates on the dataset/split
# remembered from training — confirm against the Ultralytics documentation.
model.val()

### Finetuning YOLO Model