# Trolley YOLO-OBB Training (Synthetic Data)

This notebook trains a YOLO-OBB model on synthetic data.

**Workflow:**
1. **Environment Setup**: Install and import required libraries.
2. **Configuration**: Set constants and seeds.
3. **Data Augmentation**: Define geometric and color augmentations.
4. **Data Preparation**: Split synthetic data into Train/Val and apply augmentations.
5. **Format Conversion**: Convert polygon annotations to OBB format.
6. **Training**: Train the YOLO26-OBB model (`yolo26l-obb.pt`).


## 1. Environment Setup

In [2]:
# Install required libraries
!pip install -q ultralytics albumentations opencv-python numpy pyyaml tqdm scikit-learn

In [1]:
import random
import shutil
import cv2
import os
from pathlib import Path
from tqdm import tqdm
import albumentations as A
from ultralytics import YOLO


## 2. Configuration

In [2]:
# Lower-case file suffixes (with the leading dot) that count as images when
# dataset folders are scanned.
IMG_EXTS = [".jpg", ".jpeg", ".png"]
# Seed for Python's built-in `random` module, which performs the train/val shuffle.
# NOTE(review): whether this also makes the albumentations transforms
# deterministic depends on the installed albumentations version — confirm.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)


## 3. Data Augmentation Definitions

In [3]:
# Polygon corners travel through albumentations as pixel-space (x, y) keypoints.
# `remove_invisible=False` keeps corners that end up outside the frame, so the
# number of keypoints never changes across a transform.
kp_params = A.KeypointParams(format="xy", remove_invisible=False)

# Photometric pipelines: each entry wraps exactly one probabilistic transform.
common_augs = {
    aug_name: A.Compose([transform], keypoint_params=kp_params)
    for aug_name, transform in (
        ("brightness_contrast", A.RandomBrightnessContrast(p=0.3)),
        ("hue_saturation", A.HueSaturationValue(p=0.3)),
        ("gauss_noise", A.GaussNoise(p=0.2)),
        ("motion_blur", A.MotionBlur(blur_limit=5, p=0.2)),
    )
}

# Full geometric pipeline: with probability 0.7 apply one of a horizontal flip,
# a vertical flip, a fixed 45-degree rotation or a fixed 90-degree rotation.
geo_full = A.Compose(
    [
        A.OneOf(
            [
                A.HorizontalFlip(p=1.0),
                A.VerticalFlip(p=1.0),
                A.Rotate(limit=(45, 45), border_mode=cv2.BORDER_CONSTANT, p=1.0),
                A.Rotate(limit=(90, 90), border_mode=cv2.BORDER_CONSTANT, p=1.0),
            ],
            p=0.7,
        )
    ],
    keypoint_params=kp_params,
)

# Restricted geometric pipeline: horizontal flip only, half of the time.
geo_restricted = A.Compose(
    [A.HorizontalFlip(p=0.5)],
    keypoint_params=kp_params,
)

# Named pipeline sets; both share the photometric entries and differ only in
# the "geometric" entry.
AUGMENTATIONS_FULL = dict(common_augs, geometric=geo_full)
AUGMENTATIONS_RESTRICTED = dict(common_augs, geometric=geo_restricted)


  self._set_keys()


## 4. Helper Functions
Functions for loading and saving label files, and for converting polygon coordinates to minimum-area oriented bounding boxes (OBB).

In [4]:
def load_label_file(path):
    """Read a whitespace-separated numeric label file.

    Args:
        path: Path of the text file to read.

    Returns:
        A list with one entry per non-blank line; each entry is the list of
        floats parsed from that line. Blank or whitespace-only lines are
        skipped so that a trailing newline or an empty separator line does not
        produce an empty row.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a float.
    """
    rows = []
    with open(path) as f:
        for line in f:
            fields = line.split()
            # An empty field list means the line held only whitespace.
            if fields:
                rows.append([float(value) for value in fields])
    return rows


def save_label_file(path, entries):
    """Write label entries to a text file, one entry per line.

    Args:
        path: Destination file; it is created or overwritten.
        entries: Iterable of ``(cls, coords)`` pairs. ``cls`` is written via
            ``str`` and every value in ``coords`` with six decimal places,
            all separated by single spaces.
    """
    with open(path, "w") as handle:
        for class_id, values in entries:
            fields = [str(class_id)]
            fields.extend(format(value, ".6f") for value in values)
            handle.write(" ".join(fields) + "\n")


import numpy as np

def polygon_to_minrect_norm(points, w, h):
    """Fit a minimum-area rectangle to four points and normalise its corners.

    Args:
        points: Sequence of exactly four ``(x, y)`` points in pixels.
        w: Image width used to normalise x coordinates.
        h: Image height used to normalise y coordinates.

    Returns:
        A flat list ``[x1, y1, x2, y2, x3, y3, x4, y4]`` holding the rectangle
        corners divided by ``w`` and ``h``, or ``None`` when ``points`` does
        not contain exactly four points.
    """
    if len(points) != 4:
        return None

    # cv2.minAreaRect fits the rotated rectangle; cv2.boxPoints expands it
    # into its four corner points.
    corners = cv2.boxPoints(cv2.minAreaRect(np.array(points, dtype=np.float32)))

    flat = []
    for corner_x, corner_y in corners:
        flat.extend((corner_x / w, corner_y / h))
    return flat


## 5. Data Processing Pipeline
Logic to split the dataset and apply augmentations to the training set.

In [5]:
def gather_and_split(dataset_dirs, train_ratio=0.8, img_exts=None):
    """Collect image/label pairs per dataset folder and split them into train/val.

    Every dataset folder is expected to contain ``images/`` and ``labels/``
    sub-folders. Each folder is shuffled and split on its own, so every source
    contributes to both splits in the same proportion.

    Args:
        dataset_dirs: Iterable of ``pathlib.Path`` dataset folders.
        train_ratio: Fraction of each folder's pairs assigned to the train split.
        img_exts: Lower-case suffixes (with leading dot) accepted as images.
            Defaults to the notebook-level ``IMG_EXTS``.

    Returns:
        ``(train, val)``: two lists of ``(image_path, label_path, source_id)``
        tuples, where ``source_id`` is the dataset folder name.

    Raises:
        OSError: If a dataset's ``images`` folder cannot be listed.
    """
    if img_exts is None:
        img_exts = IMG_EXTS

    all_train = []
    all_val = []

    for ds in dataset_dirs:
        images_dir = ds / "images"
        labels_dir = ds / "labels"
        source_id = ds.name

        # iterdir() yields entries in a filesystem-dependent order. Sorting
        # first makes the seeded shuffle below give the same split everywhere.
        image_files = sorted(
            p for p in images_dir.iterdir() if p.suffix.lower() in img_exts
        )

        # Keep only images that have a label file with the same stem.
        valid_pairs = []
        for img_p in image_files:
            lab_p = labels_dir / f"{img_p.stem}.txt"
            if lab_p.exists():
                valid_pairs.append((img_p, lab_p, source_id))

        # Shuffle with the global `random` state, then cut at the ratio.
        random.shuffle(valid_pairs)
        split_idx = int(len(valid_pairs) * train_ratio)

        all_train.extend(valid_pairs[:split_idx])
        all_val.extend(valid_pairs[split_idx:])

    return all_train, all_val

def process_dataset(file_list, mode, output_root,
                    restricted_sources=("Capture_as_our_dataset", "Capture_scene_8")):
    """Augment (train only) and export image/label pairs in OBB format.

    For every input pair the image is read, the label polygons are turned into
    pixel keypoints, each active augmentation pipeline is applied, and the
    resulting corners are refitted to a minimum-area rectangle. Results are
    written to ``output_root/images/<mode>`` and ``output_root/labels/<mode>``.

    In ``"train"`` mode one output is written per pipeline in the active
    augmentation set, with ``_<pipeline name>`` appended to the file name. In
    any other mode a single identity pipeline is used and no suffix is added.
    Images for which no label entry survives are not written.

    Args:
        file_list: Iterable of ``(image_path, label_path, source_id)`` tuples.
        mode: Split name; ``"train"`` enables augmentation.
        output_root: Root folder of the generated dataset (``pathlib.Path``).
        restricted_sources: Source ids that only receive the restricted
            augmentation set in train mode.

    Problems with a single file are reported with ``print`` and that file (or
    that augmentation) is skipped; processing continues with the next one.
    """
    img_out_dir = output_root / "images" / mode
    lab_out_dir = output_root / "labels" / mode
    img_out_dir.mkdir(parents=True, exist_ok=True)
    lab_out_dir.mkdir(parents=True, exist_ok=True)

    is_train = mode == "train"
    # The identity pipeline is loop-invariant, so build it once per call
    # instead of once per image.
    identity_augs = None
    if not is_train:
        identity_augs = {
            "origin": A.Compose(
                [],
                keypoint_params=A.KeypointParams(format="xy", remove_invisible=False),
            )
        }

    for img_p, lab_p, source_id in tqdm(file_list, desc=f"Processing {mode}"):
        # Choose the augmentation set for this sample.
        if not is_train:
            active_augs = identity_augs
        elif source_id in restricted_sources:
            active_augs = AUGMENTATIONS_RESTRICTED
        else:
            active_augs = AUGMENTATIONS_FULL

        # Load the image; cv2.imread returns None instead of raising.
        img = cv2.imread(str(img_p))
        if img is None:
            print(f"Skipping {img_p.name}: image could not be read")
            continue
        h, w = img.shape[:2]

        try:
            raw_labels = load_label_file(lab_p)
        except (OSError, ValueError) as e:
            print(f"Error reading labels {lab_p.name}: {e}")
            continue

        # A usable row holds a class id followed by at least four (x, y)
        # pairs. Shorter rows would raise IndexError below, so drop them here.
        labels = [lbl for lbl in raw_labels if len(lbl) >= 9]
        if len(labels) != len(raw_labels):
            print(
                f"Skipping {len(raw_labels) - len(labels)} malformed label row(s) "
                f"in {lab_p.name}"
            )

        # Normalised polygon corners -> pixel keypoints, four per label.
        # Only the first four points of a row are used.
        keypoints = [
            (lbl[1 + k] * w, lbl[2 + k] * h)
            for lbl in labels
            for k in range(0, 8, 2)
        ]

        base_name = f"{source_id}__{img_p.stem}"

        for aug_name, aug in active_augs.items():
            try:
                out = aug(image=img, keypoints=keypoints)
                # Corners are sliced by position below, so a changed keypoint
                # count would misalign labels and corners.
                if len(out["keypoints"]) != len(keypoints):
                    continue

                out_h, out_w = out["image"].shape[:2]

                # Refit every group of four corners to a normalised OBB.
                entries = []
                for i, lbl in enumerate(labels):
                    pts = out["keypoints"][i * 4:(i + 1) * 4]
                    rect = polygon_to_minrect_norm(pts, out_w, out_h)
                    if rect is not None:
                        entries.append((int(lbl[0]), rect))

                if not entries:
                    continue

                suffix = f"_{aug_name}" if is_train else ""
                new_name = f"{base_name}{suffix}"

                # cv2.imwrite signals failure through its return value; do not
                # write a label file for an image that was not saved.
                img_out_path = img_out_dir / f"{new_name}.jpg"
                if not cv2.imwrite(str(img_out_path), out["image"]):
                    print(f"Error processing {img_p.name}: could not write {img_out_path}")
                    continue
                save_label_file(lab_out_dir / f"{new_name}.txt", entries)

            except Exception as e:
                # Best effort: one failing augmentation must not stop the run.
                print(f"Error processing {img_p.name}: {e}")


### Execute Data Processing

In [6]:
DATASETS = [
    Path("Synthetized_Data/Capture_as_our_dataset"),
    Path("Synthetized_Data/Capture_scene_8"),
    Path("Synthetized_Data/Capture_zone_with_human_occ2"),
    Path("Synthetized_Data/scene_10"),
]

FINAL_DATASET = Path("final_dataset_synthesized")

# Re-seed right before splitting so that re-running this cell reproduces the
# split of a fresh "Restart & Run All" instead of continuing the RNG stream.
random.seed(RANDOM_SEED)

# Remove the splits written by an earlier run. Without this, files from a
# previous (different) split stay on disk and the same source image can end up
# in both train and val. Only the folders generated below are cleared.
for _sub in ("images/train", "images/val", "labels/train", "labels/val"):
    _stale_dir = FINAL_DATASET / _sub
    if _stale_dir.exists():
        shutil.rmtree(_stale_dir)

# 1. Split First
train_files, val_files = gather_and_split(DATASETS, train_ratio=0.8)
print(f"Split: {len(train_files)} Train, {len(val_files)} Val")

# Sanity check: no image may belong to both splits.
assert not ({p for p, _, _ in train_files} & {p for p, _, _ in val_files}), \
    "train/val overlap"

# 2. Process Train (With Augmentation)
process_dataset(train_files, mode="train", output_root=FINAL_DATASET)

# 3. Process Val (No Augmentation)
process_dataset(val_files, mode="val", output_root=FINAL_DATASET)


Split: 651 Train, 166 Val


Processing train: 100%|██████████| 651/651 [00:06<00:00, 97.22it/s] 
Processing val: 100%|██████████| 166/166 [00:00<00:00, 436.33it/s]


## 6. Dataset YAML Configuration

In [7]:
def create_yaml(out_dir, nc, names):
    """Write ``dataset_synthesized.yaml`` describing the dataset in ``out_dir``.

    The file holds the resolved dataset root, the relative ``images/train``,
    ``images/val`` and ``images/test`` folders, the class count and the class
    names. This function only writes the YAML file; it does not create or
    check any of the referenced folders.

    Args:
        out_dir: Existing dataset root (``pathlib.Path``); the YAML is written
            into it.
        nc: Number of classes, written as given.
        names: Class names, either a sequence of names or a mapping of
            index -> name.

    Raises:
        OSError: If the file cannot be written.
    """
    import json  # local import: only used to quote YAML scalars

    def quote(value):
        # A JSON string literal is also a valid YAML double-quoted scalar.
        # Quoting keeps characters such as " #" or ": " inside a path or class
        # name from being read as YAML syntax.
        return json.dumps(str(value), ensure_ascii=False)

    if isinstance(names, dict):
        names_yaml = "{" + ", ".join(
            f"{key}: {quote(value)}" for key, value in names.items()
        ) + "}"
    else:
        names_yaml = "[" + ", ".join(quote(value) for value in names) + "]"

    lines = [
        f"path: {quote(out_dir.resolve())}",
        "train: images/train",
        "val: images/val",
        "test: images/test",
        "",
        f"nc: {nc}",
        f"names: {names_yaml}",
    ]

    # Explicit UTF-8 so non-ASCII paths or names do not depend on the locale.
    with open(out_dir / "dataset_synthesized.yaml", "w", encoding="utf-8") as f:
        f.write("\n".join(lines))


In [8]:
# Write the dataset YAML into FINAL_DATASET for a single class named "object".
create_yaml(FINAL_DATASET, nc=1, names=["object"])


## 7. Model Training

In [None]:
from ultralytics import YOLO

# Load the OBB model from the "yolo26l-obb.pt" weights file.
model = YOLO("yolo26l-obb.pt")

model.train(
    # --- Dataset and run bookkeeping ---
    data="final_dataset_synthesized/dataset_synthesized.yaml",
    project="paper_experiment",
    name="YOLO26l_OBB_E1_Synthesized_only",
    save_period=10,
    val=True,

    # --- Schedule and hardware ---
    epochs=100,
    patience=25,
    imgsz=640,
    batch=10,
    workers=20,
    device=0,
    amp=True,

    # --- Optimiser ---
    optimizer="AdamW",
    lr0=0.001,
    lrf=0.01,
    momentum=0.937,
    weight_decay=0.0005,

    # --- Online augmentation (applied by the trainer, on top of the
    #     augmented files generated earlier in this notebook) ---
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=10.0,
    translate=0.1,
    scale=0.9,
    fliplr=0.5,
    perspective=0.0005,
    mosaic=1.0,
    close_mosaic=15,
    mixup=0.1,
    erasing=0.3,
)
