# Trolley YOLO-OBB Training (Real Data)

This notebook trains a YOLO-OBB model on Real-World data.

**Workflow:**
1. **Environment Setup**: Install and import required libraries.
2. **Configuration**: Set constants and seeds.
3. **Data Augmentation**: Define geometric and color augmentations.
4. **Data Preparation**: Split real data into Train/Val respecting existing splits.
5. **Format Conversion**: Convert polygon annotations to OBB format.
6. **Training**: Train the YOLO-OBB model (the training cell loads the `yolo26l-obb.pt` checkpoint).


## 1. Environment Setup

In [None]:
# Install required libraries
!pip install -q ultralytics albumentations opencv-python numpy pyyaml tqdm scikit-learn

In [None]:
import random
import shutil
import cv2
import os
from pathlib import Path
from tqdm import tqdm
import albumentations as A
from ultralytics import YOLO


## 2. Configuration

In [None]:
# Seed Python's built-in `random` module once, up front.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Lower-case file suffixes (leading dot included) that count as images
# when dataset folders are scanned.
IMG_EXTS = [".jpg", ".jpeg", ".png"]


## 3. Data Augmentation Definitions

In [None]:
# Keypoint handling shared by every pipeline below: keypoints are plain
# (x, y) pairs and are not dropped when a transform moves them out of frame.
kp_params = A.KeypointParams(format="xy", remove_invisible=False)


def _keypoint_pipeline(*transforms):
    """Wrap the given transforms in a Compose that also transforms keypoints."""
    return A.Compose(list(transforms), keypoint_params=kp_params)


# Photometric pipelines, one transform each.
# NOTE(review): every transform here has p < 1, so a pipeline may return the
# input image unchanged -- confirm that is intended where each pipeline's
# output is saved as a separate sample.
common_augs = {
    "brightness_contrast": _keypoint_pipeline(A.RandomBrightnessContrast(p=0.3)),
    "hue_saturation": _keypoint_pipeline(A.HueSaturationValue(p=0.3)),
    "gauss_noise": _keypoint_pipeline(A.GaussNoise(p=0.2)),
    "motion_blur": _keypoint_pipeline(A.MotionBlur(blur_limit=5, p=0.2)),
}

# Full geometric pipeline: with probability 0.7 apply exactly one of
# horizontal flip, vertical flip, a fixed 45-degree or a fixed 90-degree rotation.
geo_full = _keypoint_pipeline(
    A.OneOf(
        [
            A.HorizontalFlip(p=1.0),
            A.VerticalFlip(p=1.0),
            A.Rotate(limit=(45, 45), border_mode=cv2.BORDER_CONSTANT, p=1.0),
            A.Rotate(limit=(90, 90), border_mode=cv2.BORDER_CONSTANT, p=1.0),
        ],
        p=0.7,
    )
)

# Restricted geometric pipeline: horizontal flip only.
geo_restricted = _keypoint_pipeline(A.HorizontalFlip(p=0.5))

# Named pipeline sets: the photometric pipelines plus one "geometric" entry.
AUGMENTATIONS_FULL = dict(common_augs, geometric=geo_full)
AUGMENTATIONS_RESTRICTED = dict(common_augs, geometric=geo_restricted)


## 4. Helper Functions
Basic file I/O and geometric conversion utilities.

In [None]:
def load_label_file(path):
    """Parse a whitespace-separated numeric label file into rows of floats.

    Args:
        path: Location of the label text file.

    Returns:
        A list with one ``list[float]`` per non-blank line, in file order.

    Raises:
        ValueError: If a token on a non-blank line cannot be parsed as a float.
    """
    rows = []
    with open(path) as f:
        for line in f:
            tokens = line.split()
            # Blank / whitespace-only lines (e.g. a stray trailing newline)
            # carry no annotation; skipping them keeps empty rows away from
            # callers that index into each row.
            if tokens:
                rows.append([float(tok) for tok in tokens])
    return rows


def save_label_file(path, entries):
    """Write label entries to ``path``, one space-separated line per entry.

    Args:
        path: Destination file; it is opened in write mode, so existing
            content is replaced.
        entries: Iterable of ``(cls, coords)`` pairs. ``cls`` is written via
            ``str()``; each value in ``coords`` is written with six decimals.
    """
    with open(path, "w") as out:
        for class_id, values in entries:
            fields = [str(class_id)]
            fields.extend(f"{v:.6f}" for v in values)
            out.write(" ".join(fields) + "\n")


import numpy as np

def polygon_to_minrect_norm(points, w, h):
    """Fit a minimum-area rectangle to four points and scale it by image size.

    Args:
        points: Sequence of exactly four ``(x, y)`` points.
        w: Divisor applied to every x coordinate (image width).
        h: Divisor applied to every y coordinate (image height).

    Returns:
        A flat list ``[x0/w, y0/h, x1/w, y1/h, ...]`` built from the corners
        returned by ``cv2.boxPoints``, or ``None`` when ``points`` does not
        contain exactly four items.
    """
    if len(points) != 4:
        return None

    corners = cv2.boxPoints(cv2.minAreaRect(np.array(points, dtype=np.float32)))

    # Interleave the scaled coordinates corner by corner: x first, then y.
    return [value for cx, cy in corners for value in (cx / w, cy / h)]


## 5. Data Processing Pipeline
Logic to gather existing splits and process the dataset.

In [None]:
import cv2
import random
from pathlib import Path
from tqdm import tqdm
import albumentations as A

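# This cell relies on names defined in earlier cells of this notebook:
#   IMG_EXTS                  (accepted image suffixes, see "Configuration")
#   AUGMENTATIONS_RESTRICTED  (see "Data Augmentation Definitions")
#   AUGMENTATIONS_FULL        (see "Data Augmentation Definitions")
#   polygon_to_minrect_norm   (see "Helper Functions")
#   load_label_file           (see "Helper Functions")
#   save_label_file           (see "Helper Functions")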


def gather_existing_splits(dataset_dirs, splits=None):
    """Collect (image, label, source) triples from pre-existing split folders.

    Two directory layouts are recognised for each dataset root ``ds`` and
    split folder name ``split`` (the first one found wins):
      1. ``ds/images/<split>`` with ``ds/labels/<split>``
      2. ``ds/<split>/images`` with ``ds/<split>/labels``

    An image is paired with ``<labels_dir>/<image stem>.txt``; images without
    such a label file are ignored.

    Args:
        dataset_dirs: Iterable of dataset root directories (str or Path).
        splits: Optional iterable of split folder names to look for. Defaults
            to the two folder names used by this project. A name that starts
            with "train" (case-insensitive) feeds the train list; every other
            name feeds the validation list.

    Returns:
        ``(all_train, all_val)``: two lists of
        ``(image_path, label_path, source_id)`` tuples, where ``source_id`` is
        the dataset root's folder name.
    """
    if splits is None:
        # Exact on-disk folder names of the pre-made splits.
        splits = ['train(80_ REAL DATA)', 'test(20_ REAL DATA)']

    all_train = []
    all_val = []

    for ds in dataset_dirs:
        ds = Path(ds)  # accept plain strings as well as Path objects
        source_id = ds.name

        for split in splits:
            # Candidate (images, labels) directory pairs, in priority order.
            layouts = (
                (ds / "images" / split, ds / "labels" / split),
                (ds / split / "images", ds / split / "labels"),
            )
            # is_dir() is already False for a missing path, so no exists() check.
            found = next(
                ((img_d, lbl_d) for img_d, lbl_d in layouts if img_d.is_dir()),
                None,
            )
            if found is None:
                # This split is not present in this dataset.
                continue
            images_dir, labels_dir = found

            if not labels_dir.exists():
                print(f"Warning: Labels dir not found for {images_dir}")
                continue

            # Folder names starting with "train" feed training; anything else
            # (e.g. "test(20...)") is used for validation.
            target = all_train if split.lower().startswith('train') else all_val

            count = 0
            # iterdir() yields entries in arbitrary order; sorting makes the
            # returned pair order reproducible from run to run.
            for img_p in sorted(images_dir.iterdir()):
                if img_p.suffix.lower() not in IMG_EXTS:
                    continue
                lab_p = labels_dir / f"{img_p.stem}.txt"
                if lab_p.exists():
                    target.append((img_p, lab_p, source_id))
                    count += 1

            print(f"Found {count} pairs in {source_id} [{split}]")

    return all_train, all_val

def process_dataset(file_list, mode, output_root):
    """Convert image/label pairs into ``output_root`` in OBB label format.

    For ``mode == "train"`` every pipeline in the active augmentation set is
    run on each image and each result is saved as ``<name>_<aug_name>``.
    For any other mode a single un-augmented copy is saved as ``<name>``.
    Output names are ``<source_id>__<image stem>``; images are written as
    ``.jpg`` and labels as ``.txt``.

    Images that cannot be read, label files that cannot be parsed, label
    files with a row shorter than ``class + 8 coordinates`` and images with
    no labels are skipped (with a printed message, except for the empty-label
    case) instead of aborting the run.

    Args:
        file_list: Iterable of ``(image_path, label_path, source_id)`` tuples.
        mode: Split name; also the sub-folder name under ``images/`` and
            ``labels/``. Only ``"train"`` enables augmentation.
        output_root: ``pathlib.Path`` of the output dataset root.
    """
    img_out_dir = output_root / "images" / mode
    lab_out_dir = output_root / "labels" / mode
    img_out_dir.mkdir(parents=True, exist_ok=True)
    lab_out_dir.mkdir(parents=True, exist_ok=True)

    is_train = mode == "train"
    # Sources that only receive the restricted augmentation set.
    excluded_folders = {"Capture_as_our_dataset", "Capture_scene_8"}

    # Non-train modes use one identity pipeline (format conversion only);
    # it is loop-invariant, so build it once rather than per image.
    identity_augs = None
    if not is_train:
        identity_augs = {
            "origin": A.Compose(
                [],
                keypoint_params=A.KeypointParams(format="xy", remove_invisible=False),
            )
        }

    for img_p, lab_p, source_id in tqdm(file_list, desc=f"Processing {mode}"):
        if not is_train:
            active_augs = identity_augs
        elif source_id in excluded_folders:
            active_augs = AUGMENTATIONS_RESTRICTED
        else:
            active_augs = AUGMENTATIONS_FULL

        img = cv2.imread(str(img_p))
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            print(f"Warning: could not read image {img_p}; skipping")
            continue
        h, w = img.shape[:2]

        try:
            # Drop empty rows (blank lines) so they cannot break indexing below.
            labels = [row for row in load_label_file(lab_p) if row]
        except (OSError, ValueError) as e:
            print(f"Error processing {img_p.name}: {e}")
            continue
        if not labels:
            # Nothing to convert; no output is produced for this image.
            continue
        if any(len(row) < 9 for row in labels):
            # Each row needs a class id plus 4 (x, y) corner pairs. Skip the
            # whole image rather than silently dropping one of its objects.
            print(f"Warning: malformed label row in {lab_p}; skipping {img_p.name}")
            continue

        # Flatten the first four normalised corners of every object into
        # pixel-space keypoints: 4 consecutive keypoints per label row.
        keypoints = [
            (row[1 + i] * w, row[2 + i] * h)
            for row in labels
            for i in range(0, 8, 2)
        ]

        base_name = f"{source_id}__{img_p.stem}"

        for aug_name, aug in active_augs.items():
            try:
                out = aug(image=img, keypoints=keypoints)
                if len(out["keypoints"]) != len(keypoints):
                    # The 4-per-object grouping is lost if any keypoint vanished.
                    continue

                out_img = out["image"]
                out_h, out_w = out_img.shape[:2]

                # Re-fit a rectangle to each object's transformed corners.
                entries = []
                for i, lbl in enumerate(labels):
                    pts = out["keypoints"][i * 4:(i + 1) * 4]
                    rect = polygon_to_minrect_norm(pts, out_w, out_h)
                    if rect is not None:
                        entries.append((int(lbl[0]), rect))

                if not entries:
                    continue

                suffix = f"_{aug_name}" if is_train else ""
                new_name = f"{base_name}{suffix}"

                # cv2.imwrite returns False on failure; do not write a label
                # file for an image that was not saved.
                if not cv2.imwrite(str(img_out_dir / f"{new_name}.jpg"), out_img):
                    print(f"Warning: could not write image for {new_name}; skipping")
                    continue
                save_label_file(lab_out_dir / f"{new_name}.txt", entries)

            except Exception as e:
                # Best-effort: report the failure and carry on with the rest.
                print(f"Error processing {img_p.name}: {e}")

### Execute Data Processing

In [None]:
# Dataset roots to scan, and the root folder of the converted dataset.
dataset_paths = [Path("Real_Data")]
final_output = Path("final_dataset_real")

# Collect (image, label, source) triples from the pre-existing split folders.
train_files, val_files = gather_existing_splits(dataset_paths)

# Convert each split in turn; the mode string names the images/<mode> and
# labels/<mode> output sub-folders.
for split_mode, split_files in (("train", train_files), ("val", val_files)):
    process_dataset(split_files, split_mode, final_output)

## 6. Dataset YAML Configuration

In [None]:
def create_yaml(out_dir, nc, names):
    """Write ``dataset_real.yaml`` (dataset description) into ``out_dir``.

    The file lists the absolute dataset root, the relative image folders for
    the train/val/test splits, the class count and the class names.

    Args:
        out_dir: ``pathlib.Path`` of an existing dataset root directory.
        nc: Number of classes, written verbatim.
        names: Class names. A list/tuple is written as a quoted flow
            sequence; any other value is written via ``str()``.
    """
    # Local import keeps this cell self-contained. JSON string and array
    # syntax is also valid YAML, so json.dumps gives safely quoted values
    # even when the path or a name contains characters such as " #" or ": ".
    import json

    path_yaml = json.dumps(str(out_dir.resolve()), ensure_ascii=False)
    if isinstance(names, (list, tuple)):
        names_yaml = json.dumps(list(names), ensure_ascii=False)
    else:
        names_yaml = str(names)

    lines = [
        f"path: {path_yaml}",
        "train: images/train",
        "val: images/val",
        # NOTE(review): no images/test folder is produced by the processing
        # calls visible in this notebook -- confirm it exists before using it.
        "test: images/test",
        "",
        f"nc: {nc}",
        f"names: {names_yaml}",
    ]
    with open(out_dir / "dataset_real.yaml", "w", encoding="utf-8") as f:
        f.write("\n".join(lines))


In [None]:
# Single-class dataset: one class, named "object".
create_yaml(final_output, names=["object"], nc=1)


## 7. Model Training

In [None]:
from ultralytics import YOLO

# Checkpoint the training run starts from.
model = YOLO("yolo26l-obb.pt")

train_args = dict(
    # Dataset description file and basic run length / input size.
    data="final_dataset_real/dataset_real.yaml",
    epochs=100,
    imgsz=640,
    batch=10,
    workers=20,
    # Optimizer settings.
    optimizer="AdamW",
    lr0=0.001,
    lrf=0.01,
    momentum=0.937,
    weight_decay=0.0005,
    # Schedule, checkpointing and runtime options.
    close_mosaic=15,
    patience=25,
    save_period=10,
    val=True,
    amp=True,
    device=0,
    # Augmentation settings passed to the trainer.
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=10.0,
    translate=0.1,
    scale=0.9,
    fliplr=0.5,
    mosaic=1.0,
    mixup=0.1,
    erasing=0.3,
    perspective=0.0005,
    # Output location of the run.
    project="paper_experiment",
    # NOTE(review): the run name says "Synthesized_only" while `data` points
    # at final_dataset_real -- confirm the name is intended.
    name="YOLO26l_OBB_E1_Synthesized_only",
)

model.train(**train_args)
