# 🐟 YOLO Ultralytics Native Augmentation Visualization

This notebook uses **real Ultralytics augmentation classes** - the same code that runs during training!

In [None]:
import os
import random
from copy import deepcopy
from pathlib import Path
from types import SimpleNamespace

import cv2
import matplotlib.pyplot as plt
import numpy as np
import yaml

# Ultralytics imports
from ultralytics.data.dataset import YOLODataset
from ultralytics.data.augment import (
    Compose, CopyPaste, LetterBox, MixUp, Mosaic,
    RandomFlip, RandomHSV, RandomPerspective
)

print("‚úì Imports successful")

## 📁 Configuration

In [None]:
# Dataset path
DATASET_PATH = "yolo_dataset_segmentation_path"
DATA_YAML = os.path.join(DATASET_PATH, "data.yaml")
IMGSZ = 640

# Reproducibility: sample indices in the cells below are drawn with `random`,
# so seed it here. NumPy is seeded as well in case the augmentations draw from
# it. Change SEED to see a different (but still repeatable) set of figures.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Hyperparameters - MUST be SimpleNamespace for Ultralytics compatibility
# (values are read below via attribute access, e.g. hyp.mosaic).
hyp = SimpleNamespace(
    # Main augmentations (from segmentation_yolo.sh)
    mosaic=1.0,
    mixup=0.15,
    copy_paste=0.3,

    # Geometric
    degrees=0.0,
    translate=0.1,
    scale=0.5,
    shear=0.0,
    perspective=0.0,
    flipud=0.0,
    fliplr=0.5,

    # Color
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,

    # Segmentation-specific (required by Ultralytics)
    mask_ratio=4,
    overlap_mask=True,
    bgr=0.0,
)

print(f"Dataset: {DATASET_PATH}")
print(f"Image Size: {IMGSZ}")
print(f"\nAugmentation settings:")
print(f"  Mosaic: {hyp.mosaic}")
print(f"  MixUp: {hyp.mixup}")
print(f"  Copy-Paste: {hyp.copy_paste}")

## 📊 Load Dataset

In [None]:
# Load data.yaml (explicit encoding so the file parses the same on every platform)
with open(DATA_YAML, 'r', encoding='utf-8') as f:
    data = yaml.safe_load(f)

if not isinstance(data, dict) or not data.get('train'):
    raise KeyError(f"No 'train' entry found in {DATA_YAML}")

# Fix paths - convert relative to absolute.
# A split may be a single path or a list of paths; empty/missing splits are skipped.
for key in ['train', 'val']:
    entry = data.get(key)
    if entry is None:
        continue
    if isinstance(entry, (list, tuple)):
        data[key] = [p if os.path.isabs(p) else os.path.join(DATASET_PATH, p) for p in entry]
    elif not os.path.isabs(entry):
        data[key] = os.path.join(DATASET_PATH, entry)

train_paths = data['train'] if isinstance(data['train'], list) else [data['train']]
missing_paths = [p for p in train_paths if not os.path.exists(p)]

print(f"Train path: {data['train']}")
print(f"Train exists: {not missing_paths}")

# Fail here with a clear message instead of deep inside the dataset constructor.
if missing_paths:
    raise FileNotFoundError(f"Training path(s) not found: {missing_paths}")

# Create dataset
dataset = YOLODataset(
    img_path=data['train'],
    imgsz=IMGSZ,
    batch_size=1,
    augment=False,
    hyp=hyp,
    rect=False,
    cache=False,
    single_cls=True,
    stride=32,
    pad=0.0,
    prefix='train: ',
    task='segment',
    classes=None,
    data=data,
    fraction=1.0
)

# NOTE(review): Ultralytics' Mosaic appears to pick its partner images from
# `dataset.buffer`, which seems to be filled only when augment=True. Seed it
# with every index so mosaics can be built from this non-augmenting dataset.
# Confirm against the installed Ultralytics version.
if not getattr(dataset, 'buffer', None):
    dataset.buffer = list(range(len(dataset)))

print(f"\n‚úì Dataset loaded")
print(f"  Total samples: {len(dataset)}")

## 🎨 Visualization Helper

In [None]:
def visualize_sample(labels_dict):
    """Overlay segmentation polygons on a copy of the sample image.

    Polygons are taken from ``labels_dict['segments']`` when that key holds a
    value; they are treated as normalized coordinates and scaled by the image
    width/height. When the key is absent, the ``segments`` attribute of
    ``labels_dict['instances']`` is used instead (if present), and that
    object's ``normalized`` attribute (assumed True when missing) decides
    whether coordinates are scaled or already in pixels.

    Args:
        labels_dict: Dict with an ``'img'`` array whose first two dimensions
            are height and width, plus optional ``'segments'`` /
            ``'instances'`` entries as described above.

    Returns:
        A new image with filled, outlined polygons blended in at 40% opacity,
        or an unmodified copy of the image when there is nothing to draw.
        The input dict and its arrays are never modified.
    """
    img = labels_dict['img'].copy()

    segments = labels_dict.get('segments')
    normalized = True
    if segments is None:
        instances = labels_dict.get('instances')
        segments = getattr(instances, 'segments', None)
        normalized = getattr(instances, 'normalized', True)

    # Use len() rather than truthiness: a multi-element ndarray raises
    # ValueError when evaluated as a boolean.
    if segments is None or len(segments) == 0:
        return img

    h, w = img.shape[:2]
    overlay = img.copy()
    drew_any = False

    for segment in segments:
        # np.array(...) always copies, so the caller's polygon is left intact,
        # and it also accepts plain Python lists of points.
        pts = np.array(segment, dtype=np.float32)
        if pts.size == 0:
            continue
        pts = pts.reshape(-1, 2)
        if normalized:
            pts[:, 0] *= w
            pts[:, 1] *= h
        pts = pts.astype(np.int32)

        cv2.fillPoly(overlay, [pts], (0, 255, 0))
        cv2.polylines(overlay, [pts], True, (255, 255, 0), 2)
        drew_any = True

    if not drew_any:
        return img

    return cv2.addWeighted(overlay, 0.4, img, 0.6, 0)

print("‚úì Helper loaded")

## 🖼️ View Original Sample

In [None]:
idx = random.randint(0, len(dataset) - 1)

# Use get_image_and_label() rather than dataset[idx]: indexing runs the
# dataset's own transform/format stage, whereas this returns the raw label
# dict that the Ultralytics augmentation classes consume.
# NOTE(review): confirm against the installed Ultralytics version.
sample = dataset.get_image_and_label(idx)

# Object count: prefer the 'instances' container when present, otherwise fall
# back to a plain 'segments' list.
instances = sample.get('instances')
num_objects = len(instances) if instances is not None else len(sample.get('segments', []))

print(f"Sample {idx}:")
print(f"  Image shape: {sample['img'].shape}")
print(f"  Objects: {num_objects}")

img_vis = visualize_sample(sample)

fig, ax = plt.subplots(figsize=(12, 10))
ax.imshow(cv2.cvtColor(img_vis, cv2.COLOR_BGR2RGB))
ax.set_title(f'Original Sample (Before Augmentation)\n{num_objects} objects',
             fontsize=14, fontweight='bold')
ax.axis('off')
fig.tight_layout()
plt.show()

## 1️⃣ Mosaic Augmentation

In [None]:
mosaic = Mosaic(dataset, imgsz=IMGSZ, p=1.0)

fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.flatten()

for i, ax in enumerate(axes):
    idx = random.randint(0, len(dataset) - 1)
    # Raw label dict (not dataset[idx], which runs the dataset's format stage).
    sample = dataset.get_image_and_label(idx)

    try:
        # Augment a copy so the untouched sample is still available as a fallback.
        augmented = mosaic(deepcopy(sample))
        img_vis = visualize_sample(augmented)
    except Exception as e:
        # Same policy as the full-pipeline cell: report the error, show the original.
        print(f"Error {i}: {e}")
        img_vis = visualize_sample(sample)

    ax.imshow(cv2.cvtColor(img_vis, cv2.COLOR_BGR2RGB))
    ax.set_title(f'Mosaic {i+1}', fontsize=12, fontweight='bold')
    ax.axis('off')

plt.suptitle('üî∏ MOSAIC (combines 4 images)', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

print("‚úì Mosaic augmentation")

## 2️⃣ Copy-Paste Augmentation

In [None]:
copy_paste = CopyPaste(p=1.0)

fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.flatten()

for i, ax in enumerate(axes):
    idx = random.randint(0, len(dataset) - 1)
    # Raw label dict (not dataset[idx], which runs the dataset's format stage).
    sample = dataset.get_image_and_label(idx)
    sample['img'] = cv2.resize(sample['img'], (IMGSZ, IMGSZ))

    try:
        # Augment a copy: if the transform fails midway, `sample` is still
        # intact for the fallback rendering below.
        augmented = copy_paste(deepcopy(sample))
        img_vis = visualize_sample(augmented)
    except Exception as e:
        # Report instead of silently swallowing (a bare `except:` would also
        # trap KeyboardInterrupt and hide real bugs).
        print(f"Error {i}: {e}")
        img_vis = visualize_sample(sample)

    ax.imshow(cv2.cvtColor(img_vis, cv2.COLOR_BGR2RGB))
    ax.set_title(f'Copy-Paste {i+1}', fontsize=12, fontweight='bold')
    ax.axis('off')

plt.suptitle('üî∏ COPY-PASTE (objects copied between images)', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

print("‚úì Copy-Paste augmentation")

## 3️⃣ MixUp Augmentation

In [None]:
# MixUp blends two images pixel-wise, so both must have the same shape.
# Letterbox the base sample and (via pre_transform) the partner image to
# IMGSZ x IMGSZ.
# NOTE(review): MixUp's constructor is (dataset, pre_transform, p) with no
# `imgsz` keyword as far as I can tell; confirm against the installed version.
letterbox = LetterBox(new_shape=(IMGSZ, IMGSZ))
mixup = MixUp(dataset, pre_transform=letterbox, p=1.0)

fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.flatten()

for i, ax in enumerate(axes):
    idx = random.randint(0, len(dataset) - 1)
    # Raw label dict (not dataset[idx], which runs the dataset's format stage).
    sample = letterbox(dataset.get_image_and_label(idx))

    try:
        # Augment a copy so `sample` stays intact for the fallback.
        augmented = mixup(deepcopy(sample))
        img_vis = visualize_sample(augmented)
    except Exception as e:
        # Report instead of silently swallowing the failure.
        print(f"Error {i}: {e}")
        img_vis = visualize_sample(sample)

    ax.imshow(cv2.cvtColor(img_vis, cv2.COLOR_BGR2RGB))
    ax.set_title(f'MixUp {i+1}', fontsize=12, fontweight='bold')
    ax.axis('off')

plt.suptitle('üî∏ MIXUP (blends 2 images)', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

print("‚úì MixUp augmentation")

## 4️⃣ HSV Color Augmentation

In [None]:
hsv = RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v)

# One fixed image, re-augmented six times so only the colour jitter varies.
idx = random.randint(0, len(dataset) - 1)
# Raw label dict (not dataset[idx], which runs the dataset's format stage).
sample = dataset.get_image_and_label(idx)

fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.flatten()

for i, ax in enumerate(axes):
    # Fresh copy per panel so every variation starts from the same pixels.
    sample_copy = deepcopy(sample)
    augmented = hsv(sample_copy)

    img_vis = visualize_sample(augmented)
    ax.imshow(cv2.cvtColor(img_vis, cv2.COLOR_BGR2RGB))
    ax.set_title(f'HSV {i+1}', fontsize=12, fontweight='bold')
    ax.axis('off')

plt.suptitle(f'üî∏ HSV COLOR (H={hyp.hsv_h}, S={hyp.hsv_s}, V={hyp.hsv_v})',
             fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

print("‚úì HSV augmentation")

## 🎯 Full Augmentation Pipeline

Apply all augmentations together as in real training.

In [None]:
# Build full pipeline
transforms = Compose([
    Mosaic(dataset, imgsz=IMGSZ, p=hyp.mosaic),
    MixUp(dataset, imgsz=IMGSZ, p=hyp.mixup),
    RandomPerspective(
        degrees=hyp.degrees,
        translate=hyp.translate,
        scale=hyp.scale,
        shear=hyp.shear,
        perspective=hyp.perspective,
        border=(0, 0)
    ),
    CopyPaste(p=hyp.copy_paste),
    RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
    RandomFlip(direction='horizontal', p=hyp.fliplr),
])

print("‚úì Full pipeline created")
print(f"\nPipeline:")
for i, t in enumerate(transforms.transforms):
    print(f"  {i+1}. {t.__class__.__name__}")

In [None]:
# Generate training samples
fig, axes = plt.subplots(3, 4, figsize=(20, 15))
axes = axes.flatten()

for i in range(12):
    idx = random.randint(0, len(dataset) - 1)
    sample = deepcopy(dataset[idx])
    
    try:
        augmented = transforms(sample)
        img_vis = visualize_sample(augmented)
    except Exception as e:
        print(f"Error {i}: {e}")
        img_vis = visualize_sample(sample)
    
    axes[i].imshow(cv2.cvtColor(img_vis, cv2.COLOR_BGR2RGB))
    axes[i].set_title(f'Sample {i+1}', fontsize=11, fontweight='bold')
    axes[i].axis('off')

title = f'üéØ Full Training Pipeline (Ultralytics Native)\n'
title += f'Mosaic={hyp.mosaic}, MixUp={hyp.mixup}, Copy-Paste={hyp.copy_paste}'
plt.suptitle(title, fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

print("\n‚úì This is EXACTLY what happens during YOLO training!")

## 📝 Summary

### ✅ This notebook uses:
- **Real Ultralytics classes** from `ultralytics.data.augment`
- **YOLODataset** for loading
- **Same augmentation pipeline** as training

### Current settings (from `scripts/segmentation_yolo.sh`):
```bash
MOSAIC=1.0          # Always applied
MIXUP=0.15          # 15% probability
COPY_PASTE=0.3      # 30% probability
```

### To modify settings:
Edit `scripts/segmentation_yolo.sh` lines 119-121:
```bash
MOSAIC=1.0
MIXUP=0.15
COPY_PASTE=0.3
```