# YOLOv5 FSOCO Training

**Essential steps only:**
1. Setup YOLOv5
2. Convert FSOCO labels (Supervisely → YOLO)
3. Train
4. Inference

**Run from:** `driverless-ml-dev/perceptions/camera-pipeline/notebooks/`

**Activate venv in root/driverless-ml-dev directory, and connect kernel for this notebook to it**
open vsc terminal via ctrl+shift+`

first time only:
```
cd root/driverless-ml-dev
python -m venv venv
source venv/bin/activate
pip install ipykernel
python -m ipykernel install --name driverless-ml --display-name "Python (driverless-ml)" --user
```
then type `>developer reload` in the search bar at the top

to activate venv
```
cd /root/driverless-ml-dev
source venv/bin/activate
```
then type `>developer reload` in the search bar at the top

install libraries with: `pip install pillow tqdm pyyaml`

In [None]:
%cd ~/driverless-ml-dev
!python -m venv venv
!source venv/bin/activate
!venv/bin/python -m pip install --upgrade pip ipykernel
!venv/bin/python -m ipykernel install --name driverless-ml --display-name "Python (driverless-ml)" --user

/root/driverless-ml-dev
Installed kernelspec driverless-ml in /root/.local/share/jupyter/kernels/driverless-ml
/root


**Activate venv in root/driverless-ml-dev directory, and connect kernel for this notebook to it**

## 1. Setup

In [33]:

import os
import sys
from pathlib import Path

# Paths
ROOT = Path.cwd().parent.parent.parent  # driverless-ml-dev/
print(ROOT)
YOLO_DIR = ROOT / 'yolov5'
DATA_DIR = ROOT / 'ml_data'
FSOCO_RAW = ROOT / 'ml_data/perceptions/fsoco_raw'  # Download FSOCO here
FSOCO_YOLO = ROOT / 'ml_data/perceptions/fsoco_yolo'  # Converted dataset
FSOCO_MOD = ROOT / 'ml_data/perceptions/fsoco_mod'  # Working copy for preprocessing

print(f"Root: {ROOT}")
print(f"YOLOv5: {YOLO_DIR}")
print(f"Data: {DATA_DIR}")
print(f"Fsoco_raw: {FSOCO_RAW}")
print(f"Fsoco_mod: {FSOCO_MOD}")
print(f"Fsoco_yolo: {FSOCO_YOLO}")
for item in FSOCO_RAW.iterdir():
    print(f"  - {item.name}")

/
Root: /
YOLOv5: /yolov5
Data: /ml_data
Fsoco_raw: /ml_data/perceptions/fsoco_raw
Fsoco_mod: /ml_data/perceptions/fsoco_mod
Fsoco_yolo: /ml_data/perceptions/fsoco_yolo


In [26]:
# Clone YOLOv5
if not YOLO_DIR.exists():
    !cd {ROOT} && git clone https://github.com/ultralytics/yolov5.git
    print("✓ YOLOv5 cloned")
else:
    print("✓ YOLOv5 exists")

# Install requirements
%pip install -r {YOLO_DIR}/requirements.txt
print("✓ Dependencies installed")

✓ YOLOv5 exists
[0mNote: you may need to restart the kernel to use updated packages.
✓ Dependencies installed


## 2. Download FSOCO Dataset

**Manual step required:**
1. Visit: https://www.fsoco-dataset.com/download
2. Download dataset (Supervisely format)
3. Extract to: `data/fsoco_raw/`

Expected structure:
```
data/fsoco_raw/
└── dataset_name/
    ├── ann/       # JSON annotations
    ├── img/       # Images
    └── meta.json
```

In [30]:
import os
import shutil
from pathlib import Path


def flatten_dataset_structure(dataset_path: str):
    """
    change copies and redoes the downloaded dataset structure from:
        dataset/
          - meta.json
          - team1/ann, team1/img
          - team2/ann, team2/img
          ...
    to:
        dataset/
          - ann/
          - img/
          - meta.json
    """
    root = Path(dataset_path)
    ann_out = root / "ann"
    img_out = root / "img"

    #check output dirs
    ann_out.mkdir(exist_ok=True)
    img_out.mkdir(exist_ok=True)

    for team_dir in root.iterdir():
        if team_dir.is_dir() and team_dir.name not in {"ann", "img"}:
            ann_dir = team_dir / "ann"
            img_dir = team_dir / "img"

            if ann_dir.exists():
                for file in ann_dir.iterdir():
                    dest = ann_out / f"{team_dir.name}_{file.name}"
                    shutil.move(str(file), dest)

            # copy img files
            if img_dir.exists():
                for file in img_dir.iterdir():
                    dest = img_out / f"{team_dir.name}_{file.name}"
                    shutil.move(str(file), dest)

            # Clean up empty team dir
            shutil.rmtree(team_dir)

    print(f"Flattening complete. Files moved into {ann_out} and {img_out}")


def prepare_working_copy(source_path: Path, dest_path: Path):
    """Copy the raw dataset into a working directory before mutating it."""
    if not dest_path.exists():
        raise FileNotFoundError(
            f"✗ Working dataset directory not found: {dest_path}\nCreate this directory before running the preprocessing pipeline."
        )

    if not source_path.exists() or not any(source_path.iterdir()):
        raise FileNotFoundError(
            f"✗ Source dataset not found or empty: {source_path}\nDownload and extract the FSOCO dataset first."
        )

    print(f"Preparing working dataset at: {dest_path}")

    # Clear existing contents in the working directory
    for item in dest_path.iterdir():
        if item.is_dir():
            shutil.rmtree(item)
        else:
            item.unlink()

    # Copy fresh contents from the source dataset
    for item in source_path.iterdir():
        target = dest_path / item.name
        if item.is_dir():
            shutil.copytree(item, target)
        else:
            shutil.copy2(item, target)

    print("✓ Working copy created")


raw_exists = FSOCO_RAW.exists()
print(any(FSOCO_RAW.iterdir()))
print(FSOCO_RAW)
raw_has_content = any(FSOCO_RAW.iterdir()) if raw_exists else False
print(raw_exists)
print(raw_has_content)

if raw_exists and raw_has_content:
    print("✓ FSOCO dataset found")
    for item in FSOCO_RAW.iterdir():
        print(f"  - {item.name}")
    print(FSOCO_MOD)
    FSOCO_MOD.mkdir(parents=True, exist_ok=True)
    prepare_working_copy(FSOCO_RAW, FSOCO_MOD)
    flatten_dataset_structure(FSOCO_MOD)
# else:
#     raise FileNotFoundError(f"✗ Download FSOCO dataset to: {FSOCO_RAW}")

False
/ml_data/perceptions/fsoco_raw
True
False


In [None]:
# Check structure
if not FSOCO_MOD.exists():
    raise FileNotFoundError(
        f"✗ Working dataset directory not found: {FSOCO_MOD}. Run the preparation step and ensure the directory exists."
    )

if not any(FSOCO_MOD.iterdir()):
    raise RuntimeError(
        f"✗ Working dataset directory is empty: {FSOCO_MOD}. Run the preparation step to copy data before proceeding."
    )

working_dataset = FSOCO_MOD
print(f"Inspecting dataset at: {working_dataset}")

bb_path = working_dataset / 'bounding_boxes'
if not bb_path.exists():
    bb_path = working_dataset / 'ann'

print("bounding_boxes/ann contents:")
if bb_path.exists():
    for item in list(bb_path.iterdir())[:5]:
        print(f"  {item.name}")
else:
    print("  ✗ No bounding box directory found")

img_path = working_dataset / 'images'
if not img_path.exists():
    img_path = working_dataset / 'img'

print("\nimages/img contents:")
if img_path.exists():
    for item in list(img_path.iterdir())[:5]:
        print(f"  {item.name}")
else:
    print("  ✗ No image directory found")

print("\nmeta.json:")
meta_path = working_dataset / 'meta.json'
if meta_path.exists():
    import json
    meta = json.load(open(meta_path))
    print(json.dumps(meta, indent=2)[:500])
else:
    print("  ✗ meta.json not found")

FileNotFoundError: ✗ Working dataset directory not found: /ml_data/perceptions/fsoco_mod. Run the preparation step and ensure the directory exists.

## 3. Filter Images by Aspect Ratio

Remove images that don't meet minimum aspect ratio requirements (width/height).

In [None]:
from PIL import Image
import os
from tqdm import tqdm
from collections import Counter

def filter_images_by_aspect_ratio(fsoco_raw, min_ratio=1.0, max_ratio=3.0):
    """
    Filter out images that don't meet aspect ratio requirements.
    
    Args:
        fsoco_raw: Path to FSOCO dataset
        min_ratio: Minimum aspect ratio (width/height)
        max_ratio: Maximum aspect ratio (width/height)
    
    Returns:
        List of filtered image paths and count of removed images
    """
    img_dir = fsoco_raw / 'images'
    if not img_dir.exists():
        img_dir = fsoco_raw / 'img'
    ann_dir = fsoco_raw / 'bounding_boxes'
    if not ann_dir.exists():
        ann_dir = fsoco_raw / 'ann'
    
    if not img_dir.exists():
        print(f"✗ Image directory not found: {img_dir}")
        return [], 0
    
    images = list(img_dir.glob('*.jpg')) + list(img_dir.glob('*.png'))
    print(f"Total images found: {len(images)}")
    
    filtered_images = []
    removed_images = []
    kept_dimensions = []
    removed_dimensions = []
    kept_aspect_ratios = []
    removed_aspect_ratios = []
    
    for img_path in tqdm(images, desc="Filtering images by aspect ratio"):
        try:
            img = Image.open(img_path)
            w, h = img.size
            aspect_ratio = w / h
            
            if min_ratio <= aspect_ratio <= max_ratio:
                filtered_images.append(img_path)
                kept_dimensions.append((w, h))
                kept_aspect_ratios.append(aspect_ratio)
            else:
                # Remove image and corresponding annotation
                removed_images.append((img_path, aspect_ratio, w, h))
                removed_dimensions.append((w, h))
                removed_aspect_ratios.append(aspect_ratio)
                
                img_path.unlink()  # Delete image
                
                # Delete corresponding annotation if exists
                ann_path = ann_dir / f"{img_path.name}.json"
                if not ann_path.exists():
                    ann_path = ann_dir / f"{img_path.stem}.json"
                if ann_path.exists():
                    ann_path.unlink()
                
                print(f"Removed: {img_path.name} (aspect ratio: {aspect_ratio:.2f}, dims: {w}x{h})")
                
        except Exception as e:
            print(f"Error processing {img_path.name}: {e}")
            continue
    
    # Print detailed statistics
    print(f"\n{'='*70}")
    print(f"FILTERING STATISTICS")
    print(f"{'='*70}")
    
    print(f"\n📊 OVERALL SUMMARY:")
    print(f"  Total images processed:  {len(images)}")
    if images:
        print(f"  ✓ Images kept:           {len(filtered_images)} ({100*len(filtered_images)/len(images):.1f}%)")
        print(f"  ✗ Images filtered out:   {len(removed_images)} ({100*len(removed_images)/len(images):.1f}%)")
    else:
        print("  ✓ Images kept:           0")
        print("  ✗ Images filtered out:   0")
    print(f"  Aspect ratio range:      {min_ratio} - {max_ratio}")
    
    if kept_aspect_ratios:
        print(f"\n📐 KEPT IMAGES - ASPECT RATIO STATS:")
        print(f"  Min aspect ratio:  {min(kept_aspect_ratios):.2f}")
        print(f"  Max aspect ratio:  {max(kept_aspect_ratios):.2f}")
        print(f"  Avg aspect ratio:  {sum(kept_aspect_ratios)/len(kept_aspect_ratios):.2f}")
    
    if kept_dimensions:
        print(f"\n📏 KEPT IMAGES - DIMENSION STATS:")
        dim_counter = Counter(kept_dimensions)
        top_dims = dim_counter.most_common(10)
        print(f"  Unique dimensions: {len(dim_counter)}")
        print(f"  Top 10 dimensions:")
        for (w, h), count in top_dims:
            ratio = w/h
            print(f"    {w}x{h} (ratio {ratio:.2f}): {count} images")
    
    if removed_aspect_ratios:
        print(f"\n🚫 REMOVED IMAGES - ASPECT RATIO STATS:")
        print(f"  Min aspect ratio:  {min(removed_aspect_ratios):.2f}")
        print(f"  Max aspect ratio:  {max(removed_aspect_ratios):.2f}")
        print(f"  Avg aspect ratio:  {sum(removed_aspect_ratios)/len(removed_aspect_ratios):.2f}")
    
    if removed_dimensions:
        print(f"\n🚫 REMOVED IMAGES - DIMENSION STATS:")
        dim_counter = Counter(removed_dimensions)
        print(f"  Unique dimensions: {len(dim_counter)}")
        print(f"  All removed dimensions:")
        for (w, h), count in sorted(dim_counter.items(), key=lambda x: x[1], reverse=True):
            ratio = w/h
            print(f"    {w}x{h} (ratio {ratio:.2f}): {count} images")
    
    print(f"\n{'='*70}\n")
    
    return filtered_images, len(removed_images)

# Run filtering
MIN_ASPECT_RATIO = 1.0  # Minimum width/height ratio
MAX_ASPECT_RATIO = 3.5  # Maximum width/height ratio

if not FSOCO_MOD.exists():
    raise FileNotFoundError(
        f"✗ Working dataset directory not found: {FSOCO_MOD}. Create it and rerun the preparation step before filtering."
    )

if not any(FSOCO_MOD.iterdir()):
    raise RuntimeError(
        f"✗ Working dataset directory is empty: {FSOCO_MOD}. Run the preparation step to copy data before filtering."
    )

print(f"Using working dataset: {FSOCO_MOD}")
filtered_imgs, removed = filter_images_by_aspect_ratio(
    FSOCO_MOD,
    min_ratio=MIN_ASPECT_RATIO,
    max_ratio=MAX_ASPECT_RATIO
)

FileNotFoundError: ✗ Working dataset directory not found: /ml_data/perceptions/fsoco_mod. Create it and rerun the preparation step before filtering.

## 4. Convert Labels: Supervisely → YOLO

YOLO format: `<class_id> <x_center> <y_center> <width> <height>` (all normalized 0-1)

In [None]:
import json
import shutil
from PIL import Image
from tqdm import tqdm
import random

# Add code for conversion

def convert_supervisely_to_yolo(fsoco_raw, fsoco_yolo, split=(0.8, 0.1, 0.1)):
    """Convert FSOCO Supervisely format to YOLO format."""
    
    # Create directories
    for s in ['train', 'val', 'test']:
        (fsoco_yolo / 'images' / s).mkdir(parents=True, exist_ok=True)
        (fsoco_yolo / 'labels' / s).mkdir(parents=True, exist_ok=True)
    
    # Load class names from meta.json (only rectangle classes for bounding boxes)
    meta = json.load(open(fsoco_raw / 'meta.json'))
    classes = [c['title'] for c in meta['classes'] if c['shape'] == 'rectangle']
    class_map = {name: i for i, name in enumerate(classes)}
    print(f"Classes (bounding boxes only): {classes}\n")
    
    # Get all annotation files
    ann_dir = fsoco_raw / 'bounding_boxes'
    img_dir = fsoco_raw / 'images'
    
    ann_files = list(ann_dir.glob('*.json'))
    print(f"Total annotations: {len(ann_files)}")
    
    # Split dataset
    random.shuffle(ann_files)
    n1 = int(len(ann_files) * split[0])
    n2 = int(len(ann_files) * (split[0] + split[1]))
    splits = {
        'train': ann_files[:n1],
        'val': ann_files[n1:n2],
        'test': ann_files[n2:]
    }
    
    # Convert each split
    for split_name, anns in splits.items():
        print(f"\nConverting {split_name}: {len(anns)} images")
        
        for ann_path in tqdm(anns):
            # Find corresponding image (handle both .jpg and .png)
            img_name = ann_path.stem  # e.g., "amz_00588.jpg" from "amz_00588.jpg.json"
            img_path = img_dir / img_name
            
            if not img_path.exists():
                # Try alternate extension
                alt_name = img_name.replace('.jpg', '.png') if '.jpg' in img_name else img_name.replace('.png', '.jpg')
                img_path = img_dir / alt_name
                if not img_path.exists():
                    continue
            
            # Read image dimensions
            img = Image.open(img_path)
            w, h = img.size
            
            # Read annotations
            ann = json.load(open(ann_path))
            
            # Convert to YOLO format
            yolo_labels = []
            for obj in ann.get('objects', []):
                cls = obj['classTitle']
                if cls not in class_map or obj['geometryType'] != 'rectangle':
                    continue
                
                points = obj['points']['exterior']
                x1, y1 = points[0]
                x2, y2 = points[1]
                
                # Convert to YOLO format (center_x, center_y, width, height) normalized
                x_center = ((x1 + x2) / 2) / w
                y_center = ((y1 + y2) / 2) / h
                width = abs(x2 - x1) / w
                height = abs(y2 - y1) / h
                
                # Clamp to [0, 1]
                x_center = max(0, min(1, x_center))
                y_center = max(0, min(1, y_center))
                width = max(0, min(1, width))
                height = max(0, min(1, height))
                
                yolo_labels.append(f"{class_map[cls]} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")
            
            # Save image and label
            if yolo_labels:  # Only save if there are labels
                shutil.copy(img_path, fsoco_yolo / 'images' / split_name / img_path.name)
                with open(fsoco_yolo / 'labels' / split_name / f"{img_path.stem}.txt", 'w') as f:
                    f.write('\n'.join(yolo_labels))
    
    print("\n✓ Conversion complete")
    print(f"\nDataset splits:")
    for split_name in ['train', 'val', 'test']:
        n_imgs = len(list((fsoco_yolo / 'images' / split_name).glob('*.[jp][pn]g')))
        print(f"  {split_name}: {n_imgs} images")
    
    return classes

# Run conversion
if FSOCO_RAW.exists() and any(FSOCO_RAW.iterdir()):
    classes = convert_supervisely_to_yolo(FSOCO_RAW, FSOCO_YOLO)
else:
    print("Download dataset first!")
    classes = None

## 5. Create Dataset Config (YAML)

In [None]:
import yaml

if classes:
    config = {
        'path': str(FSOCO_YOLO.absolute()),
        'train': 'images/train',
        'val': 'images/val',
        'test': 'images/test',
        'nc': len(classes),
        'names': classes
    }
    
    config_path = FSOCO_YOLO / 'fsoco.yaml'
    with open(config_path, 'w') as f:
        yaml.dump(config, f)
    
    print("✓ Config saved:", config_path)
    print(yaml.dump(config, default_flow_style=False))
else:
    print("Convert dataset first!")

## 6. Train YOLOv5

In [None]:
# Training parameters

import torch
MODEL = 'yolov5n'  # Options: yolov5n, yolov5s, yolov5m, yolov5l, yolov5x
EPOCHS = 10
BATCH = 2
IMG_SIZE = 640
DEVICE = "0" if torch.cuda.is_available() else "cpu"
rect = True 
print(f"Model: {MODEL}, Epochs: {EPOCHS}, Batch: {BATCH}, Image size: {IMG_SIZE}")

In [None]:
# Train

%cd $YOLO_DIR
!python train.py \
    --img {IMG_SIZE} \
    --batch {BATCH} \
    --epochs {EPOCHS} \
    --data {config_path} \
    --weights {MODEL}.pt \
    --name fsoco_{MODEL} \
    --rect {rect} \
    --cache \
    --device {DEVICE}

## 7. View Results

In [None]:
from IPython.display import Image as IPImage

# Find latest run
runs = sorted((YOLO_DIR / 'runs' / 'train').glob('fsoco_*'), key=lambda x: x.stat().st_mtime)
if runs:
    latest = runs[-1]
    print(f"Training run: {latest.name}\n")
    
    # Show results
    results_img = latest / 'results.png'
    if results_img.exists():
        display(IPImage(filename=str(results_img)))
    
    print(f"\nWeights: {latest / 'weights' / 'best.pt'}")
else:
    print("No training runs found")

## 8. Run Inference

In [None]:
# Get trained weights
if runs:
    weights = runs[-1] / 'weights' / 'best.pt'
    test_imgs = FSOCO_YOLO / 'images' / 'test'
    
    if weights.exists() and test_imgs.exists():
        print("Running inference...\n")
        
        !python {YOLO_DIR}/detect.py \
            --weights {weights} \
            --source {test_imgs} \
            --img {IMG_SIZE} \
            --conf 0.25 \
            --name fsoco_inference \
            --max-det 100
        
        # Show results
        detect_runs = sorted((YOLO_DIR / 'runs' / 'detect').glob('fsoco_inference*'), 
                           key=lambda x: x.stat().st_mtime)
        if detect_runs:
            print(f"\nResults saved to: {detect_runs[-1]}")
            
            # Display first result
            results = list(detect_runs[-1].glob('*.jpg'))[:1]
            for r in results:
                display(IPImage(filename=str(r), width=800))
    else:
        print("Missing weights or test images")
else:
    print("Train model first")

## Done!

**Your model is trained and ready to use.**

**Quick commands:**
```bash
# Inference on images
python yolov5/detect.py --weights runs/train/fsoco_yolov5s/weights/best.pt --source /path/to/images

# Inference on video
python yolov5/detect.py --weights runs/train/fsoco_yolov5s/weights/best.pt --source video.mp4

# Inference on webcam
python yolov5/detect.py --weights runs/train/fsoco_yolov5s/weights/best.pt --source 0
```