# YOLOv9-s ReLU6 Training for EdgeTPU

This notebook fine-tunes YOLOv9-s with ReLU6 activations for better INT8 quantization.

## Step 1: Check GPU and Setup

In [None]:
import torch
import os
from pathlib import Path

# Banner for the setup step.
rule = "=" * 70
print(rule)
print("SETUP: Checking GPU")
print(rule)

# Report the missing-accelerator case first so the happy path reads straight through.
if not torch.cuda.is_available():
    print("‚ö†Ô∏è  NO GPU DETECTED!")
    print("   Go to: Runtime > Change runtime type > Hardware accelerator > GPU")
    print("   Then restart this notebook")
else:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"‚úì GPU detected: {gpu_name}")
    # total_memory is reported in bytes; show it in (decimal) gigabytes.
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"  Memory: {total_bytes / 1e9:.2f} GB")

SETUP: Checking GPU
‚úì GPU detected: Tesla T4
  Memory: 15.83 GB


## Step 2: Clone YOLOv9 Repository

In [None]:
print("="*70)
print("Cloning YOLOv9 Repository")
print("="*70)

if not Path('yolov9').exists():
    !git clone https://github.com/WongKinYiu/yolov9.git
    print("‚úì Repository cloned")
else:
    print("‚úì Repository already exists")

# Navigate to repo
os.chdir('/content/yolov9')
print(f"‚úì Working directory: {os.getcwd()}")

# Install requirements
print("\nInstalling requirements...")
!pip install -q -r requirements.txt
print("‚úì Requirements installed")

Cloning YOLOv9 Repository
Cloning into 'yolov9'...
remote: Enumerating objects: 781, done.[K
remote: Total 781 (delta 0), reused 0 (delta 0), pack-reused 781 (from 1)[K
Receiving objects: 100% (781/781), 3.27 MiB | 4.74 MiB/s, done.
Resolving deltas: 100% (330/330), done.
‚úì Repository cloned
‚úì Working directory: /content/yolov9

Installing requirements...
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.6/1.6 MB[0m [31m44.7 MB/s[0m eta [36m0:00:00[0m
[?25h‚úì Requirements installed


## Step 5: Create ReLU6 Configuration

In [None]:
# Needed for the model definition yaml and for label filtering.
# COCO class id -> name for the categories that are relevant for object
# detection in surveillance footage (insertion order defines the new ids).
_SURVEILLANCE_COCO_CLASSES = {
    0: "person",
    1: "bicycle",
    2: "car",
    3: "motorcycle",
    4: "airplane",
    5: "bus",
    6: "train",
    7: "truck",
    8: "boat",
    14: "bird",
    15: "cat",
    16: "dog",
    17: "horse",
    18: "sheep",
    19: "cow",
    20: "elephant",
    21: "bear",
}

# Original COCO ids to keep, in the order they get remapped to 0..N-1.
CLASSES_TO_KEEP = list(_SURVEILLANCE_COCO_CLASSES)


In [None]:
print("="*70)
print("Creating ReLU6 Model Configuration with DualDDetectEdgeTPU")
print("="*70)

from pathlib import Path

# Read the original config
config_path = Path('models/detect/yolov9-s.yaml')
with open(config_path, 'r') as f:
    lines = f.readlines()

# Create ReLU6 + DualDDetectEdgeTPU version
relu6_config_path = Path('models/detect/yolov9-s-relu6.yaml')
new_lines = []

# The three edits this cell must make; entries are removed as they are applied
# so that a silently unmatched line (e.g. after an upstream change) is reported.
pending_edits = {'activation', 'detection head', 'number of classes'}

for line in lines:
    stripped = line.strip()
    if stripped == '#activation: nn.ReLU()':
        # Uncomment and change to ReLU6
        new_lines.append('activation: nn.ReLU6()\n')
        pending_edits.discard('activation')
        print("‚úì Set activation: nn.ReLU6()")
    elif stripped.startswith('#activation:'):
        # Keep other activation lines commented
        new_lines.append(line)
    elif 'DualDDetect' in line and 'DualDDetectEdgeTPU' not in line:
        # Replace DualDDetect with DualDDetectEdgeTPU
        new_line = line.replace('DualDDetect', 'DualDDetectEdgeTPU')
        new_lines.append(new_line)
        pending_edits.discard('detection head')
        print(f"‚úì Changed detection head: DualDDetect ‚Üí DualDDetectEdgeTPU")
    elif stripped.startswith('nc:'):
        # Match on the key only, so a different class count or trailing
        # comment in the upstream file does not defeat the replacement.
        new_lines.append(f'nc: {len(CLASSES_TO_KEEP)}  # number of classes\n')
        pending_edits.discard('number of classes')
        print(f"‚úì Changed number of classes to {len(CLASSES_TO_KEEP)}")
    else:
        new_lines.append(line)

# Save config
with open(relu6_config_path, 'w') as f:
    f.writelines(new_lines)
print(f"‚úì Created: {relu6_config_path}")

if pending_edits:
    print(f"WARNING: no matching line found for: {', '.join(sorted(pending_edits))}")
    print(f"         {config_path} may have changed upstream - check {relu6_config_path} before training")

# Verify the changes
print("\nVerifying configuration:")
with open(relu6_config_path, 'r') as f:
    content = f.read()
    for line in content.split('\n'):
        if 'activation:' in line and not line.strip().startswith('#'):
            print(f"  Activation: {line.strip()}")
        if 'DualDDetect' in line:
            print(f"  Detection head: {line.strip()}")

print("\n‚úÖ Configuration ready for EdgeTPU quantization:")
print("   - ReLU6 activation (bounded [0,6] for all layers)")
print("   - DualDDetectEdgeTPU (separate box/class tensors)")
print("   - Both prevent quantization value crushing")

Creating ReLU6 Model Configuration with DualDDetectEdgeTPU
‚úì Changed number of classes to 17
‚úì Set activation: nn.ReLU6()
‚úì Changed detection head: DualDDetect ‚Üí DualDDetectEdgeTPU
‚úì Created: models/detect/yolov9-s-relu6.yaml

Verifying configuration:
  Activation: activation: nn.ReLU6()
  Detection head: [[28, 25, 22, 15, 18, 21], 1, DualDDetectEdgeTPU, [nc]],  # Detect(P3, P4, P5)

‚úÖ Configuration ready for EdgeTPU quantization:
   - ReLU6 activation (bounded [0,6] for all layers)
   - DualDDetectTwoOutputs (separate box/class tensors)
   - Both prevent quantization value crushing


In [None]:
# reset / clean up for use in re-runs

if False:
    os.chdir('/content/yolov9')
    !rm -rf runs/train/yolov9-s-relu6

if False:
    !git checkout models/yolo.py

Updated 1 path from the index


In [None]:
print("="*70)
print("DualDDetectEdgeTPU - Two Tensors Output")
print("="*70)
# This version keeps the tensors separate to allow different ranges of values
# that the EdgeTPU can quantize individually.

import os
os.chdir('/content/yolov9')

# Source of the detection head that the next part of this cell splices into
# models/yolo.py. It is kept as a string because it must live inside that
# module (it relies on names such as Conv, DFL, make_anchors and dist2bbox
# being in scope there).
dualddetect_edgetpu_code = '''
class DualDDetectEdgeTPU(nn.Module):
    """
    EdgeTPU-optimized DualDDetect that outputs two tensors with:
    - DFL distribution for calculating box coordinates
    - Logit value class scores clamped to [-4, 4], or about 2% to 98% probability

    Both ranges are compatible for INT8 quantization.
    """

    dynamic = False
    export = False
    shape = None
    anchors = torch.empty(0)
    strides = torch.empty(0)

    def __init__(self, nc=80, ch=(), inplace=True):
        super().__init__()
        self.nc = nc
        self.nl = len(ch) // 2
        self.reg_max = 16
        self.no = nc + self.reg_max * 4
        self.inplace = inplace
        self.stride = torch.zeros(self.nl)

        c2 = make_divisible(max((ch[0] // 4, self.reg_max * 4, 16)), 4)
        c3 = max((ch[0], min((self.nc * 2, 128))))
        c4 = make_divisible(max((ch[self.nl] // 4, self.reg_max * 4, 16)), 4)
        c5 = max((ch[self.nl], min((self.nc * 2, 128))))

        # Branch 1 (auxiliary)
        self.cv2 = nn.ModuleList(
            nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3, g=4),
                         nn.Conv2d(c2, 4 * self.reg_max, 1, groups=4))
            for x in ch[:self.nl]
        )
        self.cv3 = nn.ModuleList(
            nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1))
            for x in ch[:self.nl]
        )

        # Branch 2 (main)
        self.cv4 = nn.ModuleList(
            nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3, g=4),
                         nn.Conv2d(c4, 4 * self.reg_max, 1, groups=4))
            for x in ch[self.nl:]
        )
        self.cv5 = nn.ModuleList(
            nn.Sequential(Conv(x, c5, 3), Conv(c5, c5, 3), nn.Conv2d(c5, self.nc, 1))
            for x in ch[self.nl:]
        )

        self.dfl = DFL(self.reg_max)
        self.dfl2 = DFL(self.reg_max)

    def forward(self, x):
        if self.export and not self.training:
            # Inference path: keep boxes and classes separate on edgetpu
            # Output in non-standard B(H*W)C format instead of BC(H*W)
            # to avoid limits of EdgeTPU concatenation sizes
            # AND to facilitate post-processing on CPU
            #
            # Only the main branch is evaluated here, and only once: the
            # auxiliary branch does not contribute to the exported outputs.

            boxes_raw = torch.cat([
                self.cv4[i](x[self.nl+i]).permute(0, 2, 3, 1).flatten(1, 2)
                for i in range(self.nl)
            ], dim=1)

            classes_raw = torch.cat([
                torch.clamp(self.cv5[i](x[self.nl+i]), -4.0, 4.0).permute(0, 2, 3, 1).flatten(1, 2)
                for i in range(self.nl)
            ], dim=1)

            return boxes_raw, classes_raw

        shape = x[0].shape  # BCHW
        d1 = []
        d2 = []

        for i in range(self.nl):
            d1.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1))
            d2.append(torch.cat((self.cv4[i](x[self.nl+i]), self.cv5[i](x[self.nl+i])), 1))

        if self.training:
            return [d1, d2]

        # Validation path
        # Set up anchors if needed
        if self.dynamic or self.shape != shape:
            self.anchors, self.strides = (d1.transpose(0, 1) for d1 in make_anchors(d1, self.stride, 0.5))
            self.shape = shape

        # Decode boxes
        box, cls = torch.cat([di.view(shape[0], self.no, -1) for di in d1], 2).split((self.reg_max * 4, self.nc), 1)
        dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides

        box2, cls2 = torch.cat([di.view(shape[0], self.no, -1) for di in d2], 2).split((self.reg_max * 4, self.nc), 1)
        dbox2 = dist2bbox(self.dfl2(box2), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides

        # Apply sigmoid to classes
        cls_sigmoid = cls.sigmoid()
        cls2_sigmoid = cls2.sigmoid()
        # Validation needs PIXEL coordinates
        y_aux = torch.cat((dbox, cls_sigmoid), 1)
        y_main = torch.cat((dbox2, cls2_sigmoid), 1)  # NO normalization!
        return ([y_aux, y_main], [d1, d2])

    def bias_init(self):
        """Initialize biases"""
        m = self
        for a, b, s in zip(m.cv2, m.cv3, m.stride):
            a[-1].bias.data[:] = 1.0
            b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2)
        for a, b, s in zip(m.cv4, m.cv5, m.stride):
            a[-1].bias.data[:] = 1.0
            b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2)

'''

print("="*70)
print("Adding DualDDetectEdgeTPU to models/yolo.py")
print("="*70)

# Read the file
with open('models/yolo.py', 'r') as f:
    lines = f.readlines()

# Check if code already present
code_already_present = False
for i, line in enumerate(lines):
    if 'class DualDDetectEdgeTPU(nn.Module):' in line:
        code_already_present = True
        print(f"‚úÖ Code already present at line {i+1}")
        break

# True once models/yolo.py is known to contain the class definition
class_available = code_already_present

if not code_already_present:
    # Find where to insert (after DualDDetect class, i.e. right before TripleDetect)
    insert_idx = -1
    for i, line in enumerate(lines):
        if 'class TripleDetect(nn.Module):' in line:
            insert_idx = i
            print(f"‚úÖ Found insertion point at line {i+1} (before TripleDetect)")
            break

    if insert_idx == -1:
        print("‚ùå Could not find TripleDetect class")
        print("Looking for alternative insertion point...")
        for i, line in enumerate(lines):
            if 'class DualDDetect(nn.Module):' in line:
                # Find the end of DualDDetect: the next class statement that is
                # more than 10 lines below its header
                for j in range(i+1, len(lines)):
                    if lines[j].strip().startswith('class ') and j > i + 10:
                        insert_idx = j
                        print(f"‚úÖ Found insertion point at line {j+1}")
                        break
                break

    if insert_idx == -1:
        print("‚ùå Error: Could not find insertion point!")
    else:
        # Insert the new class
        lines.insert(insert_idx, '\n\n' + dualddetect_edgetpu_code + '\n\n')

        # Write back
        with open('models/yolo.py', 'w') as f:
            f.writelines(lines)

        print("‚úÖ Successfully added DualDDetectEdgeTPU class")
        class_available = True

if class_available:
    # Register the class with parse_model / the model setup code. This runs
    # even when the class was already present, so a previously interrupted or
    # partial patch is completed on re-run; each replacement is idempotent.
    print("\n" + "="*70)
    print("Updating parse_model to recognize DualDDetectEdgeTPU")
    print("="*70)

    # Read again
    with open('models/yolo.py', 'r') as f:
        content = f.read()

    # Update all the isinstance checks
    updates = [
        ('elif m in {Detect, DualDetect, TripleDetect, DDetect, DualDDetect, TripleDDetect, Segment, DSegment, DualDSegment, Panoptic}:',
         'elif m in {Detect, DualDetect, TripleDetect, DDetect, DualDDetect, DualDDetectEdgeTPU, TripleDDetect, Segment, DSegment, DualDSegment, Panoptic}:'),

        ('if isinstance(m, (Detect, DualDetect, TripleDetect, DDetect, DualDDetect, TripleDDetect, Segment, DSegment, DualDSegment, Panoptic)):',
         'if isinstance(m, (Detect, DualDetect, TripleDetect, DDetect, DualDDetect, DualDDetectEdgeTPU, TripleDDetect, Segment, DSegment, DualDSegment, Panoptic)):'),

        ('if isinstance(m, (DualDetect, TripleDetect, DualDDetect, TripleDDetect, DualDSegment)):',
         'if isinstance(m, (DualDetect, TripleDetect, DualDDetect, DualDDetectEdgeTPU, TripleDDetect, DualDSegment)):'),
    ]

    content_changed = False
    unmatched = []  # patterns that are neither patchable nor already patched
    for old, new in updates:
        if new in content:
            print(f"‚úÖ Already updated: {old[:50]}...")
        elif old in content:
            content = content.replace(old, new)
            content_changed = True
            print(f"‚úÖ Updated: {old[:50]}...")
        else:
            unmatched.append(old)

    # Write back
    if content_changed:
        with open('models/yolo.py', 'w') as f:
            f.write(content)

    if unmatched:
        # Do not claim success: the model yaml references DualDDetectEdgeTPU and
        # these checks are what make models/yolo.py treat it as a detect head.
        print(f"\n‚ùå {len(unmatched)} expected line(s) not found in models/yolo.py - DualDDetectEdgeTPU is NOT fully registered:")
        for old in unmatched:
            print(f"   {old}")
    else:
        print("\n‚úÖ All updates completed successfully!")


DualDDetectEdgeTPU - Two Tensors Output
Adding DualDDetectEdgeTPU to models/yolo.py
‚úÖ Found insertion point at line 259 (before TripleDetect)
‚úÖ Successfully added DualDDetectEdgeTPU class

Updating parse_model to recognize DualDDetectEdgeTPU
‚úÖ Updated: elif m in {Detect, DualDetect, TripleDetect, DDete...
‚úÖ Updated: if isinstance(m, (Detect, DualDetect, TripleDetect...
‚úÖ Updated: if isinstance(m, (DualDetect, TripleDetect, DualDD...

‚úÖ All updates completed successfully!


## Step 4: Download Pretrained Weights

In [None]:
print("="*70)
print("Downloading YOLOv9-s Pretrained Weights (based on SiLU activation and 80 classes)")
print("="*70)

weights_file = Path('yolov9-s-converted.pt')

if not weights_file.exists():
    print("Downloading pretrained weights...")
    !wget -q --show-progress https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-s-converted.pt
    print("‚úì Weights downloaded")
else:
    print("‚úì Weights already exist")

print(f"  File size: {weights_file.stat().st_size / 1e6:.2f} MB")

Downloading YOLOv9-t Pretrained Weights (based on SiLU activation and 80 classes)
Downloading pretrained weights...
‚úì Weights downloaded
  File size: 15.04 MB


## Step 3: Download COCO Dataset

**This takes 10-15 minutes and downloads ~20GB**

Only needs to be done once per Colab runtime: the cell below skips the download when the `coco` directory already exists.

In [None]:
# ============================================================================
# Download Pre-Converted COCO Dataset with YOLO Format Labels
# ============================================================================
# Fetches the label archive and the train/val image archives, unpacks them
# under ./coco, and finally prints how many training images and label files
# are present. All paths are relative to the current working directory.
print("="*70)
print("Downloading COCO Dataset (YOLO Format)")
print("="*70)

# shutil is only needed by the (disabled) forced re-download switch below
import shutil

# Manual switch: set to True to delete an existing ./coco tree so that the
# download block below runs again from scratch.
if False:
    if Path('coco').exists():
        shutil.rmtree('coco')
        print("‚úì Removed old COCO directory")

# The whole download/unpack sequence is skipped when ./coco already exists.
# NOTE(review): a partially unpacked ./coco (interrupted run) is also skipped.
if not Path('coco').exists():

    # Download COCO in YOLO format (images + labels already structured correctly)
    print("\nDownloading COCO dataset in YOLO format...")
    print("This includes both images and labels in the correct structure")

    TEST_SET = False # True: use the small coco128 sample as training split instead of train2017

    !mkdir -p coco/images
    # Archives already present in the working directory are reused
    if not Path('coco2017labels.zip').exists():
        !wget -q --show-progress https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels.zip
    # No -d target: the archive is presumably rooted at coco/ already — TODO confirm
    !unzip -q coco2017labels.zip # -d coco
    if TEST_SET:
        if not Path('coco128.zip').exists():
            !wget -q --show-progress https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
        # Replace the train2017 labels/images with the coco128 contents
        !rm -rf coco/labels/train2017
        !rm -rf coco/images/train2017
        !unzip -q coco128.zip
        !mv coco128/images/* coco/images/
        !mv coco128/labels/* coco/labels/
        !rm -rf coco128
        # Rebuild the training image list from the files that are now present,
        # rewriting the leading "coco" of each path to "."
        !rm coco/train2017.txt
        !ls coco/images/train2017/*.jpg | sed 's|^coco|.|' > coco/train2017.txt
    else:
        if not Path('train2017.zip').exists():
            !wget -q --show-progress http://images.cocodataset.org/zips/train2017.zip
        !unzip -q train2017.zip -d coco/images
    # Validation images are needed in both modes; -n never overwrites existing files
    if not Path('val2017.zip').exists():
        !wget -q --show-progress http://images.cocodataset.org/zips/val2017.zip
    !unzip -q -n val2017.zip -d coco/images

    # Cleanup (disabled: keeping the archives lets a re-run skip the downloads)
    #!rm coco2017labels.zip train2017.zip val2017.zip

    print("\n‚úì COCO dataset downloaded with correct structure!")
    print("\nStructure:")
    print("  coco/")
    print("    images/")
    print("      train2017/")
    print("      val2017/")
    print("    labels/")
    print("      train2017/")
    print("      val2017/")

# Verify: count training images and label files (runs even when the download was skipped)
train_imgs = len(list(Path('coco/images/train2017').glob('*.jpg')))
train_lbls = len(list(Path('coco/labels/train2017').glob('*.txt')))
print(f"\n‚úì Train: {train_imgs:,} images, {train_lbls:,} labels")

Downloading COCO Dataset (YOLO Format)

Downloading COCO dataset in YOLO format...
This includes both images and labels in the correct structure

‚úì COCO dataset downloaded with correct structure!

Structure:
  coco/
    images/
      train2017/
      val2017/
    labels/
      train2017/
      val2017/

‚úì Train: 118,287 images, 117,266 labels


In [None]:
# cleanup commands in case of re-try

#!rm -rf coco
#!unzip -q coco2017labels.zip # -d coco
#!unzip -q val2017.zip -d coco/images
#!unzip -q -n val2017.zip -d coco/images

In [None]:
# ============================================================================
# Filter COCO Dataset to 17 Specific Classes
# ============================================================================
from pathlib import Path
from tqdm import tqdm
import shutil

banner = "=" * 70
print(banner)
print("Filtering COCO Dataset to Your Classes")
print(banner)
print(f"\nKeeping {len(CLASSES_TO_KEEP)} classes:")

# Original COCO class id -> new contiguous id (its position in CLASSES_TO_KEEP)
old_to_new = dict(zip(CLASSES_TO_KEEP, range(len(CLASSES_TO_KEEP))))

def filter_labels(split='train2017'):
    """Write class-filtered, id-remapped copies of one split's label files.

    Reads every ``*.txt`` in ``coco/labels/<split>``, keeps only rows whose
    class id is a key of the module-level ``old_to_new`` mapping, rewrites the
    id to the mapped value and stores the result under
    ``coco/labels_filtered/<split>``. Files left with no rows are not written.

    Args:
        split: Name of the split sub-directory (e.g. ``'train2017'``).

    Returns:
        Tuple ``(files_written, rows_read, rows_written)``.
    """
    src_dir = Path(f'coco/labels/{split}')
    dst_dir = Path(f'coco/labels_filtered/{split}')
    dst_dir.mkdir(parents=True, exist_ok=True)

    files_written = 0
    rows_read = 0
    rows_written = 0

    for src_file in tqdm(list(src_dir.glob('*.txt')), desc=f"  {split}"):
        with open(src_file, 'r') as f:
            rows = f.readlines()
        rows_read += len(rows)

        remapped = []
        for row in rows:
            fields = row.strip().split()
            # A usable row has a class id plus at least four more fields
            if len(fields) < 5:
                continue
            source_id = int(fields[0])
            if source_id not in old_to_new:
                continue
            remapped.append(f"{old_to_new[source_id]} {' '.join(fields[1:])}\n")

        # Images without any relevant object get no label file at all
        if not remapped:
            continue

        with open(dst_dir / src_file.name, 'w') as f:
            f.writelines(remapped)
        files_written += 1
        rows_written += len(remapped)

    return files_written, rows_read, rows_written

# Re-run safety: after a completed run, coco/labels holds the *filtered* labels
# (already remapped ids) and coco/labels_full holds the originals. Filtering the
# filtered labels again would remap ids a second time and then overwrite the
# backup, so always start from the original label set.
labels_dir = Path('coco/labels')
backup_dir = Path('coco/labels_full')
staging_dir = Path('coco/labels_filtered')

if backup_dir.exists():
    if labels_dir.exists():
        shutil.rmtree(labels_dir)
    shutil.move(str(backup_dir), str(labels_dir))
    print("Restored original labels from labels_full before re-filtering")

# Drop leftovers of an interrupted earlier run so stale files are not kept
if staging_dir.exists():
    shutil.rmtree(staging_dir)


def _count_images(split):
    """Number of .jpg files in coco/images/<split>."""
    return len(list(Path(f'coco/images/{split}').glob('*.jpg')))


def _percent(part, whole):
    """part/whole as a percentage; 0.0 when whole is zero."""
    return part / whole * 100 if whole else 0.0


# Filter both train and val
print("\nFiltering labels...")
train_kept, train_before, train_after = filter_labels('train2017')
val_kept, val_before, val_after = filter_labels('val2017')

# Image totals are counted rather than hard-coded, so the report stays correct
# for reduced datasets as well.
train_images = _count_images('train2017')
val_images = _count_images('val2017')

print(f"\n‚úì Filtering complete:")
print(f"\nTraining set:")
print(f"  Images: {train_images:,} ‚Üí {train_kept:,} ({_percent(train_kept, train_images):.1f}%)")
print(f"  Objects: {train_before:,} ‚Üí {train_after:,} ({_percent(train_after, train_before):.1f}%)")
print(f"\nValidation set:")
print(f"  Images: {val_images:,} ‚Üí {val_kept:,} ({_percent(val_kept, val_images):.1f}%)")
print(f"  Objects: {val_before:,} ‚Üí {val_after:,} ({_percent(val_after, val_before):.1f}%)")

# Backup and replace (labels_full cannot exist here: it was restored above)
shutil.move('coco/labels', 'coco/labels_full')
shutil.move('coco/labels_filtered', 'coco/labels')
print(f"\n‚úì Labels replaced (original backed up to labels_full)")

# Delete old cache
for cache in Path('coco/labels').rglob('*.cache'):
    cache.unlink()
print(f"‚úì Cache cleared")

Filtering COCO Dataset to Your Classes

Keeping 17 classes:

Filtering labels...


  train2017: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 117266/117266 [00:51<00:00, 2295.91it/s]
  val2017: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4952/4952 [00:01<00:00, 2624.20it/s]



‚úì Filtering complete:

Training set:
  Images: 118,287 ‚Üí 86,145 (72.8%)
  Objects: 849,947 ‚Üí 404,191 (47.6%)

Validation set:
  Images: 5,000 ‚Üí 3,645 (72.9%)
  Objects: 36,335 ‚Üí 16,998 (46.8%)

‚úì Labels replaced (original backed up to labels_full)
‚úì Cache cleared


In [None]:
# ============================================================================
# Subsample Filtered Dataset
# ============================================================================
import random
from pathlib import Path
import shutil

# Percentage (0-100) of the filtered dataset to keep; the driver code below
# skips subsampling entirely for values of 99.999 or more.
SUBSAMPLE_PERCENT = 10

print("="*70)
print(f"Subsampling to {SUBSAMPLE_PERCENT}% of Filtered Dataset")
print("="*70)

# Appended to the split directory names for the subsampled copies
# (e.g. train2017 -> train2017_sub); also interpolated into the dataset yaml.
SUBSAMPLE_SUFFIX = "_sub"

def subsample_dataset(split='train2017', percent=15):
    """Copy a reproducible random subset of one split into ``<split><suffix>`` dirs.

    Label files are taken from ``coco/labels/<split>``; for each selected label
    the matching ``.jpg`` from ``coco/images/<split>`` is copied too when it
    exists. The targets are ``coco/labels/<split><SUBSAMPLE_SUFFIX>`` and
    ``coco/images/<split><SUBSAMPLE_SUFFIX>``.

    Args:
        split: Name of the split sub-directory (e.g. ``'train2017'``).
        percent: Percentage (0-100) of the label files to select.

    Returns:
        Tuple ``(n_selected, n_total)`` counted in label files.
    """
    label_dir = Path(f'coco/labels/{split}')
    img_dir = Path(f'coco/images/{split}')

    # Sort first: glob() order depends on the filesystem, so a fixed seed alone
    # would not select the same files on every machine/run.
    all_labels = sorted(label_dir.glob('*.txt'))

    # Seeded private generator: reproducible without reseeding the global RNG
    n_keep = int(len(all_labels) * percent / 100)
    selected = random.Random(42).sample(all_labels, n_keep)

    # Create subsample directories, emptying any earlier subsample so files
    # from a previous run (e.g. with a larger percentage) do not linger.
    sub_label_dir = Path(f'coco/labels/{split}{SUBSAMPLE_SUFFIX}')
    sub_img_dir = Path(f'coco/images/{split}{SUBSAMPLE_SUFFIX}')
    for sub_dir, source_dir in ((sub_label_dir, label_dir), (sub_img_dir, img_dir)):
        # Never wipe the source itself (would happen with an empty suffix)
        if sub_dir != source_dir and sub_dir.exists():
            shutil.rmtree(sub_dir)
        sub_dir.mkdir(parents=True, exist_ok=True)

    # Copy selected files
    for label_file in selected:
        # Copy label
        shutil.copy(label_file, sub_label_dir / label_file.name)

        # Copy corresponding image
        img_file = img_dir / f"{label_file.stem}.jpg"
        if img_file.exists():
            shutil.copy(img_file, sub_img_dir / img_file.name)

    return n_keep, len(all_labels)


if not SUBSAMPLE_PERCENT < 99.999:
    # Effectively 100%: train on the full filtered split directories
    SUBSAMPLE_SUFFIX = ""
    print("‚úì No subsampling required")
else:
    # Subsample both splits with the same percentage
    train_kept, train_total = subsample_dataset('train2017', SUBSAMPLE_PERCENT)
    val_kept, val_total = subsample_dataset('val2017', SUBSAMPLE_PERCENT)

    print("\n‚úì Subsampling complete:")
    print(f"  Training: {train_total:,} ‚Üí {train_kept:,} images")
    print(f"  Validation: {val_total:,} ‚Üí {val_kept:,} images")


Subsampling to 10% of Filtered Dataset

‚úì Subsampling complete:
  Training: 86,145 ‚Üí 8,614 images
  Validation: 3,645 ‚Üí 364 images


## Step 6: Setup COCO Data Configuration

In [None]:
print("="*70)
print("Configuring Dataset Paths")
print("="*70)

# Names of the 80 COCO classes, indexed by original COCO class id. The class
# names written to the yaml are derived from CLASSES_TO_KEEP through this
# table so that `nc` and `names` can never get out of sync.
COCO80_NAMES = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
    'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
    'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
    'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
    'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
    'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
    'hair drier', 'toothbrush',
]

invalid_ids = [c for c in CLASSES_TO_KEEP if not 0 <= c < len(COCO80_NAMES)]
if invalid_ids:
    raise ValueError(f"CLASSES_TO_KEEP contains ids outside the COCO range 0-79: {invalid_ids}")

# One "  <new id>: <name>" line per kept class, in remapped (sequential) order
names_block = "\n".join(
    f"  {new_id}: {COCO80_NAMES[old_id]}" for new_id, old_id in enumerate(CLASSES_TO_KEEP)
)

# COCO data config (if not already correct)
coco_yaml_content = f"""
# COCO 2017 dataset

path: {os.getcwd()}/coco
train: images/train2017{SUBSAMPLE_SUFFIX}
val: images/val2017{SUBSAMPLE_SUFFIX}

nc: {len(CLASSES_TO_KEEP)}

# Classes
names:
{names_block}
"""

with open('data/coco.yaml', 'w') as f:
    f.write(coco_yaml_content)

print("‚úì COCO configuration created")

Configuring Dataset Paths
‚úì COCO configuration created


## Step 7: Configure Training Parameters

In [None]:
#!rm -rf /content/yolov9/runs/train/yolov9-s-edgetpu

In [None]:
banner = "=" * 70
print(banner)
print("Training Configuration")
print(banner)

# Training parameters (consumed by the training and validation cells)
BATCH_SIZE = 24       # Adjust if out of memory (try 8 or 12 or 16)
EPOCHS = 20           # Fine-tuning epochs
IMAGE_SIZE = 640      # Match pre-training size
CHECKPOINT_FREQ = 3   # Save every N epochs

# Echo the effective settings
print("\nüìã Training Configuration:")
print("   Model: YOLOv9-s with ReLU6 activation")
print(f"   Starting weights: {weights_file}")
print(f"   Batch size: {BATCH_SIZE}")
print(f"   Image size: {IMAGE_SIZE}x{IMAGE_SIZE}")
print(f"   Epochs: {EPOCHS}")
print(f"   Checkpoint frequency: every {CHECKPOINT_FREQ} epochs")

# Usage hints for the hosted runtime
print("\nüí° Colab Free Tier Tips:")
print("   ‚Ä¢ Training will take ~3-5 hours")
print("   ‚Ä¢ Session may disconnect - rerun next cell to resume")
print("   ‚Ä¢ Don't close the browser tab")
print("   ‚Ä¢ Checkpoints saved automatically")

Training Configuration

üìã Training Configuration:
   Model: YOLOv9-s with ReLU6 activation
   Starting weights: yolov9-s-converted.pt
   Batch size: 24
   Image size: 640x640
   Epochs: 10
   Checkpoint frequency: every 3 epochs

üí° Colab Free Tier Tips:
   ‚Ä¢ Training will take ~3-5 hours
   ‚Ä¢ Session may disconnect - rerun next cell to resume
   ‚Ä¢ Don't close the browser tab
   ‚Ä¢ Checkpoints saved automatically


## Step 8: Start Training

Leave browser tab open while this runs

In [None]:
# ============================================================================
# FIX: Patch train_dual.py to allow loading pretrained weights
# ============================================================================
# Adds weights_only=False to the checkpoint torch.load() call in train_dual.py.
# The patch is idempotent: an already patched file is detected and left alone.
print("Patching train_dual.py for PyTorch 2.8 compatibility...")

# Read the file
with open('train_dual.py', 'r') as f:
    content = f.read()

# Find and replace the torch.load line
old_line = "ckpt = torch.load(weights, map_location='cpu')  # load checkpoint to CPU to avoid CUDA memory leak"
new_line = "ckpt = torch.load(weights, map_location='cpu', weights_only=False)  # load checkpoint to CPU to avoid CUDA memory leak"

if new_line in content:
    # Re-run after a successful patch: old_line is gone, which is expected
    print("train_dual.py is already patched - nothing to do")
elif old_line in content:
    content = content.replace(old_line, new_line)

    # Write back
    with open('train_dual.py', 'w') as f:
        f.write(content)

    print("‚úì train_dual.py patched successfully")
else:
    print("‚ö†Ô∏è  Line not found - manual edit needed")

Patching train_dual.py for PyTorch 2.8 compatibility...
‚úì train_dual.py patched successfully


In [None]:
print("="*70)
print("STARTING TRAINING")
print("="*70)
print("You can minimize browser but don't close the tab\n")
print("="*70 + "\n")


import shutil
from pathlib import Path
import threading
import time

# Run training
!python train_dual.py \
  --workers 4 \
  --device 0 \
  --batch-size {BATCH_SIZE} \
  --data data/coco.yaml \
  --img {IMAGE_SIZE} \
  --cfg models/detect/yolov9-s-relu6.yaml \
  --weights {weights_file} \
  --name yolov9-s-relu6 \
  --hyp data/hyps/hyp.scratch-high.yaml \
  --epochs {EPOCHS} \
  --save-period {CHECKPOINT_FREQ} \
  --cache \
  --exist-ok

print("\n" + "="*70)
print("‚úì TRAINING COMPLETE!")
print("="*70)

STARTING TRAINING
You can minimize browser but don't close the tab


2025-11-10 12:09:02.588121: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762776542.847725    5421 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762776542.920591    5421 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1762776543.458684    5421 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1762776543.458722    5421 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:176277

## Step 9: Download Trained Model

In [None]:
from google.colab import files
import shutil

banner = "=" * 70
print(banner)
print("Download Trained Model")
print(banner)

best_weights = Path('runs/train/yolov9-s-relu6/weights/best.pt')
last_weights = Path('runs/train/yolov9-s-relu6/weights/last.pt')
epoch_summary = Path('runs/train/yolov9-s-relu6/results.csv')

# (source file, copy destination, label used in the status line)
artifacts = [
    (best_weights, '/content/yolov9-s-relu6-best.pt', 'Model'),
    (last_weights, '/content/yolov9-s-relu6-last.pt', 'Model'),
    (epoch_summary, '/content/epoch-results.csv', 'Epoch summary'),
]

# Each artifact that exists is copied to /content and offered as a browser
# download; missing ones are skipped silently.
for source, output_path, label in artifacts:
    if not source.exists():
        continue
    shutil.copy(source, output_path)
    print(f"‚úì {label} ready: {output_path}")
    print(f"  File size: {Path(output_path).stat().st_size / 1e6:.2f} MB")
    print("\nDownloading to your computer...")
    files.download(output_path)
    print("\n‚úì Download complete!")


Download Trained Model
‚úì Model ready: /content/yolov9-s-relu6-best.pt
  File size: 79.88 MB

Downloading to your computer...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


‚úì Download complete!
‚úì Model ready: /content/yolov9-s-relu6-last.pt
  File size: 79.88 MB

Downloading to your computer...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


‚úì Download complete!
‚úì Epoch summary ready: /content/epoch-results.csv
  File size: 0.00 MB

Downloading to your computer...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


‚úì Download complete!


## Step 10: (Optional) Validate Trained Model

In [None]:
print("="*70)
print("Validating Trained Model")
print("="*70)

best_weights = Path('runs/train/yolov9-s-relu6/weights/best.pt')

if best_weights.exists():
    print(f"‚úì Best weights found: {best_weights}\n")

    # Run validation
    print("Running validation on COCO val set...\n")
    !python val_dual.py \
      --data data/coco.yaml \
      --img {IMAGE_SIZE} \
      --batch 32 \
      --conf 0.001 \
      --iou 0.7 \
      --device 0 \
      --weights {best_weights} \
      --task val

    print("\n‚úì Validation complete!")
else:
    print("‚ö†Ô∏è  Best weights not found. Training may have failed.")
    print("    Check training output above for errors.")

## Step 11: (Optional) Plot Training Results

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

print("="*70)
print("Training Results")
print("="*70)

# Per-epoch log of the training run. This is the same results.csv that the
# download cell exports; the run directory is not expected to hold a results.txt.
results_file = Path('runs/train/yolov9-s-relu6/results.csv')


def _pick_column(frame, candidates):
    """Return the first name from `candidates` that is a column of `frame`, else None."""
    for name in candidates:
        if name in frame.columns:
            return name
    return None


if results_file.exists():
    # Read results; header cells may be padded with spaces, so normalise them
    results = pd.read_csv(results_file, skipinitialspace=True)
    results.columns = [str(col).strip() for col in results.columns]

    epochs = results['epoch'] if 'epoch' in results.columns else results.index

    # (panel title, y label, [(legend label, accepted column names), ...])
    # NOTE(review): metric column names differ between YOLO code bases; each
    # series lists the accepted spellings - confirm against the csv header.
    panels = [
        ('Box Loss', 'Loss', [
            ('Train', ['train/box_loss']),
            ('Val', ['val/box_loss']),
        ]),
        ('Object / DFL Loss', 'Loss', [
            ('Train', ['train/obj_loss', 'train/dfl_loss']),
            ('Val', ['val/obj_loss', 'val/dfl_loss']),
        ]),
        ('Class Loss', 'Loss', [
            ('Train', ['train/cls_loss']),
            ('Val', ['val/cls_loss']),
        ]),
        ('Mean Average Precision', 'mAP', [
            ('mAP@0.5', ['metrics/mAP50(B)', 'metrics/mAP_0.5']),
            ('mAP@0.5:0.95', ['metrics/mAP50-95(B)', 'metrics/mAP_0.5:0.95']),
        ]),
    ]

    # Plot: one panel per entry, row-major over the 2x2 grid
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('YOLOv9-s ReLU6 Training Results', fontsize=16, fontweight='bold')

    for ax, (title, ylabel, series) in zip(axes.flat, panels):
        for label, candidates in series:
            column = _pick_column(results, candidates)
            if column is None:
                # Skip instead of failing the whole figure on one missing metric
                print(f"  (skipping '{label}' in '{title}': none of {candidates} in {results_file.name})")
                continue
            ax.plot(epochs, results[column], label=label, linewidth=2)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        if ax.get_legend_handles_labels()[0]:
            ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('training_results.png', dpi=150, bbox_inches='tight')
    plt.show()

    # Show final metrics
    if len(results):
        last_row = results.iloc[-1]
        final_epoch = int(list(epochs)[-1])
        print(f"\nüìä Final Metrics (Epoch {final_epoch}):")
        map50_col = _pick_column(results, ['metrics/mAP50(B)', 'metrics/mAP_0.5'])
        map5095_col = _pick_column(results, ['metrics/mAP50-95(B)', 'metrics/mAP_0.5:0.95'])
        if map50_col is not None:
            print(f"   mAP@0.5: {last_row[map50_col]:.4f}")
        if map5095_col is not None:
            print(f"   mAP@0.5:0.95: {last_row[map5095_col]:.4f}")
else:
    print("‚ö†Ô∏è  Results file not found")