# 🎯 YOLO Segmentation Anti-Cheat Model Training v4

**UPGRADED VERSION** with more diverse datasets for improved detection!

## Target Classes:
- 📱 **Phone** - Multiple angles, in-hand, on-desk, hidden
- 📄 **Material/Paper/Book/Notebook** - Cheat sheets, notes, textbooks
- 👤 **Person** - For multi-person detection
- 🎧 **Headphones** - Earbuds, AirPods, wireless, wired, headsets

## New Features in v4:
- More segmentation datasets from Roboflow Universe
- Better class mapping for variations
- Improved data augmentation
- Higher resolution training option

## Instructions:
1. Mount Google Drive
2. Run all cells in order
3. Download the new ONNX file when done
4. Replace `Intelligence-Test/public/models/anticheat_yolo11s.onnx`

In [None]:
# Mount Google Drive
# The base model, the training run folder and the exported ONNX file used by
# the later cells all live under /content/drive/MyDrive, so this has to run
# first. `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install dependencies
# `-q` keeps pip quiet. No versions are pinned, so every run installs
# whatever release is current at that moment.
# NOTE(review): `roboflow` is installed but not imported by any cell shown
# here - confirm it is still needed.
!pip install ultralytics -q
!pip install onnx onnxruntime -q
!pip install roboflow -q

import os
import shutil
import yaml
import numpy as np
from pathlib import Path
from ultralytics import YOLO

print("‚úÖ Dependencies installed!")

In [None]:
# Configuration - EXPANDED DATASETS
# The position of a name in this list is its class id: normalize_class()
# returns TARGET_CLASSES.index(...) and the generated data.yaml enumerates
# the list in the same order.
TARGET_CLASSES = ['person', 'phone', 'material', 'headphones']

# EXPANDED Dataset URLs from Roboflow Universe
# Prioritized by class weakness and diversity
# Each entry is (local folder name, download URL). The folder name is also
# used as a prefix for the merged image and label file names.
# NOTE(review): every URL carries a `key=` query parameter - presumably an
# access token; confirm these are safe to keep in a shared notebook.
DATASETS = [
    # =============================================
    # PHONE DATASETS - HIGH PRIORITY (many angles)
    # =============================================
    ("phone_1", "https://app.roboflow.com/ds/5ReObgnLbQ?key=HTPSgVzDLW"),
    ("phone_2", "https://app.roboflow.com/ds/f9k54F7Azq?key=eYssUekSYc"),
    # Phone detection in various environments
    ("phone_in_hand", "https://universe.roboflow.com/ds/H1dXqnAb7N?key=JFqQMFoL1k"),
    # Smartphone dataset
    ("smartphone", "https://universe.roboflow.com/ds/2knGg7q8NP?key=pPnGfqSkOw"),
    
    # =============================================
    # PAPER/BOOK/MATERIAL DATASETS
    # =============================================
    ("paper_1", "https://app.roboflow.com/ds/inuabMtp6t?key=jbu7HTlrBf"),
    ("paper_2", "https://app.roboflow.com/ds/b4oxAhlW40?key=4A761Kjm5F"),
    # Book dataset
    ("book_1", "https://universe.roboflow.com/ds/krnm1ZGr5g?key=jQELvT3SXU"),
    # Document/Paper detection
    ("document", "https://universe.roboflow.com/ds/jxG2VGLyQT?key=qwz3k1Nv5F"),
    
    # =============================================
    # HEADPHONES/EARBUDS DATASETS (various types)
    # =============================================
    ("headphones_1", "https://app.roboflow.com/ds/qqqEeSKAlk?key=GT1Xa65onI"),
    ("headphones_2", "https://app.roboflow.com/ds/cKHwOqmuda?key=qL10KsWlBt"),
    # Earbuds/AirPods specific
    ("earbuds", "https://universe.roboflow.com/ds/wKj4kR2qgM?key=8TFdpL5nQV"),
    # Ear device detection
    ("ear_device", "https://universe.roboflow.com/ds/yLmKp3QR7N?key=FxHjVtB8sL"),
    
    # =============================================
    # PERSON DATASETS (for multi-person detection)
    # =============================================
    ("person_1", "https://app.roboflow.com/ds/PwRwV0c1jL?key=FgXbXeqlpH"),
    # People detection
    ("person_coco", "https://universe.roboflow.com/ds/tGq8jL5rNM?key=Kw2VhR9pXs"),
]

# EXPANDED Class mapping - Cover more variations
# Maps a class name as it appears in a source dataset to one of
# TARGET_CLASSES. Every value must be an entry of TARGET_CLASSES, because
# normalize_class() looks the value up with TARGET_CLASSES.index().
# Insertion order is significant: normalize_class() walks the entries in
# this order.
CLASS_MAPPING = {
    # Person variations
    'person': 'person', 'student': 'person', 'face': 'person', 'head': 'person',
    'human': 'person', 'people': 'person', 'man': 'person', 'woman': 'person',
    'boy': 'person', 'girl': 'person', 'adult': 'person', 'child': 'person',
    
    # Phone variations
    'phone': 'phone', 'mobile': 'phone', 'cell phone': 'phone',
    'telephone': 'phone', 'smartphone': 'phone', 'cellphone': 'phone',
    'mobile phone': 'phone', 'iphone': 'phone', 'android': 'phone',
    'mobile_phone': 'phone', 'cell_phone': 'phone', 'smart_phone': 'phone',
    'handphone': 'phone', 'hp': 'phone', 'device': 'phone',
    # Two spellings of what looks like an auto-generated Roboflow class name
    # (one with "7-03am", one with "7:03am").
    'ProductRecog - v2 2024-11-05 7-03am': 'phone',
    'ProductRecog - v2 2024-11-05 7:03am': 'phone',
    
    # Material variations
    'paper': 'material', 'document': 'material', 'book': 'material',
    'notebook': 'material', 'notes': 'material', 'sheet': 'material',
    'material': 'material', 'cheat sheet': 'material', 'PAPER': 'material',
    'Paper': 'material', 'textbook': 'material', 'magazine': 'material',
    'newspaper': 'material', 'letter': 'material', 'card': 'material',
    'note': 'material', 'page': 'material', 'papers': 'material',
    'books': 'material', 'reading': 'material', 'text': 'material',
    
    # Headphones variations
    'headphone': 'headphones', 'headphones': 'headphones',
    'earphone': 'headphones', 'earphones': 'headphones',
    'headset': 'headphones', 'earbuds': 'headphones', 'earbud': 'headphones',
    'airpods': 'headphones', 'ear device': 'headphones', 'Headphone': 'headphones',
    'left earbud': 'headphones', 'eardevice': 'headphones',
    'right earbud': 'headphones', 'ear_device': 'headphones',
    'wireless_earbuds': 'headphones', 'bluetooth_headphones': 'headphones',
    'ear': 'headphones', 'airpod': 'headphones', 'headphone_on': 'headphones',
}

def normalize_class(class_name, class_mapping=None, target_classes=None):
    """Map a source dataset class name to a target class index.

    The name and every mapping key are reduced to the same canonical form
    (lower case, ``-`` and ``_`` turned into spaces, whitespace collapsed)
    before they are compared, so ``bluetooth_headphones``,
    ``Bluetooth-Headphones`` and ``bluetooth headphones`` are all the same
    label.

    Lookup order:
      1. Exact match on the canonical form.
      2. Partial match: a mapping key contained in the name. When several
         keys are contained, the longest one wins (``headphones`` beats
         ``head`` for "headphones on"). Keys of three characters or fewer
         (``hp``, ``ear``, ``man``, ...) only count as whole words, so
         ``hp`` does not fire on "touchpad".

    A name that is merely contained in some key is NOT a match: that rule
    turned numeric labels such as "0" and empty labels into target classes.

    Args:
        class_name: Class name from a source dataset. Non-string values are
            converted with ``str()``.
        class_mapping: Optional ``{source name: target name}`` table.
            Defaults to the file-level ``CLASS_MAPPING``.
        target_classes: Optional ordered list of target names. Defaults to
            the file-level ``TARGET_CLASSES``.

    Returns:
        The index of the target class in ``target_classes``, or ``-1`` when
        the name maps to no target class.

    Raises:
        ValueError: If the mapping points at a target name that is missing
            from ``target_classes``.
    """
    if class_mapping is None:
        class_mapping = CLASS_MAPPING
    if target_classes is None:
        target_classes = TARGET_CLASSES

    def canonical(label):
        return ' '.join(
            str(label).lower().replace('-', ' ').replace('_', ' ').split()
        )

    if class_name is None:
        return -1
    name = canonical(class_name)
    if not name:
        return -1

    # Canonicalise the keys the same way as the name. On a collision the
    # earlier entry wins, matching the first-match order of the mapping.
    canonical_mapping = {}
    for key, target in class_mapping.items():
        canonical_mapping.setdefault(canonical(key), target)

    if name in canonical_mapping:
        return target_classes.index(canonical_mapping[name])

    # Partial matching for flexibility
    padded_name = f" {name} "
    best_key = None
    for key in canonical_mapping:
        if not key:
            continue
        if len(key) <= 3:
            # Very short keys are too likely to occur inside unrelated words.
            contained = f" {key} " in padded_name
        else:
            contained = key in name
        if contained and (best_key is None or len(key) > len(best_key)):
            best_key = key

    if best_key is None:
        return -1
    return target_classes.index(canonical_mapping[best_key])

def bbox_to_segment(bbox_coords):
    """
    Turn a YOLO box (x_center, y_center, width, height) into a 4-point polygon.

    Lets detection-only labels be used when training a segmentation model.
    The corners are emitted as "x y" pairs in the order top-left, top-right,
    bottom-right, bottom-left, each clamped to the [0, 1] range.

    Returns the polygon as a space-separated string, or None when the input
    does not hold exactly four numbers, the centre lies outside [0, 1], or
    the width/height is not in (0, 1].
    """
    try:
        cx, cy, width, height = (float(value) for value in bbox_coords)
    except (ValueError, TypeError):
        return None

    centre_ok = 0 <= cx <= 1 and 0 <= cy <= 1
    size_ok = 0 < width <= 1 and 0 < height <= 1
    if not (centre_ok and size_ok):
        return None

    left = max(0, cx - width/2)
    right = min(1, cx + width/2)
    top = max(0, cy - height/2)
    bottom = min(1, cy + height/2)

    corners = ((left, top), (right, top), (right, bottom), (left, bottom))
    return " ".join(f"{x} {y}" for x, y in corners)

# Confirm the configuration cell ran and show what it defined.
for config_line in (
    "‚úÖ Configuration loaded!",
    f"Target classes: {TARGET_CLASSES}",
    f"Total datasets: {len(DATASETS)}",
):
    print(config_line)

In [None]:
# Download datasets with error handling
import subprocess
import zipfile

# Every dataset is unpacked into its own sub-folder of this directory, and
# the relative paths below are resolved against it.
os.makedirs('/content/raw_datasets', exist_ok=True)
os.chdir('/content/raw_datasets')

successful = []  # dataset names that are available on disk
failed = []      # dataset names whose download or extraction failed

for name, url in DATASETS:
    # Only a non-empty folder counts as an existing dataset. An empty folder
    # is the residue of an interrupted run and is downloaded again.
    if os.path.isdir(name) and os.listdir(name):
        print(f"‚úì {name} already exists")
        successful.append(name)
        continue

    print(f"üì• Downloading {name}...")
    zip_path = os.path.join(name, "dataset.zip")
    try:
        os.makedirs(name, exist_ok=True)
        # Argument list, no shell: nothing in the URL is interpreted.
        # --fail makes curl exit non-zero on an HTTP error instead of saving
        # the error page, and check=True turns that into an exception.
        subprocess.run(
            ["curl", "--location", "--fail", "--silent", "--show-error",
             "--output", zip_path, url],
            check=True,
        )

        # Check if download was successful: big enough to be real AND an
        # actual zip archive (an HTML error page can exceed 1000 bytes).
        if os.path.getsize(zip_path) > 1000 and zipfile.is_zipfile(zip_path):
            with zipfile.ZipFile(zip_path) as archive:
                archive.extractall(name)
            os.remove(zip_path)
            successful.append(name)
            print(f"   ‚úì Downloaded successfully")
        else:
            failed.append(name)
            print(f"   ‚úó Download failed or empty file")
            shutil.rmtree(name, ignore_errors=True)
    except (OSError, subprocess.CalledProcessError, zipfile.BadZipFile) as e:
        failed.append(name)
        print(f"   ‚úó Error: {e}")
        # Remove the partial folder so a rerun retries this dataset instead
        # of reporting it as already present.
        shutil.rmtree(name, ignore_errors=True)

print(f"\n‚úÖ Downloaded: {len(successful)} datasets")
if failed:
    print(f"‚ö†Ô∏è Failed: {len(failed)} datasets - {failed}")

In [None]:
# Merge and convert datasets to SEGMENTATION format
OUTPUT_DIR = '/content/merged_dataset_v4'

# Start from an empty output tree so files of an earlier run cannot leak in.
shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
for _split_name in ('train', 'valid'):
    for _kind in ('images', 'labels'):
        os.makedirs(os.path.join(OUTPUT_DIR, _split_name, _kind), exist_ok=True)

# 'train'/'valid' count images that received a label file; the other two
# count individual label lines by how they were produced.
stats = {'train': 0, 'valid': 0, 'bbox_converted': 0, 'seg_preserved': 0}
class_counts = {c: 0 for c in TARGET_CLASSES}


def _load_source_classes(data_yaml_path):
    """Read a dataset's data.yaml and return {class id: class name}.

    Returns None when the file cannot be read or parsed, or when it does not
    describe its classes in a usable way.
    """
    try:
        with open(data_yaml_path, 'r') as f:
            config = yaml.safe_load(f)
    except (OSError, yaml.YAMLError):
        return None
    if not isinstance(config, dict):
        # Empty file or a top-level list/scalar: nothing to read names from.
        return None

    names = config.get('names') or []
    if isinstance(names, dict):
        # Keep the ids the file declares instead of assuming 0..n-1 order.
        try:
            return {int(class_id): label for class_id, label in names.items()}
        except (TypeError, ValueError):
            return None
    return dict(enumerate(names))


def _convert_label_line(line, class_names_by_id):
    """Convert one source label line to a target-class segmentation line.

    Returns (target class id, converted line, stats key), or None when the
    line has to be dropped: too short, unknown or unmapped class, invalid
    box, or a coordinate list that is neither a box nor a polygon.
    """
    parts = line.split()
    if len(parts) < 5:
        return None

    try:
        old_class_id = int(parts[0])
    except ValueError:
        return None

    # .get() also rejects negative ids, which would otherwise index from the
    # end of a list of class names.
    old_class_name = class_names_by_id.get(old_class_id)
    if old_class_name is None:
        return None

    new_class_id = normalize_class(str(old_class_name))
    if new_class_id < 0:
        return None

    coords = parts[1:]
    if len(coords) == 4:
        # Detection label: turn the box into a 4-point polygon.
        segment_coords = bbox_to_segment(coords)
        if not segment_coords:
            return None
        return new_class_id, f"{new_class_id} {segment_coords}", 'bbox_converted'

    # A polygon is x/y pairs and needs at least three points.
    if len(coords) >= 6 and len(coords) % 2 == 0:
        return new_class_id, f"{new_class_id} {' '.join(coords)}", 'seg_preserved'
    return None


for name, _ in DATASETS:
    dataset_dir = f"/content/raw_datasets/{name}"
    if not os.path.exists(dataset_dir):
        continue

    # Find data.yaml
    data_yaml = None
    for root, dirs, files in os.walk(dataset_dir):
        if 'data.yaml' in files:
            data_yaml = os.path.join(root, 'data.yaml')
            break

    if not data_yaml:
        print(f"‚ö†Ô∏è No data.yaml in {name}")
        continue

    class_names_by_id = _load_source_classes(data_yaml)
    if class_names_by_id is None:
        print(f"‚ö†Ô∏è Cannot parse data.yaml in {name}")
        continue
    source_classes = list(class_names_by_id.values())

    print(f"\nüìÇ Processing {name}...")
    print(f"   Classes: {source_classes}")

    for split in ['train', 'valid', 'test']:
        img_dir = None
        lbl_dir = None

        # The split folders sit either at the dataset root or next to
        # data.yaml.
        for try_path in [dataset_dir, os.path.dirname(data_yaml)]:
            if os.path.exists(os.path.join(try_path, split, 'images')):
                img_dir = os.path.join(try_path, split, 'images')
                lbl_dir = os.path.join(try_path, split, 'labels')
                break

        if not img_dir or not os.path.exists(img_dir):
            continue

        # The source 'test' split is folded into the training data.
        out_split = 'train' if split in ['train', 'test'] else 'valid'
        count = 0

        for img_file in os.listdir(img_dir):
            if not img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue

            # The dataset name prefix keeps files from different datasets
            # from overwriting each other.
            src_img = os.path.join(img_dir, img_file)
            dst_img = os.path.join(OUTPUT_DIR, out_split, 'images', f"{name}_{img_file}")
            # NOTE(review): the image is copied even when no label file ends
            # up being written for it, while `count` only tracks labelled
            # images - confirm unlabelled images are wanted.
            shutil.copy(src_img, dst_img)

            lbl_file = os.path.splitext(img_file)[0] + '.txt'
            src_lbl = os.path.join(lbl_dir, lbl_file)
            dst_lbl = os.path.join(OUTPUT_DIR, out_split, 'labels', f"{name}_{lbl_file}")

            if not os.path.exists(src_lbl):
                continue

            new_lines = []
            with open(src_lbl, 'r') as f:
                for line in f:
                    converted = _convert_label_line(line, class_names_by_id)
                    if converted is None:
                        continue
                    new_class_id, new_line, stat_key = converted
                    # Counted only once the line is known to be kept, so the
                    # class totals match what is written to disk.
                    class_counts[TARGET_CLASSES[new_class_id]] += 1
                    stats[stat_key] += 1
                    new_lines.append(new_line)

            if new_lines:
                with open(dst_lbl, 'w') as f:
                    f.write('\n'.join(new_lines))
                count += 1

        stats[out_split] += count
        if count > 0:
            print(f"   {split} -> {out_split}: {count} images")

# Create data.yaml
# Dataset description for training: where the merged images live and which
# class id stands for which target class.
data_yaml_content = dict(
    path=OUTPUT_DIR,
    train='train/images',
    val='valid/images',
    names=dict(enumerate(TARGET_CLASSES)),
    nc=len(TARGET_CLASSES),
)

with open(os.path.join(OUTPUT_DIR, "data.yaml"), 'w') as yaml_file:
    yaml.dump(data_yaml_content, yaml_file, default_flow_style=False)

# Report what the merge produced: image totals per split, label instances
# per target class, and how the label lines were obtained.
banner = "=" * 60

print("\n" + banner)
print("‚úÖ Dataset prepared for SEGMENTATION training!")
for split_title, split_key in (("Train", "train"), ("Valid", "valid")):
    print(f"   {split_title} images: {stats[split_key]}")

print("\nüìä Class distribution:")
for class_label, instance_total in class_counts.items():
    print(f"   {class_label}: {instance_total} instances")

print("\nüìä Label conversion:")
for description, stat_key in (
    ("Bounding boxes -> polygons", "bbox_converted"),
    ("Segmentation preserved", "seg_preserved"),
):
    print(f"   {description}: {stats[stat_key]}")
print(banner)

In [None]:
# Model Configuration
# Choose the weights that training starts from.
#
# Option 1: Continue from previous best model (recommended for fine-tuning)
BASE_MODEL_PATH = '/content/drive/MyDrive/Intelligence-Test-Models/anticheat_finetuned_seg_v3/weights/best.pt'

# Option 2: Start fresh from pretrained YOLO11 segmentation model
# BASE_MODEL_PATH = 'yolo11s-seg.pt'  # Small and fast
# BASE_MODEL_PATH = 'yolo11m-seg.pt'  # Medium
# BASE_MODEL_PATH = 'yolo11x-seg.pt'  # Large and accurate

print(f"Base model: {BASE_MODEL_PATH}")

# Three cases: the file is on disk; it is a stock model name that does not
# need to exist locally; or it is a missing custom path, which is replaced
# by the small pretrained segmentation model.
if os.path.exists(BASE_MODEL_PATH):
    print("‚úÖ Model file found!")
elif BASE_MODEL_PATH.startswith('yolo'):
    print(f"üì• Will download pretrained model: {BASE_MODEL_PATH}")
else:
    print("‚ö†Ô∏è Model file not found - will use pretrained yolo11s-seg.pt")
    BASE_MODEL_PATH = 'yolo11s-seg.pt'

In [None]:
# Training with enhanced settings
model = YOLO(BASE_MODEL_PATH)

for notice in (
    "üöÄ Starting training...",
    "This may take 1-2 hours depending on dataset size.",
):
    print(notice)

# Schedule and optimiser
schedule_args = dict(
    epochs=100,             # More epochs for better convergence
    imgsz=640,              # Standard size
    batch=16,               # Adjust based on GPU memory
    patience=20,            # Early stopping patience
    lr0=0.0005,             # Lower LR for fine-tuning
    lrf=0.01,               # Final LR factor
    warmup_epochs=5,        # Longer warmup
    freeze=5,               # Freeze fewer layers for more learning
)

# Data augmentation
augmentation_args = dict(
    augment=True,           # Enable augmentation
    mosaic=1.0,             # Mosaic augmentation
    mixup=0.1,              # MixUp augmentation
    copy_paste=0.1,         # Copy-paste augmentation
    degrees=10,             # Rotation augmentation
    translate=0.1,          # Translation
    scale=0.5,              # Scale variation
    shear=2.0,              # Shear
    perspective=0.0001,     # Perspective
    flipud=0.0,             # No vertical flip
    fliplr=0.5,             # Horizontal flip
)

# Where the run is stored and how it executes
run_args = dict(
    project='/content/drive/MyDrive/Intelligence-Test-Models',
    name='anticheat_finetuned_seg_v4',
    exist_ok=True,
    device=0,
    verbose=True,
)

results = model.train(
    data=f"{OUTPUT_DIR}/data.yaml",
    **schedule_args,
    **augmentation_args,
    **run_args,
)

print("\n‚úÖ Training completed!")

In [None]:
# Export to ONNX
import glob

# Find the best model
model_dirs = glob.glob('/content/drive/MyDrive/Intelligence-Test-Models/anticheat_finetuned_seg_v4*/weights/best.pt')
if model_dirs:
    # Get latest by modification time. A name sort is not "latest": the run
    # folder used by training ("..._v4") sorts before any sibling such as
    # "..._v42", so an older run would be exported instead.
    BEST_MODEL_PATH = max(model_dirs, key=os.path.getmtime)
else:
    BEST_MODEL_PATH = '/content/drive/MyDrive/Intelligence-Test-Models/anticheat_finetuned_seg_v4/weights/best.pt'

print(f"Loading model: {BEST_MODEL_PATH}")
model = YOLO(BEST_MODEL_PATH)

print("üì¶ Exporting to ONNX...")
exported_path = model.export(
    format='onnx',
    imgsz=640,
    simplify=True,
    dynamic=False,
    opset=17
)

# Prefer the location reported by export(). The fallback swaps only the file
# extension, so a ".pt" elsewhere in the path is left untouched.
if exported_path:
    onnx_path = str(exported_path)
else:
    onnx_path = os.path.splitext(BEST_MODEL_PATH)[0] + '.onnx'
print(f"\n‚úÖ ONNX model saved: {onnx_path}")

In [None]:
# Test and validate
print("\nüìã VALIDATION RESULTS:")
model = YOLO(BEST_MODEL_PATH)
metrics = model.val(data=f"{OUTPUT_DIR}/data.yaml")

print("\n" + "="*60)
print("üìä FINAL METRICS:")
print(f"   Overall mAP50: {metrics.box.map50:.3f}")
print(f"   Overall mAP50-95: {metrics.box.map:.3f}")

# The model is a segmentation model, so report mask quality as well when the
# metrics object provides it.
mask_metrics = getattr(metrics, 'seg', None)
if mask_metrics is not None:
    print(f"   Mask mAP50: {mask_metrics.map50:.3f}")
    print(f"   Mask mAP50-95: {mask_metrics.map:.3f}")

print("\nPer-class mAP50:")
# ap50 has one entry per class that occurs in the validation data, and
# ap_class_index says which class id each entry belongs to. Indexing ap50 by
# class id directly mislabels the values as soon as one class is absent.
ap50_by_class_id = {
    int(class_id): ap50
    for class_id, ap50 in zip(metrics.box.ap_class_index, metrics.box.ap50)
}
for i, cls in enumerate(TARGET_CLASSES):
    if i in ap50_by_class_id:
        print(f"   {cls}: {ap50_by_class_id[i]:.3f}")
    else:
        print(f"   {cls}: n/a (no validation instances)")
print("="*60)

In [None]:
# Final instructions
# Tell the user how to move the exported model into the web application.
divider = "=" * 60
closing_lines = [
    "\n" + divider,
    "üìã NEXT STEPS:",
    divider,
    "1. Download ONNX file from Google Drive:",
    f"   {onnx_path}",
    "\n2. Rename to: anticheat_yolo11s.onnx",
    "\n3. Copy to your project:",
    "   Intelligence-Test/public/models/anticheat_yolo11s.onnx",
    "\n4. Rebuild the web app:",
    "   cd Intelligence-Test && npm run build",
    "\n5. Deploy!",
    divider,
    "\nüéâ Training complete! Good luck with your exams!",
]
for closing_line in closing_lines:
    print(closing_line)