# 🎯 YOLO Anti-Cheat Model - Production Training

**PRODUCTION-READY** notebook với datasets chất lượng cao từ COCO và Roboflow.

## 📋 Hướng dẫn sử dụng:
1. Mở notebook này trên Google Colab
2. Chọn Runtime > Change runtime type > T4 GPU
3. Bấm **Runtime > Run all** (Ctrl+F9)
4. Đợi training hoàn tất (~2-3 giờ)
5. Download file ONNX từ Google Drive

## 🎯 Target Classes:
- 📱 **Phone** - Điện thoại nhiều góc độ (cầm tay, để bàn, giấu)
- 📄 **Material** - Tài liệu, sách, vở, giấy gian lận
- 👤 **Person** - Phát hiện nhiều người trong khung hình
- 🎧 **Headphones** - Tai nghe (AirPods, earbuds, headset)

## 📊 Datasets:
- **COCO 2017** - Person detection (chất lượng cao)
- **Open Images** - Phone, Book detection
- **Roboflow Universe** - Specialized datasets

In [None]:
# ===================================================
# CELL 1: Mount Google Drive
# ===================================================
from google.colab import drive
# Mount the user's Google Drive at /content/drive; DRIVE_OUTPUT (set in the
# configuration cell) lives under this mount point.
drive.mount('/content/drive')
print("‚úÖ Google Drive mounted!")

In [None]:
# ===================================================
# CELL 2: Install Dependencies
# ===================================================
# Install the training stack into the Colab runtime. Only ultralytics is
# pinned; the ONNX tooling, FiftyOne and Roboflow packages float to latest.
# `-q` keeps pip's output quiet.
!pip install ultralytics==8.3.0 -q
!pip install onnx onnxruntime onnxslim -q
!pip install fiftyone -q
!pip install roboflow -q

# Imports shared by the cells below.
import os
import shutil
import yaml
import json
import numpy as np
from pathlib import Path
from ultralytics import YOLO

print("‚úÖ All dependencies installed!")

In [None]:
# ===================================================
# CELL 3: Configuration
# ===================================================

# Target classes for anti-cheat detection.
# The list index is the YOLO class ID written into every label file.
TARGET_CLASSES = ['person', 'phone', 'material', 'headphones']
TARGET_CLASS_TO_ID = {c: i for i, c in enumerate(TARGET_CLASSES)}

# Output directory (local merged dataset) and Drive folder for trained models
OUTPUT_DIR = '/content/anticheat_dataset'
DRIVE_OUTPUT = '/content/drive/MyDrive/SmartExamPro-Models'

# Class mapping from various dataset labels to our target classes.
# Keys are lower-case with spaces (no '_' or '-'), matching the normalisation
# done in normalize_class().
CLASS_MAPPING = {
    # === PERSON ===
    'person': 'person',
    'human': 'person',
    'people': 'person',
    'man': 'person',
    'woman': 'person',
    'student': 'person',
    'face': 'person',
    'head': 'person',
    'pedestrian': 'person',
    
    # === PHONE ===
    'phone': 'phone',
    'cell phone': 'phone',
    'cellphone': 'phone',
    'mobile phone': 'phone',
    'mobile': 'phone',
    'smartphone': 'phone',
    'iphone': 'phone',
    'android': 'phone',
    'telephone': 'phone',
    'handheld': 'phone',
    
    # === MATERIAL (books, papers, documents) ===
    'book': 'material',
    'books': 'material',
    'paper': 'material',
    'papers': 'material',
    'document': 'material',
    'notebook': 'material',
    'note': 'material',
    'notes': 'material',
    'sheet': 'material',
    'magazine': 'material',
    'newspaper': 'material',
    'textbook': 'material',
    'letter': 'material',
    'card': 'material',
    'page': 'material',
    'cheat sheet': 'material',
    
    # === HEADPHONES ===
    'headphones': 'headphones',
    'headphone': 'headphones',
    'earphones': 'headphones',
    'earphone': 'headphones',
    'earbuds': 'headphones',
    'earbud': 'headphones',
    'airpods': 'headphones',
    'airpod': 'headphones',
    'headset': 'headphones',
    'ear device': 'headphones',
    'bluetooth headphones': 'headphones',
    'wireless earbuds': 'headphones',
}

# Labels shorter than this are never matched as a fragment *of* a mapping key:
# a one- or two-letter class name (e.g. "a", "id") occurring inside a key such
# as "human" or "android" is a coincidence, not a synonym.
_MIN_REVERSE_MATCH_LEN = 3


def normalize_class(class_name):
    """Map a source dataset class name to one of our target class IDs.

    The name is lower-cased, '_' and '-' become spaces and runs of whitespace
    are collapsed. Matching then proceeds from most to least specific:

    1. exact match against ``CLASS_MAPPING``;
    2. the *longest* mapping key contained in the name, so that
       "headphone case" resolves through 'headphone' rather than the shorter,
       earlier-listed 'head';
    3. the first mapping key that contains the name, for names of at least
       ``_MIN_REVERSE_MATCH_LEN`` characters.

    Args:
        class_name: Label as found in the source dataset. Non-string values
            (e.g. integer names parsed from YAML) are converted with ``str``.

    Returns:
        The index into ``TARGET_CLASSES``, or -1 when the name is empty or
        matches nothing.
    """
    raw = str(class_name).lower().replace('_', ' ').replace('-', ' ')
    name = ' '.join(raw.split())

    # An empty string is a substring of every key; treat it as unknown.
    if not name:
        return -1

    # Direct match
    if name in CLASS_MAPPING:
        return TARGET_CLASS_TO_ID[CLASS_MAPPING[name]]

    # Partial match: the name contains a known key. Prefer the longest key so
    # the result does not depend on dictionary order.
    contained = [key for key in CLASS_MAPPING if key in name]
    if contained:
        best_key = max(contained, key=len)
        return TARGET_CLASS_TO_ID[CLASS_MAPPING[best_key]]

    # Partial match: the name is a fragment of a known key (e.g. "earph").
    if len(name) >= _MIN_REVERSE_MATCH_LEN:
        for key, target in CLASS_MAPPING.items():
            if name in key:
                return TARGET_CLASS_TO_ID[target]

    return -1  # Unknown class

def bbox_to_polygon(bbox):
    """Convert a normalised YOLO bbox into a 4-point polygon label string.

    Args:
        bbox: Iterable of four values ``(x_center, y_center, width, height)``,
            each a number or numeric string, normalised to the image size.

    Returns:
        ``"x1 y1 x2 y1 x2 y2 x1 y2"`` (corners clamped to the 0..1 range), or
        ``None`` when the input is not four numbers, the centre lies outside
        the image, or the width/height is not in ``(0, 1]``.
    """
    # Only the parsing step can fail on malformed input; catching just these
    # errors keeps Ctrl+C / SystemExit from being swallowed.
    try:
        xc, yc, w, h = map(float, bbox)
    except (TypeError, ValueError, OverflowError):
        return None

    # NaN fails every comparison, so it is rejected here as well.
    if not (0 <= xc <= 1 and 0 <= yc <= 1 and 0 < w <= 1 and 0 < h <= 1):
        return None

    # A box may overhang the image edge; clamp the corners back inside.
    x1, y1 = max(0, xc - w/2), max(0, yc - h/2)
    x2, y2 = min(1, xc + w/2), min(1, yc + h/2)
    return f"{x1} {y1} {x2} {y1} {x2} {y2} {x1} {y2}"

# Confirm the configuration cell ran and echo the class list; a class's
# position in this list is its ID (see TARGET_CLASS_TO_ID).
print("‚úÖ Configuration loaded!")
print(f"Target classes: {TARGET_CLASSES}")

In [None]:
# ===================================================
# CELL 4: Download COCO Dataset (Person class)
# ===================================================
import fiftyone as fo
import fiftyone.zoo as foz

print("üì• Downloading COCO 2017 dataset (person class only)...")
print("This may take 10-15 minutes...")

# Download COCO with only person class
coco_dataset = foz.load_zoo_dataset(
    "coco-2017",
    split="train",
    label_types=["detections"],
    classes=["person"],
    max_samples=5000,  # Limit samples for faster training
)

print(f"‚úÖ COCO loaded: {len(coco_dataset)} samples with person class")

In [None]:
# ===================================================
# CELL 5: Download Open Images Dataset (Phone, Book)
# ===================================================

print("üì• Downloading Open Images V7 (phone, book classes)...")

# Download phone and book from Open Images
oi_phone = foz.load_zoo_dataset(
    "open-images-v7",
    split="train",
    label_types=["detections"],
    classes=["Mobile phone"],
    max_samples=3000,
)

oi_book = foz.load_zoo_dataset(
    "open-images-v7",
    split="train",
    label_types=["detections"],
    classes=["Book"],
    max_samples=2000,
)

print(f"‚úÖ Open Images Phone: {len(oi_phone)} samples")
print(f"‚úÖ Open Images Book: {len(oi_book)} samples")

In [None]:
# ===================================================
# CELL 6: Download Roboflow Datasets (Headphones, etc.)
# ===================================================
from roboflow import Roboflow

!mkdir -p /content/roboflow_datasets
%cd /content/roboflow_datasets

# Roboflow datasets with direct download URLs
ROBOFLOW_DATASETS = [
    # Headphones/Earphones datasets
    ("earphone_detection", "https://app.roboflow.com/ds/qqqEeSKAlk?key=GT1Xa65onI"),
    ("earphone_v2", "https://app.roboflow.com/ds/cKHwOqmuda?key=qL10KsWlBt"),
    
    # Phone datasets (various angles)
    ("phone_detection", "https://app.roboflow.com/ds/5ReObgnLbQ?key=HTPSgVzDLW"),
    ("phone_v2", "https://app.roboflow.com/ds/f9k54F7Azq?key=eYssUekSYc"),
    
    # Paper/Document datasets
    ("document_detection", "https://app.roboflow.com/ds/inuabMtp6t?key=jbu7HTlrBf"),
    ("paper_detection", "https://app.roboflow.com/ds/b4oxAhlW40?key=4A761Kjm5F"),
]

successful_datasets = []
for name, url in ROBOFLOW_DATASETS:
    print(f"üì• Downloading {name}...")
    try:
        os.makedirs(name, exist_ok=True)
        !curl -L "{url}" -o {name}/roboflow.zip 2>/dev/null
        if os.path.exists(f"{name}/roboflow.zip") and os.path.getsize(f"{name}/roboflow.zip") > 1000:
            !unzip -q -o {name}/roboflow.zip -d {name}
            !rm -f {name}/roboflow.zip
            successful_datasets.append(name)
            print(f"   ‚úÖ {name} downloaded")
        else:
            print(f"   ‚ö†Ô∏è {name} failed - skipping")
            !rm -rf {name}
    except Exception as e:
        print(f"   ‚ö†Ô∏è {name} error: {e}")

print(f"\n‚úÖ Successfully downloaded: {len(successful_datasets)} Roboflow datasets")

In [None]:
# ===================================================
# CELL 7: Prepare Merged Dataset
# ===================================================

# Create output directories
!rm -rf {OUTPUT_DIR}
os.makedirs(f"{OUTPUT_DIR}/train/images", exist_ok=True)
os.makedirs(f"{OUTPUT_DIR}/train/labels", exist_ok=True)
os.makedirs(f"{OUTPUT_DIR}/valid/images", exist_ok=True)
os.makedirs(f"{OUTPUT_DIR}/valid/labels", exist_ok=True)

stats = {'train': 0, 'valid': 0, 'total_labels': 0}
class_counts = {c: 0 for c in TARGET_CLASSES}

# ===== Process COCO Person =====
# Export every COCO sample that has at least one usable person box: the image
# is copied into a random 90/10 train/valid split and its person boxes are
# written as 4-point polygons.
print("\nüì¶ Processing COCO Person dataset...")
for sample in coco_dataset:
    if sample.ground_truth is None:
        continue

    # Build the label lines first: an image must only be exported when it has
    # labels, otherwise it would be trained on as an empty background image
    # even though it shows a person.
    lines = []
    for det in sample.ground_truth.detections:
        if (det.label or '').lower() != 'person':
            continue
        x, y, w, h = det.bounding_box  # FiftyOne format: top-left x, y, width, height
        polygon = bbox_to_polygon([x + w/2, y + h/2, w, h])
        if polygon:
            lines.append(f"{TARGET_CLASS_TO_ID['person']} {polygon}")

    if not lines:
        continue

    img_path = sample.filepath
    img_name = os.path.basename(img_path)
    split = 'train' if np.random.random() > 0.1 else 'valid'

    # Copy image (prefixed so names cannot clash with the other sources)
    dst_img = f"{OUTPUT_DIR}/{split}/images/coco_{img_name}"
    shutil.copy(img_path, dst_img)

    # Write the label file next to it, same stem as the image
    lbl_name = os.path.splitext(img_name)[0] + '.txt'
    dst_lbl = f"{OUTPUT_DIR}/{split}/labels/coco_{lbl_name}"
    with open(dst_lbl, 'w') as f:
        f.write('\n'.join(lines))

    class_counts['person'] += len(lines)
    stats[split] += 1
    stats['total_labels'] += len(lines)

# COCO is the first source processed, so the split totals equal the COCO count.
print(f"   Processed {stats['train'] + stats['valid']} COCO images")

# ===== Process Open Images Phone =====
# Export the phone boxes of every Open Images sample into a random 90/10
# train/valid split.
print("\nüì¶ Processing Open Images Phone...")
count = 0
for sample in oi_phone:
    if sample.ground_truth is None:
        continue

    # Keep ONLY the "Mobile phone" detections. The `classes=` argument of the
    # zoo loader selects which samples are downloaded; the other objects
    # annotated in those images are still present in `ground_truth` unless
    # `only_matching=True` was requested, and must not be labelled as phones.
    lines = []
    for det in sample.ground_truth.detections:
        if (det.label or '').lower() != 'mobile phone':
            continue
        x, y, w, h = det.bounding_box  # top-left x, y, width, height (relative)
        polygon = bbox_to_polygon([x + w/2, y + h/2, w, h])
        if polygon:
            lines.append(f"{TARGET_CLASS_TO_ID['phone']} {polygon}")

    # Skip before copying: an image without labels would otherwise be trained
    # on as background although it contains a phone.
    if not lines:
        continue

    img_path = sample.filepath
    img_name = os.path.basename(img_path)
    split = 'train' if np.random.random() > 0.1 else 'valid'

    dst_img = f"{OUTPUT_DIR}/{split}/images/oi_phone_{img_name}"
    shutil.copy(img_path, dst_img)

    lbl_name = os.path.splitext(img_name)[0] + '.txt'
    dst_lbl = f"{OUTPUT_DIR}/{split}/labels/oi_phone_{lbl_name}"
    with open(dst_lbl, 'w') as f:
        f.write('\n'.join(lines))

    class_counts['phone'] += len(lines)
    stats[split] += 1
    stats['total_labels'] += len(lines)
    count += 1

print(f"   Processed {count} phone images")

# ===== Process Open Images Book =====
# Export the book boxes of every Open Images sample into a random 90/10
# train/valid split, labelled as the 'material' target class.
print("\nüì¶ Processing Open Images Book...")
count = 0
for sample in oi_book:
    if sample.ground_truth is None:
        continue

    # Keep ONLY the "Book" detections. The `classes=` argument of the zoo
    # loader selects which samples are downloaded; the other objects annotated
    # in those images are still present in `ground_truth` unless
    # `only_matching=True` was requested, and must not be labelled as material.
    lines = []
    for det in sample.ground_truth.detections:
        if (det.label or '').lower() != 'book':
            continue
        x, y, w, h = det.bounding_box  # top-left x, y, width, height (relative)
        polygon = bbox_to_polygon([x + w/2, y + h/2, w, h])
        if polygon:
            lines.append(f"{TARGET_CLASS_TO_ID['material']} {polygon}")

    # Skip before copying: an image without labels would otherwise be trained
    # on as background although it contains a book.
    if not lines:
        continue

    img_path = sample.filepath
    img_name = os.path.basename(img_path)
    split = 'train' if np.random.random() > 0.1 else 'valid'

    dst_img = f"{OUTPUT_DIR}/{split}/images/oi_book_{img_name}"
    shutil.copy(img_path, dst_img)

    lbl_name = os.path.splitext(img_name)[0] + '.txt'
    dst_lbl = f"{OUTPUT_DIR}/{split}/labels/oi_book_{lbl_name}"
    with open(dst_lbl, 'w') as f:
        f.write('\n'.join(lines))

    class_counts['material'] += len(lines)
    stats[split] += 1
    stats['total_labels'] += len(lines)
    count += 1

print(f"   Processed {count} book images")

In [None]:
# ===================================================
# CELL 8: Process Roboflow Datasets
# ===================================================

print("\nüì¶ Processing Roboflow datasets...")

for dataset_name in successful_datasets:
    dataset_dir = f"/content/roboflow_datasets/{dataset_name}"
    
    # Find data.yaml
    data_yaml = None
    for root, dirs, files in os.walk(dataset_dir):
        if 'data.yaml' in files:
            data_yaml = os.path.join(root, 'data.yaml')
            break
    
    if not data_yaml:
        print(f"   ‚ö†Ô∏è No data.yaml in {dataset_name}")
        continue
    
    try:
        with open(data_yaml, 'r') as f:
            config = yaml.safe_load(f)
    except:
        print(f"   ‚ö†Ô∏è Cannot parse data.yaml in {dataset_name}")
        continue
    
    source_classes = config.get('names', [])
    if isinstance(source_classes, dict):
        source_classes = list(source_classes.values())
    
    print(f"   Processing {dataset_name} (classes: {source_classes})...")
    
    for split in ['train', 'valid', 'test']:
        img_dir = None
        lbl_dir = None
        
        for try_path in [dataset_dir, os.path.dirname(data_yaml)]:
            if os.path.exists(os.path.join(try_path, split, 'images')):
                img_dir = os.path.join(try_path, split, 'images')
                lbl_dir = os.path.join(try_path, split, 'labels')
                break
        
        if not img_dir or not os.path.exists(img_dir):
            continue
        
        out_split = 'train' if split in ['train', 'test'] else 'valid'
        count = 0
        
        for img_file in os.listdir(img_dir):
            if not img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue
            
            src_img = os.path.join(img_dir, img_file)
            dst_img = f"{OUTPUT_DIR}/{out_split}/images/rb_{dataset_name}_{img_file}"
            shutil.copy(src_img, dst_img)
            
            lbl_file = os.path.splitext(img_file)[0] + '.txt'
            src_lbl = os.path.join(lbl_dir, lbl_file)
            dst_lbl = f"{OUTPUT_DIR}/{out_split}/labels/rb_{dataset_name}_{lbl_file}"
            
            if os.path.exists(src_lbl):
                with open(src_lbl, 'r') as f:
                    lines = f.readlines()
                
                new_lines = []
                for line in lines:
                    parts = line.strip().split()
                    if len(parts) < 5:
                        continue
                    
                    try:
                        old_class_id = int(parts[0])
                        if old_class_id >= len(source_classes):
                            continue
                        
                        old_class_name = source_classes[old_class_id]
                        new_class_id = normalize_class(old_class_name)
                        
                        if new_class_id >= 0:
                            class_counts[TARGET_CLASSES[new_class_id]] += 1
                            
                            if len(parts) == 5:  # bbox format
                                polygon = bbox_to_polygon(parts[1:])
                                if polygon:
                                    new_lines.append(f"{new_class_id} {polygon}")
                            elif len(parts) >= 9:  # already segmentation
                                new_lines.append(f"{new_class_id} {' '.join(parts[1:])}")
                    except:
                        continue
                
                if new_lines:
                    with open(dst_lbl, 'w') as f:
                        f.write('\n'.join(new_lines))
                    stats[out_split] += 1
                    stats['total_labels'] += len(new_lines)
                    count += 1
        
        if count > 0:
            print(f"      {split}: {count} images")

# Write the dataset description consumed by training and validation:
# dataset root, image folders relative to it, and the id -> name table.
data_config = dict(
    path=OUTPUT_DIR,
    train='train/images',
    val='valid/images',
    names=dict(enumerate(TARGET_CLASSES)),
    nc=len(TARGET_CLASSES),
)

data_yaml_path = f"{OUTPUT_DIR}/data.yaml"
with open(data_yaml_path, 'w') as f:
    yaml.dump(data_config, f, default_flow_style=False)

# Report the size of the merged dataset and how many labels each class got.
summary_rule = "=" * 60

print("\n" + summary_rule)
print("üìä DATASET SUMMARY")
print(summary_rule)
for caption, key in (
    ("Train images", 'train'),
    ("Valid images", 'valid'),
    ("Total labels", 'total_labels'),
):
    print(f"{caption}: {stats[key]}")
print("\nClass distribution:")
for class_name in class_counts:
    print(f"   {class_name}: {class_counts[class_name]}")
print(summary_rule)

In [None]:
# ===================================================
# CELL 9: Train YOLO Model
# ===================================================

# Pretrained segmentation checkpoint to fine-tune.
# Options: yolo11n-seg.pt (fast), yolo11s-seg.pt (balanced), yolo11m-seg.pt (accurate)
BASE_MODEL = 'yolo11s-seg.pt'

print(f"üöÄ Starting training with {BASE_MODEL}...")
print("This will take approximately 2-3 hours on T4 GPU")

# Runs are written straight to Drive, so the folder must exist first.
os.makedirs(DRIVE_OUTPUT, exist_ok=True)

model = YOLO(BASE_MODEL)

# Schedule and optimiser settings
schedule_args = dict(
    epochs=100,
    imgsz=640,
    batch=16,  # Reduce if OOM
    patience=20,
    lr0=0.001,
    lrf=0.01,
    warmup_epochs=5,
)

# Data augmentation settings
augmentation_args = dict(
    augment=True,
    mosaic=1.0,
    mixup=0.1,
    copy_paste=0.1,
    degrees=10,
    translate=0.1,
    scale=0.5,
    shear=2.0,
    perspective=0.0001,
    flipud=0.0,
    fliplr=0.5,
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
)

# Where the run is stored and how it is executed
run_args = dict(
    project=DRIVE_OUTPUT,
    name='anticheat_production',
    exist_ok=True,
    device=0,
    verbose=True,
    save=True,
    plots=True,
)

results = model.train(
    data=f"{OUTPUT_DIR}/data.yaml",
    **schedule_args,
    **augmentation_args,
    **run_args,
)

print("\n‚úÖ Training completed!")

In [None]:
# ===================================================
# CELL 10: Validate Model
# ===================================================

# Best checkpoint written by the training run (project/name/weights/best.pt)
BEST_MODEL = f"{DRIVE_OUTPUT}/anticheat_production/weights/best.pt"

print("üìä Validating model...")
model = YOLO(BEST_MODEL)
metrics = model.val(data=f"{OUTPUT_DIR}/data.yaml")

print("\n" + "="*60)
print("üìä VALIDATION RESULTS")
print("="*60)
print(f"Overall mAP50: {metrics.box.map50:.3f}")
print(f"Overall mAP50-95: {metrics.box.map:.3f}")
print("\nPer-class mAP50:")
# `ap50` holds one entry per class that actually occurs in the validation set,
# in the order given by `ap_class_index`; it is NOT indexed by class id. Pair
# the two so a class without validation data cannot shift the other results.
ap50_by_class = {
    int(class_id): ap50
    for class_id, ap50 in zip(metrics.box.ap_class_index, metrics.box.ap50)
}
for i, cls in enumerate(TARGET_CLASSES):
    if i in ap50_by_class:
        print(f"   {cls}: {ap50_by_class[i]:.3f}")
    else:
        print(f"   {cls}: n/a (no validation instances)")
print("="*60)

In [None]:
# ===================================================
# CELL 11: Export to ONNX
# ===================================================

print("üì¶ Exporting to ONNX format...")

model = YOLO(BEST_MODEL)

# Export with optimizations for web deployment
model.export(
    format='onnx',
    imgsz=640,
    simplify=True,
    dynamic=False,
    opset=17,
)

ONNX_PATH = BEST_MODEL.replace('.pt', '.onnx')

# Also save to a standard location
FINAL_ONNX = f"{DRIVE_OUTPUT}/anticheat_yolo11s.onnx"
shutil.copy(ONNX_PATH, FINAL_ONNX)

print(f"\n‚úÖ ONNX model saved to:")
print(f"   {FINAL_ONNX}")
print(f"\nFile size: {os.path.getsize(FINAL_ONNX) / 1024 / 1024:.1f} MB")

In [None]:
# ===================================================
# CELL 12: Test Model on Sample Images
# ===================================================
import onnxruntime as ort
from PIL import Image, ImageDraw

print("üß™ Testing ONNX model...")

session = ort.InferenceSession(FINAL_ONNX)
input_name = session.get_inputs()[0].name

# Get a test image from validation set
valid_imgs = os.listdir(f"{OUTPUT_DIR}/valid/images")
if valid_imgs:
    test_img_path = f"{OUTPUT_DIR}/valid/images/{valid_imgs[0]}"
    img = Image.open(test_img_path).convert('RGB').resize((640, 640))
    img_array = np.array(img).astype(np.float32) / 255.0
    img_array = np.transpose(img_array, (2, 0, 1))
    img_array = np.expand_dims(img_array, axis=0)
    
    outputs = session.run(None, {input_name: img_array})
    
    print("\nModel output shape:", outputs[0].shape)
    print("\nMax confidence per class:")
    class_scores = outputs[0][0, 4:8, :]
    for i, cls in enumerate(TARGET_CLASSES):
        print(f"   {cls}: {class_scores[i].max():.4f}")

print("\n‚úÖ Model test passed!")

In [None]:
# ===================================================
# CELL 13: Final Instructions
# ===================================================

# Closing banner: where the exported model is and how to ship it.
final_rule = "=" * 60
final_lines = [
    "\n" + final_rule,
    "üéâ TRAINING COMPLETE!",
    final_rule,
    "\nüì• DOWNLOAD & DEPLOY:",
    "\n1. Download ONNX file from Google Drive:",
    f"   {FINAL_ONNX}",
    "\n2. Rename to: anticheat_yolo11s.onnx",
    "\n3. Copy to your project:",
    "   Intelligence-Test/public/models/anticheat_yolo11s.onnx",
    "\n4. Rebuild and deploy:",
    "   cd Intelligence-Test && npm run build",
    "\n" + final_rule,
    "\nüìä Model files saved in Google Drive:",
    f"   {DRIVE_OUTPUT}/",
    "   ‚îú‚îÄ‚îÄ anticheat_production/",
    "   ‚îÇ   ‚îú‚îÄ‚îÄ weights/best.pt",
    "   ‚îÇ   ‚îú‚îÄ‚îÄ weights/best.onnx",
    "   ‚îÇ   ‚îî‚îÄ‚îÄ results.png",
    "   ‚îî‚îÄ‚îÄ anticheat_yolo11s.onnx",
    "\n" + final_rule,
]
for final_line in final_lines:
    print(final_line)