# 🤸 Gymnastics Apparatus Detection - YOLOv8 Training

**This notebook trains a YOLOv8 object detection model and exports to TFLite for MediaPipe integration.**

✅ **No dependency errors**  
✅ **Faster training** (1-2 hours with GPU)  
✅ **Better accuracy** than MediaPipe Model Maker  
✅ **TFLite export** for MediaPipe compatibility  

---

## 📋 Quick Start

1. **Enable GPU**: Runtime → Change runtime type → GPU → Save
2. **Run all cells**: Runtime → Run all
3. **Upload dataset** when prompted (ZIP file)
4. **Wait 1-2 hours** for training
5. **Download model** at the end

## 1️⃣ Install YOLOv8 (No Errors!)

In [None]:
# Install Ultralytics (YOLOv8) together with scikit-learn (dataset splitting)
# and tqdm (progress bars). The leading `!` runs the command in the notebook's
# shell; `-q` keeps pip's output quiet.
!pip install -q ultralytics scikit-learn tqdm
print("‚úÖ Installation complete!")

## 2️⃣ Import Libraries & Check GPU

In [None]:
import os
import xml.etree.ElementTree as ET
from pathlib import Path
import shutil
from sklearn.model_selection import train_test_split
import yaml
from tqdm import tqdm
from ultralytics import YOLO
import torch

# Confirm the imports above succeeded, then report whether a CUDA device is
# visible to PyTorch (and which one) before any training is attempted.
print("‚úÖ Libraries imported successfully!")
gpu_ready = torch.cuda.is_available()
print(f"üéÆ GPU available: {gpu_ready}")
if gpu_ready:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"   GPU: {gpu_name}")

## 3️⃣ Upload Dataset

**Upload your `raw_object_detect_pascalvoc.zip` file**

In [None]:
from google.colab import files
import zipfile

# Ask the user for the dataset archive; `uploaded` maps each uploaded file
# name to its contents, and Colab also saves the files in the working dir.
print("üì§ Upload your dataset ZIP file (raw_object_detect_pascalvoc.zip)")
uploaded = files.upload()

# Extract every uploaded archive into /content.
for filename in uploaded:
    print(f"\nüì¶ Extracting {filename}...")
    with zipfile.ZipFile(filename, 'r') as zip_ref:
        zip_ref.extractall('/content')
    print("‚úÖ Extracted successfully!")

# Set paths
RAW_DATA_DIR = Path('/content/raw_object_detect_pascalvoc')
OUTPUT_DIR = Path('/content/yolo_dataset')

# Fail fast when the archive did not contain the expected top-level folder.
# Otherwise the notebook only reports "Found 0 images" here and breaks much
# later, during the dataset split, with an unrelated-looking error.
if not RAW_DATA_DIR.is_dir():
    raise FileNotFoundError(
        f"Expected dataset folder {RAW_DATA_DIR} was not found after extraction. "
        "The ZIP must contain a top-level 'raw_object_detect_pascalvoc' directory "
        "holding the .jpg images and their .xml annotations."
    )

# Verify dataset (sorted so the listing does not depend on filesystem order)
image_files = sorted(RAW_DATA_DIR.glob('*.jpg'))
print(f"\n‚úÖ Found {len(image_files)} images in dataset")
if not image_files:
    print(f"WARNING: no .jpg files found directly inside {RAW_DATA_DIR}")

## 4️⃣ Configuration

In [None]:
# --- Training hyperparameters -------------------------------------------
EPOCHS = 50
BATCH_SIZE = 16
IMG_SIZE = 640

# --- Dataset split fractions (train / validation / test) ----------------
TRAIN_SPLIT = 0.7
VAL_SPLIT = 0.2
TEST_SPLIT = 0.1

# --- Apparatus classes, kept in alphabetical order ----------------------
CLASSES = [
    "Balance_Beam", "Horizontal_Bar", "Parallel_Bars", "Pommel_Horse",
    "Still_Rings", "Uneven_Bars", "Vault",
]

# Echo the active configuration so it is visible in the notebook output.
print("‚öôÔ∏è Configuration:")
for label, value in (
    ("Epochs", EPOCHS),
    ("Batch size", BATCH_SIZE),
    ("Image size", IMG_SIZE),
    ("Classes", len(CLASSES)),
    ("Classes", ', '.join(CLASSES)),
):
    print(f"   {label}: {value}")

## 5️⃣ Convert Pascal VOC → YOLO Format

In [None]:
def parse_pascal_voc(xml_file):
    """Parse a Pascal VOC XML annotation.

    Args:
        xml_file: Path (or open file object) of the annotation; it is handed
            straight to ``ET.parse``.

    Returns:
        A dict with integer ``'width'`` and ``'height'`` and an ``'objects'``
        list. Each object is ``{'class': <name>, 'bbox': [xmin, ymin, xmax,
        ymax]}`` with integer pixel coordinates.

    Raises:
        ValueError: If a required element (``size/width``, ``size/height``,
            ``object/name`` or a ``bndbox`` coordinate) is missing, empty or
            not numeric.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    def _text(parent, tag):
        # Report exactly which element is absent instead of failing with an
        # AttributeError on None.
        node = parent.find(tag) if parent is not None else None
        if node is None or node.text is None or not node.text.strip():
            raise ValueError(f"Missing <{tag}> element in {xml_file}")
        return node.text.strip()

    def _pixels(parent, tag):
        # Some labelling tools write coordinates as floats ("50.6"), which a
        # plain int() rejects; go through float and round to the nearest pixel.
        return int(round(float(_text(parent, tag))))

    root = ET.parse(xml_file).getroot()

    size = root.find('size')
    width = _pixels(size, 'width')
    height = _pixels(size, 'height')

    objects = []
    for obj in root.findall('object'):
        bbox = obj.find('bndbox')
        objects.append({
            # Stripped so that stray whitespace does not make a valid class
            # name look unknown to the class mapping.
            'class': _text(obj, 'name'),
            'bbox': [_pixels(bbox, tag) for tag in ('xmin', 'ymin', 'xmax', 'ymax')],
        })

    return {'width': width, 'height': height, 'objects': objects}


def convert_to_yolo_format(annotation, class_mapping):
    """Convert Pascal VOC boxes to YOLO label lines.

    Args:
        annotation: Dict with ``'width'``, ``'height'`` (image size in pixels)
            and ``'objects'``, a list of ``{'class': name, 'bbox': [xmin,
            ymin, xmax, ymax]}`` entries in pixel coordinates.
        class_mapping: Dict mapping class name to integer class id. Objects
            whose class is not in the mapping are skipped.

    Returns:
        A list of strings ``"<class_id> <x_center> <y_center> <width>
        <height>"`` with all four box values normalized to [0, 1]. The list is
        empty when the image size is not positive or no usable box remains.
    """
    img_width = annotation['width']
    img_height = annotation['height']

    # Without a positive image size nothing can be normalized; returning no
    # labels avoids a ZeroDivisionError below.
    if img_width <= 0 or img_height <= 0:
        return []

    yolo_annotations = []

    for obj in annotation['objects']:
        class_id = class_mapping.get(obj['class'])
        if class_id is None:
            continue

        xmin, ymin, xmax, ymax = obj['bbox']

        # Tolerate annotations whose corners were written in reverse order.
        xmin, xmax = sorted((xmin, xmax))
        ymin, ymax = sorted((ymin, ymax))

        # Clip the corners to the image first. Clamping center and size
        # independently can leave a box that still extends past the border.
        xmin = min(max(xmin, 0), img_width)
        xmax = min(max(xmax, 0), img_width)
        ymin = min(max(ymin, 0), img_height)
        ymax = min(max(ymax, 0), img_height)

        box_w = xmax - xmin
        box_h = ymax - ymin
        if box_w <= 0 or box_h <= 0:
            # A zero-area box carries no training signal; drop it.
            continue

        x_center = (xmin + xmax) / 2 / img_width
        y_center = (ymin + ymax) / 2 / img_height
        width = box_w / img_width
        height = box_h / img_height

        yolo_annotations.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")

    return yolo_annotations

print("‚úÖ Conversion functions defined")

## 6️⃣ Prepare YOLO Dataset

In [None]:
# Build the YOLO-format dataset: pair each image with its Pascal VOC XML,
# split into train/val/test, copy images, write label files and data.yaml.
print("="*80)
print("PREPARING YOLO DATASET")
print("="*80)

# Create output directories. Split folders left over from an earlier run are
# removed first; otherwise an image assigned to a different split this time
# would remain in its old split as well and leak between train/val/test.
for split in ['train', 'val', 'test']:
    split_dir = OUTPUT_DIR / split
    if split_dir.exists():
        shutil.rmtree(split_dir)
    (split_dir / 'images').mkdir(parents=True, exist_ok=True)
    (split_dir / 'labels').mkdir(parents=True, exist_ok=True)

# Create class mapping (class id == index in CLASSES)
class_mapping = {name: idx for idx, name in enumerate(CLASSES)}

# Get all image files. glob() yields files in filesystem order, so sort them:
# the seeded split below is only reproducible if the input order is stable.
image_files = sorted(RAW_DATA_DIR.glob("*.jpg"))
print(f"\nFound {len(image_files)} images")

# Parse and filter valid annotations
print("\nParsing annotations...")
valid_data = []

for img_file in tqdm(image_files):
    xml_file = img_file.with_suffix('.xml')

    if not xml_file.exists():
        continue

    # Best effort: a single unreadable annotation is reported and skipped
    # rather than aborting the whole preparation.
    try:
        annotation = parse_pascal_voc(xml_file)
    except Exception as e:
        print(f"Error parsing {xml_file.name}: {e}")
        continue

    if annotation['objects']:
        valid_data.append((img_file, annotation))

print(f"Valid images with annotations: {len(valid_data)}")

# Stop with an actionable message instead of letting train_test_split fail
# on an empty input.
if not valid_data:
    raise ValueError(
        f"No annotated images found in {RAW_DATA_DIR}: each .jpg needs a "
        "matching .xml file containing at least one <object>."
    )

# Split dataset: first carve off the training share, then divide the
# remainder between validation and test in proportion to their fractions.
train_idx, temp_idx = train_test_split(
    range(len(valid_data)),
    train_size=TRAIN_SPLIT,
    random_state=42
)
val_idx, test_idx = train_test_split(
    temp_idx,
    train_size=VAL_SPLIT / (VAL_SPLIT + TEST_SPLIT),
    random_state=42
)

splits = {
    'train': train_idx,
    'val': val_idx,
    'test': test_idx
}

print(f"\nTrain: {len(train_idx)} images")
print(f"Val: {len(val_idx)} images")
print(f"Test: {len(test_idx)} images")

# Process each split
for split_name, indices in splits.items():
    print(f"\nProcessing {split_name} split...")

    for idx in tqdm(indices, desc=f"Converting {split_name}"):
        img_file, annotation = valid_data[idx]

        # Convert to YOLO format
        yolo_annotations = convert_to_yolo_format(annotation, class_mapping)

        # Images whose objects all belong to unknown classes are left out.
        if not yolo_annotations:
            continue

        # Copy image
        dst_img = OUTPUT_DIR / split_name / 'images' / img_file.name
        shutil.copy(img_file, dst_img)

        # Write YOLO annotation (same stem as the image, .txt extension)
        dst_label = OUTPUT_DIR / split_name / 'labels' / img_file.with_suffix('.txt').name
        with open(dst_label, 'w') as f:
            f.write('\n'.join(yolo_annotations))

# Create data.yaml for YOLOv8
data_yaml = {
    'path': str(OUTPUT_DIR.absolute()),
    'train': 'train/images',
    'val': 'val/images',
    'test': 'test/images',
    'nc': len(CLASSES),
    'names': CLASSES
}

yaml_path = OUTPUT_DIR / 'data.yaml'
with open(yaml_path, 'w') as f:
    yaml.dump(data_yaml, f, default_flow_style=False)

print("\n‚úÖ Dataset preparation complete!")
print(f"   Config saved to: {yaml_path}")

## 7️⃣ Train YOLOv8 Model

**This will take 1-2 hours with GPU**

In [None]:
# Fine-tune a pretrained YOLOv8m checkpoint on the prepared dataset.
print("="*80)
print("TRAINING YOLOV8 MODEL")
print("="*80)

# Initialize YOLOv8 medium model (balanced speed/accuracy)
model = YOLO('yolov8m.pt')

# Use the first CUDA device when one is present and fall back to the CPU
# otherwise. A hard-coded device=0 makes the cell fail outright on a
# runtime without a GPU.
train_device = 0 if torch.cuda.is_available() else 'cpu'
if train_device == 'cpu':
    print("\nWARNING: No CUDA GPU detected - training on CPU will be very slow.")

print("\nModel: YOLOv8m")
print(f"Epochs: {EPOCHS}")
print(f"Batch size: {BATCH_SIZE}")
print(f"Image size: {IMG_SIZE}")
print("\nStarting training...\n")

# Train model. Outputs go to /content/runs/detect/apparatus_detector, which
# is where the later cells look for weights/best.pt.
results = model.train(
    data=str(yaml_path),
    epochs=EPOCHS,
    imgsz=IMG_SIZE,
    batch=BATCH_SIZE,
    patience=20,
    save=True,
    device=train_device,
    workers=8,
    project='/content/runs/detect',
    name='apparatus_detector',
    exist_ok=True,
    pretrained=True,
    optimizer='AdamW',
    verbose=True,
    seed=42,
    deterministic=True,
    cos_lr=True,
    close_mosaic=10,
    amp=True,
    # Data augmentation
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=10.0,
    translate=0.1,
    scale=0.5,
    shear=2.0,
    flipud=0.0,
    fliplr=0.5,
    mosaic=1.0,
    mixup=0.1
)

print("\n" + "="*80)
print("‚úÖ TRAINING COMPLETE!")
print("="*80)

## 8️⃣ Evaluate Model

In [None]:
# Score the best checkpoint of the training run on the validation split and
# print the headline detection metrics.
rule = "=" * 80
print(rule, "EVALUATING MODEL", rule, sep="\n")

# Best weights saved by the training cell.
best_model = YOLO('/content/runs/detect/apparatus_detector/weights/best.pt')

# Validation settings: same image size and batch size as training, with the
# confidence and IoU thresholds used for this evaluation.
val_settings = {
    'data': str(yaml_path),
    'split': 'val',
    'imgsz': IMG_SIZE,
    'batch': BATCH_SIZE,
    'conf': 0.25,
    'iou': 0.6,
}
metrics = best_model.val(**val_settings)

print("\n" + rule)
print("EVALUATION RESULTS")
print(rule)
# (printed label, attribute on metrics.box) pairs, in display order.
for label, attr in (
    ("mAP50", "map50"),
    ("mAP50-95", "map"),
    ("Precision", "mp"),
    ("Recall", "mr"),
):
    print(f"{label}: {getattr(metrics.box, attr):.4f}")

## 9️⃣ Export to TFLite (for MediaPipe)

In [None]:
# Export the evaluated checkpoint to TFLite and, additionally, to ONNX.
print("="*80)
print("EXPORTING TO TFLITE")
print("="*80)

# Export to TFLite (float32, fixed input size).
# `optimize=True` is intentionally not passed: in Ultralytics it is the
# TorchScript mobile-optimization switch, has no effect on TFLite output and
# is rejected when the export runs on a CUDA device.
tflite_path = best_model.export(
    format='tflite',
    imgsz=IMG_SIZE,
    int8=False,
    dynamic=False,
    simplify=True
)

print("\n‚úÖ Model exported to TFLite!")
print(f"   Path: {tflite_path}")

# Also export to ONNX (optional, for compatibility)
onnx_path = best_model.export(
    format='onnx',
    imgsz=IMG_SIZE,
    simplify=True
)

print("\n‚úÖ Model also exported to ONNX!")
print(f"   Path: {onnx_path}")

## 🔟 Download Trained Model

In [None]:
from google.colab import files
import json
from datetime import datetime

# Describe the run: training settings, class list, timestamp and the
# validation scores computed by the evaluation cell.
box_scores = metrics.box
metadata = dict(
    model='YOLOv8m',
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    classes=CLASSES,
    num_classes=len(CLASSES),
    trained_on=datetime.now().isoformat(),
    map50=float(box_scores.map50),
    map50_95=float(box_scores.map),
    precision=float(box_scores.mp),
    recall=float(box_scores.mr),
)

# Write the metadata next to the training outputs.
metadata_path = '/content/runs/detect/apparatus_detector/metadata.json'
with open(metadata_path, 'w') as meta_file:
    meta_file.write(json.dumps(metadata, indent=2))

# Write the class names, one per line, in class-id order.
labels_path = '/content/runs/detect/apparatus_detector/labels.txt'
with open(labels_path, 'w') as labels_file:
    labels_file.writelines(f"{class_name}\n" for class_name in CLASSES)

print("üì• Downloading files...\n")

# (announcement, file to download) pairs, handled in this order.
downloads = (
    ("1. PyTorch model (best.pt)",
     '/content/runs/detect/apparatus_detector/weights/best.pt'),
    ("2. TFLite model (best_saved_model/best_float32.tflite)", tflite_path),
    ("3. Metadata (metadata.json)", metadata_path),
    ("4. Labels (labels.txt)", labels_path),
)
for announcement, artifact in downloads:
    print(announcement)
    files.download(artifact)

# Closing banner followed by the hand-off instructions.
closing_lines = (
    "\n" + "="*80,
    "‚úÖ ALL FILES DOWNLOADED!",
    "="*80,
    "\nüìã Next Steps:",
    "\n1. Rename TFLite model to: gym_apparatus_custom.tflite",
    "2. Copy to: model_service/models/gym_apparatus_custom.tflite",
    "3. Restart your API server",
    "4. Test on gymnastics videos!",
    "\nüéâ Training complete! Your model is ready to use.",
)
for line in closing_lines:
    print(line)