# 🤸 Gymnastics Skill Detection - YOLOv8 Training

**This notebook trains a YOLOv8 object detection model for gymnastics skills and exports to TFLite for MediaPipe integration.**

✅ **No dependency errors**  
✅ **Faster training** (1-2 hours with GPU)  
✅ **Better accuracy** than MediaPipe Model Maker  
✅ **TFLite export** for MediaPipe compatibility  

---

## 📋 Quick Start

1. **Enable GPU**: Runtime → Change runtime type → GPU → Save
2. **Run all cells**: Runtime → Run all
3. **Upload dataset** when prompted (ZIP file of `skill_detect_dataset`)
4. **Wait 1-2 hours** for training
5. **Download model** at the end

## 1️⃣ Install YOLOv8 (No Errors!)

In [None]:
# Install Ultralytics (YOLOv8) together with scikit-learn and tqdm, which the
# import cell below needs; -q keeps pip's output quiet.
!pip install -q ultralytics scikit-learn tqdm
# NOTE(review): this message is printed unconditionally; it does not check
# whether the pip command above actually succeeded.
print("‚úÖ Installation complete!")

## 2️⃣ Import Libraries & Check GPU

In [None]:
import os
import xml.etree.ElementTree as ET
from pathlib import Path
import shutil
from sklearn.model_selection import train_test_split
import yaml
from tqdm import tqdm
from ultralytics import YOLO
import torch

# Confirm the imports above succeeded, then report whether PyTorch can see a
# CUDA device and, if so, the name of device 0.
print("‚úÖ Libraries imported successfully!")
gpu_ready = torch.cuda.is_available()
print(f"üéÆ GPU available: {gpu_ready}")
if gpu_ready:
    print(f"   GPU: {torch.cuda.get_device_name(0)}")

## 3️⃣ Upload Dataset

**Upload your `skill_detect_dataset.zip` file**

In [None]:
from google.colab import files
import zipfile

# Ask the user for the dataset archive. The keys of the returned mapping are
# the uploaded file names, which are opened from the working directory below.
print("üì§ Upload your dataset ZIP file (skill_detect_dataset.zip)")
uploaded = files.upload()

# Extract every uploaded archive into /content. Anything that is not a ZIP
# is skipped with a message instead of aborting the cell with BadZipFile.
for filename in uploaded:
    if not zipfile.is_zipfile(filename):
        print(f"\nSkipping {filename}: not a valid ZIP archive")
        continue
    print(f"\nüì¶ Extracting {filename}...")
    with zipfile.ZipFile(filename, 'r') as zip_ref:
        zip_ref.extractall('/content')
    print("‚úÖ Extracted successfully!")

# Set paths
RAW_DATA_DIR = Path('/content/skill_detect_dataset')
OUTPUT_DIR = Path('/content/yolo_dataset')

# The rest of the notebook reads from RAW_DATA_DIR, so say so early when the
# archive did not unpack into the expected top-level folder.
if not RAW_DATA_DIR.is_dir():
    print(f"\nWARNING: {RAW_DATA_DIR} was not found after extraction; "
          "the ZIP must contain a top-level 'skill_detect_dataset' folder")

# Verify dataset: look directly in the train folder when it exists, otherwise
# search the whole dataset folder recursively. Both .jpg and .png are counted
# so the number agrees with the dataset-preparation cell, which uses both.
train_dir = RAW_DATA_DIR / 'train'
if train_dir.exists():
    image_files = list(train_dir.glob('*.jpg')) + list(train_dir.glob('*.png'))
else:
    image_files = list(RAW_DATA_DIR.glob('**/*.jpg')) + list(RAW_DATA_DIR.glob('**/*.png'))

print(f"\n‚úÖ Found {len(image_files)} sample images in dataset search path")

## 4️⃣ Configuration

In [None]:
# --- Training hyperparameters ---
EPOCHS = 100      # number of training epochs
BATCH_SIZE = 16   # batch size
IMG_SIZE = 640    # image size

# --- Data split fractions (only relevant if the dataset is not already split) ---
TRAIN_SPLIT = 0.8
VAL_SPLIT = 0.2

# Class names identified from the dataset. The order matters: class ids are
# assigned by list position when the class mapping is built later on.
CLASSES = [
    "BL", "FL", "HS", "IN-IRON-C", "IRON-C",
    "L-CROSS", "LS", "M-UP", "PN", "VS",
]

# Echo the settings, one per line, so they appear in the cell output.
print(
    "‚öôÔ∏è Configuration:",
    f"   Epochs: {EPOCHS}",
    f"   Batch size: {BATCH_SIZE}",
    f"   Image size: {IMG_SIZE}",
    f"   Classes: {len(CLASSES)}",
    f"   Classes: {', '.join(CLASSES)}",
    sep="\n",
)

## 5️⃣ Convert Pascal VOC → YOLO Format

In [None]:
def parse_pascal_voc(xml_file):
    """Parse a Pascal VOC XML annotation.

    Args:
        xml_file: Path of the annotation file (or an open file object); it is
            passed straight to ``xml.etree.ElementTree.parse``.

    Returns:
        A dict with the keys ``'width'`` and ``'height'`` (image size taken
        from the ``<size>`` element, as ints) and ``'objects'``, a list of
        ``{'class': str, 'bbox': [xmin, ymin, xmax, ymax]}`` dicts, one per
        ``<object>`` element. The list is empty when there are no objects.

    Raises:
        ValueError: If a required element is missing or a numeric field
            cannot be read as a number.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    def _required_text(parent, tag):
        # Return the stripped text of a required child element.
        node = parent.find(tag) if parent is not None else None
        if node is None or node.text is None:
            raise ValueError(f"Missing <{tag}> element in {xml_file}")
        return node.text.strip()

    def _required_int(parent, tag):
        # Going through float() accepts values such as "12.0" or "12.7",
        # which a plain int() rejects; the value is rounded to the nearest
        # pixel so the returned coordinates remain ints.
        return int(round(float(_required_text(parent, tag))))

    root = ET.parse(xml_file).getroot()

    size = root.find('size')
    width = _required_int(size, 'width')
    height = _required_int(size, 'height')

    objects = []
    for obj in root.findall('object'):
        bbox = obj.find('bndbox')
        objects.append({
            # Stripped so that padded names still match the class mapping.
            'class': _required_text(obj, 'name'),
            'bbox': [
                _required_int(bbox, 'xmin'),
                _required_int(bbox, 'ymin'),
                _required_int(bbox, 'xmax'),
                _required_int(bbox, 'ymax'),
            ],
        })

    return {'width': width, 'height': height, 'objects': objects}


def convert_to_yolo_format(annotation, class_mapping):
    """Convert Pascal VOC boxes to YOLO label lines.

    Each output line is ``"<class_id> <x_center> <y_center> <width> <height>"``
    with the four box values normalized by the image size and written with
    six decimals.

    Args:
        annotation: Dict with ``'width'``, ``'height'`` and ``'objects'``,
            where every object is ``{'class': name, 'bbox': [xmin, ymin,
            xmax, ymax]}`` in pixels.
        class_mapping: Dict mapping class name to integer class id. Objects
            whose class is not in the mapping are skipped.

    Returns:
        List of label strings. Empty when the image size is not positive or
        no object yields a usable box.
    """
    img_width = annotation['width']
    img_height = annotation['height']

    # Without a positive image size nothing can be normalized (and the
    # divisions below would fail), so no labels are produced.
    if img_width <= 0 or img_height <= 0:
        return []

    yolo_annotations = []

    for obj in annotation['objects']:
        class_name = obj['class']
        if class_name not in class_mapping:
            continue

        class_id = class_mapping[class_name]
        x1, y1, x2, y2 = obj['bbox']

        # Order the corners and clip them to the image BEFORE deriving the
        # centre and size. Clamping centre and size independently (after the
        # fact) yields boxes that no longer match the clipped region.
        xmin = min(max(min(x1, x2), 0), img_width)
        xmax = min(max(max(x1, x2), 0), img_width)
        ymin = min(max(min(y1, y2), 0), img_height)
        ymax = min(max(max(y1, y2), 0), img_height)

        box_width = xmax - xmin
        box_height = ymax - ymin
        # A box with no area inside the image carries no training signal.
        if box_width <= 0 or box_height <= 0:
            continue

        # Convert to YOLO format (all values end up in [0, 1])
        x_center = ((xmin + xmax) / 2) / img_width
        y_center = ((ymin + ymax) / 2) / img_height
        width = box_width / img_width
        height = box_height / img_height

        yolo_annotations.append(
            f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"
        )

    return yolo_annotations

# Cell-level confirmation, printed once the converter functions above have
# been defined.
print("‚úÖ Conversion functions defined")

## 6️⃣ Prepare YOLO Dataset

In [None]:
print("="*80)
print("PREPARING YOLO DATASET")
print("="*80)

# Recreate the output directories from scratch so that files left over from
# an earlier run (possibly assigned to the other split) cannot end up in
# this dataset.
for split in ['train', 'val']:
    for kind in ['images', 'labels']:
        split_dir = OUTPUT_DIR / split / kind
        shutil.rmtree(split_dir, ignore_errors=True)
        split_dir.mkdir(parents=True, exist_ok=True)

# Create class mapping (class name -> integer id, in CLASSES order)
class_mapping = {name: idx for idx, name in enumerate(CLASSES)}

# Get all image files recursively. Extensions are matched case-insensitively
# (so .JPG / .jpeg are found too), and the list is sorted because directory
# listing order is not guaranteed; without a stable order the seeded split
# below would not be reproducible.
IMAGE_SUFFIXES = {'.jpg', '.jpeg', '.png'}
image_files = sorted(
    p for p in RAW_DATA_DIR.rglob('*')
    if p.is_file() and p.suffix.lower() in IMAGE_SUFFIXES
)
print(f"\nFound {len(image_files)} images total")

# Parse and filter valid annotations. Labels are converted here, before the
# split, so the train/val counts printed below match the files written.
print("\nParsing annotations...")
valid_data = []    # (image path, parsed annotation)
label_lines = []   # YOLO label lines, parallel to valid_data
skipped = {'missing xml': 0, 'unreadable xml': 0, 'no usable objects': 0}

for img_file in tqdm(image_files):
    xml_file = img_file.with_suffix('.xml')
    if not xml_file.exists():
        skipped['missing xml'] += 1
        continue

    try:
        annotation = parse_pascal_voc(xml_file)
        yolo_annotations = convert_to_yolo_format(annotation, class_mapping)
    except Exception as e:
        # Best effort: one bad annotation must not abort the whole run, but
        # it is reported instead of being dropped silently.
        skipped['unreadable xml'] += 1
        tqdm.write(f"Skipping {xml_file.name}: {e}")
        continue

    if not yolo_annotations:
        skipped['no usable objects'] += 1
        continue

    valid_data.append((img_file, annotation))
    label_lines.append(yolo_annotations)

print(f"Valid images with annotations: {len(valid_data)}")
if any(skipped.values()):
    print("Skipped: " + ", ".join(f"{count} {reason}" for reason, count in skipped.items() if count))

if not valid_data:
    print("‚ùå ERROR: No valid images found with XML annotations.")
    # Stop here: the training cell needs yaml_path, which cannot be created
    # without data.
    raise RuntimeError(f"No annotated images found under {RAW_DATA_DIR}")

# Split dataset
train_idx, val_idx = train_test_split(
    range(len(valid_data)),
    train_size=TRAIN_SPLIT,
    random_state=42
)

splits = {'train': train_idx, 'val': val_idx}
print(f"\nTrain: {len(train_idx)} images")
print(f"Val: {len(val_idx)} images")

# Process each split. Images come from a recursive search but are written
# into one flat folder per split, so equal file names from different source
# folders get a numeric suffix instead of overwriting each other.
used_stems = set()
for split_name, indices in splits.items():
    print(f"\nProcessing {split_name} split...")
    for idx in tqdm(indices, desc=f"Converting {split_name}"):
        img_file, _ = valid_data[idx]

        stem = img_file.stem
        duplicate_no = 1
        while stem in used_stems:
            stem = f"{img_file.stem}_{duplicate_no}"
            duplicate_no += 1
        used_stems.add(stem)

        # Copy image and write the label file with the same stem
        shutil.copy(img_file, OUTPUT_DIR / split_name / 'images' / f"{stem}{img_file.suffix}")
        dst_label = OUTPUT_DIR / split_name / 'labels' / f"{stem}.txt"
        with open(dst_label, 'w', encoding='utf-8') as f:
            f.write('\n'.join(label_lines[idx]))

# Create data.yaml (dataset root, image folders per split, class count/names)
data_yaml = {
    'path': str(OUTPUT_DIR.absolute()),
    'train': 'train/images',
    'val': 'val/images',
    'nc': len(CLASSES),
    'names': CLASSES
}
yaml_path = OUTPUT_DIR / 'data.yaml'
with open(yaml_path, 'w', encoding='utf-8') as f:
    yaml.dump(data_yaml, f, default_flow_style=False)

print(f"\n‚úÖ Dataset preparation complete! Config saved to: {yaml_path}")

## 7️⃣ Train YOLOv8 Model

**This will take 1-2 hours with GPU**

In [None]:
print("="*80)
print("TRAINING YOLOV8 MODEL")
print("="*80)

# Initialize YOLOv8 medium model
model = YOLO('yolov8m.pt')

# Train on the first GPU when one is available; otherwise fall back to the
# CPU instead of failing on a hard-coded device index.
train_device = 0 if torch.cuda.is_available() else 'cpu'

print(f"\nEpochs: {EPOCHS}")
print(f"Batch size: {BATCH_SIZE}")
if train_device == 'cpu':
    print("WARNING: no GPU detected - training on CPU will be much slower")
print("\nStarting training...\n")

# Train model
results = model.train(
    data=str(yaml_path),              # dataset config written by the preparation cell
    epochs=EPOCHS,
    imgsz=IMG_SIZE,
    batch=BATCH_SIZE,
    patience=20,                      # early-stopping patience, in epochs
    save=True,
    device=train_device,
    project='/content/runs/detect',   # outputs go to <project>/<name>
    name='skill_detector',
    exist_ok=True,                    # reuse the run folder on re-runs
    pretrained=True,
    optimizer='AdamW',
    cos_lr=True                       # cosine learning-rate schedule
)

print("\n" + "="*80)
print("‚úÖ TRAINING COMPLETE!")
print("="*80)

## 8️⃣ Evaluate Model

In [None]:
# Reload the best.pt checkpoint from the skill_detector run folder.
best_weights = '/content/runs/detect/skill_detector/weights/best.pt'
best_model = YOLO(best_weights)

# Run validation.
# NOTE(review): val() is given no dataset argument; presumably the dataset
# config stored with the checkpoint is reused - confirm.
metrics = best_model.val()

# Report the two box-detection mAP figures exposed by the metrics object.
banner = "=" * 80
print("\n" + banner)
print("EVALUATION RESULTS")
print(banner)
print(f"mAP50: {metrics.box.map50:.4f}")
print(f"mAP50-95: {metrics.box.map:.4f}")

## 9️⃣ Export to TFLite (for MediaPipe)

In [None]:
print("="*80)
print("EXPORTING TO TFLITE")
print("="*80)

# Export the evaluated model to TFLite at the training image size.
# `optimize=True` is intentionally not passed: Ultralytics documents it as a
# TorchScript mobile-optimization option, so it does not apply to TFLite.
tflite_path = best_model.export(format='tflite', imgsz=IMG_SIZE)

print("\n‚úÖ Model exported to TFLite!")
print(f"   Path: {tflite_path}")

## 🔟 Download Trained Model

In [None]:
from google.colab import files
import json
from datetime import datetime

# Describe the run in a small JSON file stored in the skill_detector run
# folder: model label, epoch setting, class list, timestamp and mAP50.
metadata_path = '/content/runs/detect/skill_detector/metadata.json'
metadata = dict(
    model='YOLOv8m-Skill',
    epochs=EPOCHS,
    classes=CLASSES,
    trained_on=datetime.now().isoformat(),
    map50=float(metrics.box.map50),
)
with open(metadata_path, 'w') as f:
    f.write(json.dumps(metadata, indent=2))

# Download the PyTorch weights, the TFLite export and the metadata file,
# in that order.
print("üì• Downloading files...\n")
for artifact in (
    '/content/runs/detect/skill_detector/weights/best.pt',
    tflite_path,
    metadata_path,
):
    files.download(artifact)

print("\nüéâ Training complete! Your model is ready to use.")