In [None]:
# Some Kaggle data sources are private, so authenticate before downloading.
# Run this cell first: the dataset download cell below uses the `kagglehub`
# module imported here.
import kagglehub
kagglehub.login()


In [None]:
# Download the Kaggle dataset this notebook annotates.
# This notebook environment may differ from Kaggle's Python environment, so
# libraries used further down may have to be installed separately.
#
# NOTE(review): the returned path is stored below, but no other visible cell
# reads it; the configuration cell hard-codes the /kaggle/input/... location.

hasanfaisal_mma_fighter_detection_pose_estimation_path = kagglehub.dataset_download('hasanfaisal/mma-fighter-detection-pose-estimation')

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
   # for filename in filenames:
        # print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install ultralytics



In [None]:
"""
YOLO-Pose Keypoint Annotation Generator with GPU Acceleration (Kaggle Version)
This script adds keypoint annotations to an existing YOLO detection dataset
by using a pretrained pose estimation model, optimized for GPU processing.

KAGGLE-SPECIFIC: Reads from /kaggle/input/ and writes to /kaggle/working/
"""

import os
import cv2
import numpy as np
from pathlib import Path
from ultralytics import YOLO
from tqdm.notebook import tqdm
import shutil
import torch

In [None]:
# ============================================================================
# CONFIGURATION
# ============================================================================

# Root of the INPUT detection dataset (read-only on Kaggle). Each split is
# expected to contain 'images' and 'labels' sub-directories.
DATASET_INPUT = "/kaggle/input/mma-fighter-detection-pose-estimation/mma-fighter-detection-dataset"

# Root of the OUTPUT dataset (created under /kaggle/working/).
DATASET_OUTPUT = "/kaggle/working/mma-fighter-pose-dataset"

# Splits to process; remove entries you do not want to process.
SPLITS = ['train', 'valid', 'test']

# Minimum IoU between a labelled fighter box and a detected pose box for the
# pose's keypoints to be attached to that fighter.
# 0.3-0.5 is typically sufficient; 0.6 is deliberately stricter. Lower it if
# too many fighters end up without keypoints.
IOU_THRESHOLD = 0.6

# Confidence threshold passed to the pose model.
# Lower this if the model misses fighters, raise it if you get false detections.
POSE_CONFIDENCE = 0.3

# GPU configuration.
# None lets setup_device() pick 'cuda:0' when CUDA is available and 'cpu'
# otherwise; set an explicit string such as 'cuda:0' or 'cuda:1' to override.
DEVICE = None  # Will auto-select best available device

# Intended batch size for GPU inference.
# NOTE(review): process_dataset() currently only prints this value and runs the
# model on one image at a time, so changing it has no effect yet.
BATCH_SIZE = 16

In [None]:
def setup_device(device=None):
    """
    Choose the compute device and collect facts about it for logging.

    Args:
        device: Device string such as 'cuda:0' or 'cpu'. ``None`` selects
            'cuda:0' when CUDA is available and 'cpu' otherwise.

    Returns:
        tuple: ``(device, device_info)``. ``device_info`` always contains
        'cuda_available', 'gpu_name' and 'device'; when CUDA is available it
        additionally contains 'gpu_count', 'gpu_memory_total' and
        'gpu_memory_allocated' (both expressed in GB).
    """
    has_cuda = torch.cuda.is_available()
    info = {'cuda_available': has_cuda}

    if not has_cuda:
        # Without CUDA, an unspecified or CUDA request is downgraded to the CPU;
        # any other explicit device string is passed through unchanged.
        if device is None or device.startswith('cuda'):
            print("⚠ Warning: CUDA not available. Falling back to CPU.")
            print("  GPU processing will NOT be used. This will be significantly slower.")
            print("  To enable GPU: In Kaggle, go to Settings → Accelerator → Select 'GPU T4 x2'")
            device = 'cpu'
        info['gpu_name'] = 'CPU'
        info['device'] = device
        return device, info

    info['gpu_count'] = torch.cuda.device_count()

    # Default to the first GPU when the caller expressed no preference.
    if device is None:
        device = 'cuda:0'

    # 'cuda:N' -> N; device strings without an index fall back to GPU 0.
    gpu_id = int(device.split(':')[1]) if ':' in device else 0
    bytes_per_gb = 1024**3

    info['gpu_name'] = torch.cuda.get_device_name(gpu_id)
    info['gpu_memory_total'] = torch.cuda.get_device_properties(gpu_id).total_memory / bytes_per_gb

    # Reset the peak-memory counters, then record what is currently allocated.
    torch.cuda.reset_peak_memory_stats(gpu_id)
    info['gpu_memory_allocated'] = torch.cuda.memory_allocated(gpu_id) / bytes_per_gb

    info['device'] = device
    return device, info


def print_device_info(device_info):
    """
    Print a banner describing the compute device.

    Args:
        device_info: Dictionary as returned by ``setup_device``. The GPU
            branch reads 'device', 'gpu_name', 'gpu_memory_total' and
            'gpu_count'; the CPU branch only reads 'device'.
    """
    rule = "=" * 70

    print("\n" + rule)
    print("COMPUTE DEVICE INFORMATION")
    print(rule)

    if not device_info['cuda_available']:
        body = [
            "✗ GPU acceleration DISABLED",
            f"  Device: {device_info['device']}",
            "  Processing will be done on CPU (much slower)",
        ]
    else:
        body = [
            "✓ GPU acceleration ENABLED",
            f"  Device: {device_info['device']}",
            f"  GPU Name: {device_info['gpu_name']}",
            f"  Total GPU Memory: {device_info['gpu_memory_total']:.2f} GB",
            f"  Available GPUs: {device_info['gpu_count']}",
            "\n  This will provide 10-50x speedup compared to CPU processing!",
        ]

    for text in body:
        print(text)

    print(rule)

In [None]:
# Quick check of device detection: as the cell's last expression, the returned
# (device, device_info) tuple is shown as the cell output.
setup_device()

('cuda:0',
 {'cuda_available': True,
  'gpu_count': 1,
  'gpu_name': 'Tesla P100-PCIE-16GB',
  'gpu_memory_total': 15.887939453125,
  'gpu_memory_allocated': 0.0,
  'device': 'cuda:0'})

In [None]:
# Show the banner for the device that setup_device() detects on THIS machine,
# instead of a dictionary pasted from an earlier run on one particular GPU.
detected_device_info = setup_device(DEVICE)[1]
print_device_info(detected_device_info)


COMPUTE DEVICE INFORMATION
✓ GPU acceleration ENABLED
  Device: cuda:0
  GPU Name: Tesla P100-PCIE-16GB
  Total GPU Memory: 15.89 GB
  Available GPUs: 1

  This will provide 10-50x speedup compared to CPU processing!


In [None]:
def calculate_iou(box1, box2):
    """
    Intersection over Union of two boxes given in YOLO centre format.

    Args:
        box1: ``[x_center, y_center, width, height]``.
        box2: Same layout as ``box1``.

    Returns:
        The ratio of the overlap area to the union area, or ``0.0`` when the
        boxes do not overlap or the union area is zero.
    """
    def to_corners(box):
        # Centre/size -> (left, top, right, bottom).
        half_w = box[2] / 2
        half_h = box[3] / 2
        return box[0] - half_w, box[1] - half_h, box[0] + half_w, box[1] + half_h

    a_left, a_top, a_right, a_bottom = to_corners(box1)
    b_left, b_top, b_right, b_bottom = to_corners(box2)

    # Corners of the overlap rectangle (if there is one).
    left = max(a_left, b_left)
    top = max(a_top, b_top)
    right = min(a_right, b_right)
    bottom = min(a_bottom, b_bottom)

    # An inverted rectangle means the boxes are disjoint.
    if right < left or bottom < top:
        return 0.0

    overlap = (right - left) * (bottom - top)
    area_a = (a_right - a_left) * (a_bottom - a_top)
    area_b = (b_right - b_left) * (b_bottom - b_top)
    union = area_a + area_b - overlap

    # Two degenerate (zero-area) boxes would otherwise divide by zero.
    return 0.0 if union == 0 else overlap / union

In [None]:
def xyxy_to_xywh_normalized(box, img_width, img_height):
    """
    Turn a pixel-space corner box into a YOLO centre-format box.

    Args:
        box: ``(x1, y1, x2, y2)`` corner coordinates.
        img_width: Value the x centre and width are divided by.
        img_height: Value the y centre and height are divided by.

    Returns:
        list: ``[x_center, y_center, width, height]`` after division by the
        image dimensions.
    """
    left, top, right, bottom = box
    return [
        ((left + right) / 2) / img_width,
        ((top + bottom) / 2) / img_height,
        (right - left) / img_width,
        (bottom - top) / img_height,
    ]

In [None]:
def load_existing_labels(label_path):
    """
    Read the bounding boxes stored in a YOLO label file.

    Lines with fewer than five whitespace-separated fields are ignored; any
    fields after the fifth are dropped.

    Args:
        label_path: Path of the label file.

    Returns:
        list: One ``[class_id, x_center, y_center, width, height]`` entry per
        accepted line, or an empty list when the file does not exist.
    """
    if not os.path.exists(label_path):
        return []

    with open(label_path, 'r') as label_file:
        rows = (raw.strip().split() for raw in label_file)
        # Class id as int, the four box values as floats.
        return [
            [int(fields[0]), *map(float, fields[1:5])]
            for fields in rows
            if len(fields) >= 5
        ]

In [None]:
def save_yolo_pose_labels(label_path, annotations):
    """
    Write annotations to a label file in YOLO-Pose text format.

    Each annotation becomes one line:
    ``class_id x_center y_center width height [kp_x kp_y kp_v ...]``

    Keypoint triples are only appended when the annotation carries a
    non-``None`` 'keypoints' entry. The visibility flag ``kp_v`` is written as
    an integer (0: not labeled, 1: labeled but occluded, 2: labeled and
    visible).

    Args:
        label_path: Destination file. Missing parent directories are created;
            a bare file name is written to the current working directory.
        annotations: Iterable of dicts with 'class_id', 'bbox' (four numbers)
            and optionally 'keypoints' (iterable of ``[x, y, visibility]``).
    """
    # os.path.dirname() returns '' for a bare file name, and os.makedirs('')
    # raises, so only create the directory when there is one.
    parent_dir = os.path.dirname(label_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(label_path, 'w') as f:
        for ann in annotations:
            bbox = ann['bbox']
            fields = [
                f"{ann['class_id']}",
                f"{bbox[0]:.6f}",
                f"{bbox[1]:.6f}",
                f"{bbox[2]:.6f}",
                f"{bbox[3]:.6f}",
            ]

            keypoints = ann.get('keypoints')
            if keypoints is not None:
                for kp in keypoints:
                    fields.extend((f"{kp[0]:.6f}", f"{kp[1]:.6f}", f"{int(kp[2])}"))

            f.write(' '.join(fields) + '\n')

In [None]:
# ============================================================================
# MAIN PROCESSING FUNCTION
# ============================================================================

def process_dataset(input_root, output_root, splits, iou_threshold, pose_confidence, device_str, batch_size):
    """
    Copy a YOLO detection dataset and add pose keypoints to its labels.

    For every image of every requested split the image is copied to the
    output tree, a pretrained pose model is run on it, and each labelled
    fighter box is paired with at most one detected pose. Matched fighters
    get that pose's keypoints appended to their label line; unmatched
    fighters keep a box-only line.

    Args:
        input_root: Root of the source dataset (``<split>/images`` and
            ``<split>/labels`` underneath). Only read, never written.
        output_root: Root of the dataset to create.
        splits: Iterable of split directory names to process.
        iou_threshold: Minimum IoU between a fighter box and a pose box for
            the two to be paired.
        pose_confidence: Confidence threshold passed to the pose model.
        device_str: Device string forwarded to ``setup_device`` (``None`` for
            automatic selection).
        batch_size: Reported in the log only. NOTE(review): inference still
            runs on one image at a time, so this value has no effect yet.

    Returns:
        None. Progress and summary statistics are printed.
    """

    print("=" * 70)
    print("YOLO-Pose Keypoint Annotation Generator (Kaggle Version)")
    print("=" * 70)
    print(f"\nInput dataset: {input_root}")
    print(f"Output dataset: {output_root}")
    print(f"Splits to process: {splits}")
    print(f"IoU threshold: {iou_threshold}")
    print(f"Pose confidence: {pose_confidence}")
    print(f"Batch size: {batch_size}\n")

    # Setup GPU device
    device, device_info = setup_device(device_str)
    print_device_info(device_info)

    # Load the pretrained YOLO11 extra-large pose checkpoint and move it to the
    # selected device.
    print("\nLoading pretrained YOLO-Pose model...")
    model = YOLO('yolo11x-pose.pt')
    model.to(device)
    print(f"✓ Model loaded successfully on {device}!\n")

    # Create output directory structure
    os.makedirs(output_root, exist_ok=True)

    # Copy data.yaml if it exists
    yaml_src = os.path.join(input_root, 'data.yaml')
    if os.path.exists(yaml_src):
        yaml_dst = os.path.join(output_root, 'data.yaml')
        print(f"Copying data.yaml to output directory...")
        shutil.copy2(yaml_src, yaml_dst)
        print("✓ data.yaml copied\n")

    # Statistics tracking. Every fighter that is written to an output label is
    # counted exactly once as either matched or unmatched.
    stats = {
        'total_images': 0,
        'images_with_poses': 0,
        'total_fighters_annotated': 0,
        'fighters_matched': 0,
        'fighters_unmatched': 0
    }

    for split in splits:
        print(f"\n{'=' * 70}")
        print(f"Processing {split.upper()} split")
        print(f"{'=' * 70}\n")

        input_images_dir = os.path.join(input_root, split, 'images')
        input_labels_dir = os.path.join(input_root, split, 'labels')

        output_images_dir = os.path.join(output_root, split, 'images')
        output_labels_dir = os.path.join(output_root, split, 'labels')

        if not os.path.exists(input_images_dir):
            print(f"⚠ Warning: {input_images_dir} does not exist. Skipping...")
            continue

        os.makedirs(output_images_dir, exist_ok=True)
        os.makedirs(output_labels_dir, exist_ok=True)

        # Collect images once, in a deterministic order. The set removes
        # duplicates that appear when several patterns match the same file
        # (e.g. on a case-insensitive file system).
        image_files = sorted({
            path
            for pattern in ('*.jpg', '*.jpeg', '*.png', '*.JPG', '*.JPEG', '*.PNG')
            for path in Path(input_images_dir).glob(pattern)
        })

        print(f"Found {len(image_files)} images in {split} split")
        print(f"Copying images and generating keypoint annotations...\n")

        for img_path in tqdm(image_files, desc=f"Processing {split}"):
            stats['total_images'] += 1

            # Clear GPU cache periodically to prevent memory accumulation.
            # Done up front so the early `continue` paths cannot skip it.
            if device_info['cuda_available'] and stats['total_images'] % 100 == 0:
                torch.cuda.empty_cache()

            input_label_path = os.path.join(input_labels_dir, img_path.stem + '.txt')
            output_image_path = os.path.join(output_images_dir, img_path.name)
            output_label_path = os.path.join(output_labels_dir, img_path.stem + '.txt')

            shutil.copy2(str(img_path), output_image_path)

            existing_labels = load_existing_labels(input_label_path)

            if len(existing_labels) == 0:
                # No fighters labeled, just copy the empty label file
                if os.path.exists(input_label_path):
                    shutil.copy2(input_label_path, output_label_path)
                continue

            img = cv2.imread(str(img_path))
            if img is None:
                # The image was already copied; keep its boxes so they are not
                # lost from the output dataset.
                print(f"⚠ Warning: Could not read {img_path}. Keeping its boxes without keypoints.")
                _save_bbox_only_labels(output_label_path, existing_labels, stats)
                continue

            img_height, img_width = img.shape[:2]

            results = model(img, conf=pose_confidence, device=device, verbose=False)
            result = results[0] if len(results) > 0 else None

            if (result is None or result.keypoints is None
                    or result.boxes is None or len(result.boxes) == 0):
                # No poses detected, save labels without keypoints
                _save_bbox_only_labels(output_label_path, existing_labels, stats)
                continue

            stats['images_with_poses'] += 1

            # Convert every detection once per image rather than once per
            # fighter/detection pair.
            det_boxes = [
                xyxy_to_xywh_normalized(box_xyxy, img_width, img_height)
                for box_xyxy in result.boxes.xyxy.cpu().numpy()
            ]
            det_keypoints = result.keypoints.data.cpu().numpy()  # one (x, y, conf) row per keypoint

            fighter_boxes = [label[1:5] for label in existing_labels]
            assignment = _match_fighters_to_poses(fighter_boxes, det_boxes, iou_threshold)

            annotations = []
            for fighter_idx, fighter_label in enumerate(existing_labels):
                stats['total_fighters_annotated'] += 1

                det_idx = assignment.get(fighter_idx)
                if det_idx is None:
                    stats['fighters_unmatched'] += 1
                    keypoints = None
                else:
                    stats['fighters_matched'] += 1
                    keypoints = _keypoints_to_yolo(det_keypoints[det_idx], img_width, img_height)

                annotations.append({
                    'class_id': fighter_label[0],
                    'bbox': fighter_label[1:5],
                    'keypoints': keypoints
                })

            save_yolo_pose_labels(output_label_path, annotations)

    # Print summary statistics
    print("\n" + "=" * 70)
    print("PROCESSING COMPLETE - SUMMARY")
    print("=" * 70)
    print(f"\nTotal images processed: {stats['total_images']}")
    print(f"Images with pose detections: {stats['images_with_poses']}")
    print(f"Total fighters in dataset: {stats['total_fighters_annotated']}")
    print(f"Fighters matched with poses: {stats['fighters_matched']} ({100*stats['fighters_matched']/max(stats['total_fighters_annotated'],1):.1f}%)")
    print(f"Fighters without pose match: {stats['fighters_unmatched']} ({100*stats['fighters_unmatched']/max(stats['total_fighters_annotated'],1):.1f}%)")

    print(f"\n✓ Dataset augmentation complete!")
    print(f"✓ New dataset with keypoint annotations saved to: {output_root}")
    print(f"✓ Your labels now include keypoint annotations in YOLO-Pose format")
    print(f"\nYou can now use this dataset at '{output_root}' for YOLO-Pose training!")


def _save_bbox_only_labels(label_path, labels, stats):
    """Write `labels` without keypoints and count every fighter as unmatched."""
    annotations = [
        {'class_id': label[0], 'bbox': label[1:5], 'keypoints': None}
        for label in labels
    ]
    save_yolo_pose_labels(label_path, annotations)
    stats['total_fighters_annotated'] += len(labels)
    stats['fighters_unmatched'] += len(labels)


def _match_fighters_to_poses(fighter_boxes, det_boxes, iou_threshold):
    """Pair fighters with detections one-to-one, highest IoU first.

    Returns a dict mapping fighter index -> detection index. Only pairs whose
    IoU is positive and at least `iou_threshold` are considered. Assigning in
    descending IoU order makes the result independent of label order and lets
    a fighter fall back to its next-best pose when its best one is taken.
    """
    candidates = []
    for fighter_idx, fighter_box in enumerate(fighter_boxes):
        for det_idx, det_box in enumerate(det_boxes):
            iou = calculate_iou(fighter_box, det_box)
            if iou > 0 and iou >= iou_threshold:
                candidates.append((iou, fighter_idx, det_idx))

    # Highest IoU first; indices break ties deterministically.
    candidates.sort(key=lambda item: (-item[0], item[1], item[2]))

    assignment = {}
    used_detections = set()
    for _iou, fighter_idx, det_idx in candidates:
        if fighter_idx in assignment or det_idx in used_detections:
            continue
        assignment[fighter_idx] = det_idx
        used_detections.add(det_idx)
    return assignment


def _keypoints_to_yolo(kps, img_width, img_height, visible_confidence=0.3):
    """Convert pixel-space (x, y, conf) keypoints to [x_norm, y_norm, visibility].

    Visibility is 1 (labeled but occluded) below `visible_confidence` and
    2 (labeled and visible) otherwise.
    """
    keypoints = []
    for kp in kps:
        # Predicted keypoints can fall slightly outside the frame; clamp so the
        # stored coordinates stay normalized.
        # NOTE(review): Ultralytics' label check is expected to reject
        # coordinates outside [0, 1] - confirm against the installed version.
        x_norm = min(max(kp[0] / img_width, 0.0), 1.0)
        y_norm = min(max(kp[1] / img_height, 0.0), 1.0)
        visibility = 1 if kp[2] < visible_confidence else 2
        keypoints.append([x_norm, y_norm, visibility])
    return keypoints

In [None]:
# ============================================================================
# RUN THE SCRIPT
# ============================================================================

# Run the full annotation pass using the constants from the configuration cell.
if __name__ == "__main__":
    process_dataset(
        input_root=DATASET_INPUT,
        output_root=DATASET_OUTPUT,
        splits=SPLITS,
        iou_threshold=IOU_THRESHOLD,
        pose_confidence=POSE_CONFIDENCE,
        device_str=DEVICE,
        batch_size=BATCH_SIZE
    )

YOLO-Pose Keypoint Annotation Generator (Kaggle Version)

Input dataset: /kaggle/input/mma-fighter-detection-pose-estimation/mma-fighter-detection-dataset
Output dataset: /kaggle/working/mma-fighter-pose-dataset
Splits to process: ['train', 'valid', 'test']
IoU threshold: 0.6
Pose confidence: 0.3
Batch size: 16


COMPUTE DEVICE INFORMATION
✓ GPU acceleration ENABLED
  Device: cuda:0
  GPU Name: Tesla P100-PCIE-16GB
  Total GPU Memory: 15.89 GB
  Available GPUs: 1

  This will provide 10-50x speedup compared to CPU processing!

Loading pretrained YOLO-Pose model...
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-pose.pt to 'yolo11x-pose.pt': 100% ━━━━━━━━━━━━ 113.0MB 278.4MB/s 0.4s 0.4s<0.0s
✓ Model loaded successfully on cuda:0!

Copying data.yaml to output directory...
✓ data.yaml copied


Processing TRAIN split

Found 3635 images in train split
Copying images and generating keypoint annotations...



Processing train:   0%|          | 0/3635 [00:00<?, ?it/s]


Processing VALID split

Found 980 images in valid split
Copying images and generating keypoint annotations...



Processing valid:   0%|          | 0/980 [00:00<?, ?it/s]


Processing TEST split

Found 491 images in test split
Copying images and generating keypoint annotations...



Processing test:   0%|          | 0/491 [00:00<?, ?it/s]


PROCESSING COMPLETE - SUMMARY

Total images processed: 5106
Images with pose detections: 5106
Total fighters in dataset: 10186
Fighters matched with poses: 10155 (99.7%)
Fighters without pose match: 31 (0.3%)

✓ Dataset augmentation complete!
✓ New dataset with keypoint annotations saved to: /kaggle/working/mma-fighter-pose-dataset
✓ Your labels now include keypoint annotations in YOLO-Pose format

You can now use this dataset at '/kaggle/working/mma-fighter-pose-dataset' for YOLO-Pose training!


In [None]:
# Root of the augmented dataset. Taken from the configuration cell so the scan
# always inspects the directory that process_dataset() wrote to.
dataset_root = DATASET_OUTPUT

# Number of keypoints expected per instance (17 for COCO format)
NUM_KEYPOINTS = 17

def check_label_has_keypoints(label_path, num_keypoints=None):
    """
    Report, per labelled instance, whether its keypoints are complete.

    A label line is ``class_id x y w h`` optionally followed by one
    ``kp_x kp_y kp_v`` triple per keypoint, i.e. ``5 + 3 * num_keypoints``
    fields when keypoints are present. Lines with fewer fields (box-only or
    truncated keypoints) are reported as missing keypoints. Blank lines are
    not instances and are skipped.

    Args:
        label_path: Path of the label file.
        num_keypoints: Keypoints expected per instance. ``None`` uses the
            module-level ``NUM_KEYPOINTS``.

    Returns:
        list: ``(line_number, has_keypoints)`` tuples with 1-based line
        numbers, or an empty list when the file does not exist.
    """
    if num_keypoints is None:
        num_keypoints = NUM_KEYPOINTS

    if not os.path.exists(label_path):
        return []

    expected_fields = 5 + num_keypoints * 3

    instances = []
    with open(label_path, 'r') as f:
        for line_num, line in enumerate(f, 1):
            parts = line.split()
            if not parts:
                # Blank line: no instance here, so nothing can be missing.
                continue
            instances.append((line_num, len(parts) >= expected_fields))

    return instances

# Find all label files that contain at least one instance without keypoints,
# print them, and save the list for manual review.
print("Scanning dataset for images with missing keypoint annotations...\n")

IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG']
IMAGE_NOT_FOUND = '<image file not found>'

images_with_missing_keypoints = []

for split in SPLITS:
    labels_dir = os.path.join(dataset_root, split, 'labels')
    images_dir = os.path.join(dataset_root, split, 'images')

    if not os.path.exists(labels_dir):
        continue

    for label_path in sorted(Path(labels_dir).glob('*.txt')):
        instances = check_label_has_keypoints(str(label_path))

        missing_count = sum(1 for _, has_kp in instances if not has_kp)
        if missing_count == 0:
            continue

        # Locate the image belonging to this label; stays None when no file
        # with a known extension exists.
        candidates = (
            os.path.join(images_dir, label_path.stem + ext)
            for ext in IMAGE_EXTENSIONS
        )
        img_path = next((path for path in candidates if os.path.exists(path)), None)

        images_with_missing_keypoints.append({
            'split': split,
            'image': img_path,
            'label': str(label_path),
            'missing_count': missing_count,
            'total_instances': len(instances)
        })

# Print results
print(f"{'='*70}")
print(f"IMAGES WITH MISSING KEYPOINT ANNOTATIONS")
print(f"{'='*70}\n")

if len(images_with_missing_keypoints) == 0:
    print("✓ All instances have keypoint annotations!")
else:
    print(f"Found {len(images_with_missing_keypoints)} images with missing keypoints:\n")

    for idx, item in enumerate(images_with_missing_keypoints, 1):
        # Path(None) raises TypeError, so handle labels whose image is missing.
        image_name = Path(item['image']).name if item['image'] else IMAGE_NOT_FOUND
        print(f"{idx}. [{item['split'].upper()}] {image_name}")
        print(f"   Missing: {item['missing_count']}/{item['total_instances']} fighters")
        print(f"   Image: {item['image'] or IMAGE_NOT_FOUND}")
        print(f"   Label: {item['label']}\n")

# Save list to file for reference
output_file = "/kaggle/working/images_needing_review.txt"
with open(output_file, 'w') as f:
    f.write("Images requiring manual keypoint annotation\n")
    f.write("=" * 70 + "\n\n")

    for item in images_with_missing_keypoints:
        f.write(f"Split: {item['split']}\n")
        f.write(f"Image: {item['image'] or IMAGE_NOT_FOUND}\n")
        f.write(f"Label: {item['label']}\n")
        f.write(f"Missing: {item['missing_count']}/{item['total_instances']} fighters\n")
        f.write("-" * 70 + "\n")

print(f"\n✓ List saved to: {output_file}")
print(f"\nRecommendation: Review these {len(images_with_missing_keypoints)} images and manually")
print(f"annotate the missing keypoints using a tool like CVAT, Labelme, or Label Studio.")

Scanning dataset for images with missing keypoint annotations...

IMAGES WITH MISSING KEYPOINT ANNOTATIONS

Found 31 images with missing keypoints:

1. [TRAIN] 4_mp4-0201_jpg.rf.55371adca0432434f29e0ff4d0656dd1.jpg
   Missing: 1/2 fighters
   Image: /kaggle/working/mma-fighter-pose-dataset/train/images/4_mp4-0201_jpg.rf.55371adca0432434f29e0ff4d0656dd1.jpg
   Label: /kaggle/working/mma-fighter-pose-dataset/train/labels/4_mp4-0201_jpg.rf.55371adca0432434f29e0ff4d0656dd1.txt

2. [TRAIN] -vs-1-DPjtZWsn8gM-_mp4-0884_jpg.rf.5cc30faf5e0358353426ad4eeca2ed28.jpg
   Missing: 1/2 fighters
   Image: /kaggle/working/mma-fighter-pose-dataset/train/images/-vs-1-DPjtZWsn8gM-_mp4-0884_jpg.rf.5cc30faf5e0358353426ad4eeca2ed28.jpg
   Label: /kaggle/working/mma-fighter-pose-dataset/train/labels/-vs-1-DPjtZWsn8gM-_mp4-0884_jpg.rf.5cc30faf5e0358353426ad4eeca2ed28.txt

3. [TRAIN] -vs-zhC7KhFk49M-_mp4-0163_jpg.rf.28f28b5c94d1676966b120ad0b75aef0.jpg
   Missing: 1/2 fighters
   Image: /kaggle/working/mma-figh

In [None]:
from pathlib import Path

# Directory to archive, and the archive's base name. Both point at the same
# location; the archive file is that base name plus '.zip'.
dataset_path = "/kaggle/working/mma-fighter-pose-dataset"
archive_path = "/kaggle/working/mma-fighter-pose-dataset"

print(
    "Creating ZIP archive of your augmented dataset...\n"
    "This may take a few minutes depending on dataset size..."
)

# Zip the contents of dataset_path.
shutil.make_archive(archive_path, 'zip', dataset_path)

archive_file = f"{archive_path}.zip"
archive_size_mb = os.path.getsize(archive_file) / (1024 * 1024)

# Report the result and explain how to fetch the file from the Kaggle UI.
summary_lines = [
    "\n✓ Archive created successfully!",
    f"  File: {archive_file}",
    f"  Size: {archive_size_mb:.2f} MB",
    "\nTo download:",
    "  1. Look at the right sidebar in your Kaggle notebook",
    "  2. Click on 'Output' tab",
    "  3. You'll see 'mma-fighter-pose-dataset.zip'",
    "  4. Click the download icon next to it",
]
print("\n".join(summary_lines))

Creating ZIP archive of your augmented dataset...
This may take a few minutes depending on dataset size...

✓ Archive created successfully!
  File: /kaggle/working/mma-fighter-pose-dataset.zip
  Size: 275.55 MB

To download:
  1. Look at the right sidebar in your Kaggle notebook
  2. Click on 'Output' tab
  3. You'll see 'mma-fighter-pose-dataset.zip'
  4. Click the download icon next to it
