# YOLOv11 Pose Estimation for Muay Thai

This notebook uses YOLOv11 pose estimation to detect body keypoints in Muay Thai videos.

## 1. Import Libraries and Check Installation

In [32]:
# Standard library
import os
from pathlib import Path

# Third-party
import cv2
import ultralytics
from ultralytics import YOLO

# Show which library versions produced the outputs in this notebook
for package_name, package in (("Ultralytics", ultralytics), ("OpenCV", cv2)):
    print(f"{package_name} version: {package.__version__}")

Ultralytics version: 8.3.228
OpenCV version: 4.12.0


## 2. Load Pre-trained YOLOv11 Pose Model

YOLOv11 has pre-trained pose estimation models that detect 17 body keypoints:
- 0: Nose, 1: Left Eye, 2: Right Eye, 3: Left Ear, 4: Right Ear
- 5: Left Shoulder, 6: Right Shoulder, 7: Left Elbow, 8: Right Elbow
- 9: Left Wrist, 10: Right Wrist, 11: Left Hip, 12: Right Hip
- 13: Left Knee, 14: Right Knee, 15: Left Ankle, 16: Right Ankle

In [33]:
# Pre-trained YOLOv11 pose weights; the keypoints they predict are what the
# body-part boxes are later built from.
# Available sizes: yolo11n-pose.pt (nano), yolo11s-pose.pt (small),
#                  yolo11m-pose.pt (medium), yolo11l-pose.pt (large)
# The nano variant is the smallest of these.
model = YOLO("yolo11n-pose.pt")

print("Model loaded successfully!")

Model loaded successfully!


## 3. Process Your Video Directly (No Training Needed)

Because the model is pre-trained, you can process your Muay Thai video directly, with no training step.

In [42]:
# Annotate a video: for each frame, find up to two foreground fighters
# (large person boxes with a colorful patch around a wrist) and draw labeled
# boxes for head, chest, elbows, gloves and ankles.
import numpy as np

# ------------------------------------------------------------- configuration
video_path = "data/videos/amateur1.mov"  # UPDATE THIS PATH
output_path = "runs/bodyparts/amateur1.mp4"

# Arguments forwarded to model.predict(). device=0 and half=True select the
# first CUDA GPU with FP16 inference; on a CPU-only machine use
# device="cpu", half=False instead.
PREDICT_KWARGS = dict(device=0, conf=0.3, imgsz=416, half=True, verbose=False)

DEFAULT_FPS = 30.0          # used only when the container reports no frame rate
PROGRESS_EVERY = 30         # print progress every N frames

GLOVE_HALF_SIZE = 40        # half side (px) of the square sampled around a wrist
GLOVE_MIN_SATURATION = 50   # mean HSV saturation above which a wrist counts as gloved
MIN_AREA_FRACTION = 0.005   # person box must exceed this fraction of the frame area
MAX_FIGHTERS = 2            # keep only the N largest gloved people

HAND_HALF_SIZE = 35         # glove box half side (px)
HAND_EXTENSION = 25         # px the glove box is pushed past the wrist, away from the elbow

# Keypoint indices (see the keypoint list in section 2 of this notebook)
KP_HEAD = slice(0, 5)       # nose, eyes, ears
KP_SHOULDERS = [5, 6]
KP_L_ELBOW, KP_R_ELBOW = 7, 8
KP_L_WRIST, KP_R_WRIST = 9, 10
KP_L_ANKLE, KP_R_ANKLE = 15, 16

# label -> (BGR color passed to OpenCV, human-readable name of that color).
# OpenCV interprets color tuples as (blue, green, red), so the names below
# describe what is actually visible in the output video.
PART_STYLES = {
    'HEAD':    ((0, 255, 0),   "green"),
    'CHEST':   ((255, 0, 0),   "blue"),
    'L_ELBOW': ((0, 255, 255), "yellow"),
    'R_ELBOW': ((255, 255, 0), "cyan"),
    'L_HAND':  ((255, 0, 255), "magenta"),
    'R_HAND':  ((128, 0, 128), "purple"),
    'L_TOES':  ((0, 128, 255), "orange"),
    'R_TOES':  ((255, 128, 0), "light blue"),
}


# ------------------------------------------------------------------- helpers
def _wrist_looks_gloved(frame, wrist_xy, frame_w, frame_h):
    """Return True if the square patch around ``wrist_xy`` is strongly saturated."""
    wx, wy = wrist_xy.astype(int)
    y1, y2 = max(0, wy - GLOVE_HALF_SIZE), min(frame_h, wy + GLOVE_HALF_SIZE)
    x1, x2 = max(0, wx - GLOVE_HALF_SIZE), min(frame_w, wx + GLOVE_HALF_SIZE)
    if y2 <= y1 or x2 <= x1:
        return False  # patch lies entirely outside the frame
    patch = frame[y1:y2, x1:x2]
    if patch.size == 0:
        return False
    saturation = cv2.cvtColor(patch, cv2.COLOR_BGR2HSV)[:, :, 1]
    return float(np.mean(saturation)) > GLOVE_MIN_SATURATION


def _has_gloves(frame, kpts, frame_w, frame_h):
    """Return True if either detected wrist of this person passes the glove check."""
    return any(
        kpts[idx][0] > 0 and _wrist_looks_gloved(frame, kpts[idx], frame_w, frame_h)
        for idx in (KP_L_WRIST, KP_R_WRIST)
    )


def _select_main_fighters(frame, keypoints, boxes, frame_w, frame_h):
    """Return up to MAX_FIGHTERS ``(person_idx, area, kpts)`` tuples, largest box first.

    A person qualifies when their box area exceeds MIN_AREA_FRACTION of the
    frame and at least one wrist passes the glove check.
    """
    min_area = frame_w * frame_h * MIN_AREA_FRACTION
    fighters = []
    for person_idx, kpts in enumerate(keypoints):
        area = boxes[person_idx][2] * boxes[person_idx][3]  # width * height (xywh)
        # Cheap area test first so the color conversion is skipped for small boxes.
        if area > min_area and _has_gloves(frame, kpts, frame_w, frame_h):
            fighters.append((person_idx, area, kpts))
    fighters.sort(key=lambda fighter: fighter[1], reverse=True)
    return fighters[:MAX_FIGHTERS]


def _draw_labeled_box(frame, x1, y1, x2, y2, label, font_scale):
    """Draw a rectangle and put ``label`` just above its top-left corner."""
    color = PART_STYLES[label][0]
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
    cv2.putText(frame, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, 2)


def _draw_region_box(frame, points, label, padding, min_points=1, extend_down=0.0):
    """Draw a padded bounding box around the detected keypoints in ``points``.

    Keypoints with x <= 0 are treated as undetected. Nothing is drawn when
    fewer than ``min_points`` remain. ``extend_down`` lengthens the box
    downward by that multiple of the keypoints' vertical extent.
    """
    valid = points[points[:, 0] > 0]
    if len(valid) < min_points:
        return
    x_min, y_min = valid.min(axis=0).astype(int)
    x_max, y_max = valid.max(axis=0).astype(int)
    y_max = int(y_max + (y_max - y_min) * extend_down)
    _draw_labeled_box(frame, x_min - padding, y_min - padding,
                      x_max + padding, y_max + padding, label, 0.5)


def _draw_point_box(frame, point, label, half_size):
    """Draw a square of half side ``half_size`` centered on a detected keypoint."""
    if not point[0] > 0:
        return
    x, y = point.astype(int)
    _draw_labeled_box(frame, x - half_size, y - half_size,
                      x + half_size, y + half_size, label, 0.4)


def _draw_hand_box(frame, wrist, elbow, label):
    """Draw the glove box, shifted past the wrist along the elbow->wrist direction."""
    if not wrist[0] > 0:
        return
    x, y = wrist.astype(int)
    if elbow[0] > 0:
        ex, ey = elbow.astype(int)
        dx, dy = x - ex, y - ey
        length = float(np.hypot(dx, dy))
        if length > 0:
            x = int(x + (dx / length) * HAND_EXTENSION)
            y = int(y + (dy / length) * HAND_EXTENSION)
    _draw_labeled_box(frame, x - HAND_HALF_SIZE, y - HAND_HALF_SIZE,
                      x + HAND_HALF_SIZE, y + HAND_HALF_SIZE, label, 0.4)


def _draw_body_parts(frame, kpts):
    """Draw every labeled body-part box for one person onto ``frame`` in place."""
    _draw_region_box(frame, kpts[KP_HEAD], 'HEAD', padding=20)
    # NOTE(review): the chest box is extended by 1.5x the vertical distance
    # between the two shoulder keypoints, so it stays short when the shoulders
    # are level. Confirm this is the intended chest height.
    _draw_region_box(frame, kpts[KP_SHOULDERS], 'CHEST', padding=15,
                     min_points=2, extend_down=1.5)
    _draw_point_box(frame, kpts[KP_L_ELBOW], 'L_ELBOW', 25)
    _draw_point_box(frame, kpts[KP_R_ELBOW], 'R_ELBOW', 25)
    _draw_hand_box(frame, kpts[KP_L_WRIST], kpts[KP_L_ELBOW], 'L_HAND')
    _draw_hand_box(frame, kpts[KP_R_WRIST], kpts[KP_R_ELBOW], 'R_HAND')
    # The *_TOES boxes are centered on the ankle keypoints (15 and 16).
    _draw_point_box(frame, kpts[KP_L_ANKLE], 'L_TOES', 20)
    _draw_point_box(frame, kpts[KP_R_ANKLE], 'R_TOES', 20)


# ---------------------------------------------------------------- processing
os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

# Open video
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    cap.release()
    raise OSError(f"Could not open input video: {video_path}")

fps = cap.get(cv2.CAP_PROP_FPS)  # kept as float so rates like 29.97 are not truncated
if not fps > 0:
    fps = DEFAULT_FPS
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # may be 0 if the container does not report it

# Video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
if not out.isOpened():
    cap.release()
    out.release()
    raise OSError(f"Could not open output video for writing: {output_path}")

print(f"Processing {total_frames} frames at {fps:g} FPS...")
frame_count = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Run pose detection
        results = model.predict(frame, **PREDICT_KWARGS)
        detection = results[0]

        if len(detection.boxes) > 0:
            keypoints = detection.keypoints.xy.cpu().numpy()
            boxes = detection.boxes.xywh.cpu().numpy()  # Person boxes

            # Select fighters on the untouched frame first, then draw, so the
            # glove color check never samples pixels we have already drawn on.
            main_fighters = _select_main_fighters(frame, keypoints, boxes, width, height)
            for _person_idx, _person_area, kpts in main_fighters:
                _draw_body_parts(frame, kpts)

        out.write(frame)
        frame_count += 1

        if frame_count % PROGRESS_EVERY == 0:
            if total_frames > 0:
                print(f"Processed {frame_count}/{total_frames} frames ({frame_count/total_frames*100:.1f}%)")
            else:
                print(f"Processed {frame_count} frames")
finally:
    # Release the capture and the writer even if a frame fails mid-run.
    cap.release()
    out.release()

print("\n✓ Processing complete!")
print(f"Output saved to: {output_path}")
print("\nBody parts detected:")
for part_label, (_bgr, color_name) in PART_STYLES.items():
    print(f"  - {part_label} ({color_name})")

Processing 399 frames at 25 FPS...
Processed 30/399 frames (7.5%)
Processed 30/399 frames (7.5%)
Processed 60/399 frames (15.0%)
Processed 60/399 frames (15.0%)
Processed 90/399 frames (22.6%)
Processed 90/399 frames (22.6%)
Processed 120/399 frames (30.1%)
Processed 120/399 frames (30.1%)
Processed 150/399 frames (37.6%)
Processed 150/399 frames (37.6%)
Processed 180/399 frames (45.1%)
Processed 180/399 frames (45.1%)
Processed 210/399 frames (52.6%)
Processed 210/399 frames (52.6%)
Processed 240/399 frames (60.2%)
Processed 240/399 frames (60.2%)
Processed 270/399 frames (67.7%)
Processed 270/399 frames (67.7%)
Processed 300/399 frames (75.2%)
Processed 300/399 frames (75.2%)
Processed 330/399 frames (82.7%)
Processed 330/399 frames (82.7%)
Processed 360/399 frames (90.2%)
Processed 360/399 frames (90.2%)
Processed 390/399 frames (97.7%)
Processed 390/399 frames (97.7%)

✓ Processing complete!
Output saved to: runs/bodyparts/amateur1.mp4

Body parts detected:
  - HEAD (green)
  - CHE