In [3]:
import cv2
import numpy as np
from ultralytics import YOLO
from collections import deque
import os

# Load models
# Both checkpoints are loaded once at cell execution time and used as module-level
# globals by detect_knockdowns_in_video below.
player_model = YOLO('playerIdentificationModel.pt')  # player detection
pose_model = YOLO('yolov8n-pose.pt')  # pose estimation

# Knockdown memory for temporal consistency
# NOTE(review): this dict is never read or written by any code visible in this
# notebook — confirm whether it is still needed.
knockdown_history = {}

def is_knockdown_from_pose(keypoints, threshold_ratio=0.3, min_conf=0.3, pixel_margin=10):
    """Decide from one person's pose keypoints whether they look knocked down.

    The person is flagged when the nose is no more than ``pixel_margin`` pixels
    above the mean ankle height (image y grows downward), i.e. the head is
    roughly level with, or lower than, the feet.

    Args:
        keypoints: Indexable of per-keypoint ``(x, y, confidence)`` triples.
            Index 0 is read as the nose and 15 / 16 as the left / right ankle
            (presumably the COCO-17 layout — confirm against the pose model).
        threshold_ratio: Unused by the current heuristic; kept so existing
            callers that pass it keep working.
        min_conf: Minimum confidence required for the nose and both ankles;
            if any of them is below it the pose is considered unreliable.
        pixel_margin: How many pixels above the ankles the nose may still be
            while counting as "down".

    Returns:
        bool: True if the pose looks like a knockdown; False otherwise,
        including when the keypoints are missing, malformed or low-confidence.
    """
    try:
        nose = keypoints[0]
        left_ankle = keypoints[15]
        right_ankle = keypoints[16]

        # Any unreliable landmark makes the comparison meaningless.
        if nose[2] < min_conf or left_ankle[2] < min_conf or right_ankle[2] < min_conf:
            return False

        avg_ankle_y = (left_ankle[1] + right_ankle[1]) / 2
        vertical_diff = nose[1] - avg_ankle_y

        # Negative diff = nose above ankles; allow a small margin above.
        return bool(vertical_diff > -pixel_margin)
    except (IndexError, KeyError, TypeError, ValueError):
        # Malformed input (too few keypoints, no confidence column, None, ...)
        # is treated as "no knockdown". Deliberately not a bare ``except`` so
        # KeyboardInterrupt / SystemExit still propagate.
        return False

def is_knockdown_from_bbox(bbox, aspect_thresh=0.5):
    """Flag a knockdown when a bounding box is much wider than it is tall.

    Args:
        bbox: ``(x1, y1, x2, y2)`` corner coordinates of the box.
        aspect_thresh: The box is flagged when ``height / width`` is strictly
            below this value.

    Returns:
        bool: True for a valid box whose height/width ratio is below
        ``aspect_thresh``. Degenerate boxes (zero or negative width or height)
        always return False: they enclose nothing, and an inverted box would
        otherwise give a negative ratio that passes the threshold.
    """
    x1, y1, x2, y2 = bbox
    width = x2 - x1
    height = y2 - y1
    if width <= 0 or height <= 0:
        return False
    return bool(height / width < aspect_thresh)

def _knockdown_label_for_player(frame, x1, y1, x2, y2):
    """Return the overlay label for one player box, or None if no knockdown is flagged.

    The box coordinates must already be integers clamped to the frame.
    """
    import warnings  # local import: keeps this block independent of the import cell

    bbox = [x1, y1, x2, y2]
    player_crop = frame[y1:y2, x1:x2]
    if player_crop.size == 0:
        # Degenerate box: nothing to run pose on and nothing to classify.
        return None

    try:
        pose_result = pose_model(player_crop)[0]
        poses = pose_result.keypoints
        if poses is None or len(poses) == 0:
            keypoints = None
        else:
            # Read the raw (x, y, conf) rows of the first detected pose through
            # `.data`. Calling `.cpu().numpy()` on the keypoints container itself
            # returns the container rather than an ndarray, so the in-place
            # offsetting below would raise. np.array(...) also gives us a copy.
            keypoints = np.array(poses.data[0].cpu().numpy(), dtype=float)
    except Exception as exc:
        # Best effort: a pose failure must not abort the whole video, but it
        # should not be silent either.
        warnings.warn(f"Pose estimation failed; using bbox aspect ratio instead: {exc}")
        return "Knockdown (No Pose)" if is_knockdown_from_bbox(bbox) else None

    if keypoints is None:
        # Pose missing -> fall back to the bbox heuristic.
        return "Knockdown (Fallback)" if is_knockdown_from_bbox(bbox) else None

    # Shift crop-relative coordinates back into full-frame coordinates.
    keypoints[:, 0] += x1
    keypoints[:, 1] += y1
    return "Knockdown (Pose)" if is_knockdown_from_pose(keypoints) else None


def detect_knockdowns_in_video(video_path, output_path='knockdown_output.mp4'):
    """Annotate every frame of a video with knockdown flags and save the result.

    For each frame, players are detected with ``player_model``; each player crop
    is passed to ``pose_model`` and classified with ``is_knockdown_from_pose``,
    falling back to ``is_knockdown_from_bbox`` when no pose is found or pose
    estimation fails. Each frame is judged independently (no temporal smoothing).

    Args:
        video_path: Path of the input video.
        output_path: Path of the annotated ``mp4v``-encoded output video.

    Raises:
        IOError: If the input video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    font = cv2.FONT_HERSHEY_SIMPLEX
    red = (0, 0, 255)
    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep fractional rates (e.g. 29.97) instead of truncating to int, and
        # never hand the writer a zero/NaN rate. 30.0 is an arbitrary fallback.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_h, frame_w = frame.shape[:2]

            # 1. Detect players
            player_results = player_model(frame)[0]
            knockdown_frame = False

            for box in player_results.boxes.xyxy:
                x1, y1, x2, y2 = map(int, box)
                # Clamp to the frame: a negative start index would wrap around
                # in the numpy slice and produce a wrong (or empty) crop.
                x1, x2 = max(x1, 0), min(x2, frame_w)
                y1, y2 = max(y1, 0), min(y2, frame_h)

                # 2. Pose estimation on the cropped region (with bbox fallback)
                label = _knockdown_label_for_player(frame, x1, y1, x2, y2)
                if label is not None:
                    knockdown_frame = True
                    # Keep the label inside the frame for boxes touching the top edge.
                    cv2.putText(frame, label, (x1, max(y1 - 10, 20)), font, 0.6, red, 2)

                # Draw bbox
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)

            if knockdown_frame:
                # ASCII only: the Hershey fonts cannot render emoji and would
                # draw question marks in their place.
                cv2.putText(frame, "Knockdown Detected", (50, 50), font, 1.0, red, 3)

            out.write(frame)
    finally:
        # Release even if a model call raises part-way through the video.
        cap.release()
        if out is not None:
            out.release()

    print(f"[✅] Knockdown video saved to: {output_path}")


In [None]:
# Run the per-frame knockdown detector on knd3.mp4. No output path is passed,
# so the annotated video is written to the function's default output_path.
detect_knockdowns_in_video("knd3.mp4")


0: 384x640 2 fighters, 97.8ms
Speed: 4.8ms preprocess, 97.8ms inference, 7.3ms postprocess per image at shape (1, 3, 384, 640)

0: 640x480 6 persons, 81.5ms
Speed: 2.5ms preprocess, 81.5ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 480)

0: 640x448 4 persons, 81.6ms
Speed: 2.5ms preprocess, 81.6ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 448)

0: 384x640 2 fighters, 51.3ms
Speed: 2.6ms preprocess, 51.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 640x480 5 persons, 75.4ms
Speed: 2.3ms preprocess, 75.4ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 6 persons, 70.9ms
Speed: 2.4ms preprocess, 70.9ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 480)

0: 384x640 2 fighters, 56.4ms
Speed: 2.5ms preprocess, 56.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 640x480 5 persons, 69.4ms
Speed: 2.4ms preprocess, 69.4ms inference, 1.2ms postprocess per image at shap

In [1]:
import cv2
import numpy as np
from ultralytics import YOLO
from collections import deque

# Load models
# Module-level globals used by detect_knockdowns_with_tracking below.
pose_model = YOLO("yolov8n-pose.pt")
player_model = YOLO("playerIdentificationModel.pt")

# Settings
# Number of consecutive flagged frames required before "KNOCKDOWN" is drawn.
KNOCKDOWN_FRAME_THRESHOLD = 3
# Row indices into the pose keypoint array used by is_knockdown_pose
# (presumably the COCO-17 layout: 0 = nose, 15/16 = ankles — confirm).
POSE_NOSE_IDX = 0
POSE_ANKLE_IDX = [15, 16]
# player_id -> current run length of consecutive flagged frames.
knockdown_buffer = {}
# player_id -> deque (maxlen=5) of recent bounding-box top (y1) values.
# Both dicts are mutated by detect_knockdowns_with_tracking.
player_history = {}

def is_knockdown_pose(keypoints, max_vertical_gap=50):
    """Flag a knockdown when the nose and the lowest visible ankle are at similar height.

    Args:
        keypoints: Indexable of per-keypoint ``(x, y)`` pairs for one person;
            rows ``POSE_NOSE_IDX`` and ``POSE_ANKLE_IDX`` are read.
        max_vertical_gap: The pose is flagged when the absolute vertical
            distance between nose and ankle is strictly below this many pixels.

    Returns:
        bool: True if the pose looks like a knockdown. False if it does not, if
        the nose or both ankles are missing, or if ``keypoints`` is malformed.

    Note:
        A keypoint at exactly ``(0, 0)`` is treated as "not detected". The
        caller passes Ultralytics ``Keypoints.xy``, which (per the Ultralytics
        reference — confirm for the installed version) zeroes low-confidence
        keypoints. Without this check, a standing person whose ankles were not
        detected and whose nose lies near the top of the crop would be flagged.
    """
    def _detected(point):
        return bool(point[0] != 0 or point[1] != 0)

    try:
        nose = keypoints[POSE_NOSE_IDX]
        if not _detected(nose):
            return False

        ankle_ys = [keypoints[i][1] for i in POSE_ANKLE_IDX if _detected(keypoints[i])]
        if not ankle_ys:
            return False

        # Largest y = ankle that is lowest in the image.
        return bool(abs(nose[1] - max(ankle_ys)) < max_vertical_gap)
    except (IndexError, KeyError, TypeError, ValueError):
        # Malformed input counts as "no knockdown"; unlike a bare ``except``
        # this lets KeyboardInterrupt / SystemExit propagate.
        return False

def is_knockdown_fallback(bbox, history):
    """Bounding-box heuristic used when pose estimation does not flag a knockdown.

    Args:
        bbox: ``(x, y, w, h)`` — left, top, width and height of the box.
        history: Sequence of earlier top-y values; only the last entry is read.

    Returns:
        bool: True only when the box is flat (height/width below 0.5), the
        history is non-empty, and the box top is more than 20 pixels below the
        last recorded top.
    """
    _, top, box_w, box_h = bbox
    # Small epsilon keeps a zero-width box from dividing by zero.
    is_flat = box_h / (box_w + 1e-5) < 0.5
    if not (is_flat and history):
        return False
    return bool(top - history[-1] > 20)

def detect_knockdowns_with_tracking(video_path, output_path="knockdowns_output.mp4"):
    """Annotate a video with knockdowns confirmed over several consecutive frames.

    Per frame, each box from ``player_model`` is cropped and passed to
    ``pose_model``. A player is flagged by ``is_knockdown_pose`` or, failing
    that, by ``is_knockdown_fallback``. "KNOCKDOWN" is drawn once a player has
    been flagged for ``KNOCKDOWN_FRAME_THRESHOLD`` consecutive frames.

    Args:
        video_path: Path of the input video.
        output_path: Path of the annotated ``mp4v``-encoded output video.

    Raises:
        IOError: If the input video cannot be opened.

    Note:
        Players are keyed by their detection index within the frame
        (``player_0``, ``player_1``, ...). This is not a real tracker: if the
        detector returns boxes in a different order, histories get mixed up.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    # The per-player state lives in module-level dicts; reset it so results
    # from a previously processed video do not leak into this one.
    knockdown_buffer.clear()
    player_history.clear()

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep fractional rates and avoid a zero/NaN rate; 30.0 is an arbitrary fallback.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_h, frame_w = frame.shape[:2]
            results = player_model(frame)
            players = results[0].boxes.xyxy.cpu().numpy()

            for i, box in enumerate(players):
                x1, y1, x2, y2 = box[:4]
                w = int(x2 - x1)
                h = int(y2 - y1)
                player_id = f"player_{i}"

                # Snapshot the history BEFORE recording this frame. The fallback
                # compares the current top against the last entry, so including
                # the current value would always give a drop of zero.
                history = player_history.setdefault(player_id, deque(maxlen=5))
                previous_tops = list(history)
                history.append(y1)

                # Crop player region, clamped to the frame so negative indices
                # cannot wrap around in the slice.
                left, top = max(int(x1), 0), max(int(y1), 0)
                right, bottom = min(int(x2), frame_w), min(int(y2), frame_h)
                player_crop = frame[top:bottom, left:right]

                # Run pose detection (skipped for empty crops, which would
                # otherwise crash the model call and abort the video).
                keypoints = []
                if player_crop.size > 0:
                    poses = pose_model(player_crop)[0].keypoints
                    if poses is not None and len(poses) > 0:
                        keypoints = poses.xy.cpu().numpy()[0]

                # Knockdown logic
                knockdown = False
                if len(keypoints) > 0:
                    knockdown = is_knockdown_pose(keypoints)

                # Fallback only if pose doesn't detect knockdown
                if not knockdown:
                    knockdown = is_knockdown_fallback((x1, y1, w, h), previous_tops)

                # Temporal buffer: count consecutive flagged frames.
                if knockdown:
                    knockdown_buffer[player_id] = knockdown_buffer.get(player_id, 0) + 1
                else:
                    knockdown_buffer[player_id] = 0

                top_left = (int(x1), int(y1))
                bottom_right = (int(x2), int(y2))

                # Confirmed knockdown
                if knockdown_buffer[player_id] >= KNOCKDOWN_FRAME_THRESHOLD:
                    # Keep the label inside the frame for boxes touching the top edge.
                    label_origin = (int(x1), max(int(y1) - 10, 25))
                    cv2.putText(frame, "KNOCKDOWN", label_origin, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.rectangle(frame, top_left, bottom_right, (0, 0, 255), 3)
                else:
                    cv2.rectangle(frame, top_left, bottom_right, (255, 255, 0), 2)

            out.write(frame)
    finally:
        # Release even if a model call raises part-way through the video.
        cap.release()
        if out is not None:
            out.release()

    print("Video saved:", output_path)


In [2]:
# Run the temporally-buffered detector on test_video.mp4 and write the
# annotated result to testoppp.mp4.
detect_knockdowns_with_tracking("test_video.mp4","testoppp.mp4")


0: 384x640 1 fighter, 63.2ms
Speed: 4.2ms preprocess, 63.2ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 640x640 2 persons, 106.7ms
Speed: 6.1ms preprocess, 106.7ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 1 fighter, 59.6ms
Speed: 3.3ms preprocess, 59.6ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 640x640 4 persons, 93.4ms
Speed: 4.7ms preprocess, 93.4ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 1 fighter, 59.0ms
Speed: 3.5ms preprocess, 59.0ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 640x448 2 persons, 79.5ms
Speed: 2.0ms preprocess, 79.5ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 448)

0: 384x640 1 fighter, 56.8ms
Speed: 3.8ms preprocess, 56.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 640x448 1 person, 68.4ms
Speed: 2.1ms preprocess, 68.4ms inference, 1.2ms postprocess per image at shape 