# OKD-NOKD Dataset and Pose Detection Use-Case
---
### Exploring the classification of catcher positioning (one-knee down vs. both knees down) by using the catcher's pose keypoints as the data points for a classification model.




## Data Prep from Pose Points for OKD/NOKD Classification

- Load dataset OKD_NOKD
- Apply YOLO pose detection (using large v8 model) for keypoints
- Create dataframe for analysis


In [29]:
import cv2
import os
import pandas as pd
from ultralytics import YOLO
from scripts.load_tools import load_dataset


# Pose model used to extract body keypoints from every dataset image.
pose_model = YOLO("yolov8l-pose.pt")

# Fetch the dataset described by this file (the run log reports it is
# extracted to the OKD_NOKD folder).
load_dataset("datasets/yolo/OKD_NOKD.txt")

# input folder with OKD and NOKD classification folders
input_folder = "OKD_NOKD/data/"

# Number of keypoints expected per pose (assumed to be 17, as in the
# original cell); each one contributes an x and a y column.
NUM_KEYPOINTS = 17

# columns for df: file name, the two one-hot labels, then x/y per keypoint
columns = ['filename', 'OKD', 'NOKD']
for i in range(NUM_KEYPOINTS):
    columns.extend([f'pose_x_{i}', f'pose_y_{i}'])

# One row per successfully loaded image
pose_data = []

# Process each subfolder; the folder name is the ground-truth label
for subfolder in ['OKD', 'NOKD']:
    subfolder_path = os.path.join(input_folder, subfolder)

    # Determine OKD or NOKD based on classification folder (same for every
    # file in the folder, so compute it once)
    okd = 1 if subfolder == 'OKD' else 0
    nokd = 1 - okd

    for filename in os.listdir(subfolder_path):
        if not filename.lower().endswith(('.jpg', '.jpeg')):
            continue

        input_path = os.path.join(subfolder_path, filename)

        img = cv2.imread(input_path)
        if img is None:
            print(f"{input_path} failed to load.")
            continue

        # run pose with 10% confidence min threshold
        pose_results = pose_model(img, device='mps', verbose=False, conf=0.1)[0]

        # Indexing an empty keypoints container raises IndexError, so check
        # for "nobody detected" before taking the first detection.
        keypoints = pose_results.keypoints
        if keypoints is None or len(keypoints) == 0:
            print(f"{input_path}: no pose detected, keypoints left empty.")
            pose_points = []
        else:
            # normalized (x, y) pairs of the first detected pose
            first_pose = keypoints[0].xyn[0].cpu().numpy().tolist()
            pose_points = [(float(x), float(y)) for x, y in first_pose]

        row = [filename, okd, nokd]
        for i in range(NUM_KEYPOINTS):
            if i < len(pose_points):
                row.extend(pose_points[i])
            else:
                row.extend([None, None])  # None if missing

        pose_data.append(row)

df = pd.DataFrame(pose_data, columns=columns)
print(f"Rows: {df.shape[0]} | Columns: {df.shape[1]}")
df.head()


Downloading Dataset: 100%|██████████| 113M/113M [00:07<00:00, 14.6MiB/s] 


Dataset downloaded and extracted to OKD_NOKD.
Rows: 2816 | Columns: 37


Unnamed: 0,filename,OKD,NOKD,pose_x_0,pose_y_0,pose_x_1,pose_y_1,pose_x_2,pose_y_2,pose_x_3,...,pose_x_12,pose_y_12,pose_x_13,pose_y_13,pose_x_14,pose_y_14,pose_x_15,pose_y_15,pose_x_16,pose_y_16
0,003301.jpg,1,0,0.790011,0.15346,0.802203,0.139449,0.784699,0.137306,0.833918,...,0.958046,0.394738,0.821571,0.58052,0.844192,0.546921,0.926096,0.731617,0.905766,0.720922
1,000608.jpg,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.723342,...,0.882876,0.382014,0.821561,0.536551,0.83279,0.51638,0.849846,0.721451,0.884939,0.651804
2,001270.jpg,1,0,0.72186,0.119212,0.731224,0.102129,0.0,0.0,0.772603,...,0.848121,0.374772,0.845932,0.504056,0.789903,0.536188,0.88247,0.693847,0.799509,0.663481
3,005002.jpg,1,0,0.402278,0.342783,0.418348,0.332589,0.397017,0.330485,0.444214,...,0.409693,0.593397,0.627751,0.542191,0.29084,0.633584,0.713278,0.650721,0.440318,0.643927
4,003473.jpg,1,0,0.810342,0.037531,0.824416,0.018822,0.795775,0.02702,0.86692,...,0.90513,0.312765,0.885468,0.516388,0.838246,0.506505,0.955514,0.730053,0.896856,0.700632


### Fill NaN values with 0 and print the value counts for OKD (should be an even split, with 1408 of each class)

In [12]:
# Replace every missing value (NaN/None) in the frame with 0, in place.
df.fillna(value=0, inplace=True)

# Rows per OKD label; left as the final expression so the notebook shows it.
df[['OKD']].value_counts()


OKD
0      1408
1      1408
Name: count, dtype: int64

## Train AutoML Classification Instance

### Explore different types of scikit-learn models to find the best model for this specific use-case

- Create an 80/20 train/test split based on feature points and target OKD
- Train AutoML classifier for 6 minutes optimizing for accuracy
- Print Accuracy and Classification Report

In [14]:

from flaml import AutoML
from sklearn.model_selection import train_test_split
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import accuracy_score, classification_report
import warnings

# Hide ConvergenceWarning messages from models that stop before converging.
warnings.filterwarnings('ignore', category=ConvergenceWarning)

# Model inputs are the keypoint columns only; the label is the OKD flag.
non_feature_columns = ['filename', 'OKD', 'NOKD']
features = df.drop(columns=non_feature_columns)
target = df['OKD']

# Shuffled 80/20 train/test split, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=22,
    stratify=target,
    shuffle=True,
)

# Search for 6 minutes (360 seconds), optimizing for accuracy.
automl_settings = {
    'task': 'classification',
    'metric': 'accuracy',
    'time_budget': 360,
}
model = AutoML()
model.fit(X_train, y_train, **automl_settings)

print(f"Estimator: {model.best_estimator}")
print(f"Config: {model.best_config}")

# Score the selected model on the held-out 20%.
y_test_predict = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_predict)
report = classification_report(y_test, y_test_predict)

print(f"Test Accuracy: {test_accuracy:.3f}")
print(f"Classification: {report}")


[flaml.automl.logger: 09-27 14:48:04] {1680} INFO - task = classification
[flaml.automl.logger: 09-27 14:48:04] {1691} INFO - Evaluation method: cv
[flaml.automl.logger: 09-27 14:48:04] {1789} INFO - Minimizing error metric: 1-accuracy
[flaml.automl.logger: 09-27 14:48:04] {1901} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']
[flaml.automl.logger: 09-27 14:48:04] {2219} INFO - iteration 0, current learner lgbm
[flaml.automl.logger: 09-27 14:48:04] {2345} INFO - Estimated sufficient time budget=503s. Estimated necessary time budget=12s.
[flaml.automl.logger: 09-27 14:48:04] {2392} INFO -  at 0.1s,	estimator lgbm's best error=0.3148,	best estimator lgbm's best error=0.3148
[flaml.automl.logger: 09-27 14:48:04] {2219} INFO - iteration 1, current learner lgbm
[flaml.automl.logger: 09-27 14:48:04] {2392} INFO -  at 0.1s,	estimator lgbm's best error=0.3131,	best estimator lgbm's best error=0.3131
[flaml.automl.logger: 09-27 14:48:0

## Process Example Video to test for OKD Predictions

### Creates overall function for processing and predicting

- Load Pose, PHC, and Classifier models
- Process video in frame batches for quicker processing
- Predict OKD for a given frame (future iterations need to identify where the pitch starts)
- Save video with predictions (if given an output path)
- Print percentage of frames for video predicted as OKD


In [27]:
import numpy as np
from tqdm import tqdm
from scripts.load_tools import load_model
import matplotlib.pyplot as plt

# Hide warnings whose message matches "X does not have valid feature names".
# NOTE(review): presumably triggered because the classifier was fitted on a
# DataFrame but process_okd_video predicts from a bare NumPy array - confirm.
warnings.filterwarnings("ignore", message=r".*X does not have valid feature names.*")


# create function to process individual video for OKD given models
def process_okd_video(video_path, pose_model, phc_model, model, output_path=None, batch_size=4,
                      device='mps', catcher_class=2) -> list[int]:
    """Predict one-knee-down (OKD) for every frame of a video.

    Frames are read in batches and passed to ``phc_model``. In each frame the
    first detected box whose class id equals ``catcher_class`` is cropped and
    passed to ``pose_model``; the normalized keypoints of the first pose found
    in the crop become a 34-value feature row (zero-filled when no pose is
    found) that ``model`` classifies. Frames without a usable catcher box are
    recorded as 0 without calling the classifier.

    NOTE(review): this function runs the pose model on the catcher crop with
    conf=0.5, while the training cell of this notebook ran it on whole images
    with conf=0.1 - confirm that the features are comparable.

    Args:
        video_path: Path of the video opened with ``cv2.VideoCapture``.
        pose_model: Pose model, called on the cropped catcher image.
        phc_model: Detection model, called on each list of frames.
        model: Classifier exposing ``predict`` for a ``(1, 34)`` array.
        output_path: If given, an annotated mp4 is written to this path.
        batch_size: Number of frames sent to ``phc_model`` per call.
        device: Device string forwarded to both YOLO-style models.
        catcher_class: Class id in ``phc_model`` output treated as the catcher.

    Returns:
        One integer prediction per frame read, in frame order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    n_features = 34  # 17 keypoints * (x, y): the row width fed to the classifier

    cap = cv2.VideoCapture(video_path)
    out = None
    okd_predictions = []

    try:
        # Keep the frame rate as a float: truncating 29.97 to 29 would change
        # the duration of the written video.
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        if output_path:  # save video with predictions if output path is specified
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        # The reported frame count only sizes the progress bar; the loop itself
        # runs until the capture stops returning frames.
        total_batches = -(-total_frames // batch_size) if total_frames > 0 else None

        with tqdm(total=total_batches, desc="Processing batches") as progress:
            while True:
                # process video in batches of frames for quicker processing
                batch_frames = []
                for _ in range(batch_size):
                    ret, frame = cap.read()
                    if not ret:
                        break
                    batch_frames.append(frame)

                if not batch_frames:
                    break

                # perform detections on batch
                phc_results = phc_model(batch_frames, device=device, verbose=False)

                for frame, phc_result in zip(batch_frames, phc_results):
                    okd_pred, box = _predict_frame_okd(
                        frame, phc_result, pose_model, model, device, catcher_class, n_features
                    )
                    okd_predictions.append(okd_pred)

                    if out is None:
                        continue

                    if box is not None:
                        x1, y1, x2, y2 = box
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
                        cv2.putText(frame, "Catcher", (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
                        cv2.putText(frame, f"OKD: {'Yes' if okd_pred == 1 else 'No'}", (10, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0) if okd_pred == 1 else (0, 0, 255), 2)

                    # Write immediately instead of buffering the whole video in memory.
                    out.write(frame)

                progress.update(1)
    finally:
        # Release the capture and the writer even if a model call fails.
        cap.release()
        if out is not None:
            out.release()

    return okd_predictions


def _predict_frame_okd(frame, phc_result, pose_model, model, device, catcher_class, n_features):
    """Return ``(prediction, (x1, y1, x2, y2) or None)`` for a single frame."""
    catcher_box = None
    for box in phc_result.boxes:
        if int(box.cls) == catcher_class:
            catcher_box = box.xyxy[0].cpu().numpy()  # extract catcher box coordinates
            break

    if catcher_box is None:
        return 0, None

    # predict pose within catcher's box
    x1, y1, x2, y2 = map(int, catcher_box)
    catcher_frame = frame[y1:y2, x1:x2]
    if catcher_frame.size == 0:
        # A zero-area crop cannot be passed to the pose model.
        return 0, None

    pose_result = pose_model(catcher_frame, device=device, verbose=False, conf=0.5)[0]
    features = _pose_features(pose_result, n_features)
    okd_pred = int(model.predict(features)[0])  # predict with classifier model
    return okd_pred, (x1, y1, x2, y2)


def _pose_features(pose_result, n_features):
    """Flatten the first detected pose into a zero-padded ``(1, n_features)`` row."""
    features = np.zeros(n_features, dtype=np.float32)
    keypoints = pose_result.keypoints
    if keypoints is not None and len(keypoints) > 0:
        flat = keypoints[0].xyn[0].cpu().numpy().reshape(-1)[:n_features]
        features[:len(flat)] = flat
    return features.reshape(1, -1)


# Load the pose model and the pitcher/hitter/catcher (PHC) detector weights.
pose_model = YOLO("yolov8l-pose.pt")
phc_model = YOLO(load_model("phc_detector"))

video_path = "assets/example_broadcast_video.mp4"
output_path = "test_okd.mp4"

# `model` is the AutoML classifier fitted in the training cell.
okd_predictions = process_okd_video(video_path, pose_model, phc_model, model, output_path, batch_size=4)

okd_count = sum(okd_predictions)
total_frames = len(okd_predictions)

# A video that cannot be read yields no predictions; do not divide by zero.
if total_frames:
    print(f"Predicted OKD in {okd_count/total_frames:.1%} of {total_frames} frames.")
else:
    print(f"No frames were read from {video_path}; nothing to summarize.")




Model found at models/pitcher_hitter_catcher_detector/model_weights/pitcher_hitter_catcher_detector_v3.pt


Processing batches:   0%|          | 0/98 [00:00<?, ?it/s]



Processing batches: 100%|██████████| 98/98 [00:52<00:00,  1.85it/s]


Predicted OKD in 71.7% of 392 frames.


## **CONGRATS!** You utilized the OKD / NOKD dataset and pose estimation to train a classifier to predict if a catcher is in a one-knee down position!

### The classifier model and its relevant information can be found in the models/okd_nokd_classifier folder.