# Mediapipe inference on static Yoga pose image dataset

In [10]:
# Importing libs for the project

import json
import os
from pathlib import Path
import cv2
import mediapipe as mp
import numpy as np
from sklearn.metrics import pairwise_distances

# Initialize MediaPipe Pose
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.5)

I0000 00:00:1723675656.289234    5921 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1723675656.313953    6199 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 535.183.01), renderer: NVIDIA GeForce RTX 3070/PCIe/SSE2


## Dataset pre-processing

In [11]:
# Access dataset
def load_coco_annotations(json_path):
    with open(json_path, 'r') as f:
        data = json.load(f)
    return data

# OpenCV standard img processing
def process_image(image_path):
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pose.process(image_rgb)
    return results

# Dataset conversion and processing
def coco_keypoints_to_mediapipe(kpts, image_width, image_height):
    mediapipe_kpts = []
    for i in range(0, len(kpts), 3):
        kpx = kpts[i] / image_width
        kpy = kpts[i + 1] / image_height
        v = kpts[i + 2]  # visibility
        #Extracting keypoints
        mediapipe_kpts.append((kpx, kpy, v))
    return mediapipe_kpts

## Defining metrics

In [12]:
#Object Keypoint Similarity
def calculate_oks(y_true, y_pred, bbox_area, sigmas=None):
    if sigmas is None:
        #Values are taken from COCO database (constants)
        sigmas = np.array([0.26, 0.25, 0.25, 0.35, 0.35, 0.79, 0.79, 0.72, 0.72, 0.62, 0.62, 0.79, 0.79, 0.72, 0.72, 0.62, 0.62])
    
    y_true = np.array(y_true).reshape(-1, 2)
    y_pred = np.array(y_pred).reshape(-1, 2)
    d = pairwise_distances(y_true, y_pred)  #euclidian distance
    # Shows similarity between predicted keypoints and ground truth
    oks = np.exp(-d ** 2 / (2 * bbox_area * (sigmas ** 2)))
    return np.mean(oks)

#Percentage of Correct Parts
def calculate_pcp(y_true, y_pred, thresh=0.5):
    correct_parts = 0
    total_parts = len(y_true) // 2  #A part or full joint is considered as two keypoints
    for i in range(total_parts):
        if np.linalg.norm(np.array(y_true[i]) - np.array(y_pred[i])) < thresh:
            correct_parts += 1
    #Evaluates how many full joints are being correctly predicted compared to the entire body
    return correct_parts / total_parts

## Applying model

In [13]:
def process_dataset(coco_json_path, image_dir):
    annotations = load_coco_annotations(coco_json_path)
    image_id_to_annotations = {ann['image_id']: ann for ann in annotations['annotations']}
    oks_scores = []
    pcp_scores = []

    for image_info in annotations['images']:
        image_id = image_info['id']
        image_path = os.path.join(image_dir, image_info['file_name'])
        image_width = image_info['width']
        image_height = image_info['height']

        results = process_image(image_path)

        # COCO ground truth keypoints
        gt_ann = image_id_to_annotations[image_id]
        gt_keypoints = coco_keypoints_to_mediapipe(gt_ann['keypoints'], image_width, image_height)

        # MediaPipe keypoints
        if results.pose_landmarks:
            mp_keypoints = []
            for landmark in results.pose_landmarks.landmark:
                mp_keypoints.append((landmark.x, landmark.y))
        else:
            mp_keypoints = [(0, 0) for _ in range(len(gt_keypoints))]  # default to zero if no keypoints detected

        # Calculate OKS
        oks = calculate_oks(gt_keypoints, mp_keypoints, bbox_area=gt_ann['area'])
        oks_scores.append(oks)

        # Calculate PCP
        pcp = calculate_pcp(gt_keypoints, mp_keypoints)
        pcp_scores.append(pcp)

    return np.mean(oks_scores), np.mean(pcp_scores)

## Results

In [14]:
# Accessing dataset
coco_train_json = 'yogapose-dataset/yogapose-dataset/train/_annotations.coco.json'
coco_val_json = 'yogapose-dataset/yogapose-dataset/val/_annotations.coco.json'
coco_test_json = 'yogapose-dataset/yogapose-dataset/test/_annotations.coco.json'

image_train_dir = 'yogapose-dataset/yogapose-dataset/train'
image_val_dir = 'yogapose-dataset/yogapose-dataset/val'
image_test_dir = 'yogapose-dataset/yogapose-dataset/test'

#Training and validating
oks_train, pcp_train = process_dataset(coco_train_json, image_train_dir)
oks_val, pcp_val = process_dataset(coco_val_json, image_val_dir)
oks_test, pcp_test = process_dataset(coco_test_json, image_test_dir)

print(f"Train OKS: {oks_train:.4f}, PCP: {pcp_train:.4f}")
print(f"Validation OKS: {oks_val:.4f}, PCP: {pcp_val:.4f}")
print(f"Test OKS: {oks_test:.4f}, PCP: {pcp_test:.4f}")

KeyError: 'keypoints'

## Inference on image

In [16]:
import matplotlib.pyplot as plt

image_path = 'yogapose-dataset/yogapose-dataset/train/446_jpg.rf.81efb5629ce52cab0a0f6680e8b68ecf.jpg'  # Replace with the path to image
results = process_image(image_path)

image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
plt.imshow(image)

# Add keypoints to the image
for landmark in results.pose_landmarks.landmark:
    x = int(landmark.x * image.shape[1])
    y = int(landmark.y * image.shape[0])
    cv2.circle(image, (x, y), 5, (0, 255, 0), -1)

plt.axis('off')
plt.show()

[ WARN:0@496.377] global loadsave.cpp:248 findDecoder imread_('yogapose-dataset/yogapose-dataset/train/446_jpg.rf.81efb5629ce52cab0a0f6680e8b68ecf.jpg'): can't open/read file: check file path/integrity


error: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'
