https://github.com/open-mmlab/mmpose/blob/main/docs/en/user_guides/inference.md

The inferencer is capable of processing a range of input types, which includes the following:

A path to an image
A path to a video
A path to a folder (which will cause all images in that folder to be inferred)
An image array (not available in the CLI tool)
A list of image arrays (not available in the CLI tool)
A webcam (in which case the input parameter should be set to either 'webcam' or 'webcam:{CAMERA_ID}')

## Step 1: Verify installed dependencies

In [None]:
# Check Pytorch installation
import torch, torchvision

# Prints the torch version followed by two booleans: CUDA availability, then MPS availability.
print('torch version:', torch.__version__, torch.cuda.is_available(), torch.backends.mps.is_available())
print('torchvision version:', torchvision.__version__)

# Check MMPose installation
import mmpose

print('mmpose version:', mmpose.__version__)

# Check mmcv installation
# Importing from mmcv.ops also confirms that mmcv's ops module can be loaded.
from mmcv.ops import get_compiling_cuda_version, get_compiler_version

# Presumably the CUDA version and compiler mmcv was built with — confirm against the mmcv docs.
print('cuda version:', get_compiling_cuda_version())
print('compiler information:', get_compiler_version())

from mmpose.apis import MMPoseInferencer

import glob
from datetime import datetime
import json

## Step 2: Read in video files

In [None]:
# Folder that holds the input videos (machine-specific Windows path; edit for your setup).
PATH = "C:\\awilde\\britta\\LTU\\SkiingProject\\SkiersProject\\Data"
# Output folder, later passed to the inferencer as out_dir.
RESULT_PATH = "./resultsMMPose/video_output"
# Folder the conversion step reads prediction JSON files from.
# Presumably where the inferencer writes them below out_dir — TODO confirm.
PATH_ANNO = RESULT_PATH + "/predictions"

# Collect every .mp4 file directly inside PATH (the "\\" separator is Windows-specific).
video_files = glob.glob(PATH + "\\*.mp4")
# Show how many videos were found.
print(len(video_files))

## Step 3: Use MMPose Inferencer to predict keypoints for each video

In [None]:
# Use CUDA when it is available, otherwise the CPU.
# MPS is deliberately not selected (original note: not implemented).
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Build the inferencer from the 'human' model alias.
inferencer = MMPoseInferencer('human', device=device)

for video_path in video_files:
    # Calling the inferencer is lazy: it only returns a prediction generator.
    result_generator = inferencer(video_path, show=True, out_dir=RESULT_PATH)
    # Draining the generator drives it to completion for this video;
    # `results` is rebound on every iteration, so it only ever holds the
    # output of the most recent video.
    results = list(result_generator)

## Step 4: Create annotations in COCO format

In [None]:
# Convert a JSON prediction file produced by MMPose into COCO keypoint format.
# Instances whose mean keypoint score is below the threshold (default 0.5) are dropped.
def convert_to_coco_format(input_file, output_file, score_threshold=0.5,
                           width=1920, height=1080):
    """Convert a per-frame prediction JSON file into a COCO keypoint JSON file.

    The input is read as a list of frames. Each frame needs a ``"frame_id"``
    (an integer) and an ``"instances"`` list. For every instance the code
    reads ``"keypoints"`` (a list of ``(x, y)`` pairs), ``"keypoint_scores"``
    and ``"bbox"`` (a list whose first entry is treated as
    ``[x_min, y_min, x_max, y_max]``).

    Args:
        input_file: Path of the prediction JSON file to read.
        output_file: Path of the COCO JSON file to write.
        score_threshold: Instances whose mean keypoint score is below this
            value (or that have no scores) are skipped entirely.
        width: Image width stored in every ``images`` entry. Defaults to
            1920; set it to the resolution of the source video.
        height: Image height stored in every ``images`` entry. Defaults to
            1080; set it to the resolution of the source video.

    Returns:
        None. The result is written to ``output_file``.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Take one timestamp so "year" and "date_created" cannot disagree.
    now = datetime.now()

    coco_format = {
        "info": {
            "description": "Skiers dataset",
            "url": "",  # Add a relevant link if available
            "version": "1.0",
            "year": now.year,
            "contributor": "LTU",
            "date_created": now.strftime("%Y-%m-%d")
        },
        "licenses": [
            {
                "id": 0,
                "name": "",
                "url": ""
            }
        ],
        "images": [],
        "annotations": [],
        "categories": []
    }

    # Single category; keypoints are named "1".."17" and the skeleton pairs
    # refer to those 1-based numbers.
    category_id = 1
    coco_format["categories"].append({
        "id": category_id,
        "name": "body",
        "supercategory": "",
        "keypoints": [
            "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
            "13", "14", "15", "16", "17"
        ],
        "skeleton": [
            [1, 12], [1, 2], [10, 8], [13, 1], [6, 1], [15, 13], [4, 2],
            [14, 16], [7, 9], [12, 14], [1, 7], [8, 6], [17, 15], [9, 11],
            [1, 3], [3, 5]
        ]
    })

    annotation_id = 1
    for frame in data:
        frame_id = frame["frame_id"]

        # One image entry per frame, even when no instance passes the filter.
        coco_format["images"].append({
            "id": frame_id,
            "file_name": f"frame_{frame_id:06d}.png",
            "width": width,
            "height": height,
            "license": 0,
            "flickr_url": "",
            "coco_url": "",
            "date_captured": 0
        })

        for instance in frame["instances"]:
            scores = instance.get("keypoint_scores", [])
            if not scores or sum(scores) / len(scores) < score_threshold:
                # Drop the whole instance when its mean score is too low.
                continue

            # Only the first box is used. `or` also covers an empty list,
            # which would otherwise fail on the [0] lookup.
            bbox = (instance.get("bbox") or [[0, 0, 0, 0]])[0]
            x_min, y_min, x_max, y_max = bbox[:4]
            box_width = x_max - x_min
            box_height = y_max - y_min

            # Flatten keypoints into COCO layout [x1, y1, v1, x2, y2, v2, ...].
            # NOTE(review): the flag is 1 for any positive score. The COCO
            # spec uses 2 for "labeled and visible" — confirm what downstream
            # consumers expect before changing it.
            keypoints = []
            for i, (x, y) in enumerate(instance["keypoints"]):
                v = 1 if scores[i] > 0 else 0
                keypoints.extend([x, y, v])

            coco_format["annotations"].append({
                "id": annotation_id,
                "image_id": frame_id,
                "category_id": category_id,
                "bbox": [x_min, y_min, box_width, box_height],  # x, y, width, height
                "area": box_width * box_height,
                "keypoints": keypoints,
                "num_keypoints": sum(1 for v in keypoints[2::3] if v > 0),
                "iscrowd": 0,
                "segmentation": []
            })
            annotation_id += 1

    # Write to output file
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(coco_format, f, indent=4)


In [None]:
import os

# Destination folder for the converted COCO files (machine-specific path; edit for your setup).
OUTPUT_FOLDER = "/Users/emillundin/Desktop/Ski_project/output_coco_format"
# Create the folder if needed. exist_ok=True replaces the separate
# exists() check, so there is no gap between checking and creating.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Every prediction JSON file found directly inside PATH_ANNO.
annotation_files = glob.glob(PATH_ANNO + "/*.json")
print(len(annotation_files))

for anno in annotation_files:
    # splitext strips only the final extension, so names that contain extra
    # dots keep their full stem and cannot collapse onto the same output file.
    filename = os.path.splitext(os.path.basename(anno))[0] + "_coco.json"
    output_path = os.path.join(OUTPUT_FOLDER, filename)
    convert_to_coco_format(anno, output_path)


## Create skeleton gif

In [None]:
import matplotlib.pyplot as plt
import json
from PIL import Image
import shutil


# Function to extract keypoints from JSON
def get_keypoints(frame_index, data, width=1920, height=1080):
    """Return the mirrored (x, y) keypoints of one annotation.

    Args:
        frame_index: Position of the annotation inside ``data['annotations']``.
            NOTE(review): this is a list position and is not matched against
            ``image_id``, so it equals the video frame number only when the
            file holds exactly one annotation per frame — confirm for your data.
        data: Loaded COCO dictionary with an ``'annotations'`` list whose
            entries carry a flat ``'keypoints'`` list ``[x1, y1, v1, x2, ...]``.
        width: Value each x coordinate is subtracted from. Defaults to 1920.
        height: Value each y coordinate is subtracted from. Defaults to 1080.

    Returns:
        A list of ``(width - x, height - y)`` tuples, one per keypoint; the
        visibility values are dropped.
    """
    flat = data['annotations'][frame_index]['keypoints']
    # Step through the flat list three values at a time (x, y, visibility).
    return [
        (width - flat[i], height - flat[i + 1])
        for i in range(0, len(flat), 3)
    ]

# Function to plot a single skeleton frame
def plot_skeleton_frame(keypoints_xy, skeleton, save_path, frame_index):
    """Draw one skeleton frame and save it as an image.

    Args:
        keypoints_xy: Sequence of ``(x, y)`` keypoint positions.
        skeleton: Iterable of ``(start, end)`` pairs; each value is used as
            an index into ``keypoints_xy``.
        save_path: File path the figure is saved to.
        frame_index: Number shown in the plot title.
    """
    xs = [point[0] for point in keypoints_xy]
    ys = [point[1] for point in keypoints_xy]

    figure, axes = plt.subplots()

    # Keypoints as red dots.
    axes.scatter(xs, ys, color='red')

    # One black line per connected keypoint pair.
    for joint_a, joint_b in skeleton:
        axes.plot([xs[joint_a], xs[joint_b]], [ys[joint_a], ys[joint_b]], 'k-', lw=2)

    axes.set_xlabel('X')
    axes.set_ylabel('Y')
    axes.set_title(f'Skeleton Frame {frame_index}')

    # Save, then close the figure so it is not kept around between frames.
    plt.savefig(save_path)
    plt.close(figure)

# Generate frames and save them as images
def generate_frames(starting_time,
                    ending_time,
                    temp_folder_path,
                    data,
                    skeleton):
    """Render one skeleton image per index in ``[starting_time, ending_time)``.

    Args:
        starting_time: First index passed to ``get_keypoints`` (inclusive).
            Despite the name, the values are used as indices, not seconds.
        ending_time: Index at which rendering stops (exclusive).
        temp_folder_path: Folder the images are written to; created if missing.
        data: Loaded COCO dictionary handed on to ``get_keypoints``.
        skeleton: Keypoint index pairs handed on to ``plot_skeleton_frame``.

    Returns:
        The list of written image paths, in rendering order. Files are named
        ``frame_0.png``, ``frame_1.png``, ... counting from the first
        rendered index.
    """
    os.makedirs(temp_folder_path, exist_ok=True)

    image_paths = []
    for output_number, frame_index in enumerate(range(starting_time, ending_time)):
        points = get_keypoints(frame_index, data)
        target = temp_folder_path + f"/frame_{output_number}.png"
        plot_skeleton_frame(points, skeleton, target, frame_index)
        image_paths.append(target)
    return image_paths


def create_skeleton_gif(coco_json_path,
                        skeleton,
                        starting_frame,
                        ending_frame,
                        temp_folder_path,
                        frames_per_second,
                        gif_file_path,
                        remove_frame_folder = False
                        ):
    """Render a range of skeleton frames and combine them into an animated GIF.

    Args:
        coco_json_path: Path of the COCO JSON file to load.
        skeleton: Keypoint index pairs passed on to ``generate_frames``.
        starting_frame: First index to render (inclusive).
        ending_frame: Index at which rendering stops (exclusive).
        temp_folder_path: Folder the intermediate frame images are written to.
        frames_per_second: Playback rate of the GIF; each frame is shown for
            ``1000 / frames_per_second`` milliseconds.
        gif_file_path: Path of the GIF file to write.
        remove_frame_folder: When true, ``temp_folder_path`` is deleted after
            the GIF has been written.

    Raises:
        IndexError: If no frame was rendered (empty frame range).
    """
    # Load the JSON file
    with open(coco_json_path, encoding='utf-8') as f:
        data = json.load(f)

    image_paths = generate_frames(starting_frame, ending_frame, temp_folder_path, data, skeleton)

    # Compile the frames into a GIF
    frames = []
    try:
        for image_path in image_paths:
            frames.append(Image.open(image_path))
        frames[0].save(gif_file_path, save_all=True, append_images=frames[1:],
                       duration=1000/frames_per_second, loop=0)
    finally:
        # Release every opened image, also when saving fails. Open handles
        # would otherwise leak and can stop the folder below from being
        # deleted on Windows.
        for frame in frames:
            frame.close()

    if remove_frame_folder:
        shutil.rmtree(temp_folder_path)

In [None]:
# Converted COCO file to animate (machine-specific path; edit for your setup).
coco_json_path = '/Users/emillundin/Desktop/Ski_project/output_coco_format/DJI_0001_coco.json'

# Keypoint pairs to join with a line, written with 1-based keypoint numbers.
skeleton = [
    [1, 12], [1, 2], [10, 8], [13, 1], [6, 1], [15, 13], [4, 2], [14, 16],
    [7, 9], [12, 14], [1, 7], [8, 6], [17, 15], [9, 11], [1, 3], [3, 5],
    # additional links for more coherent lines
    [6, 12], [12, 13], [7, 13],
]
# Shift every number down by one so the pairs can index Python lists directly.
skeleton = [[joint - 1 for joint in pair] for pair in skeleton]

# Part of the video to animate, in seconds.
starting_time = 26
ending_time = 32
# Frame rate of the source video; change it to match your footage.
frames_per_second = 30
# Playback rate of the output gif.
gif_frames_per_second = 15

# Convert the clip boundaries from seconds to frame numbers.
starting_frame = starting_time * frames_per_second
ending_frame = ending_time * frames_per_second

# Folder for the intermediate frame images and the final gif file.
temp_folder_path = "skeleton_images"
gif_file_path = 'skeleton_animation.gif'

create_skeleton_gif(
    coco_json_path=coco_json_path,
    skeleton=skeleton,
    starting_frame=starting_frame,
    ending_frame=ending_frame,
    temp_folder_path=temp_folder_path,
    frames_per_second=gif_frames_per_second,
    gif_file_path=gif_file_path,
    remove_frame_folder=True,
)