## Preprocess FS-Jump3D
- 2D data: keypoints estimated with DWposeDetector
- 3D data: from JSON files, formatted to h36m (Human3.6M)

In [None]:
import cv2
import numpy as np
import sys
import os
import glob

sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))

from tqdm import tqdm
from dwpose.scripts.dwpose import DWposeDetector
from dwpose.scripts.tool import read_frames
from PIL import Image
import torch
import warnings
import json
from ultralytics import YOLO

from pathlib import Path

warnings.filterwarnings("ignore")

In [None]:
# Ask for CUDA device-side assertions.
# NOTE(review): this runs after `import torch` in the imports cell — confirm
# the variable is still honoured when set at this point (it may only matter
# at PyTorch build time).
os.environ["TORCH_USE_CUDA_DSA"] = "1"

In [3]:
def load_json(json_file: str) -> dict:
    """Read a JSON file and return the decoded content.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for that file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode as UTF-8 explicitly: without it, open() falls back to the locale
    # encoding, which is not UTF-8 on a default Windows setup.
    with open(json_file, "r", encoding="utf-8") as f:
        return json.load(f)

In [None]:
# Frame ranges loaded from ranges.json (path is relative to the working
# directory). estimate2d looks entries up by "<skater>-<jump>" and reads the
# "start" and "end" values of each entry.
frame_range = load_json("./ranges.json")

In [None]:
# Prefer the first CUDA device when one is available, otherwise stay on CPU.
device = (
    torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
)

# Build the pose detector from the two config files and move it onto `device`.
# No checkpoint arguments (det_ckpt / pose_ckpt) are passed; the recorded cell
# output shows the checkpoints being fetched over HTTP in that case.
# NOTE(review): the config paths are absolute and specific to one machine.
detector = DWposeDetector(
    det_config="D:\\github\\skating-ai\\v3\\pose\\dwpose\\config\\yolox_l_8xb8-300e_coco.py",
    pose_config="D:\\github\\skating-ai\\v3\\pose\\dwpose\\config\\dwpose-l_384x288.py",
    keypoints_only=True,
).to(device)

Loads checkpoint by http backend from path: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth
Loads checkpoint by http backend from path: https://huggingface.co/wanghaofan/dw-ll_ucoco_384/resolve/main/dw-ll_ucoco_384.pth


In [6]:
# Display the selected device to confirm whether CUDA was picked up.
device

device(type='cuda', index=0)

In [None]:
# Load the "yolov8n.pt" weights. detect_person calls this model with
# classes=[0] to find the largest detected box in a frame.
yolo_model = YOLO("yolov8n.pt")

In [None]:
def detect_person(frame, num_person):
    """Return the index of the largest class-0 box YOLO finds in ``frame``.

    The module-level ``yolo_model`` is run on the CPU, restricted to class 0,
    and the resulting boxes are scanned for the one with the largest area.

    Args:
        frame: Image handed unchanged to ``yolo_model``.
        num_person: Clamp for the result; the returned index is never larger
            than ``num_person - 1``.

    Returns:
        The position of the largest box in YOLO's output, clamped to
        ``num_person - 1``, or ``-1`` when no box with a positive area exists.

    NOTE(review): the index refers to YOLO's box order, while the caller uses
    it to index the pose detector's people — confirm both orderings agree.
    """
    boxes = yolo_model(frame, classes=[0], verbose=False, device="cpu")[0].boxes
    if len(boxes) == 0:
        return -1

    best_idx, best_area = -1, 0
    for box_no, box in enumerate(boxes):
        left, top, right, bottom = box.xyxy[0].cpu().numpy()
        box_area = (right - left) * (bottom - top)
        # Strict comparison: on ties the earliest box keeps the slot.
        if box_area > best_area:
            best_area = box_area
            best_idx = min(box_no, num_person - 1)
    return best_idx

In [None]:
def estimate2d(video_path, detector):
    """Estimate per-frame 2D keypoints for the annotated range of one video.

    The skater name (third path component from the end) and the file stem form
    the ``"<skater>-<jump>"`` key used to look up the frame range in the
    module-level ``frame_range``. Only frames in that range are run through
    ``detector``.

    Args:
        video_path: Path to the video, laid out as
            ``.../<skater>/<camera>/<jump>.<ext>``.
        detector: Callable returning, for one frame, a dict whose
            ``["bodies"]["candidate"]`` and ``["bodies"]["subset"]`` entries
            are arrays; ``subset`` has one row per detected person.

    Returns:
        ``numpy.ndarray`` of shape ``(frames, joints - 1, 3)``: x, y and the
        matching ``subset`` value for every joint except the first one.

    Raises:
        KeyError: If ``frame_range`` has no entry for this skater/jump.
        ValueError: If no pose was detected in any frame of the range.
    """
    video = Path(video_path)
    # Index from the end, as process_video does, so the lookup does not depend
    # on how deep the dataset root sits in the file system.
    skater = video.parts[-3].lower()
    jump = video.stem.lower()

    range_key = f"{skater}-{jump}"
    start = int(frame_range[range_key]["start"])
    # +1 so the slice below includes the frame at the stored "end" index.
    end = int(frame_range[range_key]["end"]) + 1

    frames = read_frames(video_path)[start:end]

    kpts2d = []
    score2d = []

    # Stays None until a frame with several people forces a choice.
    person_idx = None

    for frame in frames:
        pose = detector(frame)
        candidate = pose["bodies"]["candidate"]
        subset = pose["bodies"]["subset"]
        num_person = subset.shape[0]

        if num_person == 0:
            # NOTE(review): the sequence is cut at the first frame without a
            # detection, so the result may be shorter than the annotated
            # range — confirm downstream code expects that.
            break

        num_joints = subset.shape[1]
        keypoint = candidate.reshape(num_person, num_joints, 2)

        if num_person == 1:
            # Unambiguous frame: take the only person, dropping joint 0.
            kpts2d.append(keypoint[0, 1:])
            score2d.append(subset[0, 1:])
            continue

        # (Re)select the person on the first multi-person frame and whenever
        # the remembered index no longer exists in this frame.
        if person_idx is None or person_idx >= num_person:
            person_idx = detect_person(frame, num_person)
            if not 0 <= person_idx < num_person:
                # detect_person returns -1 when it finds nothing; a negative
                # index would silently select the last person.
                person_idx = 0

        kpts2d.append(keypoint[person_idx, 1:])
        score2d.append(subset[person_idx, 1:])

    if not kpts2d:
        raise ValueError(
            f"No pose detected in {video_path} for frames [{start}, {end})"
        )

    kpts2d = np.array(kpts2d)
    score2d = np.array(score2d)

    # Append the score as a third channel next to (x, y).
    keypoints = np.concatenate([kpts2d, score2d[..., np.newaxis]], axis=-1)

    return keypoints

In [None]:
def process_video(video_path):
    """Estimate 2D keypoints for one video and save them as a ``.npy`` file.

    The output file is named ``<skater>_<camera>_<stem>_2D.npy`` (all lower
    case), where skater and camera are the two directories above the video,
    and it is written into the module-level ``output_dir``. Videos whose
    output file already exists are skipped.

    Args:
        video_path: Path to the video, laid out as
            ``.../<skater>/<camera>/<name>.<ext>``.

    Returns:
        None.
    """
    video = Path(video_path)
    skater = video.parts[-3].lower()
    camera = video.parts[-2].lower()
    filename = video.stem.lower()
    output_path = os.path.join(output_dir, f"{skater}_{camera}_{filename}_2D.npy")

    if os.path.exists(output_path):
        return

    keypoints = estimate2d(video_path, detector)

    # Write to a temporary file and rename it afterwards: an interrupted save
    # would otherwise leave a truncated file that the existence check above
    # treats as finished on the next run. The temporary name keeps the .npy
    # suffix so np.save does not append another one.
    tmp_path = f"{output_path}.tmp.npy"
    try:
        np.save(tmp_path, keypoints)
        os.replace(tmp_path, output_path)
    finally:
        # Only present here if saving or renaming failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

In [10]:
# Collect every .mp4 below the dataset root (recursive glob). process_video
# reads the skater and camera names from each file's two parent directories.
# NOTE(review): both paths are absolute and machine-specific — adjust them
# before running this notebook elsewhere.
video_paths = glob.glob("D:\\github\\FS-Jump3D\\data\\**\\*.mp4", recursive=True)
# Destination directory used by process_video; created if it does not exist.
output_dir = "D:\\github\\MotionAGFormer\\data\\keypoints"
os.makedirs(output_dir, exist_ok=True)

In [None]:
from concurrent.futures import ThreadPoolExecutor
from threading import Lock
from tqdm import tqdm

# One module-wide lock shared by every worker thread.
lock = Lock()


def locked_process_video(video_path):
    """Run ``process_video`` on ``video_path`` while holding ``lock``.

    The lock is held for the whole call, so calls made from different threads
    execute one after another.

    Args:
        video_path: Path forwarded unchanged to ``process_video``.

    Returns:
        Whatever ``process_video`` returns.
    """
    with lock:
        outcome = process_video(video_path)
    return outcome


# Usage: queue every video on a 4-thread pool, then collect the futures in
# submission order so the tqdm bar advances once per video.
# NOTE: locked_process_video holds a single global lock for the whole call, so
# only one video is processed at a time regardless of max_workers.
# future.result() re-raises any exception raised inside a worker.
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(locked_process_video, path) for path in video_paths]
    results = [future.result() for future in tqdm(futures, desc="Processing")]

Processing:   0%|                                                                                                      | 0/3036 [00:00<?, ?it/s]