In [1]:
# Copyright (c) OpenMMLab. All rights reserved.
import pickle
import argparse

import cv2
import mmcv
import mmengine
import torch
from mmengine import DictAction
from mmengine.utils import track_iter_progress

from mmaction.apis import (detection_inference,
                           init_recognizer, pose_inference)



In [2]:
def frame_extraction(video_path, short_side):
    """Extract frames given video_path.

    Args:
        video_path (str): The video_path.
    """
    # Load the video, extract frames into ./tmp/video_name
    target_dir = osp.join('./tmp', osp.splitext(video_path)[0])
    os.makedirs(target_dir, exist_ok=True)
    # Should be able to handle videos up to several hours
    frame_tmpl = osp.join(target_dir, 'img_{:06d}.jpg')
    vid = cv2.VideoCapture(video_path)
    frames = []
    frame_paths = []
    flag, frame = vid.read()
    cnt = 0
    new_h, new_w = None, None
    while flag:
        if new_h is None:
            h, w, _ = frame.shape
            new_w, new_h = mmcv.rescale_size((w, h), (short_side, np.Inf))

        frame = mmcv.imresize(frame, (new_w, new_h))

        frames.append(frame)
        frame_path = frame_tmpl.format(cnt + 1)
        frame_paths.append(frame_path)

        cv2.imwrite(frame_path, frame)
        cnt += 1
        flag, frame = vid.read()

    return frame_paths, frames

In [3]:
def parse_args():
    parser = argparse.ArgumentParser(description='MMAction2 demo')
    # parser.add_argument('video', help='video file/url')
    # parser.add_argument('out_filename', help='output filename')

    parser.add_argument(
        '--det-config',
        default='demo/demo_configs/faster-rcnn_r50_fpn_2x_coco_infer.py',
        help='human detection config file path (from mmdet)')
    parser.add_argument(
        '--det-checkpoint',
        default=('http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/'
                 'faster_rcnn_r50_fpn_2x_coco/'
                 'faster_rcnn_r50_fpn_2x_coco_'
                 'bbox_mAP-0.384_20200504_210434-a5d8aa15.pth'),
        help='human detection checkpoint file/url')
    parser.add_argument(
        '--det-score-thr',
        type=float,
        default=0.9,
        help='the threshold of human detection score')
    parser.add_argument(
        '--det-cat-id',
        type=int,
        default=0,
        help='the category id for human detection')
    
    parser.add_argument(
        '--pose-config',
        default='demo/demo_configs/'
        'td-hm_hrnet-w32_8xb64-210e_coco-256x192_infer.py',
        help='human pose estimation config file path (from mmpose)')
    parser.add_argument(
        '--pose-checkpoint',
        default=('https://download.openmmlab.com/mmpose/top_down/hrnet/'
                 'hrnet_w32_coco_256x192-c78dce93_20200708.pth'),
        help='human pose estimation checkpoint file/url')
    parser.add_argument(
        '--label-map',
        default='tools/data/skeleton/label_map_ntu60.txt',
        help='label map file')
    parser.add_argument(
        '--device', type=str, default='cuda:0', help='CPU/CUDA device option')
    

    args = parser.parse_args()
    return args

In [4]:
def main(video_idx):
    video_path = file_paths[video_idx]
    args = parse_args()
    # video_path = args.video
    frame_paths, original_frames = frame_extraction(video_path,
                                                    args.short_side)
    num_frame = len(frame_paths)
    h, w, _ = original_frames[0].shape

    tmp_frame_dir = osp.dirname(frame_paths[0])
    print(f"tmp_frame_dir: {tmp_frame_dir}")

    # Get Human detection results
    det_results = detection_inference(args, frame_paths)
    torch.cuda.empty_cache()

    pose_results = pose_inference(args, frame_paths, det_results)
    torch.cuda.empty_cache()

    fake_anno = dict(
        frame_dir='',
        label=-1,
        img_shape=(h, w),
        original_shape=(h, w),
        start_index=0,
        modality='Pose',
        total_frames=num_frame)

    num_person = max([len(x) for x in pose_results])
    # Current PoseC3D models are trained on COCO-keypoints (17 keypoints)
    num_keypoint = 17
    keypoint = np.zeros((num_person, num_frame, num_keypoint, 2),
                        dtype=np.float16)
    keypoint_score = np.zeros((num_person, num_frame, num_keypoint),
                              dtype=np.float16)
    for i, poses in enumerate(pose_results):
        for j, pose in enumerate(poses):
            pose = pose['keypoints']
            keypoint[j, i] = pose[:, :2]
            keypoint_score[j, i] = pose[:, 2]
    fake_anno['keypoint'] = keypoint
    fake_anno['keypoint_score'] = keypoint_score

       # save fake_anno to pkl
    with open(f'{tmp_frame_dir}/anno.pkl', 'wb') as f:
        pickle.dump(fake_anno, f)






In [5]:

import os

# List of folder names from 1 to 17
folder_names = [str(i) for i in range(0, 16)]

# Initialize an empty list to store the file paths
file_paths = []

# Iterate over each folder
for folder_name in folder_names:
    folder_path = f"SetA1_clip/{folder_name}"
    if os.path.isdir(folder_path):
        # Walk through the folder directory tree
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                file_paths.append(file_path)

# # Print the file paths
# for file_path in file_paths:
#     print(file_path)
#     main(file_path)

In [6]:
len(file_paths)

4644

In [8]:
from pathlib import Path
from tqdm import tqdm, auto
from multiprocessing import pool
num_files = len(file_paths)
start_idx = 0
end_idx = num_files
results = pool.ThreadPool(2).imap(main, list(range(start_idx, end_idx)))
pbar = auto.tqdm(results, total=end_idx - start_idx)
for _ in pbar:
    pass


  from .autonotebook import tqdm as notebook_tqdm


In [9]:
main(0)


usage: ipykernel_launcher.py [-h] [--det-config DET_CONFIG]
                             [--det-checkpoint DET_CHECKPOINT]
                             [--det-score-thr DET_SCORE_THR]
                             [--det-cat-id DET_CAT_ID]
                             [--pose-config POSE_CONFIG]
                             [--pose-checkpoint POSE_CHECKPOINT]
                             [--label-map LABEL_MAP] [--device DEVICE]
ipykernel_launcher.py: error: unrecognized arguments: -f /home/datnt114/.local/share/jupyter/runtime/kernel-f93025cf-6288-4d70-8f1c-8dc583c06587.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


usage: ipykernel_launcher.py [-h] [--det-config DET_CONFIG]
                             [--det-checkpoint DET_CHECKPOINT]
                             [--det-score-thr DET_SCORE_THR]
                             [--det-cat-id DET_CAT_ID]
                             [--pose-config POSE_CONFIG]
                             [--pose-checkpoint POSE_CHECKPOINT]
                             [--label-map LABEL_MAP] [--device DEVICE]
ipykernel_launcher.py: error: unrecognized arguments: -f /home/datnt114/.local/share/jupyter/runtime/kernel-247ad608-df91-4371-8748-603cff8448db.json
usage: ipykernel_launcher.py [-h] [--det-config DET_CONFIG]
                             [--det-checkpoint DET_CHECKPOINT]
                             [--det-score-thr DET_SCORE_THR]
                             [--det-cat-id DET_CAT_ID]
                             [--pose-config POSE_CONFIG]
                             [--pose-checkpoint POSE_CHECKPOINT]
                             [--label-map LABEL_MAP] [--d

KeyboardInterrupt: 