# 统计某一种动作在视频指定时间段中出现的次数
说明：
1. 某一动作是指：get/put down/screw/pump其中之一，标签0-3
2. 指定时间段是由起始时间点和结束时间点确定。
3. 可能是视频+指定时间，也有可能是帧文件夹+帧索引

步骤（如果传入的是帧文件夹+ 帧索引）：
1. 传参：帧文件夹路径frames_path，开始帧索引start_frame_index，结束帧索引end_frame_index，滑动窗口的大小window_size，滑动窗口的step
2. 读入帧文件夹
3. 取窗口 \[start, end\]：start = start_frame_index，end = min(start + window_size, end_frame_index)，为该窗口生成一个关节点/骨架pkl文件。
4. 将pkl送入动作分类模型进行推理，得到一个动作分类，添加到结果列表result_list中
5. 滑动窗口得到新的 \[start, end\]：start = start + step，end = min(end + step, end_frame_index)，为该窗口生成一个关节点/骨架pkl文件，然后重复第4步。
6. 当start >= end_frame_index 时，停止循环
7. 统计result_list动作出现的次数。


# 优化思路
1. 根据开始帧-结束帧索引，直接生成完整的骨骼点pkl文件
2. 再滑动窗口，读取pkl文件，提取窗口内的骨骼点数据，重新生成子pkl文件，
3. 将子pkl文件做推理。

In [13]:
# Constants
# Config file and checkpoint of the keypoint-based action classifier.
CONFIG_FILE = "slowonly_r50_u48_240e_ntu60_xsub_keypoint_4labels.py"
# Alternative config kept for reference (not used):
# configfile = '/mmaction2/filling_exps/TimeSformer/timesformer.py'
CHECKPOINT_FILE = "../checkpoints/ntu60_keypoints_4labels.pth"

# Directory in which the generated skeleton pkl files are stored.
PKL_DIR = "./pkl_dir"

In [14]:
# import
import os
import os.path as osp
from demo.demo_skeleton import  pose_inference, parse_args
import argparse
from mmcv import DictAction
import sys
import cv2
import numpy as np
import mmcv
import torch
from mmaction.apis import inference_recognizer, init_recognizer
import pickle
from tqdm import tqdm

try:
    from mmdet.apis import inference_detector, init_detector
except (ImportError, ModuleNotFoundError):
    raise ImportError('Failed to import `inference_detector` and '
                      '`init_detector` form `mmdet.apis`. These apis are '
                      'required in this demo! ')



In [15]:
# Fake a command line so that demo_skeleton.parse_args() can be used from the
# notebook; the two positional values end up as args.video and
# args.out_filename (see the Namespace printed below this cell).
sys.argv = ['../demo/demo_skeleton.py', '../demo/demo.mp4', '../demo/demo_ske.mp4']
# For now reuse the demo_skeleton configuration; switch to our own later.
args = parse_args()
# args.checkpoint = ''
# args.config = '../configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu120_xsub_keypoint.py'
# Override the recognizer, detector and pose-estimator configs and the label map.
# NOTE(review): label_map points at the NTU-120 label file while the recognizer
# config name says "4labels" -- confirm this is intended.
args.config ='slowonly_r50_u48_240e_ntu60_xsub_keypoint_4labels.py'
args.det_config = '../demo/faster_rcnn_r50_fpn_2x_coco_modify.py'
args.pose_config='../demo/hrnet_w32_coco_256x192.py'
args.label_map='../tools/data/skeleton/label_map_ntu120.txt'
# Bare expression: lets the notebook display the resulting Namespace.
args

Namespace(cfg_options={}, checkpoint='https://download.openmmlab.com/mmaction/skeleton/posec3d/slowonly_r50_u48_240e_ntu120_xsub_keypoint/slowonly_r50_u48_240e_ntu120_xsub_keypoint-6736b03f.pth', config='slowonly_r50_u48_240e_ntu60_xsub_keypoint_4labels.py', det_checkpoint='http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth', det_config='../demo/faster_rcnn_r50_fpn_2x_coco_modify.py', det_score_thr=0.9, device='cuda:0', label_map='../tools/data/skeleton/label_map_ntu120.txt', out_filename='../demo/demo_ske.mp4', pose_checkpoint='https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth', pose_config='../demo/hrnet_w32_coco_256x192.py', short_side=480, video='../demo/demo.mp4')

In [16]:
def check_folder(folder_path, start_frame_index, end_frame_index):
    """Check that ``folder_path`` is a usable frame folder.

    The folder must exist, be a directory, and contain ``img_00000.jpg`` as
    well as the frames named after ``start_frame_index`` and
    ``end_frame_index`` (pattern ``img_%05d.jpg``).

    Returns:
        bool: True when every check passes; otherwise False. The name of the
        first missing frame is printed.
    """
    # isdir() is already False for a path that does not exist.
    if not osp.isdir(folder_path):
        return False

    present = os.listdir(folder_path)
    # Frame 0 first, then both ends of the requested range, in that order.
    for frame_index in (0, start_frame_index, end_frame_index):
        wanted = 'img_{}.jpg'.format("%05d" % frame_index)
        if wanted not in present:
            print(f'{wanted} 不存在')
            return False
    return True

def check_frame_index(start_frame_index, end_frame_index):
    """Tell whether the two frame indices describe a valid range.

    A range is valid when neither index is negative and the start index is
    strictly smaller than the end index.
    """
    has_negative = start_frame_index < 0 or end_frame_index < 0
    if has_negative:
        return False
    # An empty or reversed range is rejected as well.
    return not start_frame_index >= end_frame_index

In [17]:
# print(check_folder('/home/fate/openmmlab/mmaction2/data/13621115_5_0', 5873, 7539))
# print(check_frame_index(5873, 7539))

In [18]:
def frame_extraction(folder_path, start, end, short_side=480):
    """Read the frames ``img_%05d.jpg`` for indices ``start`` .. ``end - 1``.

    Every frame is resized in memory to the size computed from the first
    frame with ``mmcv.rescale_size((w, h), (short_side, inf))``. The returned
    paths still point at the original, un-resized files on disk.

    Args:
        folder_path (str): Folder that contains the frame images.
        start (int): Index of the first frame to read (inclusive).
        end (int): Index at which reading stops (exclusive).
        short_side (int): Value handed to ``mmcv.rescale_size`` as the
            short-side target. Defaults to 480.

    Returns:
        tuple[list[str], list[np.ndarray]]: The frame paths and the resized
        frames. Both lists are empty when ``start >= end``.

    Raises:
        FileNotFoundError: If OpenCV cannot read one of the frames.
    """
    frame_paths = []
    frames = []
    target_size = None
    for i in range(start, end):
        frame_path = osp.join(folder_path, 'img_%05d.jpg' % i)
        frame = cv2.imread(frame_path)
        # cv2.imread does not raise on a missing/corrupt file, it returns None.
        if frame is None:
            raise FileNotFoundError(f'cannot read frame: {frame_path}')

        if target_size is None:
            h, w, _ = frame.shape
            # np.inf (lower case): the np.Inf alias no longer exists in NumPy 2.
            target_size = mmcv.rescale_size((w, h), (short_side, np.inf))

        frames.append(mmcv.imresize(frame, target_size))
        frame_paths.append(frame_path)

    return frame_paths, frames

# frame_paths, frames = frame_extraction('/home/fate/openmmlab/mmaction2/data/13621115_5_0', 5873, 7539)
# len(frame_paths), len(frames)

In [19]:

def detection_inference(args, frame_paths, batch_size = 1):
    """Detect human boxes given frame paths.

    Adapted from ``demo_skeleton.detection_inference`` with optional batching.

    Args:
        args (argparse.Namespace): The arguments; ``det_config``,
            ``det_checkpoint``, ``device`` and ``det_score_thr`` are read.
        frame_paths (list[str]): The paths of frames to do detection inference.
        batch_size (int): Number of frames handed to ``inference_detector``
            per call. With 1 (the default) each path is passed on its own;
            larger values pass a list of paths per call.

    Returns:
        list[np.ndarray]: The human detection results, one entry per frame.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # Reject a bad batch size before the (expensive) model load; previously a
    # value <= 0 matched neither branch and silently produced an empty list.
    if batch_size < 1:
        raise ValueError(f'batch_size must be >= 1, got {batch_size}')

    model = init_detector(args.det_config, args.det_checkpoint, args.device)
    assert model.CLASSES[0] == 'person', ('We require you to use a detector '
                                          'trained on COCO')
    results = []
    print('Performing Human Detection for each frame')
    prog_bar = mmcv.ProgressBar(len(frame_paths))
    if batch_size == 1:
        for frame_path in frame_paths:
            result = inference_detector(model, frame_path)
            # We only keep human detections with score larger than det_score_thr
            results.append(result[0][result[0][:, 4] >= args.det_score_thr])
            prog_bar.update()
    else:
        for start in range(0, len(frame_paths), batch_size):
            # Slicing clamps at the end of the list, so the last batch may be short.
            slice_paths = frame_paths[start:start + batch_size]
            for result in inference_detector(model, slice_paths):
                results.append(result[0][result[0][:, 4] >= args.det_score_thr])
            prog_bar.update(len(slice_paths))

    return results

In [20]:
import os
# Scratch comparison: run the detector over the same frames with batch size 1
# and batch size 2, then dump both results as text so they can be diffed.
pth = '/home/fate/openmmlab/mmaction2/data/tmp'
filenames = [osp.join(pth, entry) for entry in os.listdir(pth)]
r1 = detection_inference(args, filenames, batch_size=1)
r2 = detection_inference(args, filenames, batch_size=2)
with open('r1.txt', 'w') as f:
    f.write(str(r1))
with open('r2.txt', 'w') as f:
    f.write(str(r2))

load checkpoint from http path: http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth
Performing Human Detection for each frame
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 3/3, 25.0 task/s, elapsed: 0s, ETA:     0sload checkpoint from http path: http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth
Performing Human Detection for each frame
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 3/3, 26.7 task/s, elapsed: 0s, ETA:     0s

In [21]:
def make_skeleton_by_frames_folder_with_start_and_end(folder_path, start, end, pkl_dir):
    """Build a skeleton annotation for frames ``start`` .. ``end - 1`` and pickle it.

    Runs human detection and pose estimation on the frames, packs the
    keypoints and their scores into an annotation dict and writes it to
    ``{pkl_dir}/{start}_{end}.pkl``. Detector and pose settings come from the
    notebook-global ``args``.

    Args:
        folder_path (str): Folder that contains the frame images.
        start (int): First frame index (inclusive).
        end (int): Last frame index (exclusive, as in ``frame_extraction``).
        pkl_dir (str): Output directory; created when missing.

    Returns:
        str: Path of the written pkl file.

    Raises:
        ValueError: If the requested range contains no frame.
    """
    # makedirs also handles nested output paths and an already existing folder.
    os.makedirs(pkl_dir, exist_ok=True)
    frame_paths, original_frames = frame_extraction(folder_path, start, end)
    num_frame = len(frame_paths)
    if num_frame == 0:
        raise ValueError(f'no frames in range [{start}, {end})')
    # NOTE(review): (h, w) is taken from the frames as resized by
    # frame_extraction, while detection/pose below read the files in
    # frame_paths. If those files are not already at the resized resolution,
    # img_shape will not match the keypoint coordinates -- confirm.
    h, w, _ = original_frames[0].shape

    # Get Human detection results
    det_results = detection_inference(args, frame_paths, batch_size=32)
    torch.cuda.empty_cache()

    pose_results = pose_inference(args, frame_paths, det_results)
    torch.cuda.empty_cache()

    fake_anno = dict(
        frame_dir='',
        label=-1,
        img_shape=(h, w),
        original_shape=(h, w),
        start_index=0,
        modality='Pose',
        total_frames=num_frame)
    # Largest number of poses found in any single frame.
    num_person = max((len(x) for x in pose_results), default=0)

    num_keypoint = 17
    keypoint = np.zeros((num_person, num_frame, num_keypoint, 2),
                        dtype=np.float16)
    keypoint_score = np.zeros((num_person, num_frame, num_keypoint),
                              dtype=np.float16)
    # Frames with fewer than num_person poses keep zeros in the unused slots.
    for i, poses in enumerate(pose_results):
        for j, pose in enumerate(poses):
            pose = pose['keypoints']
            keypoint[j, i] = pose[:, :2]
            keypoint_score[j, i] = pose[:, 2]
    fake_anno['keypoint'] = keypoint
    fake_anno['keypoint_score'] = keypoint_score

    out_anno_filename = osp.join(pkl_dir, f'{start}_{end}.pkl')
    print(f'\nsaving anno file: {out_anno_filename}')

    with open(out_anno_filename, 'wb') as f:
        pickle.dump(fake_anno, f)
    return out_anno_filename

# make_out_anno_filename = make_skeleton_by_frames_folder_with_start_and_end('/home/fate/openmmlab/data/13621115_5_0', 5873, 5874, '/home/fate/openmmlab/data/pkls')

# make_out_anno_filename

In [22]:
def count_the_number_by_pred_labels_list(y_preds:list, label:int):
    """Count how many separate runs of ``label`` occur in ``y_preds``.

    Consecutive identical labels are treated as one occurrence. For example
    ``[0, 1, 1, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 2]`` collapses to
    ``[0, 1, 0, 2, 0, 1, 0, 1, 0, 2]``, so the result is 5 for label 0, 3 for
    label 1 and 2 for label 2.

    Args:
        y_preds (list): Predicted action labels in temporal order.
        label (int): The action label to count.

    Returns:
        int: Number of runs of ``label``; 0 when the label never occurs;
        -1 when ``y_preds`` is empty.
    """
    from itertools import groupby  # local import keeps the block self-contained

    if len(y_preds) == 0:
        return -1

    # groupby yields one group per run of equal neighbours.
    count_map = {}
    for run_label, _run in groupby(y_preds):
        count_map[run_label] = count_map.get(run_label, 0) + 1

    print(f'count_map:{count_map}')
    # A label that was never predicted counts as zero instead of raising KeyError.
    return count_map.get(label, 0)

# l = [0, 1, 1, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 2]
# count_the_number_by_pred_labels_list(l, 1)


In [23]:
def _iter_windows(first_index, last_index, window_size, step):
    """Yield ``(start, end)`` windows over ``[first_index, last_index)``; ``end`` is clamped to ``last_index``."""
    start = first_index
    # Strict '<': a window that would start at last_index contains no frame.
    while start < last_index:
        yield start, min(start + window_size, last_index)
        start += step


def count_the_number_of_times_a_certain_type_of_action_appears_in_the_frames_folder(
    folder_path: str,
    start_frame_index: int,
    end_frame_index: int,
    window_size: int,
    step: int,
    action_label: int
):
    """Count how often one action occurs in a range of frames.

    The skeleton annotation for the whole range is generated once, then a
    sliding window is moved over it; every window is classified and the
    resulting label sequence is handed to
    ``count_the_number_by_pred_labels_list``. Windows near the end of the
    range are clamped and may be shorter than ``window_size``.

    Args:
        folder_path: Path of a folder which contains the frames of a video.
        start_frame_index: Index of the first frame of the range.
        end_frame_index: Index at which the range ends.
        window_size: Size of the sliding window in frames; must be positive.
        step: Slide step in frames; a value <= 0 means ``step = window_size``.
        action_label: Label to count; 4 action types
            ``['get', 'put down', 'screw', 'pump']``.

    Returns:
        The count returned by ``count_the_number_by_pred_labels_list``, or
        ``None`` (after printing a hint) when a parameter check fails.
    """
    # 0. check params
    if not check_folder(folder_path, start_frame_index, end_frame_index) or not check_frame_index(start_frame_index, end_frame_index):
        print("请检查参数")
        return
    # A non-positive window yields empty slices and, together with step <= 0,
    # a loop that never advances.
    if window_size <= 0:
        print("请检查参数")
        return
    if step <= 0:
        step = window_size
    print(f'start: {start_frame_index}, end_frame_index: {end_frame_index}; window_size: {window_size}; step: {step}')

    # 1. one skeleton pkl for the whole range, read back as the full annotation
    pkl_file_fullpath = make_skeleton_by_frames_folder_with_start_and_end(folder_path, start_frame_index, end_frame_index, PKL_DIR)
    with open(pkl_file_fullpath, "rb") as f:
        full_anno = pickle.load(f)
    model = init_recognizer(CONFIG_FILE, CHECKPOINT_FILE, device='cuda:0')

    # 2. classify every window
    y_preds = []
    with tqdm(total=end_frame_index - start_frame_index) as pbar:
        for start, end in _iter_windows(start_frame_index, end_frame_index, window_size, step):
            # Window bounds relative to the first frame of the full annotation.
            lo = start - start_frame_index
            hi = end - start_frame_index
            # Shallow copy is enough: the sliced entries are re-assigned below.
            window_anno = full_anno.copy()
            window_anno['keypoint'] = full_anno['keypoint'][:, lo:hi, :, :]
            window_anno['keypoint_score'] = full_anno['keypoint_score'][:, lo:hi, :]
            window_anno['total_frames'] = end - start

            results = inference_recognizer(model, window_anno)
            y_preds.append(results[0][0])

            # Never advance the bar past the end of the range.
            pbar.update(min(step, end_frame_index - start))

    # 3. collapse runs and count
    return count_the_number_by_pred_labels_list(y_preds, action_label)


In [24]:
# Example run: count action label 1 over frames 5873-7539 with a 60-frame
# window that slides 30 frames at a time.
count_the_number_of_times_a_certain_type_of_action_appears_in_the_frames_folder(
    folder_path='/home/fate/openmmlab/mmaction2/data/13621115_5_0',
    start_frame_index=5873,
    end_frame_index=7539,
    window_size=30*2,
    step=30,
    action_label=1,
)  # author's timing notes: 3min16s / 2m5s

start: 5873, end_frame_index: 7539; window_size: 60; step: 30
load checkpoint from http path: http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth
Performing Human Detection for each frame
[>>>>>>>>>>>>>>>>>>>>>>>>>>>] 1666/1666, 28.1 task/s, elapsed: 59s, ETA:     0sload checkpoint from http path: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth
Performing Human Pose Estimation for each frame
[>>>>>>>>>>>>>>>>>>>>>>>>>>>] 1666/1666, 34.6 task/s, elapsed: 48s, ETA:     0s
saving anno file: ./pkl_dir/5873_7539.pkl
load checkpoint from local path: ../checkpoints/ntu60_keypoints_4labels.pth


1680it [00:30, 54.45it/s]                          

count_map:{0: 12, 1: 3, 3: 3, 2: 9}





3

1. 解决batch不同，推理结果不同的问题
2. 多找几个视频试试。
3. 其他的推理的batch也改大
4. 22机器数据备份