In [104]:
import json
from tqdm import tqdm
import os
from rekall import Interval, IntervalSet, IntervalSetMapping, Bounds3D
from vgrid import VGridSpec, VideoMetadata, VideoBlockFormat, FlatFormat
from vgrid import SpatialType_Keypoints, Metadata_Keypoints
from vgrid_jupyter import VGridWidget

# Load metadata for videos

Per-video metadata (filename, id, fps, frame count, width, and height) is read from `/media/4tb_hdd/shared/goodvideo_lpch/file_metadata.json`.

In [105]:
# Parse the JSON file that lists every video together with its properties.
with open('/media/4tb_hdd/shared/goodvideo_lpch/file_metadata.json', 'r') as f:
    video_files = json.load(f)

print('There are {} videos with metadata'.format(len(video_files)))

# Build one VideoMetadata per entry, in file order; num_frames is coerced
# to int, every other field is passed through unchanged.
metadata_videos = []
for entry in video_files:
    metadata_videos.append(
        VideoMetadata(
            entry["filename"],
            id=entry["id"],
            fps=entry["fps"],
            num_frames=int(entry["num_frames"]),
            width=entry["width"],
            height=entry["height"],
        )
    )

There are 38 videos with metadata


# Load pose annotations

In [106]:
# Every line of the list file is kept as one annotation path, with the
# surrounding whitespace (including the newline) stripped.
with open('/home/danfu/eeg-video/pose_annotation_list.txt', 'r') as f:
    pose_annotation_files = [line.strip() for line in f]

In [107]:
# Inspect how a video path is stored in the metadata; the output below shows a
# relative "<directory>/<file>.mp4" form.
metadata_videos[0].path

'BA12305R.VOR/12305R00.mp4'

In [108]:
# Inspect the first annotation path; the output below shows an absolute path
# ending in ".json".
pose_annotation_files[0]

'/media/4tb_hdd/shared/goodvideo_lpch/BA12305R.VOR/12305R04.json'

In [113]:
def json_path_to_video_path(json_path):
    '''
    Reduce an annotation path to "<parent directory>/<file name>" and drop
    its last four characters.

    For a path ending in ".json" only the letters "json" are removed, so the
    result keeps a trailing dot, e.g.
    '/x/BA12305R.VOR/12305R04.json' -> 'BA12305R.VOR/12305R04.'
    '''
    parent_dir, file_name = os.path.split(json_path)
    relative_path = os.path.join(os.path.basename(parent_dir), file_name)
    # Fixed-width slice, not an extension split: the trailing dot is kept.
    return relative_path[:-4]

In [120]:
def video_path_to_vm(video_path):
    '''
    Return the first entry of `metadata_videos` whose path, with its last
    three characters removed, equals `video_path`; return None when no entry
    matches.

    For a path ending in ".mp4" the slice leaves a trailing dot
    ('dir/clip.mp4' -> 'dir/clip.'), so `video_path` has to carry that dot too.
    '''
    matches = (
        candidate
        for candidate in metadata_videos
        if candidate.path[:-3] == video_path
    )
    return next(matches, None)

In [121]:
def pose_annotation_to_array(pose_annotation):
    '''
    Convert one pose annotation into a dense list of 18 keypoints.

    Assumes a format that looks like this for each pose (every present
    keypoint is keyed twice by its index, as a string):
    {
        '0': {'0': [0.5694444444444444, 0.5271739130434783]},
        '1': {'1': [0.4675925925925926, 0.7391304347826086]},
        '2': {'2': [0.2962962962962963, 0.7717391304347826]},
        '5': {'5': [0.6712962962962963, 0.6902173913043478]},
        '6': {'6': [0.7314814814814815, 0.9184782608695652]},
        '14': {'14': [0.5231481481481481, 0.45652173913043476]},
        '15': {'15': [0.6157407407407407, 0.483695652173913]},
        '16': {'16': [0.4027777777777778, 0.483695652173913]},
        '17': {'17': [0.6435185185185185, 0.5163043478260869]}
    }

    Returns a list with exactly 18 entries, one per keypoint index 0..17.
    A keypoint present in the annotation becomes [x, y, 1]; a missing one
    becomes [0, 0, 0]. The third value presumably acts as a presence /
    confidence flag for the consumer -- TODO confirm.
    '''
    num_keypoints = 18
    keypoints = []
    for index in range(num_keypoints):
        key = str(index)
        # Read from the argument itself, not from a same-named global that
        # happens to exist in the calling cell.
        if key in pose_annotation:
            keypoint = pose_annotation[key][key]
            keypoints.append([keypoint[0], keypoint[1], 1])
        else:
            keypoints.append([0, 0, 0])
    return keypoints

In [125]:
# Keep every `stride`-th annotation line; each kept line is displayed for
# `stride` frames' worth of time.
stride = 10

# Maps video id -> IntervalSet holding that video's pose intervals.
pose_metadata = {}
for pose_annotation_file in pose_annotation_files:
    # Resolve the video before touching the file, so annotation files without
    # matching metadata are skipped without being read or parsed.
    video_meta = video_path_to_vm(
        json_path_to_video_path(pose_annotation_file)
    )
    if video_meta is None:
        print('Skipping {}: no matching video metadata'.format(pose_annotation_file))
        continue

    with open(pose_annotation_file, 'r') as f:
        # Each kept line is decoded as one JSON document; lines that the
        # stride would discard are never decoded.
        pose_annotations = [
            json.loads(line.strip())
            for line_number, line in enumerate(f)
            if line_number % stride == 0
        ]

    pose_intervals = []
    for frame_number, pose_annotation in tqdm(enumerate(pose_annotations), total=len(pose_annotations)):
        # frame_number counts sampled lines, so multiply by stride to get
        # back to source frames before converting to seconds via fps.
        start = (frame_number * stride) / video_meta.fps
        end = (frame_number + 1) * stride / video_meta.fps
        # One interval per pose found in this sampled annotation.
        for pose in pose_annotation:
            pose_intervals.append(
                Interval(
                    Bounds3D(start, end),
                    {
                        'spatial_type': SpatialType_Keypoints(),
                        'metadata': {
                            'pose': Metadata_Keypoints.from_openpose(
                                pose_annotation_to_array(pose)
                            )
                        }
                    }
                )
            )

    pose_metadata[video_meta.id] = IntervalSet(pose_intervals)

100%|██████████| 10516/10516 [00:00<00:00, 44308.37it/s]
100%|██████████| 10410/10410 [00:00<00:00, 26870.20it/s]
100%|██████████| 10341/10341 [00:00<00:00, 35958.24it/s]
100%|██████████| 10478/10478 [00:00<00:00, 33280.21it/s]
100%|██████████| 10532/10532 [00:00<00:00, 26693.47it/s]


In [126]:
# Display every annotation path that was read from the list file.
pose_annotation_files

['/media/4tb_hdd/shared/goodvideo_lpch/BA12305R.VOR/12305R04.json',
 '/media/4tb_hdd/shared/goodvideo_lpch/BA12305R.VOR/12305R00.json',
 '/media/4tb_hdd/shared/goodvideo_lpch/BA12305R.VOR/12305R03.json',
 '/media/4tb_hdd/shared/goodvideo_lpch/BA12305R.VOR/12305R05.json',
 '/media/4tb_hdd/shared/goodvideo_lpch/BA12305R.VOR/12305R01.json',
 '/media/4tb_hdd/shared/goodvideo_lpch/BA12305R.VOR/12305R02.json']

In [127]:
# Wrap the per-video pose IntervalSets and register them under the 'pose' label.
pose_imap = IntervalSetMapping(pose_metadata)
block_format = VideoBlockFormat(imaps=[('pose', pose_imap)])

vgrid_spec = VGridSpec(
    video_meta=metadata_videos,
    vis_format=block_format,
    video_endpoint='http://localhost:8080',
)

# Kept as the cell's last expression so the widget is rendered as its output.
VGridWidget(vgrid_spec=vgrid_spec.to_json_compressed())

VGridWidget(vgrid_spec={'compressed': True, 'data': b'x\x9c\xec\xbd\xdb\x8e\xecL\x92\x9d\xf9*\x85\xffj\x04$\x0…