In [1]:
from pipelines.sampleframes import SampleFrames
import os.path as osp

In [2]:
ann_file = '../data/rawframes/annotations.txt'
root_dir = '../data/rawframes/test/'
rgb_prefix =  'img_{:05}.jpg'
flow_prefix = 'flow_{:05}.jpg'
depth_prefix = 'depth_{:05}.jpg'

In [3]:
results=dict()

## Load annotations

In [4]:
video_infos = []
with open(ann_file, 'r') as fin:
    for line in fin:
        line_split = line.strip().split()
        
        print(line_split)
        video_info = dict()
        video_info['video_path'] = osp.join(root_dir, line_split[0])
        video_info['start_index'] = 1
        video_info['total_frames'] = int(line_split[1])
        video_info['label'] = int(line_split[2])
        video_infos.append(video_info)

['01391', '41', '0']
['07973', '71', '1']
['13159', '55', '2']


In [5]:
video_infos

[{'video_path': '../data/rawframes/test/01391',
  'start_index': 1,
  'total_frames': 41,
  'label': 0},
 {'video_path': '../data/rawframes/test/07973',
  'start_index': 1,
  'total_frames': 71,
  'label': 1},
 {'video_path': '../data/rawframes/test/13159',
  'start_index': 1,
  'total_frames': 55,
  'label': 2}]

In [6]:
results.update(video_infos[0])

## Load Pose

In [10]:
def decode(line):
    line = [l for l in line.replace(',',"").split(' ') if l != '' and l != '\n']
    imgpath = line[0]
    line = line[1:]
    line = [float(l) for l in line]


    #line [imgpath [0], [y,x, conf][1], ...[4], ... [52], headlb[54], headrt,lhandlb[58], lhandrt, rhandlb[62], rhandrt, bboxlb[66], bboxrt[68] ]

    
    posepoints = line[0:51]
    head = line[51:55]
    lhand =line[55:59]
    rhand = line[59:63]
    bodybbox = line[63:]
    
    keypoints = ["nose",
            "left_eye",
            "right_eye",
            "left_ear",
            "right_ear",
            "left_shoulder",
            "right_shoulder",
             "left_elbow",
             "right_elbow",
             "left_wrist",
             "right_wrist",
             "left_hip",
             "right_hip",
             "left_knee",
             "right_knee",
             "left_ankle",
             "right_ankle"
            ]

    pose_values = dict()

    for i in range(0, 51, 3):
        pose_values[keypoints[i//3]] = dict(x = posepoints[i],
                                          y=posepoints[i+1],
                                          confidence=posepoints[i+2])
    
    return pose_values, head, lhand, rhand, bodybbox, imgpath

In [11]:
results

{'video_path': '../data/rawframes/test/01391',
 'start_index': 1,
 'total_frames': 41,
 'label': 0}

In [13]:
video_path = results['video_path']

In [14]:
pose_frames = dict()
with open(video_path + '/pose.txt', 'r') as fin:
    for line in fin:
        pose_values, head, lhand, rhand, bodybbox, imgpath = decode(line)
        pose_frames[imgpath] = dict(keypoints=pose_values,
                                   head=head,
                                   left_hand=lhand,
                                   right_hand=rhand,
                                   body_bbox=bodybbox,
                                   )
        
results['pose'] = pose_frames

In [17]:
imgpath[4:9]

'00041'

In [15]:
pose_frames

{'img_00001.jpg': {'keypoints': {'nose': {'x': 43.81553649902344,
    'y': 93.28892517089844,
    'confidence': 0.8509206771850586},
   'left_eye': {'x': 35.57920837402344,
    'y': 100.61471557617188,
    'confidence': 0.964339017868042},
   'right_eye': {'x': 36.213104248046875,
    'y': 84.92330932617188,
    'confidence': 0.9061140418052673},
   'left_ear': {'x': 39.73222351074219,
    'y': 112.42738342285156,
    'confidence': 0.9504300951957703},
   'right_ear': {'x': 41.08543395996094,
    'y': 74.89695739746094,
    'confidence': 0.9219939708709717},
   'left_shoulder': {'x': 87.77450561523438,
    'y': 135.4550323486328,
    'confidence': 0.8804111480712891},
   'right_shoulder': {'x': 83.13693237304688,
    'y': 56.477447509765625,
    'confidence': 0.8907977938652039},
   'left_elbow': {'x': 145.02542114257812,
    'y': 153.73004150390625,
    'confidence': 0.9272946119308472},
   'right_elbow': {'x': 131.3916778564453,
    'y': 46.6795654296875,
    'confidence': 1.01055240

In [None]:
from pipelines.sampleframes import SampleFrames

In [None]:
sampleframes = SampleFrames(clip_len=32,
                           frame_interval=2,
                           num_clips=1)

In [None]:
results.keys()

In [None]:
results

In [None]:
results = sampleframes(results)

In [None]:
results

## Load frames

In [None]:
from PIL import Image

In [None]:
frame_indices = results['frame_inds']

In [None]:
frame = frame_indices[0]

In [None]:
image = Image.open(osp.join(video_path, depth_prefix.format(frame)))

In [None]:
rgb_frames = []
flow_frames = []
depth_frames = []
cache = dict()

for frame in frame_indices:
    if frame not in cache:
        rgb_frame = Image.open(osp.join(video_path, rgb_prefix.format(frame)))
        depth_frame = Image.open(osp.join(video_path, depth_prefix.format(frame)))
        flow_frame = Image.open(osp.join(video_path, flow_prefix.format(frame)))
        
        # Add frames to cache
        cache[frame] = dict(rgb_frame=rgb_frame,
                           depth_frame=depth_frame,
                           flow_frame=flow_frame)
        
        rgb_frames.append(rgb_frame)
        depth_frames.append(depth_frame)
        flow_frames.append(flow_frame)
        
    else:
        rgb_frames.append(cache[frame][rgb_frame])
        depth_frames.append(cache[frame][depth_frame])
        flow_frames.append(cache[frame][flow_frame])
        
