In [100]:
import argparse
import logging
import time
import sys
import cv2
import numpy as np

from tf_pose.estimator import BodyPart
from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path, model_wh

In [101]:
logger = logging.getLogger('TfPoseEstimator-Video')
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)

fps_time = 0

In [102]:
def process_human_data(humans):
    
    if (len(humans)==0):
        return np.zeros(shape=(18,2))
                        
    feature = np.zeros(shape=(18,2))
    for i in range(18):
        if i not in humans[0].body_parts:
            feature[i] = [0, 0]
        else:
            feature[i] = [bp[i].x, bp[i].y]
    
    return feature
        

In [105]:
model_path='mobilenet_thin'
resolution = '320x240'
showBG=True
    
logger.debug('initialization %s : %s' % (model_path, get_graph_path(model_path)))
w, h = model_wh(resolution)
e = TfPoseEstimator(get_graph_path(model_path), target_size=(w, h))


video = '../action_dataset/PlayingPiano/v_PlayingPiano_g02_c01.avi'
cap = cv2.VideoCapture(video)

#---------------modified----------------#
num_frames = float(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print ("All Frames: " ,num_frames)
cur_frames = 0.0
step = (num_frames / 20.0) 
#---------------modified----------------#

fourcc = cv2.VideoWriter_fourcc(*'XVID')
width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
resize_out_ratio = 8.0
print("Image Size: %d x %d" % (width, height)) 

single_video_features = np.array([])
if cap.isOpened() is False:
    print("Error opening video stream or file")

while (cap.isOpened()):   
    if(cur_frames >= num_frames):
        break

    frame_no = (cur_frames/num_frames)
    cap.set(1,frame_no)
    ret_val, image = cap.read()
    
    print("Frame no: ", frame_no)
    print ("Count: ", cur_frames)
    
    if ret_val == True:
        humans = e.inference(image, resize_to_default=(w > 0 and h > 0), upsample_size=resize_out_ratio)
        #print ("Frame numbers: ", cur_frames, humans)
        frame_feature = process_human_data(humans) 
        single_video_features = np.append(single_video_features, frame_feature) 
        
    cur_frames+=step
    if cv2.waitKey(1) == 27:
        break
print (single_video_features)
cv2.destroyAllWindows()
logger.debug('finished+')

[2018-07-25 11:15:50,932] [TfPoseEstimator-Video] [DEBUG] initialization mobilenet_thin : /Users/david/Documents/system_implemetation/tf-openpose/models/graph/mobilenet_thin/graph_opt.pb
[2018-07-25 11:15:50,932] [TfPoseEstimator-Video] [DEBUG] initialization mobilenet_thin : /Users/david/Documents/system_implemetation/tf-openpose/models/graph/mobilenet_thin/graph_opt.pb
[2018-07-25 11:15:50,932] [TfPoseEstimator-Video] [DEBUG] initialization mobilenet_thin : /Users/david/Documents/system_implemetation/tf-openpose/models/graph/mobilenet_thin/graph_opt.pb
[2018-07-25 11:15:50,932] [TfPoseEstimator-Video] [DEBUG] initialization mobilenet_thin : /Users/david/Documents/system_implemetation/tf-openpose/models/graph/mobilenet_thin/graph_opt.pb
[2018-07-25 11:15:50,940] [TfPoseEstimator] [INFO] loading graph from /Users/david/Documents/system_implemetation/tf-openpose/models/graph/mobilenet_thin/graph_opt.pb(default size=320x240)


All Frames:  225.0
Image Size: 320 x 240
Frame no:  0.0
Count:  0.0
Frame no:  0.05
Count:  11.25
Frame no:  0.1
Count:  22.5
Frame no:  0.15
Count:  33.75
Frame no:  0.2
Count:  45.0
Frame no:  0.25
Count:  56.25
Frame no:  0.3
Count:  67.5
Frame no:  0.35
Count:  78.75
Frame no:  0.4
Count:  90.0
Frame no:  0.45
Count:  101.25
Frame no:  0.5
Count:  112.5
Frame no:  0.55
Count:  123.75
Frame no:  0.6
Count:  135.0
Frame no:  0.65
Count:  146.25
Frame no:  0.7
Count:  157.5
Frame no:  0.75
Count:  168.75
Frame no:  0.8
Count:  180.0
Frame no:  0.85
Count:  191.25
Frame no:  0.9
Count:  202.5
Frame no:  0.95
Count:  213.75


[2018-07-25 11:16:43,161] [TfPoseEstimator-Video] [DEBUG] finished+
[2018-07-25 11:16:43,161] [TfPoseEstimator-Video] [DEBUG] finished+
[2018-07-25 11:16:43,161] [TfPoseEstimator-Video] [DEBUG] finished+
[2018-07-25 11:16:43,161] [TfPoseEstimator-Video] [DEBUG] finished+


[0.475      0.22916667 0.403125   0.42916667 0.290625   0.40833333
 0.08125    0.5125     0.         0.         0.528125   0.47083333
 0.515625   0.72916667 0.         0.         0.3125     0.8125
 0.         0.         0.         0.         0.459375   0.82916667
 0.         0.         0.         0.         0.440625   0.19166667
 0.49375    0.19166667 0.         0.         0.509375   0.20833333
 0.475      0.22916667 0.403125   0.42916667 0.290625   0.40833333
 0.08125    0.5125     0.         0.         0.528125   0.47083333
 0.515625   0.72916667 0.         0.         0.3125     0.8125
 0.         0.         0.         0.         0.459375   0.82916667
 0.         0.         0.         0.         0.440625   0.19166667
 0.49375    0.19166667 0.         0.         0.509375   0.20833333
 0.475      0.22916667 0.403125   0.42916667 0.290625   0.40833333
 0.08125    0.5125     0.         0.         0.528125   0.47083333
 0.515625   0.72916667 0.         0.         0.3125     0.8125
 0.    

In [27]:
humans

[BodyPart:0-(0.47, 0.23) score=0.73 BodyPart:1-(0.40, 0.43) score=0.65 BodyPart:2-(0.29, 0.41) score=0.58 BodyPart:3-(0.08, 0.51) score=0.69 BodyPart:4-(0.19, 0.70) score=0.73 BodyPart:5-(0.53, 0.47) score=0.63 BodyPart:6-(0.52, 0.73) score=0.38 BodyPart:8-(0.31, 0.81) score=0.12 BodyPart:11-(0.46, 0.83) score=0.20 BodyPart:14-(0.44, 0.19) score=0.77 BodyPart:15-(0.49, 0.19) score=0.86 BodyPart:16-(0.38, 0.22) score=0.79 BodyPart:17-(0.51, 0.21) score=0.08]

In [54]:
humans[0].body_parts[0].x

0.475

In [82]:


feature = np.zeros(shape=(18,2))
for i in range(18):
    if i not in humans[0].body_parts:
        feature[i] = [0, 0]
    else:
        feature[i] = [bp[i].x, bp[i].y]
feature = list(feature)

print(feature[0])


[0.475      0.22916667]


In [104]:
print(len(single_video_features))

720


In [None]:
10*[18, 2]

train_x[] train_y[label]