In [1]:
!pip install ffmpeg-python
!pip install "tensorflow-gpu<2" "dm-sonnet<2" "tensorflow-probability==0.7.0"
!pip install keras==2.3.1
!git clone https://github.com/deepmind/kinetics-i3d

Collecting ffmpeg-python
  Downloading https://files.pythonhosted.org/packages/d7/0c/56be52741f75bad4dc6555991fabd2e07b432d333da82c11ad701123888a/ffmpeg_python-0.2.0-py3-none-any.whl
Installing collected packages: ffmpeg-python
Successfully installed ffmpeg-python-0.2.0
Collecting tensorflow-gpu<2
[?25l  Downloading https://files.pythonhosted.org/packages/83/b1/9c0d6640eab34fae38f4dae6b312894f8bc1025b0876b3eae1fe11745a7b/tensorflow_gpu-1.15.4-cp36-cp36m-manylinux2010_x86_64.whl (411.0MB)
[K     |████████████████████████████████| 411.0MB 39kB/s 
[?25hCollecting dm-sonnet<2
[?25l  Downloading https://files.pythonhosted.org/packages/53/14/e221b910127bf4e2c19bc6d3b3e65a4e0104b90f7e98a3d428926474ece3/dm_sonnet-1.36-py3-none-any.whl (665kB)
[K     |████████████████████████████████| 665kB 43.0MB/s 
[?25hCollecting tensorflow-probability==0.7.0
[?25l  Downloading https://files.pythonhosted.org/packages/3e/3a/c10b6c22320531c774402ac7186d1b673374e2a9d12502cbc8d811e4601c/tensorflow_probabi

In [2]:
import os
from os.path import exists, join, basename, splitext

# Clone and build OpenPose from source. The whole step is skipped when a
# directory named after the repository already exists in the working directory.
git_repo_url = 'https://github.com/CMU-Perceptual-Computing-Lab/openpose.git'
# Directory name git will clone into, derived from the URL (basename without ".git").
project_name = splitext(basename(git_repo_url))[0]
if not exists(project_name):
  # see: https://github.com/CMU-Perceptual-Computing-Lab/openpose/issues/949
  # install new CMake because of CUDA10
  !wget -q https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.tar.gz
  !tar xfz cmake-3.13.0-Linux-x86_64.tar.gz --strip-components=1 -C /usr/local
  # clone openpose
  !git clone -q --depth 1 $git_repo_url
  # Rewrite OpenPose's CMakeLists so the bundled Caffe is checked out at a fixed
  # commit instead of the tip of master.
  !sed -i 's/execute_process(COMMAND git checkout master WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}\/3rdparty\/caffe)/execute_process(COMMAND git checkout f019d0dfe86f49d1140961f8c7dec22130c83154 WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}\/3rdparty\/caffe)/g' openpose/CMakeLists.txt
  # install system dependencies
  !apt-get -qq install -y libatlas-base-dev libprotobuf-dev libleveldb-dev libsnappy-dev libhdf5-serial-dev protobuf-compiler libgflags-dev libgoogle-glog-dev liblmdb-dev opencl-headers ocl-icd-opencl-dev libviennacl-dev
  # install python dependencies
  !pip install -q youtube-dl
  # build openpose
  # (any previous build directory is removed first; make uses one job per CPU reported by nproc)
  !cd openpose && rm -rf build || true && mkdir build && cd build && cmake .. && make -j`nproc`

Selecting previously unselected package libgflags2.2.
(Reading database ... 144865 files and directories currently installed.)
Preparing to unpack .../00-libgflags2.2_2.2.1-1_amd64.deb ...
Unpacking libgflags2.2 (2.2.1-1) ...
Selecting previously unselected package libgflags-dev.
Preparing to unpack .../01-libgflags-dev_2.2.1-1_amd64.deb ...
Unpacking libgflags-dev (2.2.1-1) ...
Selecting previously unselected package libgoogle-glog0v5.
Preparing to unpack .../02-libgoogle-glog0v5_0.3.5-1_amd64.deb ...
Unpacking libgoogle-glog0v5 (0.3.5-1) ...
Selecting previously unselected package libgoogle-glog-dev.
Preparing to unpack .../03-libgoogle-glog-dev_0.3.5-1_amd64.deb ...
Unpacking libgoogle-glog-dev (0.3.5-1) ...
Selecting previously unselected package libhdf5-serial-dev.
Preparing to unpack .../04-libhdf5-serial-dev_1.10.0-patch1+docs-4_all.deb ...
Unpacking libhdf5-serial-dev (1.10.0-patch1+docs-4) ...
Selecting previously unselected package libleveldb1v5:amd64.
Preparing to unpack ...

In [3]:
cd openpose

/content/openpose


In [4]:
# Copy the I3D model definition into the current working directory so that
# the `import i3d` in the next cell can find it.
!cp /content/kinetics-i3d/i3d.py i3d.py

In [9]:
import shutil
import os
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import shutil
import json
import numpy as np
import ffmpeg  
import math
import pickle
from scipy.special import softmax
import tensorflow as tf
import time
import i3d
import random
from keras import layers
from keras import models
from keras.models import Sequential, Model
from keras.layers.normalization import BatchNormalization
import sys

def extract_skeleton_sequences(video_dir):
  video_name=video_dir.split('/')[-1][:-4]
  openpose_dir='openpose_output/{}'.format(video_name)
  if os.path.exists(openpose_dir):
    shutil.rmtree(openpose_dir)
  os.makedirs(openpose_dir)
  !./build/examples/openpose/openpose.bin --video '{video_dir}' --write_json '{openpose_dir}' --display 0 --render_pose 0
  frames=os.listdir(openpose_dir)
  video=[]
  for frame in frames:
    with open(openpose_dir+'/'+frame) as f:
      data = json.load(f)
    if not data or not data['people'] or not data['people'][0] or not data['people'][0]['pose_keypoints_2d']:
        continue
    else: 
      if len(data['people'])==1:
        data=data['people'][0]['pose_keypoints_2d']
        joints=[] #collect x,y coordinate of joints from json file
        temp=[]
        for j in range(len(data)):
          if j%3==0 or j%3==1:
            temp.append(data[j])
          else:
            joints.append(temp)
            temp=[]
      else:
        joints=[]
        best_fault=float('inf')
        best_center=0
        best_prob=0
        for j in range(len(data['people'])):
          fault_cnt=0
          prob=0
          x_min=float('inf')
          x_max=float('-inf')
          data2=data['people'][j]['pose_keypoints_2d']
          joints2=[] #collect x,y coordinate of joints from json file
          temp=[]
          for k in range(len(data2)):
            if k%3==0 or k%3==1:
              temp.append(data2[k])
              if k%3==0:
                if data2[k]!=0:
                  x_min=min(x_min,data2[k])
                  x_max=max(x_max,data2[k])
            else:
              prob+=data2[k]
              if data2[k]==0:
                fault_cnt+=1
              joints2.append(temp)
              temp=[]
          center=(x_min+x_max)/2
          prob/=25-fault_cnt
          if fault_cnt<7 and fault_cnt-best_fault<=5 and abs(center-1920/2)<abs(best_center-1920/2) and prob>0.5 and best_prob-prob<0.15 and x_max<1820 and x_min>100:
            joints=joints2
            best_fault=fault_cnt
            best_center=center
            best_prob=prob
      if joints:
        video.append(joints)
  return video
def dist(p1,p2):
  """Euclidean distance between two 2-D points given as (x, y) pairs."""
  (ax,ay),(bx,by)=p1,p2
  squared=(ax-bx)**2+(ay-by)**2
  return squared**0.5
def ang(p1,p2):
  """Unit vector of the displacement p1 - p2, as an (x, y) tuple.

  Returns (0,0) when both points have identical coordinates, since the
  direction is undefined there.
  """
  (ax,ay),(bx,by)=p1,p2
  if not (ax!=bx or ay!=by):
    return (0,0)
  dx=ax-bx
  dy=ay-by
  norm=(dx**2+dy**2)**0.5
  return (dx/norm,dy/norm)
def generate_rnn_feature(skeleton_sequence):
  """Turn a skeleton sequence into the 23 x 975 feature matrix fed to the RNN.

  Each of the 23 rows is the concatenation, in this order, of:
    * 25 values  - per-joint displacement between two consecutive sampled
                   frames, divided by the largest displacement in the clip;
    * 50 values  - unit direction (x, y) of that displacement per joint;
    * 600 values - unit direction (x, y) between every pair of joints
                   (300 pairs) within one sampled frame;
    * 300 values - distance between every pair of joints in that frame,
                   divided by the largest pairwise distance of the frame.
  A joint stored as [0,0] is treated as undetected and contributes zeros.

  Args:
    skeleton_sequence: non-empty list of frames, each a list of at least 25
      ``[x, y]`` joints.

  Returns:
    A list of 23 rows of 975 numbers each.

  Raises:
    ValueError: if ``skeleton_sequence`` is empty.
  """
  num_frames=24
  num_joints=25
  steps=num_frames-1
  missing=[0,0]
  frames=skeleton_sequence
  if not frames:
    raise ValueError('skeleton_sequence is empty; no pose frames to build RNN features from')
  total=len(frames)

  # --- Motion between consecutive sampled frames -------------------------
  distances=[]
  directions=[]
  observed=[]  # displacements of joints visible in both frames of a pair
  for i in range(steps):
    prev=frames[total*i//num_frames]
    nxt=frames[total*(i+1)//num_frames]
    distance=[]
    direction=[]
    for j in range(num_joints):
      if prev[j]!=missing and nxt[j]!=missing:
        d=dist(prev[j],nxt[j])
        observed.append(d)
        x,y=ang(prev[j],nxt[j])
      else:
        d=0
        x,y=0,0
      distance.append(d)
      direction.append(x)
      direction.append(y)
    distances.append(distance)
    directions.append(direction)
  # Normalise by the largest displacement. When no joint was visible in both
  # frames of any pair, or nothing moved at all, there is nothing to scale
  # by; the displacements are all zero and are left as zeros instead of
  # failing on an empty list / division by zero.
  longest=max(observed) if observed else 0
  if longest>0:
    distances=[[d/longest for d in row] for row in distances]
  else:
    distances=[[0.0 for _ in row] for row in distances]

  # --- Pose geometry inside each sampled frame ----------------------------
  # NOTE(review): this part samples with total*i//(num_frames-1) while the
  # motion part uses total*i//num_frames. The mismatch is kept on purpose:
  # presumably the saved RNN weights were trained on exactly these features,
  # so changing the sampling would silently shift the inputs - confirm before
  # unifying.
  output=[]
  for i in range(steps):
    frame=frames[total*i//steps]
    pair_angles=[]
    pair_dists=[]
    for j in range(num_joints):
      for k in range(j+1,num_joints):
        if frame[j]!=missing and frame[k]!=missing:
          x,y=ang(frame[j],frame[k])
          d=dist(frame[j],frame[k])
        else:
          x,y=0,0
          d=0
        pair_angles.append(x)
        pair_angles.append(y)
        pair_dists.append(d)
    # A frame with fewer than two separated visible joints has no positive
    # distance to normalise by; keep its zeros rather than dividing by zero.
    widest=max(pair_dists)
    if widest>0:
      pair_dists=[d/widest for d in pair_dists]
    else:
      pair_dists=[0.0 for _ in pair_dists]
    output.append(distances[i]+directions[i]+pair_angles+pair_dists)
  return output
def create_model_rnn():
  """Build and compile the skeleton-sequence classifier.

  The network is two stacked bidirectional CuDNN GRU layers (300 units per
  direction), followed by batch normalisation, ReLU, 25% dropout, a
  600-unit ReLU dense layer and a 49-way softmax. It is built for inputs of
  shape (batch, 23, 975) and compiled with categorical cross-entropy, Adam
  (learning rate 1e-4) and accuracy as the metric.

  Returns:
    The compiled Keras ``Sequential`` model (weights not loaded).
  """
  from keras import optimizers
  sequence_shape=(23,975)
  stack=[
    layers.Bidirectional(layers.CuDNNGRU(300,input_shape=sequence_shape,return_sequences=True)),
    layers.Bidirectional(layers.CuDNNGRU(300)),
    BatchNormalization(),
    layers.Activation('relu'),
    layers.Dropout(0.25),
    layers.Dense(600, activation='relu'),
    layers.Dense(49, activation='softmax'),
  ]
  model=models.Sequential()
  for layer in stack:
    model.add(layer)
  model.build((None,)+sequence_shape)
  adam=optimizers.Adam(learning_rate=0.0001)
  model.compile(loss='categorical_crossentropy',optimizer=adam,metrics=['acc'])
  return model
def center_standardization(img):
  """Standardise an image to zero mean and unit variance over all its values.

  The mean and standard deviation are taken over the whole array (all pixels
  and channels together). To avoid dividing by zero on a constant image the
  divisor is never smaller than ``1/img.size``.

  Args:
    img: array-like image of any shape; it is not modified.

  Returns:
    A new float array of the same shape. An empty input is returned as an
    empty float array.
  """
  img=np.asarray(img).astype('float')  # astype copies, so the caller's array is untouched
  if img.size==0:
    return img
  mean=np.mean(img)
  std=np.std(img)
  # One vectorised expression replaces the per-element Python loop; the
  # arithmetic per element is unchanged.
  return (img-mean)/max(std,1.0/img.size)
def generate_RGB_feature(skeleton_sequence,video_dir):
  """Sample 9 person-centred, standardised RGB crops from a video.

  The crop box is derived from the joints of the middle skeleton frame:
  their bounding box is padded (width/8 horizontally, height/5 vertically),
  grown along its shorter side towards a square and applied unchanged to
  every sampled frame. Frames are sampled at indices ``frameCount*i//9``,
  resized to 224x224 and passed through ``center_standardization``.

  Args:
    skeleton_sequence: non-empty list of frames of ``[x, y]`` joints; a joint
      at (0, 0) is treated as undetected.
    video_dir: path of the video file.

  Returns:
    Array of shape (9, 224, 224, 3). Slots for frames that could not be read
    stay zero.

  Raises:
    ValueError: if ``skeleton_sequence`` is empty.
  """
  num_clips=9
  out_size=224
  if not skeleton_sequence:
    raise ValueError('skeleton_sequence is empty; cannot locate the person to crop')

  def grow_span(lo,hi,diff,limit):
    """Lengthen the span [lo, hi) by about diff while keeping it inside [0, limit]."""
    half=diff//2
    if lo-half<0:
      hi+=diff-lo
      lo=0
    elif hi+half>limit:
      lo-=diff-(limit-hi)
      hi=limit  # the vertical case used to assign 1920 (the width) here
    else:
      lo-=half
      hi+=half
    # Clamp: a negative start would wrap around in numpy slicing and select
    # the wrong rows/columns.
    return max(0,lo),min(limit,hi)

  def square_crop_box(joints,frame_w,frame_h):
    """Return (x1, y1, x2, y2) of the padded, squared box around the joints."""
    seen=[(x,y) for x,y in joints if not (x==0 and y==0)]
    if not seen:
      return 0,0,frame_w,frame_h  # nothing detected: fall back to the full frame
    x1=int(min(x for x,_ in seen))
    y1=int(min(y for _,y in seen))
    x2=int(max(x for x,_ in seen))
    y2=int(max(y for _,y in seen))
    width=x2-x1
    height=y2-y1
    x1=max(0,x1-width//8)
    y1=max(0,y1-height//5)
    x2=min(frame_w,x2+width//8)
    y2=min(frame_h,y2+height//5)
    width=x2-x1
    height=y2-y1
    if width>height:
      y1,y2=grow_span(y1,y2,width-height,frame_h)
    elif width<height:
      x1,x2=grow_span(x1,x2,height-width,frame_w)
    if x2<=x1 or y2<=y1:
      return 0,0,frame_w,frame_h  # degenerate box would give cv2.resize an empty image
    return x1,y1,x2,y2

  sequences=np.zeros((num_clips,out_size,out_size,3))
  cap = cv2.VideoCapture(video_dir)
  try:
    # Use the real frame size; fall back to 1920x1080 (the values that were
    # hard-coded before) when the container does not report it.
    frame_w=int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 1920
    frame_h=int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 1080
    x1,y1,x2,y2=square_crop_box(skeleton_sequence[len(skeleton_sequence)//2],frame_w,frame_h)
    frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cnt=0
    inde=0
    while cnt<frameCount and inde<num_clips:
      ret,frame=cap.read()
      if not ret:
        break
      if cnt==frameCount*inde//num_clips:
        patch=cv2.resize(frame[y1:y2,x1:x2,:],(out_size,out_size))
        patch=center_standardization(patch)
        # With fewer than num_clips frames several slots map to the same
        # frame index; fill all of them instead of leaving them zero.
        while inde<num_clips and cnt==frameCount*inde//num_clips:
          sequences[inde]=patch
          inde+=1
      cnt+=1
  finally:
    cap.release()  # also released when cropping/resizing raises
  return sequences
def predict_RGB(RGB_feature,checkpoint_path='/content/_RGB_0.754_model-34986'):
  """Classify one RGB clip with the I3D network and return class probabilities.

  The default graph is reset and the model is rebuilt and restored from the
  checkpoint on every call.

  Args:
    RGB_feature: a single clip matching the placeholder shape
      (9, 224, 224, 3), as produced by ``generate_RGB_feature``.
    checkpoint_path: TensorFlow checkpoint holding the variables under the
      'RGB' scope. Defaults to the path that was previously hard-coded.

  Returns:
    1-D array of 49 softmax probabilities.
  """
  tf.reset_default_graph()
  batch_size=1
  clip_size=9
  frame_size=224
  num_classes=49
  clip_holder=tf.placeholder(tf.float32,shape=(batch_size,clip_size,frame_size,frame_size,3))
  with tf.variable_scope('RGB'):
    model = i3d.InceptionI3d()
    logits, _ = model(clip_holder, is_training=False,dropout_keep_prob=1)
    logits_dropout = tf.nn.dropout(logits, 1)  # keep probability 1: inference only
    fc_out = tf.layers.dense(logits_dropout, num_classes, use_bias=True)
  # Restore only the variables that live under the 'RGB' scope, keyed by
  # their name without the ':0' tensor suffix.
  variable_map = {
      variable.name.replace(':0', ''): variable
      for variable in tf.global_variables()
      if variable.name.split('/')[0] == 'RGB'
  }
  saver = tf.train.Saver(var_list=variable_map)
  # The context manager closes the session even when restore/run raises.
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, checkpoint_path)
    fc_out2= sess.run(fc_out,feed_dict={clip_holder:np.array([RGB_feature])})
  return softmax(fc_out2,axis=1)[0]
def generate_Flow_feature(skeleton_sequence,video_dir):
  """Compute 9 person-centred optical-flow fields from a video.

  The crop box is derived from the joints of the middle skeleton frame:
  their bounding box is padded (width/8 horizontally, height/5 vertically),
  grown along its shorter side towards a square and applied unchanged to
  every sampled frame. Ten frames are sampled at indices
  ``frameCount*i//10``, cropped, resized to 224x224 and converted to
  grayscale; Dual TV-L1 optical flow is computed between each consecutive
  pair.

  Args:
    skeleton_sequence: non-empty list of frames of ``[x, y]`` joints; a joint
      at (0, 0) is treated as undetected.
    video_dir: path of the video file.

  Returns:
    Array of shape (9, 224, 224, 2). Slots for frame pairs that could not be
    read stay zero.

  Raises:
    ValueError: if ``skeleton_sequence`` is empty.
  """
  num_flows=9
  num_samples=num_flows+1  # one more frame than flow fields
  out_size=224
  if not skeleton_sequence:
    raise ValueError('skeleton_sequence is empty; cannot locate the person to crop')

  def grow_span(lo,hi,diff,limit):
    """Lengthen the span [lo, hi) by about diff while keeping it inside [0, limit]."""
    half=diff//2
    if lo-half<0:
      hi+=diff-lo
      lo=0
    elif hi+half>limit:
      lo-=diff-(limit-hi)
      hi=limit  # the vertical case used to assign 1920 (the width) here
    else:
      lo-=half
      hi+=half
    # Clamp: a negative start would wrap around in numpy slicing and select
    # the wrong rows/columns.
    return max(0,lo),min(limit,hi)

  def square_crop_box(joints,frame_w,frame_h):
    """Return (x1, y1, x2, y2) of the padded, squared box around the joints."""
    seen=[(x,y) for x,y in joints if not (x==0 and y==0)]
    if not seen:
      return 0,0,frame_w,frame_h  # nothing detected: fall back to the full frame
    x1=int(min(x for x,_ in seen))
    y1=int(min(y for _,y in seen))
    x2=int(max(x for x,_ in seen))
    y2=int(max(y for _,y in seen))
    width=x2-x1
    height=y2-y1
    x1=max(0,x1-width//8)
    y1=max(0,y1-height//5)
    x2=min(frame_w,x2+width//8)
    y2=min(frame_h,y2+height//5)
    width=x2-x1
    height=y2-y1
    if width>height:
      y1,y2=grow_span(y1,y2,width-height,frame_h)
    elif width<height:
      x1,x2=grow_span(x1,x2,height-width,frame_w)
    if x2<=x1 or y2<=y1:
      return 0,0,frame_w,frame_h  # degenerate box would give cv2.resize an empty image
    return x1,y1,x2,y2

  sequences=np.zeros((num_flows,out_size,out_size,2))
  cap = cv2.VideoCapture(video_dir)
  try:
    # Use the real frame size; fall back to 1920x1080 (the values that were
    # hard-coded before) when the container does not report it.
    frame_w=int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 1920
    frame_h=int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 1080
    x1,y1,x2,y2=square_crop_box(skeleton_sequence[len(skeleton_sequence)//2],frame_w,frame_h)
    frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    optical_flow=cv2.optflow.createOptFlow_DualTVL1()
    old_frame=None
    cnt=0
    inde=0
    while cnt<frameCount and inde<num_samples:
      ret,frame=cap.read()
      if not ret:
        break
      if cnt==frameCount*inde//num_samples:
        gray=cv2.resize(frame[y1:y2,x1:x2],(out_size,out_size))
        gray=cv2.cvtColor(gray, cv2.COLOR_BGR2GRAY)
        gray=gray.astype('uint8')
        # With fewer than num_samples frames several sample slots map to the
        # same frame index; consume all of them so no flow slot is skipped.
        while inde<num_samples and cnt==frameCount*inde//num_samples:
          if inde!=0:
            sequences[inde-1]=optical_flow.calc(old_frame, gray, None)
          inde+=1
          old_frame=gray
      cnt+=1
  finally:
    cap.release()  # also released when cropping/flow computation raises
  return sequences
def predict_Flow(Flow_feature,checkpoint_path='/content/_Flow_0.759_model-29988'):
  """Classify one optical-flow clip with the I3D network and return class probabilities.

  The default graph is reset and the model is rebuilt and restored from the
  checkpoint on every call.

  Args:
    Flow_feature: a single clip matching the placeholder shape
      (9, 224, 224, 2), as produced by ``generate_Flow_feature``.
    checkpoint_path: TensorFlow checkpoint holding the variables under the
      'Flow' scope. Defaults to the path that was previously hard-coded.

  Returns:
    1-D array of 49 softmax probabilities.
  """
  tf.reset_default_graph()
  batch_size=1
  clip_size=9
  frame_size=224
  num_classes=49
  clip_holder=tf.placeholder(tf.float32,shape=(batch_size,clip_size,frame_size,frame_size,2))
  with tf.variable_scope('Flow'):
    model = i3d.InceptionI3d()
    logits, _ = model(clip_holder, is_training=False,dropout_keep_prob=1)
    logits_dropout = tf.nn.dropout(logits, 1)  # keep probability 1: inference only
    fc_out = tf.layers.dense(logits_dropout, num_classes, use_bias=True)
  # Restore only the variables that live under the 'Flow' scope, keyed by
  # their name without the ':0' tensor suffix.
  variable_map = {
      variable.name.replace(':0', ''): variable
      for variable in tf.global_variables()
      if variable.name.split('/')[0] == 'Flow'
  }
  saver = tf.train.Saver(var_list=variable_map)
  # The context manager closes the session even when restore/run raises.
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, checkpoint_path)
    fc_out2= sess.run(fc_out,feed_dict={clip_holder:np.array([Flow_feature])})
  return softmax(fc_out2,axis=1)[0]
def predict_combine(video_dir,rnn_weights_path='/content/rnn_24.h5',label_map_path='/content/label_map.txt'):
  """Predict the action label of a video by fusing three classifiers.

  Skeletons are extracted from the video, then the skeleton RNN, the RGB I3D
  network and the optical-flow I3D network each produce a 49-way probability
  vector. The three vectors are added element-wise and the label of the
  highest combined score is looked up in the label map.

  Args:
    video_dir: path of the video file.
    rnn_weights_path: Keras weights file for the skeleton RNN.
    label_map_path: pickled mapping from class index to label name.

  Returns:
    ``(combined_prediction, label)``. ``combined_prediction`` is the SUM of
    the three probability vectors, so its entries add up to about 3 rather
    than 1; divide by 3 if a normalised distribution is needed.

  Raises:
    ValueError: if no usable skeleton could be extracted from the video.
  """
  skeleton_sequence=extract_skeleton_sequences(video_dir)
  # Every feature extractor indexes into the skeleton sequence, so fail here
  # with a clear message instead of an IndexError further down.
  if not skeleton_sequence:
    raise ValueError('no usable skeleton was extracted from {}'.format(video_dir))
  rnn_model=create_model_rnn()
  rnn_model.load_weights(rnn_weights_path)
  rnn_feature=generate_rnn_feature(skeleton_sequence)
  rnn_prediction=rnn_model.predict(np.array([rnn_feature]))[0]
  RGB_feature=generate_RGB_feature(skeleton_sequence,video_dir)
  RGB_prediction=predict_RGB(RGB_feature)
  Flow_feature=generate_Flow_feature(skeleton_sequence,video_dir)
  Flow_prediction=predict_Flow(Flow_feature)
  combined_prediction=rnn_prediction+RGB_prediction+Flow_prediction
  print('Probability vector: ',combined_prediction)
  # SECURITY: unpickling executes arbitrary code embedded in the file; only
  # point label_map_path at a file from a trusted source.
  with open(label_map_path, 'rb') as f:
    label_map = pickle.load(f)
  label=label_map[np.argmax(combined_prediction)]
  print('label is {}'.format(label))
  return combined_prediction,label
# Run the full pipeline on one sample clip. predict_combine prints the
# combined score vector and the predicted label, and returns both.
video_dir='/content/S001C001P006R001A045_rgb.avi'
combined_prediction,label=predict_combine(video_dir)

Starting OpenPose demo...
Configuring OpenPose...
Starting thread(s)...
Auto-detecting all available GPUs... Detected 1 GPU(s), using 1 of them starting at GPU 0.
OpenPose demo successfully finished. Total time: 8.052764 seconds.
INFO:tensorflow:Restoring parameters from /content/_RGB_0.754_model-34986
INFO:tensorflow:Restoring parameters from /content/_Flow_0.759_model-29988
Probability vector:  [1.12175076e-04 5.01777837e-03 3.93991417e-04 7.97343552e-02
 8.62550177e-03 1.10647588e-05 1.37678808e-06 5.51613187e-03
 2.56613639e-05 3.35526565e-04 5.04559418e-03 4.84976582e-02
 3.43079650e-04 3.82935745e-04 5.18271583e-04 2.54493680e-05
 1.40895736e-05 1.71839815e-07 1.91327970e-04 5.97608505e-06
 3.40987113e-04 1.50512335e-06 1.78630289e-05 1.60642685e-06
 5.39506190e-02 3.87948489e-07 1.01760515e-05 2.09782476e-04
 2.23985151e-03 3.10586870e-01 1.08327229e-04 1.38224714e-04
 5.70704006e-02 9.28281224e-04 5.51462919e-03 6.75323140e-03
 2.60998495e-03 1.39021940e-05 2.42238700e-01 2.488

(array([1.12175076e-04, 5.01777837e-03, 3.93991417e-04, 7.97343552e-02,
        8.62550177e-03, 1.10647588e-05, 1.37678808e-06, 5.51613187e-03,
        2.56613639e-05, 3.35526565e-04, 5.04559418e-03, 4.84976582e-02,
        3.43079650e-04, 3.82935745e-04, 5.18271583e-04, 2.54493680e-05,
        1.40895736e-05, 1.71839815e-07, 1.91327970e-04, 5.97608505e-06,
        3.40987113e-04, 1.50512335e-06, 1.78630289e-05, 1.60642685e-06,
        5.39506190e-02, 3.87948489e-07, 1.01760515e-05, 2.09782476e-04,
        2.23985151e-03, 3.10586870e-01, 1.08327229e-04, 1.38224714e-04,
        5.70704006e-02, 9.28281224e-04, 5.51462919e-03, 6.75323140e-03,
        2.60998495e-03, 1.39021940e-05, 2.42238700e-01, 2.48855678e-04,
        1.68429466e-03, 3.55500561e-06, 6.98961085e-05, 1.24169510e-05,
        2.07905722e+00, 6.82784691e-02, 1.18195848e-03, 6.23669708e-03,
        5.69292204e-03], dtype=float32), 'chest pain')