In [6]:
import numpy as np
import math
import random
import pandas as pd
import os
import matplotlib.pyplot as plt
import cv2
import glob
from tqdm import tqdm
import pickle
import scipy.ndimage.interpolation as inter
from scipy.signal import medfilt 
from scipy.spatial.distance import cdist

from keras.optimizers import *
from keras.models import Model
from keras.layers import *
from keras.layers.core import *
from tensorflow.keras.callbacks import *
from keras.layers.convolutional import *
from keras import backend as K

import tensorflow as tf
# import tensorflow as tf
from tensorflow import keras
from sklearn import preprocessing

In [2]:
from utils import  AverageMeter, LevenshteinDistance, Queue

In [12]:
random.seed(1234)

class Config():
    def __init__(self):
        self.frame_l = 32 # the length of frames
        self.joint_n = 20 # the number of joints
        self.joint_d = 3 # the dimension of joints
        self.clc_num = 2 # the number of class, (= 8 if using subsets)
        self.feat_d = 190
        self.filters = 64
        self.nd = 60
C = Config()

In [13]:
def zoom(p,target_l=32,joints_num=20,joints_dim=3):
    l = p.shape[0]
    p_new = np.empty([target_l,joints_num,joints_dim]) 
    for m in range(joints_num):  # 20 khớp
        for n in range(joints_dim): # 3 tọa dộ x,y,z
            
            p[:,m,n] = medfilt(p[:,m,n],3)  # median filter, kernel_size = 3- shape vẫn thế 
            p_new[:,m,n] = inter.zoom(p[:,m,n],target_l/l)[:target_l]         
    return p_new

def sampling_frame(p,C):
    full_l = p.shape[0] # full length
    if random.uniform(0,1)<0.5: # aligment sampling
        valid_l = np.round(np.random.uniform(0.9,1)*full_l)
        s = random.randint(0, full_l-int(valid_l))
        e = s+valid_l # sample end point
        p = p[int(s):int(e),:,:]    
    else: # without aligment sampling
        valid_l = np.round(np.random.uniform(0.9,1)*full_l)
        index = np.sort(np.random.choice(range(0,full_l),int(valid_l),replace=False))
        p = p[index,:,:]
    p = zoom(p,C.frame_l,C.joint_n,C.joint_d)
    return p

from scipy.spatial.distance import cdist
def get_CG(p,C):
    M = []
    iu = np.triu_indices(C.joint_n,1,C.joint_n)
    for f in range(C.frame_l):
        #distance max 
        d_m = cdist(p[f],np.concatenate([p[f],np.zeros([1,C.joint_d])]),'euclidean')       
        d_m = d_m[iu] 
        M.append(d_m)
    M = np.stack(M)   
    return M

def norm_train(p):
    # normolize to start point, use the center for hand case
    # p[:,:,0] = p[:,:,0]-p[:,3:4,0]
    # p[:,:,1] = p[:,:,1]-p[:,3:4,1]
    # p[:,:,2] = p[:,:,2]-p[:,3:4,2]
    # # return p
       
    p[:,:,0] = p[:,:,0]-np.mean(p[:,:,0])
    p[:,:,1] = p[:,:,1]-np.mean(p[:,:,1])
    p[:,:,2] = p[:,:,2]-np.mean(p[:,:,2])
    return p
def norm_train2d(p):
    # normolize to start point, use the center for hand case
    # p[:,:,0] = p[:,:,0]-p[:,3:4,0]
    # p[:,:,1] = p[:,:,1]-p[:,3:4,1]
    # p[:,:,2] = p[:,:,2]-p[:,3:4,2]
    # # return p
       
    p[:,:,0] = p[:,:,0]-np.mean(p[:,:,0])
    p[:,:,1] = p[:,:,1]-np.mean(p[:,:,1])
    # p[:,:,2] = p[:,:,2]-np.mean(p[:,:,2])
    return p
# def normlize_test(p):
#     # normolize to start point, use the center for hand case
#     p[:,:,0] = p[:,:,0]-p[:,1:2,0]
#     p[:,:,1] = p[:,:,1]-p[:,1:2,1]
#     p[:,:,2] = p[:,:,2]-p[:,1:2,2]
#     # p[:,:,0] = p[:,:,0]-np.mean(p[:,:,0])
#     # p[:,:,1] = p[:,:,1]-np.mean(p[:,:,1])
#     # p[:,:,2] = p[:,:,2]-np.mean(p[:,:,2])
#     return p
#     return p

In [14]:
drop_rate = 0.1
def poses_diff(x):
    H, W = x.get_shape()[1],x.get_shape()[2]
    x = tf.subtract(x[:,1:,...],x[:,:-1,...])
    x = tf.image.resize(x,size=[H,W]) 
    return x
def poses_diff_2(x):
    H, W = x.get_shape()[1],x.get_shape()[2]
    # x = tf.subtract(x[:,1:,...],x[:,:-1,...])
    x = tf.image.resize(x,size=[H,W]) 
    return x
def pose_motion_2(D, frame_l):
    x_1 = Lambda(lambda x: poses_diff_2(x))(D)
    x_1 = Reshape((frame_l,-1))(x_1)
    return x_1

def pose_motion(P,frame_l):
    P_diff_slow = Lambda(lambda x: poses_diff(x))(P)
    P_diff_slow = Reshape((frame_l,-1))(P_diff_slow)
    P_fast = Lambda(lambda x: x[:,::2,...])(P)
    P_diff_fast = Lambda(lambda x: poses_diff(x))(P_fast)
    P_diff_fast = Reshape((int(frame_l/2),-1))(P_diff_fast)
    x_1 = Reshape((frame_l,-1))(P)
    return P_diff_slow,P_diff_fast
# def reshape_x_2(D, frame_l):
#     x_1 = Lambda(lambda y: poses_diff_2(y))(D)
#     x_1 = Reshape((frame_l, -1))(D)

def c1D(x,filters,kernel):
    x = Conv1D(filters, kernel_size=kernel,padding='same',use_bias=False)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)
    return x

def block(x,filters):
    x = c1D(x,filters,3)
    x = c1D(x,filters,3)
    return x
    
def d1D(x,filters):
    x = Dense(filters,use_bias=False)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)
    return x

def build_FM(frame_l=32,joint_n=20,joint_d=3,feat_d=190,filters=16, nd=60):   
    M = Input(shape=(frame_l,feat_d))
    P = Input(shape=(frame_l,joint_n,joint_d))
    # D = Input(shape =(frame_l, joint_n, joint_d))
    # x_ = pose_motion_2(D, frame_l)
    diff_slow,diff_fast = pose_motion(P,frame_l)
    


    x = c1D(M,filters*2,1)
    x = SpatialDropout1D(drop_rate)(x)
    x = c1D(x,filters,3)
    x = SpatialDropout1D(drop_rate)(x)
    x = c1D(x,filters,1)
    x = MaxPooling1D(2)(x)
    x = SpatialDropout1D(drop_rate)(x)

    
    # x_1 = c1D(x_1, filters*2,1)
    # x_1 = SpatialDropout1D(drop_rate)(x_1)
    # x_1 = c1D(x_1, filters, 3)
    # x_1 = SpatialDropout1D(drop_rate)(x_1)
    # x_1 = c1D(x_1, filters,1)
    # x_1 = MaxPooling1D(2)(x_1)
    # x_1 = SpatialDropout1D(drop_rate)(x_1)

    x_d_slow = c1D(diff_slow,filters*2,1)
    x_d_slow = SpatialDropout1D(drop_rate)(x_d_slow)
    x_d_slow = c1D(x_d_slow,filters,3)
    x_d_slow = SpatialDropout1D(drop_rate)(x_d_slow)
    x_d_slow = c1D(x_d_slow,filters,1)
    x_d_slow = MaxPool1D(2)(x_d_slow)
    x_d_slow = SpatialDropout1D(drop_rate)(x_d_slow)

    # x = c1D(diff_fast,filters*2,1)
    # x = SpatialDropout1D(drop_rate)(x)
    # x = c1D(x,filters,3) 
    # x = SpatialDropout1D(drop_rate)(x)
    # x = c1D(x,filters,1) 
    # x = SpatialDropout1D(drop_rate)(x)

    x_d_fast = c1D(diff_fast,filters*2,1)
    x_d_fast = SpatialDropout1D(drop_rate)(x_d_fast)
    x_d_fast = c1D(x_d_fast,filters,3) 
    x_d_fast = SpatialDropout1D(drop_rate)(x_d_fast)
    x_d_fast = c1D(x_d_fast,filters,1) 
    x_d_fast = SpatialDropout1D(drop_rate)(x_d_fast)
   
    x = concatenate([x,x_d_slow,x_d_fast])
    x = block(x,filters*2)
    x = MaxPool1D(2)(x)
    x = SpatialDropout1D(drop_rate)(x)
    
    x = block(x,filters*4)
    x = MaxPool1D(2)(x)
    x = SpatialDropout1D(drop_rate)(x)

    x = block(x,filters*8)
    x = SpatialDropout1D(drop_rate)(x)
    
    return Model(inputs=[M,P],outputs=x)


def build_DD_Net(C):
    M = Input(name='M', shape=(C.frame_l,C.feat_d))  
    P = Input(name='P', shape=(C.frame_l,C.joint_n,C.joint_d)) 
    # D = Input(name ='D', shape =(C.frame_l, C.joint_n,C.joint_d))
    FM = build_FM(C.frame_l,C.joint_n,C.joint_d,C.feat_d,C.filters)
    
    x = FM([M,P])

    x = GlobalMaxPool1D()(x)
    
    x = d1D(x,128)
    x = Dropout(0.5)(x)
    x = d1D(x,128)
    x = Dropout(0.5)(x)
    x = Dense(20, activation='softmax')(x)
    
    ######################Self-supervised part
    model = Model(inputs=[M,P],outputs=x)
    return model

In [15]:
DD_Net = build_DD_Net(C)
DD_Net.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 M (InputLayer)                 [(None, 32, 190)]    0           []                               
                                                                                                  
 P (InputLayer)                 [(None, 32, 20, 3)]  0           []                               
                                                                                                  
 model_2 (Functional)           (None, 4, 512)       1733376     ['M[0][0]',                      
                                                                  'P[0][0]']                      
                                                                                                  
 global_max_pooling1d_1 (Global  (None, 512)         0           ['model_2[0][0]']          

In [16]:
DD_Net.load_weights('/home/giang/Downloads/gg1.h5')


In [17]:
import pickle
Train = pickle.load(open("/home/giang/Downloads/MICA_data/clf_ver2/train_clf1.pkl", "rb"))
Test = pickle.load(open("/home/giang/Downloads/MICA_data/clf_ver2/test_clf.pkl1", "rb"))

In [18]:
# Đầu vào là chuỗi khung xương dạng np.ndarray   (số frame x 20 x3 )
# Đầu ra là double feaure đưa vào mô hình predict

def data_generator_test(T,C):
    p = np.copy(T).reshape(-1,20,3)
    p = zoom(p,target_l=C.frame_l,joints_num=C.joint_n,joints_dim=C.joint_d)
    p = norm_train(p)
    M = get_CG(p,C)
    f1 = np.expand_dims(M, axis = 0)
    f2 = np.expand_dims(p, axis = 0)

    return f1,f2
    
    

In [37]:
list = []
for index, data in enumerate(Train['label']):
    if data == 7:
        list.append(index)

In [38]:
len(list)

262

In [39]:
count = 0
for i in list:
    print(Train['label'][i]) # label gốc
    res = DD_Net.predict(data_generator_test(Train['pose'][i],C))[0]
    print(np.argmax(res) + 1) # label predict
    if ( Train['label'][i] == np.argmax(res) + 1):
        count = count+1


    print('==============')
print(count)
print(count/ len(list))

7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7


In [10]:
len(Test['label'])

7050

In [8]:
def data_generator_test(T,C):
    p = np.copy(T)
    p = zoom(p,target_l=C.frame_l,joints_num=C.joint_n,joints_dim=C.joint_d)
    p = norm_train(p)
    M = get_CG(p,C)
    f1 = np.expand_dims(M, axis = 0)
    f2 = np.expand_dims(p, axis = 0)

    return f1,f2

In [9]:
import os
import numpy as np
import matplotlib.pyplot as plt
joint_orders_kinect = [[0,1], [1,2], [2,3], [2,4], [2,8], [4,5], [5,6], [6,7], [8,9], [9,10], [10,11], 
                 [0,12], [0,16], [12,13], [13,14], [14,15], [16,17], [17,18], [18,19]]
joint_orders_mp = [[0, 1], [1, 2], [2, 3], [3, 7], [0, 4], [4, 5], [5, 6], [6, 8], [9, 10], [11, 12], [12, 14], [14, 16],
                 [16, 22], [16, 18], [16, 20], [18, 20], [11, 13], [13, 15], [15, 21], [15, 17], [15, 19], [17, 19], [12, 24],
                 [24, 26], [26, 28], [28, 30], [28, 32], [30, 32], [11, 23], [23, 25], [25, 27], [27, 29], [27, 31], [29, 31],
                 [23, 24]]

def kinect2mp_spec_joint(mp, joint1, joint2):
    kinect = np.zeros(3, dtype=np.float32)
    kinect[0] = (mp[joint1][0] + mp[joint2][0]) / 2
    kinect[1] = (mp[joint1][1] + mp[joint2][1]) / 2
    kinect[2] = (mp[joint1][2] + mp[joint2][2]) / 2
    return kinect

def mp2kinect(mp):
    kinect2mp_list = [[3,0], [4,11], [5,13], [6,15], [8,12], [9,14], [10,16], [12,23],
    [13,25], [14,27], [15,31], [16,24], [17,26], [18,28], [19,32]]
    kinect = np.zeros((20,3), dtype=np.float32)
    for jointID in kinect2mp_list:
        kinect[jointID[0]] = mp[jointID[1]]
    kinect[0] = kinect2mp_spec_joint(mp, 23, 24)
    kinect[2] = kinect2mp_spec_joint(mp, 11, 12)
    kinect[1] = kinect2mp_spec_joint(kinect, 0, 2)
    kinect[11] = kinect2mp_spec_joint(mp, 18, 20)
    kinect[7] = kinect2mp_spec_joint(mp, 17, 19)
    return kinect
    return kinect
def visualize(skeleton_data, joint_orders):
    x = []
    y = []
    plt.axis('equal')
    for coord in skeleton_data:
        coord = list(map(float, coord))
        x.append(coord[0])
        y.append(coord[1])
    plt.scatter(x, y, color = "green")
    for joint_order in joint_orders:
        x_coord = [x[joint_order[0]], x[joint_order[1]]]
        y_coord = [y[joint_order[0]], y[joint_order[1]]]
        plt.plot(x_coord, y_coord, color=plt.cm.gray(0))

In [10]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [11]:
mp_pose = mp.solutions.pose # pose model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [12]:
def mediapipe_detection(image, model):


    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False               # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

In [13]:
def draw_landmarks(image, results):

    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS) # Draw pose connections

In [14]:
def draw_styled_landmarks(image, results):

    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 

In [15]:
def extract_keypoints(results):
    if results.pose_world_landmarks:
        pose = np.array([[res.x, res.y, res.z] for res in results.pose_world_landmarks.landmark]) 

    return pose

In [16]:
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

In [17]:
labels = [ 'No Action', 'Action' ]


In [18]:
from utils import  AverageMeter, LevenshteinDistance, Queue

In [19]:
import ntpath
video_path = '/home/giang/Downloads/MICA_data/MICA-Action2020/Subject05Cuong/Kinect_1/color.avi'
file_name = ntpath.basename(video_path)


In [20]:
colors = [(245,117,16), (117,245,16)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,90+num*40), (int(prob*100), 120+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 115+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [22]:
det_strategy = 'ma'
det_selected_queue = np.zeros(2, )
myqueue_det = Queue(6, 2)
window_size = 16
sequence = []
sentence = []
predictions = []
threshold = 0.5
label = None
prev_frame_time = 0
new_frame_time = 0
global res
cap = cv2.VideoCapture('/home/giang/Downloads/MICA_data/MICA-Action2020/Subject05Cuong/Kinect_1/color.avi')
# Set mediapipe model 
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
vid_writer = cv2.VideoWriter('res_{}'.format(
        file_name), fourcc, 25, (width, height))
with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()
        frame = cv2.rotate(frame, cv2.ROTATE_180) 

        # Make detections
        image, results = mediapipe_detection(frame, pose)
        
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        # 33x3
        keypoints = extract_keypoints(results)
        # Chuyển thành list
        #keypoints = keypoints.tolist()

        
        if len(sequence) < window_size:
            # sequence là list mảng numpy 33x3 
            sequence.append(keypoints)
            new_frame_time = time.time()
            fps = 1/(new_frame_time-prev_frame_time)
            prev_frame_time = new_frame_time
        else:
            # 
            # chuyển sang biến mới np array
            sequence1 = [mp2kinect(sequence[i]) for i in range(len(sequence))]
            sequence1 = np.array(sequence1)
            #print(sequence1.shape)
            outputs_det = DD_Net.predict(data_generator_test(sequence1,C))[0]
            
            myqueue_det.enqueue(outputs_det.tolist())
            det_selected_queue = myqueue_det.median
            prediction_det = np.argmax(outputs_det)
            #print(res)
            #idx = np.argmax(res)
            label =  labels[prediction_det]
            
            
            
            
            # bug here 
            sequence.pop(0)
            sequence.append(keypoints)
            new_frame_time = time.time()
            fps = 1/(new_frame_time-prev_frame_time)
            prev_frame_time = new_frame_time
            
            
            
        
        fps = str(fps)
        fps = fps[0:4]
        

            

        image = cv2.rotate(image, cv2.ROTATE_180)
        image = prob_viz(det_selected_queue, labels, image, colors)
        

                         
        #image = cv2.flip(image, 1)
        if label is not None:
            cv2.putText(image, ' {}, probs {:.2f} %'.format(label, (outputs_det[prediction_det])*100),
               (int(width-400), height-50), cv2.FONT_HERSHEY_COMPLEX, 0.9, (102, 255, 255), 2)
        cv2.putText(image, 'FPS: {}'.format(fps), (7, 70), cv2.FONT_HERSHEY_SIMPLEX, 3, (100, 255, 0), 3, cv2.LINE_AA) 
        # Show to screen
        vid_writer.write(image)
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

UnboundLocalError: local variable 'pose' referenced before assignment

In [24]:
result = []
result.append((1,2))
result.append((3,4))
result

[(1, 2), (3, 4)]

In [28]:
predicted = np.array(result)

In [29]:
predicted

array([[1, 2],
       [3, 4]])

In [30]:
predicted[:,1]

array([2, 4])

In [23]:
cv2.destroyAllWindows()

In [24]:
sequence1.shape

(16, 20, 3)

In [25]:
outputs_det = DD_Net.predict(data_generator_test(sequence1,C))[0]
outputs_det

array([0.01010437, 0.9898956 ], dtype=float32)