# 1 - import dependencies

In [1]:
import numpy as np
import cv2 
import os
import mediapipe as mp
import tensorflow as tf 
from tensorflow import keras
import matplotlib.pyplot as plt
import time
import pandas as pd

# 2 - keypoint extraction and drawing

### 

- link to mediapipe documentation and info about keypoints numbers
- https://google.github.io/mediapipe/solutions/hands.html




- link to mediapipe code for drawing (to draw the point myself)
- https://github.com/google/mediapipe/blob/master/mediapipe/python/solutions/drawing_utils.py



In [3]:
# Indices of the landmarks kept from each detection group, in the order
# [pose, left hand, right hand]. 9 + 11 + 11 = 31 points, i.e. 62 values per
# frame once x and y are flattened.
# NOTE(review): see the MediaPipe documentation linked above for what each index
# denotes (presumably face/shoulder/elbow/wrist points for the pose and the wrist,
# knuckles and fingertips for the hands) — confirm.
pose_selected_landmarks = [
    [0,2,5,11,13,15,12,14,16], # pose landmarks
    [0,2,4,5,8,9,12,13,16,17,20], # left hand
    [0,2,4,5,8,9,12,13,16,17,20], # right hand
]


mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
# One shared Holistic model instance; the keypoint extraction code passes it to mediapipe_detection.
holistic = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

# holistic model process image and return the results as keypoints
def mediapipe_detection(image,model):
    """Run ``model`` on a BGR frame and return ``(bgr_image, results)``.

    The frame is converted BGR -> RGB for the model, marked read-only while
    ``model.process`` runs, then made writeable again and converted back to BGR.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is read-only only for the duration of the model call.
    rgb_frame.flags.writeable = False
    detection = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, detection

                
                
def extract_keypoints(results):
    """Flatten the selected pose / left-hand / right-hand landmarks into one vector.

    For each of the three groups, the (x, y) of every index listed in the matching
    entry of ``pose_selected_landmarks`` is taken. A group that was not detected
    contributes zeros of the same length, so the output length is constant.

    Returns:
        1-D numpy array: pose values, then left hand, then right hand.
    """
    detected_groups = (
        results.pose_landmarks,
        results.left_hand_landmarks,
        results.right_hand_landmarks,
    )

    parts = []
    for group, wanted in zip(detected_groups, pose_selected_landmarks):
        if group:
            points = group.landmark
            coords = np.array([[points[i].x, points[i].y] for i in wanted]).flatten()
        else:
            # Missing detection: zero placeholders keep the vector length fixed.
            coords = np.zeros(len(wanted) * 2)
        parts.append(coords)

    return np.concatenate(parts)
            


def draw_landmark_from_results(image,results):
    """Draw the selected landmarks of a detection result onto ``image`` in place.

    Args:
        image: image array whose first two dimensions are (rows, cols).
        results: detection result exposing ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.

    Landmarks that cannot be mapped to a pixel (the mediapipe helper returns
    ``None`` for them, e.g. when a point lies outside the frame) are skipped
    instead of being passed to ``cv2.circle``, which would raise.
    """
    image_rows, image_cols = image.shape[:2]

    detected_groups = (
        results.pose_landmarks,
        results.left_hand_landmarks,
        results.right_hand_landmarks,
    )

    for group, wanted in zip(detected_groups, pose_selected_landmarks):
        if not group:
            continue
        points = group.landmark
        for idx in wanted:
            point = points[idx]
            landmark_px = mp_drawing._normalized_to_pixel_coordinates(
                point.x, point.y, image_cols, image_rows)
            if landmark_px is None:
                # Off-frame / invalid coordinate: nothing to draw.
                continue
            cv2.circle(image, landmark_px, 2, (0,0,255), 4)


def draw_landmark_from_array(image,keyPoints):
    """Draw keypoints stored as a flat ``[x0, y0, x1, y1, ...]`` sequence onto ``image``.

    Args:
        image: image array whose first two dimensions are (rows, cols); drawn on in place.
        keyPoints: flat sequence of normalized coordinates.

    A pair is skipped when either coordinate is exactly 0 (the placeholder written
    for undetected landmarks), or when the mediapipe helper cannot map it to a
    pixel and returns ``None`` (e.g. a value outside the frame). Passing that
    ``None`` on to ``cv2.circle`` would raise.
    """
    image_rows, image_cols = image.shape[:2]

    for i in range(len(keyPoints)//2):
        x = keyPoints[i*2]
        y = keyPoints[i*2+1]
        if x == 0 or y == 0:
            continue
        landmark_px = mp_drawing._normalized_to_pixel_coordinates(
            x, y, image_cols, image_rows)
        if landmark_px is None:
            continue
        cv2.circle(image, landmark_px, 2, (0,0,255), 4)

                

 
        

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


# 3 - read and process data

### 3.1 get action list

In [4]:
# Root of the dataset, given relative to the notebook's working directory.
data_path = "../../../data/Datasets/"

# The label tables are read with explicit column names 'sample' and 'id'
# (presumably the CSV files have no header row — confirm).
train_labels = pd.read_csv(os.path.join(data_path,"train_labels.csv"),names=['sample','id'])
validation_labels = pd.read_csv(os.path.join(data_path,"validation_labels.csv"),names=['sample','id'])
test_labels = pd.read_csv(os.path.join(data_path,"test_labels.csv"),names=['sample','id'])
# class_id.csv is read with pandas' default header handling; its 'EN' column is used for action names.
class_id = pd.read_csv(os.path.join(data_path,"class_id.csv"))

# Directories holding the videos of each split.
train_path = os.path.join(data_path,"train")
val_path = os.path.join(data_path,"val")
test_path = os.path.join(data_path,"test")



In [6]:
# Restrict the notebook to 10 signs.

actions_ids= [
    0,1,2,3,4,5,6,7,8,9 # first 10 actions
]
n_actions = len(actions_ids)

# Names of the selected classes, taken from the 'EN' column of class_id.
actions = list(np.array(class_id['EN'])[actions_ids])
# Overrides the name of the fourth selected action.
# NOTE(review): the reason for this override is not visible in the notebook.
actions[3] = "meal"


### 3.2 collecting the data paths

In [7]:
def check_file(file_path):
    """Return True if ``file_path`` can be opened for reading, False on IOError."""
    try:
        with open(file_path):
            pass
    except IOError:
        return False
    return True
    

def construct_path(file,data_mode="train"):
    """Build ``<data_path>/<data_mode>/<file>_color.mp4`` for a sample name."""
    video_name = file + "_color.mp4"
    return os.path.join(data_path, data_mode, video_name)
    
    
def _labels_for_mode(data_mode):
    """Return the label table for ``data_mode`` ('train', 'val' or 'test')."""
    if data_mode=="train":
        return train_labels
    if data_mode=="val":
        return validation_labels
    if data_mode=="test":
        return test_labels
    # Previously an unknown mode fell through and failed later with an
    # UnboundLocalError; fail early with a message that names the problem.
    raise ValueError(
        f"unknown data_mode {data_mode!r}; expected 'train', 'val' or 'test'")


def get_data(value,data_mode):
    """Return ``(paths, labels)`` for every existing video of class ``value``.

    Args:
        value: class id to select (matched against the 'id' column).
        data_mode: 'train', 'val' or 'test'.

    Returns:
        Tuple of the video paths that can be opened and a list containing
        ``value`` once per returned path.

    Raises:
        ValueError: if ``data_mode`` is not one of the three known splits.
    """
    paths = get_one_class(value,data_mode)
    return paths,[value]*len(paths)


def get_one_class(value,data_mode):
    """Return the paths of every existing video of class ``value`` in a split.

    Args:
        value: class id to select (matched against the 'id' column).
        data_mode: 'train', 'val' or 'test'.

    Returns:
        List of video paths, in label-table order, keeping only files for
        which ``check_file`` succeeds.

    Raises:
        ValueError: if ``data_mode`` is not one of the three known splits.
    """
    label_dic = _labels_for_mode(data_mode)
    samples = label_dic[label_dic['id']==value]['sample']
    candidates = [construct_path(sample,data_mode) for sample in samples]
    return [path for path in candidates if check_file(path)]



def get_list(d,data_mode):
    """Collect video paths and labels for every class id in ``d``.

    Calls ``get_data`` once per class id, in the order given, and concatenates
    the results.

    Returns:
        Tuple ``(paths, labels)`` of two parallel lists.
    """
    all_paths, all_labels = [], []
    for action in d:
        class_paths, class_labels = get_data(action, data_mode)
        all_paths.extend(class_paths)
        all_labels.extend(class_labels)
    return all_paths, all_labels

In [9]:






# Build, for each split, the list of video paths (X) and the parallel list of class ids (Y)
# for the selected actions.
train_X,train_Y = get_list(actions_ids,"train")
val_X,val_Y = get_list(actions_ids,"val")
test_X,test_Y = get_list(actions_ids,"test")


# Sanity check: each X/Y pair must have the same length.
print(
    len(train_X ),
len(train_Y),
len(val_X ),
len(val_Y),
len(test_X ),
len(test_Y)
)


1241 1241 190 190 168 168


### 3.2 get frames from path

In [10]:
# remove later but save it 
class CustomDataset(tf.keras.utils.Sequence):
    """Keras ``Sequence`` serving ``(X, y)`` batches sliced from in-memory data.

    When ``image_generator`` is truthy, every frame of every video in a batch is
    passed through ``image_generator.flow`` and replaced by the first result it
    yields.
    """

    def __init__(self, batch_size=32,data=None,labels=None,image_generator=None ):
        self.batch_size = batch_size
        self.data = data
        self.labels = labels
        self.image_generator = image_generator

    def __len__(self):
        # Number of batches; floor division, so a trailing partial batch is not counted.
        n_batches = len(self.data) // self.batch_size
        return n_batches

    def __getitem__(self, index):
        # Slice out one batch of samples and their labels.
        start = index * self.batch_size
        stop = start + self.batch_size
        y = self.labels[start:stop]
        X = self.data[start:stop]

        if self.image_generator:
            augmented_videos = []
            for video in X:
                augmented_frames = []
                for frame in video:
                    single_frame_batch = np.expand_dims(frame, axis=0)
                    # Only the first item produced by flow() is used.
                    first_result = next(iter(self.image_generator.flow(single_frame_batch)))
                    augmented_frames.append(np.squeeze(first_result, axis=0))
                augmented_videos.append(augmented_frames)
            X = np.array(augmented_videos)

        return X, y

    def on_epoch_end(self):
        # Reshuffle samples and labels with the same permutation after each epoch.
        order = np.random.permutation(len(self.data))
        self.data = self.data[order]
        self.labels = self.labels[order]
    
    
# test_ds = CustomDataset(batch_size=32,data=test_data,labels=test_labels)
# model.fit(train_ds,validation_data=test_ds,epochs=10)

In [11]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import random

class VideoProcessing:
    """Sample a fixed number of frames from a video and extract keypoints from them.

    Attributes:
        transformer: optional augmentation object exposing ``flow(batch, seed=...)``
            (the data generator); ``None`` disables augmentation.
        num_frames: number of frames kept per video.
        seed: seed passed to ``transformer.flow``; re-drawn once per video by
            ``extract_keypoints_video`` (presumably so that all frames of one
            video receive the same random transform — confirm).
    """

    def __init__(self,num_frames,transformer=None):
        self.transformer = transformer # the datagenerator class
        self.num_frames = num_frames   # the num_frames per video
        self.seed = random.randint(1,100000000)

    def change_seed(self):
        """Draw a new random seed for subsequent ``transform`` calls."""
        self.seed = random.randint(1,100000000)

    def transform(self,frame):
        """Return ``frame`` after one pass through the transformer, as uint8."""
        batch = np.expand_dims(frame, axis=0)
        # flow() is an iterator; only its first result is needed.
        for trans_frame in self.transformer.flow(batch,seed=self.seed):
            return np.squeeze(trans_frame.astype(np.uint8), axis=0)

    def __capture_frames(self,video_path):
        """Read frames from ``video_path`` and return them as one array.

        At most ``reported_frame_count - 1`` frames are read (same limit as
        before). Reading stops at the first failed ``read()``: the previous
        ``continue`` looped forever when the file held fewer decodable frames
        than its metadata reported. The capture is always released.
        """
        video = cv2.VideoCapture(video_path)
        frames = []
        try:
            video_length = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
            while video.isOpened():
                ret, frame = video.read()
                if not ret:
                    break
                frames.append(frame)
                if len(frames) > (video_length-1):
                    break
        finally:
            video.release()
        return np.array(frames)

    def get_frames(self,video_path,num_frames):
        """Return ``num_frames`` frames sampled at a regular stride from the video.

        Four extra frames are sampled and the first two and last two samples are
        dropped, so the very beginning and end of the clip are not used. The
        transformer, when set, is applied to each sampled frame.
        """
        padded_frames = num_frames + 4

        frames = self.__capture_frames(video_path)
        video_length = len(frames)
        steps = video_length/padded_frames

        count=0
        new_frames=[]
        while count<video_length:
            frame = frames[int(count)]
            if self.transformer is not None:
                frame = self.transform(frame)
            new_frames.append(frame)
            count+=steps

        # Drop the two leading samples and keep the next num_frames.
        return np.array(new_frames[2:num_frames+2])

    def extract_keypoints_video(self,frames=None,path=None,display_text=None):
        """Run keypoint extraction on every frame of one video.

        Args:
            frames: optional pre-loaded frames (list or array). When ``None``
                the frames are read from ``path``.
            path: video path, used only when ``frames`` is ``None``.
            display_text: optional progress text, printed with a carriage return.

        Returns:
            Tuple ``(images, keypoints)`` of arrays with one entry per frame.
        """
        self.change_seed()
        if display_text is not None:
            print(display_text,end="\r")

        # `frames == None` is an element-wise comparison on arrays and made the
        # `if` raise; identity is the correct test.
        if frames is None:
            frames = self.get_frames(path,self.num_frames)

        output_key_points=[]
        output_images=[]

        for frame in frames:
            image, results = mediapipe_detection(frame, holistic)
            output_key_points.append(extract_keypoints(results))
            output_images.append(image)
        return np.array(output_images),np.array(output_key_points)
    
    
    
    
class VideosProcessing:
    """Apply a ``VideoProcessing`` instance to a list of video paths.

    The transformer given at construction is remembered so that augmentation
    can be switched off and back on for the wrapped processor.
    """

    def __init__(self,transformer,num_frames):
        self.processor = VideoProcessing(transformer=transformer,num_frames=num_frames)
        self.num_frames = num_frames
        self.transformer = transformer

    def stop_transofrmation(self):
        """Disable augmentation on the wrapped processor."""
        self.processor.transformer = None

    def enable_transformation(self):
        """Restore the transformer that was given at construction."""
        self.processor.transformer = self.transformer

    def _extract_each(self,array):
        # Yield (frames, keypoints) per video, passing the progress text along.
        total = len(array)
        for position, video in enumerate(array, start=1):
            yield self.processor.extract_keypoints_video(
                path=video,
                display_text=f"processing video : {position}/{total}",
            )

    def convert_get_both(self,array):
        """Return ``(frames, keypoints)`` arrays with one entry per video in ``array``."""
        frames_output = []
        output = []
        for frames, keypoints in self._extract_each(array):
            frames_output.append(frames)
            output.append(keypoints)
        return np.array(frames_output),np.array(output)

    def convert(self,array):
        """Return the keypoints array for every video in ``array`` (frames are discarded)."""
        return np.array([keypoints for _, keypoints in self._extract_each(array)])

    
    
# Augmentation settings used for the transformed copies of the training videos.
# NOTE(review): horizontal_flip is left disabled — presumably because mirroring
# would swap left and right hands; confirm.
datagen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.2,
        zoom_range=0.2,
        #horizontal_flip=True,
        fill_mode='nearest')



# Processors configured for 16 frames per video: one for a single video, one for lists of videos.
video_processing_obj = VideoProcessing(transformer=datagen,num_frames=16)
video_list_obj = VideosProcessing(transformer=datagen,num_frames=16)


### 3.3 test extracted images and frames

#### 3.3.1 get and view keypoints

In [299]:
# Take the training videos of class 0 and extract frames + keypoints for the first five,
# with augmentation enabled.
one_class = get_one_class(0,"train")
# video_list_obj.stop_transofrmation()
video_list_obj.enable_transformation()
frames_list,keypoints_list = video_list_obj.convert_get_both(one_class[0:5])

processing video : 1/1

In [374]:
data = get_one_class(1,"test")[:5] # first 5 videos of class 1 from the test split
video_list_obj.stop_transofrmation()
frames_list,keypoints_list = video_list_obj.convert_get_both(data) # convert them to frames and keypoints (no augmentation)

processing video : 5/5

In [376]:
# images,keypoints = video_processing_obj.extract_keypoints_video(path=train_X[250])

# Play back each extracted video with its keypoints drawn on top of the frames.
for video_num in range(len(frames_list)):
    images = frames_list[video_num]
    keypoints = keypoints_list[video_num]

    # 16 is the num_frames the processors were configured with.
    for index in range(16):

        image = images[index]

        keypoint = keypoints[index]

        # Draws in place on the stored frame.
        draw_landmark_from_array(image,keypoint)

        cv2.imshow("frame",image)
        # 200 ms per frame; 'q' leaves the inner loop only, i.e. skips to the next video.
        if cv2.waitKey(200) & 0xFF == ord('q'):
            break
        

cv2.destroyAllWindows()

#### 3.3.2 view keypoints only

In [249]:
# Extract keypoints only (no frames kept) for two training videos of class 5, with augmentation enabled.
one_class = get_one_class(5,"train")
# video_list_obj.stop_transofrmation()
video_list_obj.enable_transformation()
keypoints_list = video_list_obj.convert(one_class[60:62])

processing video : 5/5

In [251]:
# images,keypoints = video_processing_obj.extract_keypoints_video(path=train_X[250])

# Play back the keypoints alone, drawn on a blank white canvas.
for keypoints in keypoints_list:

    for index in range(16):

        # Fresh 512x512 white image for every frame.
        image = np.zeros((512,512,3))+255

        keypoint = keypoints[index]

        draw_landmark_from_array(image,keypoint)

        cv2.imshow("frame",image)
        # 200 ms per frame; 'q' leaves the inner loop only, i.e. skips to the next video.
        if cv2.waitKey(200) & 0xFF == ord('q'):
            break
        

cv2.destroyAllWindows()

#### 3.3.3 view keypoints from numpy array

In [43]:
def fil_keypoints(array):
    """Fill zero-valued keypoint coordinates by linear extrapolation.

    Works on a copy of ``array`` (one row per frame). From the third frame on,
    every value equal to 0 is replaced by ``2 * previous - previous_previous``
    taken from the two preceding frames of the copy, so values filled in earlier
    frames feed later ones. The first two frames are never modified.
    """
    filled = array.copy()
    for i, frame in enumerate(filled[2:], start=2):
        one_back = filled[i - 1]
        two_back = filled[i - 2]
        for j, value in enumerate(frame):
            if value == 0:
                frame[j] = one_back[j]*2 - two_back[j]
    return filled
                
        

In [45]:
# keypoints_list = np.load(os.path.join("key_points","val",'1.npy'))

# NOTE(review): this rebinds `test_path` (previously the test directory path) to the
# list of validation video paths.
test_path,_ =  get_list(actions_ids,"val")

# NOTE(review): this expects val_X to hold keypoint arrays, i.e. the cell that loads
# val_X from the key_points directory must have been run first; right after the
# path-collection cell val_X is a list of file paths instead.
keypoints_list = val_X[:15] # use this after loading val_X from the key_points directory
new_video_processing = VideoProcessing(transformer=None,num_frames=16)
# Replay the original frames with the zero-filled (extrapolated) keypoints drawn on top.
# NOTE(review): assumes val_X[i] and test_path[i] refer to the same video — confirm the ordering.
for video_index,keypoints in enumerate(keypoints_list):
    images = new_video_processing.get_frames(test_path[video_index],16)
    new_keypoints = fil_keypoints(keypoints)
    for index in range(16):

        image = images[index]

        keypoint = new_keypoints[index]

        draw_landmark_from_array(image,keypoint)

        cv2.imshow("frame",image)
        # 200 ms per frame; 'q' leaves the inner loop only, i.e. skips to the next video.
        if cv2.waitKey(200) & 0xFF == ord('q'):
            break
        

cv2.destroyAllWindows()

In [350]:
# Load the saved validation keypoints of class 1 and play them back on a white canvas.
keypoints_list = np.load(os.path.join("key_points","val",'1.npy'))

for keypoints in keypoints_list:

    for index in range(16):

        # Fresh 512x512 white image for every frame.
        image = np.zeros((512,512,3))+255

        keypoint = keypoints[index]

        draw_landmark_from_array(image,keypoint)

        cv2.imshow("frame",image)
        # 200 ms per frame; 'q' leaves the inner loop only, i.e. skips to the next video.
        if cv2.waitKey(200) & 0xFF == ord('q'):
            break
        

cv2.destroyAllWindows()

In [40]:
# Close any OpenCV window left open by an interrupted viewer cell.
cv2.destroyAllWindows()

# 4 - extract keypoints and save them

### 4.1  extract training

In [12]:
# Collect the real data (iteration 0) plus 5 augmented copies (iterations 1-5).
num_training_iterations = 6

for transformation_index in range(num_training_iterations):
    dir_name = os.path.join("key_points","train",str(transformation_index))
    # Creates missing parent directories too and tolerates an existing directory,
    # without hiding unrelated errors the way the bare `except: pass` did.
    os.makedirs(dir_name, exist_ok=True)
    print("iteration :",transformation_index," "*40)

    # Iteration 0 stores the untransformed videos; later iterations are augmented.
    if(transformation_index == 0):
        video_list_obj.stop_transofrmation()
    else:
        video_list_obj.enable_transformation()

    # `actions_ids` is the list defined earlier in the notebook; the loop previously
    # read `actions_id`, which no cell defines, so it failed on a fresh kernel.
    for label in actions_ids:

        path = os.path.join(dir_name,str(label)+".npy")

        # Skip classes that were already extracted so the cell can be resumed.
        if check_file(path):
            print("Label :",label,"already exists")
            continue

        print("Label",label," "*40)
        data = get_one_class(label,"train") # video paths of this class in the training split
        data = video_list_obj.convert(data) # convert them to keypoints
        np.save(path,data)



iteration : 0                                         
Label : 0 already exists
Label : 1 already exists
Label : 2 already exists
Label : 3 already exists
Label : 4 already exists
Label : 5 already exists
Label : 6 already exists
Label : 7 already exists
Label : 8 already exists
Label : 9 already exists
iteration : 1                                         
Label : 0 already exists
Label : 1 already exists
Label : 2 already exists
Label : 3 already exists
Label : 4 already exists
Label : 5 already exists
Label : 6 already exists
Label : 7 already exists
Label : 8 already exists
Label : 9 already exists
iteration : 2                                         
Label : 0 already exists
Label : 1 already exists
Label : 2 already exists
Label : 3 already exists
Label : 4 already exists
Label : 5 already exists
Label : 6 already exists
Label : 7 already exists
Label : 8 already exists
Label : 9 already exists
iteration : 3                                         
Label : 0 already exists
Label

### 4.2  extract validation & testing

In [13]:
# Validation and test keypoints are extracted from the real videos only (no augmentation).

video_list_obj.stop_transofrmation()

for mode in ['val','test']:
    dir_name = os.path.join("key_points",mode)
    # Creates missing parent directories too and tolerates an existing directory,
    # without hiding unrelated errors the way the bare `except: pass` did.
    os.makedirs(dir_name, exist_ok=True)
    print("Mode :",mode)

    # `actions_ids` is the list defined earlier in the notebook; the loop previously
    # read `actions_id`, which no cell defines, so it failed on a fresh kernel.
    for label in actions_ids:

        path = os.path.join(dir_name,str(label)+".npy")

        # Skip classes that were already extracted so the cell can be resumed.
        if check_file(path):
            print("Label :",label,"already exists")
            continue
        print("Label",label," "*40)
        data = get_one_class(label,mode) # video paths of this class in the current split
        data = video_list_obj.convert(data) # convert them to keypoints
        np.save(path,data)
    print(mode,"end"," "*40)



Mode : val
Label : 0 already exists
Label : 1 already exists
Label : 2 already exists
Label : 3 already exists
Label : 4 already exists
Label : 5 already exists
Label : 6 already exists
Label : 7 already exists
Label : 8 already exists
Label : 9 already exists
val end                                         
Mode : test
Label : 0 already exists
Label : 1 already exists
Label : 2 already exists
Label : 3 already exists
Label : 4 already exists
Label : 5 already exists
Label : 6 already exists
Label : 7 already exists
Label : 8 already exists
Label : 9 already exists
test end                                         


### 4.3 load and test all

In [30]:
# both depends on actions_id -> [0,1,2,3,4,5,6,7,8,9]
def load_dir(dir_name,data_temp=None,labels_temp=None,actions_id=None):
    """Load per-class ``<id>.npy`` keypoint files from ``dir_name``.

    Args:
        dir_name: directory holding one ``<class id>.npy`` file per class.
        data_temp: optional array of already-loaded data to append to.
        labels_temp: labels matching ``data_temp``.
        actions_id: iterable of class ids to load. ``None`` or the string
            ``"all"`` loads every ``<integer>.npy`` file found, in ascending
            id order.

    Returns:
        Tuple ``(data, labels)``. Loaded classes are concatenated along axis 0
        after ``data_temp`` (when that is an ndarray); ``labels`` holds the
        class id once per loaded sample. When nothing is loaded, the inputs
        are returned unchanged.
    """
    # The original condition compared the not-yet-assigned loop variable
    # `action_id`, so passing any explicit `actions_id` raised UnboundLocalError.
    if actions_id is None or (isinstance(actions_id, str) and actions_id == "all"):
        # Only consider "<integer>.npy" entries so stray files
        # (checkpoints, notes, ...) do not break the int() conversion.
        actions_id = sorted(
            int(stem)
            for stem, ext in map(os.path.splitext, os.listdir(dir_name))
            if ext == ".npy" and stem.isdigit()
        )

    has_previous = isinstance(data_temp, np.ndarray)
    data_parts = [data_temp] if has_previous else []
    label_parts = [labels_temp] if has_previous else []
    loaded_any = False

    for action_id in actions_id:
        new_array = np.load(os.path.join(dir_name,f"{action_id}.npy"))
        data_parts.append(new_array)
        label_parts.append(np.array([action_id]*len(new_array)))
        loaded_any = True

    if not loaded_any:
        return data_temp,labels_temp

    # One concatenation at the end instead of re-copying the data per class.
    return np.concatenate(data_parts),np.concatenate(label_parts)

def load_mul_dir(parent_dir):
    """Load and concatenate every numbered sub-directory of ``parent_dir``.

    Each sub-directory whose name is an integer is loaded with ``load_dir``, in
    ascending numeric order, and appended to the running data/labels arrays.
    Other entries (stray files, hidden folders) are ignored. Previously the
    number of entries was used as the directory count, which failed as soon as
    anything else was present or the numbering had gaps.

    Returns:
        Tuple ``(data, labels)``; ``(None, None)`` when no numbered
        sub-directory exists.
    """
    numbered_dirs = sorted(
        (
            name for name in os.listdir(parent_dir)
            if name.isdigit() and os.path.isdir(os.path.join(parent_dir, name))
        ),
        key=int,
    )

    data_temp = None
    labels_temp = None
    for name in numbered_dirs:
        dir_name = os.path.join(parent_dir,name)
        data_temp,labels_temp = load_dir(dir_name,data_temp,labels_temp)
    return data_temp,labels_temp


    

In [31]:
# Load the saved keypoints. From here on train_X / val_X / test_X hold keypoint arrays,
# replacing the lists of video paths bound to the same names earlier in the notebook.
train_X,train_Y = load_mul_dir(os.path.join("key_points","train"))
val_X,val_Y = load_dir(os.path.join("key_points","val"))
test_X,test_Y = load_dir(os.path.join("key_points","test"))

In [16]:
# Shapes of the loaded keypoint arrays and their label vectors.
print(
train_X.shape,
    train_Y.shape,
    val_X.shape,
    val_Y.shape,
    test_X.shape,
    test_Y.shape
    
)

(7446, 16, 62) (7446,) (190, 16, 62) (190,) (168, 16, 62) (168,)


In [373]:

# Play back the loaded training keypoints on a white canvas.
# The array is bound to `train_X`; the previous `train_x` was never defined
# and raised a NameError.
keypoints_list = train_X
for keypoints in keypoints_list:

    for index in range(16):

        # Fresh 512x512 white image for every frame.
        image = np.zeros((512,512,3))+255

        keypoint = keypoints[index]

        draw_landmark_from_array(image,keypoint)

        cv2.imshow("frame",image)
        # 200 ms per frame; 'q' leaves the inner loop only, i.e. skips to the next video.
        if cv2.waitKey(200) & 0xFF == ord('q'):
            break


cv2.destroyAllWindows()

# extra

In [None]:
# NOTE(review): this cell has no execution count and uses names that no visible cell
# defines (train_data, val_data, test_data, val_labels). `train_labels` and
# `test_labels` are the label DataFrames loaded earlier, so indexing them with `perm`
# is presumably not what was intended — confirm or remove this cell.
# Shuffle the training samples and labels with one shared permutation.
perm = np.random.permutation(len(train_data))

train_X = train_data[perm]
train_Y = train_labels[perm]
val_X = val_data
val_Y = val_labels
test_X = test_data
test_Y = test_labels


In [223]:
# Shapes of the split arrays and their label vectors.
# NOTE(review): the recorded output below shows 20 frames per video, unlike the 16 used
# elsewhere in the notebook — presumably left over from an earlier run; confirm.
print(
train_X.shape,
train_Y.shape,
val_X.shape,
val_Y.shape,
test_X.shape,
test_Y.shape,
)

(4482, 20, 62) (4482,) (118, 20, 62) (118,) (100, 20, 62) (100,)
