# 1 - install dependencies

In [1]:
import numpy as np
import cv2 
import os
import mediapipe as mp
import tensorflow as tf 
from tensorflow import keras
import matplotlib.pyplot as plt
import time
import pandas as pd

# 2 - keypoints extractions and drawing

In [2]:
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

num_hand_marks = 21
num_pose_marks = 33


# holistic model process image and return the results as keypoints
def mediapipe_detection(image,model):
    image  = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image  = cv2.cvtColor(image,cv2.COLOR_RGB2BGR)
    return image,results
    
def draw_styled_landmarks(image,results):
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 
    

# read the keypoints and extract them and process them
def extract_keypoints(results):
    # extract pose marks
    if results.pose_landmarks:
        pose = np.array([ [res.x,res.y,res.z,res.visibility] for res in results.pose_landmarks.landmark ]).flatten()
    else:
        pose = np.zeros(num_pose_marks*4)
    
    # extract left hand
    if results.left_hand_landmarks:
        left_hand = np.array([ [res.x,res.y,res.z] for res in results.left_hand_landmarks.landmark ]).flatten()
    else:
        left_hand = np.zeros(num_hand_marks*3)
        
        
    # extract right hand
    if results.right_hand_landmarks:
        right_hand = np.array([ [res.x,res.y,res.z] for res in results.right_hand_landmarks.landmark ]).flatten()
    else:
        right_hand = np.zeros(num_hand_marks*3)
    
    return np.concatenate([pose,left_hand,right_hand])
    

# 3 - read and process data

### 3.1 collect data that in classes 0,1,2,3,4, and available in the data/train directory

In [23]:
data_path = "../../data"
    
data_train_path = os.path.join(data_path,"my_data")

actions = list(os.listdir(data_train_path))
n_actions = len(actions)
dic={}
counter=0
for action in actions:
    dic[action] = counter
    counter+=1

In [26]:
x=[]
y=[]
for action in actions:
    videos = os.listdir(os.path.join(data_train_path,action))
    for video in videos:
        x.append(os.path.join(data_train_path,action,video))
        y.append(dic[action])

train = x
labels = y

In [27]:
print(len(x),len(y))

250 250


### 3.2 process videos into frames and label to cateogries

In [28]:
def capture_frames(video_path):
    video = cv2.VideoCapture(video_path)
    video_length = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
    # count = 0
    # frame_rate = video.get(cv2.CAP_PROP_FRAME_COUNT)

    count=0
    frames = []
    while video.isOpened():
      # frame_id = video.get(0)
        ret, frame = video.read()
        
        if not ret:
            continue
        frames.append(frame)
        count += 1
        # If there are no more frames left
        if (count > (video_length-1)):
          # Release the feed
          video.release()
        # if frame_id % math.floor(1) == 0:
        #     file_name = f"{video_path}_frame_{count}.jpg"
        #     count += 1
        #     # print(f"File name: {file_name}")
        #     frames.append(frame)
        #     # cv2.imwrite(file_name, frame)
    video.release()
    return np.array(frames)


def get_frames(video_path,num_frames):
    frames = capture_frames(video_path)
    video_length = len(frames)
    steps = video_length/num_frames
    count=0
    new_frames=[]
    while count<video_length:
        frame = frames[int(count)]
#         frame = cv2.resize(frame, (256, 256))
#         frame =  (frame/255.0).astype(np.float16)
        new_frames.append(frame)
        count+=steps

    s = np.array(new_frames[:num_frames])
    print("SHAPE",s.shape)
    return np.array(new_frames[:num_frames])


In [29]:
def extract_keypoints_video(path,i=-1):
    if(i):
        print(i,end="\t")
    frames = get_frames(path,20)
    output_key_points=[]
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        for frame in frames:
            image, results = mediapipe_detection(frame, holistic)
            output_key_points.append(extract_keypoints(results))
    return np.array(output_key_points)
    

### 3.3 - extract the keypoints and save them in train

In [30]:
if "extracted_features.npy" in os.listdir():
    train = np.load("extracted_features.npy")
else:
    train = np.array([extract_keypoints_video(path,f'iteration : {i+1}/{len(x)}') for i,path in enumerate(x) ])
    np.save('extracted_features.npy', train) 

iteration : 1/250	SHAPE (20, 480, 640, 3)


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


iteration : 2/250	SHAPE (20, 480, 640, 3)
iteration : 3/250	SHAPE (20, 480, 640, 3)
iteration : 4/250	SHAPE (20, 480, 640, 3)
iteration : 5/250	SHAPE (20, 480, 640, 3)
iteration : 6/250	SHAPE (20, 480, 640, 3)
iteration : 7/250	SHAPE (20, 480, 640, 3)
iteration : 8/250	SHAPE (20, 480, 640, 3)
iteration : 9/250	SHAPE (20, 480, 640, 3)
iteration : 10/250	SHAPE (20, 480, 640, 3)
iteration : 11/250	SHAPE (20, 480, 640, 3)
iteration : 12/250	SHAPE (20, 480, 640, 3)
iteration : 13/250	SHAPE (20, 480, 640, 3)
iteration : 14/250	SHAPE (20, 480, 640, 3)
iteration : 15/250	SHAPE (20, 480, 640, 3)
iteration : 16/250	SHAPE (20, 480, 640, 3)
iteration : 17/250	SHAPE (20, 480, 640, 3)
iteration : 18/250	SHAPE (20, 480, 640, 3)
iteration : 19/250	SHAPE (20, 480, 640, 3)
iteration : 20/250	SHAPE (20, 480, 640, 3)
iteration : 21/250	SHAPE (20, 480, 640, 3)
iteration : 22/250	SHAPE (20, 480, 640, 3)
iteration : 23/250	SHAPE (20, 480, 640, 3)
iteration : 24/250	SHAPE (20, 480, 640, 3)
iteration : 25/250	

iteration : 191/250	SHAPE (20, 480, 640, 3)
iteration : 192/250	SHAPE (20, 480, 640, 3)
iteration : 193/250	SHAPE (20, 480, 640, 3)
iteration : 194/250	SHAPE (20, 480, 640, 3)
iteration : 195/250	SHAPE (20, 480, 640, 3)
iteration : 196/250	SHAPE (20, 480, 640, 3)
iteration : 197/250	SHAPE (20, 480, 640, 3)
iteration : 198/250	SHAPE (20, 480, 640, 3)
iteration : 199/250	SHAPE (20, 480, 640, 3)
iteration : 200/250	SHAPE (20, 480, 640, 3)
iteration : 201/250	SHAPE (20, 480, 640, 3)
iteration : 202/250	SHAPE (20, 480, 640, 3)
iteration : 203/250	SHAPE (20, 480, 640, 3)
iteration : 204/250	SHAPE (20, 480, 640, 3)
iteration : 205/250	SHAPE (20, 480, 640, 3)
iteration : 206/250	SHAPE (20, 480, 640, 3)
iteration : 207/250	SHAPE (20, 480, 640, 3)
iteration : 208/250	SHAPE (20, 480, 640, 3)
iteration : 209/250	SHAPE (20, 480, 640, 3)
iteration : 210/250	SHAPE (20, 480, 640, 3)
iteration : 211/250	SHAPE (20, 480, 640, 3)
iteration : 212/250	SHAPE (20, 480, 640, 3)
iteration : 213/250	SHAPE (20, 4

In [31]:
train.shape

(250, 20, 258)

### 3.4 split the data (training,testing)

In [32]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [33]:
labels = to_categorical(y)

In [36]:
X_train, X_test, Y_train, Y_test = train_test_split(train, labels, test_size=0.1)

In [37]:
print(
X_train.shape,
X_test.shape,
Y_train.shape,
Y_test.shape,
)

(225, 20, 258) (25, 20, 258) (225, 5) (25, 5)


# 4 - build the model

In [38]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense,Input
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import TensorBoard

log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)



In [40]:
input_layer = Input(shape=(20,258))
layer = LSTM(64,return_sequences=True,activation="relu")(input_layer)
layer = LSTM(128,return_sequences=True,activation="relu")(layer)
layer = LSTM(64,return_sequences=False,activation="relu")(layer)
layer = Dense(64,activation="relu")(layer)
layer = Dense(32,activation="relu")(layer)
layer = Dense(len(actions),activation="softmax")(layer)

model = Model(inputs=input_layer,outputs=layer)



In [41]:
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 20, 258)]         0         
                                                                 
 lstm_3 (LSTM)               (None, 20, 64)            82688     
                                                                 
 lstm_4 (LSTM)               (None, 20, 128)           98816     
                                                                 
 lstm_5 (LSTM)               (None, 64)                49408     
                                                                 
 dense_3 (Dense)             (None, 64)                4160      
                                                                 
 dense_4 (Dense)             (None, 32)                2080      
                                                                 
 dense_5 (Dense)             (None, 5)                 165 

In [42]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [43]:
model.fit(X_train, Y_train, epochs=1000, callbacks=[tb_callback],validation_data=(X_test,Y_test))

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000


Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000
Epoch 74/1000
Epoch 75/1000
Epoch 76/1000
Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000


Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000
Epoch 127/1000
Epoch 128/1000
Epoch 129/1000
Epoch 130/1000
Epoch 131/1000
Epoch 132/1000
Epoch 133/1000
Epoch 134/1000
Epoch 135/1000
Epoch 136/1000
Epoch 137/1000
Epoch 138/1000
Epoch 139/1000
Epoch 140/1000
Epoch 141/1000
Epoch 142/1000
Epoch 143/1000
Epoch 144/1000
Epoch 145/1000
Epoch 146/1000
Epoch 147/1000


Epoch 148/1000
Epoch 149/1000
Epoch 150/1000
Epoch 151/1000
Epoch 152/1000
Epoch 153/1000
Epoch 154/1000
Epoch 155/1000
Epoch 156/1000
Epoch 157/1000
Epoch 158/1000
Epoch 159/1000
Epoch 160/1000
Epoch 161/1000
Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000
Epoch 169/1000
Epoch 170/1000
Epoch 171/1000
Epoch 172/1000
Epoch 173/1000
Epoch 174/1000
Epoch 175/1000
Epoch 176/1000
Epoch 177/1000
Epoch 178/1000
Epoch 179/1000

KeyboardInterrupt: 

In [44]:
model.save_weights("model.h5")

In [45]:
model.load_weights('model.h5')

# 5 - Make predictions

### this doesn't make any sence

In [46]:
model.evaluate(X_test,Y_test)



[0.7527683973312378, 0.8799999952316284]

In [47]:
model.evaluate(X_train,Y_train)



[7.869308319641277e-06, 1.0]

In [48]:
x[0]

'../../data/train/signer0_sample431_color.mp4'

# 6 - test in real time

In [57]:
model.predict(np.array([train[0]]))

array([[9.9999952e-01, 2.4448259e-11, 3.1743037e-18, 4.4404717e-07,
        1.1752664e-15]], dtype=float32)

In [58]:
from scipy import stats

colors = [(245,117,16), (117,245,16), (16,117,245),(255,0,0),(0,255,0)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame


In [59]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()
        if(not ret):
            break
        frame = cv2.resize(frame, (640, 480))

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-20:]
        
        if len(sequence) == 20:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            predictions.append(np.argmax(res))
            
            
        #3. Viz logic
            if np.unique(predictions[-2:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
            image = prob_viz(res, actions, image, colors)
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

In [50]:
cap.release()

In [51]:
cv2.destroyAllWindows()