1. Imports

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

2023-07-08 17:53:26.703630: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-08 17:53:26.741878: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-08 17:53:26.742685: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2. Draw Keypoints using MP Holistic

In [2]:
# Short aliases into MediaPipe's solutions namespace; the helper functions and
# capture cells below refer to these two names.
mp_holistic = mp.solutions.holistic # holistic solution: provides Holistic and the *_CONNECTIONS / FACEMESH_CONTOURS sets used below
mp_drawing = mp.solutions.drawing_utils # drawing utilities: draw_landmarks and DrawingSpec

In [3]:
def mediapipe_detection(image, model):
    """Run a MediaPipe model on a single BGR frame.

    Args:
        image: Frame in OpenCV's BGR channel order (the capture cells pass
            the frame returned by ``cap.read()``).
        model: Object exposing ``process(rgb_image)``; the capture cells pass
            an ``mp_holistic.Holistic`` instance.

    Returns:
        A ``(rgb_image, results)`` tuple: the RGB copy of the frame that was
        given to the model, and whatever ``model.process`` returned.

    Note:
        The returned image is in RGB order, not BGR. An earlier revision
        tried to convert back to BGR but assigned the result to a misspelled
        name (``iamge``), so the conversion never took effect. The display
        code in this notebook already depends on receiving RGB (it swaps the
        channels right before ``cv2.imshow``), so the dead conversion is
        removed here instead of being activated, which would have swapped
        red and blue on screen.
    """
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # bgr -> rgb; also yields a new array, the caller's frame is untouched

    # Mark the buffer read-only while the model processes it, then make it
    # writable again so the drawing helpers can paint on it.
    rgb_image.flags.writeable = False
    results = model.process(rgb_image)
    rgb_image.flags.writeable = True

    return rgb_image, results

In [4]:
def draw_landmarks(image, results):
    """Draw the face, pose and both hand landmark graphs onto ``image``.

    No ``DrawingSpec`` is supplied, so MediaPipe's own default styling is
    used. Each landmark group is read from ``results`` and handed to
    ``mp_drawing.draw_landmarks`` together with its connection set.
    """
    # (attribute on `results`, connection set that joins its landmarks)
    overlays = (
        ("face_landmarks", mp_holistic.FACEMESH_CONTOURS),
        ("pose_landmarks", mp_holistic.POSE_CONNECTIONS),
        ("left_hand_landmarks", mp_holistic.HAND_CONNECTIONS),
        ("right_hand_landmarks", mp_holistic.HAND_CONNECTIONS),
    )
    for attr_name, connections in overlays:
        mp_drawing.draw_landmarks(image, getattr(results, attr_name), connections)

In [5]:
def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks onto ``image`` with per-part colours.

    Args:
        image: Array to draw on; it is passed straight to
            ``mp_drawing.draw_landmarks``.
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks`` (the value
            returned by ``mediapipe_detection``).

    Each body part gets two ``DrawingSpec`` objects: the first styles the
    landmark points, the second styles the connecting lines.
    """
    spec = mp_drawing.DrawingSpec

    # (attribute on `results`, connection set, landmark style, connection style)
    styles = (
        ("face_landmarks", mp_holistic.FACEMESH_CONTOURS,
         spec(color = (80, 110, 10), thickness = 1, circle_radius = 1),
         # Previously (80, 256, 121): 256 does not fit an 8-bit colour
         # channel, whose valid range is 0-255.
         spec(color = (80, 255, 121), thickness = 1, circle_radius = 1)),

        ("pose_landmarks", mp_holistic.POSE_CONNECTIONS,
         spec(color = (80, 22, 10), thickness = 2, circle_radius = 4),
         spec(color = (80, 44, 121), thickness = 2, circle_radius = 2)),

        ("left_hand_landmarks", mp_holistic.HAND_CONNECTIONS,
         spec(color = (121, 22, 76), thickness = 2, circle_radius = 4),
         spec(color = (121, 44, 250), thickness = 2, circle_radius = 2)),

        ("right_hand_landmarks", mp_holistic.HAND_CONNECTIONS,
         spec(color = (245, 117, 66), thickness = 2, circle_radius = 4),
         spec(color = (245, 66, 230), thickness = 2, circle_radius = 2)),
    )

    for attr_name, connections, landmark_spec, connection_spec in styles:
        mp_drawing.draw_landmarks(image, getattr(results, attr_name), connections,
                                  landmark_spec, connection_spec)

In [6]:
# Live preview: run holistic detection on every webcam frame and show the
# annotated result until "q" is pressed or the camera stops delivering frames.
cap = cv2.VideoCapture(2)  # device index 2 is specific to the author's machine; adjust if no feed appears

try:
    # set mediapipe model
    # initial detection, then mediapipe tracks landmarks
    with mp_holistic.Holistic(min_detection_confidence = 0.5, min_tracking_confidence = 0.5) as holistic:
        while cap.isOpened():

            # read feed
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera busy, unplugged, stream ended).
                # Stop here instead of handing an unusable frame to cvtColor.
                break

            # make detections
            image, results = mediapipe_detection(frame, holistic)

            # draw landmarks
            draw_styled_landmarks(image, results)

            # show to screen -- mediapipe_detection returns an RGB image, so
            # swap the channels back to the BGR order imshow expects
            cv2.imshow("OpenCV Feed" , cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

            # break gracefully
            if cv2.waitKey(10) & 0xFF == ord("q"):
                break
finally:
    # Always free the camera and close the window, even if a frame raised,
    # so the device is not left locked for the next cell.
    cap.release()
    cv2.destroyAllWindows()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


3. Extract Keypoint Values

In [6]:
def extract_keypoints(results):
    """Flatten one frame of holistic results into a single 1-D array.

    The output is the concatenation, in this order, of:
      * pose       -- [x, y, z, visibility] per landmark (zeros(33*4) if absent)
      * face       -- [x, y, z] per landmark            (zeros(468*3) if absent)
      * left hand  -- [x, y, z] per landmark            (zeros(21*3) if absent)
      * right hand -- [x, y, z] per landmark            (zeros(21*3) if absent)

    A group counts as absent when its attribute on ``results`` is falsy.
    When every detected group carries the landmark count listed above, the
    result has 1662 values.
    """
    def _xyz_or_zeros(group, n_points):
        # Flattened [x, y, z] rows for a detected group, zero padding otherwise.
        if not group:
            return np.zeros(n_points * 3)
        return np.array([[point.x, point.y, point.z] for point in group.landmark]).flatten()

    # Pose is the only group that also carries a visibility score.
    pose_group = results.pose_landmarks
    if pose_group:
        pose = np.array(
            [[point.x, point.y, point.z, point.visibility] for point in pose_group.landmark]
        ).flatten()
    else:
        pose = np.zeros(33 * 4)

    face = _xyz_or_zeros(results.face_landmarks, 468)
    left_hand = _xyz_or_zeros(results.left_hand_landmarks, 21)
    right_hand = _xyz_or_zeros(results.right_hand_landmarks, 21)

    return np.concatenate([pose, face, left_hand, right_hand])

In [7]:
# Depends on `results` left behind by the webcam preview loop above. On a fresh
# kernel that cell must run (and process at least one frame) first, otherwise
# this line raises NameError.
result_test = extract_keypoints(results)

NameError: name 'results' is not defined

In [40]:
# Save the sample vector to disk; np.save adds the ".npy" suffix, so the file
# read back in the next cell is "0.npy".
np.save("0", result_test)

In [41]:
np.load("0.npy")

array([ 0.44014844,  0.42120978, -1.40003324, ...,  0.        ,
        0.        ,  0.        ])

4. Setup Folders for Collection

In [8]:
# Collection settings shared by the folder-setup, recording, loading and model cells.
DATA_PATH = "MP_Data"  # root folder for the exported keypoint arrays
actions = np.array(("hello", "thanks", "iloveyou"))  # gesture categories
no_sequences = 30  # videos recorded per action
sequence_length = 30  # frames per video

In [9]:
# loops through 3 actions
# Create DATA_PATH/<action>/<sequence>/ for every video that will be recorded
# (sequence folders are numbered 0 .. no_sequences - 1).
for action in actions:
    for sequence in range(no_sequences):
        # exist_ok=True keeps the cell safe to re-run, while real failures
        # (permissions, a file in the way, bad path) are raised instead of
        # being silently swallowed by a bare `except: pass`.
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok = True)

5. Collect Keypoint Values for Training and Testing

In [10]:
# Record the training data: for every action, capture `no_sequences` videos of
# `sequence_length` frames each and save one keypoint vector per frame to
# DATA_PATH/<action>/<sequence>/<frame_num>.npy.
cap = cv2.VideoCapture(2)  # device index 2 is specific to the author's machine; adjust if no feed appears

# Set when the user presses "q" or the camera stops delivering frames. A bare
# `break` only leaves the innermost loop, so the outer loops check this flag.
# Frames already written for the interrupted video stay on disk.
stop_requested = False

try:
    # set mediapipe model
    # initial detection, then mediapipe tracks landmarks
    with mp_holistic.Holistic(min_detection_confidence = 0.5, min_tracking_confidence = 0.5) as holistic:

        # loop through actions
        for action in actions:
            # loop through sequences aka videos
            for sequence in range(no_sequences):
                # loop through frames in video
                for frame_num in range(sequence_length):

                    # read feed
                    ret, frame = cap.read()
                    if not ret:
                        # no usable frame; stop instead of crashing in cvtColor
                        stop_requested = True
                        break

                    # make detections
                    image, results = mediapipe_detection(frame, holistic)

                    # draw landmarks
                    draw_styled_landmarks(image, results)

                    # status overlay; the first frame of each video also gets a banner
                    if frame_num == 0:
                        cv2.putText(image, "STARTING COLLECTION", (120, 200),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1, cv2.LINE_AA)
                    cv2.putText(image, "Collecting frames for {} Video Number {}".format(action, sequence), (15, 12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

                    # export keypoints (np.save appends ".npy" to the path)
                    keypoints = extract_keypoints(results)
                    npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                    np.save(npy_path, keypoints)

                    # show to screen -- mediapipe_detection returns an RGB image, so
                    # swap the channels back to the BGR order imshow expects
                    cv2.imshow("OpenCV Feed" , cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

                    # Pause 2 s on the first frame of each video so the person can
                    # get into position. The wait comes AFTER imshow so the
                    # "STARTING COLLECTION" banner is on screen during the pause.
                    wait_ms = 2000 if frame_num == 0 else 10

                    # break gracefully
                    if cv2.waitKey(wait_ms) & 0xFF == ord("q"):
                        stop_requested = True
                        break

                if stop_requested:
                    break
            if stop_requested:
                break
finally:
    # Always free the camera and close the window, even on an exception.
    cap.release()
    cv2.destroyAllWindows()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [11]:
# Repeats the cleanup that ends the collection cell. Presumably kept as a manual
# fallback for when that cell is interrupted before reaching its own cleanup.
cap.release()
cv2.destroyAllWindows()

6. Preprocess Data and Create Labels and Features

In [14]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [15]:
# Map each action name to its position in `actions`; used as the integer class id.
label_map = dict(zip(actions, range(len(actions))))

In [16]:
label_map

{'hello': 0, 'thanks': 1, 'iloveyou': 2}

In [18]:
# Rebuild the dataset from disk: `sequences` gets one window (a list of
# per-frame arrays) per recorded video, `labels` the matching integer class id.
sequences = []
labels = []
for action in actions:
    for sequence in range(no_sequences):
        video_dir = os.path.join(DATA_PATH, action, str(sequence))
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(video_dir, str(frame_num) + ".npy"))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])
                        

In [21]:
# Stack all windows into one array. With the settings above this is expected to
# be (n_videos, sequence_length, n_features) -- TODO confirm with x.shape.
x = np.array(sequences)
# One-hot encode the integer class ids, then cast the result to int.
y = to_categorical(labels).astype(int)

In [22]:
# Hold out 5% of the videos for testing. The fixed random_state makes the split,
# and therefore every prediction and metric below, reproducible across kernel
# restarts; without it each run evaluates on a different handful of samples.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.05, random_state = 42)

7. Build and Train LSTM Neural Network

In [32]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [33]:
# Directory handed to the TensorBoard callback for its training logs.
log_dir = os.path.join("Logs")
# Callback instance passed to model.fit below.
tb_callback = TensorBoard(log_dir = log_dir)

In [48]:
# Stacked-LSTM classifier: a window of per-frame keypoint vectors goes in, one
# softmax probability per action comes out.
model = Sequential()

# Input is (frames per video, features per frame).
#   * sequence_length replaces the literal 30 so the model stays in step with
#     the collection settings.
#   * 1662 = pose 33*4 + face 468*3 + two hands 2*21*3, i.e. the length of the
#     vector built by extract_keypoints.
# The first two LSTMs return the full sequence so the next LSTM can consume it;
# the last one returns only its final output for the dense head.
model.add(LSTM(64, return_sequences = True, activation = "relu", input_shape = (sequence_length, 1662)))
model.add(LSTM(128, return_sequences = True, activation = "relu"))
model.add(LSTM(64, return_sequences = False, activation = "relu"))

model.add(Dense(64, activation = "relu"))
model.add(Dense(32, activation = "relu"))
# one output unit per entry in `actions`
model.add(Dense(actions.shape[0], activation = "softmax"))

In [49]:
# Categorical cross-entropy matches the one-hot labels built with to_categorical;
# categorical accuracy is the metric reported during training.
model.compile(optimizer = "Adam", loss = "categorical_crossentropy" , metrics = ["categorical_accuracy"])

In [None]:
# Trains for a fixed 500 epochs on the training split only: no validation data
# and no early stopping are configured. The only callback is TensorBoard.
model.fit(x_train, y_train, epochs = 500, callbacks = [tb_callback])

8. Make Predictions

In [37]:
# Predicted class probabilities for the held-out videos (output of the softmax layer).
# Note: `res` also held per-frame arrays in the data-loading cell and is overwritten here.
res = model.predict(x_test)



In [43]:
actions[np.argmax(res[4])]

'hello'

In [44]:
actions[np.argmax(y_test[4])]

'hello'

9. Save Weights

In [45]:
# Write the trained model to "action.h5" in the working directory
# (HDF5 format, going by the extension -- TODO confirm).
model.save("action.h5")

  saving_api.save_model(


In [46]:
# NOTE(review): from here on `model` is undefined until it is rebuilt or reloaded.
del model

In [50]:
# `model` was deleted in the previous cell, so calling model.load_weights here
# raises NameError on a top-to-bottom run (it only worked after re-running the
# build and compile cells out of order). model.save wrote the complete model to
# action.h5, so restore it from that file in one step.
from tensorflow.keras.models import load_model

model = load_model("action.h5")

10. Evaluation using Confusion Matrix and Accuracy

In [51]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [56]:
# NOTE(review): predictions are made on x_train, so the confusion matrix and
# accuracy below measure fit to the training data, not held-out performance.
yhat = model.predict(x_train)



In [57]:
# Collapse the one-hot labels to class indices.
ytrue = np.argmax(y_train, axis = 1).tolist()
# Collapse the predicted probabilities to class indices. This overwrites `yhat`
# with a flat list, so re-run the predict cell before re-running this one.
yhat = np.argmax(yhat, axis = 1).tolist()

In [58]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[58,  0],
        [ 0, 27]],

       [[56,  0],
        [ 0, 29]],

       [[56,  0],
        [ 0, 29]]])

In [59]:
accuracy_score(ytrue, yhat)

1.0

11. Test in Real Time!!