In [1]:
import numpy as np
import cv2
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

# Keypoints using Mediapipe

Here we check that we have access to our webcam. Then we add an additional layer showing the keypoints detected on our body.

In [3]:
# Short aliases for the MediaPipe solution modules used throughout the cells below.
mp_holistics = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Layer drawing utilities

Create a function so that our while loop does not get cluttered. We pass an image frame and a model; the model processes the frame and detects the keypoints found in it. The function then returns the frame and the detection results.

In [4]:
def mediapipe_detection(image, model):
    """Run `model` on a BGR frame and return the frame together with the model output.

    The frame is converted BGR -> RGB, handed to ``model.process`` while marked
    read-only, then made writeable again and converted back to BGR.

    Args:
        image: frame in BGR channel order (it is fed to ``cv2.COLOR_BGR2RGB``).
        model: any object exposing ``process(rgb_frame)``.

    Returns:
        tuple: ``(bgr_frame, detection)`` - the colour-round-tripped frame and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The writeable flag is switched off only for the duration of the model call.
    rgb_frame.flags.writeable = False
    detection = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, detection

Function to draw the detected landmarks upon a given frame

In [10]:
def draw_landmarks(image, results):
    """Draw the face, pose and both hand landmark sets of `results` onto `image`.

    No custom drawing specs are passed, so ``mp_drawing.draw_landmarks`` uses
    its own defaults for every layer. The image is modified in place and
    nothing is returned.
    """
    # (landmark list, connection set) for each layer, in drawing order.
    layers = (
        (results.face_landmarks, mp_holistics.FACEMESH_TESSELATION),
        (results.pose_landmarks, mp_holistics.POSE_CONNECTIONS),
        (results.left_hand_landmarks, mp_holistics.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mp_holistics.HAND_CONNECTIONS),
    )
    for landmark_list, connections in layers:
        mp_drawing.draw_landmarks(image, landmark_list, connections)

In [30]:
def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks of `results` onto `image` with custom specs.

    Each layer passes two extra positional arguments to
    ``mp_drawing.draw_landmarks`` (the 4th and 5th; in MediaPipe's
    ``drawing_utils`` these are the landmark spec and the connection spec).
    The face layer passes ``None`` as the first of the two. The image is
    modified in place and nothing is returned.
    """
    make_spec = mp_drawing.DrawingSpec

    # (landmark list, connection set, 4th positional arg, 5th positional arg)
    styled_layers = (
        (
            results.face_landmarks,
            mp_holistics.FACEMESH_TESSELATION,
            None,
            make_spec(color=(255, 255, 255), thickness=1, circle_radius=1),
        ),
        (
            results.pose_landmarks,
            mp_holistics.POSE_CONNECTIONS,
            make_spec(color=(255, 0, 0), thickness=2, circle_radius=2),
            make_spec(color=(255, 255, 255), thickness=1, circle_radius=1),
        ),
        (
            results.left_hand_landmarks,
            mp_holistics.HAND_CONNECTIONS,
            make_spec(color=(255, 0, 0), thickness=3, circle_radius=3),
            make_spec(color=(255, 255, 255), thickness=1, circle_radius=1),
        ),
        (
            results.right_hand_landmarks,
            mp_holistics.HAND_CONNECTIONS,
            make_spec(color=(255, 0, 0), thickness=3, circle_radius=3),
            make_spec(color=(255, 255, 255), thickness=1, circle_radius=1),
        ),
    )

    for landmark_list, connections, first_spec, second_spec in styled_layers:
        mp_drawing.draw_landmarks(image, landmark_list, connections, first_spec, second_spec)

In [65]:
cap = cv2.VideoCapture(0)

try:
    # Initialise mediapipe model
    with mp_holistics.Holistic(min_detection_confidence=0.7, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():

            # Read our feed
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera busy, unplugged, ...). Passing the
                # empty frame on would crash inside cv2.cvtColor, so stop cleanly.
                print('Could not read a frame from the webcam - stopping.')
                break

            # Perform keypoint detection
            # (`result` is left at top level on purpose: later cells inspect it)
            image, result = mediapipe_detection(frame, holistic)

            # Draw landmarks on the frame
            draw_styled_landmarks(image, result)

            # Show to screen ('title', frame)
            cv2.imshow('OpenCV feed', image)

            # Catch event when we want to quit (q)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Cleanup runs even if detection or drawing raises, so the camera is
    # released and the preview window does not linger.
    cap.release()
    cv2.destroyAllWindows()

# Extract Keypoint values

Extracting and processing keypoint values into a format that we can use

In [68]:
# Peek at the raw pose landmarks of the last frame processed by the webcam loop
# (each entry carries x, y, z and visibility).
result.pose_landmarks.landmark

[x: 0.4553624987602234
y: 0.5731104016304016
z: -1.2322654724121094
visibility: 0.9997774362564087
, x: 0.48357701301574707
y: 0.48920854926109314
z: -1.185476541519165
visibility: 0.9994886517524719
, x: 0.5012804865837097
y: 0.48758044838905334
z: -1.1852549314498901
visibility: 0.9995299577713013
, x: 0.5211310982704163
y: 0.48568785190582275
z: -1.1856610774993896
visibility: 0.9994864463806152
, x: 0.41371744871139526
y: 0.4903739094734192
z: -1.1987465620040894
visibility: 0.9995215535163879
, x: 0.387626975774765
y: 0.49075374007225037
z: -1.19772207736969
visibility: 0.9994756579399109
, x: 0.3643929064273834
y: 0.492742121219635
z: -1.197809100151062
visibility: 0.9994405508041382
, x: 0.5464252233505249
y: 0.5014891028404236
z: -0.7554653286933899
visibility: 0.9996281266212463
, x: 0.3289526104927063
y: 0.513339638710022
z: -0.7723501920700073
visibility: 0.999731183052063
, x: 0.4904845654964447
y: 0.6480852365493774
z: -1.0467863082885742
visibility: 0.9998200535774231
, x

In [75]:
def extract_keypoints(result):
    """Flatten one holistic detection result into a single 1-D feature vector.

    Layout of the returned array, in order:
        pose        (x, y, z, visibility) per landmark - zero placeholder of 33*4 values
        face        (x, y, z) per landmark             - zero placeholder of 468*3 values
        left hand   (x, y, z) per landmark             - zero placeholder of 21*3 values
        right hand  (x, y, z) per landmark             - zero placeholder of 21*3 values

    A part whose ``*_landmarks`` attribute is falsy (e.g. ``None`` because it
    was not detected) is replaced by its zero placeholder, so frames keep a
    consistent length as long as detected parts have the sizes listed above.

    Args:
        result: object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is
            either falsy or has a ``.landmark`` iterable of points.

    Returns:
        np.ndarray: ``[pose, face, left_hand, right_hand]`` concatenated.
    """
    def _flatten(landmark_list, count, fields):
        # Zero-fill a missing part so its slot in the vector is preserved.
        if not landmark_list:
            return np.zeros(count * len(fields))
        return np.array(
            [[getattr(point, field) for field in fields] for point in landmark_list.landmark]
        ).flatten()

    xyz = ('x', 'y', 'z')

    # Pose is gated on pose_landmarks itself. It used to be gated on
    # face_landmarks, which zeroed a detected pose whenever the face was
    # missing and crashed when a face was found but the pose was not.
    pose = _flatten(result.pose_landmarks, 33, xyz + ('visibility',))
    face = _flatten(result.face_landmarks, 468, xyz)
    left_hand = _flatten(result.left_hand_landmarks, 21, xyz)
    right_hand = _flatten(result.right_hand_landmarks, 21, xyz)

    return np.concatenate([pose, face, left_hand, right_hand])

In [76]:
# Quick check of extract_keypoints on the last result captured by the webcam loop.
result_test = extract_keypoints(result)


In [77]:
# Show the flattened keypoint vector (NumPy abbreviates the middle with "...").
print(result_test)

[ 0.4553625   0.5731104  -1.23226547 ...  0.          0.
  0.        ]


# Setup folders for data collection

In [46]:
# Sanity check: shows which path module os.path resolves to on this machine.
os.path

<module 'ntpath' from 'C:\\Users\\LENOVO\\anaconda3\\envs\\tensorflow\\lib\\ntpath.py'>

In [51]:
# Path for exported data (numpy arrays), relative to the notebook's working directory
DATA_PATH = os.path.join('Keypoint Data')

# Actions that we try to detect
# Full label set, currently disabled in favour of a single action:
# actions = np.array(['Hello', 'Thanks', 'I Love You'])
actions = np.array(['Hello'])
# Number of sequences (videos) recorded per action
no_sequences = 10
# Number of frames saved per sequence
sequence_length = 30

In [52]:
# Create one folder per (action, sequence): DATA_PATH/<action>/<seq>
for action in actions:
    for seq in range(no_sequences):
        # exist_ok=True makes re-running this cell harmless. Real failures
        # (permissions, invalid path, ...) still raise instead of being
        # silently swallowed.
        os.makedirs(os.path.join(DATA_PATH, action, str(seq)), exist_ok=True)

# Collect keypoint data for Training and Testing

Collect live feed and store it in the created folders.

Collect a sample for each action.

In [54]:
cap = cv2.VideoCapture(0)

# Set when the user presses 'q' or the camera stops delivering frames. A plain
# `break` only leaves the innermost frame loop, so the outer loops check this flag.
stop_requested = False

try:
    # Initialise mediapipe model
    with mp_holistics.Holistic(min_detection_confidence=0.7, min_tracking_confidence=0.5) as holistic:
        # Loop each action
        for action in actions:
            # Collect a defined no of sample videos
            for seq in range(no_sequences):
                # Collect defined no of frames per video
                for frame_no in range(sequence_length):

                    # Read our feed
                    ret, frame = cap.read()
                    if not ret:
                        # An empty frame would crash inside cv2.cvtColor; stop instead.
                        print('Could not read a frame from the webcam - stopping collection.')
                        stop_requested = True
                        break

                    # Perform keypoint detection
                    image, result = mediapipe_detection(frame, holistic)

                    # Draw landmarks on the frame
                    draw_styled_landmarks(image, result)

                    # Apply wait logic: the first frame of every sequence shows a
                    # banner and pauses so the subject can get into position.
                    is_first_frame = frame_no == 0
                    if is_first_frame:
                        cv2.putText(image, 'STARTING COLLECTION', (120, 200), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting Video {} for {}'.format(seq, action), (15, 12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen ('title', frame)
                    cv2.imshow('OpenCV feed', image)
                    if is_first_frame:
                        cv2.waitKey(2000)

                    # Export keypoints to folders (np.save appends the .npy suffix)
                    keypoints = extract_keypoints(result)
                    npy_path = os.path.join(DATA_PATH, action, str(seq), str(frame_no))
                    np.save(npy_path, keypoints)

                    # Catch event when we want to quit (q)
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        stop_requested = True
                        break

                if stop_requested:
                    break
            if stop_requested:
                break
finally:
    # Cleanup runs even if detection, drawing or saving raises, so the camera
    # is released and the preview window does not linger.
    cap.release()
    cv2.destroyAllWindows()

# Preprocess data and Create labels and features

In [55]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [56]:
# Map each action name to an integer class index (its position in `actions`).
label_map = {name: index for index, name in enumerate(actions)}

In [57]:
# NOTE(review): the saved output below lists three actions, but `actions` as
# currently defined holds only 'Hello' - the output is stale until the notebook
# is re-run from the top.
label_map

{'Hello': 0, 'Thanks': 1, 'I Love You': 2}

In [78]:
# Build the feature/label lists: one `window` (list of per-frame keypoint
# vectors) per recorded sequence, labelled with its action's class index.
sequences, labels = [], []
for action in actions:
    for seq in range(no_sequences):
        window = []
        for frame_no in range(sequence_length):
            frame_path = os.path.join(DATA_PATH, action, str(seq), "{}.npy".format(frame_no))
            try:
                # allow_pickle is deliberately left at its default (False):
                # keypoint files should be plain numeric arrays, and unpickling
                # arbitrary files can execute code.
                res = np.load(frame_path)
            except ValueError as exc:
                # np.load raises ValueError for object arrays; this typically
                # means the file was written by an earlier, different version
                # of extract_keypoints and is stale.
                raise ValueError(
                    "Could not load {!r} as a plain numeric array ({}). "
                    "Re-run the data collection cell to regenerate the keypoint files.".format(frame_path, exc)
                ) from exc
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

ValueError: Object arrays cannot be loaded when allow_pickle=False