In [30]:
import cv2
import time
import mediapipe as mp
import matplotlib.pyplot as plt
import os
import numpy as np

**Keypoints using MP holistic**

In [31]:
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

In [32]:
def preprocess(hand_region):
    # Convert to grayscale
    gray = cv2.cvtColor(hand_region, cv2.COLOR_BGR2GRAY)
    
    # Resize to the model's expected input size
    resized = cv2.resize(gray, (150, 150))
    
    # Expand dimensions for channel
    expanded = np.expand_dims(resized, axis=-1)
    
    # Normalize (if necessary)
    normalized = expanded / 255.0

    return normalized

In [33]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

In [34]:
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS) # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw right hand connection

In [35]:
def draw_styled_landmarks(image, results):

    #draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
                              mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                              mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1))

    #draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2))
    
    #draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2))
    
    #draw right hand connection
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))

In [36]:
cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        # draw_landmarks(image, results)
        draw_styled_landmarks(image, results)

        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

In [37]:
draw_landmarks(frame, results)

In [None]:
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

**Extract keypoint values**

In [39]:
len(results.left_hand_landmarks.landmark)

21

In [40]:
pose = []
for res in results.pose_landmarks.landmark:
    test = np.array([res.x, res.y, res.z, res.visibility])
    pose.append(test)

In [41]:
pose = np.array([(res.x, res.y, res.z, res.visibility) for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(21*3)
face = np.array([(res.x, res.y, res.z) for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)
lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)

In [42]:
face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)

In [43]:
def extract_keypoints(results):
    pose = np.array([(res.x, res.y, res.z, res.visibility) for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(21*3)
    face = np.array([(res.x, res.y, res.z) for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

In [44]:
def extract_hand_keypoints(results):
    if results.left_hand_landmarks:
        x_coords = [landmark.x for landmark in results.left_hand_landmarks.landmark]
        y_coords = [landmark.y for landmark in results.left_hand_landmarks.landmark]
    elif results.right_hand_landmarks:
        x_coords = [landmark.x for landmark in results.right_hand_landmarks.landmark]
        y_coords = [landmark.y for landmark in results.right_hand_landmarks.landmark]
    else:
        return [0, 0, 0, 0]  # Return invalid box if no landmarks
    
    return [min(x_coords), min(y_coords), max(x_coords), max(y_coords)]

In [45]:
results_test = extract_hand_keypoints(results)

In [46]:
results_test

[0.7759708166122437,
 0.4585370421409607,
 0.9659820795059204,
 0.8468704223632812]

In [47]:
np.save('0', results_test)

**Test in real time**

In [48]:
from scipy import stats

In [49]:
colors = [(245, 117, 16), (117, 245, 16), (16, 117, 245)]
actions = ['01_palm', '02_l', '03_fist', '04_fist_moved', '05_thumb', '06_index', '07_ok', '08_palm_moved', '09_c', '10_down']

In [50]:
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):  # Directly iterate over 'res'
        cv2.rectangle(output_frame, (0, 60 + num*40), (int(prob*100), 90 + num*40), colors[num%3], -1)
        cv2.putText(output_frame, actions[num], (0, 85 + num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    return output_frame

In [51]:
from tensorflow.keras.models import load_model

In [52]:
def get_hand_bbox(hand_landmarks):
    if hand_landmarks is None:
        return None

    x_coords = [landmark.x for landmark in hand_landmarks.landmark]
    y_coords = [landmark.y for landmark in hand_landmarks.landmark]

    min_x, max_x = min(x_coords), max(x_coords)
    min_y, max_y = min(y_coords), max(y_coords)

    # Convert relative coordinates to pixel values
    min_x, max_x = int(min_x * frame.shape[1]), int(max_x * frame.shape[1])
    min_y, max_y = int(min_y * frame.shape[0]), int(max_y * frame.shape[0])

    return (min_x, min_y, max_x, max_y)

In [53]:
# Ensure the base path exists
DATA_PATH = 'MP_Data'
if not os.path.exists(DATA_PATH):
    os.makedirs(DATA_PATH)

# Actions you want to detect
actions = ['01_palm', '02_l', '03_fist', '04_fist_moved', '05_thumb', '06_index', '07_ok', '08_palm_moved', '09_c', '10_down']

# Set up mediapipe holistic model
mp_holistic = mp.solutions.holistic

# Load the model
model = load_model('hand_gesture.h5')

cap = cv2.VideoCapture(0)
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)

        # If hands are detected, predict the gesture
        if results.left_hand_landmarks or results.right_hand_landmarks:
            # Extract the hand region
            keypoints = extract_hand_keypoints(results)

            # Ensure there's a valid bounding box:
            if keypoints[1] < keypoints[3] and keypoints[0] < keypoints[2]:
                hand_region = image[int(keypoints[1]*image.shape[0]):int(keypoints[3]*image.shape[0]),
                                    int(keypoints[0]*image.shape[1]):int(keypoints[2]*image.shape[1])]

                # Check for valid hand region size
                if hand_region.size != 0:
                    # Preprocess the hand region
                    processed_data = preprocess(hand_region)  # Assumes preprocessing includes resizing, normalization, etc.

                    # Predict the gesture
                    res = model.predict(np.expand_dims(processed_data, axis=0))[0]
                    gesture = actions[np.argmax(res)]
                    cv2.putText(image, gesture, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

                    # Visualization
                    image = prob_viz(res, actions, image, colors)
                else:
                    cv2.putText(image, "Invalid Hand Region", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
            else:
                cv2.putText(image, "Incorrect Hand Coordinates", (10, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)

        # Display results
        cv2.imshow('OpenCV Feed', image)

        # Break on 'q' keypress
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

