In [None]:
%pip install opencv-python numpy matplotlib mediapipe

In [1]:
import cv2 as cv
import numpy as np
import os
from matplotlib import pyplot as plt
import mediapipe as mp

# Handles to the two MediaPipe modules the rest of the notebook relies on:
# the drawing helpers and the Holistic solution.
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

# Shared Holistic model used by the live webcam preview.
# static_image_mode is off and both confidence thresholds are 0.5.
holistic = mp_holistic.Holistic(
    min_tracking_confidence=0.5,
    min_detection_confidence=0.5,
    static_image_mode=False,
)



In [2]:
# Probe the default webcam (device 0): request 1280x720 and report the
# resolution the capture backend actually reports back.
cap = cv.VideoCapture(0)
try:
    cap.set(cv.CAP_PROP_FRAME_WIDTH, 1280)   # requested width
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, 720)   # requested height

    # Read the properties back instead of trusting the request.
    actual_width = cap.get(cv.CAP_PROP_FRAME_WIDTH)
    actual_height = cap.get(cv.CAP_PROP_FRAME_HEIGHT)
    print(f'Actual camera resolution: {actual_width}x{actual_height}')
finally:
    # Later cells open VideoCapture(0) themselves; release this handle so the
    # device is not still held by the probe when they do.
    cap.release()


Actual camera resolution: 1280.0x720.0


In [3]:
def mediapipe_detection(image, model):
    """Run a MediaPipe model on one OpenCV frame.

    Args:
        image: Frame in BGR channel order (what ``cv.VideoCapture.read`` yields).
        model: Any object exposing ``process(rgb_image)``, e.g. a Holistic model.

    Returns:
        A ``(bgr_image, results)`` tuple: a BGR copy of the frame produced by
        converting to RGB and back, and whatever ``model.process`` returned.
    """
    rgb_frame = cv.cvtColor(image, cv.COLOR_BGR2RGB)

    # Mark the RGB frame read-only for the duration of inference (done for
    # performance), then restore write access afterwards.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Hand a BGR image back so OpenCV drawing/display calls work on it.
    return cv.cvtColor(rgb_frame, cv.COLOR_RGB2BGR), results

def draw_landmarks(image, results):
    """Overlay the detected hand landmarks on ``image``.

    Args:
        image: Image passed to ``mp_drawing.draw_landmarks`` for annotation.
        results: Object with ``left_hand_landmarks`` and
            ``right_hand_landmarks`` attributes; a falsy value means that hand
            was not detected and nothing is drawn for it.

    Returns:
        The same ``image`` object that was passed in.
    """
    detected_hands = (results.left_hand_landmarks, results.right_hand_landmarks)
    for hand_landmarks in detected_hands:
        if not hand_landmarks:
            continue  # this hand is absent in the current frame
        # Both hands are drawn with the hand connection set from the holistic module.
        mp_drawing.draw_landmarks(image, hand_landmarks, mp_holistic.HAND_CONNECTIONS)
    return image


# Live preview: stream the default webcam through the shared `holistic` model
# and show the frames with hand landmarks drawn on top. Press 'q' to stop.
cap = cv.VideoCapture(0)
cap.set(cv.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv.CAP_PROP_FRAME_HEIGHT, 720)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was returned; stop the preview.
            break

        # Run detection, then annotate the returned BGR image in place.
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results)

        cv.imshow('Holistic Model Feed', image)

        # Mask to the low byte before comparing with the key code for 'q'.
        if cv.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, including when the loop is
    # aborted by an exception or a kernel interrupt.
    cap.release()
    cv.destroyAllWindows()


In [4]:
def extract_keypoints(results):
    """Flatten both hands' landmarks into one feature vector.

    Args:
        results: Object with ``left_hand_landmarks`` and
            ``right_hand_landmarks``; each is either falsy (hand not detected)
            or exposes a ``landmark`` iterable of points with ``x``, ``y``, ``z``.

    Returns:
        1-D NumPy array: left-hand values followed by right-hand values, each
        hand laid out as x, y, z per landmark. A missing hand contributes
        63 zeros (21 landmarks x 3 coordinates).
    """
    def _hand_vector(hand):
        # Zero placeholder keeps the overall layout fixed when a hand is absent.
        if not hand:
            return np.zeros(63)
        coords = [[point.x, point.y, point.z] for point in hand.landmark]
        return np.array(coords).flatten()

    left = _hand_vector(results.left_hand_landmarks)
    right = _hand_vector(results.right_hand_landmarks)
    return np.concatenate([left, right])


In [5]:
# Root folder for the collected data; one sub-folder per action and sequence.
DATA_PATH = os.path.join('MP_Data')

# Labels of the actions (signs) to record.
actions = np.array([
    'baby', 'friend', 'help', 'love', 'more', 'pain', 'play', 'stand',
    'stop', 'what', 'front', 'right', 'left', 'up', 'down', 'now',
    'eat', 'drink', 'super', 'hug', 'me', 'name', 'hello',
])

no_sequence = 15       # number of recordings (sequences) per action
sequence_length = 15   # number of frames per recording

# Pre-create MP_Data/<action>/<sequence>/ so later cells can rely on the layout.
for action in actions:
    action_dir = os.path.join(DATA_PATH, action)
    for sequence in range(no_sequence):
        os.makedirs(os.path.join(action_dir, str(sequence)), exist_ok=True)


In [None]:
# Data collection: for every action, record `no_sequence` sequences of
# `sequence_length` webcam frames, run hand detection on each frame and store
# its keypoint vector under DATA_PATH/<action>/<sequence>/<frame_num>.npy.
# Press 'q' while the feed window has focus to stop the whole collection.
cap = cv.VideoCapture(0)
stop_requested = False  # set once the user presses 'q'

try:
    # Bind the model to its own name: reusing `holistic` here would rebind the
    # notebook-level model to one that is closed as soon as this `with` exits.
    with mp_holistic.Holistic(min_detection_confidence=0.5,
                              min_tracking_confidence=0.5) as collection_model:
        for action in actions:
            for sequence in range(no_sequence):
                for frame_num in range(sequence_length):
                    ret, frame = cap.read()
                    if not ret:
                        # Best effort, as before: report and move on. Note that
                        # no keypoint file is written for a skipped frame.
                        print("Failed to capture frame.")
                        continue

                    image, results = mediapipe_detection(frame, collection_model)
                    draw_landmarks(image, results)

                    # Status line shown on every frame.
                    cv.putText(image, f'Collecting frames for {action} Video Number {sequence}', (15,12),
                               cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv.LINE_AA)

                    if frame_num == 0:
                        # First frame of a sequence: show a banner and pause
                        # for 2 s so the user can get into position.
                        cv.putText(image, 'STARTING COLLECTION', (120,200),
                                   cv.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv.LINE_AA)
                        cv.imshow('OpenCV Feed', image)
                        cv.waitKey(2000)

                    # Persist this frame's keypoints; np.save appends ".npy".
                    keypoints = extract_keypoints(results)
                    np.save(os.path.join(DATA_PATH, action, str(sequence), str(frame_num)), keypoints)

                    cv.imshow('OpenCV Feed', image)

                    if cv.waitKey(100) & 0xFF == ord('q'):
                        stop_requested = True
                        break

                # A bare `break` only leaves the innermost loop, so propagate
                # the stop request through the two outer loops as well.
                if stop_requested:
                    break
            if stop_requested:
                break
finally:
    # Always free the camera and close the window, even on error or interrupt.
    cap.release()
    cv.destroyAllWindows()

KeyboardInterrupt: 

In [None]:
%pip install scikit-learn

%pip install tensorflow==2.12.1 tensorflow-cpu==2.12.1 opencv-python mediapipe matplotlib

In [6]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical 

ModuleNotFoundError: No module named 'sklearn'