In [48]:
import cv2
import mediapipe as mp
import numpy as np
from filterpy.kalman import KalmanFilter
from capture import initialize_webcam, display_frame, capture_video, release_resources
from gesture_rec import gesture_recognition_integration

In [49]:
# MediaPipe entry points used throughout the notebook:
#   mp_hands_solution - the hands solution module (provides Hands and HAND_CONNECTIONS)
#   mp_drawing        - drawing helpers used to overlay landmarks on frames
mp_hands_solution = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

In [50]:
# Shared hand detector reused by every detection loop in this notebook.
hands_instance = mp_hands_solution.Hands(
    max_num_hands=2,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.5,
)

In [51]:
def hand_detection(frame, hands):
    """
    Run hand detection on one frame and overlay any landmarks that were found.

    Args:
        frame: Image in BGR channel order; it is annotated in place.
        hands: Detector exposing ``process(rgb_image)`` whose result carries
            ``multi_hand_landmarks`` (e.g. the notebook's ``hands_instance``).

    Returns:
        The same ``frame`` object, with landmarks drawn when hands were detected.
    """
    # The detector is fed RGB, while the incoming frame is BGR
    detection = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    detected_hands = detection.multi_hand_landmarks
    if not detected_hands:
        # Nothing detected: hand the frame back untouched
        return frame

    for landmarks in detected_hands:
        mp_drawing.draw_landmarks(frame, landmarks, mp_hands_solution.HAND_CONNECTIONS)

    return frame

In [52]:
def main_hand_detection():
    """
    Run live hand detection on webcam frames until 'q' is pressed.

    Each captured frame is passed through ``hand_detection`` and then shown
    with ``display_frame``. The loop ends when the capture closes, when no
    frame is returned, or when the 'q' key is read by ``cv2.waitKey``.

    Returns:
        None. Returns immediately if the webcam could not be initialized.
    """
    cap = initialize_webcam()
    if cap is None:
        return

    try:
        while cap.isOpened():
            frame = capture_video(cap)
            if frame is None:
                break

            frame_with_landmarks = hand_detection(frame, hands_instance)
            display_frame(frame_with_landmarks)

            # Mask to the low byte so the comparison with ord('q') is reliable
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release the capture, even if the loop is aborted by an
        # exception or a kernel interrupt (KeyboardInterrupt); the exception
        # itself still propagates to the caller.
        release_resources(cap)

In [53]:
def main_capture_only():
    """
    Capture and display webcam frames without any hand detection.

    The loop ends when the capture closes, when no frame is returned, or when
    the 'q' key is read by ``cv2.waitKey``.

    Returns:
        None. Returns immediately if the webcam could not be initialized.
    """
    cap = initialize_webcam()
    if cap is None:
        return

    try:
        while cap.isOpened():
            frame = capture_video(cap)
            if frame is None:
                break

            display_frame(frame)

            # Mask to the low byte so the comparison with ord('q') is reliable
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release the capture, even if the loop is aborted by an
        # exception or a kernel interrupt (KeyboardInterrupt); the exception
        # itself still propagates to the caller.
        release_resources(cap)

In [54]:
# Initialize Kalman Filter for hand position
def initialize_kalman_filter():
    """
    Build a Kalman filter with a 4-element state and a 2-element measurement.

    The state is laid out as (x, y, x_velocity, y_velocity); only the two
    position components are measured.

    Returns:
        A configured ``KalmanFilter`` instance.
    """
    tracker = KalmanFilter(dim_x=4, dim_z=2)

    # Initial state: position and velocity all zero
    tracker.x = np.zeros(4)

    # State transition: each position gains its velocity every step,
    # velocities carry over unchanged
    transition = np.eye(4)
    transition[0, 2] = 1.
    transition[1, 3] = 1.
    tracker.F = transition

    # Measurement matrix: selects the two position components of the state
    tracker.H = np.eye(2, 4)

    # Scale the filter's initial covariance by 1000
    tracker.P *= 1000.

    # Measurement noise covariance: 5 on the diagonal, no cross terms
    tracker.R = 5. * np.eye(2)
    return tracker

# One independent Kalman filter per hand (two hands in total)
kalman_filters = [initialize_kalman_filter() for _ in range(2)]

In [55]:
def apply_kalman_filter(kf, hand_landmarks):
    """
    Smooth a hand position by running one predict/update step of a Kalman filter.

    Args:
        kf: Filter exposing ``predict()``, ``update(z)`` and a state ``x``.
        hand_landmarks: Object with a ``landmark`` sequence; entry 0 (the wrist)
            supplies the measured ``x`` and ``y``. A falsy value skips filtering.

    Returns:
        ``(filtered_x, filtered_y)`` taken from the first two entries of the
        filter state, or ``(None, None)`` when ``hand_landmarks`` is falsy.
    """
    if not hand_landmarks:
        return None, None

    # Landmark 0 is the wrist; its x/y form the measurement vector
    wrist = hand_landmarks.landmark[0]
    measurement = np.array([wrist.x, wrist.y])

    # Advance the filter, then correct it with the new measurement
    kf.predict()
    kf.update(measurement)

    # Filtered position lives in the first two state entries
    return kf.x[0], kf.x[1]

In [56]:
def hand_detection_with_kalman(frame, hands):
    """
    Detect hands in a frame, smooth each wrist position, and recognize gestures.

    For every detected hand the function:
      1. runs the Kalman filter assigned to that hand's index (when one exists)
         and draws the filtered wrist position as a filled green circle,
      2. draws the raw landmarks and their connections,
      3. calls ``gesture_recognition_integration`` and prints any non-None result.

    Args:
        frame: Image in BGR channel order; it is annotated in place.
        hands: Detector exposing ``process(rgb_image)`` whose result carries
            ``multi_hand_landmarks`` (e.g. the notebook's ``hands_instance``).

    Returns:
        The same ``frame`` object, annotated when hands were detected.

    NOTE(review): filters are keyed by a hand's position in
    ``multi_hand_landmarks``; confirm that this ordering is stable across
    frames, otherwise the two filters can end up swapping hands.
    """
    # The detector is fed RGB, while the incoming frame is BGR
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(rgb_frame)

    if not results.multi_hand_landmarks:
        return frame

    # Frame size does not change inside the loop, so read it once
    frame_h, frame_w = frame.shape[:2]

    for idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
        # Bound by the number of available filters rather than a literal, so
        # this stays correct if the filter list is resized; hands without a
        # filter are still drawn and classified, just not smoothed.
        if idx < len(kalman_filters):
            filtered_x, filtered_y = apply_kalman_filter(kalman_filters[idx], hand_landmarks)

            if filtered_x is not None and filtered_y is not None:
                # Filtered coordinates are scaled by the frame size to get pixels
                center = (int(filtered_x * frame_w), int(filtered_y * frame_h))
                cv2.circle(frame, center, 10, (0, 255, 0), -1)

        # Draw the original (unfiltered) hand landmarks
        mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands_solution.HAND_CONNECTIONS)

        # Gesture recognition on the raw landmarks
        predicted_gesture = gesture_recognition_integration(hand_landmarks)
        if predicted_gesture is not None:
            print(f"Predicted gesture: {predicted_gesture}")

    return frame


In [57]:
def main_hand_detection_optimized():
    """
    Run live hand tracking with Kalman smoothing and gesture recognition.

    Each captured frame is passed through ``hand_detection_with_kalman`` and
    then shown with ``display_frame``. The loop ends when the capture closes,
    when no frame is returned, or when the 'q' key is read by ``cv2.waitKey``.

    Returns:
        None. Returns immediately if the webcam could not be initialized.
    """
    cap = initialize_webcam()
    if cap is None:
        return

    try:
        while cap.isOpened():
            frame = capture_video(cap)
            if frame is None:
                break

            frame_with_landmarks = hand_detection_with_kalman(frame, hands_instance)
            display_frame(frame_with_landmarks)

            # Mask to the low byte so the comparison with ord('q') is reliable
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release the capture, even if the loop is aborted by an
        # exception or a kernel interrupt (KeyboardInterrupt); the exception
        # itself still propagates to the caller.
        release_resources(cap)

In [58]:
# Entry point: run the Kalman-filtered pipeline with gesture recognition.
# For plain landmark drawing without filtering, call main_hand_detection() instead.
main_hand_detection_optimized()

Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2
Predicted gesture: 2


KeyboardInterrupt: 