In [2]:
import cv2  # OpenCV for video capture and image processing
import mediapipe as mp  # MediaPipe for hand tracking
import pyautogui  # PyAutoGUI for mouse control
import time  # For adding delays

# Initialize MediaPipe Hands
print("Initializing MediaPipe Hands...")
capture_hands = mp.solutions.hands.Hands()  # Create a Hands object for hand tracking
drawing_option = mp.solutions.drawing_utils  # Utility for drawing hand landmarks
screen_width, screen_height = pyautogui.size()  # Get the screen resolution
print(f"Screen size: {screen_width}x{screen_height}")

# Open webcam
print("Opening webcam...")
cap = cv2.VideoCapture(0)  # Open the default webcam (index 0)
if not cap.isOpened():  # Check if the webcam is opened successfully
    print("Error: Could not open webcam.")
    exit()

# Initialize video writer to save the output
fourcc = cv2.VideoWriter_fourcc(*'XVID')  # Codec for video encoding
out = cv2.VideoWriter('output.avi', fourcc, 20.0, (640, 480))  # Output file, frame rate, and resolution

# Variables to store hand landmark positions
x1 = y1 = x2 = y2 = 0

# Main loop to process webcam frames
while True:
    # Read frame from webcam
    _, image = cap.read()  # Capture a frame from the webcam
    if image is None:  # Check if the frame is captured successfully
        print("Error: Could not read frame from webcam.")
        break

    # Flip and convert the image
    image_height, image_width, _ = image.shape  # Get the dimensions of the frame
    image = cv2.flip(image, 1)  # Flip the image horizontally (mirror effect)
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB for MediaPipe

    # Process the image with MediaPipe
    res = capture_hands.process(rgb_image)  # Detect hand landmarks in the frame
    hands = res.multi_hand_landmarks  # Get the list of detected hands

    if hands:  # If hands are detected
        print("Hand detected!")
        for hand in hands:  # Loop through each detected hand
            drawing_option.draw_landmarks(image, hand)  # Draw landmarks on the image
            one_hand_landmarks = hand.landmark  # Get the landmarks of the current hand
            for id, lm in enumerate(one_hand_landmarks):  # Loop through each landmark
                x = int(lm.x * image_width)  # Convert normalized x-coordinate to pixel value
                y = int(lm.y * image_height)  # Convert normalized y-coordinate to pixel value
                if id == 8:  # Index finger tip (landmark ID 8)
                    mouse_x = int(screen_width / image_width * x)  # Map x-coordinate to screen width
                    mouse_y = int(screen_height / image_height * y)  # Map y-coordinate to screen height
                    pyautogui.moveTo(mouse_x, mouse_y)  # Move the mouse cursor to the mapped position
                    x1 = x  # Store the x-coordinate of the index finger tip
                    y1 = y  # Store the y-coordinate of the index finger tip
                if id == 4:  # Thumb tip (landmark ID 4)
                    x2 = x  # Store the x-coordinate of the thumb tip
                    y2 = y  # Store the y-coordinate of the thumb tip

        # Calculate distance between index finger and thumb
        dist = ((x2 - x1) ** 2 + (y2 - y1) ** 2) ** 0.5  # Euclidean distance formula
        if dist < 30:  # If the distance is less than 30 pixels (click threshold)
            print("Click detected!")
            pyautogui.click()  # Simulate a mouse click
            time.sleep(0.5)  # Add a delay to avoid multiple clicks
    else:
        print("No hand detected.")

    # Write the frame to the output video file
    out.write(image)  # Save the current frame to the video file

    # Display the image
    cv2.imshow('Hand movement', image)  # Show the processed frame in a window

    # Exit on 'x' key press
    key = cv2.waitKey(1)  # Wait for 1 millisecond for a key press
    if key == ord('x'):  # If 'x' is pressed, exit the loop
        print("Exiting...")
        break

# Release resources
cap.release()  # Release the webcam
out.release()  # Release the video writer
cv2.destroyAllWindows()  # Close all OpenCV windows

Initializing MediaPipe Hands...
Screen size: 1920x1080
Opening webcam...
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand detected.
No hand d