In [None]:
!pip install opencv-python
!pip install numpy
!pip install mediapipe

In [None]:
# Install pyautogui for automating GUI interactions, such as moving the mouse, clicking, and keyboard actions
!pip install pyautogui

# Install MediaPipe, a library for machine learning pipelines, useful for tasks like hand and face tracking
!pip install mediapipe

# Install xvfb (X Virtual FrameBuffer), a display server that enables off-screen rendering, which is useful for headless environments
!apt-get install -y xvfb

# Install pyvirtualdisplay to create and manage virtual displays, allowing GUI applications to run in a headless environment
!pip install pyvirtualdisplay

In [None]:
# Install pynput, a library that allows control and monitoring of the mouse and keyboard.
# Useful for creating automation scripts that simulate user input or listen for specific keyboard events.
!pip install pynput

In [None]:
# Import OpenCV for image processing and computer vision tasks
import cv2

# Import MediaPipe for machine learning-based pipelines, such as hand or pose tracking
import mediapipe as mp

# Import pyautogui for GUI automation, enabling control of the mouse and keyboard for screen interactions
import pyautogui

# Import random for generating random values, which could be used for various purposes, like randomizing movements or actions
import random

# Import Button and Controller from pynput.mouse to control the mouse programmatically, allowing clicks and movement
from pynput.mouse import Button, Controller

# Shared pynput mouse controller. detect_gesture uses this object to press and
# release the left/right mouse buttons when a click gesture is recognised.
mouse = Controller()

In [6]:
import numpy as np

def get_angle(a, b, c):
    """Return the unsigned angle, in degrees, formed at ``b`` by ``a`` and ``c``.

    Args:
        a: First end point, indexable as ``(x, y)``.
        b: Vertex of the angle, indexable as ``(x, y)``.
        c: Second end point, indexable as ``(x, y)``.

    Returns:
        The angle between the rays ``b -> a`` and ``b -> c``, in ``[0, 180]``.
    """
    # Difference between the headings of the two rays. Each arctan2 result lies
    # in [-pi, pi], so the difference can be anywhere in (-2*pi, 2*pi).
    radians = np.arctan2(c[1] - b[1], c[0] - b[0]) - np.arctan2(a[1] - b[1], a[0] - b[0])
    angle = np.abs(np.degrees(radians))
    # A difference above 180 degrees is the reflex side of the same angle (it
    # shows up when the two rays straddle the +/-180 degree branch cut), so
    # fold it back onto the interior angle.
    return np.minimum(angle, 360 - angle)

def get_distance(landmark_ist):
    """Scaled Euclidean distance between the first two points of ``landmark_ist``.

    Args:
        landmark_ist: Sequence of ``(x, y)`` pairs; only the first two entries
            are used.

    Returns:
        The distance mapped linearly from ``[0, 1]`` onto ``[0, 1000]``
        (``np.interp`` clamps anything longer than 1 to 1000), or ``None``
        when fewer than two points are supplied.
    """
    if len(landmark_ist) >= 2:
        start_x, start_y = landmark_ist[0]
        end_x, end_y = landmark_ist[1]
        # Straight-line separation in the same units as the input coordinates.
        separation = np.hypot(end_x - start_x, end_y - start_y)
        return np.interp(separation, [0, 1], [0, 1000])
    # Not enough points to measure anything.
    return None

In [7]:
# Screen resolution in pixels as reported by pyautogui. move_mouse multiplies
# the fingertip coordinates by these values to obtain a cursor position.
screen_width, screen_height = pyautogui.size()

In [None]:
# Shorthand for MediaPipe's hand-tracking solution module.
mpHands = mp.solutions.hands

# One shared hand tracker for the whole notebook, configured for a live stream.
hands = mpHands.Hands(
    max_num_hands=1,                   # detect and track at most one hand
    static_image_mode=False,           # treat input as a video stream, not unrelated images
    model_complexity=1,                # model complexity setting (accuracy vs. speed trade-off)
    min_detection_confidence=0.7,      # minimum confidence for a detection to count
    min_tracking_confidence=0.7,       # minimum confidence to keep tracking between frames
)

In [9]:
def find_finger_tip(processed):
    """Return the index-fingertip landmark of the first detected hand.

    Args:
        processed: Result object from ``hands.process(...)``; only its
            ``multi_hand_landmarks`` attribute is read.

    Returns:
        The landmark stored under ``mpHands.HandLandmark.INDEX_FINGER_TIP`` for
        the first hand, or ``None`` when no hand was detected.
    """
    if not processed.multi_hand_landmarks:
        # Return a single None rather than a ``(None, None)`` tuple: callers
        # such as move_mouse test ``is not None`` and then read ``.x``/``.y``,
        # which a tuple would pass and then crash on.
        return None

    # Only the first hand is considered.
    first_hand = processed.multi_hand_landmarks[0]
    return first_hand.landmark[mpHands.HandLandmark.INDEX_FINGER_TIP]

In [None]:
def move_mouse(index_finger_tip):
    """Move the OS cursor to the screen position matching the fingertip.

    Args:
        index_finger_tip: Object exposing ``x`` and ``y`` attributes, or
            ``None`` when there is nothing to follow. The coordinates are
            assumed to be normalised to 0..1 (as MediaPipe landmarks are) --
            TODO confirm against the caller.

    Returns:
        None. Does nothing when ``index_finger_tip`` is ``None``.
    """
    if index_finger_tip is None:
        return

    # Scale the horizontal coordinate across the full screen width.
    target_x = int(index_finger_tip.x * screen_width)
    # The vertical coordinate is halved before scaling, so for inputs in 0..1
    # the cursor only travels over the top half of the screen height.
    target_y = int(index_finger_tip.y / 2 * screen_height)

    pyautogui.moveTo(target_x, target_y)

In [None]:
def is_left_click(landmark_list, thumb_index_dist):
    """Tell whether the current hand pose is the left-click gesture.

    The gesture is: index finger bent, middle finger extended, thumb away.
    In MediaPipe's hand model, landmarks 5/6/8 are the index finger's
    MCP/PIP/tip and 9/10/12 the middle finger's MCP/PIP/tip, so each
    ``get_angle`` call measures how far that finger is bent at its PIP joint.

    Args:
        landmark_list: Sequence of ``(x, y)`` hand landmarks, indexed by
            landmark number.
        thumb_index_dist: Scaled distance supplied by the caller
            (detect_gesture passes the distance between landmarks 4 and 5).

    Returns:
        A truthy value when all three conditions hold, otherwise the first
        failing comparison (falsy), exactly as a chained ``and`` would.
    """
    # Index finger bent: angle at its PIP joint under 50 degrees.
    index_bent = get_angle(landmark_list[5], landmark_list[6], landmark_list[8]) < 50
    if not index_bent:
        return index_bent

    # Middle finger extended: angle at its PIP joint over 90 degrees.
    middle_extended = get_angle(landmark_list[9], landmark_list[10], landmark_list[12]) > 90
    if not middle_extended:
        return middle_extended

    # Thumb held away from the base of the index finger.
    return thumb_index_dist > 50

In [12]:
def is_right_click(landmark_list, thumb_index_dist):
    """Tell whether the current hand pose is the right-click gesture.

    The gesture is: middle finger bent, index finger extended, thumb away.
    In MediaPipe's hand model, landmarks 9/10/12 are the middle finger's
    MCP/PIP/tip and 5/6/8 the index finger's MCP/PIP/tip, so each
    ``get_angle`` call measures how far that finger is bent at its PIP joint.

    Args:
        landmark_list: Sequence of ``(x, y)`` hand landmarks, indexed by
            landmark number.
        thumb_index_dist: Scaled distance supplied by the caller
            (detect_gesture passes the distance between landmarks 4 and 5).

    Returns:
        A truthy value when all three conditions hold, otherwise the first
        failing comparison (falsy), exactly as a chained ``and`` would.
    """
    # Middle finger bent: angle at its PIP joint under 50 degrees.
    middle_bent = get_angle(landmark_list[9], landmark_list[10], landmark_list[12]) < 50
    if not middle_bent:
        return middle_bent

    # Index finger extended: angle at its PIP joint over 90 degrees.
    index_extended = get_angle(landmark_list[5], landmark_list[6], landmark_list[8]) > 90
    if not index_extended:
        return index_extended

    # Thumb held away from the base of the index finger.
    return thumb_index_dist > 50

In [13]:
def is_double_click(landmark_list, thumb_index_dist):
    """Tell whether the current hand pose is the double-click gesture.

    The gesture is: index and middle fingers both bent, thumb away.
    In MediaPipe's hand model, landmarks 5/6/8 are the index finger's
    MCP/PIP/tip and 9/10/12 the middle finger's MCP/PIP/tip, so each
    ``get_angle`` call measures how far that finger is bent at its PIP joint.

    Args:
        landmark_list: Sequence of ``(x, y)`` hand landmarks, indexed by
            landmark number.
        thumb_index_dist: Scaled distance supplied by the caller
            (detect_gesture passes the distance between landmarks 4 and 5).

    Returns:
        A truthy value when all three conditions hold, otherwise the first
        failing comparison (falsy), exactly as a chained ``and`` would.
    """
    # Index finger bent: angle at its PIP joint under 50 degrees.
    index_bent = get_angle(landmark_list[5], landmark_list[6], landmark_list[8]) < 50
    if not index_bent:
        return index_bent

    # Middle finger bent as well.
    middle_bent = get_angle(landmark_list[9], landmark_list[10], landmark_list[12]) < 50
    if not middle_bent:
        return middle_bent

    # Thumb held away from the base of the index finger; this is what
    # separates a double click from the screenshot gesture.
    return thumb_index_dist > 50

In [14]:
def is_screenshot(landmark_list, thumb_index_dist):
    """Tell whether the current hand pose is the screenshot gesture.

    The gesture is: index and middle fingers both bent, thumb tucked in.
    In MediaPipe's hand model, landmarks 5/6/8 are the index finger's
    MCP/PIP/tip and 9/10/12 the middle finger's MCP/PIP/tip, so each
    ``get_angle`` call measures how far that finger is bent at its PIP joint.

    Args:
        landmark_list: Sequence of ``(x, y)`` hand landmarks, indexed by
            landmark number.
        thumb_index_dist: Scaled distance supplied by the caller
            (detect_gesture passes the distance between landmarks 4 and 5).

    Returns:
        A truthy value when all three conditions hold, otherwise the first
        failing comparison (falsy), exactly as a chained ``and`` would.
    """
    # Index finger bent: angle at its PIP joint under 50 degrees.
    index_bent = get_angle(landmark_list[5], landmark_list[6], landmark_list[8]) < 50
    if not index_bent:
        return index_bent

    # Middle finger bent as well.
    middle_bent = get_angle(landmark_list[9], landmark_list[10], landmark_list[12]) < 50
    if not middle_bent:
        return middle_bent

    # Thumb tucked close to the base of the index finger; this is what
    # separates a screenshot from the double-click gesture.
    return thumb_index_dist < 50


In [15]:
def _label_frame(frame, text, color):
    """Draw ``text`` in the top-left corner of ``frame`` using the BGR ``color``."""
    cv2.putText(frame, text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)


def detect_gesture(frame, landmark_list, processed):
    """Recognise the current hand gesture and perform the matching action.

    At most one action is taken per call, checked in this order: move the
    cursor, left click, right click, double click, screenshot.

    Args:
        frame: Image passed to ``cv2.putText`` to be annotated in place with
            the name of the triggered action.
        landmark_list: Sequence of ``(x, y)`` hand landmarks, indexed by
            landmark number. Nothing happens when it has fewer than 21 entries.
        processed: Result of ``hands.process(...)`` for the same frame; used
            to look up the index fingertip when moving the cursor.

    Returns:
        None.
    """
    # All checks below index landmarks up to 12 and assume a full 21-point hand.
    if len(landmark_list) < 21:
        return

    # Scaled distance between landmarks 4 and 5. Computed once and shared by
    # every gesture test instead of being recomputed for the move check.
    thumb_index_dist = get_distance([landmark_list[4], landmark_list[5]])

    # Small landmark 4-5 distance with a large 5-6-8 angle: cursor-move gesture.
    if thumb_index_dist < 50 and get_angle(landmark_list[5], landmark_list[6], landmark_list[8]) > 90:
        # The fingertip is only needed on this branch, so look it up here.
        move_mouse(find_finger_tip(processed))

    elif is_left_click(landmark_list, thumb_index_dist):
        mouse.press(Button.left)
        mouse.release(Button.left)
        _label_frame(frame, "Left Click", (0, 255, 0))

    elif is_right_click(landmark_list, thumb_index_dist):
        mouse.press(Button.right)
        mouse.release(Button.right)
        _label_frame(frame, "Right Click", (0, 0, 255))

    elif is_double_click(landmark_list, thumb_index_dist):
        pyautogui.doubleClick()
        _label_frame(frame, "Double Click", (255, 255, 0))

    elif is_screenshot(landmark_list, thumb_index_dist):
        im1 = pyautogui.screenshot()
        # NOTE(review): the label is drawn from only 1000 values, so a later
        # screenshot can overwrite an earlier file with the same label.
        label = random.randint(1, 1000)
        im1.save(f'my_screenshot_{label}.png')
        _label_frame(frame, "Screenshot Taken", (255, 255, 0))

In [16]:
def main():
    """Run the webcam loop: track one hand, act on gestures, show the preview.

    Reads frames from camera 0 until the stream ends, a frame cannot be read,
    or the user presses ``q`` in the preview window. The camera and all OpenCV
    windows are released on exit, including when an exception escapes the loop.
    """
    drawer = mp.solutions.drawing_utils
    camera = cv2.VideoCapture(0)

    try:
        while camera.isOpened():
            grabbed, frame = camera.read()
            if not grabbed:
                # No frame delivered: stop instead of processing nothing.
                break

            # Mirror the image so on-screen motion matches the user's hand.
            frame = cv2.flip(frame, 1)

            # OpenCV delivers BGR; the hand tracker is fed the RGB conversion.
            detection = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            detected_hands = detection.multi_hand_landmarks
            if detected_hands:
                # Only the first hand is used.
                first_hand = detected_hands[0]
                drawer.draw_landmarks(frame, first_hand, mpHands.HAND_CONNECTIONS)
                landmark_list = [(point.x, point.y) for point in first_hand.landmark]
            else:
                # No hand this frame; detect_gesture ignores an empty list.
                landmark_list = []

            detect_gesture(frame, landmark_list, detection)
            cv2.imshow('Frame', frame)

            # Poll the keyboard for 1 ms; 'q' ends the session.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        camera.release()
        cv2.destroyAllWindows()

In [None]:
# Start the webcam gesture loop when this code runs as the top-level module.
# The loop blocks until the stream ends or 'q' is pressed in the preview window.
if __name__ == '__main__':
    main()