In [7]:
import cv2
import mediapipe as mp
import pyautogui
import utils
import numpy as np
import datetime


In [8]:
# Screen resolution as reported by pyautogui; move_mouse scales normalized
# landmark coordinates by these two values.
screen_width, screen_height = pyautogui.size()

# MediaPipe hand tracker shared by every cell below (tutorial names kept).
# Configured for a single hand with 0.6 detection/tracking confidence.
mpHands = mp.solutions.hands
hands = mpHands.Hands(
    max_num_hands=1,
    static_image_mode=False,
    model_complexity=1,
    min_tracking_confidence=0.6,
    min_detection_confidence=0.6,
)


In [9]:
def find_finger_tip(processed):
    """
    Return the index-finger-tip landmark of the first detected hand.

    Args:
        processed: Result object exposing ``multi_hand_landmarks`` (as returned
            by ``hands.process``), or ``None``.

    Returns:
        The landmark stored at ``mpHands.HandLandmark.INDEX_FINGER_TIP`` for the
        first hand, or ``None`` when ``processed`` is ``None`` or no hand was
        detected.
    """
    detected_hands = processed.multi_hand_landmarks if processed is not None else None
    if not detected_hands:
        # Covers both "no result object" and "result without any hand".
        return None

    first_hand = detected_hands[0]
    tip_index = mpHands.HandLandmark.INDEX_FINGER_TIP
    return first_hand.landmark[tip_index]


In [10]:
def move_mouse(index_finger_tip):
    """
    Move the OS cursor toward a normalized landmark with exponential smoothing.

    Args:
        index_finger_tip: Object exposing normalized ``x``/``y`` attributes
            (detect_gestures passes a palm-centre stand-in). ``None`` is a no-op.

    Side effects:
        Reads and updates the module globals ``prev_x``/``prev_y`` (the smoothed
        cursor position, kept as floats) and calls ``pyautogui.moveTo``.

    Any exception is printed instead of raised so a single bad frame does not
    stop the capture loop.
    """
    if index_finger_tip is None:
        return

    global prev_x, prev_y
    try:
        # Normalized coords -> screen pixels, clamped to the visible area so a
        # landmark reported outside the frame cannot drag the smoothing state
        # (and the cursor target) off-screen.
        target_x = min(max(index_finger_tip.x * screen_width, 0), screen_width - 1)
        target_y = min(max(index_finger_tip.y * screen_height, 0), screen_height - 1)

        # Smoothing factor (0..1) - lower = more smoothing.
        smoothing = 0.25

        # Start from the previous smoothed position; on the first call (globals
        # missing or None) jump straight to the target.
        module_state = globals()
        last_x = module_state.get("prev_x")
        last_y = module_state.get("prev_y")
        if last_x is None or last_y is None:
            last_x, last_y = target_x, target_y

        # Keep the smoothed state as floats. Truncating it to int every frame
        # discards positive steps smaller than 1 px (the cursor then never
        # reaches a target just right of / below it) while still applying
        # negative ones, which biases the cursor toward the top-left.
        smooth_x = last_x + (target_x - last_x) * smoothing
        smooth_y = last_y + (target_y - last_y) * smoothing
        prev_x, prev_y = smooth_x, smooth_y

        # Round only for the actual move.
        # NOTE(review): pyautogui documents durations below MINIMUM_DURATION
        # (0.1 s by default) as instant, so duration=0.01 likely has no
        # visible effect; the smoothing above is what softens the motion.
        pyautogui.moveTo(int(round(smooth_x)), int(round(smooth_y)), duration=0.01)
    except Exception as e:
        print("move_mouse error:", e)


In [11]:
class _PalmPoint:
    """Minimal landmark stand-in exposing the .x/.y attributes move_mouse reads."""

    def __init__(self, x, y):
        self.x = x
        self.y = y


def _fingers_up(landmarks):
    """Return [thumb, index, middle, ring, pinky] booleans for a 21-point hand."""
    up = [False] * 5
    try:
        # Thumb: tip (4) displaced horizontally from the IP joint (3).
        up[0] = abs(landmarks[4][0] - landmarks[3][0]) > 0.02
    except (IndexError, TypeError):
        up[0] = False
    try:
        # Other fingers: tip y smaller than the PIP joint y
        # (index 8/6, middle 12/10, ring 16/14, pinky 20/18).
        up[1:] = [
            landmarks[tip][1] < landmarks[pip][1]
            for tip, pip in ((8, 6), (12, 10), (16, 14), (20, 18))
        ]
    except (IndexError, TypeError):
        up[1:] = [False] * 4
    return up


def detect_gestures(frame, landmarks_list, processed):
    """
    Interpret one frame of hand landmarks and drive mouse/screenshot actions.

    Gestures:
    - Move: index + middle up and thumb/ring/pinky down, held for
      MOVE_HOLD_FRAMES frames; the cursor follows the palm centre. The counter
      is capped at MOVE_HOLD_FRAMES, so one frame that breaks the pose disables
      movement again.
    - Left button: pressed while thumb+index pinch is below the threshold,
      released when it opens (works regardless of movement state).
    - Right click: fired once per thumb+middle pinch.
    - Screenshot: taken when the previous processed frame was fully open and
      this frame is fully closed.

    Args:
        frame: Image array with a ``shape`` of (height, width, channels);
            fingertip and palm markers are drawn onto it in place.
        landmarks_list: Sequence of (x, y) normalized landmarks; needs 21 entries.
        processed: Detection result for this frame, or ``None``.

    Side effects:
        Updates the module globals ``move_counter``, ``move_enabled``,
        ``left_pinch_down``, ``right_pinch_triggered`` and ``prev_all_open``.

    When no usable hand is present the function returns early, but first
    releases the left button if a pinch was holding it down; otherwise a hand
    leaving the frame mid-pinch would leave the button stuck.
    """
    global move_counter, move_enabled
    global left_pinch_down, right_pinch_triggered, prev_all_open
    state = globals()

    if processed is None or len(landmarks_list) < 21:
        if state.get("left_pinch_down", False):
            left_pinch_down = False
            try:
                pyautogui.mouseUp(button='left')
                print("LEFT mouseUp (hand lost)")
            except Exception as e:
                print("mouseUp error:", e)
        return

    # ---------- primary detection ----------
    up_list = _fingers_up(landmarks_list)
    thumb_up, index_up, middle_up, ring_up, pinky_up = up_list

    # draw fingertip markers (green if up, red if down)
    h, w, _ = frame.shape
    for tid, is_up in zip((4, 8, 12, 16, 20), up_list):
        tx = int(landmarks_list[tid][0] * w)
        ty = int(landmarks_list[tid][1] * h)
        color = (0, 255, 0) if is_up else (0, 0, 255)
        cv2.circle(frame, (tx, ty), 5, color, -1)

    # ---------- palm centre (for movement) ----------
    palm_idxs = [0, 5, 9, 13, 17]   # wrist + MCP joints
    palm_x = sum(landmarks_list[i][0] for i in palm_idxs) / len(palm_idxs)
    palm_y = sum(landmarks_list[i][1] for i in palm_idxs) / len(palm_idxs)
    # draw palm centre (yellow) for visual feedback
    cv2.circle(frame, (int(palm_x * w), int(palm_y * h)), 8, (0, 255, 255), 2)

    # ---------- movement condition: EXACTLY index+middle open ----------
    exact_index_middle = (
        index_up and middle_up
        and (not thumb_up) and (not ring_up) and (not pinky_up)
    )

    # ---------- debounce: pose must be held before movement starts ----------
    MOVE_HOLD_FRAMES = 4  # require pose for N frames to enable movement
    move_counter = state.get("move_counter", 0)
    if exact_index_middle:
        move_counter = min(MOVE_HOLD_FRAMES, move_counter + 1)
    else:
        move_counter = max(0, move_counter - 1)
    move_enabled = (move_counter >= MOVE_HOLD_FRAMES)

    if move_enabled:
        move_mouse(_PalmPoint(palm_x, palm_y))

    # ---------- LEFT button: thumb + index pinch ----------
    # NOTE(review): the threshold's units are whatever utils.get_distance
    # returns; that helper is not visible here - confirm its scale.
    try:
        thumb_index_dist = utils.get_distance([landmarks_list[4], landmarks_list[8]])
    except Exception:
        thumb_index_dist = None
    PINCH_THRESHOLD_LEFT = 40
    left_pinch_down = state.get("left_pinch_down", False)

    if thumb_index_dist is not None:
        if thumb_index_dist < PINCH_THRESHOLD_LEFT and not left_pinch_down:
            # Pinch started: press and hold until the pinch opens again.
            left_pinch_down = True
            try:
                pyautogui.mouseDown(button='left')
                print("LEFT mouseDown (thumb+index pinch)")
            except Exception as e:
                print("mouseDown error:", e)
        elif thumb_index_dist >= PINCH_THRESHOLD_LEFT and left_pinch_down:
            left_pinch_down = False
            try:
                pyautogui.mouseUp(button='left')
                print("LEFT mouseUp (thumb+index release)")
            except Exception as e:
                print("mouseUp error:", e)

    # ---------- RIGHT click: thumb + middle pinch ----------
    try:
        thumb_middle_dist = utils.get_distance([landmarks_list[4], landmarks_list[12]])
    except Exception:
        thumb_middle_dist = None
    PINCH_THRESHOLD_RIGHT = 40
    right_pinch_triggered = state.get("right_pinch_triggered", False)

    if thumb_middle_dist is not None:
        if thumb_middle_dist < PINCH_THRESHOLD_RIGHT and not right_pinch_triggered:
            # Latch so one pinch produces exactly one click.
            right_pinch_triggered = True
            try:
                pyautogui.click(button='right')
                print("RIGHT click (thumb+middle pinch)")
            except Exception as e:
                print("right click error:", e)
        elif thumb_middle_dist >= PINCH_THRESHOLD_RIGHT and right_pinch_triggered:
            right_pinch_triggered = False

    # ---------- SCREENSHOT: full-open -> full-closed transition ----------
    all_open = all(up_list)
    all_closed = not any(up_list)
    prev_all_open = state.get("prev_all_open", False)

    if prev_all_open and all_closed:
        try:
            ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"screenshot_{ts}.png"
            pyautogui.screenshot(filename)
            print(f"Screenshot saved: {filename}")
        except Exception as e:
            print("screenshot error:", e)

    prev_all_open = all_open


In [12]:
def main():
    """
    Run the webcam loop: read frames, detect a hand, dispatch gestures, show preview.

    The loop ends when a frame cannot be read or when 'q' is pressed in the
    preview window. All gesture state globals are reset on entry so re-running
    this function in the same kernel starts from a clean state, and on exit the
    camera and window are released and a left button still held by a pinch is
    let go.
    """
    global prev_x, prev_y, left_pinch_down, right_pinch_triggered, prev_all_open
    global move_counter, move_enabled

    cap = cv2.VideoCapture(0)
    draw = mp.solutions.drawing_utils

    # Reset state shared with move_mouse and detect_gestures.
    prev_x, prev_y = None, None
    left_pinch_down = False
    right_pinch_triggered = False
    prev_all_open = False
    # Without these, a second run would inherit the previous run's hold counter.
    move_counter = 0
    move_enabled = False

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Camera read failed")
                break

            frame = cv2.flip(frame, 1)  # mirror for natural interaction
            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            processed = hands.process(frameRGB)

            # Flatten the first detected hand into (x, y) tuples; stays empty
            # when no hand is found, which detect_gestures treats as "no hand".
            landmarks_list = []
            if processed.multi_hand_landmarks:
                hand_landmarks = processed.multi_hand_landmarks[0]
                draw.draw_landmarks(frame, hand_landmarks, mpHands.HAND_CONNECTIONS)
                for lm in hand_landmarks.landmark:
                    landmarks_list.append((lm.x, lm.y))

                cv2.putText(frame, "Hand detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255,255,255), 2)

            # call gesture detection (keeps same signature)
            detect_gestures(frame, landmarks_list, processed)

            cv2.imshow('Frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
        # If the loop ended mid-pinch the left button is still pressed at the
        # OS level; release it so the desktop is not left in a drag.
        if left_pinch_down:
            left_pinch_down = False
            try:
                pyautogui.mouseUp(button='left')
            except Exception as e:
                print("mouseUp error:", e)

# Start the webcam loop only when this code is executed directly.
if __name__ == "__main__":
    main()


RIGHT click (thumb+middle pinch)
LEFT mouseDown (thumb+index pinch)
LEFT mouseUp (thumb+index release)
LEFT mouseDown (thumb+index pinch)
RIGHT click (thumb+middle pinch)
Screenshot saved: screenshot_20250922_225357.png
LEFT mouseUp (thumb+index release)
LEFT mouseDown (thumb+index pinch)
RIGHT click (thumb+middle pinch)
LEFT mouseUp (thumb+index release)
Screenshot saved: screenshot_20250922_225401.png
LEFT mouseDown (thumb+index pinch)
LEFT mouseUp (thumb+index release)
