In [84]:
# Install PyAutoGUI into the environment of the running kernel.
# NOTE(review): the next cell also imports cv2, mediapipe and pynput, which are
# not installed here - confirm they are already present in the environment.
%pip install pyautogui

Note: you may need to restart the kernel to use updated packages.


In [85]:
import random
from datetime import datetime

import cv2
import mediapipe as mp
import pyautogui
from pynput.mouse import Button, Controller

import util  # Assuming you have util.py in the same directory!

# Mouse control: pynput controller used to send click events
mouse = Controller()

# Screen resolution, used to scale normalized landmark coordinates to pixels
screen_width, screen_height = pyautogui.size()

# Folder that holds the overlay artwork. Edit this single constant when the
# project lives somewhere else.
ASSETS_DIR = 'C:/Users/Freya/OneDrive/Desktop/Projects/Gesturize'


def _load_overlay_image(filename):
    """Read an overlay image, keeping its alpha channel.

    ``ASSETS_DIR`` is tried first; if nothing can be read from there the
    current working directory is tried, so the notebook still finds the
    artwork when it is run from a copy of the project folder.

    Returns:
        The image array, or None when neither location yields an image.
    """
    image = cv2.imread(f'{ASSETS_DIR}/{filename}', cv2.IMREAD_UNCHANGED)
    if image is None:
        image = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
    return image


# Load images with transparency
heart_img = _load_overlay_image('Heart.png')
thumbs_up_img = _load_overlay_image('Thumbs_up.png')

# A missing image is reported once here; the rest of the notebook keeps running.
if heart_img is None:
    print("Error: heart image not loaded!")
if thumbs_up_img is None:
    print("Error: thumbs up image not loaded!")

# Mediapipe setup: video-mode tracker limited to a single hand
mpHands = mp.solutions.hands
hands = mpHands.Hands(static_image_mode=False,
                      model_complexity=1,
                      min_detection_confidence=0.7,
                      min_tracking_confidence=0.7,
                      max_num_hands=1)
mpDraw = mp.solutions.drawing_utils

In [86]:
def overlay_transparent(background, overlay, x, y, overlay_size=None):
    """Alpha-blend ``overlay`` onto ``background`` in place.

    Args:
        background: Image array that is modified in place; expected to have
            three colour channels (e.g. a BGR video frame).
        overlay: Image array whose fourth channel is alpha, or None when the
            image could not be loaded. The alpha channel is assumed to be
            8-bit (0-255).
        x: Column in ``background`` where the overlay's left edge goes.
        y: Row in ``background`` where the overlay's top edge goes.
        overlay_size: Optional ``(width, height)`` to resize the overlay to
            before blending.

    Returns:
        None. Nothing is drawn when the overlay is missing, has no alpha
        channel, or lies completely outside the background.
    """
    # A failed image load yields None; skip drawing rather than crash the
    # per-frame loop that calls this function.
    if overlay is None:
        return

    # Resize overlay if necessary
    if overlay_size is not None:
        overlay = cv2.resize(overlay, overlay_size, interpolation=cv2.INTER_AREA)

    # Grayscale images are 2-D, so check the rank before reading shape[2].
    if overlay.ndim < 3 or overlay.shape[2] < 4:
        print("Overlay image has no alpha channel")
        return

    overlay_h, overlay_w = overlay.shape[:2]
    frame_h, frame_w = background.shape[:2]

    # Clip the placement rectangle to the background so an overlay that hangs
    # over an edge is drawn partially instead of being dropped.
    left, top = max(x, 0), max(y, 0)
    right, bottom = min(x + overlay_w, frame_w), min(y + overlay_h, frame_h)
    if left >= right or top >= bottom:
        return

    visible = overlay[top - y:bottom - y, left - x:right - x]

    # True alpha compositing: out = a * fg + (1 - a) * bg. Bitwise masking is
    # only correct for alpha values of exactly 0 or 255 and corrupts the
    # semi-transparent pixels found on anti-aliased edges.
    alpha = visible[:, :, 3:4].astype("float32") / 255.0
    foreground = visible[:, :, :3].astype("float32")
    region = background[top:bottom, left:right].astype("float32")
    blended = alpha * foreground + (1.0 - alpha) * region

    background[top:bottom, left:right] = blended.round().astype(background.dtype)


In [87]:
def find_finger_tip(processed):
    """Return the index-fingertip landmark of the first detected hand.

    Args:
        processed: Result object exposing ``multi_hand_landmarks``.

    Returns:
        The ``INDEX_FINGER_TIP`` landmark of the first hand, or None when no
        hand landmarks are present.
    """
    if not processed.multi_hand_landmarks:
        return None

    first_hand = processed.multi_hand_landmarks[0]
    return first_hand.landmark[mpHands.HandLandmark.INDEX_FINGER_TIP]

In [88]:
def move_mouse(index_finger_tip):
    """Move the cursor to the screen position matching the fingertip.

    The landmark's ``x``/``y`` values are scaled by the screen width and
    height and truncated to whole pixels. Nothing happens when
    ``index_finger_tip`` is falsy (e.g. None because no hand was found).
    """
    if not index_finger_tip:
        return

    target_x = int(index_finger_tip.x * screen_width)
    target_y = int(index_finger_tip.y * screen_height)
    pyautogui.moveTo(target_x, target_y)

In [89]:
def is_left_click(landmarks, thumb_index_dist):
    """Tell whether the pose matches the left-click gesture.

    The gesture requires, in this order: the angle at landmarks 5-6-8
    (index finger) below 50, the angle at landmarks 9-10-12 (middle finger)
    above 90, and ``thumb_index_dist`` above 50. Angles and distance use
    whatever units ``util`` reports.
    """
    index_bent = util.get_angle(landmarks[5], landmarks[6], landmarks[8]) < 50
    if not index_bent:
        return index_bent

    middle_straight = util.get_angle(landmarks[9], landmarks[10], landmarks[12]) > 90
    if not middle_straight:
        return middle_straight

    return thumb_index_dist > 50

def is_right_click(landmarks, thumb_index_dist):
    """Tell whether the pose matches the right-click gesture.

    The gesture requires, in this order: the angle at landmarks 9-10-12
    (middle finger) below 50, the angle at landmarks 5-6-8 (index finger)
    above 90, and ``thumb_index_dist`` above 50. Angles and distance use
    whatever units ``util`` reports.
    """
    middle_bent = util.get_angle(landmarks[9], landmarks[10], landmarks[12]) < 50
    if not middle_bent:
        return middle_bent

    index_straight = util.get_angle(landmarks[5], landmarks[6], landmarks[8]) > 90
    if not index_straight:
        return index_straight

    return thumb_index_dist > 50

def is_double_click(landmarks, thumb_index_dist):
    """Tell whether the pose matches the double-click gesture.

    The gesture requires, in this order: the angle at landmarks 5-6-8
    (index finger) below 50, the angle at landmarks 9-10-12 (middle finger)
    below 50, and ``thumb_index_dist`` above 50. Angles and distance use
    whatever units ``util`` reports.
    """
    index_bent = util.get_angle(landmarks[5], landmarks[6], landmarks[8]) < 50
    if not index_bent:
        return index_bent

    middle_bent = util.get_angle(landmarks[9], landmarks[10], landmarks[12]) < 50
    if not middle_bent:
        return middle_bent

    return thumb_index_dist > 50

def is_screenshot(landmarks, thumb_index_dist):
    """Tell whether the pose matches the screenshot gesture.

    The gesture requires, in this order: the angle at landmarks 5-6-8
    (index finger) below 50, the angle at landmarks 9-10-12 (middle finger)
    below 50, and ``thumb_index_dist`` below 50. Angles and distance use
    whatever units ``util`` reports.
    """
    index_bent = util.get_angle(landmarks[5], landmarks[6], landmarks[8]) < 50
    if not index_bent:
        return index_bent

    middle_bent = util.get_angle(landmarks[9], landmarks[10], landmarks[12]) < 50
    if not middle_bent:
        return middle_bent

    return thumb_index_dist < 50

def is_heart_gesture(landmarks):
    """Tell whether the thumb tip and index tip are close together.

    Landmark 4 (thumb tip) and landmark 8 (index tip) are handed to
    ``util.get_distance``; the gesture matches when the reported distance is
    below 50.
    """
    fingertip_pair = [landmarks[4], landmarks[8]]
    return util.get_distance(fingertip_pair) < 50

In [90]:
def is_thumbs_up_gesture(landmark_list):
    """Tell whether the hand shows a thumbs-up.

    Each entry of ``landmark_list`` is indexed as ``(x, y)``. The comparisons
    treat a smaller ``y`` as higher in the image, so:

    * the thumb tip (4) must have a smaller ``y`` than the index tip (8) and
      the middle tip (12);
    * every other fingertip (8, 12, 16, 20) must have a larger ``y`` than the
      landmark at the base of its finger (5, 9, 13, 17), i.e. be folded down.
    """
    thumb_tip, index_tip, middle_tip, ring_tip, pinky_tip = (
        landmark_list[tip_id] for tip_id in (4, 8, 12, 16, 20)
    )

    # Thumb must sit above both the index and the middle fingertip.
    if not (thumb_tip[1] < index_tip[1] and thumb_tip[1] < middle_tip[1]):
        return False

    # Each remaining fingertip must sit below the base of its own finger.
    tip_and_base = (
        (index_tip, 5),
        (middle_tip, 9),
        (ring_tip, 13),
        (pinky_tip, 17),
    )
    return all(tip[1] > landmark_list[base_id][1] for tip, base_id in tip_and_base)

In [91]:
def detect_gesture(frame, landmark_list, processed):
    """Classify the current hand pose and perform the matching action.

    The checks run in a fixed order and only the first match is acted on:
    mouse move, left click, right click, double click, screenshot, heart,
    thumbs up.

    Args:
        frame: Image the status text and overlays are drawn onto (in place).
        landmark_list: Sequence of ``(x, y)`` hand landmarks; nothing happens
            unless it holds at least 21 entries.
        processed: Hand-tracking result passed on to ``find_finger_tip``.

    Returns:
        None.

    NOTE(review): the matching action fires on every call, so a gesture held
    across several frames repeats the click / screenshot each frame - consider
    adding a cooldown.
    """
    if len(landmark_list) < 21:
        return

    index_finger_tip = find_finger_tip(processed)
    # Measured once and shared by the mouse-move test and every predicate.
    thumb_index_dist = util.get_distance([landmark_list[4], landmark_list[5]])

    # Mouse Move
    if thumb_index_dist < 50 and \
            util.get_angle(landmark_list[5], landmark_list[6], landmark_list[8]) > 90:
        move_mouse(index_finger_tip)

    # Left Click
    elif is_left_click(landmark_list, thumb_index_dist):
        mouse.press(Button.left)
        mouse.release(Button.left)
        cv2.putText(frame, "Left Click", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Right Click
    elif is_right_click(landmark_list, thumb_index_dist):
        mouse.press(Button.right)
        mouse.release(Button.right)
        cv2.putText(frame, "Right Click", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    # Double Click
    elif is_double_click(landmark_list, thumb_index_dist):
        pyautogui.doubleClick()
        cv2.putText(frame, "Double Click", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)

    # Screenshot
    elif is_screenshot(landmark_list, thumb_index_dist):
        # A microsecond timestamp keeps every file name distinct; a random
        # label from 1-1000 soon repeats and silently overwrites earlier shots.
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
        pyautogui.screenshot().save(f'my_screenshot_{stamp}.png')
        cv2.putText(frame, "Screenshot Taken", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)

    # Heart Gesture (with transparent overlay)
    elif is_heart_gesture(landmark_list):
        cv2.putText(frame, "HEART", (50, 150), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 4)
        # The image is None when it failed to load; keep the label, skip the art.
        if heart_img is not None:
            overlay_transparent(frame, heart_img, 100, 100, overlay_size=(100, 100))

    # Thumbs Up Gesture
    elif is_thumbs_up_gesture(landmark_list):
        cv2.putText(frame, "THUMBS UP!", (50, 200), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 4)
        if thumbs_up_img is not None:
            overlay_transparent(frame, thumbs_up_img, 250, 100, overlay_size=(100, 100))

In [92]:
def main():
    """Run the webcam loop: track a hand, react to gestures, show a preview.

    Each frame is mirrored, converted to RGB for the hand tracker, annotated
    with the detected landmarks and passed to ``detect_gesture``. Press ``q``
    in the preview window to stop. The capture device and all OpenCV windows
    are released on every exit path, including an interrupted kernel.
    """
    cap = cv2.VideoCapture(0)

    try:
        # Report a camera that cannot be opened; otherwise the loop below is
        # skipped without any hint about why no window appeared.
        if not cap.isOpened():
            print("Error: could not open webcam (device 0)!")
            return

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Mirror the image horizontally before tracking and display.
            frame = cv2.flip(frame, 1)
            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(frameRGB)

            landmark_list = []

            if results.multi_hand_landmarks:
                for handLms in results.multi_hand_landmarks:
                    mpDraw.draw_landmarks(frame, handLms, mpHands.HAND_CONNECTIONS)
                    for lm in handLms.landmark:
                        landmark_list.append((lm.x, lm.y))

                detect_gesture(frame, landmark_list, results)

            cv2.imshow("Virtual Mouse with Heart Gesture", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    finally:
        cap.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()

KeyboardInterrupt: 

In [None]:
# Sanity check: show the array shape of the heart overlay (a fourth channel
# means the alpha channel was kept). The image is None when loading failed,
# so guard before reading .shape.
if heart_img is not None:
    print(heart_img.shape)
else:
    print("Error: heart image not loaded!")

(707, 353, 4)
