# HAND GESTURE-BASED INTERACTION

---

Group members:
*   Ada Yılmaz
*   Ceren Şahin
*   Sima Adleyba
*   Selen Naz Gürsoy

### Installing necessary libraries and models

In [1]:
#install mediapipe
%pip install -q mediapipe

Note: you may need to restart the kernel to use updated packages.


In [2]:
#download a model that can recognize 7 hand gestures: 👍, 👎, ✌️, ☝️, ✊, 👋, 🤟
!wget -q https://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task

In [3]:
#download test images from pixabay
import urllib

IMAGE_FILENAMES = ['thumbs_down.jpg', 'victory.jpg', 'thumbs_up.jpg', 'pointing_up.jpg']

for name in IMAGE_FILENAMES:
  url = f'https://storage.googleapis.com/mediapipe-tasks/gesture_recognizer/{name}'
  urllib.request.urlretrieve(url, name)

In [4]:
#or we can use our own images as shown below

# from google.colab import files
# uploaded = files.upload()

# for filename in uploaded:
#   content = uploaded[filename]
#   with open(filename, 'wb') as f:
#     f.write(content)
# IMAGE_FILENAMES = list(uploaded.keys())

# print('Uploaded files:', IMAGE_FILENAMES)

### Functions for visualization

In [5]:
#some functions to visualize the gesture recognition results.
import math
from matplotlib import pyplot as plt
import mediapipe as mp
from mediapipe.framework.formats import landmark_pb2

plt.rcParams.update({
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.spines.left': False,
    'axes.spines.bottom': False,
    'xtick.labelbottom': False,
    'xtick.bottom': False,
    'ytick.labelleft': False,
    'ytick.left': False,
    'xtick.labeltop': False,
    'xtick.top': False,
    'ytick.labelright': False,
    'ytick.right': False
})

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles


def display_one_image(image, title, subplot, titlesize=16):
    """Displays one image along with the predicted category name and score."""
    plt.subplot(*subplot)
    plt.imshow(image)
    if len(title) > 0:
        plt.title(title, fontsize=int(titlesize), color='black', fontdict={'verticalalignment':'center'}, pad=int(titlesize/1.5))
    return (subplot[0], subplot[1], subplot[2]+1)


def display_batch_of_images_with_gestures_and_hand_landmarks(images, results):
    """Displays a batch of images with the gesture category and its score along with the hand landmarks."""
    # Images and labels.
    images = [image.numpy_view() for image in images]
    gestures = [top_gesture for (top_gesture, _) in results]
    multi_hand_landmarks_list = [multi_hand_landmarks for (_, multi_hand_landmarks) in results]

    # Auto-squaring: this will drop data that does not fit into square or square-ish rectangle.
    rows = int(math.sqrt(len(images)))
    cols = len(images) // rows

    # Size and spacing.
    FIGSIZE = 13.0
    SPACING = 0.1
    subplot=(rows,cols, 1)
    if rows < cols:
        plt.figure(figsize=(FIGSIZE,FIGSIZE/cols*rows))
    else:
        plt.figure(figsize=(FIGSIZE/rows*cols,FIGSIZE))

    # Display gestures and hand landmarks.
    for i, (image, gestures) in enumerate(zip(images[:rows*cols], gestures[:rows*cols])):
        title = f"{gestures.category_name} ({gestures.score:.2f})"
        dynamic_titlesize = FIGSIZE*SPACING/max(rows,cols) * 40 + 3
        annotated_image = image.copy()

        for hand_landmarks in multi_hand_landmarks_list[i]:
          hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
          hand_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks
          ])

          mp_drawing.draw_landmarks(
            annotated_image,
            hand_landmarks_proto,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())

        subplot = display_one_image(annotated_image, title, subplot, titlesize=dynamic_titlesize)

    # Layout.
    plt.tight_layout()
    plt.subplots_adjust(wspace=SPACING, hspace=SPACING)
    plt.show()

### Preview the images

In [6]:
import cv2
import math

DESIRED_HEIGHT = 480
DESIRED_WIDTH = 480

def resize_and_show(image, name):
    h, w = image.shape[:2]
    if h < w:
        img = cv2.resize(image, (DESIRED_WIDTH, math.floor(h / (w / DESIRED_WIDTH))))
    else:
        img = cv2.resize(image, (math.floor(w / (h / DESIRED_HEIGHT)), DESIRED_HEIGHT))
    
    # Display the image in a window with a name
    cv2.imshow(name, img)
    cv2.waitKey(0)  # Wait for a key press to close the window
    cv2.destroyAllWindows()  # Close the window after key press

# Example usage
images = {name: cv2.imread(name) for name in IMAGE_FILENAMES}

for name, image in images.items():
    if image is not None:
        print(f"Displaying: {name}")
        resize_and_show(image, name)
    else:
        print(f"Error: Could not read image {name}")


Displaying: thumbs_down.jpg
Displaying: victory.jpg
Displaying: thumbs_up.jpg
Displaying: pointing_up.jpg


### Google implementation
This first one is a how to from google, we can create our own by following the steps given.


In [8]:
# STEP 1: Import the necessary modules.
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# STEP 2: Create an GestureRecognizer object.
base_options = python.BaseOptions(model_asset_path='gesture_recognizer.task')
options = vision.GestureRecognizerOptions(base_options=base_options)
recognizer = vision.GestureRecognizer.create_from_options(options)

images = []
results = []
for image_file_name in IMAGE_FILENAMES:
    
  # STEP 3: Load the input image.
  image = mp.Image.create_from_file(image_file_name)

  # STEP 4: Recognize gestures in the input image.
  recognition_result = recognizer.recognize(image)

  # STEP 5: Process the result. In this case, visualize it.
  images.append(image)
  top_gesture = recognition_result.gestures[0][0]
  hand_landmarks = recognition_result.hand_landmarks
  results.append((top_gesture, hand_landmarks))

display_batch_of_images_with_gestures_and_hand_landmarks(images, results)

I0000 00:00:1733734701.863207   14037 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-22.5.10), renderer: Intel(R) Iris(TM) Plus Graphics OpenGL Engine
W0000 00:00:1733734701.909597   14037 gesture_recognizer_graph.cc:129] Hand Gesture Recognizer contains CPU only ops. Sets HandGestureRecognizerGraph acceleration to Xnnpack.
I0000 00:00:1733734701.932447   14037 hand_gesture_recognizer_graph.cc:250] Custom gesture classifier is not defined.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1733734702.018345   15212 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733734702.089356   15212 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733734702.094463   15215 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature infere

ValueError: Input image must contain three channel bgr data.

<Figure size 1300x1300 with 0 Axes>

### Example
Tried an example to see how it works and update the functions accordingly

In [9]:
import cv2
import mediapipe as mp

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()
mp_drawing = mp.solutions.drawing_utils

# Initialize webcam
cap = cv2.VideoCapture(0)

def detect_peace_sign(hand_landmarks):
    # Get coordinates of relevant landmarks
    index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    middle_tip = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP]
    ring_tip = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_TIP]
    pinky_tip = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_TIP]
    
    
    index_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP]
    middle_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_MCP]
    ring_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_MCP]
    pinky_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_MCP]


    # Define peace sign gesture: index and middle fingers up, ring and pinky fingers down
    if (
        # Index and middle fingers' tips must be highest parts of those fingers
        (index_tip.y < index_mcp.y) and (middle_tip.y < middle_mcp.y)
        and
        # Index finger's tips must be higher than other fingers' mcp
        (index_tip.y < ring_mcp.y) and (index_tip.y < pinky_mcp.y)
        and
        # Middle finger's tips must be higher than other fingers' mcp
        (middle_tip.y < ring_mcp.y) and (middle_tip.y < pinky_mcp.y)
    ):
        return True
    return False


while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to RGB
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process the frame and detect hands
    result = hands.process(frame_rgb)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            # Draw hand landmarks
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Detect custom gesture (peace sign)
            if detect_peace_sign(hand_landmarks):
                cv2.putText(frame, 'Peace Sign Detected!', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
                # Trigger interaction (e.g., print a message)
                print("Peace sign gesture detected!")

    # Display the frame
    cv2.imshow('MediaPipe Hands', frame)

    if cv2.waitKey(5) & 0xFF == 27:
        break

cap.release()
cv2.destroyAllWindows()

I0000 00:00:1733734784.299569   14037 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-22.5.10), renderer: Intel(R) Iris(TM) Plus Graphics OpenGL Engine
W0000 00:00:1733734784.420640   16680 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733734784.470914   16680 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Peace sign gesture detected!
Peace sign gesture detected!
Peace sign gesture detected!
Peace sign gesture detected!
Peace sign gesture detected!
Peace sign gesture detected!
Peace sign gesture detected!
Peace sign gesture detected!


### Gesture Detection Functions
After checking how the upper ones worked, we implemented fixed, more robust versions of gesture detection functions.

In [7]:
import time

# Global variables for tracking gestures and cooldown

# The currently detected gesture
current_gesture = None

# Time when the current gesture expires
gesture_reset_time = 0

# Cooldown variables for scrolling
previous_thumb_tip = None
previous_index_tip = None
last_gesture_time = 0
COOLDOWN_PERIOD = 1.5

# Reset current gesture when it expires
def reset_gesture():
    global current_gesture, gesture_reset_time
    if time.time() > gesture_reset_time:
        current_gesture = None

# Gesture detection functions
def detect_peace_sign(hand_landmarks):
    
    # Track the time of the last detected gesture
    global last_gesture_time

    # Get landmarks
    index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    middle_tip = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP]
    ring_tip = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_TIP]
    pinky_tip = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_TIP]

    index_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP]
    middle_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_MCP]
    ring_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_MCP]
    pinky_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_MCP]

    # Check that index and middle are raised above their MCPs and other MCPs
    index_and_middle_up = (
        (index_tip.y < index_mcp.y) and
        (middle_tip.y < middle_mcp.y) and
        (index_tip.y < ring_mcp.y) and 
        (index_tip.y < pinky_mcp.y) and
        (middle_tip.y < ring_mcp.y) and 
        (middle_tip.y < pinky_mcp.y)
    )

    # Check spacing between index and middle fingers
    index_middle_spacing = abs(index_tip.x - middle_tip.x) > 0.1

    # Check that ring and pinky are down (their tips should be below their MCP joints)
    ring_and_pinky_down = (
        (ring_tip.y > ring_mcp.y + 0.02) and
        (pinky_tip.y > pinky_mcp.y + 0.02)
    )

    if index_and_middle_up and index_middle_spacing and ring_and_pinky_down:
        
        # Update last gesture time (to apply cooldown for peace sign gesture)
        last_gesture_time = time.time()
        return True
    return False



def detect_thumbs_up(hand_landmarks, margin=0.05):
    
    # Track the time of the last detected gesture
    global last_gesture_time 

    # Get landmarks
    index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    middle_tip = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP]
    ring_tip = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_TIP]
    pinky_tip = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_TIP]
    thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]

    index_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP]
    middle_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_MCP]
    ring_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_MCP]
    pinky_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_MCP]
    thumb_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_MCP]
    
    thumb_base = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_CMC]

    # Thumb tip should be above other fingertips
    thumb_tip_up = ((thumb_tip.y + margin < index_tip.y) and
                    (thumb_tip.y + margin < middle_tip.y) and
                    (thumb_tip.y + margin < ring_tip.y) and
                    (thumb_tip.y + margin < pinky_tip.y) and
                    (thumb_tip.y < thumb_mcp.y))
    
    # Other fingers should be in order from top to bottom
    other_fingers_ordered = ((index_mcp.y < middle_mcp.y) and
                             (middle_mcp.y < ring_mcp.y) and
                             (ring_mcp.y < pinky_mcp.y))
    
    
    if thumb_tip_up and other_fingers_ordered:
        
        # Update last gesture time (to apply cooldown for thumbs-up gesture)
        last_gesture_time = time.time()
        return True
    return False


def detect_thumbs_down(hand_landmarks, margin=0.05):
    
    # Track the time of the last detected gesture
    global last_gesture_time

    # Get landmarks
    index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    middle_tip = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP]
    ring_tip = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_TIP]
    pinky_tip = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_TIP]
    thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]

    index_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP]
    middle_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_MCP]
    ring_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_MCP]
    pinky_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_MCP]

    # Thumb tip must be lower than anything else
    thumb_tip_down = ((thumb_tip.y > index_tip.y + margin) and
                    (thumb_tip.y > middle_tip.y + margin) and
                    (thumb_tip.y > ring_tip.y + margin) and
                    (thumb_tip.y > pinky_tip.y + margin))
    
    # Other fingers should be in the order (from top to down) pinky > ring > middle > index finger
    other_fingers_ordered = ((index_mcp.y > middle_mcp.y) and
                             (middle_mcp.y > ring_mcp.y) and
                             (ring_mcp.y > pinky_mcp.y))
    
    if thumb_tip_down and other_fingers_ordered:
        
        # Update last gesture time (to apply cooldown for thumbs-down gesture)
        last_gesture_time = time.time()
        return True
    return False

def detect_scroll(hand_landmarks, threshold=0.1, dominance_ratio=4.0):
    
    # Get previous positions and last gesture time
    global previous_thumb_tip, previous_index_tip, last_gesture_time

    # Get current positions
    current_thumb_tip_x = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].x
    current_thumb_tip_y = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP].y
    current_index_tip_x = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x
    current_index_tip_y = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y

    # Check for cooldown
    current_time = time.time()
    
    # If last gesture happened not before cooldown, return False (no scroll)
    if current_time - last_gesture_time < COOLDOWN_PERIOD:
        return False, None 

    # Initialize previous positions if not set
    if previous_thumb_tip is None or previous_index_tip is None:
        previous_thumb_tip = (current_thumb_tip_x, current_thumb_tip_y)
        previous_index_tip = (current_index_tip_x, current_index_tip_y)
        return False, None

    # Calculate location changes
    thumb_horizontal_disp = current_thumb_tip_x - previous_thumb_tip[0]
    thumb_vertical_disp = current_thumb_tip_y - previous_thumb_tip[1]
    index_horizontal_disp = current_index_tip_x - previous_index_tip[0]
    index_vertical_disp = current_index_tip_y - previous_index_tip[1]

    # Average the movements of thumb and index for robustness
    horizontal_disp = (thumb_horizontal_disp + index_horizontal_disp) / 2
    vertical_disp = (thumb_vertical_disp + index_vertical_disp) / 2

    # Determine dominant movement (we want to return only a horizontal or vertical movement)
    horizontal_movement = abs(horizontal_disp) > threshold
    vertical_movement = abs(vertical_disp) > threshold

    # Check dominance
    if horizontal_movement and abs(horizontal_disp) > dominance_ratio * abs(vertical_disp):
        direction = "right" if horizontal_disp > 0 else "left"
        dominant_axis = "horizontal"
    elif vertical_movement and abs(vertical_disp) > dominance_ratio * abs(horizontal_disp):
        direction = "down" if vertical_disp > 0 else "up"
        dominant_axis = "vertical"
    else:
        direction = None
        dominant_axis = None

    # If there is a dominant movement
    if dominant_axis:
        
        # Update positions
        previous_thumb_tip = (current_thumb_tip_x, current_thumb_tip_y)
        previous_index_tip = (current_index_tip_x, current_index_tip_y)
        
        # Update last gesture time
        last_gesture_time = current_time 
        return True, direction

    return False, None

In [10]:
import cv2
import mediapipe as mp

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
mp_drawing = mp.solutions.drawing_utils

# Initialize webcam
cap = cv2.VideoCapture(0)

# Global variables for tracking previous positions
previous_thumb_tip = None
previous_index_tip = None

print("Press 'Esc' to exit.")

# Initialize gesture display variables
current_gesture = None 
gesture_display_time = 0 
GESTURE_DISPLAY_DURATION = 1.5 

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Flip the frame horizontally
    frame = cv2.flip(frame, 1)

    # Convert the frame to RGB
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process the frame and detect hands
    result = hands.process(frame_rgb)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            # Draw hand landmarks
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Detect gestures
            detected_gesture = None
            if detect_peace_sign(hand_landmarks):
                detected_gesture = "Peace Sign Detected!"
            elif detect_thumbs_up(hand_landmarks):
                detected_gesture = "Thumbs Up Detected!"
            elif detect_thumbs_down(hand_landmarks):
                detected_gesture = "Thumbs Down Detected!"
            
            # If a gesture is detected, update the display variables
            if detected_gesture:
                current_gesture = detected_gesture
                gesture_display_time = time.time()
            else:
                # Only check for scroll if no other gesture is detected
                detected, direction = detect_scroll(hand_landmarks, threshold=0.1, dominance_ratio=4.0)
                if detected:
                    current_gesture = f"Scroll Detected: {direction.title()}!"
                    gesture_display_time = time.time()

    # Display the current gesture if within the display duration
    if current_gesture and (time.time() - gesture_display_time < GESTURE_DISPLAY_DURATION):
        cv2.putText(frame, current_gesture, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

    # Display the frame
    cv2.imshow('Gesture Recognition', frame)

    # Break the loop when 'Esc' key is pressed
    if cv2.waitKey(5) & 0xFF == 27:
        break

# Release the webcam and close windows
cap.release()
cv2.destroyAllWindows()

I0000 00:00:1733740389.826000   73686 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-22.5.10), renderer: Intel(R) Iris(TM) Plus Graphics OpenGL Engine
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1733740389.893388   74420 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733740389.926177   74420 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Press 'Esc' to exit.


W0000 00:00:1733740391.747621   74419 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


### Cursor control

In [8]:
import cv2
import mediapipe as mp
import pyautogui

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
mp_drawing = mp.solutions.drawing_utils

# Get screen dimensions
screen_width, screen_height = pyautogui.size()

def is_click_gesture(hand_landmarks):
    """Detect a pinching gesture for a left click."""
    index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]

    # Calculate the 3D Euclidean distance between index tip and thumb tip
    distance = ((index_tip.x - thumb_tip.x) ** 2 +
                (index_tip.y - thumb_tip.y) ** 2 +
                (index_tip.z - thumb_tip.z) ** 2) ** 0.5

    # Adjust threshold based on typical 3D distances observed
    return distance < 0.05


# Open webcam
cap = cv2.VideoCapture(0)

print("Press 'Esc' to exit.")

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Flip the frame horizontally for a mirror-like effect
    frame = cv2.flip(frame, 1)

    # Get frame dimensions
    frame_height, frame_width, _ = frame.shape

    # Convert the frame to RGB
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process the frame and detect hands
    result = hands.process(frame_rgb)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            # Draw hand landmarks
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Get index finger tip coordinates
            index_finger_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]

            # Normalize coordinates to screen dimensions
            cursor_x = int(index_finger_tip.x * screen_width)
            cursor_y = int(index_finger_tip.y * screen_height)

            # Move the mouse cursor
            pyautogui.moveTo(cursor_x, cursor_y)

            # Detect click gesture
            if is_click_gesture(hand_landmarks):
                pyautogui.click()
                cv2.putText(frame, "Click!", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow("Gesture-Based Cursor Control", frame)

    # Break the loop when 'Esc' key is pressed
    if cv2.waitKey(5) & 0xFF == 27:
        break

# Release the webcam and close windows
cap.release()
cv2.destroyAllWindows()

I0000 00:00:1733740596.988830   76643 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-22.5.10), renderer: Intel(R) Iris(TM) Plus Graphics OpenGL Engine
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1733740597.024627   77265 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733740597.056736   77265 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Press 'Esc' to exit.


In [None]:
import cv2
import mediapipe as mp
import tkinter as tk
from PIL import Image, ImageTk, ImageDraw, ImageFont
import os
import time

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
mp_drawing = mp.solutions.drawing_utils

# Initialize the main Tkinter window
root = tk.Tk()
root.title("Gesture-Controlled Instagram Feed")
root.geometry("1024x768")
root.configure(bg="#f0f0f0")

# Load photos into a list
photo_folder = "photos/"
photo_files = [os.path.join(photo_folder, f) for f in os.listdir(photo_folder) if f.endswith((".jpg", ".png"))]
photos = [Image.open(photo).resize((400, 400)) for photo in photo_files]

# Create a label to display a single photo
photo_label = tk.Label(root, bg="#ffffff", width=400, height=400)
photo_label.pack(side=tk.LEFT, padx=10, pady=10)

# Feedback label
feedback_label = tk.Label(root, text="Perform gestures to interact!", font=("Helvetica", 14), bg="#f0f0f0")
feedback_label.pack(side=tk.BOTTOM, pady=20)

# Interaction states
current_photo_index = 0  # Start with the first photo
liked_photos = {}
disliked_photos = {}
saved_photos = []  # List to store saved photos
showing_saved_photos = False  # Track whether we are showing saved photos

# Camera feed frame
camera_frame = tk.Label(root, bg="#000000", width=500, height=700)
camera_frame.pack(side=tk.RIGHT, padx=10, pady=10)

# Function to add emoji to photos
def add_emoji(photo, emoji):
    """Add an emoji to the given photo."""
    img = photo.copy()
    draw = ImageDraw.Draw(img)
    font = ImageFont.truetype("arial.ttf", 50)  # Ensure you have a compatible font installed
    draw.text((150, 150), emoji, fill="red", font=font)
    return img

# Function to update the displayed photo
def update_photo():
    """Update the currently displayed photo based on the index."""
    global current_photo_index
    if showing_saved_photos:
        if len(saved_photos) == 0:
            feedback_label.config(text="No saved photos available!")
            photo_label.config(image="")
            return
        if 0 <= current_photo_index < len(saved_photos):
            img = ImageTk.PhotoImage(saved_photos[current_photo_index])
            photo_label.configure(image=img)
            photo_label.img = img  # Keep a reference to avoid garbage collection
    else:
        if 0 <= current_photo_index < len(photos):
            current_photo = photos[current_photo_index]
            if current_photo_index in liked_photos:
                current_photo = liked_photos[current_photo_index]
            elif current_photo_index in disliked_photos:
                current_photo = disliked_photos[current_photo_index]
            img = ImageTk.PhotoImage(current_photo)
            photo_label.configure(image=img)
            photo_label.img = img  # Keep a reference to avoid garbage collection

# Function to toggle between all photos and saved photos
def show_saved_photos():
    global showing_saved_photos, current_photo_index
    showing_saved_photos = not showing_saved_photos
    current_photo_index = 0  # Reset to the first photo
    if showing_saved_photos:
        feedback_label.config(text="Showing Saved Photos 📂")
    else:
        feedback_label.config(text="Showing All Photos 🌍")
    update_photo()

# Gesture Detection Logic
def detect_gesture(hand_landmarks):
    """Detect gestures for liking, disliking, saving, scrolling, and clicking."""
    global current_photo_index

    # Get landmarks for gestures
    thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]
    index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    middle_tip = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP]
    ring_tip = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_TIP]
    pinky_tip = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_TIP]

    # Like Gesture (Thumbs Up)
    if (
        thumb_tip.y < index_tip.y
        and thumb_tip.y < middle_tip.y
        and thumb_tip.y < ring_tip.y
        and thumb_tip.y < pinky_tip.y
    ):
        feedback_label.config(text="Photo Liked! ❤️")
        if current_photo_index not in liked_photos:
            liked_photos[current_photo_index] = add_emoji(photos[current_photo_index], "❤️")
        update_photo()
        time.sleep(0.5)
        return

    # Dislike Gesture (Thumbs Down)
    if (
        thumb_tip.y > index_tip.y
        and thumb_tip.y > middle_tip.y
        and thumb_tip.y > ring_tip.y
        and thumb_tip.y > pinky_tip.y
    ):
        feedback_label.config(text="Photo Disliked! 👎")
        if current_photo_index not in disliked_photos:
            disliked_photos[current_photo_index] = add_emoji(photos[current_photo_index], "👎")
        update_photo()
        time.sleep(0.5)
        return

    # Save Gesture (Peace Sign)
    if (
        index_tip.y < middle_tip.y  # Index finger is above middle finger
        and middle_tip.y < ring_tip.y  # Middle finger is above ring finger
        and ring_tip.y < pinky_tip.y  # Ring finger is above pinky
        and thumb_tip.y > index_tip.y  # Thumb is below the index
    ):
        feedback_label.config(text="Photo Saved! ✌️")
        current_photo = photos[current_photo_index]
        if current_photo not in saved_photos:
            saved_photos.append(current_photo)
        time.sleep(0.5)
        return

    # Scrolling Gesture (Index Finger Movement)
    if index_tip.y < hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP].y:
        feedback_label.config(text="Scrolling Up ⬆️")
        if current_photo_index > 0:
            current_photo_index -= 1
            update_photo()
        time.sleep(0.5)
    elif index_tip.y > hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP].y:
        feedback_label.config(text="Scrolling Down ⬇️")
        if showing_saved_photos:
            if current_photo_index < len(saved_photos) - 1:
                current_photo_index += 1
                update_photo()
        else:
            if current_photo_index < len(photos) - 1:
                current_photo_index += 1
                update_photo()
        time.sleep(0.5)

# Open webcam and process gestures
cap = cv2.VideoCapture(0)

def update_camera_feed():
    """Update the camera feed and process gestures in real-time."""
    global current_photo_index

    ret, frame = cap.read()
    if not ret:
        return

    # Flip the frame horizontally for a mirror-like effect
    frame = cv2.flip(frame, 1)

    # Convert the frame to RGB for MediaPipe
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process the frame for hand landmarks
    results = hands.process(frame_rgb)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Draw hand landmarks on the frame
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Detect gestures for liking, disliking, saving, scrolling, and clicking
            detect_gesture(hand_landmarks)

    # Convert the frame to an image for Tkinter
    frame_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    imgtk = ImageTk.PhotoImage(image=frame_image)

    # Display the camera feed in the GUI
    camera_frame.imgtk = imgtk
    camera_frame.configure(image=imgtk)

    # Schedule the next frame update
    root.after(10, update_camera_feed)

# Start with the first photo
update_photo()

# Add a button to toggle saved photos
saved_button = tk.Button(root, text="Show Saved Photos", command=show_saved_photos, font=("Helvetica", 12))
saved_button.pack(side=tk.BOTTOM, pady=10)

# Start the camera feed
update_camera_feed()

# Run the Tkinter event loop
root.mainloop()

# Release the camera
cap.release()


: 

### Tetris

bu cursorsız, bi right left yapılmıyor ondan tetris çalışıyo mu düzgün anlayamadım 

In [1]:
import tkinter as tk
import random
import cv2
import mediapipe as mp
from PIL import Image, ImageTk

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
mp_drawing = mp.solutions.drawing_utils

# Tkinter Setup
root = tk.Tk()
root.title("Gesture-Based Tetris")
root.geometry("1100x800")
root.configure(bg="black")

# Game Variables
rows, cols = 20, 10
cell_size = 30
board = [[0 for _ in range(cols)] for _ in range(rows)]
shapes = [
    [[1, 1, 1, 1]],  # Line
    [[1, 1], [1, 1]],  # Square
    [[0, 1, 0], [1, 1, 1]],  # T-shape
    [[1, 1, 0], [0, 1, 1]],  # Z-shape
    [[0, 1, 1], [1, 1, 0]]   # S-shape
]
current_shape = None
next_shape = random.choice(shapes)  # Initialize next shape
current_position = (0, cols // 2 - 1)
game_running = False
score = 0

# Grid Canvas
game_canvas = tk.Canvas(root, width=cols * cell_size, height=rows * cell_size, bg="black", highlightthickness=1)
game_canvas.pack(side=tk.LEFT, padx=20, pady=20)

# Next Shape Canvas
next_shape_label = tk.Label(root, text="Next Shape:", font=("Helvetica", 14), bg="black", fg="white")
next_shape_label.place(x=cols * cell_size + 150, y=300)  # Positioned next to the grid

next_canvas = tk.Canvas(root, width=4 * cell_size, height=4 * cell_size, bg="black", highlightthickness=0)
next_canvas.place(x=cols * cell_size + 150, y=330)  # Positioned below the label

# Score Label
score_label = tk.Label(root, text=f"Score: {score}", font=("Helvetica", 18), bg="black", fg="white")
score_label.pack(anchor="n", pady=10)

# Instructions Label
instructions_label = tk.Label(
    root,
    text="Gesture Instructions:\n✌️: Rotate | 👎: Instant Drop | ⬅️: Move Left | ➡️: Move Right",
    font=("Helvetica", 14),
    bg="black",
    fg="white",
    justify="left"
)
instructions_label.place(x=cols * cell_size + 150, y=50)

# Camera Feed Placeholder
camera_label = tk.Label(root, bg="black", width=400, height=400)
camera_label.pack(side=tk.RIGHT, padx=10, pady=20)

# Start and Stop Buttons
def start_game():
    global game_running
    game_running = True
    spawn_shape()
    draw_board()
    game_loop()

def stop_game():
    global game_running
    game_running = False
    score_label.config(text="Game Over! Final Score: " + str(score))

start_button = tk.Button(root, text="Start Game", command=start_game, font=("Helvetica", 12), bg="green", fg="white")
start_button.place(x=cols * cell_size + 150, y=200)

stop_button = tk.Button(root, text="Stop Game", command=stop_game, font=("Helvetica", 12), bg="red", fg="white")
stop_button.place(x=cols * cell_size + 150, y=250)

# Draw the Game Board
def draw_board():
    game_canvas.delete("all")
    for r in range(rows):
        for c in range(cols):
            if board[r][c] != 0:
                game_canvas.create_rectangle(
                    c * cell_size, r * cell_size, (c + 1) * cell_size, (r + 1) * cell_size,
                    fill="blue", outline="gray"
                )
    if current_shape:
        shape, position = current_shape, current_position
        for r, row in enumerate(shape):
            for c, cell in enumerate(row):
                if cell:
                    x = position[1] + c
                    y = position[0] + r
                    if 0 <= y < rows and 0 <= x < cols:
                        game_canvas.create_rectangle(
                            x * cell_size, y * cell_size, (x + 1) * cell_size, (y + 1) * cell_size,
                            fill="red", outline="black"
                        )

# Draw the Next Shape
def draw_next_shape():
    next_canvas.delete("all")
    for r, row in enumerate(next_shape):
        for c, cell in enumerate(row):
            if cell:
                next_canvas.create_rectangle(
                    c * cell_size, r * cell_size, (c + 1) * cell_size, (r + 1) * cell_size,
                    fill="green", outline="gray"
                )

# Spawn New Shape
def spawn_shape():
    global current_shape, next_shape, current_position
    current_shape = next_shape
    current_position = (0, cols // 2 - len(current_shape[0]) // 2)
    next_shape = random.choice(shapes)
    draw_next_shape()
    if not can_move(current_shape, current_position):
        stop_game()

# Check Valid Move
def can_move(shape, position):
    for r, row in enumerate(shape):
        for c, cell in enumerate(row):
            if cell:
                x = position[1] + c
                y = position[0] + r
                if x < 0 or x >= cols or y >= rows or (y >= 0 and board[y][x] != 0):
                    return False
    return True

# Place Shape on Board
def place_shape():
    global board, score, current_shape, current_position
    shape, position = current_shape, current_position
    for r, row in enumerate(shape):
        for c, cell in enumerate(row):
            if cell:
                x = position[1] + c
                y = position[0] + r
                board[y][x] = 1
    clear_rows()
    spawn_shape()

# Clear Completed Rows
def clear_rows():
    global board, score
    new_board = [row for row in board if any(cell == 0 for cell in row)]
    rows_cleared = rows - len(new_board)
    board = [[0 for _ in range(cols)] for _ in range(rows_cleared)] + new_board
    score += rows_cleared * 100
    score_label.config(text=f"Score: {score}")

# Move Shape Down
def move_down():
    global current_position
    new_position = (current_position[0] + 1, current_position[1])
    if can_move(current_shape, new_position):
        current_position = new_position
    else:
        place_shape()

# Move Shape Horizontally
def move_shape(dx):
    global current_position
    new_position = (current_position[0], current_position[1] + dx)
    if can_move(current_shape, new_position):
        current_position = new_position

# Rotate Shape
def rotate_shape():
    global current_shape
    rotated = [[current_shape[r][c] for r in range(len(current_shape))] for c in range(len(current_shape[0]) - 1, -1, -1)]
    if can_move(rotated, current_position):
        current_shape = rotated

def instant_drop():
    global current_position
    # Faster drop rate: reduce the time interval between drops
    while can_move(current_shape, (current_position[0] + 1, current_position[1])):
        current_position = (current_position[0] + 1, current_position[1])
        draw_board()
        root.update()  # Update the UI to visually show the faster movement
        root.after(50)  # Adjust this value for desired speed (lower is faster)
    place_shape()

# Gesture Detection
def detect_gesture(hand_landmarks):
    if detect_peace_sign(hand_landmarks):
        rotate_shape()
        instructions_label.config(text="Peace Gesture ✌️ Rotate!")
    elif detect_thumbs_down(hand_landmarks):
        instant_drop()
        instructions_label.config(text="Thumbs Down 👎 Instant Drop!")
    scroll_direction = detect_scroll(hand_landmarks)
    if scroll_direction == "right":
        move_shape(1)
        instructions_label.config(text="Scroll ➡️ Move Right!")
    elif scroll_direction == "left":
        move_shape(-1)
        instructions_label.config(text="Scroll ⬅️ Move Left!")

# Update Camera Feed
def update_camera_feed():
    ret, frame = cap.read()
    if not ret:
        return
    frame = cv2.flip(frame, 1)
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(frame_rgb)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            detect_gesture(hand_landmarks)
    frame_image = Image.fromarray(frame_rgb)
    imgtk = ImageTk.PhotoImage(image=frame_image)
    camera_label.imgtk = imgtk
    camera_label.configure(image=imgtk)
    root.after(10, update_camera_feed)

# Game Loop
def game_loop():
    if game_running:
        move_down()
        draw_board()
        root.after(500, game_loop)

# Start the Game
cap = cv2.VideoCapture(0)
spawn_shape()
draw_board()
update_camera_feed()
root.mainloop()
cap.release()

I0000 00:00:1733740479.136341   75699 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-22.5.10), renderer: Intel(R) Iris(TM) Plus Graphics OpenGL Engine
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1733740479.188548   75793 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733740479.237855   75793 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733740481.406676   75790 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.
Exception in Tkinter callback
Traceback (most recent call last):
  File "/usr/local/Cellar/python@3.11/3.11.10/Frameworks/Python.framework/Versions/3.11/lib/python3.11/tkinter/__init__.py", line 1967, in __call__
    return

bu cursorlu gesture falan algılamıyo

In [20]:
import tkinter as tk
import random
import cv2
import mediapipe as mp
from PIL import Image, ImageTk
import pyautogui

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
mp_drawing = mp.solutions.drawing_utils

# Tkinter Setup
root = tk.Tk()
root.title("Gesture-Based Tetris")
root.geometry("1100x800")
root.configure(bg="black")

# Game Variables
rows, cols = 20, 10
cell_size = 30
board = [[0 for _ in range(cols)] for _ in range(rows)]
shapes = [
    [[1, 1, 1, 1]],  # Line
    [[1, 1], [1, 1]],  # Square
    [[0, 1, 0], [1, 1, 1]],  # T-shape
    [[1, 1, 0], [0, 1, 1]],  # Z-shape
    [[0, 1, 1], [1, 1, 0]]   # S-shape
]
current_shape = None
next_shape = random.choice(shapes)  # Initialize next shape
current_position = (0, cols // 2 - 1)
game_running = False
score = 0

screen_width, screen_height = pyautogui.size()

# Grid Canvas
game_canvas = tk.Canvas(root, width=cols * cell_size, height=rows * cell_size, bg="black", highlightthickness=1)
game_canvas.pack(side=tk.LEFT, padx=20, pady=20)

# Next Shape Canvas
next_shape_label = tk.Label(root, text="Next Shape:", font=("Helvetica", 14), bg="black", fg="white")
next_shape_label.place(x=cols * cell_size + 150, y=300)  # Positioned next to the grid

next_canvas = tk.Canvas(root, width=4 * cell_size, height=4 * cell_size, bg="black", highlightthickness=0)
next_canvas.place(x=cols * cell_size + 150, y=330)  # Positioned below the label

# Score Label
score_label = tk.Label(root, text=f"Score: {score}", font=("Helvetica", 18), bg="black", fg="white")
score_label.pack(anchor="n", pady=10)

# Instructions Label
instructions_label = tk.Label(
    root,
    text="Gesture Instructions:\n✌️: Rotate | 👎: Instant Drop | ⬅️: Move Left | ➡️: Move Right",
    font=("Helvetica", 14),
    bg="black",
    fg="white",
    justify="left"
)
instructions_label.place(x=cols * cell_size + 150, y=50)

# Camera Feed Placeholder
camera_label = tk.Label(root, bg="black", width=400, height=400)
camera_label.pack(side=tk.RIGHT, padx=10, pady=20)

# Start and Stop Buttons
def start_game():
    global game_running
    game_running = True
    spawn_shape()
    draw_board()
    game_loop()

def stop_game():
    global game_running
    game_running = False
    score_label.config(text="Game Over! Final Score: " + str(score))

start_button = tk.Button(root, text="Start Game", command=start_game, font=("Helvetica", 12), bg="green", fg="white")
start_button.place(x=cols * cell_size + 150, y=200)

stop_button = tk.Button(root, text="Stop Game", command=stop_game, font=("Helvetica", 12), bg="red", fg="white")
stop_button.place(x=cols * cell_size + 150, y=250)

# Draw the Game Board
def draw_board():
    game_canvas.delete("all")
    for r in range(rows):
        for c in range(cols):
            if board[r][c] != 0:
                game_canvas.create_rectangle(
                    c * cell_size, r * cell_size, (c + 1) * cell_size, (r + 1) * cell_size,
                    fill="blue", outline="gray"
                )
    if current_shape:
        shape, position = current_shape, current_position
        for r, row in enumerate(shape):
            for c, cell in enumerate(row):
                if cell:
                    x = position[1] + c
                    y = position[0] + r
                    if 0 <= y < rows and 0 <= x < cols:
                        game_canvas.create_rectangle(
                            x * cell_size, y * cell_size, (x + 1) * cell_size, (y + 1) * cell_size,
                            fill="red", outline="black"
                        )

# Draw the Next Shape
def draw_next_shape():
    next_canvas.delete("all")
    for r, row in enumerate(next_shape):
        for c, cell in enumerate(row):
            if cell:
                next_canvas.create_rectangle(
                    c * cell_size, r * cell_size, (c + 1) * cell_size, (r + 1) * cell_size,
                    fill="green", outline="gray"
                )

# Spawn New Shape
def spawn_shape():
    global current_shape, next_shape, current_position
    current_shape = next_shape
    current_position = (0, cols // 2 - len(current_shape[0]) // 2)
    next_shape = random.choice(shapes)
    draw_next_shape()
    if not can_move(current_shape, current_position):
        stop_game()

# Check Valid Move
def can_move(shape, position):
    for r, row in enumerate(shape):
        for c, cell in enumerate(row):
            if cell:
                x = position[1] + c
                y = position[0] + r
                if x < 0 or x >= cols or y >= rows or (y >= 0 and board[y][x] != 0):
                    return False
    return True

# Place Shape on Board
def place_shape():
    global board, score, current_shape, current_position
    shape, position = current_shape, current_position
    for r, row in enumerate(shape):
        for c, cell in enumerate(row):
            if cell:
                x = position[1] + c
                y = position[0] + r
                board[y][x] = 1
    clear_rows()
    spawn_shape()

# Clear Completed Rows
def clear_rows():
    global board, score
    new_board = [row for row in board if any(cell == 0 for cell in row)]
    rows_cleared = rows - len(new_board)
    board = [[0 for _ in range(cols)] for _ in range(rows_cleared)] + new_board
    score += rows_cleared * 100
    score_label.config(text=f"Score: {score}")

# Move Shape Down
def move_down():
    global current_position
    new_position = (current_position[0] + 1, current_position[1])
    if can_move(current_shape, new_position):
        current_position = new_position
    else:
        place_shape()

# Move Shape Horizontally
def move_shape(dx):
    global current_position
    new_position = (current_position[0], current_position[1] + dx)
    if can_move(current_shape, new_position):
        current_position = new_position

# Rotate Shape
def rotate_shape():
    global current_shape
    rotated = [[current_shape[r][c] for r in range(len(current_shape))] for c in range(len(current_shape[0]) - 1, -1, -1)]
    if can_move(rotated, current_position):
        current_shape = rotated

def instant_drop():
    global current_position
    # Faster drop rate: reduce the time interval between drops
    while can_move(current_shape, (current_position[0] + 1, current_position[1])):
        current_position = (current_position[0] + 1, current_position[1])
        draw_board()
        root.update()  # Update the UI to visually show the faster movement
        root.after(50)  # Adjust this value for desired speed (lower is faster)
    place_shape()

# Gesture Detection
def detect_gesture(hand_landmarks):
    if detect_peace_sign(hand_landmarks):
        rotate_shape()
        instructions_label.config(text="Peace Gesture ✌️ Rotate!")
    elif detect_thumbs_down(hand_landmarks):
        instant_drop()
        instructions_label.config(text="Thumbs Down 👎 Instant Drop!")
    scroll_direction = detect_scroll(hand_landmarks)
    if scroll_direction == "right":
        move_shape(1)
        instructions_label.config(text="Scroll ➡️ Move Right!")
    elif scroll_direction == "left":
        move_shape(-1)
        instructions_label.config(text="Scroll ⬅️ Move Left!")

def detect_click_gesture(hand_landmarks):
    """Detect a pinching gesture for a click."""
    index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]

    # Calculate Euclidean distance between the tips
    distance = ((index_tip.x - thumb_tip.x) ** 2 +
                (index_tip.y - thumb_tip.y) ** 2 +
                (index_tip.z - thumb_tip.z) ** 2) ** 0.5

    return distance < 0.05

def update_cursor_position(hand_landmarks):
    """Move the cursor based on the index fingertip."""
    index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]

    # Map hand coordinates to screen coordinates
    cursor_x = int(index_tip.x * screen_width)
    cursor_y = int(index_tip.y * screen_height)

    # Move cursor
    pyautogui.moveTo(cursor_x, cursor_y)

# Add this function near other game logic functions
def update_camera_and_game():
    global game_running, current_position
    print("Updating camera and game...")  # Debug print

    # Camera feed logic
    ret, frame = cap.read()
    if not ret:
        print("Camera not working!")  # Debug print
        return

    frame = cv2.flip(frame, 1)
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(frame_rgb)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            print("Hand landmarks detected!")  # Debug print
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            detect_gesture(hand_landmarks)

    # Game logic
    if game_running:
        print("Game is running!")  # Debug print
        move_down()
        draw_board()

    # Update the camera feed in the UI
    frame_image = Image.fromarray(frame_rgb)
    imgtk = ImageTk.PhotoImage(image=frame_image)
    camera_label.imgtk = imgtk
    camera_label.configure(image=imgtk)

    # Schedule the next update
    root.after(50, update_camera_and_game)


# Call this function instead of `update_camera_feed()` or `game_loop()`
update_camera_and_game()


Updating camera and game...
Camera not working!


In [10]:
# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
mp_drawing = mp.solutions.drawing_utils

# Screen dimensions for cursor mapping
screen_width, screen_height = pyautogui.size()

# Button positions and dimensions
BUTTON_WIDTH = 150
BUTTON_HEIGHT = 70
pause_button = {"x": 100, "y": 100, "width": BUTTON_WIDTH, "height": BUTTON_HEIGHT}
start_button = {"x": 300, "y": 100, "width": BUTTON_WIDTH, "height": BUTTON_HEIGHT}

# Game states
game_paused = False
game_started = False

# Gesture detection for click
def is_click_gesture(hand_landmarks):
    index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]

    # Calculate the 3D Euclidean distance between index tip and thumb tip
    distance = ((index_tip.x - thumb_tip.x) ** 2 +
                (index_tip.y - thumb_tip.y) ** 2 +
                (index_tip.z - thumb_tip.z) ** 2) ** 0.5

    return distance < 0.05

# Initialize webcam
cap = cv2.VideoCapture(0)

print("Press 'Esc' to exit.")

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Flip the frame horizontally for a mirror-like effect
    frame = cv2.flip(frame, 1)

    # Get frame dimensions
    frame_height, frame_width, _ = frame.shape

    # Draw buttons on the frame
    cv2.rectangle(frame, (pause_button["x"], pause_button["y"]),
                  (pause_button["x"] + pause_button["width"], pause_button["y"] + pause_button["height"]),
                  (0, 255, 0) if not game_paused else (255, 0, 0), -1)
    cv2.putText(frame, "Pause", (pause_button["x"] + 20, pause_button["y"] + 45),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    cv2.rectangle(frame, (start_button["x"], start_button["y"]),
                  (start_button["x"] + start_button["width"], start_button["y"] + start_button["height"]),
                  (0, 255, 0) if game_started else (255, 0, 0), -1)
    cv2.putText(frame, "Start", (start_button["x"] + 20, start_button["y"] + 45),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    # Convert the frame to RGB
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process the frame and detect hands
    result = hands.process(frame_rgb)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            # Draw hand landmarks
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Get index finger tip coordinates for cursor control
            index_finger_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]

            # Normalize coordinates to screen dimensions
            cursor_x = int(index_finger_tip.x * screen_width)
            cursor_y = int(index_finger_tip.y * screen_height)

            # Move the mouse cursor
            pyautogui.moveTo(cursor_x, cursor_y)

            # Detect click gesture
            if is_click_gesture(hand_landmarks):
                # Map cursor to the frame dimensions
                cursor_on_frame_x = int(index_finger_tip.x * frame_width)
                cursor_on_frame_y = int(index_finger_tip.y * frame_height)

                # Check if click is on Pause button
                if (pause_button["x"] <= cursor_on_frame_x <= pause_button["x"] + pause_button["width"] and
                        pause_button["y"] <= cursor_on_frame_y <= pause_button["y"] + pause_button["height"]):
                    game_paused = not game_paused
                    print("Game Paused" if game_paused else "Game Resumed")

                # Check if click is on Start button
                if (start_button["x"] <= cursor_on_frame_x <= start_button["x"] + start_button["width"] and
                        start_button["y"] <= cursor_on_frame_y <= start_button["y"] + start_button["height"]):
                    game_started = not game_started
                    print("Game Started" if game_started else "Game Stopped")

    # Display the frame
    cv2.imshow("Gesture-Based Game Control", frame)

    # Break the loop when 'Esc' key is pressed
    if cv2.waitKey(5) & 0xFF == 27:
        break

# Release the webcam and close windows
cap.release()
cv2.destroyAllWindows()


I0000 00:00:1733740648.673357   76643 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-22.5.10), renderer: Intel(R) Iris(TM) Plus Graphics OpenGL Engine
W0000 00:00:1733740648.772987   77851 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733740648.833991   77851 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Press 'Esc' to exit.
