# HAND GESTURE-BASED INTERACTION

---

Group members:
*   Ada Yılmaz
*   Ceren Şahin
*   Sima Adleyba
*   Selen Naz Gürsoy

### Installing necessary libraries and models

In [2]:
#install mediapipe
%pip install -q mediapipe

Note: you may need to restart the kernel to use updated packages.


In [3]:
#download a model that can recognize 7 hand gestures: 👍, 👎, ✌️, ☝️, ✊, 👋, 🤟
!wget -q https://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task

In [4]:
#download test images from pixabay
import urllib

IMAGE_FILENAMES = ['thumbs_down.jpg', 'victory.jpg', 'thumbs_up.jpg', 'pointing_up.jpg']

for name in IMAGE_FILENAMES:
  url = f'https://storage.googleapis.com/mediapipe-tasks/gesture_recognizer/{name}'
  urllib.request.urlretrieve(url, name)

In [6]:
#or we can use our own images as shown below

# from google.colab import files
# uploaded = files.upload()

# for filename in uploaded:
#   content = uploaded[filename]
#   with open(filename, 'wb') as f:
#     f.write(content)
# IMAGE_FILENAMES = list(uploaded.keys())

# print('Uploaded files:', IMAGE_FILENAMES)

### Functions for visualization

In [7]:
from matplotlib import pyplot as plt
import mediapipe as mp
from mediapipe.framework.formats import landmark_pb2

2024-12-10 17:13:38.461106: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
#some functions to visualize the gesture recognition results.
import math

plt.rcParams.update({
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.spines.left': False,
    'axes.spines.bottom': False,
    'xtick.labelbottom': False,
    'xtick.bottom': False,
    'ytick.labelleft': False,
    'ytick.left': False,
    'xtick.labeltop': False,
    'xtick.top': False,
    'ytick.labelright': False,
    'ytick.right': False
})

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles


def display_one_image(image, title, subplot, titlesize=16):
    """Displays one image along with the predicted category name and score."""
    plt.subplot(*subplot)
    plt.imshow(image)
    if len(title) > 0:
        plt.title(title, fontsize=int(titlesize), color='black', fontdict={'verticalalignment':'center'}, pad=int(titlesize/1.5))
    return (subplot[0], subplot[1], subplot[2]+1)


def display_batch_of_images_with_gestures_and_hand_landmarks(images, results):
    """Displays a batch of images with the gesture category and its score along with the hand landmarks."""
    # Images and labels.
    images = [image.numpy_view() for image in images]
    gestures = [top_gesture for (top_gesture, _) in results]
    multi_hand_landmarks_list = [multi_hand_landmarks for (_, multi_hand_landmarks) in results]

    # Auto-squaring: this will drop data that does not fit into square or square-ish rectangle.
    rows = int(math.sqrt(len(images)))
    cols = len(images) // rows

    # Size and spacing.
    FIGSIZE = 13.0
    SPACING = 0.1
    subplot=(rows,cols, 1)
    if rows < cols:
        plt.figure(figsize=(FIGSIZE,FIGSIZE/cols*rows))
    else:
        plt.figure(figsize=(FIGSIZE/rows*cols,FIGSIZE))

    # Display gestures and hand landmarks.
    for i, (image, gestures) in enumerate(zip(images[:rows*cols], gestures[:rows*cols])):
        title = f"{gestures.category_name} ({gestures.score:.2f})"
        dynamic_titlesize = FIGSIZE*SPACING/max(rows,cols) * 40 + 3
        annotated_image = image.copy()

        for hand_landmarks in multi_hand_landmarks_list[i]:
          hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
          hand_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks
          ])

          mp_drawing.draw_landmarks(
            annotated_image,
            hand_landmarks_proto,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())

        subplot = display_one_image(annotated_image, title, subplot, titlesize=dynamic_titlesize)

    # Layout.
    plt.tight_layout()
    plt.subplots_adjust(wspace=SPACING, hspace=SPACING)
    plt.show()

### Preview the images

In [10]:
import cv2
import math

DESIRED_HEIGHT = 480
DESIRED_WIDTH = 480

def resize_and_show(image, name):
    h, w = image.shape[:2]
    if h < w:
        img = cv2.resize(image, (DESIRED_WIDTH, math.floor(h / (w / DESIRED_WIDTH))))
    else:
        img = cv2.resize(image, (math.floor(w / (h / DESIRED_HEIGHT)), DESIRED_HEIGHT))
    
    # Display the image in a window with a name
    cv2.imshow(name, img)
    cv2.waitKey(0)  # Wait for a key press to close the window
    cv2.destroyAllWindows()  # Close the window after key press

# Example usage
images = {name: cv2.imread(name) for name in IMAGE_FILENAMES}

for name, image in images.items():
    if image is not None:
        print(f"Displaying: {name}")
        resize_and_show(image, name)
    else:
        print(f"Error: Could not read image {name}")


Displaying: thumbs_down.jpg
Displaying: victory.jpg
Displaying: thumbs_up.jpg
Displaying: pointing_up.jpg


### Google implementation - do not run
This first one is a how to from google, we can create our own by following the steps given.


In [8]:
# STEP 1: Import the necessary modules.
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# STEP 2: Create an GestureRecognizer object.
base_options = python.BaseOptions(model_asset_path='gesture_recognizer.task')
options = vision.GestureRecognizerOptions(base_options=base_options)
recognizer = vision.GestureRecognizer.create_from_options(options)

images = []
results = []
for image_file_name in IMAGE_FILENAMES:
    
  # STEP 3: Load the input image.
  image = mp.Image.create_from_file(image_file_name)

  # STEP 4: Recognize gestures in the input image.
  recognition_result = recognizer.recognize(image)

  # STEP 5: Process the result. In this case, visualize it.
  images.append(image)
  top_gesture = recognition_result.gestures[0][0]
  hand_landmarks = recognition_result.hand_landmarks
  results.append((top_gesture, hand_landmarks))

display_batch_of_images_with_gestures_and_hand_landmarks(images, results)

I0000 00:00:1733734701.863207   14037 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-22.5.10), renderer: Intel(R) Iris(TM) Plus Graphics OpenGL Engine
W0000 00:00:1733734701.909597   14037 gesture_recognizer_graph.cc:129] Hand Gesture Recognizer contains CPU only ops. Sets HandGestureRecognizerGraph acceleration to Xnnpack.
I0000 00:00:1733734701.932447   14037 hand_gesture_recognizer_graph.cc:250] Custom gesture classifier is not defined.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1733734702.018345   15212 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733734702.089356   15212 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733734702.094463   15215 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature infere

ValueError: Input image must contain three channel bgr data.

<Figure size 1300x1300 with 0 Axes>

### Example - do not run
Tried an example to see how it works and update the functions accordingly

In [9]:
import cv2
import mediapipe as mp

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()
mp_drawing = mp.solutions.drawing_utils

# Initialize webcam
cap = cv2.VideoCapture(0)

def detect_peace_sign(hand_landmarks):
    # Get coordinates of relevant landmarks
    index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    middle_tip = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP]
    ring_tip = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_TIP]
    pinky_tip = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_TIP]
    
    
    index_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP]
    middle_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_MCP]
    ring_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_MCP]
    pinky_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_MCP]


    # Define peace sign gesture: index and middle fingers up, ring and pinky fingers down
    if (
        # Index and middle fingers' tips must be highest parts of those fingers
        (index_tip.y < index_mcp.y) and (middle_tip.y < middle_mcp.y)
        and
        # Index finger's tips must be higher than other fingers' mcp
        (index_tip.y < ring_mcp.y) and (index_tip.y < pinky_mcp.y)
        and
        # Middle finger's tips must be higher than other fingers' mcp
        (middle_tip.y < ring_mcp.y) and (middle_tip.y < pinky_mcp.y)
    ):
        return True
    return False


while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to RGB
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process the frame and detect hands
    result = hands.process(frame_rgb)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            # Draw hand landmarks
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Detect custom gesture (peace sign)
            if detect_peace_sign(hand_landmarks):
                cv2.putText(frame, 'Peace Sign Detected!', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
                # Trigger interaction (e.g., print a message)
                print("Peace sign gesture detected!")

    # Display the frame
    cv2.imshow('MediaPipe Hands', frame)

    if cv2.waitKey(5) & 0xFF == 27:
        break

cap.release()
cv2.destroyAllWindows()

I0000 00:00:1733734784.299569   14037 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-22.5.10), renderer: Intel(R) Iris(TM) Plus Graphics OpenGL Engine
W0000 00:00:1733734784.420640   16680 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733734784.470914   16680 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Peace sign gesture detected!
Peace sign gesture detected!
Peace sign gesture detected!
Peace sign gesture detected!
Peace sign gesture detected!
Peace sign gesture detected!
Peace sign gesture detected!
Peace sign gesture detected!


### Gesture Detection Functions
After checking how the upper ones worked, we implemented fixed, more robust versions of gesture detection functions.

In [None]:
import tkinter as tk
from PIL import Image, ImageTk
import cv2
import mediapipe as mp

class GestureApp:
    def __init__(self, root):
        self.root = root
        self.root.geometry("800x600")
        self.root.title("Gesture Controlled App")

        # Placeholder image
        self.image_index = 0
        self.images = [f"image_{i}.jpg" for i in range(1, 4)]  # Example image names
        self.image_label = tk.Label(root)
        self.image_label.pack()

        # Buttons
        self.like_button = tk.Button(root, text="Like", command=self.like_picture)
        self.like_button.pack(side=tk.LEFT)

        self.dislike_button = tk.Button(root, text="Dislike", command=self.dislike_picture)
        self.dislike_button.pack(side=tk.LEFT)

        # Camera feed
        self.cap = cv2.VideoCapture(0)
        self.update_camera_feed()

    def update_camera_feed(self):
        ret, frame = self.cap.read()
        if not ret:
            print("Camera not working!")
            return

        frame = cv2.flip(frame, 1)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                # Process hand landmarks here
                pass

        # Convert the frame to an image format that Tkinter can use
        img = Image.fromarray(rgb_frame)
        imgtk = ImageTk.PhotoImage(image=img)
        self.image_label.imgtk = imgtk
        self.image_label.configure(image=imgtk)

        # Call this method again after 10 milliseconds
        self.root.after(10, self.update_camera_feed)

    def like_picture(self):
        print("Liked picture")

    def dislike_picture(self):
        print("Disliked picture")

# Initialize MediaPipe hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()

# Create the Tkinter window and run the app
root = tk.Tk()
app = GestureApp(root)
root.mainloop()

In [13]:
import cv2
import mediapipe as mp

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
mp_drawing = mp.solutions.drawing_utils

# Initialize webcam
cap = cv2.VideoCapture(0)

# Global variables for tracking previous positions
previous_thumb_tip = None
previous_index_tip = None

print("Press 'Esc' to exit.")

# Initialize gesture display variables
current_gesture = None 
gesture_display_time = 0 
GESTURE_DISPLAY_DURATION = 1.5 

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Flip the frame horizontally
    frame = cv2.flip(frame, 1)

    # Convert the frame to RGB
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process the frame and detect hands
    result = hands.process(frame_rgb)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            # Draw hand landmarks
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Detect gestures
            detected_gesture = None
            if detect_peace_sign(hand_landmarks):
                detected_gesture = "Peace Sign Detected!"
            elif detect_thumbs_up(hand_landmarks):
                detected_gesture = "Thumbs Up Detected!"
            elif detect_thumbs_down(hand_landmarks):
                detected_gesture = "Thumbs Down Detected!"
            
            # If a gesture is detected, update the display variables
            if detected_gesture:
                current_gesture = detected_gesture
                gesture_display_time = time.time()
            else:
                # Only check for scroll if no other gesture is detected
                detected, direction = detect_scroll(hand_landmarks, threshold=0.1, dominance_ratio=4.0)
                if detected:
                    current_gesture = f"Scroll Detected: {direction.title()}!"
                    gesture_display_time = time.time()

    # Display the current gesture if within the display duration
    if current_gesture and (time.time() - gesture_display_time < GESTURE_DISPLAY_DURATION):
        cv2.putText(frame, current_gesture, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

    # Display the frame
    cv2.imshow('Gesture Recognition', frame)

    # Break the loop when 'Esc' key is pressed
    if cv2.waitKey(5) & 0xFF == 27:
        break

# Release the webcam and close windows
cap.release()
cv2.destroyAllWindows()

I0000 00:00:1733840147.281885  123659 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-22.5.10), renderer: Intel(R) Iris(TM) Plus Graphics OpenGL Engine
W0000 00:00:1733840147.360739  128637 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733840147.423230  128637 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Press 'Esc' to exit.


W0000 00:00:1733840153.492748  128634 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


### Cursor control

In [8]:
import cv2
import mediapipe as mp
import pyautogui

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
mp_drawing = mp.solutions.drawing_utils

# Get screen dimensions
screen_width, screen_height = pyautogui.size()

def is_click_gesture(hand_landmarks):
    """Detect a pinching gesture for a left click."""
    index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]

    # Calculate the 3D Euclidean distance between index tip and thumb tip
    distance = ((index_tip.x - thumb_tip.x) ** 2 +
                (index_tip.y - thumb_tip.y) ** 2 +
                (index_tip.z - thumb_tip.z) ** 2) ** 0.5

    # Adjust threshold based on typical 3D distances observed
    return distance < 0.05


# Open webcam
cap = cv2.VideoCapture(0)

print("Press 'Esc' to exit.")

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Flip the frame horizontally for a mirror-like effect
    frame = cv2.flip(frame, 1)

    # Get frame dimensions
    frame_height, frame_width, _ = frame.shape

    # Convert the frame to RGB
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process the frame and detect hands
    result = hands.process(frame_rgb)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            # Draw hand landmarks
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Get index finger tip coordinates
            index_finger_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]

            # Normalize coordinates to screen dimensions
            cursor_x = int(index_finger_tip.x * screen_width)
            cursor_y = int(index_finger_tip.y * screen_height)

            # Move the mouse cursor
            pyautogui.moveTo(cursor_x, cursor_y)

            # Detect click gesture
            if is_click_gesture(hand_landmarks):
                pyautogui.click()
                cv2.putText(frame, "Click!", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow("Gesture-Based Cursor Control", frame)

    # Break the loop when 'Esc' key is pressed
    if cv2.waitKey(5) & 0xFF == 27:
        break

# Release the webcam and close windows
cap.release()
cv2.destroyAllWindows()

I0000 00:00:1733740596.988830   76643 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-22.5.10), renderer: Intel(R) Iris(TM) Plus Graphics OpenGL Engine
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1733740597.024627   77265 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733740597.056736   77265 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Press 'Esc' to exit.


In [None]:
import cv2
import mediapipe as mp
import tkinter as tk
from PIL import Image, ImageTk, ImageDraw, ImageFont
import os
import time

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
mp_drawing = mp.solutions.drawing_utils

# Initialize the main Tkinter window
root = tk.Tk()
root.title("Gesture-Controlled Instagram Feed")
root.geometry("1024x768")
root.configure(bg="#f0f0f0")

# Load photos into a list
photo_folder = "photos/"
photo_files = [os.path.join(photo_folder, f) for f in os.listdir(photo_folder) if f.endswith((".jpg", ".png"))]
photos = [Image.open(photo).resize((400, 400)) for photo in photo_files]

# Create a label to display a single photo
photo_label = tk.Label(root, bg="#ffffff", width=400, height=400)
photo_label.pack(side=tk.LEFT, padx=10, pady=10)

# Feedback label
feedback_label = tk.Label(root, text="Perform gestures to interact!", font=("Helvetica", 14), bg="#f0f0f0")
feedback_label.pack(side=tk.BOTTOM, pady=20)

# Interaction states
current_photo_index = 0  # Start with the first photo
liked_photos = {}
disliked_photos = {}
saved_photos = []  # List to store saved photos
showing_saved_photos = False  # Track whether we are showing saved photos

# Camera feed frame
camera_frame = tk.Label(root, bg="#000000", width=500, height=700)
camera_frame.pack(side=tk.RIGHT, padx=10, pady=10)

# Function to add emoji to photos
def add_emoji(photo, emoji):
    """Add an emoji to the given photo."""
    img = photo.copy()
    draw = ImageDraw.Draw(img)
    font = ImageFont.truetype("arial.ttf", 50)  # Ensure you have a compatible font installed
    draw.text((150, 150), emoji, fill="red", font=font)
    return img

# Function to update the displayed photo
def update_photo():
    """Update the currently displayed photo based on the index."""
    global current_photo_index
    if showing_saved_photos:
        if len(saved_photos) == 0:
            feedback_label.config(text="No saved photos available!")
            photo_label.config(image="")
            return
        if 0 <= current_photo_index < len(saved_photos):
            img = ImageTk.PhotoImage(saved_photos[current_photo_index])
            photo_label.configure(image=img)
            photo_label.img = img  # Keep a reference to avoid garbage collection
    else:
        if 0 <= current_photo_index < len(photos):
            current_photo = photos[current_photo_index]
            if current_photo_index in liked_photos:
                current_photo = liked_photos[current_photo_index]
            elif current_photo_index in disliked_photos:
                current_photo = disliked_photos[current_photo_index]
            img = ImageTk.PhotoImage(current_photo)
            photo_label.configure(image=img)
            photo_label.img = img  # Keep a reference to avoid garbage collection

# Function to toggle between all photos and saved photos
def show_saved_photos():
    global showing_saved_photos, current_photo_index
    showing_saved_photos = not showing_saved_photos
    current_photo_index = 0  # Reset to the first photo
    if showing_saved_photos:
        feedback_label.config(text="Showing Saved Photos 📂")
    else:
        feedback_label.config(text="Showing All Photos 🌍")
    update_photo()

# Gesture Detection Logic
def detect_gesture(hand_landmarks):
    """Detect gestures for liking, disliking, saving, scrolling, and clicking."""
    global current_photo_index

    # Get landmarks for gestures
    thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]
    index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    middle_tip = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP]
    ring_tip = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_TIP]
    pinky_tip = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_TIP]

    # Like Gesture (Thumbs Up)
    if (
        thumb_tip.y < index_tip.y
        and thumb_tip.y < middle_tip.y
        and thumb_tip.y < ring_tip.y
        and thumb_tip.y < pinky_tip.y
    ):
        feedback_label.config(text="Photo Liked! ❤️")
        if current_photo_index not in liked_photos:
            liked_photos[current_photo_index] = add_emoji(photos[current_photo_index], "❤️")
        update_photo()
        time.sleep(0.5)
        return

    # Dislike Gesture (Thumbs Down)
    if (
        thumb_tip.y > index_tip.y
        and thumb_tip.y > middle_tip.y
        and thumb_tip.y > ring_tip.y
        and thumb_tip.y > pinky_tip.y
    ):
        feedback_label.config(text="Photo Disliked! 👎")
        if current_photo_index not in disliked_photos:
            disliked_photos[current_photo_index] = add_emoji(photos[current_photo_index], "👎")
        update_photo()
        time.sleep(0.5)
        return

    # Save Gesture (Peace Sign)
    if (
        index_tip.y < middle_tip.y  # Index finger is above middle finger
        and middle_tip.y < ring_tip.y  # Middle finger is above ring finger
        and ring_tip.y < pinky_tip.y  # Ring finger is above pinky
        and thumb_tip.y > index_tip.y  # Thumb is below the index
    ):
        feedback_label.config(text="Photo Saved! ✌️")
        current_photo = photos[current_photo_index]
        if current_photo not in saved_photos:
            saved_photos.append(current_photo)
        time.sleep(0.5)
        return

    # Scrolling Gesture (Index Finger Movement)
    if index_tip.y < hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP].y:
        feedback_label.config(text="Scrolling Up ⬆️")
        if current_photo_index > 0:
            current_photo_index -= 1
            update_photo()
        time.sleep(0.5)
    elif index_tip.y > hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP].y:
        feedback_label.config(text="Scrolling Down ⬇️")
        if showing_saved_photos:
            if current_photo_index < len(saved_photos) - 1:
                current_photo_index += 1
                update_photo()
        else:
            if current_photo_index < len(photos) - 1:
                current_photo_index += 1
                update_photo()
        time.sleep(0.5)

# Open webcam and process gestures
cap = cv2.VideoCapture(0)

def update_camera_feed():
    """Update the camera feed and process gestures in real-time."""
    global current_photo_index

    ret, frame = cap.read()
    if not ret:
        return

    # Flip the frame horizontally for a mirror-like effect
    frame = cv2.flip(frame, 1)

    # Convert the frame to RGB for MediaPipe
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process the frame for hand landmarks
    results = hands.process(frame_rgb)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Draw hand landmarks on the frame
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Detect gestures for liking, disliking, saving, scrolling, and clicking
            detect_gesture(hand_landmarks)

    # Convert the frame to an image for Tkinter
    frame_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    imgtk = ImageTk.PhotoImage(image=frame_image)

    # Display the camera feed in the GUI
    camera_frame.imgtk = imgtk
    camera_frame.configure(image=imgtk)

    # Schedule the next frame update
    root.after(10, update_camera_feed)

# Start with the first photo
update_photo()

# Add a button to toggle saved photos
saved_button = tk.Button(root, text="Show Saved Photos", command=show_saved_photos, font=("Helvetica", 12))
saved_button.pack(side=tk.BOTTOM, pady=10)

# Start the camera feed
update_camera_feed()

# Run the Tkinter event loop
root.mainloop()

# Release the camera
cap.release()


: 

# Instagram layout

In [None]:
import tkinter as tk
from PIL import Image, ImageTk
import cv2
import requests
from io import BytesIO

class GestureApp:
    def __init__(self, root):
        self.root = root
        self.root.geometry("800x600")
        self.root.title("Gesture Controlled App")

        # Placeholder image
        self.image_index = 0
        self.images = self.fetch_images()  # Fetch Instagram images
        self.image_label = tk.Label(root)
        self.image_label.pack()

        # Buttons
        self.like_button = tk.Button(root, text="Like", command=self.like_picture)
        self.like_button.pack(side=tk.LEFT)

        self.dislike_button = tk.Button(root, text="Dislike", command=self.dislike_picture)
        self.dislike_button.pack(side=tk.LEFT)

        # Camera feed
        self.cap = cv2.VideoCapture(0)
        self.update_camera_feed()

    def fetch_images(self): #FILL IN FUNCTION
        images = []
        return images

    def update_camera_feed(self):
        ret, frame = self.cap.read()
        if not ret:
            print("Camera not working!")
            return

        frame = cv2.flip(frame, 1)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Process frame for hand landmarks (not shown here)

        # Convert the frame to an image format that Tkinter can use
        img = Image.fromarray(rgb_frame)
        imgtk = ImageTk.PhotoImage(image=img)
        self.image_label.imgtk = imgtk
        self.image_label.configure(image=imgtk)

        # Call this method again after 10 milliseconds
        self.root.after(10, self.update_camera_feed)

    def like_picture(self):
        print("Liked picture")

    def dislike_picture(self):
        print("Disliked picture")

# Create the Tkinter window and run the app
root = tk.Tk()
app = GestureApp(root)
root.mainloop()