In [None]:
### Real-Time Hand Gesture Recognition with Emoji Overlays (LIKE & VICTORY)
## created by Bareera Mushthak

!pip install mediapipe opencv-python

In [52]:
# Import libraries
import cv2
import mediapipe as mp
import math
import random

# Define functions

def calculate_angle(a, b, c):  # to calculate thumb angle
    """Return the unsigned angle at vertex ``b`` between rays ``b->a`` and ``b->c``.

    Args:
        a: ``(x, y)`` of the first end point.
        b: ``(x, y)`` of the vertex (middle point).
        c: ``(x, y)`` of the second end point.

    Returns:
        The interior angle in degrees, in the range [0, 180].
    """
    ang = abs(math.degrees(math.atan2(c[1] - b[1], c[0] - b[0]) -
                           math.atan2(a[1] - b[1], a[0] - b[0])))
    # Each atan2 result lies in [-180, 180] degrees, so their difference can
    # reach 360. Fold reflex values back so a sharp bend is never reported as
    # an almost-straight angle.
    if ang > 180:
        ang = 360 - ang
    return ang

    
def resize_emoji(img, width=None, height=None):
    """Resize an emoji image while preserving its aspect ratio.

    Args:
        img: Image array (as returned by ``cv2.imread``).
        width: Target width in pixels. When given, it takes precedence and
            ``height`` is ignored.
        height: Target height in pixels; used only when ``width`` is None.

    Returns:
        ``img`` itself when neither ``width`` nor ``height`` is given,
        otherwise a resized copy.

    Raises:
        ValueError: If a target size is requested but ``img`` is None
            (``cv2.imread`` returns None when a file cannot be read).
    """
    if width is None and height is None:
        return img
    if img is None:
        raise ValueError(
            "resize_emoji: image is None (was the image file loaded correctly?)")
    h, w = img.shape[:2]
    if width is None:
        r = height / float(h)
        # Clamp the derived side so extreme aspect ratios never yield 0 px,
        # which cv2.resize rejects.
        dim = (max(1, int(w * r)), height)
    else:
        r = width / float(w)
        dim = (width, max(1, int(h * r)))
    return cv2.resize(img, dim, interpolation=cv2.INTER_AREA)

    

def overlay_image(bg, overlay, x, y):
    """Alpha-blend ``overlay`` onto ``bg`` in place, clipped to the background.

    Args:
        bg: Background image array; its first three channels are modified
            in place.
        overlay: Image array whose fourth channel is used as the alpha mask
            (0-255). Overlays without a fourth channel are silently skipped.
        x: Column in ``bg`` where the overlay's left edge goes. May be
            negative or beyond the right edge.
        y: Row in ``bg`` where the overlay's top edge goes. May be negative
            or beyond the bottom edge.

    Returns:
        None. Nothing is drawn when the overlay lies entirely outside ``bg``.
    """
    # Check for the alpha channel before any 3-axis indexing so a 2-D
    # (grayscale) overlay is skipped instead of raising.
    if overlay.ndim < 3 or overlay.shape[2] < 4:
        return

    h, w = overlay.shape[:2]
    bg_h, bg_w = bg.shape[:2]

    # Intersection of the overlay rectangle with the background, expressed in
    # background coordinates.
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + w, bg_w), min(y + h, bg_h)
    if x0 >= x1 or y0 >= y1:
        return  # completely off-screen: nothing to blend

    # Matching region of the overlay (offset by how far it was clipped).
    patch = overlay[y0 - y:y1 - y, x0 - x:x1 - x]
    alpha = patch[:, :, 3:4] / 255.0  # keep a trailing axis for broadcasting
    region = bg[y0:y1, x0:x1, :3]
    bg[y0:y1, x0:x1, :3] = (1 - alpha) * region + alpha * patch[:, :, :3]

        
# Initialize mediapipe

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands


# Load emoji images (IMREAD_UNCHANGED keeps the alpha channel used for blending)

like_img = cv2.imread('like.png', cv2.IMREAD_UNCHANGED)
fireworks_img = cv2.imread('fireworks.png', cv2.IMREAD_UNCHANGED)

# cv2.imread returns None instead of raising when a file cannot be read.
if like_img is None:
    print("Warning: could not load 'like.png'; the LIKE emoji will not be drawn.")
if fireworks_img is None:
    print("Warning: could not load 'fireworks.png'; fireworks will not be drawn.")


# --- SETUP CAMERA ---

cap = cv2.VideoCapture(0)

# The recording uses the size the camera reports for its frames.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('my_gesture_video.avi', fourcc, 20.0, (frame_width, frame_height))

# Remember the writer state now; it is checked again after release().
writer_ok = out.isOpened()
if not writer_ok:
    print("Error: VideoWriter not initialized properly.")
else:
    print("VideoWriter initialized successfully.")

# Active fireworks as (x, y, age_in_frames) tuples.
fireworks_positions = []

# Fingertip landmark indexes (thumb, index, middle, ring, pinky) in the MediaPipe hand model
tip_ids = [4, 8, 12, 16, 20]

# try/finally guarantees the camera, the writer and the window are released
# even if the loop raises or the kernel is interrupted.
try:
    # --- RESIZE EMOJI BASED ON FRAME ---
    ret, test_frame = cap.read()
    if ret:
        frame_h, frame_w = test_frame.shape[:2]
        if like_img is not None:
            like_img = resize_emoji(like_img, width=int(frame_w * 0.15))  # 15% of frame width
    else:
        print("Failed to read from webcam")

    # The fireworks sprite has a fixed size, so resize it once instead of
    # once per firework per frame.
    fireworks_sprite = None
    if fireworks_img is not None:
        fireworks_sprite = resize_emoji(fireworks_img, width=100)

    with mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.5) as hands:
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                # Skip the empty frame, but keep polling the keyboard so 'q'
                # can still end the loop while the camera delivers nothing.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                continue

            # Flip for a mirror view and convert BGR -> RGB for MediaPipe
            image = cv2.flip(image, 1)
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = hands.process(image_rgb)
            finger_count = 0
            h, w = image.shape[:2]

            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    # Landmarks are normalized; convert to (index, x_px, y_px).
                    lm_list = [(idx, int(lm.x * w), int(lm.y * h))
                               for idx, lm in enumerate(hand_landmarks.landmark)]

                    # The indexing below needs the full 21-landmark hand model.
                    if len(lm_list) == 21:
                        fingers = []

                        # Thumb: angle at landmark 3 between landmarks 2 and 4
                        thumb_angle = calculate_angle(lm_list[2][1:],
                                                      lm_list[3][1:],
                                                      lm_list[4][1:])

                        # Thumb counts as "up" when almost straight (> 150 degrees)
                        fingers.append(1 if thumb_angle > 150 else 0)

                        # Optional: show angle on screen for tuning
                        # (anchored to the bottom edge; y = 430 on a 480-px frame)
                        cv2.putText(image, f'Thumb Angle: {int(thumb_angle)}', (10, h - 50),
                                    cv2.FONT_HERSHEY_PLAIN, 1.2, (0, 255, 255), 1)

                        # Other 4 fingers: "up" when the tip is above (smaller y than)
                        # the joint two landmark indexes below it
                        for tip_id in tip_ids[1:]:
                            tip_y = lm_list[tip_id][2]
                            joint_y = lm_list[tip_id - 2][2]
                            fingers.append(1 if tip_y < joint_y else 0)

                        finger_count = fingers.count(1)

                        # Show detected finger list on screen for debug
                        # (y = 450 on a 480-px frame)
                        cv2.putText(image, str(fingers), (20, h - 30),
                                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)

                        # Like gesture detection (only thumb up)
                        if fingers == [1, 0, 0, 0, 0]:
                            cv2.putText(image, 'LIKE!', (60, 90),
                                        cv2.FONT_HERSHEY_COMPLEX, 1, (200, 255, 0), 2)
                            if like_img is not None:
                                overlay_image(image, like_img, 50, 100)

                        # Victory gesture (index + middle up): spawn fireworks
                        if fingers == [0, 1, 1, 0, 0]:
                            for _ in range(5):
                                # max() keeps the range valid on frames narrower than 200 px
                                x = random.randint(100, max(100, w - 100))
                                y = random.randint(50, 200)
                                fireworks_positions.append((x, y, 0))

                    # Draw landmarks
                    mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Draw fireworks: each one lives 20 frames, rises 5 px per frame and
            # is hidden every third frame for a blinking effect
            new_positions = []
            for (x, y, t) in fireworks_positions:
                if t < 20:
                    if t % 3 != 0 and fireworks_sprite is not None:
                        overlay_image(image, fireworks_sprite, x, y - t * 5)
                    new_positions.append((x, y, t + 1))
            fireworks_positions = new_positions

            # Show the count
            cv2.putText(image, "Fingers: " + str(finger_count), (20, 50),
                        cv2.FONT_HERSHEY_COMPLEX, 1.5, (158, 0, 0), 3)

            # Display window and record the annotated frame
            cv2.imshow("Hand Gesture with Count", image)
            out.write(image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    out.release()
    cv2.destroyAllWindows()

# Reported only after the writer has been closed, with the name actually used.
if writer_ok:
    print("Recording finished and saved as 'my_gesture_video.avi'.")
                        

VideoWriter initialized successfully.
Recording finished and saved as 'my_gesture_video.mp4'.


I0000 00:00:1747218238.861115   10497 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1747218238.861854   18118 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.3), renderer: Mesa Intel(R) UHD Graphics (ADL-S GT0.5)
W0000 00:00:1747218238.869638   18109 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1747218238.882601   18111 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
