In [1]:
import numpy as np

def normalize_landmarks(landmarks, handedness):
    landmarks = np.array(landmarks)

    # Translate so that wrist is at origin
    wrist = landmarks[0]
    landmarks = landmarks - wrist

    # Scale so that distance between wrist and middle finger MCP is 1
    mcp_index = 9  # Middle finger MCP landmark index
    scale = np.linalg.norm(landmarks[mcp_index]) # euclidean distance from the origin (wrist)
    if scale > 0:
        landmarks = landmarks / scale
    
    # Mirror left hands
    if handedness == "Left":
        landmarks[:, 0]  =  -landmarks[:, 0]

    return landmarks.tolist()

In [4]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from pathlib import Path
import cv2
from tensorflow import keras
import time

DETECTOR_PATH = 'utils/mediapipe_cropper/hand_landmarker.task'
MODEL_PATH = 'shared_artifacts/models/gesture_model_20251221_184630.keras'

class_names = ['like', 'stop', 'two_up']


model = keras.models.load_model(MODEL_PATH)

BaseOptions = mp.tasks.BaseOptions
VisionRunningMode = mp.tasks.vision.RunningMode
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
HandLandmarker = mp.tasks.vision.HandLandmarker

options = HandLandmarkerOptions(base_options=BaseOptions(model_asset_path=str(DETECTOR_PATH)),
                                num_hands=1,
                                running_mode=VisionRunningMode.VIDEO)


with HandLandmarker.create_from_options(options) as landmarker:
    # Open default camera (0)
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Cannot open camera")
        exit()

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break

        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image_rgb)

        timestamp_ms = int(time.time() * 1000)

        results = landmarker.detect_for_video(mp_image, timestamp_ms)

        if results.hand_landmarks:
            landmarks = results.hand_landmarks[0]
            handedness_category = results.handedness[0][0]
            handedness = handedness_category.category_name
            confidence = handedness_category.score

            landmark_list = []
            for lm in landmarks:
                landmark_list.append([lm.x, lm.y])

            normalized_landmarks = normalize_landmarks(landmark_list, handedness)

            input_vector = np.array(normalized_landmarks, dtype=np.float32).flatten() # (42,)

            input_vector = np.expand_dims(input_vector, axis=0) # (1, 42)

            predictions = model.predict(input_vector, verbose=0)

            predicted_idx = np.argmax(predictions[0])
            print(predicted_idx)
            confidence = predictions[0][predicted_idx]

            predicted_gesture = class_names[predicted_idx]

            print(predicted_gesture)

        cv2.imshow("Camera", frame)

        # Press 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


W0000 00:00:1766354406.206677   53411 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1766354406.220789   53411 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like
0
like