In [4]:
import numpy as np
import mediapipe as mp
import cv2

In [2]:
# MediaPipe Hands solution module; it also exposes the HandLandmark index enum
# that landmarks_to_feature_vector uses to pick individual joints.
mp_hands = mp.solutions.hands
# The notebook feeds unrelated still images to the detector, so run detection on
# every input instead of the default video mode, which tracks from prior frames.
hands = mp_hands.Hands(static_image_mode=True)

In [18]:
def landmarks_to_feature_vector(landmarks, additional_features: bool = False) -> np.ndarray:
    """Turn one hand's landmarks into a wrist-centred, scale-normalized vector.

    Every landmark is translated so the wrist sits at the origin and divided by
    the wrist-to-middle-finger-MCP distance, then the (x, y, z) triples are
    flattened in landmark order.

    Parameters
    ----------
    landmarks
        Object exposing a ``landmark`` sequence whose items carry ``x``, ``y``
        and ``z`` attributes (e.g. one entry of ``multi_hand_landmarks``).
    additional_features
        When True, append the distances from the thumb tip to the index,
        middle, ring and pinky fingertips (4 extra values), measured in the
        normalized coordinate space.

    Returns
    -------
    np.ndarray
        1-D float array: 3 values per landmark, followed by the 4 fingertip
        distances when ``additional_features`` is True.

    Raises
    ------
    ValueError
        If the wrist and the middle-finger MCP coincide (or the distance is not
        finite), because the scale normalization would divide by zero.
    """
    # One row per landmark, columns are (x, y, z).
    coords = np.array([(lm.x, lm.y, lm.z) for lm in landmarks.landmark], dtype=float)

    # Normalize relative to the wrist, using the wrist -> middle-finger MCP
    # (knuckle) distance as the unit length.
    wrist = coords[int(mp_hands.HandLandmark.WRIST)]
    middle_mcp = coords[int(mp_hands.HandLandmark.MIDDLE_FINGER_MCP)]
    scale = np.linalg.norm(middle_mcp - wrist)
    if not np.isfinite(scale) or scale == 0:
        # Dividing anyway would silently fill the vector with NaN/inf.
        raise ValueError(
            "Cannot normalize landmarks: wrist to middle-finger MCP distance is "
            f"{scale!r}"
        )

    normalized = (coords - wrist) / scale
    feature_vector = normalized.flatten()

    if additional_features:
        thumb_tip = normalized[int(mp_hands.HandLandmark.THUMB_TIP)]
        fingertip_ids = [
            int(mp_hands.HandLandmark.INDEX_FINGER_TIP),
            int(mp_hands.HandLandmark.MIDDLE_FINGER_TIP),
            int(mp_hands.HandLandmark.RING_FINGER_TIP),
            int(mp_hands.HandLandmark.PINKY_TIP),
        ]
        # All four thumb-to-fingertip distances in one vectorised norm.
        distances = np.linalg.norm(normalized[fingertip_ids] - thumb_tip, axis=1)
        feature_vector = np.concatenate([feature_vector, distances])

    return feature_vector

In [16]:
# Load one sample image and run hand detection on it.
TEST_IMAGE_PATH = './ASL_dataset/asl_alphabet_test/asl_alphabet_test/K_test.jpg'
test_img = cv2.imread(TEST_IMAGE_PATH)
if test_img is None:
    # cv2.imread signals a missing/unreadable file by returning None, not raising.
    raise FileNotFoundError(f"Could not read image: {TEST_IMAGE_PATH}")
# OpenCV loads BGR; the detector is fed RGB.
test_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB)
hand_results = hands.process(test_img)
if not hand_results.multi_hand_landmarks:
    # multi_hand_landmarks is None when no hand was detected.
    raise RuntimeError(f"No hand detected in image: {TEST_IMAGE_PATH}")
# Keep only the first detected hand.
landmarks = hand_results.multi_hand_landmarks[0]
landmarks

landmark {
  x: 0.487440228
  y: 0.966851354
  z: 1.23281905e-006
}
landmark {
  x: 0.587344766
  y: 0.872867167
  z: -0.0805802792
}
landmark {
  x: 0.611501217
  y: 0.720474303
  z: -0.0993674174
}
landmark {
  x: 0.518171966
  y: 0.607011318
  z: -0.112812519
}
landmark {
  x: 0.441145599
  y: 0.517384171
  z: -0.120530576
}
landmark {
  x: 0.604262829
  y: 0.59546864
  z: -0.0142214354
}
landmark {
  x: 0.639508247
  y: 0.451204389
  z: -0.0496620797
}
landmark {
  x: 0.652320087
  y: 0.357407719
  z: -0.0734654814
}
landmark {
  x: 0.660554
  y: 0.27849
  z: -0.0873675644
}
landmark {
  x: 0.507541299
  y: 0.606523514
  z: -0.00758898724
}
landmark {
  x: 0.478684843
  y: 0.444038153
  z: -0.0584124625
}
landmark {
  x: 0.456403315
  y: 0.341932982
  z: -0.0897961333
}
landmark {
  x: 0.434143245
  y: 0.260007232
  z: -0.0998728424
}
landmark {
  x: 0.426859021
  y: 0.652160406
  z: -0.0132408636
}
landmark {
  x: 0.404248029
  y: 0.573114753
  z: -0.116463751
}
landmark {
  x: 0.

In [21]:
# Default call (additional_features=False): normalized coordinates only,
# without the thumb-to-fingertip distances.
feature_vector = landmarks_to_feature_vector(landmarks)

In [23]:
# Sanity check on the vector length: 3 values (x, y, z) per landmark.
feature_vector.shape

(63,)

In [25]:
# Inspect the normalized values; the first triple is the wrist, which is the
# origin after normalization and therefore all zeros.
feature_vector

array([ 0.        ,  0.        ,  0.        ,  0.27676849, -0.26036717,
       -0.22323734,  0.34368982, -0.68254562, -0.27528391,  0.08513704,
       -0.99687547, -0.31253127, -0.12825138, -1.24517221, -0.33391283,
        0.3236371 , -1.0288525 , -0.03940148,  0.42127852, -1.42851201,
       -0.13758374,  0.45677154, -1.6883597 , -0.20352701,  0.47958217,
       -1.90698776, -0.24204036,  0.05568659, -0.99822685, -0.02102741,
       -0.0242553 , -1.44836485, -0.16182518, -0.08598248, -1.73122982,
       -0.24876829, -0.14765021, -1.95819114, -0.2766841 , -0.16782991,
       -0.87179762, -0.03668497, -0.23046981, -1.09078013, -0.32264638,
       -0.09496698, -0.78027439, -0.36899497, -0.02088487, -0.57215939,
       -0.30297112, -0.36566195, -0.69181835, -0.0631509 , -0.35508093,
       -0.89574167, -0.31779486, -0.24873406, -0.64805375, -0.32444188,
       -0.1977877 , -0.46274459, -0.26098776])

In [26]:
# Unflatten the feature vector back into one (x, y, z) row per landmark.
# reshape(-1, 3) relies on the vector holding coordinates only: a vector built
# with additional_features=True carries 4 extra distances and its length is no
# longer a multiple of 3.
# NOTE(review): this rebinds `landmarks` from the detection result to an ndarray,
# so re-running the landmarks_to_feature_vector(landmarks) cell afterwards would
# fail on `.landmark` — consider a distinct name if no later cell depends on it.
landmarks = feature_vector.reshape(-1, 3)
landmarks

array([[ 0.        ,  0.        ,  0.        ],
       [ 0.27676849, -0.26036717, -0.22323734],
       [ 0.34368982, -0.68254562, -0.27528391],
       [ 0.08513704, -0.99687547, -0.31253127],
       [-0.12825138, -1.24517221, -0.33391283],
       [ 0.3236371 , -1.0288525 , -0.03940148],
       [ 0.42127852, -1.42851201, -0.13758374],
       [ 0.45677154, -1.6883597 , -0.20352701],
       [ 0.47958217, -1.90698776, -0.24204036],
       [ 0.05568659, -0.99822685, -0.02102741],
       [-0.0242553 , -1.44836485, -0.16182518],
       [-0.08598248, -1.73122982, -0.24876829],
       [-0.14765021, -1.95819114, -0.2766841 ],
       [-0.16782991, -0.87179762, -0.03668497],
       [-0.23046981, -1.09078013, -0.32264638],
       [-0.09496698, -0.78027439, -0.36899497],
       [-0.02088487, -0.57215939, -0.30297112],
       [-0.36566195, -0.69181835, -0.0631509 ],
       [-0.35508093, -0.89574167, -0.31779486],
       [-0.24873406, -0.64805375, -0.32444188],
       [-0.1977877 , -0.46274459, -0.260