In [59]:
import copy
import numpy as np
import cv2
import csv
import os
import tensorflow as tf
import mediapipe as mp 
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import itertools

In [60]:
# Hand detection model initialization.
# `hands` is the shared detector instance that the capture loop calls
# `process()` on for every frame.
mp_hands = mp.solutions.hands

hands = mp_hands.Hands(
    min_tracking_confidence=0.5,
    min_detection_confidence=0.7,
    max_num_hands=2,
)

I0000 00:00:1724517650.344616 8728518 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1 Pro


In [61]:
def calc_bounding_box(image, landmarks):
    """Return the pixel bounding box ``[x1, y1, x2, y2]`` around one hand.

    Args:
        image: Frame whose ``shape`` starts with ``(height, width)``.
        landmarks: Object exposing a ``landmark`` iterable; each item has
            ``x`` and ``y`` attributes that are scaled by the image width
            and height to obtain pixel coordinates.

    Returns:
        ``[x, y, x + w, y + h]`` where ``(x, y, w, h)`` is the rectangle
        reported by ``cv2.boundingRect`` for the pixel coordinates.
    """
    image_width, image_height = image.shape[1], image.shape[0]

    # Clamp on both sides so the box always lies inside the frame. Without
    # the lower bound, a hand that is partly off-screen produces negative
    # corners, which break any later NumPy slicing that uses the box.
    pixel_points = [
        (
            max(0, min(int(landmark.x * image_width), image_width - 1)),
            max(0, min(int(landmark.y * image_height), image_height - 1)),
        )
        for landmark in landmarks.landmark
    ]

    # Build the array in one step instead of np.append per landmark (which
    # reallocates every time). reshape keeps the (0, 2) shape when there are
    # no landmarks at all.
    landmark_array = np.array(pixel_points, dtype=int).reshape(-1, 2)

    x, y, w, h = cv2.boundingRect(landmark_array)
    return [x, y, x + w, y + h]

W0000 00:00:1724517650.360419 9431513 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1724517650.373418 9431509 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [62]:
def censor_function(image, brect):
    """Blur the part of ``image`` covered by ``brect`` in place.

    Args:
        image: Frame array indexed as ``image[row, column]``.
        brect: ``[x1, y1, x2, y2]`` box in pixel coordinates.

    Returns:
        The same ``image`` object, with the boxed region replaced by a
        Gaussian-blurred copy. The image is returned untouched when the box
        has no overlap with the frame.
    """
    image_height, image_width = image.shape[0], image.shape[1]

    # Clamp the box to the frame. NumPy reads a negative slice start as an
    # offset from the far edge, so an unclamped box such as x1 = -1 would
    # select an empty (or wrong) region instead of starting at column 0.
    x1 = max(0, min(brect[0], image_width))
    y1 = max(0, min(brect[1], image_height))
    x2 = max(0, min(brect[2], image_width))
    y2 = max(0, min(brect[3], image_height))

    # Nothing to blur; also avoids handing an empty array to GaussianBlur.
    if x2 <= x1 or y2 <= y1:
        return image

    roi = image[y1:y2, x1:x2]
    image[y1:y2, x1:x2] = cv2.GaussianBlur(roi, (151, 151), 0)
    return image

In [63]:
def draw_bounding_box(image, brect, sign, handedness):
    """Annotate one detected hand with its box and a text label.

    Args:
        image: Frame to draw on (modified in place).
        brect: ``[x1, y1, x2, y2]`` box in pixel coordinates.
        sign: Recognised gesture name, or ``""`` when there is none.
        handedness: Object whose ``classification[0].label`` is used as the
            first part of the label text.

    Returns:
        The annotated image. When ``sign`` is ``"Fuck"`` the boxed region is
        blurred via ``censor_function`` before the annotations are drawn.
    """
    # Censor first: the rectangle's top and left edges lie inside the blurred
    # region, so drawing the outline before blurring would smear it away.
    if sign == "Fuck":
        image = censor_function(image, brect)

    cv2.rectangle(image, (brect[0], brect[1]), (brect[2], brect[3]), (0, 0, 0), 1)

    info_text = handedness.classification[0].label
    if sign != "":
        info_text = info_text + ":" + sign
    # The label is placed just above the box's top-left corner.
    cv2.putText(image, info_text, (brect[0] + 5, brect[1] - 4), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 1, cv2.LINE_AA)

    return image

In [64]:
def calc_landmark_points(image, landmarks):
    """Convert a hand's landmarks into a list of ``[x, y]`` pixel positions.

    Args:
        image: Frame whose ``shape`` starts with ``(height, width)``.
        landmarks: Object exposing a ``landmark`` iterable; each item has
            ``x`` and ``y`` attributes that are scaled by the frame size.

    Returns:
        One ``[x, y]`` list per landmark, in input order. Each coordinate is
        truncated to an int and capped at ``size - 1``; values below zero are
        left as they are.
    """
    frame_w = image.shape[1]
    frame_h = image.shape[0]

    def to_pixel(fraction, size):
        # Scale to pixels, truncate, and cap at the last valid index.
        return min(int(fraction * size), size - 1)

    return [
        [to_pixel(point.x, frame_w), to_pixel(point.y, frame_h)]
        for point in landmarks.landmark
    ]

In [65]:
def draw_landmark_points(image, landmark_points, radius=5, color=(0, 255, 255)):
    """Draw a filled circle on ``image`` for each hand landmark.

    Args:
        image: Frame to draw on (modified in place).
        landmark_points: Sequence of ``[x, y]`` pixel positions.
        radius: Circle radius in pixels.
        color: Circle colour passed to ``cv2.circle``.

    Returns:
        The same ``image`` that was passed in.
    """
    # Only indices 0-20 are drawn, matching the 21 landmarks this function
    # has always handled; any extra points are ignored.
    for point in landmark_points[:21]:
        # A negative thickness asks OpenCV for a filled circle.
        cv2.circle(image, (point[0], point[1]), radius, color, -1)
    return image

In [66]:
def draw_landmark_lines(image, landmark_points):
    """Draw the hand skeleton by joining pairs of landmark points.

    Args:
        image: Frame to draw on (modified in place).
        landmark_points: Sequence of pixel positions indexed 0-20. An empty
            sequence draws nothing.

    Returns:
        The same ``image`` that was passed in.
    """
    if len(landmark_points) == 0:
        return image

    bone_color = (255, 255, 255)
    bone_thickness = 2

    # (start index, end index) of every segment, in drawing order.
    skeleton = (
        # Palm
        (0, 1), (0, 5), (0, 17), (5, 9), (9, 13), (13, 17),
        # Thumb
        (1, 2), (2, 3), (3, 4),
        # Index
        (5, 6), (6, 7), (7, 8),
        # Middle
        (9, 10), (10, 11), (11, 12),
        # Ring
        (13, 14), (14, 15), (15, 16),
        # Pinky
        (17, 18), (18, 19), (19, 20),
    )

    for start, end in skeleton:
        cv2.line(image, landmark_points[start], landmark_points[end], bone_color, bone_thickness)

    return image
    

In [67]:
def preprocess_keypoints(landmarks):
    """Turn landmark pixel positions into a flat, normalised feature list.

    The first landmark becomes the origin: its x and y are subtracted from
    every point. The points are then flattened into one list and divided by
    the largest absolute value in that list.

    Args:
        landmarks: Sequence of ``[x, y]`` points. The input is not modified.

    Returns:
        A flat list of floats ``[x0, y0, x1, y1, ...]``. An empty input gives
        an empty list, and input whose relative coordinates are all zero
        gives a list of ``0.0`` values.
    """
    if len(landmarks) == 0:
        return []

    # Converting into local coordinates, flattened into a 1D list.
    base_x, base_y = landmarks[0][0], landmarks[0][1]
    relative = []
    for point in landmarks:
        # Any elements beyond x and y are carried over unchanged.
        relative.extend([point[0] - base_x, point[1] - base_y, *point[2:]])

    # Finding the max value for normalization.
    maximum = max(map(abs, relative))
    if maximum == 0:
        # Every value is zero (all points coincide); dividing would raise
        # ZeroDivisionError, so return the all-zero vector directly.
        return [0.0] * len(relative)

    return [value / maximum for value in relative]

In [68]:
def select_mode(key, mode):
    """Translate a key code into a label number and the current mode.

    Args:
        key: Key code as an integer.
        mode: Mode value carried over from the previous call.

    Returns:
        ``(num, mode)``. ``num`` is 0-9 when ``key`` is the code of a digit
        key and -1 otherwise. ``mode`` becomes 1 when ``key`` is the code of
        ``k``; otherwise it is passed through unchanged.
    """
    # Setting labels 0-9
    pressed_digit = ord("0") <= key <= ord("9")
    num = key - ord("0") if pressed_digit else -1

    if key == ord("k"):
        mode = 1

    return num, mode

In [69]:
def csv_logger(num, mode, pre_processed_keypoints, csv_path="/Users/Atharv/All scripts/SmartMAte/ASL Translation/model/keypoint_classifier/keypoints.csv"):
    """Append one labelled keypoint row to the training CSV.

    A row is written only when ``mode`` is 1 and ``num`` is in 0-9; any other
    combination does nothing.

    Args:
        num: Class label for the row.
        mode: Current mode; only mode 1 logs.
        pre_processed_keypoints: Iterable of feature values written after
            the label.
        csv_path: File to append to. NOTE: the default is an absolute path
            specific to one machine; pass a path explicitly elsewhere.

    Returns:
        None.
    """
    if mode != 1 or not 0 <= num <= 9:
        return

    # If existing content does not end with a newline, the new row would be
    # glued onto the last line. Only inspect the file when it exists and is
    # non-empty: opening a missing file for reading or seeking to -1 in an
    # empty one would raise.
    needs_newline = False
    if os.path.exists(csv_path) and os.path.getsize(csv_path) > 0:
        with open(csv_path, 'rb') as f:
            f.seek(-1, os.SEEK_END)
            needs_newline = f.read(1) != b'\n'

    with open(csv_path, 'a', newline="") as f:
        if needs_newline:
            f.write('\n')
        writer = csv.writer(f)
        writer.writerow([num, *pre_processed_keypoints])
        print("mode = {%d}, label = {%d}" %(mode, num))
    return

In [70]:
# Gesture names, looked up by the class index the keypoint classifier picks.
keypoint_classifier_labels = [
    "Palm",             # 0
    "Peace",            # 1
    "Fist",             # 2
    "Ok / Good",        # 3
    "Fuck",             # 4
    "Rock",             # 5
    "Thumbs Up",        # 6
    "Thumbs Down",      # 7
    "Call",             # 8
    "Fingers Crossed",  # 9
]

In [71]:
# Live recognition loop: read webcam frames, detect hands, classify the
# gesture of each hand and show the annotated frame. Esc quits; digit keys
# and 'k' are interpreted by select_mode for logging training rows.

# Load the model before opening the camera so that a bad model path does not
# leave the capture device held.
keypoint_classifier = tf.keras.models.load_model("model/keypoint_classifier/keypoints_classifier.keras")
vid = cv2.VideoCapture(0)
mode = 0

try:
    while True:
        key = cv2.waitKey(10)

        # Esc ends the session.
        if key == 27:
            break
        num, mode = select_mode(key, mode)

        # Camera capture
        ret, image = vid.read()
        # Stop the loop when the camera stops delivering frames.
        if not ret:
            break

        # Flipping the image as the camera capture is mirrored.
        image = cv2.flip(image, 1)
        # Annotations go on a separate BGR copy; the RGB-converted frame is
        # used only for detection.
        debug_image = image.copy()

        # Hand detection
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands.process(image)

        if results.multi_hand_landmarks:
            for hand_landmarks, handedness in zip(results.multi_hand_landmarks, results.multi_handedness):
                # Assigning bounding box
                brect = calc_bounding_box(debug_image, hand_landmarks)
                # Landmark calculation
                landmarks = calc_landmark_points(debug_image, hand_landmarks)
                # Localizing and normalizing hand landmarks in the detected hand
                pre_processed_keypoints = preprocess_keypoints(landmarks)
                input_array = np.array(pre_processed_keypoints).reshape(1, -1)
                # Inferencing from keypoint classifier model
                output = keypoint_classifier.predict(input_array, verbose=0)
                sign_id = int(np.argmax(np.squeeze(output)))
                sign = keypoint_classifier_labels[sign_id]
                # Logging the points as needed
                csv_logger(num, mode, pre_processed_keypoints)
                # Drawing bounding box, landmark points and skeleton
                debug_image = draw_bounding_box(debug_image, brect, sign, handedness)
                debug_image = draw_landmark_points(debug_image, landmarks)
                debug_image = draw_landmark_lines(debug_image, landmarks)

        cv2.imshow('Hand Gesture Recognition', debug_image)
finally:
    # Always free the camera and close the window, even if a frame fails.
    # destroyAllWindows is used instead of destroyWindow because the named
    # window does not exist yet if the loop ends before the first imshow.
    vid.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)

-1

: 