In [1]:
import cv2
import math
import mediapipe as mp
# Short aliases for the MediaPipe sub-modules used by the cells below.
mp_drawing = mp.solutions.drawing_utils  # draw_landmarks / plot_landmarks helpers
mp_drawing_styles = mp.solutions.drawing_styles  # default landmark and connection styles
mp_hands = mp.solutions.hands  # Hands model, HandLandmark enum, HAND_CONNECTIONS

from google.protobuf.json_format import MessageToDict

In [2]:
# Capture-device indices by label; the selected one is handed to
# cv2.VideoCapture in the live-camera cell.
CAMERAS = dict(
    INTEGRATED_CAMERA=0,
    STUDIO_CAMERA=1,
    CUPOLA360=2,
)
# Device used by the live-camera cell; change the key to switch cameras.
CURRENT_DEVICE = CAMERAS['CUPOLA360']

In [3]:
# Name -> ordinal lookup for hand parts.
# NOTE(review): not referenced by any other cell visible in this notebook;
# confirm whether it is still needed.
LANDMARKS = {
    part: ordinal
    for ordinal, part in enumerate(
        ('WRIST', 'THUMB', 'INDEX', 'MIDDLE', 'RING', 'PINKY', 'FINGERTIPS')
    )
}

# Keypoint indices 1..20 grouped four per finger (thumb, index, middle, ring,
# pinky). hand_angle() walks these groups to pick the joints of each finger;
# index 0 (the wrist) is deliberately not part of any group.
LANDMARKS_INDEXS = [list(range(first, first + 4)) for first in range(1, 21, 4)]

In [4]:
# OSD (on-screen display) helper
OSD_X = 10  # default x of the text origin, in pixels
OSD_Y = 70  # default y of the text origin, in pixels
def OSD_TEXT(image, text, x=OSD_X, y=OSD_Y):
    """Draw ``text`` on ``image`` in place and return the same image.

    Args:
        image: Image array accepted by ``cv2.putText``; it is modified in place.
        text: String to render.
        x: Horizontal pixel position of the text origin (defaults to ``OSD_X``).
        y: Vertical pixel position of the text origin (defaults to ``OSD_Y``).

    Returns:
        The ``image`` object that was passed in, so calls can be chained or
        assigned back.
    """
    # Fixed look: Hershey simplex font, scale 1, colour (100, 0, 200),
    # thickness 2, anti-aliased strokes.
    cv2.putText(image, text, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (100, 0, 200), 2, cv2.LINE_AA)
    return image

In [5]:
# Angle between two 2D vectors
def vector_2d_angle(v1, v2):
    """Return the unsigned angle between two 2D vectors, in degrees.

    Args:
        v1: Indexable with the x component at ``[0]`` and y at ``[1]``.
        v2: Same layout as ``v1``.

    Returns:
        A float in ``[0, 180]``. When either vector has zero length the angle
        is undefined and the sentinel ``100000.`` is returned instead.
    """
    v1_x, v1_y = v1[0], v1[1]
    v2_x, v2_y = v2[0], v2[1]

    norm_product = math.hypot(v1_x, v1_y) * math.hypot(v2_x, v2_y)
    if norm_product == 0:
        # Degenerate (zero-length) vector: keep the historical sentinel value.
        return 100000.

    cos_angle = (v1_x * v2_x + v1_y * v2_y) / norm_product
    # Rounding can push the cosine a hair outside [-1, 1] for (anti)parallel
    # vectors, which would make acos raise; clamp so those cases yield 0 / 180.
    cos_angle = max(-1.0, min(1.0, cos_angle))
    return math.degrees(math.acos(cos_angle))

In [6]:
def hand_angle(hand, x=0, y=0):
    """Compute one angle per finger group listed in ``LANDMARKS_INDEXS``.

    For every group the angle is measured between
      * the vector from the group's second keypoint to keypoint 0, and
      * the vector from the group's fourth keypoint to its third keypoint,
    with each coordinate truncated to an int before subtracting.

    Args:
        hand: Indexable of ``(x, y)`` keypoints, addressed by the indices in
            ``LANDMARKS_INDEXS`` plus index 0.
        x: Unused; kept for interface compatibility.
        y: Unused; kept for interface compatibility.

    Returns:
        List of ``vector_2d_angle`` results, one per group, in group order.
    """
    def _delta(start, end):
        # Displacement pointing from keypoint `end` to keypoint `start`,
        # using integer-truncated coordinates.
        return (int(hand[start][0]) - int(hand[end][0]),
                int(hand[start][1]) - int(hand[end][1]))

    # The first index of each group is intentionally not used.
    return [
        vector_2d_angle(_delta(0, finger[1]), _delta(finger[2], finger[3]))
        for finger in LANDMARKS_INDEXS
    ]

In [7]:
# For static images:
# Paths of the still images to annotate. The list is empty here, so this cell
# does nothing until paths are added.
IMAGE_FILES = []
with mp_hands.Hands(static_image_mode=True, max_num_hands=2, min_detection_confidence=0.5) as hands:
    for idx, file in enumerate(IMAGE_FILES):
        source = cv2.imread(file)
        if source is None:
            # cv2.imread returns None (it does not raise) when the file is
            # missing or cannot be decoded; skip it instead of crashing in flip.
            print('Could not read image:', file)
            continue

        # Flip around the y-axis so MediaPipe reports the correct handedness
        # (it assumes a mirrored, selfie-style input).
        image = cv2.flip(source, 1)
        # Convert the BGR image to RGB before processing.
        results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        # Print handedness and draw hand landmarks on the image.
        print('Handedness:', results.multi_handedness)
        if not results.multi_hand_landmarks:
            continue

        image_height, image_width, _ = image.shape
        annotated_image = image.copy()
        for hand_landmarks in results.multi_hand_landmarks:
            print('hand_landmarks:', hand_landmarks)
            # Landmark coordinates are normalised, so scale them to pixels.
            print('Index finger tip coordinates: (',
                  f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * image_width}, '
                  f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * image_height})'
                  )
            mp_drawing.draw_landmarks(
                annotated_image,
                hand_landmarks,
                mp_hands.HAND_CONNECTIONS,
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style())
        # Undo the mirroring before saving so the output matches the input orientation.
        cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
        # Draw hand world landmarks.
        if not results.multi_hand_world_landmarks:
            continue
        for hand_world_landmarks in results.multi_hand_world_landmarks:
            mp_drawing.plot_landmarks(hand_world_landmarks, mp_hands.HAND_CONNECTIONS, azimuth=5)

In [8]:
# For usbvideo device
# Live demo: track the right hand and overlay "Volume Up" / "Volume Down" when
# a thumbs-up / thumbs-down pose is seen. Press Esc in the window to stop.

# Gesture thresholds (pixel values depend on the capture resolution).
FINGER_BENT_ANGLE = 50    # degrees; hand_angle() values above this count as a curled finger
THUMB_WRIST_MAX_DX = 80   # pixels; max horizontal offset between thumb tip and wrist

cap = cv2.VideoCapture(CURRENT_DEVICE)
if not cap.isOpened():
    # Without this the loop below is silently skipped and nothing happens.
    print("Could not open camera device", CURRENT_DEVICE)

try:
    with mp_hands.Hands(model_complexity=0, min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                # If loading a video, use 'break' instead of 'continue'.
                continue

            # Mirror once for a selfie view; the same mirrored frame is used for
            # both detection and display, so landmarks line up with the picture.
            image = cv2.flip(frame, 1)

            # MediaPipe expects RGB. To improve performance, mark the array as
            # not writeable so it can be passed by reference.
            rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            rgb.flags.writeable = False
            results = hands.process(rgb)

            if results.multi_hand_landmarks:
                image_height, image_width = image.shape[:2]

                # multi_hand_landmarks and multi_handedness are parallel lists.
                for hand_landmarks, handedness in zip(results.multi_hand_landmarks, results.multi_handedness):
                    if handedness.classification[0].label != "Right":
                        continue

                    # Draw landmarks
                    mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    # Normalised landmark coordinates -> pixel coordinates.
                    keypoint_pos = [
                        (landmark.x * image_width, landmark.y * image_height)
                        for landmark in hand_landmarks.landmark
                    ]

                    # One angle per finger: thumb, index, middle, ring, pinky.
                    angle_list = hand_angle(keypoint_pos)

                    # Gesture detection: thumb straight, the other four fingers
                    # curled, and the thumb tip (4) roughly above/below the wrist (0).
                    thumb_straight = angle_list[0] < FINGER_BENT_ANGLE
                    others_curled = all(angle > FINGER_BENT_ANGLE for angle in angle_list[1:5])
                    thumb_vertical = abs(keypoint_pos[4][0] - keypoint_pos[0][0]) < THUMB_WRIST_MAX_DX

                    if thumb_straight and others_curled and thumb_vertical:
                        # Image y grows downward: a larger y means the thumb tip
                        # is below the wrist, i.e. thumbs-down.
                        if hand_landmarks.landmark[4].y > hand_landmarks.landmark[0].y:
                            image = OSD_TEXT(image, "Volume Down")
                        else:
                            image = OSD_TEXT(image, "Volume Up")

            # The frame is already mirrored, so show it as-is.
            cv2.imshow('MediaPipe Hands', image)
            if cv2.waitKey(5) & 0xFF == 27:  # 27 == Esc
                break
finally:
    # Always free the camera and close the window, even if the loop raised or
    # the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# 
def hand_angle(hand_, x=0, y=0):
    """Compute the five per-finger angles from a list of hand keypoints.

    For each finger the angle is taken between the vector from a reference
    joint to keypoint 0 and the vector from the fingertip to the joint before
    it, with every coordinate truncated to an int first.

    Note: an earlier cell of this notebook also defines ``hand_angle``;
    running this cell rebinds the name to this version.

    Args:
        hand_: Indexable of ``(x, y)`` keypoints; indices 0 through 20 are read.
        x: Unused; kept for interface compatibility.
        y: Unused; kept for interface compatibility.

    Returns:
        List of five ``vector_2d_angle`` results ordered thumb, index, middle,
        ring, pinky.
    """
    # (reference joint, segment start, segment end) per finger:
    # thumb, index, middle, ring, pinky.
    finger_joints = (
        (2, 3, 4),
        (6, 7, 8),
        (10, 11, 12),
        (14, 15, 16),
        (18, 19, 20),
    )

    angle_list = []
    for ref, seg_start, seg_end in finger_joints:
        wrist_vec = (
            int(hand_[0][0]) - int(hand_[ref][0]),
            int(hand_[0][1]) - int(hand_[ref][1]),
        )
        tip_vec = (
            int(hand_[seg_start][0]) - int(hand_[seg_end][0]),
            int(hand_[seg_start][1]) - int(hand_[seg_end][1]),
        )
        angle_list.append(vector_2d_angle(wrist_vec, tip_vec))
    return angle_list