In [1]:
#@markdown We implemented some functions to visualize the hand landmark detection results. <br/> Run the following cell to activate the functions.

from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np

# Settings for the handedness caption that draw_landmarks_on_image writes
# next to each detected hand.
MARGIN = 10  # pixels subtracted from the topmost landmark's y to lift the caption
FONT_SIZE = 1  # passed to cv2.putText as the font scale
FONT_THICKNESS = 1  # passed to cv2.putText as the stroke thickness
HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green

def draw_landmarks_on_image(rgb_image, detection_result):
  """Return a copy of ``rgb_image`` with every detected hand drawn on it.

  For each hand in ``detection_result`` the landmarks and their connections
  are rendered with MediaPipe's default hand styles, and the hand's top
  handedness category name is written just above the hand's top-left corner.

  Args:
    rgb_image: Image array; it is copied, never modified.
    detection_result: Object exposing ``hand_landmarks`` and ``handedness``
      sequences, indexed in parallel (one entry per detected hand).

  Returns:
    The annotated copy. With no detected hands it is an unmodified copy.
  """
  all_hands = detection_result.hand_landmarks
  all_handedness = detection_result.handedness
  annotated_image = np.copy(rgb_image)

  for hand_idx, hand_landmarks in enumerate(all_hands):
    handedness = all_handedness[hand_idx]

    # drawing_utils expects a protobuf landmark list, so repackage the
    # landmark objects of this hand into one.
    proto_points = [
      landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
      for point in hand_landmarks
    ]
    hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    hand_landmarks_proto.landmark.extend(proto_points)

    landmark_style = solutions.drawing_styles.get_default_hand_landmarks_style()
    connection_style = solutions.drawing_styles.get_default_hand_connections_style()
    solutions.drawing_utils.draw_landmarks(
      annotated_image,
      hand_landmarks_proto,
      solutions.hands.HAND_CONNECTIONS,
      landmark_style,
      connection_style)

    # Landmark coordinates are scaled by the image size to get pixels; the
    # caption is anchored at the smallest x / smallest y, lifted by MARGIN.
    height, width, _ = annotated_image.shape
    leftmost = min([point.x for point in hand_landmarks])
    topmost = min([point.y for point in hand_landmarks])
    caption_origin = (int(leftmost * width), int(topmost * height) - MARGIN)

    # Write the handedness (left or right hand) caption.
    cv2.putText(annotated_image, f"{handedness[0].category_name}",
                caption_origin, cv2.FONT_HERSHEY_DUPLEX,
                FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

  return annotated_image

In [2]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# Short aliases for MediaPipe Tasks classes. The statements below use the
# `python` / `vision` module paths rather than these aliases.
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Build the hand landmarker from the 'hand_landmarker.task' model file (a
# relative path, so it is looked up from the working directory) with
# num_hands=2.
# NOTE(review): no running_mode is passed, so the library default applies --
# presumably image mode, which a per-image detect() call needs; confirm.
base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
options = vision.HandLandmarkerOptions(base_options=base_options,
                                       num_hands=2)
detector = vision.HandLandmarker.create_from_options(options)

In [3]:
def angle(joint, label):
    """Build one hand's feature vector: joint coordinates, bend angles, label.

    Twenty bone vectors (child landmark minus parent landmark) are formed
    from the 21 landmarks and normalised to unit length. The angle between
    each pair of consecutive bones on the same finger (3 per finger, 15 in
    total) is then computed from the dot product.

    Args:
        joint: Array with at least 21 rows, row ``i`` holding the coordinates
            of landmark ``i``; only the first three columns are used for the
            angles. Callers in this notebook pass a (21, 3) array of x, y, z.
        label: Value appended as the last element of the result.

    Returns:
        1-D array made of ``joint.flatten()``, the 15 angles in degrees
        (passed through float32) and finally ``label`` -- 79 elements for a
        (21, 3) input. If ``label`` is a string, NumPy promotes the whole
        array to a string dtype.

    Note:
        Two coincident parent/child landmarks give a zero-length bone; the
        normalisation then divides by zero and the affected angles are NaN.
    """
    parent_idx = [0, 1, 2, 3, 0, 5, 6, 7, 0, 9, 10, 11, 0, 13, 14, 15, 0, 17, 18, 19]
    child_idx = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

    bones = joint[child_idx, :3] - joint[parent_idx, :3]  # [20, 3]
    bones = bones / np.linalg.norm(bones, axis=1)[:, np.newaxis]  # unit length

    # Row-wise dot product (einsum) between each bone and the next bone on
    # the same finger gives the cosine of the bend angle. [15,]
    cos_theta = np.einsum(
        'nt,nt->n',
        bones[[0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18], :],
        bones[[1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19], :])

    # Rounding can push the cosine of (anti)parallel bones a hair outside
    # [-1, 1], where arccos returns NaN; clamp it back into the valid domain.
    cos_theta = np.clip(cos_theta, -1.0, 1.0)

    angles = np.degrees(np.arccos(cos_theta))  # radians -> degrees

    angle_label = np.append(angles.astype(np.float32), label)

    return np.concatenate([joint.flatten(), angle_label])

In [4]:
import re
import os
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd

# Expected layout on disk:
#   data
#     |_  (video folder)
#     |_  .xlsx
PATH = "C:/Users/elena/Desktop/data/" # dataset root folder
video_path = PATH + "0001~3000(영상)/" # sub-folder of PATH that holds the videos
file_name = sorted(os.listdir(video_path))  # sorted so runs are deterministic

os.makedirs(PATH+'output', exist_ok=True)

# The annotation sheet is the same for every video, so read it once instead
# of re-parsing the workbook on every loop iteration.
excel = pd.read_excel(PATH + 'KETI-2017-SL-Annotation-v2_1.xlsx')


def _landmarks_to_joint(landmarks):
    """Copy one hand's landmarks into a (21, 3) array of x, y, z."""
    joint = np.zeros((21, 3))
    for idx, lm in enumerate(landmarks):
        joint[idx] = [lm.x, lm.y, lm.z]
    return joint


def _frame_features(result, label):
    """Return the [left, right] feature pair for one frame, or None.

    Each row is what angle() returns for that hand (79 values). When only one
    hand is detected the other row is 78 zeros followed by the label. None is
    returned when no hand is detected. Which detected hand goes into the left
    row is decided by the first hand's top handedness category.
    """
    n_hands = len(result.handedness)
    if n_hands == 0:
        return None

    first = angle(_landmarks_to_joint(result.hand_landmarks[0]), label)
    if n_hands >= 2:
        second = angle(_landmarks_to_joint(result.hand_landmarks[1]), label)
    else:
        # Placeholder for the missing hand: 63 coordinates + 15 angles, all zero.
        second = np.append(np.zeros((78,)), label)

    if result.handedness[0][0].category_name == "Left":
        left_data, right_data = first, second
    else:
        left_data, right_data = second, first
    return np.array([left_data, right_data])


for file in file_name:
    file_path = video_path + file

    # The video number is the first run of digits in the file name; int()
    # already ignores leading zeros. Entries without digits are skipped.
    number_match = re.search(r'\d+', file)
    if number_match is None:
        continue
    file_ = int(number_match.group())

    # Only process videos that have an annotation row and were shot from the
    # front ('정면').
    rows = excel[excel['번호'] == file_]
    if rows.empty:
        continue

    direction = rows['방향'].values[0]
    if direction != '정면':
        continue

    label = str(rows['한국어'].values[0])

    cap = cv2.VideoCapture(file_path)
    frame_pairs = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV decodes frames as BGR; MediaPipe's SRGB format wants RGB.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)

            result = detector.detect(mp_image)

            # Frames without any detected hand contribute nothing.
            pair = _frame_features(result, label)
            if pair is not None:
                frame_pairs.append(pair)
    finally:
        # Release the capture even if detection raises part-way through.
        cap.release()

    # Build the (frames, 2, 79) array once instead of re-stacking per frame.
    if frame_pairs:
        full_data = np.array(frame_pairs)
    else:
        full_data = np.empty(shape=(0, 2, 79))

    print(label, full_data.shape)
    np.save(PATH+'output/'+str(file_)+'.npy', full_data)


0 (36, 2, 79)
1 (40, 2, 79)
2 (42, 2, 79)
3 (44, 2, 79)
4 (47, 2, 79)
5 (51, 2, 79)
6 (45, 2, 79)
7 (48, 2, 79)
8 (47, 2, 79)


KeyboardInterrupt: 