In [1]:
import cv2
import mediapipe as mp
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Gesture class id -> human-readable name. The ids are the values stored in
# the last column of the training CSV loaded below.
gesture = {
    0:'fist', 1:'one', 2:'two', 3:'three', 4:'four', 5:'five',
    6:'six', 7:'rock', 8:'spiderman', 9:'yeah', 10:'ok',
}


def _load_overlay(path, size=(250, 250), white_threshold=200):
    """Load an overlay image and build the masks used to paste it on a frame.

    Args:
        path: Path of the image file to read.
        size: (width, height) the image is resized to.
        white_threshold: Gray level above which a pixel is treated as
            background (i.e. the camera frame shows through there).

    Returns:
        A tuple ``(image, gray, background_mask, foreground_mask)`` where
        ``background_mask`` is 255 on pixels brighter than the threshold and
        ``foreground_mask`` is its bitwise inverse.

    Raises:
        FileNotFoundError: If OpenCV could not read the image.
    """
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than inside cv2.resize.
        raise FileNotFoundError(f"could not read overlay image: {path!r}")
    image = cv2.resize(image, size)
    # cv2.imread yields BGR channel order, so convert with BGR2GRAY.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, background_mask = cv2.threshold(
        gray, white_threshold, 255, cv2.THRESH_BINARY)
    foreground_mask = cv2.bitwise_not(background_mask)
    return image, gray, background_mask, foreground_mask


# Training data: every column but the last is a feature, the last column is
# the gesture class id.
file = np.genfromtxt('images/gesture_train.csv', delimiter=',')
angle = file[:, :-1].astype(np.float32)
label = file[:, -1].astype(np.float32)
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(angle, label)

# Overlay images, resized to roughly the size of a face seen by the camera.
(spiderman, mask2gray_spiderman,
 mask_b_spiderman, mask_b_inv_spiderman) = _load_overlay('images/spiderman.jpg')
(tiger, mask2gray_tiger,
 mask_b_tiger, mask_b_inv_tiger) = _load_overlay('images/tiger.png')

# Hand landmark detector (a single hand is tracked).
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5)

# Face landmark (face mesh) detection.
mp_face = mp.solutions.face_mesh
# Helpers for drawing landmarks and their connections.
mp_drawing = mp.solutions.drawing_utils
# Face detector instance.
face = mp_face.FaceMesh(
    min_detection_confidence=0.5,  # threshold for detecting a face
    min_tracking_confidence=0.5  # threshold for tracking the face position
)

# Open the camera last so that a failure while loading the assets above does
# not leave the capture device open.
cap = cv2.VideoCapture(0)
# Index tables over the 21 hand landmarks: bone i runs from landmark
# _PARENT_JOINTS[i] to landmark _CHILD_JOINTS[i] (20 bones in total).
_PARENT_JOINTS = [0, 1, 2, 3, 0, 5, 6, 7, 0, 9, 10, 11, 0, 13, 14, 15, 0, 17, 18, 19]
_CHILD_JOINTS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
# Pairs of adjacent bones; the angle between each pair is one feature (15).
_BONE_A = [0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18]
_BONE_B = [1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19]

# Gesture class id -> (label text, overlay image, background mask, foreground mask).
# Both 'rock' (7) and 'spiderman' (8) trigger the SpiderMan overlay; 'fist' (0)
# triggers the Tiger overlay. Other gestures are only drawn as hand landmarks.
_SPIDERMAN_OVERLAY = ('SpiderMan', spiderman, mask_b_spiderman, mask_b_inv_spiderman)
_TIGER_OVERLAY = ('Tiger', tiger, mask_b_tiger, mask_b_inv_tiger)
_GESTURE_OVERLAYS = {7: _SPIDERMAN_OVERLAY, 8: _SPIDERMAN_OVERLAY, 0: _TIGER_OVERLAY}

_EXIT_KEY = ord('1')  # key code 49: pressing "1" ends the loop


def _joint_angles(landmarks):
    """Return the 15 joint angles (degrees) of a hand, or None if degenerate.

    Args:
        landmarks: Sequence of hand landmarks exposing ``x``, ``y``, ``z``.

    Returns:
        A float array of 15 angles in degrees, or ``None`` when a bone has
        zero length so that an angle cannot be computed.
    """
    joint = np.array([[lm.x, lm.y, lm.z] for lm in landmarks], dtype=np.float64)
    bones = joint[_CHILD_JOINTS] - joint[_PARENT_JOINTS]  # [20, 3]
    # Normalise each bone to unit length; zero-length bones become NaN and
    # are rejected below instead of being fed to the classifier.
    with np.errstate(divide='ignore', invalid='ignore'):
        bones = bones / np.linalg.norm(bones, axis=1)[:, np.newaxis]
    cosines = np.einsum('nt,nt->n', bones[_BONE_A], bones[_BONE_B])  # [15,]
    # Rounding can push a cosine marginally outside [-1, 1]; clip so that
    # arccos does not return NaN for (anti-)parallel bones.
    angles = np.degrees(np.arccos(np.clip(cosines, -1.0, 1.0)))
    if not np.all(np.isfinite(angles)):
        return None
    return angles


def _paste_overlay_on_nose(frame, rgb_frame, overlay, bg_mask, fg_mask):
    """Paste ``overlay`` onto ``frame`` centred on the first detected nose.

    Args:
        frame: BGR frame that is modified in place.
        rgb_frame: RGB copy of the frame, used for face detection.
        overlay: BGR overlay image.
        bg_mask: Mask that is non-zero where the frame should show through.
        fg_mask: Mask that is non-zero where the overlay should be drawn.
    """
    face_result = face.process(rgb_frame)
    if not face_result.multi_face_landmarks:
        return

    # Landmark 4 of the face mesh is used as the nose position.
    nose = face_result.multi_face_landmarks[0].landmark[4]
    frame_h, frame_w = frame.shape[:2]
    ov_h, ov_w = overlay.shape[:2]
    top = int(nose.y * frame_h) - ov_h // 2
    left = int(nose.x * frame_w) - ov_w // 2

    # Clip the paste rectangle to the frame so a face near the border gets a
    # partially visible overlay instead of a shape mismatch error.
    y0, y1 = max(top, 0), min(top + ov_h, frame_h)
    x0, x1 = max(left, 0), min(left + ov_w, frame_w)
    if y0 >= y1 or x0 >= x1:
        return
    oy0, ox0 = y0 - top, x0 - left
    oy1, ox1 = oy0 + (y1 - y0), ox0 + (x1 - x0)

    roi = frame[y0:y1, x0:x1]
    overlay_part = overlay[oy0:oy1, ox0:ox1]
    frame_bg = cv2.bitwise_and(roi, roi, mask=bg_mask[oy0:oy1, ox0:ox1])
    frame_fg = cv2.bitwise_and(overlay_part, overlay_part,
                               mask=fg_mask[oy0:oy1, ox0:ox1])
    frame[y0:y1, x0:x1] = cv2.add(frame_bg, frame_fg)


# Main loop: grab a mirrored frame, classify the hand gesture and, for the
# gestures listed in _GESTURE_OVERLAYS, paste the matching overlay on the face.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame available; check this before touching `frame`,
            # which is None when the read fails.
            break
        frame = cv2.flip(frame, 1)
        # MediaPipe expects RGB input while OpenCV delivers BGR. The RGB copy
        # is taken before anything is drawn so detection sees a clean image.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        hand_result = hands.process(rgb_frame)
        for res in hand_result.multi_hand_landmarks or []:
            angle = _joint_angles(res.landmark)
            if angle is not None:
                # Classify the gesture from the joint angles.
                data = np.array([angle], dtype=np.float32)
                results = knn.predict(data)
                idx = int(results[0])

                match = _GESTURE_OVERLAYS.get(idx)
                if match is not None:
                    text, overlay, bg_mask, fg_mask = match
                    # Label the gesture just below the wrist landmark.
                    cv2.putText(
                        frame, text=text,
                        org=(int(res.landmark[0].x * frame.shape[1]),
                             int(res.landmark[0].y * frame.shape[0] + 20)),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1,
                        color=(255, 255, 255), thickness=2)
                    _paste_overlay_on_nose(frame, rgb_frame, overlay, bg_mask, fg_mask)

            mp_drawing.draw_landmarks(frame, res, mp_hands.HAND_CONNECTIONS)

        cv2.imshow('frame', frame)
        k = cv2.waitKey(1) & 0xFF
        if k == _EXIT_KEY:
            break
finally:
    # Always free the camera and close the window, even on an error or a
    # keyboard interrupt.
    cap.release()
    cv2.destroyAllWindows()
    

I0000 00:00:1721285067.316790  993150 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M3
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1721285067.330563  993428 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1721285067.333721  993150 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M3
W0000 00:00:1721285067.337348  993438 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1721285067.341016  993432 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1721285067.341402  993437 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback t

: 