In [12]:
# 동영상 캡처 기능 추가
import cv2
video_path = 0
cap = cv2.VideoCapture(video_path)

gesture = {
    0:'fist', 1:'one', 2:'two', 3:'three', 4:'four', 5:'five',
    6:'six', 7:'rock', 8:'spiderman', 9:'yeah', 10:'ok',
}

rsp_gesture = {0:'rock', 5:'paper', 9:'scissors'}

import mediapipe as mp
# 이미지에서 손 찾기 기능 불러오기
mp_hands = mp.solutions.hands
# 찾은 손 관절 이미지에 표현하기
mp_drawing = mp.solutions.drawing_utils
# 손 찾기 기능 세부 설정
hands = mp_hands.Hands(
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5)

# 제스쳐 인식 모델 학습하기
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
# 데이터 불러오기
file = np.genfromtxt('images/gesture_train.csv', delimiter=',')
# 모델 학습 데이터 준비
angle = file[:,:-1].astype(np.float32)
label = file[:, -1].astype(np.float32)
# 모델 준비
knn = KNeighborsClassifier(n_neighbors = 3)
# 모델 학습
knn.fit(angle, label)
# 손관절 각도를 사용해서 손의 동작을 판별하는 모델 제작

while True:
    ch, frame = cap.read()
    if not ch:
        print('동영상 재생 끝 or 읽기 실패')
        break
    # frame = 1 이면 좌우반전
    frame = cv2.flip(frame, 1)
    # 이미지에서 손 관절값 찾기
    result = hands.process(frame)
    # 이미지에서 손 찾으면 실행
    if result.multi_hand_landmarks is not None:
        # 두 손의 동작과 손의 위치값이 저장
        rsp_result = []
        # 1초에 30장씩 한장에 21개의 관절값을 하나의 관절마다 x,y,z좌표 값을 출력
        # 21개 관절을 하나씩 그리고 연결하기
        # print(result.multi_hand_landmarks)
        for res in result.multi_hand_landmarks:
            # 15개의 손관절 각도 값 구하기
            joint = np.zeros((21, 3))
            for j, lm in enumerate(res.landmark):
                joint[j] = [lm.x, lm.y, lm.z]

            # joint들로 관절값 구하기
            v1 = joint[[0,1,2,3,0,5,6,7,0,9,10,11,0,13,14,15,0,17,18,19],:] # Parent joint
            v2 = joint[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],:] # Child joint
            v = v2 - v1 # [20,3]
            # Normalize v 유클리디안 길이
            v = v / np.linalg.norm(v, axis=1)[:, np.newaxis]

            # 관절값으로 관절 각도 구하기
            angle = np.arccos(np.einsum('nt,nt->n',
                v[[0,1,2,4,5,6,8,9,10,12,13,14,16,17,18],:], 
                v[[1,2,3,5,6,7,9,10,11,13,14,15,17,18,19],:])) # [15,]

            angle = np.degrees(angle) # radian각도를 degree각도로 변경하기

            # 제스쳐 인식시키기
            data = np.array([angle], dtype=np.float32)
            results = knn.predict(data)
            idx = int(results)
            
            # 제스쳐 인식되면 표시하기
            if idx in rsp_gesture.keys():
                org = (int(res.landmark[0].x * frame.shape[1]), int(res.landmark[0].y * frame.shape[0]))
                cv2.putText(frame, text=rsp_gesture[idx].upper(), org=(org[0], org[1] + 20), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(255, 255, 255), thickness=2)

                rsp_result.append({
                    'rsp': rsp_gesture[idx],
                    'org': org
                })
            mp_drawing.draw_landmarks(frame, res, mp_hands.HAND_CONNECTIONS) 

             # Who wins?
            if len(rsp_result) == 2:
                winner = None
                text = ''

                if rsp_result[0]['rsp']=='rock':
                    if rsp_result[1]['rsp']=='rock'     : text = 'Tie'
                    elif rsp_result[1]['rsp']=='paper'  : text = 'Paper wins'  ; winner = 1
                    elif rsp_result[1]['rsp']=='scissors': text = 'Rock wins'   ; winner = 0
                elif rsp_result[0]['rsp']=='paper':
                    if rsp_result[1]['rsp']=='rock'     : text = 'Paper wins'  ; winner = 0
                    elif rsp_result[1]['rsp']=='paper'  : text = 'Tie'
                    elif rsp_result[1]['rsp']=='scissors': text = 'Scissors wins'; winner = 1
                elif rsp_result[0]['rsp']=='scissors':
                    if rsp_result[1]['rsp']=='rock'     : text = 'Rock wins'   ; winner = 1
                    elif rsp_result[1]['rsp']=='paper'  : text = 'Scissors wins'; winner = 0
                    elif rsp_result[1]['rsp']=='scissors': text = 'Tie'

                if winner is not None:
                    cv2.putText(frame, text='Winner', org=(rsp_result[winner]['org'][0], rsp_result[winner]['org'][1] + 70), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=2, color=(0, 255, 0), thickness=3)
                cv2.putText(frame, text=text, org=(int(frame.shape[1] / 3), 100), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=2, color=(0, 0, 255), thickness=3)
    cv2.imshow('video', frame)
    key = cv2.waitKey(33)
    if key == ord('q'):
        print('사용자에 의해서 영상이 종료')
        break

cap.release()
cv2.destroyAllWindows()

I0000 00:00:1721284280.846717  928932 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M3
W0000 00:00:1721284280.852630  984240 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1721284280.859224  984240 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)
  idx = int(results)

사용자에 의해서 영상이 종료


: 

In [7]:
!pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.5.1-cp311-cp311-macosx_12_0_arm64.whl.metadata (12 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.5.1-cp311-cp311-macosx_12_0_arm64.whl (11.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.0/11.0 MB[0m [31m32.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading joblib-1.4.2-py3-none-any.whl (301 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m301.8/301.8 kB[0m [31m37.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.4.2 scikit-learn-1.5.1 threadpoolctl-3.5.0


In [1]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.14-cp311-cp311-macosx_11_0_universal2.whl.metadata (9.7 kB)
Collecting attrs>=19.1.0 (from mediapipe)
  Using cached attrs-23.2.0-py3-none-any.whl.metadata (9.5 kB)
Collecting jax (from mediapipe)
  Downloading jax-0.4.30-py3-none-any.whl.metadata (22 kB)
Collecting jaxlib (from mediapipe)
  Downloading jaxlib-0.4.30-cp311-cp311-macosx_11_0_arm64.whl.metadata (1.0 kB)
Collecting matplotlib (from mediapipe)
  Using cached matplotlib-3.9.1-cp311-cp311-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.4.7-py3-none-macosx_10_6_x86_64.macosx_10_6_universal2.whl.metadata (1.4 kB)
Collecting CFFI>=1.0 (from sounddevice>=0.4.4->mediapipe)
  Using cached cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (1.5 kB)
Collecting scipy>=1.9 (from jax->mediapipe)
  Downloading scipy-1.14.0-cp311-cp311-macosx_14_0_arm64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━