In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import time
import cv2
import matplotlib.pyplot as plt
import mediapipe as mp
import IPython
from PIL import ImageFont, ImageDraw, Image
from io import BytesIO
import numpy as np

# Short aliases for the MediaPipe solution modules used by the cells below.
mp_drawing = mp.solutions.drawing_utils          # landmark/connection drawing helpers
mp_drawing_styles = mp.solutions.drawing_styles  # not referenced in the cells shown here
mp_hand = mp.solutions.hands                     # hand-landmark detector and HAND_CONNECTIONS

def show_img(img, dpi=150, title=None):
    """Display an OpenCV (BGR) image in a new matplotlib figure.

    Args:
        img: Image array in BGR channel order; it is converted to RGB
            before being handed to matplotlib.
        dpi: Resolution of the created figure.
        title: Optional title shown above the image.

    Returns:
        The ``(fig, ax)`` pair of the created figure so callers can tweak it.
    """
    figure, axes = plt.subplots(dpi=dpi)
    axes.set_title(title, fontsize=16, color='black')
    # Hide ticks and frame: this is a picture, not a plot.
    axes.axis('off')
    rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    axes.imshow(rgb_image)
    return figure, axes

# Default CJK-capable font (Debian/Ubuntu "fonts-noto-cjk" location).
_DEFAULT_JP_FONT = '/usr/share/fonts/opentype/noto/NotoSansCJK-Bold.ttc'
# Loaded fonts keyed by (path, size) so the file is not re-read on every frame.
_JP_FONT_CACHE = {}


def putText_japanese(img, text, point, size, color, font_path=_DEFAULT_JP_FONT):
    """Draw (Japanese) text onto an image using a TrueType/OpenType font.

    Args:
        img: Image as a NumPy array accepted by ``PIL.Image.fromarray``.
        text: String to draw.
        point: ``(x, y)`` position passed to ``ImageDraw.text``.
        size: Font size passed to ``ImageFont.truetype``.
        color: Fill colour passed to ``ImageDraw.text``.
        font_path: Path of the font file to use. Defaults to the Noto Sans
            CJK Bold font; pass another path if that file is not installed.

    Returns:
        A new NumPy array created from the PIL image after drawing.

    Raises:
        OSError: If the font file cannot be opened. The message names the
            path that failed so the missing font is easy to identify.
    """
    cache_key = (font_path, size)
    font = _JP_FONT_CACHE.get(cache_key)
    if font is None:
        try:
            font = ImageFont.truetype(font_path, size)
        except OSError as exc:
            # Pillow's own message ("cannot open resource") does not say which
            # file is missing; re-raise the same exception type with the path.
            raise OSError(
                f"Could not load font {font_path!r}. Install a CJK font "
                f"or pass font_path= pointing to an existing font file."
            ) from exc
        _JP_FONT_CACHE[cache_key] = font

    img_pil = Image.fromarray(img)
    draw = ImageDraw.Draw(img_pil)
    draw.text(point, text, fill=color, font=font)
    return np.array(img_pil)

def landmark2np(hand_landmarks):
    """Convert a landmark container into coordinates relative to its first point.

    Args:
        hand_landmarks: Object whose ``landmark`` attribute is an iterable of
            points exposing ``x``, ``y`` and ``z`` attributes.

    Returns:
        NumPy array with one ``[x, y, z]`` row per landmark, with the first
        landmark's coordinates subtracted from every row (so row 0 is zero).
    """
    coords = [[point.x, point.y, point.z] for point in hand_landmarks.landmark]
    origin = coords[0]
    return np.array(coords) - origin

def calc_score(A,B):
    """Compare two equally shaped sequences by position, velocity and acceleration.

    The sequences are stacked into arrays and differenced along axis 0
    zero, one and two times; each pair of results is scored with
    ``manual_cos``.

    Args:
        A: First sequence (anything ``np.array`` can stack).
        B: Second sequence with the same shape as ``A``.

    Returns:
        ``[x_score, v_score, a_score]``: similarity of the raw values, of the
        first differences and of the second differences.
    """
    seq_a = np.array(A)
    seq_b = np.array(B)

    # order 0 -> raw values, 1 -> first difference, 2 -> second difference
    return [
        manual_cos(np.diff(seq_a, n=order, axis=0),
                   np.diff(seq_b, n=order, axis=0))
        for order in range(3)
    ]

def manual_cos(A, B):
    """Mean cosine similarity between matching vectors of ``A`` and ``B``.

    Cosine similarity is computed along the last axis; a small epsilon in the
    denominator avoids division by zero for zero-length vectors.

    Args:
        A: Array-like of vectors.
        B: Array-like with a shape compatible with ``A``.

    Returns:
        Mean of the similarities after skipping index 0 along the FIRST axis.
    """
    lhs = np.array(A)
    rhs = np.array(B)

    numerator = (lhs * rhs).sum(axis=-1)
    denominator = np.linalg.norm(lhs, axis=-1) * np.linalg.norm(rhs, axis=-1) + 1e-7
    similarity = numerator / denominator

    # NOTE(review): [1:] slices the first axis. For a single pose that drops the
    # first (all-zero) landmark; for a stacked sequence it drops the first frame
    # instead — confirm which one is intended.
    return similarity[1:].mean()


# 検出したいポーズを保存

In [2]:
# Countdown before recording starts.
timer = 3 #[s]
for i in range(timer):
    print(f"\r{timer-i}", end="")
    time.sleep(1)

pose_time = 2                 # [s] how long to record once a hand is first detected
saved_array = None            # recorded landmark coordinates (list of arrays)
saved_landmark_array = None   # recorded raw MediaPipe landmark objects
start = -100                  # negative sentinel: recording has not started yet
score = [0, 0, 0]

cap = cv2.VideoCapture(0)

try:
    assert cap.isOpened(), 'Could not open video device'

    # The context manager closes the MediaPipe graph when the loop ends.
    with mp_hand.Hands(  static_image_mode=True,
                         max_num_hands=2,
                         min_detection_confidence=0.5 ) as hands:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # MediaPipe and PIL both expect RGB; OpenCV delivers BGR.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(frame)

            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:

                    if start < 0:
                        # First detection: start the recording window.
                        # NOTE(review): this frame is stored here and again by the
                        # append below, so it appears twice — confirm that is intended.
                        saved_array = [landmark2np(hand_landmarks)]
                        saved_landmark_array = [hand_landmarks]
                        start = time.time()
                        score = [0, 0, 0]

                    if time.time()-start < pose_time:
                        saved_array.append(landmark2np(hand_landmarks))
                        saved_landmark_array.append(hand_landmarks)

                # Draw the "detected" label and the hand skeleton.
                frame = putText_japanese(frame, "検出", (0,0), 50, (255,0,0))
                frame.flags.writeable = True
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        frame, hand_landmarks, mp_hand.HAND_CONNECTIONS)

                # Stop once the recording window has elapsed.
                if time.time()-start > pose_time:
                    break

            # Show the frame inline, replacing the previous one.
            f = BytesIO()
            Image.fromarray(frame).save(f, "jpeg")
            IPython.display.display(IPython.display.Image(data=f.getvalue()))
            IPython.display.clear_output(wait=True)

except KeyboardInterrupt:
    print('Stream stopped')
finally:
    # Always free the camera — also on normal completion or on an error —
    # so the next cell can open it again.
    cap.release()


OSError: cannot open resource

# 保存したポーズを検出

In [36]:
# Requires the recording cell to have run first: it defines `saved_array`.
cap = cv2.VideoCapture(0)
assert cap.isOpened(), 'Could not open video device'

now_array = []        # sliding window of the most recent landmark arrays
score = [0, 0, 0]     # start neutral instead of reusing a score left over from an earlier run

try:
    # The context manager closes the MediaPipe graph when the loop ends.
    with mp_hand.Hands(  static_image_mode=True,
                         max_num_hands=2,
                         min_detection_confidence=0.5 ) as hands:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Camera stopped delivering frames; leave instead of spinning forever.
                break

            # MediaPipe and PIL both expect RGB; OpenCV delivers BGR.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(frame)

            if results.multi_hand_landmarks:
                if saved_array is not None:
                    # Only landmarks detected in THIS frame enter the window.
                    for hand_landmarks in results.multi_hand_landmarks:
                        now_array.append(landmark2np(hand_landmarks))

                        # Keep the window as long as the recording, then score it.
                        if len(now_array) > len(saved_array):
                            now_array.pop(0)
                            score = calc_score(saved_array, now_array)

                # Draw directly on the RGB frame, as the recording cell does.
                frame.flags.writeable = True
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        frame, hand_landmarks, mp_hand.HAND_CONNECTIONS)

            if score[0] > 0.8:
                frame = putText_japanese(frame, "検出しました", (0,20), 20, (255,0,0))

            # Overlay the current position score and show the frame inline.
            f = BytesIO()
            frame = putText_japanese(frame, str(score[0]), (0,0), 20, (255,0,0))
            Image.fromarray(frame).save(f, "jpeg")
            IPython.display.display(IPython.display.Image(data=f.getvalue()))

            IPython.display.clear_output(wait=True)

except KeyboardInterrupt:
    print('Stream stopped')
finally:
    # Always free the camera, whether the loop ended normally, by interrupt or by error.
    cap.release()

Stream stopped
