In [1]:
import cv2
import mediapipe as mp
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import models

In [2]:
# Load the saved Keras model from "lstm_model.h5" (path is relative to the
# notebook's working directory). It is used for inference in the webcam loop.
model = models.load_model("lstm_model.h5")

In [3]:
# Short aliases for the MediaPipe solution modules used by the helpers below.
mp_holistic = mp.solutions.holistic # Holistic model (face, hand and pose landmarks)
mp_drawing = mp.solutions.drawing_utils # Utilities for drawing landmarks onto frames

In [4]:
def mediapipe_detection(image, model):
    """Run a MediaPipe model on one BGR frame.

    Args:
        image: Frame in OpenCV's BGR channel order.
        model: Object exposing ``process(rgb_image)``; the webcam loop passes
            an ``mp_holistic.Holistic`` instance.

    Returns:
        A ``(image, result)`` tuple: the frame converted back to BGR (a new
        array, the input is not modified) and whatever ``model.process``
        returned.
    """
    # The model is fed RGB, while OpenCV works in BGR.
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Keep the buffer read-only for the duration of the inference call only.
    rgb_frame.flags.writeable = False
    detection = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Hand back a BGR frame so it can be drawn on and shown with OpenCV.
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, detection

In [5]:
def draw_style_landmark(image, results):
    """Draw the face, hand and pose landmarks from ``results`` onto ``image``.

    Args:
        image: Frame to draw on (passed straight to ``mp_drawing.draw_landmarks``).
        results: Detection result exposing ``face_landmarks``,
            ``right_hand_landmarks``, ``left_hand_landmarks`` and
            ``pose_landmarks``.

    Returns:
        None. The function is called only for its drawing side effect.
    """
    style = mp_drawing.DrawingSpec
    hand_edges = mp_holistic.HAND_CONNECTIONS

    # One row per overlay, in draw order:
    # (landmarks, connections, landmark style, connection style)
    overlays = [
        # Face mesh
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
         style(color=(169, 255, 203), thickness=1, circle_radius=1),
         style(color=(222, 242, 144), thickness=1, circle_radius=1)),
        # Right hand
        (results.right_hand_landmarks, hand_edges,
         style(color=(145, 75, 255), thickness=2, circle_radius=4),
         style(color=(163, 2, 8), thickness=2, circle_radius=2)),
        # Left hand
        (results.left_hand_landmarks, hand_edges,
         style(color=(49, 63, 216), thickness=2, circle_radius=4),
         style(color=(123, 26, 241), thickness=2, circle_radius=2)),
        # Body pose
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         style(color=(49, 63, 216), thickness=2, circle_radius=4),
         style(color=(0, 79, 252), thickness=2, circle_radius=2)),
    ]

    for landmarks, connections, point_style, edge_style in overlays:
        mp_drawing.draw_landmarks(image, landmarks, connections, point_style, edge_style)

In [6]:
# Build the per-frame feature vector
def get_array(results):
    """Flatten one frame of landmarks into a single 1-D feature vector.

    Each landmark group is flattened point by point; a group that is missing
    (falsy) is replaced by zeros of the expected length so the output size
    stays constant.

    Args:
        results: Detection result exposing ``face_landmarks``,
            ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``; each present group has a ``landmark``
            sequence of points.

    Returns:
        ``np.ndarray`` laid out as face (468 x 3), pose (33 x 4, including
        ``visibility``), left hand (21 x 3), right hand (21 x 3).
    """
    def _flatten(group, fields, count):
        # Zero-fill when the group was not detected in this frame.
        if not group:
            return np.zeros(count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")
    face = _flatten(results.face_landmarks, xyz, 468)
    pose = _flatten(results.pose_landmarks, xyz + ("visibility",), 33)
    left_hand = _flatten(results.left_hand_landmarks, xyz, 21)
    right_hand = _flatten(results.right_hand_landmarks, xyz, 21)

    return np.concatenate([face, pose, left_hand, right_hand])

In [7]:
# Gesture labels, looked up by the index of the model's highest-scoring output.
# NOTE(review): the order presumably has to match the class order used when the
# model was trained — confirm against the training code.
actions = np.array(["paper", "rock", "scissors"])

In [9]:
# Real-time gesture recognition from the default webcam.
#
# Each frame is run through MediaPipe Holistic, converted to a feature vector
# and pushed into a rolling window. Once the window is full the model scores
# it, and a label is added to the on-screen banner when the recent predictions
# agree and the top score exceeds `threshold`. Press "q" in the video window
# to stop.

SEQUENCE_LENGTH = 30     # frames fed to the model per prediction
STABILITY_WINDOW = 10    # number of recent predictions that must agree
MAX_SENTENCE_WORDS = 5   # labels kept in the banner
BANNER_HEIGHT = 40       # height in pixels of the label banner

sequence = []      # rolling window of per-frame feature vectors
predictions = []   # most recent predicted class indices (bounded)
sentence = []      # labels currently shown in the banner
threshold = 0.5    # minimum top-class score required to accept a prediction

cap = cv2.VideoCapture(0) # capture video

try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            # Read Feed
            ret, frame = cap.read()
            if not ret:
                # No frame delivered (camera unplugged / stream ended): stop
                # instead of passing None to cv2.cvtColor.
                break

            # Making Detection
            image, results = mediapipe_detection(frame, holistic)

            # Drawing Landmarks
            draw_style_landmark(image, results)

            # Making Prediction
            sequence.append(get_array(results))
            sequence = sequence[-SEQUENCE_LENGTH:]

            if len(sequence) == SEQUENCE_LENGTH:
                # verbose=0 keeps Keras from printing a progress bar per frame.
                res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
                best = int(np.argmax(res))

                # Only the stability window is ever inspected, so keep the
                # history bounded instead of letting it grow for the whole session.
                predictions.append(best)
                predictions = predictions[-STABILITY_WINDOW:]

                # Visual Logic: accept the label only when every recent
                # prediction agrees with the current one. (Comparing against
                # np.unique(...)[0] would only test the smallest class index
                # seen recently, not agreement.)
                is_stable = len(set(predictions)) == 1
                if is_stable and res[best] > threshold:
                    label = actions[best]
                    # Avoid repeating the label that is already last in the banner.
                    if not sentence or label != sentence[-1]:
                        sentence.append(label)

            sentence = sentence[-MAX_SENTENCE_WORDS:]

            # Banner spans the actual frame width rather than assuming 640 px.
            cv2.rectangle(image, (0, 0), (image.shape[1], BANNER_HEIGHT), (245, 117, 16), -1)
            cv2.putText(image, " ".join(sentence), (3, 30),
                        cv2.FONT_HERSHEY_DUPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Showing Image
            cv2.imshow("OpenCV Feed", image)

            # Break cap video
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()

