In [18]:
import cv2
import mediapipe as mp
import os
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [19]:
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

In [20]:
def mediapipe_detection(image, model):

    # Convert the BGR image to RGB before processing
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Image is no longer writeable
    image.flags.writeable= False 

    # Make prediction
    results = model.process(image) 

    # Image is now writeable
    image.flags.writeable = True 

    # Convert back to BGR
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 
    return image, results

In [21]:
def holistic_drawing(img, holistic_res):
    # Draw face connections
    # mp_drawing.draw_landmarks(img, holistic_res.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
    #                          mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
    #                          mp_drawing.DrawingSpec(color=(80,256,120), thickness=1, circle_radius=1)
    #                          ) 
    # Draw pose connections
    mp_drawing.draw_landmarks(img, holistic_res.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(200,50,50), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(200,25,25), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(img, holistic_res.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(25,25,200), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(50,50,200), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(img, holistic_res.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(25,25,200), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(50,50,200), thickness=2, circle_radius=2)
                             )


In [22]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    #face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, lh, rh])

In [23]:
# Path for exported data, numpy arrays
# DATA_PATH = os.path.join('../input/mpdata-from-video/MP_Data_from_VIDEO') # Create a folder named MP_Data in the current directory
# Actions that we try to detect
actions = np.array(['again','boy','deaf','finish','girl','go','goodbye','hearing','hello','help',
'how','i','know','like','love','man','more','my','need','no',
'none','not','please','right','school','science','sentence','sign-language','sorry','sports',
'student','thank-you','that','want','what','when','where','who','why','with',
'without','woman','yes','you','your','i-love-you','youre-welcome','intelligent','friend','come-here',
'go-away','see-you-later','baby','name'])
no_videos = 60  # Number of sequences for each action
sequence_length = 60  # Number of frames in each sequence

In [24]:
model = tf.keras.Sequential()
model.add(LSTM(64, return_sequences=True, activation='tanh', input_shape=(60,258)))
model.add(LSTM(128, return_sequences=True, activation='tanh'))
model.add(LSTM(64, return_sequences=False, activation='tanh'))
model.add(Dense(64, activation='tanh'))
model.add(Dense(32, activation='tanh'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [25]:
model.load_weights("No-face.h5")

In [28]:
sequence = []
sentence = []
predictions = []
threshold = 0.9



cap = cv2.VideoCapture(0)
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
fps = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
#out = cv2.VideoWriter('output.avi', 0, cv2.VideoWriter_fourcc(*'MJPG'), 30, (width, height))

# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()
        if ret:
            # Make detections
            image, results = mediapipe_detection(frame, holistic)
            print(results)
            
            # Draw landmarks
            holistic_drawing(image, results)
            
            # 2. Prediction logic
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-60:]
            
            if len(sequence) == 60:
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                print(actions[np.argmax(res)])
                predictions.append(np.argmax(res))
                
                print(res[np.argmax(res)])
            #3. Viz logic
                if np.unique(predictions[-10:])[0]==np.argmax(res):
                    if res[np.argmax(res)] > threshold: 
                        
                        if len(sentence) > 0: 
                            if actions[np.argmax(res)] != sentence[-1]:
                                sentence.append(actions[np.argmax(res)])
                        else:
                            sentence.append(actions[np.argmax(res)])

                if len(sentence) > 1: 
                    sentence = sentence[-1:]
                
            cv2.rectangle(image, (0,0), (int(width/2.5), 100), (20, 105, 60), -1)
            cv2.putText(image, ' '.join(sentence), (3,70), 
                          cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 6, cv2.LINE_AA)
            # cv2.rectangle(image, (275, height-15), (width-275, height-35), (20, 105, 60), -1)
            # cv2.putText(image, "PS: I'm not differently-abled. It's a Sign Language Detection project I worked on", (310,height-20),
                          # cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA)

            
            # Write frame to video
            cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

KeyboardInterrupt: 

: 