In [67]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

Importing the Media Pipe Holistic Model

In [68]:
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

In [69]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    
    results = model.process(image)
    
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    
    return image, results

In [76]:
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, mp_drawing.DrawingSpec(color=(80, 110, 10), thickness = 1, circle_radius=1), mp_drawing.DrawingSpec(color=(80, 256, 121), thickness = 1, circle_radius=1))
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

Live feed from camera and landmark detection

In [10]:
live = cv2.VideoCapture(0)
with mp_holistic.Holistic(min_detection_confidence = 0.5, min_tracking_confidence = 0.5) as holistic:
    while live.isOpened():
        ret, frame = live.read()

        # Detecting
        image, results =  mediapipe_detection(frame, holistic)
        print(results)

        #Draw landmarks
        draw_landmarks(image, results)

        cv2.imshow('OpenCV Feed', image)
        #condition for closing the live feed
        if cv2.waitKey(10) & 0xFF ==  ord('q'):
            break
    live.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

Extracting the keypoints

In [None]:
pose = []
for res in results.pose_landmarks.landmark:
    test = np.array([res.x, res.y, res.z, res.visibility])
    pose.append(test)

In [72]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

In [None]:
np.save('0', extract_keypoints(results))

Setting Up Paths for Data Collection

In [12]:
Data_Path = os.path.join('MP_Data')
signs = np.array(['hello', 'thanks', 'iloveyou'])

#30 videos
no_sequences = 30
# each video will be 30 frames long
sequence_length = 30 

In [13]:
for sign in signs:
    for sequence in range(no_sequences):
        try:
            os.makedirs(os.path.join(Data_Path, sign, str(sequence)))
        except:
            pass

Capturing the Data for Training and Testing.

In [None]:
# live = cv2.VideoCapture(0)
# with mp_holistic.Holistic(min_detection_confidence = 0.5, min_tracking_confidence = 0.5) as holistic:
    
#     for sign in signs:
#         for sequence in range(no_sequences):
#             for frame_no in range(sequence_length):

#                 ret, frame = live.read()

#                 # Detecting
#                 image, results =  mediapipe_detection(frame, holistic)
#                 print(results)

#                 #Draw landmarks
#                 draw_landmarks(image, results)


#                 if frame_no == 0:
#                     cv2.putText(image, 'Starting Collection', (120,200), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
#                     cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(sign, sequence), (15,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
#                     cv2.waitKey(2000)
#                 else:
#                     cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(sign, sequence), (15,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)                    

#                 keypoints = extract_keypoints(results)
#                 npy_path = os.path.join(Data_Path, sign, str(sequence), str(frame_no))
#                 np.save(npy_path, keypoints)

#                 cv2.imshow('OpenCV Feed', image)
#                 #condition for closing the live feed
#                 if cv2.waitKey(10) & 0xFF ==  ord('q'):
#                     break
#     live.release()
#     cv2.destroyAllWindows()

Preprocessing the Data and Creating Labels and Features

In [26]:
from tensorflow import keras, initializers
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

In [15]:
label_map = {label:num for num, label in enumerate(signs)}
label_map

{'hello': 0, 'thanks': 1, 'iloveyou': 2}

In [16]:
sequences, labels = [], []
for sign in signs:
    for sequence in range(no_sequences):
        window = []
        for frame_no in range(sequence_length):
            res = np.load(os.path.join(Data_Path, sign, str(sequence),"{}.npy".format(frame_no)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[sign])

In [17]:
X = np.array(sequences)
X.shape

(90, 30, 1662)

One Hot Encoding

In [18]:
y = to_categorical(labels).astype(int)

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

Buliding and Training the Model

In [28]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import TensorBoard

In [29]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir = log_dir)

In [44]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation="relu", input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=True, activation="relu"))
model.add(LSTM(64, return_sequences=False, activation="relu"))
model.add(Dense(64, activation="relu"))
model.add(Dense(32, activation="relu"))
model.add(Dense(signs.shape[0], activation='softmax'))

In [45]:
model.compile(optimizer='Adam', loss="categorical_crossentropy", metrics=['categorical_accuracy'])

In [None]:
model.fit(X_train, y_train, epochs=50, callbacks=[tb_callback])

In [50]:
res = model.predict(X_test)



In [56]:
signs[np.argmax(res[4])]

'iloveyou'

In [55]:
signs[np.argmax(y_test[4])]

'iloveyou'

In [57]:
model.save('signs.h5')

  saving_api.save_model(


In [79]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [80]:
yhat = model.predict(X_test)



In [81]:
ytrue = np.argmax(y_test, axis = 1).tolist()
yhat =  np.argmax(yhat, axis = 1).tolist()

In [82]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[1, 4],
        [0, 0]],

       [[4, 0],
        [0, 1]],

       [[1, 0],
        [4, 0]]], dtype=int64)

In [None]:
accuracy_score(ytrue, yhat)

In [87]:
sequence = []
sentence = []
threshold = 0.7
 
live = cv2.VideoCapture(0)
with mp_holistic.Holistic(min_detection_confidence = 0.5, min_tracking_confidence = 0.5) as holistic:
    while live.isOpened():
        ret, frame = live.read()

        # Detecting
        image, results =  mediapipe_detection(frame, holistic)
        print(results)

        #Draw landmarks
        draw_landmarks(image, results)

        keypoints = extract_keypoints(results)
        sequence.insert(0,keypoints)
        sequence = sequence[:30]

        if len(sequence) == 30:
           res = model.predict(np.expand_dims(sequence, axis = 0))[0]
           print(signs[np.argmax(res)])

        if res[np.argmax(res)] > threshold:
            if len(sentence) > 0:
                if signs[np.argmax(res)] != sentence [-1]:
                    sentence.append(signs[np.argmax(res)])
            else:
                sentence.append(signs[np.argmax(res)])
        if len(sentence) > 5:
            sentence = sentence[-5:]
        
        cv2.rectangle(image, (0,0), (640,40), (245, 117, 16), -1)
        cv2.putText(image,' '.join(sentence), (3,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)


        cv2.imshow('OpenCV Feed', image)
        #condition for closing the live feed
        if cv2.waitKey(10) & 0xFF ==  ord('q'):
            break
    live.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti