## Using this file to connect to LeapMotion

# 1. Imports and Dependencies

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

# 2. Keypoints using MP Holistic

In [2]:
mp_holistic = mp.solutions.holistic # Holistic Model
mp_drawing = mp.solutions.drawing_utils # Drawing Utilities

In [3]:
def mediapipe_detection(image, model):
     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # Color conversion from BGR -> RGB
     image.flags.writeable = False                  # Image is no longer writable
     results = model.process(image)                 # Make Prediction
     image.flags.writeable = True                   # Image is now writable
     image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # Color conversion from RGB -> BGR
     return image, results


def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw right hand connections


def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

# 3. Extract Keypoint Values

In [4]:
# Stores all the keypoint values in an array
# Error checking if the hand isn't there, it'll insert a zero array
def extract_keypoints(results):
     lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
     rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
     return np.concatenate([lh, rh])

# 4. Setup Folders for Collection

In [9]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('New_Data') 

# Actions that we try to detect
actions = np.array(['a', 'b', 'c'])

# Thirty videos worth of data
no_sequences = 30

# Videos are going to be 30 frames in length
sequence_length = 20

In [10]:
for action in actions: 
    for sequence in range(no_sequences):
        try: 
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            pass

# 5. Collect Keypoint Values for Training & Testing

In [11]:
cap = cv2.VideoCapture(0)

# Set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
     
     # Loop through actions
     for action in actions:
          
          # Loop through sequences (videos)
          for sequence in range(no_sequences):
               
               # Loop through video length (sequence length)
               for frame_num in range(sequence_length):

                    # Read Feed
                    ret, frame = cap.read()
                    
                    # Made detections
                    image, results = mediapipe_detection(frame, holistic)
                     
                    # Draw landmarks
                    draw_styled_landmarks(frame, results)
                    
                    # NEW Apply wait logic
                    if frame_num == 0: 
                         cv2.putText(image, 'STARTING COLLECTION', (120,200), 
                                   cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                         cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                         # Show to screen
                         cv2.imshow('OpenCV Feed', image)
                         cv2.waitKey(1000)
                    else: 
                         cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                         # Show to screen
                         cv2.imshow('OpenCV Feed', image)
                    
                    # NEW Export keypoints
                    keypoints = extract_keypoints(results)
                    npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                    np.save(npy_path, keypoints)
                
                    # Show to screen
                    cv2.imshow('OpenCV Feed', frame)
                    
                    # Breaking gracefully
                    if cv2.waitKey(5) & 0xFF == ord('q'):
                         break

cap.release()
cv2.destroyAllWindows()

# 6. Preprocess Data and Create Lables and Features

In [12]:
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

In [13]:
label_map = {label:num for num, label in enumerate(actions)}

sequences, labels = [], []
# Loop through actions
for action in actions:
     # Go through each number number in actions
     for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int):
         
          window = []
          # Grab all 30 frames and append them to window
          for frame_num in range(sequence_length):
               res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
               window.append(res)
          
          sequences.append(window)
          labels.append(label_map[action])
          
# Example: for hello, thank you, and i love you
     # You should have 90x30x1662
          # 30 for each one making 90
          # Each of the 90 have 30 np arrays
          # Each np array has 126 values

In [28]:
X = np.array(sequences)
y = to_categorical(labels).astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

# 7. Build and Train LSTM Neural Network

In [16]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import TensorBoard

In [17]:
log_dir = os.path.join('New Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [18]:
model = Sequential()

In [19]:
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(20,126)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

# To use LSTM in sequence, you need to set return_sequences to True. If the following is not LSTM, return False

In [20]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [21]:
model.fit(X_train, y_train, epochs=1000, callbacks=[tb_callback])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x125d0d05e08>

In [22]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 20, 64)            48896     
                                                                 
 lstm_1 (LSTM)               (None, 20, 128)           98816     
                                                                 
 lstm_2 (LSTM)               (None, 64)                49408     
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 3)                 99        
                                                                 
Total params: 203,459
Trainable params: 203,459
Non-trai

In [None]:
# How to run TensorBoard
# Using the command line
     # Navigate to the Logs/train folder
     # Run the following command:
          # tensorboard --logdir=.

# 8. Make Predictions

In [29]:
results = model.predict(X_test)
print(actions[np.argmax(results[1])])
print(actions[np.argmax(y_test[1])])

b
b


# 9. Save, Delete, and Load Model

In [30]:
# Save Model
model.save('new_model.h5')

In [None]:
# Delete Model
del model

In [31]:
#Load Model
model.load_weights('new_model.h5')

# 10. Evaluation using Confusion Matrix and Accuracy

In [32]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [33]:
yhat = model.predict(X_test)  # Predicted values
ytrue = np.argmax(y_train, axis=1).tolist()  # Y true is the target values
yhat = np.argmax(yhat, axis=1).tolist()



In [34]:
# Create Multilabel Confusion Matrix
# Top Row:    TP | FP
# Bottom Row: FN | TN
# You want numbers in the top left and bottom right

multilabel_confusion_matrix(ytrue, yhat)
# print(accuracy_score(ytrue, yhat))

ValueError: Found input variables with inconsistent numbers of samples: [85, 5]

# 11. Test in Real Time

In [35]:
from scipy import stats

In [36]:
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [39]:
# 1. New detection variables
sequence = [] # Stores 30 frames and once we do, pass it to the prediction algorithm
sentence = [] # Concatenates our predictions
predictions = []
threshold = 0.7 # Confidence Metric... needs to be this confident to consider it a letter

cap = cv2.VideoCapture(0)

# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
     while cap.isOpened():

          # Read feed
          ret, frame = cap.read()

          # Make detections
          image, results = mediapipe_detection(frame, holistic)
          print(results)
          
          # Draw landmarks
          draw_styled_landmarks(image, results)
          
          # 2. Prediction logic
          keypoints = extract_keypoints(results)
          sequence.append(keypoints)
          sequence = sequence[-30:]
          
          if len(sequence) == 30:
               res = model.predict(np.expand_dims(sequence, axis=0))[0] # Fixes the dimension from (30, 126) -> (1,30,126)
               print(actions[np.argmax(res)])
               predictions.append(np.argmax(res))
               
               
               #3. Viz logic
               if np.unique(predictions[-10:])[0]==np.argmax(res):
                    
                    # Checking if out result is above the threshold
                    if res[np.argmax(res)] > threshold: 
                         
                         if len(sentence) > 0:
                              # No repeat words in the sentence
                              if actions[np.argmax(res)] != sentence[-1]:
                                   sentence.append(actions[np.argmax(res)])
                         else: 
                              sentence.append(actions[np.argmax(res)])

               # Only keeping 5 values in a sentence. no need for this for the project
               if len(sentence) > 5: 
                    sentence = sentence[-5:]

               # Viz probabilities
               image = prob_viz(res, actions, image, colors)
               
          cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
          cv2.putText(image, ' '.join(sentence), (3,30), 
                         cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
          
          # Show to screen
          cv2.imshow('OpenCV Feed', image)

          # Break gracefully
          if cv2.waitKey(10) & 0xFF == ord('q'):
               break
     
     cap.release()
     cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti