## 1. Import and Install Dependencies

In [1]:

# Uncomment to install the dependencies into the kernel's environment:
# %pip install -r requirements.txt
# %pip install tensorflow opencv-python mediapipe scikit-learn matplotlib numpy #tensorflow-gpu
%load_ext autoreload
%autoreload 2

In [2]:
import os
import string
import time

import cv2
import matplotlib.pyplot as plt
import mediapipe as mp
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical



objc[36673]: Class CaptureDelegate is implemented in both /Users/calebgarfinkel/.pyenv/versions/3.10.6/envs/sign-game-server/lib/python3.10/site-packages/cv2/cv2.abi3.so (0x159fb25a0) and /Users/calebgarfinkel/.pyenv/versions/3.10.6/envs/sign-game-server/lib/python3.10/site-packages/mediapipe/.dylibs/libopencv_videoio.3.4.16.dylib (0x134ac8860). One of the two will be used. Which one is undefined.
objc[36673]: Class CVWindow is implemented in both /Users/calebgarfinkel/.pyenv/versions/3.10.6/envs/sign-game-server/lib/python3.10/site-packages/cv2/cv2.abi3.so (0x159fb25f0) and /Users/calebgarfinkel/.pyenv/versions/3.10.6/envs/sign-game-server/lib/python3.10/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x10fca0a68). One of the two will be used. Which one is undefined.
objc[36673]: Class CVView is implemented in both /Users/calebgarfinkel/.pyenv/versions/3.10.6/envs/sign-game-server/lib/python3.10/site-packages/cv2/cv2.abi3.so (0x159fb2618) and /Users/calebgarfinkel/.pye

## 2. Keypoints using MP Holistic

In [3]:
# Short aliases for the MediaPipe solution modules used by the cells below.
mp_holistic = mp.solutions.holistic # Holistic model (provides the face, pose and hand landmarks)
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [4]:
def mediapipe_detection(image, model):
    """Run a MediaPipe model on one OpenCV frame.

    Args:
        image: Frame in OpenCV's BGR channel order.
        model: Object exposing ``process(rgb_image)``, e.g. a MediaPipe
            Holistic instance.

    Returns:
        A tuple ``(image, results)``: the frame converted back to BGR and
        whatever ``model.process`` returned for it.
    """
    # The model is fed RGB, whereas OpenCV delivers BGR.
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The array is read-only while the model looks at it and writable again
    # right afterwards.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Hand back a BGR frame so the caller can keep using OpenCV on it.
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [5]:
def draw_landmarks(image,results):
    """Draw the face, pose, right-hand and left-hand landmarks onto ``image``.

    Args:
        image: Frame that ``mp_drawing.draw_landmarks`` draws on.
        results: Detection result exposing ``face_landmarks``,
            ``pose_landmarks``, ``right_hand_landmarks`` and
            ``left_hand_landmarks``.
    """
    # (attribute on `results`, connection set on `mp_holistic`), in drawing order.
    layers = (
        ("face_landmarks", "FACEMESH_CONTOURS"),
        ("pose_landmarks", "POSE_CONNECTIONS"),
        ("right_hand_landmarks", "HAND_CONNECTIONS"),
        ("left_hand_landmarks", "HAND_CONNECTIONS"),
    )
    for landmark_attr, connection_attr in layers:
        mp_drawing.draw_landmarks(
            image,
            getattr(results, landmark_attr),
            getattr(mp_holistic, connection_attr),
        )


## 3. Extract Keypoint Values

In [6]:
def extract_keypoints(results):
    """Flatten the right-hand landmarks of a detection into a 1-D array.

    Args:
        results: Detection result exposing ``right_hand_landmarks``; when a
            hand was found, its ``landmark`` items carry ``x``, ``y`` and ``z``.

    Returns:
        The coordinates laid out as ``[x0, y0, z0, x1, y1, z1, ...]``, or
        63 zeros when no right hand was detected (63 matches 21 landmarks
        times 3 coordinates).
    """
    hand = results.right_hand_landmarks
    if not hand:
        # No right hand in this frame: return a zero vector of the usual length.
        return np.zeros(63)

    coordinates = [[point.x, point.y, point.z] for point in hand.landmark]
    return np.array(coordinates).flatten()

## 4. Setup Folders for Collection

In [7]:
# Root folder for the recorded keypoints (one sub-folder per letter).
DATA_PATH = os.path.join('MP_Data')

# Signs to predict: the uppercase letters 'A' to 'Z'.
letters = np.array([*string.ascii_uppercase])

# Number of videos (sequences) recorded for each letter.
videos = 1

# Number of frames captured for each video.
frames_in_videos = 30

# Name of the signer; used as the prefix of the saved file names.
person = 'Caleb'



In [8]:
# Create one output folder per letter. `exist_ok=True` keeps the cell safe to
# re-run while still surfacing real failures (e.g. permission errors), which
# a bare `except: pass` would silently hide.
for letter in letters:
    os.makedirs(os.path.join(DATA_PATH, letter), exist_ok=True)

# This creates the folders below; the .npy files are written later by the
# collection cell, which names them <person>_<frame_num>.npy and does not save
# frame 0:
# MP_Data
# --A
# ----<person>_1.npy
# ...
# ----<person>_29.npy
# --B
# ...


## 5. Collect Keypoint Values for Training and Testing
Run this section only if we want to record the dataset ourselves with a webcam.

In [None]:
def _show_annotated(frame, headline, headline_origin, status):
    """Mirror `frame`, overlay two captions and show it in the 'OpenCV Feed' window.

    `headline` is drawn large in green at `headline_origin`; `status` is drawn
    small in red near the top-left corner.
    """
    image = cv2.flip(frame, 1)
    cv2.putText(image, headline, headline_origin,
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
    cv2.putText(image, status, (15, 12),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
    cv2.imshow('OpenCV Feed', image)


def _read_frame(capture):
    """Return the next frame from `capture`; raise if the camera delivers none."""
    ok, frame = capture.read()
    if not ok:
        raise RuntimeError('Could not read a frame from the camera')
    return frame


cap = cv2.VideoCapture(0)
quit_requested = False  # set once the user presses 'q'; unwinds every loop below
try:
    if not cap.isOpened():
        raise RuntimeError('Could not open the default camera (index 0)')

    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        for letter in letters:
            for video in range(videos):
                for frame_num in range(frames_in_videos):
                    # Read feed and make detections
                    frame = _read_frame(cap)
                    _, results = mediapipe_detection(frame, holistic)

                    if frame_num == 0:
                        # Wait screen: idle until the user presses 'r' to start
                        # recording this letter, or 'q' to stop collecting.
                        while True:
                            frame = _read_frame(cap)
                            _show_annotated(frame, 'TESTING {}'.format(letter), (50, 200),
                                            'PRESS R to record letter')
                            key = cv2.waitKey(10) & 0xFF
                            if key == ord('q'):
                                quit_requested = True
                            if key in (ord('r'), ord('q')):
                                break
                        if quit_requested:
                            break

                        # NOTE: frame 0 only announces the recording; its
                        # keypoints are not saved.
                        _show_annotated(frame, 'LETTER {}'.format(letter), (10, 200),
                                        'Collecting frames for {} '.format(letter))
                        cv2.waitKey(10)
                    else:
                        _show_annotated(frame, 'LETTER {}'.format(letter), (10, 200),
                                        'Collecting frames for {} '.format(letter))

                        # Export keypoints (np.save appends the .npy extension).
                        # NOTE: the file name does not contain `video`, so with
                        # videos > 1 each video of a letter overwrites the
                        # previous one.
                        keypoints = extract_keypoints(results)
                        file_name = person+"_"+str(frame_num)
                        npy_path = os.path.join(DATA_PATH, letter, file_name)
                        np.save(npy_path, keypoints)

                    # Break gracefully: 'q' stops the whole collection, not
                    # just the current video.
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        quit_requested = True
                        break
                if quit_requested:
                    break
            if quit_requested:
                break
finally:
    # Always free the camera and close the preview window, even on errors.
    cap.release()
    cv2.destroyAllWindows()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [None]:
# Release the webcam and close every OpenCV window.
# Presumably kept as a manual fallback for when the collection cell is interrupted
# before reaching its own cleanup — TODO confirm it is still needed.
cap.release()
cv2.destroyAllWindows()

In [63]:
# Column names: X/Y/Z for each hand joint (in the order listed), followed by
# the TARGET and PATH columns.
header = [
    f"{joint}_{axis}"
    for joint in (
        "WRIST",
        "THUMB_CMC", "THUMB_MCP", "THUMB_IP", "THUMB_TIP",
        "INDEX_FINGER_MCP", "INDEX_FINGER_PIP", "INDEX_FINGER_DIP", "INDEX_FINGER_TIP",
        "MIDDLE_FINGER_MCP", "MIDDLE_FINGER_PIP", "MIDDLE_FINGER_DIP", "MIDDLE_FINGER_TIP",
        "RING_FINGER_MCP", "RING_FINGER_PIP", "RING_FINGER_DIP", "RING_FINGER_TIP",
        "PINKY_MCP", "PINKY_PIP", "PINKY_DIP", "PINKY_TIP",
    )
    for axis in "XYZ"
] + ["TARGET", "PATH"]
header

['WRIST_X',
 'WRIST_Y',
 'WRIST_Z',
 'THUMB_CMC_X',
 'THUMB_CMC_Y',
 'THUMB_CMC_Z',
 'THUMB_MCP_X',
 'THUMB_MCP_Y',
 'THUMB_MCP_Z',
 'THUMB_IP_X',
 'THUMB_IP_Y',
 'THUMB_IP_Z',
 'THUMB_TIP_X',
 'THUMB_TIP_Y',
 'THUMB_TIP_Z',
 'INDEX_FINGER_MCP_X',
 'INDEX_FINGER_MCP_Y',
 'INDEX_FINGER_MCP_Z',
 'INDEX_FINGER_PIP_X',
 'INDEX_FINGER_PIP_Y',
 'INDEX_FINGER_PIP_Z',
 'INDEX_FINGER_DIP_X',
 'INDEX_FINGER_DIP_Y',
 'INDEX_FINGER_DIP_Z',
 'INDEX_FINGER_TIP_X',
 'INDEX_FINGER_TIP_Y',
 'INDEX_FINGER_TIP_Z',
 'MIDDLE_FINGER_MCP_X',
 'MIDDLE_FINGER_MCP_Y',
 'MIDDLE_FINGER_MCP_Z',
 'MIDDLE_FINGER_PIP_X',
 'MIDDLE_FINGER_PIP_Y',
 'MIDDLE_FINGER_PIP_Z',
 'MIDDLE_FINGER_DIP_X',
 'MIDDLE_FINGER_DIP_Y',
 'MIDDLE_FINGER_DIP_Z',
 'MIDDLE_FINGER_TIP_X',
 'MIDDLE_FINGER_TIP_Y',
 'MIDDLE_FINGER_TIP_Z',
 'RING_FINGER_MCP_X',
 'RING_FINGER_MCP_Y',
 'RING_FINGER_MCP_Z',
 'RING_FINGER_PIP_X',
 'RING_FINGER_PIP_Y',
 'RING_FINGER_PIP_Z',
 'RING_FINGER_DIP_X',
 'RING_FINGER_DIP_Y',
 'RING_FINGER_DIP_Z',
 'RING_FINGER

## 6. Preprocess Data and Create Labels and Features

In [50]:
import pandas as pd

# Load every saved keypoint array and print it next to the letter it belongs to.
# Entries are sorted so the output order is the same on every run.
for letter in sorted(os.listdir(DATA_PATH)):
    letter_dir = os.path.join(DATA_PATH, letter)
    if not os.path.isdir(letter_dir):
        # Skip stray files in DATA_PATH (e.g. macOS's .DS_Store); calling
        # os.listdir on them would raise NotADirectoryError.
        continue
    for file in sorted(os.listdir(letter_dir)):
        if not file.endswith('.npy'):
            continue  # only NumPy dumps can be read by np.load here
        content = np.load(os.path.join(letter_dir, file))
        # One row per stored value plus a TARGET column holding the letter.
        df_elem = pd.DataFrame(content)
        df_elem['TARGET'] = letter
        print(df_elem)



      0 TARGET
0   0.0      A
1   0.0      A
2   0.0      A
3   0.0      A
4   0.0      A
..  ...    ...
58  0.0      A
59  0.0      A
60  0.0      A
61  0.0      A
62  0.0      A

[63 rows x 2 columns]
               0 TARGET
0   4.046807e-01      A
1   6.898316e-01      A
2  -5.085602e-07      A
3   4.906912e-01      A
4   6.467514e-01      A
..           ...    ...
58  4.584200e-01      A
59 -3.499483e-02      A
60  3.924883e-01      A
61  4.916437e-01      A
62 -1.026652e-02      A

[63 rows x 2 columns]
               0 TARGET
0   3.694676e-01      A
1   6.832043e-01      A
2  -4.937486e-07      A
3   4.631427e-01      A
4   6.399192e-01      A
..           ...    ...
58  4.319233e-01      A
59 -3.229539e-02      A
60  3.639710e-01      A
61  4.678457e-01      A
62 -3.691707e-03      A

[63 rows x 2 columns]
               0 TARGET
0   3.674173e-01      A
1   6.825240e-01      A
2  -5.258332e-07      A
3   4.606263e-01      A
4   6.418245e-01      A
..           ...    ...
58  4.3

In [None]:
# Map each letter to its index in `letters`. The index is the class id that is
# one-hot encoded later and decoded again with `letters[np.argmax(...)]`.
label_map = {letter: index for index, letter in enumerate(letters)}

# NOTE(review): this expects MP_Data/<letter>/<video>/<frame_num>.npy, whereas
# the collection cell in this notebook writes MP_Data/<letter>/<person>_<frame_num>.npy
# and does not save frame 0 — confirm which layout the data on disk uses.
inputs, targets = [], []
for letter in letters:
    for video in range(videos):
        # One window = the ordered frames of a single video.
        window = [
            np.load(os.path.join(DATA_PATH, letter, str(video), "{}.npy".format(frame_num)))
            for frame_num in range(frames_in_videos)
        ]
        inputs.append(window)
        targets.append(label_map[letter])


In [None]:
len(inputs),len(inputs[1]) # (number of sequences, frames in the second sequence): one sequence is loaded per letter/video pair, each holding frames_in_videos frames

In [None]:
# Compare the type returned by to_categorical with the type after casting to int.
type(to_categorical(targets[0])), type(to_categorical(targets[0]).astype(int))

In [None]:
# Stack the loaded sequences into a single array for the model.
# Presumably shaped (sequences, frames_in_videos, 63) — TODO confirm against the data on disk.
X = np.array(inputs)

In [None]:
y = to_categorical(targets).astype(int) # One-hot encode the integer class ids, then cast the result to int

In [None]:
# Hold out 10% of the sequences for testing. A fixed `random_state` makes the
# split reproducible, so indexed look-ups into X_test / y_test refer to the
# same samples on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

## 7. Build and Train LSTM Neural Network

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [None]:
# Directory handed to the TensorBoard callback as its log location.
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir) # passed to model.fit through `callbacks`

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once the training loss has not improved for 20 epochs and roll back to
# the best weights seen. `monitor` is set explicitly because the default,
# 'val_loss', only exists when validation data is given to `model.fit`; the fit
# call in this notebook passes none, so with the default Keras would only warn
# and never stop early.
es = EarlyStopping(monitor='loss', patience=20, restore_best_weights=True)

In [None]:
# Three stacked LSTM layers read each 30-frame x 63-feature sequence; two dense
# layers and a softmax then produce one score per entry of `letters`.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30,63)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(letters.shape[0], activation='softmax'),
])

In [None]:
# letters[np.argmax(res)]

In [None]:
# Multi-class setup: categorical cross-entropy loss, tracked with categorical accuracy.
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [None]:
# Train for at most 2000 epochs with the TensorBoard and EarlyStopping callbacks attached.
# NOTE(review): no validation data is passed here, so callbacks that watch `val_*` metrics have nothing to monitor.
model.fit(X_train, y_train, epochs=2000, callbacks=[tb_callback, es])

In [None]:
# Print the model's layers, output shapes and parameter counts.
model.summary()

## 8. Make Predictions

In [None]:
# Model outputs for every held-out sequence; the cells below decode one row with np.argmax.
res = model.predict(X_test)

In [None]:
# Predicted letter for one held-out sequence. The index is clamped because a
# 10% test split of a small dataset can hold fewer than five samples, in which
# case a hard-coded `res[4]` raises IndexError.
sample_index = min(4, len(res) - 1)
letters[np.argmax(res[sample_index])]

In [None]:
# True letter for one held-out sequence. The index is clamped because a 10%
# test split of a small dataset can hold fewer than five samples, in which case
# a hard-coded `y_test[4]` raises IndexError.
sample_index = min(4, len(y_test) - 1)
letters[np.argmax(y_test[sample_index])]

In [None]:
# Test result!!!

## 9. Save Weights

## 10. Evaluation using Confusion Matrix and Accuracy

## 11. Test in Real Time