# 1. Import and Install Dependencies

In [3]:
 # !pip install tensorflow[and-cuda] opencv-python mediapipe scikit-learn matplotlib

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

# 2. Identifying Keypoints through MP Holistic Library

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [3]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

In [4]:
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION) # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw right hand connections

In [5]:
def draw_styled_landmarks(image, results):
    # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
                             mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                             ) 
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

In [6]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

# 3. Setting Up Folders for Data Collection (Rerun to Add Additional Datasets)

In [101]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('MP_Data') 

# Actions that we try to detect
actions = np.array(['nice', 'meet', 'you', 'me'])

# Thirty videos worth of data
# no_sequences = 10

# Videos are going to be 30 frames in length
sequence_length = 20

In [102]:
no_sequences = {}
for action in actions:
    action_path = os.path.join(DATA_PATH, action)
    if not os.path.exists(action_path):
            no_sequences[action] = 0 
    else:
            existing_folders = [name for name in os.listdir(action_path) if os.path.isdir(os.path.join(action_path, name))]
            no_sequences[action] = len(existing_folders)




###  Number of data collections per session

In [1]:
image_collection = 5

In [104]:
for action in actions:
    # Loop through sequences aka videos
    for sequence in range(no_sequences[action], no_sequences[action] + image_collection):
        sequence_path = os.path.join(DATA_PATH, action, str(sequence))

        # Make sure the directory exists in the current working directory
        try:
            os.makedirs(sequence_path, exist_ok=True)
            print(f"Directory '{sequence_path}' created.")
        except Exception as e:
            print(f"Error creating directory '{sequence_path}': {e}")
        else:
            print(f"Directory '{sequence_path}' already exists. Skipped.")

Directory 'MP_Data\nice\0' created.
Directory 'MP_Data\nice\0' already exists. Skipped.
Directory 'MP_Data\nice\1' created.
Directory 'MP_Data\nice\1' already exists. Skipped.
Directory 'MP_Data\nice\2' created.
Directory 'MP_Data\nice\2' already exists. Skipped.
Directory 'MP_Data\nice\3' created.
Directory 'MP_Data\nice\3' already exists. Skipped.
Directory 'MP_Data\nice\4' created.
Directory 'MP_Data\nice\4' already exists. Skipped.
Directory 'MP_Data\meet\0' created.
Directory 'MP_Data\meet\0' already exists. Skipped.
Directory 'MP_Data\meet\1' created.
Directory 'MP_Data\meet\1' already exists. Skipped.
Directory 'MP_Data\meet\2' created.
Directory 'MP_Data\meet\2' already exists. Skipped.
Directory 'MP_Data\meet\3' created.
Directory 'MP_Data\meet\3' already exists. Skipped.
Directory 'MP_Data\meet\4' created.
Directory 'MP_Data\meet\4' already exists. Skipped.
Directory 'MP_Data\you\0' created.
Directory 'MP_Data\you\0' already exists. Skipped.
Directory 'MP_Data\you\1' created.

# 4. Collect Keypoint Values for Training and Testing

In [105]:
cap = cv2.VideoCapture(0)

# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    # NEW LOOP
    # Loop through actions
    for action in actions:
        # Loop through sequences aka videos
        for sequence in range(no_sequences[action], no_sequences[action] + image_collection):
            # Loop through video length aka sequence length
            for frame_num in range(sequence_length):

                # Read feed
                ret, frame = cap.read()

                # Make detections
                image, results = mediapipe_detection(frame, holistic)

                # Draw landmarks
                draw_styled_landmarks(image, results)
                
                # NEW Apply wait logic
                if frame_num == 0: 
                    cv2.putText(image, 'STARTING COLLECTION', (120,200), 
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                    cv2.waitKey(2000)
                else: 
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                
                # NEW Export keypoints
                keypoints = extract_keypoints(results)

                sequence_path = os.path.join(DATA_PATH, action, str(sequence))
                if not os.path.exists(sequence_path):
                    os.makedirs(sequence_path)
                
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                np.save(npy_path, keypoints)

                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
                    
    cap.release()
    cv2.destroyAllWindows()

In [106]:
cap.release()
cv2.destroyAllWindows()

# 6. Preprocess Data and Create Labels and Features

In [107]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [108]:
no_sequences = 0
for action in actions:
        action_path = os.path.join(DATA_PATH, action)
        existing_folders = [name for name in os.listdir(action_path) if os.path.isdir(os.path.join(action_path, name))]
        no_sequences = len(existing_folders)

print(no_sequences)

5


In [109]:
label_map = {label:num for num, label in enumerate(actions)}

In [110]:
label_map

{'nice': 0, 'meet': 1, 'you': 2, 'me': 3}

## Number of Data Sets to be Used 

In [2]:
# dataset_lenght = no_sequences

# Pake 3 dataset , soalnya kalo ditambahin overfitting?
dataset_lenght = 2

In [112]:
sequences, labels = [], []


for action in actions:
    for sequence in range(dataset_lenght):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [113]:
np.array(sequences).shape

(8, 20, 1662)

In [114]:
np.array(labels).shape

(8,)

In [115]:
X = np.array(sequences)

In [116]:
X.shape

(8, 20, 1662)

In [117]:
y = to_categorical(labels).astype(int)

In [118]:
y

array([[1, 0, 0, 0],
       [1, 0, 0, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 0, 1],
       [0, 0, 0, 1]])

In [119]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

In [120]:
y_test.shape

(1, 4)

# 7. Build and Train LSTM Neural Network

In [128]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.models import load_model

In [129]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [130]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(20, 1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [131]:
res = [.7, 0.2, 0.1]

In [132]:
actions[np.argmax(res)]

'nice'

In [133]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [134]:
model.fit(X_train, y_train, epochs=1000, callbacks=[tb_callback])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.src.callbacks.History at 0x1a53b9d8580>

In [135]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_9 (LSTM)               (None, 20, 64)            442112    
                                                                 
 lstm_10 (LSTM)              (None, 20, 128)           98816     
                                                                 
 lstm_11 (LSTM)              (None, 64)                49408     
                                                                 
 dense_9 (Dense)             (None, 64)                4160      
                                                                 
 dense_10 (Dense)            (None, 32)                2080      
                                                                 
 dense_11 (Dense)            (None, 4)                 132       
                                                                 
Total params: 596708 (2.28 MB)
Trainable params: 59670

# 9. Save Weights

In [136]:
model.save('action.h5')

  saving_api.save_model(


In [137]:
model = load_model('action.h5')

# 10. Evaluation using Confusion Matrix and Accuracy

In [138]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [139]:
yhat = model.predict(X_test)



In [140]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [141]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[0, 0],
        [0, 1]]], dtype=int64)

In [142]:
accuracy_score(ytrue, yhat)

1.0

# 11. Test in Real Time

In [143]:
colors = [(245, 117, 16), (117, 245, 16), (16, 117, 245), (255, 0, 0), (0, 255, 0)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [144]:
import pygame

def play_audio_for_word(word):
    pygame.mixer.init()
    pygame.mixer.music.load(f'./audio/{word}.wav')
    pygame.mixer.music.play()



In [145]:
# 1. New detection variables
sequence = []
sentence = []
threshold = 0.8

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
       
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
#      sequence.insert(0,keypoints)
#       sequence = sequence[:30]
        sequence.append(keypoints)
        sequence = sequence[-30:]
            
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            
            
        #3. Viz logic
            if res[np.argmax(res)] > threshold: 
                if len(sentence) > 0: 
                    if actions[np.argmax(res)] != sentence[-1]:
                        sentence.append(actions[np.argmax(res)])
                        play_audio_for_word(actions[np.argmax(res)])

                else:
                    sentence.append(actions[np.argmax(res)])
                    play_audio_for_word(actions[np.argmax(res)])


            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
            image = prob_viz(res, actions, image, colors)


    
            
        cv2.rectangle(image, (0,0), (640, 40), (0,0,0), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
you
you
me
me
me
nice
nice
meet
me
nice
me
me
me
meet
you
me
you
me
meet
nice
you
you
meet
meet
nice
meet
nice
you
meet
nice
nice
you
you
nice
nice
meet
nice
nice
nice
meet
meet
meet
meet
meet
meet
nice
nice
meet
meet
meet
nice
meet
meet
meet
meet
meet
meet
meet
meet
meet
nice
meet
nice
nice
nice
nice
meet
nice
meet
nice
meet
meet
meet
meet
meet
nice
nice
meet
meet
you
you
you
nice
nice
meet
meet
meet
meet
meet
meet
meet
meet
meet
meet
meet
meet
meet
meet
meet
nice
meet
me
me
me
me
me
me
me
meet
meet
meet
meet
me
me
me
me
me
me
me
me
me
meet
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
meet
me
me
me
me
me
me
me
me
me
me
me
me
me
me
meet
me
me
me
me
me
me
me
me
me
me
me
me
meet
meet
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me
me


In [113]:
cap.release()
cv2.destroyAllWindows()

In [114]:
res[np.argmax(res)] > threshold

False

In [115]:
(num_sequences,30,1662)

NameError: name 'num_sequences' is not defined

In [131]:
model.predict(np.expand_dims(X_test[0], axis=0))



array([[0.32995152, 0.32562667, 0.34442183]], dtype=float32)