In [23]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.models import load_model
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score


In [24]:
# MediaPipe handles used by the rest of the notebook:
# the holistic solution module and the landmark-drawing utilities.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

In [25]:
# Dataset configuration.
# One subfolder per class; the position in this list becomes the numeric label.
subfolders = [
    'hello',
    'goodbye',
    'please',
    'thankyou',
    'yes',
    'no',
]
# Root directory that contains the class subfolders listed above.
DATA_PATH = "./actions"
# Number of per-frame .npy files that make up one sequence.
sequence_length = 30

In [26]:
# Map every class folder name to its index in `subfolders` (0, 1, 2, ...).
label_map = dict(zip(subfolders, range(len(subfolders))))
print("Label Map:", label_map)

Label Map: {'hello': 0, 'goodbye': 1, 'please': 2, 'thankyou': 3, 'yes': 4, 'no': 5}


In [27]:
# Collect one fixed-length sequence of per-frame feature vectors for every video,
# paired with the numeric label of the folder it came from.
sequences, labels = [], []

# Every per-frame .npy file must be a flat vector of exactly this shape.
# Comparing the whole shape (not just the first axis) rejects e.g. a (63, 3) array
# that would otherwise slip through and make the stacked dataset ragged.
expected_frame_shape = (63,)

for folder in subfolders:
    folder_path = os.path.join(DATA_PATH, folder)

    # os.listdir returns entries in arbitrary order; sort so the dataset is built
    # in the same order on every run and on every machine.
    video_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.mp4'))

    for video_file in tqdm(video_files, desc=f"Processing folder {folder}"):
        # Frame files are named "<video name without extension>_<frame index>.npy".
        video_stem = os.path.splitext(video_file)[0]
        window = []

        for frame_num in range(sequence_length):
            frame_path = os.path.join(folder_path, f"{video_stem}_{frame_num}.npy")
            try:
                # A missing or zero-byte file invalidates the whole sequence.
                if not (os.path.exists(frame_path) and os.path.getsize(frame_path) > 0):
                    print(f"Warning: Frame {frame_num} for {video_file} in folder {folder} is missing or empty.")
                    break
                frame = np.load(frame_path)
            except Exception as e:
                # Best-effort loading: report the unreadable frame and drop the sequence.
                print(f"Error loading frame {frame_num} for {video_file}: {e}")
                break

            if frame.shape != expected_frame_shape:
                print(f"Warning: Frame {frame_num} for {video_file} has unexpected shape {frame.shape}.")
                break

            window.append(frame)
        else:
            # for/else: reached only when no frame triggered a `break`,
            # i.e. the sequence is complete and well-formed.
            sequences.append(window)
            labels.append(label_map[folder])


Processing folder hello:   0%|          | 0/80 [00:00<?, ?it/s]

Processing folder hello: 100%|██████████| 80/80 [00:30<00:00,  2.63it/s]
Processing folder goodbye: 100%|██████████| 80/80 [00:26<00:00,  3.00it/s]
Processing folder please: 100%|██████████| 80/80 [00:23<00:00,  3.40it/s]
Processing folder thankyou: 100%|██████████| 80/80 [00:25<00:00,  3.16it/s]
Processing folder yes: 100%|██████████| 80/80 [00:25<00:00,  3.11it/s]
Processing folder no: 100%|██████████| 80/80 [00:31<00:00,  2.56it/s]


In [28]:
# Convert integer class labels to one-hot rows.
# num_classes is pinned to the number of class folders so the matrix width always
# matches the class list, even if the highest-numbered class ended up with no
# usable sequences (to_categorical would otherwise infer the width from max(label)).
# The ndim guard keeps the cell idempotent: re-running it must not one-hot encode
# an already one-hot matrix.
if np.ndim(labels) == 1:
    labels = to_categorical(labels, num_classes=len(subfolders)).astype(int)

In [29]:
# Shape of the stacked feature data: (num_sequences, frames_per_sequence, values_per_frame)
np.shape(sequences)

(480, 30, 63)

In [30]:
# Shape of the one-hot label matrix: (num_sequences, num_classes)
np.shape(labels)

(480, 6)

In [31]:
# Train-test split
# Build the arrays once instead of re-converting the same lists for every argument.
X = np.array(sequences)
y = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    # Fixed seed: without it the held-out set (and every metric reported below)
    # changes on each run, so results cannot be reproduced or compared.
    random_state=42,
    # Stratify on the class index of each one-hot row so every class keeps the
    # same share of samples in both splits.
    stratify=y.argmax(axis=1),
)

print(f"Data shapes - X_train: {X_train.shape}, X_test: {X_test.shape}, y_train: {y_train.shape}, y_test: {y_test.shape}")


Data shapes - X_train: (432, 30, 63), X_test: (48, 30, 63), y_train: (432, 6), y_test: (48, 6)


In [47]:
# TensorBoard writes its event files under this directory.
log_dir = 'Logs'
tb_callback = TensorBoard(log_dir=log_dir)

# One softmax output per class folder.
num_classes = len(subfolders)

# Each sample is `sequence_length` frames of 63 values per frame.
input_shape = (sequence_length, 63)

# Three stacked LSTM layers (the first two emit the full sequence, the last only its
# final state) followed by a small dense classifier head.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=input_shape),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

In [48]:
# Configure training: Adam optimizer, cross-entropy over one-hot targets,
# and categorical accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['categorical_accuracy'],
)


In [49]:
# Train the model for 55 epochs on the training split, logging to TensorBoard.
# Keep the returned History object so per-epoch loss/accuracy can be inspected or
# plotted afterwards (history.history); assigning it also stops the cell from
# echoing a bare `<keras...History at 0x...>` repr as its output.
history = model.fit(X_train, y_train, epochs=55, callbacks=[tb_callback])

Epoch 1/55
Epoch 2/55
Epoch 3/55
Epoch 4/55
Epoch 5/55
Epoch 6/55
Epoch 7/55
Epoch 8/55
Epoch 9/55
Epoch 10/55
Epoch 11/55
Epoch 12/55
Epoch 13/55
Epoch 14/55
Epoch 15/55
Epoch 16/55
Epoch 17/55
Epoch 18/55
Epoch 19/55
Epoch 20/55
Epoch 21/55
Epoch 22/55
Epoch 23/55
Epoch 24/55
Epoch 25/55
Epoch 26/55
Epoch 27/55
Epoch 28/55
Epoch 29/55
Epoch 30/55
Epoch 31/55
Epoch 32/55
Epoch 33/55
Epoch 34/55
Epoch 35/55
Epoch 36/55
Epoch 37/55
Epoch 38/55
Epoch 39/55
Epoch 40/55
Epoch 41/55
Epoch 42/55
Epoch 43/55
Epoch 44/55
Epoch 45/55
Epoch 46/55
Epoch 47/55
Epoch 48/55
Epoch 49/55
Epoch 50/55
Epoch 51/55
Epoch 52/55
Epoch 53/55
Epoch 54/55
Epoch 55/55


<keras.src.callbacks.History at 0x1ba67e60850>

In [50]:
# Print the layer-by-layer table (layer type, output shape, parameter count)
# for the model defined above.
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_12 (LSTM)              (None, 30, 64)            32768     
                                                                 
 lstm_13 (LSTM)              (None, 30, 128)           98816     
                                                                 
 lstm_14 (LSTM)              (None, 64)                49408     
                                                                 
 dense_12 (Dense)            (None, 64)                4160      
                                                                 
 dense_13 (Dense)            (None, 32)                2080      
                                                                 
 dense_14 (Dense)            (None, 6)                 198       
                                                                 
Total params: 187430 (732.15 KB)
Trainable params: 187

In [51]:
# Softmax outputs of the model for every sequence in the test split.
# NOTE(review): `res` is not read again in the visible cells (later cells call
# model.predict themselves) — confirm it is still needed before keeping this cell.
res = model.predict(X_test)



In [52]:
# Evaluate the model on the test set.
# evaluate() yields the loss followed by the metric given at compile time
# (categorical accuracy), which is why it unpacks into exactly two values.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

Test Loss: 0.024570098146796227, Test Accuracy: 0.9791666865348816


In [53]:
# Per-class accuracy on the test set.
# Collapse one-hot targets and softmax outputs to class indices.
ytrue = np.argmax(y_test, axis=1)
yhat = np.argmax(model.predict(X_test), axis=1)

# Count, for every class index, how many test videos exist and how many of them
# were predicted correctly. minlength keeps an entry for classes that do not
# occur in the test split at all.
n_labels = len(label_map)
total_counts = np.bincount(ytrue, minlength=n_labels)
correct_counts = np.bincount(ytrue[ytrue == yhat], minlength=n_labels)

total_videos_per_class = {label: int(total_counts[label]) for label in label_map.values()}
correct_predictions_per_class = {label: int(correct_counts[label]) for label in label_map.values()}

# Print the results
for folder, label in label_map.items():
    total = total_videos_per_class[label]
    correct = correct_predictions_per_class[label]
    if total == 0:
        # A class with no test samples has no defined accuracy; report that
        # instead of dividing by zero.
        print(f"Class {folder} (label {label}): no test videos for this class")
        continue
    print(f"Class {folder} (label {label}): Correctly Recognized {correct}/{total} videos ({(correct / total) * 100:.2f}% accuracy)")


Class hello (label 0): Correctly Recognized 7/8 videos (87.50% accuracy)
Class goodbye (label 1): Correctly Recognized 8/8 videos (100.00% accuracy)
Class please (label 2): Correctly Recognized 8/8 videos (100.00% accuracy)
Class thankyou (label 3): Correctly Recognized 8/8 videos (100.00% accuracy)
Class yes (label 4): Correctly Recognized 8/8 videos (100.00% accuracy)
Class no (label 5): Correctly Recognized 8/8 videos (100.00% accuracy)


In [54]:
# Persist the trained model to 'action.h5' in the current working directory.
# NOTE(review): the recorded output shows a warning raised from saving_api.save_model —
# presumably the notice about the legacy HDF5 format; confirm, and check which code
# loads 'action.h5' before changing the file name or format.
model.save('action.h5')

  saving_api.save_model(


In [55]:
# Evaluate the model on the test set.
# NOTE(review): this repeats the evaluation already run earlier in the notebook; the
# model is not retrained in between and the recorded loss/accuracy are identical,
# so this cell looks redundant — confirm before removing.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")


Test Loss: 0.024570098146796227, Test Accuracy: 0.9791666865348816


In [56]:
# One 2x2 confusion matrix per class (one-vs-rest) plus overall accuracy,
# both computed from class indices rather than one-hot / probability rows.
ytrue = y_test.argmax(axis=1).tolist()
yhat = model.predict(X_test).argmax(axis=1).tolist()

per_class_confusion = multilabel_confusion_matrix(ytrue, yhat)
print("Confusion Matrix:\n", per_class_confusion)

overall_accuracy = accuracy_score(ytrue, yhat)
print("Accuracy Score:", overall_accuracy)

Confusion Matrix:
 [[[40  0]
  [ 1  7]]

 [[40  0]
  [ 0  8]]

 [[40  0]
  [ 0  8]]

 [[40  0]
  [ 0  8]]

 [[39  1]
  [ 0  8]]

 [[40  0]
  [ 0  8]]]
Accuracy Score: 0.9791666666666666
