In [None]:
# Mount Google Drive at /content/drive; the dataset, keypoint and model paths
# used by the cells below all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%pip install mediapipe opencv-python numpy

Collecting mediapipe
  Downloading mediapipe-0.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.4-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.0-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (35.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.9/35.9 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading protobuf-4.25.4-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.6/294.6 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.0-py3-none-any.whl (32 kB)
Installing collected packages: protobuf, sounddevice, mediapipe
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.20.3

In [None]:
import cv2
import numpy as np
import os
from glob import glob
import mediapipe as mp
import tensorflow as tf

# Short names for the Keras pieces used below, taken off the tf.keras namespace.
Sequential = tf.keras.models.Sequential
load_model = tf.keras.models.load_model
Input = tf.keras.layers.Input
LSTM = tf.keras.layers.LSTM
Dense = tf.keras.layers.Dense
TensorBoard = tf.keras.callbacks.TensorBoard

# MediaPipe hand-tracking solution and its drawing helpers.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

In [None]:
import os
DATASET = '/content/drive/MyDrive/dataset/'

# Sanity check: list only the action folders that contain no files at all.
# Folders that do hold videos are deliberately not printed.
for idx, action in enumerate(os.listdir(DATASET)):
    video_count = len(os.listdir(os.path.join(DATASET, action)))
    if video_count != 0:
        continue
    print(f"{idx + 1}. {action}: {video_count} videos")

In [None]:
def extract_keypoints(frame, hands=None):
    """Return the flattened hand landmarks detected in a single BGR frame.

    Args:
        frame: Image in OpenCV's BGR channel order; it is converted to RGB
            before being handed to the detector.
        hands: Optional, already-constructed ``mp_hands.Hands`` instance (or any
            object exposing a compatible ``process(rgb_frame)`` method). Passing
            one lets the caller reuse a single detector across many frames
            instead of building a new one per call. The caller owns it and is
            responsible for closing it. When omitted, a temporary detector is
            created and closed inside this call, exactly as before.

    Returns:
        A 1-D NumPy array holding ``[x, y, z]`` for every landmark of the FIRST
        detected hand, concatenated in landmark order. If no hand is detected,
        a zero vector of length ``21*3`` is returned so that every frame yields
        a feature vector.
    """
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    if hands is not None:
        # Reuse the caller's detector: no per-frame graph construction.
        results = hands.process(rgb_frame)
    else:
        with mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=2,
            min_detection_confidence=0.5,
        ) as detector:
            results = detector.process(rgb_frame)

    if not results.multi_hand_landmarks:
        return np.zeros(21*3)

    # Only the first reported hand is used, even if more than one was detected.
    first_hand = results.multi_hand_landmarks[0]
    return np.array(
        [[landmark.x, landmark.y, landmark.z] for landmark in first_hand.landmark]
    ).flatten()

In [None]:
DATA_PATH = os.path.join('/content/drive/MyDrive/MP_DATA')   # extracted keypoints go here
DATASET = '/content/drive/MyDrive/dataset/'                  # raw videos, one sub-folder per action
VIDEO_EXTENSIONS = ('.mov', '.mp4')                          # compared case-insensitively
sequence_length = 20                                         # frames kept per video

# Sorted so the action -> class-index mapping derived from `actions` is the same
# on every run; os.listdir returns entries in arbitrary order.
actions = sorted(
    folder for folder in os.listdir(DATASET)
    if os.path.isdir(os.path.join(DATASET, folder))
)

for action in actions:
    action_dir = os.path.join(DATASET, action)
    # Match extensions regardless of case (.MOV / .mov / .Mp4 ...), skip hidden
    # files such as macOS "._clip.MOV" sidecars, and sort so a given video always
    # maps to the same numbered output folder.
    video_paths = sorted(
        os.path.join(action_dir, name)
        for name in os.listdir(action_dir)
        if not name.startswith('.') and name.lower().endswith(VIDEO_EXTENSIONS)
    )
    print(f"Starting with {action}, found {len(video_paths)} videos.")

    for idx, video_path in enumerate(video_paths):
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"Error opening video file: {video_path}")
            cap.release()
            continue

        output_dir = os.path.join(DATA_PATH, action, str(idx))
        frames_saved = 0
        try:
            # Only the first `sequence_length` frames of each video are used.
            for frame_num in range(sequence_length):
                ret, frame = cap.read()
                if not ret:
                    print(f"End of video or read error at frame {frame_num} for video {video_path}")
                    break

                keypoints = extract_keypoints(frame)

                # Created lazily so videos that yield no frames leave no empty folder.
                os.makedirs(output_dir, exist_ok=True)
                np.save(os.path.join(output_dir, f"{frame_num}.npy"), keypoints)
                frames_saved += 1
        finally:
            # Release the capture handle even if extraction or saving raises.
            cap.release()

        if frames_saved > 0:
            print(f"Processed {frames_saved} frames, [{idx + 1}/{len(video_paths)}]")
        else:
            print(f"No frames were saved for video {video_path}")

In [None]:
from sklearn.model_selection import train_test_split

to_categorical = tf.keras.utils.to_categorical

# Class index of each action = its position in `actions`.
label_map = {label: num for num, label in enumerate(actions)}

sequences, labels = [], []
for action in actions:
    action_dir = os.path.join(DATA_PATH, action)
    if not os.path.isdir(action_dir):
        # No keypoints were ever written for this action (e.g. it had no videos).
        print(f"No extracted keypoints found for action '{action}', skipped.")
        continue

    for sequence_name in os.listdir(action_dir):
        frame_paths = [
            os.path.join(action_dir, sequence_name, f"{frame_num}.npy")
            for frame_num in range(sequence_length)
        ]
        window = [np.load(path) for path in frame_paths if os.path.exists(path)]

        # Keep only sequences that have every one of the expected frames.
        if len(window) == sequence_length:
            sequences.append(window)
            labels.append(label_map[action])
        else:
            # Report the folder that was skipped (not the frame loop counter).
            print(f"Incomplete sequence for action '{action}' and sequence '{sequence_name}' skipped.")

if not sequences:
    raise ValueError(f"No complete sequences of {sequence_length} frames found under {DATA_PATH}")

X = np.array(sequences)
# num_classes is fixed to len(actions) so y always has one column per action,
# even if the highest-indexed action contributed no complete sequence.
y = to_categorical(labels, num_classes=len(actions)).astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)

print(f"X shape: {X.shape}, y shape: {y.shape}")
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X shape: (28, 20, 63), y shape: (28, 2)
X_train shape: (26, 20, 63), y_train shape: (26, 2)
X_test shape: (2, 20, 63), y_test shape: (2, 2)


In [None]:
# Reuse a previously saved model when one exists; otherwise build and compile a
# fresh stacked-LSTM classifier with one softmax output per action.
saved_model_path = '/content/drive/MyDrive/model/my_model.keras'

if not os.path.exists(saved_model_path):
    model = Sequential([
        Input(shape=(sequence_length, 21*3)),
        LSTM(64, return_sequences=True, activation='relu'),
        LSTM(128, return_sequences=True, activation='relu'),
        LSTM(64, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(len(actions), activation='softmax'),
    ])
    model.compile(
        optimizer='Adam',
        loss='categorical_crossentropy',
        metrics=['categorical_accuracy'],
    )
else:
    model = load_model(saved_model_path)

model.summary()

## Train Model

In [None]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

model.fit(X_train, y_train, epochs=2000, callbacks=[tb_callback])

# Save to the absolute Drive path. Without the leading '/' the file would be
# written relative to the current working directory, not to the location the
# model-loading cell checks.
model_path = '/content/drive/MyDrive/model/my_model.keras'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)

## Evaluation

In [None]:
# Score the model on the held-out split; the cell output shows the loss followed
# by the metric value(s) the model was compiled with.
model.evaluate(X_test, y_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 602ms/step - categorical_accuracy: 0.5000 - loss: 15.3072


[15.30715274810791, 0.5]

In [None]:
from sklearn.metrics import multilabel_confusion_matrix, classification_report

# Every class id, so the metrics keep one entry per action even when the small
# test split happens to contain (or predict) only a subset of the classes.
class_ids = list(range(len(actions)))

yprob = model.predict(X_test)                   # per-class probabilities
ytrue = np.argmax(y_test, axis=1).tolist()      # one-hot -> class id
yhat = np.argmax(yprob, axis=1).tolist()        # most probable class id

print(multilabel_confusion_matrix(ytrue, yhat, labels=class_ids))
# target_names shows action names instead of bare indices; zero_division=0 keeps
# the reported 0.0 for classes that were never predicted, without the warnings.
print(classification_report(ytrue, yhat, labels=class_ids, target_names=actions, zero_division=0))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[[[1 0]
  [1 0]]

 [[0 1]
  [0 1]]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.50      1.00      0.67         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Predict

In [None]:
def predict_action(sequence):
    """Return the name of the action the model scores highest for one sequence.

    Args:
        sequence: A single keypoint sequence. A leading batch axis is added
            before it is passed to ``model.predict``, so it should NOT already
            be batched (presumably shaped like one entry of ``X`` - confirm
            against the model's input).

    Returns:
        The entry of ``actions`` at the index of the highest predicted score.
    """
    batch = np.expand_dims(sequence, axis=0)
    scores = model.predict(batch)[0]
    best_index = np.argmax(scores)
    return actions[best_index]