In [196]:
import os

import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical



In [None]:
# Dataset configuration: root folder holding one sub-folder of videos per action,
# the number of landmark frames kept per video, and the class labels.
DATA_PATH = 'abc'
sequence_length = 30
actions = np.array(['A', 'B', 'C'])

# Single MediaPipe Hands instance shared by every call to extract_landmarks().
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.5,
)

In [198]:
def extract_landmarks(video_path):
    """Turn a video into a fixed-length sequence of flattened hand landmarks.

    Frames are read until `sequence_length` frames with a detected hand have
    been collected or the video ends. Frames in which no hand is detected are
    skipped, not recorded as zeros. Each kept frame contributes one row of
    21 * 2 values: the x and y of every landmark of the first detected hand,
    in landmark order.

    Args:
        video_path: Path of the video file, passed to cv2.VideoCapture.

    Returns:
        A list of exactly `sequence_length` rows, each a list of 42 numbers.
        Rows are zero-padded at the end when fewer detections were found; a
        video from which no frame can be read therefore yields only zero rows.

    Relies on the notebook-level globals `hands` and `sequence_length`.
    """
    num_landmarks = 21      # landmarks per hand
    coords_per_landmark = 2  # only x and y are kept
    row_width = num_landmarks * coords_per_landmark

    # NOTE(review): `hands` is shared across calls and was created with
    # static_image_mode=False, so tracker state may carry over from one video
    # to the next -- confirm this is intended.
    cap = cv2.VideoCapture(video_path)
    landmarks_sequence = []
    try:
        while len(landmarks_sequence) < sequence_length:
            ret, frame = cap.read()
            if not ret:
                break  # end of video (or unreadable file)

            # MediaPipe is fed RGB frames; OpenCV decodes to BGR.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(rgb_frame)
            if not results.multi_hand_landmarks:
                continue  # no hand in this frame: skip it

            hand_landmarks = results.multi_hand_landmarks[0]
            landmarks_sequence.append(
                [coord for lm in hand_landmarks.landmark for coord in (lm.x, lm.y)]
            )
    finally:
        # Release the capture even if decoding or detection raised.
        cap.release()

    # Pad with float zeros so padded rows have the same element type as real rows.
    missing = sequence_length - len(landmarks_sequence)
    landmarks_sequence.extend([0.0] * row_width for _ in range(missing))

    return landmarks_sequence

In [204]:
# Sanity check on one video: the result should have shape (sequence_length, 42).
np.array(extract_landmarks('abc/A/10800A0911.mp4')).shape

(30, 42)

In [4]:
# Collect data: one fixed-length landmark sequence per video, labelled with the
# index of its action inside `actions`.
VIDEO_EXTENSIONS = ('.mp4', '.avi', '.mov')  # matched case-insensitively
RANDOM_STATE = 42                             # fixed seed -> reproducible split

sequences, labels = [], []
for label, action in enumerate(actions):
    action_path = os.path.join(DATA_PATH, action)

    # os.listdir order is unspecified; sorting keeps the sample order (and
    # therefore the seeded split below) stable between runs and machines.
    video_files = sorted(
        f for f in os.listdir(action_path) if f.lower().endswith(VIDEO_EXTENSIONS)
    )

    for video_file in video_files:
        video_path = os.path.join(action_path, video_file)
        window = extract_landmarks(video_path)
        sequences.append(window)
        labels.append(label)

if not sequences:
    raise ValueError(f"No video files found under {DATA_PATH!r} for actions {actions.tolist()}")

# Convert to numpy arrays. num_classes is pinned to len(actions) so the one-hot
# width matches the label set even if the last action folder has no videos.
X = np.array(sequences)
y = to_categorical(labels, num_classes=len(actions)).astype(int)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.05, random_state=RANDOM_STATE
)

print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")

# Save the processed data
np.save("X_train.npy", X_train)
np.save("X_test.npy", X_test)
np.save("y_train.npy", y_train)
np.save("y_test.npy", y_test)

print("Data processing completed and saved.")



Training data shape: (53, 30, 42)
Testing data shape: (3, 30, 42)
Data processing completed and saved.


In [98]:
# Full label vocabulary used by the classifier; this replaces the three-class
# `actions` defined earlier in the notebook. The position of a label in this
# array is its class index.
actions = np.array([
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'A', 'ADIOS', 'B', 'C', 'D', 'DISCULPA', 'E', 'F', 'G', 'GRACIAS',
    'H', 'HERMANA', 'HOLA', 'I', 'J', 'K', 'L', 'LL', 'M', 'MAMA',
    'N', 'NN', 'O', 'P', 'PAPA', 'PORFAVOR', 'Q', 'R', 'S', 'T',
    'U', 'V', 'W', 'X', 'Y', 'Z',
])
len(actions)

46

In [100]:
# Reload the train/test arrays from the .npy files in the working directory.
# NOTE(review): the recorded training log (41 steps per epoch) and the 46-way
# model output suggest these files hold a larger dataset than the 53-sample,
# three-class run saved above -- confirm which files are expected here.
X_train, y_train = np.load('X_train.npy'), np.load('y_train.npy')
X_test, y_test = np.load('X_test.npy'), np.load('y_test.npy')

In [144]:
# Build the LSTM classifier and restore previously trained weights.
# The input is declared with an explicit Input object rather than an
# `input_shape=` argument on the first LSTM, which Keras warns about for
# Sequential models.
model = Sequential([
    # (frames per sequence, 21 landmarks * 2 coordinates)
    tf.keras.Input(shape=(30, 42)),
    LSTM(64, return_sequences=True, activation='relu'),
    LSTM(128, return_sequences=True, activation='relu'),
    # Last recurrent layer emits only its final state for the dense head.
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    # One softmax unit per label in `actions`.
    Dense(actions.shape[0], activation='softmax'),
])

# The weights file must come from a model with this same layer layout,
# including the width of the output layer.
model.load_weights('hand_action (1).h5')

  super().__init__(**kwargs)


In [120]:
# Train on the training split, logging to TensorBoard under ./logs.
tensorboard_cb = TensorBoard(log_dir='logs')

model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)
model.fit(X_train, y_train, epochs=200, callbacks=[tensorboard_cb])

# Persist the trained model (architecture + weights) for the TFLite conversion.
model.save('final_model.h5')

Epoch 1/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 25ms/step - categorical_accuracy: 0.8109 - loss: 0.4901
Epoch 2/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - categorical_accuracy: 0.8577 - loss: 0.3064
Epoch 3/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - categorical_accuracy: 0.8587 - loss: 0.3099
Epoch 4/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - categorical_accuracy: 0.8207 - loss: 0.4859
Epoch 5/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - categorical_accuracy: 0.8440 - loss: 0.3386
Epoch 6/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - categorical_accuracy: 0.8711 - loss: 0.2655
Epoch 7/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - categorical_accuracy: 0.8731 - loss: 0.2856
Epoch 8/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 



In [146]:
# Evaluation: compare the arg-max class index of each one-hot test label with
# the arg-max of the model's predicted class scores.
ytrue = y_test.argmax(axis=1).tolist()
yhat = model.predict(X_test).argmax(axis=1).tolist()
print(accuracy_score(ytrue, yhat))

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 201ms/step
0.8115942028985508


In [148]:
# Inspect the model's output tensor(s) to confirm the width of the final softmax layer.
model.outputs


[<KerasTensor shape=(None, 46), dtype=float32, sparse=False, name=keras_tensor_294>]

In [158]:
# Reload the saved Keras model and build a TFLite converter for it.
# Note that this rebinds the notebook-level `model`.
model = tf.keras.models.load_model("final_model.h5")
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Prefer TFLite's built-in kernels and fall back to TensorFlow ops
# (TF Select) for anything the built-ins do not cover.
op_sets = tf.lite.OpsSet
converter.target_spec.supported_ops = [
    op_sets.TFLITE_BUILTINS,
    op_sets.SELECT_TF_OPS,
]

# Run the conversion; the result is the serialized model as bytes.
tflite_model = converter.convert()

# Write the serialized model next to the notebook.
with open("lstm_model.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)



INFO:tensorflow:Assets written to: C:\Users\DHO_d\AppData\Local\Temp\tmpcv0gr00x\assets


INFO:tensorflow:Assets written to: C:\Users\DHO_d\AppData\Local\Temp\tmpcv0gr00x\assets


Saved artifact at 'C:\Users\DHO_d\AppData\Local\Temp\tmpcv0gr00x'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 30, 42), dtype=tf.float32, name='input_layer_5')
Output Type:
  TensorSpec(shape=(None, 46), dtype=tf.float32, name=None)
Captures:
  2288395112464: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2288395119376: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2288417216784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2288417222736: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2288417215056: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2288417216208: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2288417217552: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2288417217360: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2288417221008: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2288417218896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  22884172

In [160]:
# Create a TFLite interpreter for the converted model file.
interpreter = tf.lite.Interpreter(model_path="lstm_model.tflite")

In [162]:
# Allocate the interpreter's tensors before it is used.
interpreter.allocate_tensors()

# Keep the input/output tensor metadata (index, shape, dtype, ...) at notebook
# level and print it so the expected shapes and types can be checked by eye.
input_details, output_details = (
    interpreter.get_input_details(),
    interpreter.get_output_details(),
)

print("Input details:", input_details)
print("Output details:", output_details)

Input details: [{'name': 'serving_default_input_layer_5:0', 'index': 0, 'shape': array([ 1, 30, 42]), 'shape_signature': array([-1, 30, 42]), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
Output details: [{'name': 'StatefulPartitionedCall_1:0', 'index': 64, 'shape': array([ 1, 46]), 'shape_signature': array([-1, 46]), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


In [190]:

def predict_tflite(interpreter, input_data):
    """Run one inference pass on a TFLite interpreter and return its output.

    The input and output tensor indices are read from `interpreter` itself
    rather than from notebook-level variables, so the function works with any
    interpreter passed in.

    Args:
        interpreter: Object exposing get_input_details(), get_output_details(),
            set_tensor(), invoke() and get_tensor(); its tensors must already
            be allocated.
        input_data: Value written to the interpreter's first input tensor. The
            caller is responsible for giving it the shape and dtype that
            tensor expects.

    Returns:
        The content of the interpreter's first output tensor after invoke().
    """
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    interpreter.set_tensor(input_index, input_data)
    interpreter.invoke()
    return interpreter.get_tensor(output_index)

# Evaluate the TFLite model on the test split, one sample at a time.
# The class indices go into a separate variable instead of overwriting
# `y_test`: the one-hot array stays intact, so this cell and the Keras
# evaluation cell can both be re-run in any order.
y_test_labels = np.argmax(y_test, axis=1) if y_test.ndim > 1 else y_test

# Running inference and getting predictions
y_pred = []
for sample in X_test:
    input_data = np.float32(np.expand_dims(sample, axis=0))  # Add batch dimension
    prediction = predict_tflite(interpreter, input_data)
    y_pred.append(np.argmax(prediction, axis=1)[0])  # Get the predicted class

# Calculate accuracy
accuracy = accuracy_score(y_test_labels, y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.9130434782608695
