In [8]:
import os

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Conv3D,
    LSTM,
    Dense,
    Dropout,
    Bidirectional,
    MaxPooling3D,
    Activation,
    Reshape,
    SpatialDropout3D,
    BatchNormalization,
    TimeDistributed,
    Flatten,
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

2024-06-01 15:12:42.826680: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-01 15:12:42.826794: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-01 15:12:42.992333: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Dataset configuration: gesture classes, data location and sequence geometry
actions = np.array(['MoveRight', 'MoveLeft', 'ThumbsUp', 'ThumbsDown', 'Stop', 'Circle', 'FistClose', 'Victory'])
data_path = "/kaggle/input/hand-gesture-recogniton/Action_Recognition"
no_sequences = 100     # recorded sequences read per action
sequence_length = 60   # frames read per sequence
# NOTE(review): the original assignment on this line was corrupted; the variable
# name is a reconstruction and 1662 is the per-frame feature count seen in the
# loaded arrays -- confirm both against the dataset.
feature_dim = 1662     # Adjust this based on the actual features per frame
num_classes = len(actions)

In [3]:
# Echo the configuration so the run log records what was used
print(
    f"Actions: {actions}",
    f"Number of classes: {num_classes}",
    f"Data path: {data_path}",
    sep="\n",
)

Actions: ['MoveRight' 'MoveLeft' 'ThumbsUp' 'ThumbsDown' 'Stop' 'Circle'
 'FistClose' 'Victory']
Number of classes: 8
Data path: /kaggle/input/hand-gesture-recogniton/Action_Recognition


In [4]:
# Map each action to an integer
label_map = {label: num for num, label in enumerate(actions)}
print("Label map:", label_map)

Label map: {'MoveRight': 0, 'MoveLeft': 1, 'ThumbsUp': 2, 'ThumbsDown': 3, 'Stop': 4, 'Circle': 5, 'FistClose': 6, 'Victory': 7}


In [6]:
import os

# Read every stored frame from disk, group the frames into one window per
# recorded sequence, and tag each window with the class id of its action.
sequences = []
labels = []
for action in actions:
    class_id = label_map[action]
    for seq_idx in range(no_sequences):
        seq_dir = os.path.join(data_path, action, str(seq_idx))
        frames = [
            np.load(os.path.join(seq_dir, f"{frame_idx}.npy"))
            for frame_idx in range(sequence_length)
        ]
        sequences.append(frames)
        labels.append(class_id)
    print(f"Loaded data for action: {action}")

Loaded data for action: MoveRight
Loaded data for action: MoveLeft
Loaded data for action: ThumbsUp
Loaded data for action: ThumbsDown
Loaded data for action: Stop
Loaded data for action: Circle
Loaded data for action: FistClose
Loaded data for action: Victory


In [9]:
# Convert labels to one-hot encoding and lists to numpy arrays
labels = to_categorical(labels, num_classes=num_classes)
sequences = np.array(sequences)

In [10]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(sequences, labels, test_size=0.05, random_state=42)
print(f"X_train shape: {X_train.shape}, X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}, y_test shape: {y_test.shape}")

X_train shape: (760, 60, 1662), X_test shape: (40, 60, 1662)
y_train shape: (760, 8), y_test shape: (40, 8)


In [12]:
# Build the Conv3D + bidirectional-LSTM gesture classifier.
# Input geometry and class count come from the config/data rather than being
# repeated as literals, so the network follows the dataset if either changes.
n_frames = sequence_length
n_features = X_train.shape[-1]

model = Sequential()

# Insert singleton height and channel axes for the 3D convolutions:
# (frames, features) -> (frames, 1, features, 1).
# The declared input shape matches the rank-3 arrays that are actually fed to
# fit/predict instead of relying on an implicit trailing-axis expansion.
model.add(Reshape((n_frames, 1, n_features, 1), input_shape=(n_frames, n_features)))

# 3D Convolutional layers (kernels and pools act on the frame and feature axes;
# the singleton middle axis is left untouched by the size-1 entries)
model.add(Conv3D(filters=64, kernel_size=(3, 1, 3), activation='relu', padding='same'))
model.add(MaxPooling3D(pool_size=(2, 1, 2)))
model.add(Dropout(0.5))

model.add(Conv3D(filters=64, kernel_size=(3, 1, 3), activation='relu', padding='same'))
model.add(MaxPooling3D(pool_size=(2, 1, 2)))
model.add(Conv3D(filters=128, kernel_size=(3, 1, 3), activation='relu', padding='same'))
model.add(Dropout(0.2))

# Flatten everything except the (pooled) frame axis so each remaining time step
# becomes a single feature vector for the recurrent layers.
model.add(TimeDistributed(Flatten()))

# LSTM layers to capture temporal dependencies
model.add(Bidirectional(LSTM(128, return_sequences=True)))
model.add(Dropout(0.2))
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.5))

# Fully connected layers; the softmax width follows the number of gesture classes
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))
print("Model has been built.")

Model has been built.


In [13]:
# Compile the model
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
print("Model compilation complete.")

Model compilation complete.


In [14]:
# Callbacks
tb_callback = TensorBoard(log_dir='./logs', update_freq='batch')
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
print("Callbacks have been set.")

Callbacks have been set.


In [15]:
# Train the model
print("Starting model training...")
model.fit(X_train, y_train, validation_data=(X_test, y_test),
          epochs=100, callbacks=[early_stopping, tb_callback])
print("Model training complete.")

Starting model training...
Epoch 1/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 678ms/step - categorical_accuracy: 0.1160 - loss: 2.1219 - val_categorical_accuracy: 0.1000 - val_loss: 2.0859
Epoch 2/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 406ms/step - categorical_accuracy: 0.1174 - loss: 2.0898 - val_categorical_accuracy: 0.1750 - val_loss: 2.0772
Epoch 3/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 397ms/step - categorical_accuracy: 0.1248 - loss: 2.0780 - val_categorical_accuracy: 0.2250 - val_loss: 2.0779
Epoch 4/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 403ms/step - categorical_accuracy: 0.1545 - loss: 2.0774 - val_categorical_accuracy: 0.0500 - val_loss: 2.0796
Epoch 5/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 406ms/step - categorical_accuracy: 0.1287 - loss: 2.0770 - val_categorical_accuracy: 0.0500 - val_loss: 2.0793
Epoch 6/100
[1m24/24[0m [32m━━

In [16]:
# Prediction
print("Starting prediction...")
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)
print("Prediction complete.")

Starting prediction...
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 500ms/step
Prediction complete.


In [17]:
# Evaluation
accuracy = accuracy_score(y_true, y_pred_classes)
f1 = f1_score(y_true, y_pred_classes, average='weighted')
print(f'Accuracy: {accuracy}')
print(f'F1 Score: {f1}')

Accuracy: 0.725
F1 Score: 0.7039661319073084


In [18]:
# Selecting a few samples (e.g., 5 samples) from the test set for prediction
num_samples_to_predict = 5
sample_indices = np.random.choice(X_test.shape[0], num_samples_to_predict, replace=False)  # Randomly pick indices without replacement
sample_data = X_test[sample_indices]
sample_labels = y_test[sample_indices]

In [19]:
# Make predictions
sample_predictions = model.predict(sample_data)
sample_pred_classes = np.argmax(sample_predictions, axis=1)
sample_true_classes = np.argmax(sample_labels, axis=1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 985ms/step


In [20]:
# Print the results
print("Sample predictions (class indices):", sample_pred_classes)
print("Actual labels (class indices):", sample_true_classes)

Sample predictions (class indices): [3 3 7 2 0]
Actual labels (class indices): [6 3 6 2 0]


In [21]:
# Map indices back to action names for clearer interpretation
predicted_actions = [actions[idx] for idx in sample_pred_classes]
actual_actions = [actions[idx] for idx in sample_true_classes]

In [22]:
# Selecting a few samples (e.g., 5 samples) from the test set for prediction
num_samples_to_predict = 8
sample_indices = np.random.choice(X_test.shape[0], num_samples_to_predict, replace=False)  # Randomly pick indices without replacement
sample_data = X_test[sample_indices]
sample_labels = y_test[sample_indices]

In [23]:
# Make predictions
sample_predictions = model.predict(sample_data)
sample_pred_classes = np.argmax(sample_predictions, axis=1)
sample_true_classes = np.argmax(sample_labels, axis=1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step


In [24]:
# Print the results
print("Sample predictions (class indices):", sample_pred_classes)
print("Actual labels (class indices):", sample_true_classes)

Sample predictions (class indices): [2 4 3 2 5 7 3 7]
Actual labels (class indices): [2 6 3 7 5 6 3 6]


In [25]:
# Map indices back to action names for clearer interpretation
predicted_actions = [actions[idx] for idx in sample_pred_classes]
actual_actions = [actions[idx] for idx in sample_true_classes]

In [26]:
# Display the gesture names for the sampled predictions next to the ground truth
print(
    "Predicted actions:", predicted_actions
)
print(
    "Actual actions:", actual_actions
)

Predicted actions: ['ThumbsUp', 'Stop', 'ThumbsDown', 'ThumbsUp', 'Circle', 'Victory', 'ThumbsDown', 'Victory']
Actual actions: ['ThumbsUp', 'FistClose', 'ThumbsDown', 'Victory', 'Circle', 'FistClose', 'ThumbsDown', 'FistClose']
