In [9]:
# import os
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress INFO and WARNING logs
import tensorflow as tf
# Report which TensorFlow build this kernel is running.
print("TensorFlow version:", tf.__version__)

# Turn on memory growth for every visible GPU. All devices are configured
# inside a single try block, so the first RuntimeError stops the loop and is
# printed in place of the success message.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
    except RuntimeError as err:
        print(err)
    else:
        print("GPU memory growth enabled")

TensorFlow version: 2.13.0


In [11]:
# Re-query the physical GPU list and report how many devices were found.
gpus = tf.config.experimental.list_physical_devices('GPU')
if not gpus:
    print("No GPUs found")
else:
    print("Physical GPUs found:", len(gpus))

No GPUs found


In [13]:
# Count the logical GPU devices TensorFlow reports.
logical_gpus = tf.config.experimental.list_logical_devices('GPU')
logical_gpu_count = len(logical_gpus)
print("Logical GPUs:", logical_gpu_count)

Logical GPUs: 0


In [15]:
import tensorflow as tf

# `tf.test.is_gpu_available()` is deprecated; a non-empty physical-device list
# is the supported way to check whether TensorFlow can see a GPU.
gpus = tf.config.list_physical_devices('GPU')
print("Is GPU Available: ", bool(gpus))

for gpu in gpus:
    print("Name:", gpu.name, "  Type:", gpu.device_type)

# Tiny (2x3) @ (3x2) matmul as a smoke test that ops execute on this install.
a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
c = tf.matmul(a, b)

print(c)

Is GPU Available:  False
tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)


In [3]:
from tensorflow.keras.mixed_precision import set_global_policy
# Name of the Keras dtype policy installed as the global default.
MIXED_PRECISION_POLICY = 'mixed_float16'
set_global_policy(MIXED_PRECISION_POLICY)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 4070 Laptop GPU, compute capability 8.9


In [5]:
import shutil
import os

# Define paths
parent_path = "D:/RNN/GestureRecognition/NewlyExtracted"
nested_path = os.path.join(parent_path, "Project_data")

# Flatten the extracted archive: lift everything out of the nested
# "Project_data" folder into its parent. Nothing happens when the nested
# folder is absent, so the cell can be re-run safely.
if os.path.isdir(nested_path):
    for item in os.listdir(nested_path):
        source = os.path.join(nested_path, item)
        destination = os.path.join(parent_path, item)

        # shutil.move() places `source` *inside* `destination` when the latter
        # is an existing directory (e.g. train/train), so never move onto a
        # name that is already taken; leave the item where it is instead.
        if os.path.exists(destination):
            print(f"Skipping {item!r}: {destination} already exists")
            continue

        shutil.move(source, destination)

    # Remove the nested folder only once it is really empty; os.rmdir()
    # raises on a non-empty directory.
    if not os.listdir(nested_path):
        os.rmdir(nested_path)

print(f"Contents of {parent_path}:")
print(os.listdir(parent_path))

Contents of D:/RNN/GestureRecognition/NewlyExtracted:
['combined_dataset', 'split_dataset', 'train', 'train.csv', 'train_fixed.csv', 'val', 'val.csv', 'val_fixed.csv']


In [7]:
import numpy as np
import tensorflow as tf
import random

SEED = 42

# Seed Python's `random`, NumPy and TensorFlow (in that order) with one value.
for seeder in (random.seed, np.random.seed, tf.random.set_seed):
    seeder(SEED)

In [15]:
import tensorflow as tf
import os
import pandas as pd
class GestureDataLoader(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches of gesture frame sequences.

    The CSV at ``csv_path`` must provide two columns per sample:
    ``file_path`` (a path whose parent directory holds the sample's frames)
    and ``gesture_label`` (a value convertible to an integer class index).

    Args:
        csv_path: CSV file listing the samples.
        batch_size: Maximum number of samples per batch; must be positive.
        sequence_length: Number of frames loaded for every sample.
        frame_size: ``(height, width)`` each frame is resized to.
        num_classes: Length of the one-hot label vectors.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """

    def __init__(self, csv_path, batch_size, sequence_length, frame_size, num_classes):
        super().__init__()  # Call the parent class constructor to handle additional arguments
        if batch_size <= 0:
            # Fail here rather than with a ZeroDivisionError inside __len__.
            raise ValueError(f"batch_size must be positive, got: {batch_size}")
        self.data = pd.read_csv(csv_path)
        self.batch_size = batch_size
        self.sequence_length = sequence_length
        self.frame_size = frame_size
        self.num_classes = num_classes

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        # Ceiling division: trailing samples that do not fill a whole batch
        # are still served instead of being silently dropped.
        return (len(self.data) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, index):
        """Return the ``(X, y)`` arrays of batch number ``index``.

        Raises:
            IndexError: If ``index`` selects no rows of the CSV.
        """
        start = index * self.batch_size
        batch_rows = self.data.iloc[start:start + self.batch_size]
        if batch_rows.empty:
            raise IndexError(f"Batch index out of range: {index}")
        return self.__data_generation(batch_rows)

    def __data_generation(self, batch_indices):
        """Load frames and one-hot labels for the given CSV rows.

        Args:
            batch_indices: DataFrame slice with the rows of one batch.

        Returns:
            ``X`` of shape ``(rows, sequence_length, *frame_size, 3)`` and
            ``y`` of shape ``(rows, num_classes)``, both float32.
        """
        # Size the arrays by the rows actually present so a short final batch
        # never exposes uninitialised np.empty() memory.
        n_samples = len(batch_indices)
        X = np.empty((n_samples, self.sequence_length, *self.frame_size, 3), dtype=np.float32)  # RGB frames
        y = np.empty((n_samples, self.num_classes), dtype=np.float32)

        samples = zip(batch_indices['file_path'], batch_indices['gesture_label'])
        for i, (file_path, label) in enumerate(samples):
            # The frames of a sample live in the directory containing file_path.
            X[i] = self._load_sequence(os.path.dirname(file_path))

            # One-hot encode label
            y[i] = tf.keras.utils.to_categorical(int(label), num_classes=self.num_classes)

        return X, y

    def _load_sequence(self, folder_path):
        """Load the first ``sequence_length`` frames found in ``folder_path``.

        Returns:
            Array of shape ``(sequence_length, *frame_size, 3)``.

        Raises:
            ValueError: If ``folder_path`` is not a directory or holds fewer
                than ``sequence_length`` entries.
        """
        if not os.path.isdir(folder_path):  # Ensure it's a directory
            raise ValueError(f"Expected directory, got: {folder_path}")

        # NOTE(review): entries are ordered lexicographically, which matches
        # temporal order only for zero-padded frame names - confirm.
        files = sorted(os.listdir(folder_path))[:self.sequence_length]
        if len(files) < self.sequence_length:
            # Without this check a single frame would be broadcast over every
            # timestep and other short folders would fail with an opaque
            # NumPy broadcasting error.
            raise ValueError(
                f"Expected at least {self.sequence_length} frames in {folder_path}, "
                f"found {len(files)}"
            )

        frames = [
            tf.keras.utils.img_to_array(
                tf.keras.utils.load_img(
                    os.path.join(folder_path, file_name),
                    target_size=self.frame_size,
                )
            )
            for file_name in files
        ]
        return np.array(frames)

In [51]:
BASE_PATH = "D:/RNN/GestureRecognition/NewlyExtracted"
train_csv_path = f"{BASE_PATH}/train.csv"
val_csv_path = f"{BASE_PATH}/val.csv"

# Both loaders are built with identical batching and frame settings.
loader_settings = dict(batch_size=4, sequence_length=30, frame_size=(64, 64), num_classes=763)

# Training loader: fetch the first batch and print its shapes as a check.
train_loader = GestureDataLoader(train_csv_path, **loader_settings)
X_train, y_train = train_loader[0]
print(f"Train Batch Shape (X): {X_train.shape}")
print(f"Train Batch Shape (y): {y_train.shape}")

# Validation loader: same check on its first batch.
val_loader = GestureDataLoader(val_csv_path, **loader_settings)
X_val, y_val = val_loader[0]
print(f"Validation Batch Shape (X): {X_val.shape}")
print(f"Validation Batch Shape (y): {y_val.shape}")

Train Batch Shape (X): (16, 30, 64, 64, 3)
Train Batch Shape (y): (16, 763)
Validation Batch Shape (X): (16, 30, 64, 64, 3)
Validation Batch Shape (y): (16, 763)


In [33]:
# Model dimensions, read back from the training loader so they cannot drift
# from the batches it produces.
sequence_length = train_loader.sequence_length  # frames per sample
frame_size = train_loader.frame_size            # (height, width)
num_classes = train_loader.num_classes          # number of gesture classes

# (timesteps, height, width, channels) with 3 colour channels
frame_height, frame_width = frame_size[0], frame_size[1]
input_shape = (sequence_length, frame_height, frame_width, 3)

In [39]:
from tensorflow.keras import layers, models, regularizers

# Define the model: a small per-frame CNN wrapped in TimeDistributed, followed
# by a bidirectional LSTM over the frame features and a dense classifier.
model = models.Sequential([
    # Input layer
    layers.Input(shape=(sequence_length, None, None, 3)),  # Input shape: (timesteps, height, width, channels)

    # Smaller CNN layers for feature extraction
    layers.TimeDistributed(layers.Conv2D(4, (3, 3), activation='relu', padding='same')),
    layers.TimeDistributed(layers.BatchNormalization()),
    layers.TimeDistributed(layers.MaxPooling2D(pool_size=(2, 2), padding='same')),

    layers.TimeDistributed(layers.Conv2D(8, (3, 3), activation='relu', padding='same')),
    layers.TimeDistributed(layers.BatchNormalization()),
    layers.TimeDistributed(layers.MaxPooling2D(pool_size=(2, 2), padding='same')),

    layers.TimeDistributed(layers.Conv2D(16, (3, 3), activation='relu', padding='same')),
    layers.TimeDistributed(layers.BatchNormalization()),
    layers.TimeDistributed(layers.MaxPooling2D(pool_size=(2, 2), padding='same')),

    # Global average pooling to reduce parameters
    layers.TimeDistributed(layers.GlobalAveragePooling2D()),

    # RNN layer for temporal modeling
    layers.Bidirectional(layers.LSTM(32, return_sequences=False, recurrent_activation='sigmoid')),  # cuDNN-compatible configuration

    # Fully connected layers
    layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),  # L2 regularization
    layers.Dropout(0.5),

    # Output layer. The dtype is pinned to float32 so that, when a
    # mixed_float16 global policy is active, the softmax probabilities (and
    # the loss computed from them) are not produced in float16. Under the
    # default float32 policy this argument changes nothing.
    layers.Dense(num_classes, activation='softmax', dtype='float32')  # Softmax for multi-class classification
])

# Compile the model; categorical cross-entropy expects one-hot labels.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Print the model summary
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed_30 (TimeDi  (None, 30, None, None, 4  112      
 stributed)                  )                                   
                                                                 
 time_distributed_31 (TimeDi  (None, 30, None, None, 4  16       
 stributed)                  )                                   
                                                                 
 time_distributed_32 (TimeDi  (None, 30, None, None, 4  0        
 stributed)                  )                                   
                                                                 
 time_distributed_33 (TimeDi  (None, 30, None, None, 8  296      
 stributed)                  )                                   
                                                                 
 time_distributed_34 (TimeDi  (None, 30, None, None, 8

In [41]:
# Directory-style location reserved for saved models.
model_save_location = "best-models/GestureRecognition+Conv2D+LSTM"

In [45]:
# Define callbacks
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
# Write a checkpoint only when validation accuracy improves.
checkpoint_cb = ModelCheckpoint(
    'gesture_model_best.keras',
    monitor='val_accuracy',
    save_best_only=True,
)
# Stop after 10 epochs without a validation-accuracy improvement and restore
# the best weights seen.
early_stopping_cb = EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
)
callbacks = [checkpoint_cb, early_stopping_cb]

In [None]:
# Train the model for up to 100 epochs on the loader batches; `history`
# keeps the per-epoch metrics returned by fit().
history = model.fit(
    x=train_loader,
    validation_data=val_loader,
    epochs=100,
    callbacks=callbacks,
    verbose=1
)

# TODO: plot the curves stored in `history.history`. No plotting helper is
# defined in the visible cells, so nothing is referenced here; a bare
# undefined name at the end of the cell would raise NameError after training.

Epoch 1/100
