In [2]:
import numpy as np
import os
import random as rn
import datetime

# If using imageio or skimage, install or import accordingly:
from imageio import imread
from skimage.transform import resize

In [3]:
# Seed NumPy and Python's `random` module so shuffling is repeatable across runs
np.random.seed(30)
rn.seed(30)

import tensorflow as tf

# Seed TensorFlow's global random generator (TF 2.x API)
tf.random.set_seed(30)

# Let TensorFlow allocate GPU memory on demand instead of reserving it all up front.
# The setting is applied to every visible GPU, not only the first one, because
# TensorFlow requires memory growth to be configured identically on all GPUs.
gpus = tf.config.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Memory growth can only be changed before the GPUs have been initialised
    print(e)

2024-12-31 13:43:18.667026: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1735632798.679732   53324 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1735632798.683383   53324 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-31 13:43:18.698121: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Paths to training and validation CSV files
train_csv = 'Project_data/train.csv'
val_csv   = 'Project_data/val.csv'

# Paths to folders containing subfolders of frames
train_path = 'Project_data/train'
val_path   = 'Project_data/val'

# Read the CSV lines (one line per video), then shuffle them.
# `with` closes each file as soon as its lines are read instead of leaving
# the handle open until garbage collection.
with open(train_csv) as train_file:
    train_doc = np.random.permutation(train_file.readlines())
with open(val_csv) as val_file:
    val_doc = np.random.permutation(val_file.readlines())

# Number of classes (gestures): 5 (Thumbs up/down, Left/Right swipe, Stop)
num_classes = 5

# Adjust as needed
batch_size  = 8
num_epochs  = 20

In [5]:
def _load_video_batch(source_path, video_lines, img_idx, final_height, final_width):
    """
    Build one (batch_data, batch_labels) pair for the given CSV lines.

    source_path : path to train/val folder (one subfolder of frames per video)
    video_lines : CSV lines; split on ';', field 0 is the subfolder name and
                  field 2 is the integer class label
    img_idx     : positions, within the sorted frame listing, of the frames to load
    final_height, final_width : size every frame is resized to

    Returns batch_data with shape (len(video_lines), len(img_idx), H, W, 3) and
    one-hot batch_labels with shape (len(video_lines), num_classes).
    """
    batch_data = np.zeros((len(video_lines), len(img_idx), final_height, final_width, 3))
    batch_labels = np.zeros((len(video_lines), num_classes))

    for row, line in enumerate(video_lines):
        fields = line.strip().split(';')
        folder_path = os.path.join(source_path, fields[0])
        gesture_label = int(fields[2])

        # Sort the listing so frame positions follow file-name order
        frame_files = sorted(os.listdir(folder_path))

        for slot, frame_num in enumerate(img_idx):
            image_path = os.path.join(folder_path, frame_files[frame_num])
            image = imread(image_path).astype(np.float32)
            # Resize, then divide by 255 (scales to [0, 1] assuming 8-bit frames)
            batch_data[row, slot] = resize(image, (final_height, final_width)) / 255.0

        # One-hot label
        batch_labels[row, gesture_label] = 1

    return batch_data, batch_labels


def generator(source_path, folder_list, batch_size, img_idx=None, image_size=(64, 64)):
    """
    Endlessly yield (batch_data, batch_labels) batches of videos.

    source_path: path to train/val folder
    folder_list: list of lines from train.csv or val.csv
    batch_size : how many videos per batch
    img_idx    : frame positions to load from each video; defaults to all 30
                 frames (list(range(30)))
    image_size : (height, width) each frame is resized to; defaults to (64, 64)

    The videos are reshuffled at the start of every pass. Each pass yields the
    full batches first and then, if len(folder_list) is not a multiple of
    batch_size, one smaller batch holding the leftover videos.

    Raises ValueError (on the first next()) if folder_list is empty or
    batch_size < 1, since the endless loop could otherwise never yield.
    """
    print('Source path = ', source_path, '; batch size =', batch_size)

    if len(folder_list) == 0:
        raise ValueError('folder_list is empty: there is nothing to generate batches from')
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    if img_idx is None:
        img_idx = list(range(30))
    final_height, final_width = image_size

    while True:
        # Shuffle the folder list (videos) every epoch
        t = np.random.permutation(folder_list)

        # Slicing past the end simply returns the shorter leftover batch, so the
        # full batches and the final partial batch share one code path.
        for start in range(0, len(t), batch_size):
            yield _load_video_batch(
                source_path,
                t[start:start + batch_size],
                img_idx,
                final_height,
                final_width,
            )

In [6]:
from keras.models import Sequential
from keras.layers import (
    Dense, GRU, TimeDistributed,
    Conv2D, MaxPooling2D, Flatten,
    Dropout, Input
)
from keras.optimizers import Adam

# Conv2D + GRU architecture using TimeDistributed
# Input shape = (None, T, H, W, C) => T = number of frames per video
# The generator loads all 30 frames of each video (img_idx = list(range(30)))
# and resizes them to 64x64, so the declared input shape must match that.
input_frames = 30
height = 64
width  = 64
channels = 3


model = Sequential()

# Declare the input with an explicit Input layer rather than passing
# input_shape= to the first wrapped layer.
model.add(Input(shape=(input_frames, height, width, channels)))

# The same small CNN is applied to every frame independently
model.add(TimeDistributed(
    Conv2D(16, (3,3), padding='same', activation='relu')
))
model.add(TimeDistributed(MaxPooling2D((2,2))))

model.add(TimeDistributed(
    Conv2D(32, (3,3), padding='same', activation='relu')
))
model.add(TimeDistributed(MaxPooling2D((2,2))))
# Flatten + Dropout to regularize
model.add(TimeDistributed(Flatten()))
model.add(Dropout(0.2))

# GRU with dropout; consumes the per-frame feature sequence
model.add(GRU(32, dropout=0.2, recurrent_dropout=0.2))

# Dense + optional dropout
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))

# One softmax output per gesture class
model.add(Dense(num_classes, activation='softmax'))

optimiser = Adam(learning_rate=0.001)
model.compile(optimizer=optimiser,
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped in print()
model.summary()


  super().__init__(**kwargs)
I0000 00:00:1735632800.593771   53324 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2273 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 Ti Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


None


In [7]:
# Number of videos listed in each CSV
num_train_sequences, num_val_sequences = len(train_doc), len(val_doc)

# Batches needed to see every video once: ceil(n / batch_size), written as
# negated floor division so the final partial batch counts as a step.
steps_per_epoch  = -(-num_train_sequences // batch_size)
validation_steps = -(-num_val_sequences // batch_size)

# Endless batch generators for the training and validation sets
val_generator   = generator(val_path, val_doc, batch_size)
train_generator = generator(train_path, train_doc, batch_size)


In [8]:
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

# One checkpoint directory per run, named after the current timestamp
# (spaces removed, ':' replaced by '_' so the name is a valid path).
curr_dt_time = datetime.datetime.now()
run_stamp = str(curr_dt_time).replace(' ','').replace(':','_')
model_name = 'model_init' + '_' + run_stamp + '/'

if not os.path.exists(model_name):
    os.mkdir(model_name)

# Checkpoint file name records the epoch number plus the training and
# validation loss/accuracy; Keras fills in the placeholders when saving.
filepath = model_name + (
    'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-'
    '{val_loss:.5f}-{val_categorical_accuracy:.5f}.keras'
)

# Save the full model (not only weights) after every epoch
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    mode='auto',
    save_best_only=False,
    save_weights_only=False,
    verbose=1
)

# Multiply the learning rate by 0.1 after 5 epochs without val_loss improvement,
# never going below 1e-6
LR = ReduceLROnPlateau(
    monitor='val_loss',
    patience=5,
    factor=0.1,
    min_lr=1e-6,
    verbose=1
)

# Stop after 10 epochs without val_loss improvement and restore the best weights
earlystop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

callbacks_list = [checkpoint, LR, earlystop]


In [9]:
# Train from the endless generators. Because they never stop on their own,
# steps_per_epoch / validation_steps tell Keras how many batches make up
# one pass over the training and validation data.
model.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=num_epochs,
    initial_epoch=0,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    callbacks=callbacks_list,
    class_weight=None,
    verbose=1
)


print("Training completed.")

Source path =  Project_data/train ; batch size = 8


  image = imread(image_path).astype(np.float32)


Epoch 1/20


I0000 00:00:1735632807.657745   53538 cuda_dnn.cc:529] Loaded cuDNN version 90600


[1m82/83[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 780ms/step - categorical_accuracy: 0.2235 - loss: 5.8455

  image = imread(image_path).astype(np.float32)


[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 778ms/step - categorical_accuracy: 0.2236 - loss: 5.8126Source path =  Project_data/val ; batch size = 8

Epoch 1: saving model to model_init_2024-12-3113_43_21.249773/model-00001-3.11572-0.22926-1.63882-0.18000.keras
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 932ms/step - categorical_accuracy: 0.2237 - loss: 5.7805 - val_categorical_accuracy: 0.1800 - val_loss: 1.6388 - learning_rate: 0.0010
Epoch 2/20
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 757ms/step - categorical_accuracy: 0.2284 - loss: 1.5809
Epoch 2: saving model to model_init_2024-12-3113_43_21.249773/model-00002-1.57177-0.24887-1.60706-0.18000.keras
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 876ms/step - categorical_accuracy: 0.2287 - loss: 1.5808 - val_categorical_accuracy: 0.1800 - val_loss: 1.6071 - learning_rate: 0.0010
Epoch 3/20
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m