In [1]:
import cv2 as cv
import matplotlib.pyplot as plt
import os
import numpy as np
import mediapipe as mp
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, Flatten, TimeDistributed, Bidirectional, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from datetime import datetime
from tqdm import tqdm

2025-08-21 22:38:33.710163: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-21 22:38:33.717129: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755796113.725406   69938 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755796113.727978   69938 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1755796113.734289   69938 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

In [3]:
def mediapipe_detection(image, model):
    image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv.cvtColor(image, cv.COLOR_RGB2BGR)
    return image, results

In [4]:
def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                             mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                             )
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             )
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             )

In [5]:
def extract_keypoints(results):
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([face, lh, rh])


In [6]:
DATA_PATH = '/home/smayan/Desktop/ASL/dataset/SL'
sequence_length = 30
min_sequences_per_class = 10

In [7]:
# actions = [
#     'a', 'about', 'again', 'all', 'also', 'always', 'and', 'angry', 'animal', 'answer', 
#     'apple', 'ask', 'baby', 'bad', 'bathroom', 'beautiful', 'because', 'bed', 'before', 
#     'big', 'book', 'boy', 'brother', 'but', 'buy', 'bye', 'call', 'can', 'car', 'cat', 
#     'city', 'class', 'clean', 'clothes', 'cold', 'college', 'color', 'come', 'computer', 
#     'cook', 'dad', 'day', 'deaf', 'different', 'doctor', 'dog', 'done', "don't want", 
#     'down', 'drink', 'eat', 'eight', 'enough', 'family', 'fast', 'father', 'feel', 
#     'find', 'fine', 'finish', 'first', 'five', 'food', 'for', 'four', 'friend', 'from', 
#     'get', 'girl', 'give', 'go', 'good', 'goodbye', 'happy', 'hard', 'have', 
#     'head', 'hearing', 'hello', 'help', 'her', 'here', 'home', 'hospital', 'hot', 
#     'house', 'how', 'hungry', 'i', 'if', 'in', 'know', 'language', 'last', 'later', 
#     'learn', 'letter', 'like', 'little bit', 'live', 'look at', 'love', 'make', 'man', 
#     'many', 'me', 'meet', 'milk', 'mom', 'money', 'month', 'more', 'morning', 'mother', 
#     'movie', 'music', 'my', 'name', 'need', 'never', 'new', 'nice', 'night', 'nine', 
#     'no', 'not', 'now', 'old', 'on', 'one', 'open', 'orange', 'our', 'out', 'people', 
#     'phone', 'play', 'please', 'put', 'question', 'read', 'ready', 'red', 'right', 'sad', 
#     'same', 'say', 'school', 'see', 'seven', 'she', 'shirt', 'shoes', 'show', 'sick', 
#     'sign', 'sign language', 'sister', 'sit', 'six', 'sleep', 'slow', 'small', 'sorry', 
#     'stand', 'start', 'stop', 'store', 'story', 'student', 'study', 'talk', 'teach', 
#     'teacher', 'tell', 'ten', 'thank you', 'that', 'their', 'they', 'thing', 
#     'think', 'thirsty', 'this', 'three', 'time', 'tired', 'to', 'today', 'tomorrow', 
#     'two', 'understand', 'up', 'use', 'wait', 'walk', 'want', 'water', 'way', 
#     'we', 'wear', 'week', 'what', 'when', 'where', 'which', 'white', 'who', 'why', 
#     'will', 'with', 'woman', 'word', 'work', 'world', 'write', 'wrong', 'year', 'yellow', 
#     'yes', 'yesterday', 'you', 'your'
# ]
# label_map = {label: num for num, label in enumerate(actions)}

In [8]:
actions = ['hello', 'student','i','bye','goodbye','college','bye','how', 'you', 'your', 'want', 'nice', 'to', 'meet', 'doctor', 'time']
label_map = {label: num for num, label in enumerate(actions)}

In [9]:
len(actions)

16

In [10]:
sequences, labels = [], []

In [11]:
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    for action in actions:
        action_path = os.path.join(DATA_PATH, action)
        video_files = [f for f in os.listdir(action_path) if f.endswith(('.mp4', '.avi', '.mov'))]
        
        if not video_files:
            print(f"Warning: No videos found for action '{action}'. Skipping.")
            continue
            
        print(f"\nProcessing action: {action} ({len(video_files)} videos)")
        action_sequences = []

        for video_file in tqdm(video_files, desc=f"Processing videos for '{action}'"):
            video_path = os.path.join(action_path, video_file)
            cap = cv.VideoCapture(video_path)
            total_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
            
            if total_frames < sequence_length:
                cap.release()
                continue

            stride = max(1, (total_frames - sequence_length) // 5)

            for start_frame in range(0, total_frames - sequence_length + 1, stride):
                cap.set(cv.CAP_PROP_POS_FRAMES, start_frame)
                sequence = []

                for _ in range(sequence_length):
                    ret, frame = cap.read()
                    if not ret:
                        break
                    
                    _, results = mediapipe_detection(frame, holistic)
                    
                    keypoints = extract_keypoints(results)
                    sequence.append(keypoints)

                if len(sequence) == sequence_length:
                    action_sequences.append(sequence)
            
            cap.release()

        while len(action_sequences) < min_sequences_per_class and action_sequences:
            original_seq = np.array(action_sequences[np.random.randint(0, len(action_sequences))])
            noise = np.random.normal(0, 0.01, original_seq.shape)
            augmented_seq = original_seq + noise
            action_sequences.append(augmented_seq.tolist())

        sequences.extend(action_sequences)
        labels.extend([label_map[action]] * len(action_sequences))
        print(f"Generated {len(action_sequences)} sequences for {action}")

I0000 00:00:1755796114.989675   69938 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1755796115.045730   70044 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 570.172.08), renderer: NVIDIA GeForce RTX 4070 SUPER/PCIe/SSE2



Processing action: hello (4 videos)


Processing videos for 'hello':   0%|          | 0/4 [00:00<?, ?it/s]INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1755796115.081397   70020 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1755796115.101865   70043 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1755796115.103691   70036 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1755796115.103854   70030 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1755796115.104175   70017 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabl

Generated 20 sequences for hello

Processing action: student (8 videos)


Processing videos for 'student': 100%|██████████| 8/8 [00:32<00:00,  4.01s/it]


Generated 48 sequences for student

Processing action: i (5 videos)


Processing videos for 'i': 100%|██████████| 5/5 [00:16<00:00,  3.36s/it]


Generated 25 sequences for i

Processing action: bye (5 videos)


Processing videos for 'bye': 100%|██████████| 5/5 [00:19<00:00,  3.99s/it]


Generated 30 sequences for bye

Processing action: goodbye (5 videos)


Processing videos for 'goodbye': 100%|██████████| 5/5 [00:20<00:00,  4.16s/it]


Generated 31 sequences for goodbye

Processing action: college (7 videos)


Processing videos for 'college': 100%|██████████| 7/7 [00:29<00:00,  4.28s/it]


Generated 45 sequences for college

Processing action: bye (5 videos)


Processing videos for 'bye': 100%|██████████| 5/5 [00:20<00:00,  4.08s/it]


Generated 30 sequences for bye

Processing action: how (9 videos)


Processing videos for 'how': 100%|██████████| 9/9 [00:30<00:00,  3.38s/it]


Generated 45 sequences for how

Processing action: you (8 videos)


Processing videos for 'you': 100%|██████████| 8/8 [00:32<00:00,  4.03s/it]


Generated 48 sequences for you

Processing action: your (6 videos)


Processing videos for 'your': 100%|██████████| 6/6 [00:26<00:00,  4.34s/it]


Generated 36 sequences for your

Processing action: want (10 videos)


Processing videos for 'want': 100%|██████████| 10/10 [00:35<00:00,  3.53s/it]


Generated 52 sequences for want

Processing action: nice (6 videos)


Processing videos for 'nice': 100%|██████████| 6/6 [00:20<00:00,  3.39s/it]


Generated 30 sequences for nice

Processing action: to (8 videos)


Processing videos for 'to': 100%|██████████| 8/8 [00:22<00:00,  2.77s/it]


Generated 32 sequences for to

Processing action: meet (9 videos)


Processing videos for 'meet': 100%|██████████| 9/9 [00:40<00:00,  4.46s/it]


Generated 59 sequences for meet

Processing action: doctor (11 videos)


Processing videos for 'doctor': 100%|██████████| 11/11 [00:45<00:00,  4.18s/it]


Generated 67 sequences for doctor

Processing action: time (8 videos)


Processing videos for 'time': 100%|██████████| 8/8 [00:29<00:00,  3.64s/it]

Generated 42 sequences for time





In [12]:
X = np.array(sequences)
y = np.array(labels)

if X.shape[0] == 0:
    raise ValueError("No sequences were generated. Check your DATA_PATH and video files.")

print(f"\nTotal Dataset shape: X={X.shape}, y={y.shape}")


Total Dataset shape: X=(640, 30, 1530), y=(640,)


In [None]:
np.random.seed(42)
perm = np.random.permutation(len(X))
X = X[perm]
y = y[perm]

In [13]:
X.shape

(640, 30, 1530)

In [14]:
X

array([[[ 0.59060645,  0.298004  , -0.0141108 , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.58998013,  0.29970837, -0.01371168, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.58967638,  0.29977855, -0.01387646, ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.60153723,  0.33077788, -0.0111043 , ...,  0.53252566,
          0.21737686, -0.02696047],
        [ 0.6021049 ,  0.32972935, -0.01121327, ...,  0.53052449,
          0.21563651, -0.02373665],
        [ 0.60210556,  0.32775372, -0.01135233, ...,  0.52632856,
          0.20170368, -0.0168931 ]],

       [[ 0.59463733,  0.30495709, -0.01375333, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.59636772,  0.3064371 , -0.0135813 , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.5975787 ,  0.30605981, -0.01378598, ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.60318059,  0.32912713, -0.01227444, ...,  

In [16]:
X

array([[[ 0.59060645,  0.298004  , -0.0141108 , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.58998013,  0.29970837, -0.01371168, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.58967638,  0.29977855, -0.01387646, ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.60153723,  0.33077788, -0.0111043 , ...,  0.53252566,
          0.21737686, -0.02696047],
        [ 0.6021049 ,  0.32972935, -0.01121327, ...,  0.53052449,
          0.21563651, -0.02373665],
        [ 0.60210556,  0.32775372, -0.01135233, ...,  0.52632856,
          0.20170368, -0.0168931 ]],

       [[ 0.59463733,  0.30495709, -0.01375333, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.59636772,  0.3064371 , -0.0135813 , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.5975787 ,  0.30605981, -0.01378598, ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.60318059,  0.32912713, -0.01227444, ...,  

In [17]:
y.shape

(640,)

In [18]:
np.save('/media/smayan/500GB SSD/X_min.npy', X)
np.save('/media/smayan/500GB SSD/y_min.npy', y)

In [19]:
X = np.load('/media/smayan/500GB SSD/X_min.npy')
y = np.load('/media/smayan/500GB SSD/y_min.npy')