In [1]:
import cv2 as cv
import matplotlib.pyplot as plt
import os
import numpy as np
import mediapipe as mp
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, Flatten, TimeDistributed, Bidirectional, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from datetime import datetime
from tqdm import tqdm

2025-08-25 19:12:55.056366: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-25 19:12:55.183219: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1756129375.228713   23901 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1756129375.242300   23901 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1756129375.351320   23901 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
# Short aliases for the MediaPipe solution modules referenced by the cells below.
mp_holistic, mp_drawing, mp_pose = (
    mp.solutions.holistic,
    mp.solutions.drawing_utils,
    mp.solutions.pose,
)

In [3]:
def mediapipe_detection(image, model):
    """Run a MediaPipe model on one frame.

    Args:
        image: Frame to analyse; it is converted with ``COLOR_BGR2RGB`` before
            being handed to the model.
        model: Object exposing ``process(image)`` (the notebook passes a
            Holistic instance).

    Returns:
        ``(image, results)`` where ``image`` is the frame converted back with
        ``COLOR_RGB2BGR`` and ``results`` is whatever ``model.process`` returned.
    """
    rgb_frame = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    # The buffer is marked read-only only for the duration of the model call.
    rgb_frame.flags.writeable = False
    detection = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    return cv.cvtColor(rgb_frame, cv.COLOR_RGB2BGR), detection

In [4]:
def draw_styled_landmarks(image, results):
    """Draw the left- and right-hand landmarks of ``results`` onto ``image``.

    Each hand uses its own pair of colours; the left hand is drawn first.
    Nothing is returned: drawing is delegated to ``mp_drawing.draw_landmarks``.
    """
    # (landmark list, colour of the first DrawingSpec, colour of the second DrawingSpec)
    # NOTE(review): the two specs are passed positionally; presumably the first
    # styles the landmark points and the second the connections - confirm
    # against the MediaPipe drawing_utils signature.
    hand_styles = (
        (results.left_hand_landmarks, (121, 22, 76), (121, 44, 250)),
        (results.right_hand_landmarks, (245, 117, 66), (245, 66, 230)),
    )
    for hand_landmarks, first_color, second_color in hand_styles:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_holistic.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=first_color, thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=second_color, thickness=2, circle_radius=2),
        )

In [5]:
def extract_keypoints(results):
    """Flatten the hand landmarks of ``results`` into one feature vector.

    The left-hand values come first, then the right-hand values; each hand
    contributes ``x, y, z`` per landmark. A hand whose landmark attribute is
    falsy is replaced by ``21 * 3`` zeros.

    Returns:
        1-D numpy array: left-hand block concatenated with right-hand block.
    """
    def _hand_vector(hand):
        # Missing hand -> fixed-size block of zeros.
        if not hand:
            return np.zeros(21*3)
        return np.array([[point.x, point.y, point.z] for point in hand.landmark]).flatten()

    left_block = _hand_vector(results.left_hand_landmarks)
    right_block = _hand_vector(results.right_hand_landmarks)
    return np.concatenate([left_block, right_block])


In [6]:
# Root folder of the dataset; the extraction cell looks for one sub-folder per
# entry of `actions` underneath it. Set the ASL_DATA_PATH environment variable
# to run on another machine without editing the notebook; the previous
# hard-coded location remains the default.
DATA_PATH = os.environ.get('ASL_DATA_PATH', '/home/smayan/Desktop/ASL/dataset/SL')
# Number of frames sampled from every video (and the time dimension of the model input).
sequence_length = 30
# Not referenced by any other cell visible in this notebook.
min_sequences_per_class = 10

In [7]:
# actions = [
#     'a', 'about', 'again', 'all', 'also', 'always', 'and', 'angry', 'animal', 'answer', 
#     'apple', 'ask', 'baby', 'bad', 'bathroom', 'beautiful', 'because', 'bed', 'before', 
#     'big', 'book', 'boy', 'brother', 'but', 'buy', 'bye', 'call', 'can', 'car', 'cat', 
#     'city', 'class', 'clean', 'clothes', 'cold', 'college', 'color', 'come', 'computer', 
#     'cook', 'dad', 'day', 'deaf', 'different', 'doctor', 'dog', 'done', "don't want", 
#     'down', 'drink', 'eat', 'eight', 'enough', 'family', 'fast', 'father', 'feel', 
#     'find', 'fine', 'finish', 'first', 'five', 'food', 'for', 'four', 'friend', 'from', 
#     'get', 'girl', 'give', 'go', 'good', 'goodbye', 'happy', 'hard', 'have', 
#     'head', 'hearing', 'hello', 'help', 'her', 'here', 'home', 'hospital', 'hot', 
#     'house', 'how', 'hungry', 'i', 'if', 'in', 'know', 'language', 'last', 'later', 
#     'learn', 'letter', 'like', 'little bit', 'live', 'look at', 'love', 'make', 'man', 
#     'many', 'me', 'meet', 'milk', 'mom', 'money', 'month', 'more', 'morning', 'mother', 
#     'movie', 'music', 'my', 'name', 'need', 'never', 'new', 'nice', 'night', 'nine', 
#     'no', 'not', 'now', 'old', 'on', 'one', 'open', 'orange', 'our', 'out', 'people', 
#     'phone', 'play', 'please', 'put', 'question', 'read', 'ready', 'red', 'right', 'sad', 
#     'same', 'say', 'school', 'see', 'seven', 'she', 'shirt', 'shoes', 'show', 'sick', 
#     'sign', 'sign language', 'sister', 'sit', 'six', 'sleep', 'slow', 'small', 'sorry', 
#     'stand', 'start', 'stop', 'store', 'story', 'student', 'study', 'talk', 'teach', 
#     'teacher', 'tell', 'ten', 'thank you', 'that', 'their', 'they', 'thing', 
#     'think', 'thirsty', 'this', 'three', 'time', 'tired', 'to', 'today', 'tomorrow', 
#     'two', 'understand', 'up', 'use', 'wait', 'walk', 'want', 'water', 'way', 
#     'we', 'wear', 'week', 'what', 'when', 'where', 'which', 'white', 'who', 'why', 
#     'will', 'with', 'woman', 'word', 'work', 'world', 'write', 'wrong', 'year', 'yellow', 
#     'yes', 'yesterday', 'you', 'your'
# ]
# label_map = {label: num for num, label in enumerate(actions)}

In [8]:
# Signs to recognise; a sign's position in this list is its integer class label.
actions = [
    'hello', 'student', 'i', 'bye', 'goodbye',
    'college', 'wrong', 'how', 'work', 'your',
    'want', 'nice', 'to', 'meet', 'doctor',
    'time', 'age', 'breakfast', 'sorry', 'love',
]
# Sign name -> class index.
label_map = dict(zip(actions, range(len(actions))))


In [9]:
# Number of sign classes; also used as the width of the model's softmax layer.
len(actions)

20

In [10]:
# Empty accumulators for the per-video feature sequences and their class labels.
sequences = []
labels = []

In [11]:
import albumentations as A
import numpy as np

def get_augmentations():
    """Build the image augmentation pipeline used for the 'augmented' copy of each video.

    Returns:
        An ``A.Compose`` of, in order: random shift/scale/rotate, random
        brightness/contrast, motion blur and horizontal flip.
    """
    # NOTE(review): the recorded run printed a warning when this pipeline was
    # built; newer albumentations releases may flag ShiftScaleRotate in favour
    # of Affine - confirm against the installed version.
    geometric = A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=15, p=0.7)
    photometric = A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.7)
    blur = A.MotionBlur(blur_limit=5, p=0.5)
    mirror = A.HorizontalFlip(p=0.5)
    return A.Compose([geometric, photometric, blur, mirror])


# Single shared pipeline instance used by the extraction cell.
augmentation_pipeline = get_augmentations()

  original_init(self, **validated_kwargs)


In [12]:
sequences, labels = [], []


def _read_sampled_frames(video_path, num_frames):
    """Decode ``num_frames`` evenly spaced frames from ``video_path``.

    Returns the list of decoded frames, or ``None`` when the video cannot be
    opened, reports fewer than ``num_frames`` frames, or any sampled frame
    fails to decode (e.g. when the reported frame count is not exact). The
    capture is always released, even if decoding raises.
    """
    cap = cv.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return None
        total_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
        if total_frames < num_frames:
            return None

        frames = []
        for frame_idx in np.linspace(0, total_frames - 1, num_frames, dtype=int):
            cap.set(cv.CAP_PROP_POS_FRAMES, int(frame_idx))
            ret, frame = cap.read()
            if not ret:
                # A partial sequence is useless for a fixed-length model input.
                return None
            frames.append(frame)
        return frames
    finally:
        cap.release()


def _augment_clip(frames, pipeline):
    """Apply ONE randomly sampled augmentation to every frame of a clip.

    Calling the pipeline once per frame re-samples the shift/rotation/flip for
    each frame, so consecutive frames of the same sequence would be transformed
    inconsistently (for instance mirrored in one frame and not in the next).
    Registering the remaining frames as additional image targets makes
    albumentations draw the parameters once and reuse them for the whole clip.
    """
    extra_keys = [f"image{i}" for i in range(1, len(frames))]
    clip_pipeline = A.Compose(
        list(pipeline.transforms),
        additional_targets={key: "image" for key in extra_keys},
    )
    augmented = clip_pipeline(image=frames[0], **dict(zip(extra_keys, frames[1:])))
    return [augmented["image"]] + [augmented[key] for key in extra_keys]


def _frames_to_sequence(frames, holistic_model):
    """Run hand detection on each frame and return the list of keypoint vectors."""
    return [
        extract_keypoints(mediapipe_detection(frame, holistic_model)[1])
        for frame in frames
    ]


# NOTE(review): a single Holistic instance is reused for every video, so any
# tracking state it keeps presumably carries over between clips - confirm
# whether that matters for this dataset.
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    for action in actions:
        action_path = os.path.join(DATA_PATH, action)

        # A missing class folder is reported and skipped instead of aborting the whole run.
        if not os.path.isdir(action_path):
            print(f"Warning: No directory found for action '{action}'. Skipping.")
            continue

        # Sorted so the dataset order is reproducible; extension match ignores case.
        video_files = sorted(
            f for f in os.listdir(action_path)
            if f.lower().endswith(('.mp4', '.avi', '.mov'))
        )

        if not video_files:
            print(f"Warning: No videos found for action '{action}'. Skipping.")
            continue

        print(f"\nProcessing action: {action} ({len(video_files)} videos)")

        final_action_sequences = []
        skipped_videos = 0

        for video_file in tqdm(video_files, desc=f"Processing videos for '{action}'"):
            # Frames are decoded once and shared by the original and augmented passes.
            frames = _read_sampled_frames(os.path.join(action_path, video_file), sequence_length)
            if frames is None:
                skipped_videos += 1
                continue

            # Same order as before: the original sequence, then its augmented copy.
            final_action_sequences.append(_frames_to_sequence(frames, holistic))
            final_action_sequences.append(
                _frames_to_sequence(_augment_clip(frames, augmentation_pipeline), holistic)
            )

        sequences.extend(final_action_sequences)
        labels.extend([label_map[action]] * len(final_action_sequences))
        print(f"Generated {len(final_action_sequences)} sequences for {action} (including augmentations)")
        if skipped_videos:
            print(f"Skipped {skipped_videos} unreadable or too-short videos for {action}")

I0000 00:00:1756129378.663434   23901 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1756129378.701508   24062 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 570.172.08), renderer: NVIDIA GeForce RTX 4070 SUPER/PCIe/SSE2



Processing action: hello (4 videos)


Processing videos for 'hello':   0%|          | 0/4 [00:00<?, ?it/s]INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1756129378.749757   24035 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1756129378.780240   24054 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1756129378.782300   24055 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1756129378.782816   24049 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1756129378.783030   24042 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabl

Generated 8 sequences for hello (including augmentations)

Processing action: student (8 videos)


Processing videos for 'student': 100%|██████████| 8/8 [00:13<00:00,  1.72s/it]


Generated 16 sequences for student (including augmentations)

Processing action: i (5 videos)


Processing videos for 'i': 100%|██████████| 5/5 [00:08<00:00,  1.61s/it]


Generated 10 sequences for i (including augmentations)

Processing action: bye (5 videos)


Processing videos for 'bye': 100%|██████████| 5/5 [00:07<00:00,  1.59s/it]


Generated 4 sequences for bye (including augmentations)

Processing action: goodbye (5 videos)


Processing videos for 'goodbye': 100%|██████████| 5/5 [00:08<00:00,  1.66s/it]


Generated 4 sequences for goodbye (including augmentations)

Processing action: college (7 videos)


Processing videos for 'college': 100%|██████████| 7/7 [00:11<00:00,  1.70s/it]


Generated 14 sequences for college (including augmentations)

Processing action: wrong (8 videos)


Processing videos for 'wrong': 100%|██████████| 8/8 [00:12<00:00,  1.54s/it]


Generated 12 sequences for wrong (including augmentations)

Processing action: how (9 videos)


Processing videos for 'how': 100%|██████████| 9/9 [00:14<00:00,  1.63s/it]


Generated 16 sequences for how (including augmentations)

Processing action: work (10 videos)


Processing videos for 'work': 100%|██████████| 10/10 [00:16<00:00,  1.68s/it]


Generated 16 sequences for work (including augmentations)

Processing action: your (6 videos)


Processing videos for 'your': 100%|██████████| 6/6 [00:09<00:00,  1.58s/it]


Generated 8 sequences for your (including augmentations)

Processing action: want (10 videos)


Processing videos for 'want': 100%|██████████| 10/10 [00:13<00:00,  1.39s/it]


Generated 14 sequences for want (including augmentations)

Processing action: nice (6 videos)


Processing videos for 'nice': 100%|██████████| 6/6 [00:09<00:00,  1.54s/it]


Generated 10 sequences for nice (including augmentations)

Processing action: to (8 videos)


Processing videos for 'to': 100%|██████████| 8/8 [00:09<00:00,  1.20s/it]


Generated 10 sequences for to (including augmentations)

Processing action: meet (9 videos)


Processing videos for 'meet': 100%|██████████| 9/9 [00:15<00:00,  1.74s/it]


Generated 16 sequences for meet (including augmentations)

Processing action: doctor (11 videos)


Processing videos for 'doctor': 100%|██████████| 11/11 [00:17<00:00,  1.59s/it]


Generated 18 sequences for doctor (including augmentations)

Processing action: time (8 videos)


Processing videos for 'time': 100%|██████████| 8/8 [00:13<00:00,  1.68s/it]


Generated 12 sequences for time (including augmentations)

Processing action: age (7 videos)


Processing videos for 'age': 100%|██████████| 7/7 [00:11<00:00,  1.65s/it]


Generated 12 sequences for age (including augmentations)

Processing action: breakfast (5 videos)


Processing videos for 'breakfast': 100%|██████████| 5/5 [00:08<00:00,  1.74s/it]


Generated 10 sequences for breakfast (including augmentations)

Processing action: sorry (7 videos)


Processing videos for 'sorry': 100%|██████████| 7/7 [00:10<00:00,  1.56s/it]


Generated 10 sequences for sorry (including augmentations)

Processing action: love (7 videos)


Processing videos for 'love': 100%|██████████| 7/7 [00:11<00:00,  1.65s/it]

Generated 8 sequences for love (including augmentations)





In [13]:
# Stack the collected sequences and their labels into numpy arrays.
X, y = np.array(sequences), np.array(labels)

# Fail fast when the extraction cell produced nothing to train on.
if len(X) == 0:
    raise ValueError("No sequences were generated. Check your DATA_PATH and video files.")

print(f"\nTotal Dataset shape: X={X.shape}, y={y.shape}")


Total Dataset shape: X=(228, 30, 126), y=(228,)


In [14]:
# Shape of the stacked feature array built from `sequences`.
X.shape

(228, 30, 126)

In [15]:
# Inspect the raw feature values. Rows that are entirely zero are frames for
# which extract_keypoints found neither hand (it substitutes zeros in that case).
X

array([[[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        ...,
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00]],

       [[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 

In [16]:
# Same inspection of the feature array as in the previous cell.
X

array([[[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        ...,
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00]],

       [[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 

In [17]:
# One integer class label per sequence.
y.shape

(228,)

In [19]:
# Persist the extracted features and labels so a later cell can reload them
# with np.load instead of re-running the video extraction.
np.save('/media/smayan/500GB SSD/X_nosliding.npy', X)
np.save('/media/smayan/500GB SSD/y_nosliding.npy', y)

In [None]:
# Reload the cached features and labels from disk.
# NOTE(review): the recorded outputs further down show 737 samples, whereas the
# extraction run recorded above produced 228 - the files on disk were presumably
# written by a different run; confirm before trusting the recorded metrics.
X, y = (
    np.load(npy_path)
    for npy_path in (
        '/media/smayan/500GB SSD/X_nosliding.npy',
        '/media/smayan/500GB SSD/y_nosliding.npy',
    )
)

In [None]:
# Append a trailing singleton axis to X (a later cell squeezes it off the
# train/test splits again).
num_features = X.shape[2]
X = X.reshape(*X.shape[:2], num_features, 1)

# One-hot encode the integer labels, one column per entry of `actions`.
y_categorical = to_categorical(y, num_classes=len(actions))

# Stratified 80/20 split with a fixed seed.
# NOTE(review): the extraction cell stores an original and an augmented sequence
# for every video; a plain random split can presumably place both copies of one
# video on opposite sides of the split - confirm whether a group-aware split is needed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_categorical,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Shape of the full feature array after the trailing singleton axis was appended.
X.shape

(737, 30, 126, 1)

In [None]:
# Integer labels (not one-hot): one entry per sample.
y.shape

(737,)

In [None]:
# Drop the trailing singleton axis again so each split is 3-D, matching the
# 2-D per-sample input shape declared on the first model layer.
X_train = np.squeeze(X_train, axis=-1)
X_test = np.squeeze(X_test, axis=-1)

In [None]:
# X itself is untouched by the squeeze above; only the splits were squeezed.
X.shape

(737, 30, 126, 1)

In [None]:
# Training split after removing the trailing singleton axis.
X_train.shape

(589, 30, 126)

In [None]:
# Labels that actually occur in y. compute_class_weight returns one weight per
# entry of `classes`, in the same order.
present_classes = np.unique(y)
class_weights = compute_class_weight('balanced', classes=present_classes, y=y)

# Key every weight by its real class label. Keying by position (enumerate) is
# only correct when the labels are exactly 0..K-1 with none missing; if a class
# produced no sequences, every later class would silently receive the weight
# of a different class.
class_weight_dict = {
    int(label): float(weight)
    for label, weight in zip(present_classes, class_weights)
}
print(f"Class weights computed: {class_weight_dict}")

Class weights computed: {0: 2.047222222222222, 1: 0.8530092592592593, 2: 1.6377777777777778, 3: 1.364814814814815, 4: 1.32078853046595, 5: 0.9098765432098765, 6: 1.023611111111111, 7: 0.9098765432098765, 8: 0.6712204007285975, 9: 1.1373456790123457, 10: 0.7873931623931624, 11: 1.364814814814815, 12: 1.2795138888888888, 13: 0.6939736346516008, 14: 0.6111111111111112, 15: 0.9748677248677249, 16: 0.9748677248677249, 17: 1.2795138888888888}


In [None]:
# Conv1D front-end followed by a bidirectional LSTM and a small dense classifier.
# The input shape is declared with an explicit Input object rather than the
# `input_shape=` argument on the first layer, which Keras warns about for
# Sequential models (the warning is visible in the recorded output of this cell).
model = Sequential([
    # One sample = sequence_length time steps x keypoint features per frame.
    tf.keras.Input(shape=(sequence_length, X.shape[2])),

    Conv1D(128, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),

    Bidirectional(LSTM(32)),
    Dropout(0.3),

    Dense(32, activation='relu'),
    Dropout(0.3),
    # One softmax output per entry of `actions`.
    Dense(len(actions), activation='softmax'),
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Run identifier (YYYYMMDD-HHMMSS) reused for the log directory and the saved model name.
timestamp = f"{datetime.now():%Y%m%d-%H%M%S}"
log_dir = f'logs/wsl_model_{timestamp}'

In [None]:
# Categorical cross-entropy matches the one-hot labels produced by to_categorical;
# Adam is used with its default settings.
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(),
)

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
# Sanity check of the training input shape right before fitting.
X_train.shape

(589, 30, 126)

In [None]:
# callbacks = [
#     TensorBoard(log_dir=log_dir, histogram_freq=1),
#     EarlyStopping(monitor='val_loss', restore_best_weights=True),
#     ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=10, min_lr=1e-7)
# ]

In [None]:
# Train for a fixed 150 epochs with per-class loss weights.
# NOTE(review): the held-out split passed as validation data here is the same
# split the evaluation cell later reports as the test result.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=150,
    verbose=1,
    class_weight=class_weight_dict,
    validation_data=(X_test, y_test),
    # callbacks=callbacks,  # disabled: no `callbacks` list is defined by an active cell
)

Epoch 1/150
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.0826 - loss: 2.8673 - val_accuracy: 0.1149 - val_loss: 2.7569
Epoch 2/150
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.1319 - loss: 2.6171 - val_accuracy: 0.1622 - val_loss: 2.6180
Epoch 3/150
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.2039 - loss: 2.5738 - val_accuracy: 0.2162 - val_loss: 2.4858
Epoch 4/150
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2154 - loss: 2.3607 - val_accuracy: 0.2162 - val_loss: 2.4375
Epoch 5/150
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2648 - loss: 2.2837 - val_accuracy: 0.1689 - val_loss: 2.5038
Epoch 6/150
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.2661 - loss: 2.2299 - val_accuracy: 0.2500 - val_loss: 2.2254
Epoch 7/150
[1m37/37[0m [32m━━━

KeyboardInterrupt: 

In [None]:
# Score the model on the held-out split; with metrics=['accuracy'] evaluate
# yields the loss followed by the accuracy.
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(
    f"\nTest Loss: {test_loss:.4f}",
    f"Test Accuracy: {test_accuracy*100:.2f}%",
    sep="\n",
)


Test Loss: 0.6492
Test Accuracy: 81.51%


In [None]:
# Build the file name once so the confirmation message always names the file
# that was actually written (the message previously omitted the 'main_' prefix).
model_path = f'main_wsl_model_{timestamp}.h5'
model.save(model_path)
print(f"\nModel saved as {model_path}")




Model saved as wsl_model_20250822-192224.h5
