## Installing necessary libraries

## Importing essential libraries for data processing, model building, and visualization


In [1]:
import mediapipe as mp    # For MediaPipe's hand and pose tracking utilities (Real-time)
import tensorflow as tf   # Core machine learning library for building models
import keras              # High-level neural networks API, runs on TensorFlow
import numpy as np        # Numerical operations and array handling
import pandas as pd       # Data manipulation and analysis
import os                 # Interacting with the operating system
import shutil             # High-level file operations (e.g., copy, delete)
import datetime as dt     
import matplotlib.pyplot as plt # Plotting graphs and visualizations
import seaborn as sns        # Data visualization library
from tqdm import tqdm        # Progress bar for loops
import glob                  # File pattern matching (for file paths)
import cv2                   # OpenCV for image processing
from keras.utils import to_categorical  # Converts labels to categorical format
from keras.models import Sequential     # For building sequential neural network models
from keras.layers import Bidirectional, LSTM, Dense  # Layers for RNN (Recurrent Neural Network) models
from keras.callbacks import EarlyStopping  #Stops training early if performance plateaus
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score # Metrics for model evaluation
from sklearn.model_selection import train_test_split     
np.random.seed(42)

## Initialize MediaPipe Holistic Model and Drawing Utilities

In [2]:
# Create a Holistic object to detect keypoints (landmarks) such as: pose, face, and hands keypoints
mp_holistic = mp.solutions.holistic #Creating Object

# Drawing utilities for detected landmarks on images or video frames.
mp_drawing = mp.solutions.drawing_utils 

## Function to Detect Keypoints Using MediaPipe

This function takes an input image and a MediaPipe model (like mp_holistic) to perform **landmark detection**. It prepares the image by converting color formats (as OpenCV uses BGR, while MediaPipe uses RGB).
- the output of model.process(image): An Object contains attributes/(collection of landmark points) like pose_landmarks(x,y,z), left_hand_landmarks(x,y,z), right_hand_landmarks(x,y,z).

In [3]:
def mediapipe_detection(image, model):
    """
    Perform landmark detection on an image using a specified MediaPipe model.
    """
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable ; to improve performance during processing
    results = model.process(image)                 # Make prediction / Run the holistic model to detect and process keypoints
    image.flags.writeable = True                   # Image is now writeable for further processing
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR CONVERSION RGB 2 BGR
    return image, results

## Function to Draw Styled Landmarks on an Image

- This function takes an image and a results object containing detected keypoints.
- For each type of keypoint (pose, left hand, right hand), it uses mp_drawing.draw_landmarks to draw connections and landmarks with specified styles (color, thickness, and circle radius) for easier visualization.
- DrawingSpec allows customizing how the landmarks and connections appear on the image, which helps distinguish between different parts visually.

-- `right_hand_landmarks` is the actual data (coordinates of hand points)
-- `mp_holistic.HAND_CONNECTIONS` Specifies how to connect these keypoints to form a visual hand skeleton.
-- `draw_landmarks` : Uses the HAND_CONNECTIONS list to connect these detected points visually on image.

In [4]:
def draw_styled_landmarks(image, results):

    """
    Draw styled landmarks on the image for detected pose, left hand, and right hand keypoints.
    """
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, #pose_landmark == collection of landmark points POSE_CONNECTIONS == connect pairs 
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),# Color/style for landmarks 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2) # Color/style for connections
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             )

## Function to Adjust Landmarks Based on a Center Point

**Function Purpose:**
This function takes an array of landmark coordinates and centers them around a specified point. This can help normalize positions, such as making all hand landmarks relative to the wrist or a central point, which is useful for consistency in gesture recognition.

**Centering landmarks is often done to standardize gesture positions, so the same gesture looks similar regardless of where the hand is in the frame.**

In [5]:
def adjust_landmarks(arr,center):

    """
    Adjusts the coordinates of landmarks by centering them around a given point.
    """
    # Reshape the array to have shape (n, 3)
    # Convert the flattened array of landmarks into (n, 3) shape, where n is the number of landmarks and 3 represents x, y, z coordinates
    
    arr_reshaped = arr.reshape(-1, 3)  

    
    # Repeat the center array to have shape (n, 3)
    # Repeat the center coordinates n times to match the shape of the landmark array for element-wise subtraction
    
    center_repeated = np.tile(center, (len(arr_reshaped), 1)) 

    
    # Subtract the center array from the arr array
    # Effectively centering all landmarks around this point
    
    arr_adjusted = arr_reshaped - center_repeated

    
    # Reshape arr_adjusted back to shape (n*3,)
    # Flatten the adjusted array back to its original (n*3,) shape
    #Using -1 lets you reshape an array without having to manually calculate the size for that dimension, as NumPy will infer it based on the total element count.
    
    arr_adjusted = arr_adjusted.reshape(-1) 

    
    return(arr_adjusted)

## Function to Extract and Adjust Keypoints from Detected Landmarks

This function extracts keypoints for the body’s pose, left hand, and right hand from the **results** object produced by MediaPipe. It then adjusts these keypoints to be centered around specific reference points (like the nose or wrists), making gesture recognition more consistent.


`.flatten()`: Flattens the array into a one-dimensional (1D) array.

`[[res.x, res.y, res.z] for res in results.pose_landmarks.landmark]` : 
- This is a list comprehension, a concise way to create lists in Python.
- It iterates over each landmark in results.pose_landmarks.landmark.
- For each landmark, it extracts the x, y, and z coordinates: [res.x, res.y, res.z]

`np.zeros(33*3)` : matching the shape of the array created when landmarks are detected. This is useful to maintain consistency in data shape.

Purpose of Selecting the Nose Landmark:
Using the nose coordinates as a reference (or "center") point allows us to normalize the position of other landmarks relative to the nose.

In [6]:
def extract_keypoints(results):

    """
    Extracts and adjusts keypoints for the pose, left hand, and right hand from the results object.
    """

    #ternary operator
    #This line creates a flattened array of x, y, z coordinates for 33 pose landmarks if detected, or a zero array of the same shape if no landmarks are found.
    # Extract pose landmarks, flattening from 3D (x,y,z) to a 1D array; if not detected, create an array of zeros "if true the code before it is executed"
    pose = np.array([[res.x, res.y, res.z] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*3) #1D array of 99 zeros (landmarks, coordinate)
    
    #res is the name given to each item (or landmark)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)

    # Set reference points for centering landmarks
    nose=pose[:3]   # First three values in pose represent the nose landmark // In MediaPipe’s landmark model, the nose is typically the first landmark in the list of pose landmarks
    lh_wrist=lh[:3] #x,y,z
    rh_wrist=rh[:3]

    # Adjust the landmarks to be centered around the reference points
    pose_adjusted = adjust_landmarks(pose,nose)
    lh_adjusted = adjust_landmarks(lh,lh_wrist)
    rh_adjusted = adjust_landmarks(rh,rh_wrist)
    
    return pose_adjusted, lh_adjusted, rh_adjusted

## Create List of Selected Words Using Ranges with Zero-Padding

- This cell generates a list of numbers within specified ranges, each formatted as a 4-digit, zero-padded string (e.g., "0111", "0112"). This could be used to create filenames, labels, or identifiers in a consistent format.

**zero padding:**
`str(num).zfill(4)`
`.zfill(4)` : Fill the string with zeros until it is 4 characters long

`.extend()` : Adds all elements

In [7]:
# Define the different ranges
ranges = [(1, 503)] #single one element

# Initialize an empty list to store the results
selected_words = []

# Iterate over each range
for start, end in ranges:
    # Extend the list with zero-padded numbers in the current range
    selected_words.extend([str(num).zfill(4) for num in range(start, end)]) 

# Print the result
print(selected_words)

['0001', '0002', '0003', '0004', '0005', '0006', '0007', '0008', '0009', '0010', '0011', '0012', '0013', '0014', '0015', '0016', '0017', '0018', '0019', '0020', '0021', '0022', '0023', '0024', '0025', '0026', '0027', '0028', '0029', '0030', '0031', '0032', '0033', '0034', '0035', '0036', '0037', '0038', '0039', '0040', '0041', '0042', '0043', '0044', '0045', '0046', '0047', '0048', '0049', '0050', '0051', '0052', '0053', '0054', '0055', '0056', '0057', '0058', '0059', '0060', '0061', '0062', '0063', '0064', '0065', '0066', '0067', '0068', '0069', '0070', '0071', '0072', '0073', '0074', '0075', '0076', '0077', '0078', '0079', '0080', '0081', '0082', '0083', '0084', '0085', '0086', '0087', '0088', '0089', '0090', '0091', '0092', '0093', '0094', '0095', '0096', '0097', '0098', '0099', '0100', '0101', '0102', '0103', '0104', '0105', '0106', '0107', '0108', '0109', '0110', '0111', '0112', '0113', '0114', '0115', '0116', '0117', '0118', '0119', '0120', '0121', '0122', '0123', '0124', '0125',

## Function to Generate Keypoint Arrays for Videos


generates arrays of keypoints (important points representing the pose, left hand, and right hand) from video files, storing them as **.npy** files for future use. These keypoints are used in tasks like sign language recognition.
`with` statement is used to manage resources (Mediapipe Holistic model) efficiently. It ensures that the resource is properly initialized and closed when its work is done.**memory released, resources freed**


`with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:`
`min_detection_confidence=0.5`:
If the model’s confidence in detecting a landmark is less than 0.5 (50%), it discards the detection.

`min_tracking_confidence=0.5`:
Purpose: Sets the confidence threshold for tracking landmarks across frames.
Meaning: If the confidence for tracking (linking landmarks across consecutive frames) is below 0.5, the model may stop tracking.

# adjust all paths *******************8

In [8]:
def make_keypoint_arrays(path,signer,split):
    """This function generates numpy arrays of keypoints for each video in the specified folder location.
    Args:
      signer(int): the signer of interest. Could be 01 or 02 or 03
      split(str): can be 'train', 'test' or 'val'
    """
    #Create Necessary Directories  
    #Creates folders to store the generated keypoints for each signer and dataset split.
    os.makedirs('working/npy_arrays',exist_ok = True)
    os.makedirs(f'working/npy_arrays/{signer}',exist_ok = True)
    os.makedirs(f'working/npy_arrays/{signer}/{split}',exist_ok = True)
    
    #Specifies where to save the keypoints.
    working_path = f'working/npy_arrays/{signer}/{split}'
    
    #Specifies the path to the folder containing videos.
    words_folder = os.path.join(path,str(signer),str(signer), split)


    
    # Loop through all the subfolders in the Dataset folder
    for word in tqdm(selected_words):
        #Lists all files within each word subfolder, representing different video files.
        video_files = os.listdir(os.path.join(words_folder, word))
          # Loop through each video files
        for video_file in video_files:
                # Open the video file. Retrieves and sorts frames from the video.
            video = sorted(os.listdir(os.path.join(words_folder, word, video_file)))

            # Initialize the list of keypoints for this video to store extracted keypoints.
            pose_keypoints, lh_keypoints, rh_keypoints = [], [], []
            with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
              # Loop through the video frames
              for frame in video:
                  # Perform any necessary preprocessing on the frame (e.g., resizing, normalization)
                frame = os.path.join(words_folder, word, video_file,frame)
                frame = cv2.imread(frame)
#                 frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                
                  # Normalize pixel values to the range [0, 1]
                # Make detections
                image, results = mediapipe_detection(frame, holistic)

                # Extract keypoints
                pose, lh, rh = extract_keypoints(results)
                # Add the keypoints to the list for this video
                pose_keypoints.append(pose)
                lh_keypoints.append(lh)
                rh_keypoints.append(rh)           
                
                # Save the keypoints for this video to a numpy array (Save Keypoints to Disk)
                #Paths to save the keypoints for each part.
                pose_directory = os.path.join(working_path, word,'pose_keypoints')
                lh_directory = os.path.join(working_path, word,'lh_keypoints')
                rh_directory = os.path.join(working_path, word,'rh_keypoints')

               # Ensures the directories for storing keypoints exist
                if not os.path.exists(pose_directory):
                    os.makedirs(pose_directory)

                if not os.path.exists(lh_directory):
                    os.makedirs(lh_directory)

                if not os.path.exists(rh_directory):
                    os.makedirs(rh_directory)

                #Save Keypoints Arrays as .npy Files
                pose_path = os.path.join(pose_directory, video_file)
                np.save(pose_path, pose_keypoints)

                lh_path = os.path.join(lh_directory, video_file)
                np.save(lh_path, lh_keypoints)

                rh_path = os.path.join(rh_directory, video_file)
                np.save(rh_path, rh_keypoints)

In [None]:
#import os
#print(os.path.exists('karsl-502'))  # Will print True if the directory exists


In [None]:
make_keypoint_arrays('working/karsl-502','01','train')

In [None]:
make_keypoint_arrays('working/karsl-502','01','test')

In [None]:
make_keypoint_arrays('working/karsl-502','02','train')

In [None]:
make_keypoint_arrays('working/karsl-502','02','test')

In [None]:
make_keypoint_arrays('working/karsl-502','03','train')

In [None]:
make_keypoint_arrays('working/karsl-502','03','test')

## Load Data and Filter Sign IDs

In [9]:
# Load an Excel file into a pandas DataFrame (Dataset)
karsl_df = pd.read_excel('KARSL-502_Labels.xlsx')

# Initialize an empty list to store filtering conditions
mask = [] #store boolean values indicating whether each row's SignID is in the selected_words

for i in karsl_df['SignID'].values: # Loop over each 'SignID' value in the DataFrame // SignID column values
    if str(i).zfill(4) in selected_words : # Check if the zero-padded ID is in selected_words
        mask.append(True) # Keep this row
    else :
        mask.append(False) # Exclude this row

# Filter the DataFrame rows where mask is True  
karsl_6 = karsl_df[mask].reset_index(drop=True) #Resets the index of the filtered DataFrame and drops the old index.

karsl_6 #contains only rows with SignID values in selected_words

Unnamed: 0,SignID,Sign-Arabic,Sign-English
0,1,0,0
1,2,1,1
2,3,2,2
3,4,3,3
4,5,4,4
...,...,...,...
497,498,ممرضة,nurse
498,499,ممرض,orderly
499,500,محام,lawyer
500,501,انتقال,traveling


In [10]:
import sys
print(sys.executable)


C:\Users\user\AppData\Local\Programs\Python\Python310\python.exe


## Create a Dictionary Mapping Arabic Signs to Their IDs

In [11]:
#Key (w): The Arabic name of a sign. Value (i): Its corresponding ID
w2id = {w:i for w,i in zip(karsl_6['Sign-Arabic'].values,karsl_6['SignID'].values  )}
w2id

{0: 1,
 1: 2,
 2: 3,
 3: 4,
 4: 5,
 5: 6,
 6: 7,
 7: 8,
 8: 9,
 9: 10,
 10: 11,
 20: 12,
 30: 13,
 40: 14,
 50: 15,
 60: 16,
 70: 17,
 80: 18,
 90: 19,
 100: 20,
 200: 21,
 300: 22,
 400: 23,
 500: 24,
 600: 25,
 700: 26,
 800: 27,
 900: 28,
 1000: 29,
 1000000: 30,
 10000000: 31,
 'ا': 32,
 'ب': 33,
 'ت': 34,
 'ث': 35,
 'ج': 36,
 'ح': 37,
 'خ': 38,
 'د': 39,
 'ذ': 40,
 'ر': 41,
 'ز': 42,
 'س': 43,
 'ش': 44,
 'ص': 45,
 'ض': 46,
 'ط': 47,
 'ظ': 48,
 'ع': 49,
 'غ': 50,
 'ف': 51,
 'ق': 52,
 'ك': 53,
 'ل': 54,
 'م': 55,
 'ن': 56,
 'ه': 57,
 'و': 58,
 'ي': 59,
 'ة': 60,
 'أ': 61,
 'ؤ': 62,
 'ئ': 63,
 'ئـ': 64,
 'ء': 65,
 'إ': 66,
 'آ': 67,
 'ى': 68,
 'لا': 69,
 'ال': 70,
 'هيكل عظمي': 71,
 'جمجة': 72,
 'عمود فقري': 73,
 'قفص صدري': 74,
 'جهاز تنفسي': 75,
 'قصبة هوائية': 76,
 'رئتان': 77,
 'شهيق - زفير': 78,
 'جهاز هضمي': 79,
 'وجه': 80,
 'بلعوم': 81,
 'كبد': 82,
 'البنكرياس': 83,
 'الأمعاء الدقيقة': 84,
 'الأمعاء الغليظة': 85,
 'الزائدة الدودية': 86,
 'جهاز عصبي': 87,
 'قلب': 88,
 'حواس خمس

## Extract and Print Arabic Sign Words

In [12]:
words= np.array([v for v in karsl_6['Sign-Arabic']])
print(words)

['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' '10' '20' '30' '40' '50' '60'
 '70' '80' '90' '100' '200' '300' '400' '500' '600' '700' '800' '900'
 '1000' '1000000' '10000000' 'ا' 'ب' 'ت' 'ث' 'ج' 'ح' 'خ' 'د' 'ذ' 'ر' 'ز'
 'س' 'ش' 'ص' 'ض' 'ط' 'ظ' 'ع' 'غ' 'ف' 'ق' 'ك' 'ل' 'م' 'ن' 'ه' 'و' 'ي' 'ة'
 'أ' 'ؤ' 'ئ' 'ئـ' 'ء' 'إ' 'آ' 'ى' 'لا' 'ال' 'هيكل عظمي' 'جمجة' 'عمود فقري'
 'قفص صدري' 'جهاز تنفسي' 'قصبة هوائية' 'رئتان' 'شهيق - زفير' 'جهاز هضمي'
 'وجه' 'بلعوم' 'كبد' 'البنكرياس' 'الأمعاء الدقيقة' 'الأمعاء الغليظة'
 'الزائدة الدودية' 'جهاز عصبي' 'قلب' 'حواس خمس' 'عضلة' 'أنسجة' 'مستشفى'
 'إسعافات أولية' 'جرح نازف' 'حروق' 'مخدر/ بنج' 'عملية جراحية'
 'شاش / ضمادة' 'شريط لاصق / بلاستر' 'صيدلية' 'تحليل دم' 'فحص سريري'
 'فحص النظر' 'ميزان حرارة' 'سماعة أذن' 'جهاز قياس الضغط' 'نبض القلب'
 'تحليل طبي' 'معمل التحاليل / مختبر' 'صورة اشعة' 'التهاب' 'تورم' 'زكام'
 'عدوى' 'صداع' 'ألم' 'حمى' 'إسهال' 'إمساك' 'مغص' 'مرض السكر / سكري'
 'أزمة قلبية' 'سرطان' 'مرض فقدان المناعة / الإيدز' 'تساقط الشعر'
 'سكتة قلبية' 'شلل ن

## Create a Label Mapping

This cell assigns a unique numeric label to each Arabic sign word.

`enumerate()`: Assigns a unique index (starting from 0) to each item in the words array

In [13]:
label_map = {label:num for num, label in enumerate(words)}
print(label_map)

{'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, '10': 10, '20': 11, '30': 12, '40': 13, '50': 14, '60': 15, '70': 16, '80': 17, '90': 18, '100': 19, '200': 20, '300': 21, '400': 22, '500': 23, '600': 24, '700': 25, '800': 26, '900': 27, '1000': 28, '1000000': 29, '10000000': 30, 'ا': 31, 'ب': 32, 'ت': 33, 'ث': 34, 'ج': 35, 'ح': 36, 'خ': 37, 'د': 38, 'ذ': 39, 'ر': 40, 'ز': 41, 'س': 42, 'ش': 43, 'ص': 44, 'ض': 45, 'ط': 46, 'ظ': 47, 'ع': 48, 'غ': 49, 'ف': 50, 'ق': 51, 'ك': 52, 'ل': 53, 'م': 54, 'ن': 55, 'ه': 56, 'و': 57, 'ي': 58, 'ة': 59, 'أ': 60, 'ؤ': 61, 'ئ': 62, 'ئـ': 63, 'ء': 64, 'إ': 65, 'آ': 66, 'ى': 67, 'لا': 68, 'ال': 69, 'هيكل عظمي': 70, 'جمجة': 71, 'عمود فقري': 72, 'قفص صدري': 73, 'جهاز تنفسي': 74, 'قصبة هوائية': 75, 'رئتان': 76, 'شهيق - زفير': 77, 'جهاز هضمي': 78, 'وجه': 79, 'بلعوم': 80, 'كبد': 81, 'البنكرياس': 82, 'الأمعاء الدقيقة': 83, 'الأمعاء الغليظة': 84, 'الزائدة الدودية': 85, 'جهاز عصبي': 86, 'قلب': 87, 'حواس خمس': 88, 'عضلة': 89, 'أنسجة':

## Function for Processing Data into Fixed-Length Sequences for Model Training and Testing

This function prepares the data by extracting keypoints from multiple videos, ensuring uniform sequence lengths, and converting the data into arrays ready for use in machine learning models.

In [None]:
def preprocess_data(data_path,signers,split,f_avg):
    """"
    This function loads the keypoints arrays for each video sequence of each word performed by the given signers, and extracts 
    a subsequence of length 'f_avg' from each sequence. Then it converts the sequences and labels to numpy arrays and returns 
    them as X and y.
    
    Args:
        data_path: Path to the directory containing keypoint data.
        signers(list): the signers of interest.
        split(str): can be 'train' or 'test'.
        f_avg(int): threshold for frame sampling(number of frames to sample per sequence).
    
    Returns:
        X(numpy.ndarray): array of sequences
        y(numpy.ndarray): array of one-hot encoded sign labels
    """

    # Initialize the lists of sequences and labels
    sequences, labels = [], [] #labels: A list to store the corresponding labels for the sequences.

    # Iterate through the list of words
    for word in tqdm(words): #Loops through all words, with a progress bar provided by the tqdm library.
        for signer in signers:
            # Iterate through the numpy arrays contained in the directory mentioned below
            for sequence in os.listdir(os.path.join(data_path,str(signer), split, str(w2id[word] ).zfill(4), 'lh_keypoints')):
                 # Load the left hand array .npy
                res_lh = np.load(os.path.join(data_path,str(signer), split, str(w2id[word] ).zfill(4), 'lh_keypoints', sequence))

                # Determine how many rows to select
                num_frames = min(res_lh.shape[0], f_avg) #Ensures the number of frames doesn't exceed f_avg
                #res_lh.shape[0]: The number of frames in the keypoint sequence
                
                res_lh = res_lh[:num_frames,:] #Truncates the sequence to the desired length
               
                while num_frames < f_avg:
                    res_lh = np.concatenate((res_lh, np.expand_dims(res_lh[-1,:], axis=0)), axis=0) #pad by duplicating the last frame until it reaches f_avg
                    num_frames += 1

                # Load the right hand array
                res_rh = np.load(os.path.join(data_path,str(signer), split, str(w2id[word]).zfill(4), 'rh_keypoints', sequence))

                # Determine how many rows to select
                num_frames = min(res_rh.shape[0], f_avg)
                res_rh = res_rh[:num_frames,:]
                while num_frames < f_avg:
                    res_rh = np.concatenate((res_rh, np.expand_dims(res_rh[-1,:], axis=0)), axis=0)
                    num_frames += 1

                # Load the pose array
                res_pose = np.load(os.path.join(data_path,str(signer), split, str(w2id[word]).zfill(4), 'pose_keypoints', sequence))

                # Determine how many rows to select
                num_frames = min(res_pose.shape[0], f_avg)
                res_pose = res_pose[:num_frames,:]
                while num_frames < f_avg:
                    res_pose = np.concatenate((res_pose, np.expand_dims(res_pose[-1,:], axis=0)), axis=0)
                    num_frames += 1

                # Append the subsequence to the list of sequences
                sequences.append(np.concatenate((res_pose,res_lh, res_rh), axis=1)) #axis=1 means the arrays will be concatenated horizontally (along columns)
                # Append the label to the list of labels
                labels.append(label_map[word])
    # Convert the lists of sequences and labels to numpy arrays
    X = np.array(sequences)
    y = to_categorical(labels).astype(int) #Converts labels into one-hot encoded format using a utility function(likely from keras.utils)

    return X, y

# new

In [16]:
def preprocess_data(data_path,signers,split,f_avg):
    """"
    This function loads the keypoints arrays for each video sequence of each word performed by the given signers, and extracts 
    a subsequence of length 'f_avg' from each sequence. Then it converts the sequences and labels to numpy arrays and returns 
    them as X and y.
    
    Args:
        data_path: Path to the directory containing keypoint data.
        signers(list): the signers of interest.
        split(str): can be 'train' or 'test'.
        f_avg(int): threshold for frame sampling(number of frames to sample per sequence).
    
    Returns:
        X(numpy.ndarray): array of sequences
        y(numpy.ndarray): array of one-hot encoded sign labels
    """

    # Initialize the lists of sequences and labels
    sequences, labels = [], [] #labels: A list to store the corresponding labels for the sequences.

    # Iterate through the list of words
    for word in tqdm(words):
        if word not in w2id:
            print(f"Warning: '{word}' not found in w2id")
            continue
        for signer in signers:
            # Iterate through the numpy arrays contained in the directory mentioned below
            for sequence in os.listdir(os.path.join(data_path,str(signer), split, str(w2id[word] ).zfill(4), 'lh_keypoints')):
                 # Load the left hand array .npy
                res_lh = np.load(os.path.join(data_path,str(signer), split, str(w2id[word] ).zfill(4), 'lh_keypoints', sequence))

                # Determine how many rows to select
                num_frames = min(res_lh.shape[0], f_avg) #Ensures the number of frames doesn't exceed f_avg
                #res_lh.shape[0]: The number of frames in the keypoint sequence
                
                res_lh = res_lh[:num_frames,:] #Truncates the sequence to the desired length
               
                while num_frames < f_avg:
                    res_lh = np.concatenate((res_lh, np.expand_dims(res_lh[-1,:], axis=0)), axis=0) #pad by duplicating the last frame until it reaches f_avg
                    num_frames += 1

                # Load the right hand array
                res_rh = np.load(os.path.join(data_path,str(signer), split, str(w2id[word]).zfill(4), 'rh_keypoints', sequence))

                # Determine how many rows to select
                num_frames = min(res_rh.shape[0], f_avg)
                res_rh = res_rh[:num_frames,:]
                while num_frames < f_avg:
                    res_rh = np.concatenate((res_rh, np.expand_dims(res_rh[-1,:], axis=0)), axis=0)
                    num_frames += 1

                # Load the pose array
                res_pose = np.load(os.path.join(data_path,str(signer), split, str(w2id[word]).zfill(4), 'pose_keypoints', sequence))

                # Determine how many rows to select
                num_frames = min(res_pose.shape[0], f_avg)
                res_pose = res_pose[:num_frames,:]
                while num_frames < f_avg:
                    res_pose = np.concatenate((res_pose, np.expand_dims(res_pose[-1,:], axis=0)), axis=0)
                    num_frames += 1

                # Append the subsequence to the list of sequences
                sequences.append(np.concatenate((res_pose,res_lh, res_rh), axis=1)) #axis=1 means the arrays will be concatenated horizontally (along columns)
                # Append the label to the list of labels
                labels.append(label_map[word])
    # Convert the lists of sequences and labels to numpy arrays
    X = np.array(sequences)
    y = to_categorical(labels).astype(int) #Converts labels into one-hot encoded format using a utility function(likely from keras.utils)

    return X, y

## Splitting Training and Validation Data

In [17]:
#train and validation splits
data_path = 'working/npy_arrays'
X_train,y_train=preprocess_data(data_path,['01','02'],'train',48)
# X_train,y_train=preprocess_data(data_path,['01','02'],'train',48)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
print(X_train.shape)
print(y_train.shape)
print(X_val.shape)
print(y_val.shape)

  0%|                                                                                          | 0/502 [00:00<?, ?it/s]




00%|████████████████████████████████████████████████████████████████████████████████| 502/502 [38:23<00:00,  4.59s/it]

(31788, 48, 225)
(31788, 502)
(7948, 48, 225)
(7948, 502)


In [18]:
#test split
# X_test,y_test=preprocess_data(data_path,['01','02'],'test',48)
X_test,y_test=preprocess_data(data_path,['01','02',],'test',48)
print(X_test.shape)
print(y_test.shape)

  0%|                                                                                          | 0/502 [00:00<?, ?it/s]




00%|████████████████████████████████████████████████████████████████████████████████| 502/502 [06:54<00:00,  1.21it/s]

(7536, 48, 225)
(7536, 502)


In [19]:
# Define the Bidirectional LSTM model with Attention

    
model = tf.keras.Sequential([
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(len(words), activation='softmax')
])

# Compile the model

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Set up early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',  # Metric to monitor for early stopping
    mode='min',  # Set mode to 'min' for minimizing the metric
    patience=5,  # Number of epochs with no improvement before stopping
    restore_best_weights=True,  # Restore the best model weights
    verbose=1
)

In [20]:
model_training_history = model.fit(X_train, y_train, batch_size=32, validation_data=(X_val,y_val), validation_batch_size=32, epochs=50, callbacks=[early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Restoring model weights from the end of the best epoch: 37.
Epoch 42: early stopping


In [21]:
# Evaluate the model on train data
model_evaluation_history = model.evaluate(X_train, y_train)



In [22]:
# Evaluate the model on test data
model_evaluation_history = model.evaluate(X_test, y_test)
#Evaluate signers ****



In [23]:
# model.save('WLASL (World Level American Sign Language)/Saved Model/LSTM_Model.h5')  # Saves the model in HDF5 format
# Or
model.save('working/LSTM_Model_1.h5')  # Saves in TensorFlow SavedModel format



INFO:tensorflow:Assets written to: working/LSTM_Model_1\assets


INFO:tensorflow:Assets written to: working/LSTM_Model_1\assets


In [38]:
from IPython.display import FileLinks

# Create a download link
FileLinks("working/label_map.json")

In [40]:
import json

# Your existing label_map
label_map = {label: num for num, label in enumerate(words)}

# Save to a JSON file
with open("working/label_map.json", "w") as file:
    json.dump(label_map, file, indent=4)

print("label_map saved to 'label_map.json'")


label_map saved to 'label_map.json'


In [None]:
def plot_metric(model_training_history, metric_name_1, metric_name_2, plot_name):
    '''
    This function will plot the metrics passed to it in a graph.
    Args:
        model_training_history: A history object containing a record of training and validation 
                                loss values and metrics values at successive epochs
        metric_name_1:          The name of the first metric that needs to be plotted in the graph.
        metric_name_2:          The name of the second metric that needs to be plotted in the graph.
        plot_name:              The title of the graph.
    '''
    
    # Get metric values using metric names as identifiers.
    metric_value_1 = model_training_history.history[metric_name_1]
    metric_value_2 = model_training_history.history[metric_name_2]
    
    # Construct a range object which will be used as x-axis (horizontal plane) of the graph.
    epochs = range(len(metric_value_1))

    # Plot the Graph.
    plt.plot(epochs, metric_value_1, 'blue', label = metric_name_1)
    plt.plot(epochs, metric_value_2, 'red', label = metric_name_2)

    # Add title to the plot.
    plt.title(str(plot_name))

    # Add legend to the plot.
    plt.legend()

In [None]:
# Visualize the training and validation loss metrices.
plot_metric(model_training_history, 'loss', 'val_loss', 'Total Loss vs Total Validation Loss')

In [None]:
#Predicted sign
res = model.predict(X_test)
words[np.argmax(res[1])]

In [None]:
#Real sign
words[np.argmax(y_test[1])]

In [None]:
# Get the loss and accuracy from model_evaluation_history.
model_evaluation_loss, model_evaluation_accuracy = model_evaluation_history

# Define the string date format.
# Get the current Date and Time in a DateTime Object.
# Convert the DateTime object to string according to the style mentioned in date_time_format string.
date_time_format = '%Y_%m_%d__%H_%M_%S'
current_date_time_dt = dt.datetime.now()
current_date_time_string = dt.datetime.strftime(current_date_time_dt, date_time_format)

# Define a useful name for our model to make it easy for us while navigating through multiple saved models.
model_file_name = f'Kaleem_model_2_signers___Date_Time_{current_date_time_string}___Loss_{model_evaluation_loss}___Accuracy_{model_evaluation_accuracy}.h5'

# Save your Model.
model.save(model_file_name)

In [None]:
yhat = model.predict(X_test)

In [None]:
def get_key_by_value(dictionary, value):
    for key, val in dictionary.items():
        if val == value:
            return key
    return None

In [None]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [None]:
y = []
for v in ytrue:
    y.append(get_key_by_value(label_map, v))
print(y)

In [None]:
y = [karsl_6[karsl_6['Sign-Arabic'] == v]['Sign-English'].values[0] for v in y]
print(y)

In [None]:
ypred = []
for v in yhat:
    ypred.append(get_key_by_value(label_map, v))
print(ypred)

In [None]:
ypred = [karsl_6[karsl_6['Sign-Arabic'] == v]['Sign-English'].values[0] for v in ypred]
print(ypred)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Assuming y and ypred are your target labels and predicted labels, respectively

# Select the first 20 classes
y_subset = y[:200]
ypred_subset = ypred[:200]

# Get unique class labels
class_labels = np.unique(y_subset)

# Compute confusion matrix
cm = confusion_matrix(y_subset, ypred_subset, labels=class_labels)

# Create a DataFrame from the confusion matrix
df_cm = pd.DataFrame(cm, index=class_labels, columns=class_labels)
df_cm.index.name = 'Actual'
df_cm.columns.name = 'Predicted'

# Plot the confusion matrix
plt.figure(figsize=(10, 8))
sns.set(font_scale=1.3)  # for label size
sns.heatmap(df_cm, cmap="Blues", annot=True, fmt="d", annot_kws={"size": 12})
plt.title("Confusion Matrix - First 20 Classes")
plt.show()