In [47]:
import os
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
import mediapipe as mp
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import cv2
from tqdm import tqdm
import pickle

In [24]:
class WLASLDataLoader:
    def __init__(self, data_path):
        """
        Initialize the WLASL data loader.
        
        Args:
            data_path (str): Path to the root directory of the WLASL dataset
        """
        self.data_path = data_path
        self.videos_path = os.path.join(data_path, 'videos')
        self.json_path = os.path.join(data_path, 'WLASL_v0.3.json')
        self.class_list_path = os.path.join(data_path, 'wlasl_class_list.txt')
        
    def load_class_list(self):
        """
        Load and parse the class list file with improved error handling.
        Returns a dictionary mapping class names to indices.
        """
        class_map = {}
        print("Loading class list from:", self.class_list_path)
        
        try:
            with open(self.class_list_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()
                
            for line in lines:
                # Skip empty lines
                if not line.strip():
                    continue
                    
                # Try different parsing approaches
                try:
                    # First attempt: split by space
                    parts = line.strip().split()
                    if len(parts) >= 2:
                        idx = int(parts[0])
                        # Join remaining parts as class name in case class name contains spaces
                        class_name = ' '.join(parts[1:])
                        class_map[class_name] = idx
                    else:
                        print(f"Warning: Skipping invalid line: {line.strip()}")
                except ValueError as e:
                    print(f"Warning: Could not parse line: {line.strip()}, Error: {str(e)}")
                    
            if not class_map:
                raise ValueError("No valid classes found in the class list file")
                
            print(f"Successfully loaded {len(class_map)} classes")
            return class_map
            
        except FileNotFoundError:
            print(f"Error: Class list file not found at {self.class_list_path}")
            raise
        except Exception as e:
            print(f"Error loading class list: {str(e)}")
            raise
            
    def load_metadata(self):
        """
        Load and parse the WLASL JSON file with error handling.
        """
        try:
            print("Loading metadata from:", self.json_path)
            with open(self.json_path, 'r', encoding='utf-8') as f:
                metadata = json.load(f)
            print(f"Successfully loaded metadata with {len(metadata)} entries")
            return metadata
        except FileNotFoundError:
            print(f"Error: Metadata file not found at {self.json_path}")
            raise
        except json.JSONDecodeError as e:
            print(f"Error: Invalid JSON in metadata file: {str(e)}")
            raise
        except Exception as e:
            print(f"Error loading metadata: {str(e)}")
            raise
    
    def analyze_dataset(self):
        """
        Analyze the dataset and print statistics with improved error handling.
        """
        try:
            # Load metadata and class map
            metadata = self.load_metadata()
            class_map = self.load_class_list()
            
            print("\nDataset Analysis:")
            print(f"Total number of classes in class list: {len(class_map)}")
            
            # Count videos per class
            videos_per_class = {}
            total_videos = 0
            valid_videos = 0
            missing_videos = []
            
            # Create progress bar for metadata processing
            for entry in tqdm(metadata, desc="Analyzing dataset"):
                gloss = entry['gloss']
                if gloss in class_map:
                    instances = entry['instances']
                    num_instances = len(instances)
                    videos_per_class[gloss] = {
                        'total': num_instances,
                        'available': 0
                    }
                    total_videos += num_instances
                    
                    # Check video existence
                    for instance in instances:
                        video_id = instance['video_id']
                        video_path = os.path.join(self.videos_path, f'{video_id}.mp4')
                        
                        if os.path.exists(video_path):
                            videos_per_class[gloss]['available'] += 1
                            valid_videos += 1
                        else:
                            missing_videos.append((gloss, video_id))
            
            # Print detailed analysis
            print(f"\nDetailed Analysis:")
            print(f"Total videos in metadata: {total_videos}")
            print(f"Actually available videos: {valid_videos}")
            print(f"Missing videos: {total_videos - valid_videos}")
            
            if total_videos > 0:
                print(f"Video availability rate: {(valid_videos/total_videos)*100:.2f}%")
            
            if videos_per_class:
                # Find classes with most and least videos
                max_class = max(videos_per_class.items(), 
                              key=lambda x: x[1]['available'])
                min_class = min(videos_per_class.items(), 
                              key=lambda x: x[1]['available'])
                
                print(f"\nClass Distribution:")
                print(f"Largest class: {max_class[0]} "
                      f"({max_class[1]['available']}/{max_class[1]['total']} videos)")
                print(f"Smallest class: {min_class[0]} "
                      f"({min_class[1]['available']}/{min_class[1]['total']} videos)")
                
                # Calculate average videos per class
                avg_videos = valid_videos / len(videos_per_class)
                print(f"Average available videos per class: {avg_videos:.2f}")
            
            # Print sample of missing videos if any
            if missing_videos:
                print(f"\nSample of missing videos (showing first 5):")
                for gloss, video_id in missing_videos[:5]:
                    print(f"- Class: {gloss}, Video ID: {video_id}")
            
            return videos_per_class
            
        except Exception as e:
            print(f"Error during dataset analysis: {str(e)}")
            raise

In [18]:
class DataPreprocessor:
    def __init__(self, data_path):
        self.data_loader = WLASLDataLoader(data_path)
        self.mp_holistic = mp.solutions.holistic
        self.metadata = self.data_loader.load_metadata()
        self.class_map = self.data_loader.load_class_list()
        
    def extract_landmarks(self, frame, holistic):
        """Extract pose, face, and hand landmarks from a frame."""
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = holistic.process(image)
        
        # Initialize empty arrays for landmarks
        pose = np.zeros(33 * 3)  # 33 pose landmarks
        left_hand = np.zeros(21 * 3)  # 21 hand landmarks
        right_hand = np.zeros(21 * 3)
        
        # Extract pose landmarks
        if results.pose_landmarks:
            for idx, landmark in enumerate(results.pose_landmarks.landmark):
                pose[idx*3:(idx+1)*3] = [landmark.x, landmark.y, landmark.z]
                
        # Extract hand landmarks
        if results.left_hand_landmarks:
            for idx, landmark in enumerate(results.left_hand_landmarks.landmark):
                left_hand[idx*3:(idx+1)*3] = [landmark.x, landmark.y, landmark.z]
                
        if results.right_hand_landmarks:
            for idx, landmark in enumerate(results.right_hand_landmarks.landmark):
                right_hand[idx*3:(idx+1)*3] = [landmark.x, landmark.y, landmark.z]
                
        return np.concatenate([pose, left_hand, right_hand])
    
    def process_video(self, video_path, max_frames=30):
        """Process a video file and extract landmarks."""
        landmarks_sequence = []
        
        cap = cv2.VideoCapture(video_path)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        
        # Calculate frame sampling rate
        sample_every = max(1, frame_count // max_frames)
        
        with self.mp_holistic.Holistic(
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5) as holistic:
            
            frame_idx = 0
            while cap.isOpened() and len(landmarks_sequence) < max_frames:
                ret, frame = cap.read()
                if not ret:
                    break
                    
                if frame_idx % sample_every == 0:
                    landmarks = self.extract_landmarks(frame, holistic)
                    landmarks_sequence.append(landmarks)
                    
                frame_idx += 1
                
        cap.release()
        
        # Pad sequence if necessary
        while len(landmarks_sequence) < max_frames:
            landmarks_sequence.append(np.zeros_like(landmarks_sequence[0]))
            
        return np.array(landmarks_sequence)
    
    def prepare_dataset(self, split=0.2):
        """Prepare the dataset for training using JSON metadata."""
        X = []
        y = []
        
        # Process videos using metadata
        for entry in tqdm(self.metadata, desc="Processing classes"):
            gloss = entry['gloss']
            if gloss not in self.class_map:
                continue
                
            class_idx = self.class_map[gloss]
            
            for instance in tqdm(entry['instances'], 
                               desc=f"Processing videos for {gloss}",
                               leave=False):
                video_id = instance['video_id']
                video_path = os.path.join(self.data_loader.videos_path, f'{video_id}.mp4')
                
                if not os.path.exists(video_path):
                    continue
                    
                try:
                    sequence = self.process_video(video_path)
                    X.append(sequence)
                    y.append(class_idx)
                except Exception as e:
                    print(f"Error processing {video_path}: {str(e)}")
        
        X = np.array(X)
        y = np.array(y)
        
        # Split dataset
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=split, random_state=42)
        
        return X_train, X_val, y_train, y_val

In [7]:
class SignLanguageModel:
    def __init__(self, num_classes, sequence_length, feature_dim):
        self.num_classes = num_classes
        self.sequence_length = sequence_length
        self.feature_dim = feature_dim
        
    def build_model(self):
        """Build the hybrid CNN-LSTM model."""
        # Input layer
        input_layer = layers.Input(shape=(self.sequence_length, self.feature_dim))
        
        # 1D CNN layers
        x = layers.Conv1D(64, 3, padding='same', activation='relu')(input_layer)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling1D(2)(x)
        
        x = layers.Conv1D(128, 3, padding='same', activation='relu')(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling1D(2)(x)
        
        # LSTM layers
        x = layers.LSTM(256, return_sequences=True)(x)
        x = layers.Dropout(0.3)(x)
        x = layers.LSTM(128)(x)
        x = layers.Dropout(0.3)(x)
        
        # Dense layers
        x = layers.Dense(128, activation='relu')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(0.3)(x)
        
        # Output layer
        output_layer = layers.Dense(self.num_classes, activation='softmax')(x)
        
        # Create model
        model = models.Model(inputs=input_layer, outputs=output_layer)
        return model
    
    def compile_model(self, model):
        """Compile the model with appropriate optimizer and loss function."""
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        model.compile(
            optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )
        return model
    
    def train_model(self, model, X_train, y_train, X_val, y_val, 
                    epochs=50, batch_size=32):
        """Train the model with early stopping."""
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        )
        
        # Add TQDM progress bar
        class TQDMCallback(tf.keras.callbacks.Callback):
            def on_epoch_begin(self, epoch, logs=None):
                print(f'\nEpoch {epoch+1}/{epochs}')
                self.tqdm = tqdm(total=len(X_train)//batch_size, 
                               desc='Training', leave=False)

            def on_batch_end(self, batch, logs=None):
                self.tqdm.update(1)

            def on_epoch_end(self, epoch, logs=None):
                self.tqdm.close()
                
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[early_stopping, TQDMCallback()],
            verbose=0
        )
        
        return history

In [26]:
DATA_PATH = "./WLASL/"  # Update with your dataset path

try:
    # Initialize data loader
    data_loader = WLASLDataLoader(DATA_PATH)
    # Analyze dataset
    print("Starting dataset analysis...")
    videos_per_class = data_loader.analyze_dataset()
    print("\nAnalysis completed successfully!")

except Exception as e:
    print(f"Error in main execution: {str(e)}")

Starting dataset analysis...
Loading metadata from: ./WLASL/WLASL_v0.3.json
Successfully loaded metadata with 2000 entries
Loading class list from: ./WLASL/wlasl_class_list.txt
Successfully loaded 2000 classes

Dataset Analysis:
Total number of classes in class list: 2000


Analyzing dataset: 100%|██████████████████████████████████████████████████████████| 2000/2000 [00:04<00:00, 480.77it/s]


Detailed Analysis:
Total videos in metadata: 21067
Actually available videos: 11971
Missing videos: 9096
Video availability rate: 56.82%

Class Distribution:
Largest class: before (16/26 videos)
Smallest class: gloves (2/10 videos)
Average available videos per class: 5.99

Sample of missing videos (showing first 5):
- Class: book, Video ID: 65225
- Class: book, Video ID: 68011
- Class: book, Video ID: 68208
- Class: book, Video ID: 68012
- Class: book, Video ID: 70212

Analysis completed successfully!





In [28]:
# Initialize preprocessor and prepare dataset
print("\nPreparing dataset...")
preprocessor = DataPreprocessor(DATA_PATH)
X_train, X_val, y_train, y_val = preprocessor.prepare_dataset()


Preparing dataset...
Loading metadata from: ./WLASL/WLASL_v0.3.json
Successfully loaded metadata with 2000 entries
Loading class list from: ./WLASL/wlasl_class_list.txt
Successfully loaded 2000 classes


Processing classes:   0%|                                                                     | 0/2000 [00:00<?, ?it/s]

Processing videos for book:   2%|█▍                                                     | 1/40 [00:04<02:43,  4.19s/it][A
Processing videos for book:  28%|██████████████▊                                       | 11/40 [00:08<00:18,  1.54it/s][A
Processing videos for book:  45%|████████████████████████▎                             | 18/40 [00:11<00:13,  1.68it/s][A
Processing videos for book:  57%|███████████████████████████████                       | 23/40 [00:15<00:11,  1.54it/s][A
Processing videos for book:  62%|█████████████████████████████████▊                    | 25/40 [00:19<00:13,  1.15it/s][A
Processing videos for book:  75%|████████████████████████████████████████▌             | 30/40 [00:23<00:08,  1.19it/s][A
Processing classes:   0%|                                                          | 1/2000 [00:23<13:13:37, 23.82s/it][A
Processing videos 

In [31]:
# Get model parameters
num_classes = len(preprocessor.class_map)
sequence_length = X_train.shape[1]
feature_dim = X_train.shape[2]

In [33]:
# Initialize and build model
print("Building model...")
sign_language_model = SignLanguageModel(num_classes, sequence_length, feature_dim)
model = sign_language_model.build_model()
model = sign_language_model.compile_model(model)

Building model...


In [35]:
# Train model
print("Training model...")
history = sign_language_model.train_model(model, X_train, y_train, X_val, y_val)

Training model...

Epoch 1/50


                                                                                                                       


Epoch 2/50


                                                                                                                       


Epoch 3/50


                                                                                                                       


Epoch 4/50


                                                                                                                       


Epoch 5/50


                                                                                                                       


Epoch 6/50


                                                                                                                       


Epoch 7/50


                                                                                                                       


Epoch 8/50


                                                                                                                       


Epoch 9/50


                                                                                                                       


Epoch 10/50


                                                                                                                       


Epoch 11/50


                                                                                                                       


Epoch 12/50


                                                                                                                       


Epoch 13/50


                                                                                                                       


Epoch 14/50


                                                                                                                       


Epoch 15/50


                                                                                                                       


Epoch 16/50


                                                                                                                       


Epoch 17/50


                                                                                                                       


Epoch 18/50


                                                                                                                       


Epoch 19/50


                                                                                                                       


Epoch 20/50


                                                                                                                       


Epoch 21/50


                                                                                                                       


Epoch 22/50


                                                                                                                       


Epoch 23/50


                                                                                                                       


Epoch 24/50


                                                                                                                       


Epoch 25/50


                                                                                                                       


Epoch 26/50


                                                                                                                       


Epoch 27/50


                                                                                                                       


Epoch 28/50


                                                                                                                       


Epoch 29/50


                                                                                                                       


Epoch 30/50


                                                                                                                       

In [58]:
test_loss, test_accuracy = model.evaluate(X_val, y_val)
print(f"Test accuracy: {test_accuracy:.4f}")

Test accuracy: 0.0547


In [39]:
# Save model and preprocessing parameters
print("Saving model and parameters...")
model.save('sign_language_model.h5')

Saving model and parameters...


  saving_api.save_model(


In [64]:
# Get model predictions for validation set
y_pred = np.argmax(model.predict(X_val), axis=1)

# If your y_val is one-hot encoded, convert it to class indices
if len(y_val.shape) > 1:  # Check if one-hot encoded
    y_val_classes = np.argmax(y_val, axis=1)
else:
    y_val_classes = y_val

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_val_classes, y_pred)

# Initialize metrics dictionary
metrics = {}

try:
    class_names = list(sign_language_model.preprocessor.class_map.keys())
except:
    class_names = [str(i) for i in range(len(np.unique(y_val_classes)))]



In [66]:
# Calculate metrics for each class
for i, class_name in enumerate(class_names):
    # True Positives: diagonal elements
    tp = conf_matrix[i, i]
    
    # False Positives: sum of column i (excluding diagonal)
    fp = np.sum(conf_matrix[:, i]) - tp
    
    # False Negatives: sum of row i (excluding diagonal)
    fn = np.sum(conf_matrix[i, :]) - tp
    
    # True Negatives: sum of all elements excluding row i and column i
    tn = np.sum(conf_matrix) - (tp + fp + fn)
    
    # Calculate additional metrics
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    
    metrics[class_name] = {
        'true_positive': int(tp),
        'false_positive': int(fp),
        'true_negative': int(tn),
        'false_negative': int(fn),
        'precision': float(precision),
        'recall': float(recall),
        'f1_score': float(f1)
    }

In [68]:
# Calculate overall metrics
overall_metrics = {
    'accuracy': float(np.mean(y_pred == y_val_classes)),
    'macro_precision': float(np.mean([m['precision'] for m in metrics.values()])),
    'macro_recall': float(np.mean([m['recall'] for m in metrics.values()])),
    'macro_f1': float(np.mean([m['f1_score'] for m in metrics.values()]))
}

In [70]:
# Create comprehensive report
report = {
    'overall_metrics': overall_metrics,
    'class_metrics': metrics
}

# Print training history metrics if available
if history:
    print("Training History Metrics:")
    print("=" * 50)
    print(f"Final Validation Accuracy: {history.history['val_accuracy'][-1]:.4f}")
    print(f"Final Training Accuracy: {history.history['accuracy'][-1]:.4f}")
    print(f"Final Validation Loss: {history.history['val_loss'][-1]:.4f}")
    print(f"Final Training Loss: {history.history['loss'][-1]:.4f}\n")

# Print overall metrics
print("\nModel Evaluation Report")
print("=" * 50)
print("\nOverall Metrics:")
print("-" * 20)
for metric, value in report['overall_metrics'].items():
    print(f"{metric.replace('_', ' ').title()}: {value:.4f}")

# Print per-class metrics
print("\nPer-Class Metrics:")
print("-" * 20)

# Calculate maximum class name length for formatting
max_name_length = max(len(str(name)) for name in report['class_metrics'].keys())

# Print header
header = (f"{'Class':<{max_name_length}} | {'TP':>5} | {'FP':>5} | "
         f"{'TN':>5} | {'FN':>5} | {'Precision':>9} | {'Recall':>9} | {'F1':>9}")
print(header)
print("-" * len(header))

# Print metrics for each class
for class_name, metrics in report['class_metrics'].items():
    print(
        f"{str(class_name):<{max_name_length}} | "
        f"{metrics['true_positive']:5d} | "
        f"{metrics['false_positive']:5d} | "
        f"{metrics['true_negative']:5d} | "
        f"{metrics['false_negative']:5d} | "
        f"{metrics['precision']:9.4f} | "
        f"{metrics['recall']:9.4f} | "
        f"{metrics['f1_score']:9.4f}"
    )

Training History Metrics:
Final Validation Accuracy: 0.0564
Final Training Accuracy: 0.3148
Final Validation Loss: 6.2727
Final Training Loss: 2.7227


Model Evaluation Report

Overall Metrics:
--------------------
Accuracy: 0.0547
Macro Precision: 0.0376
Macro Recall: 0.0519
Macro F1: 0.0376

Per-Class Metrics:
--------------------
Class |    TP |    FP |    TN |    FN | Precision |    Recall |        F1
-------------------------------------------------------------------------
0    |     0 |     0 |  2393 |     2 |    0.0000 |    0.0000 |    0.0000
1    |     1 |    10 |  2380 |     4 |    0.0909 |    0.2000 |    0.1250
2    |     0 |     0 |  2392 |     3 |    0.0000 |    0.0000 |    0.0000
3    |     0 |     0 |  2388 |     7 |    0.0000 |    0.0000 |    0.0000
4    |     0 |     4 |  2389 |     2 |    0.0000 |    0.0000 |    0.0000
5    |     0 |     0 |  2394 |     1 |    0.0000 |    0.0000 |    0.0000
6    |     0 |     4 |  2387 |     4 |    0.0000 |    0.0000 |    0.0000
7    |