# Sign Language Recognition - Trial 1: Classical ML
This notebook implements static gesture recognition using classical ML approaches.

In [5]:
import os
import cv2
import numpy as np
import mediapipe as mp
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
from tqdm.notebook import tqdm

## 1. Data Processing with MediaPipe

In [12]:
class HandProcessor:
    def __init__(self):
        # Initialize MediaPipe
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=True,
            max_num_hands=1,
            min_detection_confidence=0.3
        )
        
        # Label mapping
        self.label_mapping = {
            '0': 'A', '1': 'B', '2': 'C', '3': 'D', '4': 'E',
            '5': 'F', '6': 'G', '7': 'H', '8': 'I',
            '10': 'K', '11': 'L', '12': 'M', '13': 'N', '14': 'O',
            '15': 'P', '16': 'Q', '17': 'R', '18': 'S', '19': 'T',
            '20': 'U', '21': 'V', '22': 'W', '23': 'X', '24': 'Y'
        }
    
    def extract_features(self, image):
        """Extract features from a single image."""
        # Convert to RGB
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        # Process image
        results = self.hands.process(image_rgb)
        
        if not results.multi_hand_landmarks:
            return None
        
        # Get landmarks
        landmarks = results.multi_hand_landmarks[0]
        
        # Extract features (x, y, z coordinates)
        features = []
        for landmark in landmarks.landmark:
            features.extend([landmark.x, landmark.y, landmark.z])
            
        return np.array(features)
    
    def process_dataset(self, data_dir):
        """Process dataset with flattened directory structure."""
        features = []
        labels = []
        
        # Process each gesture class
        for gesture_dir in sorted(self.label_mapping.keys()):
            gesture_path = os.path.join(data_dir, gesture_dir)
            
            if not os.path.exists(gesture_path):
                print(f"Skipping {self.label_mapping[gesture_dir]} (directory {gesture_dir} not found)")
                continue
            
            print(f"\nProcessing {self.label_mapping[gesture_dir]} (folder {gesture_dir})")
            
            # Get all jpg files directly in the gesture directory
            image_files = [f for f in os.listdir(gesture_path) 
                         if f.endswith('.jpg') and not f.startswith('.')]
            
            print(f"Found {len(image_files)} images")
            
            successful_extractions = 0
            failed_extractions = 0
            
            # Process each image
            for image_file in tqdm(image_files, desc=f"Processing {self.label_mapping[gesture_dir]}"):
                image_path = os.path.join(gesture_path, image_file)
                
                try:
                    # Load and process image
                    image = cv2.imread(image_path)
                    
                    if image is None:
                        failed_extractions += 1
                        continue
                    
                    # Extract features
                    hand_features = self.extract_features(image)
                    if hand_features is not None:
                        features.append(hand_features)
                        labels.append(gesture_dir)
                        successful_extractions += 1
                    else:
                        failed_extractions += 1
                except Exception as e:
                    print(f"Error processing {image_file}: {str(e)}")
                    failed_extractions += 1
            
            print(f"\nGesture {self.label_mapping[gesture_dir]} statistics:")
            print(f"Successful extractions: {successful_extractions}")
            print(f"Failed extractions: {failed_extractions}")
        
        if len(features) == 0:
            raise ValueError("No valid samples were processed. Check the dataset structure and image files.")
        
        features = np.array(features)
        labels = np.array(labels)
        
        print("\nFinal dataset statistics:")
        print(f"Total samples: {len(features)}")
        print("\nSamples per class:")
        for label in sorted(np.unique(labels)):
            count = np.sum(labels == label)
            print(f"Class {label} ({self.label_mapping[label]}): {count} samples")
        
        return features, labels

# Initialize processor
processor = HandProcessor()

# Process dataset
print("Processing dataset...")
try:
    X, y = processor.process_dataset(DATA_DIR)
    print("\nDataset processed successfully!")
    print(f"Feature shape: {X.shape}")
    print(f"Number of classes: {len(np.unique(y))}")
    
    # Create train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=42
    )
    
    print("\nSplit created:")
    print(f"Training samples: {X_train.shape[0]}")
    print(f"Testing samples: {X_test.shape[0]}")
    
except Exception as e:
    print(f"\nError processing dataset: {str(e)}")

Processing dataset...

Processing A (folder 0)
Found 100 images


I0000 00:00:1731354004.049122 1957621 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2


Processing A:   0%|          | 0/100 [00:00<?, ?it/s]

W0000 00:00:1731354004.081980 1974013 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1731354004.097134 1974015 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1731354004.230481 1974008 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.



Gesture A statistics:
Successful extractions: 100
Failed extractions: 0

Processing B (folder 1)
Found 100 images


Processing B:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture B statistics:
Successful extractions: 100
Failed extractions: 0

Processing K (folder 10)
Found 100 images


Processing K:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture K statistics:
Successful extractions: 95
Failed extractions: 5

Processing L (folder 11)
Found 100 images


Processing L:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture L statistics:
Successful extractions: 100
Failed extractions: 0

Processing M (folder 12)
Found 100 images


Processing M:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture M statistics:
Successful extractions: 100
Failed extractions: 0

Processing N (folder 13)
Found 100 images


Processing N:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture N statistics:
Successful extractions: 100
Failed extractions: 0

Processing O (folder 14)
Found 100 images


Processing O:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture O statistics:
Successful extractions: 100
Failed extractions: 0

Processing P (folder 15)
Found 100 images


Processing P:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture P statistics:
Successful extractions: 100
Failed extractions: 0

Processing Q (folder 16)
Found 100 images


Processing Q:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture Q statistics:
Successful extractions: 100
Failed extractions: 0

Processing R (folder 17)
Found 100 images


Processing R:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture R statistics:
Successful extractions: 100
Failed extractions: 0

Processing S (folder 18)
Found 100 images


Processing S:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture S statistics:
Successful extractions: 100
Failed extractions: 0

Processing T (folder 19)
Found 100 images


Processing T:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture T statistics:
Successful extractions: 100
Failed extractions: 0

Processing C (folder 2)
Found 100 images


Processing C:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture C statistics:
Successful extractions: 100
Failed extractions: 0

Processing U (folder 20)
Found 100 images


Processing U:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture U statistics:
Successful extractions: 98
Failed extractions: 2

Processing V (folder 21)
Found 100 images


Processing V:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture V statistics:
Successful extractions: 100
Failed extractions: 0

Processing W (folder 22)
Found 100 images


Processing W:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture W statistics:
Successful extractions: 100
Failed extractions: 0

Processing X (folder 23)
Found 100 images


Processing X:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture X statistics:
Successful extractions: 100
Failed extractions: 0

Processing Y (folder 24)
Found 100 images


Processing Y:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture Y statistics:
Successful extractions: 100
Failed extractions: 0

Processing D (folder 3)
Found 100 images


Processing D:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture D statistics:
Successful extractions: 100
Failed extractions: 0

Processing E (folder 4)
Found 100 images


Processing E:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture E statistics:
Successful extractions: 100
Failed extractions: 0

Processing F (folder 5)
Found 100 images


Processing F:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture F statistics:
Successful extractions: 100
Failed extractions: 0

Processing G (folder 6)
Found 100 images


Processing G:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture G statistics:
Successful extractions: 100
Failed extractions: 0

Processing H (folder 7)
Found 100 images


Processing H:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture H statistics:
Successful extractions: 100
Failed extractions: 0

Processing I (folder 8)
Found 100 images


Processing I:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture I statistics:
Successful extractions: 100
Failed extractions: 0

Final dataset statistics:
Total samples: 2393

Samples per class:
Class 0 (A): 100 samples
Class 1 (B): 100 samples
Class 10 (K): 95 samples
Class 11 (L): 100 samples
Class 12 (M): 100 samples
Class 13 (N): 100 samples
Class 14 (O): 100 samples
Class 15 (P): 100 samples
Class 16 (Q): 100 samples
Class 17 (R): 100 samples
Class 18 (S): 100 samples
Class 19 (T): 100 samples
Class 2 (C): 100 samples
Class 20 (U): 98 samples
Class 21 (V): 100 samples
Class 22 (W): 100 samples
Class 23 (X): 100 samples
Class 24 (Y): 100 samples
Class 3 (D): 100 samples
Class 4 (E): 100 samples
Class 5 (F): 100 samples
Class 6 (G): 100 samples
Class 7 (H): 100 samples
Class 8 (I): 100 samples

Dataset processed successfully!
Feature shape: (2393, 63)
Number of classes: 24

Split created:
Training samples: 1914
Testing samples: 479


## 2. Load and Process Dataset

In [13]:
# Initialize processor
processor = HandProcessor()

# Set your dataset path
DATA_DIR = 'hands_dataset_cleaned'

# Process dataset
X, y = processor.process_dataset(DATA_DIR)

# Create train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)




Processing A (folder 0)
Found 100 images


I0000 00:00:1731391917.204010 1957621 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2


Processing A:   0%|          | 0/100 [00:00<?, ?it/s]

W0000 00:00:1731391917.259132 2003487 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1731391917.273413 2003487 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.



Gesture A statistics:
Successful extractions: 100
Failed extractions: 0

Processing B (folder 1)
Found 100 images


Processing B:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture B statistics:
Successful extractions: 100
Failed extractions: 0

Processing K (folder 10)
Found 100 images


Processing K:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture K statistics:
Successful extractions: 95
Failed extractions: 5

Processing L (folder 11)
Found 100 images


Processing L:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture L statistics:
Successful extractions: 100
Failed extractions: 0

Processing M (folder 12)
Found 100 images


Processing M:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture M statistics:
Successful extractions: 100
Failed extractions: 0

Processing N (folder 13)
Found 100 images


Processing N:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture N statistics:
Successful extractions: 100
Failed extractions: 0

Processing O (folder 14)
Found 100 images


Processing O:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture O statistics:
Successful extractions: 100
Failed extractions: 0

Processing P (folder 15)
Found 100 images


Processing P:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture P statistics:
Successful extractions: 100
Failed extractions: 0

Processing Q (folder 16)
Found 100 images


Processing Q:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture Q statistics:
Successful extractions: 100
Failed extractions: 0

Processing R (folder 17)
Found 100 images


Processing R:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture R statistics:
Successful extractions: 100
Failed extractions: 0

Processing S (folder 18)
Found 100 images


Processing S:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture S statistics:
Successful extractions: 100
Failed extractions: 0

Processing T (folder 19)
Found 100 images


Processing T:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture T statistics:
Successful extractions: 100
Failed extractions: 0

Processing C (folder 2)
Found 100 images


Processing C:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture C statistics:
Successful extractions: 100
Failed extractions: 0

Processing U (folder 20)
Found 100 images


Processing U:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture U statistics:
Successful extractions: 98
Failed extractions: 2

Processing V (folder 21)
Found 100 images


Processing V:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture V statistics:
Successful extractions: 100
Failed extractions: 0

Processing W (folder 22)
Found 100 images


Processing W:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture W statistics:
Successful extractions: 100
Failed extractions: 0

Processing X (folder 23)
Found 100 images


Processing X:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture X statistics:
Successful extractions: 100
Failed extractions: 0

Processing Y (folder 24)
Found 100 images


Processing Y:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture Y statistics:
Successful extractions: 100
Failed extractions: 0

Processing D (folder 3)
Found 100 images


Processing D:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture D statistics:
Successful extractions: 100
Failed extractions: 0

Processing E (folder 4)
Found 100 images


Processing E:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture E statistics:
Successful extractions: 100
Failed extractions: 0

Processing F (folder 5)
Found 100 images


Processing F:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture F statistics:
Successful extractions: 100
Failed extractions: 0

Processing G (folder 6)
Found 100 images


Processing G:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture G statistics:
Successful extractions: 100
Failed extractions: 0

Processing H (folder 7)
Found 100 images


Processing H:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture H statistics:
Successful extractions: 100
Failed extractions: 0

Processing I (folder 8)
Found 100 images


Processing I:   0%|          | 0/100 [00:00<?, ?it/s]


Gesture I statistics:
Successful extractions: 100
Failed extractions: 0

Final dataset statistics:
Total samples: 2393

Samples per class:
Class 0 (A): 100 samples
Class 1 (B): 100 samples
Class 10 (K): 95 samples
Class 11 (L): 100 samples
Class 12 (M): 100 samples
Class 13 (N): 100 samples
Class 14 (O): 100 samples
Class 15 (P): 100 samples
Class 16 (Q): 100 samples
Class 17 (R): 100 samples
Class 18 (S): 100 samples
Class 19 (T): 100 samples
Class 2 (C): 100 samples
Class 20 (U): 98 samples
Class 21 (V): 100 samples
Class 22 (W): 100 samples
Class 23 (X): 100 samples
Class 24 (Y): 100 samples
Class 3 (D): 100 samples
Class 4 (E): 100 samples
Class 5 (F): 100 samples
Class 6 (G): 100 samples
Class 7 (H): 100 samples
Class 8 (I): 100 samples


## 3. Train and Evaluate Models

In [18]:
import os
import joblib
from datetime import datetime

def create_results_dir():
    """Create a timestamped directory for results."""
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    results_dir = f'results/trial1_{timestamp}'
    os.makedirs(results_dir, exist_ok=True)
    return results_dir

def train_and_evaluate(model, X_train, X_test, y_train, y_test, model_name, save_dir):
    """Train, evaluate, and save the model."""
    # Train
    print(f"Training {model_name}...")
    model.fit(X_train, y_train)
    
    # Predict
    y_pred = model.predict(X_test)
    
    # Print results
    print(f"\n{model_name} Results:")
    print(classification_report(y_test, y_pred))
    
    # Plot confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(15, 15))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f'Confusion Matrix - {model_name}')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    
    # Save confusion matrix plot
    plt.savefig(os.path.join(save_dir, f'{model_name.lower()}_confusion_matrix.png'))
    plt.close()
    
    # Save model
    model_path = os.path.join(save_dir, f'{model_name.lower()}_model.joblib')
    joblib.dump(model, model_path)
    print(f"Model saved to: {model_path}")
    
    # Save additional info
    info = {
        'label_mapping': processor.label_mapping,
        'feature_shape': X_train.shape[1],
        'num_classes': len(np.unique(y_train))
    }
    info_path = os.path.join(save_dir, f'{model_name.lower()}_info.joblib')
    joblib.dump(info, info_path)
    
    return model

# Create results directory
results_dir = create_results_dir()
print(f"Saving results to: {results_dir}")

# Train Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model = train_and_evaluate(rf_model, X_train, X_test, y_train, y_test, 
                            "RandomForest", results_dir)

# Train SVM
svm_model = SVC(kernel='rbf', random_state=42)
svm_model = train_and_evaluate(svm_model, X_train, X_test, y_train, y_test, 
                             "SVM", results_dir)

# Save a summary of the results
with open(os.path.join(results_dir, 'training_summary.txt'), 'w') as f:
    f.write("Sign Language Recognition - Trial 1 Results\n")
    f.write("=======================================\n\n")
    f.write(f"Training date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
    f.write(f"Dataset size: {len(X_train) + len(X_test)} samples\n")
    f.write(f"Training samples: {len(X_train)}\n")
    f.write(f"Testing samples: {len(X_test)}\n")
    f.write(f"Number of features: {X_train.shape[1]}\n")
    f.write(f"Number of classes: {len(np.unique(y_train))}\n\n")
    
    # Class distribution
    f.write("Class distribution:\n")
    for label in sorted(processor.label_mapping.keys()):
        count = np.sum(y_train == label) + np.sum(y_test == label)
        f.write(f"Class {label} ({processor.label_mapping[label]}): {count} samples\n")

print(f"\nTraining complete! Results saved to: {results_dir}")

Saving results to: results/trial1_20241112_142813
Training RandomForest...

RandomForest Results:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00        20
          10       1.00      1.00      1.00        19
          11       1.00      1.00      1.00        20
          12       1.00      1.00      1.00        20
          13       1.00      1.00      1.00        20
          14       1.00      1.00      1.00        20
          15       1.00      1.00      1.00        20
          16       1.00      1.00      1.00        20
          17       1.00      1.00      1.00        20
          18       1.00      1.00      1.00        20
          19       1.00      1.00      1.00        20
           2       1.00      0.95      0.97        20
          20       1.00      1.00      1.00        20
          21       1.00      1.00      1.00        20
          22       1.00      1.00    