In [None]:
# Preprocess the Words

import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn import __version__ as sklearn_version
from packaging.version import parse

class WordPreprocessor:
    """Turn the words of a video dataset into integer and one-hot labels.

    The CSV at ``csv_path`` must contain ``video_name`` and ``word`` columns.
    Only rows whose ``video_name`` matches an entry of
    ``<dataset_dir>/videos`` are used.
    """

    def __init__(self, dataset_dir, csv_path):
        """Store the dataset locations and create the (unfitted) encoders.

        Args:
            dataset_dir: Directory that contains the ``videos`` sub-directory.
            csv_path: Path of the CSV listing ``video_name`` / ``word`` pairs.
        """
        self.dataset_dir = dataset_dir
        self.csv_path = csv_path
        # Check sklearn version to decide on the argument name
        # (the branch below passes ``sparse_output`` from 1.2 on, ``sparse`` before).
        if parse(sklearn_version) >= parse("1.2"):
            self.onehot_encoder = OneHotEncoder(sparse_output=False)
        else:
            self.onehot_encoder = OneHotEncoder(sparse=False)
        self.label_encoder = LabelEncoder()
        # word -> integer label, filled in by preprocess_words(). Created here
        # so that reading it before preprocessing yields an empty dict rather
        # than an AttributeError.
        self.class_to_label_mapping = {}

    def preprocess_words(self):
        """Read the CSV, keep rows with an available video and encode the words.

        Fits both encoders as a side effect, stores the word -> label mapping
        in ``self.class_to_label_mapping`` and prints it.

        Returns:
            A tuple ``(video_names, labels, onehot_encoded)`` where
            ``video_names`` and ``labels`` are aligned 1-D arrays and
            ``onehot_encoded`` is the dense one-hot encoding of ``labels``.

        Raises:
            ValueError: If no CSV row refers to a file in the videos directory.
        """
        # Step 1: Read the CSV file
        data = pd.read_csv(self.csv_path)

        # Filter out words for videos not in the videos directory
        videos_dir = os.path.join(self.dataset_dir, 'videos')
        available_videos = set(os.listdir(videos_dir))
        data = data[data['video_name'].isin(available_videos)]

        # Fail early with an actionable message; otherwise the one-hot encoder
        # rejects the empty input with a generic "0 sample(s)" ValueError.
        if data.empty:
            raise ValueError(
                f"No rows of {self.csv_path} reference a file in {videos_dir}; "
                "nothing to encode."
            )

        words = data['word'].values
        video_names = data['video_name'].values

        # Step 2: Encode words into numerical labels
        labels = self.label_encoder.fit_transform(words)

        # Store the mapping of class names to labels
        self.class_to_label_mapping = dict(zip(words, labels))
        print(self.class_to_label_mapping)

        # Step 3: One-hot encode the labels (optional)
        # The encoder expects a 2-D column, hence the reshape.
        onehot_encoded = self.onehot_encoder.fit_transform(labels.reshape(-1, 1))

        return video_names, labels, onehot_encoded

    def decode_labels(self, labels):
        """Map integer labels back to words using the fitted label encoder."""
        return self.label_encoder.inverse_transform(labels)

    def decode_onehot(self, onehot_encoded):
        """Map one-hot rows back to words using both fitted encoders."""
        labels = self.onehot_encoder.inverse_transform(onehot_encoded)
        # inverse_transform returns a column; the label encoder needs 1-D input.
        return self.label_encoder.inverse_transform(labels.flatten())

if __name__ == "__main__":
    # Demo: encode the training words, then round-trip them through both
    # decoders to check that encoding and decoding agree.
    word_processor = WordPreprocessor('./train', './train/video_dataset.csv')
    video_names, labels, onehot_encoded = word_processor.preprocess_words()

    # Example usage of decode functions
    decoded_labels = word_processor.decode_labels(labels)
    decoded_onehot = word_processor.decode_onehot(onehot_encoded)
    # Show each video next to its decoded word. Adding the two arrays would
    # concatenate the strings element-wise with no separator between them.
    print(list(zip(video_names, decoded_labels)))
    print("Decoded labels:", decoded_labels)
    print("Decoded onehot:", decoded_onehot)


In [None]:
# LSTM - Model Training

import numpy as np
import h5py
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Constants
# Width of one feature row: datasets read from the HDF5 file are reshaped to
# (-1, FEATURE_DIM), and it is the last axis of the LSTM input shape.
FEATURE_DIM = 4032
# Number of rows (time steps) per sample fed to the LSTM; shorter sequences
# are zero-padded and longer ones truncated to this length.
SEQUENCE_LENGTH = 101

class SignLanguageModel:
    """Stacked-LSTM classifier trained on fixed-length feature sequences.

    Features are read from an HDF5 file with one dataset per video name; the
    targets come from a ``{video_name: integer_label}`` dictionary.
    """

    def __init__(self, feature_dir, labels):
        """Remember the data sources and build the compiled model.

        Args:
            feature_dir: Path of the HDF5 feature file (opened with
                ``h5py.File``, so despite the name it is a file path).
            labels: Dict mapping video name -> integer class label.
        """
        self.feature_dir = feature_dir
        self.labels = labels
        self.N_CLASSES = self._count_classes(labels)
        self.model = self.initialize_model(self.N_CLASSES)

    @staticmethod
    def _count_classes(labels):
        """Return the one-hot width needed to encode every value in ``labels``."""
        label_values = np.asarray(list(labels.values()))
        n_unique = len(np.unique(label_values))
        if label_values.size == 0 or not np.issubdtype(label_values.dtype, np.integer):
            return n_unique
        # to_categorical() indexes columns by label value, so the width must
        # cover the largest label even when some ids are absent from the dict.
        return max(n_unique, int(label_values.max()) + 1)

    @staticmethod
    def _fit_sequence(raw_features, sequence_length, feature_dim):
        """Reshape to ``(-1, feature_dim)`` and pad/truncate to ``sequence_length`` rows."""
        features = np.array(raw_features).reshape(-1, feature_dim)
        # Allocate in the source dtype: padding with default float64 zeros
        # would upcast the sample, and with it the whole stacked training array.
        fitted = np.zeros((sequence_length, feature_dim), dtype=features.dtype)
        n_rows = min(features.shape[0], sequence_length)
        fitted[:n_rows] = features[:n_rows]
        return fitted

    @staticmethod
    def initialize_model(N_CLASSES):
        """Build and compile the LSTM(256) -> LSTM(128) -> softmax model.

        Args:
            N_CLASSES: Number of units of the softmax output layer.

        Returns:
            The compiled ``Sequential`` model.
        """
        model = Sequential()
        # return_sequences=True so the second LSTM receives the full sequence.
        model.add(LSTM(256, input_shape=(SEQUENCE_LENGTH, FEATURE_DIM), return_sequences=True))
        model.add(LSTM(128))
        model.add(Dense(N_CLASSES, activation='softmax'))

        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def train(self, epochs=50, batch_size=32):
        """Fit the model on an 80/20 train/validation split of the loaded data.

        Prints a message and returns without training when no sample could be
        loaded.

        Args:
            epochs: Number of training epochs.
            batch_size: Mini-batch size.
        """
        X, y = self.load_data()

        # Verify if X and y have data before training
        if len(X) == 0 or len(y) == 0:
            print("No valid data available for training!")
            return

        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
        self.model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, batch_size=batch_size)

    def load_data(self):
        """Load every labelled sequence from the HDF5 feature file.

        Datasets whose name is missing from ``self.labels`` are skipped with a
        warning. Each remaining one is padded/truncated to SEQUENCE_LENGTH rows.

        Returns:
            ``(X, y)``: the stacked sequences and their one-hot targets.
        """
        X = []
        y = []
        with h5py.File(self.feature_dir, 'r') as hf:
            video_names = list(hf.keys())
            print("Video names in NASNetLarge_feature_vectors.hdf5:", video_names)

            for video_name in video_names:
                if video_name not in self.labels:
                    print(f"Warning: {video_name} not found in labels dictionary!")
                    continue

                label = self.labels[video_name]
                # Print mapping for verification
                print(f"Mapping: {video_name} -> {label}")

                X.append(self._fit_sequence(hf[video_name], SEQUENCE_LENGTH, FEATURE_DIM))
                y.append(label)

        X = np.array(X)
        y = to_categorical(np.array(y), num_classes=self.N_CLASSES)
        return X, y

    def save_model(self, path_to_save):
        """Save the model to ``path_to_save`` and print a confirmation.

        NOTE(review): ``save_format="tf"`` is passed regardless of the file
        extension of ``path_to_save`` - confirm this matches the Keras version
        in use.
        """
        self.model.save(path_to_save, save_format="tf")
        print(f"Model saved to {path_to_save}")

if __name__ == "__main__":
    # Encode the dataset words, then train the LSTM on the cached features
    # and persist the result.
    word_processor = WordPreprocessor(
        dataset_dir='./train',
        csv_path='./train/video_dataset.csv',
    )
    video_names, labels, _ = word_processor.preprocess_words()

    # video name -> integer label, the lookup SignLanguageModel expects.
    labels_dict = {name: label for name, label in zip(video_names, labels)}
    print("Labels dictionary:", labels_dict)

    sl_model = SignLanguageModel(
        feature_dir='./train/NASNetLarge_feature_vectors.hdf5',
        labels=labels_dict,
    )
    sl_model.train(batch_size=16, epochs=50)
    sl_model.save_model(path_to_save='./trained_sign_language_model.keras')


In [None]:
# Model Prediction

import numpy as np
import h5py
import tensorflow as tf
from tensorflow.keras.models import load_model

# Constants
# Same values as in the training cell: the prediction input is reshaped to
# (1, SEQUENCE_LENGTH, FEATURE_DIM) before it is passed to the model.
# Width of one feature row.
FEATURE_DIM = 4032
# Number of rows (time steps) per sample after padding/truncation.
SEQUENCE_LENGTH = 101

def load_data_for_prediction(feature_dir, video_name):
    """Load one video's features as a single-sample batch for ``model.predict``.

    Args:
        feature_dir: Path of the HDF5 feature file (opened with ``h5py.File``).
        video_name: Name of the dataset to read from that file.

    Returns:
        An array of shape ``(1, SEQUENCE_LENGTH, FEATURE_DIM)`` in the dtype of
        the stored features, zero-padded or truncated along the sequence axis;
        ``None`` (after printing a warning) when ``video_name`` is not in the
        file.
    """
    with h5py.File(feature_dir, 'r') as hf:
        if video_name not in hf:
            print(f"Warning: {video_name} not found in the feature file!")
            return None

        # Copy the data into memory so the file can be closed right away.
        features = np.array(hf[video_name]).reshape(-1, FEATURE_DIM)

    # Allocate the batch in the stored dtype. Padding with default float64
    # zeros made the returned dtype depend on whether the clip was short.
    batch = np.zeros((1, SEQUENCE_LENGTH, FEATURE_DIM), dtype=features.dtype)
    n_rows = min(features.shape[0], SEQUENCE_LENGTH)
    batch[0, :n_rows] = features[:n_rows]
    return batch

if __name__ == "__main__":
    # Restore the trained network.
    model_path = './trained_sign_language_model.keras'
    model = load_model(model_path)

    # Re-run the word preprocessing to recover the video names and labels.
    word_processor = WordPreprocessor(
        dataset_dir='./train',
        csv_path='./train/video_dataset.csv',
    )
    video_names, labels, _ = word_processor.preprocess_words()
    labels_dict = {name: label for name, label in zip(video_names, labels)}

    # Invert word -> label into label -> word (whitespace-stripped).
    # NOTE: the variable names read the other way round from what they hold:
    # `label_to_class_mapping` is word -> label, `class_to_label_mapping` is
    # label -> word.
    label_to_class_mapping = word_processor.class_to_label_mapping
    class_to_label_mapping = dict(
        (label, word.strip()) for word, label in label_to_class_mapping.items()
    )

    # Predict every video of the training set and print actual vs. predicted.
    for sample_video_name in video_names:
        print(f"Making prediction for video: {sample_video_name}")

        sample_data = load_data_for_prediction(
            './train/NASNetLarge_feature_vectors.hdf5',
            sample_video_name,
        )
        if sample_data is None:
            # No features stored for this video; the loader already warned.
            continue

        prediction = model.predict(sample_data)
        # Single-sample batch: take the arg-max of its only row.
        predicted_class = np.argmax(prediction, axis=1)[0]
        predicted_label = class_to_label_mapping[predicted_class]

        actual_class = labels_dict[sample_video_name]
        actual_label = class_to_label_mapping[actual_class]

        print(f"Actual Class & Label for {sample_video_name}: {actual_class} ({actual_label})")
        print(f"Predicted Class & Label for {sample_video_name}: {predicted_class} ({predicted_label})")
        print('-'*50)
