In [None]:
import os
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

In [None]:
# Locations of the trained-model checkpoint and of the extracted keypoint arrays
model_save_path = "/home/haggenmueller/asl_detection/machine_learning/models/lstm/new_lstm_model.pth"
keypoints_folder = "/home/haggenmueller/asl_detection/machine_learning/datasets/own_dataset/keypoints_gpu"

# Keep only the entries of the keypoints folder whose name ends in ".npy"
keypoint_files = list(filter(lambda name: name.endswith(".npy"), os.listdir(keypoints_folder)))

# Label extraction: the label is the second "_"-separated field of the file name
def extract_label(file_name):
    """Return the second underscore-separated field of ``file_name``.

    Returns ``None`` when the name contains fewer than two underscores,
    i.e. when it would split into fewer than three fields.
    """
    if file_name.count("_") < 2:
        return None
    # Splitting at most twice is enough to isolate the field at index 1.
    return file_name.split("_", 2)[1]

# Collect the distinct labels that can be parsed from the file names
found_labels = {extract_label(f) for f in keypoint_files}
found_labels.discard(None)  # names without a parsable label contribute no class
labels = sorted(found_labels)
num_classes = len(labels)

# Each label's class index is its position in the sorted label list
label_to_index = dict(zip(labels, range(num_classes)))

In [None]:
import os
import cv2
import torch
import numpy as np
from collections import defaultdict
from torch.utils.data import Dataset, DataLoader

# Directories
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets"
raw_videos_path = f"{base_path}/wlasl/raw_videos"
shortened_videos_path = f"{base_path}/own_dataset/shortened_videos"
augmented_videos_path = f"{base_path}/own_dataset/videos_augmented"
processed_folder = f"{base_path}/own_dataset/videos_processed"
keypoints_folder = f"{base_path}/own_dataset/keypoints_gpu"
model_save_path = "/home/haggenmueller/asl_detection/machine_learning/models/lstm/new_lstm_model.pth"
os.makedirs(processed_folder, exist_ok=True)
# torch.save does not create missing parent directories, so make sure the
# checkpoint directory exists before any cell tries to write the model there.
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)

# Count videos per label and determine maximum frames
label_counts = defaultdict(int)      # label -> number of videos counted by process_videos
label_max_frames = defaultdict(int)  # label -> largest frame count seen for that label
max_frames = 0                       # largest frame count seen over all videos

def process_videos(folder, is_augmented=False):
    """Count the ``.mp4`` files in ``folder`` per label and track frame-count maxima.

    For every ``.mp4`` file whose name splits into at least three parts, the
    function increments ``label_counts[label]``, raises
    ``label_max_frames[label]`` to the file's frame count if that is larger, and
    does the same for the module-level ``max_frames``.

    Args:
        folder: Directory whose entries are scanned (non-recursive).
        is_augmented: When true the file name is split on its last three
            underscores, otherwise on its last two.
    """
    global max_frames

    # The name is split from the right; the label is taken from index 1 of the result.
    # NOTE(review): extract_label splits from the left instead — confirm both
    # conventions yield the same label for the real file names.
    split_limit = 3 if is_augmented else 2

    for entry in os.listdir(folder):
        if not entry.endswith(".mp4"):
            continue

        name_parts = entry.rsplit("_", split_limit)
        if len(name_parts) < 3:
            continue
        label = name_parts[1]

        # Ask OpenCV for the frame count reported by the container, then close the file
        capture = cv2.VideoCapture(os.path.join(folder, entry))
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        capture.release()

        label_counts[label] += 1
        if frame_count > label_max_frames[label]:
            label_max_frames[label] = frame_count
        if frame_count > max_frames:
            max_frames = frame_count

# Scan the raw, shortened and augmented video folders; only the augmented
# folder is processed with the augmented file-name split
for video_folder, augmented in (
    (raw_videos_path, False),
    (shortened_videos_path, False),
    (augmented_videos_path, True),
):
    process_videos(video_folder, is_augmented=augmented)

print(f"📏 Maximum number of frames: {max_frames}")

# Function to extract label from filename
def extract_label(file_name):
    """Return the second underscore-separated field of ``file_name``.

    ``None`` is returned when the name has fewer than three such fields.
    """
    fields = file_name.split("_")
    return fields[1] if len(fields) > 2 else None

# Collect the keypoint files. Only .npy files whose name yields a label are kept:
# KeypointDataset looks the parsed label up in label_to_index, so a file that
# extract_label cannot parse would otherwise fail with a KeyError during training.
# Sorting makes the dataset index order reproducible, because os.listdir returns
# entries in arbitrary order.
keypoint_files = sorted(
    f for f in os.listdir(keypoints_folder)
    if f.endswith(".npy") and extract_label(f) is not None
)

# Distinct labels in sorted order (every remaining file has a parsable label)
labels = sorted({extract_label(f) for f in keypoint_files})
num_classes = len(labels)

# Map labels to indices
label_to_index = {label: idx for idx, label in enumerate(labels)}

# Custom Dataset class
class KeypointDataset(Dataset):
    """Dataset of keypoint arrays stored as ``.npy`` files in a single folder.

    Each item is a ``(keypoints, label_index)`` pair: the array loaded from the
    file as a float32 tensor, and the class index of the label parsed from the
    file name as a long tensor.
    """

    def __init__(self, keypoints_folder, keypoint_files, label_to_index):
        """Store the folder, the list of file names and the label -> index mapping."""
        self.label_to_index = label_to_index
        self.keypoint_files = keypoint_files
        self.keypoints_folder = keypoints_folder

    def __len__(self):
        """Return the number of keypoint files."""
        return len(self.keypoint_files)

    def __getitem__(self, idx):
        """Load the ``idx``-th file and return ``(keypoints, label_index)``.

        The lookup in ``label_to_index`` fails (``KeyError`` for a dict) when the
        label parsed from the file name is not one of its keys.
        """
        name = self.keypoint_files[idx]

        # Read the array from disk and convert it to a float32 tensor.
        # presumably laid out as (frames, features) — TODO confirm against the extraction step
        sample = torch.tensor(
            np.load(os.path.join(self.keypoints_folder, name)),
            dtype=torch.float32,
        )

        # The class index comes from the label embedded in the file name
        target = self.label_to_index[extract_label(name)]

        # The target is returned as a long tensor
        return sample, torch.tensor(target, dtype=torch.long)

# Wrap the keypoint files in a dataset and serve them in shuffled batches of 16
dataset = KeypointDataset(
    keypoints_folder=keypoints_folder,
    keypoint_files=keypoint_files,
    label_to_index=label_to_index,
)
train_loader = DataLoader(dataset=dataset, batch_size=16, shuffle=True)

print(f"✅ Dataset loaded: {len(dataset)} videos with {len(labels)} labels.")

In [None]:
import os
import cv2
import torch
import numpy as np
from collections import defaultdict
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

# Directories
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets"
raw_videos_path = f"{base_path}/wlasl/raw_videos"
shortened_videos_path = f"{base_path}/own_dataset/shortened_videos"
augmented_videos_path = f"{base_path}/own_dataset/videos_augmented"
processed_folder = f"{base_path}/own_dataset/videos_processed"
keypoints_folder = f"{base_path}/own_dataset/keypoints_gpu"
model_save_path = "/home/haggenmueller/asl_detection/machine_learning/models/lstm/new_lstm_model.pth"
os.makedirs(processed_folder, exist_ok=True)
# torch.save does not create missing parent directories, so make sure the
# checkpoint directory exists before the training cell writes the model there.
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)

# Count videos per label and determine maximum frames
# NOTE(review): this cell re-initialises the statistics below but never calls
# process_videos, so values computed by an earlier cell are discarded here.
label_counts = defaultdict(int)      # label -> number of videos
label_max_frames = defaultdict(int)  # label -> largest frame count for that label
max_frames = 0                       # largest frame count over all videos

# Function to extract label from filename
def extract_label(file_name):
    """Return the text between the first and second underscore of ``file_name``.

    ``None`` is returned when the name contains fewer than two underscores.
    """
    _, _, remainder = file_name.partition("_")
    label, second_sep, _ = remainder.partition("_")
    # An empty second separator means there was no second underscore
    # (or no underscore at all, in which case the remainder is empty).
    if not second_sep:
        return None
    return label

# Collect the keypoint files. Only .npy files whose name yields a label are kept:
# KeypointDataset looks the parsed label up in label_to_index, so a file that
# extract_label cannot parse would otherwise fail with a KeyError during training.
# Sorting makes the dataset index order reproducible, because os.listdir returns
# entries in arbitrary order.
keypoint_files = sorted(
    f for f in os.listdir(keypoints_folder)
    if f.endswith(".npy") and extract_label(f) is not None
)

# Distinct labels in sorted order (every remaining file has a parsable label)
labels = sorted({extract_label(f) for f in keypoint_files})
num_classes = len(labels)

# Map labels to indices
label_to_index = {label: idx for idx, label in enumerate(labels)}

# Custom Dataset class
class KeypointDataset(Dataset):
    """Map-style dataset over ``.npy`` keypoint files that live in one folder.

    Indexing yields ``(keypoints, label_index)``: the file's array as a float32
    tensor and, as a long tensor, the class index of the label parsed from the
    file name.
    """

    def __init__(self, keypoints_folder, keypoint_files, label_to_index):
        """Remember the folder, the file-name list and the label -> index mapping."""
        self.keypoints_folder = keypoints_folder
        self.label_to_index = label_to_index
        self.keypoint_files = keypoint_files

    def __len__(self):
        """Return how many keypoint files the dataset holds."""
        return len(self.keypoint_files)

    def __getitem__(self, idx):
        """Return ``(keypoints, label_index)`` for the file at position ``idx``.

        The lookup in ``label_to_index`` fails (``KeyError`` for a dict) when the
        label parsed from the file name is not one of its keys.
        """
        current_file = self.keypoint_files[idx]
        full_path = os.path.join(self.keypoints_folder, current_file)

        # Load the stored array and convert it to float32.
        # presumably laid out as (frames, features) — TODO confirm against the extraction step
        raw_array = np.load(full_path)
        sample = torch.tensor(raw_array, dtype=torch.float32)

        # Translate the label embedded in the file name into its class index
        class_index = self.label_to_index[extract_label(current_file)]
        target = torch.tensor(class_index, dtype=torch.long)

        return sample, target

# Build the dataset over the keypoint files and a loader that yields shuffled batches of 16
dataset = KeypointDataset(
    keypoints_folder=keypoints_folder,
    keypoint_files=keypoint_files,
    label_to_index=label_to_index,
)
train_loader = DataLoader(dataset=dataset, batch_size=16, shuffle=True)

print(f"✅ Dataset loaded: {len(dataset)} videos with {len(labels)} labels.")

# Define LSTM Model
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer that outputs per-class log-probabilities."""

    def __init__(self, input_size=225, hidden_size=256, num_layers=3, output_size=num_classes):
        """Build the LSTM stack and the classification layer.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of each LSTM layer's hidden state.
            num_layers: Number of stacked LSTM layers (dropout 0.2 between them).
            output_size: Number of classes. The default is the value that
                ``num_classes`` had when this class statement was executed.
        """
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=0.2,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return log-probabilities for a batch-first input ``x``.

        The LSTM outputs are averaged over dimension 1 (the sequence dimension
        under ``batch_first=True``) before the linear layer is applied.
        """
        sequence_features = self.lstm(x)[0]  # per-step outputs; final states are not used
        pooled = sequence_features.mean(dim=1)
        # Log-softmax output, to be paired with a negative log-likelihood loss
        return torch.log_softmax(self.fc(pooled), dim=1)

print("🔍 Testing simplified model...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the model outside the try block: if construction itself fails there is
# nothing to continue with, so that error must propagate instead of leaving
# `model` undefined for the optimizer below.
model = LSTMModel()  # First create on CPU
print("✅ Model successfully created on CPU.")

try:
    model = model.to(device)  # Now move to the selected device
    if device.type == "cuda":
        print("✅ Model successfully loaded onto GPU.")
except RuntimeError as e:
    print(f"❌ Error loading onto GPU: {e}")
    # Fall back to the CPU so that the model and the batches (which are moved to
    # `device` in the training loop) end up on the same device.
    device = torch.device("cpu")
    model = model.to(device)

# Loss function and optimizer
criterion = nn.NLLLoss()  # Negative Log-Likelihood Loss; the model already returns log-probabilities
optimizer = optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5)

# Debugging: Check if labels are within the valid range
print(f"Minimum label: {min(label_to_index.values())}, Maximum label: {max(label_to_index.values())}")

In [None]:
# Training
num_epochs = 150

# Early-stopping parameters
# NOTE(review): the monitored loss is the training loss; no validation split is used here.
early_stopping_patience = 10  # Stop when the loss has not improved for 10 epochs
best_loss = float("inf")
epochs_no_improve = 0
checkpoint_written = False  # becomes True once a best model has been saved

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    # The batch variables get their own names so the module-level `labels`
    # list (the class names) is not overwritten by a tensor.
    for batch_keypoints, batch_labels in train_loader:
        batch_keypoints = batch_keypoints.to(device)
        batch_labels = batch_labels.to(device)
        optimizer.zero_grad()
        outputs = model(batch_keypoints)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == batch_labels).sum().item()
        total += batch_labels.size(0)

    # Mean of the per-batch losses and accuracy over all samples of the epoch
    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = 100 * correct / total

    print(f"📌 Epoch {epoch+1}/{num_epochs} - Verlust: {epoch_loss:.4f} - Accuracy: {epoch_accuracy:.2f}%")

    # Check Early Stopping
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        epochs_no_improve = 0
        torch.save(model.state_dict(), model_save_path)  # Save best model
        checkpoint_written = True
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= early_stopping_patience:
            print(f"🛑 Training gestoppt nach {epoch+1} Epochen, weil keine Verbesserung!")
            break

# Finalise the checkpoint. The best weights are already on disk, so they must not
# be overwritten with the (possibly worse) weights of the last epoch. Instead the
# best weights are loaded back so the in-memory model matches the saved file.
if checkpoint_written:
    model.load_state_dict(torch.load(model_save_path, map_location=device))
else:
    # No epoch ever improved on the initial loss (e.g. zero epochs or a NaN loss):
    # save the current weights so that a checkpoint file exists.
    torch.save(model.state_dict(), model_save_path)
print(f"💾 Modell gespeichert unter: {model_save_path}")

In [None]:
# Test model
# NOTE(review): the sample is taken from the same dataset the model was trained on,
# so this is a smoke test of the inference path, not a measure of generalisation.
model.eval()
test_sample, test_label = dataset[0]
test_sample = test_sample.unsqueeze(0).to(device)  # Add batch dimension

# Inference needs no gradients; no_grad avoids building the autograd graph
with torch.no_grad():
    output = model(test_sample)
predicted_label = torch.argmax(output, dim=1).item()

print(f"🎯 Testbeispiel: Wahre Klasse = {test_label}, Vorhergesagte Klasse = {predicted_label}")
print("🚀 Training und Test abgeschlossen!")