# Generate informations for normalization

In [None]:
import numpy as np
import os

def compute_train_stats(folder_path):
    """Loads all .npy files in the folder and calculates the mean and standard deviation."""
    all_data = []

    for filename in os.listdir(folder_path):
        if filename.endswith(".npy"):
            file_path = os.path.join(folder_path, filename)
            data = np.load(file_path)

            if data.ndim == 1:  
                data = data.reshape(1, -1)

            all_data.append(data)

    if not all_data:
        raise ValueError("No .npy files found in the folder!")

    all_data = np.vstack(all_data)

    train_mean = np.mean(all_data, axis=0)
    train_std = np.std(all_data, axis=0)

    # Speicherort neben dem "keypoints" Ordner
    parent_folder = os.path.dirname(folder_path)

    np.save(os.path.join(parent_folder, "train_mean.npy"), train_mean)
    np.save(os.path.join(parent_folder, "train_std.npy"), train_std)

    print("✅ Mean & standard deviation saved next to the keypoints folder!")

# Example call:
compute_train_stats("/home/haggenmueller/asl_detection/machine_learning/datasets/own_dataset/keypoints")

# Training

In [28]:
import os
import json
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict

# Paths
LABELS_PATH = "/home/haggenmueller/asl_detection/machine_learning/models/lstm/label_to_index.json"
DATA_DIR = "/home/haggenmueller/asl_detection/machine_learning/datasets/own_dataset/keypoints"
MODEL_PATH = "/home/haggenmueller/asl_detection/machine_learning/models/lstm/best_lstm_model.pth"

# Parameters
SEQUENCE_LENGTH = 102
INPUT_SIZE = 225
BATCH_SIZE = 64
EPOCHS = 150  
LR = 0.001  
PATIENCE = 5

# Load labels
with open(LABELS_PATH, "r") as f:
    label_to_index = json.load(f)
    index_to_label = {v: k for k, v in label_to_index.items()}
    NUM_CLASSES = len(label_to_index)

# Dataset class
class ASLDataset(Dataset):
    def __init__(self, samples, sequence_length):
        self.samples = samples
        self.sequence_length = sequence_length
    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        file_path, label = self.samples[idx]
        keypoints = np.load(file_path)
        
        if keypoints.shape[0] < self.sequence_length:
            pad = np.zeros((self.sequence_length - keypoints.shape[0], keypoints.shape[1]))
            keypoints = np.vstack((keypoints, pad))
        else:
            keypoints = keypoints[:self.sequence_length]
        
        # Normalize keypoints
        keypoints = (keypoints - keypoints.mean()) / keypoints.std()
        
        return torch.tensor(keypoints, dtype=torch.float32), torch.tensor(label, dtype=torch.long)

# Load and split dataset
label_samples = defaultdict(list)
for file in os.listdir(DATA_DIR):
    if file.endswith(".npy"):
        filename_parts = file.split("_")
        label_name = filename_parts[1]
        if label_name in label_to_index:
            label_samples[label_name].append(os.path.join(DATA_DIR, file))

train_samples, val_samples, test_samples = [], [], []

for label, files in label_samples.items():
    np.random.shuffle(files)
    num_total = len(files)
    
    num_train = int(0.70 * num_total)
    num_val = int(0.15 * num_total)
    num_test = num_total - num_train - num_val

    train_samples.extend([(f, label_to_index[label]) for f in files[:num_train]])
    val_samples.extend([(f, label_to_index[label]) for f in files[num_train:num_train + num_val]])
    test_samples.extend([(f, label_to_index[label]) for f in files[num_train + num_val:]])

train_dataset = ASLDataset(train_samples, SEQUENCE_LENGTH)
val_dataset = ASLDataset(val_samples, SEQUENCE_LENGTH)
test_dataset = ASLDataset(test_samples, SEQUENCE_LENGTH)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.dropout(out[:, -1, :])  # Extract last time step
        return self.fc(out)

# Initialize model
model = LSTMModel(input_size=INPUT_SIZE, hidden_size=256, num_layers=2, num_classes=NUM_CLASSES)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.1)
    elif isinstance(m, nn.LSTM):
        for name, param in m.named_parameters():
            if "weight_ih" in name or "weight_hh" in name:
                nn.init.xavier_uniform_(param)
            elif "bias" in name:
                nn.init.constant_(param, 0.1)

model.apply(init_weights)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=0.3)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=1e-6)

# Training loop
best_loss = float("inf")
stopping_counter = 0
for epoch in range(EPOCHS):
    model.train()
    total_loss, correct, total = 0, 0, 0
    
    for keypoints, labels in train_loader:
        keypoints, labels = keypoints.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(keypoints)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        total_loss += loss.item()
        
        _, predicted = torch.max(torch.softmax(outputs, dim=1), 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)
    
    avg_loss = total_loss / len(train_loader)
    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{EPOCHS}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")
    scheduler.step()
    
    if avg_loss < best_loss:
        best_loss = avg_loss
        stopping_counter = 0
        torch.save(model.state_dict(), MODEL_PATH)
        print(f"✅ Best model saved at epoch {epoch+1} with loss {avg_loss:.4f}")
    else:
        stopping_counter += 1
        if stopping_counter >= PATIENCE:
            print("Early stopping triggered.")
            break

print("Training completed! Best model saved at:", MODEL_PATH)

# Testing the model
model.load_state_dict(torch.load(MODEL_PATH))
model.eval()

def evaluate(loader, name):
    correct, total, loss = 0, 0, 0
    with torch.no_grad():
        for keypoints, labels in loader:
            keypoints, labels = keypoints.to(device), labels.to(device)
            outputs = model(keypoints)
            loss += criterion(outputs, labels).item()
            _, predicted = torch.max(torch.softmax(outputs, dim=1), 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
    accuracy = 100 * correct / total
    print(f"{name} Set: Loss = {loss / len(loader):.4f}, Accuracy = {accuracy:.2f}%")

evaluate(val_loader, "Validation")
evaluate(test_loader, "Test")

Epoch [1/150], Loss: 5.2108, Accuracy: 0.96%
✅ Best model saved at epoch 1 with loss 5.2108
Epoch [2/150], Loss: 4.9958, Accuracy: 1.35%
✅ Best model saved at epoch 2 with loss 4.9958
Epoch [3/150], Loss: 4.8688, Accuracy: 1.90%
✅ Best model saved at epoch 3 with loss 4.8688
Epoch [4/150], Loss: 4.7694, Accuracy: 1.88%
✅ Best model saved at epoch 4 with loss 4.7694
Epoch [5/150], Loss: 4.7017, Accuracy: 2.80%
✅ Best model saved at epoch 5 with loss 4.7017
Epoch [6/150], Loss: 4.6590, Accuracy: 2.90%
✅ Best model saved at epoch 6 with loss 4.6590
Epoch [7/150], Loss: 4.6275, Accuracy: 3.67%
✅ Best model saved at epoch 7 with loss 4.6275
Epoch [8/150], Loss: 4.5462, Accuracy: 4.15%
✅ Best model saved at epoch 8 with loss 4.5462
Epoch [9/150], Loss: 4.4820, Accuracy: 4.43%
✅ Best model saved at epoch 9 with loss 4.4820
Epoch [10/150], Loss: 4.4108, Accuracy: 4.78%
✅ Best model saved at epoch 10 with loss 4.4108
Epoch [11/150], Loss: 4.3609, Accuracy: 5.88%
✅ Best model saved at epoch 11 w

  model.load_state_dict(torch.load(MODEL_PATH))


Validation Set: Loss = 1.7860, Accuracy = 76.00%
Test Set: Loss = 1.7130, Accuracy = 76.24%


In [None]:
for i, file in enumerate(os.listdir(DATA_DIR)):
    if file.endswith(".npy") and i < 10:
        filename_parts = file.split("_")
        print(f"File: {file} → Extracted Label: {filename_parts[1]}")


In [27]:
print("Label Mapping:", label_to_index)


Label Mapping: {'I': 0, 'about': 1, 'accident': 2, 'add': 3, 'africa': 4, 'after': 5, 'ago': 6, 'alone': 7, 'always': 8, 'animal': 9, 'any': 10, 'apple': 11, 'appointment': 12, 'argue': 13, 'ask': 14, 'australia': 15, 'baby': 16, 'bad': 17, 'balance': 18, 'banana': 19, 'bar': 20, 'barely': 21, 'basketball': 22, 'beard': 23, 'bed': 24, 'before': 25, 'between': 26, 'bird': 27, 'black': 28, 'bowling': 29, 'brother': 30, 'buy': 31, 'california': 32, 'call': 33, 'can': 34, 'candy': 35, 'careful': 36, 'carrot': 37, 'cat': 38, 'champion': 39, 'change': 40, 'chat': 41, 'cheat': 42, 'check': 43, 'city': 44, 'cold': 45, 'computer': 46, 'convince': 47, 'cool': 48, 'corn': 49, 'cousin': 50, 'cow': 51, 'cry': 52, 'dark': 53, 'daughter': 54, 'day': 55, 'deaf': 56, 'decide': 57, 'decorate': 58, 'delay': 59, 'delicious': 60, 'dive': 61, 'dog': 62, 'drink': 63, 'drop': 64, 'eat': 65, 'environment': 66, 'family': 67, 'far': 68, 'fast': 69, 'fat': 70, 'fault': 71, 'feel': 72, 'few': 73, 'finish': 74, 'fi

In [24]:
for i in range(5):
    sample, label = train_dataset[i]
    print(f"Sample {i}: Shape = {sample.shape}, Label = {label}")


Sample 0: Shape = torch.Size([102, 225]), Label = 157
Sample 1: Shape = torch.Size([102, 225]), Label = 157
Sample 2: Shape = torch.Size([102, 225]), Label = 157
Sample 3: Shape = torch.Size([102, 225]), Label = 157
Sample 4: Shape = torch.Size([102, 225]), Label = 157
