# Training notebook

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import os
import numpy as np
from glob import glob
from collections import defaultdict

In [None]:
# --- 1. Prepare Data ---
class KeypointDataset(Dataset):
    """Dataset of keypoint arrays stored as ``.npy`` files in a single directory.

    The class label of a sample is the second ``_``-separated token of its
    file name. Files whose name has fewer than three ``_``-separated tokens
    are skipped instead of crashing or producing a bogus label; this is the
    same rule the label-JSON cell of this notebook applies.

    Args:
        data_dir: Directory searched (non-recursively) for ``*.npy`` files.
    """

    def __init__(self, data_dir):
        # glob() returns files in filesystem order; sort so that sample indices
        # are reproducible across runs and machines.
        candidate_files = sorted(glob(os.path.join(data_dir, "*.npy")))

        self.files = []   # paths of the usable .npy files, sorted
        self.labels = []  # label string of each entry in self.files
        for path in candidate_files:
            label = self._label_from_path(path)
            if label is None:
                continue  # file name does not follow the expected pattern
            self.files.append(path)
            self.labels.append(label)

        # Label -> class index, assigned in sorted label order.
        self.label_dict = {label: idx for idx, label in enumerate(sorted(set(self.labels)))}

    @staticmethod
    def _label_from_path(path):
        """Return the label encoded in the file name, or None if it has none."""
        parts = os.path.basename(path).split("_")
        return parts[1] if len(parts) >= 3 else None

    def __len__(self):
        """Number of usable keypoint files."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            Tuple ``(keypoints, label)``: the array from disk as a float32
            tensor, and the class index as a 0-dim long tensor.
        """
        keypoints = np.load(self.files[idx])
        keypoints = torch.tensor(keypoints, dtype=torch.float32)
        label = self.label_dict[self.labels[idx]]
        return keypoints, torch.tensor(label, dtype=torch.long)

In [None]:
# --- 2. Build the dataset and its DataLoader ---
# Directory whose *.npy files KeypointDataset will pick up.
data_dir = "/home/haggenmueller/asl_detection/machine_learning/datasets/own_dataset/keypoints_gpu"
dataset = KeypointDataset(data_dir)

# Batches of 16 samples, reshuffled on every pass over the data.
# NOTE(review): default collation stacks the samples, so this presumably relies
# on every .npy array having the same shape - confirm.
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=16)

In [None]:
# --- 3. Define LSTM Model ---
class LSTMModel(nn.Module):
    """Stacked LSTM classifier that mean-pools over the sequence axis.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of classes.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied between stacked LSTM layers.
            Ignored (set to 0) when ``num_layers == 1``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=3, dropout=0.2):
        super().__init__()
        # nn.LSTM applies dropout only *between* layers; with a single layer a
        # non-zero value does nothing and makes PyTorch emit a warning.
        lstm_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=lstm_dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Classify a batch of sequences.

        Args:
            x: Tensor laid out batch-first, i.e. ``(batch, frames, input_dim)``.

        Returns:
            Log-probabilities of shape ``(batch, output_dim)``; pair with
            ``nn.NLLLoss``.
        """
        lstm_out, _ = self.lstm(x)
        logits = self.fc(lstm_out.mean(dim=1))  # Average over all frames
        return torch.log_softmax(logits, dim=1)  # Log-softmax for stable gradients

# Model hyper-parameters
# Feature size is read from axis 1 of the first sample's keypoint tensor.
input_dim = dataset[0][0].size(1)
hidden_dim = 256
# One output unit per distinct label found by the dataset.
output_dim = len(dataset.label_dict)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = LSTMModel(input_dim, hidden_dim, output_dim)
model = model.to(device)

In [None]:
# --- 4. Training with Early Stopping ---
# The model's forward pass ends in log_softmax, so NLLLoss is the matching criterion.
criterion = nn.NLLLoss()
# Adam with a small learning rate and light L2 regularisation (weight decay).
optimizer = optim.Adam(params=model.parameters(), lr=3e-4, weight_decay=1e-5)

def train_model(model, dataloader, criterion, optimizer, epochs=150, patience=10,
                save_path="best_lstm_model.pth", device=None):
    """Train ``model`` with early stopping on the training loss.

    No validation data is used: the quantity monitored for early stopping and
    checkpointing is the mean per-batch training loss of each epoch.

    Args:
        model: Module whose output for a batch is passed to ``criterion``.
        dataloader: Sized iterable yielding ``(keypoints, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over ``model``'s parameters.
        epochs: Maximum number of epochs.
        patience: Stop after this many consecutive epochs without a new best loss.
        save_path: Where the state dict of the best epoch is written.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).

    Returns:
        List with one dict per completed epoch:
        ``{"epoch": int, "loss": float, "accuracy": float}`` (accuracy in percent).

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    if device is None:
        # Follow the model rather than a notebook-level global, so the function
        # also works when called outside this notebook.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    num_batches = len(dataloader)
    if num_batches == 0:
        raise ValueError("dataloader is empty; nothing to train on")

    model.train()
    best_loss = float("inf")
    epochs_no_improve = 0
    history = []

    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for keypoints, labels in dataloader:
            keypoints, labels = keypoints.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(keypoints)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

        epoch_loss = running_loss / num_batches
        epoch_accuracy = 100 * correct / total
        history.append({"epoch": epoch + 1, "loss": epoch_loss, "accuracy": epoch_accuracy})
        print(f"📌 Epoch {epoch+1}/{epochs} - Loss: {epoch_loss:.4f} - Accuracy: {epoch_accuracy:.2f}%")

        # Early Stopping Check
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            epochs_no_improve = 0
            torch.save(model.state_dict(), save_path)  # Save best model
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"🛑 Training stopped after {epoch+1} epochs due to no improvement.")
                break

    return history

# Kick off training with the default epoch budget and patience.
# The trailing semicolon keeps the notebook from echoing a return value.
train_model(model=model, dataloader=dataloader, criterion=criterion, optimizer=optimizer);

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# --- Collect predictions ---
# `true_labels` and `predictions` are not defined anywhere else in this notebook,
# so build them here; otherwise this cell fails with a NameError on a fresh kernel.
# NOTE(review): this evaluates on the same data the model was trained on.
model.eval()  # disable dropout for inference; the model stays in eval mode afterwards
true_labels, predictions = [], []
with torch.no_grad():
    for keypoints, labels in dataloader:
        outputs = model(keypoints.to(device))
        predictions.extend(outputs.argmax(dim=1).cpu().tolist())
        true_labels.extend(labels.tolist())

# --- Compute Confusion Matrix ---
# Rows are true labels, columns are predicted labels (scikit-learn convention).
conf_matrix = confusion_matrix(true_labels, predictions, labels=sorted(dataset.label_dict.values()))

# --- Show only labels with at least 3 misclassifications ---
# Column sum minus the diagonal = how often a label was predicted for a sample
# that actually belongs to a different class.
# NOTE(review): use axis=1 instead to count per *true* label - confirm which is intended.
misclassified_counts = conf_matrix.sum(axis=0) - np.diag(conf_matrix)
top_misclassified = np.where(misclassified_counts >= 3)[0]  # Only labels with at least 3 errors

if len(top_misclassified) > 0:
    conf_matrix_filtered = conf_matrix[top_misclassified][:, top_misclassified]

    # Show label names on the axes instead of bare class indices.
    index_to_label = {idx: label for label, idx in dataset.label_dict.items()}
    tick_labels = [index_to_label[int(i)] for i in top_misclassified]

    plt.figure(figsize=(10, 8))
    sns.heatmap(conf_matrix_filtered, annot=True, fmt="d", cmap="Blues",
                xticklabels=tick_labels, yticklabels=tick_labels)
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title("Labels with at least 3 Misclassifications")
    plt.show()
else:
    print("No labels with at least 3 misclassifications.")

In [1]:
import os
import json
from glob import glob

# --- Define paths ---
# Directory that is globbed for "*.npy" keypoint files further down in this cell
# (the same directory string the dataset cell uses).
data_dir = "/home/haggenmueller/asl_detection/machine_learning/datasets/own_dataset/keypoints_gpu"
# JSON file holding the label -> index mapping; it is read (if present) and then
# overwritten later in this cell.
label_json_path = "/home/haggenmueller/asl_detection/machine_learning/models/lstm/label_to_index.json"

# --- Extract labels from filenames ---
def extract_label(file_name):
    """Return the label encoded in a keypoint file name.

    The label is the second ``_``-separated token. Names with fewer than
    three ``_``-separated tokens carry no label and yield ``None``.
    """
    # Splitting at most twice is enough: only the first three tokens matter.
    pieces = file_name.split("_", 2)
    return pieces[1] if len(pieces) == 3 else None

# --- Load the previous mapping (if any) so index changes can be detected ---
if os.path.exists(label_json_path):
    with open(label_json_path, "r") as f:
        label_to_index = json.load(f)
else:
    label_to_index = {}

# --- Get all files and extract labels ---
keypoint_files = glob(os.path.join(data_dir, "*.npy"))
# Parse every file name exactly once; names without a label yield None and are dropped.
extracted_labels = sorted({
    label
    for label in map(extract_label, map(os.path.basename, keypoint_files))
    if label is not None
})

# --- Rebuild label_to_index mapping ---
# Indices follow sorted label order, the same scheme KeypointDataset.label_dict uses,
# so the JSON stays aligned with the class indices the model is trained on.
updated_label_to_index = {label: idx for idx, label in enumerate(extracted_labels)}

# A changed mapping means a model trained with the old file may decode to wrong labels.
if label_to_index and label_to_index != updated_label_to_index:
    print("Warning: label indices differ from the previous label_to_index.json; "
          "models trained with the old mapping may need retraining.")

# --- Save updated JSON ---
with open(label_json_path, "w") as f:
    json.dump(updated_label_to_index, f, indent=4)

print(f"✅ label_to_index.json updated with {len(updated_label_to_index)} labels.")

✅ label_to_index.json updated with 209 labels.
