In [5]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm

# Constants
# Seed passed to torch, NumPy and the train/validation split in main().
RANDOM_SEED = 42
# Mini-batch size used for both the training and the validation DataLoader.
BATCH_SIZE = 32
# Number of full passes over the training set.
EPOCHS = 40
# Initial learning rate handed to the Adam optimizer.
LEARNING_RATE = 0.001
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Dataset class
class HandSignDataset(Dataset):
    """In-memory dataset pairing keypoint feature vectors with class labels.

    Args:
        keypoints: Array-like of numeric features, one entry per sample.
            Stored as a ``float32`` tensor.
        labels: Array-like of integer class labels, one per sample.
            Stored as an ``int64`` tensor.

    Raises:
        ValueError: If ``keypoints`` and ``labels`` differ in length.
    """

    def __init__(self, keypoints, labels):
        # Explicit dtypes replace the legacy FloatTensor/LongTensor
        # constructors; as_tensor skips the copy when the input already has
        # the requested dtype.
        self.keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
        self.labels = torch.as_tensor(labels, dtype=torch.int64)

        # Fail early: a mismatch would otherwise only show up as an
        # IndexError (or silently unused labels) while iterating.
        if len(self.keypoints) != len(self.labels):
            raise ValueError(
                f"keypoints and labels must have the same length, "
                f"got {len(self.keypoints)} and {len(self.labels)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.keypoints)

    def __getitem__(self, idx):
        """Return the ``(keypoints, label)`` pair stored at ``idx``."""
        return self.keypoints[idx], self.labels[idx]

# Model definition
class HandSignNet(nn.Module):
    """Fully connected classifier for flat keypoint feature vectors.

    Three ``Linear -> ReLU -> BatchNorm1d -> Dropout`` stages
    (widths 128, 256, 128) feed a single linear layer that emits one logit
    per class. The layer order is fixed so ``state_dict`` keys
    (``features.0.*`` ... ``classifier.0.*``) stay stable across versions.

    Args:
        num_classes: Number of output logits.
        input_dim: Length of each input feature vector. The default of 63
            is presumably 21 hand landmarks x 3 coordinates -- confirm
            against the dataset.
        dropout: Dropout probability applied after every hidden stage.

    Note:
        ``BatchNorm1d`` needs more than one sample per batch while the
        module is in training mode.
    """

    def __init__(self, num_classes=24, input_dim=63, dropout=0.3):
        super().__init__()

        # Feature extraction blocks
        self.features = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(dropout),

            nn.Linear(128, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(dropout),

            nn.Linear(256, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(dropout),
        )

        # Classifier head: raw logits, no softmax (CrossEntropyLoss applies
        # log-softmax itself).
        self.classifier = nn.Sequential(
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        """Map a ``(batch, input_dim)`` tensor to ``(batch, num_classes)`` logits."""
        x = self.features(x)
        x = self.classifier(x)
        return x

def train_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to train; switched to training mode here.
        train_loader: Iterable of ``(inputs, labels)`` batches that also
            supports ``len()``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping the model's parameters.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(loss, accuracy)``: the per-batch loss values averaged over the
        number of batches, and the share of correctly classified samples
        in percent.
    """
    model.train()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()

        # Index of the largest logit per row is the predicted class.
        guesses = logits.max(1)[1]
        n_seen += batch_y.size(0)
        n_correct += (guesses == batch_y).sum().item()

    mean_loss = loss_sum / len(train_loader)
    accuracy = 100. * n_correct / n_seen
    return mean_loss, accuracy

def validate(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without updating any weights.

    Args:
        model: Network to evaluate; switched to eval mode here.
        val_loader: Iterable of ``(inputs, labels)`` batches that also
            supports ``len()``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(loss, accuracy, predictions, labels)``: the per-batch loss
        averaged over the number of batches, accuracy in percent, and two
        flat lists with the predicted and the true class of every sample
        in iteration order.
    """
    model.eval()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    pred_log = []
    label_log = []

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            logits = model(batch_x)
            loss_sum += criterion(logits, batch_y).item()

            # Index of the largest logit per row is the predicted class.
            guesses = logits.max(1)[1]
            n_seen += batch_y.size(0)
            n_correct += (guesses == batch_y).sum().item()

            # Collect per-sample results on the CPU for later reporting.
            pred_log += list(guesses.cpu().numpy())
            label_log += list(batch_y.cpu().numpy())

    mean_loss = loss_sum / len(val_loader)
    accuracy = 100. * n_correct / n_seen
    return mean_loss, accuracy, pred_log, label_log

def plot_confusion_matrix(y_true, y_pred, classes):
    """Render the confusion matrix as a heatmap and save it to disk.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
        classes: Tick labels used for both heatmap axes.

    The figure is written to ``confusion_matrix.png`` in the current
    working directory and closed afterwards.
    """
    matrix = confusion_matrix(y_true, y_pred)

    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=classes,
        yticklabels=classes,
        ax=ax,
    )
    ax.set_title('Confusion Matrix')
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')

    fig.savefig('confusion_matrix.png')
    plt.close(fig)

def plot_training_history(train_losses, val_losses, train_accs, val_accs):
    """Plot loss and accuracy curves side by side and save them to disk.

    Args:
        train_losses: Training loss per epoch.
        val_losses: Validation loss per epoch.
        train_accs: Training accuracy per epoch, in percent.
        val_accs: Validation accuracy per epoch, in percent.

    The figure is written to ``training_history.png`` in the current
    working directory and closed afterwards.
    """
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

    # (axes, train curve, validation curve, title, y-axis label)
    panels = (
        (loss_ax, train_losses, val_losses, 'Loss over epochs', 'Loss'),
        (acc_ax, train_accs, val_accs, 'Accuracy over epochs', 'Accuracy (%)'),
    )
    for ax, train_curve, val_curve, title, y_label in panels:
        ax.plot(train_curve, label='Train')
        ax.plot(val_curve, label='Validation')
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.legend()

    fig.tight_layout()
    fig.savefig('training_history.png')
    plt.close(fig)

def main():
    """Train ``HandSignNet`` on the stored keypoint dataset and report results.

    Steps: seed the RNGs, load keypoints/labels from the ``.npz`` archive,
    make a stratified 80/20 train/validation split, train for ``EPOCHS``
    epochs with Adam and a ``ReduceLROnPlateau`` scheduler driven by the
    validation loss, and checkpoint the weights with the best validation
    accuracy. Afterwards the best checkpoint is reloaded and evaluated.

    Side effects: writes ``best_model.pth``, ``training_history.png`` and
    ``confusion_matrix.png`` to the current working directory and prints
    progress to stdout.
    """
    # Class names in label-index order, used as confusion-matrix tick labels.
    # NOTE(review): assumes label i in the dataset corresponds to
    # class_names[i] -- confirm against the dataset export.
    class_names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M',
                   'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y']

    # Set seeds for reproducibility
    torch.manual_seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)

    # Load data
    print("Lade Daten...")
    # np.load returns an NpzFile that keeps the archive open; the context
    # manager closes it once both arrays have been read into memory.
    with np.load('/home/geiger/asl_detection/machine_learning/datasets/asl_now/Keypoints/asl_keypoints.npz') as data:
        keypoints = data['keypoints']
        labels = data['labels']

    # Split data (stratified so both parts keep the class proportions)
    X_train, X_val, y_train, y_val = train_test_split(
        keypoints, labels, test_size=0.2, random_state=RANDOM_SEED, stratify=labels
    )

    # Create DataLoaders
    train_dataset = HandSignDataset(X_train, y_train)
    val_dataset = HandSignDataset(X_val, y_val)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    # Initialise model; the output width follows the class-name list so the
    # two cannot drift apart.
    print(f"Initialisiere Modell auf {DEVICE}...")
    model = HandSignNet(num_classes=len(class_names)).to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

    # Training
    print("Starte Training...")
    # Start below any reachable accuracy so the first epoch always writes a
    # checkpoint; otherwise a run stuck at 0% would reload a stale file (or
    # crash) in the final evaluation.
    best_val_acc = float('-inf')
    train_losses, val_losses = [], []
    train_accs, val_accs = [], []

    for epoch in range(EPOCHS):
        # Training
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, DEVICE)

        # Validation
        val_loss, val_acc, val_preds, val_labels = validate(model, val_loader, criterion, DEVICE)

        # Learning-rate adjustment
        scheduler.step(val_loss)

        # Record metrics
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        # Save best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), 'best_model.pth')

        # Progress output
        print(f'Epoch {epoch+1}/{EPOCHS}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
        print('-' * 50)

    # Load best model for the final evaluation; map_location places the
    # stored tensors directly on the device in use.
    model.load_state_dict(torch.load('best_model.pth', map_location=DEVICE))
    _, final_acc, final_preds, final_labels = validate(model, val_loader, criterion, DEVICE)

    # Plot results
    plot_training_history(train_losses, val_losses, train_accs, val_accs)
    plot_confusion_matrix(final_labels, final_preds, class_names)

    print(f"\nBeste Validierungs-Accuracy: {best_val_acc:.2f}%")


if __name__ == "__main__":
    main()

Lade Daten...
Initialisiere Modell auf cpu...
Starte Training...
Epoch 1/40:
Train Loss: 0.5459, Train Acc: 87.32%
Val Loss: 0.2111, Val Acc: 94.83%
--------------------------------------------------
Epoch 2/40:
Train Loss: 0.1436, Train Acc: 96.17%
Val Loss: 0.1844, Val Acc: 93.79%
--------------------------------------------------
Epoch 3/40:
Train Loss: 0.1138, Train Acc: 96.72%
Val Loss: 0.1611, Val Acc: 94.03%
--------------------------------------------------
Epoch 4/40:
Train Loss: 0.1252, Train Acc: 95.91%
Val Loss: 0.1243, Val Acc: 95.44%
--------------------------------------------------
Epoch 5/40:
Train Loss: 0.1248, Train Acc: 96.12%
Val Loss: 0.0338, Val Acc: 98.93%
--------------------------------------------------
Epoch 6/40:
Train Loss: 0.1175, Train Acc: 96.31%
Val Loss: 0.3712, Val Acc: 87.18%
--------------------------------------------------
Epoch 7/40:
Train Loss: 0.1322, Train Acc: 95.61%
Val Loss: 0.2095, Val Acc: 92.15%
-----------------------------------------