In [2]:
import os
import pandas as pd
import re
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score, confusion_matrix
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Einlesen der Daten 

In diesem Abschnitt werden die Hilfsfunktionen zu Laden der der FastSurfer-Analyseergebnisse definiert.


In [None]:
def read_stats_file(stats_path):
    regions = {}
    with open(stats_path, 'r') as f:
        for line in f:
            if line.startswith('#'):
                continue
            parts = line.split()
            if len(parts) >= 5:
                region = parts[4] 
                try:
                    volume = float(parts[3])  
                    regions[region] = volume
                except ValueError:
                    pass 
    return regions


def load_group(base_dir, label):
    subjects_data = []
    for root, dirs, files in os.walk(base_dir):
        for file in files:
            if file == "aseg+DKT.stats":
                stats_path = os.path.join(root, file)
                # extrahiere Probanden-ID
                match = re.search(r'(\d{3}_S_\d{4})', stats_path)
                subject_id = match.group(1) if match else os.path.basename(root)
                
                # Lade Gehirnregionen-Volumina für diesen Probanden
                regions = read_stats_file(stats_path)
                regions["subject_id"] = subject_id
                regions["Label"] = label  # AD oder CN
                subjects_data.append(regions)
    
    df = pd.DataFrame(subjects_data)
    return df.set_index("subject_id").sort_index()


# Lokales SVM-Modell Training mit PyTorch

Zuerst wird das Modell lokal trainiert.

In [None]:
class LinearSVM(nn.Module):
    def __init__(self, input_dim):
        super(LinearSVM, self).__init__()
        self.linear = nn.Linear(input_dim, 1)
    
    def forward(self, x):
        return self.linear(x)

# Hinge Loss
def hinge_loss(outputs, labels, C=1.0):
    # Labels müssen -1 oder +1 für SVM sein
    labels = 2 * labels - 1  
    # Standard Hinge Loss kalkulation
    loss = torch.mean(torch.clamp(1 - labels * outputs, min=0))
    
    # globaler Zugriff auf 'model' für L2-Regularisierungsterm 
    global model 
    l2_reg = torch.tensor(0., requires_grad=True)
    for name, param in model.named_parameters():
        if 'weight' in name:
            l2_reg = l2_reg + torch.norm(param, 2)**2

    # Der Faktor (1 / (2 * C)) wird in der primalen SVM-Formulierung verwendet
    return loss + (1 / (2 * C)) * l2_reg

## Laden der Daten
Nun werden die Daten geladen.

In [None]:
ad_dir = "/Users/hpe/Desktop/MRI/PA2/fast_surfer/AD" 
cn_dir = "/Users/hpe/Desktop/MRI/PA2/fast_surfer/CN"

df_AD = load_group(ad_dir, "AD")
df_CN = load_group(cn_dir, "CN")

df_all = pd.concat([df_AD, df_CN])

# entfernt Label-Spalten und ersetzt NaNs durch 0
X = df_all.drop(columns=["Label"]).fillna(0) 
# Umwandlung der Labels in binäre Werte
y = df_all["Label"].map({"AD": 1, "CN": 0})

# entfernt 0 Spalten
X = X.loc[:, (X != 0).any(axis=0)] 

## Aufteilung und Normalisierung 
In diesem Abschnitt werden die Daten in Trainings- und Testmenge aufgeteilt und normalisiert, um sie für das Modell vorzubereiten.

In [None]:
# Normalisierung
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Aufteilung in Trainings- und Testset
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.3, random_state=42, stratify=y
)

# Konvertiere Daten zu PyTorch Tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

# Erstelle DataLoader für Trainingsdaten
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)


## Training mit Loss-Tracking
Ds SVM-Modell wird über mehrere Epochen mit den Trainingsdaten trainiert, wobei nach jeder Epoche der durchschnittliche Verlust (Loss) und die Genauigkeit (Accuracy) für Training und Test berechnet und gespeichert werden. 

In [238]:
# Erweitertes Training mit Loss- und Accuracy-Tracking
input_dim = X.shape[1]
model_tracked = LinearSVM(input_dim)
optimizer_tracked = optim.SGD(model_tracked.parameters(), lr=0.01, weight_decay=0.0001)

# Tracking-Listen
training_losses = []
training_accuracies = []
validation_accuracies = []

# Hyperparameters
C = 1.0
epochs = 100

# Update global model reference for hinge_loss
model = model_tracked

print("Training Linear SVM mit detailliertem Tracking...")
print(f"Input Dimension: {input_dim}")
print(f"Training Samples: {len(X_train)}, Test Samples: {len(X_test)}")
print(f"C Parameter: {C}\n")

for epoch in range(epochs):
    model_tracked.train()
    epoch_losses = []
    
    for inputs, labels in train_loader:
        optimizer_tracked.zero_grad()
        outputs = model_tracked(inputs)
        loss = hinge_loss(outputs, labels, C=C)
        loss.backward()
        optimizer_tracked.step()
        epoch_losses.append(loss.item())
    
    # Durchschnittlicher Loss für diese Epoche
    avg_epoch_loss = np.mean(epoch_losses)
    training_losses.append(avg_epoch_loss)
    
    # Training Accuracy berechnen
    model_tracked.eval()
    with torch.no_grad():
        train_outputs = model_tracked(X_train_tensor)
        train_preds = (train_outputs > 0).float()
        train_acc = accuracy_score(y_train_tensor, train_preds)
        training_accuracies.append(train_acc)
        
        # Validation Accuracy auf Test Set
        test_outputs = model_tracked(X_test_tensor)
        test_preds = (test_outputs > 0).float()
        test_acc = accuracy_score(y_test_tensor, test_preds)
        validation_accuracies.append(test_acc)
    
    if (epoch + 1) % 20 == 0:
        print(f"Epoch [{epoch+1}/{epochs}] - Loss: {avg_epoch_loss:.4f}, Train Acc: {train_acc:.3f}, Test Acc: {test_acc:.3f}")

Training Linear SVM mit detailliertem Tracking...
Input Dimension: 95
Training Samples: 17, Test Samples: 8
C Parameter: 1.0

Epoch [20/100] - Loss: 0.2497, Train Acc: 0.941, Test Acc: 0.500
Epoch [40/100] - Loss: 0.1819, Train Acc: 0.941, Test Acc: 0.500
Epoch [60/100] - Loss: 0.1726, Train Acc: 1.000, Test Acc: 0.500
Epoch [80/100] - Loss: 0.1668, Train Acc: 1.000, Test Acc: 0.500
Epoch [100/100] - Loss: 0.2655, Train Acc: 1.000, Test Acc: 0.500


# Evaluation

Nun werden die beiden Modelle – das zentral trainierte SVM-Modell und das Modell, das im Rahmen des Swarm-Learning-Prozesses entstanden ist – auf denselben Testdaten evaluiert. Dadurch kann die Leistung beider Ansätze direkt miteinander verglichen werden, wobei die Modelle nach ihrem jeweiligen Training gespeichert und anschließend identisch getestet werden.

## Lokales SVM-Modell
Zunächst erfolgt die Evaluation des lokal trainierten Modells. 

In [None]:
model.eval()
with torch.no_grad():
    # Vorhersagen auf Testdaten
    test_outputs = model(X_test_tensor)
    # Für SVM: Vorhersage basiert auf dem Vorzeichen (> 0 = Klasse 1, <= 0 = Klasse 0)
    preds_class = (test_outputs > 0).float()
    
    # Konvertierung zu numpy für sklearn metrics
    y_true = y_test_tensor.squeeze().numpy()
    local_pred = preds_class.squeeze().numpy()

    # Berechnung aller Metriken für Local Model
    local_accuracy = accuracy_score(y_true, local_pred)
    local_precision = precision_score(y_true, local_pred)
    local_recall = recall_score(y_true, local_pred)  # Sensitivity
    local_f1 = f1_score(y_true, local_pred)
    
    print(f"\n{'='*50}")
    print("LOCAL SVM MODEL - TEST EVALUATION")
    print(f"{'='*50}")
    print(f"Test Accuracy: {local_accuracy:.3f}")
    print(f"Precision: {local_precision:.3f}")
    print(f"Sensitivity (Recall): {local_recall:.3f}")
    print(f"F1-Score: {local_f1:.3f}")
    print(f"{'='*50}\n")

    # Detaillierter Klassifikationsbericht
    print("Classification Report:")
    print(classification_report(
        y_true, 
        local_pred, 
        target_names=['CN', 'AD']
    ))

    # Confusion Matrix
    local_cm = confusion_matrix(y_true, local_pred)
    print("\nConfusion Matrix:")
    print("         Predicted")
    print("         CN  AD")
    print(f"Actual CN {local_cm[0,0]:3d} {local_cm[0,1]:3d}")
    print(f"       AD {local_cm[1,0]:3d} {local_cm[1,1]:3d}")

# Speichern des Modells
MODEL_DIR = "/Users/hpe/Desktop/MRI/PA2/models"
MODEL_FILENAME = "linear_svm_brain_volumes.pt"
MODEL_SAVE_PATH = os.path.join(MODEL_DIR, MODEL_FILENAME)
os.makedirs(MODEL_DIR, exist_ok=True)
torch.save(model.state_dict(), MODEL_SAVE_PATH)

print(f"\n{'='*50}")
print(f"Modellgewichte erfolgreich gespeichert unter: {MODEL_SAVE_PATH}")
print(f"{'='*50}\n")


LOCAL SVM MODEL - TEST EVALUATION
Test Accuracy: 0.500
Precision: 0.667
Sensitivity (Recall): 0.400
F1-Score: 0.500

Classification Report:
              precision    recall  f1-score   support

          CN       0.40      0.67      0.50         3
          AD       0.67      0.40      0.50         5

    accuracy                           0.50         8
   macro avg       0.53      0.53      0.50         8
weighted avg       0.57      0.50      0.50         8


Confusion Matrix:
         Predicted
         CN  AD
Actual CN   2   1
       AD   3   2

Modellgewichte erfolgreich gespeichert unter: /Users/hpe/Desktop/MRI/PA2/model/linear_svm_brain_volumes.pt



## Swarm Learning SVM-Modell
Das aus dem Swarm-Learning-Prozess stammende Modell wird nun auf denselben Testdaten wie das lokale Modell evaluiert. Das Modell wurde zuvor nach dem Training aus dem Swarm-Learning-System heruntergeladen und steht jetzt für den direkten Leistungsvergleich bereit.

### Laden von Modell aus SL

In [None]:
SWARM_MODEL_PATH = "/Users/hpe/Desktop/MRI/PA2/models/AD_model_swarm.pth"
swarm_model = torch.load(SWARM_MODEL_PATH, map_location='cpu', weights_only=False)
swarm_model.eval()

print(f"Model Architecture: {type(swarm_model)}")
print(f"Input Dimension: {swarm_model.linear.in_features}")
print(f"Output Dimension: {swarm_model.linear.out_features}")

# Zeige Modell-Parameter
total_params = sum(p.numel() for p in swarm_model.parameters())
print(f"Total Parameters: {total_params}")

# Dimensionen der aktuellen Daten überprüfen
print(f"AKTUELLE DATEN-DIMENSIONEN:")
print(f"{'='*40}")
print(f"X.shape: {X.shape}")
print(f"X_scaled.shape: {X_scaled.shape}")
print(f"X_train.shape: {X_train.shape}")
print(f"X_test.shape: {X_test.shape}")
print(f"X_test_tensor.shape: {X_test_tensor.shape}")
print(f"input_dim (current): {input_dim}")
print(f"Anzahl Features: {len(X.columns)}")


Model Architecture: <class '__main__.LinearSVM'>
Input Dimension: 100
Output Dimension: 1
Total Parameters: 101
AKTUELLE DATEN-DIMENSIONEN:
X.shape: (25, 95)
X_scaled.shape: (25, 95)
X_train.shape: (17, 95)
X_test.shape: (8, 95)
X_test_tensor.shape: torch.Size([8, 95])
input_dim (current): 95
Anzahl Features: 95


### Evaluierung von SL-Modell auf den gleichen Testdaten

In [None]:
print(f"\n{'='*50}")
print("SWARM MODEL - TEST EVALUATION")
print(f"{'='*50}")

# Erstelle angepasste Testdaten mit Null-Padding für das Swarm-Modell
swarm_input_dim = swarm_model.linear.in_features
current_dim = X_test_tensor.shape[1]

# Dimensionen anpassen
if current_dim < swarm_input_dim:
    additional_features = swarm_input_dim - current_dim
    X_test_swarm = torch.cat([
        X_test_tensor, 
        torch.zeros(X_test_tensor.shape[0], additional_features)
    ], dim=1)
    print(f"Testdaten von {current_dim} auf {swarm_input_dim} Features erweitert (mit {additional_features} Nullen)")
else:
    X_test_swarm = X_test_tensor[:, :swarm_input_dim]
    print(f"Testdaten von {current_dim} auf {swarm_input_dim} Features reduziert")

print(f"Swarm Model Input Shape: {X_test_swarm.shape}")

# Testen des Swarm-Modells
swarm_model.eval()
with torch.no_grad():
    # Vorhersagen auf den angepassten Testdaten
    swarm_test_outputs = swarm_model(X_test_swarm)
    # Für SVM: Vorhersage basiert auf Vorzeichen (> 0 = Klasse 1, <= 0 = Klasse 0)
    swarm_preds_class = (swarm_test_outputs > 0).float()

    # Konvertierung zu numpy für sklearn metrics
    y_true = y_test_tensor.squeeze().numpy()
    swarm_pred = swarm_preds_class.squeeze().numpy()

    # Berechnung aller Metriken für Swarm Model
    swarm_accuracy = accuracy_score(y_true, swarm_pred)
    swarm_precision = precision_score(y_true, swarm_pred)
    swarm_recall = recall_score(y_true, swarm_pred)  
    swarm_f1 = f1_score(y_true, swarm_pred)
    
    print(f"Test Accuracy: {swarm_accuracy:.3f}")
    print(f"Precision: {swarm_precision:.3f}")
    print(f"Sensitivity (Recall): {swarm_recall:.3f}")
    print(f"F1-Score: {swarm_f1:.3f}")
    print(f"{'='*50}\n")

    # Detaillierter Klassifikationsbericht
    print("Classification Report:")
    print(classification_report(
        y_true, 
        swarm_pred, 
        target_names=['CN', 'AD']
    ))

    # Confusion Matrix
    swarm_cm = confusion_matrix(y_true, swarm_pred)
    print("\nConfusion Matrix:")
    print("         Predicted")
    print("         CN  AD")
    print(f"Actual CN {swarm_cm[0,0]:3d} {swarm_cm[0,1]:3d}")
    print(f"       AD {swarm_cm[1,0]:3d} {swarm_cm[1,1]:3d}")
    print(f"{'='*50}\n")


SWARM MODEL - TEST EVALUATION
Testdaten von 95 auf 100 Features erweitert (mit 5 Nullen)
Swarm Model Input Shape: torch.Size([8, 100])
Test Accuracy: 0.875
Precision: 0.833
Sensitivity (Recall): 1.000
F1-Score: 0.909

Classification Report:
              precision    recall  f1-score   support

          CN       1.00      0.67      0.80         3
          AD       0.83      1.00      0.91         5

    accuracy                           0.88         8
   macro avg       0.92      0.83      0.85         8
weighted avg       0.90      0.88      0.87         8


Confusion Matrix:
         Predicted
         CN  AD
Actual CN   2   1
       AD   0   5



## Vergleich der Ergebnisse 

In [None]:

print(f"\n{'='*70}")
print("COMPARISON: LOCAL SVM vs. SWARM MODEL")
print(f"{'='*70}\n")

# Erstelle DataFrame für den Vergleich
comparison_metrics = {
    'Metric': ['Accuracy', 'Precision', 'Sensitivity (Recall)', 'F1-Score'],
    'Local SVM': [local_accuracy, local_precision, local_recall, local_f1],
    'Swarm Model': [swarm_accuracy, swarm_precision, swarm_recall, swarm_f1]
}

comparison_df = pd.DataFrame(comparison_metrics)

# Berechne Differenzen
comparison_df['Difference (Swarm - Local)'] = comparison_df['Swarm Model'] - comparison_df['Local SVM']

print("METRICS COMPARISON:")
print("-" * 70)
print(f"{'Metric':<25} {'Local SVM':<12} {'Swarm Model':<12} {'Difference':<12}")
print("-" * 70)

for idx, row in comparison_df.iterrows():
    metric = row['Metric']
    local_val = row['Local SVM']
    swarm_val = row['Swarm Model'] 
    diff = row['Difference (Swarm - Local)']
    
    print(f"{metric:<25} {local_val:<12.3f} {swarm_val:<12.3f} {diff:<+12.3f}")

print("-" * 70)

# Detaillierte Confusion Matrix Vergleiche
print(f"\n{'='*50}")
print("CONFUSION MATRICES COMPARISON")
print(f"{'='*50}")

print("\nLocal SVM:")
print("         Predicted")
print("         CN  AD")
print(f"Actual CN {local_cm[0,0]:3d} {local_cm[0,1]:3d}")
print(f"       AD {local_cm[1,0]:3d} {local_cm[1,1]:3d}")

print("\nSwarm Model:")
print("         Predicted") 
print("         CN  AD")
print(f"Actual CN {swarm_cm[0,0]:3d} {swarm_cm[0,1]:3d}")
print(f"       AD {swarm_cm[1,0]:3d} {swarm_cm[1,1]:3d}")




COMPARISON: LOCAL SVM vs. SWARM MODEL

METRICS COMPARISON:
----------------------------------------------------------------------
Metric                    Local SVM    Swarm Model  Difference  
----------------------------------------------------------------------
Accuracy                  0.500        0.875        +0.375      
Precision                 0.667        0.833        +0.167      
Sensitivity (Recall)      0.400        1.000        +0.600      
F1-Score                  0.500        0.909        +0.409      
----------------------------------------------------------------------

CONFUSION MATRICES COMPARISON

Local SVM:
         Predicted
         CN  AD
Actual CN   2   1
       AD   3   2

Swarm Model:
         Predicted
         CN  AD
Actual CN   2   1
       AD   0   5
