# In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
=================================================================
GPU-Optimiertes Active Learning für Logistic Regression auf Fashion-MNIST
=================================================================
Professionelles Framework für GPU-beschleunigte Logistic Regression Active Learning
Experimente mit statistischer Analyse für Bachelorarbeit.

Optimiert für NVIDIA RTX 4060 (8GB VRAM) mit RAPIDS cuML.

Version: 1.0 - GPU-Optimiert mit Memory Management für Fashion-MNIST
            
GPU-Logistic Regression Implementierungen:
- RAPIDS cuML LogisticRegression (primär)
- Sklearn LogisticRegression (CPU Fallback)

Query-Strategien:
- Random Sampling (Baseline)
- Entropy Sampling
- Margin Sampling
- Least Confidence

Statistische Analyse:
- Wilcoxon Signed-Rank Test
- Cliff's Delta Effektstärke
- Bonferroni-Korrektur für multiple Vergleiche
"""

import os
import sys
import time
import logging
import numpy as np
import pandas as pd
import warnings
import gc
warnings.filterwarnings('ignore')

# Matplotlib Backend setzen
import matplotlib
matplotlib.use('Agg')  # Für Server ohne GUI
import matplotlib.pyplot as plt

# Seaborn mit Fehlerbehandlung
try:
    import seaborn as sns
    try:
        plt.style.use('seaborn-v0_8-whitegrid')
    except:
        try:
            plt.style.use('seaborn-whitegrid')
        except:
            plt.style.use('ggplot')
except ImportError:
    print("Warnung: Seaborn nicht installiert. Verwende Standard-Matplotlib.")
    sns = None

import torch
import torchvision
import torchvision.transforms as transforms

from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression as SklearnLR
from sklearn.preprocessing import StandardScaler
import sklearn

# Statistische Tests
import scipy
from scipy import stats
from scipy.stats import wilcoxon

# GPU-spezifische Imports mit Fehlerbehandlung
GPU_AVAILABLE = False
CUML_AVAILABLE = False
RMM_AVAILABLE = False

# Versuche RAPIDS cuML zu importieren
try:
    import cupy as cp
    import cuml
    from cuml.linear_model import LogisticRegression as cuMLLR
    CUML_AVAILABLE = True
    GPU_AVAILABLE = True
    print("✓ RAPIDS cuML verfügbar - GPU-Beschleunigung für Logistic Regression aktiviert")
    
    # RMM ist optional
    try:
        import rmm
        from rmm.allocators.cupy import rmm_cupy_allocator
        RMM_AVAILABLE = True
    except:
        RMM_AVAILABLE = False
        print("  Info: RMM Memory Manager nicht verfügbar, verwende Standard CuPy Memory")
        
except ImportError as e:
    print(f"⚠ RAPIDS cuML nicht verfügbar: {e}")

if not GPU_AVAILABLE:
    print("\n⚠ WARNUNG: Keine GPU-Beschleunigung verfügbar! Verwende CPU-basiertes sklearn.")
    print("\nEmpfohlene Installation für RTX 4060:")
    print("conda create -n rapids-gpu python=3.11")
    print("conda activate rapids-gpu")
    print("conda install -c rapidsai -c conda-forge -c nvidia rapids=24.12 python=3.11 cudatoolkit=12.0")

# Excel-Export
try:
    import openpyxl
    EXCEL_AVAILABLE = True
except ImportError:
    print("Warnung: openpyxl nicht installiert. Excel-Export wird deaktiviert.")
    EXCEL_AVAILABLE = False

# -------------------------------------------------------------------------------
# Fashion-MNIST class labels (German display names)
# The list position is the numeric class label: FASHION_MNIST_CLASSES_DE[label].
# -------------------------------------------------------------------------------
FASHION_MNIST_CLASSES_DE = [
    'T-Shirt/Top',
    'Hose',
    'Pullover',
    'Kleid',
    'Mantel',
    'Sandale',
    'Hemd',
    'Sneaker',
    'Tasche',
    'Stiefelette'
]

# -------------------------------------------------------------------------------
# Experiment configuration
# -------------------------------------------------------------------------------
USE_MEMORY_POOL = False  # Set to True to request an RMM memory pool; keep False if the pool causes problems
BUDGET_PERCENTAGES = [0.2, 0.4, 0.6, 0.8, 1.0]  # Labelling budgets: 20%, 40%, 60%, 80%, 100%
BATCH_SIZE = 500  # Larger batches for more efficient GPU training
N_RUNS = 5  # Number of repetitions per experiment
INITIAL_PERCENTAGE = 0.01  # Fraction of the training set labelled up front (1%)
SIGNIFICANCE_LEVEL = 0.05  # Alpha for the statistical tests
SEED = 42  # Base random seed

# Logistic-regression hyperparameters, keyed by backend name.
# Each inner dict is unpacked as keyword arguments into the constructor of the
# matching LogisticRegression implementation.
LR_CONFIGS = {
    'cuml': {
        'penalty': 'l2',
        'C': 1.0,
        'fit_intercept': True,
        'max_iter': 1000,
        'tol': 1e-4,
        'solver': 'qn',  # Quasi-Newton solver for the GPU
        'linesearch_max_iter': 50,
        'verbose': False
    },
    'sklearn': {
        'penalty': 'l2',
        'C': 1.0,
        'fit_intercept': True,
        'max_iter': 1000,
        'tol': 1e-4,
        # 'multi_class' is deliberately not set: the parameter is deprecated
        # since scikit-learn 1.5, and lbfgs already fits a multinomial model
        # on multiclass targets by default.
        'solver': 'lbfgs',
        'n_jobs': -1,  # Use all CPU cores
        'verbose': 0
    }
}

# -------------------------------------------------------------------------------
# GPU Memory Management
# -------------------------------------------------------------------------------
def setup_gpu_memory():
    """Configure GPU memory management for the cuML backend.

    When the RMM pool is disabled (``USE_MEMORY_POOL`` is false) or RMM is not
    installed, the default allocator stays in place and the GPU name / total
    memory reported by ``nvidia-smi`` is printed if that tool can be run.
    Otherwise an RMM pool (5GB initial, 6.5GB maximum, unmanaged memory) is
    created and installed as the CuPy allocator.

    Returns:
        bool: ``False`` if cuML is unavailable, ``True`` in every other case,
        including a failed RMM pool setup (the default allocator is then used).
    """
    if not CUML_AVAILABLE:
        return False

    if not USE_MEMORY_POOL or not RMM_AVAILABLE:
        if not RMM_AVAILABLE and USE_MEMORY_POOL:
            print("✓ RMM nicht verfügbar, verwende Standard GPU Memory Management")
        else:
            print("✓ Verwende Standard GPU Memory Management (RMM Pool deaktiviert)")

        # Purely informational GPU banner: a missing, failing or hanging
        # nvidia-smi must never abort the setup.
        import subprocess
        try:
            result = subprocess.run(
                ['nvidia-smi', '--query-gpu=name,memory.total',
                 '--format=csv,noheader'],
                capture_output=True, text=True, timeout=10)
        except (OSError, subprocess.SubprocessError):
            pass
        else:
            if result.returncode == 0:
                gpu_info = result.stdout.strip()
                print(f"  GPU: {gpu_info}")
        return True

    # RMM pool setup (requested and available)
    try:
        # Release allocations held by CuPy's default pool before RMM takes over
        if hasattr(cp, 'get_default_memory_pool'):
            cp.get_default_memory_pool().free_all_blocks()
        gc.collect()

        rmm.reinitialize(
            pool_allocator=True,
            initial_pool_size="5GB",    # conservative for logistic regression
            maximum_pool_size="6.5GB",  # leaves 1.5GB of an 8GB card unpooled
            managed_memory=False
        )

        # Route CuPy allocations through the RMM pool
        cp.cuda.set_allocator(rmm_cupy_allocator)

        print("✓ RMM Memory Pool konfiguriert (5GB initial, 6.5GB max)")
        return True

    except Exception as e:
        # Best effort: report the failure and continue with the default allocator
        print(f"⚠ RMM Pool Setup fehlgeschlagen: {e}")
        print("  Verwende Standard GPU Memory Management")
        return True

def clear_gpu_memory():
    """Release cached GPU memory and run the Python garbage collector.

    Empties the PyTorch CUDA cache when CUDA is available and, when cuML is
    available, frees all blocks of CuPy's default device and pinned memory
    pools. The CuPy cleanup is best effort: a failure is logged at debug level
    and otherwise ignored so that it can never interrupt an experiment.
    """
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    if CUML_AVAILABLE:
        try:
            cp.get_default_memory_pool().free_all_blocks()
            cp.get_default_pinned_memory_pool().free_all_blocks()
        except Exception as exc:
            # Only ordinary errors are swallowed; KeyboardInterrupt and
            # SystemExit propagate.
            logging.getLogger(__name__).debug(
                "GPU-Speicherbereinigung übersprungen: %s", exc)

    gc.collect()

def get_gpu_memory_info():
    """Return the current GPU memory usage as reported by ``nvidia-smi``.

    The values printed by ``nvidia-smi`` are divided by 1024 before being
    returned. When several GPUs are listed, the first line is used.

    Returns:
        dict: ``{'gpu_used', 'gpu_total', 'gpu_free'}`` as floats. All three
        are ``0.0`` when ``nvidia-smi`` is missing, fails, times out or prints
        something that cannot be parsed.
    """
    import subprocess

    unavailable = {'gpu_used': 0.0, 'gpu_total': 0.0, 'gpu_free': 0.0}

    try:
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=memory.used,memory.total',
             '--format=csv,noheader,nounits'],
            capture_output=True, text=True, timeout=10)
    except (OSError, subprocess.SubprocessError):
        return unavailable

    if result.returncode != 0:
        return unavailable

    # One output line per GPU; only the first one is reported.
    gpu_lines = result.stdout.strip().splitlines()
    if not gpu_lines:
        return unavailable

    try:
        used_mb, total_mb = (float(field) for field in gpu_lines[0].split(',')[:2])
    except ValueError:
        # Too few fields or a non-numeric value such as "[N/A]"
        return unavailable

    gpu_used = used_mb / 1024  # MB to GB
    gpu_total = total_mb / 1024
    return {
        'gpu_used': gpu_used,
        'gpu_total': gpu_total,
        'gpu_free': gpu_total - gpu_used,
    }

# -------------------------------------------------------------------------------
# Reproducibility: seed NumPy and PyTorch; on CUDA also force deterministic cuDNN
# -------------------------------------------------------------------------------
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# -------------------------------------------------------------------------------
# Logging setup: one timestamped UTF-8 log file under logs/ plus stdout
# -------------------------------------------------------------------------------
log_dir = "logs"
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%H:%M:%S",
    handlers=[
        logging.FileHandler(
            os.path.join(log_dir, f"fashion_lr_active_learning_{time.strftime('%Y%m%d_%H%M%S')}.log"),
            encoding='utf-8'
        ),
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)

# Create the output directories; creation is logged only for directories that did not exist yet
output_dirs = ["plots", "results", "reports"]
for dir_name in output_dirs:
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
        logger.info(f"Erstellt Verzeichnis: {dir_name}")

# -------------------------------------------------------------------------------
# Fashion-MNIST Daten laden
# -------------------------------------------------------------------------------
def load_fashion_mnist_data():
    """Load Fashion-MNIST and return it as flattened NumPy arrays.

    The dataset is fetched through torchvision into ``./data`` (downloaded if
    necessary), converted to tensors, normalised with mean 0.2860 and standard
    deviation 0.3530, and every image is flattened into one feature row. A
    summary (sizes, feature count, memory footprint, class distribution of the
    training split) is written to the log.

    Returns:
        tuple: ``(X_train_flat, y_train, X_test_flat, y_test)`` as NumPy arrays.

    Raises:
        Exception: Whatever torchvision raises while loading; the error is
        logged and re-raised unchanged.
    """
    logger.info("Lade Fashion-MNIST-Datensatz...")

    # Tensor conversion followed by the normalisation used for Fashion-MNIST here
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.2860,), (0.3530,)),
    ])

    data_dir = './data'
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    try:
        train_dataset, test_dataset = [
            torchvision.datasets.FashionMNIST(
                root=data_dir, train=is_train, download=True, transform=transform
            )
            for is_train in (True, False)
        ]
    except Exception as e:
        logger.error(f"Fehler beim Laden des Fashion-MNIST-Datensatzes: {e}")
        raise

    def _whole_split_as_numpy(dataset):
        # A single batch spanning the entire split yields all images and labels at once.
        loader = torch.utils.data.DataLoader(
            dataset, batch_size=len(dataset), shuffle=False
        )
        images, labels = next(iter(loader))
        # 2D layout (sample, features) as expected by logistic regression
        return images.view(images.size(0), -1).numpy(), labels.numpy()

    X_train_flat, y_train = _whole_split_as_numpy(train_dataset)
    X_test_flat, y_test = _whole_split_as_numpy(test_dataset)

    logger.info(f"✓ Fashion-MNIST geladen: {len(X_train_flat):,} Trainingsbilder, {len(X_test_flat):,} Testbilder")
    logger.info(f"  Feature-Dimensionen: {X_train_flat.shape[1]}")
    logger.info(f"  Klassen: {len(np.unique(y_train))} ({', '.join(FASHION_MNIST_CLASSES_DE)})")
    logger.info(f"  Speicherbedarf: {(X_train_flat.nbytes + X_test_flat.nbytes) / 1024**2:.1f} MB")

    # Class distribution of the training split
    logger.info("  Klassenverteilung (Training):")
    for cls, count in zip(*np.unique(y_train, return_counts=True)):
        logger.info(f"    {FASHION_MNIST_CLASSES_DE[cls]}: {count:,} ({count/len(y_train)*100:.1f}%)")

    return X_train_flat, y_train, X_test_flat, y_test

# -------------------------------------------------------------------------------
# GPU-Logistic Regression Wrapper Klasse
# -------------------------------------------------------------------------------
class GPUOptimizedLogisticRegression:
    """
    Logistic-regression wrapper with automatic backend selection.

    RAPIDS cuML (GPU) is used when it is importable and passes a small
    smoke-test fit; otherwise scikit-learn (CPU) is used. Features are
    standardised with a ``StandardScaler`` that is fitted in ``fit`` and
    re-applied in ``predict`` / ``predict_proba``.
    """

    # Backend decided by an earlier instance, shared by all instances. The
    # active-learning loop builds a new model every iteration; without this
    # cache each construction would repeat the cuML smoke test and the
    # nvidia-smi call (or re-log the CPU warning).
    _probed_backend = None

    def __init__(self, n_samples=None):
        """Create an unfitted model and choose its backend.

        Args:
            n_samples: Size of the intended training set. Stored on the
                instance only; nothing in this class reads it.
        """
        self.n_samples = n_samples
        self.backend = None
        self.model = None
        self.scaler = StandardScaler()
        self.is_fitted = False

        self._select_backend()

    def _select_backend(self):
        """Set ``self.backend`` to ``'cuml'`` or ``'sklearn'``.

        A successful cuML probe, or the absence of cuML altogether, is
        remembered for later instances. A failed probe while cuML is installed
        is not cached, so the next instance tries the GPU again.
        """
        cached = GPUOptimizedLogisticRegression._probed_backend
        if cached is not None:
            self.backend = cached
            return

        if CUML_AVAILABLE:
            try:
                # Smoke test: make sure cuML can really train on this machine
                test_data = cp.random.rand(100, 10, dtype=cp.float32)
                test_labels = cp.random.randint(0, 2, 100, dtype=cp.int32)
                test_model = cuMLLR(max_iter=1)
                test_model.fit(test_data, test_labels)
                self.backend = 'cuml'

                # Report GPU memory alongside the backend choice
                mem_info = get_gpu_memory_info()
                if 'gpu_total' in mem_info:
                    logger.info(f"✓ Verwende RAPIDS cuML Logistic Regression (GPU: {mem_info.get('gpu_used', 0):.1f}/{mem_info.get('gpu_total', 0):.1f} GB)")
                else:
                    logger.info("✓ Verwende RAPIDS cuML Logistic Regression")

                del test_data, test_labels, test_model
                cp.get_default_memory_pool().free_all_blocks()
            except Exception as e:
                logger.warning(f"cuML Test fehlgeschlagen: {e}")

        if self.backend is None:
            self.backend = 'sklearn'
            logger.warning("⚠ Verwende sklearn Logistic Regression (CPU) - keine GPU-Beschleunigung verfügbar!")
            logger.info("  Dies wird deutlich langsamer sein als GPU-beschleunigte Alternativen.")
            logger.info("  Empfehlung: Installieren Sie RAPIDS cuML für optimale Performance.")

        if self.backend == 'cuml' or not CUML_AVAILABLE:
            GPUOptimizedLogisticRegression._probed_backend = self.backend

    def _create_model(self):
        """Build the estimator for the current backend from ``LR_CONFIGS``."""
        if self.backend == 'cuml':
            return cuMLLR(**LR_CONFIGS['cuml'])
        return SklearnLR(**LR_CONFIGS['sklearn'])

    def _prepare_input(self, X):
        """Scale ``X`` and, for cuML, move it to the GPU as float32.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if not self.is_fitted:
            raise RuntimeError("Model not fitted!")

        X_scaled = self.scaler.transform(X)
        if self.backend == 'cuml':
            return cp.asarray(X_scaled, dtype=cp.float32)
        return X_scaled

    def fit(self, X, y):
        """Fit the scaler and the model on ``X`` / ``y``.

        If training on the GPU raises, this instance switches to the sklearn
        backend and trains on the CPU instead. Errors of the sklearn backend
        propagate to the caller.

        Returns:
            GPUOptimizedLogisticRegression: ``self``.
        """
        start_time = time.time()

        # Feature scaling (the scaler is refitted on every call)
        X_scaled = self.scaler.fit_transform(X)

        try:
            self.model = self._create_model()

            if self.backend == 'cuml':
                # Convert to CuPy arrays
                X_gpu = cp.asarray(X_scaled, dtype=cp.float32)
                y_gpu = cp.asarray(y, dtype=cp.int32)

                # Explicit CUDA synchronisation before training starts
                cp.cuda.Stream.null.synchronize()

                self.model.fit(X_gpu, y_gpu)

                # Release the device copies of the training data
                del X_gpu, y_gpu
                cp.get_default_memory_pool().free_all_blocks()
            else:
                self.model.fit(X_scaled, y)

        except Exception as e:
            if self.backend == 'cuml':
                logger.warning(f"  GPU Training fehlgeschlagen: {e}")
                logger.info("  Fallback zu CPU...")

                # Fall back to sklearn for this instance
                self.backend = 'sklearn'
                self.model = self._create_model()
                self.model.fit(X_scaled, y)
            else:
                raise

        self.is_fitted = True
        train_time = time.time() - start_time

        logger.info(f"  Training abgeschlossen in {train_time:.2f}s (Backend: {self.backend})")

        return self

    def predict_proba(self, X):
        """Return the class probabilities predicted for ``X``.

        The cuML result is copied back to a NumPy array; the sklearn result is
        returned as produced by the estimator.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        model_input = self._prepare_input(X)
        probs = self.model.predict_proba(model_input)
        if self.backend == 'cuml':
            return cp.asnumpy(probs)
        return probs

    def predict(self, X):
        """Return the class labels predicted for ``X``.

        The cuML result is copied back to a NumPy array and cast to ``int``;
        the sklearn result is returned as produced by the estimator.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        model_input = self._prepare_input(X)
        predictions = self.model.predict(model_input)
        if self.backend == 'cuml':
            return cp.asnumpy(predictions).astype(int)
        return predictions

# -------------------------------------------------------------------------------
# Query-Strategien
# -------------------------------------------------------------------------------
def entropy_sampling(model, X_pool, n_instances=1):
    """Select the pool samples with the highest predictive entropy.

    Args:
        model: Object exposing ``predict_proba(X)``.
        X_pool: Candidate samples; only its length and ``predict_proba`` output are used.
        n_instances: Number of samples to select (capped at the pool size).

    Returns:
        numpy.ndarray: Positions into ``X_pool``, ordered by ascending entropy
        (the most uncertain sample comes last). Empty when nothing is
        requested or the pool is empty. If scoring fails, the error is logged
        and ``random_sampling`` is used instead.
    """
    try:
        n_instances = min(n_instances, len(X_pool))
        if n_instances <= 0:
            # Without this guard the slice [-0:] below would return every index.
            return np.array([], dtype=int)

        probs = model.predict_proba(X_pool)
        # Clip away exact zeros so that log() stays finite
        epsilon = 1e-10
        probs = np.clip(probs, epsilon, 1.0 - epsilon)
        entropies = -np.sum(probs * np.log(probs), axis=1)
        return np.argsort(entropies)[-n_instances:]
    except Exception as e:
        logger.error(f"Fehler bei Entropy Sampling: {e}")
        return random_sampling(model, X_pool, n_instances)

def margin_sampling(model, X_pool, n_instances=1):
    """Select the pool samples with the smallest margin between the top two classes.

    Args:
        model: Object exposing ``predict_proba(X)``.
        X_pool: Candidate samples; only its length and ``predict_proba`` output are used.
        n_instances: Number of samples to select (capped at the pool size).

    Returns:
        numpy.ndarray: Positions into ``X_pool``, smallest margin first. Empty
        when nothing is requested or the pool is empty. If scoring fails, the
        error is logged and ``random_sampling`` is used instead.
    """
    try:
        n_instances = min(n_instances, len(X_pool))
        if n_instances <= 0:
            # A negative count would otherwise slice from the wrong end.
            return np.array([], dtype=int)

        probs = model.predict_proba(X_pool)
        sorted_probs = np.sort(probs, axis=1)

        if sorted_probs.shape[1] >= 2:
            margins = sorted_probs[:, -1] - sorted_probs[:, -2]
        else:
            # Single probability column: use the distance to certainty instead
            margins = 1.0 - sorted_probs[:, -1]

        return np.argsort(margins)[:n_instances]
    except Exception as e:
        logger.error(f"Fehler bei Margin Sampling: {e}")
        return random_sampling(model, X_pool, n_instances)

def least_confidence_sampling(model, X_pool, n_instances=1):
    """Select the pool samples whose most likely class has the lowest probability.

    Args:
        model: Object exposing ``predict_proba(X)``.
        X_pool: Candidate samples; only its length and ``predict_proba`` output are used.
        n_instances: Number of samples to select (capped at the pool size).

    Returns:
        numpy.ndarray: Positions into ``X_pool``, least confident first. Empty
        when nothing is requested or the pool is empty. If scoring fails, the
        error is logged and ``random_sampling`` is used instead.
    """
    try:
        n_instances = min(n_instances, len(X_pool))
        if n_instances <= 0:
            # A negative count would otherwise slice from the wrong end.
            return np.array([], dtype=int)

        probs = model.predict_proba(X_pool)
        confidences = np.max(probs, axis=1)
        return np.argsort(confidences)[:n_instances]
    except Exception as e:
        logger.error(f"Fehler bei Least Confidence Sampling: {e}")
        return random_sampling(model, X_pool, n_instances)

def random_sampling(model, X_pool, n_instances=1):
    """Draw pool positions uniformly at random without replacement (baseline).

    ``model`` is accepted only so that the signature matches the other query
    strategies; it is not used. The draw uses NumPy's global random state.

    Returns:
        numpy.ndarray: Up to ``n_instances`` distinct positions into
        ``X_pool``; an empty integer array when nothing can be drawn.
    """
    try:
        pool_size = len(X_pool)
        draw_count = min(n_instances, pool_size)
        if draw_count > 0:
            return np.random.choice(pool_size, size=draw_count, replace=False)
        return np.array([], dtype=int)
    except Exception as e:
        logger.error(f"Fehler bei Random Sampling: {e}")
        # Deterministic fallback: the leading pool positions
        return np.arange(min(n_instances, len(X_pool)))

# -------------------------------------------------------------------------------
# Active Learning Hauptfunktion
# -------------------------------------------------------------------------------
def run_gpu_lr_active_learning(X_train, y_train, X_test, y_test,
                               strategy_name, strategy_func,
                               budget_percentages, batch_size=500):
    """
    Run a pool-based active-learning experiment with logistic regression.

    For every budget fraction in ``budget_percentages`` the experiment is
    repeated ``N_RUNS`` times. Each run seeds NumPy with ``SEED + run``, labels
    an initial set of ``max(100, INITIAL_PERCENTAGE * len(y_train))`` samples
    split evenly over the class labels 0-9, and then loops: train a fresh
    ``GPUOptimizedLogisticRegression`` on the labelled samples, score it on the
    test set, and let ``strategy_func`` pick up to ``batch_size`` samples from
    the remaining pool. The loop ends once ``int(budget_pct * len(y_train))``
    samples are labelled or the pool is empty; a final model is then trained
    on all labelled samples and evaluated.

    Args:
        X_train, y_train: Training pool; indexed with integer index arrays/lists.
        X_test, y_test: Held-out data used for every evaluation.
        strategy_name: Label stored in the result rows and used in log output.
        strategy_func: Called as ``strategy_func(model, X_pool, n_query)``; must
            return positions into ``X_pool``.
        budget_percentages: Iterable of labelling budgets as fractions of the
            training-set size.
        batch_size: Maximum number of samples queried per iteration.

    Returns:
        list[dict]: One entry per completed run with the keys ``strategy``,
        ``budget_pct``, ``run``, ``n_labeled``, ``accuracy``, ``f1_score``
        (macro), ``accuracies``, ``n_labeled_list``, ``avg_query_time``,
        ``avg_train_time``, ``backend``, ``y_pred`` and ``y_true``. A run that
        raises is logged and skipped, so it contributes no entry.
    """
    results = []
    n_total = len(y_train)
    
    for budget_pct in budget_percentages:
        n_budget = int(budget_pct * n_total)
        
        logger.info(f"\nGPU-LR + {strategy_name} - Budget: {budget_pct:.0%} ({n_budget:,} Samples)")
        
        for run in range(N_RUNS):
            logger.info(f"  Run {run+1}/{N_RUNS}")
            
            try:
                # Set seed for reproducibility (same seed for a given run index at every budget)
                np.random.seed(SEED + run)
                
                # Initialisation: everything starts in the unlabelled pool
                pool_indices = np.arange(n_total)
                labeled_indices = []
                
                # Size of the initial labelled set (at least 100 samples)
                n_initial = max(100, int(INITIAL_PERCENTAGE * n_total))
                n_initial = min(n_initial, len(pool_indices))
                
                # Stratified initial selection: equal share per class.
                # NOTE(review): class labels are hard-coded as 0..9 (range(10) and // 10).
                initial_indices = []
                for class_label in range(10):
                    class_indices = pool_indices[y_train[pool_indices] == class_label]
                    n_class = max(1, n_initial // 10)
                    if len(class_indices) > 0:
                        selected = np.random.choice(class_indices, 
                                                  size=min(n_class, len(class_indices)), 
                                                  replace=False)
                        initial_indices.extend(selected)
                
                labeled_indices = list(initial_indices[:n_initial])
                pool_indices = np.setdiff1d(pool_indices, labeled_indices)
                
                # Per-iteration tracking
                accuracies = []
                n_labeled_list = []
                query_times = []
                train_times = []
                
                while len(labeled_indices) < n_budget and len(pool_indices) > 0:
                    # Clear GPU memory before training
                    clear_gpu_memory()
                    
                    # Create and train a fresh model on the current labelled set
                    model = GPUOptimizedLogisticRegression(n_samples=len(labeled_indices))
                    
                    train_start = time.time()
                    model.fit(X_train[labeled_indices], y_train[labeled_indices])
                    train_time = time.time() - train_start
                    train_times.append(train_time)
                    
                    # Evaluation on the test set
                    y_pred = model.predict(X_test)
                    acc = accuracy_score(y_test, y_pred)
                    
                    accuracies.append(acc)
                    n_labeled_list.append(len(labeled_indices))
                    
                    # Size of the next batch: limited by batch size, remaining budget and pool size
                    n_query = min(batch_size, n_budget - len(labeled_indices), len(pool_indices))
                    if n_query <= 0:
                        break
                    
                    # Query with timing
                    query_start = time.time()
                    query_indices = strategy_func(model, X_train[pool_indices], n_query)
                    query_time = time.time() - query_start
                    query_times.append(query_time)
                    
                    # Validate the query result: drop positions beyond the end of the pool
                    query_indices = np.asarray(query_indices)
                    query_indices = query_indices[query_indices < len(pool_indices)]
                    
                    if len(query_indices) == 0:
                        logger.warning(f"Keine gültigen Query-Indizes in Run {run+1}")
                        break
                    
                    # Translate pool positions into training-set indices
                    selected_indices = pool_indices[query_indices]
                    
                    # Update: move the selected samples from the pool to the labelled set
                    labeled_indices.extend(selected_indices)
                    pool_indices = np.setdiff1d(pool_indices, selected_indices)
                    
                    # Progress logging only at milestones; acc is the score of the model
                    # trained before this batch was added
                    if len(labeled_indices) % 10000 == 0 or len(labeled_indices) == n_budget:
                        mem_info = get_gpu_memory_info()
                        gpu_mem_str = ""
                        if model.backend == 'cuml' and 'gpu_used' in mem_info:
                            gpu_mem_str = f" | GPU: {mem_info['gpu_used']:.1f}/{mem_info['gpu_total']:.1f} GB"
                        
                        logger.info(f"    {len(labeled_indices):,} labeled → Accuracy: {acc:.4f} "
                                  f"(Train: {train_time:.1f}s, Query: {query_time:.2f}s){gpu_mem_str}")
                
                # Final evaluation: retrain on the complete labelled set, which may include
                # a last batch that no model inside the loop was trained on
                if len(labeled_indices) > 0:
                    clear_gpu_memory()
                    model = GPUOptimizedLogisticRegression(n_samples=len(labeled_indices))
                    model.fit(X_train[labeled_indices], y_train[labeled_indices])
                    
                    y_pred = model.predict(X_test)
                    final_acc = accuracy_score(y_test, y_pred)
                    final_f1 = f1_score(y_test, y_pred, average='macro')
                    
                    results.append({
                        'strategy': strategy_name,
                        'budget_pct': budget_pct,
                        'run': run,
                        'n_labeled': len(labeled_indices),
                        'accuracy': final_acc,
                        'f1_score': final_f1,
                        'accuracies': accuracies,
                        'n_labeled_list': n_labeled_list,
                        'avg_query_time': np.mean(query_times) if query_times else 0,
                        'avg_train_time': np.mean(train_times) if train_times else 0,
                        'backend': model.backend,
                        'y_pred': y_pred,  # kept for the confusion matrix
                        'y_true': y_test
                    })
                    
                    logger.info(f"    Final: {len(labeled_indices):,} labeled → "
                              f"Accuracy: {final_acc:.4f}, F1: {final_f1:.4f}")
                
                # Cleanup
                clear_gpu_memory()
                    
            except Exception as e:
                # A failing run is reported and skipped; the remaining runs still execute
                logger.error(f"Fehler in Run {run+1}: {e}")
                import traceback
                traceback.print_exc()
                continue
    
    return results

# -------------------------------------------------------------------------------
# Statistische Analyse
# -------------------------------------------------------------------------------
def cliffs_delta(x, y):
    """Compute Cliff's delta between two samples as an effect-size measure.

    The value is (#pairs with x_i > y_j minus #pairs with x_i < y_j) divided by
    the number of pairs, clipped to [-1, 1].

    Returns:
        float: The effect size; ``0.0`` when either sample is empty or when
        the computation fails (the failure is logged).
    """
    try:
        size_x, size_y = len(x), len(y)
        if size_x == 0 or size_y == 0:
            return 0.0

        xs = np.asarray(x)
        ys = np.asarray(y)

        # Tally, over all (x_i, y_j) pairs, how often x_i is larger / smaller.
        wins = sum(np.sum(xi > ys) for xi in xs)
        losses = sum(np.sum(xi < ys) for xi in xs)

        return np.clip((wins - losses) / (size_x * size_y), -1.0, 1.0)
    except Exception as e:
        logger.error(f"Fehler bei Cliff's Delta: {e}")
        return 0.0

def interpret_cliffs_delta(d):
    """Map a Cliff's delta value to a qualitative effect-size label.

    The absolute value is compared against the thresholds 0.147, 0.33 and
    0.474.

    Returns:
        str: ``"negligible"``, ``"small"``, ``"medium"`` or ``"large"``;
        ``"unknown"`` when ``d`` cannot be converted to a float or is NaN.
    """
    try:
        abs_d = abs(float(d))
    except (TypeError, ValueError, OverflowError):
        return "unknown"

    # NaN fails every "<" comparison below and would otherwise be labelled "large".
    if np.isnan(abs_d):
        return "unknown"

    if abs_d < 0.147:
        return "negligible"
    if abs_d < 0.33:
        return "small"
    if abs_d < 0.474:
        return "medium"
    return "large"

def perform_statistical_analysis(results_df, metric='accuracy'):
    """Compare every strategy against 'Random Sampling' at each budget level.

    For each budget and each non-baseline strategy with at least ``N_RUNS``
    results on both sides, a one-sided Wilcoxon signed-rank test (strategy
    greater than baseline) and Cliff's delta are computed. The p-values are
    Bonferroni-corrected over all comparisons performed.

    Args:
        results_df: DataFrame with the columns ``strategy``, ``budget_pct`` and
            ``metric``. If a ``run`` column is present, the rows of each
            (strategy, budget) cell are ordered by it so that the paired test
            compares matching runs.
        metric: Name of the column to analyse.

    Returns:
        pandas.DataFrame: One row per comparison with descriptive statistics,
        ``wilcoxon_statistic``, ``p_value``, ``cliffs_delta``, ``effect_size``,
        ``n_samples`` and, when at least one comparison exists,
        ``p_value_corrected`` and ``significant``. An empty DataFrame is
        returned if the analysis fails (the error is logged).
    """
    statistical_results = []

    def _metric_values(strategy, budget_pct):
        # Values of one (strategy, budget) cell. The signed-rank test is a
        # paired test, so pairing must follow the run index rather than the
        # incidental row order of results_df.
        cell = results_df[
            (results_df['strategy'] == strategy) &
            (results_df['budget_pct'] == budget_pct)
        ]
        if 'run' in cell.columns:
            cell = cell.sort_values('run', kind='mergesort')
        return cell[metric].values

    try:
        strategies = results_df['strategy'].unique()
        budget_levels = results_df['budget_pct'].unique()

        for budget_pct in budget_levels:
            # Random sampling is the baseline
            baseline_data = _metric_values('Random Sampling', budget_pct)

            for strategy in strategies:
                if strategy == 'Random Sampling':
                    continue

                strategy_data = _metric_values(strategy, budget_pct)

                if len(baseline_data) < N_RUNS or len(strategy_data) < N_RUNS:
                    continue

                # Wilcoxon test; identical samples are reported as "no evidence"
                try:
                    if np.allclose(strategy_data, baseline_data):
                        statistic, p_value = 0.0, 1.0
                    else:
                        statistic, p_value = wilcoxon(
                            strategy_data, baseline_data,
                            alternative='greater',
                            zero_method='zsplit'
                        )
                except Exception as e:
                    logger.warning(f"Wilcoxon Test fehlgeschlagen: {e}")
                    statistic, p_value = 0.0, 1.0

                # Effect size
                effect_size = cliffs_delta(strategy_data, baseline_data)
                effect_interpretation = interpret_cliffs_delta(effect_size)

                # Descriptive statistics
                baseline_mean = np.mean(baseline_data)
                baseline_std = np.std(baseline_data)
                strategy_mean = np.mean(strategy_data)
                strategy_std = np.std(strategy_data)

                improvement = strategy_mean - baseline_mean
                improvement_pct = ((improvement / baseline_mean) * 100) if baseline_mean > 0 else 0

                statistical_results.append({
                    'strategy': strategy,
                    'budget_pct': budget_pct,
                    'baseline_mean': baseline_mean,
                    'baseline_std': baseline_std,
                    'strategy_mean': strategy_mean,
                    'strategy_std': strategy_std,
                    'improvement': improvement,
                    'improvement_pct': improvement_pct,
                    'wilcoxon_statistic': float(statistic),
                    'p_value': float(p_value),
                    'cliffs_delta': float(effect_size),
                    'effect_size': effect_interpretation,
                    'n_samples': len(strategy_data)
                })

        stat_df = pd.DataFrame(statistical_results)

        if len(stat_df) > 0:
            # Bonferroni correction over all comparisons performed
            n_comparisons = len(stat_df)
            stat_df['p_value_corrected'] = np.minimum(stat_df['p_value'] * n_comparisons, 1.0)
            stat_df['significant'] = stat_df['p_value_corrected'] < SIGNIFICANCE_LEVEL

        return stat_df

    except Exception as e:
        logger.error(f"Fehler bei statistischer Analyse: {e}")
        return pd.DataFrame()

def create_statistical_report(stat_results):
    """Build, print and save the German statistical report for Fashion-MNIST.

    Only comparisons that are significant AND have a positive
    ``improvement_pct`` are reported as improvements. A significant result in
    which the strategy scores below Random Sampling is a degradation and must
    neither appear under the "improvements" heading nor drive the
    recommendation.

    Args:
        stat_results: pandas DataFrame with one row per strategy/budget
            comparison. Columns read when present: 'strategy', 'budget_pct',
            'improvement_pct', 'p_value_corrected', 'cliffs_delta',
            'effect_size' and 'significant'. An empty frame is accepted.

    Returns:
        The report text. If building the report raises, the error is logged
        and the string "Fehler bei der Berichterstellung" is returned instead.
    """
    strategy_labels_de = {
        'Random Sampling': 'Zufällige Auswahl',
        'Entropy Sampling': 'Entropie-Auswahl',
        'Margin Sampling': 'Margin-Auswahl',
        'Least Confidence': 'Geringste Konfidenz'
    }

    effect_labels_de = {
        'negligible': 'vernachlässigbar',
        'small': 'klein',
        'medium': 'mittel',
        'large': 'groß'
    }

    # Inflected adjective forms for the recommendation sentence. Simply
    # appending "en" to the base form yields the non-word "mittelen".
    effect_adjectives_de = {
        'negligible': 'vernachlässigbaren',
        'small': 'kleinen',
        'medium': 'mittleren',
        'large': 'großen'
    }

    def _improvement_mask(frame):
        """Return a boolean mask selecting significant, positive comparisons."""
        mask = frame['significant'].astype(bool)
        if 'improvement_pct' in frame.columns:
            mask = mask & (frame['improvement_pct'] > 0)
        return mask

    try:
        # Sort by effect size so the strongest effect comes first.
        if not stat_results.empty and 'cliffs_delta' in stat_results.columns:
            stat_results_sorted = stat_results.sort_values('cliffs_delta', ascending=False)
        else:
            stat_results_sorted = stat_results

        # Report header
        report = []
        report.append("\n" + "="*100)
        report.append("DETAILLIERTER STATISTISCHER BERICHT - FASHION-MNIST LOGISTIC REGRESSION ACTIVE LEARNING")
        report.append("="*100)
        report.append(f"Datensatz: Fashion-MNIST (10 Klassen: {', '.join(FASHION_MNIST_CLASSES_DE)})")
        report.append(f"Signifikanzniveau: {SIGNIFICANCE_LEVEL} (mit Bonferroni-Korrektur)")
        report.append(f"Anzahl Runs pro Experiment: {N_RUNS}")
        report.append("Statistischer Test: Wilcoxon Signed-Rank Test")
        report.append("Effektstärkemaß: Cliff's Delta")
        report.append("\n")

        # Significant improvements (significant and better than the baseline)
        if 'significant' in stat_results_sorted.columns:
            sig_results = stat_results_sorted[_improvement_mask(stat_results_sorted)]
        else:
            sig_results = pd.DataFrame()

        if not sig_results.empty:
            report.append("SIGNIFIKANTE VERBESSERUNGEN GEGENÜBER ZUFÄLLIGER AUSWAHL:")
            report.append("-"*100)
            report.append(f"{'Strategie':<20} {'Budget':<10} {'Verbesserung':<15} "
                         f"{'p-Wert':<12} {'Effekt':<15} {'Interpretation':<20}")
            report.append("-"*100)

            for _, row in sig_results.iterrows():
                strategy_de = strategy_labels_de.get(row['strategy'], row['strategy'])
                effect_de = effect_labels_de.get(row['effect_size'], row['effect_size'])
                # round() instead of int(): int() truncates, e.g. 0.29 * 100 -> 28.
                budget_percent = int(round(row['budget_pct'] * 100))

                report.append(f"{strategy_de:<20} "
                             f"{budget_percent:>8}% "
                             f"{row['improvement_pct']:>13.2f}% "
                             f"{row['p_value_corrected']:>11.4f} "
                             f"{row['cliffs_delta']:>14.3f} "
                             f"{effect_de:<20}")
        else:
            report.append("Keine signifikanten Verbesserungen gefunden!")

        # Per-strategy summary
        report.append("\n\nZUSAMMENFASSUNG NACH STRATEGIE:")
        report.append("-"*100)

        if 'strategy' in stat_results.columns:
            for strategy in ['Entropy Sampling', 'Margin Sampling', 'Least Confidence']:
                strategy_data = stat_results[stat_results['strategy'] == strategy]
                if strategy_data.empty:
                    continue

                if 'significant' in strategy_data.columns:
                    sig_count = int(_improvement_mask(strategy_data).sum())
                else:
                    sig_count = 0
                avg_improvement = strategy_data['improvement_pct'].mean() if 'improvement_pct' in strategy_data.columns else 0
                avg_effect = strategy_data['cliffs_delta'].mean() if 'cliffs_delta' in strategy_data.columns else 0

                strategy_de = strategy_labels_de.get(strategy, strategy)
                report.append(f"\n{strategy_de}:")
                report.append(f"  - Signifikante Verbesserungen: {sig_count}/{len(strategy_data)} "
                             f"({sig_count/len(strategy_data)*100:.1f}%)")
                report.append(f"  - Durchschnittliche Verbesserung: {avg_improvement:.2f}%")
                report.append(f"  - Durchschnittliche Effektstärke: {avg_effect:.3f}")

        # Recommendation
        report.append("\n\nEMPFEHLUNG FÜR FASHION-MNIST:")
        report.append("-"*100)

        if not sig_results.empty:
            # sig_results is sorted by Cliff's delta, so row 0 has the largest effect.
            best_row = sig_results.iloc[0]
            strategy_de = strategy_labels_de.get(best_row['strategy'], best_row['strategy'])
            effect_key = best_row['effect_size']
            effect_adjective = effect_adjectives_de.get(
                effect_key, f"{effect_labels_de.get(effect_key, effect_key)}en")
            report.append(f"Die beste Active Learning Strategie für Fashion-MNIST ist {strategy_de}")
            report.append(f"mit einer durchschnittlichen Verbesserung von {best_row['improvement_pct']:.2f}%")
            report.append(f"und einer {effect_adjective} Effektstärke.")
        else:
            report.append("Die Active Learning Strategien zeigen keine signifikanten Verbesserungen")
            report.append("gegenüber der zufälligen Auswahl in diesem Fashion-MNIST Experiment.")

        report.append("\n" + "="*100)

        # Print
        report_text = "\n".join(report)
        print(report_text)

        # Save (best effort: a failed write is logged, the text is still returned)
        report_filename = 'reports/fashion_lr_statistischer_bericht.txt'
        try:
            os.makedirs(os.path.dirname(report_filename), exist_ok=True)
            with open(report_filename, 'w', encoding='utf-8') as f:
                f.write(report_text)
            logger.info(f"✓ Statistischer Bericht gespeichert: {report_filename}")
        except Exception as e:
            logger.error(f"Fehler beim Speichern des Berichts: {e}")

        return report_text

    except Exception as e:
        logger.error(f"Fehler bei create_statistical_report: {e}")
        return "Fehler bei der Berichterstellung"

# -------------------------------------------------------------------------------
# Visualisierungen (Deutsch) für Fashion-MNIST
# -------------------------------------------------------------------------------
def plot_fashion_mnist_lr_results(all_results, stat_results):
    """Create the German-language Fashion-MNIST result visualizations.

    Draws one learning-curve panel per entry of BUDGET_PERCENTAGES (mean and
    a +/- one standard deviation band over runs, per strategy), saves the
    figure to 'plots/fashion_lr_active_learning_performance.png', then calls
    create_fashion_specific_analysis and create_detailed_comparison_fashion.

    Args:
        all_results: list of per-run result dicts. Keys read here: 'strategy',
            'budget_pct', 'n_labeled_list', 'accuracies' and, optionally,
            'backend'.
        stat_results: pandas DataFrame with the columns 'strategy',
            'budget_pct', 'significant' and 'effect_size'; may be empty.
    """
    # Matplotlib configuration for German labels
    plt.rcParams['font.family'] = 'DejaVu Sans'
    plt.rcParams['axes.unicode_minus'] = False

    try:
        plt.style.use('seaborn-v0_8-whitegrid')
    except OSError:
        # matplotlib raises OSError for a style name it does not know.
        plt.style.use('ggplot')

    # Line colours per strategy
    strategy_colors = {
        'Random Sampling': '#808080',
        'Entropy Sampling': '#1f77b4',
        'Margin Sampling': '#ff7f0e',
        'Least Confidence': '#2ca02c'
    }

    # German display labels
    strategy_labels_de = {
        'Random Sampling': 'Zufällige Auswahl',
        'Entropy Sampling': 'Entropie-Auswahl',
        'Margin Sampling': 'Margin-Auswahl',
        'Least Confidence': 'Geringste Konfidenz'
    }

    effect_labels_de = {
        'negligible': 'vernachlässigbar',
        'small': 'klein',
        'medium': 'mittel',
        'large': 'groß'
    }

    # 1. Main figure: learning curves with significance markers.
    # squeeze=False always yields a 2-D axes array, also for a single budget.
    fig, axes = plt.subplots(1, len(BUDGET_PERCENTAGES), figsize=(20, 5), squeeze=False)
    axes = axes[0]

    fig.suptitle('Fashion-MNIST Logistic Regression Active Learning Performance', fontsize=16, y=1.02)

    for budget_idx, budget_pct in enumerate(BUDGET_PERCENTAGES):
        ax = axes[budget_idx]

        # Collected band limits, used for the dynamic y-axis range below.
        all_y_values = []

        for strategy, color in strategy_colors.items():
            strategy_results = [r for r in all_results
                                if r['strategy'] == strategy
                                and r['budget_pct'] == budget_pct]
            if not strategy_results:
                continue

            # Resample every run onto a common x grid so runs can be averaged.
            max_samples = int(budget_pct * 60000)
            x_common = np.linspace(100, max_samples, 100)
            y_interpolated = []

            for r in strategy_results:
                if len(r['n_labeled_list']) > 1:
                    try:
                        y_interpolated.append(
                            np.interp(x_common, r['n_labeled_list'], r['accuracies']))
                    except (ValueError, TypeError) as e:
                        # Malformed curve (e.g. mismatched lengths): skip the
                        # run, but leave a trace instead of failing silently.
                        logger.warning(f"Lernkurve übersprungen ({strategy}, "
                                       f"Budget {budget_pct}): {e}")

            if not y_interpolated:
                continue

            y_mean = np.mean(y_interpolated, axis=0)
            y_std = np.std(y_interpolated, axis=0)

            all_y_values.extend(y_mean - y_std)
            all_y_values.extend(y_mean + y_std)

            # Look up significance for this strategy/budget pair.
            is_significant = False
            effect_size = ""
            if strategy != 'Random Sampling' and not stat_results.empty:
                sig_data = stat_results[
                    (stat_results['strategy'] == strategy) &
                    (stat_results['budget_pct'] == budget_pct)
                ]
                if not sig_data.empty:
                    first_row = sig_data.iloc[0]
                    is_significant = bool(first_row['significant'])
                    effect_size = effect_labels_de.get(
                        first_row['effect_size'], first_row['effect_size'])

            label = strategy_labels_de.get(strategy, strategy)
            if is_significant:
                label += f" *({effect_size})"

            # The backend name is appended to the baseline entry only.
            backend = strategy_results[0].get('backend', 'unknown')
            if strategy == 'Random Sampling':
                label += f" [{backend}]"

            ax.plot(x_common, y_mean,
                    label=label,
                    color=color,
                    linewidth=2.5,
                    linestyle='-' if not is_significant or strategy == 'Random Sampling' else '--')

            ax.fill_between(x_common,
                            y_mean - y_std,
                            y_mean + y_std,
                            color=color,
                            alpha=0.2)

        ax.set_xlabel('Anzahl gelabelter Beispiele', fontsize=12)
        ax.set_ylabel('Test-Genauigkeit', fontsize=12)
        ax.set_title(f'Budget: {int(round(budget_pct * 100))}%', fontsize=13)
        ax.grid(True, alpha=0.3)

        # Dynamic y-axis range
        if all_y_values:
            y_min = min(all_y_values)
            y_max = max(all_y_values)
            y_range = y_max - y_min

            # 10% padding on both sides
            y_min_adj = y_min - 0.1 * y_range
            y_max_adj = y_max + 0.1 * y_range

            # For a very narrow range use a fixed window around the centre.
            if y_range < 0.05:
                center = (y_min + y_max) / 2
                y_min_adj = center - 0.03
                y_max_adj = center + 0.03

            ax.set_ylim([max(0.0, y_min_adj), min(1.0, y_max_adj)])

        # X axis in thousands
        ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{int(x/1000)}k'))

        # The significance markers in the labels differ per budget, so every
        # panel needs its own legend (not just the first one).
        handles, _ = ax.get_legend_handles_labels()
        if handles:
            ax.legend(loc='lower right', fontsize=10, framealpha=0.9)

    fig.text(0.5, -0.05,
            '* = statistisch signifikant (p < 0,05); Effektstärke: vernachlässigbar/klein/mittel/groß',
            ha='center', fontsize=10, style='italic')

    filename = 'plots/fashion_lr_active_learning_performance.png'
    try:
        plt.tight_layout()
        # savefig does not create missing directories.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        logger.info(f"✓ Visualisierung erstellt: {filename}")
    finally:
        # Release the figure even if saving failed.
        plt.close(fig)

    # 2. Fashion-MNIST specific analysis
    create_fashion_specific_analysis(all_results, stat_results)

    # 3. Detailed comparison figure
    create_detailed_comparison_fashion(all_results, stat_results)

def create_fashion_specific_analysis(all_results, stat_results):
    """Create the four-panel Fashion-MNIST detail figure.

    Panels:
      1. Row-normalised confusion matrix of the active-learning strategy with
         the highest mean accuracy at 100% budget (first run of that strategy).
      2. Per-class accuracy of the first 100%-budget run of every strategy.
      3. Mean accuracy against mean (train + query) time per strategy/budget.
      4. The ten most frequent class confusions of the run used in panel 1.

    The figure is written to 'plots/fashion_lr_detailanalyse.png'.

    Args:
        all_results: list of per-run result dicts. Keys read here: 'strategy',
            'budget_pct', 'accuracy', 'avg_train_time', 'avg_query_time' and,
            optionally, 'y_true' / 'y_pred'. Class labels are compared against
            the integers 0..len(FASHION_MNIST_CLASSES_DE)-1.
        stat_results: not used by this function; kept for a uniform signature.
    """
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Fashion-MNIST Logistic Regression Detailanalyse', fontsize=16)

    strategy_labels_de = {
        'Random Sampling': 'Zufällige Auswahl',
        'Entropy Sampling': 'Entropie-Auswahl',
        'Margin Sampling': 'Margin-Auswahl',
        'Least Confidence': 'Geringste Konfidenz'
    }
    all_strategies = ['Random Sampling', 'Entropy Sampling', 'Margin Sampling', 'Least Confidence']
    active_strategies = all_strategies[1:]

    n_classes = len(FASHION_MNIST_CLASSES_DE)
    class_ids = np.arange(n_classes)

    results_100 = [r for r in all_results if r['budget_pct'] == 1.0]

    # Pick the active-learning strategy with the best mean accuracy at 100%.
    best_acc = float('-inf')
    best_result = None
    best_strategy = None
    for strategy in active_strategies:
        strategy_results = [r for r in results_100 if r['strategy'] == strategy]
        if strategy_results:
            avg_acc = np.mean([r['accuracy'] for r in strategy_results])
            if avg_acc > best_acc:
                best_acc = avg_acc
                best_result = strategy_results[0]  # first run supplies the predictions
                best_strategy = strategy

    # Computed once and shared by panels 1 and 4. Passing `labels` pins the
    # matrix to n_classes x n_classes even if a class is missing from the run,
    # which would otherwise break the fixed-size indexing below.
    cm = None
    if best_result is not None and 'y_pred' in best_result and 'y_true' in best_result:
        cm = confusion_matrix(best_result['y_true'], best_result['y_pred'], labels=class_ids)

    # 1. Confusion matrix of the best strategy at 100% budget
    ax1 = axes[0, 0]

    if cm is not None:
        # Row-normalise; rows without samples stay 0 instead of becoming NaN.
        row_sums = cm.sum(axis=1, keepdims=True)
        cm_normalized = np.divide(cm, row_sums,
                                  out=np.zeros(cm.shape, dtype=float),
                                  where=row_sums > 0)

        im = ax1.imshow(cm_normalized, interpolation='nearest', cmap='Blues')
        ax1.set_xticks(class_ids)
        ax1.set_yticks(class_ids)
        ax1.set_xticklabels(FASHION_MNIST_CLASSES_DE, rotation=45, ha='right', fontsize=8)
        ax1.set_yticklabels(FASHION_MNIST_CLASSES_DE, fontsize=8)
        ax1.set_xlabel('Vorhergesagt', fontsize=11)
        ax1.set_ylabel('Tatsächlich', fontsize=11)
        ax1.set_title(f'Confusion Matrix - {strategy_labels_de.get(best_strategy, best_strategy)} (100% Budget)', fontsize=12)

        plt.colorbar(im, ax=ax1, fraction=0.046, pad=0.04)

        # Annotate the diagonal only; white text on dark cells.
        for i in range(n_classes):
            ax1.text(i, i, f'{cm_normalized[i, i]:.2f}',
                     ha="center", va="center",
                     color="white" if cm_normalized[i, i] > 0.5 else "black",
                     fontsize=8)

    # 2. Per-class performance
    ax2 = axes[0, 1]

    class_accuracies = {}
    for strategy in all_strategies:
        strategy_results = [r for r in results_100 if r['strategy'] == strategy]
        if strategy_results and 'y_pred' in strategy_results[0]:
            # asarray so the element-wise comparison also works for plain lists.
            y_true = np.asarray(strategy_results[0]['y_true'])
            y_pred = np.asarray(strategy_results[0]['y_pred'])

            class_acc = []
            for cls in class_ids:
                mask = y_true == cls
                if mask.any():
                    class_acc.append((y_pred[mask] == y_true[mask]).mean())
                else:
                    class_acc.append(0)

            class_accuracies[strategy] = class_acc

    if class_accuracies:
        x = class_ids
        width = 0.2
        n_groups = len(class_accuracies)

        for i, (strategy, accs) in enumerate(class_accuracies.items()):
            # Centre the bar group on the tick for any number of strategies.
            offset = (i - (n_groups - 1) / 2) * width
            ax2.bar(x + offset, accs, width,
                    label=strategy_labels_de.get(strategy, strategy),
                    alpha=0.8)

        ax2.set_xlabel('Fashion-MNIST Klassen', fontsize=11)
        ax2.set_ylabel('Genauigkeit', fontsize=11)
        ax2.set_title('Klassenweise Performance (100% Budget)', fontsize=12)
        ax2.set_xticks(x)
        ax2.set_xticklabels(FASHION_MNIST_CLASSES_DE, rotation=45, ha='right', fontsize=8)
        ax2.legend(fontsize=9, loc='lower right')
        ax2.grid(axis='y', alpha=0.3)
        ax2.set_ylim([0, 1])

    # 3. Accuracy vs. compute time trade-off
    ax3 = axes[1, 0]

    perf_time_data = []
    for strategy in all_strategies:
        for budget in BUDGET_PERCENTAGES:
            results = [r for r in all_results
                       if r['strategy'] == strategy and r['budget_pct'] == budget]
            if results:
                perf_time_data.append({
                    'strategy': strategy_labels_de.get(strategy, strategy),
                    # round() instead of int(): int() truncates, e.g. 0.29 * 100 -> 28.
                    'budget': int(round(budget * 100)),
                    'accuracy': np.mean([r['accuracy'] for r in results]),
                    'time': np.mean([r['avg_train_time'] + r['avg_query_time'] for r in results])
                })

    if perf_time_data:
        df_pt = pd.DataFrame(perf_time_data)

        for strategy in df_pt['strategy'].unique():
            strategy_data = df_pt[df_pt['strategy'] == strategy]

            # Marker area scales with the budget.
            sizes = strategy_data['budget'].values * 2

            ax3.scatter(strategy_data['time'], strategy_data['accuracy'],
                        s=sizes, alpha=0.6, label=strategy)

            # Connect the budgets of one strategy in ascending order.
            strategy_data_sorted = strategy_data.sort_values('budget')
            ax3.plot(strategy_data_sorted['time'], strategy_data_sorted['accuracy'],
                     alpha=0.3, linestyle='--')

        ax3.set_xlabel('Durchschnittliche Zeit pro Batch (Sekunden)', fontsize=11)
        ax3.set_ylabel('Test-Genauigkeit', fontsize=11)
        ax3.set_title('Performance vs. Rechenzeit Trade-off', fontsize=12)
        ax3.legend(fontsize=9)
        ax3.grid(True, alpha=0.3)

    # 4. Most frequent class confusions
    ax4 = axes[1, 1]

    if cm is not None:
        row_totals = cm.sum(axis=1)

        # Off-diagonal entries only
        cm_no_diag = cm.copy()
        np.fill_diagonal(cm_no_diag, 0)

        confusion_pairs = []
        for i in range(n_classes):
            for j in range(n_classes):
                if i != j and cm_no_diag[i, j] > 0:
                    confusion_pairs.append({
                        'true': FASHION_MNIST_CLASSES_DE[i],
                        'pred': FASHION_MNIST_CLASSES_DE[j],
                        'count': cm_no_diag[i, j],
                        'rate': cm_no_diag[i, j] / row_totals[i] if row_totals[i] > 0 else 0
                    })

        # Keep the ten pairs with the highest confusion rate.
        confusion_pairs = sorted(confusion_pairs, key=lambda x: x['rate'], reverse=True)[:10]

        if confusion_pairs:
            labels = [f"{cp['true'][:7]} → {cp['pred'][:7]}" for cp in confusion_pairs]
            rates = [cp['rate'] * 100 for cp in confusion_pairs]

            bars = ax4.barh(range(len(labels)), rates, color='coral')
            ax4.set_yticks(range(len(labels)))
            ax4.set_yticklabels(labels, fontsize=9)
            ax4.set_xlabel('Verwechslungsrate (%)', fontsize=11)
            # The original literal was corrupted by an embedded tab character.
            ax4.set_title('Top 10 Klassenverwechslungen', fontsize=12)
            ax4.grid(axis='x', alpha=0.3)

            # Value labels at the bar ends
            for bar, rate in zip(bars, rates):
                ax4.text(bar.get_width(), bar.get_y() + bar.get_height()/2,
                         f'{rate:.1f}%', ha='left', va='center', fontsize=8)

    filename = 'plots/fashion_lr_detailanalyse.png'
    try:
        plt.tight_layout()
        # savefig does not create missing directories.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        logger.info(f"✓ Fashion-MNIST Detailanalyse erstellt: {filename}")
    finally:
        # Release the figure even if saving failed.
        plt.close(fig)

def create_detailed_comparison_fashion(all_results, stat_results):
    """Create the four-panel strategy comparison figure for Fashion-MNIST.

    Panels:
      1. Accuracy gain over Random Sampling per budget (percentage points).
      2. Box plots of the final accuracy at 100% budget.
      3. Mean number of labels at which a run first reaches 85% of the mean
         Random Sampling accuracy at 100% budget.
      4. Heat map of accuracy divided by mean (train + query) time,
         min-max normalised over all cells.

    The figure is written to 'plots/fashion_lr_vergleichsanalyse.png'.

    Args:
        all_results: list of per-run result dicts. Keys read here: 'strategy',
            'budget_pct', 'accuracy', 'avg_train_time', 'avg_query_time' and,
            optionally, 'n_labeled_list' / 'accuracies'.
        stat_results: not used by this function; kept for a uniform signature.
    """
    strategy_labels_de = {
        'Random Sampling': 'Zufällige Auswahl',
        'Entropy Sampling': 'Entropie-Auswahl',
        'Margin Sampling': 'Margin-Auswahl',
        'Least Confidence': 'Geringste Konfidenz'
    }
    all_strategies = ['Random Sampling', 'Entropy Sampling', 'Margin Sampling', 'Least Confidence']
    active_strategies = all_strategies[1:]

    def _percent(fraction):
        """Convert a fraction to a whole percentage without float truncation."""
        # int(0.29 * 100) == 28, hence round() before int().
        return int(round(fraction * 100))

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Fashion-MNIST Active Learning Vergleichsanalyse', fontsize=16)

    # 1. Improvement over Random Sampling
    ax1 = axes[0, 0]
    improvements = []

    for budget_pct in BUDGET_PERCENTAGES:
        random_results = [r['accuracy'] for r in all_results
                          if r['strategy'] == 'Random Sampling' and r['budget_pct'] == budget_pct]
        if not random_results:
            continue
        random_mean = np.mean(random_results)

        for strategy in active_strategies:
            strategy_results = [r['accuracy'] for r in all_results
                                if r['strategy'] == strategy and r['budget_pct'] == budget_pct]
            if strategy_results:
                improvements.append({
                    'Strategie': strategy_labels_de.get(strategy, strategy),
                    'Budget': _percent(budget_pct),
                    # difference in percentage points
                    'Verbesserung': (np.mean(strategy_results) - random_mean) * 100
                })

    if improvements:
        df_imp = pd.DataFrame(improvements)

        strategies = df_imp['Strategie'].unique()
        x = np.arange(len(BUDGET_PERCENTAGES))
        width = 0.25

        colors = ['#3498db', '#e74c3c', '#2ecc71']

        for i, strategy in enumerate(strategies):
            data = df_imp[df_imp['Strategie'] == strategy]
            values = []
            for b in BUDGET_PERCENTAGES:
                budget_data = data[data['Budget'] == _percent(b)]
                # 0 marks "no data for this budget"
                values.append(budget_data['Verbesserung'].values[0] if not budget_data.empty else 0)

            bars = ax1.bar(x + i*width - width, values, width,
                           label=strategy, alpha=0.8, color=colors[i % len(colors)])

            # Value labels: above positive bars, below negative ones, so the
            # text never overlaps the bar itself.
            for bar, value in zip(bars, values):
                if value != 0:
                    ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height(),
                             f'{value:.2f}%', ha='center',
                             va='bottom' if value > 0 else 'top', fontsize=8)

        ax1.set_xlabel('Budget (%)', fontsize=11)
        ax1.set_ylabel('Verbesserung (Prozentpunkte)', fontsize=11)
        ax1.set_title('Verbesserung gegenüber zufälliger Auswahl', fontsize=13)
        ax1.set_xticks(x)
        ax1.set_xticklabels([f'{_percent(b)}%' for b in BUDGET_PERCENTAGES])
        ax1.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
        ax1.legend()
        ax1.grid(True, alpha=0.3, axis='y')

    # 2. Box plots of the final accuracy
    ax2 = axes[0, 1]
    final_data = []

    for strategy in all_strategies:
        for acc in (r['accuracy'] for r in all_results
                    if r['strategy'] == strategy and r['budget_pct'] == 1.0):
            final_data.append({
                'Strategie': strategy_labels_de.get(strategy, strategy),
                'Genauigkeit': acc
            })

    if final_data:
        df_final = pd.DataFrame(final_data)

        strategies_unique = df_final['Strategie'].unique()
        box_data = [df_final[df_final['Strategie'] == s]['Genauigkeit'].values
                    for s in strategies_unique]

        # Tick labels are set explicitly below; the boxplot `labels` keyword
        # is deprecated in recent matplotlib releases.
        bp = ax2.boxplot(box_data, patch_artist=True)

        box_colors = ['lightgray', 'lightblue', 'lightcoral', 'lightgreen']
        for patch, color in zip(bp['boxes'], box_colors):
            patch.set_facecolor(color)

        ax2.set_title('Verteilung der finalen Genauigkeit (100% Budget)', fontsize=13)
        ax2.set_ylabel('Test-Genauigkeit', fontsize=11)
        ax2.grid(True, alpha=0.3, axis='y')
        ax2.set_xticks(np.arange(1, len(box_data) + 1))  # boxplot positions start at 1
        ax2.set_xticklabels(strategies_unique, rotation=25, ha='right')

        # Dynamic y-axis range
        all_values = np.concatenate(box_data)
        y_min, y_max = all_values.min(), all_values.max()
        y_range = y_max - y_min

        if y_range < 0.02:  # very small differences: fixed window around the centre
            center = (y_min + y_max) / 2
            ax2.set_ylim([center - 0.015, center + 0.015])
        else:
            ax2.set_ylim([y_min - 0.1*y_range, y_max + 0.1*y_range])

    # 3. Learning speed
    ax3 = axes[1, 0]
    learning_speed = []

    # Target: 85% of the mean Random Sampling accuracy at 100% budget.
    random_100_results = [r['accuracy'] for r in all_results
                          if r['strategy'] == 'Random Sampling' and r['budget_pct'] == 1.0]

    if random_100_results:
        target_acc = np.mean(random_100_results) * 0.85

        for strategy in all_strategies:
            # NOTE: runs that never reach the target contribute nothing here,
            # so the mean only covers the runs that did reach it.
            samples_needed = []
            for r in all_results:
                if r['strategy'] != strategy:
                    continue
                if 'n_labeled_list' in r and 'accuracies' in r:
                    for i, acc in enumerate(r['accuracies']):
                        if acc >= target_acc:
                            samples_needed.append(r['n_labeled_list'][i])
                            break

            if samples_needed:
                learning_speed.append({
                    'Strategie': strategy_labels_de.get(strategy, strategy),
                    'Samples': np.mean(samples_needed),
                    'Std': np.std(samples_needed)
                })

    if learning_speed:
        df_speed = pd.DataFrame(learning_speed)
        # Numeric bar positions keep ticks and tick labels explicitly in sync.
        x_speed = np.arange(len(df_speed))
        bars = ax3.bar(x_speed, df_speed['Samples'],
                       yerr=df_speed['Std'], capsize=5, color='purple', alpha=0.7)

        for bar, (_, row) in zip(bars, df_speed.iterrows()):
            ax3.text(bar.get_x() + bar.get_width()/2, bar.get_height(),
                     f'{int(row["Samples"]):,}', ha='center', va='bottom', fontsize=9)

        ax3.set_title('Benötigte Samples für 85% der Baseline-Performance', fontsize=13)
        ax3.set_ylabel('Anzahl Samples', fontsize=11)
        ax3.set_xlabel('')
        ax3.grid(True, alpha=0.3, axis='y')
        ax3.set_xticks(x_speed)
        ax3.set_xticklabels(df_speed['Strategie'], rotation=25, ha='right')

        # Reference line
        ax3.axhline(y=60000, color='red', linestyle='--', alpha=0.5,
                    label='Vollständiger Datensatz')
        ax3.legend()

    # 4. Efficiency matrix
    ax4 = axes[1, 1]
    efficiency_data = []

    for strategy in all_strategies:
        for budget_pct in BUDGET_PERCENTAGES:
            results = [r for r in all_results
                       if r['strategy'] == strategy and r['budget_pct'] == budget_pct]
            if not results:
                continue

            avg_acc = np.mean([r['accuracy'] for r in results])
            avg_time = np.mean([r['avg_train_time'] + r['avg_query_time'] for r in results])

            efficiency_data.append({
                'Strategie': strategy_labels_de.get(strategy, strategy),
                'Budget': f"{_percent(budget_pct)}%",
                # efficiency = accuracy per unit of time
                'Effizienz': avg_acc / avg_time if avg_time > 0 else 0
            })

    if efficiency_data:
        df_eff = pd.DataFrame(efficiency_data)
        pivot_eff = df_eff.pivot(index='Strategie', columns='Budget', values='Effizienz')

        # pivot() sorts the string column labels lexicographically ('100%'
        # before '20%'); restore the order given by BUDGET_PERCENTAGES.
        budget_order = [f"{_percent(b)}%" for b in BUDGET_PERCENTAGES]
        pivot_eff = pivot_eff.reindex(
            columns=[c for c in budget_order if c in pivot_eff.columns])

        # Min-max normalisation; a constant matrix maps to 0 instead of NaN.
        eff_min = pivot_eff.min().min()
        eff_span = pivot_eff.max().max() - eff_min
        if eff_span > 0:
            pivot_eff_norm = (pivot_eff - eff_min) / eff_span
        else:
            pivot_eff_norm = pivot_eff * 0.0

        if sns is not None:
            sns.heatmap(pivot_eff_norm, annot=True, fmt='.3f', cmap='YlOrRd',
                        ax=ax4, cbar_kws={'label': 'Relative Effizienz'})
        else:
            # Plain-matplotlib fallback so the panel is not left blank when
            # seaborn is unavailable.
            cells = pivot_eff_norm.values
            im = ax4.imshow(cells, cmap='YlOrRd', aspect='auto')
            ax4.set_xticks(np.arange(cells.shape[1]))
            ax4.set_xticklabels(pivot_eff_norm.columns)
            ax4.set_yticks(np.arange(cells.shape[0]))
            ax4.set_yticklabels(pivot_eff_norm.index)
            for (row_idx, col_idx), cell in np.ndenumerate(cells):
                if not np.isnan(cell):
                    ax4.text(col_idx, row_idx, f'{cell:.3f}',
                             ha='center', va='center', fontsize=9)
            fig.colorbar(im, ax=ax4, label='Relative Effizienz')

        ax4.set_title('Effizienz-Matrix (Genauigkeit/Zeit)', fontsize=13)
        ax4.set_xlabel('Budget', fontsize=11)
        ax4.set_ylabel('Strategie', fontsize=11)

    filename = 'plots/fashion_lr_vergleichsanalyse.png'
    try:
        plt.tight_layout()
        # savefig does not create missing directories.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        logger.info(f"✓ Fashion-MNIST Vergleichsanalyse erstellt: {filename}")
    finally:
        # Release the figure even if saving failed.
        plt.close(fig)

# -------------------------------------------------------------------------------
# Label-Einsparungs-Analyse für Fashion-MNIST
# -------------------------------------------------------------------------------
def calculate_label_savings_fashion(all_results, target_performance_percentages=(0.85, 0.90, 0.95),
                                    total_labels=60000):
    """Compute how many labels each strategy needs to reach target accuracies.

    The reference accuracy is the mean 'accuracy' of all Random Sampling runs
    with ``budget_pct == 1.0``. For every target fraction and every strategy,
    each learning curve contributes the first 'n_labeled_list' entry whose
    accuracy reaches ``reference * fraction``; a curve that never reaches it
    contributes ``total_labels``.

    NOTE(review): curves of all budgets are pooled, so a low-budget run that
    stops before reaching the target is also counted as ``total_labels`` -
    confirm that this is intended.

    Args:
        all_results: list of per-run result dicts. Keys read here: 'strategy',
            'budget_pct', 'accuracy' and, optionally, 'n_labeled_list' /
            'accuracies'.
        target_performance_percentages: iterable of target fractions of the
            reference accuracy.
        total_labels: size of the full labelled pool, used as the fallback
            label count and as the base of ``savings_pct``.

    Returns:
        pandas DataFrame with one row per (strategy, target) and the columns
        'strategy', 'target_performance', 'target_accuracy',
        'avg_labels_needed', 'std_labels_needed', 'savings_pct',
        'relative_savings_pct' and 'random_100_acc'. An empty DataFrame is
        returned when there is no Random Sampling run at 100% budget.
    """
    # Random Sampling must stay first: its result is the baseline for the
    # relative savings of the strategies that follow.
    strategies = ('Random Sampling', 'Entropy Sampling', 'Margin Sampling', 'Least Confidence')

    random_100_results = [r for r in all_results
                          if r['strategy'] == 'Random Sampling'
                          and r['budget_pct'] == 1.0]
    if not random_100_results:
        return pd.DataFrame()

    random_100_acc = np.mean([r['accuracy'] for r in random_100_results])

    savings_results = []

    for target_pct in target_performance_percentages:
        target_accuracy = random_100_acc * target_pct
        # round() before int(): int() alone truncates, e.g. 0.29 * 100 -> 28.
        target_performance = int(round(target_pct * 100))
        random_labels = None  # baseline label count for this target, once known

        for strategy in strategies:
            curves = [(r['n_labeled_list'], r['accuracies'])
                      for r in all_results
                      if r['strategy'] == strategy
                      and 'n_labeled_list' in r and 'accuracies' in r]
            if not curves:
                continue

            # First label count at which each curve reaches the target.
            labels_needed = [
                next((n_labeled
                      for n_labeled, acc in zip(n_labeled_list, accuracies)
                      if acc >= target_accuracy),
                     total_labels)
                for n_labeled_list, accuracies in curves
            ]

            avg_labels_needed = np.mean(labels_needed)
            std_labels_needed = np.std(labels_needed)

            savings_pct = ((total_labels - avg_labels_needed) / total_labels) * 100

            if strategy == 'Random Sampling':
                random_labels = avg_labels_needed
                relative_savings_pct = 0
            else:
                baseline_labels = total_labels if random_labels is None else random_labels
                if baseline_labels > 0:
                    relative_savings_pct = ((baseline_labels - avg_labels_needed) / baseline_labels) * 100
                else:
                    relative_savings_pct = 0

            savings_results.append({
                'strategy': strategy,
                'target_performance': target_performance,
                'target_accuracy': target_accuracy,
                'avg_labels_needed': avg_labels_needed,
                'std_labels_needed': std_labels_needed,
                'savings_pct': savings_pct,
                'relative_savings_pct': relative_savings_pct,
                'random_100_acc': random_100_acc
            })

    return pd.DataFrame(savings_results)

def _draw_required_labels_panel(ax, savings_df, strategy_labels_de):
    """Draw grouped bars of the mean labels needed per strategy and target level."""
    # Fix the x slot of every strategy once, in order of first appearance, so
    # that bars and tick labels stay aligned even if a target level is missing
    # a strategy.
    strategy_order = list(dict.fromkeys(savings_df['strategy']))
    slot_of = {name: slot for slot, name in enumerate(strategy_order)}

    width = 0.25
    colors_map = {85: '#3498db', 90: '#e74c3c', 95: '#2ecc71'}

    for target in savings_df['target_performance'].unique():
        data = savings_df[savings_df['target_performance'] == target]
        labels_needed = data['avg_labels_needed'].values
        errors = data['std_labels_needed'].values

        x = np.array([slot_of[name] for name in data['strategy']])
        # Shifts the 85 / 90 / 95 groups by -width / 0 / +width.
        offset = (target - 90) * width / 5

        bars = ax.bar(x + offset, labels_needed, width,
                      yerr=errors, capsize=5,
                      label=f'{target}% der Baseline',
                      alpha=0.8, color=colors_map.get(target, '#95a5a6'))

        # Print the value on top of every bar.
        for bar, value in zip(bars, labels_needed):
            ax.text(bar.get_x() + bar.get_width()/2, bar.get_height(),
                    f'{int(value):,}', ha='center', va='bottom', fontsize=8, rotation=0)

    ax.set_xlabel('Strategie', fontsize=11)
    ax.set_ylabel('Benötigte Labels', fontsize=11)
    ax.set_title('Benötigte Labels für Ziel-Performance', fontsize=13)
    ax.set_xticks(np.arange(len(strategy_order)))
    ax.set_xticklabels([strategy_labels_de.get(s, s) for s in strategy_order],
                       rotation=25, ha='right')
    ax.legend()
    ax.grid(axis='y', alpha=0.3)

    # Reference line at 60000 labels (captioned as the full data set).
    ax.axhline(y=60000, color='red', linestyle='--', alpha=0.5)
    ax.text(0.02, 60000, 'Vollständiger Datensatz', transform=ax.get_yaxis_transform(),
            va='bottom', ha='left', color='red', fontsize=9)


def _draw_relative_savings_heatmap(ax, savings_df, strategy_labels_de, al_strategies, targets):
    """Draw a strategy x target heatmap of 'relative_savings_pct'."""
    # Build the matrix row by row; combinations without data are shown as 0.
    savings_pivot = []
    for strategy in al_strategies:
        row = []
        for target in targets:
            data = savings_df[(savings_df['strategy'] == strategy) &
                              (savings_df['target_performance'] == target)]
            row.append(data['relative_savings_pct'].values[0] if not data.empty else 0)
        savings_pivot.append(row)
    savings_array = np.array(savings_pivot)

    # Only matplotlib is needed here, so the heatmap is drawn whether or not
    # seaborn is installed.
    im = ax.imshow(savings_array, cmap='RdYlGn', aspect='auto', vmin=-10, vmax=30)

    ax.set_xticks(np.arange(len(targets)))
    ax.set_yticks(np.arange(len(al_strategies)))
    ax.set_xticklabels([f'{t}%' for t in targets])
    ax.set_yticklabels([strategy_labels_de.get(s, s) for s in al_strategies])

    # Annotate every cell; large magnitudes get white text for contrast.
    for i in range(len(al_strategies)):
        for j in range(len(targets)):
            color = "white" if abs(savings_array[i, j]) > 15 else "black"
            ax.text(j, i, f'{savings_array[i, j]:.1f}%',
                    ha="center", va="center", color=color, fontsize=11)

    cbar = plt.colorbar(im, ax=ax)
    cbar.set_label('Einsparung ggü. Zufälliger Auswahl (%)', fontsize=10)

    ax.set_title('Relative Label-Einsparung', fontsize=13)
    ax.set_xlabel('Ziel-Performance', fontsize=11)
    ax.set_ylabel('Strategie', fontsize=11)


def _draw_savings_lines_panel(ax, savings_df, strategy_labels_de, al_strategies):
    """Draw one line per strategy showing 'savings_pct' over the target levels."""
    markers = {'Entropy Sampling': 'o', 'Margin Sampling': 's', 'Least Confidence': '^'}
    colors = {'Entropy Sampling': '#3498db', 'Margin Sampling': '#e74c3c', 'Least Confidence': '#2ecc71'}

    for strategy in al_strategies:
        data = savings_df[savings_df['strategy'] == strategy]
        if data.empty:
            continue

        targets = data['target_performance'].values
        savings = data['savings_pct'].values

        ax.plot(targets, savings,
                marker=markers.get(strategy, 'o'),
                linewidth=2, markersize=8,
                label=strategy_labels_de.get(strategy, strategy),
                color=colors.get(strategy, '#95a5a6'))

        # Value labels slightly above each point.
        for t, s in zip(targets, savings):
            ax.text(t, s+1, f'{s:.1f}%', ha='center', va='bottom', fontsize=8)

    ax.set_xlabel('Ziel-Performance (%)', fontsize=11)
    ax.set_ylabel('Label-Einsparung (%)', fontsize=11)
    ax.set_title('Label-Einsparung bei verschiedenen Performance-Zielen', fontsize=13)
    ax.set_xticks([85, 90, 95])
    ax.legend()
    ax.grid(True, alpha=0.3)


def _draw_summary_table_panel(ax, savings_df, strategy_labels_de):
    """Draw the summary table for the 90% target plus a short data-set note."""
    ax.axis('tight')
    ax.axis('off')

    data_90 = savings_df[savings_df['target_performance'] == 90]

    if not data_90.empty:
        table_data = []
        for _, row in data_90.iterrows():
            labels = int(row['avg_labels_needed'])
            table_data.append([
                strategy_labels_de.get(row['strategy'], row['strategy']),
                f"{labels:,} ± {int(row['std_labels_needed']):,}",
                f"{row['savings_pct']:.1f}%",
                # The random baseline has no saving relative to itself.
                f"{row['relative_savings_pct']:.1f}%" if row['strategy'] != 'Random Sampling' else "-"
            ])

        table = ax.table(cellText=table_data,
                         colLabels=['Strategie', 'Benötigte Labels', 'Absolute Einsparung', 'Relative Einsparung'],
                         cellLoc='center',
                         loc='center')

        table.auto_set_font_size(False)
        table.set_fontsize(10)
        table.scale(1.2, 2)

        # Row 0 holds the column labels.
        for i in range(4):
            table[(0, i)].set_facecolor('#34495e')
            table[(0, i)].set_text_props(weight='bold', color='white')

        # Highlight the strategy needing the fewest labels; +1 skips the header row.
        min_labels_idx = data_90['avg_labels_needed'].argmin()
        for i in range(4):
            table[(min_labels_idx + 1, i)].set_facecolor('#a8e6cf')

    ax.set_title('Zusammenfassung für 90% Ziel-Performance\n(Fashion-MNIST Datensatz)', fontsize=13, pad=20)

    # Short note listing the first five class names.
    info_text = f"\nFashion-MNIST: {', '.join(FASHION_MNIST_CLASSES_DE[:5])}..."
    ax.text(0.5, 0.1, info_text, transform=ax.transAxes,
            ha='center', va='center', fontsize=9, style='italic', color='gray')


def plot_label_savings_fashion(savings_df):
    """Render the 2x2 label-savings figure for Fashion-MNIST (German labels).

    Panels: (1) labels needed per strategy and target level, (2) heatmap of
    the savings relative to random sampling, (3) absolute savings over the
    target levels, (4) summary table for the 90% target.

    Args:
        savings_df: DataFrame with the columns 'strategy',
            'target_performance', 'avg_labels_needed', 'std_labels_needed',
            'savings_pct' and 'relative_savings_pct'.

    Returns:
        None. The figure is written to 'plots/fashion_lr_label_einsparung.png'.
        An empty ``savings_df`` is skipped with a warning instead of failing.
    """
    if savings_df.empty:
        logger.warning("Keine Label-Einsparungsdaten vorhanden - Plot wird übersprungen.")
        return

    # German display names for the strategies.
    strategy_labels_de = {
        'Random Sampling': 'Zufällige Auswahl',
        'Entropy Sampling': 'Entropie-Auswahl',
        'Margin Sampling': 'Margin-Auswahl',
        'Least Confidence': 'Geringste Konfidenz'
    }
    # Strategies and target levels shown in the heatmap and line panels.
    al_strategies = ['Entropy Sampling', 'Margin Sampling', 'Least Confidence']
    targets = [85, 90, 95]

    plt.rcParams['font.family'] = 'DejaVu Sans'
    plt.rcParams['axes.unicode_minus'] = False

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Label-Einsparungs-Analyse für Fashion-MNIST Active Learning', fontsize=16)

    _draw_required_labels_panel(axes[0, 0], savings_df, strategy_labels_de)
    _draw_relative_savings_heatmap(axes[0, 1], savings_df, strategy_labels_de, al_strategies, targets)
    _draw_savings_lines_panel(axes[1, 0], savings_df, strategy_labels_de, al_strategies)
    _draw_summary_table_panel(axes[1, 1], savings_df, strategy_labels_de)

    plt.tight_layout()
    filename = 'plots/fashion_lr_label_einsparung.png'
    # Make sure the target directory exists so savefig cannot fail on it.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    plt.savefig(filename, dpi=300, bbox_inches='tight')
    logger.info(f"✓ Fashion-MNIST Label-Einsparungs-Analyse erstellt: {filename}")
    plt.close(fig)

# -------------------------------------------------------------------------------
# Hauptprogramm
# -------------------------------------------------------------------------------
def main():
    """Run the complete Fashion-MNIST logistic-regression active-learning experiment.

    Prints system and GPU information, loads the data, runs every query
    strategy, performs the statistical analysis, creates the plots and writes
    the CSV/Excel result files.

    Returns:
        int: 0 on success or when the user declines to continue in CPU mode,
        1 when the data cannot be loaded or no experiment produced results.
    """
    import traceback

    print("="*80)
    print("GPU-OPTIMIERTES ACTIVE LEARNING FÜR FASHION-MNIST - BACHELORARBEIT")
    print("="*80)
    print("LOGISTIC REGRESSION IMPLEMENTIERUNG")
    print("="*80)
    
    # System info
    print(f"Python Version: {sys.version.split()[0]}")
    print(f"PyTorch Version: {torch.__version__}")
    print(f"NumPy Version: {np.__version__}")
    print(f"Scikit-learn Version: {sklearn.__version__}")
    
    # GPU setup
    print("\nGPU Setup:")
    if torch.cuda.is_available():
        print(f"✓ CUDA verfügbar: {torch.cuda.get_device_name(0)}")
        print(f"  VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
    else:
        print("✗ Keine CUDA GPU gefunden")
    
    # Without GPU acceleration, show install hints and ask before continuing.
    if not GPU_AVAILABLE:
        print("\n" + "!"*80)
        print("WICHTIG: Keine GPU-Beschleunigung verfügbar!")
        print("Das Programm läuft im CPU-Modus, was DEUTLICH langsamer ist.")
        print("\nFür Ihre RTX 4060 empfehle ich folgende Installation:")
        print("-"*80)
        print("# RAPIDS cuML Installation")
        print("conda create -n rapids python=3.11")
        print("conda activate rapids")
        print("conda install -c rapidsai -c conda-forge -c nvidia \\")
        print("    rapids=24.12 python=3.11 cudatoolkit=12.0")
        print("!"*80)
        
        # Only an explicit 'j' (ja) continues in CPU mode.
        response = input("\nMöchten Sie trotzdem im CPU-Modus fortfahren? (j/n): ")
        if response.lower() != 'j':
            print("Programm beendet. Bitte installieren Sie GPU-Unterstützung.")
            return 0
    
    # Initialise GPU memory handling; a failed setup is reported but not fatal.
    if CUML_AVAILABLE:
        print("\nGPU Memory Setup:")
        gpu_setup_success = setup_gpu_memory()
        if not gpu_setup_success:
            print("\n⚠ GPU Memory Setup fehlgeschlagen, aber RAPIDS cuML wird trotzdem verwendet.")
    
    print(f"\nExperiment-Konfiguration:")
    print(f"- Datensatz: Fashion-MNIST")
    print(f"- Anzahl Runs: {N_RUNS}")
    print(f"- Budget-Stufen: {[f'{int(b*100)}%' for b in BUDGET_PERCENTAGES]}")
    print(f"- Batch-Größe: {BATCH_SIZE}")
    print(f"- GPU Backend: cuML={CUML_AVAILABLE}")
    print("="*80)
    
    # Load the data
    try:
        X_train, y_train, X_test, y_test = load_fashion_mnist_data()
    except Exception as e:
        logger.error(f"Kritischer Fehler beim Laden der Daten: {e}")
        return 1
    
    # Query strategies as (display name, function) pairs
    strategies = [
        ('Random Sampling', random_sampling),
        ('Entropy Sampling', entropy_sampling),
        ('Margin Sampling', margin_sampling),
        ('Least Confidence', least_confidence_sampling)
    ]
    
    # Run the experiments; a failing strategy is logged and skipped so the
    # remaining strategies still run.
    all_results = []
    total_start_time = time.time()
    
    for strategy_name, strategy_func in strategies:
        print(f"\n{'='*60}")
        print(f"Strategie: {strategy_name}")
        print(f"{'='*60}")
        
        try:
            results = run_gpu_lr_active_learning(
                X_train, y_train, X_test, y_test,
                strategy_name, strategy_func,
                BUDGET_PERCENTAGES, BATCH_SIZE
            )
            all_results.extend(results)
            
        except Exception as e:
            logger.error(f"Kritischer Fehler bei {strategy_name}: {e}")
            traceback.print_exc()
            continue
    
    # Total runtime
    total_time = time.time() - total_start_time
    print(f"\n✓ Alle Experimente abgeschlossen in {total_time/60:.1f} Minuten")
    
    # Nothing to analyse if every strategy failed.
    if not all_results:
        logger.error("Keine Experimenteergebnisse vorhanden!")
        return 1
    
    # Flat DataFrame for the analysis; optional fields fall back to defaults.
    results_df = pd.DataFrame([{
        'strategy': r['strategy'],
        'budget_pct': r['budget_pct'],
        'run': r['run'],
        'n_labeled': r['n_labeled'],
        'accuracy': r['accuracy'],
        'f1_score': r['f1_score'],
        'avg_query_time': r.get('avg_query_time', 0),
        'avg_train_time': r.get('avg_train_time', 0),
        'backend': r.get('backend', 'unknown')
    } for r in all_results])
    
    # Statistical analysis
    print("\nFühre statistische Analyse durch...")
    stat_results = perform_statistical_analysis(results_df)
    create_statistical_report(stat_results)
    
    # Visualisations
    print("\nErstelle Visualisierungen...")
    plot_fashion_mnist_lr_results(all_results, stat_results)
    
    # Label-savings analysis
    print("\nBerechne Label-Einsparungen...")
    savings_df = calculate_label_savings_fashion(all_results)
    
    if not savings_df.empty:
        plot_label_savings_fashion(savings_df)
    
    # Save the results; make sure the output directory exists first so a
    # finished experiment is not lost to a missing folder.
    os.makedirs('results', exist_ok=True)
    csv_filename = 'results/fashion_lr_active_learning_results.csv'
    results_df.to_csv(csv_filename, index=False)
    print(f"\n✓ Ergebnisse gespeichert: {csv_filename}")
    
    if not stat_results.empty:
        stat_csv = 'results/fashion_lr_statistical_analysis.csv'
        stat_results.to_csv(stat_csv, index=False)
        print(f"✓ Statistische Analyse gespeichert: {stat_csv}")
    
    if not savings_df.empty:
        savings_csv = 'results/fashion_lr_label_savings.csv'
        savings_df.to_csv(savings_csv, index=False)
        print(f"✓ Label-Einsparungen gespeichert: {savings_csv}")
    
    # Excel export; a failure here is logged but does not abort the run.
    if EXCEL_AVAILABLE:
        excel_filename = 'results/fashion_lr_active_learning_summary.xlsx'
        try:
            with pd.ExcelWriter(excel_filename, engine='openpyxl') as writer:
                results_df.to_excel(writer, sheet_name='Raw Results', index=False)
                
                if not stat_results.empty:
                    stat_results.to_excel(writer, sheet_name='Statistical Analysis', index=False)
                
                if not savings_df.empty:
                    savings_df.to_excel(writer, sheet_name='Label Savings', index=False)
                
                # Mean/std of accuracy and F1 per strategy and budget
                summary = results_df.groupby(['strategy', 'budget_pct'])[['accuracy', 'f1_score']].agg(['mean', 'std'])
                summary.to_excel(writer, sheet_name='Summary Statistics')
            
            print(f"✓ Excel-Zusammenfassung gespeichert: {excel_filename}")
        except Exception as e:
            logger.error(f"Excel-Export fehlgeschlagen: {e}")
    
    # Final summary
    print("\n" + "="*80)
    print("FASHION-MNIST EXPERIMENT ERFOLGREICH ABGESCHLOSSEN")
    print("="*80)
    print(f"GPU Backend verwendet: {all_results[0].get('backend', 'unknown') if all_results else 'unknown'}")
    print(f"Gesamtanzahl Experimente: {len(all_results)}")
    # Same default as in results_df, so a missing timing cannot raise KeyError here.
    print(f"Durchschnittliche Trainingszeit: {np.mean([r.get('avg_train_time', 0) for r in all_results]):.2f}s")
    # Averaging an empty list would print 'nan'; report the absence explicitly.
    full_budget_accuracies = [r['accuracy'] for r in all_results if r['budget_pct'] == 1.0]
    if full_budget_accuracies:
        print(f"Durchschnittliche Genauigkeit (100% Budget): {np.mean(full_budget_accuracies):.4f}")
    else:
        print("Durchschnittliche Genauigkeit (100% Budget): n/a")
    
    if not stat_results.empty and 'significant' in stat_results.columns:
        sig_count = stat_results['significant'].sum()
        print(f"\nSignifikante Verbesserungen: {sig_count}/{len(stat_results)} ({sig_count/len(stat_results)*100:.1f}%)")
    
    print("\nOutput-Dateien:")
    print("- Visualisierungen: plots/")
    print("  - fashion_lr_active_learning_performance.png")
    print("  - fashion_lr_detailanalyse.png")
    print("  - fashion_lr_vergleichsanalyse.png")
    print("  - fashion_lr_label_einsparung.png")
    print("- Ergebnisse: results/")
    print("  - fashion_lr_active_learning_results.csv")
    print("  - fashion_lr_statistical_analysis.csv")
    print("  - fashion_lr_label_savings.csv")
    print("  - fashion_lr_active_learning_summary.xlsx")
    print("- Berichte: reports/")
    print("  - fashion_lr_statistischer_bericht.txt")
    print("- Logs: logs/")
    print("="*80)
    
    return 0

if __name__ == "__main__":
    # Script entry point: run main() and hand its return code to the shell.
    # Any unexpected exception is logged, its traceback printed, and the
    # process exits with status 1.
    try:
        exit_code = main()
    except Exception as e:
        logger.error(f"Unerwarteter Fehler: {e}")
        import traceback
        traceback.print_exc()
        exit_code = 1
    sys.exit(exit_code)

✓ RAPIDS cuML verfügbar - GPU-Beschleunigung für Logistic Regression aktiviert
GPU-OPTIMIERTES ACTIVE LEARNING FÜR FASHION-MNIST - BACHELORARBEIT
LOGISTIC REGRESSION IMPLEMENTIERUNG
Python Version: 3.13.5
PyTorch Version: 2.7.1+cu126
NumPy Version: 2.2.6
Scikit-learn Version: 1.7.1

GPU Setup:
✓ CUDA verfügbar: NVIDIA GeForce RTX 4060 Laptop GPU
  VRAM: 7.6 GB

GPU Memory Setup:
✓ Verwende Standard GPU Memory Management (RMM Pool deaktiviert)
  GPU: NVIDIA GeForce RTX 4060 Laptop GPU, 8188 MiB

Experiment-Konfiguration:
- Datensatz: Fashion-MNIST
- Anzahl Runs: 5
- Budget-Stufen: ['20%', '40%', '60%', '80%', '100%']
- Batch-Größe: 500
- GPU Backend: cuML=True
09:31:43 [INFO] Lade Fashion-MNIST-Datensatz...
09:31:45 [INFO] ✓ Fashion-MNIST geladen: 60,000 Trainingsbilder, 10,000 Testbilder
09:31:45 [INFO]   Feature-Dimensionen: 784
09:31:45 [INFO]   Klassen: 10 (T-Shirt/Top, Hose, Pullover, Kleid, Mantel, Sandale, Hemd, Sneaker, Tasche, Stiefelette)
09:31:45 [INFO]   Speicherbedarf: 209.

SystemExit: 0