In [None]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
from tabpfn import TabPFNClassifier

# -----------------------------
# Lift the CPU restriction
# -----------------------------
# Set the flag for this process before the classifier is created and fitted.
os.environ.update({"TABPFN_ALLOW_CPU_LARGE_DATASET": "1"})

# -----------------------------
# Load data from the HDF5 file
# -----------------------------
normal_path = "transients/S01/1/Normal/q_data"
relay_path  = "transients/S01/1/Relay/q_data"

# Number of consecutive dataset keys ("0", "1", ...) scanned per class.
n_normal_keys = 49998
n_relay_keys = 49666


def _read_rows(group, n_keys, width, *, skip_missing=False):
    """Read the datasets keyed "0" .. str(n_keys - 1) into a dense 2-D array.

    Args:
        group: Object indexed by string keys whose values support ``[:]``
            (here: an h5py group of 1-D datasets).
        n_keys: Number of consecutive integer keys to try.
        width: Length of every row, i.e. number of columns of the result.
        skip_missing: If True, absent keys are skipped; if False, the
            ``KeyError`` raised by the lookup propagates.

    Returns:
        A tuple ``(rows, n_missing)``. ``rows`` is a float array holding only
        the datasets that were actually found, packed in key order, and
        ``n_missing`` is the number of keys that were skipped.

    Raises:
        KeyError: A key is absent and ``skip_missing`` is False.
    """
    rows = np.zeros((n_keys, width))
    n_loaded = 0
    for i in range(n_keys):
        try:
            dataset = group[str(i)]
        except KeyError:
            if not skip_missing:
                raise
            continue
        rows[n_loaded] = dataset[:]
        n_loaded += 1
    # Drop the unused tail so no all-zero placeholder row can be mistaken
    # for a real sample by the slicing done further down in the script.
    return rows[:n_loaded], n_keys - n_loaded


with h5py.File('datasets/transients_cleaned_padded.h5', 'r') as f:
    normal_group = f[normal_path]
    relay_group  = f[relay_path]

    # The row width of each class is taken from its first dataset.
    n_norm = normal_group["0"].shape[0]
    n_relay = relay_group["0"].shape[0]

    # A missing Normal key remains an error.
    dataset_normal, _n_normal_missing = _read_rows(normal_group, n_normal_keys, n_norm)
    # Missing Relay keys are tolerated, but they no longer leave zero-filled
    # rows behind that would be labelled and used as Relay samples.
    dataset_relay, n_relay_missing = _read_rows(
        relay_group, n_relay_keys, n_relay, skip_missing=True
    )

if n_relay_missing:
    print(f"Skipped {n_relay_missing} missing Relay datasets; "
          f"{len(dataset_relay)} Relay samples loaded.")

# -----------------------------
# Prepare training & test data
# -----------------------------

# Training set: the first 500 samples of each class (1000 in total).
X_train_normal, X_train_relay = dataset_normal[:500], dataset_relay[:500]

# Class labels: 0 = Normal, 1 = Relay.
y_train_normal = np.full(len(X_train_normal), 0, dtype=int)
y_train_relay = np.full(len(X_train_relay), 1, dtype=int)

X_train = np.concatenate([X_train_normal, X_train_relay], axis=0)
y_train = np.hstack((y_train_normal, y_train_relay))

# Test set: every sample from index 47500 onward (not capped).
X_test_normal, X_test_relay = dataset_normal[47500:], dataset_relay[47500:]
X_test = np.concatenate([X_test_normal, X_test_relay], axis=0)

# Labels follow the stacking order above: all Normal rows, then all Relay rows.
n_test_normal = len(X_test_normal)
n_test_relay = len(X_test_relay)
y_test = np.array([0] * n_test_normal + [1] * n_test_relay)

# -----------------------------
# Scale the data
# -----------------------------
# The scaler is fitted on the training data only; the fitted scaler is then
# applied to both the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = map(scaler.transform, (X_train, X_test))

# -----------------------------
# Train the model
# -----------------------------
# Constructor options, kept together so they are easy to spot and adjust.
tabpfn_options = {"device": "cpu", "ignore_pretraining_limits": True}
clf = TabPFNClassifier(**tabpfn_options)
clf.fit(X_train_scaled, y_train)

# -----------------------------
# Prediction & evaluation
# -----------------------------
y_pred = clf.predict(X_test_scaled)

report = classification_report(y_test, y_pred, target_names=["Normal", "Relay"])
print("\nClassification Report:\n", report)

# Rows/columns are ordered Relay (1) first, then Normal (0); the tick labels
# below use the same order.
class_names = ["Relay", "Normal"]
cm = confusion_matrix(y_test, y_pred, labels=[1, 0])

fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    xticklabels=class_names,
    yticklabels=class_names,
    cbar=False,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Konfusionsmatrix – TabPFN Klassifikation (1000 Trainingsdaten)")
fig.tight_layout()
plt.show()
