In [None]:
# Importieren der erforderlichen Bibliotheken
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import precision_recall_curve, auc
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt

In [None]:
# Ask the user for the dataset name, the attack type and the attack size.
# Surrounding whitespace is stripped because the three answers are interpolated
# verbatim into the CSV file name; a stray space would point to a wrong file.
datensatz = input("Bitte den Datensatznamen eingeben: ").strip()
angriffstyp = input("Bitte den Angriffstyp eingeben: ").strip()
angriffsgröße = input("Bitte die Angriffsgröße eingeben: ").strip()

In [None]:
# Build the CSV file name from the dataset name, attack type and attack size
dateipfad = "ratings_readyforclassification_{}_{}_{}.csv".format(
    datensatz, angriffstyp, angriffsgröße
)

In [None]:
# Read the CSV file named by `dateipfad` into a DataFrame
data = pd.read_csv(filepath_or_buffer=dateipfad)

In [None]:
# Remove the userId column so that only the label and the feature columns remain.
# errors='ignore' keeps this cell re-runnable: `data` is overwritten here, so on a
# second execution the column is already gone and a plain drop would raise KeyError.
data = data.drop(columns=['userId'], errors='ignore')

In [None]:
# Preview the first five rows of the DataFrame
data.head(n=5)

In [None]:
# Preview the last five rows of the DataFrame
data.tail(n=5)

In [None]:
# Separate the feature columns from the target column
X = data.drop("Label", axis=1)  # features: every column except "Label"
y = data.loc[:, "Label"]  # target (0 = normal, 1 = attack)
print(f"{X.shape} {y.shape}")

In [None]:
# Standardise the features: fit the scaler on X, then transform X with it
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Stratified 5-fold splitter; shuffling with a fixed seed keeps the folds repeatable
skf = StratifiedKFold(5, shuffle=True, random_state=42)

In [None]:
# Accumulator for one result row (dict) per evaluated model
results = list()

In [None]:
# Modell 1: Naive Bayes
model_name = "Naive Bayes"
model = GaussianNB()

# Initialisierung der Variablen
total_cm = np.zeros((2, 2))  # Gesamte Konfusionsmatrix
accuracies = []
pr_aucs = []  # PR-AUC Werte

y_true_all = []  # Alle wahren Labels
y_pred_all = []  # Alle vorhergesagten Labels

for train_index, test_index in skf.split(X_scaled, y):
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # Konfusionsmatrix und Metriken für diesen Fold
    cm = confusion_matrix(y_test, y_pred)
    total_cm += cm

    acc = accuracy_score(y_test, y_pred)
    accuracies.append(acc)

    # PR-AUC berechnen
    precision, recall, _ = precision_recall_curve(y_test, y_pred_proba)
    pr_auc = auc(recall, precision)
    pr_aucs.append(pr_auc)

    # Wahre und vorhergesagte Labels für den Classification Report sammeln
    y_true_all.extend(y_test)
    y_pred_all.extend(y_pred)

# Classification Report auf Basis der Cross-Validation-Vorhersagen
print(f"Classification Report für {model_name} (Cross-Validation):\n")
report = classification_report(y_true_all, y_pred_all, target_names=["Normal (0)", "Angriff (1)"], digits=6, output_dict=True)
print(report)

# Durchschnittswerte über alle Folds berechnen
average_accuracy = np.mean(accuracies)
average_pr_auc = np.mean(pr_aucs)

print(f"Durchschnittliche Accuracy (Cross-Validation): {average_accuracy:.6f}")
print(f"Durchschnittliche PR-AUC (Cross-Validation): {average_pr_auc:.6f}")

# Durchschnittliche Konfusionsmatrix
average_cm = total_cm / skf.n_splits
average_cm_rounded = np.round(average_cm).astype(int)
disp = ConfusionMatrixDisplay(confusion_matrix=average_cm_rounded, display_labels=["Normal (0)", "Angriff (1)"])
disp.plot(cmap=plt.cm.Blues)
plt.title(f"Durchschnittliche Confusion Matrix für {model_name}")
plt.show()

# Precision-Recall-Kurve für den letzten Fold plotten
plt.figure()
plt.plot(recall, precision, label=f"PR-AUC = {pr_auc:.6f}")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title(f"Precision-Recall Kurve für {model_name}")
plt.legend(loc="best")
plt.grid()
plt.show()

# Ergebnisse speichern (für Cross-Validation)
results.append({
    "Algorithm": model_name,
    "Precision (Klasse 1)": report["Angriff (1)"]["precision"],
    "Recall (Klasse 1)": report["Angriff (1)"]["recall"],
    "F1-Score (Klasse 1)": report["Angriff (1)"]["f1-score"],
    "PR-AUC": average_pr_auc,
})

In [None]:
# Modell 2: K-Nearest Neighbors
model_name = "K-Nearest Neighbors"
model = KNeighborsClassifier(metric='manhattan', n_neighbors=7, weights='distance')

# Initialisierung der Variablen
total_cm = np.zeros((2, 2))  # Gesamte Konfusionsmatrix
accuracies = []
pr_aucs = []  # PR-AUC Werte

y_true_all = []  # Alle wahren Labels (für Cross-Validation)
y_pred_all = []  # Alle vorhergesagten Labels (für Cross-Validation)

for train_index, test_index in skf.split(X_scaled, y):
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # Konfusionsmatrix und Metriken für diesen Fold
    cm = confusion_matrix(y_test, y_pred)
    total_cm += cm

    acc = accuracy_score(y_test, y_pred)
    accuracies.append(acc)

    # PR-AUC berechnen
    precision, recall, _ = precision_recall_curve(y_test, y_pred_proba)
    pr_auc = auc(recall, precision)
    pr_aucs.append(pr_auc)

    # Wahre und vorhergesagte Labels für den Classification Report sammeln
    y_true_all.extend(y_test)
    y_pred_all.extend(y_pred)

# Classification Report auf Basis der Cross-Validation-Vorhersagen
print(f"Classification Report für {model_name} (Cross-Validation):\n")
report = classification_report(y_true_all, y_pred_all, target_names=["Normal (0)", "Angriff (1)"], digits=6, output_dict=True)
print(report)

# Durchschnittswerte über alle Folds berechnen
average_accuracy = np.mean(accuracies)
average_pr_auc = np.mean(pr_aucs)

print(f"Durchschnittliche Accuracy (Cross-Validation): {average_accuracy:.6f}")
print(f"Durchschnittliche PR-AUC (Cross-Validation): {average_pr_auc:.6f}")

# Durchschnittliche Konfusionsmatrix
average_cm = total_cm / skf.n_splits
average_cm_rounded = np.round(average_cm).astype(int)
disp = ConfusionMatrixDisplay(confusion_matrix=average_cm_rounded, display_labels=["Normal (0)", "Angriff (1)"])
disp.plot(cmap=plt.cm.Blues)
plt.title(f"Durchschnittliche Confusion Matrix für {model_name}")
plt.show()

# Precision-Recall-Kurve für den letzten Fold plotten
plt.figure()
plt.plot(recall, precision, label=f"PR-AUC = {pr_auc:.6f}")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title(f"Precision-Recall Kurve für {model_name}")
plt.legend(loc="best")
plt.grid()
plt.show()

# Ergebnisse speichern (für Cross-Validation)
results.append({
    "Algorithm": model_name,
    "Precision (Klasse 1)": report["Angriff (1)"]["precision"],
    "Recall (Klasse 1)": report["Angriff (1)"]["recall"],
    "F1-Score (Klasse 1)": report["Angriff (1)"]["f1-score"],
    "PR-AUC": average_pr_auc,
})

In [None]:
# Modell 3: Decision Tree
model_name = "Decision Tree"
model = DecisionTreeClassifier(max_depth=10, min_samples_leaf=4, min_samples_split=10, random_state=42)

# Initialisierung der Variablen
total_cm = np.zeros((2, 2))  # Gesamte Konfusionsmatrix
accuracies = []
pr_aucs = []  # PR-AUC Werte

y_true_all = []  # Alle wahren Labels (für Cross-Validation)
y_pred_all = []  # Alle vorhergesagten Labels (für Cross-Validation)

for train_index, test_index in skf.split(X_scaled, y):
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # Konfusionsmatrix und Metriken für diesen Fold
    cm = confusion_matrix(y_test, y_pred)
    total_cm += cm

    acc = accuracy_score(y_test, y_pred)
    accuracies.append(acc)

    # PR-AUC berechnen
    precision, recall, _ = precision_recall_curve(y_test, y_pred_proba)
    pr_auc = auc(recall, precision)
    pr_aucs.append(pr_auc)

    # Wahre und vorhergesagte Labels für den Classification Report sammeln
    y_true_all.extend(y_test)
    y_pred_all.extend(y_pred)

# Classification Report auf Basis der Cross-Validation-Vorhersagen
print(f"Classification Report für {model_name} (Cross-Validation):\n")
report = classification_report(y_true_all, y_pred_all, target_names=["Normal (0)", "Angriff (1)"], digits=6, output_dict=True)
print(report)

# Durchschnittswerte über alle Folds berechnen
average_accuracy = np.mean(accuracies)
average_pr_auc = np.mean(pr_aucs)

print(f"Durchschnittliche Accuracy (Cross-Validation): {average_accuracy:.6f}")
print(f"Durchschnittliche PR-AUC (Cross-Validation): {average_pr_auc:.6f}")

# Durchschnittliche Konfusionsmatrix
average_cm = total_cm / skf.n_splits
average_cm_rounded = np.round(average_cm).astype(int)
disp = ConfusionMatrixDisplay(confusion_matrix=average_cm_rounded, display_labels=["Normal (0)", "Angriff (1)"])
disp.plot(cmap=plt.cm.Blues)
plt.title(f"Durchschnittliche Confusion Matrix für {model_name}")
plt.show()

# Precision-Recall-Kurve für den letzten Fold plotten
plt.figure()
plt.plot(recall, precision, label=f"PR-AUC = {pr_auc:.6f}")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title(f"Precision-Recall Kurve für {model_name}")
plt.legend(loc="best")
plt.grid()
plt.show()

# Ergebnisse speichern (für Cross-Validation)
results.append({
    "Algorithm": model_name,
    "Precision (Klasse 1)": report["Angriff (1)"]["precision"],
    "Recall (Klasse 1)": report["Angriff (1)"]["recall"],
    "F1-Score (Klasse 1)": report["Angriff (1)"]["f1-score"],
    "PR-AUC": average_pr_auc,
})

In [None]:
# Modell 4: Random Forest
model_name = "Random Forest"
model = RandomForestClassifier(n_estimators=200, min_samples_split=5, min_samples_leaf=1, max_depth=20, bootstrap=False, random_state=42)

# Initialisierung der Variablen
total_cm = np.zeros((2, 2))  # Gesamte Konfusionsmatrix
accuracies = []
pr_aucs = []  # PR-AUC Werte

y_true_all = []  # Alle wahren Labels (für Cross-Validation)
y_pred_all = []  # Alle vorhergesagten Labels (für Cross-Validation)

for train_index, test_index in skf.split(X_scaled, y):
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # Konfusionsmatrix und Metriken für diesen Fold
    cm = confusion_matrix(y_test, y_pred)
    total_cm += cm

    acc = accuracy_score(y_test, y_pred)
    accuracies.append(acc)

    # PR-AUC berechnen
    precision, recall, _ = precision_recall_curve(y_test, y_pred_proba)
    pr_auc = auc(recall, precision)
    pr_aucs.append(pr_auc)

    # Wahre und vorhergesagte Labels für den Classification Report sammeln
    y_true_all.extend(y_test)
    y_pred_all.extend(y_pred)

# Classification Report auf Basis der Cross-Validation-Vorhersagen
print(f"Classification Report für {model_name} (Cross-Validation):\n")
report = classification_report(y_true_all, y_pred_all, target_names=["Normal (0)", "Angriff (1)"], digits=6, output_dict=True)
print(report)

# Durchschnittswerte über alle Folds berechnen
average_accuracy = np.mean(accuracies)
average_pr_auc = np.mean(pr_aucs)

print(f"Durchschnittliche Accuracy (Cross-Validation): {average_accuracy:.6f}")
print(f"Durchschnittliche PR-AUC (Cross-Validation): {average_pr_auc:.6f}")

# Durchschnittliche Konfusionsmatrix
average_cm = total_cm / skf.n_splits
average_cm_rounded = np.round(average_cm).astype(int)
disp = ConfusionMatrixDisplay(confusion_matrix=average_cm_rounded, display_labels=["Normal (0)", "Angriff (1)"])
disp.plot(cmap=plt.cm.Blues)
plt.title(f"Durchschnittliche Confusion Matrix für {model_name}")
plt.show()

# Precision-Recall-Kurve für den letzten Fold plotten
plt.figure()
plt.plot(recall, precision, label=f"PR-AUC = {pr_auc:.6f}")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title(f"Precision-Recall Kurve für {model_name}")
plt.legend(loc="best")
plt.grid()
plt.show()

# Ergebnisse speichern (für Cross-Validation)
results.append({
    "Algorithm": model_name,
    "Precision (Klasse 1)": report["Angriff (1)"]["precision"],
    "Recall (Klasse 1)": report["Angriff (1)"]["recall"],
    "F1-Score (Klasse 1)": report["Angriff (1)"]["f1-score"],
    "PR-AUC": average_pr_auc,
})

In [None]:
# Modell 5: Support Vector Machine
model_name = "Support Vector Machine"
model = SVC(kernel='rbf', gamma='scale', C=100, probability=True, random_state=42)

# Initialisierung der Variablen
total_cm = np.zeros((2, 2))  # Gesamte Konfusionsmatrix
accuracies = []
pr_aucs = []  # PR-AUC Werte

y_true_all = []  # Alle wahren Labels (für Cross-Validation)
y_pred_all = []  # Alle vorhergesagten Labels (für Cross-Validation)

for train_index, test_index in skf.split(X_scaled, y):
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.decision_function(X_test)  # decision_function für Scores

    # Konfusionsmatrix und Metriken für diesen Fold
    cm = confusion_matrix(y_test, y_pred)
    total_cm += cm

    acc = accuracy_score(y_test, y_pred)
    accuracies.append(acc)

    # PR-AUC berechnen
    precision, recall, _ = precision_recall_curve(y_test, y_pred_proba)
    pr_auc = auc(recall, precision)
    pr_aucs.append(pr_auc)

    # Wahre und vorhergesagte Labels für den Classification Report sammeln
    y_true_all.extend(y_test)
    y_pred_all.extend(y_pred)

# Classification Report auf Basis der Cross-Validation-Vorhersagen
print(f"Classification Report für {model_name} (Cross-Validation):\n")
report = classification_report(y_true_all, y_pred_all, target_names=["Normal (0)", "Angriff (1)"], digits=6, output_dict=True)
print(report)

# Durchschnittswerte über alle Folds berechnen
average_accuracy = np.mean(accuracies)
average_pr_auc = np.mean(pr_aucs)

print(f"Durchschnittliche Accuracy (Cross-Validation): {average_accuracy:.6f}")
print(f"Durchschnittliche PR-AUC (Cross-Validation): {average_pr_auc:.6f}")

# Durchschnittliche Konfusionsmatrix
average_cm = total_cm / skf.n_splits
average_cm_rounded = np.round(average_cm).astype(int)
disp = ConfusionMatrixDisplay(confusion_matrix=average_cm_rounded, display_labels=["Normal (0)", "Angriff (1)"])
disp.plot(cmap=plt.cm.Blues)
plt.title(f"Durchschnittliche Confusion Matrix für {model_name}")
plt.show()

# Precision-Recall-Kurve für den letzten Fold plotten
plt.figure()
plt.plot(recall, precision, label=f"PR-AUC = {pr_auc:.6f}")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title(f"Precision-Recall Kurve für {model_name}")
plt.legend(loc="best")
plt.grid()
plt.show()

# Ergebnisse speichern (für Cross-Validation)
results.append({
    "Algorithm": model_name,
    "Precision (Klasse 1)": report["Angriff (1)"]["precision"],
    "Recall (Klasse 1)": report["Angriff (1)"]["recall"],
    "F1-Score (Klasse 1)": report["Angriff (1)"]["f1-score"],
    "PR-AUC": average_pr_auc,
})

In [None]:
# Modell 6: Logistic Regression
model_name = "Logistic Regression"
model = LogisticRegression(C=10, penalty='l1', solver='liblinear', random_state=42)

# Initialisierung der Variablen
total_cm = np.zeros((2, 2))  # Gesamte Konfusionsmatrix
accuracies = []
pr_aucs = []  # PR-AUC Werte

y_true_all = []  # Alle wahren Labels (für Cross-Validation)
y_pred_all = []  # Alle vorhergesagten Labels (für Cross-Validation)

for train_index, test_index in skf.split(X_scaled, y):
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # Konfusionsmatrix und Metriken für diesen Fold
    cm = confusion_matrix(y_test, y_pred)
    total_cm += cm

    acc = accuracy_score(y_test, y_pred)
    accuracies.append(acc)

    # PR-AUC berechnen
    precision, recall, _ = precision_recall_curve(y_test, y_pred_proba)
    pr_auc = auc(recall, precision)
    pr_aucs.append(pr_auc)

    # Wahre und vorhergesagte Labels für den Classification Report sammeln
    y_true_all.extend(y_test)
    y_pred_all.extend(y_pred)

# Classification Report auf Basis der Cross-Validation-Vorhersagen
print(f"Classification Report für {model_name} (Cross-Validation):\n")
report = classification_report(y_true_all, y_pred_all, target_names=["Normal (0)", "Angriff (1)"], digits=6, output_dict=True)
print(report)

# Durchschnittswerte über alle Folds berechnen
average_accuracy = np.mean(accuracies)
average_pr_auc = np.mean(pr_aucs)

print(f"Durchschnittliche Accuracy (Cross-Validation): {average_accuracy:.6f}")
print(f"Durchschnittliche PR-AUC (Cross-Validation): {average_pr_auc:.6f}")

# Durchschnittliche Konfusionsmatrix
average_cm = total_cm / skf.n_splits
average_cm_rounded = np.round(average_cm).astype(int)
disp = ConfusionMatrixDisplay(confusion_matrix=average_cm_rounded, display_labels=["Normal (0)", "Angriff (1)"])
disp.plot(cmap=plt.cm.Blues)
plt.title(f"Durchschnittliche Confusion Matrix für {model_name}")
plt.show()

# Precision-Recall-Kurve für den letzten Fold plotten
plt.figure()
plt.plot(recall, precision, label=f"PR-AUC = {pr_auc:.6f}")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title(f"Precision-Recall Kurve für {model_name}")
plt.legend(loc="best")
plt.grid()
plt.show()

# Ergebnisse speichern (für Cross-Validation)
results.append({
    "Algorithm": model_name,
    "Precision (Klasse 1)": report["Angriff (1)"]["precision"],
    "Recall (Klasse 1)": report["Angriff (1)"]["recall"],
    "F1-Score (Klasse 1)": report["Angriff (1)"]["f1-score"],
    "PR-AUC": average_pr_auc,
})

In [None]:
# Modell 7: XGBoost
model_name = "XGBoost"
model = XGBClassifier(subsample=0.7, n_estimators=500, max_depth=5, learning_rate=0.1, 
                      colsample_bytree=0.8, use_label_encoder=False, eval_metric="logloss", random_state=42)

# Initialisierung der Variablen
total_cm = np.zeros((2, 2))  # Gesamte Konfusionsmatrix
accuracies = []
pr_aucs = []  # PR-AUC Werte

y_true_all = []  # Alle wahren Labels (für Cross-Validation)
y_pred_all = []  # Alle vorhergesagten Labels (für Cross-Validation)

for train_index, test_index in skf.split(X_scaled, y):
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # Konfusionsmatrix und Metriken für diesen Fold
    cm = confusion_matrix(y_test, y_pred)
    total_cm += cm

    acc = accuracy_score(y_test, y_pred)
    accuracies.append(acc)

    # PR-AUC berechnen
    precision, recall, _ = precision_recall_curve(y_test, y_pred_proba)
    pr_auc = auc(recall, precision)
    pr_aucs.append(pr_auc)

    # Wahre und vorhergesagte Labels für den Classification Report sammeln
    y_true_all.extend(y_test)
    y_pred_all.extend(y_pred)

# Classification Report auf Basis der Cross-Validation-Vorhersagen
print(f"Classification Report für {model_name} (Cross-Validation):\n")
report = classification_report(y_true_all, y_pred_all, target_names=["Normal (0)", "Angriff (1)"], digits=6, output_dict=True)
print(report)

# Durchschnittswerte über alle Folds berechnen
average_accuracy = np.mean(accuracies)
average_pr_auc = np.mean(pr_aucs)

print(f"Durchschnittliche Accuracy (Cross-Validation): {average_accuracy:.6f}")
print(f"Durchschnittliche PR-AUC (Cross-Validation): {average_pr_auc:.6f}")

# Durchschnittliche Konfusionsmatrix
average_cm = total_cm / skf.n_splits
average_cm_rounded = np.round(average_cm).astype(int)
disp = ConfusionMatrixDisplay(confusion_matrix=average_cm_rounded, display_labels=["Normal (0)", "Angriff (1)"])
disp.plot(cmap=plt.cm.Blues)
plt.title(f"Durchschnittliche Confusion Matrix für {model_name}")
plt.show()

# Precision-Recall-Kurve für den letzten Fold plotten
plt.figure()
plt.plot(recall, precision, label=f"PR-AUC = {pr_auc:.6f}")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title(f"Precision-Recall Kurve für {model_name}")
plt.legend(loc="best")
plt.grid()
plt.show()

# Ergebnisse speichern (für Cross-Validation)
results.append({
    "Algorithm": model_name,
    "Precision (Klasse 1)": report["Angriff (1)"]["precision"],
    "Recall (Klasse 1)": report["Angriff (1)"]["recall"],
    "F1-Score (Klasse 1)": report["Angriff (1)"]["f1-score"],
    "PR-AUC": average_pr_auc,
})

In [None]:
# Modell 8: Stacking
model_name = "Stacking (Meta-Model)"
stacking_model = StackingClassifier(
    estimators=[
        ('rf', RandomForestClassifier(n_estimators=200, min_samples_split=5, min_samples_leaf=1, max_depth=20, bootstrap=False, random_state=42)),
        ('xgb', XGBClassifier(subsample=0.7, n_estimators=500, max_depth=5, learning_rate=0.1, colsample_bytree=0.8, use_label_encoder=False, eval_metric="logloss", random_state=42))
    ],
    final_estimator=LogisticRegression(C=10, penalty='l1', solver='liblinear', random_state=42)
)

# Initialisierung der Variablen
total_cm = np.zeros((2, 2))  # Gesamte Konfusionsmatrix
accuracies = []
pr_aucs = []  # PR-AUC Werte

y_true_all = []  # Alle wahren Labels (für Cross-Validation)
y_pred_all = []  # Alle vorhergesagten Labels (für Cross-Validation)

for train_index, test_index in skf.split(X_scaled, y):
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    stacking_model.fit(X_train, y_train)
    y_pred = stacking_model.predict(X_test)
    y_pred_proba = stacking_model.predict_proba(X_test)[:, 1]

    # Konfusionsmatrix und Metriken für diesen Fold
    cm = confusion_matrix(y_test, y_pred)
    total_cm += cm

    acc = accuracy_score(y_test, y_pred)
    accuracies.append(acc)

    # PR-AUC berechnen
    precision, recall, _ = precision_recall_curve(y_test, y_pred_proba)
    pr_auc = auc(recall, precision)
    pr_aucs.append(pr_auc)

    # Wahre und vorhergesagte Labels für den Classification Report sammeln
    y_true_all.extend(y_test)
    y_pred_all.extend(y_pred)

# Classification Report auf Basis der Cross-Validation-Vorhersagen
print(f"Classification Report für {model_name} (Cross-Validation):\n")
report = classification_report(y_true_all, y_pred_all, target_names=["Normal (0)", "Angriff (1)"], digits=6, output_dict=True)
print(report)

# Durchschnittswerte über alle Folds berechnen
average_accuracy = np.mean(accuracies)
average_pr_auc = np.mean(pr_aucs)

print(f"Durchschnittliche Accuracy (Cross-Validation): {average_accuracy:.6f}")
print(f"Durchschnittliche PR-AUC (Cross-Validation): {average_pr_auc:.6f}")

# Durchschnittliche Konfusionsmatrix
average_cm = total_cm / skf.n_splits
average_cm_rounded = np.round(average_cm).astype(int)
disp = ConfusionMatrixDisplay(confusion_matrix=average_cm_rounded, display_labels=["Normal (0)", "Angriff (1)"])
disp.plot(cmap=plt.cm.Blues)
plt.title(f"Durchschnittliche Confusion Matrix für {model_name}")
plt.show()

# Precision-Recall-Kurve für den letzten Fold plotten
plt.figure()
plt.plot(recall, precision, label=f"PR-AUC = {pr_auc:.6f}")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title(f"Precision-Recall Kurve für {model_name}")
plt.legend(loc="best")
plt.grid()
plt.show()

# Ergebnisse speichern (für Cross-Validation)
results.append({
    "Algorithm": model_name,
    "Precision (Klasse 1)": report["Angriff (1)"]["precision"],
    "Recall (Klasse 1)": report["Angriff (1)"]["recall"],
    "F1-Score (Klasse 1)": report["Angriff (1)"]["f1-score"],
    "PR-AUC": average_pr_auc,
})

In [None]:
# Modell 9: Neural Network
model_name = "Neural Network"
model = MLPClassifier(activation='relu', hidden_layer_sizes=(50, 50), learning_rate='constant', 
                      solver='adam', random_state=42, max_iter=500)

# Initialisierung der Variablen
total_cm = np.zeros((2, 2))  # Gesamte Konfusionsmatrix
accuracies = []
pr_aucs = []  # PR-AUC Werte

y_true_all = []  # Alle wahren Labels (für Cross-Validation)
y_pred_all = []  # Alle vorhergesagten Labels (für Cross-Validation)

for train_index, test_index in skf.split(X_scaled, y):
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # Konfusionsmatrix und Metriken für diesen Fold
    cm = confusion_matrix(y_test, y_pred)
    total_cm += cm

    acc = accuracy_score(y_test, y_pred)
    accuracies.append(acc)

    # PR-AUC berechnen
    precision, recall, _ = precision_recall_curve(y_test, y_pred_proba)
    pr_auc = auc(recall, precision)
    pr_aucs.append(pr_auc)

    # Wahre und vorhergesagte Labels für den Classification Report sammeln
    y_true_all.extend(y_test)
    y_pred_all.extend(y_pred)

# Classification Report auf Basis der Cross-Validation-Vorhersagen
print(f"Classification Report für {model_name} (Cross-Validation):\n")
report = classification_report(y_true_all, y_pred_all, target_names=["Normal (0)", "Angriff (1)"], digits=6, output_dict=True)
print(report)

# Durchschnittswerte über alle Folds berechnen
average_accuracy = np.mean(accuracies)
average_pr_auc = np.mean(pr_aucs)

print(f"Durchschnittliche Accuracy (Cross-Validation): {average_accuracy:.6f}")
print(f"Durchschnittliche PR-AUC (Cross-Validation): {average_pr_auc:.6f}")

# Durchschnittliche Konfusionsmatrix
average_cm = total_cm / skf.n_splits
average_cm_rounded = np.round(average_cm).astype(int)
disp = ConfusionMatrixDisplay(confusion_matrix=average_cm_rounded, display_labels=["Normal (0)", "Angriff (1)"])
disp.plot(cmap=plt.cm.Blues)
plt.title(f"Durchschnittliche Confusion Matrix für {model_name}")
plt.show()

# Precision-Recall-Kurve für den letzten Fold plotten
plt.figure()
plt.plot(recall, precision, label=f"PR-AUC = {pr_auc:.6f}")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title(f"Precision-Recall Kurve für {model_name}")
plt.legend(loc="best")
plt.grid()
plt.show()

# Ergebnisse speichern (für Cross-Validation)
results.append({
    "Algorithm": model_name,
    "Precision (Klasse 1)": report["Angriff (1)"]["precision"],
    "Recall (Klasse 1)": report["Angriff (1)"]["recall"],
    "F1-Score (Klasse 1)": report["Angriff (1)"]["f1-score"],
    "PR-AUC": average_pr_auc,
})

In [None]:
# Turn the collected result rows into a table
results_df = pd.DataFrame(data=results)

# Print the table
print(results_df)

In [None]:
# Print every metric column separately: a header line with the column name,
# then one value per line, rounded to 6 decimal places
for column in ("Precision (Klasse 1)", "Recall (Klasse 1)", "F1-Score (Klasse 1)", "PR-AUC"):
    rounded_values = results_df[column].round(6).astype(str)
    print(f"\n{column}:")
    print("\n".join(rounded_values))