# Prototype: kappa

| Properties      | Data    |
|---------------|-----------|
| *Labels* | `['BENIGN', 'DDoS']` |
| *Normalization* | `Min-Max` |
| *Sample Size* | `40,000` |
| *Adversarial Attack* | `FGSM, C&W, JSMA, PGD` |
| *Explanations* | `SHAP` |
| *Detector* | `Detect Attacks and Misclassified Samples` |


---

## *This cell must be run first, on its own!*

In [2]:
# To import modules from the functions directory
import sys
import os

# Register the parent directory (the one that holds the `functions` package)
# on the import path. The membership check keeps this cell idempotent: an
# unconditional append would add one more duplicate entry on every re-run.
_parent_dir = os.path.abspath(os.path.join('..'))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

## Data Preprocessing

In [3]:
import functions.data_preprocessing as dp
import importlib
# Re-import the module so that edits to it are picked up without a kernel restart.
importlib.reload(dp)

# --- Preprocessing configuration (passed to the `dp` helpers below) ---
encoding_type = 0 # binary encoding
norm_type = 0 # min-max normalization
label_names = ['BENIGN', 'DDoS'] # labels to include
sample_size = 20000 # sample size for each label -> 2 x sample_size = total samples

# Build the raw dataset for the selected labels.
dataset = dp.build_dataset(label_names)

# Create the normalizer from the dataset. `zero_columns` is reported in the log as
# "Zero Columns"; presumably these are all-zero feature columns that get dropped
# during preprocessing (79 sampled columns -> 70 features) — TODO confirm in dp.
normalizer, zero_columns = dp.generate_normalizer(dataset, norm_type)

# Sample `sample_size` rows per label with a fixed seed and produce the feature and
# label frames. The printed indices show that the original dataset index is retained.
feature_df, label_df, used_indices = dp.preprocess_data(dataset, encoding_type, normalizer, zero_columns, sample_size=sample_size, random_sample_state=42)
print(f"Generate Features | Indices: {feature_df.index[:5]}... | Shape: {feature_df.shape}")
print(f"Generate Labels | Indices: {label_df.index[:5]}... | Shape: {label_df.shape}")
print(label_df.value_counts()) # -> will first show [0, 1] then [1, 0] if label number is equal

-- Building CICIDS2017 dataset --
--- Combining all CICIDS2017 files ---
Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv
Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv
Tuesday-WorkingHours.pcap_ISCX.csv
Wednesday-workingHours.pcap_ISCX.csv
Friday-WorkingHours-Morning.pcap_ISCX.csv
Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv
Monday-WorkingHours.pcap_ISCX.csv
Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv
--- Removing NaN and Infinity values ---
Removing 1358 Rows with NaN values
Removing 1509 Rows with Infinity values
--- Extracting labels ---
 Label
BENIGN    2271320
DDoS       128025
Name: count, dtype: int64
-- Generating normalizer --
--- Splitting labels and features ---
Zero Columns: [' Bwd PSH Flags', ' Bwd URG Flags', 'Fwd Avg Bytes/Bulk', ' Fwd Avg Packets/Bulk', ' Fwd Avg Bulk Rate', ' Bwd Avg Bytes/Bulk', ' Bwd Avg Packets/Bulk', 'Bwd Avg Bulk Rate']
-- Preprocessing data --
--- Sampling balanced data ---
Sample to shape: (40000, 79)
--- Splitti

## Split Data

In [49]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the sampled data for testing; the fixed random_state makes the
# split reproducible.
# NOTE(review): no `stratify` argument is passed, so the class proportions of the
# train and test parts are not enforced — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(feature_df, label_df, test_size=0.2, random_state=42)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(32000, 70) (8000, 70) (32000, 2) (8000, 2)


## Create IDS

In [50]:
import functions.intrusion_detection_system as ids
import importlib
# Re-import the module so that edits to it are picked up without a kernel restart.
importlib.reload(ids)

# TODO: build ids with complete dataset
# X_train_all, y_train_all, _ = dp.preprocess_data(dataset, encoding_type, normalizer, zero_columns, random_sample_state=42)
# print(y_train_all.value_counts())
# X_train_all, X_test_all, y_train_all, y_test_all = train_test_split(X_train_all, y_train_all, test_size=0.2, random_state=42)
# print(X_train_all.shape, X_test_all.shape, y_train_all.shape, y_test_all.shape)

# build ids and evaluate it on test data
ids_model = ids.build_intrusion_detection_system(X_train, y_train, X_test, y_test)
# store prediction from X_train
# (these predictions on the unperturbed training samples are sliced per attack
#  split further down via `y_pred.loc[...]`)
y_pred = ids.predict(ids_model, X_train, columns=y_train.columns)
print(f"Predictions on Normal Data | Indices: {y_pred.index[:5]}... | Shape: {y_pred.shape}")

Epoch 1/10


[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9127 - loss: 0.3673 - val_accuracy: 0.9806 - val_loss: 0.0486
Epoch 2/10
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 945us/step - accuracy: 0.9833 - loss: 0.0428 - val_accuracy: 0.9822 - val_loss: 0.0357
Epoch 3/10
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 951us/step - accuracy: 0.9869 - loss: 0.0317 - val_accuracy: 0.9834 - val_loss: 0.0303
Epoch 4/10
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 947us/step - accuracy: 0.9875 - loss: 0.0266 - val_accuracy: 0.9837 - val_loss: 0.0275
Epoch 5/10
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 967us/step - accuracy: 0.9896 - loss: 0.0240 - val_accuracy: 0.9850 - val_loss: 0.0254
Epoch 6/10
[1m256/256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 956us/step - accuracy: 0.9897 - loss: 0.0221 - val_accuracy: 0.9873 - val_loss: 0.0234
Epoch 7/10
[1m256/256[0m [

## Generate Adversarial Attacks

In [6]:
from sklearn.utils import shuffle

def split_into_classes(X, y, class_labels):
    """
    Splits the dataset evenly into specified classes with given labels.

    The samples are shuffled with a fixed seed and then cut into
    ``len(class_labels)`` consecutive, equally sized, non-overlapping chunks;
    the i-th chunk is assigned to the i-th label.

    Args:
        X (pandas.DataFrame or numpy.ndarray): The input samples.
        y (pandas.DataFrame or numpy.ndarray): The labels, row-aligned with X.
        class_labels (list of str): The names of the classes (e.g., ["normal", "cw", "fgsm", "hsj"]).

    Returns:
        dict: A dictionary where keys are class names and values are tuples (X_subset, y_subset).

    Raises:
        ValueError: If class_labels is empty or contains duplicates, or if the
            number of samples is not evenly divisible by the number of classes.
    """
    class_labels = list(class_labels)
    num_classes = len(class_labels)

    # Validate up front: an empty list would otherwise fail with a
    # ZeroDivisionError, and duplicate names would silently overwrite each
    # other in the result dict and drop a chunk of the data.
    if num_classes == 0:
        raise ValueError("class_labels must contain at least one class name.")
    if len(set(class_labels)) != num_classes:
        raise ValueError("class_labels must not contain duplicate class names.")
    if len(X) % num_classes != 0:
        raise ValueError("Number of samples must be evenly divisible by the number of classes.")

    # Shuffle data to avoid biases (X and y are permuted together)
    X, y = shuffle(X, y, random_state=42)

    # Compute samples per class
    num_samples_per_class = len(X) // num_classes

    # Dictionary to store the split datasets
    class_splits = {}

    for i, label in enumerate(class_labels):
        # Positional row window [start, end) of the i-th chunk
        start = i * num_samples_per_class
        end = start + num_samples_per_class
        class_splits[label] = (X[start:end], y[start:end])

    return class_splits


In [None]:
import functions.attack_generator as ag
import importlib
import numpy as np
# Re-import the module so that edits to it are picked up without a kernel restart.
importlib.reload(ag)

# NOTE(review): `all_features` is not used anywhere in this cell; it is kept
# because other cells may rely on it (see the TODO on the next line).
all_features = dataset.drop(columns=[' Label'])
art_model = ag.convert_to_art_model(ids_model, X_train) # TODO: use all features for generating art model

# Split the training data into classes
# (one disjoint, equally sized chunk per entry; "normal" stays unperturbed)
class_labels = ["normal", "cw", "fgsm", "jsma", "pgd"]  # Change this to any class names
splits = split_into_classes(X_train, y_train, class_labels)
X_normal, y_normal = splits["normal"]
X_cw, y_cw = splits["cw"]
X_fgsm, y_fgsm = splits["fgsm"]
X_jsma, y_jsma = splits["jsma"]
X_pgd, y_pgd = splits["pgd"]
print(f"Normal Data: {X_normal.shape} | CW Data: {X_cw.shape} | FGSM Data: {X_fgsm.shape} | JSMA Data: {X_jsma.shape} | PGD Data: {X_pgd.shape}")

# generate attacks on the separated training data
# For every attack the same three results are stored:
#   X_adv_<attack>      - the adversarial samples generated from the split
#   y_pred_adv_<attack> - the model's predictions on those adversarial samples
#   y_pred_<attack>     - the IDS predictions on the unperturbed samples of the split
# NOTE(review): target_label=1 presumably selects the BENIGN class (the recorded
# output shows every adversarial sample predicted as BENIGN) — confirm in attack_generator.
# TODO: when changing epsilon, the detector accuracy rises
X_adv_fgsm = ag.generate_fgsm_attacks(art_model, X_fgsm, target_label=1)
print(f"Create FGSM Adversarial Attack | Indices: {X_adv_fgsm.index[:5]}... | Shape: {X_adv_fgsm.shape}")
y_pred_adv_fgsm = ag.evaluate_art_model(art_model, X_adv_fgsm, y_fgsm)
print(f"Predictions on Adversarial Attacks | Indices: {y_pred_adv_fgsm.index[:5]}... | Shape: {y_pred_adv_fgsm.shape}")
y_pred_fgsm = y_pred.loc[X_fgsm.index]

# The C&W generator is the only one that is handed a worker count.
import multiprocessing
num_cores = multiprocessing.cpu_count()
X_adv_cw = ag.generate_cw_attacks_parallel(art_model, X_cw, target_label=1, num_cores=num_cores)
print(f"Create CW Adversarial Attack | Indices: {X_adv_cw.index[:5]}... | Shape: {X_adv_cw.shape}")
y_pred_adv_cw = ag.evaluate_art_model(art_model, X_adv_cw, y_cw)
print(f"Predictions on Adversarial Attacks | Indices: {y_pred_adv_cw.index[:5]}... | Shape: {y_pred_adv_cw.shape}")
y_pred_cw = y_pred.loc[X_cw.index]

X_adv_jsma = ag.generate_jsma_attacks(art_model, X_jsma, target_label=1)
print(f"Create JSMA Adversarial Attack | Indices: {X_adv_jsma.index[:5]}... | Shape: {X_adv_jsma.shape}")
y_pred_adv_jsma = ag.evaluate_art_model(art_model, X_adv_jsma, y_jsma)
print(f"Predictions on Adversarial Attacks | Indices: {y_pred_adv_jsma.index[:5]}... | Shape: {y_pred_adv_jsma.shape}")
y_pred_jsma = y_pred.loc[X_jsma.index]

X_adv_pgd = ag.generate_pgd_attacks(art_model, X_pgd, target_label=1)
# The log label used to say "HSJ" (copy-paste leftover) although this is the PGD attack.
print(f"Create PGD Adversarial Attack | Indices: {X_adv_pgd.index[:5]}... | Shape: {X_adv_pgd.shape}")
y_pred_adv_pgd = ag.evaluate_art_model(art_model, X_adv_pgd, y_pgd)
print(f"Predictions on Adversarial Attacks | Indices: {y_pred_adv_pgd.index[:5]}... | Shape: {y_pred_adv_pgd.shape}")
y_pred_pgd = y_pred.loc[X_pgd.index]

JSMA:   0%|          | 0/6400 [00:00<?, ?it/s]

Adversarial JSMA examples generated. Shape: (6400, 70)
Create JSMA Adversarial Attack | Indices: Index([79319, 1836822, 1340447, 477738, 1361834], dtype='int64')... | Shape: (6400, 70)
Accuracy: 50.98%
              precision    recall  f1-score   support

      ATTACK     0.0000    0.0000    0.0000      3137
      BENIGN     0.5098    1.0000    0.6754      3263

    accuracy                         0.5098      6400
   macro avg     0.2549    0.5000    0.3377      6400
weighted avg     0.2599    0.5098    0.3443      6400

Confusion Matrix: Positive == BENIGN
TN: 0, FP: 3137, FN: 0, TP: 3263
Predictions on Adversarial Attacks | Indices: Index([79319, 1836822, 1340447, 477738, 1361834], dtype='int64')... | Shape: (6400, 2)


In [8]:
def get_correctly_benign_classified_indices(y_train, y_pred):
    """
    Return the index labels of samples that are benign and also predicted benign.

    Args:
        y_train (pd.DataFrame): Ground-truth labels with a 'BENIGN' column.
        y_pred (pd.DataFrame): Predicted labels with a 'BENIGN' column.

    Returns:
        pd.Index: Labels whose 'BENIGN' entry equals 1 in both frames.
    """
    is_benign = y_train['BENIGN'] == 1
    is_predicted_benign = y_pred['BENIGN'] == 1
    return y_train.loc[is_benign].index.intersection(y_pred.loc[is_predicted_benign].index)

def get_misclassified_as_benign_due_attack_indices(y_train, y_pred, y_pred_adv):
    """
    Return the index labels of attack samples that an adversarial perturbation flipped to benign.

    A label is kept only if all three conditions hold:
      * the ground truth marks it as ATTACK,
      * the prediction in ``y_pred`` marks it as ATTACK,
      * the prediction in ``y_pred_adv`` marks it as BENIGN.

    Args:
        y_train (pd.DataFrame): Ground-truth labels with an 'ATTACK' column.
        y_pred (pd.DataFrame): Predictions with an 'ATTACK' column.
        y_pred_adv (pd.DataFrame): Predictions on the adversarial samples with a 'BENIGN' column.

    Returns:
        pd.Index: Labels satisfying all three conditions.
    """
    true_attack = y_train.loc[y_train['ATTACK'] == 1].index
    predicted_attack = y_pred.loc[y_pred['ATTACK'] == 1].index
    benign_after_attack = y_pred_adv.loc[y_pred_adv['BENIGN'] == 1].index

    flipped = true_attack.intersection(predicted_attack)
    return flipped.intersection(benign_after_attack)

In [53]:
# Unperturbed split: benign samples that the IDS also predicts as benign.
y_pred_normal = y_pred.loc[X_normal.index]
correctly_benign_classified_indices = get_correctly_benign_classified_indices(y_normal, y_pred_normal)

# For every attack split two index sets are derived:
#   correctly_benign_classified_indices_<attack>         - benign samples predicted as benign
#   misclassified_as_benign_due_attack_indices_<attack>  - attack samples detected before the
#                                                          perturbation but predicted benign after it
# NOTE(review): for FGSM the "correctly benign" set is computed from the predictions on the
# adversarial samples (y_pred_adv_fgsm), whereas CW, JSMA and PGD below use the predictions on
# the unperturbed samples (y_pred_cw / y_pred_jsma / y_pred_pgd). Confirm which one is intended.
correctly_benign_classified_indices_fgsm = get_correctly_benign_classified_indices(y_fgsm, y_pred_adv_fgsm)
misclassified_as_benign_due_attack_indices_fgsm = get_misclassified_as_benign_due_attack_indices(y_fgsm, y_pred_fgsm, y_pred_adv_fgsm)

correctly_benign_classified_indices_cw = get_correctly_benign_classified_indices(y_cw, y_pred_cw)
misclassified_as_benign_due_attack_indices_cw = get_misclassified_as_benign_due_attack_indices(y_cw, y_pred_cw, y_pred_adv_cw)

correctly_benign_classified_indices_jsma = get_correctly_benign_classified_indices(y_jsma, y_pred_jsma)
misclassified_as_benign_due_attack_indices_jsma = get_misclassified_as_benign_due_attack_indices(y_jsma, y_pred_jsma, y_pred_adv_jsma)

# TODO: is it correct to only include the samples that are correctly classified from the IDS?
correctly_benign_classified_indices_pgd = get_correctly_benign_classified_indices(y_pgd, y_pred_pgd)
misclassified_as_benign_due_attack_indices_pgd = get_misclassified_as_benign_due_attack_indices(y_pgd, y_pred_pgd, y_pred_adv_pgd)

# Report the size and the first three labels of every index set.
print(f"Correctly classified as BENIGN from the IDS: {len(correctly_benign_classified_indices)} | Indices: {correctly_benign_classified_indices[:3]}")
print(f"    Correctly classified as BENIGN from the IDS (FGSM): {len(correctly_benign_classified_indices_fgsm)} | Indices: {correctly_benign_classified_indices_fgsm[:3]}")
print(f"    ATTACK sample misclassified as BENIGN due to adversarial attack (FGSM): {len(misclassified_as_benign_due_attack_indices_fgsm)} | Indices: {misclassified_as_benign_due_attack_indices_fgsm[:3]}")
print(f"        Correctly classified as BENIGN from the IDS (CW): {len(correctly_benign_classified_indices_cw)} | Indices: {correctly_benign_classified_indices_cw[:3]}")
print(f"        ATTACK sample misclassified as BENIGN due to adversarial attack (CW): {len(misclassified_as_benign_due_attack_indices_cw)} | Indices: {misclassified_as_benign_due_attack_indices_cw[:3]}")
print(f"            Correctly classified as BENIGN from the IDS (PGD): {len(correctly_benign_classified_indices_pgd)} | Indices: {correctly_benign_classified_indices_pgd[:3]}")
print(f"            ATTACK sample misclassified as BENIGN due to adversarial attack (PGD): {len(misclassified_as_benign_due_attack_indices_pgd)} | Indices: {misclassified_as_benign_due_attack_indices_pgd[:3]}")
print(f"                Correctly classified as BENIGN from the IDS (JSMA): {len(correctly_benign_classified_indices_jsma)} | Indices: {correctly_benign_classified_indices_jsma[:3]}")
print(f"                ATTACK sample misclassified as BENIGN due to adversarial attack (JSMA): {len(misclassified_as_benign_due_attack_indices_jsma)} | Indices: {misclassified_as_benign_due_attack_indices_jsma[:3]}")

Correctly classified as BENIGN from the IDS: 3085 | Indices: Index([606504, 2655053, 820505], dtype='int64')
    Correctly classified as BENIGN from the IDS (FGSM): 3159 | Indices: Index([281272, 443688, 2048428], dtype='int64')
    ATTACK sample misclassified as BENIGN due to adversarial attack (FGSM): 3237 | Indices: Index([414312, 450214, 428606], dtype='int64')
        Correctly classified as BENIGN from the IDS (CW): 3185 | Indices: Index([1806823, 706236, 1671681], dtype='int64')
        ATTACK sample misclassified as BENIGN due to adversarial attack (CW): 1509 | Indices: Index([367524, 403889, 475346], dtype='int64')
            Correctly classified as BENIGN from the IDS (PGD): 3158 | Indices: Index([2032524, 2720032, 979762], dtype='int64')
            ATTACK sample misclassified as BENIGN due to adversarial attack (PGD): 3193 | Indices: Index([359920, 410469, 440128], dtype='int64')
                Correctly classified as BENIGN from the IDS (JSMA): 3214 | Indices: Index([793

In [None]:
# import functions.visualizer as visualizer
# import importlib
# importlib.reload(visualizer)

# visualizer.visualize_data_distribution(X_train.loc[correctly_benign_classified_indices], 'Normal Data', X_adv_fgsm.loc[misclassified_as_benign_due_attack_indices_fgsm], 'Adversarial Data')
# # visualizer.visualize_data_distribution(X_train.loc[misclassified_as_benign_due_attack_indices], 'Normal Data', X_adv.loc[misclassified_as_benign_due_attack_indices], 'Adversarial Data', side_by_side=True)

## Explainer

In [None]:
import functions.explainer as exp
import importlib
# Re-import the module so that edits to it are picked up without a kernel restart.
importlib.reload(exp)

# One explainer for the IDS model, reused for all splits below.
# NOTE(review): X_train is presumably the background/reference data for SHAP — confirm in explainer.
explainer = exp.generate_shap_explainer(ids_model, X_train)

# SHAP values of the unperturbed "normal" split ...
shap_values_df = exp.generate_shap_values(explainer, X_normal)
print(f"Generate Explanations | Indices: {shap_values_df.index[:5]}... | Shape: {shap_values_df.shape}")

# ... and of the adversarial samples of every attack split.
shap_values_adv_df_fgsm = exp.generate_shap_values(explainer, X_adv_fgsm)
print(f"Generate FGSM Adversarial Explanations | Indices: {shap_values_adv_df_fgsm.index[:5]}... | Shape: {shap_values_adv_df_fgsm.shape}")

shap_values_adv_df_cw = exp.generate_shap_values(explainer, X_adv_cw)
print(f"Generate CW Adversarial Explanations | Indices: {shap_values_adv_df_cw.index[:5]}... | Shape: {shap_values_adv_df_cw.shape}")

shap_values_adv_df_jsma = exp.generate_shap_values(explainer, X_adv_jsma)
print(f"Generate JSMA Adversarial Explanations | Indices: {shap_values_adv_df_jsma.index[:5]}... | Shape: {shap_values_adv_df_jsma.shape}")

shap_values_adv_df_pgd = exp.generate_shap_values(explainer, X_adv_pgd)
print(f"Generate PGD Adversarial Explanations | Indices: {shap_values_adv_df_pgd.index[:5]}... | Shape: {shap_values_adv_df_pgd.shape}")

PermutationExplainer explainer: 6401it [03:22, 29.98it/s]                          


Generate JSMA Adversarial Explanations | Indices: Index([79319, 1836822, 1340447, 477738, 1361834], dtype='int64')... | Shape: (6400, 70)


In [12]:
# import pandas as pd
# # concat_correctly_benign_classified_shaps = pd.concat([shap_values_df.loc[correctly_benign_classified_indices], shap_values_adv_df_fgsm.loc[correctly_benign_classified_indices_fgsm], shap_values_adv_df_cw.loc[correctly_benign_classified_indices_cw]], axis=0)
# # # shap_values_df.loc[misclassified_as_benign_due_attack_indices]
# # concat_misclassified_as_benign_shaps = pd.concat([shap_values_adv_df_fgsm.loc[misclassified_as_benign_due_attack_indices_fgsm], shap_values_adv_df_cw.loc[misclassified_as_benign_due_attack_indices_cw]], axis=0)

# concat_correctly_benign_classified_shaps = pd.concat([shap_values_df.loc[correctly_benign_classified_indices], shap_values_adv_df_fgsm.loc[correctly_benign_classified_indices_fgsm]], axis=0)
# # shap_values_df.loc[misclassified_as_benign_due_attack_indices]
# concat_misclassified_as_benign_shaps = pd.concat([shap_values_adv_df_fgsm.loc[misclassified_as_benign_due_attack_indices_fgsm]], axis=0)

In [13]:
# import functions.visualizer as visualizer
# import importlib
# importlib.reload(visualizer)

# visualizer.visualize_data_distribution(shap_values_df.loc[correctly_benign_classified_indices], 'Normal Explanations', shap_values_adv_df_fgsm.loc[misclassified_as_benign_due_attack_indices_fgsm], 'Adversarial Explanations')

In [14]:
# exp.plot_shap_summary_comparison(shap_values_df.loc[correctly_benign_classified_indices].values, X_train.loc[correctly_benign_classified_indices], shap_values_adv_df.loc[misclassified_as_benign_due_attack_indices].values, X_adv.loc[misclassified_as_benign_due_attack_indices], 6, title='Normal vs Adversarial Explanations of Benign Samples')

In [15]:
# adversarial_correct_benign_indices_cw = correctly_benign_classified_indices_cw
# adversarial_misclassified_as_benign_indices_cw = misclassified_as_benign_due_attack_indices_cw
# adversarial_correct_benign_indices_fgsm = correctly_benign_classified_indices_fgsm
# adversarial_misclassified_as_benign_indices_fgsm = misclassified_as_benign_due_attack_indices_fgsm
# normal_correct_benign_indices = correctly_benign_classified_indices

# attack_indices = y_normal[y_normal['ATTACK'] == 1].index
# predicted_benign_indices = y_pred_normal[y_pred_normal['BENIGN'] == 1].index
# normal_misclassified_as_benign_indices = attack_indices.intersection(predicted_benign_indices)

# print(f"Normal Correctly Classified as Benign: {len(normal_correct_benign_indices)}")
# print(f"Normal Misclassified as Benign: {len(normal_misclassified_as_benign_indices)}")
# print(f"Adversarial Correctly Classified as Benign (CW): {len(adversarial_correct_benign_indices_cw)}")
# print(f"Adversarial Misclassified as Benign (CW): {len(adversarial_misclassified_as_benign_indices_cw)}")
# print(f"Adversarial Correctly Classified as Benign (FGSM): {len(adversarial_correct_benign_indices_fgsm)}")
# print(f"Adversarial Misclassified as Benign (FGSM): {len(adversarial_misclassified_as_benign_indices_fgsm)}")

In [16]:
import pandas as pd

def create_dataset(class_samples):
    """
    Stack per-class sample frames into one feature matrix with one-hot labels.

    The class order, and therefore the column order of the label frame, follows
    the iteration order of ``class_samples``. Relies on ``np`` and ``pd`` being
    available in the notebook namespace.

    Args:
        class_samples (dict): Maps each class name to the DataFrame holding its samples.

    Returns:
        X (pd.DataFrame): All samples stacked row-wise, original indices retained.
        y (pd.DataFrame): One-hot labels, one column per class, indexed like X.
    """
    class_labels = list(class_samples)
    feature_frames = []
    label_frames = []

    for column_position, class_name in enumerate(class_labels):
        frame = class_samples[class_name]
        feature_frames.append(frame)

        # Row-aligned one-hot block: ones in this class's column, zeros elsewhere
        indicator = np.zeros((frame.shape[0], len(class_labels)))
        indicator[:, column_position] = 1
        label_frames.append(pd.DataFrame(indicator, index=frame.index, columns=class_labels))

    # Stack the per-class pieces in class order
    X = pd.concat(feature_frames, axis=0)
    y = pd.concat(label_frames, axis=0)

    print(f"Generated dataset: X shape {X.shape}, y shape {y.shape}")

    return X, y

In [59]:
import pandas as pd
# Group the SHAP explanations into the sample sets used as detector classes.

# Adversarial (FGSM + PGD) explanations of benign samples that are still classified as benign.
adv_concat_correctly_benign_classified_shaps = pd.concat([
    shap_values_adv_df_fgsm.loc[correctly_benign_classified_indices_fgsm],
    shap_values_adv_df_pgd.loc[correctly_benign_classified_indices_pgd],
    ], axis=0)
# shap_values_df.loc[misclassified_as_benign_due_attack_indices]
# Adversarial (FGSM + PGD) explanations of attack samples that were flipped to benign.
adv_concat_misclassified_as_benign_shaps = pd.concat([
    shap_values_adv_df_fgsm.loc[misclassified_as_benign_due_attack_indices_fgsm], 
    shap_values_adv_df_pgd.loc[misclassified_as_benign_due_attack_indices_pgd],
    ], axis=0)

# Benign explanations of the unperturbed split, merged with the CW and JSMA benign explanations.
# NOTE(review): CW/JSMA benign samples are grouped with the normal ones, presumably because
# those attacks leave already-benign samples (nearly) unchanged — confirm.
concat_correct_benign_shaps = pd.concat([
    shap_values_df.loc[correctly_benign_classified_indices], 
    shap_values_adv_df_cw.loc[correctly_benign_classified_indices_cw],
    shap_values_adv_df_jsma.loc[correctly_benign_classified_indices_jsma],
    ], axis=0)

In [60]:
# Detector classes. The insertion order of this dict defines the column order of the
# one-hot label frame built by create_dataset (class i -> column i).
class_samples = {
    'ADV CORRECT BENIGN': adv_concat_correctly_benign_classified_shaps,
    'ADV MISCLASSIFIED': adv_concat_misclassified_as_benign_shaps,
    'CW MISCLASSIFIED': shap_values_adv_df_cw.loc[misclassified_as_benign_due_attack_indices_cw],
    'JSMA MISCLASSIFIED': shap_values_adv_df_jsma.loc[misclassified_as_benign_due_attack_indices_jsma],
    'CORRECT BENIGN': concat_correct_benign_shaps, 
}

# X: stacked SHAP values; y: one-hot detector labels with the same index.
X, y = create_dataset(class_samples)
print(X.shape, y.shape)

Generated dataset: X shape (26872, 70), y shape (26872, 5)
(26872, 70) (26872, 5)


## Detector

In [61]:
import functions.detector as det
import importlib
# Re-import the module so that edits to it are picked up without a kernel restart.
importlib.reload(det)

# build detector to detect adversarial samples that misclassify attack samples as benign

# create dataframe
# TODO: build detector with normal and adversarial shap values?
# TODO: build with shap_values_adv_df to detect 'BENIGN' and 'ATTACK'
import pandas as pd

# alternative approach: detector that predicts the original label of the sample for all given adversarial attacks
# concat_correctly_benign_classified_shaps = pd.concat([shap_values_df.loc[correctly_benign_classified_indices], shap_values_adv_df.loc[correctly_benign_classified_indices]], axis=0)
# concat_misclassified_as_benign_shaps = pd.concat([shap_values_df.loc[misclassified_as_benign_due_attack_indices], shap_values_adv_df.loc[misclassified_as_benign_due_attack_indices]], axis=0)
# X, y = det.build_train_datasets(shap_values_df.loc[correctly_benign_classified_indices], shap_values_adv_df_fgsm.loc[misclassified_as_benign_due_attack_indices_fgsm])

#X, y = det.build_train_datasets(shap_values_df.loc[correctly_benign_classified_indices], shap_values_adv_df.loc[misclassified_as_benign_due_attack_indices])
# X and y come from the create_dataset call in the previous cell.
print(X.shape, y.shape)

# split data
# (10% hold-out with a fixed seed; `train_test_split` was imported in the "Split Data" cell)
# NOTE(review): the classes have unequal sizes and no `stratify` argument is passed — confirm intended.
X_train_det, X_test_det, y_train_det, y_test_det = train_test_split(X, y, test_size=0.1, random_state=1503)
print(X_train_det.shape, X_test_det.shape, y_train_det.shape, y_test_det.shape)

# build detector
detector = det.build_detector(X_train_det, y_train_det, X_test_det, y_test_det)

(26872, 70) (26872, 5)
(24184, 70) (2688, 70) (24184, 5) (2688, 5)
Epoch 1/10


2025-04-01 14:32:38.938493: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}


[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4506 - loss: 1.3877 - val_accuracy: 0.8888 - val_loss: 0.4615
Epoch 2/10
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8035 - loss: 0.4986 - val_accuracy: 0.9086 - val_loss: 0.2481
Epoch 3/10
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8670 - loss: 0.3375 - val_accuracy: 0.9221 - val_loss: 0.1959
Epoch 4/10
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8882 - loss: 0.2781 - val_accuracy: 0.9281 - val_loss: 0.1762
Epoch 5/10
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9076 - loss: 0.2409 - val_accuracy: 0.9349 - val_loss: 0.1592
Epoch 6/10
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9166 - loss: 0.2188 - val_accuracy: 0.9403 - val_loss: 0.1497
Epoch 7/10
[1m194/194[0m [32m━━━━━━━

In [62]:
# Evaluate detector
# (predictions are made on the same hold-out part that was passed to build_detector)
y_pred_det = det.predict(detector, X_test_det, y.columns)
print(f"Predictions on Detector | Indices: {y_pred_det.index[:5]}... | Shape: {y_pred_det.shape}")

# Convert one-hot to class indices
# `np` is not imported in this cell; it comes from the attack-generation cell.
# The *_pd Series keep the sample index next to the class position.
y_true_indices = np.argmax(y_test_det, axis=1)
y_true_indices_pd = pd.Series(y_true_indices, index=y_test_det.index)
y_pred_indices = np.argmax(y_pred_det, axis=1)
y_pred_indices_pd = pd.Series(y_pred_indices, index=y_pred_det.index)
print(y_true_indices[:5], y_pred_indices[:5])

from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Compute Accuracy
accuracy = accuracy_score(y_true_indices, y_pred_indices)
print(f"Overall Accuracy: {accuracy:.4f}")

# Compute Classification Report for overall classification
# (class positions are mapped back to the detector class names via y.columns)
print("Classification Report (Overall):")
print(classification_report(y_true_indices, y_pred_indices, target_names=y.columns, zero_division=0))

[1m 1/84[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 16ms/step

[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 448us/step
Predictions on Detector | Indices: Index([1780284, 386694, 677502, 467834, 2268119], dtype='int64')... | Shape: (2688, 5)
[4 3 4 2 0] [4 3 4 2 0]
Overall Accuracy: 0.9475
Classification Report (Overall):
                    precision    recall  f1-score   support

ADV CORRECT BENIGN       0.87      0.95      0.91       641
 ADV MISCLASSIFIED       0.95      0.86      0.90       642
  CW MISCLASSIFIED       0.97      0.94      0.95       156
JSMA MISCLASSIFIED       1.00      1.00      1.00       310
    CORRECT BENIGN       0.99      0.99      0.99       939

          accuracy                           0.95      2688
         macro avg       0.95      0.95      0.95      2688
      weighted avg       0.95      0.95      0.95      2688



In [21]:
# # Get unique values and their counts
# values, counts = np.unique(y_pred_indices, return_counts=True)

# # Combine values and counts into a dictionary or print
# value_counts = dict(zip(values, counts))
# print(value_counts)

# norm_benign_indices = y_test_det[y_test_det['NORM BENIGN'] == 1].index
# print(f"Normal Benign Indices: {len(norm_benign_indices)}")

# print(y_pred_indices_pd[norm_benign_indices].value_counts())

# # 0 == FGSM CORRECT BENIGN, 1 == FGSM MISCLASSIFIED, 2 == CW CORRECT BENIGN, 3 == CW MISCLASSIFIED, 4 == NORM BENIGN
# norm_pred_benign_indices = y_pred_indices_pd[y_pred_indices_pd == 4].index
# print(f"Normal Predicted Benign Indices: {len(norm_pred_benign_indices)}")

# norm_pred_cw_correct_benign_indices = y_pred_indices_pd[y_pred_indices_pd == 2].index
# print(f"Normal Predicted CW Correct Benign Indices: {len(norm_pred_cw_correct_benign_indices)}")

# benign_pred_indices = np.unique(np.concatenate((norm_pred_benign_indices, norm_pred_cw_correct_benign_indices)))

# correct_benign_pred_indices = np.intersect1d(norm_benign_indices, benign_pred_indices)
# print(f"Predicted Normal indices: {len(correct_benign_pred_indices)}")

# normal_benign_misclassified_indices = np.setdiff1d(norm_benign_indices, benign_pred_indices)
# print(f"Normal Misclassified indices: {len(normal_benign_misclassified_indices)}")


In [22]:
# fgsm_correct_bening_indices = y_test_det[y_test_det['FGSM CORRECT BENIGN'] == 1].index
# print(f"FGSM correct benign indices: {len(fgsm_correct_bening_indices)}")
# fgsm_misclassified_indices = y_test_det[y_test_det['FGSM MISCLASSIFIED'] == 1].index
# print(f"FGSM misclassified indices: {len(fgsm_misclassified_indices)}")
# fgsm_indices = np.unique(np.concatenate((fgsm_correct_bening_indices, fgsm_misclassified_indices)))
# print(f"FGSM indices: {len(fgsm_indices)}")

# fgsm_pred_correct_benign_indices = y_pred_det[y_pred_det['FGSM CORRECT BENIGN'] == True].index
# print(f"Predicted FGSM correct benign indices: {len(fgsm_pred_correct_benign_indices)}")
# fgsm_pred_misclassified_indices = y_pred_det[y_pred_det['FGSM MISCLASSIFIED'] == True].index
# print(f"Predicted FGSM misclassified indices: {len(fgsm_pred_misclassified_indices)}")
# fgsm_pred_indices = np.unique(np.concatenate((fgsm_pred_correct_benign_indices, fgsm_pred_misclassified_indices)))
# print(f"Predicted FGSM indices: {len(fgsm_pred_indices)}")

# fgsm_pred_indices = np.intersect1d(fgsm_indices, fgsm_pred_indices)
# print(f"Predicted FGSM indices: {len(fgsm_pred_indices)}")

# fgsm_misclassification_indices = np.setdiff1d(fgsm_indices, fgsm_pred_indices)
# print(f"FGSM misclassification indices: {len(fgsm_misclassification_indices)}")


# print(f"FGSM Detection Rate: {len(fgsm_pred_indices) / len(fgsm_indices):.4f}")
# print(f"Misclassification Rate: {len(fgsm_misclassification_indices) / len(fgsm_indices):.4f}")

In [23]:
# cw_correct_bening_indices = y_test_det[y_test_det['CW CORRECT BENIGN'] == 1].index
# print(f"CW correct benign indices: {len(fgsm_correct_bening_indices)}")
# cw_misclassified_indices = y_test_det[y_test_det['CW MISCLASSIFIED'] == 1].index
# print(f"CW misclassified indices: {len(cw_misclassified_indices)}")
# cw_indices = np.unique(np.concatenate((cw_correct_bening_indices, cw_misclassified_indices)))
# print(f"CW indices: {len(cw_indices)}")

# cw_pred_correct_benign_indices = y_pred_det[y_pred_det['CW CORRECT BENIGN'] == True].index
# print(f"Predicted CW correct benign indices: {len(cw_pred_correct_benign_indices)}")
# cw_pred_misclassified_indices = y_pred_det[y_pred_det['CW MISCLASSIFIED'] == True].index
# print(f"Predicted CW misclassified indices: {len(cw_pred_misclassified_indices)}")
# cw_pred_indices = np.unique(np.concatenate((cw_pred_correct_benign_indices, cw_pred_misclassified_indices)))
# print(f"Predicted CW indices: {len(cw_pred_indices)}")

# cw_pred_indices = np.intersect1d(cw_indices, cw_pred_indices)
# print(f"Predicted CW indices: {len(cw_pred_indices)}")

# cw_misclassification_indices = np.setdiff1d(cw_indices, cw_pred_indices)
# print(f"CW misclassification indices: {len(cw_misclassification_indices)}")


# print(f"CW Detection Rate: {len(cw_pred_indices) / len(cw_indices):.4f}")
# print(f"Misclassification Rate: {len(cw_misclassification_indices) / len(cw_indices):.4f}")

In [24]:
# def evaluate_attack_detection(y_test_det, y_pred_det, attack_name):
#     """
#     Evaluates the detection rate and misclassification rate for a given attack.
    
#     Parameters:
#     - y_test_det: DataFrame containing true labels for attack detection.
#     - y_pred_det: DataFrame containing predicted labels for attack detection.
#     - attack_name: The name of the attack (e.g., 'FGSM', 'CW').
    
#     Returns:
#     - A dictionary containing detection rate and misclassification rate.
#     """
#     # Generate column names dynamically
#     correct_benign_col = f"{attack_name} CORRECT BENIGN"
#     misclassified_col = f"{attack_name} MISCLASSIFIED"
    
#     # Get indices for actual attack samples
#     correct_benign_indices = y_test_det[y_test_det[correct_benign_col] == 1].index
#     misclassified_indices = y_test_det[y_test_det[misclassified_col] == 1].index
#     attack_indices = np.unique(np.concatenate((correct_benign_indices, misclassified_indices)))
    
#     print(f"{attack_name} total samples: {len(attack_indices)}")
    
#     # Get indices for predicted attack samples
#     pred_correct_benign_indices = y_pred_det[y_pred_det[correct_benign_col] == True].index
#     pred_misclassified_indices = y_pred_det[y_pred_det[misclassified_col] == True].index
#     pred_attack_indices = np.unique(np.concatenate((pred_correct_benign_indices, pred_misclassified_indices)))
    
#     print(f"Predicted {attack_name} total: {len(pred_attack_indices)}")
    
#     # Find correctly detected attack samples
#     detected_attack_indices = np.intersect1d(attack_indices, pred_attack_indices)
#     print(f"Correctly detected {attack_name} samples: {len(detected_attack_indices)}")
    
#     # Find misclassified attack samples
#     misclassified_attack_indices = np.setdiff1d(attack_indices, detected_attack_indices)
#     print(f"Misclassified {attack_name} samples: {len(misclassified_attack_indices)}")
    
#     # Compute rates
#     detection_rate = len(detected_attack_indices) / len(attack_indices) if len(attack_indices) > 0 else 0
#     misclassification_rate = len(misclassified_attack_indices) / len(attack_indices) if len(attack_indices) > 0 else 0
    
#     print(f"{attack_name} Detection Rate: {detection_rate:.4f}")
#     print(f"{attack_name} Misclassification Rate: {misclassification_rate:.4f}")
    
#     return {
#         "detection_rate": detection_rate,
#         "misclassification_rate": misclassification_rate
#     }

In [25]:
# results = evaluate_attack_detection(y_test_det, y_pred_det, 'FGSM')

In [26]:
# # Combine classes for adversarial detection (class 1 and class 2)
# y_true_adv = np.where(np.isin(y_true_indices, [0, 1]), 1, 0)  # Adversarial = 1 (class 1 or 2), otherwise 0
# y_pred_adv = np.where(np.isin(y_pred_indices, [0, 1]), 1, 0)  # Predicted as Adversarial
# print(y_true_adv[:5], y_pred_adv[:5])

# # Combine classes for benign detection (class 1 and class 3)
# y_true_benign = np.where(np.isin(y_true_indices, [0, 2]), 1, 0)  # Benign = 1 (class 1 or 3), otherwise 0
# y_pred_benign = np.where(np.isin(y_pred_indices, [0, 2]), 1, 0)  # Predicted as Benign
# print(y_true_benign[:5], y_pred_benign[:5])


# # Compute confusion matrix for Adversarial Detection
# tn_adv, fp_adv, fn_adv, tp_adv = confusion_matrix(y_true_adv, y_pred_adv).ravel()
# print(f"\nAdversarial Detection (Class 1 + 2):")
# print(f"TP (Adversarial correctly detected): {tp_adv}")
# print(f"FP (Benign incorrectly detected as adversarial): {fp_adv}")
# print(f"TN (Benign correctly detected): {tn_adv}")
# print(f"FN (Adversarial missed): {fn_adv}")

# # Calculate metrics for Adversarial Detection
# tpr_adv = tp_adv / (tp_adv + fn_adv) if (tp_adv + fn_adv) != 0 else 0
# fpr_adv = fp_adv / (fp_adv + tn_adv) if (fp_adv + tn_adv) != 0 else 0
# fnr_adv = fn_adv / (tp_adv + fn_adv) if (tp_adv + fn_adv) != 0 else 0
# tnr_adv = tn_adv / (tn_adv + fp_adv) if (tn_adv + fp_adv) != 0 else 0  

# # Calculate accuracy for Adversarial Detection
# accuracy_adv = (tp_adv + tn_adv) / (tp_adv + tn_adv + fp_adv + fn_adv) if (tp_adv + tn_adv + fp_adv + fn_adv) != 0 else 0

# print(f"Adversarial Detection Metrics:")
# print(f"Adversarial Detection Accuracy: {100*accuracy_adv:.2f}%")
# print(f"True Positive Rate (TPR): {100*tpr_adv:.2f}%")
# print(f"False Positive Rate (FPR): {100*fpr_adv:.2f}%")
# print(f"False Negative Rate (FNR): {100* fnr_adv:.2f}%")
# print(f"True Negative Rate (TNR): {100*tnr_adv:.2f}%") 

# # Compute confusion matrix for Benign Detection
# tn_benign, fp_benign, fn_benign, tp_benign = confusion_matrix(y_true_benign, y_pred_benign).ravel()
# print(f"\nBenign Detection (Class 1 + 3):")
# print(f"TP (Benign correctly detected): {tp_benign}")
# print(f"FP (Adversarial incorrectly detected as benign): {fp_benign}")
# print(f"TN (Adversarial correctly detected): {tn_benign}")
# print(f"FN (Benign missed): {fn_benign}")

# # Calculate metrics for Benign Detection
# tpr_benign = tp_benign / (tp_benign + fn_benign) if (tp_benign + fn_benign) != 0 else 0
# fpr_benign = fp_benign / (fp_benign + tn_benign) if (fp_benign + tn_benign) != 0 else 0
# fnr_benign = fn_benign / (tp_benign + fn_benign) if (tp_benign + fn_benign) != 0 else 0
# tnr_benign = tn_benign / (tn_benign + fp_benign) if (tn_benign + fp_benign) != 0 else 0  

# # Calculate accuracy for Benign Detection
# accuracy_benign = (tp_benign + tn_benign) / (tp_benign + tn_benign + fp_benign + fn_benign) if (tp_benign + tn_benign + fp_benign + fn_benign) != 0 else 0

# print(f"Benign Detection Metrics:")
# print(f"Benign Detection Accuracy: {100*accuracy_benign:.2f}%")
# print(f"True Positive Rate (TPR): {100*tpr_benign:.2f}%")
# print(f"False Positive Rate (FPR): {100*fpr_benign:.2f}%")
# print(f"False Negative Rate (FNR): {100*fnr_benign:.2f}%")
# print(f"True Negative Rate (TNR): {100*tnr_benign:.2f}%") 

In [27]:
# # find intersection of benign and normal samples
# normal_samples = np.where(y_pred_adv == 0)[0]
# print(f"Normal Samples: {len(normal_samples)} | {normal_samples[:5]}")
# normal_samples = set(normal_samples)
# benign_samples = np.where(y_pred_benign == 1)[0]
# print(f"Benign Samples: {len(benign_samples)} | {benign_samples[:5]}")
# benign_samples = set(benign_samples)
# intersection = normal_samples & benign_samples
# print(f"Intersection of Adversarial and Benign Samples: {len(intersection)}")

# # find indices from class [0, 0, 1, 0] of y_pred_indices
# normal_indices = np.where(y_pred_indices == 2)[0]
# print(f"Normal Samples: {len(normal_indices)} | {normal_indices[:5]}")

---
## Manual Evaluation
We run the complete two-stage approach on new, unseen data and evaluate the following scores:
- Recall
- Precision
- Accuracy
- F1 Score

In [28]:
import importlib
import functions.data_preprocessing as dp

# Reload so that edits made to the preprocessing module are picked up by this cell.
importlib.reload(dp)

# Hold-out pool: drop every row whose index label was already returned as
# `used_indices` by the first preprocessing call.
dataset_eval_excluded = dataset.drop(index=used_indices)

# Re-use the encoding, normalizer and zero-column settings from the first
# preprocessing call; only the sample size and the sampling seed differ.
X_eval, y_eval, used_eval_indices = dp.preprocess_data(
    dataset_eval_excluded,
    encoding_type,
    normalizer,
    zero_columns,
    sample_size=500,
    random_sample_state=17,
)

print(f"Generate Features | Indices: {X_eval.index[:5]}... | Shape: {X_eval.shape}")
print(f"Generate Labels | Indices: {y_eval.index[:5]}... | Shape: {y_eval.shape}")
print(y_eval.value_counts())

-- Preprocessing data --
--- Sampling balanced data ---
Sample to shape: (1000, 79)
--- Splitting labels and features ---
--- Encoding labels as binary one-hot values ---
--- Normalizing features using MinMaxScaler ---
Generate Features | Indices: Index([2053501, 1417664, 1894894, 2042954, 2008688], dtype='int64')... | Shape: (1000, 70)
Generate Labels | Indices: Index([2053501, 1417664, 1894894, 2042954, 2008688], dtype='int64')... | Shape: (1000, 2)
BENIGN  ATTACK
False   True      500
True    False     500
Name: count, dtype: int64


In [63]:
# Reload the attack-generation module so recent edits take effect.
importlib.reload(ag)

# Alternative evaluation attack (C&W), currently disabled:
# X_adv_eval = ag.generate_cw_attacks_parallel(art_model, X_eval, target_label=1, num_cores=num_cores)
# print(f"Create Adversarial Attack | Indices: {X_adv_eval.index[:5]}... | Shape: {X_adv_eval.shape}")

# Craft JSMA adversarial samples from the held-out evaluation features.
X_adv_eval = ag.generate_jsma_attacks(
    art_model,
    X_eval,
    target_label=1,
)
print(f"Create Adversarial Attack | Indices: {X_adv_eval.index[:5]}... | Shape: {X_adv_eval.shape}")

# Run the IDS model on the adversarial samples; the helper also prints its own
# accuracy / classification report against `y_eval`.
y_pred_adv_eval = ag.evaluate_art_model(
    art_model,
    X_adv_eval,
    y_eval,
)
print(f"Predictions on Adversarial Attacks | Indices: {y_pred_adv_eval.index[:5]}... | Shape: {y_pred_adv_eval.shape}")

JSMA:   0%|          | 0/1000 [00:00<?, ?it/s]

Adversarial JSMA examples generated. Shape: (1000, 70)
Create Adversarial Attack | Indices: Index([2053501, 1417664, 1894894, 2042954, 2008688], dtype='int64')... | Shape: (1000, 70)
Accuracy: 50.00%
              precision    recall  f1-score   support

      ATTACK     0.0000    0.0000    0.0000       500
      BENIGN     0.5000    1.0000    0.6667       500

    accuracy                         0.5000      1000
   macro avg     0.2500    0.5000    0.3333      1000
weighted avg     0.2500    0.5000    0.3333      1000

Confusion Matrix: Positive == BENIGN
TN: 0, FP: 500, FN: 0, TP: 500
Predictions on Adversarial Attacks | Indices: Index([2053501, 1417664, 1894894, 2042954, 2008688], dtype='int64')... | Shape: (1000, 2)


In [None]:
# Reload the explanation module so recent edits take effect.
importlib.reload(exp)

# SHAP explanations for the adversarial evaluation samples; these explanations
# (not the raw features) are what the detector consumes in the next stage.
X_eval_adv_shap_values_df = exp.generate_shap_values(
    explainer,
    X_adv_eval,
)

print(f"Create Explanations | Indices: {X_eval_adv_shap_values_df.index[:5]}... | Shape: {X_eval_adv_shap_values_df.shape}")

PermutationExplainer explainer: 1001it [00:39, 18.90it/s]                         

Create Explanations | Indices: Index([2053501, 1417664, 1894894, 2042954, 2008688], dtype='int64')... | Shape: (1000, 70)





In [None]:
# # normal shap values

# y_pred_adv_eval = ag.evaluate_art_model(art_model, X_eval, y_eval)
# print(f"Predictions on Adversarial Attacks | Indices: {y_pred_adv_eval.index[:5]}... | Shape: {y_pred_adv_eval.shape}")

# X_eval_adv_shap_values_df = exp.generate_shap_values(explainer, X_eval)

# print(f"Create Explanations | Indices: {X_eval_adv_shap_values_df.index[:5]}... | Shape: {X_eval_adv_shap_values_df.shape}")

In [65]:
# Ground-truth split of the evaluation set by its one-hot label columns.
benign_eval_idx = y_eval.loc[y_eval['BENIGN'].eq(1)].index
attack_eval_idx = y_eval.loc[y_eval['ATTACK'].eq(1)].index

# Split of the same samples by what the IDS predicted on the adversarial inputs.
pred_benign_idx = y_pred_adv_eval.loc[y_pred_adv_eval['BENIGN'].eq(1)].index
pred_attack_idx = y_pred_adv_eval.loc[y_pred_adv_eval['ATTACK'].eq(1)].index

# Second stage: only samples the IDS predicted as 'BENIGN' are handed to the
# detector, which works on their SHAP explanations.
X_eval_detector = X_eval_adv_shap_values_df.loc[pred_benign_idx]
y_pred_eval_detector = det.predict(
    detector,
    X_eval_detector,
    y_train_det.columns,
)

# correctly_classified_det_idx = y_pred_eval_detector[y_pred_eval_detector['BENIGN'] == 1].index
# misclassified_det_idx = y_pred_eval_detector[y_pred_eval_detector['ATTACK'] == 1].index

[1m 1/32[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 17ms/step

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


2025-04-01 14:34:02.297606: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}


In [67]:
# Index labels of the samples the detector assigned to each of its output
# classes; the class number is the position of the column in the tuple below.
(
    pred_class_0,
    pred_class_1,
    pred_class_2,
    pred_class_3,
    pred_class_4,
) = (
    y_pred_eval_detector.loc[y_pred_eval_detector[column_name].eq(1)].index
    for column_name in (
        'ADV CORRECT BENIGN',
        'ADV MISCLASSIFIED',
        'CW MISCLASSIFIED',
        'JSMA MISCLASSIFIED',
        'CORRECT BENIGN',
    )
)

# Per-class sample counts.
print(f"Predicted Class 0: {len(pred_class_0)}")
print(f"Predicted Class 1: {len(pred_class_1)}")
print(f"Predicted Class 2: {len(pred_class_2)}")
print(f"Predicted Class 3: {len(pred_class_3)}")
print(f"Predicted Class 4: {len(pred_class_4)}")

Predicted Class 0: 0
Predicted Class 1: 0
Predicted Class 2: 0
Predicted Class 3: 513
Predicted Class 4: 487


In [68]:
# Two-stage confusion counts. The positive class is 'BENIGN', the negative class
# is 'ATTACK'.

# TODO: define correct and misclassified classes for each attack:
# For now only class 4 ('CORRECT BENIGN') counts as "correct" and only class 3
# ('JSMA MISCLASSIFIED') counts as "misclassified"; detector predictions in
# classes 0-2 are not counted in any of the four totals below.
correctly_classified_det_idx = y_pred_eval_detector.loc[pred_class_4].index
misclassified_det_idx = y_pred_eval_detector.loc[pred_class_3].index

# Stage 1 (IDS): samples the IDS itself labels 'ATTACK'.
TN = len(attack_eval_idx.intersection(pred_attack_idx))  # true 'ATTACK' -> 'ATTACK'
FN = len(benign_eval_idx.intersection(pred_attack_idx))  # true 'BENIGN' -> 'ATTACK'

# Stage 2 (detector): samples the detector accepts as correct 'BENIGN' ...
TP = len(benign_eval_idx.intersection(correctly_classified_det_idx))  # true 'BENIGN'
FP = len(attack_eval_idx.intersection(correctly_classified_det_idx))  # true 'ATTACK'
# ... and samples the detector flags as misclassified due to an attack.
TN_2 = len(attack_eval_idx.intersection(misclassified_det_idx))  # true 'ATTACK'
FN_2 = len(benign_eval_idx.intersection(misclassified_det_idx))  # true 'BENIGN'

# Per-stage breakdown (stage-1 values are printed before the stages are merged).
print(f"IDS classifies 'ATTACK' samples as 'ATTACK': {TN}")
print(f"IDS mis-classifies 'BENIGN' samples as 'ATTACK': {FN}")
print(f"Detector classifies 'BENIGN' samples as correct 'BENIGN': {TP}")
print(f"Detector mis-classifies 'ATTACK' samples as correct 'BENIGN': {FP}")
print(f"Detector classifies 'ATTACK' samples as misclassified due to 'ATTACK': {TN_2}")
print(f"Detector classifies 'BENIGN' samples as misclassified due to 'ATTACK': {FN_2}")

# Merge the negatives of both stages into the global totals.
TN += TN_2
FN += FN_2

print(f"TP: {TP}")
print(f"FP: {FP}")
print(f"TN: {TN}")
print(f"FN: {FN}")
print(f"Sum: {TP + FP + TN + FN}")

IDS classifies 'ATTACK' samples as 'ATTACK': 0
IDS mis-classifies 'BENIGN' samples as 'ATTACK': 0
Detector classifies 'BENIGN' samples as correct 'BENIGN': 486
Detector mis-classifies 'ATTACK' samples as correct 'BENIGN': 1
Detector classifies 'ATTACK' samples as misclassified due to 'ATTACK': 499
Detector classifies 'BENIGN' samples as misclassified due to 'ATTACK': 14
TP: 486
FP: 1
TN: 499
FN: 14
Sum: 1000


In [69]:
from sklearn.metrics import classification_report


def _safe_rate(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero.

    Mirrors the zero-denominator guards used by the earlier metric cells so an
    evaluation set without any positive (or negative) samples prints 0.00%
    instead of raising ZeroDivisionError.
    """
    return numerator / denominator if denominator else 0.0


print(f"Global Accuracy: {_safe_rate(TP + TN, TP + FP + TN + FN) * 100:.2f}%")

# Rebuild label vectors from the confusion counts so sklearn can produce its
# report. Encoding: 1 == 'BENIGN' (positive class), 0 == 'ATTACK' (negative class).
#   TP: true 1 / pred 1    TN: true 0 / pred 0
#   FN: true 1 / pred 0    FP: true 0 / pred 1
y_true = np.array([1] * TP + [0] * TN + [1] * FN + [0] * FP)  # True labels
y_pred = np.array([1] * TP + [0] * TN + [0] * FN + [1] * FP)  # Predicted labels

# Pass `labels` explicitly so the two target names are always bound to label ids
# 0 and 1. Without it the report derives the labels from the data and raises
# ValueError ("Number of classes ... does not match size of target_names") as soon
# as one class is absent from both vectors.
report = classification_report(
    y_true,
    y_pred,
    labels=[0, 1],
    target_names=['ATTACK', 'BENIGN'],  # label 0 -> 'ATTACK', label 1 -> 'BENIGN'
)
print(report)

print(f"True Negative Rate: {_safe_rate(TN, TN + FP) * 100:.2f}%")
print(f"False Positive Rate: {_safe_rate(FP, TN + FP) * 100:.2f}%")
print(f"True Positive Rate: {_safe_rate(TP, TP + FN) * 100:.2f}%")
print(f"False Negative Rate: {_safe_rate(FN, TP + FN) * 100:.2f}%")

Global Accuracy: 98.50%
              precision    recall  f1-score   support

      ATTACK       0.97      1.00      0.99       500
      BENIGN       1.00      0.97      0.98       500

    accuracy                           0.98      1000
   macro avg       0.99      0.98      0.98      1000
weighted avg       0.99      0.98      0.98      1000

True Negative Rate: 99.80%
False Positive Rate: 0.20%
True Positive Rate: 97.20%
False Negative Rate: 2.80%


In [71]:
# All samples that were passed through the detector.
sample_indices = y_pred_eval_detector.index

# Sorted, de-duplicated union of the samples predicted as class 4
# ('CORRECT BENIGN') or class 3 ('JSMA MISCLASSIFIED').
detected_indices = np.union1d(pred_class_4, pred_class_3)

# Detector inputs that fall into one of those two classes, and those that do not.
correct_benign_pred_indices = np.intersect1d(sample_indices, detected_indices)
normal_benign_misclassified_indices = np.setdiff1d(sample_indices, detected_indices)

# NOTE(review): the printed labels are kept verbatim; the "ADV Detection Rate" is
# the share of detector inputs assigned to class 3 or class 4.
print(f"#Attack-Samples: {len(sample_indices)}")
print(f"Predicted indices: {len(detected_indices)}")
print(f"Predicted Normal indices: {len(correct_benign_pred_indices)}")
print(f"Normal Misclassified indices: {len(normal_benign_misclassified_indices)}")

print(f"ADV Detection Rate: {len(correct_benign_pred_indices) / len(sample_indices):.4f}")
print(f"Misclassification Rate: {len(normal_benign_misclassified_indices) / len(sample_indices):.4f}")

#Attack-Samples: 1000
Predicted indices: 1000
Predicted Normal indices: 1000
Normal Misclassified indices: 0
ADV Detection Rate: 1.0000
Misclassification Rate: 0.0000
