# Prototype: iota

| Properties      | Data    |
|---------------|-----------|
| *Labels* | `['BENIGN', 'DDoS']` |
| *Normalization* | `Min-Max` |
| *Sample Size* | `10.000`|
| *Adversarial Attack* | `FGSM & C&W` |
| *Explanations* | `SHAP` |
| *Detector* | `Detect misclassified Samples of both Attacks` |


---

## *Has to be run first alone!*

In [2]:
# To import modules from the functions directory
import sys
import os
sys.path.append(os.path.abspath(os.path.join('..')))

## Data Preprocessing

In [58]:
import functions.data_preprocessing as dp
import importlib
importlib.reload(dp)

encoding_type = 0 # binary encoding
norm_type = 0 # min-max normalization
label_names = ['BENIGN', 'DDoS'] # labels to include
sample_size = 5000 # sample size for each label -> 2 x sample_size = total samples

dataset = dp.build_dataset(label_names)

normalizer, zero_columns = dp.generate_normalizer(dataset, norm_type)

feature_df, label_df, used_indices = dp.preprocess_data(dataset, encoding_type, normalizer, zero_columns, sample_size=sample_size, random_sample_state=42)
print(f"Generate Features | Indices: {feature_df.index[:5]}... | Shape: {feature_df.shape}")
print(f"Generate Labels | Indices: {label_df.index[:5]}... | Shape: {label_df.shape}")
print(label_df.value_counts()) # -> will first show [0, 1] then [1, 0] if label number is equal

-- Building CICIDS2017 dataset --
--- Combining all CICIDS2017 files ---
Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv
Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv
Tuesday-WorkingHours.pcap_ISCX.csv
Wednesday-workingHours.pcap_ISCX.csv
Friday-WorkingHours-Morning.pcap_ISCX.csv
Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv
Monday-WorkingHours.pcap_ISCX.csv
Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv
--- Removing NaN and Infinity values ---
Removing 1358 Rows with NaN values
Removing 1509 Rows with Infinity values
--- Extracting labels ---
 Label
BENIGN    2271320
DDoS       128025
Name: count, dtype: int64
-- Generating normalizer --
--- Splitting labels and features ---
Zero Columns: [' Bwd PSH Flags', ' Bwd URG Flags', 'Fwd Avg Bytes/Bulk', ' Fwd Avg Packets/Bulk', ' Fwd Avg Bulk Rate', ' Bwd Avg Bytes/Bulk', ' Bwd Avg Packets/Bulk', 'Bwd Avg Bulk Rate']
-- Preprocessing data --
--- Sampling balanced data ---
Sample to shape: (10000, 79)
--- Splitti

## Split Data

In [59]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(feature_df, label_df, test_size=0.2, random_state=42)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(8000, 70) (2000, 70) (8000, 2) (2000, 2)


## Create IDS

In [60]:
import functions.intrusion_detection_system as ids
import importlib
importlib.reload(ids)

# TODO: build ids with complete dataset
# X_train_all, y_train_all, _ = dp.preprocess_data(dataset, encoding_type, normalizer, zero_columns, random_sample_state=42)
# print(y_train_all.value_counts())
# X_train_all, X_test_all, y_train_all, y_test_all = train_test_split(X_train_all, y_train_all, test_size=0.2, random_state=42)
# print(X_train_all.shape, X_test_all.shape, y_train_all.shape, y_test_all.shape)

# build ids and evaluate it on test data
ids_model = ids.build_intrusion_detection_system(X_train, y_train, X_test, y_test)
# store prediction from X_train
y_pred = ids.predict(ids_model, X_train, columns=y_train.columns)
print(f"Predictions on Normal Data | Indices: {y_pred.index[:5]}... | Shape: {y_pred.shape}")

Epoch 1/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8144 - loss: 0.5964 - val_accuracy: 0.9775 - val_loss: 0.2770
Epoch 2/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9747 - loss: 0.1938 - val_accuracy: 0.9812 - val_loss: 0.0661
Epoch 3/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9803 - loss: 0.0637 - val_accuracy: 0.9837 - val_loss: 0.0508
Epoch 4/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9827 - loss: 0.0506 - val_accuracy: 0.9856 - val_loss: 0.0433
Epoch 5/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9834 - loss: 0.0445 - val_accuracy: 0.9856 - val_loss: 0.0378
Epoch 6/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9835 - loss: 0.0412 - val_accuracy: 0.9862 - val_loss: 0.0341
Epoch 7/10
[1m64/64[0m [32m━━━━━━━━━━

2025-03-26 14:21:59.782289: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 353us/step
Predictions on Normal Data | Indices: Index([383258, 1283627, 1595713, 323550, 474229], dtype='int64')... | Shape: (8000, 2)


## Generate C&W and FGSM Attacks

In [61]:
import functions.attack_generator as ag
import importlib
import numpy as np
importlib.reload(ag)

all_features = dataset.drop(columns=[' Label'])
art_model = ag.convert_to_art_model(ids_model, X_train) # TODO: use all features for generating art model

# split train data into data for generating fgsm and cw attacks
X_fgsm, X_cw, y_fgsm, y_cw = train_test_split(X_train, y_train, test_size=0.5, random_state=15)
print(X_fgsm.shape, X_cw.shape, y_fgsm.shape, y_cw.shape)

# generate attacks on the separated training data
# TODO: when changing epsilon, the detector accuracy rises
X_adv_fgsm = ag.generate_fgsm_attacks(art_model, X_fgsm, 1)
print(f"Create Adversarial Attack | Indices: {X_adv_fgsm.index[:5]}... | Shape: {X_adv_fgsm.shape}")
y_pred_adv_fgsm = ag.evaluate_art_model(art_model, X_adv_fgsm, y_fgsm)
print(f"Predictions on Adversarial Attacks | Indices: {y_pred_adv_fgsm.index[:5]}... | Shape: {y_pred_adv_fgsm.shape}")
y_pred_fgsm = y_pred.loc[X_fgsm.index]

import multiprocessing
num_cores = multiprocessing.cpu_count()
X_adv_cw = ag.generate_cw_attacks_parallel(art_model, X_cw, 1, num_cores=num_cores)
print(f"Create Adversarial Attack | Indices: {X_adv_cw.index[:5]}... | Shape: {X_adv_cw.shape}")
y_pred_adv_cw = ag.evaluate_art_model(art_model, X_adv_cw, y_cw)
print(f"Predictions on Adversarial Attacks | Indices: {y_pred_adv_cw.index[:5]}... | Shape: {y_pred_adv_cw.shape}")
y_pred_cw = y_pred.loc[X_cw.index]

(4000, 70) (4000, 70) (4000, 2) (4000, 2)
Adversarial FGSM examples generated. Shape: (4000, 70)
Create Adversarial Attack | Indices: Index([305763, 2410673, 473894, 311088, 1632424], dtype='int64')... | Shape: (4000, 70)
Accuracy: 49.70%
              precision    recall  f1-score   support

      ATTACK       0.00      0.00      0.00      2012
      BENIGN       0.50      1.00      0.66      1988

    accuracy                           0.50      4000
   macro avg       0.25      0.50      0.33      4000
weighted avg       0.25      0.50      0.33      4000

Confusion Matrix: Positive == BENIGN
TN: 0, FP: 2012, FN: 0, TP: 1988
Predictions on Adversarial Attacks | Indices: Index([305763, 2410673, 473894, 311088, 1632424], dtype='int64')... | Shape: (4000, 2)
Running attack using 24 CPU cores...

Process 132176 is generating adversarial examples for batch of size 166 
Process 132177 is generating adversarial examples for batch of size 166 
Process 132175 is generating adversarial exampl

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/166 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/182 [00:00<?, ?it/s]

Create Adversarial Attack | Indices: Index([358687, 2773523, 323364, 430929, 467866], dtype='int64')... | Shape: (4000, 70)
Accuracy: 75.55%
              precision    recall  f1-score   support

      ATTACK       1.00      0.51      0.68      2000
      BENIGN       0.67      1.00      0.80      2000

    accuracy                           0.76      4000
   macro avg       0.84      0.76      0.74      4000
weighted avg       0.84      0.76      0.74      4000

Confusion Matrix: Positive == BENIGN
TN: 1022, FP: 978, FN: 0, TP: 2000
Predictions on Adversarial Attacks | Indices: Index([358687, 2773523, 323364, 430929, 467866], dtype='int64')... | Shape: (4000, 2)


In [62]:
def get_correctly_benign_classified_indices(y_train, y_pred):
    benign_indices = y_train[y_train['BENIGN'] == 1].index
    benign_adv_predicted_indices = y_pred[y_pred['BENIGN'] == 1].index
    correctly_benign_classified_indices = benign_indices.intersection(benign_adv_predicted_indices)
    return correctly_benign_classified_indices

def get_misclassified_as_benign_due_attack_indices(y_train, y_pred, y_pred_adv):
    attack_indices = y_train[y_train['ATTACK'] == 1].index
    attack_adv_predicted_indices = y_pred[y_pred['ATTACK'] == 1].index
    benign_predicted_adversarial_indices = y_pred_adv[y_pred_adv['BENIGN'] == 1].index
    misclassified_as_benign_due_attack_indices = attack_indices.intersection(attack_adv_predicted_indices).intersection(benign_predicted_adversarial_indices)
    return misclassified_as_benign_due_attack_indices

In [63]:
correctly_benign_classified_indices = get_correctly_benign_classified_indices(y_train, y_pred)
print(f"Correctly classified as BENIGN from the IDS: {len(correctly_benign_classified_indices)} | Indices: {correctly_benign_classified_indices[:3]}")

correctly_benign_classified_indices_fgsm = get_correctly_benign_classified_indices(y_fgsm, y_pred_fgsm)
misclassified_as_benign_due_attack_indices_fgsm = get_misclassified_as_benign_due_attack_indices(y_fgsm, y_pred_fgsm, y_pred_adv_fgsm)
print(f"Correctly classified as BENIGN from the IDS (FGSM): {len(correctly_benign_classified_indices_fgsm)} | Indices: {correctly_benign_classified_indices_fgsm[:3]}")
print(f"ATTACK sample misclassified as BENIGN due to adversarial attack (FGSM): {len(misclassified_as_benign_due_attack_indices_fgsm)} | Indices: {misclassified_as_benign_due_attack_indices_fgsm[:3]}")

correctly_benign_classified_indices_cw = get_correctly_benign_classified_indices(y_cw, y_pred_cw)
misclassified_as_benign_due_attack_indices_cw = get_misclassified_as_benign_due_attack_indices(y_cw, y_pred_cw, y_pred_adv_cw)
print(f"Correctly classified as BENIGN from the IDS (CW): {len(correctly_benign_classified_indices_cw)} | Indices: {correctly_benign_classified_indices_cw[:3]}")
print(f"ATTACK sample misclassified as BENIGN due to adversarial attack (CW): {len(misclassified_as_benign_due_attack_indices_cw)} | Indices: {misclassified_as_benign_due_attack_indices_cw[:3]}")

Correctly classified as BENIGN from the IDS: 3878 | Indices: Index([1283627, 1595713, 953730], dtype='int64')
Correctly classified as BENIGN from the IDS (FGSM): 1942 | Indices: Index([2410673, 1632424, 238489], dtype='int64')
ATTACK sample misclassified as BENIGN due to adversarial attack (FGSM): 2009 | Indices: Index([305763, 473894, 311088], dtype='int64')
Correctly classified as BENIGN from the IDS (CW): 1936 | Indices: Index([2773523, 2550577, 1631321], dtype='int64')
ATTACK sample misclassified as BENIGN due to adversarial attack (CW): 976 | Indices: Index([323364, 430929, 467866], dtype='int64')


In [19]:
# import functions.visualizer as visualizer
# import importlib
# importlib.reload(visualizer)

# visualizer.visualize_data_distribution(X_train.loc[correctly_benign_classified_indices], 'Normal Data', X_adv_fgsm.loc[misclassified_as_benign_due_attack_indices_fgsm], 'Adversarial Data')
# # visualizer.pca_visualization_side_by_side(X_train.loc[misclassified_as_benign_due_attack_indices], 'Normal Data', X_adv.loc[misclassified_as_benign_due_attack_indices], 'Adversarial Data')

## Explainer

In [64]:
import functions.explainer as exp
import importlib
importlib.reload(exp)

explainer = exp.generate_shap_explainer(ids_model, X_train)

shap_values, shap_values_df = exp.generate_shap_values(explainer, X_train)
print(f"Generate Explanations | Indices: {shap_values_df.index[:5]}... | Shape: {shap_values_df.shape}")

_, shap_values_adv_df_fgsm = exp.generate_shap_values(explainer, X_adv_fgsm)
print(f"Generate Adversarial Explanations | Indices: {shap_values_adv_df_fgsm.index[:5]}... | Shape: {shap_values_adv_df_fgsm.shape}")

_, shap_values_adv_df_cw = exp.generate_shap_values(explainer, X_adv_cw)
print(f"Generate Adversarial Explanations | Indices: {shap_values_adv_df_cw.index[:5]}... | Shape: {shap_values_adv_df_cw.shape}")

PermutationExplainer explainer: 8001it [04:04, 31.39it/s]                          


Generate Explanations | Indices: Index([383258, 1283627, 1595713, 323550, 474229], dtype='int64')... | Shape: (8000, 70)


PermutationExplainer explainer: 4001it [01:54, 31.88it/s]                          


Generate Adversarial Explanations | Indices: Index([305763, 2410673, 473894, 311088, 1632424], dtype='int64')... | Shape: (4000, 70)


PermutationExplainer explainer: 4001it [02:06, 29.23it/s]                          


Generate Adversarial Explanations | Indices: Index([358687, 2773523, 323364, 430929, 467866], dtype='int64')... | Shape: (4000, 70)


In [21]:
# import pandas as pd
# # concat_correctly_benign_classified_shaps = pd.concat([shap_values_df.loc[correctly_benign_classified_indices], shap_values_adv_df_fgsm.loc[correctly_benign_classified_indices_fgsm], shap_values_adv_df_cw.loc[correctly_benign_classified_indices_cw]], axis=0)
# # # shap_values_df.loc[misclassified_as_benign_due_attack_indices]
# # concat_misclassified_as_benign_shaps = pd.concat([shap_values_adv_df_fgsm.loc[misclassified_as_benign_due_attack_indices_fgsm], shap_values_adv_df_cw.loc[misclassified_as_benign_due_attack_indices_cw]], axis=0)

# concat_correctly_benign_classified_shaps = pd.concat([shap_values_df.loc[correctly_benign_classified_indices], shap_values_adv_df_fgsm.loc[correctly_benign_classified_indices_fgsm]], axis=0)
# # shap_values_df.loc[misclassified_as_benign_due_attack_indices]
# concat_misclassified_as_benign_shaps = pd.concat([shap_values_adv_df_fgsm.loc[misclassified_as_benign_due_attack_indices_fgsm]], axis=0)

In [22]:
# import functions.visualizer as visualizer
# import importlib
# importlib.reload(visualizer)

# visualizer.visualize_data_distribution(shap_values_df.loc[correctly_benign_classified_indices], 'Normal Explanations', shap_values_adv_df_fgsm.loc[misclassified_as_benign_due_attack_indices_fgsm], 'Adversarial Explanations')

In [23]:
# exp.plot_shap_summary_comparison(shap_values_df.loc[correctly_benign_classified_indices].values, X_train.loc[correctly_benign_classified_indices], shap_values_adv_df.loc[misclassified_as_benign_due_attack_indices].values, X_adv.loc[misclassified_as_benign_due_attack_indices], 6, title='Normal vs Adversarial Explanations of Benign Samples')

In [67]:
adversarial_correct_benign_indices_cw = correctly_benign_classified_indices_cw
adversarial_misclassified_as_benign_indices_cw = misclassified_as_benign_due_attack_indices_cw
adversarial_correct_benign_indices_fgsm = correctly_benign_classified_indices_fgsm
adversarial_misclassified_as_benign_indices_fgsm = misclassified_as_benign_due_attack_indices_fgsm
normal_correct_benign_indices = correctly_benign_classified_indices

attack_indices = y_train[y_train['ATTACK'] == 1].index
predicted_benign_indices = y_pred[y_pred['BENIGN'] == 1].index
normal_misclassified_as_benign_indices = attack_indices.intersection(predicted_benign_indices)

print(f"Normal Correctly Classified as Benign: {len(normal_correct_benign_indices)}")
print(f"Normal Misclassified as Benign: {len(normal_misclassified_as_benign_indices)}")
print(f"Adversarial Correctly Classified as Benign (CW): {len(adversarial_correct_benign_indices_cw)}")
print(f"Adversarial Misclassified as Benign (CW): {len(adversarial_misclassified_as_benign_indices_cw)}")
print(f"Adversarial Correctly Classified as Benign (FGSM): {len(adversarial_correct_benign_indices_fgsm)}")
print(f"Adversarial Misclassified as Benign (FGSM): {len(adversarial_misclassified_as_benign_indices_fgsm)}")

Normal Correctly Classified as Benign: 3878
Normal Misclassified as Benign: 5
Adversarial Correctly Classified as Benign (CW): 1936
Adversarial Misclassified as Benign (CW): 976
Adversarial Correctly Classified as Benign (FGSM): 1942
Adversarial Misclassified as Benign (FGSM): 2009


In [68]:
# import pandas as pd

# shap_adv_correct_benign = shap_values_adv_df_cw.loc[adversarial_correct_benign_indices_cw]
# shap_adv_misclassified_benign = shap_values_adv_df_cw.loc[adversarial_misclassified_as_benign_indices_cw]
# shap_normal_correct_benign = shap_values_df.loc[normal_correct_benign_indices]
# shap_normal_misclassified_benign = shap_values_df.loc[normal_misclassified_as_benign_indices]
# print(shap_adv_correct_benign.shape, shap_adv_misclassified_benign.shape, shap_normal_correct_benign.shape, shap_normal_misclassified_benign.shape)
# print('ADV CORRECT BENIGN |', 'ADV MISCLASSIFIED BENIGN |', 'NORM CORRECT BENIGN |', 'NORM MISCLASSIFIED BENIGN')


# # build dataset
# y_adv_benign = np.array([[1, 0, 0, 0]] * shap_adv_correct_benign.shape[0])  
# y_adv_attack = np.array([[0, 1, 0, 0]] * shap_adv_misclassified_benign.shape[0])
# y_norm_bening = np.array([[0, 0, 1, 0]] * shap_normal_correct_benign.shape[0])
# y_norm_attack = np.array([[0, 0, 0, 1]] * shap_normal_misclassified_benign.shape[0])


# y = np.concatenate([y_adv_benign, y_adv_attack, y_norm_bening, y_norm_attack])
# y = pd.DataFrame(y, columns=['ADV CORRECT BENIGN', 'ADV MISCLASSIFIED BENIGN', 'NORM CORRECT BENIGN', 'NORM MISCLASSIFIED BENIGN'])

# X = pd.concat([shap_adv_correct_benign, shap_adv_misclassified_benign, shap_normal_correct_benign, shap_normal_misclassified_benign], axis=0)
# print(X.shape, y.shape)

In [69]:
import pandas as pd

def create_dataset(class_samples):
    """
    Create dataset from given class samples.
    
    Args:
        class_samples (dict): Dictionary where keys are class names and values are DataFrames of samples.
    
    Returns:
        X (pd.DataFrame): Feature matrix
        y (pd.DataFrame): One-hot encoded labels
    """
    X_list = []
    y_list = []
    
    class_labels = list(class_samples.keys())
    num_classes = len(class_labels)
    
    for i, class_name in enumerate(class_labels):
        samples = class_samples[class_name]
        one_hot = np.zeros((samples.shape[0], num_classes))
        one_hot[:, i] = 1  # Set the respective column to 1
        
        X_list.append(samples)
        y_list.append(one_hot)
    
    X = pd.concat(X_list, axis=0)
    y = np.vstack(y_list)
    y = pd.DataFrame(y, columns=class_labels)
    
    return X, y

In [70]:
class_samples = {
    # 'ADV CW BENIGN': shap_values_adv_df_cw.loc[adversarial_correct_benign_indices_cw],
    'ADV CW ATTACK MISCLASSIFIED': shap_values_adv_df_cw.loc[adversarial_misclassified_as_benign_indices_cw],
    # 'ADV FGSM BENIGN': shap_values_adv_df_fgsm.loc[adversarial_correct_benign_indices_fgsm],
    'ADV FGSM': shap_values_adv_df_fgsm,
    'NORM BENIGN': shap_values_df.loc[normal_correct_benign_indices]
}

X, y = create_dataset(class_samples)
print(X.shape, y.shape)

(8854, 70) (8854, 3)


## Detector

In [71]:
import functions.detector as det
import importlib
importlib.reload(det)

# build detector to detect adversarial samples that misclassify attack samples as benign

# create dataframe
# TODO: build detector with normal and adversarial shap values?
# TODO: build with shap_values_adv_df to detect 'BENIGN' and 'ATTACK'
import pandas as pd

# alternative approach: detector that predicts the original label of the sample for all given adversarial attacks
# concat_correctly_benign_classified_shaps = pd.concat([shap_values_df.loc[correctly_benign_classified_indices], shap_values_adv_df.loc[correctly_benign_classified_indices]], axis=0)
# concat_misclassified_as_benign_shaps = pd.concat([shap_values_df.loc[misclassified_as_benign_due_attack_indices], shap_values_adv_df.loc[misclassified_as_benign_due_attack_indices]], axis=0)
# X, y = det.build_train_datasets(shap_values_df.loc[correctly_benign_classified_indices], shap_values_adv_df_fgsm.loc[misclassified_as_benign_due_attack_indices_fgsm])

#X, y = det.build_train_datasets(shap_values_df.loc[correctly_benign_classified_indices], shap_values_adv_df.loc[misclassified_as_benign_due_attack_indices])
print(X.shape, y.shape)

# split data
X_train_det, X_test_det, y_train_det, y_test_det = train_test_split(X, y, test_size=0.1, random_state=1503)
print(X_train_det.shape, X_test_det.shape, y_train_det.shape, y_test_det.shape)

# build detector
detector = det.build_detector(X_train_det, y_train_det, X_test_det, y_test_det)

(8854, 70) (8854, 3)
(7968, 70) (886, 70) (7968, 3) (886, 3)
Epoch 1/10


2025-03-26 14:41:25.408358: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4403 - loss: 0.6753 - val_accuracy: 0.4987 - val_loss: 0.5868
Epoch 2/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6011 - loss: 0.5457 - val_accuracy: 0.8896 - val_loss: 0.3263
Epoch 3/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8327 - loss: 0.3346 - val_accuracy: 0.9605 - val_loss: 0.1579
Epoch 4/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9167 - loss: 0.2188 - val_accuracy: 0.9737 - val_loss: 0.0972
Epoch 5/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9380 - loss: 0.1522 - val_accuracy: 0.9843 - val_loss: 0.0592
Epoch 6/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9448 - loss: 0.1094 - val_accuracy: 0.9893 - val_loss: 0.0374
Epoch 7/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━

In [75]:
# Evaluate detector
y_pred_det = det.predict(detector, X_test_det, y.columns)
print(f"Predictions on Detector | Indices: {y_pred_det.index[:5]}... | Shape: {y_pred_det.shape}")

# Convert one-hot to class indices
y_true_indices = np.argmax(y_test_det, axis=1)
y_pred_indices = np.argmax(y_pred_det, axis=1)
print(y_true_indices[:5], y_pred_indices[:5])

from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Compute Accuracy
accuracy = accuracy_score(y_true_indices, y_pred_indices)
print(f"Overall Accuracy: {accuracy:.4f}")

# Compute Classification Report for overall classification
print("Classification Report (Overall):")
print(classification_report(y_true_indices, y_pred_indices, target_names=y.columns, zero_division=0))

[1m 1/28[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 21ms/step

[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Predictions on Detector | Indices: Index([422234, 272630, 435345, 379069, 311517], dtype='int64')... | Shape: (886, 3)
[1 1 1 1 1] [1 1 1 1 1]
Overall Accuracy: 0.9910
Classification Report (Overall):
                             precision    recall  f1-score   support

ADV CW ATTACK MISCLASSIFIED       0.97      0.97      0.97       120
                   ADV FGSM       1.00      1.00      1.00       407
                NORM BENIGN       0.99      0.99      0.99       359

                   accuracy                           0.99       886
                  macro avg       0.99      0.99      0.99       886
               weighted avg       0.99      0.99      0.99       886



2025-03-26 16:17:32.628114: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}


In [None]:
# find misclassified samples
print(len(adversarial_misclassified_as_benign_indices_cw))
print(len(adversarial_misclassified_as_benign_indices_fgsm))


976
2009


In [29]:
# Combine classes for adversarial detection (class 1 and class 2)
y_true_adv = np.where(np.isin(y_true_indices, [0, 1]), 1, 0)  # Adversarial = 1 (class 1 or 2), otherwise 0
y_pred_adv = np.where(np.isin(y_pred_indices, [0, 1]), 1, 0)  # Predicted as Adversarial
print(y_true_adv[:5], y_pred_adv[:5])

# Combine classes for benign detection (class 1 and class 3)
y_true_benign = np.where(np.isin(y_true_indices, [0, 2]), 1, 0)  # Benign = 1 (class 1 or 3), otherwise 0
y_pred_benign = np.where(np.isin(y_pred_indices, [0, 2]), 1, 0)  # Predicted as Benign
print(y_true_benign[:5], y_pred_benign[:5])


# Compute confusion matrix for Adversarial Detection
tn_adv, fp_adv, fn_adv, tp_adv = confusion_matrix(y_true_adv, y_pred_adv).ravel()
print(f"\nAdversarial Detection (Class 1 + 2):")
print(f"TP (Adversarial correctly detected): {tp_adv}")
print(f"FP (Benign incorrectly detected as adversarial): {fp_adv}")
print(f"TN (Benign correctly detected): {tn_adv}")
print(f"FN (Adversarial missed): {fn_adv}")

# Calculate metrics for Adversarial Detection
tpr_adv = tp_adv / (tp_adv + fn_adv) if (tp_adv + fn_adv) != 0 else 0
fpr_adv = fp_adv / (fp_adv + tn_adv) if (fp_adv + tn_adv) != 0 else 0
fnr_adv = fn_adv / (tp_adv + fn_adv) if (tp_adv + fn_adv) != 0 else 0
tnr_adv = tn_adv / (tn_adv + fp_adv) if (tn_adv + fp_adv) != 0 else 0  

# Calculate accuracy for Adversarial Detection
accuracy_adv = (tp_adv + tn_adv) / (tp_adv + tn_adv + fp_adv + fn_adv) if (tp_adv + tn_adv + fp_adv + fn_adv) != 0 else 0

print(f"Adversarial Detection Metrics:")
print(f"Adversarial Detection Accuracy: {100*accuracy_adv:.2f}%")
print(f"True Positive Rate (TPR): {100*tpr_adv:.2f}%")
print(f"False Positive Rate (FPR): {100*fpr_adv:.2f}%")
print(f"False Negative Rate (FNR): {100* fnr_adv:.2f}%")
print(f"True Negative Rate (TNR): {100*tnr_adv:.2f}%") 

# Compute confusion matrix for Benign Detection
tn_benign, fp_benign, fn_benign, tp_benign = confusion_matrix(y_true_benign, y_pred_benign).ravel()
print(f"\nBenign Detection (Class 1 + 3):")
print(f"TP (Benign correctly detected): {tp_benign}")
print(f"FP (Adversarial incorrectly detected as benign): {fp_benign}")
print(f"TN (Adversarial correctly detected): {tn_benign}")
print(f"FN (Benign missed): {fn_benign}")

# Calculate metrics for Benign Detection
tpr_benign = tp_benign / (tp_benign + fn_benign) if (tp_benign + fn_benign) != 0 else 0
fpr_benign = fp_benign / (fp_benign + tn_benign) if (fp_benign + tn_benign) != 0 else 0
fnr_benign = fn_benign / (tp_benign + fn_benign) if (tp_benign + fn_benign) != 0 else 0
tnr_benign = tn_benign / (tn_benign + fp_benign) if (tn_benign + fp_benign) != 0 else 0  

# Calculate accuracy for Benign Detection
accuracy_benign = (tp_benign + tn_benign) / (tp_benign + tn_benign + fp_benign + fn_benign) if (tp_benign + tn_benign + fp_benign + fn_benign) != 0 else 0

print(f"Benign Detection Metrics:")
print(f"Benign Detection Accuracy: {100*accuracy_benign:.2f}%")
print(f"True Positive Rate (TPR): {100*tpr_benign:.2f}%")
print(f"False Positive Rate (FPR): {100*fpr_benign:.2f}%")
print(f"False Negative Rate (FNR): {100*fnr_benign:.2f}%")
print(f"True Negative Rate (TNR): {100*tnr_benign:.2f}%") 

[1 0 0 1 1] [1 1 1 1 1]
[1 1 1 0 1] [1 1 1 0 1]

Adversarial Detection (Class 1 + 2):
TP (Adversarial correctly detected): 227
FP (Benign incorrectly detected as adversarial): 164
TN (Benign correctly detected): 0
FN (Adversarial missed): 0
Adversarial Detection Metrics:
Adversarial Detection Accuracy: 58.06%
True Positive Rate (TPR): 100.00%
False Positive Rate (FPR): 100.00%
False Negative Rate (FNR): 0.00%
True Negative Rate (TNR): 0.00%

Benign Detection (Class 1 + 3):
TP (Benign correctly detected): 305
FP (Adversarial incorrectly detected as benign): 0
TN (Adversarial correctly detected): 84
FN (Benign missed): 2
Benign Detection Metrics:
Benign Detection Accuracy: 99.49%
True Positive Rate (TPR): 99.35%
False Positive Rate (FPR): 0.00%
False Negative Rate (FNR): 0.65%
True Negative Rate (TNR): 100.00%


In [23]:
# find intersection of benign and normal samples
normal_samples = np.where(y_pred_adv == 0)[0]
print(f"Normal Samples: {len(normal_samples)} | {normal_samples[:5]}")
normal_samples = set(normal_samples)
benign_samples = np.where(y_pred_benign == 1)[0]
print(f"Benign Samples: {len(benign_samples)} | {benign_samples[:5]}")
benign_samples = set(benign_samples)
intersection = normal_samples & benign_samples
print(f"Intersection of Adversarial and Benign Samples: {len(intersection)}")

# find incides from class [0, 0, 1, 0] of y_pred_indices
normal_indices = np.where(y_pred_indices == 2)[0]
print(f"Normal Samples: {len(normal_indices)} | {normal_indices[:5]}")

Normal Samples: 0 | []
Benign Samples: 305 | [0 1 2 4 6]
Intersection of Adversarial and Benign Samples: 0
Normal Samples: 0 | []


---
## Manual Evaluation
We perform the whole two-stages approach on new unseen data and evaluate the following scores:
- Recall
- Precision
- Accuracy
- F1 Score

In [14]:
import functions.data_preprocessing as dp
import importlib
importlib.reload(dp)

# exclude previously used samples
dataset_eval_excluded = dataset.drop(index=used_indices)

X_eval, y_eval, used_eval_indices = dp.preprocess_data(dataset_eval_excluded, encoding_type, normalizer, zero_columns, sample_size=500, random_sample_state=17)
print(f"Generate Features | Indices: {X_eval.index[:5]}... | Shape: {X_eval.shape}")
print(f"Generate Labels | Indices: {y_eval.index[:5]}... | Shape: {y_eval.shape}")
print(y_eval.value_counts())

-- Preprocessing data --
--- Sampling balanced data ---
Sample to shape: (1000, 79)
--- Splitting labels and features ---
--- Encoding labels as binary one-hot values ---
--- Normalizing features using MinMaxScaler ---
Generate Features | Indices: Index([2056787, 2391506, 802264, 1981689, 480604], dtype='int64')... | Shape: (1000, 70)
Generate Labels | Indices: Index([2056787, 2391506, 802264, 1981689, 480604], dtype='int64')... | Shape: (1000, 2)
BENIGN  ATTACK
False   True      500
True    False     500
Name: count, dtype: int64


In [15]:
importlib.reload(ag)

# X_adv_eval = ag.generate_cw_attacks_parallel(art_model, X_eval, target_label=1, num_cores=num_cores)
# print(f"Create Adversarial Attack | Indices: {X_adv_eval.index[:5]}... | Shape: {X_adv_eval.shape}")

X_adv_eval = ag.generate_fgsm_attacks(art_model, X_eval, target_label=1)
print(f"Create Adversarial Attack | Indices: {X_adv_eval.index[:5]}... | Shape: {X_adv_eval.shape}")

y_pred_adv_eval = ag.evaluate_art_model(art_model, X_adv_eval, y_eval)
print(f"Predictions on Adversarial Attacks | Indices: {y_pred_adv_eval.index[:5]}... | Shape: {y_pred_adv_eval.shape}")

Adversarial FGSM examples generated. Shape: (1000, 70)
Create Adversarial Attack | Indices: Index([2056787, 2391506, 802264, 1981689, 480604], dtype='int64')... | Shape: (1000, 70)
Accuracy: 50.00%
              precision    recall  f1-score   support

      ATTACK       0.00      0.00      0.00       500
      BENIGN       0.50      1.00      0.67       500

    accuracy                           0.50      1000
   macro avg       0.25      0.50      0.33      1000
weighted avg       0.25      0.50      0.33      1000

Confusion Matrix: Positive == BENIGN
TN: 0, FP: 500, FN: 0, TP: 500
Predictions on Adversarial Attacks | Indices: Index([2056787, 2391506, 802264, 1981689, 480604], dtype='int64')... | Shape: (1000, 2)


In [16]:
importlib.reload(exp)
X_eval_adv_shap_values, X_eval_adv_shap_values_df = exp.generate_shap_values(explainer, X_adv_eval)

print(f"Create Explanations | Indices: {X_eval_adv_shap_values_df.index[:5]}... | Shape: {X_eval_adv_shap_values_df.shape}")

PermutationExplainer explainer: 1001it [00:29, 22.27it/s]                         

Create Explanations | Indices: Index([2056787, 2391506, 802264, 1981689, 480604], dtype='int64')... | Shape: (1000, 70)





In [17]:
benign_eval_idx = y_eval[y_eval['BENIGN'] == 1].index
attack_eval_idx = y_eval[y_eval['ATTACK'] == 1].index

pred_benign_idx = y_pred_adv_eval[y_pred_adv_eval['BENIGN'] == 1].index
pred_attack_idx = y_pred_adv_eval[y_pred_adv_eval['ATTACK'] == 1].index

# predict
X_eval_detector = X_eval_adv_shap_values_df.loc[pred_benign_idx]

y_pred_eval_detector = det.predict(detector, X_eval_detector, y_eval.columns)

correctly_classified_det_idx = y_pred_eval_detector[y_pred_eval_detector['BENIGN'] == 1].index
misclassified_det_idx = y_pred_eval_detector[y_pred_eval_detector['ATTACK'] == 1].index


[1m 1/32[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 14ms/step

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 674us/step


2025-03-25 12:20:04.257927: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}


In [18]:
detector_misclassifies_attack_as_correct_benign = attack_eval_idx.intersection(correctly_classified_det_idx)
print(detector_misclassifies_attack_as_correct_benign)

y_pred_test = detector.predict(X_eval_detector)
y_pred_test = pd.DataFrame(y_pred_test, index=X_eval_detector.index, columns=y_eval.columns)

y_pred_test_index = y_pred_test[y_pred_test['BENIGN'] >= 0.8].index

print(detector_misclassifies_attack_as_correct_benign.intersection(y_pred_test_index))
print(len(detector_misclassifies_attack_as_correct_benign.intersection(y_pred_test_index)))

Index([], dtype='int64')
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 699us/step
Index([], dtype='int64')
0


In [19]:
# After IDS Stage
TN = len(attack_eval_idx.intersection(pred_attack_idx)) # IDS classifies 'ATTACK' samples as 'ATTACK'
print(f"IDS classifies 'ATTACK' samples as 'ATTACK': {TN}")
FN = len(benign_eval_idx.intersection(pred_attack_idx)) # IDS classifies 'BENIGN' samples as 'ATTACK'
print(f"IDS mis-classifies 'BENIGN' samples as 'ATTACK': {FN}")

# After Detector Stage
TP = len(benign_eval_idx.intersection(correctly_classified_det_idx)) # Detector classifies 'BENIGN' samples as correct 'BENIGN'
print(f"Detector classifies 'BENIGN' samples as correct 'BENIGN': {TP}")
FP = len(attack_eval_idx.intersection(correctly_classified_det_idx)) # Detector classifies 'ATTACK' samples as correct 'BENIGN'
print(f"Detector mis-classifies 'ATTACK' samples as correct 'BENIGN': {FP}")

TN_2 = len(attack_eval_idx.intersection(misclassified_det_idx)) # Detector classifies 'ATTACK' samples as misclassified due to 'ATTACK'
print(f"Detector classifies 'ATTACK' samples as misclassified due to 'ATTACK': {TN_2}")
FN_2 = len(benign_eval_idx.intersection(misclassified_det_idx)) # Detector classifies 'BENIGN' samples as misclassified due to 'ATTACK'
print(f"Detector classifies 'BENIGN' samples as misclassified due to 'ATTACK': {FN_2}")

# Sum up TN & FN from both stages
TN = TN + TN_2
FN = FN + FN_2

print(f"TP: {TP}")
print(f"FP: {FP}")
print(f"TN: {TN}")
print(f"FN: {FN}")
print(f"Sum: {TP + FP + TN + FN}")

IDS classifies 'ATTACK' samples as 'ATTACK': 0
IDS mis-classifies 'BENIGN' samples as 'ATTACK': 0
Detector classifies 'BENIGN' samples as correct 'BENIGN': 43
Detector mis-classifies 'ATTACK' samples as correct 'BENIGN': 0
Detector classifies 'ATTACK' samples as misclassified due to 'ATTACK': 500
Detector classifies 'BENIGN' samples as misclassified due to 'ATTACK': 457
TP: 43
FP: 0
TN: 500
FN: 457
Sum: 1000


In [19]:
from sklearn.metrics import classification_report

print(f"Global Accuracy: {(TP + TN) / (TP + FP + TN + FN) * 100:.2f}%")

# Construct a fake y_true and y_pred to match sklearn's classification_report format
y_true = np.array([1] * TP + [0] * TN + [1] * FN + [0] * FP)  # True labels
y_pred = np.array([1] * TP + [0] * TN + [0] * FN + [1] * FP)  # Predicted labels

# Generate classification report
report = classification_report(y_true, y_pred, target_names=['ATTACK', 'BENIGN']) # reverse labels because classification_report assumes first label is 0
print(report)

print(f"True Negative Rate: {TN/(TN+FP)*100:.2f}%")
print(f"False Positive Rate: {FP/(TN+FP)*100:.2f}%")
print(f"True Positive Rate: {TP/(TP+FN)*100:.2f}%")
print(f"False Negative Rate: {FN/(TP+FN)*100:.2f}%")

Global Accuracy: 96.00%
              precision    recall  f1-score   support

      ATTACK       0.95      0.98      0.96       500
      BENIGN       0.98      0.94      0.96       500

    accuracy                           0.96      1000
   macro avg       0.96      0.96      0.96      1000
weighted avg       0.96      0.96      0.96      1000

True Negative Rate: 97.60%
False Positive Rate: 2.40%
True Positive Rate: 94.40%
False Negative Rate: 5.60%
