# Prototype: ny

| Properties      | Data    |
|---------------|-----------|
| *Dataset* | `NSL-KDD` |
| *Labels* | `ALL` |
| *Normalization* | `Min-Max` |
| *Sample Size* | `50.000`|
| *Adversarial Attack* | `FGSM & C&W & JSMA & PGD` |
| *Explanations* | `SHAP` |
| *Detector* | `Detect Attacks and Misclassified Samples` |


---

## *Has to be run first alone!*

In [137]:
# To import modules from the functions directory
import sys
import os
sys.path.append(os.path.abspath(os.path.join('..')))

## Data Preprocessing

In [None]:
import functions.data_preprocessing as dp
import importlib
importlib.reload(dp)

encoding_type = 0 # binary encoding
norm_type = 0 # min-max normalization
sample_size = 25000 # sample size for each label -> 2 x sample_size = total samples

dataset = dp.build_nsl_kdd_dataset()
print(dataset.shape)
print(dataset.head(2))

normalizer, zero_columns = dp.generate_normalizer(dataset, norm_type)


feature_df, label_df, used_indices = dp.preprocess_data(dataset, encoding_type, normalizer, zero_columns, sample_size=sample_size, random_sample_state=42)
print(f"Generate Features | Indices: {feature_df.index[:5]}... | Shape: {feature_df.shape}")
print(f"Generate Labels | Indices: {label_df.index[:5]}... | Shape: {label_df.shape}")
print(label_df.value_counts()) # -> will first show [0, 1] then [1, 0] if label number is equal

-- Building NSL-KDD dataset --
--- Extracting labels ---
 Label
normal     77054
neptune    45871
Name: count, dtype: int64
(122925, 42)
   duration  protocol_type  service  flag  src_bytes  dst_bytes  land  \
0         0              1       20     9        491          0     0   
1         0              2       44     9        146          0     0   

   wrong_fragment  urgent  hot  ...  dst_host_srv_count  \
0               0       0    0  ...                  25   
1               0       0    0  ...                   1   

   dst_host_same_srv_rate  dst_host_diff_srv_rate  \
0                    0.17                    0.03   
1                    0.00                    0.60   

   dst_host_same_src_port_rate  dst_host_srv_diff_host_rate  \
0                         0.17                          0.0   
1                         0.88                          0.0   

   dst_host_serror_rate  dst_host_srv_serror_rate  dst_host_rerror_rate  \
0                   0.0                 

In [139]:
print(dataset[' Label'].shape)
dataset[' Label'].value_counts()

(148517,)


 Label
normal             77054
neptune            45871
satan               4368
ipsweep             3740
smurf               3311
portsweep           3088
nmap                1566
back                1315
guess_passwd        1284
mscan                996
warezmaster          964
teardrop             904
warezclient          890
apache2              737
processtable         685
snmpguess            331
saint                319
mailbomb             293
pod                  242
snmpgetattack        178
httptunnel           133
buffer_overflow       50
land                  25
multihop              25
rootkit               23
named                 17
ps                    15
sendmail              14
xterm                 13
imap                  12
ftp_write             11
loadmodule            11
xlock                  9
phf                    6
perl                   5
xsnoop                 4
spy                    2
worm                   2
sqlattack              2
udpstorm          

## Split Data

In [140]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(feature_df, label_df, test_size=0.1, random_state=42)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(45000, 40) (5000, 40) (45000, 2) (5000, 2)


## Create IDS

In [141]:
import functions.intrusion_detection_system as ids
import importlib
importlib.reload(ids)

# TODO: build ids with complete dataset
# X_train_all, y_train_all, _ = dp.preprocess_data(dataset, encoding_type, normalizer, zero_columns, random_sample_state=42)
# print(y_train_all.value_counts())
# X_train_all, X_test_all, y_train_all, y_test_all = train_test_split(X_train_all, y_train_all, test_size=0.2, random_state=42)
# print(X_train_all.shape, X_test_all.shape, y_train_all.shape, y_test_all.shape)

# build ids and evaluate it on test data
ids_model = ids.build_intrusion_detection_system(X_train, y_train, X_test, y_test)
# store prediction from X_train
y_pred = ids.predict(ids_model, X_train, columns=y_train.columns)
print(f"Predictions on Normal Data | Indices: {y_pred.index[:5]}... | Shape: {y_pred.shape}")

Epoch 1/20
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 933us/step - accuracy: 0.8985 - loss: 0.2619 - val_accuracy: 0.9626 - val_loss: 0.1059
Epoch 2/20
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 801us/step - accuracy: 0.9598 - loss: 0.1033 - val_accuracy: 0.9716 - val_loss: 0.0794
Epoch 3/20
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 811us/step - accuracy: 0.9685 - loss: 0.0813 - val_accuracy: 0.9764 - val_loss: 0.0683
Epoch 4/20
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 846us/step - accuracy: 0.9729 - loss: 0.0699 - val_accuracy: 0.9788 - val_loss: 0.0626
Epoch 5/20
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 797us/step - accuracy: 0.9764 - loss: 0.0625 - val_accuracy: 0.9808 - val_loss: 0.0604
Epoch 6/20
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 792us/step - accuracy: 0.9786 - loss: 0.0576 - val_accuracy: 0.9811 - val_loss: 0.0577
Epoch 7/20
[1m9

2025-04-10 15:01:21.955083: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}


[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 368us/step
Predictions on Normal Data | Indices: Index([35100, 44022, 12737, 26214, 20883], dtype='int64')... | Shape: (45000, 2)


## Generate Adversarial Attacks

In [142]:
import functions.attack_generator as ag
import importlib
import numpy as np
importlib.reload(ag)

all_features = dataset.drop(columns=[' Label'])
art_model = ag.convert_to_art_model(ids_model, X_train) # TODO: use all features for generating art model

# Split the training data into classes
class_labels = ["normal", "cw", "fgsm", "jsma", "pgd"]
splits = ag.split_into_attack_classes(X_train, y_train, class_labels)
X_normal, y_normal = splits["normal"]
X_cw, y_cw = splits["cw"]
X_fgsm, y_fgsm = splits["fgsm"]
X_jsma, y_jsma = splits["jsma"]
X_pgd, y_pgd = splits["pgd"]
print(f"Normal Data: {X_normal.shape} | CW Data: {X_cw.shape} | FGSM Data: {X_fgsm.shape} | JSMA Data: {X_jsma.shape} | PGD Data: {X_pgd.shape}")

# generate attacks on the separated training data
# TODO: when changing epsilon, the detector accuracy rises
X_adv_fgsm = ag.generate_fgsm_attacks(art_model, X_fgsm, target_label=1)
print(f"Create FGSM Adversarial Attack | Indices: {X_adv_fgsm.index[:5]}... | Shape: {X_adv_fgsm.shape}")
y_pred_adv_fgsm = ag.evaluate_art_model(art_model, X_adv_fgsm, y_fgsm)
print(f"Predictions on Adversarial Attacks | Indices: {y_pred_adv_fgsm.index[:5]}... | Shape: {y_pred_adv_fgsm.shape}")
y_pred_fgsm = y_pred.loc[X_fgsm.index]

import multiprocessing
num_cores = multiprocessing.cpu_count()
X_adv_cw = ag.generate_cw_attacks_parallel(art_model, X_cw, target_label=1, num_cores=num_cores)
print(f"Create CW Adversarial Attack | Indices: {X_adv_cw.index[:5]}... | Shape: {X_adv_cw.shape}")
y_pred_adv_cw = ag.evaluate_art_model(art_model, X_adv_cw, y_cw)
print(f"Predictions on Adversarial Attacks | Indices: {y_pred_adv_cw.index[:5]}... | Shape: {y_pred_adv_cw.shape}")
y_pred_cw = y_pred.loc[X_cw.index]

X_adv_jsma = ag.generate_jsma_attacks(art_model, X_jsma, target_label=1)
print(f"Create JSMA Adversarial Attack | Indices: {X_adv_jsma.index[:5]}... | Shape: {X_adv_jsma.shape}")
y_pred_adv_jsma = ag.evaluate_art_model(art_model, X_adv_jsma, y_jsma)
print(f"Predictions on Adversarial Attacks | Indices: {y_pred_adv_jsma.index[:5]}... | Shape: {y_pred_adv_jsma.shape}")
y_pred_jsma = y_pred.loc[X_jsma.index]

X_adv_pgd = ag.generate_pgd_attacks(art_model, X_pgd, target_label=1)
print(f"Create HSJ Adversarial Attack | Indices: {X_adv_pgd.index[:5]}... | Shape: {X_adv_pgd.shape}")
y_pred_adv_pgd = ag.evaluate_art_model(art_model, X_adv_pgd, y_pgd)
print(f"Predictions on Adversarial Attacks | Indices: {y_pred_adv_pgd.index[:5]}... | Shape: {y_pred_adv_pgd.shape}")
y_pred_pgd = y_pred.loc[X_pgd.index]

Normal Data: (9000, 40) | CW Data: (9000, 40) | FGSM Data: (9000, 40) | JSMA Data: (9000, 40) | PGD Data: (9000, 40)
Adversarial FGSM examples generated. Shape: (9000, 40)
Create FGSM Adversarial Attack | Indices: Index([58311, 7703, 25104, 22457, 103581], dtype='int64')... | Shape: (9000, 40)
Accuracy: 59.69%
              precision    recall  f1-score   support

      ATTACK     0.9968    0.2057    0.3411      4564
      BENIGN     0.5501    0.9993    0.7096      4436

    accuracy                         0.5969      9000
   macro avg     0.7735    0.6025    0.5254      9000
weighted avg     0.7767    0.5969    0.5227      9000

Confusion Matrix: Positive == BENIGN
TN: 939, FP: 3625, FN: 3, TP: 4433
Predictions on Adversarial Attacks | Indices: Index([58311, 7703, 25104, 22457, 103581], dtype='int64')... | Shape: (9000, 2)
Running attack using 24 CPU cores...

Process 148716 is generating adversarial examples for batch of size 375 
Process 148715 is generating adversarial examples fo

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/375 [00:00<?, ?it/s]

Create CW Adversarial Attack | Indices: Index([67481, 114205, 61838, 66711, 85497], dtype='int64')... | Shape: (9000, 40)
Accuracy: 90.73%
              precision    recall  f1-score   support

      ATTACK     1.0000    0.8155    0.8984      4521
      BENIGN     0.8430    1.0000    0.9148      4479

    accuracy                         0.9073      9000
   macro avg     0.9215    0.9078    0.9066      9000
weighted avg     0.9219    0.9073    0.9066      9000

Confusion Matrix: Positive == BENIGN
TN: 3687, FP: 834, FN: 0, TP: 4479
Predictions on Adversarial Attacks | Indices: Index([67481, 114205, 61838, 66711, 85497], dtype='int64')... | Shape: (9000, 2)


JSMA:   0%|          | 0/9000 [00:00<?, ?it/s]

Adversarial JSMA examples generated. Shape: (9000, 40)
Create JSMA Adversarial Attack | Indices: Index([25100, 25692, 9516, 17747, 121254], dtype='int64')... | Shape: (9000, 40)
Accuracy: 49.84%
              precision    recall  f1-score   support

      ATTACK     0.0000    0.0000    0.0000      4514
      BENIGN     0.4984    1.0000    0.6653      4486

    accuracy                         0.4984      9000
   macro avg     0.2492    0.5000    0.3326      9000
weighted avg     0.2484    0.4984    0.3316      9000

Confusion Matrix: Positive == BENIGN
TN: 0, FP: 4514, FN: 0, TP: 4486
Predictions on Adversarial Attacks | Indices: Index([25100, 25692, 9516, 17747, 121254], dtype='int64')... | Shape: (9000, 2)


PGD - Batches: 0it [00:00, ?it/s]

Adversarial PGD examples generated. Shape: (9000, 40)
Create HSJ Adversarial Attack | Indices: Index([30869, 123704, 60521, 54227, 35893], dtype='int64')... | Shape: (9000, 40)
Accuracy: 56.59%
              precision    recall  f1-score   support

      ATTACK     1.0000    0.1189    0.2125      4434
      BENIGN     0.5389    1.0000    0.7004      4566

    accuracy                         0.5659      9000
   macro avg     0.7694    0.5594    0.4564      9000
weighted avg     0.7661    0.5659    0.4600      9000

Confusion Matrix: Positive == BENIGN
TN: 527, FP: 3907, FN: 0, TP: 4566
Predictions on Adversarial Attacks | Indices: Index([30869, 123704, 60521, 54227, 35893], dtype='int64')... | Shape: (9000, 2)


In [143]:
def get_correctly_benign_classified_indices(y_train, y_pred):
    benign_indices = y_train[y_train['BENIGN'] == 1].index
    benign_adv_predicted_indices = y_pred[y_pred['BENIGN'] == 1].index
    correctly_benign_classified_indices = benign_indices.intersection(benign_adv_predicted_indices)
    return correctly_benign_classified_indices

def get_misclassified_as_benign_due_attack_indices(y_train, y_pred, y_pred_adv):
    attack_indices = y_train[y_train['ATTACK'] == 1].index
    attack_adv_predicted_indices = y_pred[y_pred['ATTACK'] == 1].index
    benign_predicted_adversarial_indices = y_pred_adv[y_pred_adv['BENIGN'] == 1].index
    misclassified_as_benign_due_attack_indices = attack_indices.intersection(attack_adv_predicted_indices).intersection(benign_predicted_adversarial_indices)
    return misclassified_as_benign_due_attack_indices

In [144]:
y_pred_normal = y_pred.loc[X_normal.index]
correctly_benign_classified_indices = get_correctly_benign_classified_indices(y_normal, y_pred_normal)

correctly_benign_classified_indices_fgsm = get_correctly_benign_classified_indices(y_fgsm, y_pred_adv_fgsm)
misclassified_as_benign_due_attack_indices_fgsm = get_misclassified_as_benign_due_attack_indices(y_fgsm, y_pred_fgsm, y_pred_adv_fgsm)

correctly_benign_classified_indices_cw = get_correctly_benign_classified_indices(y_cw, y_pred_cw)
misclassified_as_benign_due_attack_indices_cw = get_misclassified_as_benign_due_attack_indices(y_cw, y_pred_cw, y_pred_adv_cw)

correctly_benign_classified_indices_jsma = get_correctly_benign_classified_indices(y_jsma, y_pred_jsma)
misclassified_as_benign_due_attack_indices_jsma = get_misclassified_as_benign_due_attack_indices(y_jsma, y_pred_jsma, y_pred_adv_jsma)

# TODO: is it correct to only include the samples that are correctly classified from the IDS?
correctly_benign_classified_indices_pgd = get_correctly_benign_classified_indices(y_pgd, y_pred_pgd)
misclassified_as_benign_due_attack_indices_pgd = get_misclassified_as_benign_due_attack_indices(y_pgd, y_pred_pgd, y_pred_adv_pgd)

print(f"Correctly classified as BENIGN from the IDS: {len(correctly_benign_classified_indices)} | Indices: {correctly_benign_classified_indices[:3]}")
print(f"    Correctly classified as BENIGN from the IDS (FGSM): {len(correctly_benign_classified_indices_fgsm)} | Indices: {correctly_benign_classified_indices_fgsm[:3]}")
print(f"    ATTACK sample misclassified as BENIGN due to adversarial attack (FGSM): {len(misclassified_as_benign_due_attack_indices_fgsm)} | Indices: {misclassified_as_benign_due_attack_indices_fgsm[:3]}")
print(f"        Correctly classified as BENIGN from the IDS (CW): {len(correctly_benign_classified_indices_cw)} | Indices: {correctly_benign_classified_indices_cw[:3]}")
print(f"        ATTACK sample misclassified as BENIGN due to adversarial attack (CW): {len(misclassified_as_benign_due_attack_indices_cw)} | Indices: {misclassified_as_benign_due_attack_indices_cw[:3]}")
print(f"            Correctly classified as BENIGN from the IDS (PGD): {len(correctly_benign_classified_indices_pgd)} | Indices: {correctly_benign_classified_indices_pgd[:3]}")
print(f"            ATTACK sample misclassified as BENIGN due to adversarial attack (PGD): {len(misclassified_as_benign_due_attack_indices_pgd)} | Indices: {misclassified_as_benign_due_attack_indices_pgd[:3]}")
print(f"                Correctly classified as BENIGN from the IDS (JSMA): {len(correctly_benign_classified_indices_jsma)} | Indices: {correctly_benign_classified_indices_jsma[:3]}")
print(f"                ATTACK sample misclassified as BENIGN due to adversarial attack (JSMA): {len(misclassified_as_benign_due_attack_indices_jsma)} | Indices: {misclassified_as_benign_due_attack_indices_jsma[:3]}")

Correctly classified as BENIGN from the IDS: 4426 | Indices: Index([36792, 80908, 47406], dtype='int64')
    Correctly classified as BENIGN from the IDS (FGSM): 4433 | Indices: Index([7703, 22457, 63618], dtype='int64')
    ATTACK sample misclassified as BENIGN due to adversarial attack (FGSM): 3577 | Indices: Index([58311, 25104, 103581], dtype='int64')
        Correctly classified as BENIGN from the IDS (CW): 4414 | Indices: Index([114205, 61838, 66711], dtype='int64')
        ATTACK sample misclassified as BENIGN due to adversarial attack (CW): 762 | Indices: Index([67481, 85497, 74557], dtype='int64')
            Correctly classified as BENIGN from the IDS (PGD): 4498 | Indices: Index([30869, 123704, 113616], dtype='int64')
            ATTACK sample misclassified as BENIGN due to adversarial attack (PGD): 3852 | Indices: Index([54227, 35893, 13435], dtype='int64')
                Correctly classified as BENIGN from the IDS (JSMA): 4423 | Indices: Index([25100, 113779, 90291], dtype

In [145]:
# import functions.visualizer as visualizer
# import importlib
# importlib.reload(visualizer)

# visualizer.visualize_data_distribution(X_train.loc[correctly_benign_classified_indices], 'Normal Data', X_adv_fgsm.loc[misclassified_as_benign_due_attack_indices_fgsm], 'Adversarial Data')
# # visualizer.visualize_data_distribution(X_train.loc[misclassified_as_benign_due_attack_indices], 'Normal Data', X_adv.loc[misclassified_as_benign_due_attack_indices], 'Adversarial Data', side_by_side=True)

## Explainer

In [146]:
import functions.explainer as exp
import importlib
importlib.reload(exp)

explainer = exp.generate_shap_explainer(ids_model, X_train)

shap_values_df = exp.generate_shap_values(explainer, X_normal)
print(f"Generate Explanations | Indices: {shap_values_df.index[:5]}... | Shape: {shap_values_df.shape}")

shap_values_adv_df_fgsm = exp.generate_shap_values(explainer, X_adv_fgsm)
print(f"Generate FGSM Adversarial Explanations | Indices: {shap_values_adv_df_fgsm.index[:5]}... | Shape: {shap_values_adv_df_fgsm.shape}")

shap_values_adv_df_cw = exp.generate_shap_values(explainer, X_adv_cw)
print(f"Generate CW Adversarial Explanations | Indices: {shap_values_adv_df_cw.index[:5]}... | Shape: {shap_values_adv_df_cw.shape}")

shap_values_adv_df_jsma = exp.generate_shap_values(explainer, X_adv_jsma)
print(f"Generate JSMA Adversarial Explanations | Indices: {shap_values_adv_df_jsma.index[:5]}... | Shape: {shap_values_adv_df_jsma.shape}")

shap_values_adv_df_pgd = exp.generate_shap_values(explainer, X_adv_pgd)
print(f"Generate PGD Adversarial Explanations | Indices: {shap_values_adv_df_pgd.index[:5]}... | Shape: {shap_values_adv_df_pgd.shape}")

PermutationExplainer explainer: 9001it [03:58, 36.14it/s]                          


Generate Explanations | Indices: Index([36792, 41605, 80908, 47406, 128924], dtype='int64')... | Shape: (9000, 40)


PermutationExplainer explainer: 9001it [04:13, 34.06it/s]                          


Generate FGSM Adversarial Explanations | Indices: Index([58311, 7703, 25104, 22457, 103581], dtype='int64')... | Shape: (9000, 40)


PermutationExplainer explainer: 9001it [04:03, 35.40it/s]                          


Generate CW Adversarial Explanations | Indices: Index([67481, 114205, 61838, 66711, 85497], dtype='int64')... | Shape: (9000, 40)


PermutationExplainer explainer: 9001it [04:04, 35.23it/s]                          


Generate JSMA Adversarial Explanations | Indices: Index([25100, 25692, 9516, 17747, 121254], dtype='int64')... | Shape: (9000, 40)


PermutationExplainer explainer: 9001it [04:10, 34.54it/s]                          


Generate PGD Adversarial Explanations | Indices: Index([30869, 123704, 60521, 54227, 35893], dtype='int64')... | Shape: (9000, 40)


In [147]:
# import functions.visualizer as visualizer
# import importlib
# importlib.reload(visualizer)

# visualizer.visualize_data_distribution(shap_values_df.loc[correctly_benign_classified_indices], 'Normal Explanations', shap_values_adv_df_fgsm.loc[misclassified_as_benign_due_attack_indices_fgsm], 'Adversarial Explanations')

In [148]:
# exp.plot_shap_summary_comparison(shap_values_df.loc[correctly_benign_classified_indices].values, X_train.loc[correctly_benign_classified_indices], shap_values_adv_df.loc[misclassified_as_benign_due_attack_indices].values, X_adv.loc[misclassified_as_benign_due_attack_indices], 6, title='Normal vs Adversarial Explanations of Benign Samples')

In [193]:
import pandas as pd
adv_concat_correctly_benign_classified_shaps = pd.concat([
    shap_values_adv_df_fgsm.loc[correctly_benign_classified_indices_fgsm],
    shap_values_adv_df_pgd.loc[correctly_benign_classified_indices_pgd],
    ], axis=0)
# shap_values_df.loc[misclassified_as_benign_due_attack_indices]
adv_concat_misclassified_as_benign_shaps = pd.concat([
    shap_values_adv_df_fgsm.loc[misclassified_as_benign_due_attack_indices_fgsm], 
    shap_values_adv_df_pgd.loc[misclassified_as_benign_due_attack_indices_pgd],
    # shap_values_adv_df_cw.loc[misclassified_as_benign_due_attack_indices_cw],
    # shap_values_adv_df_jsma.loc[misclassified_as_benign_due_attack_indices_jsma],
    ], axis=0)

concat_correct_benign_shaps = pd.concat([
    shap_values_df.loc[correctly_benign_classified_indices], 
    shap_values_adv_df_cw.loc[correctly_benign_classified_indices_cw],
    shap_values_adv_df_jsma.loc[correctly_benign_classified_indices_jsma],
    ], axis=0)

In [194]:
class_samples = {
    'ADV CORRECT BENIGN': adv_concat_correctly_benign_classified_shaps,
    'ADV MISCLASSIFIED': adv_concat_misclassified_as_benign_shaps,
    'CW MISCLASSIFIED': shap_values_adv_df_cw.loc[misclassified_as_benign_due_attack_indices_cw],
    'JSMA MISCLASSIFIED': shap_values_adv_df_jsma.loc[misclassified_as_benign_due_attack_indices_jsma],
    'CORRECT BENIGN': concat_correct_benign_shaps, 
}

print(f" 'ADV CORRECT BENIGN' | Shape: {adv_concat_correctly_benign_classified_shaps.shape}")
print(f" 'ADV MISCLASSIFIED' | Shape: {adv_concat_misclassified_as_benign_shaps.shape}")
print(f" 'CW MISCLASSIFIED' | Shape: {shap_values_adv_df_cw.loc[misclassified_as_benign_due_attack_indices_cw].shape}")
print(f" 'JSMA MISCLASSIFIED' | Shape: {shap_values_adv_df_jsma.loc[misclassified_as_benign_due_attack_indices_jsma].shape}")
print(f" 'CORRECT BENIGN' | Shape: {concat_correct_benign_shaps.shape}")

 'ADV CORRECT BENIGN' | Shape: (8931, 40)
 'ADV MISCLASSIFIED' | Shape: (7429, 40)
 'CW MISCLASSIFIED' | Shape: (762, 40)
 'JSMA MISCLASSIFIED' | Shape: (4452, 40)
 'CORRECT BENIGN' | Shape: (13263, 40)


In [14]:
# from sklearn.decomposition import PCA
# from sklearn.manifold import TSNE
# import matplotlib.pyplot as plt
# import numpy as np

# # Optional: UMAP (must be installed separately)
# try:
#     import umap
#     UMAP_AVAILABLE = True
# except ImportError:
#     UMAP_AVAILABLE = False

# def plot_dim_reduction(X, y_onehot, class_labels, method="pca", **kwargs):
#     """
#     Plots dimensionality-reduced data using PCA, t-SNE, or UMAP.

#     Args:
#         X (ndarray or DataFrame): Feature matrix.
#         y_onehot (ndarray): One-hot encoded labels.
#         class_labels (list of str): Class label names.
#         method (str): 'pca', 'tsne', or 'umap'.
#         **kwargs: Additional arguments for the reducer (e.g., perplexity for t-SNE).
#     """
#     y_indices = np.argmax(y_onehot, axis=1)

#     if method == "pca":
#         reducer = PCA(n_components=2)
#     elif method == "tsne":
#         reducer = TSNE(n_components=2, random_state=42, **kwargs)
#     elif method == "umap":
#         if not UMAP_AVAILABLE:
#             raise ImportError("UMAP is not installed. Run: pip install umap-learn")
#         reducer = umap.UMAP(n_components=2, random_state=42, **kwargs)
#     else:
#         raise ValueError("Invalid method. Choose from 'pca', 'tsne', or 'umap'.")

#     X_reduced = reducer.fit_transform(X)

#     # Plot
#     plt.figure(figsize=(10, 8))
#     for i, label in enumerate(class_labels):
#         idx = y_indices == i
#         plt.scatter(X_reduced[idx, 0], X_reduced[idx, 1], label=label, alpha=0.6)

#     plt.title(f"{method.upper()} Projection of SHAP Values")
#     plt.xlabel("Component 1")
#     plt.ylabel("Component 2")
#     plt.legend()
#     plt.grid(True)
#     plt.tight_layout()
#     plt.show()

# # Choose method: 'pca', 'tsne', or 'umap'
# # plot_dim_reduction(X, y, class_labels, method="tsne", perplexity=30)
# plot_dim_reduction(X, y, class_labels, method="pca")

## Detector

In [195]:
import functions.detector as det
import importlib
importlib.reload(det)

# create dataframe
class_samples = {
    'ADV CORRECT BENIGN': adv_concat_correctly_benign_classified_shaps,
    'ADV MISCLASSIFIED': adv_concat_misclassified_as_benign_shaps,
    'CW MISCLASSIFIED': shap_values_adv_df_cw.loc[misclassified_as_benign_due_attack_indices_cw],
    'JSMA MISCLASSIFIED': shap_values_adv_df_jsma.loc[misclassified_as_benign_due_attack_indices_jsma],
    'CORRECT BENIGN': concat_correct_benign_shaps, 
}
X, y = det.build_detector_dataset(class_samples)
print(X.shape, y.shape)

# split data
X_train_det, X_test_det, y_train_det, y_test_det = train_test_split(X, y, test_size=0.1, random_state=1503)
print(X_train_det.shape, X_test_det.shape, y_train_det.shape, y_test_det.shape)

# build detector
detector = det.build_detector(X_train_det, y_train_det, X_test_det, y_test_det)

# store detector
det.store(detector, 'ny')

Generated dataset: X shape (34837, 40), y shape (34837, 5)
(34837, 40) (34837, 5)
(31353, 40) (3484, 40) (31353, 5) (3484, 5)
Epoch 1/20


2025-04-10 16:12:21.664856: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}


[1m628/628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.6147 - loss: 1.0186 - val_accuracy: 0.9351 - val_loss: 0.2227
Epoch 2/20
[1m628/628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 988us/step - accuracy: 0.9041 - loss: 0.3064 - val_accuracy: 0.9520 - val_loss: 0.1501
Epoch 3/20
[1m628/628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9274 - loss: 0.2326 - val_accuracy: 0.9542 - val_loss: 0.1364
Epoch 4/20
[1m628/628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9348 - loss: 0.2099 - val_accuracy: 0.9584 - val_loss: 0.1269
Epoch 5/20
[1m628/628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9383 - loss: 0.1960 - val_accuracy: 0.9598 - val_loss: 0.1221
Epoch 6/20
[1m628/628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9406 - loss: 0.1804 - val_accuracy: 0.9606 - val_loss: 0.1184
Epoch 7/20
[1m628/628[0m [32m━━━━━

2025-04-10 16:12:36.810338: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}


In [196]:
# Evaluate detector
y_pred_det = det.predict(detector, X_test_det, y.columns)
print(f"Predictions on Detector | Indices: {y_pred_det.index[:5]}... | Shape: {y_pred_det.shape}")

# Convert one-hot to class indices
y_true_indices = np.argmax(y_test_det, axis=1)
y_true_indices_pd = pd.Series(y_true_indices, index=y_test_det.index)
y_pred_indices = np.argmax(y_pred_det, axis=1)
y_pred_indices_pd = pd.Series(y_pred_indices, index=y_pred_det.index)
print(y_true_indices[:5], y_pred_indices[:5])

from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Compute Accuracy
accuracy = accuracy_score(y_true_indices, y_pred_indices)
print(f"Overall Accuracy: {accuracy:.4f}")

# Compute Classification Report for overall classification
print("Classification Report (Overall):")
print(classification_report(y_true_indices, y_pred_indices, target_names=y.columns, zero_division=0))

[1m104/109[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 486us/step

[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 567us/step
Predictions on Detector | Indices: Index([24807, 24366, 7586, 134723, 22511], dtype='int64')... | Shape: (3484, 5)
[1 4 1 3 4] [1 4 1 3 4]
Overall Accuracy: 0.9656
Classification Report (Overall):
                    precision    recall  f1-score   support

ADV CORRECT BENIGN       0.92      0.98      0.95       947
 ADV MISCLASSIFIED       0.97      0.89      0.93       724
  CW MISCLASSIFIED       0.81      0.91      0.86        69
JSMA MISCLASSIFIED       0.99      0.99      0.99       420
    CORRECT BENIGN       1.00      0.99      0.99      1324

          accuracy                           0.97      3484
         macro avg       0.94      0.95      0.94      3484
      weighted avg       0.97      0.97      0.97      3484



---
## Manual Evaluation
We perform the whole two-stages approach on new unseen data and evaluate the following scores:
- Recall
- Precision
- Accuracy
- F1 Score

In [158]:
import functions.data_preprocessing as dp
import importlib
importlib.reload(dp)

# exclude previously used samples
dataset_eval_excluded = dataset.drop(index=used_indices)

X_eval, y_eval, used_eval_indices = dp.preprocess_data(dataset_eval_excluded, encoding_type, normalizer, zero_columns, sample_size=500, random_sample_state=17)
print(f"Generate Features | Indices: {X_eval.index[:5]}... | Shape: {X_eval.shape}")
print(f"Generate Labels | Indices: {y_eval.index[:5]}... | Shape: {y_eval.shape}")
print(y_eval.value_counts())

-- Preprocessing data --
--- Splitting labels and features ---
--- Encoding labels as binary one-hot values ---
--- Sampling balanced data ---
Sample to shape: (1000, 40)
--- Normalizing features using MinMaxScaler ---
Generate Features | Indices: Index([68520, 129860, 118208, 50067, 32481], dtype='int64')... | Shape: (1000, 40)
Generate Labels | Indices: Index([68520, 129860, 118208, 50067, 32481], dtype='int64')... | Shape: (1000, 2)
BENIGN  ATTACK
False   True      500
True    False     500
Name: count, dtype: int64


### Load Detector

In [None]:
# import functions.detector as det
# importlib.reload(det)
# detector = det.load('ny')

# explainer = exp.generate_shap_explainer(ids_model, X_train)

# all_features = dataset.drop(columns=[' Label'])
# art_model = ag.convert_to_art_model(ids_model, X_train) # TODO: use all features for generating art model

In [215]:
import functions.attack_generator as ag
importlib.reload(ag)

X_adv_eval = ag.generate_cw_attacks_parallel(art_model, X_eval, target_label=1, num_cores=num_cores)
print(f"Create Adversarial Attack | Indices: {X_adv_eval.index[:5]}... | Shape: {X_adv_eval.shape}")

y_pred_adv_eval = ag.evaluate_art_model(art_model, X_adv_eval, y_eval)
print(f"Predictions on Adversarial Attacks | Indices: {y_pred_adv_eval.index[:5]}... | Shape: {y_pred_adv_eval.shape}")

Running attack using 24 CPU cores...

Process 204772 is generating adversarial examples for batch of size 41 
Process 204775 is generating adversarial examples for batch of size 41 
Process 204770 is generating adversarial examples for batch of size 41 
Process 204774 is generating adversarial examples for batch of size 41 
Process 204771 is generating adversarial examples for batch of size 41 
Process 204773 is generating adversarial examples for batch of size 41 
Process 204776 is generating adversarial examples for batch of size 41 
Process 204778 is generating adversarial examples for batch of size 41 
Process 204777 is generating adversarial examples for batch of size 41 
Process 204779 is generating adversarial examples for batch of size 41 
Process 204781 is generating adversarial examples for batch of size 41 
Process 204780 is generating adversarial examples for batch of size 41 
Process 204783 is generating adversarial examples for batch of size 41 
Process 204785 is generati

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/41 [00:00<?, ?it/s]

C&W L_2:   0%|          | 0/57 [00:00<?, ?it/s]

Create Adversarial Attack | Indices: Index([68520, 129860, 118208, 50067, 32481], dtype='int64')... | Shape: (1000, 40)
Accuracy: 90.80%
              precision    recall  f1-score   support

      ATTACK     0.9976    0.8180    0.8989       500
      BENIGN     0.8458    0.9980    0.9156       500

    accuracy                         0.9080      1000
   macro avg     0.9217    0.9080    0.9072      1000
weighted avg     0.9217    0.9080    0.9072      1000

Confusion Matrix: Positive == BENIGN
TN: 409, FP: 91, FN: 1, TP: 499
Predictions on Adversarial Attacks | Indices: Index([68520, 129860, 118208, 50067, 32481], dtype='int64')... | Shape: (1000, 2)


In [216]:
import functions.explainer as exp
importlib.reload(exp)
X_eval_adv_shap_values_df = exp.generate_shap_values(explainer, X_adv_eval)

print(f"Create Explanations | Indices: {X_eval_adv_shap_values_df.index[:5]}... | Shape: {X_eval_adv_shap_values_df.shape}")

PermutationExplainer explainer: 1001it [00:27, 23.58it/s]                         

Create Explanations | Indices: Index([68520, 129860, 118208, 50067, 32481], dtype='int64')... | Shape: (1000, 40)





In [32]:
# # normal shap values

# y_pred_adv_eval = ag.evaluate_art_model(art_model, X_eval, y_eval)
# print(f"Predictions on Adversarial Attacks | Indices: {y_pred_adv_eval.index[:5]}... | Shape: {y_pred_adv_eval.shape}")

# X_eval_adv_shap_values_df = exp.generate_shap_values(explainer, X_eval)

# print(f"Create Explanations | Indices: {X_eval_adv_shap_values_df.index[:5]}... | Shape: {X_eval_adv_shap_values_df.shape}")

In [219]:
benign_eval_idx = y_eval[y_eval['BENIGN'] == 1].index
attack_eval_idx = y_eval[y_eval['ATTACK'] == 1].index

pred_benign_idx = y_pred_adv_eval[y_pred_adv_eval['BENIGN'] == 1].index
pred_attack_idx = y_pred_adv_eval[y_pred_adv_eval['ATTACK'] == 1].index

# predict
X_eval_detector = X_eval_adv_shap_values_df.loc[pred_benign_idx]

# TODO: uncommend want to find attacks
# print(f" All Adversarial Samples classified as BENIGN: {X_eval_detector.shape}")
# misclassified_idx = attack_eval_idx.intersection(pred_benign_idx)
# X_eval_detector = X_eval_detector.loc[misclassified_idx]
# print(f" Attack Samples misclassified through Adversarial Attack: {X_eval_detector.shape}")

columns = ['ADV CORRECT BENIGN', 'ADV MISCLASSIFIED', 'CW MISCLASSIFIED', 'JSMA MISCLASSIFIED', 'CORRECT BENIGN']
# columns = ['ADV CORRECT BENIGN', 'ADV MISCLASSIFIED', 'CORRECT BENIGN']
y_pred_eval_detector = det.predict(detector, X_eval_detector, columns)

# correctly_classified_det_idx = y_pred_eval_detector[y_pred_eval_detector['BENIGN'] == 1].index
# misclassified_det_idx = y_pred_eval_detector[y_pred_eval_detector['ATTACK'] == 1].index

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


2025-04-10 16:22:20.291858: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}


In [220]:
pred_class_0 = y_pred_eval_detector[y_pred_eval_detector['ADV CORRECT BENIGN'] == 1].index 
pred_class_1 = y_pred_eval_detector[y_pred_eval_detector['ADV MISCLASSIFIED'] == 1].index
pred_class_2 = y_pred_eval_detector[y_pred_eval_detector['CW MISCLASSIFIED'] == 1].index
pred_class_3 = y_pred_eval_detector[y_pred_eval_detector['JSMA MISCLASSIFIED'] == 1].index
pred_class_4 = y_pred_eval_detector[y_pred_eval_detector['CORRECT BENIGN'] == 1].index
print(f"Predicted Class 0: {len(pred_class_0)}")
print(f"Predicted Class 1: {len(pred_class_1)}")
print(f"Predicted Class 2: {len(pred_class_2)}")
print(f"Predicted Class 3: {len(pred_class_3)}")
print(f"Predicted Class 4: {len(pred_class_4)}") # TODO: .intersection(benign_eval_idx)
# TODO: 1 sample missing for normal BENIGN samples

Predicted Class 0: 0
Predicted Class 1: 0
Predicted Class 2: 81
Predicted Class 3: 4
Predicted Class 4: 504


In [221]:
# After IDS Stage
TN = len(attack_eval_idx.intersection(pred_attack_idx)) # IDS classifies 'ATTACK' samples as 'ATTACK'
print(f"IDS classifies 'ATTACK' samples as 'ATTACK': {TN}")
FN = len(benign_eval_idx.intersection(pred_attack_idx)) # IDS classifies 'BENIGN' samples as 'ATTACK'
print(f"IDS mis-classifies 'BENIGN' samples as 'ATTACK': {FN}")

# TODO: define correct and misclassified classes for each attack:
correctly_classified_det_idx = y_pred_eval_detector.loc[pred_class_4].index # Detector classifies 'BENIGN' samples as correct 'BENIGN'
misclassified_det_idx = y_pred_eval_detector.loc[pred_class_2].index # Detector classifies 'ATTACK' samples as misclassified due to 'ATTACK'

# After Detector Stage
TP = len(benign_eval_idx.intersection(correctly_classified_det_idx)) # Detector classifies 'BENIGN' samples as correct 'BENIGN'
print(f"Detector classifies 'BENIGN' samples as correct 'BENIGN': {TP}")
FP = len(attack_eval_idx.intersection(correctly_classified_det_idx)) # Detector classifies 'ATTACK' samples as correct 'BENIGN'
print(f"Detector mis-classifies 'ATTACK' samples as correct 'BENIGN': {FP}")

TN_2 = len(attack_eval_idx.intersection(misclassified_det_idx)) # Detector classifies 'ATTACK' samples as misclassified due to 'ATTACK'
print(f"Detector classifies 'ATTACK' samples as misclassified due to 'ATTACK': {TN_2}")
FN_2 = len(benign_eval_idx.intersection(misclassified_det_idx)) # Detector classifies 'BENIGN' samples as misclassified due to 'ATTACK'
print(f"Detector classifies 'BENIGN' samples as misclassified due to 'ATTACK': {FN_2}")

# Sum up TN & FN from both stages
TN = TN + TN_2
FN = FN + FN_2

print(f"TP: {TP}")
print(f"FP: {FP}")
print(f"TN: {TN}")
print(f"FN: {FN}")
print(f"Sum: {TP + FP + TN + FN}")

IDS classifies 'ATTACK' samples as 'ATTACK': 409
IDS mis-classifies 'BENIGN' samples as 'ATTACK': 1
Detector classifies 'BENIGN' samples as correct 'BENIGN': 485
Detector mis-classifies 'ATTACK' samples as correct 'BENIGN': 19
Detector classifies 'ATTACK' samples as misclassified due to 'ATTACK': 68
Detector classifies 'BENIGN' samples as misclassified due to 'ATTACK': 13
TP: 485
FP: 19
TN: 477
FN: 14
Sum: 995


In [222]:
from sklearn.metrics import classification_report

print(f"Global Accuracy: {(TP + TN) / (TP + FP + TN + FN) * 100:.2f}%")

# Construct a fake y_true and y_pred to match sklearn's classification_report format
y_true = np.array([1] * TP + [0] * TN + [1] * FN + [0] * FP)  # True labels
y_pred = np.array([1] * TP + [0] * TN + [0] * FN + [1] * FP)  # Predicted labels

# Generate classification report
report = classification_report(y_true, y_pred, target_names=['ATTACK', 'BENIGN'], digits=4) # reverse labels because classification_report assumes first label is 0
print(report)

print(f"True Negative Rate: {TN/(TN+FP)*100:.2f}%")
print(f"False Positive Rate: {FP/(TN+FP)*100:.2f}%")
print(f"True Positive Rate: {TP/(TP+FN)*100:.2f}%")
print(f"False Negative Rate: {FN/(TP+FN)*100:.2f}%")

Global Accuracy: 96.68%
              precision    recall  f1-score   support

      ATTACK     0.9715    0.9617    0.9666       496
      BENIGN     0.9623    0.9719    0.9671       499

    accuracy                         0.9668       995
   macro avg     0.9669    0.9668    0.9668       995
weighted avg     0.9669    0.9668    0.9668       995

True Negative Rate: 96.17%
False Positive Rate: 3.83%
True Positive Rate: 97.19%
False Negative Rate: 2.81%


In [36]:
sample_indices = y_pred_eval_detector.index
print(f"#Attack-Samples: {len(sample_indices)}")

detected_indices = np.unique(np.concatenate((pred_class_0, pred_class_1)))
print(f"Predicted indices: {len(detected_indices)}")

correct_benign_pred_indices = np.intersect1d(sample_indices, detected_indices)
print(f"Predicted Normal indices: {len(correct_benign_pred_indices)}")

normal_benign_misclassified_indices = np.setdiff1d(sample_indices, detected_indices)
print(f"Normal Misclassified indices: {len(normal_benign_misclassified_indices)}")

print(f"ADV Detection Rate: {len(correct_benign_pred_indices) / len(sample_indices):.4f}")
print(f"Misclassification Rate: {len(normal_benign_misclassified_indices) / len(sample_indices):.4f}")

#Attack-Samples: 1000
Predicted indices: 1000
Predicted Normal indices: 1000
Normal Misclassified indices: 0
ADV Detection Rate: 1.0000
Misclassification Rate: 0.0000
