# Prototype - Adversarial Detector
---

## Load Required Model & Data

In [3]:
# load model
from tensorflow import keras

model = keras.models.load_model('../CICIDS2017/models/ids_dnn.keras')
model.summary()

In [2]:
# load data split
import pandas as pd

X_test = pd.read_csv("../CICIDS2017/train_test_split/X_test_proto.csv")
y_test = pd.read_csv("../CICIDS2017/train_test_split/y_test_proto.csv")

print(X_test.shape, y_test.shape)


(2000, 70) (2000, 2)


In [4]:
# load adversarail samples

X_test_adv_fgsm = pd.read_csv("../CICIDS2017/adversarial_samples/X_test_adv_fgsm_proto.csv")
X_test_adv_hsj = pd.read_csv("../CICIDS2017/adversarial_samples/X_test_adv_hsj_proto.csv")

In [5]:
# load explanations

shap_values_df = pd.read_csv("../CICIDS2017/shap_values/shap_values_proto.csv")
print(shap_values_df.shape)
adv_shap_values_fgsm_df = pd.read_csv("../CICIDS2017/shap_values/adv_shap_values_fgsm_proto.csv")
print(adv_shap_values_fgsm_df.shape)
adv_shap_values_hsj_df = pd.read_csv("../CICIDS2017/shap_values/adv_shap_values_hsj_proto.csv")
print(adv_shap_values_hsj_df.shape)

(2000, 70)
(2000, 70)
(2000, 70)


---
## First Stage

### Normal Prediction

In [6]:
y_pred = model.predict(X_test)
y_pred_adv_fgsm = model.predict(X_test_adv_fgsm)
y_pred_adv_hsj = model.predict(X_test_adv_hsj)

# evaluate model
from sklearn.metrics import accuracy_score

print(f"Accuracy on test set: {accuracy_score(y_test, y_pred.round())*100:.2f}%")
print(f"Accuracy on adversarial samples (FGSM): {accuracy_score(y_test, y_pred_adv_fgsm.round())*100:.2f}%")
print(f"Accuracy on adversarial samples (HSJ): {accuracy_score(y_test, y_pred_adv_hsj.round())*100:.2f}%")

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 713us/step
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 913us/step
Accuracy on test set: 98.20%
Accuracy on adversarial samples (FGSM): 76.55%
Accuracy on adversarial samples (HSJ): 80.95%


### Find Benign Explanations

In [7]:
import numpy as np

# convert to binary, 1 == benign, 0 == malicious -> easier to work on
y_test_binary = np.array(y_test).argmin(axis=1)
y_pred_binary = y_pred.argmin(axis=1)
adv_fgsm_pred_binary = y_pred_adv_fgsm.argmin(axis=1)
adv_hsj_pred_binary = y_pred_adv_hsj.argmin(axis=1)

In [8]:
print(f"Correct Labels:                           {y_test_binary[:10]}")

# find indices of correctly classified benign samples
correct_benign_classified_indices = np.where((y_test_binary == 1) & (y_pred_binary == 1))[0]
print(f"Predicted Labels:                         {y_pred_binary[:10]}")
print(f"Correctly classified benign samples:      {correct_benign_classified_indices[:10]}")

# find indices of adversarial samples that were classified as benign
# FGSM
adv_fgsm_benign_indices = np.where(adv_fgsm_pred_binary == 1)[0]
print(f"Predicted Labels (FGSM):                  {adv_fgsm_pred_binary[:10]}")
print(f"Adversarial samples classified as benign: {adv_fgsm_benign_indices[:10]}")
# HSJ
adv_hsj_benign_indices = np.where(adv_hsj_pred_binary == 1)[0]
print(f"Predicted Labels (HSJ):                   {adv_hsj_pred_binary[:10]}")
print(f"Adversarial samples classified as benign: {adv_hsj_benign_indices[:10]}")

Correct Labels:                           [1 1 1 1 1 1 1 1 1 0]
Predicted Labels:                         [1 1 1 1 1 1 1 1 1 0]
Correctly classified benign samples:      [ 0  1  2  3  4  5  6  7  8 10]
Predicted Labels (FGSM):                  [1 1 1 1 1 0 1 1 1 1]
Adversarial samples classified as benign: [ 0  1  2  3  4  6  7  8  9 10]
Predicted Labels (HSJ):                   [1 1 1 1 1 1 1 1 1 1]
Adversarial samples classified as benign: [0 1 2 3 4 5 6 7 8 9]


---
## Build Detector

### Build Dataframe

In [9]:
# get shap values for correctly classified benign samples
print("Correctly classified benign samples:")
shap_values_benign_df = shap_values_df.iloc[correct_benign_classified_indices]
print(shap_values_benign_df.shape)

# get shap values for adversarial samples classified as benign
# FGSM
print("Adversarial samples classified as benign (FGSM):")
adv_shap_values_fgsm_benign_df = adv_shap_values_fgsm_df.iloc[adv_fgsm_benign_indices]
print(adv_shap_values_fgsm_benign_df.shape)
# HSJ
print("Adversarial samples classified as benign (HSJ):")
adv_shap_values_hsj_benign_df = adv_shap_values_hsj_df.iloc[adv_hsj_benign_indices]
print(adv_shap_values_hsj_benign_df.shape)

# build X
print("X:")
X = pd.concat([shap_values_benign_df, adv_shap_values_fgsm_benign_df, adv_shap_values_hsj_benign_df], axis=0)
print(X.shape)

# build y 
# normal: [1, 0], adv: [0, 1]
print("y:")
y_normal = np.array([[1, 0]] * shap_values_benign_df.shape[0])
y_adv = np.array([[0, 1]] * (adv_shap_values_fgsm_benign_df.shape[0] + adv_shap_values_hsj_benign_df.shape[0]))
print(y_normal.shape, y_adv.shape)
print(y_normal[0], y_adv[0])
y = np.concatenate([y_normal, y_adv], axis=0)
print(y.shape)

Correctly classified benign samples:
(1596, 70)
Adversarial samples classified as benign (FGSM):
(1894, 70)
Adversarial samples classified as benign (HSJ):
(2000, 70)
X:
(5490, 70)
y:
(1596, 2) (3894, 2)
[1 0] [0 1]
(5490, 2)


### Shuffle Data

In [10]:
# TODO: maybe not needed -> random_state in train-test-split
# shuffle both sets in the same way
from sklearn.utils import shuffle

X, y = shuffle(X, y, random_state=187)

### Split Train/Test Set

In [11]:
# spit data into train and test set
from sklearn.model_selection import train_test_split

X_train_detector, X_test_detector, y_train_detector, y_test_detector = train_test_split(X, y, test_size=0.25, random_state=187)
print(X_train_detector.shape, X_test_detector.shape, y_train_detector.shape, y_test_detector.shape)

(4117, 70) (1373, 70) (4117, 2) (1373, 2)


### Model Creation

In [13]:
# Create DNN model from tensorflow
import setuptools.dist # needed to avoid error
import tensorflow as tf
from tensorflow import keras

# keras model for handling one hot encoded labels -> needed for attack creation
model = keras.Sequential()
model.add(keras.layers.Dense(50, input_dim=X_train_detector.shape[1], activation='relu')) # hidden layer
model.add(keras.layers.Dense(30, activation='relu')) # hidden layer
model.add(keras.layers.Dense(10, activation='relu')) # hidden layer
model.add(keras.layers.Dense(y_train_detector.shape[1], activation='softmax'))  # Output layer with softmax for one-hot encoding

# set learning rate
opt = keras.optimizers.Adam(learning_rate=0.001)
# compile the keras model
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

### Model Training

In [14]:
# fit the keras model on the dataset
model.fit(X_train_detector, y_train_detector, validation_data=(X_test_detector, y_test_detector), epochs=10, batch_size=100)

Epoch 1/10
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7005 - loss: 0.6769 - val_accuracy: 0.6824 - val_loss: 0.6298
Epoch 2/10
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7180 - loss: 0.5960 - val_accuracy: 0.6824 - val_loss: 0.5909
Epoch 3/10
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7169 - loss: 0.5474 - val_accuracy: 0.6824 - val_loss: 0.5324
Epoch 4/10
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7229 - loss: 0.4747 - val_accuracy: 0.6846 - val_loss: 0.4217
Epoch 5/10
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7584 - loss: 0.3712 - val_accuracy: 0.9170 - val_loss: 0.3036
Epoch 6/10
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9358 - loss: 0.2589 - val_accuracy: 0.9621 - val_loss: 0.2154
Epoch 7/10
[1m42/42[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7f6510720ef0>

---
## Evaluation

In [15]:
y_pred_detector = model.predict(X_test_detector)
y_pred_detector = (y_pred_detector > 0.5)

y_test_detector_binary = np.array(y_test_detector).argmin(axis=1)
y_pred_detector_binary = y_pred_detector.argmin(axis=1)
print(y_test_detector_binary.shape, y_pred_detector_binary.shape)

[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
(1373,) (1373,)


In [16]:
# print accuracy, precision, recall and f1-score
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score


# print accuracy
print(f"Global Accuracy: {accuracy_score(y_test_detector, y_pred_detector)*100:.2f}%")


# precision, recall, f1-score
print(classification_report(y_test_detector, y_pred_detector, target_names=['NORMAL', 'ADVERSARIAL ATTACK']))

# print true positive rate, false positive rate, true negative rate, false negative rate
from sklearn.metrics import confusion_matrix

tn, fp, fn, tp = confusion_matrix(y_test_detector_binary, y_pred_detector_binary).ravel()
print(f"True Negative Rate: {tn/(tn+fp)*100:.2f}%")
print(f"False Positive Rate: {fp/(tn+fp)*100:.2f}%")
print(f"True Positive Rate: {tp/(tp+fn)*100:.2f}%")
print(f"False Negative Rate: {fn/(tp+fn)*100:.2f}%")

Global Accuracy: 99.64%
                    precision    recall  f1-score   support

            NORMAL       1.00      0.99      0.99       436
ADVERSARIAL ATTACK       1.00      1.00      1.00       937

         micro avg       1.00      1.00      1.00      1373
         macro avg       1.00      1.00      1.00      1373
      weighted avg       1.00      1.00      1.00      1373
       samples avg       1.00      1.00      1.00      1373

True Negative Rate: 99.79%
False Positive Rate: 0.21%
True Positive Rate: 99.31%
False Negative Rate: 0.69%


In [17]:
# show number of false positives and false negatives
print(f"False Positives: {fp}")
print(f"False Negatives: {fn}")


False Positives: 2
False Negatives: 3


In [18]:
# show false positive samples
fp_indices = np.where((y_test_detector_binary == 0) & (y_pred_detector_binary == 1))[0]
print(fp_indices)
# show false negative samples
fn_indices = np.where((y_test_detector_binary == 1) & (y_pred_detector_binary == 0))[0]
print(fn_indices)

[155 664]
[774 891 899]


---
## Explanations

In [None]:
import shap 

# init shap explainer
explainer = shap.Explainer(model, X_test_detector, feature_names=X_test_detector.columns)
shap_values = explainer(X_test_detector)

print(shap_values.shape)

PermutationExplainer explainer:  13%|█▎        | 172/1373 [00:13<01:18, 15.22it/s]

In [None]:
# convert shap values to correct format
print(shap_values.shape) # one shap value per feature per sample per class
shap_values = shap_values[:, :, 0] # 1 == Benign, 0 == Malicious
print(shap_values.shape) # one shap value per feature per sample