# Prototype - Adversarial Detector
---

## Load Required Model & Data

In [2]:
# load model
from tensorflow import keras

model = keras.models.load_model('../CICIDS2017/models/ids_dnn.keras')
model.summary()

In [3]:
# load data split
import pandas as pd

X_test = pd.read_csv("../CICIDS2017/train_test_split/X_test_small.csv")
y_test = pd.read_csv("../CICIDS2017/train_test_split/y_test_small.csv")

print(X_test.shape, y_test.shape)


(500, 70) (500, 2)


In [4]:
# load adversarail samples

X_test_adv_fgsm = pd.read_csv("../CICIDS2017/adversarial_samples/X_test_small_adv_fgsm.csv")

In [5]:
# load explanations

shap_values_df = pd.read_csv("../CICIDS2017/shap_values/shap_values_small.csv")
print(shap_values_df.shape)
adv_shap_values_fgsm_df = pd.read_csv("../CICIDS2017/shap_values/adv_shap_values_fgsm_small.csv")
print(adv_shap_values_fgsm_df.shape)

(500, 70)
(500, 70)


---
## First Stage

### Normal Prediction

In [6]:
y_pred = model.predict(X_test)
y_pred_adv_fgsm = model.predict(X_test_adv_fgsm)

# evaluate model
from sklearn.metrics import accuracy_score

print(f"Accuracy on test set: {accuracy_score(y_test, y_pred.round())*100}%")
print(f"Accuracy on adversarial samples (FGSM): {accuracy_score(y_test, y_pred_adv_fgsm.round())*100}%")

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Accuracy on test set: 98.0%
Accuracy on adversarial samples (FGSM): 72.6%


### Find Benign Explanations

In [7]:
import numpy as np

# convert to binary, 1 == benign, 0 == malicious -> easier to work on
y_test_binary = np.array(y_test).argmin(axis=1)
y_pred_binary = y_pred.argmin(axis=1)
adv_fgsm_pred_binary = y_pred_adv_fgsm.argmin(axis=1)

In [8]:
print(f"Correct Labels:                           {y_test_binary[:10]}")

# find indices of correctly classified benign samples
correct_benign_classified_indices = np.where((y_test_binary == 1) & (y_pred_binary == 1))[0]

print(f"Predicted Labels:                         {y_pred_binary[:10]}")
print(f"Correctly classified benign samples:      {correct_benign_classified_indices[:10]}")

# find indices of adversarial samples that were classified as benign
adv_fgsm_benign_indices = np.where(adv_fgsm_pred_binary == 1)[0]

print(f"Predicted Labels (FGSM):                  {adv_fgsm_pred_binary[:10]}")
print(f"Adversarial samples classified as benign: {adv_fgsm_benign_indices[:10]}")

Correct Labels:                           [1 1 1 1 1 1 1 1 1 0]
Predicted Labels:                         [1 1 1 1 1 1 1 1 1 0]
Correctly classified benign samples:      [ 0  1  2  3  4  5  6  7  8 10]
Predicted Labels (FGSM):                  [1 1 1 1 1 0 1 1 1 1]
Adversarial samples classified as benign: [ 0  1  2  3  4  6  7  8  9 10]


---
## Build Detector

### Build Dataframe

In [9]:
# get shap values for correctly classified benign samples
shap_values_benign_df = shap_values_df.iloc[correct_benign_classified_indices]
print(shap_values_benign_df.shape)

# get shap values for adversarial samples classified as benign
adv_shap_values_fgsm_benign_df = adv_shap_values_fgsm_df.iloc[adv_fgsm_benign_indices]
print(adv_shap_values_fgsm_benign_df.shape)

# build X
X = pd.concat([shap_values_benign_df, adv_shap_values_fgsm_benign_df], axis=0)
print(X.shape)

# build y 
# normal: [1, 0], adv: [0, 1]
y_normal = np.array([[1, 0]] * shap_values_benign_df.shape[0])
y_adv = np.array([[0, 1]] * adv_shap_values_fgsm_benign_df.shape[0])
print(y_normal.shape, y_adv.shape)
print(y_normal[0], y_adv[0])
y = np.concatenate([y_normal, y_adv], axis=0)
print(y.shape)

(409, 70)
(445, 70)
(854, 70)
(409, 2) (445, 2)
[1 0] [0 1]
(854, 2)


### Shuffle Data

In [10]:
# TODO: maybe not needed -> random_state in train-test-split
# shuffle both sets in the same way
from sklearn.utils import shuffle

X, y = shuffle(X, y, random_state=187)

### Split Train/Test Set

In [11]:
# spit data into train and test set
from sklearn.model_selection import train_test_split

X_train_detector, X_test_detector, y_train_detector, y_test_detector = train_test_split(X, y, test_size=0.25, random_state=187)
print(X_train_detector.shape, X_test_detector.shape, y_train_detector.shape, y_test_detector.shape)

(640, 70) (214, 70) (640, 2) (214, 2)


### Model Creation

In [None]:
# Create DNN model from tensorflow
import setuptools.dist # needed to avoid error
import tensorflow as tf
from tensorflow import keras

# keras model for handling one hot encoded labels -> needed for attack creation
model = keras.Sequential()
model.add(keras.layers.Dense(50, input_dim=X_train_detector.shape[1], activation='relu')) # hidden layer
model.add(keras.layers.Dense(30, activation='relu')) # hidden layer
model.add(keras.layers.Dense(10, activation='relu')) # hidden layer
model.add(keras.layers.Dense(y_train_detector.shape[1], activation='softmax'))  # Output layer with softmax for one-hot encoding

# set learning rate
opt = keras.optimizers.Adam(learning_rate=0.001)
# compile the keras model
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### Model Training

In [14]:
# fit the keras model on the dataset
model.fit(X_train_detector, y_train_detector, validation_data=(X_test_detector, y_test_detector), epochs=10, batch_size=100)

Epoch 1/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 60ms/step - accuracy: 0.5170 - loss: 0.6933 - val_accuracy: 0.5794 - val_loss: 0.6905
Epoch 2/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.5503 - loss: 0.6897 - val_accuracy: 0.5794 - val_loss: 0.6867
Epoch 3/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.5563 - loss: 0.6856 - val_accuracy: 0.5794 - val_loss: 0.6821
Epoch 4/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.5572 - loss: 0.6813 - val_accuracy: 0.6028 - val_loss: 0.6767
Epoch 5/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.5681 - loss: 0.6763 - val_accuracy: 0.6262 - val_loss: 0.6703
Epoch 6/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.5606 - loss: 0.6710 - val_accuracy: 0.6308 - val_loss: 0.6621
Epoch 7/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7f9ff8070c80>

---
## Evaluation

In [16]:
y_pred_detector = model.predict(X_test_detector)
y_pred_detector = (y_pred_detector > 0.5)

y_test_detector_binary = np.array(y_test_detector).argmin(axis=1)
y_pred_detector_binary = y_pred_detector.argmin(axis=1)
print(y_test_detector_binary.shape, y_pred_detector_binary.shape)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
(214,) (214,)


In [18]:
# print accuracy, precision, recall and f1-score
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score


# print accuracy
print(f"Global Accuracy: {accuracy_score(y_test_detector, y_pred_detector)*100:.2f}%")


# precision, recall, f1-score
print(classification_report(y_test_detector, y_pred_detector, target_names=['NORMAL', 'ADVERSARIAL ATTACK']))

# print true positive rate, false positive rate, true negative rate, false negative rate
from sklearn.metrics import confusion_matrix

tn, fp, fn, tp = confusion_matrix(y_test_detector_binary, y_pred_detector_binary).ravel()
print(f"True Negative Rate: {tn/(tn+fp)*100:.2f}%")
print(f"False Positive Rate: {fp/(tn+fp)*100:.2f}%")
print(f"True Positive Rate: {tp/(tp+fn)*100:.2f}%")
print(f"False Negative Rate: {fn/(tp+fn)*100:.2f}%")

Global Accuracy: 78.97%
                    precision    recall  f1-score   support

            NORMAL       0.91      0.60      0.72        97
ADVERSARIAL ATTACK       0.74      0.95      0.83       117

         micro avg       0.79      0.79      0.79       214
         macro avg       0.82      0.77      0.78       214
      weighted avg       0.82      0.79      0.78       214
       samples avg       0.79      0.79      0.79       214

True Negative Rate: 94.87%
False Positive Rate: 5.13%
True Positive Rate: 59.79%
False Negative Rate: 40.21%


In [19]:
# show number of false positives and false negatives
print(f"False Positives: {fp}")
print(f"False Negatives: {fn}")


False Positives: 6
False Negatives: 39


In [21]:
# show false positive samples
fp_indices = np.where((y_test_detector_binary == 0) & (y_pred_detector_binary == 1))[0]
print(fp_indices)
# show false negative samples
fn_indices = np.where((y_test_detector_binary == 1) & (y_pred_detector_binary == 0))[0]
print(fn_indices)

[ 22  27  34  61 150 203]
[  4   5   6   9  28  44  53  57  58  59  60  69  70  72  80  92  96  97
 108 119 121 129 140 155 159 167 168 180 182 189 193 194 195 196 197 198
 199 204 209]
