# Adversarial Attack Generator

Based on: https://github.com/Trusted-AI/adversarial-robustness-toolbox/wiki/Get-Started#setup
- Docs: https://adversarial-robustness-toolbox.readthedocs.io/en/latest/index.html


---

## Load Data Split

In [1]:
import pandas as pd

# Read the four pre-computed split files (features X, labels y) in the order
# train-features, test-features, train-labels, test-labels.
X_train, X_test, y_train, y_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../../CICIDS2017/train_test_split/X_train_poc.csv",
        "../../CICIDS2017/train_test_split/X_test_poc.csv",
        "../../CICIDS2017/train_test_split/y_train_poc.csv",
        "../../CICIDS2017/train_test_split/y_test_poc.csv",
    )
)

# Show the (rows, columns) shape of every split on one line.
print(*(split.shape for split in (X_train, X_test, y_train, y_test)))

(192037, 68) (64013, 68) (192037, 2) (64013, 2)


In [2]:
# POC: create smaller dataset.
# Keep only the first 50000 training rows and the first 2500 test rows;
# features and labels are cut with the same row count so they stay aligned.
# Shapes are printed before and after the cut.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
X_train, y_train = X_train.head(50000), y_train.head(50000)
X_test, y_test = X_test.head(2500), y_test.head(2500)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(192037, 68) (64013, 68) (192037, 2) (64013, 2)
(50000, 68) (2500, 68) (50000, 2) (2500, 2)


## Import Model

In [3]:
# Restore the saved Keras model from disk and print its layer summary.
from tensorflow import keras

model_path = '../../CICIDS2017/models/ids_dnn_poc.keras'
model = keras.models.load_model(model_path)
model.summary()

## Convert Model

In [4]:
# convert model to ART -> needed for adversarial attacks
from art.estimators.classification import TensorFlowV2Classifier
import tensorflow as tf

# Number of feature columns; used below as the classifier's input shape.
input_dim = X_train.shape[1]

# Loss function and optimizer shared by the ART wrapper and the custom train step.
loss_object = keras.losses.BinaryCrossentropy()
optimizer = keras.optimizers.Adam(learning_rate=0.001)

@tf.function
def custom_train_step(model, x_batch, y_batch):
    """Run one gradient update of ``model`` on a single batch.

    The forward pass is executed in training mode, the loss is computed with
    the notebook-level ``loss_object`` and the resulting gradients are applied
    through the notebook-level ``optimizer``.

    Args:
        model: Model whose ``trainable_variables`` are updated.
        x_batch: Batch of inputs fed to the model.
        y_batch: Batch of targets passed to ``loss_object``.

    Returns:
        The loss value computed for this batch.
    """
    # Record the forward pass so the loss can be differentiated afterwards.
    with tf.GradientTape() as tape:
        batch_loss = loss_object(y_batch, model(x_batch, training=True))

    # Differentiate w.r.t. every trainable variable and apply the update.
    grads = tape.gradient(batch_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    return batch_loss

# KerasClassifier uses tf.keras.backend.placeholder, which has been removed in
# TensorFlow 2.10+, so we need to use TensorFlowV2Classifier instead.
classifier = TensorFlowV2Classifier(
    model=model,
    # Data description
    nb_classes=2,  # Binary classification (0 or 1)
    input_shape=(input_dim,),  # One flat feature vector per sample
    clip_values=(0, 1),  # because of the min-max normalization
    # Training configuration
    loss_object=loss_object,
    optimizer=optimizer,
    train_step=custom_train_step,  # Use the custom training step defined in this cell
)

# Report how the wrapped classifier performs on the training features.
from sklearn.metrics import classification_report, accuracy_score

# Threshold the raw predictions at 0.5 to obtain boolean label indicators.
y_pred = classifier.predict(X_train) > 0.5

print(
    classification_report(
        y_true=y_train,
        y_pred=y_pred,
        target_names=['BENIGN', 'ATTACK'],
    )
)
print(f"Accuracy : {accuracy_score(y_train, y_pred)*100:.2f}%")

              precision    recall  f1-score   support

      BENIGN       0.99      1.00      1.00     96115
      ATTACK       1.00      0.99      1.00     95922

   micro avg       1.00      1.00      1.00    192037
   macro avg       1.00      1.00      1.00    192037
weighted avg       1.00      1.00      1.00    192037
 samples avg       1.00      1.00      1.00    192037

Accuracy : 99.64%


## Generate Attacks

In [7]:
# Convert both feature frames to NumPy arrays -> needed for adversarial attacks.
X_train_np, X_test_np = X_train.to_numpy(), X_test.to_numpy()

### Carlini & Wagner Attack - White Box

In [6]:
from art.attacks.evasion import CarliniL2Method

# Build the Carlini & Wagner L2 attack against the wrapped classifier
# (non-targeted, confidence 0.0).
attack_cw = CarliniL2Method(
    classifier=classifier,
    targeted=False,
    confidence=0.0,
)

# Craft adversarial counterparts of the test samples and report their shape.
X_test_adv_cw = attack_cw.generate(x=X_test_np)
print(f'Adversarial C&W examples generated. Shape: {X_test_adv_cw.shape}')

C&W L_2:   0%|          | 0/2500 [00:00<?, ?it/s]

Adversarial C&W examples generated. Shape: (2500, 68)


### FGSM Attack - White Box

In [None]:
from art.attacks.evasion import FastGradientMethod

# Build the FGSM attack against the wrapped classifier.
# eps (ε) controls the attack strength -- tune it for stronger/weaker attacks:
#   0.01 weak, 0.1 balanced, 0.3-0.5 strong, 1 very strong.
# The higher the epsilon, the easier the attack will be detected.
attack_fgsm = FastGradientMethod(
    estimator=classifier,
    eps=0.1,
)

# Craft adversarial counterparts of the *training* samples and report their shape.
X_train_adv_fgsm = attack_fgsm.generate(x=X_train_np)
print(f'Adversarial FGSM examples generated. Shape: {X_train_adv_fgsm.shape}')

Adversarial FGSM examples generated. Shape: (50000, 68)


### HopSkipJumpAttack - Black Box

In [25]:
from art.attacks.evasion import HopSkipJump
import numpy as np

# Build the HopSkipJump attack against the wrapped classifier
# (non-targeted, norm=2).
attack_hop_skip_jump = HopSkipJump(
    classifier=classifier,
    norm=2,
    targeted=False,
)

# Craft adversarial counterparts of the test samples and report their shape.
X_test_adv_hop_skip_jump = attack_hop_skip_jump.generate(x=X_test_np)
print(f'Adversarial HopSkipJump examples generated. Shape: {X_test_adv_hop_skip_jump.shape}')

[[1. 0.]
 [1. 0.]]


HopSkipJump:   0%|          | 0/2000 [00:00<?, ?it/s]

Adversarial HopSkipJump examples generated. Shape: (2000, 70)


## Evaluate Attacks

In [None]:
# Sanity check: does any generated adversarial sample contain a negative value?
# One True/False line is printed per attack, in the order C&W, FGSM, HopSkipJump.
print((X_test_adv_cw < 0).any())
print((X_train_adv_fgsm < 0).any())
print((X_test_adv_hop_skip_jump < 0).any())

False
False


In [None]:
# Baseline: loss/accuracy of the model on the clean test samples.
loss, accuracy = model.evaluate(x=X_test_np, y=y_test)
print(f'Accuracy on clean examples: {accuracy * 100:.2f}%')

# C&W evaluation (currently disabled)
# loss_adv, accuracy_adv = model.evaluate(X_test_adv_cw, y_test)
# print(f'Accuracy on C&W attack: {accuracy_adv * 100:.2f}%')

# FGSM: the adversarial samples were generated from the training features,
# so they are scored against the training labels.
loss_adv_fgsm, accuracy_adv_fgsm = model.evaluate(x=X_train_adv_fgsm, y=y_train)
print(f'Accuracy on fgsm attack: {accuracy_adv_fgsm * 100:.2f}%')

# HopSkipJump evaluation (currently disabled)
# loss_adv_hop_skip_jump, accuracy_adv_hop_skip_jump = model.evaluate(X_test_adv_hop_skip_jump, y_test)
# print(f'Accuracy on hop skip jump attack: {accuracy_adv_hop_skip_jump * 100:.2f}%')

[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9974 - loss: 0.0064
Accuracy on clean examples: 99.72%
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.2759 - loss: 47.7132
Accuracy on fgsm attack: 27.77%


In [8]:
from sklearn.metrics import classification_report

# Threshold the model's raw outputs at 0.5, first for the clean test samples
# and then for their C&W adversarial counterparts.
y_pred = model.predict(X_test) > 0.5
y_pred_adv_cw = model.predict(X_test_adv_cw) > 0.5
# FGSM / HopSkipJump predictions (currently disabled)
# y_pred_adv_fgsm = model.predict(X_test_adv_fgsm)
# y_pred_adv_fgsm = (y_pred_adv_fgsm > 0.5)
# y_pred_adv_hop_skip_jump = model.predict(X_test_adv_hop_skip_jump)
# y_pred_adv_hop_skip_jump = (y_pred_adv_hop_skip_jump > 0.5)

# Per-class precision / recall / F1 on clean data.
print("Normal Classification Report:")
print(classification_report(y_true=y_test, y_pred=y_pred, target_names=['BENIGN', 'ATTACK']))

# Same report on the C&W adversarial samples (ground truth is unchanged).
print("C&W Classification Report:")
print(classification_report(y_true=y_test, y_pred=y_pred_adv_cw, target_names=['BENIGN', 'ATTACK']))

# FGSM / HopSkipJump reports (currently disabled)
# print("FGSM Classification Report:")
# print(classification_report(y_test, y_pred_adv_fgsm, target_names=['BENIGN', 'ATTACK']))
# print("Hop Skip Jump Classification Report:")
# print(classification_report(y_test, y_pred_adv_hop_skip_jump, target_names=['BENIGN', 'ATTACK']))


[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 746us/step
Normal Classification Report:
              precision    recall  f1-score   support

      BENIGN       0.99      1.00      1.00      1260
      ATTACK       1.00      0.99      1.00      1240

   micro avg       1.00      1.00      1.00      2500
   macro avg       1.00      1.00      1.00      2500
weighted avg       1.00      1.00      1.00      2500
 samples avg       1.00      1.00      1.00      2500

C&W Classification Report:
              precision    recall  f1-score   support

      BENIGN       0.74      0.97      0.84      1260
      ATTACK       0.96      0.65      0.78      1240

   micro avg       0.82      0.82      0.82      2500
   macro avg       0.85      0.81      0.81      2500
weighted avg       0.85      0.82      0.81      2500
 samples avg       0.82      0.82      0.82      2500



In [9]:
# show accuracy, precision, recall, f1-score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import numpy as np

# Collapse every two-column label/prediction array to a single 0/1 vector by
# taking the index of the smaller entry in each row.
# NOTE(review): argmin maps a row [1, 0] to 1 and [0, 1] to 0, so the class
# stored in column 0 becomes the positive class (label 1) for the precision /
# recall / F1 values below -- confirm this is the intended positive class.
y_test_binary, y_pred_binary, y_pred_adv_cw_binary = (
    np.asarray(two_col).argmin(axis=1)
    for two_col in (y_test, y_pred, y_pred_adv_cw)
)
# y_pred_adv_fgsm_binary = np.array(y_pred_adv_fgsm).argmin(axis=1)
# y_pred_adv_hop_skip_jump_binary = np.array(y_pred_adv_hop_skip_jump).argmin(axis=1)

# --- clean test data ---
print("Normal:")
print(f"Accuracy: {accuracy_score(y_test_binary, y_pred_binary)*100:.2f}%")
print(f"Precision: {precision_score(y_test_binary, y_pred_binary)*100:.2f}%")
print(f"Recall: {recall_score(y_test_binary, y_pred_binary)*100:.2f}%")
print(f"F1-Score: {f1_score(y_test_binary, y_pred_binary)*100:.2f}%")

# --- C&W adversarial samples ---
print("C&W:")
print(f"Accuracy: {accuracy_score(y_test_binary, y_pred_adv_cw_binary)*100:.2f}%")
print(f"Precision: {precision_score(y_test_binary, y_pred_adv_cw_binary)*100:.2f}%")
print(f"Recall: {recall_score(y_test_binary, y_pred_adv_cw_binary)*100:.2f}%")
print(f"F1-Score: {f1_score(y_test_binary, y_pred_adv_cw_binary)*100:.2f}%")

# --- FGSM (currently disabled) ---
# print("FGSM:")
# print(f"Accuracy: {accuracy_score(y_test_binary, y_pred_adv_fgsm_binary)*100:.2f}%")
# print(f"Precision: {precision_score(y_test_binary, y_pred_adv_fgsm_binary)*100:.2f}%")
# print(f"Recall: {recall_score(y_test_binary, y_pred_adv_fgsm_binary)*100:.2f}%")
# print(f"F1-Score: {f1_score(y_test_binary, y_pred_adv_fgsm_binary)*100:.2f}%")

# --- HopSkipJump (currently disabled) ---
# print("Hop Skip Jump:")
# print(f"Accuracy: {accuracy_score(y_test_binary, y_pred_adv_hop_skip_jump_binary)*100:.2f}%")
# print(f"Precision: {precision_score(y_test_binary, y_pred_adv_hop_skip_jump_binary)*100:.2f}%")
# print(f"Recall: {recall_score(y_test_binary, y_pred_adv_hop_skip_jump_binary)*100:.2f}%")
# print(f"F1-Score: {f1_score(y_test_binary, y_pred_adv_hop_skip_jump_binary)*100:.2f}%")

# Confusion-matrix cells for the C&W predictions, unpacked in the order tn, fp, fn, tp.
print("Confusion Matrix - C&W Attack:")
cw_confusion = confusion_matrix(y_test_binary, y_pred_adv_cw_binary)
tn, fp, fn, tp = cw_confusion.ravel()
print(f"TN: {tn}, FP: {fp}, FN: {fn}, TP: {tp}")

Normal Confusion Matrix:
TN: 812, FP: 428, FN: 33, TP: 1227
Normal:
Accuracy: 99.72%
Precision: 99.45%
Recall: 100.00%
F1-Score: 99.72%
C&W:
Accuracy: 81.56%
Precision: 74.14%
Recall: 97.38%
F1-Score: 84.19%


## Store Adversarial Instances

In [None]:
# Wrap the generated adversarial arrays in DataFrames that carry the original
# feature names, so the stored CSVs keep their header row.

# C&W frame (currently disabled; generated from the test features)
# adv_cw_df = pd.DataFrame(X_test_adv_cw, columns=X_test.columns)

# FGSM examples were generated from X_train_np, so they take X_train's columns.
adv_fgsm_df = pd.DataFrame(X_train_adv_fgsm, columns=X_train.columns)

# HopSkipJump frame (currently disabled; generated from the test features)
# adv_hop_skip_jump_df = pd.DataFrame(X_test_adv_hop_skip_jump, columns=X_test.columns)

# Clean reference frame for the "Compare Data" section.
# It must hold the same rows the active adversarial frame was derived from
# (X_train_np for FGSM) and the same column names; building it from X_test_np
# without column names would put unrelated rows next to adv_fgsm_df.
norm_df = pd.DataFrame(X_train_np, columns=X_train.columns)

In [None]:
# Persist the adversarial examples as CSV files (row index not written).
# Only the FGSM frame is stored at the moment; C&W and HopSkipJump are disabled.
# adv_cw_df.to_csv("../../CICIDS2017/adversarial_samples/X_test_adv_cw_poc.csv", index=False)
adv_fgsm_df.to_csv(
    path_or_buf="../../CICIDS2017/adversarial_samples/X_train_adv_fgsm_poc_50000.csv",
    index=False,
)
# adv_hop_skip_jump_df.to_csv("../../CICIDS2017/adversarial_samples/X_test_adv_hsj_proto.csv", index=False)

## Compare Data

In [49]:
# Print the first two rows of the FGSM adversarial frame, followed by the
# first two rows of the clean reference frame.
# print(adv_cw_df.head(2))
print(adv_fgsm_df.iloc[:2])
# print(adv_hop_skip_jump_df.head(2))
print(norm_df.iloc[:2])

    Destination Port   Flow Duration   Total Fwd Packets  \
0           0.501221        0.528566                 0.0   
1           0.302768        0.000000                 0.5   

    Total Backward Packets  Total Length of Fwd Packets  \
0                      0.5                     0.500129   
1                      0.0                     0.000000   

    Total Length of Bwd Packets   Fwd Packet Length Max  \
0                           0.5                0.500431   
1                           0.0                0.000000   

    Fwd Packet Length Min   Fwd Packet Length Mean   Fwd Packet Length Std  \
0                0.504076                 0.501552                     0.5   
1                0.000000                 0.000000                     0.0   

   ...   act_data_pkt_fwd   min_seg_size_forward  Active Mean   Active Std  \
0  ...                0.0               0.884615          0.5          0.5   
1  ...                0.5               0.115385          0.0          0