# Adversarial Attack Generator

Based on: https://github.com/Trusted-AI/adversarial-robustness-toolbox/wiki/Get-Started#setup </p>
- Docs: https://adversarial-robustness-toolbox.readthedocs.io/en/latest/index.html


---

## Import Data

In [18]:
# # load data
# import pandas as pd

# # load dataset
# df = pd.read_csv('../data/preprocessed/binary_min_max_combined.csv')
# df.shape
# df.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,0.837186,1.333333e-07,5e-06,0.0,9.302326e-07,0.0,0.000242,0.002581,0.00101,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,0.84007,1.016667e-06,0.0,3e-06,4.651163e-07,9.153974e-09,0.000242,0.002581,0.00101,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,0.840085,5.416666e-07,0.0,3e-06,4.651163e-07,9.153974e-09,0.000242,0.002581,0.00101,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,0.705516,3.916666e-07,0.0,3e-06,4.651163e-07,9.153974e-09,0.000242,0.002581,0.00101,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,0.837156,1.333333e-07,5e-06,0.0,9.302326e-07,0.0,0.000242,0.002581,0.00101,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [24]:
# # create smaller dataset
# small_df = df.sample(frac=0.0003, random_state=10)
# small_df.shape

(848, 71)

In [26]:
# # split data
# from sklearn.model_selection import train_test_split

# X = small_df.drop(columns=[' Label'])
# y = small_df[' Label']

# y = pd.get_dummies(y)
# y.columns = ["BENIGN", "MALICIOUS"]
# print(y[:2])

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
# print(X_train.shape, X_test.shape)

         BENIGN  MALICIOUS
468003    False       True
1239319    True      False
(636, 70) (212, 70)


---

## Load Data Split

In [1]:
import pandas as pd

X_train = pd.read_csv("../data/train_test_split/X_train_small.csv")
X_test = pd.read_csv("../data/train_test_split/X_test_small.csv")
y_train = pd.read_csv("../data/train_test_split/y_train_small.csv")
y_test = pd.read_csv("../data/train_test_split/y_test_small.csv")

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(2000, 70) (500, 70) (2000, 2) (500, 2)


## Import Model

In [9]:
# load model
from tensorflow import keras

model = keras.models.load_model('../models/ids_dnn_small.keras')
model.summary()

In [8]:
# create C&W attack
from art.attacks.evasion import CarliniL2Method
from art.estimators.classification import TensorFlowV2Classifier
import tensorflow as tf
import numpy as np

# Define loss function
loss_object = keras.losses.BinaryCrossentropy()
optimizer = keras.optimizers.Adam(learning_rate=0.001)
input_dim = X_train.shape[1] 

@tf.function
def custom_train_step(model, x_batch, y_batch):
    with tf.GradientTape() as tape:
        predictions = model(x_batch, training=True)
        loss = loss_object(y_batch, predictions)
    
    # Compute and apply gradients
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    
    return loss

# KerasClassifier uses tf.keras.backend.placeholder, which has been removed in TensorFlow 2.10+.so we need to use TensorFlowV2Classifier
classifier = TensorFlowV2Classifier(
    model=model,
    nb_classes=2,  # Binary classification (0 or 1)
    input_shape=(input_dim,),  # Input shape
    clip_values=(0, 1), # because of the min-max normalization
    optimizer=optimizer, 
    loss_object=loss_object,
    train_step=custom_train_step  # Use default training function
)

# print accuracy
from sklearn.metrics import classification_report, accuracy_score

y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)

print(classification_report(y_test, y_pred, target_names=['BENIGN', 'ATTACK']))
print(f"Accuracy : {accuracy_score(y_test, y_pred)}")

              precision    recall  f1-score   support

      BENIGN       0.94      0.94      0.94       416
      ATTACK       0.70      0.68      0.69        84

   micro avg       0.90      0.90      0.90       500
   macro avg       0.82      0.81      0.81       500
weighted avg       0.90      0.90      0.90       500
 samples avg       0.90      0.90      0.90       500

Accuracy : 0.898


In [5]:
from art.attacks.evasion import CarliniL2Method

# Create the C&W attack (non-targeted)
attack = CarliniL2Method(classifier=classifier, confidence=0.0, targeted=False)

# convert to numpy
X_test_np = X_test.to_numpy()

# Generate adversarial examples on the test set
X_test_adv = attack.generate(x=X_test_np)

# Let's print the shape of the adversarial examples
print(f'Adversarial examples generated. Shape: {X_test_adv.shape}')


C&W L_2:   0%|          | 0/500 [00:00<?, ?it/s]

Adversarial examples generated. Shape: (500, 70)


In [6]:
# Evaluate the model on adversarial examples
loss_adv, accuracy_adv = model.evaluate(X_test_adv, y_test)
print(f'Accuracy on adversarial examples: {accuracy_adv * 100:.2f}%')

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8177 - loss: 0.2803  
Accuracy on adversarial examples: 79.60%
