In [None]:
import pandas as pd
import numpy as np
import joblib
import warnings

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical

from art.estimators.classification import TensorFlowV2Classifier
from art.attacks.evasion import CarliniL2Method

# Silence every warning category for the rest of the session so that library
# warnings do not clutter the cell outputs. Note that this is a blanket filter:
# it hides deprecation and numerical warnings as well.
warnings.filterwarnings('ignore')
# Confirmation that the import cell ran to completion.
print("All libraries imported successfully.")

  from .autonotebook import tqdm as notebook_tqdm


All libraries imported successfully.


In [None]:
# CSV file read by load_multi_class_test_data().
DATA_PATH = 'CIC-Darknet2020.csv'
# Values of the 'Label' column to keep; rows carrying any other label are filtered out.
TARGET_LABELS = ['Tor', 'Non-Tor', 'VPN', 'NonVPN']

# Saved Keras model and fitted scaler; both are loaded from disk, not trained here.
MODEL_PATH = 'model-multi.h5'
SCALER_PATH = 'scaler-multi.pkl'

# Settings forwarded to CarliniL2Method as `confidence`, `max_iter` and `batch_size`.
CW_CONFIDENCE = 0.0
CW_MAX_ITER = 10
CW_BATCH_SIZE = 32

In [None]:
def load_multi_class_test_data():
    """Rebuild the held-out test split and prepare it for the saved model.

    Steps, in order:
      1. Read ``DATA_PATH`` and rename its last two columns to ``Label`` and
         ``Label_Type``.
      2. Turn +/-inf into NaN, drop every row that contains a NaN, and keep only
         rows whose ``Label`` is listed in ``TARGET_LABELS``.
      3. Drop the non-feature columns and coerce the rest to numeric.
      4. Integer-encode and one-hot encode the labels, printing the mapping.
      5. Take a stratified 80/20 split with ``random_state=42`` and keep only
         the test portion.
      6. Scale the test features with the scaler stored at ``SCALER_PATH`` and
         append a trailing axis of length 1.

    Returns:
        ``None`` when the CSV or the scaler file is missing (a message is
        printed). Otherwise the tuple
        ``(X_test_cnn, y_test_ohe, y_test_encoded, target_names, n_features, n_classes)``,
        where ``X_test_cnn`` has shape ``(n_test_rows, n_features, 1)``.
    """
    try:
        raw = pd.read_csv(DATA_PATH)
    except FileNotFoundError:
        print(f"Error: '{DATA_PATH}' not found.")
        return None

    # The last two columns are renamed positionally, whatever the file calls them.
    raw.columns = list(raw.columns[:-2]) + ['Label', 'Label_Type']
    cleaned = raw.replace([np.inf, -np.inf], np.nan).dropna()
    selected = cleaned.loc[cleaned['Label'].isin(TARGET_LABELS)].copy()

    metadata_cols = [
        'Flow ID', 'Src IP', 'Src Port', 'Dst IP', 'Dst Port',
        'Protocol', 'Timestamp', 'Label', 'Label_Type',
    ]
    features = selected.drop(columns=metadata_cols).apply(pd.to_numeric)

    encoder = LabelEncoder()
    label_ids = encoder.fit_transform(selected['Label'])
    target_names = encoder.classes_
    n_classes = len(target_names)
    labels_ohe = to_categorical(label_ids, num_classes=n_classes)

    print("--- Class Encoding Mapping ---")
    for class_index, class_label in enumerate(target_names):
        print(f"Class Index {class_index} -> {class_label}")

    # train_test_split returns [train, test] pairs for each input array;
    # the odd positions are the test halves, which are all that is needed here.
    split_parts = train_test_split(
        features, labels_ohe, label_ids,
        test_size=0.2,
        random_state=42,
        stratify=label_ids,
    )
    X_test, y_test_ohe, y_test_encoded = split_parts[1::2]

    try:
        scaler = joblib.load(SCALER_PATH)
    except FileNotFoundError:
        print(f"Error: '{SCALER_PATH}' not found. Did you run the baseline notebook?")
        return None
    else:
        print(f"\nScaler '{SCALER_PATH}' loaded successfully.")

    X_test_scaled = scaler.transform(X_test)

    n_rows, n_features = X_test_scaled.shape[0], X_test_scaled.shape[1]
    # Append a length-1 trailing axis: (rows, features) -> (rows, features, 1).
    X_test_cnn = X_test_scaled.reshape((n_rows, n_features, 1))

    print(f"Data preparation complete. Found {n_features} features and {n_classes} classes.")

    return X_test_cnn, y_test_ohe, y_test_encoded, target_names, n_features, n_classes

In [None]:
def get_art_classifier_multi(model_path, n_features, n_classes):
    """Load the saved Keras model and wrap it in an ART TensorFlowV2Classifier.

    Args:
        model_path: Path handed to ``load_model``.
        n_features: Number of input features; the wrapper is declared with
            input shape ``(n_features, 1)``.
        n_classes: Number of output classes reported to the wrapper.

    Returns:
        ``(model, classifier)`` on success. If loading raises any exception,
        the error is printed and ``(None, None)`` is returned instead.
    """
    try:
        keras_model = load_model(model_path)
    except Exception as load_error:
        print(f"Error: Model file '{model_path}' not found or failed to load.")
        print(f"Details: {load_error}")
        return None, None
    else:
        print(f"Model '{model_path}' loaded successfully.")

    # The loss is configured with from_logits=False, i.e. it is given the
    # model's outputs as probabilities rather than raw logits.
    art_wrapper = TensorFlowV2Classifier(
        model=keras_model,
        loss_object=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
        input_shape=(n_features, 1),
        nb_classes=n_classes,
        channels_first=False,
    )

    return keras_model, art_wrapper

In [None]:
def evaluate_attack_multi(y_true_encoded, y_pred_probs, attack_name, target_names):
    """Print accuracy, a classification report and a confusion matrix.

    Args:
        y_true_encoded: Integer class index for each sample.
        y_pred_probs: 2-D array of per-class scores; the predicted class is the
            argmax along axis 1.
        attack_name: Text used in the printed section header.
        target_names: Class names, where position ``i`` names class index ``i``
            (the caller passes ``LabelEncoder.classes_``).

    Returns:
        The accuracy as a float in ``[0, 1]``.
    """
    y_pred_encoded = np.argmax(y_pred_probs, axis=1)

    # Pin the label set to every known class. Without this, scikit-learn infers
    # the labels from the data: classification_report raises ValueError when a
    # class is missing from both y_true and y_pred (its label count no longer
    # matches target_names), and the confusion matrix silently shrinks so its
    # rows and columns stop lining up with the class indices.
    class_ids = np.arange(len(target_names))

    acc = accuracy_score(y_true_encoded, y_pred_encoded)

    print(f"\n--- {attack_name} Metrics ---")
    print(f"Accuracy: {acc * 100:.4f}%")

    print("\nClassification Report:")
    print(classification_report(
        y_true_encoded,
        y_pred_encoded,
        labels=class_ids,
        target_names=target_names,
    ))

    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true_encoded, y_pred_encoded, labels=class_ids))

    return acc

In [None]:
# Prepare the test data first; the model is only loaded when that succeeds.
data = load_multi_class_test_data()

if data is None:
    print("Data loading failed. Cannot proceed.")
else:
    (X_test_cnn, y_test_ohe, y_test_encoded,
     target_names, n_features, n_classes) = data

    # float32 copy of the test inputs, used for prediction and for the attack.
    X_test_art = X_test_cnn.astype(np.float32)

    model, classifier = get_art_classifier_multi(MODEL_PATH, n_features, n_classes)



--- Class Encoding Mapping ---
Class Index 0 -> Non-Tor
Class Index 1 -> NonVPN
Class Index 2 -> Tor
Class Index 3 -> VPN

Scaler 'scaler-multi.pkl' loaded successfully.
Data preparation complete. Found 76 features and 4 classes.
Model 'model-multi.h5' loaded successfully.


In [None]:
# Baseline evaluation on the unperturbed test set.
# get_art_classifier_multi() returns (None, None) when the model cannot be
# loaded, so the name `model` may exist while holding None. Test the value,
# not just the name, so a failed load takes the skip branch instead of
# raising AttributeError on `None.predict`.
if globals().get('model') is not None:
    print("\nEvaluating baseline (clean) model performance...")
    y_pred_clean_probs = model.predict(X_test_art)
    
    clean_acc = evaluate_attack_multi(y_test_encoded, y_pred_clean_probs, 
                                      "Baseline (Clean)", target_names)
else:
    print("Model not loaded. Skipping baseline evaluation.")


Evaluating baseline (clean) model performance...
[1m992/992[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step

--- Baseline (Clean) Metrics ---
Accuracy: 95.3585%

Classification Report:
              precision    recall  f1-score   support

     Non-Tor       1.00      0.99      0.99     22079
      NonVPN       0.86      0.85      0.85      4772
         Tor       0.82      0.87      0.85       279
         VPN       0.86      0.88      0.87      4584

    accuracy                           0.95     31714
   macro avg       0.88      0.90      0.89     31714
weighted avg       0.95      0.95      0.95     31714


Confusion Matrix:
[[21885   137     1    56]
 [   43  4073    48   608]
 [    0    27   244     8]
 [   19   520     5  4040]]


In [None]:
# Carlini & Wagner L2 attack on the full test set, followed by re-evaluation.
# The guard checks values rather than mere name existence: a failed model load
# leaves `model` and `classifier` defined but set to None, and this cell needs
# both of them plus the baseline accuracy from the previous cell.
_cw_prerequisites = ('model', 'classifier', 'clean_acc')
if all(globals().get(_name) is not None for _name in _cw_prerequisites):
    print("\n" + "="*50)
    print(f"Starting C&W Attack (Iterations = {CW_MAX_ITER})")
    print("WARNING: This attack is VERY slow. Please be patient.")
    print("="*50 + "\n")

    attack = CarliniL2Method(
        classifier,
        confidence=CW_CONFIDENCE,
        max_iter=CW_MAX_ITER,
        batch_size=CW_BATCH_SIZE,
        verbose=True
    )

    print("Generating adversarial examples...")
    # The one-hot true labels are passed as `y` for the attack to work against.
    X_test_adv = attack.generate(x=X_test_art, y=y_test_ohe)
    
    print("\nEvaluating model on Adversarial Examples...")
    y_pred_adv_probs = model.predict(X_test_adv)
    
    adv_acc = evaluate_attack_multi(y_test_encoded, y_pred_adv_probs, 
                                    f"C&W Attack (iter={CW_MAX_ITER})", target_names)
    
    print("\n" + "="*50)
    print(f"Baseline (Clean) Accuracy: {clean_acc * 100:.4f}%")
    print(f"Adversarial (C&W) Accuracy: {adv_acc * 100:.4f}%")
    # Difference between the two accuracies, expressed in percentage points.
    print(f"Accuracy Drop: {(clean_acc - adv_acc) * 100:.4f}%")
    print("="*50)

else:
    print("Classifier or clean accuracy not found. Skipping attack.")


Starting C&W Attack (Iterations = 10)

Generating adversarial examples...


C&W L_2:   0%|          | 0/992 [00:00<?, ?it/s]



C&W L_2: 100%|██████████| 992/992 [3:56:55<00:00, 14.33s/it]    



Evaluating model on Adversarial Examples...
[1m992/992[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step

--- C&W Attack (iter=10) Metrics ---
Accuracy: 10.0303%

Classification Report:
              precision    recall  f1-score   support

     Non-Tor       0.55      0.13      0.21     22079
      NonVPN       0.00      0.02      0.01      4772
         Tor       0.44      0.77      0.56       279
         VPN       0.01      0.01      0.01      4584

    accuracy                           0.10     31714
   macro avg       0.25      0.23      0.20     31714
weighted avg       0.39      0.10      0.15     31714


Confusion Matrix:
[[ 2818 15119    32  4110]
 [ 2176    81   221  2294]
 [    0    50   215    14]
 [  153  4344    20    67]]

Baseline (Clean) Accuracy: 95.3585%
Adversarial (C&W) Accuracy: 10.0303%
Accuracy Drop: 85.3282%
