In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from imblearn.over_sampling import RandomOverSampler
from collections import Counter

In [None]:
# Environment setup: installs the Adversarial Robustness Toolbox (imported later
# as `art`) and TensorFlow Privacy, then pins TensorFlow-related versions.
# NOTE(review): this cell sits after the import cell, which already imports
# tensorflow; presumably the kernel must be restarted after running it so the
# pinned versions are the ones actually loaded -- confirm.
# NOTE(review): the first two installs are unpinned, so results may change as
# new releases appear; consider pinning them as well.
!pip install adversarial-robustness-toolbox
!pip install tensorflow-privacy
# NOTE(review): tensorflow-estimator is pinned to 2.3.0 while tensorflow is
# pinned to 2.14.0 on the next line; verify these two pins are compatible and
# that the second install does not simply override the first.
!pip install --upgrade tensorflow-estimator==2.3.0
!pip install --upgrade tensorflow==2.14.0

In [None]:
# Columns removed before modelling. errors='ignore' below tolerates columns
# that are already absent from the file.
DROPPED_COLUMNS = [
    'uid', 'service', 'duration', 'resp_bytes', 'orig_bytes', 'conn_state', 'local_orig',
    'local_resp', 'missed_bytes', 'history', 'orig_pkts', 'resp_pkts', 'resp_ip_bytes',
]

# Label values whose rows are excluded from the experiment.
EXCLUDED_LABELS = [
    'Attack', 'C&C-HeartBeat', 'C&C-Torii', 'C&C-FileDownload', 'FileDownload',
    'C&C-HeartBeat-FileDownload', 'C&C-Mirai',
]

# Read the CSV, silently skipping rows pandas cannot parse.
df = pd.read_csv('preprocessado_iot-23-2.csv', on_bad_lines='skip')
df = df.drop(columns=DROPPED_COLUMNS, errors='ignore')

# The header of the combined column contains three fields separated by runs of
# spaces; it is renamed to a single 'label' column.
df = df.rename(columns={'tunnel_parents   label   detailed-label': 'label'})
if 'label' not in df.columns:
    # rename() is a silent no-op when the source column is missing; fail with a
    # clear message instead of a bare KeyError on the next line.
    raise KeyError("expected column 'tunnel_parents   label   detailed-label' (or 'label') in the CSV")

# Keep only the last whitespace-separated token of each combined value.
df['label'] = df['label'].str.split().str[-1]

# .copy() makes the filtered frame independent of the unfiltered one, so later
# column assignments on `df` do not trigger SettingWithCopyWarning.
df = df[~df['label'].isin(EXCLUDED_LABELS)].copy()

In [5]:
SEED = 42  # single seed shared by the split, the oversampler and the subsample

# Integer-encode the categorical columns. Each column gets its own encoder so
# every fitted mapping stays available (e.g. for inverse_transform).
encoders = {}
for col in ['id.orig_h', 'id.resp_h', 'proto', 'label']:
    encoders[col] = LabelEncoder()
    df[col] = encoders[col].fit_transform(df[col])
le = encoders['label']  # `le` keeps referring to the encoder fitted on 'label'

# Select the target by name rather than by position so the split stays correct
# even if 'label' is not the last column of the frame.
features = df.drop(columns=['label'])
y = df['label']

# Split BEFORE scaling: the scaler must only see training data, otherwise the
# min/max of the test rows leak into the training features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, y, test_size=0.3, random_state=SEED
)

scaler = MinMaxScaler().fit(X_train_raw)
# Missing values are replaced with 0 after scaling. Test features may fall
# slightly outside [0, 1] because the scaler was fitted on the training split.
X_train = np.nan_to_num(scaler.transform(X_train_raw))
X_test = np.nan_to_num(scaler.transform(X_test_raw))
# Scaled copy of the full feature matrix; later cells read X.shape[1].
X = scaler.transform(features)

# Balance the classes of the training split by random oversampling.
ros = RandomOverSampler(random_state=SEED)
X_balanced, y_balanced = ros.fit_resample(X_train, y_train)

# Draw a fixed-size subsample without replacement. The size is capped at the
# number of available rows (replace=False would raise otherwise) and the draw
# is seeded so the notebook is reproducible.
desired_size = 521460
sample_size = min(desired_size, X_balanced.shape[0])
rng = np.random.default_rng(SEED)
indices = rng.choice(X_balanced.shape[0], size=sample_size, replace=False)
X_balanced_r = X_balanced[indices]
# Positional selection: use .iloc when the resampler returned a pandas object,
# so the result never depends on index labels.
y_balanced_r = y_balanced.iloc[indices] if hasattr(y_balanced, 'iloc') else y_balanced[indices]

In [9]:
from art.attacks.inference.membership_inference import MembershipInferenceBlackBoxRuleBased
from art.estimators.classification import TensorFlowV2Classifier

def build_model(input_dim, num_classes):
    """Create the (uncompiled) fully connected classifier used in every run.

    Args:
        input_dim: number of input features.
        num_classes: number of output units of the final softmax layer.

    Returns:
        A Keras ``Sequential`` model: a 128-unit ReLU input layer, six ReLU
        hidden layers (64, 64, 32, 32, 32, 32) and a softmax output layer.
    """
    hidden_widths = (64, 64, 32, 32, 32, 32)

    network = Sequential()
    network.add(Dense(128, input_dim=input_dim, activation='relu'))
    for width in hidden_widths:
        network.add(Dense(width, activation='relu'))
    network.add(Dense(num_classes, activation='softmax'))
    return network

def run_attack(classifier, X_train, X_test, y_train, y_test):
    """Run the rule-based black-box membership-inference attack and score it.

    Samples in ``X_train`` are treated as members (ground truth 1) and samples
    in ``X_test`` as non-members (ground truth 0).

    Args:
        classifier: estimator handed to ``MembershipInferenceBlackBoxRuleBased``.
        X_train, y_train: samples/labels scored as members.
        X_test, y_test: samples/labels scored as non-members.

    Returns:
        Tuple ``(train_acc, test_acc, acc, precision, recall)`` where
        ``train_acc`` is the fraction of member samples flagged as members,
        ``test_acc`` the fraction of non-member samples not flagged, ``acc``
        their size-weighted average, and ``precision``/``recall`` refer to the
        "member" class. Precision (recall) falls back to 1 when nothing was
        flagged (when there are no member samples).
    """
    mia = MembershipInferenceBlackBoxRuleBased(classifier)
    member_guess = np.asarray(mia.infer(X_train, y_train))
    nonmember_guess = np.asarray(mia.infer(X_test, y_test))

    n_members = len(member_guess)
    n_nonmembers = len(nonmember_guess)

    train_acc = np.sum(member_guess) / n_members
    test_acc = 1 - (np.sum(nonmember_guess) / n_nonmembers)
    acc = (train_acc * n_members + test_acc * n_nonmembers) / (n_members + n_nonmembers)

    # Every member sample has ground truth 1, so the true positives are exactly
    # the member samples that were flagged.
    true_positives = np.sum(member_guess == 1)
    flagged = true_positives + np.sum(nonmember_guess == 1)

    if flagged:
        precision = true_positives / flagged
    else:
        precision = 1

    if n_members:
        recall = true_positives / n_members
    else:
        recall = 1

    return train_acc, test_acc, acc, precision, recall

In [None]:
import tensorflow_privacy as tfp

# One dictionary per trained model; turned into a table in a later cell.
results = []

# Hyper-parameter grid for the differentially-private runs.
l2_norm_clip_values = [1.3, 1.5]
noise_multipliers = [0.7, 0.8, 1.0, 1.2, 1.4, 2.7, 3.0]
epochs = 20
batch_size = 128
delta = 1e-5
n_samples = len(X_balanced_r)

# Derive the model dimensions once so the Keras model and the ART wrapper
# always agree (the class count was previously hard-coded for the wrapper).
n_features = X.shape[1]
num_classes = len(np.unique(y))

# Epsilon depends on the noise multiplier but not on the clipping norm, so it
# is computed once per noise value instead of once per grid point.
epsilon_by_noise = {}
for noise in noise_multipliers:
    epsilon_by_noise[noise], _ = tfp.privacy.analysis.compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
        n=n_samples, batch_size=batch_size, noise_multiplier=noise, epochs=epochs, delta=delta
    )

for l2_clip in l2_norm_clip_values:
    for noise in noise_multipliers:
        # NOTE(review): with num_microbatches=1 the whole batch forms a single
        # microbatch; confirm that the epsilon reported above is the intended
        # accounting for this setting.
        opt = tfp.privacy.optimizers.dp_optimizer_keras.DPKerasAdamOptimizer(
            l2_norm_clip=l2_clip,
            noise_multiplier=noise,
            num_microbatches=1,
        )
        epsilon = epsilon_by_noise[noise]

        model = build_model(n_features, num_classes)
        model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        model.fit(X_balanced_r, y_balanced_r, epochs=epochs, batch_size=batch_size, verbose=0)

        # Wrap the trained model for ART. The optimizer passed here is a
        # separate Adam instance, not the DP optimizer used for training.
        art_classifier = TensorFlowV2Classifier(
            model=model,
            loss_object=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
            optimizer=tf.keras.optimizers.Adam(),
            nb_classes=num_classes,
            input_shape=(n_features,),
            clip_values=(0, 1),
        )

        # Utility of the private model on the held-out split.
        y_pred = art_classifier.predict(X_test)
        y_pred_classes = np.argmax(y_pred, axis=1)
        report = classification_report(y_test, y_pred_classes, output_dict=True)
        acc_model = report['accuracy']

        # NOTE(review): the attack scores X_train as "members", but the model
        # was fitted on X_balanced_r (a subsample of the oversampled training
        # split); confirm that this member set is the intended one.
        train_acc, test_acc, attack_acc, precision, recall = run_attack(
            art_classifier, X_train, X_test, y_train, y_test
        )

        results.append({
            'epsilon': round(epsilon, 4),
            'noise': noise,
            'l2_norm_clip': l2_clip,
            'model_acc': acc_model,
            'attack_acc': round(attack_acc, 4),
            'attack_train_acc': round(train_acc, 4),
            'attack_test_acc': round(test_acc, 4),
            'attack_precision': round(precision, 4),
            'attack_recall': round(recall, 4),
        })

In [11]:
# Non-private baseline: same architecture and training data as the DP runs,
# trained with the standard Adam optimizer.
num_classes = len(np.unique(y))
n_features = X.shape[1]

baseline_model = build_model(n_features, num_classes)
baseline_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
baseline_model.fit(X_balanced_r, y_balanced_r, epochs=epochs, batch_size=batch_size, verbose=0)

# nb_classes is derived from the data so it always matches the width of the
# model's output layer (it was previously hard-coded).
baseline_classifier = TensorFlowV2Classifier(
    model=baseline_model,
    loss_object=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    optimizer=tf.keras.optimizers.Adam(),
    nb_classes=num_classes,
    input_shape=(n_features,),
    clip_values=(0, 1),
)

# Utility of the baseline model on the held-out split.
y_pred_base = baseline_classifier.predict(X_test)
y_pred_base_classes = np.argmax(y_pred_base, axis=1)
report_base = classification_report(y_test, y_pred_base_classes, output_dict=True)
acc_baseline = report_base['accuracy']

# NOTE(review): the attack scores X_train as "members", but the model was
# fitted on X_balanced_r; confirm that this member set is the intended one.
train_acc, test_acc, attack_acc, precision, recall = run_attack(
    baseline_classifier, X_train, X_test, y_train, y_test
)

# Drop any baseline row left over from a previous execution of this cell so
# re-running it does not duplicate the baseline in the results table.
results[:] = [row for row in results if row['epsilon'] != 'baseline']

results.append({
    'epsilon': 'baseline',
    'noise': 0,
    'l2_norm_clip': 0,
    'model_acc': acc_baseline,
    'attack_acc': round(attack_acc, 4),
    'attack_train_acc': round(train_acc, 4),
    'attack_test_acc': round(test_acc, 4),
    'attack_precision': round(precision, 4),
    'attack_recall': round(recall, 4),
})

In [None]:
# Build the results table from the per-run dictionaries collected above.
# (The table was previously referenced without ever being created.)
results_df = pd.DataFrame(results)

# Rows with a numeric epsilon (the differentially-private runs), sorted by epsilon.
private_df = results_df[results_df['epsilon'] != 'baseline'].copy()
private_df['epsilon'] = private_df['epsilon'].astype(float)
private_df = private_df.sort_values(by='epsilon')

# The baseline row(s).
baseline_df = results_df[results_df['epsilon'] == 'baseline']

# Concatenate with the baseline at the end (swap the order to put it first).
final_df = pd.concat([private_df, baseline_df], ignore_index=True)

display(final_df)
