## Task 4: Generate adversarial examples

In [2]:
import wandb
import os
os.environ["WANDB_NOTEBOOK_NAME"] = "generate_adversarial_examples.ipynb"
wandb.login()
import pandas as pd
import torch
from torch import optim, nn
from tqdm import tqdm
import pickle
from models import run_pytorch
from data import get_datasets

[34m[1mwandb[0m: Currently logged in as: [33mzhipeng-he[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
import foolbox
import numpy as np
from scipy.stats import chi2
import eagerpy as ep

In [4]:
def metrics(adv_numpy, sample_numpy, adv_numpy_cont, train_mean, train_cov_matrix, threshold, std):

    def _calculate_mahalanobis_distance(x, mean, cov_matrix):
        # Calculate the Mahalanobis distance
        diff = x - mean
        inv_cov_matrix = np.linalg.inv(cov_matrix)
        md = np.sqrt(np.dot(np.dot(diff, inv_cov_matrix), diff.T))
        return md[0, 0] # return a scalar
    
    eps = 1e-8

    # sparsity
    l0_distance = np.count_nonzero(adv_numpy - sample_numpy)

    # proximity
    l1_distance = np.linalg.norm(adv_numpy - sample_numpy, ord=1)
    l2_distance = np.linalg.norm(adv_numpy - sample_numpy, ord=2)
    linf_distance = np.linalg.norm(adv_numpy - sample_numpy, ord=np.inf)

    # deviation
    if adv_numpy_cont is not None:
        md = _calculate_mahalanobis_distance(adv_numpy_cont, train_mean, train_cov_matrix)
        # Check if the Mahalanobis distance exceeds the threshold
        is_outlier = md > threshold
    else:
        md = 0
        is_outlier = 0

    # sensitivity: 
    if l0_distance == 0:
        sens = 0
    else:
        sens = (l1_distance / ((std + eps) * l0_distance)).mean()

    return {
        "L0 Distance": l0_distance,
        "L1 Distance": l1_distance,
        "L2 Distance": l2_distance,
        "Linf Distance": linf_distance,
        "Mahalanobis Distance": md,
        "Is Outlier": is_outlier,
        "Sensitivity": sens,
    }


In [5]:
ATTACK_LIST = {
    "L2CarliniWagner": foolbox.attacks.L2CarliniWagnerAttack(binary_search_steps=10, steps=10, stepsize=0.1),
    "L2DeepFool": foolbox.attacks.L2DeepFoolAttack(),
    "LinfFGSM": foolbox.attacks.LinfFastGradientAttack(),
    "LinfPGD": foolbox.attacks.LinfProjectedGradientDescentAttack(),
    "LinfBIM": foolbox.attacks.LinfBasicIterativeAttack(),
    "L2Gaussian": foolbox.attacks.L2AdditiveGaussianNoiseAttack(),
    "L2Uniform": foolbox.attacks.L2AdditiveUniformNoiseAttack(),
    "LinfUniform": foolbox.attacks.LinfAdditiveUniformNoiseAttack(),

}

In [6]:
def save_datapoints_as_npy(alg_name, model_name, dataset_name, epsilon, datapoints):

    path = f"./datapoints/{alg_name}_{dataset_name}/{model_name}"
    os.makedirs(path, exist_ok=True)
    for k in datapoints.keys():
            with open(f"{path}/{alg_name}_{dataset_name}_{model_name}_{epsilon}_{k}_arr.npy", 'wb') as f:
                np.save(f, datapoints[k]["arr"])
            with open(f"{path}/{alg_name}_{dataset_name}_{model_name}_{epsilon}_{k}_arr_adv.npy", 'wb') as f:
                np.save(f, datapoints[k]["arr_adv"])
    
    path_set = {
        "success_arr": f"{path}/{alg_name}_{dataset_name}_{model_name}_{epsilon}_success_arr.npy",
        "success_arr_adv": f"{path}/{alg_name}_{dataset_name}_{model_name}_{epsilon}_success_arr_adv.npy",
        "arr": f"{path}/{alg_name}_{dataset_name}_{model_name}_{epsilon}_arr.npy",
        "arr_adv": f"{path}/{alg_name}_{dataset_name}_{model_name}_{epsilon}_arr_adv.npy",
    }

    return path_set


In [7]:
def run_attack(model, attack_name, epsilons, X_test_tensor, y_test_tensor, X_train_tensor, num_continues, config, device):
    # Create a Foolbox model wrapper for the PyTorch model
    fmodel = foolbox.models.PyTorchModel(model, bounds=(0, 1))

    # Create an attack object (FGSM in this case)
    attack = ATTACK_LIST[attack_name]


    # Generate an adversarial example

    attack_success_rates = []
    average_distances = {
        "L0 Distance": [], "L1 Distance": [], "L2 Distance": [], "Linf Distance": [], "Mahalanobis Distance": [], "Sensitivity": []
    }
    outliner_rates = []
    path_sets = []

    X_train_numpy = X_train_tensor.numpy()

    if num_continues != 0:
        # for calculating mahalanobis distance, use numerical features only
        X_train_numpy_continues = X_train_tensor[:,:num_continues].numpy()
        train_mean = np.mean(X_train_numpy_continues, axis=0)
        train_cov_matrix = np.cov(X_train_numpy_continues.T)

        # Calculate the critical value for the Mahalanobis distance using chi-squared distribution
        alpha = 0.05
        degrees_of_freedom = len(train_mean)
        # Calculate the Chi-Square critical value at the given alpha and df
        chi_square_critical_value  = chi2.ppf(1 - alpha, df=degrees_of_freedom)
        threshold = np.sqrt(chi_square_critical_value)
        print(f"Threshold: {threshold}")
    else:
        X_train_numpy_continues = None
        train_mean = None
        train_cov_matrix = None
        threshold = None
        print("No numerical features, cannot calculate Mahalanobis distance")
    
    std = np.std(X_train_numpy, axis=0)
    # print(f"std: {std}")



    for epsilon in epsilons:
        # Initialize variables to keep track of success and the number of samples
        total_samples = len(X_test_tensor)
        successful_attacks = 0
        total_metrics = {
            "L0 Distance": 0.0, "L1 Distance": 0.0, "L2 Distance": 0.0,
            "Linf Distance": 0.0, "Mahalanobis Distance": 0.0,
            "Is Outlier": 0, "Sensitivity": 0.0
        }

        arr_list=[]
        arr_list_adv=[]

        success_arr_list=[]
        success_arr_list_adv=[]

        for sample_idx in tqdm(range(total_samples)):
            sample = X_test_tensor[sample_idx].to(device).unsqueeze(0)
            label = y_test_tensor[sample_idx].to(device).unsqueeze(0)

            _, advs, success = attack(fmodel, sample, label, epsilons=[epsilon])

            with torch.no_grad():
                model.eval()
                test_outputs = model(sample)
                adv_outputs = model(advs[0])
                _, predicted_classes = test_outputs.max(dim=1)
                _, adv_predicted_classes = adv_outputs.max(dim=1)

            adv_numpy = advs[0].cpu().numpy()
            # print(adv_numpy.shape)
            sample_numpy = sample.cpu().numpy()
            sample_numpy_cont = adv_numpy[:,:num_continues]
            # print(sample_numpy_cont.shape)

            metrics_dict = metrics(adv_numpy, sample_numpy, sample_numpy_cont, train_mean, train_cov_matrix, threshold, std)

            arr_list.append(sample_numpy)
            arr_list_adv.append(adv_numpy)

            if success:
                successful_attacks += 1

                for key, value in metrics_dict.items():
                    total_metrics[key] += value

                success_arr_list.append(sample_numpy)
                success_arr_list_adv.append(adv_numpy)

            # print(f"Epsilon = {epsilon}, Predicted class: {predicted_classes} | Adversarial example: {adv_predicted_classes} | Success: {success} | L2 distance: {np.linalg.norm(advs[0].cpu().numpy() - sample.cpu().numpy())}")

        arr = np.concatenate(arr_list, axis=0)
        arr_adv = np.concatenate(arr_list_adv, axis=0)
        success_arr = np.concatenate(success_arr_list, axis=0)
        success_arr_adv = np.concatenate(success_arr_list_adv, axis=0)

        datapoints = {
            "all": {
                "arr": arr,
                "arr_adv": arr_adv,
            },
            "success": {
                "arr": success_arr,
                "arr_adv": success_arr_adv,
            }
        }

        path_set = save_datapoints_as_npy(attack_name, config["model"], config["dataset"], epsilon, datapoints)

        if successful_attacks > 0:
            success_rate = successful_attacks / total_samples
            outliner_rate = total_metrics["Is Outlier"] / successful_attacks
            for key, value in total_metrics.items():
                if key in average_distances:
                    average_distances[key].append(value / successful_attacks)

        else:
            success_rate = 0.0
            outliner_rate = 0.0
            for key, value in total_metrics.items():
                if key in average_distances:
                    average_distances[key].append(0.0)
        
        attack_success_rates.append(success_rate)
        outliner_rates.append(outliner_rate)
        path_sets.append(path_set)
        
        print(f"Epsilon = {epsilon}")
        print(f"Success Rate: {success_rate * 100}%")
        for key, value in average_distances.items():
            print(f"Average {key} for Successful Attacks: {value[-1]}")
        print(f"Outlier Rate for Successful Attacks: {outliner_rate * 100}%\n")
        print("")

    return attack_success_rates, average_distances, outliner_rates, path_sets

Run Attack

In [8]:
wandb_run = False

In [9]:
# ### Only for Testing

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# # device = torch.device("cpu")


# for dataset_name in ["Adult"]: # "Adult", "Electricity", "Higgs", "KDDCup09_appetency", "Mushroom"
#     X_train, y_train, X_val, y_val, X_test, y_test, \
#         X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, X_val_tensor, y_val_tensor, \
#         info = get_datasets.get_split_continues(dataset_name, device)
#     for model_name in ["MLP"]: # , "TabTransformer", "FTTransformer"
#         model, train_config = run_pytorch.model_config(model_name, X_train.shape[1], 2, [], info.numerical_cols, device)
#         train_config["dataset"] = f"{dataset_name}_continuous_only"
#         train_config["device"] = device

#         criterion = nn.CrossEntropyLoss()
#         optimizer = run_pytorch.build_optimizer(model, "adam", train_config["learning_rate"])
#         path = f"models/train/{model_name}/{dataset_name}_continuous_only/train_run-test.pt"
#         model.load_state_dict(torch.load(path))
        
#         # run_pytorch.test(model, (X_test_tensor, y_test_tensor), train_config, stage="train", wandb_run=wandb.run)

#         ## LinfFGSM
#         epsilons = np.arange(0.01, 0.15, 0.02)  # You can change the epsilon values
#         attack_config = {**train_config, "epsilons": epsilons}
#         attack_success_rates, average_distances, outliner_rates = attack_LinfFGSM(model, epsilons, X_test_tensor, y_test_tensor, X_train_tensor, len(info.numerical_cols), attack_config, device)

In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")


for dataset_name in ["Higgs"]: # "Adult", "Electricity", "Higgs", "KDDCup09_appetency", "Mushroom"
    X_train, y_train, X_val, y_val, X_test, y_test, \
        X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor, X_val_tensor, y_val_tensor, \
        info = get_datasets.get_split(dataset_name, device)
    # Considering size of Higgs dataset, we will use only 10k samples for adversarial attack.
    if dataset_name == "Higgs":
        X_test = X_test[:10000]
        y_test = y_test[:10000]
        X_test_tensor = X_test_tensor[:10000]
        y_test_tensor = y_test_tensor[:10000]

    for model_name in ["FTTransformer"]: # "LogisticRegression", "MLP", "TabTransformer", "FTTransformer"
        model, train_config = run_pytorch.model_config(model_name, X_train.shape[1], 2, info.num_categories_list, info.numerical_cols, device)
        train_config["dataset"] = dataset_name
        train_config["device"] = device

        criterion = nn.CrossEntropyLoss()
        optimizer = run_pytorch.build_optimizer(model, "adam", train_config["learning_rate"])
        path = f"models/train/{model_name}/{dataset_name}/train_run-test.pt"
        model.load_state_dict(torch.load(path))
        
        # run_pytorch.test(model, (X_test_tensor, y_test_tensor), train_config, stage="train", wandb_run=wandb.run)

        for attack_name in ["L2CarliniWagner"]: # "LinfFGSM", "LinfPGD", "LinfBIM", "L2CarliniWagner", "L2DeepFool", "L2Gaussian", "L2Uniform", "LinfUniform"

            epsilons = [
                0.0,
                0.01,
                0.03,
                0.05,
                0.1,
                0.3,
                0.5,
                1.0,
            ]
            attack_config = {**train_config, "epsilons": epsilons}
            attack_success_rates, average_distances, outliner_rates, path_sets = run_attack(model, attack_name, epsilons, X_test_tensor, y_test_tensor, X_train_tensor, len(info.numerical_cols), attack_config, device)

            
            results = {
                "model": model_name,
                "dataset": dataset_name,
                "attack": attack_name,
                "attack_config": attack_config,
                "result": {
                    "epsilons": epsilons,
                    "attack_success_rates": attack_success_rates,
                    "average_distances": average_distances,
                    "outliner_rates": outliner_rates,
                    "numpy_path": path_sets,
                }
            }

            # save python dictionary
            with open(f'results/{dataset_name}_{model_name}_{attack_name}.pickle', 'wb') as handle:
                pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)




Threshold: 6.429396406462071


100%|██████████| 10000/10000 [1:11:19<00:00,  2.34it/s]


Epsilon = 0.0
Success Rate: 27.04%
Average L0 Distance for Successful Attacks: 0.0
Average L1 Distance for Successful Attacks: 0.0
Average L2 Distance for Successful Attacks: 0.0
Average Linf Distance for Successful Attacks: 0.0
Average Mahalanobis Distance for Successful Attacks: 5.055094793359479
Average Sensitivity for Successful Attacks: 0.0
Outlier Rate for Successful Attacks: 9.245562130177515%




100%|██████████| 10000/10000 [1:10:31<00:00,  2.36it/s]


Epsilon = 0.01
Success Rate: 43.2%
Average L0 Distance for Successful Attacks: 27.283333333333335
Average L1 Distance for Successful Attacks: 0.0016233369656506343
Average L2 Distance for Successful Attacks: 0.0037420050027832386
Average Linf Distance for Successful Attacks: 0.016094493369657223
Average Mahalanobis Distance for Successful Attacks: 4.948732484808444
Average Sensitivity for Successful Attacks: 0.0009305887459270363
Outlier Rate for Successful Attacks: 7.3842592592592595%




100%|██████████| 10000/10000 [1:10:20<00:00,  2.37it/s]


Epsilon = 0.03
Success Rate: 75.99000000000001%
Average L0 Distance for Successful Attacks: 27.603237268061587
Average L1 Distance for Successful Attacks: 0.00828212762041994
Average L2 Distance for Successful Attacks: 0.0193256296570283
Average Linf Distance for Successful Attacks: 0.08348450285188999
Average Mahalanobis Distance for Successful Attacks: 4.979920354151647
Average Sensitivity for Successful Attacks: 0.004739180022638221
Outlier Rate for Successful Attacks: 6.921963416238978%




100%|██████████| 10000/10000 [1:10:21<00:00,  2.37it/s]


Epsilon = 0.05
Success Rate: 89.81%
Average L0 Distance for Successful Attacks: 27.664291281594476
Average L1 Distance for Successful Attacks: 0.015072536683711365
Average L2 Distance for Successful Attacks: 0.03494417679076814
Average Linf Distance for Successful Attacks: 0.1505374385539698
Average Mahalanobis Distance for Successful Attacks: 5.2009979671925874
Average Sensitivity for Successful Attacks: 0.008624767936997279
Outlier Rate for Successful Attacks: 9.00790557844338%




100%|██████████| 10000/10000 [1:10:25<00:00,  2.37it/s]


Epsilon = 0.1
Success Rate: 97.57000000000001%
Average L0 Distance for Successful Attacks: 27.690991083324793
Average L1 Distance for Successful Attacks: 0.0255680037529791
Average L2 Distance for Successful Attacks: 0.057843690621353645
Average Linf Distance for Successful Attacks: 0.2463399785379454
Average Mahalanobis Distance for Successful Attacks: 5.683239766547783
Average Sensitivity for Successful Attacks: 0.014630454016896251
Outlier Rate for Successful Attacks: 20.108639950804548%




100%|██████████| 10000/10000 [1:10:13<00:00,  2.37it/s]


Epsilon = 0.3
Success Rate: 99.86%
Average L0 Distance for Successful Attacks: 27.698077308231525
Average L1 Distance for Successful Attacks: 0.027850123452865947
Average L2 Distance for Successful Attacks: 0.06237023360772598
Average Linf Distance for Successful Attacks: 0.2633681312136478
Average Mahalanobis Distance for Successful Attacks: 5.8033625408901095
Average Sensitivity for Successful Attacks: 0.01593632198420887
Outlier Rate for Successful Attacks: 22.291207690767074%




100%|██████████| 10000/10000 [1:10:18<00:00,  2.37it/s]


Epsilon = 0.5
Success Rate: 99.88%
Average L0 Distance for Successful Attacks: 27.698137765318382
Average L1 Distance for Successful Attacks: 0.02790282573999313
Average L2 Distance for Successful Attacks: 0.06248478466262733
Average Linf Distance for Successful Attacks: 0.26378822587164136
Average Mahalanobis Distance for Successful Attacks: 5.807682922758364
Average Sensitivity for Successful Attacks: 0.0159664791346352
Outlier Rate for Successful Attacks: 22.3167801361634%




100%|██████████| 10000/10000 [1:10:02<00:00,  2.38it/s]

Epsilon = 1.0
Success Rate: 99.88%
Average L0 Distance for Successful Attacks: 27.698137765318382
Average L1 Distance for Successful Attacks: 0.027903261003687393
Average L2 Distance for Successful Attacks: 0.06248572122776106
Average Linf Distance for Successful Attacks: 0.26379182164059317
Average Mahalanobis Distance for Successful Attacks: 5.807720216603635
Average Sensitivity for Successful Attacks: 0.015966728199458425
Outlier Rate for Successful Attacks: 22.3167801361634%







Considering size of Higgs dataset, we will use only 10k samples for adversarial attack.