In [9]:
import tensorflow as tf
import pandas as pd
import numpy as np

from utils.df_loader import (
    load_adult_df,
    load_compas_df,
    load_german_df,
    load_diabetes_df,
    load_breast_cancer_df,
)
from sklearn.model_selection import train_test_split
from utils.preprocessing import preprocess_df
from utils.models import (
    train_models,
    evaluation_test,
    save_models,
    load_models,
)

from art.attacks.evasion import DeepFool, CarliniL0Method, CarliniLInfMethod, CarliniL2Method, BoundaryAttack
from art.estimators.classification import SklearnClassifier, KerasClassifier

from utils.preprocessing import DfInfo
from utils.preprocessing import inverse_dummy
from scipy.stats import pearsonr

seed = 42
# tf.random.set_seed(seed)
# np.random.seed(seed)

In [10]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
pd.options.mode.chained_assignment = None # suppress "SettingWithCopyWarning" warning

In [11]:
TRAIN_MODEL = False # (1)True(2)False
RUN_ART = True
num_instances = 64 # (1)&(2) 20

if RUN_ART:
    tf.compat.v1.disable_eager_execution()

print("TF version: ", tf.__version__)
print("Eager execution enabled: ", tf.executing_eagerly())  # False    

TF version:  2.10.0
Eager execution enabled:  False


In [12]:
#### Select dataset ####
for dataset_name in [
    # "adult",
    # "german",
    #  "compas",
    "diabetes",
    # "breast_cancer",
]:  # [adult, german, compas]
    print(f"Dataset Name: [{dataset_name}]")
    if dataset_name == "adult":
        dataset_loading_fn = load_adult_df
    elif dataset_name == "german":
        dataset_loading_fn = load_german_df
    elif dataset_name == "compas":
        dataset_loading_fn = load_compas_df
    elif dataset_name == "diabetes":
        dataset_loading_fn = load_diabetes_df
    elif dataset_name == "breast_cancer":
        dataset_loading_fn = load_breast_cancer_df
    else:
        raise Exception("Unsupported dataset")

    df_info = preprocess_df(dataset_loading_fn)

    train_df, test_df = train_test_split(
        df_info.dummy_df, train_size=0.8, random_state=seed, shuffle=True
    )
    X_train = np.array(train_df[df_info.ohe_feature_names])
    y_train = np.array(train_df[df_info.target_name])
    X_test = np.array(test_df[df_info.ohe_feature_names])
    y_test = np.array(test_df[df_info.target_name])



Dataset Name: [diabetes]


In [13]:
### Load models
models = load_models(X_train.shape[-1], dataset_name)


  updates=self.state_updates,


In [14]:
num_instances = 64 # (1)&(2) 20

X_test_re=X_test[0:num_instances]
y_test_re=y_test[0:num_instances]


In [15]:
X_test_re[30:39]

array([[0.        , 0.81407035, 0.62295082, 0.56565657, 0.11820331,
        0.7928465 , 0.29077711, 0.06666667],
       [0.23529412, 0.73366834, 0.63934426, 0.        , 0.        ,
        0.57377049, 0.18872758, 0.76666667],
       [0.23529412, 0.92462312, 0.63934426, 0.39393939, 0.32742317,
        0.5514158 , 0.0794193 , 0.16666667],
       [0.        , 0.83919598, 0.        , 0.        , 0.        ,
        0.48137109, 0.32493595, 0.15      ],
       [0.11764706, 0.54773869, 0.75409836, 0.        , 0.        ,
        0.63636364, 0.32749787, 0.55      ],
       [0.52941176, 0.7638191 , 0.63934426, 0.34343434, 0.20212766,
        0.50968703, 0.34799317, 0.2       ],
       [0.11764706, 0.98994975, 0.57377049, 1.        , 0.        ,
        0.5171386 , 0.21221178, 0.68333333],
       [0.        , 0.54773869, 0.72131148, 0.3030303 , 0.        ,
        0.48435171, 0.33176772, 0.28333333],
       [0.70588235, 0.6080402 , 0.63934426, 0.17171717, 0.        ,
        0.39493294, 0.077284

In [17]:
max_iter_list = []
curr_delta_list = []
curr_epsilon_list = []

for max_iter in range(50,5001,50):
    print(f'{max_iter} Done.')
    max_iter_list.append(max_iter)
    attack = BoundaryAttack(estimator=SklearnClassifier(models['gbc'], clip_values=(0,1)), verbose= False, batch_size=64, max_iter=max_iter)
    x_adv = attack.generate(X_test_re,y_test_re) 
    if hasattr(attack, 'curr_delta') and hasattr(attack, 'curr_epsilon'):
        curr_delta_list.append(attack.curr_delta)
        # print(f'The current delta is: {attack.curr_delta}')
        curr_epsilon_list.append(attack.curr_epsilon)
        # print(f'The current epsilon is: {attack.curr_epsilon}')
    else:
        break

50 Done.
100 Done.
150 Done.
200 Done.
250 Done.
300 Done.
350 Done.
400 Done.
450 Done.
500 Done.
550 Done.
600 Done.
650 Done.
700 Done.
750 Done.
800 Done.
850 Done.
900 Done.
950 Done.
1000 Done.
1050 Done.
1100 Done.
1150 Done.
1200 Done.
1250 Done.


OverflowError: (34, 'Result too large')

In [None]:
# max_iter_list 
# curr_delta_list
# curr_epsilon_list
import matplotlib.pyplot as plt

In [None]:
plt.plot(max_iter_list, curr_delta_list)
plt.plot(max_iter_list, curr_epsilon_list)
plt.yscale('log')
plt.title(f'Boundary attack - compas - GBC')
# plt.ylabel(f'delta')
plt.xlabel(f'max_iter')
plt.legend(['delta', 'epsilon'])

plt.show()

In [None]:
nn_max_iter_list = []
nn_curr_delta_list = []
nn_curr_epsilon_list = []

for max_iter in range(50,5001,50):
    print(f'{max_iter} Done.')
    nn_max_iter_list.append(max_iter)
    attack = BoundaryAttack(estimator=KerasClassifier(models['nn_2'], clip_values=(0,1)), verbose= False, batch_size=64, max_iter=max_iter)
    x_adv = attack.generate(X_test_re,y_test_re) 
    if hasattr(attack, 'curr_delta') and hasattr(attack, 'curr_epsilon'):
        nn_curr_delta_list.append(attack.curr_delta)
        # print(f'The current delta is: {attack.curr_delta}')
        nn_curr_epsilon_list.append(attack.curr_epsilon)
        # print(f'The current epsilon is: {attack.curr_epsilon}')
    else:
        break

In [None]:
plt.plot(nn_max_iter_list, nn_curr_delta_list)
plt.plot(nn_max_iter_list, nn_curr_epsilon_list)
plt.yscale('log')
plt.title(f'Boundary attack - diabetes - NN')
# plt.ylabel(f'delta')
plt.xlabel(f'max_iter')
plt.legend(['delta', 'epsilon'])

plt.show()