In [23]:
import pandas as pd
import numpy as np
import os

from utils.preprocessing import preprocess_df
from utils.df_loader import load_adult_df, load_compas_df, load_german_df, load_diabetes_df, load_breast_cancer_df
from utils.evaluation import get_evaluations, EvaluationMatrix

from utils.load import load_result_from_csv, load_datapoints_from_npy
from utils.models import load_models

seed = 42

In [24]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

pd.options.mode.chained_assignment = None # suppress "SettingWithCopyWarning" warning

In [25]:
datasets = [
        "adult",
        # "german",
        # "compas",
        # "diabetes",
        # "breast_cancer",
        ]

models = ["dt","gbc","lr","svc","nn_2"] # "dt","gbc","lr","svc",


attack_list = [
        'deepfool', 
        # 'carlini_l_2', 'carlini_l_inf', 
        # 'lowprofool_l_2', 'lowprofool_l_inf', 
        # 'boundary', 
        # 'hopskipjump_l_2', 'hopskipjump_l_inf'
        ]


## Combine dataframe

In [26]:
# for dataset_name in datasets:
#     for attack in attack_list:
#         folder_name = f"{attack}_{dataset_name}"

#         ## check if the folder exist

#         if os.path.isdir(f'./results/{folder_name}'):
#             for model_name in models:

#                 dfs = []
#                 file_name = f'{folder_name}_{model_name}_result.csv'
#                 destination_path = f'./results/{folder_name}/{file_name}'

#                 if os.path.isfile(f'./results/{folder_name}/{folder_name}_{model_name}_result_1.csv'):
#                     for i in range(0,10):
#                         dataset_path = (
#                             f"{attack}_{dataset_name}_{model_name}_result_{i}.csv"
#                         )
#                         dfs.append(pd.read_csv(f"./results/{folder_name}/{dataset_path}"))

#                     ### Combine dfs
#                     complete_df = pd.DataFrame([], columns=dfs[0].columns)
#                     for l in range(len(dfs[0])):
#                         for df in dfs:
#                             complete_df = complete_df.append(df.iloc[l : l + 1])

#                     ### Save dfs
#                     complete_df.to_csv(destination_path)
#                     print(f"Have saved combined sheet to {destination_path}")

## Check wheather white-box attack output same results

In [27]:
def are_ndarrays_same(ndarrays):
  # Create an empty matrix of size len(ndarrays) x len(ndarrays)
  results_matrix = np.empty((len(ndarrays), len(ndarrays)))

  # Iterate through each pair of ndarrays and check if they are the same
  for i in range(len(ndarrays)):
    for j in range(len(ndarrays)):
      if i == j:
        # If the indices are the same, mark it as True in the results matrix
        results_matrix[i][j] = True
      else:
        # Compare the ndarrays using the numpy.array_equal function
        results_matrix[i][j] = np.array_equal(ndarrays[i], ndarrays[j])
  return results_matrix

In [28]:
for dataset_name in datasets:
    for attack in attack_list:
        for model_name in models:
            ndarrays = []
            for running_times in range(0,10):
                ndarrays.append(load_datapoints_from_npy(attack, dataset_name, model_name, running_times, adv=True))
            print(f'{dataset_name} - {attack} - {model_name}')
            print(are_ndarrays_same(ndarrays).min())




adult - deepfool - dt
1.0
adult - deepfool - gbc
1.0
adult - deepfool - lr
1.0
adult - deepfool - svc
1.0
adult - deepfool - nn_2
1.0


## Evaluation

In [29]:
def get_loading_fn(dataset_name):
    if dataset_name == 'adult':
        dataset_loading_fn = load_adult_df
    elif dataset_name == 'german':
        dataset_loading_fn = load_german_df
    elif dataset_name == 'compas':
        dataset_loading_fn = load_compas_df
    elif dataset_name == 'diabetes':
        dataset_loading_fn = load_diabetes_df
    elif dataset_name == 'breast_cancer':
        dataset_loading_fn = load_breast_cancer_df
    else:
        raise Exception("Unsupported dataset")
    return dataset_loading_fn


In [30]:
# #### Select dataset ####

# all_metric = {}

# for dataset_name in datasets:
#     all_metric[dataset_name]={}    

#     df_info = preprocess_df(get_loading_fn(dataset_name))
#     for attack in attack_list:
#         all_metric[dataset_name][attack]={}

#         folder_name = f'{attack}_{dataset_name}'
#         models_list = load_models(df_info.dummy_df.shape[-1], dataset_name)
#         for model_name in models:

#             file_name = f'{folder_name}_{model_name}_result.csv'
#             result_path = f'./results/{folder_name}/{file_name}'
#             if os.path.isfile(result_path):
#                 result_df = pd.read_csv(result_path)
#                 evaluation_df, metric = get_evaluations(result_df, 
#                     df_info, 
#                     matrix = [
#                         EvaluationMatrix.L1, 
#                         EvaluationMatrix.L2, 
#                         EvaluationMatrix.Linf,
#                         EvaluationMatrix.Sparsity, 
#                         # EvaluationMatrix.Realistic, 
#                         EvaluationMatrix.MAD, 
#                         EvaluationMatrix.Mahalanobis,
#                         # EvaluationMatrix.Perturbation_Sensitivity,
#                         EvaluationMatrix.Neighbour_Distance,
#                         ],
#                     models = models_list,
#                     model_name=model_name)
                
#                 all_metric[dataset_name][attack][model_name] = metric

#                 csv_save_result_path = f'results/{folder_name}/eval_{file_name}'
#                 evaluation_df.to_csv(csv_save_result_path)
#                 print(f"Have saved file to {csv_save_result_path}")


In [31]:
def get_dic_from_metric(all_metric):

    dataset_arr = []
    attack_arr = []
    model_arr = []
    metric_arr = []
    value_arr = []

    for dataset, dic1 in all_metric.items():
        for attack, dic2 in dic1.items():
            for model, dic3 in dic2.items():
                for metric, value in dic3.items():
                    dataset_arr.append(dataset)
                    attack_arr.append(attack)
                    model_arr.append(model)
                    metric_arr.append(metric)
                    value_arr.append(value)

    table = {
            'Dataset': dataset_arr,
            'Attack': attack_arr,
            'Model': model_arr,
            'Metric': metric_arr,
            'Value': value_arr,
        }

    return table




In [32]:
#### Select dataset ####

all_metric = {}

for dataset_name in datasets:
    all_metric[dataset_name]={}

    df_info = preprocess_df(get_loading_fn(dataset_name))
    for attack in attack_list:
        all_metric[dataset_name][attack]={}

        folder_name = f'{attack}_{dataset_name}'
        for model_name in models:

            file_name = f'{folder_name}_{model_name}_result_0.csv'
            result_path = f'./results/{folder_name}/{file_name}'
            if os.path.isfile(result_path):
                result_df = pd.read_csv(result_path)
                evaluation_df, metric = get_evaluations(result_df=result_df, 
                    df_info=df_info, 
                    matrix = [
                        EvaluationMatrix.L1, 
                        EvaluationMatrix.L2, 
                        EvaluationMatrix.Linf,
                        EvaluationMatrix.Sparsity, 
                        # EvaluationMatrix.Realistic, 
                        EvaluationMatrix.MAD, 
                        EvaluationMatrix.Mahalanobis,
                        # EvaluationMatrix.Perturbation_Sensitivity,
                        EvaluationMatrix.Neighbour_Distance,
                        ])
                
                all_metric[dataset_name][attack][model_name] = metric

                csv_save_result_path = f'results/{folder_name}/eval_{file_name}'
                evaluation_df.to_csv(csv_save_result_path)
                print(f"Have saved file to {csv_save_result_path}")


Have saved file to results/deepfool_adult/eval_deepfool_adult_lr_result_0.csv
Have saved file to results/deepfool_adult/eval_deepfool_adult_svc_result_0.csv
Have saved file to results/deepfool_adult/eval_deepfool_adult_nn_2_result_0.csv


In [33]:
im_results = pd.DataFrame.from_dict(get_dic_from_metric(all_metric))

In [37]:
im_results.to_csv(f"./results/deepfool_adult_table.csv",index=False)

In [35]:
# #### Select dataset ####
# for dataset_name in datasets: 
#     All_Results[dataset_name] = {}
#     All_Datapoints[dataset_name] = {}

#     for model in models:
#         All_Results[dataset_name][model] = {}
#         All_Datapoints[dataset_name][model] = {}

#         for attack in attack_list:

#             All_Results[dataset_name][model][attack] = load_result_from_csv(attack, dataset_name, model)

#             All_Datapoints[dataset_name][model][attack] = {
#                 "original": load_datapoints_from_npy(attack, dataset_name, model, False),
#                 "adv": load_datapoints_from_npy(attack, dataset_name, model, True),
#             }


In [36]:
# All_Metrics = {}

# for dataset_name in datasets: 
#     All_Metrics[dataset_name] = {}
#     for model in models: 
#         All_Metrics[dataset_name][model] = {}
#         for attack in attack_list:

#             if All_Results[dataset_name][model][attack] is not None:
#                 if All_Results[dataset_name][model][attack]["original"] is not None:
#                     if All_Results[dataset_name][model][attack]["adv"] is not None:
#                         All_Metrics[dataset_name][model][attack] = metric_generator(
#                             All_Results[dataset_name][model][attack],
#                             All_Results[dataset_name][model][attack]["original"],
#                             All_Results[dataset_name][model][attack]["adv"]
#                         )
