In [1]:
import os
import sys
import itertools
import pandas as pd
from scenarios import *
from plots import *
from metrics import get_metrics

PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath('.')))
sys.path.append(PROJECT_ROOT)
#print(PROJECT_ROOT)
from data_preprocessing.data_interface import get_data_sklearn

from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier

INFO:/home/alba/Documents/HIC_Dundee/GRAIMAtter/github/GRAIMatter/data_preprocessing/data_interface.py:ROOT PROJECT FOLDER = /home/alba/Documents/HIC_Dundee/GRAIMAtter/github/GRAIMatter


In [2]:
class model_info:
    '''
    For each model it builds a class where information can be more easily identified.
    
    kind: type of model, can be either target, shadow or attack
    name: name of the classifier
    dataset: name of the dataset used to train the model
    params: parameters used for the model
    attack_scenario: if this is part of an attack scenario, state which scenario name
    metrics: calculated metrics for the model
    
    To view the data contained in the instance use model_info.show().
    '''
    def __init__(self, target_model_id, kind, classifier, dataset, params=None, attack_scenario=None, metrics=None):
        self.target_model_id = target_model_id #identifier of the target model, so it makes possible to map attacks and target models
        self.kind = kind #type of model: target, shadow, attack
        self.classifier = classifier #name of the classifier
        self.dataset = dataset #name of the dataset used to train the model
        self.params = params #parameters used. 'None' assumes default params
        self.attack_scenario = attack_scenario #Only relevant to the attack
        self.metrics = metrics #calculated metrics, expect a dictionary
    
    def show(self):
        print("Target model ID:", self.target_model_id)
        print("Type:", self.kind)
        print("Classifier:", self.classifier)
        print("Parameters:", self.params)
        print("Dataset:", self.dataset)
        print("Attack scenario:", self.attack_scenario)
        print("Metrics")
        for metric, value in self.metrics.items():
            print(metric, value)
    
    def data_frame(self):
        d = {"Target model ID":self.target_model_id, "Type": self.kind, "Classifier":self.classifier,
                          "Dataset":self.dataset, "Attack scenario": self.attack_scenario}
        #print(self.params, type(self.params))
        if not self.params:
            self.params = {}
        return(pd.DataFrame.from_dict({**d, **self.params, **self.metrics}, orient='index').T)

In [3]:
#    plot_confusion_matrix(name, cm, nclasses)
#    plot_roc_curve(clf, X, y)
#    plot_prob_test_train(pred_test, pred_train)
#    plot_detection_error_tradeoff(clf, X_test, y_test)
    #plot_ks_metric

In [4]:
def create_dir(path:str):
    """
    Creates a new directory if it does not exist.

    path: directory to create.
    """
    if not os.path.isdir(path):
        os.mkdir(path)

Create a directory to save images and results files if it doesn't exist

In [5]:
results_dir = os.path.join(PROJECT_ROOT, 'results')
create_dir(results_dir)

Define available datasets

In [6]:
    datasets = ['mimic2-iaccd',
               'in-hospital-mortality',
                'medical-mnist-ab-v-br-100',
                'indian liver',
            'texas hospitals 10']

In [7]:
classifiers = {
        'RandomForestClassifier':RandomForestClassifier(), #bootstrap=False
        'DecisionTreeClassifier':DecisionTreeClassifier(),
        'GaussianProcessClassifier':GaussianProcessClassifier(),
        'MLPClassifier':MLPClassifier(),
        'KNeighborsClassifier':KNeighborsClassifier(),
        'SVC':SVC(),#kernel='rbf', probability=True),
        'AdaBoostClassifier':AdaBoostClassifier()#n_estimators=100)
}

In [8]:
experiment_params = {
    'RandomForestClassifier': {
        #'n_estimators': [10, 20, 100],
        #'criterion':['gini','entropy'],
        #'max_depth':[None,2,4],
        #'max_features':[None,'sqrt','log2'],
        'bootstrap': [True, False],
        'min_samples_split': [2, 10],
        #'class_weight':[None,'balanced','balanced_subsample'],
    },
    'DecisionTreeClassifier': {
        #'criterion':['gini','entropy'],
        'max_depth':[None,2,4],
        #'min_samples_split': [2, 10],
        #'max_features':[None,'sqrt','log2'],
        #'class_weight':[None,'balanced']
    },
    'GaussianProcessClassifier': {
        'max_iter_predict':[50,100,200],
        'warm_start':[True,False],
    },
    'MLPClassifier': {
        #'hidden_layer_size':[(50,),(100,),(200,)],
        #'activation':['identity', 'logistic', 'tanh', 'relu'],
        'solver':['lbfgs', 'sgd', 'adam'],
        #'learning_rate': ['constant', 'invscaling', 'adaptive'],
        #'max_iter': [50,200,400,1000]
    },
    'KNeighborsClassifier': {
        'n_neighbors':[2,5,10,20],
        'weights':['uniform', 'distance'],
        #'algorithm':['ball_tree', 'kd_tree', 'brute']
    },
    'SVC': {
        #'Kernel':['linear', 'poly', 'rbf', 'sigmoid'],
        #'decision_function_shape':['ovo', 'ovr'],
        #'max_iter':[-1, 2, 5],
        'probability':[True]
    },
    'AdaBoostClassifier': {
        #'n_estimators': [10, 20, 50, 100],
        'algorithm':['SAMME', 'SAMME.R']
    }
}


In [9]:
sets = {}
trained_classifiers = {}
for dataset in datasets[:1]:
    print(dataset)
    #load the data
    X, y = get_data_sklearn(dataset)
    #split into training, shadow model and validation data
    X_target_train, X_shadow_train, X_test, y_target_train, y_shadow_train, y_test = split_target_data(X, y)
        
    for name,clf in classifiers.items():
        all_combinations = itertools.product(*experiment_params[name].values())
        print('all', all_combinations)
        for i,combination in enumerate(all_combinations):
            # Turn this particular combination into a dictionary
            params = {n: v for n, v in zip(experiment_params[name].keys(), combination)}
            print('comb dict', params)
            clf = clf.set_params(**params)
            #clf.get_params()
            trained_classifiers[name] = clf.fit(X_target_train, y_target_train) 
            sets[name+'_targetModel'+str(i)] = model_info(dataset+"_"+name+str(i),
                                                          'target',
                                                          name,
                                                          attack_scenario='target',
                                                          dataset=dataset,
                                                          params=params,
                                                          metrics=get_metrics(clf, X_test, y_test))
            #worst case scenario
            mia_clf_name = 'randomForest'
            proba, mi_test_x, mi_test_y, mi_clf = run_membership_inference_attack(clf, X_target_train, X_test, RandomForestClassifier())#returns predicted probabilities, mi_test_x, mi_test_y and trained attack classifier
            sets[name+'_worstCase'+str(i)] = model_info(dataset+"_"+name+str(i),
                                                        'attack',
                                                  mia_clf_name,
                                                  attack_scenario='worstCase',
                                                  dataset=dataset,
                                                  params=params,
                                                  metrics=get_metrics(mi_clf, mi_test_x, mi_test_y))
            print("Worst case plots")
            #plot_confusion_matrix(name, confusion_matrix(mi_test_y, ), 2)
            print(mi_test_y.shape)
            #plot_roc_curve(mi_clf, mi_test_x, mi_test_y)
            #plot_prob_test_train(pred_test, pred_train)
            #plot_detection_error_tradeoff(clf, X_test, y_test)
    
            #Salem1
            proba, mi_test_x, mi_test_y, mi_clf, shadow_model = mia_salem_1(classifiers[name], X_shadow_train, y_shadow_train,
                                                 X_test)#, y_test)#returns predicted probabilities, mi_test_x, mi_test_y and trained attack classifier

            sets[name+'_Salem1_shadow'+str(i)] = model_info(dataset+"_"+name+str(i),'shadow',
                                              name,
                                              attack_scenario='Salem1',
                                              dataset=dataset+' test split',
                                              params=params,
                                              metrics=get_metrics(shadow_model, X_test, y_test))
            sets[name+'_Salem1_mia'+str(i)] = model_info(dataset+"_"+name+str(i),'attack',
                                              name,
                                              attack_scenario='Salem1',
                                              dataset=dataset+' test split',
                                              params=None,
                                              metrics=get_metrics(mi_clf, mi_test_x, mi_test_y))
            print("Salem1 plots")
            #plot_confusion_matrix(name, confusion_matrix(y_test, y_pred), 2)
            #plot_roc_curve(clf, X, y)
            #plot_prob_test_train(pred_test, pred_train)
            #plot_detection_error_tradeoff(clf, X_test, y_test)
    
            #Salem2
            shadow_dataset = 'Breast cancer'
            proba, mi_test_x, mi_test_y, mi_clf, shadow_model, x_shadow_test, y_shadow_test = mia_salem_2(classifiers[name])#returns predicted probabilities, mi_test_x, mi_test_y and trained attack classifier

            sets[name+'_Salem2_shadow'+str(i)] = model_info(dataset+"_"+name+str(i),'shadow',
                                              name,
                                              attack_scenario='Salem2',
                                              dataset=shadow_dataset,
                                              params=params,
                                              metrics=get_metrics(shadow_model, x_shadow_test, y_shadow_test))
            sets[name+'_Salem2_mia'+str(i)] = model_info(dataset+"_"+name+str(i),'attack',
                                              name,
                                              attack_scenario='Salem2',
                                              dataset=shadow_dataset,
                                              params=None,
                                              metrics=get_metrics(mi_clf, mi_test_x, mi_test_y))


INFO:/home/alba/Documents/HIC_Dundee/GRAIMAtter/github/GRAIMatter/data_preprocessing/data_interface.py:DATASET FOLDER = /home/alba/Documents/HIC_Dundee/GRAIMAtter/github/GRAIMatter/data
INFO:/home/alba/Documents/HIC_Dundee/GRAIMAtter/github/GRAIMatter/data_preprocessing/data_interface.py:Loading mimic2-iaccd
INFO:/home/alba/Documents/HIC_Dundee/GRAIMAtter/github/GRAIMatter/data_preprocessing/data_interface.py:Preprocessing
INFO:numexpr.utils:NumExpr defaulting to 4 threads.


mimic2-iaccd
all <itertools.product object at 0x7fd7998e7600>
comb dict {'bootstrap': True, 'min_samples_split': 2}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


Worst case plots
(294,)


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


Salem1 plots
comb dict {'bootstrap': True, 'min_samples_split': 10}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


Worst case plots
(294,)


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


Salem1 plots
comb dict {'bootstrap': False, 'min_samples_split': 2}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


Worst case plots
(294,)


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


Salem1 plots


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


comb dict {'bootstrap': False, 'min_samples_split': 10}
Worst case plots
(294,)


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


Salem1 plots
all <itertools.product object at 0x7fd7998ad200>
comb dict {'max_depth': None}
Worst case plots
(294,)


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio


Salem1 plots


  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio


comb dict {'max_depth': 2}
Worst case plots
(294,)


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


Salem1 plots
comb dict {'max_depth': 4}


  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio


Worst case plots
(294,)
Salem1 plots
all <itertools.product object at 0x7fd7998e7600>
comb dict {'max_iter_predict': 50, 'warm_start': True}


  metrics['FAR'] = fp / (fp + tp) #proportion of things classified as positives that are incorrect, also known as false discovery rate
  metrics['PPV'] = tp / (tp + fp) #precision or positive predictive value
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


Worst case plots
(294,)


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


Salem1 plots


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['FAR'] = fp / (fp + tp) #proportion of things classified as positives that are incorrect, also known as false discovery rate
  metrics['PPV'] = tp / (tp + fp) #precision or positive predictive value
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


comb dict {'max_iter_predict': 50, 'warm_start': False}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


Worst case plots
(294,)


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


Salem1 plots


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['FAR'] = fp / (fp + tp) #proportion of things classified as positives that are incorrect, also known as false discovery rate
  metrics['PPV'] = tp / (tp + fp) #precision or positive predictive value
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


comb dict {'max_iter_predict': 100, 'warm_start': True}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


Worst case plots
(294,)


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


Salem1 plots


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['FAR'] = fp / (fp + tp) #proportion of things classified as positives that are incorrect, also known as false discovery rate
  metrics['PPV'] = tp / (tp + fp) #precision or positive predictive value
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


comb dict {'max_iter_predict': 100, 'warm_start': False}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


Worst case plots
(294,)


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


Salem1 plots


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['FAR'] = fp / (fp + tp) #proportion of things classified as positives that are incorrect, also known as false discovery rate
  metrics['PPV'] = tp / (tp + fp) #precision or positive predictive value
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


comb dict {'max_iter_predict': 200, 'warm_start': True}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


Worst case plots
(294,)


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


Salem1 plots


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['FAR'] = fp / (fp + tp) #proportion of things classified as positives that are incorrect, also known as false discovery rate
  metrics['PPV'] = tp / (tp + fp) #precision or positive predictive value
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


comb dict {'max_iter_predict': 200, 'warm_start': False}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


Worst case plots
(294,)


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


Salem1 plots


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


all <itertools.product object at 0x7fd7998ad200>
comb dict {'solver': 'lbfgs'}


  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio


Worst case plots
(294,)


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio


Salem1 plots


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


comb dict {'solver': 'sgd'}
Worst case plots
(294,)


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio


Salem1 plots
comb dict {'solver': 'adam'}
Worst case plots
(294,)
Salem1 plots




all <itertools.product object at 0x7fd7998e7600>
comb dict {'n_neighbors': 2, 'weights': 'uniform'}
Worst case plots
(294,)


  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio


Salem1 plots
comb dict {'n_neighbors': 2, 'weights': 'distance'}


  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


Worst case plots
(294,)
Salem1 plots


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


comb dict {'n_neighbors': 5, 'weights': 'uniform'}
Worst case plots
(294,)


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


Salem1 plots
comb dict {'n_neighbors': 5, 'weights': 'distance'}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


Worst case plots
(294,)
Salem1 plots


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


comb dict {'n_neighbors': 10, 'weights': 'uniform'}
Worst case plots
(294,)


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


Salem1 plots
comb dict {'n_neighbors': 10, 'weights': 'distance'}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


Worst case plots
(294,)
Salem1 plots


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


comb dict {'n_neighbors': 20, 'weights': 'uniform'}
Worst case plots
(294,)


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


Salem1 plots
comb dict {'n_neighbors': 20, 'weights': 'distance'}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


Worst case plots
(294,)
Salem1 plots


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


all <itertools.product object at 0x7fd7998ad200>
comb dict {'probability': True}
Worst case plots
(294,)


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


Salem1 plots
all <itertools.product object at 0x7fd7998e7600>
comb dict {'algorithm': 'SAMME'}


  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio


Worst case plots
(294,)
Salem1 plots
comb dict {'algorithm': 'SAMME.R'}
Worst case plots
(294,)


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio


Salem1 plots


In [10]:
for s, v in sets.items():
    v.show()
    print("\n")

Target model ID: mimic2-iaccd_RandomForestClassifier0
Type: target
Classifier: RandomForestClassifier
Parameters: {'bootstrap': True, 'min_samples_split': 2}
Dataset: mimic2-iaccd
Attack scenario: target
Metrics
TPR 1.0
FPR 0.029850746268656716
FAR 0.011695906432748537
TNR 0.9701492537313433
PPV 0.9883040935672515
NPV 1.0
FNR 0.0
ACC 0.9915254237288136
Advantage 0.9701492537313433
PLR 33.5
NLR 0.0
OR inf


Target model ID: mimic2-iaccd_RandomForestClassifier0
Type: attack
Classifier: randomForest
Parameters: {'bootstrap': True, 'min_samples_split': 2}
Dataset: mimic2-iaccd
Attack scenario: worstCase
Metrics
TPR 0.9602272727272727
FPR 0.5423728813559322
FAR 0.27467811158798283
TNR 0.4576271186440678
PPV 0.7253218884120172
NPV 0.8852459016393442
FNR 0.03977272727272727
ACC 0.7585034013605442
Advantage 0.4178543913713405
PLR 1.770419034090909
NLR 0.08691077441077441
OR 20.37053571428571


Target model ID: mimic2-iaccd_RandomForestClassifier0
Type: shadow
Classifier: RandomForestClassifier

Dataset: mimic2-iaccd
Attack scenario: target
Metrics
TPR 0.0
FPR 0.0
FAR nan
TNR 1.0
PPV nan
NPV 0.2838983050847458
FNR 1.0
ACC 0.2838983050847458
Advantage 0.0
PLR nan
NLR 1.0
OR nan


Target model ID: mimic2-iaccd_GaussianProcessClassifier4
Type: attack
Classifier: randomForest
Parameters: {'max_iter_predict': 200, 'warm_start': True}
Dataset: mimic2-iaccd
Attack scenario: worstCase
Metrics
TPR 1.0
FPR 0.0
FAR 0.0
TNR 1.0
PPV 1.0
NPV 1.0
FNR 0.0
ACC 1.0
Advantage 1.0
PLR inf
NLR 0.0
OR inf


Target model ID: mimic2-iaccd_GaussianProcessClassifier4
Type: shadow
Classifier: GaussianProcessClassifier
Parameters: {'max_iter_predict': 200, 'warm_start': True}
Dataset: mimic2-iaccd test split
Attack scenario: Salem1
Metrics
TPR 0.005917159763313609
FPR 0.0
FAR 0.0
TNR 1.0
PPV 1.0
NPV 0.2851063829787234
FNR 0.9940828402366864
ACC 0.288135593220339
Advantage 0.005917159763313609
PLR inf
NLR 0.9940828402366864
OR inf


Target model ID: mimic2-iaccd_GaussianProcessClassifier4
Type: attack
Cla

FAR 0.05056179775280899
TNR 0.8656716417910447
PPV 0.949438202247191
NPV 1.0
FNR 0.0
ACC 0.961864406779661
Advantage 0.8656716417910448
PLR 7.444444444444445
NLR 0.0
OR inf


Target model ID: mimic2-iaccd_KNeighborsClassifier3
Type: attack
Classifier: randomForest
Parameters: {'n_neighbors': 5, 'weights': 'distance'}
Dataset: mimic2-iaccd
Attack scenario: worstCase
Metrics
TPR 1.0
FPR 0.8813559322033898
FAR 0.37142857142857144
TNR 0.11864406779661017
PPV 0.6285714285714286
NPV 1.0
FNR 0.0
ACC 0.6462585034013606
Advantage 0.11864406779661019
PLR 1.1346153846153846
NLR 0.0
OR inf


Target model ID: mimic2-iaccd_KNeighborsClassifier3
Type: shadow
Classifier: KNeighborsClassifier
Parameters: {'n_neighbors': 5, 'weights': 'distance'}
Dataset: mimic2-iaccd test split
Attack scenario: Salem1
Metrics
TPR 1.0
FPR 0.11940298507462686
FAR 0.04519774011299435
TNR 0.8805970149253731
PPV 0.9548022598870056
NPV 1.0
FNR 0.0
ACC 0.9661016949152542
Advantage 0.8805970149253731
PLR 8.375
NLR 0.0
OR inf



In [11]:
df = pd.DataFrame()
for s, v in sets.items():
    df = pd.concat([v.data_frame(),df], ignore_index=True)

In [12]:
df.groupby(['Target model ID', 'Classifier', 'Attack scenario', 'Type', "Dataset"])['TPR', 'FPR',
                                                                                   'FAR', 'TNR', 
                                                                                    'PPV', 'NPV',
                                                                                   'FNR', 'ACC',
                                                                                   'Advantage',
                                                                                   ].sum()#.reset_index()
                                                                                    #'PLR', 'NLR',
                                                                                    #'OR']

  df.groupby(['Target model ID', 'Classifier', 'Attack scenario', 'Type', "Dataset"])['TPR', 'FPR',


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,TPR,FPR,FAR,TNR,PPV,NPV,FNR,ACC,Advantage
Target model ID,Classifier,Attack scenario,Type,Dataset,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
mimic2-iaccd_AdaBoostClassifier0,AdaBoostClassifier,Salem1,attack,mimic2-iaccd test split,1.000000,1.000000,0.330532,0.000000,0.669468,0.000000,0.000000,0.669468,0.000000
mimic2-iaccd_AdaBoostClassifier0,AdaBoostClassifier,Salem1,shadow,mimic2-iaccd test split,1.000000,0.014925,0.005882,0.985075,0.994118,1.000000,0.000000,0.995763,0.985075
mimic2-iaccd_AdaBoostClassifier0,AdaBoostClassifier,Salem2,attack,Breast cancer,0.514085,0.475524,0.482270,0.524476,0.517730,0.520833,0.485915,0.519298,0.038560
mimic2-iaccd_AdaBoostClassifier0,AdaBoostClassifier,Salem2,shadow,Breast cancer,0.983871,0.040404,0.021390,0.959596,0.978610,0.969388,0.016129,0.975439,0.943467
mimic2-iaccd_AdaBoostClassifier0,AdaBoostClassifier,target,target,mimic2-iaccd,1.000000,0.000000,0.000000,1.000000,1.000000,1.000000,0.000000,1.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
mimic2-iaccd_SVC0,SVC,Salem1,shadow,mimic2-iaccd test split,1.000000,0.119403,0.045198,0.880597,0.954802,1.000000,0.000000,0.966102,0.880597
mimic2-iaccd_SVC0,SVC,Salem2,attack,Breast cancer,0.542254,0.503497,0.483221,0.496503,0.516779,0.522059,0.457746,0.519298,0.038757
mimic2-iaccd_SVC0,SVC,Salem2,shadow,Breast cancer,0.989247,0.101010,0.051546,0.898990,0.948454,0.978022,0.010753,0.957895,0.888237
mimic2-iaccd_SVC0,SVC,target,target,mimic2-iaccd,1.000000,0.134328,0.050562,0.865672,0.949438,1.000000,0.000000,0.961864,0.865672
