In [1]:
import os
import sys
import itertools
from scenarios import *
from plots import *
from metrics import get_metrics

PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath('.')))
sys.path.append(PROJECT_ROOT)
#print(PROJECT_ROOT)
from data_preprocessing.data_interface import get_data_sklearn

from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier

INFO:/home/alba/Documents/HIC_Dundee/GRAIMAtter/github/GRAIMatter/data_preprocessing/data_interface.py:ROOT PROJECT FOLDER = /home/alba/Documents/HIC_Dundee/GRAIMAtter/github/GRAIMatter


In [2]:
class model_info:
    '''
    For each model it builds a class where information can be more easily identified.
    
    kind: type of model, can be either target, shadow or attack
    name: name of the classifier
    dataset: name of the dataset used to train the model
    params: parameters used for the model
    attack_scenario: if this is part of an attack scenario, state which scenario name
    metrics: calculated metrics for the model
    
    To view the data contained in the instance use model_info.show().
    '''
    def __init__(self, kind, classifier, dataset, params=None, attack_scenario=None, metrics=None):
        self.kind = kind #type of model: target, shadow, attack
        self.classifier = classifier #name of the classifier
        self.dataset = dataset #name of the dataset used to train the model
        self.params = params #parameters used. 'None' assumes default params
        self.attack_scenario = attack_scenario #Only relevant to the attack
        self.metrics = metrics #calculated metrics, expect a dictionary
    
    def show(self):
        print("Type:", self.kind)
        print("Classifier:", self.classifier)
        print("Parameters:", self.params)
        print("Dataset:", self.dataset)
        print("Attack scenario:", self.attack_scenario)
        print("Metrics")
        for metric, value in self.metrics.items():
            print(metric, value)
    
    #def data_frame(self):
    #    return(dataframe of the data, several columns and 1 row)

In [3]:
def create_dir(path:str):
    """
    Creates a new directory if it does not exist.

    path: directory to create.
    """
    if not os.path.isdir(path):
        os.mkdir(path)

Create a directory to save images and results files if it doesn't exist

In [4]:
results_dir = os.path.join(PROJECT_ROOT, 'results')
create_dir(results_dir)

Define available datasets

In [5]:
datasets = ['mimic2-iaccd',
           'in-hospital-mortality',
            'medical-mnist-ab-v-br-100',
            'indian liver',
            'texas hospitals 10']

In [6]:
classifiers = {
        'RandomForestClassifier':RandomForestClassifier(), #bootstrap=False
        'DecisionTreeClassifier':DecisionTreeClassifier(),
        'GaussianProcessClassifier':GaussianProcessClassifier(),
        'MLPClassifier':MLPClassifier(),
        'KNeighborsClassifier':KNeighborsClassifier(),
        'SVC':SVC(),#kernel='rbf', probability=True),
        'AdaBoostClassifier':AdaBoostClassifier()#n_estimators=100)
}

In [7]:
experiment_params = {
    'RandomForestClassifier': {
        #'n_estimators': [10, 20, 100],
        #'criterion':['gini','entropy'],
        #'max_depth':[None,2,4],
        #'max_features':[None,'sqrt','log2'],
        'bootstrap': [True, False],
        'min_samples_split': [2, 10],
        #'class_weight':[None,'balanced','balanced_subsample'],
    },
    'DecisionTreeClassifier': {
        #'criterion':['gini','entropy'],
        'max_depth':[None,2,4],
        #'min_samples_split': [2, 10],
        #'max_features':[None,'sqrt','log2'],
        #'class_weight':[None,'balanced']
    },
    'GaussianProcessClassifier': {
        'max_iter_predict':[50,100,200],
        'warm_start':[True,False],
    },
    'MLPClassifier': {
        #'hidden_layer_size':[(50,),(100,),(200,)],
        #'activation':['identity', 'logistic', 'tanh', 'relu'],
        'solver':['lbfgs', 'sgd', 'adam'],
        #'learning_rate': ['constant', 'invscaling', 'adaptive'],
        #'max_iter': [50,200,400,1000]
    },
    'KNeighborsClassifier': {
        'n_neighbors':[2,5,10,20],
        'weights':['uniform', 'distance'],
        #'algorithm':['ball_tree', 'kd_tree', 'brute']
    },
    'SVC': {
        #'Kernel':['linear', 'poly', 'rbf', 'sigmoid'],
        #'decision_function_shape':['ovo', 'ovr'],
        #'max_iter':[-1, 2, 5],
        'probability':[True]
    },
    'AdaBoostClassifier': {
        #'n_estimators': [10, 20, 50, 100],
        'algorithm':['SAMME', 'SAMME.R']
    }
}


In [8]:
sets = {}
trained_classifiers = {}
for dataset in datasets[:1]:
    print(dataset)
    #load the data
    X, y = get_data_sklearn(dataset)
    #split into training, shadow model and validation data
    X_target_train, X_shadow_train, X_test, y_target_train, y_shadow_train, y_test = split_target_data(X, y)
        
    for name,clf in classifiers.items():
        all_combinations = itertools.product(*experiment_params[name].values())
        print('all', all_combinations)
        for i,combination in enumerate(all_combinations):
            # Turn this particular combination into a dictionary
            params = {n: v for n, v in zip(experiment_params[name].keys(), combination)}
            print('comb dict', params)
            clf = clf.set_params(**params)
            clf.get_params()
            trained_classifiers[name] = clf.fit(X_target_train, y_target_train) 
            sets[name+'_targetModel'+str(i)] = model_info('target',
                                                  name,
                                                  dataset=dataset,
                                                  params=params,
                                                  metrics=get_metrics(clf, X_test, y_test))
            #worst case scenario
            mia_clf_name = 'randomForest'
            proba, mi_test_x, mi_test_y, mi_clf = run_membership_inference_attack(clf, X_target_train, X_test, RandomForestClassifier())#returns predicted probabilities, mi_test_x, mi_test_y and trained attack classifier
            sets[name+'_worstCase'+str(i)] = model_info('attack',
                                                  mia_clf_name,
                                                  attack_scenario='worstCase',
                                                  dataset=dataset,
                                                  params=params,
                                                  metrics=get_metrics(mi_clf, mi_test_x, mi_test_y))

            proba, mi_test_x, mi_test_y, mi_clf, shadow_model = mia_salem_1(classifiers[name], X_shadow_train, y_shadow_train,
                                                 X_test)#, y_test)#returns predicted probabilities, mi_test_x, mi_test_y and trained attack classifier

            sets[name+'_Salem1_shadow'+str(i)] = model_info('shadow',
                                              name,
                                              attack_scenario='Salem1',
                                              dataset=dataset+' test split',
                                              params=params,
                                              metrics=get_metrics(shadow_model, X_test, y_test))
            sets[name+'_Salem1_mia'+str(i)] = model_info('attack',
                                              name,
                                              attack_scenario='Salem1',
                                              dataset=dataset+' test split',
                                              params=None,
                                              metrics=get_metrics(mi_clf, mi_test_x, mi_test_y))
            shadow_dataset = 'Breast cancer'
            proba, mi_test_x, mi_test_y, mi_clf, shadow_model, x_shadow_test, y_shadow_test = mia_salem_2(classifiers[name])#returns predicted probabilities, mi_test_x, mi_test_y and trained attack classifier

            sets[name+'_Salem2_shadow'+str(i)] = model_info('shadow',
                                              name,
                                              attack_scenario='Salem2',
                                              dataset=shadow_dataset,
                                              params=params,
                                              metrics=get_metrics(shadow_model, x_shadow_test, y_shadow_test))
            sets[name+'_Salem2_mia'+str(i)] = model_info('attack',
                                              name,
                                              attack_scenario='Salem2',
                                              dataset=shadow_dataset,
                                              params=None,
                                              metrics=get_metrics(mi_clf, mi_test_x, mi_test_y))


INFO:/home/alba/Documents/HIC_Dundee/GRAIMAtter/github/GRAIMatter/data_preprocessing/data_interface.py:DATASET FOLDER = /home/alba/Documents/HIC_Dundee/GRAIMAtter/github/GRAIMatter/data
INFO:/home/alba/Documents/HIC_Dundee/GRAIMAtter/github/GRAIMatter/data_preprocessing/data_interface.py:Loading mimic2-iaccd
INFO:/home/alba/Documents/HIC_Dundee/GRAIMAtter/github/GRAIMatter/data_preprocessing/data_interface.py:Preprocessing
INFO:numexpr.utils:NumExpr defaulting to 4 threads.


mimic2-iaccd
all <itertools.product object at 0x7f1d1c6a4480>
comb dict {'bootstrap': True, 'min_samples_split': 2}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


comb dict {'bootstrap': True, 'min_samples_split': 10}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


comb dict {'bootstrap': False, 'min_samples_split': 2}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


comb dict {'bootstrap': False, 'min_samples_split': 10}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


all <itertools.product object at 0x7f1cf9a78e40>
comb dict {'max_depth': None}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


comb dict {'max_depth': 2}


  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio


comb dict {'max_depth': 4}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio


all <itertools.product object at 0x7f1d1c6a4480>
comb dict {'max_iter_predict': 50, 'warm_start': True}


  metrics['FAR'] = fp / (fp + tp) #proportion of things classified as positives that are incorrect, also known as false discovery rate
  metrics['PPV'] = tp / (tp + fp) #precision or positive predictive value
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['FAR'] = fp / (fp + tp) #proportion of things classified as positives that are incorrect, also known as false discovery rate
  metrics['PPV'] = tp / (tp + fp) #precision or positive predictive value
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


comb dict {'max_iter_predict': 50, 'warm_start': False}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['FAR'] = fp / (fp + tp) #proportion of things classified as positives that are incorrect, also known as false discovery rate
  metrics['PPV'] = tp / (tp + fp) #precision or positive predictive value
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


comb dict {'max_iter_predict': 100, 'warm_start': True}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['FAR'] = fp / (fp + tp) #proportion of things classified as positives that are incorrect, also known as false discovery rate
  metrics['PPV'] = tp / (tp + fp) #precision or positive predictive value
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


comb dict {'max_iter_predict': 100, 'warm_start': False}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['FAR'] = fp / (fp + tp) #proportion of things classified as positives that are incorrect, also known as false discovery rate
  metrics['PPV'] = tp / (tp + fp) #precision or positive predictive value
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


comb dict {'max_iter_predict': 200, 'warm_start': True}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['FAR'] = fp / (fp + tp) #proportion of things classified as positives that are incorrect, also known as false discovery rate
  metrics['PPV'] = tp / (tp + fp) #precision or positive predictive value
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


comb dict {'max_iter_predict': 200, 'warm_start': False}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


all <itertools.product object at 0x7f1cf9a78e40>
comb dict {'solver': 'lbfgs'}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


comb dict {'solver': 'sgd'}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['FAR'] = fp / (fp + tp) #proportion of things classified as positives that are incorrect, also known as false discovery rate
  metrics['PPV'] = tp / (tp + fp) #precision or positive predictive value
  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio


comb dict {'solver': 'adam'}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


all <itertools.product object at 0x7f1cf7992b80>
comb dict {'n_neighbors': 2, 'weights': 'uniform'}


  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio


comb dict {'n_neighbors': 2, 'weights': 'distance'}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


comb dict {'n_neighbors': 5, 'weights': 'uniform'}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


comb dict {'n_neighbors': 5, 'weights': 'distance'}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


comb dict {'n_neighbors': 10, 'weights': 'uniform'}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio


comb dict {'n_neighbors': 10, 'weights': 'distance'}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event

comb dict {'n_neighbors': 20, 'weights': 'uniform'}


  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


comb dict {'n_neighbors': 20, 'weights': 'distance'}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


all <itertools.product object at 0x7f1cf9a78e40>
comb dict {'probability': True}


  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes


all <itertools.product object at 0x7f1cf9ab2140>
comb dict {'algorithm': 'SAMME'}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio


comb dict {'algorithm': 'SAMME.R'}


  metrics['PLR'] = metrics['TPR'] / metrics['FPR'] #positive likelihood ratio
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio
  metrics['OR'] = metrics['PLR'] / metrics['NLR'] #odds ratio, the odds ratio is used to find the probability of an outcome of an event when there are two possible outcomes
  metrics['NPV'] = tn / (tn + fn) #negative predictive value
  metrics['NLR'] = metrics['FNR'] / metrics['TNR'] #negative likelihood ratio


In [9]:
for s, v in sets.items():
    v.show()
    print("\n")

Type: target
Classifier: RandomForestClassifier
Parameters: {'bootstrap': True, 'min_samples_split': 2}
Dataset: mimic2-iaccd
Attack scenario: None
Metrics
TPR 1.0
FPR 0.0
FAR 0.0
TNR 1.0
PPV 1.0
NPV 1.0
FNR 0.0
ACC 1.0
Advantage 1.0
PLR inf
NLR 0.0
OR inf


Type: attack
Classifier: randomForest
Parameters: {'bootstrap': True, 'min_samples_split': 2}
Dataset: mimic2-iaccd
Attack scenario: worstCase
Metrics
TPR 0.9545454545454546
FPR 0.5254237288135594
FAR 0.26956521739130435
TNR 0.4745762711864407
PPV 0.7304347826086957
NPV 0.875
FNR 0.045454545454545456
ACC 0.7619047619047619
Advantage 0.4291217257318952
PLR 1.816715542521994
NLR 0.09577922077922078
OR 18.96774193548387


Type: shadow
Classifier: RandomForestClassifier
Parameters: {'bootstrap': True, 'min_samples_split': 2}
Dataset: mimic2-iaccd test split
Attack scenario: Salem1
Metrics
TPR 1.0
FPR 0.029850746268656716
FAR 0.011695906432748537
TNR 0.9701492537313433
PPV 0.9883040935672515
NPV 1.0
FNR 0.0
ACC 0.9915254237288136
Advant

Advantage 0.0
PLR nan
NLR 1.0
OR nan


Type: attack
Classifier: randomForest
Parameters: {'max_iter_predict': 200, 'warm_start': True}
Dataset: mimic2-iaccd
Attack scenario: worstCase
Metrics
TPR 1.0
FPR 0.0
FAR 0.0
TNR 1.0
PPV 1.0
NPV 1.0
FNR 0.0
ACC 1.0
Advantage 1.0
PLR inf
NLR 0.0
OR inf


Type: shadow
Classifier: GaussianProcessClassifier
Parameters: {'max_iter_predict': 200, 'warm_start': True}
Dataset: mimic2-iaccd test split
Attack scenario: Salem1
Metrics
TPR 0.005917159763313609
FPR 0.0
FAR 0.0
TNR 1.0
PPV 1.0
NPV 0.2851063829787234
FNR 0.9940828402366864
ACC 0.288135593220339
Advantage 0.005917159763313609
PLR inf
NLR 0.9940828402366864
OR inf


Type: attack
Classifier: GaussianProcessClassifier
Parameters: None
Dataset: mimic2-iaccd test split
Attack scenario: Salem1
Metrics
TPR 1.0
FPR 0.0
FAR 0.0
TNR 1.0
PPV 1.0
NPV 1.0
FNR 0.0
ACC 1.0
Advantage 1.0
PLR inf
NLR 0.0
OR inf


Type: shadow
Classifier: GaussianProcessClassifier
Parameters: {'max_iter_predict': 200, 'warm_star

FNR 0.0
ACC 0.6694677871148459
Advantage 0.0
PLR 1.0
NLR nan
OR nan


Type: shadow
Classifier: AdaBoostClassifier
Parameters: {'algorithm': 'SAMME.R'}
Dataset: Breast cancer
Attack scenario: Salem2
Metrics
TPR 0.978494623655914
FPR 0.050505050505050504
FAR 0.026737967914438502
TNR 0.9494949494949495
PPV 0.9732620320855615
NPV 0.9591836734693877
FNR 0.021505376344086023
ACC 0.968421052631579
Advantage 0.9279895731508635
PLR 19.374193548387098
NLR 0.022649279341111876
OR 855.4


Type: attack
Classifier: AdaBoostClassifier
Parameters: None
Dataset: Breast cancer
Attack scenario: Salem2
Metrics
TPR 0.5704225352112676
FPR 0.5104895104895105
FAR 0.474025974025974
TNR 0.48951048951048953
PPV 0.525974025974026
NPV 0.5343511450381679
FNR 0.4295774647887324
ACC 0.5298245614035088
Advantage 0.0599330247217571
PLR 1.1174030484275517
NLR 0.8775653923541247
OR 1.273298899618235


