In [1]:
import os
import numpy as np
import itertools
import random
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import gcf
from matplotlib.patches import Patch
from matplotlib.lines import Line2D
from matplotlib.pyplot import gca
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
#from matplotlib import colors as mcolors

In [2]:
path = "~/studies/GRAIMatter/experiments"
file_names = ["AdaBoost_results.csv",
              "DecisionTree_results.csv", 
              "Random_Forest_loop_results.csv",
              "round_rf_results.csv",
              "SVC_poly_results.csv",
              "SVC_rbf_results.csv",
              "SVC_rbf_dp_results.csv",
              "xgboost_results.csv", 
              "AdaBoost_results_minmax_round.csv",
              "DecisionTreeClassifier_minmax_round_results.csv",
              "round_minmax_rf_results.csv",
              "round_rf_results.csv"
             ]

In [3]:
results_df = pd.DataFrame()
for f in file_names:
    results_df = pd.concat([results_df, pd.read_csv(os.path.join(path, f))], ignore_index=True)

In [4]:
results_df['target_classifier'] = [" ".join(x) for x in zip(list(results_df.target_classifier), list(results_df.kernel.fillna('')))]

In [16]:
results_df.columns

Index(['dataset', 'scenario', 'target_classifier', 'shadow_classifier_name',
       'shadow_dataset', 'attack_classifier', 'repetition', 'full_id',
       'model_data_param_id', 'param_id', 'n_estimators', 'algorithm',
       'target_TPR', 'target_FPR', 'target_FAR', 'target_TNR', 'target_PPV',
       'target_NPV', 'target_FNR', 'target_ACC', 'target_F1score',
       'target_Advantage', 'target_AUC', 'target_pred_prob_var',
       'target_train_TPR', 'target_train_FPR', 'target_train_FAR',
       'target_train_TNR', 'target_train_PPV', 'target_train_NPV',
       'target_train_FNR', 'target_train_ACC', 'target_train_F1score',
       'target_train_Advantage', 'target_train_AUC',
       'target_train_pred_prob_var', 'mia_TPR', 'mia_FPR', 'mia_FAR',
       'mia_TNR', 'mia_PPV', 'mia_NPV', 'mia_FNR', 'mia_ACC', 'mia_F1score',
       'mia_Advantage', 'mia_AUC', 'mia_pred_prob_var', 'shadow_TPR',
       'shadow_FPR', 'shadow_FAR', 'shadow_TNR', 'shadow_PPV', 'shadow_NPV',
       'shadow_FNR',

In [None]:
len(results_df)

In [None]:
results_df.dtypes

In [11]:
round_datasets = [x for x in results_df.dataset.unique() if 'round' in x]
minmax_datasets = [x for x in results_df.dataset.unique() if 'minmax' in x]
round_datasets

['round mimic2-iaccd',
 'round in-hospital-mortality',
 'round indian liver',
 'round synth-ae',
 'round texas hospitals 10']

In [13]:
list(zip(['round', 'm'], round_datasets, minmax_datasets))

[('round', 'round mimic2-iaccd', 'minmax mimic2-iaccd'),
 ('m', 'round in-hospital-mortality', 'minmax in-hospital-mortality')]

In [10]:
results_df[results_df['dataset'] in round_datasets]

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [None]:
set(list(results_df.dataset))

In [None]:
results_df['dataset'] = results_df.dataset.replace({'minmax in-hospital-mortality':'in-hospital-mortality',
                                                   'minmax indian liver':'indian liver',
                                                   'minmax mimic2-iaccd':'mimic2-iaccd',
                                                   'minmax texas hospitals 10': 'texas hospitals 10',
                                                   'minmax synth-ae': 'synth-ae'})

In [None]:
#obtain the number of patients (rows) and column and types of columns per dataset
data = {'texas hospitals 10':"/home/ec2-user/Documents/GRAIMatter/data/TexasHospitals/texas_data10_rm_binary.csv",
        'in-hospital-mortality':
        'indian liver':
        'mimic2-iaccd':'/home/ec2-user/studies/GRAIMatter/data/mimic2-iaccd/1.0/full_cohort_data.csv ',
        'synth-ae':

In [None]:
import sklearn.datasets as skl_datasets

In [None]:
for dataset in datasets:
    data_features, data_labels = get_data_sklearn(dataset)

In [None]:
results_df['normalisation'] = [x.split()[0] if x.split()[0]=='round' or
                               x.split()[0]=='minmax' else 'none' for x in results_df.dataset]

In [None]:
results_df['dataset'] = results_df.dataset.replace({'minmax in-hospital-mortality':'in-hospital-mortality',
                                                    'minmax indian liver':'indian liver',
                                                    'minmax mimic2-iaccd':'mimic2-iaccd',
                                                    'minmax synth-ae':'synth-ae',
                                                    'minmax texas hospitals 10':'texas hospitals 10',
                                                    'round in-hospital-mortality':'in-hospital-mortality',
                                                    'round indian liver':'indian liver',
                                                    'round mimic2-iaccd':'mimic2-iaccd',
                                                    'round synth-ae':'synth-ae'})

In [None]:
results_df

In [None]:
results_df.describe()

In [None]:
common_vars = [#'target_TPR', 
    #'target_FPR', 'target_FAR', 'target_TNR', 'target_PPV',
    #   'target_NPV', 'target_FNR', 'target_ACC', 'target_F1score',
    #   'target_Advantage', 'target_AUC', 'target_pred_prob_var',
    #   'target_train_TPR', 'target_train_FPR', 'target_train_FAR',
    #   'target_train_TNR', 'target_train_PPV', 'target_train_NPV',
    #   'target_train_FNR', 'target_train_ACC', 'target_train_F1score',
    #   'target_train_Advantage', 'target_train_AUC',
    #   'target_train_pred_prob_var', 
    'mia_TPR', 'mia_FPR', 'mia_FAR',
       'mia_TNR', 'mia_PPV', 'mia_NPV', 'mia_FNR', 'mia_ACC', 'mia_F1score',
       'mia_Advantage', 'mia_AUC', 'mia_pred_prob_var']

In [None]:
xvars = ['mia_TPR', 'mia_FPR', 'mia_FAR',
       'mia_TNR', 'mia_PPV', 'mia_NPV', 'mia_FNR', 'mia_ACC', 'mia_F1score',
       'mia_Advantage', 'mia_AUC']#, 'mia_pred_prob_var']

yvars =  ['target_TPR', 'target_FPR', 'target_FAR', 
          'target_TNR', 'target_PPV',  'target_NPV', 'target_FNR', 'target_ACC', 'target_F1score',
       'target_Advantage', 'target_AUC', 'target_pred_prob_var']

par_vars= ['criterion', 'max_depth',
       'min_samples_split', 'min_samples_leaf', 'max_features', 'class_weight',
       'bootstrap', 'kernel', 'C', 'degree', 'probability', 'gamma']

In [None]:
sns.set_palette("Set1")
with PdfPages('pairplots_metrics.pdf') as pdf_pages:
    i = 1
    #for dataset in set(list(results_df.dataset)):
    #    for clf in set(list(results_df.target_classifier)):
    figu = plt.figure(i) 
    g = sns.pairplot(results_df, #[(results_df['dataset']==dataset) & 
                                 #       (results_df['target_classifier']==clf)], 
                             x_vars = xvars,
                             y_vars = xvars,
                             hue ='scenario',
                         corner=True,
                         kind = 'scatter')#reg
            #g.set_title(dataset+' '+clf)
            #g.fig.suptitle(dataset + ' ' + clf)
    plt.tight_layout()
    pdf_pages.savefig(figu)
    i+=1

In [None]:
sns.set_palette("Set1")
with PdfPages('pairplots_metrics_dataset_classifier_scenario.pdf') as pdf_pages:
    i = 1
    for dataset in set(list(results_df.dataset)):
        for clf in set(list(results_df.target_classifier)):
            tmp = results_df[(results_df['dataset']==dataset) & 
                                        (results_df['target_classifier']==clf)]
            if len(tmp)>100:
                print(dataset, clf)
                figu = plt.figure(i) 
                g = sns.pairplot(tmp, 
                                 x_vars = xvars,
                                 y_vars = xvars,
                                 hue ='scenario',
                                 corner=True,
                             kind = 'scatter')#reg
                #g.set_title(dataset+' '+clf)
                g.fig.suptitle(dataset + ' ' + clf)
                plt.tight_layout()
                pdf_pages.savefig(figu)
                i+=1

In [None]:
#are there any conflicting metrics for the same experiment
#for metric in xvars:
    #for i in range(0,5):
    #max(results_df.full_id)-min(resutls_df.full_id)>0.5:
newdf = results_df.groupby(['target_classifier','dataset','scenario','full_id'])[xvars].max()-results_df.groupby(['target_classifier','dataset','scenario','full_id'])[xvars].min()
newdf.reset_index(inplace='True')

In [None]:
newdf = newdf.reset_index()


In [None]:
newdf[newdf.mia_AUC>0.6]

In [None]:
newdf.dtypes

In [None]:
sns.catplot(data=newdf,x='scenario',y='mia_AUC', kind='violin')


In [None]:
for v in xvars[::-1]:
    sns.catplot(data=newdf,
                x='target_classifier', y=v, 
                kind='violin', cut=0)#, row='dataset', col='scenario')
    plt.xticks(rotation=90)

In [None]:
sns.set_palette("tab20")
with PdfPages('pairplots_metrics_dataset_classifier_scenario.pdf') as pdf_pages:
    i = 1
    for dataset in set(list(results_df.dataset)):
        for clf in set(list(results_df.target_classifier)):
            tmp = results_df[(results_df['dataset']==dataset) & 
                                        (results_df['target_classifier']==clf)]
            if len(tmp)>0:
                print(clf, dataset)
                figu = plt.figure(i) 
                g = sns.pairplot(tmp, 
                                 x_vars = xvars,
                                 y_vars = xvars,
                                 hue ='scenario',
                             kind = 'scatter')#reg
                #g.set_title(dataset+' '+clf)
                g.fig.suptitle(dataset + ' ' + clf)
                plt.tight_layout()
                pdf_pages.savefig(figu)
                i+=1
            del tmp

In [None]:
sns.set_palette("Set1")
with PdfPages('pairplots_by_classifier_dataset_paramid.pdf') as pdf_pages:
    i = 1
    for dataset in set(list(results_df.dataset)):
        for clf in set(list(results_df.target_classifier)):
            for sc in set(list(results_df.scenario)):
                if len(results_df[(results_df['dataset']==dataset) & 
                                            (results_df['target_classifier']==clf) &
                                           (results_df['scenario']==sc)])>0:
                    figu = plt.figure(i) 
                    g = sns.pairplot(results_df[(results_df['dataset']==dataset) & 
                                                (results_df['target_classifier']==clf) &
                                               (results_df['scenario']==sc)], 
                                 x_vars = xvars,
                                 y_vars = xvars,
                                 hue ='param_id', kind = 'scatter')#reg
                    #g.set_title(dataset+' '+clf)
                    g.fig.suptitle(dataset + ' ' + clf+ ' ' +sc)
                    plt.tight_layout()
                    pdf_pages.savefig(figu)
                    i+=1

In [None]:
results_df.head()

In [None]:
with PdfPages('violinplots_RF_DT_SVC_ADA_XGBoost.pdf') as pdf_pages:
    i = 0
    for v in common_vars:
        figu = plt.figure(i)        
        g = sns.catplot(data=results_df,
                    row="target_classifier", y=v,
                    x="scenario",
                    #row=,
                    hue="dataset",
                    kind="violin", cut=0 ,
                    inner="quartile",
                    height=3, aspect=2
        )
        #g.set_title(v)
        plt.xticks(rotation=90)
        plt.show()
        plt.tight_layout()
        pdf_pages.savefig(figu)
        i+=1

In [None]:
sns.set_style("whitegrid")
sns.set_palette("tab20")
with PdfPages('pointplots_by_classifier_scenario_dataset.pdf') as pdf_pages:
    i = 0
    for v in common_vars:
        figu = plt.figure(i)        
        sns.catplot(data=results_df,
                    col="target_classifier",
                    y=v,
                    x="scenario",
                    #row=,
                    hue="dataset",
                    kind="point",
                    height=4, aspect=0.8,
        ).set(ylim=(-0.05, 1.05), xlim=(-0.05,1.05))
        #g.set_title(v)
        #g.set(ylim=(-0.05, 1.05))
        #g.set_xticklabels(rotation = 90)
        #plt.xticks(rotation=90)
        #plt.show()
        #plt.tight_layout()
        pdf_pages.savefig(figu)
        i+=1

In [18]:
cmap = {'in-hospital-mortality':'b',
        'minmax in-hospital-mortality':'tab:blue',
        'round in-hospital-mortality':'tab:cyan',
        'indian liver':'g',
        'minmax indian liver':'tab:green',
        'round indian liver':'yellowgreen',
        'mimic2-iaccd':'r',
        'minmax mimic2-iaccd':'tab:red',
        'round mimic2-iaccd':'salmon',
        'synth-ae':'k',
        'minmax synth-ae':'tab:gray',
        'round synth-ae':'silver',
        'texas hospitals 10':'tab:orange',
        'minmax texas hospitals 10':'orange',
        'round texas hospitals 10': 'goldenrod'
       }

In [None]:
sns.set_style("whitegrid")
sns.set_palette(cmap.values())
#mcolors.get_named_colors_mapping().update(cmap)
with PdfPages('pointplots_classifier_dataset_scenario.pdf') as pdf_pages:
    i = 0
    for v in common_vars:
        figu = plt.figure(i)        
        g = sns.catplot(data=results_df,
                    x="target_classifier",
                    y=v,
                    hue="dataset", hue_order=cmap.keys(),
                    #row=,
                    col="scenario",
                    kind="point",
                    height=5, aspect=0.8,
        ).set(ylim=(-0.05, 1.05))
        #g.set_title(v)
        #g.set(ylim=(-0.05, 1.05))
        g.set_xticklabels(rotation = 90)
        #plt.xticks(rotation=90)
        #plt.show()
        plt.tight_layout()
        pdf_pages.savefig(figu)
        i+=1

In [None]:
with PdfPages('pointplots_by_paramid_RF_DT_SVC_ADA_XGBoost.pdf') as pdf_pages:
    i = 0
    for v in common_vars:
        figu = plt.figure(i)        
        g = sns.catplot(data=results_df,
                    col="target_classifier",
                    y=v,
                    hue="scenario",
                    x="param_id",
                    row="dataset",
                    kind="point",
                    height=3, aspect=1,
        )
        #g.set_title(v)
        g.set_xticklabels(rotation = 90)
        #plt.xticks(rotation=90)
        #plt.show()
        plt.tight_layout()
        pdf_pages.savefig(figu)
        i+=1

In [None]:
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})
# Initialize the FacetGrid object

df = results_df.groupby(['target_classifier','param_id','dataset','scenario'])['mia_AUC']

pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="", hue="g", aspect=15, height=.5, palette=pal)

# Draw the densities in a few steps
g.map(sns.kdeplot, "x",
      bw_adjust=.5, clip_on=False,
      fill=True, alpha=1, linewidth=1.5)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_adjust=.5)

# passing color=None to refline() uses the hue mapping
g.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    ax = plt.gca()
    ax.text(0, .2, label, fontweight="bold", color=color,
            ha="left", va="center", transform=ax.transAxes)


g.map(label, "x")

# Set the subplots to overlap
g.figure.subplots_adjust(hspace=-.25)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[], ylabel="")
g.despine(bottom=True, left=True)

g = sns.lmplot(data=results_df,
               x="mia_AUC",
               y=,
               hue=,
               col=,
               row=,
               height=3, aspect=1,
              )

In [None]:
len(results_df)

In [None]:
results_df.columns

In [None]:
mia_metrics = [column for column in results_df.columns if "mia_" in column]
other_columns = [column for column in results_df.columns if "mia_" not in column]

In [None]:
cols = ['target_classifier', 'repetition', 
        'dataset', 'param_id','model_data_param_id'
       ]
        #'shadow_classifier_name',
       #'shadow_dataset', 'attack_classifier', 'repetition', 'full_id',
       #'model_data_param_id', 'param_id',

In [None]:
worstcase = results_df[results_df.scenario=="WorstCase"].set_index(cols)[mia_metrics]
salem1 = results_df[results_df.scenario=="Salem1"].set_index(cols)[mia_metrics]
salem2 = results_df[results_df.scenario=="Salem2"].set_index(cols)[mia_metrics]

In [None]:
df = results_df[cols+['scenario']+mia_metrics].pivot_table(index=cols, 
                                      columns=['scenario'],
                                      #aggfunc='size', 
                                      #fill_value=0
                                             ) 

#df.columns = df.columns.map('_'.join)
#df = df.reset_index() 
df

In [None]:
sns.catplot(results_df,#.reset_index(),
           y='mia_AUC', x='dataset',
           hue='scenario',
           #row='dataset',
           col='target_classifier',
           kind='strip'
           )

In [None]:
results_df.columns


In [None]:
rgb = []
#itertools.permutations('ABCD',3)
a= itertools.permutations([x/100 for x in range(0, 101,20)[::-1]], 3)
a= list(set([matplotlib.colors.to_rgba(x,i) for x in a for i in [0.0, 0.5, 1.0]]))
print(len(a))
#n = []
#for x in a:
#    #for i in [0.0,0.25,0.5,0.75,1.0]:
#        n.append(matplotlib.colors.to_rgba(a))
#print('n',n)
sns.palplot(sns.color_palette(a))
#a = random.sample(a, 100)#len(a))
sns.palplot(sns.color_palette(a))
#print(len(a),len(a)/2)
new = []#a[0]]
for x in range(0, int(len(a)/2)):
    new.append(a[x])
    new.append(a[int(len(a)/2+x)])
print(len(a))
#print(new)
#print(a[1::2])
print('new')
c =  sns.set_palette(new)
print('palette')
sns.palplot(sns.color_palette(c))
#sns.palplot(sns.color_palette(sns.set_palette(list(a[1::2]+a[0::2][::-1]))))

In [None]:
c = mpl.colors.ListedColormap(c).with_extremes(over='0.25', under='0.75')
sns.palplot(sns.color_palette(c))

In [None]:
wc_s1 = worstcase-salem1
#wc_s1 = wc_s1[:10000]
#print(wc_s1)
wc_s1.reset_index(inplace=True)
wc_s1.drop("repetition", axis=1, inplace=True)
wc_s1 = wc_s1.groupby(['target_classifier', 'dataset', 'param_id']).mean().reset_index()
#print(wc_s1.tail())
target_classifier = wc_s1.pop("target_classifier")#list(wc_s1.index.get_level_values(0))#
#print(target_classifier)
dataset = wc_s1.pop("dataset")#list(wc_s1.index.get_level_values(1))#'dataset')
#print(dataset)
param = wc_s1.pop("param_id")#list(wc_s1.index.get_level_values(2))#'param_id')#
clf = dict(zip(set(target_classifier), sns.color_palette("Set1")))
dt = dict(zip(set(dataset), sns.color_palette("Paired")))
#p = dict(zip(set(param), sns.color_palette(a)))
tc_colors = [clf[c] for c in target_classifier]#target_classifier.map(clf)
#par_col = [p[c] for c in param]#param.map(p)
data_colors = [dt[c] for c in dataset]#list(dataset.map(dt))
#print(data_colors)
#sns.set_palette("viridis")
row_colors = pd.DataFrame({'Target classifier':tc_colors,
                          'Dataset':data_colors,
                          #'parameters':par_col
                          })
g = sns.clustermap(wc_s1,  cmap='seismic',
                   row_colors=row_colors,
                   figsize=(5, 10),
                   col_cluster=False,
                   dendrogram_ratio=(.1, .1),
                   yticklabels=False,
                   cbar_pos=(0.99, .65, .03, .25),
                  )
plt.title("Worst case - Salem1                                                                             ")#, loc="upper left")

d_legend_lines = [Line2D([0], [0], color=colour, lw=7) for colour in dt.values()]
d_legend_names = [label for label in dt.keys()]

c_legend_lines = [Line2D([0], [0], color=colour, lw=7) for colour in clf.values()]
c_legend_names = [label for label in clf.keys()]

l1 = plt.legend(d_legend_lines, d_legend_names,
                title='Dataset', 
                loc='lower left', bbox_to_anchor=(0, -1.2))
l2 = plt.legend(c_legend_lines, c_legend_names,
                title='Target classifier', 
                loc='lower left', bbox_to_anchor=(0, -2.0))
gca().add_artist(l2)
gca().add_artist(l1)
plt.show()

In [None]:
wc_s2 = worstcase-salem2
#print(wc_s1)
wc_s2.reset_index(inplace=True)
wc_s2.drop("repetition", axis=1, inplace=True)
wc_s2 = wc_s2.groupby(['target_classifier', 'dataset', 'param_id']).mean().reset_index()
target_classifier = wc_s2.pop("target_classifier")#list(wc_s1.index.get_level_values(0))#
#print(target_classifier)
dataset = wc_s2.pop("dataset")#list(wc_s1.index.get_level_values(1))#'dataset')
#print(dataset)
param = wc_s2.pop("param_id")#list(wc_s1.index.get_level_values(2))#'param_id')#
clf = dict(zip(set(target_classifier), sns.color_palette("Set1")))
dt = dict(zip(set(dataset), sns.color_palette("Paired")))
#p = dict(zip(set(param), sns.color_palette(a)))
tc_colors = [clf[c] for c in target_classifier]#target_classifier.map(clf)
#par_col = [p[c] for c in param]#param.map(p)
data_colors = [dt[c] for c in dataset]#list(dataset.map(dt))
#print(data_colors)
#sns.set_palette("viridis")
row_colors = pd.DataFrame({'Target classifier':tc_colors,
                          'Dataset':data_colors,
                          #'parameters':par_col
                          })
g = sns.clustermap(wc_s2,  cmap='seismic',
                   row_colors=row_colors,
                  figsize=(5, 10),
                   col_cluster=False,
                   dendrogram_ratio=(.1, .1),
                   yticklabels=False,
                   cbar_pos=(0.99, .65, .03, .25),
                  )
plt.title("Worst case - Salem2                                                                             ")#, loc="upper left")


d_legend_lines = [Line2D([0], [0], color=colour, lw=7) for colour in dt.values()]
d_legend_names = [label for label in dt.keys()]

c_legend_lines = [Line2D([0], [0], color=colour, lw=7) for colour in clf.values()]
c_legend_names = [label for label in clf.keys()]

l1 = plt.legend(d_legend_lines, d_legend_names,
                title='Dataset', 
                loc='lower left', bbox_to_anchor=(0, -1.2))
l2 = plt.legend(c_legend_lines, c_legend_names,
                title='Target classifier', 
                loc='lower left', bbox_to_anchor=(0, -2.0))
gca().add_artist(l2)
gca().add_artist(l1)
plt.show()

In [None]:
worstcase

In [None]:
cols

In [None]:
worstcase = results_df[results_df.scenario=="WorstCase"].set_index(cols)[mia_metrics]
salem1 = results_df[results_df.scenario=="Salem1"].set_index(cols)[mia_metrics]
salem2 = results_df[results_df.scenario=="Salem2"].set_index(cols)[mia_metrics]

wc_names_map = {worstcase.columns[i]:worstcase.columns[i]+"_WC" for i in range(len(worstcase.columns))}
worstcase.rename(wc_names_map, axis=1, inplace=True)
worstcase.reset_index(inplace=True)
s1_names_map = {salem1.columns[i]:salem1.columns[i]+"_S1" for i in range(len(salem1.columns))}
salem1.rename(s1_names_map, axis=1, inplace=True)
salem1.reset_index(inplace=True)
s2_names_map = {salem2.columns[i]:salem2.columns[i]+"_S2" for i in range(len(salem2.columns))}
salem2.rename(s2_names_map, axis=1, inplace=True)
salem2.reset_index(inplace=True)
df = pd.concat([worstcase, salem1, salem2])
df.reset_index(inplace=True)
df.dropna()

In [None]:
worstcase = results_df[results_df.scenario=="WorstCase"].set_index(cols)[mia_metrics]
salem1 = results_df[results_df.scenario=="Salem1"].set_index(cols)[mia_metrics]
salem2 = results_df[results_df.scenario=="Salem2"].set_index(cols)[mia_metrics]

salem1.columns==worstcase.columns

In [None]:
worstcase

In [None]:
df.columns

In [None]:
df

In [None]:
with PdfPages('pairplots_scenario_RF_DT_SVC_ADA_XGBoost.pdf') as pdf_pages:
    i = 0
    for tc in list(set(list(df.target_classifier)))[::-1]:
        print(tc)
        figu = plt.figure(i) 
        g = sns.pairplot(df[df['target_classifier']==tc], 
                         x_vars = ["mia_AUC_WC","mia_AUC_S1","mia_AUC_S2"],
                         y_vars = ["mia_AUC_WC","mia_AUC_S1","mia_AUC_S2"],
                         #y_vars = yvars,
                         hue ='dataset', kind = 'kde',
                        corner=True)#reg
        #g.set_title(dataset)
        #plt.show()
        plt.tight_layout()
        pdf_pages.savefig(figu)
        i+=1

In [None]:
for tc in list(set(list(df.target_classifier))):
    print(tc)

In [None]:
sns.relplot(results_df, x='mia_AUC', y=',
            hue, size, style)

In [None]:
#df = results_df#.set_index(['scenario', 'dataset', 'full_id'])
#df = df.drop(['target_classifier', 'shadow_classifier_name',
#       'shadow_dataset', 'attack_classifier', 'repetition', 
#       'model_data_param_id', 'param_id', 'n_estimators', 'algorithm', 'criterion', 'max_depth',
#       'min_samples_split', 'min_samples_leaf', 'max_features', 'class_weight',
#       'bootstrap', 'kernel', 'C', 'degree', 'probability', 'gamma'], axis=1)
#df = df.T
df = results_df.pivot_table(index=['scenario', 'repetition', 'target_classfier',
                                  'param_id', 'dataset'],
                            values=mia_metrics)#, aggfunc=np.sum)
#df
 #'model_data_param_id'],

In [None]:
df.head(35)

In [None]:
with PdfPages('lmplots_RF_DT_SVC_ADA.pdf') as pdf_pages:
    i = 0
    for y in par_vars:
        for x in xvars:
            figu = plt.figure(i)        
            g = sns.lmplot(data=results_df,
                        col="target_classifier",
                        hue=y,
                        x=x,
                        row="scenario",
                        ="dataset",
                        #kind="point",
                        height=3, aspect=1,
            )
        #g.set_title(v)
        g.set_xticklabels(rotation = 90)
        #plt.xticks(rotation=90)
        plt.show()
        plt.tight_layout()
        pdf_pages.savefig(figu)
        i+=1

# Correlation

In [None]:
tmp = results_df.groupby(['target_classifier', 'dataset', 'scenario'])['mia_AUC', 'mia_FAR', 'mia_Advantage',
                                                                      'target_AUC', 'target_FAR', 'target_Advantage'].corr()

In [None]:
tmp

# Hypothesis testing

In [None]:
from scipy import stats
from statsmodels.formula.api import ols

In [None]:
model = ols("scenario ~ dataset", results_df).fit()

In [None]:
model = ols('mia_AUC ~ scenario', results_df).fit()
print(model.summary())  

In [None]:
model = ols('mia_AUC ~ target_classifier + scenario + dataset', results_df).fit()
print(model.summary())  

In [None]:
model = ols('mia_AUC ~ full_id', results_df).fit()
print(model.summary()) 

In [None]:
model = ols('mia_AUC ~ scenario + dataset', results_df).fit()
print(model.summary())  

In [None]:
result = sm.ols(formula='wage ~ education + gender + education * gender',
                data=data).fit()    
print(result.summary())

In [None]:
results_df.kernel.fillna('')

results_df

In [None]:
target_classifiers = set(list(results_df.target_classifier))
target_classifiers

In [None]:
#results_df = results_df[results_df.dataset!="medical-mnist-ab-v-br-100"]

In [None]:
rf_low_auc = results_df[results_df.mia_AUC<0.45]

results_df['target_auc_range'] = 'medium'
results_df.loc[(results_df['target_AUC'] < 0.45 ), 'target_auc_range'] = 'low'
results_df.loc[(results_df['target_AUC'] > 0.55 ), 'target_auc_range'] = 'high'

results_df['mia_auc_range'] = 'medium'
results_df.loc[(results_df['mia_AUC'] < 0.45 ), 'mia_auc_range'] = 'low'
results_df.loc[(results_df['mia_AUC'] > 0.55 ), 'mia_auc_range'] = 'high'


In [None]:
#var = ['target_AUC', 'mia_AUC',
#           'target_FAR', 'mia_FAR',
#           'target_Advantage', 'mia_Advantage',
#          'bootstrap','min_samples_split',
#           'min_samples_leaf', 'n_estimators',
#            'max_depth',
#          ]
with PdfPages('densityplots_RF_DT_SVC_ADA.pdf') as pdf_pages:
    i = 0
    for v in common_vars:
        figu = plt.figure(i)  
        g= sns.kdeplot(data=results_df, x=v,  hue="mia_auc_range")
        g.set_title(v)
        g.set(xlim=(0, 1))
        pdf_pages.savefig(figu)
        i += 1
        for clf in target_classifiers:
            figu = plt.figure(i)
            g = sns.kdeplot(data=results_df[results_df["target_classifier"]==clf], x=v,  hue="mia_auc_range")
            g.set_title(clf+" "+v)
            g.set(xlim=(0,1))
            #plt.show()
            pdf_pages.savefig(figu)
            i += 1

In [None]:
var = ['target_AUC', 'mia_AUC',
           'target_FAR', 'mia_FAR',
           'target_Advantage', 'mia_Advantage',
          'bootstrap','min_samples_split',
           'min_samples_leaf', 'n_estimators',
            'max_depth',
          ]
for v in var:
    print(v)
    sns.kdeplot(data=results_df, x=v,  hue="target_auc_range")
    plt.show()

In [None]:
rf_low_auc

In [None]:
results_df

In [None]:
results_df.auc_range.describe()

In [None]:
results_df.columns

In [None]:
sns.set_palette("tab20")


with PdfPages('explore_results_target_mia.pdf') as pdf_pages:
    #df1 = dftest.selemin_samples_splitt_dtypes([np.int, np.float, np.objemin_samples_splitt])
    i = 0
    figu = plt.figure(i)
    #min_samples_splitountplot = sns.min_samples_splitountplot(x=col, data=df1)
    sns.lmplot(
        data=results_df,
        x="mia_AUC", y="target_AUC",
        hue="min_samples_leaf",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_FAR", y="target_FAR",
        hue="min_samples_leaf",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_Advantage", y="target_Advantage",
        hue="min_samples_leaf",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)

    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_AUC", y="target_AUC",
        hue="min_samples_split",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_FAR", y="target_FAR",
        hue="min_samples_split",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_Advantage", y="target_Advantage",
        hue="min_samples_split",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_AUC", y="target_AUC",
        hue="max_depth",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_FAR", y="target_FAR",
        hue="max_depth",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_Advantage", y="target_Advantage",
        hue="max_depth",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)

In [None]:
sns.set_palette("tab20")

with PdfPages('explore_results_mia.pdf') as pdf_pages:
    #df1 = dftest.selemin_samples_splitt_dtypes([np.int, np.float, np.objemin_samples_splitt])
    i = 0
    figu = plt.figure(i)
    #min_samples_splitountplot = sns.min_samples_splitountplot(x=col, data=df1)
    sns.lmplot(
        data=results_df,
        x="mia_AUC", y="target_AUC",
        hue="min_samples_leaf",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_FAR", y="target_FAR",
        hue="min_samples_leaf",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_Advantage", y="target_Advantage",
        hue="min_samples_leaf",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)

    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_AUC", y="target_AUC",
        hue="min_samples_split",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_FAR", y="target_FAR",
        hue="min_samples_split",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_Advantage", y="target_Advantage",
        hue="min_samples_split",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_AUC", y="target_AUC",
        hue="max_depth",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_FAR", y="target_FAR",
        hue="max_depth",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_Advantage", y="target_Advantage",
        hue="max_depth",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)

In [None]:
logging.getLogger('matplotlib').setLevel(logging.WARNING)

with PdfPages('explore_results_simple_randomForest.pdf') as pdf_pages:
    #df1 = dftest.select_dtypes([np.int, np.float, np.object])
    i = 0
    
    figu = plt.figure(i)        
    sns.lmplot(
            data=results_df, x="mia_Advantage", y="mia_AUC",
            col="scenario"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)
    sns.lmplot(
            data=results_df, x="mia_FAR", y="mia_AUC",
            col="scenario"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)
    sns.lmplot(
            data=results_df, x="mia_Advantage", y="mia_FAR",
            col="scenario"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)    
    sns.lmplot(
            data=results_df, x="target_AUC", y="mia_AUC",
            col="scenario"
    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)
    sns.lmplot(
            data=results_df, x="target_FAR", y="mia_FAR",
            col="scenario"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)
    sns.lmplot(
            data=results_df, x="target_Advantage", y="mia_Advantage",
            col="scenario"
    )
    plt.tight_layout()
    pdf_pages.savefig(figu)
    i+=1
    
            
    figu = plt.figure(i)    
    sns.lmplot(
        data=results_df, x="target_AUC", y="mia_AUC",
        hue="dataset", col="scenario"
    )
    i+=1
    ##############
    var = ['bootstrap', 'min_samples_split',
           'min_samples_leaf', 'n_estimators',
           'criterion', 'max_depth',
           'class_weight',
          ]
    for v in var:
        figu = plt.figure(i)        
        sns.lmplot(
            data=results_df, x="mia_Advantage", y="mia_AUC",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=results_df, x="mia_FAR", y="mia_AUC",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=results_df, x="mia_Advantage", y="mia_FAR",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)    
        sns.lmplot(
            data=results_df, x="target_AUC", y="mia_AUC",
            hue=v, col="scenario"

        )
        
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=results_df, x="target_FAR", y="mia_FAR",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=results_df, x="target_Advantage", y="mia_Advantage",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)
        i+=1

    var = ['target_AUC', 'mia_AUC',
           'target_FAR', 'mia_FAR',
           'target_Advantage', 'mia_Advantage',
          ]
    for v in var:
        figu = plt.figure(i)
        sns.catplot(data=results_df, x="scenario", y=v,
                #hue="scenario", #col="min_samples_split",
                cut=0,
               kind='violin').set_xticklabels(rotation=90)
        plt.tight_layout()
        pdf_pages.savefig(figu)
        i+=1

In [None]:

#logging.getLogger('matplotlib').setLevel(logging.WARNING)

#sns.set_palette("tab20")

#with PdfPages('explore_results_target_mia_SVC_param_id.pdf') as pdf_pages:
#    #df1 = dftest.select_dtypes([np.int, np.float, np.object])
#    i = 0
#    figu = plt.figure(i)
#    #countplot = sns.countplot(x=col, data=df1)
#    sns.lmplot(
#        data=results_df,
#        x="mia_AUC", y="target_AUC",
#        hue="param_id",
#        col="dataset",
#        row="scenario",
#        height=2.5, aspect=1.5
#    )
#    pdf_pages.savefig(figu)
#    figu = plt.figure(i)
#    sns.lmplot(
#        data=results_df,
#        x="mia_FAR", y="target_FAR",
#        hue="param_id",
#        col="dataset",
#        row="scenario",
#        height=2.5, aspect=1.5
#    )
#    pdf_pages.savefig(figu)
#    figu = plt.figure(i)
#    sns.lmplot(
#        data=results_df,
#        x="mia_Advantage", y="target_Advantage",
#        hue="param_id",
#        col="dataset",
#        row="scenario",
#        height=2.5, aspect=1.5
#    )
#    pdf_pages.savefig(figu)

In [None]:
with PdfPages('explore_results_RF_DT_SVC_ADA.pdf') as pdf_pages:
    #df1 = dftest.select_dtypes([np.int, np.float, np.object])
    i = 0
    figu = plt.figure(i)
        #countplot = sns.countplot(x=col, data=df1)
        
    sns.lmplot(
        data=results_df, x="mia_Advantage", y="mia_AUC",
        hue="min_samples_split", row="scenario", col="dataset",
         
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df, x="mia_FAR", y="mia_AUC",
        hue="min_samples_split", row="scenario", col="dataset",
         
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df, x="mia_Advantage", y="mia_FAR",
        hue="min_samples_split", row="scenario", col="dataset",
         
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)    
    sns.lmplot(
        data=results_df, x="target_AUC", y="mia_AUC",
        hue="min_samples_split", row="scenario", col="dataset",
         
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df, x="target_FAR", y="mia_FAR",
        hue="min_samples_split", row="scenario", col="dataset",
         
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df, x="target_Advantage", y="mia_Advantage",
        hue="min_samples_split", row="scenario", col="dataset",
         
    )

    var = ['target_AUC', 'mia_AUC',
           'target_FAR', 'mia_FAR',
           'target_Advantage', 'mia_Advantage',
          ]
    for v in var:
    
        i+=1
        figu = plt.figure(i)
        sns.catplot(data=results_df, x="min_samples_split", y=v,
                hue="scenario", col="dataset", #row="min_samples_split",
                cut=0,
               kind='violin').set_xticklabels(rotation=90)
        pdf_pages.savefig(figu)

In [None]:
logging.getLogger('matplotlib').setLevel(logging.WARNING)

with PdfPages('explore_results_simple_randomForest.pdf') as pdf_pages:
    #df1 = dftest.select_dtypes([np.int, np.float, np.object])
    i = 0
    
    figu = plt.figure(i)        
    sns.lmplot(
            data=results_df, x="mia_Advantage", y="mia_AUC",
            col="scenario"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)
    sns.lmplot(
            data=results_df, x="mia_FAR", y="mia_AUC",
            col="scenario"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)
    sns.lmplot(
            data=results_df, x="mia_Advantage", y="mia_FAR",
            col="scenario"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)    
    sns.lmplot(
            data=results_df, x="target_AUC", y="mia_AUC",
            col="scenario"
    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)
    sns.lmplot(
            data=results_df, x="target_FAR", y="mia_FAR",
            col="scenario"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)
    sns.lmplot(
            data=results_df, x="target_Advantage", y="mia_Advantage",
            col="scenario"
    )
    plt.tight_layout()
    pdf_pages.savefig(figu)
    i+=1
    
            
    figu = plt.figure(i)    
    sns.lmplot(
        data=results_df, x="target_AUC", y="mia_AUC",
        hue="dataset", col="scenario"
    )
    i+=1
    ##############
    var = ['bootstrap', 'min_samples_split',
           'min_samples_leaf', 'n_estimators',
           'criterion', 'max_depth',
           'class_weight',
          ]
    for v in var:
        figu = plt.figure(i)        
        sns.lmplot(
            data=results_df, x="mia_Advantage", y="mia_AUC",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=results_df, x="mia_FAR", y="mia_AUC",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=results_df, x="mia_Advantage", y="mia_FAR",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)    
        sns.lmplot(
            data=results_df, x="target_AUC", y="mia_AUC",
            hue=v, col="scenario"

        )
        
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=results_df, x="target_FAR", y="mia_FAR",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=results_df, x="target_Advantage", y="mia_Advantage",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)
        i+=1

    var = ['target_AUC', 'mia_AUC',
           'target_FAR', 'mia_FAR',
           'target_Advantage', 'mia_Advantage',
          ]
    for v in var:
        figu = plt.figure(i)
        sns.catplot(data=results_df, x="scenario", y=v,
                #hue="scenario", #col="min_samples_split",
                cut=0,
               kind='violin').set_xticklabels(rotation=90)
        plt.tight_layout()
        pdf_pages.savefig(figu)
        i+=1

In [None]:
logging.getLogger('matplotlib').setLevel(logging.WARNING)

with PdfPages('explore_results_simple_randomForest_MIA_rangeAUC.pdf') as pdf_pages:
    #df1 = dftest.select_dtypes([np.int, np.float, np.object])
    i = 0
    
    figu = plt.figure(i)        
    sns.lmplot(
            data=rf_low_auc, x="mia_Advantage", y="mia_AUC",
            col="auc_range"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)        
    sns.kdeplot(
            data=rf_low_auc, y="mia_AUC",
            hue="auc_range"
    )
    plt.tight_layout()
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
            data=rf_low_auc, x="mia_FAR", y="mia_AUC",
            col="auc_range"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)
    sns.lmplot(
            data=rf_low_auc, x="mia_Advantage", y="mia_FAR",
            col="auc_range"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)    
    sns.lmplot(
            data=rf_low_auc, x="target_AUC", y="mia_AUC",
            col="auc_range"
    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)
    sns.lmplot(
            data=rf_low_auc, x="target_FAR", y="mia_FAR",
            col="auc_range"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)
    sns.lmplot(
            data=rf_low_auc, x="target_Advantage", y="mia_Advantage",
            col="auc_range"
    )
    plt.tight_layout()
    pdf_pages.savefig(figu)
    i+=1
    
            
    figu = plt.figure(i)    
    sns.lmplot(
        data=rf_low_auc, x="target_AUC", y="mia_AUC",
        hue="dataset", col="auc_range"
    )
    i+=1
    ##############
    var = ['bootstrap', 'min_samples_split',
           'min_samples_leaf', 'n_estimators',
           'criterion', 'max_depth',
           'class_weight',
          ]
    for v in var:
        figu = plt.figure(i)        
        sns.lmplot(
            data=rf_low_auc, x="mia_Advantage", y="mia_AUC",
            hue=v, col="auc_range"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=rf_low_auc, x="mia_FAR", y="mia_AUC",
            hue=v, col="auc_range"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=rf_low_auc, x="mia_Advantage", y="mia_FAR",
            hue=v, col="auc_range"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)    
        sns.lmplot(
            data=rf_low_auc, x="target_AUC", y="mia_AUC",
            hue=v, col="auc_range"

        )
        
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=rf_low_auc, x="target_FAR", y="mia_FAR",
            hue=v, col="auc_range"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=rf_low_auc, x="target_Advantage", y="mia_Advantage",
            hue=v, col="auc_range"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)
        i+=1

    var = ['target_AUC', 'mia_AUC',
           'target_FAR', 'mia_FAR',
           'target_Advantage', 'mia_Advantage',
          ]
    for v in var:
        figu = plt.figure(i)
        sns.catplot(data=rf_low_auc, x="auc_range", y=v,
                #hue="auc_range", #col="min_samples_split",
                cut=0,
               kind='violin').set_xticklabels(rotation=90)
        plt.tight_layout()
        pdf_pages.savefig(figu)
        i+=1

In [None]:
results_filename = "/home/alba/Documents/HIC_Dundee/GRAIMAtter/SVC_rbf_results.csv"
results_df = pd.read_csv(results_filename)
results_df = results_df[results_df.dataset!="medical-mnist-ab-v-br-100"]

logging.getLogger('matplotlib').setLevel(logging.WARNING)

sns.set_palette("tab20")

with PdfPages('explore_results_target_mia_SVC.pdf') as pdf_pages:
    #df1 = dftest.select_dtypes([np.int, np.float, np.object])
    i = 0
    figu = plt.figure(i)
    #countplot = sns.countplot(x=col, data=df1)
    sns.lmplot(
        data=results_df,
        x="mia_AUC", y="target_AUC",
        hue="gamma",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_FAR", y="target_FAR",
        hue="gamma",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_Advantage", y="target_Advantage",
        hue="gamma",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)

    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_AUC", y="target_AUC",
        hue="C",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_FAR", y="target_FAR",
        hue="C",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_Advantage", y="target_Advantage",
        hue="C",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_AUC", y="target_AUC",
        hue="tol",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_FAR", y="target_FAR",
        hue="tol",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.lmplot(
        data=results_df,
        x="mia_Advantage", y="target_Advantage",
        hue="tol",
        col="dataset",
        row="scenario",
        height=2.5, aspect=1.5
    )
    pdf_pages.savefig(figu)

In [None]:
logging.getLogger('matplotlib').setLevel(logging.WARNING)

with PdfPages('explore_results_simple_SVC.pdf') as pdf_pages:
    #df1 = dftest.select_dtypes([np.int, np.float, np.object])
    i = 0

    figu = plt.figure(i)        
    sns.lmplot(
            data=results_df, x="mia_Advantage", y="mia_AUC",
            col="scenario"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)
    sns.lmplot(
            data=results_df, x="mia_FAR", y="mia_AUC",
            col="scenario"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)
    sns.lmplot(
            data=results_df, x="mia_Advantage", y="mia_FAR",
            col="scenario"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)    
    sns.lmplot(
            data=results_df, x="target_AUC", y="mia_AUC",
            col="scenario"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)
    sns.lmplot(
            data=results_df, x="target_FAR", y="mia_FAR",
            col="scenario"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)

    i+=1
    figu = plt.figure(i)
    sns.lmplot(
           data=results_df, x="target_Advantage", y="mia_Advantage",
            col="scenario"

    )
    plt.tight_layout()
    pdf_pages.savefig(figu)
    i+=1
    ##############
    var = ['gamma', 'tol', 'C']
    
    for v in var:
        figu = plt.figure(i)        
        sns.lmplot(
            data=results_df, x="mia_Advantage", y="mia_AUC",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=results_df, x="mia_FAR", y="mia_AUC",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=results_df, x="mia_Advantage", y="mia_FAR",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)    
        sns.lmplot(
            data=results_df, x="target_AUC", y="mia_AUC",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=results_df, x="target_FAR", y="mia_FAR",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)

        i+=1
        figu = plt.figure(i)
        sns.lmplot(
            data=results_df, x="target_Advantage", y="mia_Advantage",
            hue=v, col="scenario"

        )
        plt.tight_layout()
        pdf_pages.savefig(figu)
        i+=1

    var = ['target_AUC', 'mia_AUC',
           'target_FAR', 'mia_FAR',
           'target_Advantage', 'mia_Advantage',
          ]
    for v in var:
        figu = plt.figure(i)
        sns.catplot(data=results_df, x="scenario", y=v,
                #hue="scenario", #col="min_samples_split",
                cut=0,
               kind='violin').set_xticklabels(rotation=90)
        plt.tight_layout()
        pdf_pages.savefig(figu)
        i+=1

In [None]:
logging.getLogger('matplotlib').setLevel(logging.WARNING)

with PdfPages('explore_results_tmp_SVC.pdf') as pdf_pages:
    #df1 = dftest.select_dtypes([np.int, np.float, np.object])
    i = 0
    figu = plt.figure(i)
        #countplot = sns.countplot(x=col, data=df1)
        
    sns.relplot(
        data=results_df, x="mia_Advantage", y="mia_AUC",
        hue="target_classifier", style="scenario", col="dataset",
        kind="scatter", alpha=0.3
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.relplot(
        data=results_df, x="mia_FAR", y="mia_AUC",
        hue="target_classifier", style="scenario", col="dataset",
        kind="scatter", alpha=0.3
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.relplot(
        data=results_df, x="mia_Advantage", y="mia_FAR",
        hue="target_classifier", style="scenario", col="dataset",
        kind="scatter", alpha=0.3
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)    
    sns.relplot(
        data=results_df, x="target_AUC", y="mia_AUC",
        hue="target_classifier", style="scenario", col="dataset",
        kind="scatter", alpha=0.3
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.relplot(
        data=results_df, x="target_FAR", y="mia_FAR",
        hue="target_classifier", style="scenario", col="dataset",
        kind="scatter", alpha=0.3
    )
    pdf_pages.savefig(figu)
    
    i+=1
    figu = plt.figure(i)
    sns.relplot(
        data=results_df, x="target_Advantage", y="mia_Advantage",
        hue="target_classifier", style="scenario", col="dataset",
        kind="scatter", alpha=0.3
    )

    var = ['target_AUC', 'mia_AUC',
           'target_FAR', 'mia_FAR',
           'target_Advantage', 'mia_Advantage',
          ]
    for v in var:
    
        i+=1
        figu = plt.figure(i)
        sns.catplot(data=results_df, x="target_classifier", y=v,
                hue="scenario", col="dataset", #row="target_classifier",
                cut=0,
               kind='violin').set_xticklabels(rotation=90)
        pdf_pages.savefig(figu)

In [None]:
sns.relplot(
    data=results_df, x="target_AUC", y="mia_AUC",
    hue="target_classifier", style="scenario", col="dataset",
    kind="scatter", alpha=0.3
)
sns.relplot(
    data=results_df, x="target_FAR", y="mia_FAR",
    hue="target_classifier", style="scenario", col="dataset",
    kind="scatter", alpha=0.3
)
sns.relplot(
    data=results_df, x="target_Advantage", y="mia_Advantage",
    hue="target_classifier", style="scenario", col="dataset",
    kind="scatter", alpha=0.3
)

In [None]:
var = ['target_AUC', 'mia_AUC',
       'target_FAR', 'mia_FAR',
       'target_Advantage', 'mia_Advantage',
      ]
for v in var:
    sns.catplot(data=results_df, x="target_classifier", y=v,
            hue="scenario", col="dataset", #row="target_classifier",
            cut=0,
           kind='violin').set_xticklabels(rotation=90)

In [21]:
from data_preprocessing.data_interface import get_data_sklearn, DataNotAvailable

datasets_features = {}
for dataset in cmap.keys():
    data_features, data_labels = get_data_sklearn(dataset)
    datasets_features[dataset] = {'nrows':len(data_labels),
                                  'ncols':len(results_df.columns),
                                  'size':len(data_labels)*len(results_df.columns),
                                  'n_binary_cols':sum([1 for x in data_features if len(data_features[x].unique)==2])
                                 }

ModuleNotFoundError: No module named 'data_preprocessing'

In [None]:
sns.relplot(
        data=results_df, x="mia_Advantage", y="mia_AUC",
        hue="target_classifier", style="scenario", col="dataset",
        kind="scatter", alpha=0.3
    )