In [1]:
import pandas as pd
import numpy as np
from scipy.stats import wilcoxon
from scipy import stats
from statsmodels.stats.multitest import multipletests
from sklearn.preprocessing import MinMaxScaler
from matplotlib import pyplot as plt 

def aggregate_numeric(rank=False):
  """Aggregate Privacy, ML-Utility and Trade-off scores per generator for each dataset.

  For every results file:

  1. `cols` are averaged per (Generator, Seed, Model, params, fold).
  2. The per-column maximum over `params` is taken for each
     (Generator, Model, Seed, fold); only the per-configuration rows whose
     values equal those maxima in *all* columns are kept.
  3. The kept rows are averaged over models per (Generator, Seed, fold).
  4. Three privacy columns are set to 0 for the "train" generator.
  5. The privacy columns and `mean_test_score` are min-max scaled; "Privacy" is
     the mean of the three scaled privacy columns, "ML-Utility" the scaled
     score. Both are min-max scaled again and "Trade-off" is their harmonic mean.

  NOTE: a later cell of this notebook defines another function with the same
  name, which replaces this one once that cell has run.

  Parameters
  ----------
  rank : bool, default False
      If False, return the per-generator mean/std of the three scores.
      If True, drop the "train" generator, rank the generators within each
      (Seed, fold) (rank 1 = highest score) and return the mean/std of the ranks.

  Returns
  -------
  tuple
      `(cols, std_final, mean_final)` when `rank` is False, otherwise
      `(cols, cols_new, std_final, mean_final)`. `std_final` and `mean_final`
      are lists with one DataFrame (indexed by Generator) per results file.
  """
  data_files = [
      "adult_st.csv",
      "beans_st.csv",
      "occupancy_st.csv",
      "loan_st.csv",
      "magic_st.csv",
      "digits_st.csv",
      "creditcard_st.csv",
      "covertype_st.csv"
  ]
  cols = [
      'Mean_Knn', '10_Knn', 'Mean_Ratio_Knn',  'Mean_Ratio_10Knn',
      '10_Ratio_Knn', '10_Ratio_10Knn', "mean_test_score"
  ]
  group_keys = ["Generator", "Model", "Seed", "fold"]
  privacy_cols = ["Mean_Knn", "Mean_Ratio_Knn", "Mean_Ratio_10Knn"]
  scaled_cols = privacy_cols + ["mean_test_score"]
  score_cols = ["Trade-off", "Privacy", "ML-Utility"]

  y_list = []
  for file in data_files:
    data = pd.read_csv(file)
    data["params"] = data["params"].astype(str)
    per_config = data.groupby(["Generator", "Seed", "Model", "params", "fold"])[cols].mean().reset_index()
    best = per_config.groupby(group_keys)[cols].max().reset_index()
    # Keep the configurations that reach the maximum in every column at once.
    y = per_config.merge(best, on=group_keys + cols)[group_keys + cols].drop_duplicates()
    # Select the metric columns explicitly: the string column "Model" cannot be
    # averaged (pandas >= 2.0 raises instead of silently dropping it).
    y = y.groupby(["Generator", "Seed", "fold"])[cols].mean().reset_index()
    y.loc[y["Generator"] == "train", privacy_cols] = 0
    y_list.append(y)

  mean_list = []
  for y in y_list:
    scaled = pd.DataFrame(MinMaxScaler().fit_transform(y[scaled_cols]), index=y.index, columns=scaled_cols)
    combined = pd.DataFrame({
        "Privacy": (scaled["Mean_Knn"] + scaled["Mean_Ratio_Knn"] + scaled["Mean_Ratio_10Knn"]) / 3,
        "ML-Utility": scaled["mean_test_score"],
    })
    rescaled = pd.DataFrame(
        MinMaxScaler().fit_transform(combined[["Privacy", "ML-Utility"]]),
        index=y.index, columns=["Privacy", "ML-Utility"])
    mean = rescaled.merge(y[["Generator", "Seed", "fold"]], left_index=True, right_index=True)
    # Harmonic mean of the two scores.
    # NOTE(review): evaluates to NaN when both scores are 0 -- confirm this is intended.
    mean["Trade-off"] = (2 * mean["ML-Utility"] * mean["Privacy"]) / (mean["ML-Utility"] + mean["Privacy"])
    mean_list.append(mean)

  mean_final = []
  std_final = []

  if not rank:
    for mean in mean_list:
      by_generator = mean.groupby("Generator")[score_cols]
      mean_final.append(by_generator.mean())
      std_final.append(by_generator.std())

    return cols, std_final, mean_final

  cols_new = ["Trade-off_Rank", "Privacy_Rank", "ML-Utility_Rank"]
  for mean in mean_list:
    # .copy() so the rank columns are added to an independent frame, not a slice.
    synthetic = mean[~mean["Generator"].isin(["train"])].copy()
    for col in score_cols:
      synthetic[col + "_Rank"] = synthetic.groupby(["Seed", "fold"])[col].rank(ascending=False)

    by_generator = synthetic.groupby("Generator")[cols_new]
    mean_final.append(by_generator.mean())
    std_final.append(by_generator.std())

  return cols, cols_new, std_final, mean_final

In [None]:
# Pool the per-dataset results without hard-coding the number of datasets.
cols, dataset_stds, dataset_means = aggregate_numeric()

# `stds` holds the mean of the squared per-dataset standard deviations
# (a pooled variance); the square root is taken where the table is displayed.
stds = sum(std * std for std in dataset_stds) / len(dataset_stds)
# `data` holds the per-generator scores averaged over the datasets.
data = sum(dataset_means) / len(dataset_means)


In [None]:
# Display the per-generator scores averaged over the datasets.
data

In [None]:
# Pooled standard deviation: element-wise square root of the averaged variances.
# (np.sqrt is vectorised; DataFrame.applymap is deprecated in recent pandas.)
np.sqrt(stds)

In [None]:
#The Wilcoxon significance tests used
# Paired over datasets: umap_smote_nc against each other generator, with
# Holm correction of the resulting p-values.
cols, stds, means = aggregate_numeric()
frames = [mean.reset_index() for mean in means]

reference = "umap_smote_nc"
# printed label -> value of the Generator column
rivals = {"cp": "copula", "gan": "gan", "vae": "vae", "smote": "smote_nc"}

for col in ["Trade-off"]:
  # One value per dataset for every generator, in dataset order.
  samples = {
      gen: [x for frame in frames for x in frame.loc[frame["Generator"] == gen, col]]
      for gen in [reference, *rivals.values()]
  }

  p_values = []
  for gen in rivals.values():
    # zip() would silently pair values from different datasets if a generator
    # were missing somewhere, so fail loudly instead.
    if len(samples[gen]) != len(samples[reference]):
      raise ValueError(
          "cannot pair {} with {}: different number of datasets".format(reference, gen))
    p_values.append(wilcoxon([t - j for (t, j) in zip(samples[reference], samples[gen])])[1])

  values = multipletests(p_values, method="holm")[1]
  for name, value in zip(rivals, values):
    print(col, name, "{:.1e}".format(value))

In [None]:
# Pool the per-dataset rank results without hard-coding the number of datasets.
cols, cols_new, dataset_stds, dataset_means = aggregate_numeric(rank=True)

# `stds` holds the mean of the squared per-dataset standard deviations
# (a pooled variance); the square root is taken where the table is displayed.
stds = sum(std * std for std in dataset_stds) / len(dataset_stds)
# `data` holds the per-generator mean ranks averaged over the datasets.
data = sum(dataset_means) / len(dataset_means)

In [None]:
# Display the per-generator mean ranks averaged over the datasets.
data

In [None]:
# Pooled standard deviation of the ranks: element-wise square root of the
# averaged variances (np.sqrt is vectorised; applymap is deprecated in recent pandas).
np.sqrt(stds)

In [None]:
#Friedman Test
# One test per rank column: are the five generators ranked differently across
# the datasets? Each generator contributes one mean rank per dataset.
cols, cols_new, stds, means = aggregate_numeric(rank=True)
frames = [mean.reset_index() for mean in means]

generators = ["copula", "gan", "vae", "umap_smote_nc", "smote_nc"]

for col in cols_new:
  samples = [
      [x for frame in frames for x in frame.loc[frame["Generator"] == gen, col]]
      for gen in generators
  ]

  statistic, p_value = stats.friedmanchisquare(*samples)
  print("{:.1e}".format(p_value), col)
  # Repeat the unformatted p-value when the test is not significant at 5%.
  if p_value >= 0.05:
    print(p_value, col)


**Privacy and Utility Metrics**

In [None]:
def aggregate_numeric(rank=False, remove_train=False):
  """Aggregate the raw privacy/utility metrics per (Generator, Model) for each dataset.

  For every results file, `cols` are averaged per
  (Generator, Seed, Model, params, fold); the per-column maximum over `params`
  is taken for each (Generator, Model, Seed, fold) and only the configurations
  that reach those maxima in every column are kept. The kept rows are then
  summarised per (Generator, Model).

  Parameters
  ----------
  rank : bool, default False
      If True, summarise ranks instead of raw values: generators are ranked
      within each (Model, Seed, fold), rank 1 being the highest value, and the
      rank columns are named `<metric>_Rank`.
  remove_train : bool, default False
      If True, rows whose Generator is "train" are dropped before anything else.

  Returns
  -------
  tuple
      `(cols, std_final, mean_final)` when `rank` is False, otherwise
      `(cols, cols_new, std_final, mean_final)`. Both lists contain one
      DataFrame per results file, indexed by (Generator, Model).

      `mean_final` frames hold the means (including the averaged `Seed` and
      `fold` columns). Despite the name, `std_final` frames hold *variances*:
      the across-fold variance per seed, averaged over seeds. Callers average
      them over datasets and take the square root afterwards.
  """
  data_files = ["adult_st.csv", "beans_st.csv", "occupancy_st.csv", "loan_st.csv",
                "magic_st.csv", "digits_st.csv", "creditcard_st.csv", "covertype_st.csv"]

  cols = [ 'Mean_Knn', '10_Knn', 'Mean_Ratio_Knn', 'Mean_Ratio_10Knn',
          '10_Ratio_Knn', '10_Ratio_10Knn', "mean_test_score"]

  group_keys = ["Generator", "Model", "Seed", "fold"]
  cols_new = [col + "_Rank" for col in cols]
  value_cols = cols_new if rank else cols

  mean_final = []
  std_final = []
  for file in data_files:
    data = pd.read_csv(file)

    if remove_train:
      data = data[~data["Generator"].isin(["train"])]
    # assign() returns a new frame, so the filtered slice above is never written to.
    data = data.assign(params=data["params"].astype(str))

    per_config = data.groupby(["Generator", "Seed", "Model", "params", "fold"])[cols].mean().reset_index()
    best = per_config.groupby(group_keys)[cols].max().reset_index()

    if rank:
      for col in cols:
        best[col + "_Rank"] = best.groupby(["Model", "Seed", "fold"])[col].rank(ascending=False)

    # Configurations that reach the maximum in every column at once.
    selected = per_config.merge(best, on=group_keys + cols)[group_keys + value_cols].drop_duplicates()

    mean = selected.groupby(["Generator", "Model"]).mean()
    # Across-fold variance per seed, then averaged over the seeds.
    seed_variance = selected.groupby(["Generator", "Model", "Seed"])[value_cols].var()
    std = seed_variance.groupby(level=["Generator", "Model"]).mean()

    mean_final.append(mean)
    std_final.append(std)

  if rank:
    return cols, cols_new, std_final, mean_final
  return cols, std_final, mean_final

In [None]:
cols, stds, means = aggregate_numeric()

# aggregate_numeric() returns variances in `stds`, so they are averaged over
# the datasets first and the square root is taken afterwards.
pooled_std = np.sqrt(sum(stds) / len(stds))
pooled_mean = sum(means) / len(means)

for col in ["Mean_Knn"]:
  # Mean and pooled std of one metric, indexed by (Generator, Model).
  data = pd.DataFrame({col: pooled_mean[col], "std": pooled_std[col]})

  # Every column except the classifier score is additionally averaged over the models.
  if col != "mean_test_score":
    data = data.groupby("Generator").mean()

In [None]:
# Display the table built in the previous cell (metric mean and its "std" column).
data

In [None]:
#The Wilcoxon significance tests used
# Paired over datasets, with Holm correction of the p-values:
#   * mean_test_score: umap_smote_nc against each other generator, per classifier;
#   * privacy metrics: train against every generator, averaged over classifiers.

cols, stds, means = aggregate_numeric()
frames = [mean.reset_index() for mean in means]

privacy_cols = ["Mean_Knn", "10_Knn", "Mean_Ratio_Knn", "Mean_Ratio_10Knn",
                "10_Ratio_Knn", "10_Ratio_10Knn"]


def _holm_wilcoxon(reference, rivals):
  """Return Holm-adjusted Wilcoxon p-values of `reference` against each list in `rivals`."""
  p_values = []
  for rival in rivals:
    # zip() would silently pair values from different datasets if a generator
    # were missing somewhere, so fail loudly instead.
    if len(rival) != len(reference):
      raise ValueError("paired samples have different lengths")
    p_values.append(wilcoxon([t - j for (t, j) in zip(reference, rival)])[1])
  return multipletests(p_values, method="holm")[1]


for col in cols:
  if col == "mean_test_score":
    for classifier in ["lr", "gs", "dt", "rf", "xgb", "mlp"]:
      per_model = [frame[frame["Model"] == classifier] for frame in frames]
      samples = {
          gen: [x for part in per_model for x in part.loc[part["Generator"] == gen, col]]
          for gen in ["umap_smote_nc", "copula", "gan", "vae", "smote_nc"]
      }
      values = _holm_wilcoxon(
          samples["umap_smote_nc"],
          [samples[gen] for gen in ["copula", "gan", "vae", "smote_nc"]])
      for name, value in zip(["cp", "gan", "vae", "smote"], values):
        print(classifier, name, "{:.1e}".format(value))

  elif col in privacy_cols:
    # Average over classifiers. Only the metric column is selected because the
    # string column "Model" cannot be averaged (pandas >= 2.0 raises on it).
    per_generator = [frame.groupby("Generator")[col].mean() for frame in frames]
    samples = {
        gen: [x for series in per_generator for x in series[series.index == gen]]
        for gen in ["train", "umap_smote_nc", "smote_nc", "copula", "gan", "vae"]
    }
    values = _holm_wilcoxon(
        samples["train"],
        [samples[gen] for gen in ["umap_smote_nc", "smote_nc", "copula", "gan", "vae"]])
    for name, value in zip(["umap_smote", "smote", "copula", "gan", "vae"], values):
      print(col, name, "{:.1e}".format(value))
    

In [None]:
cols, cols_new, stds, means = aggregate_numeric(rank=True, remove_train=True)
# The per-dataset frames stay unpacked: the following cell reads mean_a..mean_h.
std_a,std_b,std_c,std_d,std_e,std_f,std_g, std_h = stds
mean_a,mean_b,mean_c, mean_d,mean_e,mean_f,mean_g,mean_h= means

# aggregate_numeric() returns variances in `stds`, so they are averaged over
# the datasets first and the square root is taken afterwards.
pooled_std = np.sqrt(sum(stds) / len(stds))
pooled_mean = sum(means) / len(means)

# One table per rank column; `data` ends up holding the table of the last column.
rank_tables = {}
for col in cols_new:
  data = pd.DataFrame({col: pooled_mean[col], "std": pooled_std[col]}).reset_index()
  data = data[~data.Generator.isin(["train"])]
  if col !=  "mean_test_score_Rank":
    # Average over the models. Only the numeric columns are selected because the
    # string column "Model" cannot be averaged (pandas >= 2.0 raises on it).
    data = data.groupby("Generator")[[col, "std"]].mean()
  rank_tables[col] = data

In [None]:
# Friedman tests on the ranks produced by the previous cell (`means`, `cols_new`):
# per classifier for the score ranks, averaged over classifiers for the other metrics.
generators = ["copula", "gan", "vae", "umap_smote_nc", "smote_nc"]

for col in cols_new:
  if col == "mean_test_score_Rank":
    per_model = [
        frame.groupby(["Generator", "Model"])[[col]].mean().reset_index()
        for frame in means
    ]
    for model in ["gs", "rf", "xgb", "lr", "mlp", "dt"]:
      # One mean rank per dataset for every generator, restricted to this classifier.
      samples = [
          [x for table in per_model
           for x in table.loc[(table["Model"] == model) & (table["Generator"] == gen), col]]
          for gen in generators
      ]

      statistic, p_value = stats.friedmanchisquare(*samples)
      print("{:.1e}".format(p_value), col, model)
      if p_value >= 0.05:
        print("Failed",  model)
  else:
    per_generator = [
        frame.groupby(["Generator"])[[col]].mean().reset_index()
        for frame in means
    ]
    # One mean rank per dataset for every generator, averaged over classifiers.
    samples = [
        [x for table in per_generator for x in table.loc[table["Generator"] == gen, col]]
        for gen in generators
    ]

    statistic, p_value = stats.friedmanchisquare(*samples)
    print("{:.1e}".format(p_value), col)
    if p_value >= 0.05:
      print("Failed",  col)