# Overall DP-GAN Results

Use this notebook to load the results produced by `dp-gan-privacy-analysis.ipynb` and compute summary statistics.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# data-size tokens; each one is the last component of a results filename
sizes = [
    "300",
    "3000",
    "30000",
]

# epsilon tokens; each one is the middle component of a results filename
epsilons = [
    "005",
    "05",
    "1",
    "3",
    "13",
]

Compute the maximum empirical epsilon value across all iterations for each combination of data size and epsilon.

In [3]:
# maximum empirical epsilon, keyed first by data size and then by epsilon token
epsilon_results_dict = {}

for size in sizes:
    # start a fresh inner mapping for this data size
    max_by_epsilon = epsilon_results_dict[size] = {}

    for epsilon in epsilons:
        # first column of the headerless CSV: empirical epsilon values across all iterations
        results_path = f"epsilons_{epsilon}_{size}.csv"
        empirical_epsilons = pd.read_csv(results_path, header=None).iloc[:, 0]

        # keep only the largest value observed for this size / epsilon combination
        max_by_epsilon[epsilon] = np.max(empirical_epsilons)
    

In [4]:
# show the maximum empirical epsilon for every data size / epsilon combination
epsilon_results_dict

{'300': {'005': 0.5108256237659906,
  '05': 0.4855078157817007,
  '1': 0.5500463369192718,
  '3': 0.38566248081198445,
  '13': 0.5108256237659906},
 '3000': {'005': 0.1484200051182734,
  '05': 0.20067069546215124,
  '1': 0.106380403558572,
  '3': 0.14445128273854504,
  '13': 0.09449084342092252},
 '30000': {'005': 0.15415067982725933,
  '05': 0.09492489758033232,
  '1': 0.08132929517812601,
  '3': 0.07499525741497352,
  '13': 0.06119874526874345}}

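Compute the log of the median mean-squared error (MSE) of the logistic regression coefficients for each combination of data size and epsilon. Also count the "failed" syntheses, i.e., cases where the GAN did not produce two classes of `conversion` values; these appear as missing (NaN) MSE values and are excluded from the median.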

In [5]:
# log of the median MSE and the count of missing MSE values,
# each keyed first by data size and then by epsilon token
mse_results_dict = {}
mse_na_counts = {}

for size in sizes:
    # start fresh inner mappings for this data size
    log_median_by_epsilon = mse_results_dict[size] = {}
    na_count_by_epsilon = mse_na_counts[size] = {}

    for epsilon in epsilons:
        # first column of the headerless CSV: MSE values across all iterations
        mse_values = pd.read_csv(f"MSE_{epsilon}_{size}.csv", header=None).iloc[:, 0]

        # flag the missing entries once and reuse the mask for both statistics
        missing = np.isnan(mse_values)

        # median over the non-missing values only, stored on the log scale
        log_median_by_epsilon[epsilon] = np.log(np.median(mse_values[~missing]))
        na_count_by_epsilon[epsilon] = missing.sum()

In [6]:
# show the number of missing (NaN) MSE values for every data size / epsilon combination
mse_na_counts

{'300': {'005': np.int64(2),
  '05': np.int64(2),
  '1': np.int64(2),
  '3': np.int64(2),
  '13': np.int64(2)},
 '3000': {'005': np.int64(97),
  '05': np.int64(71),
  '1': np.int64(44),
  '3': np.int64(30),
  '13': np.int64(26)},
 '30000': {'005': np.int64(89),
  '05': np.int64(16),
  '1': np.int64(1),
  '3': np.int64(0),
  '13': np.int64(0)}}

Display the log of the median MSE (computed above) for each combination of data size and epsilon.

In [7]:
# show the log of the median MSE for every data size / epsilon combination
mse_results_dict

{'300': {'005': np.float64(11.218692067459473),
  '05': np.float64(10.976454151316346),
  '1': np.float64(11.84663445238742),
  '3': np.float64(11.52812538519198),
  '13': np.float64(11.714560004440902)},
 '3000': {'005': np.float64(6.522212104569794),
  '05': np.float64(6.056173895954505),
  '1': np.float64(5.7721522968029095),
  '3': np.float64(6.105203724204018),
  '13': np.float64(5.881736653354809)},
 '30000': {'005': np.float64(6.047309707175292),
  '05': np.float64(5.169657616284163),
  '1': np.float64(5.1537283344439135),
  '3': np.float64(5.1043389292771755),
  '13': np.float64(5.288149874280339)}}

Output values needed for table in paper.