In [8]:
import numpy as np
import os
import sys
import time
import logging
import matplotlib.pyplot as plt

# Resolve the project root as the parent of the current working directory
# and build the path to its `src` directory (presumably where the `mech`
# package imported below lives).
project_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
src_dir = os.path.join(project_dir, 'src')

# Make `src` importable. The membership check keeps this cell idempotent:
# re-running it no longer appends a duplicate entry to sys.path each time.
if src_dir not in sys.path:
    sys.path.append(src_dir)

import mech.GaussianDist as GaussianModule
import mech.LapDist as LaplaceModule
import mech.toy_DPSGD as DP_SGDModule
import mech.Subsampling as SubsamplingModule

## 1 Example code for the Gaussian mechanism

In [11]:
# Values of eta handed to the estimator's `build` call.
eta = np.array([0.5])

# Sample budgets; these names are reused by other estimator cells in this notebook.
test_train_sample_size = 100000
test_test_sample_size = 100000

# Generate the Gaussian parameter dict, wrap it in an estimator, run the
# estimator for `eta`, and keep the "beta" entry of the returned mapping.
kwargs = GaussianModule.generate_params(
    num_train_samples=test_train_sample_size,
    num_test_samples=test_test_sample_size,
)
estimator = GaussianModule.GaussianDistEstimator(kwargs)
output = estimator.build(eta=eta)
beta_estimate = output["beta"]

### 1.1 Parameters of the tested Gaussian (with N(0, 1) and N(1, 1))

In [12]:
# Display the parameter dict returned by GaussianModule.generate_params in the previous cell.
kwargs

{'dist': {'mean0': array([0]),
  'cov0': array([[1]]),
  'mean1': array([1]),
  'cov1': array([[1]])},
 'num_train_samples': 100000,
 'num_test_samples': 100000}

## 2 Example code for other mechanisms, which share a similar API

In [3]:
# Same pipeline as the Gaussian example, using the Laplace module:
# generate parameters -> build the estimator -> run it for `eta` -> read "beta".
kwargs = LaplaceModule.generate_params(
    num_train_samples=test_train_sample_size,
    num_test_samples=test_test_sample_size,
)
estimator = LaplaceModule.LapDistEstimator(kwargs)
output = estimator.build(eta=eta)
beta_estimate = output["beta"]

In [6]:
# Toy DP-SGD example: generate parameters -> build the estimator -> run it
# for `eta` -> read "beta".
# The sample sizes come from the shared `test_train_sample_size` /
# `test_test_sample_size` settings (both 100000, defined in the Gaussian
# example cell) instead of repeating the literal, so every mechanism in this
# notebook is driven by the same budget.
kwargs = DP_SGDModule.generate_params(
    num_train_samples=test_train_sample_size,
    num_test_samples=test_test_sample_size,
)
estimator = DP_SGDModule.toy_DPSGDEstimator(kwargs)
output = estimator.build(eta=eta)
beta_estimate = output["beta"]

In [9]:
# Subsampling example: generate parameters -> build the estimator -> run it
# for `eta` -> read "beta".
# The sample sizes come from the shared `test_train_sample_size` /
# `test_test_sample_size` settings (both 100000, defined in the Gaussian
# example cell) instead of repeating the literal, so every mechanism in this
# notebook is driven by the same budget.
kwargs = SubsamplingModule.generate_params(
    num_train_samples=test_train_sample_size,
    num_test_samples=test_test_sample_size,
)
estimator = SubsamplingModule.SubsamplingEstimator(kwargs)
output = estimator.build(eta=eta)
beta_estimate = output["beta"]

## 3 Set parameters for theoretical accuracy bound

The bound computed below says that we need num_train_samples = $10^9$ and num_test_samples = $10^7$ to bring the error down to the order of $10^{-3}$ with probability parameter gamma = 0.05.

In practice, however, far fewer samples are needed, so it should be possible to tighten the theoretical bound.

In [13]:
def compute_expression(n, gamma, c_d=3.8637):
    """Evaluate ``12 * sqrt((2 * c_d**2 / n) * log(4 / gamma))``.

    Parameters
    ----------
    n : int or float
        Sample size appearing in the denominator; expected to be positive.
    gamma : float
        Value appearing in ``log(4 / gamma)``; expected to be positive
        (values above 4 make the logarithm negative and the result NaN).
    c_d : float, optional
        Constant in the expression. Defaults to 3.8637, the value that was
        previously hard-coded, so existing two-argument calls are unchanged.

    Returns
    -------
    numpy.float64
        The value of the expression for scalar inputs.
    """
    # Keep the same operation order as the closed form above so results are
    # bit-for-bit identical to the earlier hard-coded version.
    inner = (2 * c_d ** 2 / n) * np.log(4 / gamma)
    return 12 * np.sqrt(inner)

# Evaluate the bound at the sample size and gamma quoted in the text above.
n, gamma = 10**9, 0.05
print(compute_expression(n, gamma))

0.004340473891924778
