In [1]:
import numpy as np
import os
import sys
import time
import logging
import matplotlib.pyplot as plt

# Navigate to the parent directory of the project structure
project_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
src_dir = os.path.join(project_dir, 'src')

# Add the src directory to sys.path
sys.path.append(src_dir)

import mech.GaussianDist as GaussianModule
import mech.LapDist as LaplaceModule

In [11]:
test_train_sample_size = 100000
test_test_sample_size = 100000
eta = np.array([0.5])

kwargs = GaussianModule.generate_params(num_train_samples = test_train_sample_size, num_test_samples = test_test_sample_size)
estimator = GaussianModule.GaussianDistEstimator(kwargs)
output = estimator.build(eta = eta)
beta_estimate = output["beta"]

In [12]:
kwargs = LaplaceModule.generate_params(num_train_samples = test_train_sample_size, num_test_samples = test_test_sample_size)
estimator = LaplaceModule.LapDistEstimator(kwargs)
output = estimator.build(eta = eta)
beta_estimate = output["beta"]

In [13]:
beta_estimate

array([0.4253])

In [5]:
theta_0 = np.float64(0)
num_samples = 100000

n = 10
x0 = np.zeros(n, dtype=np.float64)
x1 = np.array([1] + [0] * (n - 1), dtype=np.float64)

theta0 = np.full(num_samples, np.float64(theta_0))
theta1 = np.full(num_samples, np.float64(theta_0))

In [7]:
x = np.arange(10)   # Array [0, 1, 2, ..., 9]
m = 5               # Size of each subset
num_samples = 100000
samples = np.random.permutation(np.tile(x, (num_samples, 1)))[:, :m]

In [29]:
from numpy.random import MT19937, RandomState
import secrets
import time
seed = secrets.randbits(128)
rng = RandomState(MT19937(seed))

In [42]:
test_train_sample_size = 1000000
test_test_sample_size = 1000000

import mech.toy_DPSGD as DP_SGDModule
kwargs = DP_SGDModule.generate_params(num_train_samples = test_train_sample_size, num_test_samples = test_test_sample_size)

In [53]:
theta_0 = kwargs["sgd_alg"]["theta_0"]
T = kwargs["sgd_alg"]["T"]
m = kwargs["sgd_alg"]["m"]
eta_learn = kwargs["sgd_alg"]["eta"]
sigma = kwargs["sgd_alg"]["sigma"]

In [47]:
from scipy.stats import norm
from itertools import combinations

def calc_mu(x, m, eta_learn, T_):
    """
    Calculate mu based on the given parameters.
    
    Parameters: 
    x: database, the input of the SGD algorithm
    m: size of the subset of database in each interation
    eta_learn: learning rate of each step
    T_: the number of iteration
    """
    mu = np.sum(eta_learn * (1 - eta_learn)**(T_ - np.array(x)) / m)
    return mu

In [49]:
# Initialize mu_vector
mu_vector = []
for k in range(1, T + 1):
    k_combinations = list(combinations(range(1, T + 1), k))
    mu_values = [calc_mu(x, m=m, eta_learn=eta, T_=T) for x in k_combinations]
    mu_vector.extend(mu_values)

In [54]:
sigma_tilde = eta_learn * sigma * np.sqrt((1 - (1 - eta_learn)**(2 * T)) / (1 - (1 - eta_learn)**2))

In [56]:
alpha = 0.2
np.sum(norm.cdf(norm.ppf(1 - alpha) - np.array(mu_vector) / sigma_tilde) / 2**T)

0.3252972912981455

In [58]:
norm.cdf(norm.ppf(1 - alpha) - np.array(mu_vector) / sigma_tilde).shape

(1023,)

In [60]:
np.array(mu_vector) / sigma_tilde

array([0.08099892, 0.10124865, 0.12656081, ..., 2.5922005 , 2.61245023,
       2.69344914])