In [4]:
import numpy as np
import os
import sys
import time
import logging
import matplotlib.pyplot as plt
import pandas as pd
from scipy.interpolate import interp1d
from scipy.stats import norm

# Resolve the project root as the parent of the current working directory
project_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
src_dir = os.path.join(project_dir, 'src')

# Add the src directory to sys.path
sys.path.append(src_dir)

import mech.GaussianDist as GaussianModule
import mech.LapDist as LaplaceModule
import mech.toy_DPSGD as DP_SGDModule
import mech.Subsampling as SubsamplingModule

from estimator.estimator_kde import KDE_Estimator


In [5]:
# Define Proposed curve


#Example Gaussian_curve

mu_2 = 1  # shift subtracted inside Gaussian_curve


def Gaussian_curve(alpha):
    """Evaluate Phi(Phi^{-1}(1 - alpha) - mu_2), Phi being the standard normal CDF.

    Parameters
    ----------
    alpha : scalar or array-like accepted by ``scipy.stats.norm.ppf``.

    Returns
    -------
    The value(s) of ``norm.cdf(norm.ppf(1 - alpha) - mu_2)``; ``mu_2`` is read
    from the notebook's global namespace at call time.
    """
    upper_quantile = norm.ppf(1 - alpha)
    return norm.cdf(upper_quantile - mu_2)

In [10]:
### Identifying the critical point using KDE: configuration
n_1 = 100_000  # Default value; forwarded to KDE_Estimator as N
h = 0.1  # Default value; forwarded to KDE_Estimator as h
eta_max = 15  # Default value; forwarded to KDE_Estimator as eta_max

# Neighboring databases: length-10 vectors that differ only in the first entry
x1 = np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
x2 = np.zeros(x1.shape)

# Mechanism evaluated on x1 and x2: noisy sum with standard deviation `sigma`
sigma = 1


def Sum_Gauss(x):
    """Return ``np.sum(x)`` perturbed by one draw from N(0, sigma**2).

    The noise comes from NumPy's global random state (``np.random.normal``),
    so successive calls give different values unless that state is seeded.
    ``sigma`` is read from the notebook's global namespace at call time.
    """
    noise = np.random.normal(loc=0, scale=sigma)
    return np.sum(x) + noise

# Run KDE_Estimator on the two neighboring databases and tabulate its result
output_df = pd.DataFrame(
    KDE_Estimator(
        eta_max=eta_max,
        Mechanism=Sum_Gauss,
        x1=x1,
        x2=x2,
        N=n_1,
        h=h,
    )
)

In [12]:
# Identify the critical point: the estimate lying furthest below the proposed curve
# Step 1: keep only the rows with 0 <= alpha <= 1
filtered_data = output_df[output_df['alpha'].between(0, 1)]

# Step 2: pull out the alpha and beta columns
alpha, beta = filtered_data['alpha'], filtered_data['beta']

# Step 3: signed gap between the estimated beta and the proposed curve.
# The smallest (most negative) gap marks the largest shortfall below the curve.
deviation_matrix = np.array(beta - Gaussian_curve(alpha))
min_value = np.argmin(deviation_matrix)  # position of the minimum, not the minimum itself

# Step 4: fetch that row by position, together with its label in output_df
datapoint = filtered_data.iloc[min_value]
index = filtered_data.index[min_value]

# Alpha and beta coordinates of the selected row
the_alpha, the_beta = datapoint["alpha"], datapoint["beta"]

# Report the critical values
print(f"Critical value is: alpha: {the_alpha}, beta: {the_beta}")

# NOTE(review): assumes the rows of output_df follow a 1000-point grid on
# [0, eta_max] — confirm against KDE_Estimator.
eta_value = np.linspace(0, eta_max, 1000)[index]

Critical value is: alpha: 0.00022662142680041075, beta: 0.9824621556576492


In [13]:
# Sample sizes passed to GaussianModule.generate_params
test_train_sample_size = 1_000_000
test_test_sample_size = 1_000_000

# Evaluate the estimator at the single eta selected in the previous cell
eta = np.array([eta_value])

kwargs = GaussianModule.generate_params(
    num_train_samples=test_train_sample_size,
    num_test_samples=test_test_sample_size,
)
estimator = GaussianModule.GaussianDistEstimator(kwargs)
output = estimator.build(eta=eta)

# Keep the two entries used by the violation check
beta_estimate, alpha_estimate = output["beta"], output["alpha"]

In [14]:
def check_violation(beta_estimate, omega, alpha_estimate, Gaussian_curve):
    """Report whether the estimate, padded by ``omega``, lies below the curve.

    Parameters
    ----------
    beta_estimate : estimated beta value.
    omega : margin added to both ``beta_estimate`` and ``alpha_estimate``.
    alpha_estimate : estimated alpha value.
    Gaussian_curve : callable evaluated at ``alpha_estimate + omega``.

    Returns
    -------
    str
        "Violation" when ``beta_estimate + omega`` is strictly smaller than
        ``Gaussian_curve(alpha_estimate + omega)``, otherwise "No Violation".
    """
    padded_beta = beta_estimate + omega
    curve_value = Gaussian_curve(alpha_estimate + omega)
    return "Violation" if padded_beta < curve_value else "No Violation"
def compute_expression(n, gamma):
    """Return ``12 * sqrt((2 * c_d**2 / n) * log(4 / gamma))`` with c_d = 3.8637.

    Parameters
    ----------
    n : value dividing ``2 * c_d**2`` (the caller passes a sample size).
    gamma : value entering through ``log(4 / gamma)``.
    """
    c_d = 3.8637  # Given value of c_d
    scale_term = 2 * c_d ** 2 / n
    log_term = np.log(4 / gamma)
    return 12 * np.sqrt(scale_term * log_term)
gamma = 0.05
# NOTE(review): 10**6 presumably mirrors the estimator sample sizes set in the
# previous cell — confirm.
omega = compute_expression(10**6, gamma)

# Bind by keyword: check_violation's positional order is
# (beta_estimate, omega, alpha_estimate, Gaussian_curve). Passing
# alpha_estimate ahead of omega positionally swaps the two, so the left-hand
# side of the comparison becomes beta_estimate + alpha_estimate.
print(check_violation(
    beta_estimate=beta_estimate,
    omega=omega,
    alpha_estimate=alpha_estimate,
    Gaussian_curve=Gaussian_curve,
))

No Violation
