In [1]:
import os
import sys
import time

# Resolve the project root (two levels above the notebook's working directory)
# and the sub-directories this notebook reads from / writes to.
project_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
src_dir = os.path.join(project_dir, 'src')
fig_dir = os.path.join(project_dir, 'fig')
data_dir = os.path.join(project_dir, 'data')
log_dir = os.path.join(project_dir, 'log')
os.makedirs(fig_dir, exist_ok=True)

# Make the packages under `src` importable. The membership check keeps this cell
# idempotent: re-running it no longer appends a duplicate entry to sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)
import matplotlib.pyplot as plt
import numpy as np

import mech.full_DPSGD as DPSGDModule
from analysis.tradeoff_Gaussian import Gaussian_curve

In [2]:
# Experiment configuration forwarded to DPSGDModule.generate_params below.
model_name = 'convnet_balanced'
database_size = 1000
epochs = 20

# Root directory for `internal_result_path`. Set the FDP_RESULTS_DIR environment
# variable to point somewhere else; when it is unset or empty the original
# location is used, so existing runs behave exactly as before.
internal_result_path_dir = (
    os.environ.get("FDP_RESULTS_DIR")
    or "/scratch/bell/wei402/fdp-estimation/results"
)

# One sub-directory per (model_name, database_size, epochs) combination,
# e.g. ".../convnet_balanced_1000_20".
internal_result_path = os.path.join(
    internal_result_path_dir, f"{model_name}_{database_size}_{epochs}"
)

data_args = {
    "method": "default",
    "data_dir": data_dir,
    "internal_result_path": internal_result_path,
}

# Build the sampler parameters and the sampler itself.
sampler_args = DPSGDModule.generate_params(
    data_args=data_args,
    log_dir=log_dir,
    model_name=model_name,
    database_size=database_size,
    epochs=epochs,
    auditing_approach="1d_cross_entropy",
    num_test_samples=1000,
)
sampler = DPSGDModule.DPSGDSampler(sampler_args)

Files already downloaded and verified


05/18/2025 23:28:25:INFO:Initialized convnet_balanced_DPSGDSampler with parameters: batch_size=512, epochs=20, lr=0.10, sigma=1.00, max_grad_norm=1.00, device=cpu


### 3 Estimate $(\alpha(\eta), \beta(\eta))$ for $\eta \geq 1$

In [3]:
# Value of eta for this section; the following cells read this global when
# generating training samples and when printing the result.
eta = 1.2
# NOTE(review): `Laplace_compute_tradeoff_curve` is neither imported nor defined
# in any visible cell (the import cell only brings in `Gaussian_curve`, which is
# never used). On a fresh kernel this line would raise NameError unless the name
# is provided elsewhere — confirm the intended reference curve and its import.
Laplace_compute_tradeoff_curve(eta)

(array(0.3639184), array(0.25272111))

In [4]:
# Draw training samples at the current `eta`, fit a model on them with
# `train_kNN_model`, and report how long sampling + fitting took.
# NOTE(review): `train_kNN_model` is not imported in any visible cell — confirm
# where it should come from.
start_time = time.perf_counter()  # monotonic clock: immune to system clock adjustments
num_train_samples = 1_000_000
train_samples = sampler.gen_samples(eta=eta, num_samples=num_train_samples)
model = train_kNN_model(train_samples)

print(f"Generated model in {time.perf_counter() - start_time:.2f}s with {num_train_samples} samples")

Generated model in 0.38s with 1000000 samples


In [5]:
# Draw the evaluation samples and time the draw.
# The test set is generated with eta=1 regardless of the section's `eta`
# (presumably so the evaluation data is unweighted — confirm against the sampler).
start_time = time.perf_counter()  # monotonic clock: immune to system clock adjustments
num_test_samples = 100_000
samples = sampler.gen_samples(eta=1, num_samples=num_test_samples)
print(f"Generated {num_test_samples} testing samples in {time.perf_counter() - start_time:.2f}s")

Generated 100000 testing samples in 0.00s


In [6]:
# Score the fitted model separately on the two halves of the test set:
#   alpha = 1 - score on the first `num_test_samples` rows,
#   beta  = 1 - score on the remaining rows.
# NOTE(review): assumes `gen_samples` returns the two sample groups back to back,
# with the first group occupying the first `num_test_samples` rows, and that
# `model.score` returns an accuracy-like fraction — confirm both.
start_time = time.perf_counter()  # monotonic clock: immune to system clock adjustments
alpha = 1 - model.score(samples['X'][:num_test_samples], samples['y'][:num_test_samples])
beta = 1 - model.score(samples['X'][num_test_samples:], samples['y'][num_test_samples:])
print(f"(alpha, beta) w.r.t {eta} is ({alpha}, {beta}) [Computation time is {time.perf_counter() - start_time:.2f}]")

(alpha, beta) w.r.t 1.2 is (0.33136, 0.27614000000000005) [Computation time is 25.95]


### 4 Estimate $(\alpha(\eta), \beta(\eta))$ for $\eta < 1$

In [7]:
# Value of eta for this section (the eta < 1 case); the following cells read
# this global when generating training samples and when printing the result.
eta=0.39
# NOTE(review): `Laplace_compute_tradeoff_curve` is neither imported nor defined
# in any visible cell, so this line would raise NameError on a fresh kernel
# unless the name is provided elsewhere — confirm the intended import.
Laplace_compute_tradeoff_curve(eta)

(array(0.18393972), array(0.5))

In [None]:
# Draw training samples at the current `eta`, fit a model on them with
# `train_kNN_model`, and report how long sampling + fitting took.
# This section uses 100,000 training samples (the eta >= 1 section uses 1,000,000).
# NOTE(review): `train_kNN_model` is not imported in any visible cell — confirm
# where it should come from.
start_time = time.perf_counter()  # monotonic clock: immune to system clock adjustments
num_train_samples = 100_000
train_samples = sampler.gen_samples(eta=eta, num_samples=num_train_samples)
model = train_kNN_model(train_samples)

print(f"Generated model in {time.perf_counter() - start_time:.2f}s with {num_train_samples} samples")

Generated model in 0.02s with 100000 samples


In [9]:
# Draw the evaluation samples and time the draw.
# The test set is generated with eta=1 regardless of the section's `eta`
# (presumably so the evaluation data is unweighted — confirm against the sampler).
start_time = time.perf_counter()  # monotonic clock: immune to system clock adjustments
num_test_samples = 100_000
samples = sampler.gen_samples(eta=1, num_samples=num_test_samples)
print(f"Generated {num_test_samples} testing samples in {time.perf_counter() - start_time:.2f}s")

Generated 100000 testing samples in 0.00s


In [10]:
# Score the fitted model separately on the two halves of the test set:
#   alpha = 1 - score on the first `num_test_samples` rows,
#   beta  = 1 - score on the remaining rows.
# NOTE(review): assumes `gen_samples` returns the two sample groups back to back,
# with the first group occupying the first `num_test_samples` rows, and that
# `model.score` returns an accuracy-like fraction — confirm both.
start_time = time.perf_counter()  # monotonic clock: immune to system clock adjustments
alpha = 1 - model.score(samples['X'][:num_test_samples], samples['y'][:num_test_samples])
beta = 1 - model.score(samples['X'][num_test_samples:], samples['y'][num_test_samples:])
print(f"(alpha, beta) w.r.t {eta} is ({alpha}, {beta}) [Computation time is {time.perf_counter() - start_time:.2f}]")

(alpha, beta) w.r.t 0.39 is (0.18662999999999996, 0.49338000000000004) [Computation time is 5.09]
