In [1]:
from typing import Sequence

import numpy as np
from numpy.typing import NDArray

from group_amplification.privacy_analysis.base_mechanisms import BaseMechanism, GaussianMechanism
from group_amplification.privacy_analysis.composition.pld.accounting import pld_from_double_mixture_gaussian_mechanism
from group_amplification.privacy_analysis.utils import log_binomial_coefficient

import matplotlib.pyplot as plt

In [2]:
def wor_pmf(k: NDArray[np.int64],
            dataset_size: int,
            batch_size: int) -> NDArray[np.float64]:
    """Evaluate the Binomial(``batch_size``, ``1 / dataset_size``) pmf at ``k``.

    The pmf is assembled in log-space as
    ``k * log(p) + (batch_size - k) * log(1 - p) + log C(batch_size, k)``
    with ``p = 1 / dataset_size`` and exponentiated at the end.

    NOTE(review): the ``wor`` prefix suggests "without replacement", but the
    formula is a binomial pmf, i.e. ``batch_size`` independent trials that each
    succeed with probability ``1 / dataset_size`` -- confirm the intended name.

    Args:
        k: Number(s) of successes at which to evaluate the pmf. An integer
            array; a plain integer scalar also works.
        dataset_size: Inverse of the per-trial success probability.
        batch_size: Number of trials.

    Returns:
        The probabilities as floats, with the same shape as ``k``.
    """
    success_prob = 1 / dataset_size
    num_failures = batch_size - k

    log_pmf = k * np.log(success_prob)

    # For dataset_size == 1 we get log(1 - p) == -inf, and the entry with zero
    # failures would evaluate to 0 * -inf == nan. Use the convention
    # 0 * log(0) == 0 so that this entry gets its correct probability.
    with np.errstate(divide='ignore', invalid='ignore'):
        log_failure_term = num_failures * np.log(1 - success_prob)
    log_pmf = log_pmf + np.where(num_failures == 0, 0.0, log_failure_term)

    log_pmf = log_pmf + log_binomial_coefficient(batch_size, k)

    return np.exp(log_pmf)

In [3]:
def pld_wor(epsilons: Sequence[float],
            base_mechanism: BaseMechanism,
            dataset_size: int,
            batch_size: int,
            eval_params: dict[str, object]):
    """Evaluate delta(epsilon) via a privacy loss distribution (PLD).

    Builds a PLD with ``pld_from_double_mixture_gaussian_mechanism`` where both
    mixtures use the sensitivities ``0, ..., batch_size`` weighted by
    ``wor_pmf``, then queries it at ``epsilons``.

    Args:
        epsilons: Privacy parameters at which delta is evaluated.
        base_mechanism: Gaussian mechanism whose ``standard_deviation`` is used
            as the noise scale.
        dataset_size: Forwarded to ``wor_pmf``.
        batch_size: Forwarded to ``wor_pmf``; also the largest sensitivity.
        eval_params: Keyword arguments forwarded unchanged to
            ``pld_from_double_mixture_gaussian_mechanism``.

    Returns:
        The result of ``pld.get_delta_for_epsilon(epsilons)`` (presumably one
        delta per entry of ``epsilons`` -- confirm against the PLD API).

    Raises:
        ValueError: If ``base_mechanism`` is not a ``GaussianMechanism``.
    """
    # Same contract as adp_wor: the PLD below is Gaussian-specific, so reject
    # other mechanisms explicitly instead of failing on a missing attribute.
    if not isinstance(base_mechanism, GaussianMechanism):
        raise ValueError('Only support Gaussian Mechanisms')

    sensitivities_upper = np.arange(batch_size + 1)
    sampling_probs_upper = wor_pmf(sensitivities_upper, dataset_size, batch_size)

    # Both mixtures are identical here; copies are passed so the callee never
    # receives aliased arrays.
    sensitivities_lower = sensitivities_upper.copy()
    sampling_probs_lower = sampling_probs_upper.copy()

    pld = pld_from_double_mixture_gaussian_mechanism(
        base_mechanism.standard_deviation,
        sensitivities_upper, sensitivities_lower,
        sampling_probs_upper, sampling_probs_lower,
        **eval_params
    )

    return pld.get_delta_for_epsilon(epsilons)

In [4]:
def adp_wor(eps: float, base_mechanism: BaseMechanism,
            dataset_size: int, batch_size: int) -> float:
    """Evaluate delta at a single ``eps`` from per-sensitivity ADP values.

    The sensitivities ``1, ..., batch_size`` are weighted by ``wor_pmf``
    conditioned on a non-zero count; each one is evaluated with the base
    mechanism's ``adp`` at a common, rescaled alpha, and the weighted sum is
    scaled by the probability of a non-zero count.

    Args:
        eps: Privacy parameter epsilon (a real number).
        base_mechanism: Must be a ``GaussianMechanism``.
        dataset_size: Forwarded to ``wor_pmf``.
        batch_size: Forwarded to ``wor_pmf``; also the largest sensitivity.

    Returns:
        The resulting delta as a scalar.

    Raises:
        ValueError: If ``base_mechanism`` is not a ``GaussianMechanism``.
    """
    # Fail fast before doing any numerical work.
    if not isinstance(base_mechanism, GaussianMechanism):
        raise ValueError('Only support Gaussian Mechanisms')

    sensitivities = np.arange(1, batch_size + 1)

    # Probability of a non-zero count, and the pmf conditioned on that event.
    p_not_zero = 1 - wor_pmf(0, dataset_size, batch_size)
    w = wor_pmf(sensitivities, dataset_size, batch_size) / p_not_zero

    # See Proof of Proposition 30 in Characteristic Function Accounting paper
    individual_alpha = 1 + (np.exp(eps) - 1) / p_not_zero
    individual_alphas = np.full_like(sensitivities, individual_alpha, dtype='float')

    adps = base_mechanism.adp(individual_alphas, sensitivities)

    # `*` and `@` share precedence and associate left-to-right; the
    # parentheses only make the evaluation order explicit.
    return (p_not_zero * w) @ adps

In [5]:
# Privacy parameters at which both bounds are evaluated:
# 121 logarithmically spaced values from 10**0 to 10**1.
epsilons = np.logspace(start=0, stop=1, num=121)

# Base mechanism: Gaussian with unit standard deviation.
standard_deviation = 1.0
base_mechanism = GaussianMechanism(standard_deviation)

# Sampling setup passed to wor_pmf / pld_wor / adp_wor.
dataset_size, batch_size = 100, 8

# Keyword arguments that pld_wor forwards to the PLD construction.
eval_params = dict(value_discretization_interval=1e-2)

In [6]:
# delta(epsilon) obtained from the privacy loss distribution, for all epsilons at once.
deltas_pld = pld_wor(
    epsilons=epsilons,
    base_mechanism=base_mechanism,
    dataset_size=dataset_size,
    batch_size=batch_size,
    eval_params=eval_params,
)

In [7]:
# adp_wor handles a single epsilon per call, so evaluate it point by point
# and collect the results into one array aligned with `epsilons`.
deltas_adp = np.array([
    adp_wor(eps=epsilon,
            base_mechanism=base_mechanism,
            dataset_size=dataset_size,
            batch_size=batch_size)
    for epsilon in epsilons
])

### Plot $\delta(\varepsilon)$ for the agnostic and the specific bound

In [8]:
import seaborn as sns

In [9]:
# Target directory for this notebook's figures (hard-coded absolute path).
# NOTE(review): not referenced by any cell visible here -- presumably consumed
# by a later save step; confirm, and consider making it configurable.
save_dir = '/ceph/hdd/staff/schuchaj/group_amplification_plots/neurips24/adp/with_replacement/specific_vs_agnostic/gaussian/half_page'

In [10]:
sns.set_theme()

fig, ax = plt.subplots()

# Two colours from the colour-blind-safe palette, one per curve.
pal = sns.color_palette('colorblind', 2)

ax.plot(epsilons, deltas_adp, c=pal[0], label='Agnostic', linestyle='dashed')
ax.plot(epsilons, deltas_pld, c=pal[1], label='Specific')

# Raw strings keep the LaTeX backslashes literal; '\d' in a normal string
# literal is an invalid escape sequence and triggers a warning.
ax.set_ylabel(r'ADP $\delta(\varepsilon)$', fontsize=9)
ax.set_xlabel(r'ADP $\varepsilon$', fontsize=9)

ax.set_yscale('log')
ax.set_xscale('log')

# Trailing semicolon suppresses the Legend repr in the cell output.
ax.legend(loc='lower left');
