Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Incorporating the "Connect the Dots" algorithm in the PLD accounting library.

Reference: https://arxiv.org/abs/2207.04380

Main changes are as follows:
- Allow vectorized calls to get_delta_for_epsilon,
- Construct pessimistic PLDPmf using Connect-the-Dots algorithm,
- Support Connect-the-Dots algorithm for privacy loss distributions of additive noise mechanisms.

Some other minor changes included as follows:
- Add checks to make sure that truncation bounds in self convolution do not result in truncation of the input list,
- Use more numerically stable scipy.special.logsumexp for moment generating function computation,
- Use more efficient np.roll for shifting the output result for self convolution.

PiperOrigin-RevId: 494208119
Change-Id: I1c50896b3252c19c8802b1e1ebfd6f0ba94c3be2
GitOrigin-RevId: fca6f9ba22ab0a7b3d020d3c5bc95d30405261df
  • Loading branch information
Differential Privacy Team authored and monsieurmuffin committed Dec 15, 2022
1 parent 22648a2 commit b4b2647
Show file tree
Hide file tree
Showing 9 changed files with 1,188 additions and 62 deletions.
Binary file modified common_docs/Privacy_Loss_Distributions.pdf
Binary file not shown.
18 changes: 9 additions & 9 deletions python/dp_accounting/pld/common.py
Expand Up @@ -20,6 +20,7 @@
import numpy as np
from scipy import fft
from scipy import signal
from scipy import special

ArrayLike = Union[np.ndarray, List[float]]

Expand Down Expand Up @@ -241,10 +242,10 @@ def compute_self_convolve_bounds(
np.concatenate((np.arange(-20, 0), np.arange(1, 21))) / len(input_list))

# Compute log of the moment generating function at the specified orders.
log_mgfs = np.log([
np.dot(np.exp(np.arange(len(input_list)) * order), input_list)
log_mgfs = [
special.logsumexp(np.arange(len(input_list)) * order, b=input_list)
for order in orders
])
]

for order, log_mgf_value in zip(orders, log_mgfs):
# Use Chernoff bound to update the upper/lower bound. See equation (5) in
Expand Down Expand Up @@ -280,17 +281,16 @@ def self_convolve(input_list: ArrayLike,
input_list, num_times, tail_mass_truncation)

# Use FFT to compute the convolution
fast_len = fft.next_fast_len(truncation_upper_bound - truncation_lower_bound +
1)
output_len = truncation_upper_bound - truncation_lower_bound + 1
fast_len = fft.next_fast_len(max(output_len, len(input_list)))
truncated_convolution_output = np.real(
fft.ifft(fft.fft(input_list, fast_len)**num_times))

# Discrete Fourier Transform wraps around modulo fast_len. Extract the output
# values in the range of interest.
output_list = [
truncated_convolution_output[i % fast_len]
for i in range(truncation_lower_bound, truncation_upper_bound + 1)
]
output_list = np.roll(
truncated_convolution_output, -truncation_lower_bound
)[:output_len]

return truncation_lower_bound, output_list

Expand Down
11 changes: 11 additions & 0 deletions python/dp_accounting/pld/common_test.py
Expand Up @@ -15,6 +15,7 @@

import math
import unittest
from unittest import mock
from absl.testing import parameterized

from dp_accounting.pld import common
Expand Down Expand Up @@ -230,6 +231,16 @@ def test_compute_self_convolve_with_truncation(self, input_list, num_times,
self.assertEqual(min_val, expected_min_val)
self.assertSequenceAlmostEqual(expected_result_list, result_list)

@mock.patch.object(
common, 'compute_self_convolve_bounds', return_value=(6, 6)
)
def test_compute_self_convolve_with_too_small_truncation(self, _):
# When the truncation bounds returned from compute_self_convolve_bounds are
# too small, the input should not be truncated.
min_val, result_list = common.self_convolve([0, 0, 1], 3)
self.assertEqual(min_val, 6)
self.assertSequenceAlmostEqual([1], result_list)

@parameterized.parameters(
(5, 7, 3, 8.60998489),
(0.5, 3, 0.1, 2.31676098),
Expand Down
91 changes: 91 additions & 0 deletions python/dp_accounting/pld/pld_pmf.py
Expand Up @@ -25,6 +25,7 @@
import numbers
from typing import Iterable, List, Mapping, Sequence, Tuple, Union
import numpy as np
import numpy.typing
from scipy import signal

from dp_accounting.pld import common
Expand Down Expand Up @@ -552,6 +553,96 @@ def create_pmf(loss_probs: Mapping[int, float], discretization: float,
pessimistic_estimate)


def create_pmf_pessimistic_connect_dots(
    discretization: float,
    rounded_epsilons: numpy.typing.ArrayLike,  # dtype int, shape (n,)
    deltas: numpy.typing.ArrayLike,  # dtype float, shape (n,)
) -> PLDPmf:
  """Returns PLD probability mass function using pessimistic Connect-the-Dots.

  Implements Algorithm 1 (PLD Discretization) of:
    "Connect the Dots: Tighter Discrete Approximations of Privacy Loss
    Distributions", V. Doroshenko, B. Ghazi, P. Kamath, R. Kumar,
    P. Manurangsi. https://arxiv.org/abs/2207.04380

  Given the desired (finite) support [eps_1, ..., eps_n] of the PLD (in
  addition to +infinity), with the conventions eps_0 = -infinity and
  delta_i = the eps_i-hockey stick divergence (hence delta_0 = 1), the mass
  assigned to each point is:
    * eps_i for 1 <= i <= n-1:
        (delta_i - delta_{i-1}) / (exp(eps_{i-1} - eps_i) - 1)
          + (delta_{i+1} - delta_i) / (exp(eps_{i+1} - eps_i) - 1)
    * eps_n:
        (delta_n - delta_{n-1}) / (exp(eps_{n-1} - eps_n) - 1)
    * +infinity: delta_n.

  Args:
    discretization: The length of the discretization interval; every epsilon
      in the support of the privacy loss distribution is an integer multiple
      of it.
    rounded_epsilons: The desired support of the privacy loss distribution,
      as a strictly increasing sequence of integers. The actual support is
      these values times discretization.
    deltas: The delta values corresponding to the epsilon values; must be
      non-increasing, by the nature of the hockey stick divergence.

  Returns:
    The pessimistic Connect-the-Dots privacy loss distribution supported on
    the specified epsilons.

  Raises:
    ValueError: If rounded_epsilons and deltas differ in length or are empty,
      if rounded_epsilons is not strictly increasing, or if deltas is not
      non-increasing.
  """
  rounded_epsilons = np.asarray(rounded_epsilons, dtype=int)
  deltas = np.asarray(deltas, dtype=float)

  if rounded_epsilons.size == 0 or rounded_epsilons.size != deltas.size:
    raise ValueError('Length of rounded_epsilons and deltas are either unequal '
                     f'or zero: {rounded_epsilons=}, {deltas=}.')

  # eps_gaps[i] = eps_{i+2} - eps_{i+1} (un-rounded units); must be positive.
  eps_gaps = np.diff(rounded_epsilons) * discretization
  if np.any(eps_gaps <= 0):
    raise ValueError('rounded_epsilons are not in strictly increasing order: '
                     f'{rounded_epsilons=}.')

  # delta_gaps[i] = delta_{i+2} - delta_{i+1}; must be non-positive.
  delta_gaps = np.diff(deltas)
  if np.any(delta_gaps > 0):
    raise ValueError(f'deltas are not in non-increasing order: {deltas=}.')

  # Left-neighbour contribution for each eps_i:
  #   (delta_i - delta_{i-1}) / (exp(eps_{i-1} - eps_i) - 1).
  # For i = 1 the neighbour is eps_0 = -infinity, where the term degenerates
  # to 1 - delta_1.
  left_terms = np.concatenate(
      ([1.0 - deltas[0]], delta_gaps / np.expm1(-eps_gaps)))

  # Right-neighbour contribution for each eps_i:
  #   (delta_{i+1} - delta_i) / (exp(eps_{i+1} - eps_i) - 1).
  # eps_n has no finite right neighbour, so its term is 0.
  right_terms = np.concatenate((delta_gaps / np.expm1(eps_gaps), [0.0]))

  # Mass at eps_i is the sum of the two contributions; the remaining mass,
  # delta_n, sits at +infinity.
  return create_pmf(
      loss_probs=dict(zip(rounded_epsilons, left_terms + right_terms)),
      discretization=discretization,
      infinity_mass=deltas[-1],
      pessimistic_estimate=True)


def compose_pmfs(pmf1: PLDPmf,
pmf2: PLDPmf,
tail_mass_truncation: float = 0) -> PLDPmf:
Expand Down
94 changes: 94 additions & 0 deletions python/dp_accounting/pld/pld_pmf_test.py
Expand Up @@ -54,6 +54,16 @@ def _check_sparse_probs(self, sparse_pmf: pld_pmf.SparsePLDPmf,
test_util.assert_dictionary_almost_equal(self, expected_loss_probs,
sparse_pmf._loss_probs)

def _check_sparse_pld_pmf_equal(self, sparse_pmf: pld_pmf.SparsePLDPmf,
discretization: float, infinity_mass: float,
loss_probs: dict[int, float],
pessimistic_estimate: bool):
self.assertEqual(discretization, sparse_pmf._discretization)
self.assertAlmostEqual(infinity_mass, sparse_pmf._infinity_mass)
self.assertEqual(pessimistic_estimate, sparse_pmf._pessimistic_estimate)
test_util.assert_dictionary_almost_equal(self, loss_probs,
sparse_pmf._loss_probs)

@parameterized.parameters(False, True)
def test_delta_for_epsilon(self, dense: bool):
discretization = 0.1
Expand Down Expand Up @@ -409,6 +419,90 @@ def test_pmf_creation(self, num_points: int, is_sparse: bool):

self.assertEqual(num_points, pmf.size)

@parameterized.named_parameters(
dict(testcase_name='empty',
discretization=0.1,
rounded_epsilons=np.array([]),
deltas=np.array([]),
error_message='unequal or zero'),
dict(testcase_name='unequal_length',
discretization=0.1,
rounded_epsilons=np.array([-1, 1]),
deltas=np.array([0.5]),
error_message='unequal or zero'),
dict(testcase_name='non_sorted_epsilons',
discretization=0.1,
rounded_epsilons=np.array([1, -1]),
deltas=np.array([0, 0.5]),
error_message='not in strictly increasing order'),
dict(testcase_name='non_monotone_deltas',
discretization=0.1,
rounded_epsilons=np.array([-1, 0, 1]),
deltas=np.array([0.5, 0, 0.1]),
error_message='not in non-increasing order'),
)
def test_connect_dots_pmf_value_errors(
self, discretization, rounded_epsilons, deltas, error_message):
with self.assertRaisesRegex(ValueError, error_message):
pld_pmf.create_pmf_pessimistic_connect_dots(discretization,
rounded_epsilons,
deltas)

@parameterized.named_parameters(
dict(testcase_name='trivial',
# mu_upper = [1]
# mu_lower = [1]
discretization=0.1,
rounded_epsilons=np.array([0]),
deltas=np.array([0.0]),
expected_loss_probs={0: 1.0},
expected_infinity_mass=0.0),
dict(testcase_name='rr_eps=0.1',
# mu_upper = [1/(1+e^{-0.1}), 1/(1+e^{0.1})]
# mu_lower = [1/(1+e^{0.1}), 1/(1+e^{-0.1})]
discretization=0.1,
rounded_epsilons=np.array([-1, 1]),
deltas=np.array([1 - np.exp(-0.1), 0.0]),
expected_loss_probs={-1: 1/(1 + np.exp(0.1)),
1: 1/(1 + np.exp(-0.1))},
expected_infinity_mass=0.0),
dict(testcase_name='rr_eps=0.1_abort_w_p_0.5',
# mu_upper = [0.5/(1+e^{-0.1}), 0.5, 0.5/(1+e^{0.1})]
# mu_lower = [0.5/(1+e^{0.1}), 0.5, 0.5/(1+e^{-0.1})]
discretization=0.1,
rounded_epsilons=np.array([-1, 0, 1]),
deltas=np.array([1 - np.exp(-0.1),
0.5*(np.exp(0.1)-1)/(np.exp(0.1)+1),
0.0]),
expected_loss_probs={-1: 0.5/(1 + np.exp(0.1)),
0: 0.5,
1: 0.5/(1 + np.exp(-0.1))},
expected_infinity_mass=0.0),
dict(testcase_name='rr_eps=0.1_delta=0.5',
# mu_upper = [0.5, 0.5/(1+e^{-0.1}), 0.5/(1+e^{0.1}), 0.0]
# mu_lower = [0.0, 0.5/(1+e^{0.1}), 0.5/(1+e^{-0.1}), 0.5]
discretization=0.1,
rounded_epsilons=np.array([-1, 0, 1]),
deltas=np.array([1 - 0.5*np.exp(-0.1),
0.5 + 0.5*(np.exp(0.1)-1)/(np.exp(0.1)+1),
0.5]),
expected_loss_probs={-1: 0.5/(1 + np.exp(0.1)),
0: 0.0,
1: 0.5/(1 + np.exp(-0.1))},
expected_infinity_mass=0.5),
)
def test_connect_dots_pmf_creation(
self, discretization, rounded_epsilons, deltas,
expected_loss_probs, expected_infinity_mass):
pmf = pld_pmf.create_pmf_pessimistic_connect_dots(discretization,
rounded_epsilons,
deltas)
self._check_sparse_pld_pmf_equal(pmf,
discretization,
expected_infinity_mass,
expected_loss_probs,
pessimistic_estimate=True)

@parameterized.parameters((1, 100, True), (10, 100, True), (1, 1001, False),
(10, 101, False), (1001, 1, False),
(1001, 1001, False))
Expand Down

0 comments on commit b4b2647

Please sign in to comment.