/
quantiles.go
460 lines (419 loc) · 17.8 KB
/
quantiles.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
//
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package dpagg
import (
"fmt"
"math"
log "github.com/golang/glog"
"github.com/google/differential-privacy/go/checks"
"github.com/google/differential-privacy/go/noise"
)
// Constants used for QuantileTrees.
const (
numericalTolerance = 1e-6
DefaultTreeHeight = 4
DefaultBranchingFactor = 16
rootIndex = 0
// Fraction a node needs to contribute to the total count of itself and its siblings to be
// considered during the search for a particular quantile. The idea of alpha is to filter out
// noisy empty nodes. This is a post processing parameter with no privacy implications.
alpha = 0.0075
)
// BoundedQuantiles calculates a differentially private quantiles of a collection
// of float64 values using a quantile tree mechanism.
// See https://github.com/google/differential-privacy/blob/main/common_docs/Differentially_Private_Quantile_Trees.pdf.
//
// It supports privacy units that contribute to multiple partitions (via the
// MaxPartitionsContributed parameter) as well as contribute to the same partition
// multiple times (via the MaxContributionsPerPartition parameter), by scaling the
// added noise appropriately.
//
// For general details and key definitions, see
// https://github.com/google/differential-privacy/blob/main/differential_privacy.md#key-definitions.
//
// Note: Do not use when your results may cause overflows for float64
// values. This aggregation is not hardened for such applications yet.
//
// Not thread-safe.
type BoundedQuantiles struct {
// Parameters
epsilon float64
delta float64
lower float64
upper float64
treeHeight int
branchingFactor int
l0Sensitivity int64
lInfSensitivity float64
Noise noise.Noise
noiseKind noise.Kind // necessary for serializing noise.Noise information
// State variables
tree map[int]int64
noisedTree map[int]float64
numLeaves int
leftmostLeafIndex int
state aggregationState
}
// BoundedQuantilesOptions contains the options necessary to initialize a BoundedQuantiles.
type BoundedQuantilesOptions struct {
Epsilon float64 // Privacy parameter ε. Required.
Delta float64 // Privacy parameter δ. Required with Gaussian noise, must be 0 with Laplace noise.
MaxPartitionsContributed int64 // How many distinct partitions may a single privacy unit contribute to? Defaults to 1.
MaxContributionsPerPartition int64 // How many times may a single user contribute to a single partition? Required.
// Lower and Upper bounds for clamping. Default to 0; must be such that Lower < Upper.
Lower, Upper float64
Noise noise.Noise // Type of noise used in BoundedSum. Defaults to Laplace noise.
// It is not recommended to set TreeHeight and BranchingFactor since they require
// implementation-specific insight to modify and they only apply to QuantileTree
// algorithm, which might become obsolote if another algorithm is used.
TreeHeight int // Height of the QuantileTree. Defaults to defaultTreeHeight.
BranchingFactor int // Number of children of every non-leaf node. Defaults to defaultBranchingFactor.
}
// NewBoundedQuantiles returns a new BoundedQuantiles.
func NewBoundedQuantiles(opt *BoundedQuantilesOptions) *BoundedQuantiles {
if opt == nil {
opt = &BoundedQuantilesOptions{}
}
maxContributionsPerPartition := opt.MaxContributionsPerPartition
if maxContributionsPerPartition == 0 {
// TODO: do not exit the program from within library code
log.Fatalf("NewBoundedQuantiles requires a value for MaxContributionsPerPartition")
}
// Set defaults.
maxPartitionsContributed := opt.MaxPartitionsContributed
if maxPartitionsContributed == 0 {
maxPartitionsContributed = 1
}
n := opt.Noise
if n == nil {
n = noise.Laplace()
}
// Check bounds.
lower, upper := opt.Lower, opt.Upper
if lower == 0 && upper == 0 {
// TODO: do not exit the program from within library code
log.Fatalf("NewBoundedQuantiles requires a non-default value for Lower or Upper (automatic bounds determination is not implemented yet). Lower and Upper cannot be both 0")
}
if err := checks.CheckBoundsFloat64("NewBoundedQuantiles", lower, upper); err != nil {
// TODO: do not exit the program from within library code
log.Fatalf("CheckBoundsFloat64(lower %f, upper %f) failed with %v", lower, upper, err)
}
if err := checks.CheckBoundsNotEqual("NewBoundedQuantiles", lower, upper); err != nil {
// TODO: do not exit the program from within library code
log.Fatalf("CheckBoundsNotEqual(lower %f, upper %f) failed with %v", lower, upper, err)
}
// Check tree height and branching factor, set defaults if not specified, and use them to compute numLeaves and leftmostLeafIndex.
treeHeight := opt.TreeHeight
if treeHeight == 0 {
treeHeight = DefaultTreeHeight
}
if err := checks.CheckTreeHeight("NewBoundedQuantiles", treeHeight); err != nil {
// TODO: do not exit the program from within library code
log.Fatalf("CheckTreeHeight failed with %v", err)
}
branchingFactor := opt.BranchingFactor
if branchingFactor == 0 {
branchingFactor = DefaultBranchingFactor
}
if err := checks.CheckBranchingFactor("NewBoundedQuantiles", branchingFactor); err != nil {
// TODO: do not exit the program from within library code
log.Fatalf("CheckBranchingFactor failed with %v", err)
}
numNodes := getNumNodes(treeHeight, branchingFactor)
numLeaves := getNumLeaves(treeHeight, branchingFactor)
// The following assumes that nodes are indexed in a breadth first fashion from left to right.
leftmostLeafIndex := numNodes - numLeaves
eps, del := opt.Epsilon, opt.Delta
// The l_1 sensitivty of a privacy unit's contribution is
// treeHeight * maxPartitionsContributed * maxContributionsPerPartition
// while the l_2 sensitivty is
// sqrt(treeHeight * maxPartitionsContributed) * maxContributionsPerPartition
// (the latter is realized if the privacy unit increments the exact same counters for each of
// their contributions to a particular partition). Setting the l_0 and l_inf sensitivity as
// follows yields the respective l_1 and l_2 values.
l0Sensitivity := int64(treeHeight) * maxPartitionsContributed
lInfSensitivity := float64(maxContributionsPerPartition)
// Check that the parameters are compatible with the noise chosen by calling
// the noise on some dummy value.
n.AddNoiseFloat64(0, l0Sensitivity, lInfSensitivity, eps, del)
return &BoundedQuantiles{
epsilon: eps,
delta: del,
lower: lower,
upper: upper,
treeHeight: treeHeight,
branchingFactor: branchingFactor,
l0Sensitivity: l0Sensitivity,
lInfSensitivity: lInfSensitivity,
Noise: n,
noiseKind: noise.ToKind(n),
tree: make(map[int]int64),
noisedTree: make(map[int]float64),
numLeaves: numLeaves,
leftmostLeafIndex: leftmostLeafIndex,
state: defaultState,
}
}
// Add adds an entry to BoundedQuantiles. It skips (ignores) NaN values because their
// contribution to the final result is not well defined.
func (bq *BoundedQuantiles) Add(e float64) {
if bq.state != defaultState {
// TODO: do not exit the program from within library code
log.Fatalf("BoundedQuantiles cannot be amended. Reason: %v", bq.state.errorMessage())
}
if !math.IsNaN(e) {
// Increment all counts on the path from the leaf node where the value is inserted up to the
// first level (root not included).
clamped, err := ClampFloat64(e, bq.lower, bq.upper)
if err != nil {
// TODO: do not exit the program from within library code
log.Fatalf("Couldn't clamp input value %f, err %v", e, err)
}
index := bq.getIndex(clamped)
for index != rootIndex {
count := bq.tree[index]
bq.tree[index] = count + 1
index = bq.getParent(index)
}
}
}
// Result calculates and returns a differentially private quantile of the values added.
// The specified rank must be between 0.0 and 1.0.
//
// This function can be called multiple times to compute different quantiles. Privacy budget is
// paid only once, on its first invocation. Calling this method repeatedly for the same rank will
// return the same result. The results of repeated calls are guaranteed to be monotonically
// increasing in the sense that r_1 < r_2 implies that Result(r_1) <= Result(r_2).
//
// Note that the returned values is not an unbiased estimate of the raw bounded quantile.
func (bq *BoundedQuantiles) Result(rank float64) float64 {
if bq.state != defaultState && bq.state != resultReturned {
// TODO: do not exit the program from within library code
log.Fatalf("BoundedQuantiles' noised result cannot be computed. Reason: %v", bq.state.errorMessage())
}
bq.state = resultReturned
if rank < 0.0 || rank > 1.0 {
log.Fatalf("rank %f must be >= 0 and <= 1.", rank)
}
rank = adjustRank(rank)
index := rootIndex
// Search for the index of the leaf node containg the specified quantile, starting at the root.
for index < bq.leftmostLeafIndex {
leftmostChildIndex := bq.getLeftmostChild(index)
rightmostChildIndex := bq.getRightmostChild(index)
totalCount := 0.0
for i := leftmostChildIndex; i <= rightmostChildIndex; i++ {
totalCount += math.Max(0.0, bq.getNoisedCount(i))
}
correctedTotalCount := 0.0
for i := leftmostChildIndex; i <= rightmostChildIndex; i++ {
// Treat child nodes contributing less than an alpha fraction to the total count as empty
// subtrees.
if bq.getNoisedCount(i) >= totalCount*alpha {
correctedTotalCount += bq.getNoisedCount(i)
}
}
if correctedTotalCount == 0.0 {
// Either all counts are 0.0 or no child node contributes more than an alpha fraction to the
// total count (the latter can only happen when alpha > 1 / branching factor, which is not
// the case for the default branching factor). This means that all child nodes are
// considered empty and there is no need to proceed further down the tree.
break
}
// Determine the child noide whose subtree contains the quantile.
partialCount := 0.0
for i := leftmostChildIndex; true; i++ {
count := bq.getNoisedCount(i)
// Skip child nodes contributing less than alpha to the total count.
if count >= totalCount*alpha {
partialCount += count
// Check if the quantile is in the current child's subtree.
if partialCount/correctedTotalCount >= rank-numericalTolerance {
rank = (rank - (partialCount-count)/correctedTotalCount) / (count / correctedTotalCount)
// Clamping rank to a value between 0.0 and 1.0. Note that rank can become greater than
// 1 because of the numerical tolerance. Values less than 0.0 should not occur. The
// respective clamping is set in place to be on the safe side.
rank = math.Min(math.Max(0.0, rank), 1.0)
index = i
break
}
}
}
}
// Linearly interpolate between the smallest and largest value associated with the node of the
// current index.
return (1-rank)*bq.getLeftValue(index) + rank*bq.getRightValue(index)
}
// getIndex returns the index of the leaf node associated with the provided value, assuming that
// the leaf nodes partition the range betwen lower and upper into intervals of equal size.
func (bq *BoundedQuantiles) getIndex(value float64) int {
indexFromLeftmostLeaf := int((value - bq.lower) / (bq.upper - bq.lower) * float64(bq.numLeaves))
if value == bq.upper {
indexFromLeftmostLeaf = bq.numLeaves - 1
}
return bq.leftmostLeafIndex + indexFromLeftmostLeaf
}
// getLeftValue returns the smallest value mapped to the subtree of the provided index, assuming that
// the leaf nodes partition the range betwen lower and upper into intervals of equal size.
func (bq *BoundedQuantiles) getLeftValue(index int) float64 {
// Traverse the tree towards the leaves starting at the provided index always taking the leftmost branch.
for index < bq.leftmostLeafIndex {
index = bq.getLeftmostChild(index)
}
return (bq.upper-bq.lower)*(float64((index-bq.leftmostLeafIndex))/float64(bq.numLeaves)) + bq.lower
}
// getRightValue returns the greatest value mapped to the subtree of the provided index, assuming that
// the leaf nodes partition the range betwen lower and upper into intervals of equal size.
func (bq *BoundedQuantiles) getRightValue(index int) float64 {
// Traverse the tree towards the leaves starting at the provided index always taking the rightmost branch.
for index < bq.leftmostLeafIndex {
index = bq.getRightmostChild(index)
}
// The returned value bounds the range of values for which getIndex returns the specified index.
// This bound is not itself contained in that range, i.e., getIndex will return the next index
// when called for the bound.
return (bq.upper-bq.lower)*(float64((index-bq.leftmostLeafIndex+1))/float64(bq.numLeaves)) + bq.lower
}
func (bq *BoundedQuantiles) getLeftmostChild(index int) int {
return index*bq.branchingFactor + 1
}
func (bq *BoundedQuantiles) getRightmostChild(index int) int {
return (index + 1) * bq.branchingFactor
}
func (bq *BoundedQuantiles) getParent(index int) int {
return (index - 1) / bq.branchingFactor
}
func (bq *BoundedQuantiles) getNoisedCount(index int) float64 {
if noisedCount, ok := bq.noisedTree[index]; ok {
return noisedCount
}
rawCount := bq.tree[index]
noisedCount := bq.Noise.AddNoiseFloat64(float64(rawCount), bq.l0Sensitivity, bq.lInfSensitivity, bq.epsilon, bq.delta)
bq.noisedTree[index] = noisedCount
return noisedCount
}
// Clamps the rank to a value between 0.005 and 0.995. The purpose of this adjustment is to mitigate
// the inaccuracy of the quantile tree mechanism around the min and max, i.e., the 0 and 1 rank.
func adjustRank(rank float64) float64 {
return math.Max(math.Min(rank, 0.995), 0.005)
}
func getNumNodes(treeHeight, branchingFactor int) int {
return int((math.Pow(float64(branchingFactor), float64(treeHeight+1)) - 1.0)) / (branchingFactor - 1)
}
func getNumLeaves(treeHeight, branchingFactor int) int {
return int(math.Pow(float64(branchingFactor), float64(treeHeight)))
}
// Merge merges bq2 into bq (i.e., adds to bq all entries that were added to
// bq2). bq2 is consumed by this operation: bq2 may not be used after it is
// merged into bq.
func (bq *BoundedQuantiles) Merge(bq2 *BoundedQuantiles) {
if err := checkMergeBoundedQuantiles(bq, bq2); err != nil {
// TODO: do not exit the program from within library code
log.Exit(err)
}
for index, count := range bq2.tree {
bq.tree[index] += count
}
bq2.state = merged
}
func checkMergeBoundedQuantiles(bq1, bq2 *BoundedQuantiles) error {
if bq1.state != defaultState {
return fmt.Errorf("checkMergeBoundedQuantiles: bq1 cannot be merged with another BoundedQuantiles instance. Reason: %v", bq1.state.errorMessage())
}
if bq2.state != defaultState {
return fmt.Errorf("checkMergeBoundedQuantiles: bq2 cannot be merged with another BoundedQuantiles instance. Reason: %v", bq2.state.errorMessage())
}
if !bqEquallyInitialized(bq1, bq2) {
return fmt.Errorf("checkMergeBoundedQuantiles: bq1 and bq2 are not compatible")
}
return nil
}
func bqEquallyInitialized(bq1, bq2 *BoundedQuantiles) bool {
return bq1.epsilon == bq2.epsilon &&
bq1.delta == bq2.delta &&
bq1.l0Sensitivity == bq2.l0Sensitivity &&
bq1.lInfSensitivity == bq2.lInfSensitivity &&
bq1.lower == bq2.lower &&
bq1.upper == bq2.upper &&
bq1.treeHeight == bq2.treeHeight &&
bq1.branchingFactor == bq2.branchingFactor &&
bq1.noiseKind == bq2.noiseKind &&
bq1.state == bq2.state
}
// encodableBoundedQuantiles can be encoded by the gob package.
type encodableBoundedQuantiles struct {
Epsilon float64
Delta float64
L0Sensitivity int64
LInfSensitivity float64
TreeHeight int
BranchingFactor int
Lower float64
Upper float64
NumLeaves int
LeftmostLeafIndex int
NoiseKind noise.Kind
QuantileTree map[int]int64
}
// GobEncode encodes BoundedQuantiles.
func (bq *BoundedQuantiles) GobEncode() ([]byte, error) {
if bq.state != defaultState {
return nil, fmt.Errorf("BoundedQuantiles object cannot be serialized. Reason: " + bq.state.errorMessage())
}
enc := encodableBoundedQuantiles{
Epsilon: bq.epsilon,
Delta: bq.delta,
L0Sensitivity: bq.l0Sensitivity,
LInfSensitivity: bq.lInfSensitivity,
TreeHeight: bq.treeHeight,
BranchingFactor: bq.branchingFactor,
Lower: bq.lower,
Upper: bq.upper,
NumLeaves: bq.numLeaves,
LeftmostLeafIndex: bq.leftmostLeafIndex,
NoiseKind: noise.ToKind(bq.Noise),
QuantileTree: bq.tree,
}
bq.state = serialized
return encode(enc)
}
// GobDecode decodes BoundedQuantiles.
func (bq *BoundedQuantiles) GobDecode(data []byte) error {
var enc encodableBoundedQuantiles
err := decode(&enc, data)
if err != nil {
log.Fatalf("GobDecode: couldn't decode BoundedQuantiles from bytes")
return err
}
*bq = BoundedQuantiles{
epsilon: enc.Epsilon,
delta: enc.Delta,
l0Sensitivity: enc.L0Sensitivity,
lInfSensitivity: enc.LInfSensitivity,
treeHeight: enc.TreeHeight,
branchingFactor: enc.BranchingFactor,
lower: enc.Lower,
upper: enc.Upper,
noiseKind: enc.NoiseKind,
Noise: noise.ToNoise(enc.NoiseKind),
numLeaves: enc.NumLeaves,
leftmostLeafIndex: enc.LeftmostLeafIndex,
tree: enc.QuantileTree,
noisedTree: make(map[int]float64),
state: defaultState,
}
return nil
}