Skip to content

Commit

Permalink
Clean-ups in the C++ library and Privacy-on-Beam
Browse files Browse the repository at this point in the history
C++:
- Formatting changes to partition selection code
- Fix overflow in automatic bounds approximation
- Stability and security improvements

Privacy-on-Beam:
- Small cleanup improvements.

GitOrigin-RevId: fae985e84bb8ed01502e5995520fa9a7aa4ee93e
Change-Id: I906bc8074eb4bfeee9ea73bbbb91a140f291d63b
  • Loading branch information
Differential Privacy Team authored and monsieurmuffin committed Aug 24, 2020
1 parent 1324cfe commit ab1b003
Show file tree
Hide file tree
Showing 37 changed files with 961 additions and 661 deletions.
8 changes: 8 additions & 0 deletions CONTRIBUTING.md
Expand Up @@ -40,6 +40,14 @@ frameworks.
* **Fine building blocks over large aggregates:** one should be able to
use sub-operations like noise generation and bounds approximation separately
from aggregation functions.
* **Unbiasedness:** aggregations should be unbiased if possible. In particular,
we prefer unbiased aggregations over aggregations that post-process results for
consistency reasons (e.g. we do not clip negative count values to 0 as this
would introduce bias). However, we may use biased aggregations if an unbiased
solution is not known, provides inferior utility, does not support distributed
computation or is significantly more complex to understand/implement/maintain.
The library should clearly indicate which aggregations are unbiased and test for
this property.
* **Robust Testing:** each feature must come with a full set of unit tests, and
the privacy guarantees must be tested end-to-end.
* Markdown is preferred for explaining complex concepts and math over lengthy
Expand Down
1 change: 1 addition & 0 deletions accounting/python/BUILD.bazel
Expand Up @@ -29,6 +29,7 @@ py_library(
srcs = [
"privacy_loss_distribution.py",
],
srcs_version = "PY3",
deps = [
requirement("numpy"),
requirement("scipy"),
Expand Down
14 changes: 4 additions & 10 deletions accounting/python/privacy_loss_distribution.py
Expand Up @@ -253,17 +253,11 @@ def from_two_probability_mass_functions(
# Discretize the probability mass so that the values are integer multiples
# of value_discretization_interval
rounded_probability_mass_function = collections.defaultdict(lambda: 0)
round_fn = math.ceil if pessimistic_estimate else math.floor
for val in probability_mass_function:
if pessimistic_estimate:
# When we would like a pessimistic estimate, round the value up.
rounded_probability_mass_function[int(
math.ceil(val / value_discretization_interval)
)] += probability_mass_function[val]
else:
# When we would like an optimistic estimate, round the value down.
rounded_probability_mass_function[int(
math.floor(val / value_discretization_interval)
)] += probability_mass_function[val]
rounded_probability_mass_function[
round_fn(val / value_discretization_interval)
] += probability_mass_function[val]

return cls(rounded_probability_mass_function, value_discretization_interval,
infinity_mass)
Expand Down
20 changes: 16 additions & 4 deletions cc/algorithms/BUILD
Expand Up @@ -41,6 +41,7 @@ cc_test(
copts = ["-Wno-sign-compare"],
deps = [
":algorithm",
"//base:statusor",
"//base/testing:status_matchers",
"@com_google_googletest//:gtest_main",
],
Expand Down Expand Up @@ -282,6 +283,7 @@ cc_test(
deps = [
":count",
":numerical-mechanisms-testing",
"//base:statusor",
"//base/testing:proto_matchers",
"//base/testing:status_matchers",
"@com_google_googletest//:gtest_main",
Expand All @@ -300,6 +302,7 @@ cc_library(
"//base:logging",
"//base:status",
"//base:statusor",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/strings",
],
)
Expand Down Expand Up @@ -327,6 +330,7 @@ cc_library(
deps = [
":rand",
":util",
"//base:logging",
"//base:status",
"//base:statusor",
"@com_google_absl//absl/memory",
Expand Down Expand Up @@ -359,8 +363,14 @@ cc_library(
deps = [
":distributions",
":util",
"//base:logging",
"//base:status",
"//base:statusor",
"@com_google_differential_privacy//proto:confidence_interval_cc_proto",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/types:optional",
],
)

Expand All @@ -372,6 +382,7 @@ cc_test(
deps = [
":distributions",
":numerical-mechanisms",
"//base:statusor",
"@com_google_googletest//:gtest_main",
],
)
Expand Down Expand Up @@ -451,6 +462,7 @@ cc_test(
":algorithm",
":approx-bounds",
":bounded-algorithm",
"//base:statusor",
"//base/testing:status_matchers",
"@com_google_googletest//:gtest_main",
],
Expand Down Expand Up @@ -497,10 +509,10 @@ cc_library(
copts = ["-Wno-sign-compare"],
deps = [
":numerical-mechanisms",
":util",
":rand",
":util",
"//base:status",
"//base:statusor"
"//base:statusor",
],
)

Expand All @@ -509,8 +521,8 @@ cc_test(
srcs = ["partition-selection_test.cc"],
copts = ["-Wno-sign-compare"],
deps = [
":partition-selection",
":numerical-mechanisms-testing",
":partition-selection",
"@com_google_googletest//:gtest_main",
],
)
)
1 change: 1 addition & 0 deletions cc/algorithms/algorithm_test.cc
Expand Up @@ -23,6 +23,7 @@
#include "base/testing/status_matchers.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "base/statusor.h"

namespace differential_privacy {
namespace {
Expand Down
6 changes: 5 additions & 1 deletion cc/algorithms/approx-bounds.h
Expand Up @@ -263,6 +263,10 @@ class ApproxBounds : public Algorithm<T> {
return 0;
}

// Clamp infinities to highest and lowest value.
value = Clamp(std::numeric_limits<T>::lowest(),
std::numeric_limits<T>::max(), value);

// Sometimes the minimum numeric limit has greater magnitude than the
// maximum. In this case clamp its magnitude at the maximum numeric limit to
// find msb. In reality our negative bin will accommodate the value.
Expand Down Expand Up @@ -538,7 +542,7 @@ class ApproxBounds : public Algorithm<T> {
for (int i = 0; i < bins.size(); ++i) {
double noised_dbl =
mechanism_->AddNoise(static_cast<double>(bins[i]), privacy_budget);
noisy_bins[i] = SafeCastFromDouble<T>(noised_dbl);
SafeCastFromDouble<T>(noised_dbl, noisy_bins[i]);
}
return noisy_bins;
}
Expand Down
21 changes: 21 additions & 0 deletions cc/algorithms/approx-bounds_test.cc
Expand Up @@ -241,6 +241,27 @@ TEST(ApproxBoundsTest, DropNanEntries) {
EXPECT_EQ(result.elements(1).value().float_value(), 1);
}

// Feeds three finite entries (1) and two INFINITY entries into an ApproxBounds
// configured with 13 bins, base 2, scale 7, threshold 2 and a zero-noise
// mechanism. The test expects result element 0 to be 0 and result element 1 to
// be scale * base^(bins - 1), i.e. the infinite entries must be counted rather
// than dropped.
// NOTE(review): elements(0)/elements(1) are presumably the lower/upper bound;
// confirm against ApproxBounds' output layout.
//
// The suite name is ApproxBoundsTest so that this test is grouped (and
// filtered) together with the other tests in this file.
TEST(ApproxBoundsTest, HandleInfinityEntries) {
  std::vector<double> a = {1, 1, 1, INFINITY, INFINITY};
  const double bins = 13;
  const double base = 2;
  const double scale = 7;
  std::unique_ptr<ApproxBounds<double>> bounds =
      ApproxBounds<double>::Builder()
          .SetNumBins(bins)
          .SetBase(base)
          .SetScale(scale)
          .SetThreshold(2)
          .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
          .Build()
          .ValueOrDie();
  bounds->AddEntries(a.begin(), a.end());
  auto result = bounds->PartialResult().ValueOrDie();
  EXPECT_EQ(result.elements(0).value().float_value(), 0);
  // Expected value for element 1, computed from the builder parameters above.
  const double max_result = scale * std::pow(base, bins - 1);
  EXPECT_EQ(result.elements(1).value().float_value(), max_result);
}

TEST(ApproxBoundsTest, NumPositiveBins) {
std::unique_ptr<ApproxBounds<double>> bounds = ApproxBounds<double>::Builder()
.SetNumBins(2)
Expand Down
4 changes: 3 additions & 1 deletion cc/algorithms/bounded-sum.h
Expand Up @@ -285,7 +285,9 @@ class BoundedSum : public Algorithm<T> {
// Add noise to sum. Use the remaining privacy budget.
double noisy_sum = mechanism_->AddNoise(sum, remaining_budget);
if (std::is_integral<T>::value) {
AddToOutput<T>(&output, std::round(noisy_sum));
T value;
SafeCastFromDouble<T>(std::round(noisy_sum), value);
AddToOutput<T>(&output, value);
} else {
AddToOutput<T>(&output, noisy_sum);
}
Expand Down
2 changes: 1 addition & 1 deletion cc/algorithms/bounded-variance.h
Expand Up @@ -459,7 +459,7 @@ class BoundedVariance : public Algorithm<T> {
return mechanism_builder->SetEpsilon(epsilon)
.SetL0Sensitivity(l0_sensitivity)
.SetLInfSensitivity(max_contributions_per_partition *
static_cast<double>((upper - lower) / 2))
static_cast<double>(upper - lower) / 2.0)
.Build();
}

Expand Down
5 changes: 3 additions & 2 deletions cc/algorithms/count.h
Expand Up @@ -82,8 +82,9 @@ class Count : public Algorithm<T> {
base::StatusOr<Output> GenerateResult(double privacy_budget,
double noise_interval_level) override {
Output output;
int64_t countWithNoise = SafeCastFromDouble<int64_t>(
std::round(mechanism_->AddNoise(count_, privacy_budget)));
int64_t countWithNoise;
SafeCastFromDouble(std::round(mechanism_->AddNoise(count_, privacy_budget)),
countWithNoise);
AddToOutput<int64_t>(&output, countWithNoise);

base::StatusOr<ConfidenceInterval> interval =
Expand Down
1 change: 1 addition & 0 deletions cc/algorithms/count_test.cc
Expand Up @@ -23,6 +23,7 @@
#include "base/testing/status_matchers.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "base/statusor.h"
#include "algorithms/numerical-mechanisms-testing.h"
#include "proto/data.pb.h"
#include "proto/summary.pb.h"
Expand Down
16 changes: 10 additions & 6 deletions cc/algorithms/distributions.cc
Expand Up @@ -16,9 +16,11 @@
#include "algorithms/distributions.h"

#include <cmath>
#include <limits>

#include "absl/memory/memory.h"
#include "absl/random/random.h"
#include "base/statusor.h"
#include "absl/strings/string_view.h"
#include "algorithms/rand.h"
#include "algorithms/util.h"
Expand Down Expand Up @@ -52,30 +54,32 @@ double ApproximateBinomialProbability(double sqrt_n, int64_t m) {

} // namespace

GaussianDistribution::GaussianDistribution(double stddev)
: stddev_(stddev),
granularity_(GetNextPowerOfTwo(2 * stddev / kBinomialBound)) {
GaussianDistribution::GaussianDistribution(double stddev) : stddev_(stddev) {
DCHECK_GE(stddev, 0.0);
}

double GaussianDistribution::Sample(double scale) {
DCHECK_GT(scale, 0);
// TODO: make graceful behaviour when sigma is too big.
double sigma = scale * stddev_;
double granularity = GetGranularity(scale);

// The square root of n is chosen in a way that ensures that the respective
// binomial distribution approximates a Gaussian distribution close enough.
// The sqrt(n) is taken instead of n, to ensure that all results of arithmetic
// operations fit in 64 bit integer range.
double sqrt_n = 2.0 * sigma / granularity_;
return SampleBinomial(sqrt_n) * granularity_;
double sqrt_n = 2.0 * sigma / granularity;
return SampleBinomial(sqrt_n) * granularity;
}

// Convenience overload: forwards to Sample(double) with a scale of 1.0.
double GaussianDistribution::Sample() {
  return Sample(1.0);
}

// Accessor for the stored standard deviation (stddev_).
double GaussianDistribution::Stddev() {
  return stddev_;
}

double GaussianDistribution::GetGranularity() { return granularity_; }
// Returns the granularity for the given scale: GetNextPowerOfTwo applied to
// 2 * (scale * stddev_) / kBinomialBound.
double GaussianDistribution::GetGranularity(double scale) const {
  const double scaled_stddev = scale * stddev_;
  const double raw_granularity = 2 * scaled_stddev / kBinomialBound;
  return GetNextPowerOfTwo(raw_granularity);
}

GeometricDistribution::GeometricDistribution(double lambda) : lambda_(lambda) {
DCHECK_GE(lambda, 0);
Expand Down
9 changes: 7 additions & 2 deletions cc/algorithms/distributions.h
Expand Up @@ -17,6 +17,9 @@
#ifndef DIFFERENTIAL_PRIVACY_ALGORITHMS_DISTRIBUTIONS_H_
#define DIFFERENTIAL_PRIVACY_ALGORITHMS_DISTRIBUTIONS_H_

#include <memory>

#include <cstdint>
#include "base/statusor.h"

namespace differential_privacy {
Expand Down Expand Up @@ -44,7 +47,10 @@ class GaussianDistribution {
// Returns the standard deviation of this distribution.
double Stddev();

double GetGranularity();
// Returns the granularity that is also used when calculating Sample(). Be
// careful when using GetGranularity() together with Sample() and make sure to
// use the same parameter for scale in such cases.
double GetGranularity(double scale) const;

private:
// Sample from geometric distribution with probability 0.5. It is much faster
Expand All @@ -53,7 +59,6 @@ class GaussianDistribution {
double SampleBinomial(double sqrt_n);

double stddev_;
double granularity_;
};

// Returns a sample drawn from the geometric distribution of probability
Expand Down

0 comments on commit ab1b003

Please sign in to comment.