This Colab notebook uses DP-Auditorium to test differentially private mechanisms that compute aggregate statistics with PipelineDP.

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/google/differential-privacy/blob/main/python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/google/differential-privacy/blob/main/python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
</table>

<br>
<br>

In [None]:
#@title Install and import dp_auditorium and all necessary libraries.
!pip install google-vizier equinox pipeline_dp
!git clone https://github.com/google/differential-privacy.git
import sys
sys.path.append('differential-privacy/python/dp_auditorium')

from dp_auditorium import privacy_test_runner
from dp_auditorium.generators import pipeline_dp_vizier_dataset_generator
from dp_auditorium.configs import dataset_generator_config
from dp_auditorium.configs import privacy_property
from dp_auditorium.configs import privacy_test_runner_config
from dp_auditorium.configs import property_tester_config
from dp_auditorium.mechanisms.pipeline_dp import aggregation as pipeline_dp_mechanism
from dp_auditorium.testers import hockey_stick_tester

import pipeline_dp
import tensorflow as tf
tf.compat.v1.enable_eager_execution()

In [None]:
#@title Example of testing PipelineDP mean mechanism
import time

def pipeline_dp_mean_mechanism_report(
    epsilon: float,
    delta: float,
    seed: int,
    max_number_partitions: int = 10,
) -> privacy_test_runner_config.PrivacyTestRunnerResults:
  """Audits the PipelineDP per-partition MEAN aggregation.

  Wires together a PipelineDP mean mechanism, a hockey-stick property tester,
  a Vizier-configured dataset generator and a test runner, then runs the
  privacy test for the given (epsilon, delta) guarantee.

  Args:
    epsilon: epsilon of the approximate DP guarantee under test.
    delta: delta of the approximate DP guarantee under test.
    seed: value handed to `tf.random.set_seed`.
    max_number_partitions: upper bound on the number of partitions the dataset
      generator may use; the same range is passed to the mechanism as public
      partitions.

  Returns:
    The PrivacyTestRunnerResults returned by the test runner.
  """
  tf.random.set_seed(seed)

  # ---- Mechanism under test ----
  # PipelineDP parameters for a mean computed per partition, i.e. the DP
  # counterpart of the SQL query
  #   SELECT partition_key, mean(value)
  #   GROUP BY partition_key
  # PipelineDP terminology is explained at
  # https://pipelinedp.io/key-definitions/.
  aggregate_params = pipeline_dp.AggregateParams(
      metrics=[pipeline_dp.Metrics.MEAN],
      # DP is ensured with Laplace noise.
      noise_kind=pipeline_dp.NoiseKind.LAPLACE,
      # Contribution bounding:
      # (a) a privacy unit contributing to more than 1 partition keeps one
      #     randomly chosen partition; its other contributions are dropped.
      max_partitions_contributed=1,
      # (b) a privacy unit contributing more than once to a partition keeps
      #     one randomly chosen contribution; the others are dropped.
      max_contributions_per_partition=1,
      # (c) every contribution is clipped to [-1, 1].
      min_value=-1.0,
      max_value=1.0,
  )
  partitions = list(range(max_number_partitions))
  mechanism_guarantee = privacy_property.ApproximateDp(
      epsilon=epsilon, delta=delta)
  mechanism = pipeline_dp_mechanism.AggregationMechanism(
      aggregate_params, mechanism_guarantee, partitions)

  # ---- Property tester ----
  # The hockey-stick tester receives samples s1 and s2 drawn from two
  # distributions, estimates the hockey-stick divergence between them and
  # checks whether that estimate is bounded by delta.
  hs_config = property_tester_config.HockeyStickPropertyTesterConfig(
      training_config=hockey_stick_tester.make_default_hs_training_config(),
      approximate_dp=privacy_property.ApproximateDp(
          epsilon=epsilon, delta=delta),
  )
  # The base model is a classifier trained to tell apart mechanism outputs on
  # adjacent datasets; the privacy guarantee should limit how accurate it can
  # become.
  hs_tester = hockey_stick_tester.HockeyStickPropertyTester(
      config=hs_config,
      base_model=hockey_stick_tester.make_default_hs_base_model(),
  )

  # ---- Dataset generator ----
  # Neighboring datasets are generated under the add/remove definition. The
  # timestamp makes the study name unique so cached results of earlier runs
  # are not reused.
  vizier_config = dataset_generator_config.VizierDatasetGeneratorConfig(
      study_name=str(time.time()),
      study_owner="owner",
      num_vizier_parameters=2,
      data_type=dataset_generator_config.DataType.DATA_TYPE_FLOAT,
      min_value=-1.0,
      max_value=1.0,
      search_algorithm="RANDOM_SEARCH",
      metric_name="hockey_stick_divergence",
  )
  # Generated datasets have at most max_number_partitions partitions and at
  # most 10 privacy units. Because the dataset partitions coincide with the
  # public partitions, the mechanism drops no partition: partition selection
  # is not checked here, only the noise.
  generator_lib = pipeline_dp_vizier_dataset_generator
  shape_config = generator_lib.PipelineDpDatasetGeneratorConfig(
      max_num_privacy_ids=10, max_num_partitions=max_number_partitions)
  generator = generator_lib.PipelineDpDatasetGenerator(
      vizier_config, shape_config)

  # ---- Test runner ----
  # The runner coordinates the evaluation: it takes a dataset generator, a
  # property tester and a configuration (see the base class for details on
  # these parameters) and runs the property tester on the generated datasets.
  runner = privacy_test_runner.PrivacyTestRunner(
      config=privacy_test_runner_config.PrivacyTestRunnerConfig(
          property_tester=(
              privacy_test_runner_config.PropertyTester.HOCKEY_STICK_TESTER),
          max_num_trials=10,
          failure_probability=0.05,
          num_samples=10_000,
          # The mechanism output is passed through a hyperbolic tangent.
          post_processing=privacy_test_runner_config.PostProcessing.TANH,
      ),
      dataset_generator=generator,
      property_tester=hs_tester,
  )

  report = runner.test_privacy(mechanism, "pipeline_dp-mean-mechanism")
  return report


# Privacy parameters of the guarantee under test and the seed passed to the
# report function.
EPSILON = 1.0
DELTA = 1e-5
SEED = 1

# The results indicate whether a privacy violation was identified within the
# designated number of trials defined in the configuration. In the absence of a
# violation, a message is returned indicating that the limit of the number of
# trials has been reached. For reference, all computed divergences across all
# trials are also reported.
results = pipeline_dp_mean_mechanism_report(EPSILON, DELTA, SEED)
print(f" \nResults: \n{results}")
# Test the flag's truth value: an `is not None` comparison is also true when
# the flag is False, which would announce a violation after a clean run.
if results.found_privacy_violation:
  print("Privacy violations found!")
