Skip to content

Commit

Permalink
Add a tool for assessing the IDPF aggregation parameter size
Browse files Browse the repository at this point in the history
  • Loading branch information
cjpatton committed Feb 26, 2024
1 parent d09f950 commit 2eae6f5
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 76 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ sha2 = { version = "0.10.8", optional = true }
sha3 = "0.10.8"
subtle = "2.5.0"
thiserror = "1.0"
zipf = { version = "7.0.1", optional = true }

[dev-dependencies]
assert_matches = "1.5.0"
Expand All @@ -49,14 +50,13 @@ once_cell = "1.19.0"
prio = { path = ".", features = ["crypto-dependencies", "test-util"] }
rand = "0.8"
statrs = "0.16.0"
zipf = "7.0.1"

[features]
default = ["crypto-dependencies"]
experimental = ["bitvec", "fiat-crypto", "fixed", "num-bigint", "num-rational", "num-traits", "num-integer", "num-iter", "rand"]
multithreaded = ["rayon"]
crypto-dependencies = ["aes", "ctr", "hmac", "sha2"]
test-util = ["hex", "rand", "serde_json"]
test-util = ["hex", "rand", "serde_json", "zipf"]

[workspace]
members = [".", "binaries"]
Expand Down
76 changes: 3 additions & 73 deletions benches/speed_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ use num_traits::ToPrimitive;
#[cfg(feature = "experimental")]
use prio::dp::distributions::DiscreteGaussian;
#[cfg(feature = "experimental")]
use prio::idpf::test_utils::generate_zipf_distributed_batch;
#[cfg(feature = "experimental")]
use prio::vdaf::prio2::Prio2;
use prio::{
benchmarked::*,
Expand All @@ -35,8 +37,6 @@ use rand::prelude::*;
#[cfg(feature = "experimental")]
use std::iter;
use std::time::Duration;
#[cfg(feature = "experimental")]
use zipf::ZipfDistribution;

/// Seed for generation of random benchmark inputs.
///
Expand Down Expand Up @@ -746,7 +746,7 @@ fn poplar1(c: &mut Criterion) {

// Parameters are chosen to match Chris Wood's experimental setup:
// https://github.com/chris-wood/heavy-hitter-comparison
let (measurements, prefix_tree) = poplar1_generate_zipf_distributed_batch(
let (measurements, prefix_tree) = generate_zipf_distributed_batch(
&mut rng, // rng
size, // bits
10, // threshold
Expand Down Expand Up @@ -794,76 +794,6 @@ fn poplar1(c: &mut Criterion) {
group.finish();
}

/// Generate a batch of Poplar1 measurements, each `bits` bits long, together with the prefix tree
/// traversed for the desired `threshold`.
///
/// First, `zipf_support` candidate inputs are built from random bits drawn from `rng`. Then
/// `measurement_count` measurements are picked from those candidates according to the Zipf
/// distribution with parameters `zipf_support` and `zipf_exponent`.
///
/// The returned prefix tree holds one list of candidate prefixes per level, `bits` levels in
/// total. The first level always consists of the two one-bit prefixes. Each later level is derived
/// from the previous one: for every prefix `p` that is a prefix of at least `threshold`
/// measurements, `p || 0` and `p || 1` are added to the list.
///
/// # Panics
///
/// Panics if `bits` is zero, or if `ZipfDistribution::new` rejects `zipf_support` and
/// `zipf_exponent`.
#[cfg(feature = "experimental")]
fn poplar1_generate_zipf_distributed_batch(
    rng: &mut impl Rng,
    bits: usize,
    threshold: usize,
    measurement_count: usize,
    zipf_support: usize,
    zipf_exponent: f64,
) -> (Vec<IdpfInput>, Vec<Vec<IdpfInput>>) {
    // The first level of the prefix tree already consists of one-bit prefixes, so zero-length
    // measurements cannot be represented. Fail loudly instead of underflowing the level count.
    assert!(bits > 0, "measurements must be at least one bit long");

    // Generate the random candidate inputs.
    let inputs: Vec<IdpfInput> = (0..zipf_support)
        .map(|_| {
            let bools: Vec<bool> = (0..bits).map(|_| rng.gen()).collect();
            IdpfInput::from_bools(&bools)
        })
        .collect();

    // Sample a number of inputs according to the Zipf distribution. One is subtracted from each
    // sample to turn it into an index into `inputs`.
    let zipf = ZipfDistribution::new(zipf_support, zipf_exponent)
        .expect("invalid Zipf distribution parameters");
    let mut samples = Vec::with_capacity(measurement_count);
    for _ in 0..measurement_count {
        samples.push(inputs[zipf.sample(rng) - 1].clone());
    }

    // Compute the prefix tree for the desired threshold, starting from both one-bit prefixes.
    let mut prefix_tree = Vec::with_capacity(bits);
    prefix_tree.push(vec![
        IdpfInput::from_bools(&[false]),
        IdpfInput::from_bools(&[true]),
    ]);

    for level in 1..bits {
        let next_prefixes: Vec<IdpfInput> = prefix_tree[level - 1]
            .iter()
            .filter(|prefix| {
                // Hit count: the number of samples that start with this prefix.
                let hit_count = samples
                    .iter()
                    .filter(|sample| (0..prefix.len()).all(|j| prefix[j] == sample[j]))
                    .count();
                hit_count >= threshold
            })
            // Every surviving prefix is extended by both possible next bits.
            .flat_map(|prefix| {
                [
                    prefix.clone_with_suffix(&[false]),
                    prefix.clone_with_suffix(&[true]),
                ]
            })
            .collect();
        prefix_tree.push(next_prefixes);
    }

    (samples, prefix_tree)
}

#[cfg(feature = "experimental")]
criterion_group!(benches, poplar1, prio3, prio2, poly_mul, prng, idpf, dp_noise);
#[cfg(not(feature = "experimental"))]
Expand Down
3 changes: 2 additions & 1 deletion binaries/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ repository = "https://github.com/divviup/libprio-rs"
base64 = "0.21.7"
fixed = "1.25"
fixed-macro = "1.2.0"
prio = { path = "..", features = ["experimental"] }
rand = "0.8"
prio = { path = "..", features = ["experimental", "test-util"] }
46 changes: 46 additions & 0 deletions binaries/src/bin/idpf_agg_param_size.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use std::time::Instant;

use prio::{
codec::Encode, idpf::test_utils::generate_zipf_distributed_batch,
vdaf::poplar1::Poplar1AggregationParam,
};
use rand::prelude::*;

/// Report the encoded size of the Poplar1 aggregation parameter at every level of a prefix tree
/// computed from a Zipf-distributed batch of measurements.
///
/// For each level, prints the level index, the encoded length of the aggregation parameter and
/// the number of candidate prefixes. Finally prints the level with the largest encoded length.
///
/// # Panics
///
/// Panics if an aggregation parameter cannot be constructed from a level's prefixes or cannot be
/// encoded; the panic message names the offending level.
fn main() {
    let bits = 256;
    let measurement_count = 10_000;
    let threshold = ((measurement_count as f64) * 0.01) as usize; // 1%
    let zipf_support = 128;
    let zipf_exponent = 1.03;

    println!("Generating inputs and computing the prefix tree. This may take some time...");
    let start = Instant::now();
    let (_measurements, prefix_tree) = generate_zipf_distributed_batch(
        &mut thread_rng(),
        bits,
        threshold,
        measurement_count,
        zipf_support,
        zipf_exponent,
    );
    let elapsed = start.elapsed();
    println!("Finished in {elapsed:?}");

    let mut max_agg_param_len = 0;
    let mut max_agg_param_level = 0;
    for (level, prefixes) in prefix_tree.into_iter().enumerate() {
        let num_prefixes = prefixes.len();
        // `expect()` takes a plain string and would print "{level}" verbatim, so format the
        // panic message explicitly to report the actual level and the underlying error.
        let agg_param = Poplar1AggregationParam::try_from_prefixes(prefixes)
            .unwrap_or_else(|e| panic!("failed to encode prefixes at level {level}: {e:?}"));
        let agg_param_len = agg_param
            .get_encoded()
            .unwrap_or_else(|e| {
                panic!("failed to encode the aggregation parameter at level {level}: {e:?}")
            })
            .len();
        // Strict comparison: on ties the earliest level with the maximum length is reported.
        if agg_param_len > max_agg_param_len {
            max_agg_param_len = agg_param_len;
            max_agg_param_level = level;
        }
        println!("{level}: {agg_param_len} {num_prefixes}");
    }
    println!("max: {max_agg_param_level}: {max_agg_param_len}");
}
80 changes: 80 additions & 0 deletions src/idpf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,86 @@ impl IdpfCache for RingBufferCache {
}
}

/// Utilities for testing IDPFs.
#[cfg(feature = "test-util")]
#[cfg_attr(docsrs, doc(cfg(feature = "test-util")))]
pub mod test_utils {
    use super::*;

    use rand::prelude::*;
    use zipf::ZipfDistribution;

    /// Return `true` if every bit of `prefix` equals the bit of `input` at the same position.
    ///
    /// `prefix` must not be longer than `input`; the bits are compared by index.
    fn is_prefix_of(prefix: &IdpfInput, input: &IdpfInput) -> bool {
        (0..prefix.len()).all(|j| prefix[j] == input[j])
    }

    /// Generate a set of IDPF inputs with the given bit length `bits`. They are sampled according
    /// to the Zipf distribution with parameters `zipf_support` and `zipf_exponent`. Return the
    /// measurements, along with the prefixes traversed during the heavy hitters computation for
    /// the given threshold.
    ///
    /// The prefix tree consists of a sequence of candidate prefixes for each level, `bits` levels
    /// in total. The first level always holds both one-bit prefixes. For any other level, the
    /// candidate prefixes are computed from the hit counts of the prefixes at the previous level:
    /// for any prefix `p` whose hit count (the number of measurements starting with `p`) is at
    /// least the desired threshold, `p || 0` and `p || 1` are added to the list.
    ///
    /// # Panics
    ///
    /// Panics if `bits` is zero, or if `ZipfDistribution::new` rejects `zipf_support` and
    /// `zipf_exponent`.
    pub fn generate_zipf_distributed_batch(
        rng: &mut impl Rng,
        bits: usize,
        threshold: usize,
        measurement_count: usize,
        zipf_support: usize,
        zipf_exponent: f64,
    ) -> (Vec<IdpfInput>, Vec<Vec<IdpfInput>>) {
        // The first level of the prefix tree consists of one-bit prefixes, so zero-length inputs
        // are not meaningful. Reject them up front rather than underflowing `bits - 1` below.
        assert!(bits > 0, "IDPF inputs must be at least one bit long");

        // Generate random inputs.
        let inputs: Vec<IdpfInput> = (0..zipf_support)
            .map(|_| {
                let bools: Vec<bool> = (0..bits).map(|_| rng.gen()).collect();
                IdpfInput::from_bools(&bools)
            })
            .collect();

        // Sample a number of inputs according to the Zipf distribution. One is subtracted from
        // each sample to turn it into an index into `inputs`.
        let zipf = ZipfDistribution::new(zipf_support, zipf_exponent)
            .expect("invalid Zipf distribution parameters");
        let samples: Vec<IdpfInput> = (0..measurement_count)
            .map(|_| inputs[zipf.sample(&mut *rng) - 1].clone())
            .collect();

        // Compute the prefix tree for the desired threshold.
        let mut prefix_tree = Vec::with_capacity(bits);
        prefix_tree.push(vec![
            IdpfInput::from_bools(&[false]),
            IdpfInput::from_bools(&[true]),
        ]);

        for level in 0..bits - 1 {
            let current = &prefix_tree[level];

            // Each prefix of the current level yields at most two candidates for the next one.
            let mut next_prefixes = Vec::with_capacity(2 * current.len());
            for prefix in current {
                // Compute the hit count of this prefix over all sampled measurements.
                let hit_count = samples
                    .iter()
                    .filter(|sample| is_prefix_of(prefix, sample))
                    .count();

                if hit_count >= threshold {
                    next_prefixes.push(prefix.clone_with_suffix(&[false]));
                    next_prefixes.push(prefix.clone_with_suffix(&[true]));
                }
            }
            prefix_tree.push(next_prefixes);
        }

        (samples, prefix_tree)
    }
}

#[cfg(test)]
mod tests {
use std::{
Expand Down

0 comments on commit 2eae6f5

Please sign in to comment.