diff --git a/Cargo.lock b/Cargo.lock index 07fada72..124e50ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -779,6 +779,7 @@ dependencies = [ "fixed", "fixed-macro", "prio", + "rand", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 0cdaf8a6..b81fb110 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ sha2 = { version = "0.10.8", optional = true } sha3 = "0.10.8" subtle = "2.5.0" thiserror = "1.0" +zipf = { version = "7.0.1", optional = true } [dev-dependencies] assert_matches = "1.5.0" @@ -49,14 +50,13 @@ once_cell = "1.19.0" prio = { path = ".", features = ["crypto-dependencies", "test-util"] } rand = "0.8" statrs = "0.16.0" -zipf = "7.0.1" [features] default = ["crypto-dependencies"] experimental = ["bitvec", "fiat-crypto", "fixed", "num-bigint", "num-rational", "num-traits", "num-integer", "num-iter", "rand"] multithreaded = ["rayon"] crypto-dependencies = ["aes", "ctr", "hmac", "sha2"] -test-util = ["hex", "rand", "serde_json"] +test-util = ["hex", "rand", "serde_json", "zipf"] [workspace] members = [".", "binaries"] diff --git a/benches/speed_tests.rs b/benches/speed_tests.rs index f10ef5c8..86827acd 100644 --- a/benches/speed_tests.rs +++ b/benches/speed_tests.rs @@ -16,6 +16,8 @@ use num_traits::ToPrimitive; #[cfg(feature = "experimental")] use prio::dp::distributions::DiscreteGaussian; #[cfg(feature = "experimental")] +use prio::idpf::test_utils::generate_zipf_distributed_batch; +#[cfg(feature = "experimental")] use prio::vdaf::prio2::Prio2; use prio::{ benchmarked::*, @@ -35,8 +37,6 @@ use rand::prelude::*; #[cfg(feature = "experimental")] use std::iter; use std::time::Duration; -#[cfg(feature = "experimental")] -use zipf::ZipfDistribution; /// Seed for generation of random benchmark inputs. /// @@ -746,7 +746,7 @@ fn poplar1(c: &mut Criterion) { // Parameters are chosen to match Chris Wood's experimental setup: // https://github.com/chris-wood/heavy-hitter-comparison - let (measurements, prefix_tree) = poplar1_generate_zipf_distributed_batch( + let (measurements, prefix_tree) = generate_zipf_distributed_batch( &mut rng, // rng size, // bits 10, // threshold @@ -794,76 +794,6 @@ fn poplar1(c: &mut Criterion) { group.finish(); } -/// Generate a set of Poplar1 measurements with the given bit length `bits`. They are sampled -/// according to the Zipf distribution with parameters `zipf_support` and `zipf_exponent`. Return -/// the measurements, along with the prefix tree for the desired threshold. -/// -/// The prefix tree consists of a sequence of candidate prefixes for each level. For a given level, -/// the candidate prefixes are computed from the hit counts of the prefixes at the previous level: -/// For any prefix `p` whose hit count is at least the desired threshold, add `p || 0` and `p || 1` -/// to the list. -#[cfg(feature = "experimental")] -fn poplar1_generate_zipf_distributed_batch( - rng: &mut impl Rng, - bits: usize, - threshold: usize, - measurement_count: usize, - zipf_support: usize, - zipf_exponent: f64, -) -> (Vec, Vec>) { - // Generate random inputs. - let mut inputs = Vec::with_capacity(zipf_support); - for _ in 0..zipf_support { - let bools: Vec = (0..bits).map(|_| rng.gen()).collect(); - inputs.push(IdpfInput::from_bools(&bools)); - } - - // Sample a number of inputs according to the Zipf distribution. - let mut samples = Vec::with_capacity(measurement_count); - let zipf = ZipfDistribution::new(zipf_support, zipf_exponent).unwrap(); - for _ in 0..measurement_count { - samples.push(inputs[zipf.sample(rng) - 1].clone()); - } - - // Compute the prefix tree for the desired threshold. - let mut prefix_tree = Vec::with_capacity(bits); - prefix_tree.push(vec![ - IdpfInput::from_bools(&[false]), - IdpfInput::from_bools(&[true]), - ]); - - for level in 0..bits - 1 { - // Compute the hit count of each prefix from the previous level. - let mut hit_counts = vec![0; prefix_tree[level].len()]; - for (hit_count, prefix) in hit_counts.iter_mut().zip(prefix_tree[level].iter()) { - for sample in samples.iter() { - let mut is_prefix = true; - for j in 0..prefix.len() { - if prefix[j] != sample[j] { - is_prefix = false; - break; - } - } - if is_prefix { - *hit_count += 1; - } - } - } - - // Compute the next set of candidate prefixes. - let mut next_prefixes = Vec::new(); - for (hit_count, prefix) in hit_counts.iter().zip(prefix_tree[level].iter()) { - if *hit_count >= threshold { - next_prefixes.push(prefix.clone_with_suffix(&[false])); - next_prefixes.push(prefix.clone_with_suffix(&[true])); - } - } - prefix_tree.push(next_prefixes); - } - - (samples, prefix_tree) -} - #[cfg(feature = "experimental")] criterion_group!(benches, poplar1, prio3, prio2, poly_mul, prng, idpf, dp_noise); #[cfg(not(feature = "experimental"))] diff --git a/binaries/Cargo.toml b/binaries/Cargo.toml index f3658132..207ca291 100644 --- a/binaries/Cargo.toml +++ b/binaries/Cargo.toml @@ -10,4 +10,5 @@ repository = "https://github.com/divviup/libprio-rs" base64 = "0.21.7" fixed = "1.25" fixed-macro = "1.2.0" -prio = { path = "..", features = ["experimental"] } +rand = "0.8" +prio = { path = "..", features = ["experimental", "test-util"] } diff --git a/binaries/src/bin/idpf_agg_param_size.rs b/binaries/src/bin/idpf_agg_param_size.rs new file mode 100644 index 00000000..23658bf1 --- /dev/null +++ b/binaries/src/bin/idpf_agg_param_size.rs @@ -0,0 +1,46 @@ +use std::time::Instant; + +use prio::{ + codec::Encode, idpf::test_utils::generate_zipf_distributed_batch, + vdaf::poplar1::Poplar1AggregationParam, +}; +use rand::prelude::*; + +fn main() { + let bits = 256; + let measurement_count = 10_000; + let threshold = ((measurement_count as f64) * 0.01) as usize; // 1% + let zipf_support = 128; + let zipf_exponent = 1.03; + + println!("Generating inputs and computing the prefix tree. This may take some time..."); + let start = Instant::now(); + let (_measurements, prefix_tree) = generate_zipf_distributed_batch( + &mut thread_rng(), + bits, + threshold, + measurement_count, + zipf_support, + zipf_exponent, + ); + let elapsed = start.elapsed(); + println!("Finished in {elapsed:?}"); + + let mut max_agg_param_len = 0; + let mut max_agg_param_level = 0; + for (level, prefixes) in prefix_tree.into_iter().enumerate() { + let num_prefixes = prefixes.len(); + let agg_param = Poplar1AggregationParam::try_from_prefixes(prefixes) + .expect("failed to encode prefixes at level {level}"); + let agg_param_len = agg_param + .get_encoded() + .expect("failed to encode the aggregation parameter at level {level}") + .len(); + if agg_param_len > max_agg_param_len { + max_agg_param_len = agg_param_len; + max_agg_param_level = level; + } + println!("{level}: {agg_param_len} {num_prefixes}"); + } + println!("max: {max_agg_param_level}: {max_agg_param_len}"); +} diff --git a/src/idpf.rs b/src/idpf.rs index 0ce85328..481ff631 100644 --- a/src/idpf.rs +++ b/src/idpf.rs @@ -950,6 +950,86 @@ impl IdpfCache for RingBufferCache { } } +/// Utilities for testing IDPFs. +#[cfg(feature = "test-util")] +#[cfg_attr(docsrs, doc(cfg(feature = "test-util")))] +pub mod test_utils { + use super::*; + + use rand::prelude::*; + use zipf::ZipfDistribution; + + /// Generate a set of IDPF inputs with the given bit length `bits`. They are sampled according + /// to the Zipf distribution with parameters `zipf_support` and `zipf_exponent`. Return the + /// measurements, along with the prefixes traversed during the heavy hitters computation for + /// the given threshold. + /// + /// The prefix tree consists of a sequence of candidate prefixes for each level. For a given level, + /// the candidate prefixes are computed from the hit counts of the prefixes at the previous level: + /// For any prefix `p` whose hit count is at least the desired threshold, add `p || 0` and `p || 1` + /// to the list. + pub fn generate_zipf_distributed_batch( + rng: &mut impl Rng, + bits: usize, + threshold: usize, + measurement_count: usize, + zipf_support: usize, + zipf_exponent: f64, + ) -> (Vec, Vec>) { + // Generate random inputs. + let mut inputs = Vec::with_capacity(zipf_support); + for _ in 0..zipf_support { + let bools: Vec = (0..bits).map(|_| rng.gen()).collect(); + inputs.push(IdpfInput::from_bools(&bools)); + } + + // Sample a number of inputs according to the Zipf distribution. + let mut samples = Vec::with_capacity(measurement_count); + let zipf = ZipfDistribution::new(zipf_support, zipf_exponent).unwrap(); + for _ in 0..measurement_count { + samples.push(inputs[zipf.sample(rng) - 1].clone()); + } + + // Compute the prefix tree for the desired threshold. + let mut prefix_tree = Vec::with_capacity(bits); + prefix_tree.push(vec![ + IdpfInput::from_bools(&[false]), + IdpfInput::from_bools(&[true]), + ]); + + for level in 0..bits - 1 { + // Compute the hit count of each prefix from the previous level. + let mut hit_counts = vec![0; prefix_tree[level].len()]; + for (hit_count, prefix) in hit_counts.iter_mut().zip(prefix_tree[level].iter()) { + for sample in samples.iter() { + let mut is_prefix = true; + for j in 0..prefix.len() { + if prefix[j] != sample[j] { + is_prefix = false; + break; + } + } + if is_prefix { + *hit_count += 1; + } + } + } + + // Compute the next set of candidate prefixes. + let mut next_prefixes = Vec::with_capacity(prefix_tree.last().unwrap().len()); + for (hit_count, prefix) in hit_counts.iter().zip(prefix_tree[level].iter()) { + if *hit_count >= threshold { + next_prefixes.push(prefix.clone_with_suffix(&[false])); + next_prefixes.push(prefix.clone_with_suffix(&[true])); + } + } + prefix_tree.push(next_prefixes); + } + + (samples, prefix_tree) + } +} + #[cfg(test)] mod tests { use std::{