Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a tool for assessing the IDPF aggregation parameter size #956

Merged
merged 1 commit into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ sha2 = { version = "0.10.8", optional = true }
sha3 = "0.10.8"
subtle = "2.5.0"
thiserror = "1.0"
zipf = { version = "7.0.1", optional = true }
cjpatton marked this conversation as resolved.
Show resolved Hide resolved

[dev-dependencies]
assert_matches = "1.5.0"
Expand All @@ -49,14 +50,13 @@ once_cell = "1.19.0"
prio = { path = ".", features = ["crypto-dependencies", "test-util"] }
rand = "0.8"
statrs = "0.16.0"
zipf = "7.0.1"

[features]
default = ["crypto-dependencies"]
experimental = ["bitvec", "fiat-crypto", "fixed", "num-bigint", "num-rational", "num-traits", "num-integer", "num-iter", "rand"]
multithreaded = ["rayon"]
crypto-dependencies = ["aes", "ctr", "hmac", "sha2"]
test-util = ["hex", "rand", "serde_json"]
test-util = ["hex", "rand", "serde_json", "zipf"]

[workspace]
members = [".", "binaries"]
Expand Down
76 changes: 3 additions & 73 deletions benches/speed_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ use num_traits::ToPrimitive;
#[cfg(feature = "experimental")]
use prio::dp::distributions::DiscreteGaussian;
#[cfg(feature = "experimental")]
use prio::idpf::test_utils::generate_zipf_distributed_batch;
#[cfg(feature = "experimental")]
use prio::vdaf::prio2::Prio2;
use prio::{
benchmarked::*,
Expand All @@ -35,8 +37,6 @@ use rand::prelude::*;
#[cfg(feature = "experimental")]
use std::iter;
use std::time::Duration;
#[cfg(feature = "experimental")]
use zipf::ZipfDistribution;

/// Seed for generation of random benchmark inputs.
///
Expand Down Expand Up @@ -746,7 +746,7 @@ fn poplar1(c: &mut Criterion) {

// Parameters are chosen to match Chris Wood's experimental setup:
// https://github.com/chris-wood/heavy-hitter-comparison
let (measurements, prefix_tree) = poplar1_generate_zipf_distributed_batch(
let (measurements, prefix_tree) = generate_zipf_distributed_batch(
&mut rng, // rng
size, // bits
10, // threshold
Expand Down Expand Up @@ -794,76 +794,6 @@ fn poplar1(c: &mut Criterion) {
group.finish();
}

/// Build a Zipf-distributed batch of Poplar1 measurements, each `bits` bits long, together with
/// the prefix tree obtained for the given `threshold`.
///
/// `zipf_support` random inputs are drawn first; `measurement_count` measurements are then picked
/// from them according to the Zipf distribution with parameters `zipf_support` and
/// `zipf_exponent`.
///
/// The prefix tree holds one list of candidate prefixes per level. The first level holds the two
/// one-bit prefixes. Each later level is derived from the one before it: every prefix `p` that is
/// a prefix of at least `threshold` measurements contributes `p || 0` and `p || 1`.
#[cfg(feature = "experimental")]
fn poplar1_generate_zipf_distributed_batch(
    rng: &mut impl Rng,
    bits: usize,
    threshold: usize,
    measurement_count: usize,
    zipf_support: usize,
    zipf_exponent: f64,
) -> (Vec<IdpfInput>, Vec<Vec<IdpfInput>>) {
    // The pool of distinct random inputs that measurements are drawn from.
    let inputs: Vec<IdpfInput> = (0..zipf_support)
        .map(|_| {
            let bools: Vec<bool> = (0..bits).map(|_| rng.gen()).collect();
            IdpfInput::from_bools(&bools)
        })
        .collect();

    // Draw the measurements; the sampled rank is 1-based, hence the `- 1`.
    let zipf = ZipfDistribution::new(zipf_support, zipf_exponent).unwrap();
    let samples: Vec<IdpfInput> = (0..measurement_count)
        .map(|_| inputs[zipf.sample(&mut *rng) - 1].clone())
        .collect();

    // The tree starts with the two one-bit prefixes.
    let mut prefix_tree = Vec::with_capacity(bits);
    prefix_tree.push(vec![
        IdpfInput::from_bools(&[false]),
        IdpfInput::from_bools(&[true]),
    ]);

    for level in 0..bits - 1 {
        // Keep the prefixes of this level that reach the threshold and extend each survivor by
        // one bit in both directions.
        let next_prefixes: Vec<IdpfInput> = prefix_tree[level]
            .iter()
            .filter(|prefix| {
                let hit_count = samples
                    .iter()
                    .filter(|sample| (0..prefix.len()).all(|j| prefix[j] == sample[j]))
                    .count();
                hit_count >= threshold
            })
            .flat_map(|prefix| {
                [
                    prefix.clone_with_suffix(&[false]),
                    prefix.clone_with_suffix(&[true]),
                ]
            })
            .collect();
        prefix_tree.push(next_prefixes);
    }

    (samples, prefix_tree)
}

#[cfg(feature = "experimental")]
criterion_group!(benches, poplar1, prio3, prio2, poly_mul, prng, idpf, dp_noise);
#[cfg(not(feature = "experimental"))]
Expand Down
3 changes: 2 additions & 1 deletion binaries/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ repository = "https://github.com/divviup/libprio-rs"
base64 = "0.21.7"
fixed = "1.25"
fixed-macro = "1.2.0"
prio = { path = "..", features = ["experimental"] }
rand = "0.8"
prio = { path = "..", features = ["experimental", "test-util"] }
46 changes: 46 additions & 0 deletions binaries/src/bin/idpf_agg_param_size.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use std::time::Instant;

use prio::{
codec::Encode, idpf::test_utils::generate_zipf_distributed_batch,
vdaf::poplar1::Poplar1AggregationParam,
};
use rand::prelude::*;

/// Report the encoded size of the Poplar1 aggregation parameter at every level of a prefix tree
/// computed from a Zipf-distributed batch of measurements.
///
/// For each level, prints `level: encoded_length prefix_count`; the last line names the level
/// with the longest encoding and that length.
fn main() {
    let bits = 256;
    let measurement_count = 10_000;
    let threshold = ((measurement_count as f64) * 0.01) as usize; // 1%
    let zipf_support = 128;
    let zipf_exponent = 1.03;

    println!("Generating inputs and computing the prefix tree. This may take some time...");
    let start = Instant::now();
    let (_measurements, prefix_tree) = generate_zipf_distributed_batch(
        &mut thread_rng(),
        bits,
        threshold,
        measurement_count,
        zipf_support,
        zipf_exponent,
    );
    let elapsed = start.elapsed();
    println!("Finished in {elapsed:?}");

    let mut max_agg_param_len = 0;
    let mut max_agg_param_level = 0;
    for (level, prefixes) in prefix_tree.into_iter().enumerate() {
        let num_prefixes = prefixes.len();
        // `expect` takes a plain string, so a `{level}` placeholder would be printed verbatim.
        // `panic!` formats the message, and appending the error keeps what `expect` would show.
        let agg_param = Poplar1AggregationParam::try_from_prefixes(prefixes)
            .unwrap_or_else(|err| panic!("failed to encode prefixes at level {level}: {err:?}"));
        let agg_param_len = agg_param
            .get_encoded()
            .unwrap_or_else(|err| {
                panic!("failed to encode the aggregation parameter at level {level}: {err:?}")
            })
            .len();
        if agg_param_len > max_agg_param_len {
            max_agg_param_len = agg_param_len;
            max_agg_param_level = level;
        }
        println!("{level}: {agg_param_len} {num_prefixes}");
    }
    println!("max: {max_agg_param_level}: {max_agg_param_len}");
}
80 changes: 80 additions & 0 deletions src/idpf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,86 @@ impl IdpfCache for RingBufferCache {
}
}

/// Utilities for testing IDPFs.
#[cfg(feature = "test-util")]
#[cfg_attr(docsrs, doc(cfg(feature = "test-util")))]
pub mod test_utils {
    use super::*;

    use rand::prelude::*;
    use zipf::ZipfDistribution;

    /// Generate a set of IDPF inputs with the given bit length `bits`. They are sampled according
    /// to the Zipf distribution with parameters `zipf_support` and `zipf_exponent`. Return the
    /// measurements, along with the prefixes traversed during the heavy hitters computation for
    /// the given threshold.
    ///
    /// The prefix tree consists of a sequence of candidate prefixes for each level. The first
    /// level holds the two one-bit prefixes. For every later level, the candidate prefixes are
    /// computed from the hit counts of the prefixes at the previous level: for any prefix `p`
    /// whose hit count is at least the desired threshold, `p || 0` and `p || 1` are added to the
    /// list.
    ///
    /// # Panics
    ///
    /// Panics if `bits` is zero, or if `ZipfDistribution::new` rejects `zipf_support` and
    /// `zipf_exponent`.
    pub fn generate_zipf_distributed_batch(
        rng: &mut impl Rng,
        bits: usize,
        threshold: usize,
        measurement_count: usize,
        zipf_support: usize,
        zipf_exponent: f64,
    ) -> (Vec<IdpfInput>, Vec<Vec<IdpfInput>>) {
        // Without this check, `bits - 1` below would underflow for `bits == 0`.
        assert!(bits > 0, "bits must be at least 1");

        // Generate random inputs.
        let mut inputs = Vec::with_capacity(zipf_support);
        for _ in 0..zipf_support {
            let bools: Vec<bool> = (0..bits).map(|_| rng.gen()).collect();
            inputs.push(IdpfInput::from_bools(&bools));
        }

        // Sample a number of inputs according to the Zipf distribution. The sampled rank is used
        // as a 1-based index into `inputs`.
        let mut samples = Vec::with_capacity(measurement_count);
        let zipf = ZipfDistribution::new(zipf_support, zipf_exponent)
            .expect("invalid Zipf distribution parameters");
        for _ in 0..measurement_count {
            samples.push(inputs[zipf.sample(rng) - 1].clone());
        }

        // Compute the prefix tree for the desired threshold.
        let mut prefix_tree = Vec::with_capacity(bits);
        prefix_tree.push(vec![
            IdpfInput::from_bools(&[false]),
            IdpfInput::from_bools(&[true]),
        ]);

        for level in 0..bits - 1 {
            let current_prefixes = &prefix_tree[level];
            let mut next_prefixes = Vec::with_capacity(current_prefixes.len());
            for prefix in current_prefixes {
                // Count the samples that start with this prefix.
                let hit_count = samples
                    .iter()
                    .filter(|sample| (0..prefix.len()).all(|j| prefix[j] == sample[j]))
                    .count();

                // Prefixes that reach the threshold are extended by one bit in both directions.
                if hit_count >= threshold {
                    next_prefixes.push(prefix.clone_with_suffix(&[false]));
                    next_prefixes.push(prefix.clone_with_suffix(&[true]));
                }
            }
            prefix_tree.push(next_prefixes);
        }

        (samples, prefix_tree)
    }
}

#[cfg(test)]
mod tests {
use std::{
Expand Down