Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add rust unit tests #35

Merged
merged 4 commits into from
Aug 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/bloom_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use std::mem::size_of;
use std::path::PathBuf;
use std::sync::atomic::{AtomicU32, Ordering};

mod bloom_test;
// A thread-safe bloom filter.
pub struct BloomFilter {
bits: Vec<AtomicU32>,
Expand Down
56 changes: 56 additions & 0 deletions src/bloom_filter/bloom_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#[cfg(test)]
mod tests {
    use super::super::BloomFilter;

    // Reference formulas relating the bloom filter parameters.
    //   n: number of items in filter. p: false positive rate
    //   m: number of bits in filter.  k: number of hashers
    //   n = ceil(m / (-k / log(1 - exp(log(p) / k))))
    //   p = pow(1 - exp(-k / (m / n)), k)
    //   m = ceil((n * log(p)) / log(1 / pow(2, log(2))))
    //   k = round((m / n) * log(2))
    // NOTE(review): for m / n = 16 the formula above gives 16 * ln(2) ~= 11.09,
    // which rounds to 11, yet bloom_optimal_hasher_number expects 12. Presumably
    // the implementation rounds up instead of rounding to nearest -- confirm
    // against BloomFilter::optimal_number_of_hashers.

    // Absolute tolerance used when comparing floating-point probabilities.
    // Exact `==` on f64 is brittle: a harmless reordering of the arithmetic
    // inside the implementation may change the last bit of the result.
    const FLOAT_TOLERANCE: f64 = 1e-12;

    // Asserts that `actual` is within FLOAT_TOLERANCE of `expected`.
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() <= FLOAT_TOLERANCE,
            "expected {} (+/- {}), got {}",
            expected,
            FLOAT_TOLERANCE,
            actual
        );
    }

    // Checks the suggested number of hashers for two filter configurations.
    // The expected values correspond to m = 8 * size_in_bytes bits.
    #[test]
    fn bloom_optimal_hasher_number() {
        let size_in_bytes = 1_000_000_000;
        let expected_elements = 1_000_000_000;
        // m / n = 8  ->  8 * ln(2) ~= 5.55
        assert_eq!(
            BloomFilter::optimal_number_of_hashers(size_in_bytes, expected_elements),
            6
        );
        // m / n = 16  ->  16 * ln(2) ~= 11.09
        assert_eq!(
            BloomFilter::optimal_number_of_hashers(1_000_000, 500_000),
            12
        );
    }

    // Checks the estimated false positive rate against reference values.
    #[test]
    fn bloom_test_prob_of_false_positive() {
        // calculated from https://hur.st/bloomfilter/
        let size_in_bytes = 1_000_000_000;
        let expected_elements = 1_000_000_000;
        let num_hashers = 8;
        assert_close(
            BloomFilter::prob_of_false_positive(size_in_bytes, expected_elements, num_hashers),
            0.025_491_740_593_406_025,
        );
        assert_close(
            BloomFilter::prob_of_false_positive(1_048_576, 524288, 2),
            0.013_806_979_447_406_826,
        );
    }

    // Checks that the suggested size is the theoretical optimum (in bytes)
    // rounded up to the next power of two.
    #[test]
    fn bloom_suggest_size() {
        // it's hard to derive this exactly since the algorithm is doing closest power of 2
        // instead of exact theoretical optimum
        let expected_elements = 1_000_000;
        let desired_false_positive_rate: f64 = 0.0001;

        // m = ceil((n * log(p)) / log(1 / pow(2, log(2)))), in bits.
        let numerator = expected_elements as f64 * desired_false_positive_rate.ln();
        let denominator = f64::ln(1.0 / 2.0f64.powf(2.0f64.ln()));
        let optimal_bits = (numerator / denominator).ceil();
        // Whole bytes only: Euclidean division by 8 bits per byte.
        let theoretical_optimum = optimal_bits.div_euclid(8f64) as usize;

        let suggested_size =
            BloomFilter::suggest_size_in_bytes(expected_elements, desired_false_positive_rate);
        assert_eq!(suggested_size, 4_194_304);
        assert_eq!(suggested_size, theoretical_optimum.next_power_of_two());
    }
}
Loading