Skip to content

Commit

Permalink
Now we have benchmarks and a file to run.
Browse files Browse the repository at this point in the history
  • Loading branch information
derekdreery authored and abonander committed Jul 2, 2019
1 parent dfecaf1 commit 1919661
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 53 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[workspace]
members = ["phf", "phf_builder", "phf_codegen", "phf_codegen/test", "phf_generator", "phf_macros", "phf_shared", "phf_bench"]
members = ["phf", "phf_builder", "phf_codegen", "phf_codegen/test", "phf_generator", "phf_macros", "phf_shared", "phf_bench", "phf_raw"]

[profile.release]
debug = true
Expand Down
4 changes: 1 addition & 3 deletions phf_bench/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ edition = "2018"
[dependencies]
phf_generator = { path = "../phf_generator" }
rand = "*"

[build-dependencies]
xz2 = "*"
xz2 = "0.1"
27 changes: 0 additions & 27 deletions phf_bench/build.rs

This file was deleted.

12 changes: 12 additions & 0 deletions phf_bench/src/bin/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
use std::time::Instant;
use std::io::prelude::*;
use std::fs::File;

/// Times one `phf_generator::generate_hash` run over 500,000 passwords and
/// prints the elapsed wall-clock time.
///
/// The resulting hash state is afterwards debug-formatted into `/dev/null`
/// (presumably so the computed value is observably used — confirm intent).
/// Note that this path only exists on Unix-like systems.
fn main() {
    let passwords = phf_bench::get_pws(500_000);

    // Only the hash generation is timed; loading the passwords is not.
    let timer = Instant::now();
    let state = phf_generator::generate_hash(&passwords);
    let took = timer.elapsed();
    println!("Duration: {:?}", took);

    let mut sink = File::create("/dev/null").expect("opening /dev/null");
    write!(sink, "{:?}", state).expect("writing to /dev/null");
}
37 changes: 37 additions & 0 deletions phf_bench/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#![feature(test, extern_crate_item_prelude)]
extern crate test;

use xz2::read::XzDecoder;
use std::fs::File;
use std::env;
use std::io::{self, prelude::*};
use std::path::Path;


/// Loads up to `count` passwords from the xz-compressed word list
/// `data/rockyou.txt.xz`.
///
/// The list is looked up relative to `CARGO_MANIFEST_DIR`, falling back to the
/// current directory (`.`) when that variable cannot be read. The decompressed
/// stream is split on `\n`; lines that are not valid UTF-8 are skipped and do
/// not count toward `count`.
///
/// # Panics
///
/// Panics if the word list cannot be opened, or if reading/decompressing a
/// line fails before `count` passwords have been collected.
pub fn get_pws(count: usize) -> Vec<String> {
    // Build the fallback lazily so no `String` is allocated when the variable is set.
    let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| String::from("."));
    let input_path = Path::new(&manifest_dir).join("data/rockyou.txt.xz");

    // Name the offending path in the panic so a missing data file is easy to diagnose.
    let file = File::open(&input_path)
        .unwrap_or_else(|err| panic!("opening {}: {}", input_path.display(), err));
    let input = io::BufReader::new(XzDecoder::new(file));

    input
        .split(b'\n')
        .map(|line| line.expect("reading/decompressing the password list"))
        // Drop entries that are not valid UTF-8 instead of failing on them.
        .filter_map(|word| String::from_utf8(word).ok())
        .take(count)
        .collect()
}

#[cfg(test)]
mod tests {
    use super::get_pws;
    use phf_generator::generate_hash;
    use test::Bencher;

    /// Number of passwords handed to the generator in the benchmark below.
    const PASSWORD_COUNT: usize = 100_000;

    /// Benchmarks `generate_hash` over `PASSWORD_COUNT` passwords.
    ///
    /// The passwords are loaded once, outside the closure passed to `iter`,
    /// so only hash generation runs inside the measured closure.
    #[bench]
    fn bench_100_000(b: &mut Bencher) {
        let passwords = get_pws(PASSWORD_COUNT);
        b.iter(|| generate_hash(passwords.as_slice()))
    }
}
22 changes: 0 additions & 22 deletions phf_bench/src/main.rs

This file was deleted.

17 changes: 17 additions & 0 deletions phf_generator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ pub fn generate_hash<H: PhfHash>(entries: &[H]) -> HashState {
loop {
if let Some(s) = try_generate_hash(entries, &mut rng) {
return s;
} else {
println!("Finding hash failed, retrying");
}
}
}
Expand Down Expand Up @@ -52,6 +54,7 @@ fn try_generate_hash<H: PhfHash>(entries: &[H], rng: &mut SmallRng) -> Option<Ha
})
.collect();

// We want the number of buckets to be rounded up.
let buckets_len = (entries.len() + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA;
let mut buckets = (0..buckets_len)
.map(|i| {
Expand All @@ -62,6 +65,7 @@ fn try_generate_hash<H: PhfHash>(entries: &[H], rng: &mut SmallRng) -> Option<Ha
})
.collect::<Vec<_>>();

// Sort into buckets by value of hash
for (i, hash) in hashes.iter().enumerate() {
buckets[(hash.g % (buckets_len as u32)) as usize].keys.push(i);
}
Expand All @@ -88,7 +92,12 @@ fn try_generate_hash<H: PhfHash>(entries: &[H], rng: &mut SmallRng) -> Option<Ha
// chosen the right disps.
let mut values_to_add = vec![];

// For debugging - see how many d1 and d2s it takes.
let mut track_attempts = vec![];
let mut track_count = 0u64;

'buckets: for bucket in &buckets {
let mut attempts = 0u64;
for d1 in 0..(table_len as u32) {
'disps: for d2 in 0..(table_len as u32) {
values_to_add.clear();
Expand All @@ -98,6 +107,7 @@ fn try_generate_hash<H: PhfHash>(entries: &[H], rng: &mut SmallRng) -> Option<Ha
let idx = (phf_shared::displace(hashes[key].f1, hashes[key].f2, d1, d2) %
(table_len as u32)) as usize;
if map[idx].is_some() || try_map[idx] == generation {
attempts += 1;
continue 'disps;
}
try_map[idx] = generation;
Expand All @@ -109,6 +119,9 @@ fn try_generate_hash<H: PhfHash>(entries: &[H], rng: &mut SmallRng) -> Option<Ha
for &(idx, key) in &values_to_add {
map[idx] = Some(key);
}
//println!("Bucket {} took {} attempts", track_count, attempts);
track_count += 1;
track_attempts.push(attempts);
continue 'buckets;
}
}
Expand All @@ -117,6 +130,10 @@ fn try_generate_hash<H: PhfHash>(entries: &[H], rng: &mut SmallRng) -> Option<Ha
return None;
}

let count = track_attempts.iter().count() as u64;
let sum = track_attempts.iter().sum::<u64>();
println!("count: {}, sum: {}, average: {}", count, sum, sum / count);

Some(HashState {
key: key,
disps: disps,
Expand Down

0 comments on commit 1919661

Please sign in to comment.