Skip to content

Commit

Permalink
Uses a hashset to de-duplicate results
Browse files Browse the repository at this point in the history
This creates a `HashSet<u64>` using FNV Hashing, because it's much
faster than Rust's default SIP hash and this isn't used for anything
that needs to be cyptographically secure. It also allows us to keep a
hashset of 64bit numbers, instead of Strings or string slices thus
reducing memory consumption.

In unscientific benchmarks of ~5,200 `.cheat` lines containing about
`2,600` duplicate lines there was no noticable increase in memory
consumption (benchmarking time is harder since you wait for human
input), both master (HEAD b78903a) and this commit used 10.8mb max
RSS.

To attempt to measure time (and again max RSS) running master against
this commit using the command `navi --print --path dups_folder/ best "git tag"`
resulted in a *decrease* of memory consumption by about 200k, and no
noticable time increase.

Closes #283
  • Loading branch information
kbknapp committed Mar 18, 2020
1 parent b78903a commit a51a54d
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 4 deletions.
16 changes: 12 additions & 4 deletions src/cheat.rs
@@ -1,9 +1,10 @@
use crate::display;
use crate::filesystem;
use crate::fnv::HashLine;
use crate::option::Config;
use crate::welcome;
use regex::Regex;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::io::Write;

Expand Down Expand Up @@ -111,6 +112,7 @@ fn read_file(
path: &str,
variables: &mut HashMap<String, Suggestion>,
stdin: &mut std::process::ChildStdin,
set: &mut HashSet<u64>,
) -> bool {
let mut tags = String::from("");
let mut comment = String::from("");
Expand All @@ -121,6 +123,11 @@ fn read_file(
if let Ok(lines) = filesystem::read_lines(path) {
for l in lines {
let line = l.unwrap();
let hash = line.hash_line();
if set.contains(&hash) {
continue;
}
set.insert(hash);

// blank
if line.is_empty() {
Expand Down Expand Up @@ -177,14 +184,15 @@ pub fn read_all(
let mut found_something = false;
let paths = filesystem::cheat_paths(config);
let folders = paths.split(':');
let mut set = HashSet::new();

for folder in folders {
if let Ok(paths) = fs::read_dir(folder) {
for path in paths {
let path_os_str = path.unwrap().path().into_os_string();
let path_str = path_os_str.to_str().unwrap();
let path = path.unwrap().path();
let path_str = path.to_str().unwrap();
if path_str.ends_with(".cheat")
&& read_file(path_str, &mut variables, stdin)
&& read_file(path_str, &mut variables, stdin, &mut set)
&& !found_something
{
found_something = true;
Expand Down
42 changes: 42 additions & 0 deletions src/fnv.rs
@@ -0,0 +1,42 @@
use std::hash::{Hash, Hasher};

const MAGIC_INIT: u64 = 0x811C_9DC5;

pub trait HashLine: Hash {
fn hash_line(&self) -> u64;
}

impl<T> HashLine for T
where
T: Hash,
{
fn hash_line(&self) -> u64 {
let mut hasher = FnvHasher::new();
self.hash(&mut hasher);
hasher.finish()
}
}

pub(crate) struct FnvHasher(u64);

impl FnvHasher {
pub(crate) fn new() -> Self {
FnvHasher(MAGIC_INIT)
}
}

impl Hasher for FnvHasher {
fn finish(&self) -> u64 {
self.0
}
fn write(&mut self, bytes: &[u8]) {
let FnvHasher(mut hash) = *self;

for byte in bytes.iter() {
hash ^= u64::from(*byte);
hash = hash.wrapping_mul(0x0100_0000_01b3);
}

*self = FnvHasher(hash);
}
}
1 change: 1 addition & 0 deletions src/main.rs
Expand Up @@ -5,6 +5,7 @@ mod cheat;
mod cmds;
mod display;
mod filesystem;
mod fnv;
mod fzf;
mod git;
mod handler;
Expand Down

0 comments on commit a51a54d

Please sign in to comment.