From edffc13ffd73c869febd9c1e12ae8e07600c5a91 Mon Sep 17 00:00:00 2001 From: Vincent Hanquez Date: Wed, 20 Nov 2019 15:37:53 +0800 Subject: [PATCH] hamt: optimise representation * reduce the size implication case of collision: moving to just a pointer (Box) on the Entry type, as the case happens rarely * reduce the size of normal Node to 20 bytes (without padding) * allow storage control of K/V directly to prevent boxing of primitives type --- imhamt/src/content.rs | 374 ----------------------------------- imhamt/src/hamt.rs | 29 +-- imhamt/src/helper.rs | 10 +- imhamt/src/lib.rs | 100 ++++++++-- imhamt/src/node/reference.rs | 244 ++++++++++++++++++----- 5 files changed, 296 insertions(+), 461 deletions(-) delete mode 100644 imhamt/src/content.rs diff --git a/imhamt/src/content.rs b/imhamt/src/content.rs deleted file mode 100644 index 5914ac647..000000000 --- a/imhamt/src/content.rs +++ /dev/null @@ -1,374 +0,0 @@ -use super::hash::HashedKey; -use super::helper::*; -use super::operation::*; -use super::sharedref::SharedRef; -use std::slice; - -pub struct KV(K, V); - -impl KV { - pub fn new(k: K, v: V) -> Self { - KV(k, v) - } - pub fn get_key(&self) -> &K { - &self.0 - } - - pub fn get_value(&self) -> &V { - &self.1 - } -} - -impl KV { - pub fn replace(&self, v: V) -> Self { - KV(self.0.clone(), v) - } -} - -pub enum SmallVec { - One(T), - //Two(T, T), - Many(Box<[T]>), -} - -/// Leaf content is usually one key-value pair, -/// but can contains multiples pair when having a collision. -/// -/// All the key held here have the same hash -pub struct LeafContent { - pub(crate) hashed: HashedKey, - pub(crate) content: SmallVec>>, -} - -impl LeafContent { - pub fn len(&self) -> usize { - match self.content { - SmallVec::One(_) => 1, - SmallVec::Many(ref v) => v.len(), - } - } -} - -impl LeafContent { - pub fn single(h: HashedKey, kv: SharedRef>) -> Self { - LeafContent { - hashed: h, - content: SmallVec::One(kv), - } - } - - pub fn add(&self, kv: SharedRef>) -> Result { - // check for duplicated key - match self.content { - SmallVec::One(ref fkv) => { - if kv.get_key() == fkv.get_key() { - return Err(InsertError::EntryExists); - }; - let v = vec![SharedRef::clone(fkv), kv]; - Ok(LeafContent { - hashed: self.hashed, - content: SmallVec::Many(v.into()), - }) - } - SmallVec::Many(ref content) => { - for fkv in content.iter() { - if kv.get_key() == fkv.get_key() { - return Err(InsertError::EntryExists); - } - } - let v = clone_array_and_extend(content, kv); - Ok(LeafContent { - hashed: self.hashed, - content: SmallVec::Many(v), - }) - } - } - } - - pub fn find(&self, h: HashedKey, k: &K) -> Option<&V> { - if self.hashed == h { - // looks in all the keys for a match - match self.content { - SmallVec::One(ref fkv) => { - if k == fkv.get_key() { - return Some(fkv.get_value()); - } - None - } - SmallVec::Many(ref v) => { - for fkv in v.iter() { - if k == fkv.get_key() { - return Some(fkv.get_value()); - } - } - None - } - } - } else { - None - } - } -} - -pub enum LeafIterator<'a, K, V> { - One(bool, &'a SharedRef>), - Many(slice::Iter<'a, SharedRef>>), -} - -impl<'a, K, V> LeafContent { - pub fn iter(&'a self) -> LeafIterator<'a, K, V> { - match self.content { - SmallVec::Many(ref content) => LeafIterator::Many(content.iter()), - SmallVec::One(ref kvs) => LeafIterator::One(false, kvs), - } - } -} - -impl<'a, K, V> Iterator for LeafIterator<'a, K, V> { - type Item = &'a SharedRef>; - - fn next(&mut self) -> Option { - match self { - LeafIterator::Many(ref mut c) => c.next(), - LeafIterator::One(ref mut consumed, o) => { - if *consumed { - None - } else { - *consumed = true; - Some(o) - } - } - } - } -} - -impl LeafContent { - pub fn update(&self, h: &HashedKey, k: &K, f: F) -> Result, UpdateError> - where - F: FnOnce(&V) -> Result, U>, - { - if self.hashed != *h { - return Err(UpdateError::KeyNotFound); - } - match self.content { - SmallVec::One(ref fkv) => { - if k != fkv.get_key() { - return Err(UpdateError::KeyNotFound); - } - match f(fkv.get_value()).map_err(UpdateError::ValueCallbackError)? { - None => Ok(None), - Some(newv) => { - let newkv = KV::new(k.clone(), newv); - let newcontent = LeafContent { - hashed: self.hashed, - content: SmallVec::One(SharedRef::new(newkv)), - }; - Ok(Some(newcontent)) - } - } - } - SmallVec::Many(ref content) => { - assert!(content.len() > 1); - // looks in all the keys for a match - let mut found = None; - for (i, fkv) in content.iter().enumerate() { - if k == fkv.get_key() { - found = Some(i); - break; - } - } - match found { - None => Err(UpdateError::KeyNotFound), - Some(pos) => { - // content == 1 is handled by SmallVec::One - match f(content[pos].get_value()) - .map_err(UpdateError::ValueCallbackError)? - { - None => { - // trigger deletion - if content.len() == 2 { - let to_keep = if pos == 0 { - SharedRef::clone(&content[1]) - } else { - SharedRef::clone(&content[0]) - }; - Ok(Some(LeafContent { - hashed: self.hashed, - content: SmallVec::One(to_keep), - })) - } else { - let newv = clone_array_and_remove_at_pos(content, pos); - Ok(Some(LeafContent { - hashed: self.hashed, - content: SmallVec::Many(newv), - })) - } - } - Some(newv) => { - // update vector at position - let newkv = KV::new(k.clone(), newv); - let new_array = - clone_array_and_set_at_pos(content, SharedRef::new(newkv), pos); - let newcontent = LeafContent { - hashed: self.hashed, - content: SmallVec::Many(new_array), - }; - Ok(Some(newcontent)) - } - } - } - } - } - } - } -} - -impl LeafContent { - pub fn replace(&self, k: &K, v: V) -> Result<(Self, V), ReplaceError> { - match self.content { - SmallVec::One(ref fkv) => { - if k != fkv.get_key() { - return Err(ReplaceError::KeyNotFound); - }; - let lc = LeafContent { - hashed: self.hashed, - content: SmallVec::One(SharedRef::new(fkv.replace(v))), - }; - Ok((lc, fkv.get_value().clone())) - } - SmallVec::Many(ref content) => { - let mut found = None; - for (i, fkv) in content.iter().enumerate() { - if k == fkv.get_key() { - found = Some(i); - break; - } - } - match found { - None => Err(ReplaceError::KeyNotFound), - Some(pos) => { - let oldv = content[pos].get_value().clone(); - let newkv = KV::new(k.clone(), v); - let new_array = - clone_array_and_set_at_pos(content, SharedRef::new(newkv), pos); - let newcontent = LeafContent { - hashed: self.hashed, - content: SmallVec::Many(new_array), - }; - Ok((newcontent, oldv)) - } - } - } - } - } -} - -impl LeafContent { - pub fn remove(&self, h: &HashedKey, k: &K) -> Result, RemoveError> { - if self.hashed != *h { - return Err(RemoveError::KeyNotFound); - } - match self.content { - SmallVec::One(ref fkv) => { - if k != fkv.get_key() { - return Err(RemoveError::KeyNotFound); - } - return Ok(None); - } - SmallVec::Many(ref content) => { - assert!(content.len() > 1); - // looks in all the keys for a match - let mut found = None; - for (i, fkv) in content.iter().enumerate() { - if k == fkv.get_key() { - found = Some(i); - break; - } - } - match found { - None => Err(RemoveError::KeyNotFound), - Some(pos) => { - if content.len() == 1 { - Ok(None) - } else if content.len() == 2 { - let to_keep = if pos == 0 { - SharedRef::clone(&content[1]) - } else { - SharedRef::clone(&content[0]) - }; - Ok(Some(LeafContent { - hashed: self.hashed, - content: SmallVec::One(to_keep), - })) - } else { - let newv = clone_array_and_remove_at_pos(content, pos); - Ok(Some(LeafContent { - hashed: self.hashed, - content: SmallVec::Many(newv), - })) - } - } - } - } - } - } -} - -impl LeafContent { - pub fn remove_match(&self, h: &HashedKey, k: &K, v: &V) -> Result, RemoveError> { - if self.hashed != *h { - return Err(RemoveError::KeyNotFound); - } - - match self.content { - SmallVec::One(ref fkv) => { - if k != fkv.get_key() { - return Err(RemoveError::KeyNotFound); - } - if v != fkv.get_value() { - return Err(RemoveError::ValueNotMatching); - } - return Ok(None); - } - SmallVec::Many(ref content) => { - assert!(content.len() > 1); - // looks in all the keys for a match - let mut found = None; - for (i, fkv) in content.iter().enumerate() { - if k == fkv.get_key() { - found = Some(i); - break; - } - } - match found { - None => Err(RemoveError::KeyNotFound), - Some(pos) => { - if content[pos].get_value() != v { - return Err(RemoveError::ValueNotMatching); - } - - if content.len() == 1 { - Ok(None) - } else if content.len() == 2 { - let to_keep = if pos == 0 { - SharedRef::clone(&content[1]) - } else { - SharedRef::clone(&content[0]) - }; - Ok(Some(LeafContent { - hashed: self.hashed, - content: SmallVec::One(to_keep), - })) - } else { - let newv = clone_array_and_remove_at_pos(content, pos); - Ok(Some(LeafContent { - hashed: self.hashed, - content: SmallVec::Many(newv), - })) - } - } - } - } - } - } -} diff --git a/imhamt/src/hamt.rs b/imhamt/src/hamt.rs index e050f76cb..d174a0200 100644 --- a/imhamt/src/hamt.rs +++ b/imhamt/src/hamt.rs @@ -1,4 +1,3 @@ -use super::content::LeafIterator; use super::hash::{Hash, HashedKey, Hasher}; use super::node::{ insert_rec, lookup_one, remove_eq_rec, remove_rec, replace_rec, size_rec, update_rec, Entry, @@ -8,6 +7,7 @@ pub use super::operation::{InsertError, RemoveError, ReplaceError, UpdateError}; use std::iter::FromIterator; use std::marker::PhantomData; use std::mem::swap; +use std::slice; #[derive(Clone)] pub struct Hamt { @@ -17,7 +17,7 @@ pub struct Hamt { pub struct HamtIter<'a, K, V> { stack: Vec>, - content: Option>, + content: Option>, } impl Hamt { @@ -37,7 +37,7 @@ impl Hamt { } } -impl Hamt { +impl Hamt { pub fn insert(&self, k: K, v: V) -> Result { let h = HashedKey::compute(self.hasher, &k); let newroot = insert_rec(&self.root, h, 0, k, v)?; @@ -48,10 +48,10 @@ impl Hamt { } } -impl Hamt { +impl Hamt { pub fn remove_match(&self, k: &K, v: &V) -> Result { let h = HashedKey::compute(self.hasher, &k); - let newroot = remove_eq_rec(&self.root, &h, 0, k, v)?; + let newroot = remove_eq_rec(&self.root, h, 0, k, v)?; match newroot { None => Ok(Self::new()), Some(r) => Ok(Hamt { @@ -62,10 +62,10 @@ impl Hamt { } } -impl Hamt { +impl Hamt { pub fn remove(&self, k: &K) -> Result { let h = HashedKey::compute(self.hasher, &k); - let newroot = remove_rec(&self.root, &h, 0, k)?; + let newroot = remove_rec(&self.root, h, 0, k)?; match newroot { None => Ok(Self::new()), Some(r) => Ok(Hamt { @@ -81,7 +81,7 @@ impl Hamt { /// and the old value. pub fn replace(&self, k: &K, v: V) -> Result<(Self, V), ReplaceError> { let h = HashedKey::compute(self.hasher, &k); - let (newroot, oldv) = replace_rec(&self.root, &h, 0, k, v)?; + let (newroot, oldv) = replace_rec(&self.root, h, 0, k, v)?; Ok(( Hamt { root: newroot, @@ -92,7 +92,7 @@ impl Hamt { } } -impl Hamt { +impl Hamt { /// Update the element at the key K. /// /// If the closure F in parameter returns None, then the key is deleted. @@ -103,7 +103,7 @@ impl Hamt { F: FnOnce(&V) -> Result, U>, { let h = HashedKey::compute(self.hasher, &k); - let newroot = update_rec(&self.root, &h, 0, k, f)?; + let newroot = update_rec(&self.root, h, 0, k, f)?; match newroot { None => Ok(Self::new()), Some(r) => Ok(Hamt { @@ -120,6 +120,7 @@ impl Hamt { pub fn insert_or_update(&self, k: K, v: V, f: F) -> Result> where F: FnOnce(&V) -> Result, U>, + V: Clone, { match self.update(&k, f) { Ok(new_self) => Ok(new_self), @@ -141,6 +142,7 @@ impl Hamt { pub fn insert_or_update_simple(&self, k: K, v: V, f: F) -> Self where F: for<'a> FnOnce(&'a V) -> Option, + V: Clone, { match self.update(&k, |x| Ok(f(x))) { Ok(new_self) => new_self, @@ -194,7 +196,7 @@ impl<'a, K, V> Iterator for HamtIter<'a, K, V> { None => self.content = None, Some(ref o) => { self.content = Some(iter); - return Some((o.get_key(), o.get_value())); + return Some((&o.0, &o.1)); } }, None => match self.stack.last_mut() { @@ -205,7 +207,8 @@ impl<'a, K, V> Iterator for HamtIter<'a, K, V> { } Some(o) => match o.as_ref() { &Entry::SubNode(ref sub) => self.stack.push(sub.iter()), - &Entry::Leaf(ref leaf) => self.content = Some(leaf.iter()), + &Entry::Leaf(_, ref k, ref v) => return Some((&k,&v)), + &Entry::LeafMany(_, ref col) => self.content = Some(col.iter()), }, }, }, @@ -214,7 +217,7 @@ impl<'a, K, V> Iterator for HamtIter<'a, K, V> { } } -impl FromIterator<(K, V)> for Hamt { +impl FromIterator<(K, V)> for Hamt { fn from_iter>(iter: I) -> Self { let mut h = Hamt::new(); for (k, v) in iter { diff --git a/imhamt/src/helper.rs b/imhamt/src/helper.rs index 76117602c..3dc30923b 100644 --- a/imhamt/src/helper.rs +++ b/imhamt/src/helper.rs @@ -1,5 +1,5 @@ #[inline] -pub fn clone_array_and_insert_at_pos(v: &Box<[A]>, a: A, pos: usize) -> Box<[A]> { +pub fn clone_array_and_insert_at_pos(v: &[A], a: A, pos: usize) -> Box<[A]> { // copy all elements but insert a new elements at position pos let mut new_array: Vec = Vec::with_capacity(v.len() + 1); new_array.extend_from_slice(&v[0..pos]); @@ -9,7 +9,7 @@ pub fn clone_array_and_insert_at_pos(v: &Box<[A]>, a: A, pos: usize) - } #[inline] -pub fn clone_array_and_set_at_pos(v: &Box<[A]>, a: A, pos: usize) -> Box<[A]> { +pub fn clone_array_and_set_at_pos(v: &[A], a: A, pos: usize) -> Box<[A]> { // copy all elements except at pos where a replaces it. let mut new_array: Vec = Vec::with_capacity(v.len()); if pos > 0 { @@ -23,13 +23,13 @@ pub fn clone_array_and_set_at_pos(v: &Box<[A]>, a: A, pos: usize) -> B } #[inline] -pub fn clone_array_and_remove_at_pos(v: &Box<[A]>, pos: usize) -> Box<[A]> { - let mut v = v.clone().into_vec(); +pub fn clone_array_and_remove_at_pos(v: &[A], pos: usize) -> Box<[A]> { + let mut v : Vec<_> = v.to_vec(); v.remove(pos); v.into() } -pub fn clone_array_and_extend(v: &Box<[A]>, end: A) -> Box<[A]> { +pub fn clone_array_and_extend(v: &[A], end: A) -> Box<[A]> { let mut new_array: Vec = Vec::with_capacity(v.len() + 1); new_array.extend_from_slice(&v[..]); new_array.push(end); diff --git a/imhamt/src/lib.rs b/imhamt/src/lib.rs index b4ba77c7d..3f1c84ba5 100644 --- a/imhamt/src/lib.rs +++ b/imhamt/src/lib.rs @@ -10,7 +10,6 @@ extern crate quickcheck_macros; extern crate test; mod bitmap; -mod content; mod hamt; mod hash; mod helper; @@ -59,7 +58,7 @@ mod tests { 3 => PlanOperation::Update(Arbitrary::arbitrary(g)), 4 => PlanOperation::UpdateRemoval(Arbitrary::arbitrary(g)), 5 => PlanOperation::Replace(Arbitrary::arbitrary(g), Arbitrary::arbitrary(g)), - _ => unimplemented!(), + _ => panic!("test internal error: quickcheck tag code is invalid"), }; v.push(op) } @@ -142,7 +141,7 @@ mod tests { } fn next_u32(x: &u32) -> Result, ()> { - Ok(Some(x + 1)) + Ok(Some(*x + 1)) } #[test] @@ -189,8 +188,8 @@ mod tests { .remove_match(&k1, &v1) .expect("cannot remove from already inserted"); - assert_eq!(h.size(), 16 + 3); - assert_eq!(h2.size(), 16 + 2); + assert_eq!(h.size(), keys.len() + 3); + assert_eq!(h2.size(), keys.len() + 2); assert_eq!(h.lookup(&k1), Some(&v1)); assert_eq!(h2.lookup(&k1), None); @@ -216,12 +215,15 @@ mod tests { Err(UpdateError::KeyNotFound) ); - assert_eq!(h.size(), 16 + 1); - assert_eq!(h2.size(), 16 + 2); + assert_eq!(h.size(), keys.len() + 1); + assert_eq!(h2.size(), keys.len() + 2); } - use hash::HashedKey; - use std::marker::PhantomData; + //use hash::HashedKey; + //use std::marker::PhantomData; + + /* commented -- as this doesn't do what it says on the tin. + it doesn't test for h collision, but node splitting #[test] fn collision() { @@ -231,8 +233,9 @@ mod tests { let mut found = None; for i in 0..10000 { let x = format!("key{}", i); - let h2 = HashedKey::compute(PhantomData::, &"keyx".to_string()); - if h2.level_index(0) == l { + let h2 = HashedKey::compute(PhantomData::, &x); + if h1 == h2 { + //if h2.level_index(0) == l { found = Some(x.clone()); break; } @@ -242,7 +245,9 @@ mod tests { None => assert!(false), Some(x) => { let mut h: Hamt = Hamt::new(); + println!("k0: {}", k0); h = h.insert(k0.clone(), 1u32).unwrap(); + println!("x: {}", x); h = h.insert(x.clone(), 2u32).unwrap(); assert_eq!(h.size(), 2); assert_eq!(h.lookup(&k0), Some(&1u32)); @@ -258,6 +263,7 @@ mod tests { } } } + */ fn property_btreemap_eq( reference: &BTreeMap, @@ -293,6 +299,40 @@ mod tests { property_btreemap_eq(&reference, &h) } + #[derive(Clone, Debug, PartialEq, Eq)] + pub struct LargeVec(Vec); + + const LARGE_MIN : usize = 1000; + const LARGE_DIFF : usize = 1000; + + impl Arbitrary for LargeVec { + fn arbitrary(g: &mut G) -> Self { + let nb = LARGE_MIN + (usize::arbitrary(g) % LARGE_DIFF); + let mut v = Vec::with_capacity(nb); + for _ in 0..nb { + v.push(Arbitrary::arbitrary(g)) + } + LargeVec(v) + } + } + + #[quickcheck] + fn large_insert_equivalent(xs: LargeVec<(String, u32)>) -> bool { + let xs = xs.0; + let mut reference = BTreeMap::new(); + let mut h: Hamt = Hamt::new(); + for (k, v) in xs.iter() { + if reference.get(k).is_some() { + continue; + } + reference.insert(k.clone(), v.clone()); + h = h.insert(k.clone(), *v).unwrap(); + } + + property_btreemap_eq(&reference, &h) + } + + fn get_key_nth(b: &BTreeMap, n: usize) -> Option { let keys_nb = b.len(); if keys_nb == 0 { @@ -302,10 +342,13 @@ mod tests { Some(keys.nth(n % keys_nb).unwrap().clone()) } - #[quickcheck] - fn plan_equivalent(xs: Plan) -> bool { + fn arbitrary_hamt_and_btree(xs: Plan, update_f: F) -> (Hamt, BTreeMap) + where K: Hash+Clone+Eq+Ord+Sync, + V: Clone+PartialEq+Sync, + F: Fn(&V) -> Result, ()> + Copy, + { let mut reference = BTreeMap::new(); - let mut h: Hamt = Hamt::new(); + let mut h: Hamt = Hamt::new(); //println!("plan {} operations", xs.0.len()); for op in xs.0.iter() { match op { @@ -314,7 +357,7 @@ mod tests { continue; } reference.insert(k.clone(), v.clone()); - h = h.insert(k.clone(), *v).unwrap(); + h = h.insert(k.clone(), v.clone()).unwrap(); } PlanOperation::DeleteOne(r) => match get_key_nth(&reference, *r) { None => continue, @@ -337,16 +380,21 @@ mod tests { let v = reference.get_mut(&k).unwrap(); *v = newv.clone(); - h = h.replace(&k, *newv).unwrap().0; + h = h.replace(&k, newv.clone()).unwrap().0; } }, PlanOperation::Update(r) => match get_key_nth(&reference, *r) { None => continue, Some(k) => { let v = reference.get_mut(&k).unwrap(); - *v = *v + 1; - - h = h.update(&k, next_u32).unwrap(); + match update_f(v).unwrap() { + None => { + reference.remove(&k); + } + Some(newv) => *v = newv, + } + + h = h.update(&k, update_f).unwrap(); } }, PlanOperation::UpdateRemoval(r) => match get_key_nth(&reference, *r) { @@ -358,8 +406,22 @@ mod tests { }, } } + (h, reference) + } + + #[quickcheck] + fn plan_equivalent(xs: Plan) -> bool { + let (h, reference) = arbitrary_hamt_and_btree(xs, next_u32); property_btreemap_eq(&reference, &h) } + + #[quickcheck] + fn iter_equivalent(xs: Plan) -> bool { + use std::iter::FromIterator; + let (h, reference) = arbitrary_hamt_and_btree(xs, next_u32); + let after_iter = BTreeMap::from_iter(h.iter().map(|(k, v)| (k.clone(), v.clone()))); + reference == after_iter + } } #[cfg(test)] diff --git a/imhamt/src/node/reference.rs b/imhamt/src/node/reference.rs index f254a1f6a..c4882b6a8 100644 --- a/imhamt/src/node/reference.rs +++ b/imhamt/src/node/reference.rs @@ -1,5 +1,4 @@ use super::super::bitmap::{ArrayIndex, SmallBitmap}; -use super::super::content::{LeafContent, KV}; use super::super::hash::{HashedKey, LevelIndex}; use super::super::helper; use super::super::operation::*; @@ -19,8 +18,98 @@ pub struct Node { pub type NodeIter<'a, K, V> = slice::Iter<'a, SharedRef>>; +pub struct Collision(Box>); + +impl Collision { + pub fn from_vec(vec: Vec<(K,V)>) -> Self { + assert!(vec.len() >= 2); + Collision(Box::new(vec.into())) + } + pub fn from_box(b: Box<[(K,V)]>) -> Self { + assert!(b.len() >= 2); + Collision(Box::new(b)) + } + pub fn len(&self) -> usize { + self.0.len() + } + pub fn iter<'a>(&'a self) -> slice::Iter<'a, (K, V)> { + self.0.iter() + } +} + +impl Collision { + pub fn insert(&self, k: K, v: V) -> Result { + if self.0.iter().find(|(lk, _)| lk == &k).is_some() { + Err(InsertError::EntryExists) + } else { + Ok(Collision::from_box(helper::clone_array_and_extend(&self.0, (k, v)))) + } + } + + fn get_record_and_pos(&self, k: &K) -> Option<(usize, &(K, V))> { + self.0 + .iter() + .enumerate() + .find(|(_, (fk, _))| fk == k) + } + + pub fn remove(&self, h: HashedKey, k: &K) -> Result, RemoveError> { + let (pos, _) = self.get_record_and_pos(k).ok_or(RemoveError::KeyNotFound)?; + if self.0.len() == 2 { + let to_keep = if pos == 0 { &self.0[1] } else { &self.0[0] }; + Ok(Entry::Leaf(h, to_keep.0.clone(), to_keep.1.clone())) + } else { + let col = Collision::from_box(helper::clone_array_and_remove_at_pos(&self.0, pos)); + Ok(Entry::LeafMany(h, col)) + } + } + + pub fn remove_match(&self, h: HashedKey, k: &K, v: &V) -> Result, RemoveError> + where V: PartialEq, + { + let (pos, _) = self.get_record_and_pos(k).ok_or(RemoveError::KeyNotFound)?; + if &self.0[pos].1 != v { + Err(RemoveError::ValueNotMatching) + } else if self.0.len() == 2 { + let to_keep = if pos == 0 { &self.0[1] } else { &self.0[0] }; + Ok(Entry::Leaf(h, to_keep.0.clone(), to_keep.1.clone())) + } else { + let col = Collision::from_box(helper::clone_array_and_remove_at_pos(&self.0, pos)); + Ok(Entry::LeafMany(h, col)) + } + } + + pub fn update(&self, h: HashedKey, k: &K, f: F) -> Result, UpdateError> + where F: FnOnce(&V) -> Result, U>, + { + let (pos, (_, v)) = self.get_record_and_pos(k).ok_or(UpdateError::KeyNotFound)?; + match f(v).map_err(|u| UpdateError::ValueCallbackError(u))? { + None => { + if self.0.len() == 2 { + let to_keep = if pos == 0 { &self.0[1] } else { &self.0[0] }; + Ok(Entry::Leaf(h, to_keep.0.clone(), to_keep.1.clone())) + } else { + let col = Collision::from_box(helper::clone_array_and_remove_at_pos(&self.0, pos)); + Ok(Entry::LeafMany(h, col)) + } + } + Some(newv) => { + let newcol = Collision::from_box(helper::clone_array_and_set_at_pos(&self.0, (k.clone(), newv), pos)); + Ok(Entry::LeafMany(h, newcol)) + } + } + } + + pub fn replace(&self, k: &K, v: V) -> Result<(Self, V), ReplaceError> { + let (pos, (_, oldv)) = self.get_record_and_pos(k).ok_or(ReplaceError::KeyNotFound)?; + let newcol = Collision::from_box(helper::clone_array_and_set_at_pos(&self.0, (k.clone(), v), pos)); + Ok((newcol, oldv.clone())) + } +} + pub enum Entry { - Leaf(LeafContent), + Leaf(HashedKey, K, V), + LeafMany(HashedKey, Collision), SubNode(Node), } @@ -119,7 +208,7 @@ impl Node { // this is guaranteed by the trie design not to recurse forever, because at some // point the hashedkey value being shifted by level_index will match to 0, // creating Leaf and Collision node instead of Subnode. -pub fn insert_rec( +pub fn insert_rec( node: &Node, h: HashedKey, lvl: usize, @@ -129,30 +218,32 @@ pub fn insert_rec( let level_hash = h.level_index(lvl); let idx = node.bitmap.get_index_sparse(level_hash); if idx.is_not_found() { - let kv = SharedRef::new(KV::new(k, v)); - let content = LeafContent::single(h, kv); - let e = SharedRef::new(Entry::Leaf(content)); + let e = SharedRef::new(Entry::Leaf(h, k, v)); Ok(node.set_at(level_hash, e)) } else { match &(node.get_child(idx)).as_ref() { - &Entry::Leaf(ref content) => { + &Entry::Leaf(lh, lk, lv) => { // in case of same hash, then we append to the collision type // otherwise we create a new subnode - if content.hashed == h { - let kv = SharedRef::new(KV::new(k, v)); - let newent = Entry::Leaf(content.add(kv)?); - let e = SharedRef::new(newent); + if *lh == h { + if lk == &k { + return Err(InsertError::EntryExists) + } + let dat = vec![ + (lk.clone(), lv.clone()), + (k, v), + ]; + let e = SharedRef::new(Entry::LeafMany(*lh, Collision::from_vec(dat))); Ok(node.replace_at(idx, e)) } else { - let leaf_idx = content.hashed.level_index(lvl + 1); + let leaf_idx = lh.level_index(lvl + 1); let entry_next_idx = h.level_index(lvl + 1); let subnode = Node::singleton(leaf_idx, SharedRef::clone(node.get_child(idx))); if entry_next_idx != leaf_idx { - let kv = SharedRef::new(KV::new(k, v)); let subnode = subnode.set_at( entry_next_idx, - SharedRef::new(Entry::Leaf(LeafContent::single(h, kv))), + SharedRef::new(Entry::Leaf(h, k, v)), ); Ok(node.replace_at(idx, SharedRef::new(Entry::SubNode(subnode)))) } else { @@ -162,12 +253,16 @@ pub fn insert_rec( } } } + &Entry::LeafMany(lh, col) => { + assert_eq!(*lh, h); + let col = col.insert(k, v)?; + Ok(node.replace_at(idx, SharedRef::new(Entry::LeafMany(*lh, col)))) + } &Entry::SubNode(sub) => { if lvl > 13 { // this is to appease the compiler for now, but globally an impossible // state. - assert!(false); - unimplemented!() + unreachable!() } else { let r = insert_rec(sub, h, lvl + 1, k, v)?; let e = SharedRef::new(Entry::SubNode(r)); @@ -196,19 +291,32 @@ pub fn lookup_one<'a, K: PartialEq, V>( LookupRet::NotFound } else { match &(node.get_child(idx)).as_ref() { - &Entry::Leaf(content) => match content.find(*h, k) { - None => LookupRet::NotFound, - Some(v) => LookupRet::Found(v), - }, + &Entry::Leaf(lh, lk, lv) => { + if lh == h && lk == k { + LookupRet::Found(lv) + } else { + LookupRet::NotFound + } + } + &Entry::LeafMany(lh, col) => { + if lh != h { + LookupRet::NotFound + } else { + match col.0.iter().find(|(lk, _)| lk == k) { + None => LookupRet::NotFound, + Some(lkv) => LookupRet::Found(&lkv.1), + } + } + } &Entry::SubNode(sub) => LookupRet::ContinueIn(sub), } } } // recursively try to remove a key with an expected equality value v -pub fn remove_eq_rec( +pub fn remove_eq_rec( node: &Node, - h: &HashedKey, + h: HashedKey, lvl: usize, k: &K, v: &V, @@ -219,10 +327,21 @@ pub fn remove_eq_rec( return Err(RemoveError::KeyNotFound); } else { match &(node.get_child(idx)).as_ref() { - &Entry::Leaf(content) => { - let new_content = content.remove_match(h, k, v)?; - let new_ent = new_content.and_then(|x| Some(SharedRef::new(Entry::Leaf(x)))); - Ok(node.clear_or_replace_at(level_hash, new_ent)) + &Entry::Leaf(lh, lk, lv) => { + if *lh == h && lk == k { + if lv == v { + Ok(node.clear_at(level_hash)) + } else { + Err(RemoveError::ValueNotMatching) + } + } else { + Err(RemoveError::KeyNotFound) + } + } + &Entry::LeafMany(lh, col) => { + assert_eq!(*lh, h); + let replacement = col.remove_match(h, k, v)?; + Ok(Some(node.replace_at(idx, SharedRef::new(replacement)))) } &Entry::SubNode(sub) => match remove_eq_rec(sub, h, lvl + 1, k, v)? { None => Ok(node.clear_at(level_hash)), @@ -236,9 +355,9 @@ pub fn remove_eq_rec( } // recursively try to remove a key -pub fn remove_rec( +pub fn remove_rec( node: &Node, - h: &HashedKey, + h: HashedKey, lvl: usize, k: &K, ) -> Result>, RemoveError> { @@ -248,10 +367,17 @@ pub fn remove_rec( return Err(RemoveError::KeyNotFound); } else { match &(node.get_child(idx)).as_ref() { - &Entry::Leaf(content) => { - let new_content = content.remove(h, k)?; - let new_ent = new_content.and_then(|x| Some(SharedRef::new(Entry::Leaf(x)))); - Ok(node.clear_or_replace_at(level_hash, new_ent)) + &Entry::Leaf(lh, lk, _) => { + if *lh == h && lk == k { + Ok(node.clear_at(level_hash)) + } else { + Err(RemoveError::KeyNotFound) + } + } + &Entry::LeafMany(lh, col) => { + assert_eq!(*lh, h); + let replacement = col.remove(h, k)?; + Ok(Some(node.replace_at(idx, SharedRef::new(replacement)))) } &Entry::SubNode(sub) => match remove_rec(sub, h, lvl + 1, k)? { None => Ok(node.clear_at(level_hash)), @@ -267,9 +393,9 @@ pub fn remove_rec( // recursively try to update a key. // // note, an update cannot create a new value, it can only delete or update an existing value. -pub fn update_rec( +pub fn update_rec( node: &Node, - h: &HashedKey, + h: HashedKey, lvl: usize, k: &K, f: F, @@ -280,13 +406,23 @@ where let level_hash = h.level_index(lvl); let idx = node.bitmap.get_index_sparse(level_hash); if idx.is_not_found() { - return Err(UpdateError::KeyNotFound); + Err(UpdateError::KeyNotFound) } else { match &(node.get_child(idx)).as_ref() { - &Entry::Leaf(content) => { - let new_content = content.update(h, k, f)?; - let new_ent = new_content.and_then(|x| Some(SharedRef::new(Entry::Leaf(x)))); - Ok(node.clear_or_replace_at(level_hash, new_ent)) + &Entry::Leaf(lh, lk, lv) => { + if *lh == h && lk == k { + let newv = f(lv).map_err(UpdateError::ValueCallbackError)?; + Ok(node.clear_or_replace_at(level_hash, newv.map(|x| + SharedRef::new(Entry::Leaf(*lh, lk.clone(), x)) + ))) + } else { + Err(UpdateError::KeyNotFound) + } + } + &Entry::LeafMany(lh, col) => { + assert_eq!(*lh, h); + let replacement = col.update(h, k, f)?; + Ok(Some(node.replace_at(idx, SharedRef::new(replacement)))) } &Entry::SubNode(sub) => match update_rec(sub, h, lvl + 1, k, f)? { None => Ok(node.clear_at(level_hash)), @@ -300,11 +436,9 @@ where } // recursively try to replace a key's value. -// -// note, an update cannot create a new value, it can only delete or update an existing value. pub fn replace_rec( node: &Node, - h: &HashedKey, + h: HashedKey, lvl: usize, k: &K, v: V, @@ -312,13 +446,21 @@ pub fn replace_rec( let level_hash = h.level_index(lvl); let idx = node.bitmap.get_index_sparse(level_hash); if idx.is_not_found() { - return Err(ReplaceError::KeyNotFound); + Err(ReplaceError::KeyNotFound) } else { match &(node.get_child(idx)).as_ref() { - &Entry::Leaf(content) => { - let (new_content, oldv) = content.replace(k, v)?; - let new_ent = SharedRef::new(Entry::Leaf(new_content)); - Ok((node.replace_at(idx, new_ent), oldv)) + &Entry::Leaf(lh, lk, lv) => { + if *lh == h && lk == k { + let new_ent = SharedRef::new(Entry::Leaf(*lh, lk.clone(), v)); + Ok((node.replace_at(idx, new_ent), lv.clone())) + } else { + Err(ReplaceError::KeyNotFound) + } + } + &Entry::LeafMany(lh, col) => { + assert_eq!(*lh, h); + let (replacement, old_value) = col.replace(k, v)?; + Ok((node.replace_at(idx, SharedRef::new(Entry::LeafMany(*lh, replacement))), old_value)) } &Entry::SubNode(sub) => { let (newsub, oldv) = replace_rec(sub, h, lvl + 1, k, v)?; @@ -333,14 +475,15 @@ pub fn size_rec(node: &Node) -> usize { let mut sum = 0; for c in node.children.iter() { match &c.as_ref() { - &Entry::Leaf(ref content) => sum += content.len(), + &Entry::Leaf(_, _, _) => sum += 1, + &Entry::LeafMany(_, col) => sum += col.len(), &Entry::SubNode(sub) => sum += size_rec(&sub), } } sum } -//// debug +// debug module pub mod debug { use super::*; use std::cmp; @@ -349,7 +492,8 @@ pub mod debug { let mut max_depth = 0; for c in node.children.iter() { match &c.as_ref() { - &Entry::Leaf(_) => {} + &Entry::Leaf(_,_,_) => {} + &Entry::LeafMany(_,_) => {} &Entry::SubNode(sub) => { let child_depth = depth_rec(&sub); max_depth = cmp::max(max_depth, child_depth)