From 815d8a7c22e6e4b08d3661519842960d57637b63 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Mon, 26 Feb 2024 11:51:58 +0000 Subject: [PATCH 01/32] . --- src/btreeset.rs | 101 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 src/btreeset.rs diff --git a/src/btreeset.rs b/src/btreeset.rs new file mode 100644 index 00000000..c47e1df1 --- /dev/null +++ b/src/btreeset.rs @@ -0,0 +1,101 @@ +//! This module implements a key store based on a B-Tree +//! in stable memory. +//! +//! # V1 layout +//! +//! ```text +//! ---------------------------------------- <- Address 0 +//! Magic "BTS" ↕ 3 bytes +//! ---------------------------------------- +//! Layout version ↕ 1 byte +//! ---------------------------------------- +//! Max key size ↕ 4 bytes OR Page size ↕ 4 bytes +//! ---------------------------------------- ---------------------------------------- +//! Root node address ↕ 8 bytes +//! ---------------------------------------- +//! Length (number of elements) ↕ 8 bytes +//! ---------------------------------------- <- Address 24 (PACKED_HEADER_SIZE) +//! Reserved space ↕ 24 bytes +//! ---------------------------------------- <- Address 48 (ALLOCATOR_OFFSET) +//! Allocator +//! ---------------------------------------- +//! ... free memory for nodes +//! ---------------------------------------- +//! ``` + +#[cfg(test)] +mod proptests; + +const MAGIC: &[u8; 3] = b"BTS"; +const LAYOUT_VERSION: u8 = 1; +// The sum of all the header fields, i.e. size of a packed header. +const PACKED_HEADER_SIZE: usize = 24; +// The offset where the allocator begins. +const ALLOCATOR_OFFSET: usize = 48; + +// The default page size to use in BTreeMap V2 in bytes. +const DEFAULT_PAGE_SIZE: u32 = 1024; + +// A marker to indicate that the `PageSize` stored in the header is a `PageSize::Value`. +const PAGE_SIZE_VALUE_MARKER: u32 = u32::MAX; + +/// A "stable" set based on a B-tree. +/// +/// The implementation is based on the algorithm outlined in "Introduction to Algorithms" +/// by Cormen et al. +pub struct BTreeSet +where + K: Storable + Ord + Clone, + M: Memory, +{ + // The address of the root node. If a root node doesn't exist, the address + // is set to NULL. + root_addr: Address, + + version: Version, + + // An allocator used for managing memory and allocating nodes. + allocator: Allocator, + + // The number of elements in the map. + length: u64, + + // A marker to communicate to the Rust compiler that we own these types. + _phantom: PhantomData<(K)>, +} + +/// The packed header size must be <= ALLOCATOR_OFFSET. +struct BTreeHeader { + version: Version, + root_addr: Address, + length: u64, + // Reserved bytes for future extensions +} + +impl BTreeSet +where + K: Storable + Ord + Clone, + M: Memory, +{ + /// Initializes a `BTreeSet`. + /// + /// If the memory provided already contains a `BTreeSet`, then that + /// map is loaded. Otherwise, a new `BTreeSet` instance is created. + pub fn init(memory: M) -> Self { + if memory.size() == 0 { + // Memory is empty. Create a new map. + return BTreeSet::new(memory); + } + + // Check if the magic in the memory corresponds to a BTreeSet. + let mut dst = vec![0; 3]; + memory.read(0, &mut dst); + if dst != MAGIC { + // No BTreeSet found. Create a new instance. + BTreeSet::new(memory) + } else { + // The memory already contains a BTreeSet. Load it. + BTreeSet::load(memory) + } + } +} From 86ddec121d66dff83cc297148a7f0adbe01ee14d Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Tue, 19 Mar 2024 11:48:27 +0000 Subject: [PATCH 02/32] . --- src/btreeset.rs | 240 ++++++++++++++++++++++++++++++++++-------------- src/lib.rs | 1 + 2 files changed, 171 insertions(+), 70 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index c47e1df1..b0bf86c3 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -1,43 +1,39 @@ -//! This module implements a key store based on a B-Tree -//! in stable memory. -//! -//! # V1 layout -//! -//! ```text -//! ---------------------------------------- <- Address 0 -//! Magic "BTS" ↕ 3 bytes -//! ---------------------------------------- -//! Layout version ↕ 1 byte -//! ---------------------------------------- -//! Max key size ↕ 4 bytes OR Page size ↕ 4 bytes -//! ---------------------------------------- ---------------------------------------- -//! Root node address ↕ 8 bytes -//! ---------------------------------------- -//! Length (number of elements) ↕ 8 bytes -//! ---------------------------------------- <- Address 24 (PACKED_HEADER_SIZE) -//! Reserved space ↕ 24 bytes -//! ---------------------------------------- <- Address 48 (ALLOCATOR_OFFSET) -//! Allocator -//! ---------------------------------------- -//! ... free memory for nodes -//! ---------------------------------------- -//! ``` +use crate::{btreemap::Iter, BTreeMap, Memory, Storable}; +use core::ops::RangeBounds; -#[cfg(test)] -mod proptests; +/// An iterator over the entries of a [`BTreeMap`]. +#[must_use = "iterators are lazy and do nothing unless consumed"] +pub struct IterSet<'a, K, M> +where + K: Storable + Ord + Clone, + M: Memory, +{ + iter_internal: Iter<'a, K, (), M>, +} -const MAGIC: &[u8; 3] = b"BTS"; -const LAYOUT_VERSION: u8 = 1; -// The sum of all the header fields, i.e. size of a packed header. -const PACKED_HEADER_SIZE: usize = 24; -// The offset where the allocator begins. -const ALLOCATOR_OFFSET: usize = 48; +impl<'a, K, M> IterSet<'a, K, M> +where + K: Storable + Ord + Clone, + M: Memory, +{ + fn new(iter: Iter<'a, K, (), M>) -> Self { + IterSet { + iter_internal: iter, + } + } +} -// The default page size to use in BTreeMap V2 in bytes. -const DEFAULT_PAGE_SIZE: u32 = 1024; +impl Iterator for IterSet<'_, K, M> +where + K: Storable + Ord + Clone, + M: Memory, +{ + type Item = K; -// A marker to indicate that the `PageSize` stored in the header is a `PageSize::Value`. -const PAGE_SIZE_VALUE_MARKER: u32 = u32::MAX; + fn next(&mut self) -> Option { + self.iter_internal.next().map(|(a, _)| a) + } +} /// A "stable" set based on a B-tree. /// @@ -48,28 +44,7 @@ where K: Storable + Ord + Clone, M: Memory, { - // The address of the root node. If a root node doesn't exist, the address - // is set to NULL. - root_addr: Address, - - version: Version, - - // An allocator used for managing memory and allocating nodes. - allocator: Allocator, - - // The number of elements in the map. - length: u64, - - // A marker to communicate to the Rust compiler that we own these types. - _phantom: PhantomData<(K)>, -} - -/// The packed header size must be <= ALLOCATOR_OFFSET. -struct BTreeHeader { - version: Version, - root_addr: Address, - length: u64, - // Reserved bytes for future extensions + map: BTreeMap, } impl BTreeSet @@ -82,20 +57,145 @@ where /// If the memory provided already contains a `BTreeSet`, then that /// map is loaded. Otherwise, a new `BTreeSet` instance is created. pub fn init(memory: M) -> Self { - if memory.size() == 0 { - // Memory is empty. Create a new map. - return BTreeSet::new(memory); + BTreeSet { + map: BTreeMap::::init(memory), + } + } + + /// Creates a new instance a `BTreeSet`. + pub fn new(memory: M) -> Self { + BTreeSet { + map: BTreeMap::::new(memory), } + } - // Check if the magic in the memory corresponds to a BTreeSet. - let mut dst = vec![0; 3]; - memory.read(0, &mut dst); - if dst != MAGIC { - // No BTreeSet found. Create a new instance. - BTreeSet::new(memory) - } else { - // The memory already contains a BTreeSet. Load it. - BTreeSet::load(memory) + /// Loads the set from memory. + pub fn load(memory: M) -> Self { + BTreeSet { + map: BTreeMap::::load(memory), } } + + /// Inserts a key into the set. + pub fn insert(&mut self, key: K) -> bool { + self.map.insert(key, ()) != None + } + + /// Returns `true` if the key exists in the map, `false` otherwise. + pub fn contains_key(&self, key: &K) -> bool { + self.map.get(key).is_some() + } + + /// Returns `true` if the map contains no elements. + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } + + /// Returns the number of elements in the map. + pub fn len(&self) -> u64 { + self.map.len() + } + + /// Returns the underlying memory. + pub fn into_memory(self) -> M { + self.map.into_memory() + } + + /// Removes all elements from the set. + pub fn clear(&mut self) { + self.map.clear_new(); + } + + /// Returns the first key in the map. This key + /// is the minimum key in the map. + pub fn first_key(&self) -> Option { + self.map.first_key_value().map(|(a, _)| a) + } + + /// Returns the last key in the set. This key + /// is the maximum key in the set. + pub fn last_key(&self) -> Option { + self.map.last_key_value().map(|(a, _)| a) + } + + /// Removes a key from the map, returning true if it exists. + pub fn remove(&mut self, key: &K) -> bool { + self.map.remove(key) != None + } + + /// Removes and returns the last element in the set. The key of this element is the maximum key that was in the set. + pub fn pop_last(&mut self) -> Option { + self.map.pop_last().map(|(a, _)| a) + } + + /// Removes and returns the first element in the set. The key of this element is the minimum key that was in the set. + pub fn pop_first(&mut self) -> Option { + self.map.pop_first().map(|(a, _)| a) + } + + /// Returns an iterator over the entries of the set, sorted by key. + pub fn iter(&self) -> IterSet { + IterSet::new(self.map.iter()) + } + + /// Returns an iterator over the entries in the set where keys + /// belong to the specified range. + pub fn range(&self, key_range: impl RangeBounds) -> IterSet { + IterSet::new(self.map.range(key_range)) + } + + /// Returns an iterator pointing to the first element below the given bound. + /// Returns an empty iterator if there are no keys below the given bound. + pub fn iter_upper_bound(&self, bound: &K) -> IterSet { + IterSet::new(self.map.iter_upper_bound(bound)) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::{ + storable::{Blob, Bound as StorableBound}, + VectorMemory, + }; + use std::cell::RefCell; + use std::rc::Rc; + + pub(crate) fn make_memory() -> Rc>> { + Rc::new(RefCell::new(Vec::new())) + } + + // A helper method to succinctly create an entry. + fn e(x: u8) -> (Blob<10>, Vec) { + (b(&[x]), vec![]) + } + + pub(crate) fn b(x: &[u8]) -> Blob<10> { + Blob::<10>::try_from(x).unwrap() + } + + // A test runner that runs the test using both V1 and V2 btrees. + pub fn btree_test(f: F) + where + K: Storable + Ord + Clone, + F: Fn(BTreeSet) -> R, + { + let mem = make_memory(); + let btree = BTreeSet::new(mem); + f(btree); + } + + #[test] + fn init_preserves_data_set() { + btree_test(|mut btree| { + assert!(btree.insert(b(&[1, 2, 3]))); + assert!(btree.contains_key(&b(&[1, 2, 3]))); + + // Reload the btree + let btree = BTreeSet::init(btree.into_memory()); + + // Data still exists. + assert!(btree.contains_key(&b(&[1, 2, 3]))); + }); + } } diff --git a/src/lib.rs b/src/lib.rs index efe8d1b1..2f8f8f95 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,7 @@ pub mod file_mem; mod ic0_memory; // Memory API for canisters. pub mod log; pub use log::{Log as StableLog, Log}; +pub mod btreeset; pub mod memory_manager; pub mod min_heap; pub mod reader; From 948393f5d56b5b865d7312994ccbf7d599083055 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Tue, 19 Mar 2024 13:10:04 +0000 Subject: [PATCH 03/32] fix test --- src/btreeset.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index b0bf86c3..308c3517 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -76,9 +76,10 @@ where } } - /// Inserts a key into the set. + /// Inserts a key into the set. Returns true if key + /// did not exist in the set before. pub fn insert(&mut self, key: K) -> bool { - self.map.insert(key, ()) != None + self.map.insert(key, ()) == None } /// Returns `true` if the key exists in the map, `false` otherwise. From 3484ff6e6e983bfed102f487010e11736a4e1bbb Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Tue, 19 Mar 2024 13:11:11 +0000 Subject: [PATCH 04/32] clippy --- src/btreeset.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index 308c3517..510ab05a 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -79,7 +79,7 @@ where /// Inserts a key into the set. Returns true if key /// did not exist in the set before. pub fn insert(&mut self, key: K) -> bool { - self.map.insert(key, ()) == None + self.map.insert(key, ()).is_none() } /// Returns `true` if the key exists in the map, `false` otherwise. @@ -121,7 +121,7 @@ where /// Removes a key from the map, returning true if it exists. pub fn remove(&mut self, key: &K) -> bool { - self.map.remove(key) != None + self.map.remove(key).is_some() } /// Removes and returns the last element in the set. The key of this element is the maximum key that was in the set. From 2e1f509a2c9e260a50010df1440ef742563db5a2 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Tue, 19 Mar 2024 13:54:24 +0000 Subject: [PATCH 05/32] refactor --- src/btreeset.rs | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index 510ab05a..e0b3a6c0 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -1,29 +1,28 @@ -use crate::{btreemap::Iter, BTreeMap, Memory, Storable}; +use crate::{btreemap::Iter as IterMap, BTreeMap, Memory, Storable}; use core::ops::RangeBounds; -/// An iterator over the entries of a [`BTreeMap`]. -#[must_use = "iterators are lazy and do nothing unless consumed"] -pub struct IterSet<'a, K, M> +/// An iterator over the entries of a [`BTreeSet`]. +pub struct Iter<'a, K, M> where K: Storable + Ord + Clone, M: Memory, { - iter_internal: Iter<'a, K, (), M>, + iter_internal: IterMap<'a, K, (), M>, } -impl<'a, K, M> IterSet<'a, K, M> +impl<'a, K, M> Iter<'a, K, M> where K: Storable + Ord + Clone, M: Memory, { - fn new(iter: Iter<'a, K, (), M>) -> Self { - IterSet { + fn new(iter: IterMap<'a, K, (), M>) -> Self { + Iter { iter_internal: iter, } } } -impl Iterator for IterSet<'_, K, M> +impl Iterator for Iter<'_, K, M> where K: Storable + Ord + Clone, M: Memory, @@ -135,20 +134,20 @@ where } /// Returns an iterator over the entries of the set, sorted by key. - pub fn iter(&self) -> IterSet { - IterSet::new(self.map.iter()) + pub fn iter(&self) -> Iter { + Iter::new(self.map.iter()) } /// Returns an iterator over the entries in the set where keys /// belong to the specified range. - pub fn range(&self, key_range: impl RangeBounds) -> IterSet { - IterSet::new(self.map.range(key_range)) + pub fn range(&self, key_range: impl RangeBounds) -> Iter { + Iter::new(self.map.range(key_range)) } /// Returns an iterator pointing to the first element below the given bound. /// Returns an empty iterator if there are no keys below the given bound. - pub fn iter_upper_bound(&self, bound: &K) -> IterSet { - IterSet::new(self.map.iter_upper_bound(bound)) + pub fn iter_upper_bound(&self, bound: &K) -> Iter { + Iter::new(self.map.iter_upper_bound(bound)) } } From e245d9b149c6e8a8cc0566b04c514cde1fba88bb Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Fri, 25 Apr 2025 09:06:38 +0000 Subject: [PATCH 06/32] . --- benchmarks/src/btreeset.rs | 52 +++++++++++ src/btreeset.rs | 176 ++++++++++++++++++++++++++++++++++--- src/btreeset/proptests.rs | 174 ++++++++++++++++++++++++++++++++++++ 3 files changed, 390 insertions(+), 12 deletions(-) create mode 100644 benchmarks/src/btreeset.rs create mode 100644 src/btreeset/proptests.rs diff --git a/benchmarks/src/btreeset.rs b/benchmarks/src/btreeset.rs new file mode 100644 index 00000000..4111d7c0 --- /dev/null +++ b/benchmarks/src/btreeset.rs @@ -0,0 +1,52 @@ +use crate::Random; +use canbench_rs::{bench, bench_fn, BenchResult}; +use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; +use tiny_rng::{Rand, Rng}; + +#[bench(raw)] +pub fn btreeset_insert_10k_elements() -> BenchResult { + let mut btreeset = BTreeSet::new(DefaultMemoryImpl::default()); + + bench_fn(|| { + for i in 0..10_000 { + btreeset.insert(i); + } + }) +} + +#[bench(raw)] +pub fn btreeset_remove_10k_elements() -> BenchResult { + let mut btreeset = BTreeSet::new(DefaultMemoryImpl::default()); + + for i in 0..10_000 { + btreeset.insert(i); + } + + bench_fn(|| { + for i in 0..10_000 { + btreeset.remove(&i); + } + }) +} + +#[bench(raw)] +pub fn btreeset_iterate_10k_elements() -> BenchResult { + let mut btreeset = BTreeSet::new(DefaultMemoryImpl::default()); + + for i in 0..10_000 { + btreeset.insert(i); + } + + bench_fn(|| for _ in btreeset.iter() {}) +} + +#[bench(raw)] +pub fn btreeset_range_query() -> BenchResult { + let mut btreeset = BTreeSet::new(DefaultMemoryImpl::default()); + + for i in 0..10_000 { + btreeset.insert(i); + } + + bench_fn(|| for _ in btreeset.range(2000..8000) {}) +} diff --git a/src/btreeset.rs b/src/btreeset.rs index e0b3a6c0..3b2729a2 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -1,6 +1,9 @@ use crate::{btreemap::Iter as IterMap, BTreeMap, Memory, Storable}; use core::ops::RangeBounds; +#[cfg(test)] +mod proptests; + /// An iterator over the entries of a [`BTreeSet`]. pub struct Iter<'a, K, M> where @@ -154,10 +157,8 @@ where #[cfg(test)] mod test { use super::*; - use crate::{ - storable::{Blob, Bound as StorableBound}, - VectorMemory, - }; + use crate::storable::Blob; + use crate::VectorMemory; use std::cell::RefCell; use std::rc::Rc; @@ -165,17 +166,12 @@ mod test { Rc::new(RefCell::new(Vec::new())) } - // A helper method to succinctly create an entry. - fn e(x: u8) -> (Blob<10>, Vec) { - (b(&[x]), vec![]) - } - pub(crate) fn b(x: &[u8]) -> Blob<10> { Blob::<10>::try_from(x).unwrap() } - // A test runner that runs the test using both V1 and V2 btrees. - pub fn btree_test(f: F) + /// A test runner that runs the test using `BTreeSet`. + pub fn run_btree_test(f: F) where K: Storable + Ord + Clone, F: Fn(BTreeSet) -> R, @@ -187,7 +183,7 @@ mod test { #[test] fn init_preserves_data_set() { - btree_test(|mut btree| { + run_btree_test(|mut btree| { assert!(btree.insert(b(&[1, 2, 3]))); assert!(btree.contains_key(&b(&[1, 2, 3]))); @@ -199,3 +195,159 @@ mod test { }); } } + +#[cfg(test)] +mod tests { + use super::*; + use std::cell::RefCell; + use std::rc::Rc; + + /// Creates a new shared memory instance. + pub(crate) fn make_memory() -> Rc>> { + Rc::new(RefCell::new(Vec::new())) + } + + #[test] + fn test_insert_and_contains() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + assert!(!btreeset.contains_key(&1u32)); + btreeset.insert(1u32); + assert!(btreeset.contains_key(&1u32)); + } + + #[test] + fn test_remove() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + btreeset.insert(1u32); + assert!(btreeset.contains_key(&1u32)); + btreeset.remove(&1u32); + assert!(!btreeset.contains_key(&1u32)); + } + + #[test] + fn test_iter_upper_bound() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + for i in 0u32..100 { + btreeset.insert(i); + for j in 0u32..=i { + assert_eq!( + btreeset.iter_upper_bound(&(j + 1)).next(), + Some(j), + "failed to get an upper bound for {}", + j + 1 + ); + } + assert_eq!( + btreeset.iter_upper_bound(&0).next(), + None, + "0 must not have an upper bound" + ); + } + } + + #[test] + fn test_iter() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + btreeset.insert(1u32); + btreeset.insert(2u32); + btreeset.insert(3u32); + + let elements: Vec<_> = btreeset.iter().collect(); + assert_eq!(elements, vec![1u32, 2u32, 3u32]); + } + + #[test] + fn test_range() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + for i in 1u32..=10 { + btreeset.insert(i); + } + + let range: Vec<_> = btreeset.range(4u32..8u32).collect(); + assert_eq!(range, vec![4u32, 5u32, 6u32, 7u32]); + } + + #[test] + fn test_first_and_last() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + btreeset.insert(3u32); + btreeset.insert(1u32); + btreeset.insert(2u32); + + assert_eq!(btreeset.first_key(), Some(1u32)); + assert_eq!(btreeset.last_key(), Some(3u32)); + } + + #[test] + fn test_len_and_is_empty() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + assert!(btreeset.is_empty()); + assert_eq!(btreeset.len(), 0); + + btreeset.insert(1u32); + assert!(!btreeset.is_empty()); + assert_eq!(btreeset.len(), 1); + } + + #[test] + fn test_pop_first_and_last() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + btreeset.insert(3u32); + btreeset.insert(1u32); + btreeset.insert(2u32); + + assert_eq!(btreeset.pop_first(), Some(1u32)); + assert_eq!(btreeset.pop_last(), Some(3u32)); + assert_eq!(btreeset.len(), 1); + } + + #[test] + fn test_clear() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + btreeset.insert(1u32); + btreeset.insert(2u32); + btreeset.clear(); + + assert!(btreeset.is_empty()); + assert_eq!(btreeset.len(), 0); + } + + #[test] + fn test_range_various_prefixes() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + for i in [ + 1u32, 2u32, 3u32, 4u32, 11u32, 12u32, 13u32, 14u32, 21u32, 22u32, 23u32, 24u32, + ] { + btreeset.insert(i); + } + + let range: Vec<_> = btreeset.range(10u32..20u32).collect(); + assert_eq!(range, vec![11u32, 12u32, 13u32, 14u32]); + + let range: Vec<_> = btreeset.range(0u32..10u32).collect(); + assert_eq!(range, vec![1u32, 2u32, 3u32, 4u32]); + + let range: Vec<_> = btreeset.range(20u32..30u32).collect(); + assert_eq!(range, vec![21u32, 22u32, 23u32, 24u32]); + } +} diff --git a/src/btreeset/proptests.rs b/src/btreeset/proptests.rs new file mode 100644 index 00000000..bb8f855e --- /dev/null +++ b/src/btreeset/proptests.rs @@ -0,0 +1,174 @@ +use crate::{btreeset::BTreeSet, Memory}; +use proptest::collection::vec as pvec; +use proptest::prelude::*; +use std::collections::BTreeSet as StdBTreeSet; +use test_strategy::proptest; + +#[derive(Debug, Clone)] +enum Operation { + Insert(Vec), + Remove(Vec), + Contains(Vec), + Iter { from: usize, len: usize }, + Range { from: usize, len: usize }, + PopFirst, + PopLast, +} + +// A custom strategy that gives unequal weights to the different operations. +fn operation_strategy() -> impl Strategy { + prop_oneof![ + 50 => any::>().prop_map(Operation::Insert), + 20 => any::>().prop_map(Operation::Remove), + 20 => any::>().prop_map(Operation::Contains), + 5 => (any::(), any::()) + .prop_map(|(from, len)| Operation::Iter { from, len }), + 5 => (any::(), any::()) + .prop_map(|(from, len)| Operation::Range { from, len }), + 2 => Just(Operation::PopFirst), + 2 => Just(Operation::PopLast), + ] +} + +// Runs a comprehensive test for the major stable BTreeSet operations. +// Results are validated against a standard BTreeSet. +#[proptest(cases = 10)] +fn comprehensive(#[strategy(pvec(operation_strategy(), 100..5_000))] ops: Vec) { + let mem = crate::btreeset::test::make_memory(); + let mut btreeset = BTreeSet::new(mem); + let mut std_btreeset = StdBTreeSet::new(); + + // Execute all the operations, validating that the stable btreeset behaves similarly to a std + // btreeset. + for op in ops.into_iter() { + execute_operation(&mut std_btreeset, &mut btreeset, op); + } +} + +#[proptest] +fn set_min_max(#[strategy(pvec(any::(), 10..100))] keys: Vec) { + crate::btreeset::test::run_btree_test(|mut set| { + prop_assert_eq!(set.first_key(), None); + prop_assert_eq!(set.last_key(), None); + + for (n, key) in keys.iter().enumerate() { + set.insert(*key); + + let min = keys[0..=n].iter().min().unwrap(); + let max = keys[0..=n].iter().max().unwrap(); + + prop_assert_eq!(set.first_key(), Some(*min)); + prop_assert_eq!(set.last_key(), Some(*max)); + } + + Ok(()) + }); +} + +#[proptest] +fn set_upper_bound_iter(#[strategy(pvec(0u64..u64::MAX - 1, 10..100))] keys: Vec) { + crate::btreeset::test::run_btree_test(|mut set| { + for k in keys.iter() { + set.insert(*k); + + // Fixed: Removed `.copied()` as `Option` is not an iterator. + prop_assert_eq!(Some(*k), set.iter_upper_bound(&(k + 1)).next()); + } + + Ok(()) + }); +} + +// Given an operation, executes it on the given stable btreeset and standard btreeset, verifying +// that the result of the operation is equal in both btrees. +fn execute_operation( + std_btreeset: &mut StdBTreeSet>, + btreeset: &mut BTreeSet, M>, + op: Operation, +) { + match op { + Operation::Insert(key) => { + let std_res = std_btreeset.insert(key.clone()); + + eprintln!("Insert({})", hex::encode(&key)); + let res = btreeset.insert(key); + assert_eq!(std_res, res); + } + Operation::Remove(key) => { + let std_res = std_btreeset.remove(&key); + + eprintln!("Remove({})", hex::encode(&key)); + let res = btreeset.remove(&key); + assert_eq!(std_res, res); + } + Operation::Contains(key) => { + let std_res = std_btreeset.contains(&key); + + eprintln!("Contains({})", hex::encode(&key)); + let res = btreeset.contains_key(&key); + assert_eq!(std_res, res); + } + Operation::Iter { from, len } => { + assert_eq!(std_btreeset.len(), btreeset.len() as usize); + if std_btreeset.is_empty() { + return; + } + + let from = from % std_btreeset.len(); + let len = len % std_btreeset.len(); + + eprintln!("Iterate({}, {})", from, len); + let std_iter = std_btreeset.iter().skip(from).take(len); + let mut stable_iter = btreeset.iter().skip(from).take(len); + for k1 in std_iter { + let k2 = stable_iter.next().unwrap(); + assert_eq!(k1, &k2); + } + assert!(stable_iter.next().is_none()); + } + Operation::Range { from, len } => { + assert_eq!(std_btreeset.len(), btreeset.len() as usize); + if std_btreeset.is_empty() { + return; + } + + eprintln!("Range({}, {})", from, len); + let from = from % std_btreeset.len(); + let end = std::cmp::min(std_btreeset.len() - 1, from + len); + + // Create a range for the stable btreeset from the keys at indexes `from` and `end`. + let range_start = btreeset.iter().skip(from).take(1).next().unwrap().clone(); + let range_end = btreeset.iter().skip(end).take(1).next().unwrap().clone(); + let stable_range = btreeset.range(range_start..range_end); + + // Create a range for the std btreeset from the keys at indexes `from` and `end`. + let range_start = std_btreeset + .iter() + .skip(from) + .take(1) + .next() + .unwrap() + .clone(); + let range_end = std_btreeset + .iter() + .skip(end) + .take(1) + .next() + .unwrap() + .clone(); + let std_range = std_btreeset.range(range_start..range_end); + + for (k1, k2) in std_range.zip(stable_range) { + assert_eq!(k1, &k2); + } + } + Operation::PopFirst => { + eprintln!("PopFirst"); + assert_eq!(std_btreeset.pop_first(), btreeset.pop_first()); + } + Operation::PopLast => { + eprintln!("PopLast"); + assert_eq!(std_btreeset.pop_last(), btreeset.pop_last()); + } + }; +} From 6ff4a332e7c6782ef129d79b46472538403b7064 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Fri, 25 Apr 2025 10:07:44 +0000 Subject: [PATCH 07/32] . --- benchmarks/src/btreeset.rs | 77 ++++++++++++++++++++++++++++---------- benchmarks/src/main.rs | 1 + canbench_results.yml | 50 ++++++++++++++++++++++++- 3 files changed, 107 insertions(+), 21 deletions(-) diff --git a/benchmarks/src/btreeset.rs b/benchmarks/src/btreeset.rs index 4111d7c0..4f5d3b48 100644 --- a/benchmarks/src/btreeset.rs +++ b/benchmarks/src/btreeset.rs @@ -1,52 +1,89 @@ -use crate::Random; use canbench_rs::{bench, bench_fn, BenchResult}; -use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; -use tiny_rng::{Rand, Rng}; +use ic_stable_structures::storable::Blob; +use ic_stable_structures::{btreeset::BTreeSet, DefaultMemoryImpl, Storable}; -#[bench(raw)] -pub fn btreeset_insert_10k_elements() -> BenchResult { +// Define type alias for Blob<8>. +type Blob8 = Blob<8>; + +/// Helper macro to generate benchmarks. +macro_rules! bench_tests { + ($( $fn_name:ident, $helper:ident, $k:expr );+ $(;)?) => { + $( + #[bench(raw)] + pub fn $fn_name() -> BenchResult { + $helper::<$k>() + } + )+ + }; +} +// Profiles inserting a large number of keys into a BTreeSet. +fn insert_helper() -> BenchResult { let mut btreeset = BTreeSet::new(DefaultMemoryImpl::default()); + let num_keys = 10_000; bench_fn(|| { - for i in 0..10_000 { - btreeset.insert(i); + for i in 0..num_keys { + let key = generate_key::(i); + btreeset.insert(key); } }) } -#[bench(raw)] -pub fn btreeset_remove_10k_elements() -> BenchResult { +// Profiles removing a large number of keys from a BTreeSet. +fn remove_helper() -> BenchResult { let mut btreeset = BTreeSet::new(DefaultMemoryImpl::default()); + let num_keys = 10_000; - for i in 0..10_000 { - btreeset.insert(i); + for i in 0..num_keys { + btreeset.insert(generate_key::(i)); } bench_fn(|| { - for i in 0..10_000 { - btreeset.remove(&i); + for i in 0..num_keys { + let key = generate_key::(i); + btreeset.remove(&key); } }) } -#[bench(raw)] -pub fn btreeset_iterate_10k_elements() -> BenchResult { +// Profiles iterating over a BTreeSet. +fn iter_helper() -> BenchResult { let mut btreeset = BTreeSet::new(DefaultMemoryImpl::default()); for i in 0..10_000 { - btreeset.insert(i); + btreeset.insert(generate_key::(i)); } bench_fn(|| for _ in btreeset.iter() {}) } -#[bench(raw)] -pub fn btreeset_range_query() -> BenchResult { +// Profiles range queries on a BTreeSet. +fn range_helper() -> BenchResult { let mut btreeset = BTreeSet::new(DefaultMemoryImpl::default()); for i in 0..10_000 { - btreeset.insert(i); + btreeset.insert(generate_key::(i)); } - bench_fn(|| for _ in btreeset.range(2000..8000) {}) + let start = generate_key::(2000); + let end = generate_key::(8000); + + bench_fn(|| for _ in btreeset.range(start..end) {}) +} + +// Generates keys directly based on the type `K`. +fn generate_key(i: u32) -> K { + K::from_bytes(std::borrow::Cow::Owned(i.to_be_bytes().to_vec())) +} + +// Define benchmarks for various BTreeSet operations with different types. +bench_tests! { + btreeset_insert_u32, insert_helper, u32; + btreeset_insert_blob_8, insert_helper, Blob8; + btreeset_remove_u32, remove_helper, u32; + btreeset_remove_blob_8, remove_helper, Blob8; + btreeset_iter_u32, iter_helper, u32; + btreeset_iter_blob_8, iter_helper, Blob8; + btreeset_range_u32, range_helper, u32; + btreeset_range_blob_8, range_helper, Blob8; } diff --git a/benchmarks/src/main.rs b/benchmarks/src/main.rs index 723976b5..e3f7adba 100644 --- a/benchmarks/src/main.rs +++ b/benchmarks/src/main.rs @@ -2,6 +2,7 @@ use ic_stable_structures::storable::{Blob, Storable}; use tiny_rng::{Rand, Rng}; mod btreemap; +mod btreeset; mod memory_manager; mod vec; diff --git a/canbench_results.yml b/canbench_results.yml index fd1265e1..2342a543 100644 --- a/canbench_results.yml +++ b/canbench_results.yml @@ -157,7 +157,7 @@ benches: scopes: {} btreemap_insert_10mib_values: total: - instructions: 5239421355 + instructions: 5239412179 heap_increase: 0 stable_memory_increase: 3613 scopes: {} @@ -629,6 +629,54 @@ benches: heap_increase: 0 stable_memory_increase: 0 scopes: {} + btreeset_insert_blob_8: + total: + instructions: 493151302 + heap_increase: 0 + stable_memory_increase: 6 + scopes: {} + btreeset_insert_u32: + total: + instructions: 437528599 + heap_increase: 0 + stable_memory_increase: 5 + scopes: {} + btreeset_iter_blob_8: + total: + instructions: 17485721 + heap_increase: 0 + stable_memory_increase: 0 + scopes: {} + btreeset_iter_u32: + total: + instructions: 14354367 + heap_increase: 0 + stable_memory_increase: 0 + scopes: {} + btreeset_range_blob_8: + total: + instructions: 11185498 + heap_increase: 0 + stable_memory_increase: 0 + scopes: {} + btreeset_range_u32: + total: + instructions: 8666520 + heap_increase: 0 + stable_memory_increase: 0 + scopes: {} + btreeset_remove_blob_8: + total: + instructions: 481746325 + heap_increase: 0 + stable_memory_increase: 0 + scopes: {} + btreeset_remove_u32: + total: + instructions: 427326663 + heap_increase: 0 + stable_memory_increase: 0 + scopes: {} memory_manager_baseline: total: instructions: 1176576907 From dde82679fa72db9101a5d558fb5c7579eb923d05 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Fri, 25 Apr 2025 13:25:18 +0000 Subject: [PATCH 08/32] . --- src/btreeset/proptests.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/btreeset/proptests.rs b/src/btreeset/proptests.rs index bb8f855e..d1084b57 100644 --- a/src/btreeset/proptests.rs +++ b/src/btreeset/proptests.rs @@ -71,7 +71,6 @@ fn set_upper_bound_iter(#[strategy(pvec(0u64..u64::MAX - 1, 10..100))] keys: Vec for k in keys.iter() { set.insert(*k); - // Fixed: Removed `.copied()` as `Option` is not an iterator. prop_assert_eq!(Some(*k), set.iter_upper_bound(&(k + 1)).next()); } From 9615d6cf0a095b0a43723e88820ea8a0c921b989 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Fri, 25 Apr 2025 13:29:50 +0000 Subject: [PATCH 09/32] . --- src/btreeset.rs | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index 3b2729a2..acd5dff2 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -162,6 +162,7 @@ mod test { use std::cell::RefCell; use std::rc::Rc; + /// Creates a new shared memory instance. pub(crate) fn make_memory() -> Rc>> { Rc::new(RefCell::new(Vec::new())) } @@ -194,18 +195,6 @@ mod test { assert!(btree.contains_key(&b(&[1, 2, 3]))); }); } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::cell::RefCell; - use std::rc::Rc; - - /// Creates a new shared memory instance. - pub(crate) fn make_memory() -> Rc>> { - Rc::new(RefCell::new(Vec::new())) - } #[test] fn test_insert_and_contains() { From e3d31cb8b5d4e21d18603e5930730b02ce1f70d0 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Fri, 25 Apr 2025 13:35:29 +0000 Subject: [PATCH 10/32] . --- src/btreeset.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/btreeset.rs b/src/btreeset.rs index acd5dff2..25d662ce 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -304,6 +304,8 @@ mod test { assert_eq!(btreeset.pop_first(), Some(1u32)); assert_eq!(btreeset.pop_last(), Some(3u32)); assert_eq!(btreeset.len(), 1); + assert_eq!(btreeset.first_key(), Some(2u32)); + assert_eq!(btreeset.last_key(), Some(2u32)); } #[test] From 9386c457fada6c0603653afa23e9f1041fa52d47 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Fri, 25 Apr 2025 13:36:37 +0000 Subject: [PATCH 11/32] . --- src/btreeset/proptests.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/btreeset/proptests.rs b/src/btreeset/proptests.rs index d1084b57..b4fe0088 100644 --- a/src/btreeset/proptests.rs +++ b/src/btreeset/proptests.rs @@ -16,6 +16,8 @@ enum Operation { } // A custom strategy that gives unequal weights to the different operations. +// Note that `Insert` has a higher weight than `Remove` so that, on average, BTreeSets +// are growing in size the more operations are executed. fn operation_strategy() -> impl Strategy { prop_oneof![ 50 => any::>().prop_map(Operation::Insert), From 53fa85e30760589367c339510697a2919bc7b2f7 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Fri, 25 Apr 2025 14:25:22 +0000 Subject: [PATCH 12/32] . --- README.md | 20 ++++++++++++++++++-- src/btreeset.rs | 30 ++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index db1bbba3..93a2eb06 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ For more information about the philosophy behind the library, see [Roman's tutor ## Available Data Structures - [BTreeMap]: A Key-Value store +- [BTreeSet]: A set of unique elements - [Vec]: A growable array - [Log]: An append-only list of variable-size entries - [Cell]: A serializable value @@ -38,7 +39,9 @@ Stable structures are able to work directly in stable memory because each data s its own memory. When initializing a stable structure, a memory is provided that the data structure can use to store its data. -Here's a basic example: +Here's a basic examples: + +### Example: BTreeMap ```rust use ic_stable_structures::{BTreeMap, DefaultMemoryImpl}; @@ -54,12 +57,25 @@ This includes stable memory, a vector ([VectorMemory]), or even a flat file ([Fi The example above initializes a [BTreeMap] with a [DefaultMemoryImpl], which maps to stable memory when used in a canister and to a [VectorMemory] otherwise. +### Example: BTreeSet + +The `BTreeSet` is a stable set implementation based on a B-Tree. It allows efficient insertion, deletion, and lookup of unique elements. + +```rust +use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; +let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + +set.insert(42); +assert!(set.contains(&42)); +assert_eq!(set.pop_first(), Some(42)); +assert!(set.is_empty()); +``` + Note that **stable structures cannot share memories.** Each memory must belong to only one stable structure. For example, this fails when run in a canister: - ```no_run use ic_stable_structures::{BTreeMap, DefaultMemoryImpl}; let mut map_1: BTreeMap = BTreeMap::init(DefaultMemoryImpl::default()); diff --git a/src/btreeset.rs b/src/btreeset.rs index 25d662ce..fcb74681 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -1,3 +1,33 @@ +//! This module implements a set based on a B-Tree +//! in stable memory. +//! +//! # Overview +//! +//! A `BTreeSet` is a "stable" set based on a B-tree. It is designed to work directly in stable memory, +//! allowing it to persist across upgrades and scale to large sizes. +//! +//! The implementation is based on the algorithm outlined in "Introduction to Algorithms" +//! by Cormen et al. +//! +//! ## Features +//! +//! - **Efficient Operations**: Provides efficient insertion, deletion, and lookup operations. +//! - **Persistence**: Works directly in stable memory, persisting across upgrades. +//! - **Scalability**: Can scale to gigabytes in size. +//! +//! ## Example +//! +//! ```rust +//! use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; +//! +//! let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); +//! +//! set.insert(42); +//! assert!(set.contains(&42)); +//! assert_eq!(set.pop_first(), Some(42)); +//! assert!(set.is_empty()); +//! ``` + use crate::{btreemap::Iter as IterMap, BTreeMap, Memory, Storable}; use core::ops::RangeBounds; From a16992384c95d1dee599ab385c1fd118bdb2f9e1 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Fri, 25 Apr 2025 14:31:25 +0000 Subject: [PATCH 13/32] . --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index d6933284..e528dd4b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,6 +22,7 @@ pub use vec::{Vec as StableVec, Vec}; pub mod vec_mem; pub mod writer; pub use btreemap::{BTreeMap, BTreeMap as StableBTreeMap}; +pub use btreeset::{BTreeSet, BTreeSet as StableBTreeSet}; pub use file_mem::FileMemory; #[cfg(target_arch = "wasm32")] pub use ic0_memory::Ic0StableMemory; From 198e1e80ed79379fc9bc65b42675cec2f0b5f2e7 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Fri, 25 Apr 2025 14:35:49 +0000 Subject: [PATCH 14/32] . --- src/btreeset.rs | 38 +++++++++++++++++++------------------- src/btreeset/proptests.rs | 10 +++++----- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index fcb74681..b46fdde7 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -114,17 +114,17 @@ where self.map.insert(key, ()).is_none() } - /// Returns `true` if the key exists in the map, `false` otherwise. - pub fn contains_key(&self, key: &K) -> bool { + /// Returns `true` if the key exists in the set, `false` otherwise. + pub fn contains(&self, key: &K) -> bool { self.map.get(key).is_some() } - /// Returns `true` if the map contains no elements. + /// Returns `true` if the set contains no elements. pub fn is_empty(&self) -> bool { self.map.is_empty() } - /// Returns the number of elements in the map. + /// Returns the number of elements in the set. pub fn len(&self) -> u64 { self.map.len() } @@ -139,19 +139,19 @@ where self.map.clear_new(); } - /// Returns the first key in the map. This key - /// is the minimum key in the map. - pub fn first_key(&self) -> Option { + /// Returns the first key in the set. This key + /// is the minimum key in the set. + pub fn first(&self) -> Option { self.map.first_key_value().map(|(a, _)| a) } /// Returns the last key in the set. This key /// is the maximum key in the set. - pub fn last_key(&self) -> Option { + pub fn last(&self) -> Option { self.map.last_key_value().map(|(a, _)| a) } - /// Removes a key from the map, returning true if it exists. + /// Removes a key from the set, returning true if it exists. pub fn remove(&mut self, key: &K) -> bool { self.map.remove(key).is_some() } @@ -216,13 +216,13 @@ mod test { fn init_preserves_data_set() { run_btree_test(|mut btree| { assert!(btree.insert(b(&[1, 2, 3]))); - assert!(btree.contains_key(&b(&[1, 2, 3]))); + assert!(btree.contains(&b(&[1, 2, 3]))); // Reload the btree let btree = BTreeSet::init(btree.into_memory()); // Data still exists. - assert!(btree.contains_key(&b(&[1, 2, 3]))); + assert!(btree.contains(&b(&[1, 2, 3]))); }); } @@ -231,9 +231,9 @@ mod test { let mem = make_memory(); let mut btreeset = BTreeSet::new(mem); - assert!(!btreeset.contains_key(&1u32)); + assert!(!btreeset.contains(&1u32)); btreeset.insert(1u32); - assert!(btreeset.contains_key(&1u32)); + assert!(btreeset.contains(&1u32)); } #[test] @@ -242,9 +242,9 @@ mod test { let mut btreeset = BTreeSet::new(mem); btreeset.insert(1u32); - assert!(btreeset.contains_key(&1u32)); + assert!(btreeset.contains(&1u32)); btreeset.remove(&1u32); - assert!(!btreeset.contains_key(&1u32)); + assert!(!btreeset.contains(&1u32)); } #[test] @@ -305,8 +305,8 @@ mod test { btreeset.insert(1u32); btreeset.insert(2u32); - assert_eq!(btreeset.first_key(), Some(1u32)); - assert_eq!(btreeset.last_key(), Some(3u32)); + assert_eq!(btreeset.first(), Some(1u32)); + assert_eq!(btreeset.last(), Some(3u32)); } #[test] @@ -334,8 +334,8 @@ mod test { assert_eq!(btreeset.pop_first(), Some(1u32)); assert_eq!(btreeset.pop_last(), Some(3u32)); assert_eq!(btreeset.len(), 1); - assert_eq!(btreeset.first_key(), Some(2u32)); - assert_eq!(btreeset.last_key(), Some(2u32)); + assert_eq!(btreeset.first(), Some(2u32)); + assert_eq!(btreeset.last(), Some(2u32)); } #[test] diff --git a/src/btreeset/proptests.rs b/src/btreeset/proptests.rs index b4fe0088..5a8a9f1a 100644 --- a/src/btreeset/proptests.rs +++ b/src/btreeset/proptests.rs @@ -50,8 +50,8 @@ fn comprehensive(#[strategy(pvec(operation_strategy(), 100..5_000))] ops: Vec(), 10..100))] keys: Vec) { crate::btreeset::test::run_btree_test(|mut set| { - prop_assert_eq!(set.first_key(), None); - prop_assert_eq!(set.last_key(), None); + prop_assert_eq!(set.first(), None); + prop_assert_eq!(set.last(), None); for (n, key) in keys.iter().enumerate() { set.insert(*key); @@ -59,8 +59,8 @@ fn set_min_max(#[strategy(pvec(any::(), 10..100))] keys: Vec) { let min = keys[0..=n].iter().min().unwrap(); let max = keys[0..=n].iter().max().unwrap(); - prop_assert_eq!(set.first_key(), Some(*min)); - prop_assert_eq!(set.last_key(), Some(*max)); + prop_assert_eq!(set.first(), Some(*min)); + prop_assert_eq!(set.last(), Some(*max)); } Ok(()) @@ -106,7 +106,7 @@ fn execute_operation( let std_res = std_btreeset.contains(&key); eprintln!("Contains({})", hex::encode(&key)); - let res = btreeset.contains_key(&key); + let res = btreeset.contains(&key); assert_eq!(std_res, res); } Operation::Iter { from, len } => { From 8e9ae09852b427c3703fcacfcee8ea91fbf107cc Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Mon, 28 Apr 2025 07:44:15 +0000 Subject: [PATCH 15/32] . --- benchmarks/src/btreeset.rs | 89 -------------------------------------- benchmarks/src/main.rs | 1 - canbench_results.yml | 48 -------------------- 3 files changed, 138 deletions(-) delete mode 100644 benchmarks/src/btreeset.rs diff --git a/benchmarks/src/btreeset.rs b/benchmarks/src/btreeset.rs deleted file mode 100644 index 4f5d3b48..00000000 --- a/benchmarks/src/btreeset.rs +++ /dev/null @@ -1,89 +0,0 @@ -use canbench_rs::{bench, bench_fn, BenchResult}; -use ic_stable_structures::storable::Blob; -use ic_stable_structures::{btreeset::BTreeSet, DefaultMemoryImpl, Storable}; - -// Define type alias for Blob<8>. -type Blob8 = Blob<8>; - -/// Helper macro to generate benchmarks. -macro_rules! bench_tests { - ($( $fn_name:ident, $helper:ident, $k:expr );+ $(;)?) => { - $( - #[bench(raw)] - pub fn $fn_name() -> BenchResult { - $helper::<$k>() - } - )+ - }; -} -// Profiles inserting a large number of keys into a BTreeSet. -fn insert_helper() -> BenchResult { - let mut btreeset = BTreeSet::new(DefaultMemoryImpl::default()); - let num_keys = 10_000; - - bench_fn(|| { - for i in 0..num_keys { - let key = generate_key::(i); - btreeset.insert(key); - } - }) -} - -// Profiles removing a large number of keys from a BTreeSet. -fn remove_helper() -> BenchResult { - let mut btreeset = BTreeSet::new(DefaultMemoryImpl::default()); - let num_keys = 10_000; - - for i in 0..num_keys { - btreeset.insert(generate_key::(i)); - } - - bench_fn(|| { - for i in 0..num_keys { - let key = generate_key::(i); - btreeset.remove(&key); - } - }) -} - -// Profiles iterating over a BTreeSet. -fn iter_helper() -> BenchResult { - let mut btreeset = BTreeSet::new(DefaultMemoryImpl::default()); - - for i in 0..10_000 { - btreeset.insert(generate_key::(i)); - } - - bench_fn(|| for _ in btreeset.iter() {}) -} - -// Profiles range queries on a BTreeSet. -fn range_helper() -> BenchResult { - let mut btreeset = BTreeSet::new(DefaultMemoryImpl::default()); - - for i in 0..10_000 { - btreeset.insert(generate_key::(i)); - } - - let start = generate_key::(2000); - let end = generate_key::(8000); - - bench_fn(|| for _ in btreeset.range(start..end) {}) -} - -// Generates keys directly based on the type `K`. -fn generate_key(i: u32) -> K { - K::from_bytes(std::borrow::Cow::Owned(i.to_be_bytes().to_vec())) -} - -// Define benchmarks for various BTreeSet operations with different types. -bench_tests! { - btreeset_insert_u32, insert_helper, u32; - btreeset_insert_blob_8, insert_helper, Blob8; - btreeset_remove_u32, remove_helper, u32; - btreeset_remove_blob_8, remove_helper, Blob8; - btreeset_iter_u32, iter_helper, u32; - btreeset_iter_blob_8, iter_helper, Blob8; - btreeset_range_u32, range_helper, u32; - btreeset_range_blob_8, range_helper, Blob8; -} diff --git a/benchmarks/src/main.rs b/benchmarks/src/main.rs index e3f7adba..723976b5 100644 --- a/benchmarks/src/main.rs +++ b/benchmarks/src/main.rs @@ -2,7 +2,6 @@ use ic_stable_structures::storable::{Blob, Storable}; use tiny_rng::{Rand, Rng}; mod btreemap; -mod btreeset; mod memory_manager; mod vec; diff --git a/canbench_results.yml b/canbench_results.yml index 2342a543..da2706af 100644 --- a/canbench_results.yml +++ b/canbench_results.yml @@ -629,54 +629,6 @@ benches: heap_increase: 0 stable_memory_increase: 0 scopes: {} - btreeset_insert_blob_8: - total: - instructions: 493151302 - heap_increase: 0 - stable_memory_increase: 6 - scopes: {} - btreeset_insert_u32: - total: - instructions: 437528599 - heap_increase: 0 - stable_memory_increase: 5 - scopes: {} - btreeset_iter_blob_8: - total: - instructions: 17485721 - heap_increase: 0 - stable_memory_increase: 0 - scopes: {} - btreeset_iter_u32: - total: - instructions: 14354367 - heap_increase: 0 - stable_memory_increase: 0 - scopes: {} - btreeset_range_blob_8: - total: - instructions: 11185498 - heap_increase: 0 - stable_memory_increase: 0 - scopes: {} - btreeset_range_u32: - total: - instructions: 8666520 - heap_increase: 0 - stable_memory_increase: 0 - scopes: {} - btreeset_remove_blob_8: - total: - instructions: 481746325 - heap_increase: 0 - stable_memory_increase: 0 - scopes: {} - btreeset_remove_u32: - total: - instructions: 427326663 - heap_increase: 0 - stable_memory_increase: 0 - scopes: {} memory_manager_baseline: total: instructions: 1176576907 From 58332f086d12285c98c45727492114cf22b00e4a Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Mon, 28 Apr 2025 08:40:16 +0000 Subject: [PATCH 16/32] . --- src/btreeset.rs | 185 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 182 insertions(+), 3 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index b46fdde7..2b943226 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -88,13 +88,29 @@ where /// /// If the memory provided already contains a `BTreeSet`, then that /// map is loaded. Otherwise, a new `BTreeSet` instance is created. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let set: BTreeSet = BTreeSet::init(DefaultMemoryImpl::default()); + /// ``` pub fn init(memory: M) -> Self { BTreeSet { map: BTreeMap::::init(memory), } } - /// Creates a new instance a `BTreeSet`. + /// Creates a new instance of a `BTreeSet`. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// ``` pub fn new(memory: M) -> Self { BTreeSet { map: BTreeMap::::new(memory), @@ -102,83 +118,246 @@ where } /// Loads the set from memory. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let set: BTreeSet = BTreeSet::load(DefaultMemoryImpl::default()); + /// ``` pub fn load(memory: M) -> Self { BTreeSet { map: BTreeMap::::load(memory), } } - /// Inserts a key into the set. Returns true if key + /// Inserts a key into the set. Returns `true` if the key /// did not exist in the set before. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// assert!(set.insert(42)); + /// assert!(!set.insert(42)); // Key already exists + /// ``` pub fn insert(&mut self, key: K) -> bool { self.map.insert(key, ()).is_none() } /// Returns `true` if the key exists in the set, `false` otherwise. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// set.insert(42); + /// assert!(set.contains(&42)); + /// assert!(!set.contains(&7)); + /// ``` pub fn contains(&self, key: &K) -> bool { self.map.get(key).is_some() } /// Returns `true` if the set contains no elements. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// assert!(set.is_empty()); + /// ``` pub fn is_empty(&self) -> bool { self.map.is_empty() } /// Returns the number of elements in the set. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// set.insert(42); + /// set.insert(7); + /// assert_eq!(set.len(), 2); + /// ``` pub fn len(&self) -> u64 { self.map.len() } /// Returns the underlying memory. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// let memory = set.into_memory(); + /// ``` pub fn into_memory(self) -> M { self.map.into_memory() } /// Removes all elements from the set. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// set.insert(42); + /// set.clear(); + /// assert!(set.is_empty()); + /// ``` pub fn clear(&mut self) { self.map.clear_new(); } /// Returns the first key in the set. This key /// is the minimum key in the set. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// set.insert(42); + /// set.insert(7); + /// assert_eq!(set.first(), Some(7)); + /// ``` pub fn first(&self) -> Option { self.map.first_key_value().map(|(a, _)| a) } /// Returns the last key in the set. This key /// is the maximum key in the set. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// set.insert(42); + /// set.insert(7); + /// assert_eq!(set.last(), Some(42)); + /// ``` pub fn last(&self) -> Option { self.map.last_key_value().map(|(a, _)| a) } - /// Removes a key from the set, returning true if it exists. + /// Removes a key from the set, returning `true` if it exists. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// set.insert(42); + /// assert!(set.remove(&42)); + /// assert!(!set.contains(&42)); + /// ``` pub fn remove(&mut self, key: &K) -> bool { self.map.remove(key).is_some() } /// Removes and returns the last element in the set. The key of this element is the maximum key that was in the set. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// set.insert(42); + /// set.insert(7); + /// assert_eq!(set.pop_last(), Some(42)); + /// ``` pub fn pop_last(&mut self) -> Option { self.map.pop_last().map(|(a, _)| a) } /// Removes and returns the first element in the set. The key of this element is the minimum key that was in the set. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// set.insert(42); + /// set.insert(7); + /// assert_eq!(set.pop_first(), Some(7)); + /// ``` pub fn pop_first(&mut self) -> Option { self.map.pop_first().map(|(a, _)| a) } /// Returns an iterator over the entries of the set, sorted by key. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// set.insert(42); + /// set.insert(7); + /// for key in set.iter() { + /// println!("{}", key); + /// } + /// ``` pub fn iter(&self) -> Iter { Iter::new(self.map.iter()) } /// Returns an iterator over the entries in the set where keys /// belong to the specified range. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// set.insert(1); + /// set.insert(2); + /// set.insert(3); + /// let range: Vec<_> = set.range(2..).collect(); + /// assert_eq!(range, vec![2, 3]); + /// ``` pub fn range(&self, key_range: impl RangeBounds) -> Iter { Iter::new(self.map.range(key_range)) } /// Returns an iterator pointing to the first element below the given bound. /// Returns an empty iterator if there are no keys below the given bound. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// set.insert(1); + /// set.insert(2); + /// set.insert(3); + /// let upper_bound: Vec<_> = set.iter_upper_bound(&3).collect(); + /// assert_eq!(upper_bound, vec![1, 2, 3]); + /// ``` pub fn iter_upper_bound(&self, bound: &K) -> Iter { Iter::new(self.map.iter_upper_bound(bound)) } From d720fa2f2b2402f6efd3b9a8bf9962bebbab796f Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Mon, 28 Apr 2025 09:24:57 +0000 Subject: [PATCH 17/32] . --- src/btreeset.rs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index 2b943226..13159a32 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -124,7 +124,15 @@ where /// ```rust /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; /// - /// let set: BTreeSet = BTreeSet::load(DefaultMemoryImpl::default()); + /// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// set.insert(42); + /// + /// // Save the set to memory + /// let memory = set.into_memory(); + /// + /// // Load the set from memory + /// let loaded_set: BTreeSet = BTreeSet::load(memory); + /// assert!(loaded_set.contains(&42)); /// ``` pub fn load(memory: M) -> Self { BTreeSet { @@ -343,8 +351,8 @@ where Iter::new(self.map.range(key_range)) } - /// Returns an iterator pointing to the first element below the given bound. - /// Returns an empty iterator if there are no keys below the given bound. + /// Returns an iterator pointing to the first element strictly below the given bound. + /// Returns an empty iterator if there are no keys strictly below the given bound. /// /// # Example /// @@ -355,8 +363,9 @@ where /// set.insert(1); /// set.insert(2); /// set.insert(3); - /// let upper_bound: Vec<_> = set.iter_upper_bound(&3).collect(); - /// assert_eq!(upper_bound, vec![1, 2, 3]); + /// + /// let upper_bound: Option = set.iter_upper_bound(&3).next(); + /// assert_eq!(upper_bound, Some(2)); /// ``` pub fn iter_upper_bound(&self, bound: &K) -> Iter { Iter::new(self.map.iter_upper_bound(bound)) @@ -433,7 +442,9 @@ mod test { for i in 0u32..100 { btreeset.insert(i); - for j in 0u32..=i { + + // Test that `iter_upper_bound` returns the largest element strictly below the bound. + for j in 1u32..=i { assert_eq!( btreeset.iter_upper_bound(&(j + 1)).next(), Some(j), From 17534a1a534d103566099257d702f92c1b8022a0 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Mon, 28 Apr 2025 09:38:23 +0000 Subject: [PATCH 18/32] . --- src/btreeset.rs | 114 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 99 insertions(+), 15 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index 13159a32..42a0d8a3 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -531,34 +531,118 @@ mod test { #[test] fn test_clear() { let mem = make_memory(); - let mut btreeset = BTreeSet::new(mem); + let mut btreeset: BTreeSet = BTreeSet::new(mem); - btreeset.insert(1u32); - btreeset.insert(2u32); - btreeset.clear(); + btreeset.insert(1); + btreeset.insert(2); + btreeset.insert(3); + assert_eq!(btreeset.len(), 3); + btreeset.clear(); assert!(btreeset.is_empty()); assert_eq!(btreeset.len(), 0); + assert_eq!(btreeset.iter().next(), None); } #[test] - fn test_range_various_prefixes() { + fn test_iterate_large_set() { let mem = make_memory(); - let mut btreeset = BTreeSet::new(mem); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 0..1000 { + btreeset.insert(i); + } + + let elements: Vec<_> = btreeset.iter().collect(); + assert_eq!(elements.len(), 1000); + assert_eq!(elements[0], 0); + assert_eq!(elements[999], 999); + } + + #[test] + fn test_iter_upper_bound_large_set() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 0u32..1000 { + btreeset.insert(i); + } + + assert_eq!(btreeset.iter_upper_bound(&500).next(), Some(499)); + assert_eq!(btreeset.iter_upper_bound(&0).next(), None); + assert_eq!(btreeset.iter_upper_bound(&1000).next(), Some(999)); + } + + #[test] + fn test_range_large_set() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); - for i in [ - 1u32, 2u32, 3u32, 4u32, 11u32, 12u32, 13u32, 14u32, 21u32, 22u32, 23u32, 24u32, - ] { + for i in 0u32..1000 { btreeset.insert(i); } - let range: Vec<_> = btreeset.range(10u32..20u32).collect(); - assert_eq!(range, vec![11u32, 12u32, 13u32, 14u32]); + let range: Vec<_> = btreeset.range(100..200).collect(); + assert_eq!(range.len(), 100); + assert_eq!(range[0], 100); + assert_eq!(range[99], 199); + } - let range: Vec<_> = btreeset.range(0u32..10u32).collect(); - assert_eq!(range, vec![1u32, 2u32, 3u32, 4u32]); + #[test] + fn test_empty_set() { + let mem = make_memory(); + let btreeset: BTreeSet = BTreeSet::new(mem); + + assert!(btreeset.is_empty()); + assert_eq!(btreeset.len(), 0); + assert_eq!(btreeset.first(), None); + assert_eq!(btreeset.last(), None); + assert_eq!(btreeset.iter().next(), None); + } + + #[test] + fn test_insert_duplicate() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + assert!(btreeset.insert(42)); + assert!(!btreeset.insert(42)); // Duplicate insert + assert_eq!(btreeset.len(), 1); + assert!(btreeset.contains(&42)); + } + + #[test] + fn test_remove_nonexistent() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + assert!(!btreeset.remove(&42)); // Removing a non-existent element + assert!(btreeset.is_empty()); + } + + #[test] + fn test_pop_first_and_last_empty() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + assert_eq!(btreeset.pop_first(), None); + assert_eq!(btreeset.pop_last(), None); + } + + #[test] + fn test_iter_upper_bound_empty() { + let mem = make_memory(); + let btreeset: BTreeSet = BTreeSet::new(mem); + + assert_eq!(btreeset.iter_upper_bound(&42u32).next(), None); + } + + #[test] + fn test_range_empty() { + let mem = make_memory(); + let btreeset: BTreeSet = BTreeSet::new(mem); - let range: Vec<_> = btreeset.range(20u32..30u32).collect(); - assert_eq!(range, vec![21u32, 22u32, 23u32, 24u32]); + let range: Vec<_> = btreeset.range(10..20).collect(); + assert!(range.is_empty()); } } From 5cf86f50911ad9da16cc81fde2342d6e9c442fa5 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Mon, 28 Apr 2025 09:48:06 +0000 Subject: [PATCH 19/32] . --- src/btreeset.rs | 107 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/src/btreeset.rs b/src/btreeset.rs index 42a0d8a3..b61ae6ec 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -645,4 +645,111 @@ mod test { let range: Vec<_> = btreeset.range(10..20).collect(); assert!(range.is_empty()); } + + #[test] + fn test_insert_and_remove_large_set() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 0..1_000 { + assert!(btreeset.insert(i)); + } + assert_eq!(btreeset.len(), 1_000); + + for i in 0..1_000 { + assert!(btreeset.remove(&i)); + } + assert!(btreeset.is_empty()); + } + + #[test] + fn test_remove_nonexistent_large_set() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 0..1_000 { + assert!(btreeset.insert(i)); + } + + for i in 1_000..2_000 { + assert!(!btreeset.remove(&i)); // Non-existent elements + } + assert_eq!(btreeset.len(), 1_000); + } + + #[test] + fn test_iterate_empty_set() { + let mem = make_memory(); + let btreeset: BTreeSet = BTreeSet::new(mem); + + let elements: Vec<_> = btreeset.iter().collect(); + assert!(elements.is_empty()); + } + + #[test] + fn test_range_with_no_matches() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 0..10 { + btreeset.insert(i); + } + + let range: Vec<_> = btreeset.range(20..30).collect(); + assert!(range.is_empty()); + } + + #[test] + fn test_clear_and_reuse() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 0..100 { + btreeset.insert(i); + } + assert_eq!(btreeset.len(), 100); + + btreeset.clear(); + assert!(btreeset.is_empty()); + + for i in 100..200 { + btreeset.insert(i); + } + assert_eq!(btreeset.len(), 100); + assert!(btreeset.contains(&150)); + } + + #[test] + fn test_pop_first_and_last_large_set() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 0..1_000 { + btreeset.insert(i); + } + + for i in 0..500 { + assert_eq!(btreeset.pop_first(), Some(i)); + } + + for i in (500..1_000).rev() { + assert_eq!(btreeset.pop_last(), Some(i)); + } + + assert!(btreeset.is_empty()); + } + + #[test] + fn test_iter_upper_bound_edge_cases() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 1..=10 { + btreeset.insert(i); + } + + assert_eq!(btreeset.iter_upper_bound(&1).next(), None); // No element strictly below 1 + assert_eq!(btreeset.iter_upper_bound(&5).next(), Some(4)); // Largest element below 5 + assert_eq!(btreeset.iter_upper_bound(&11).next(), Some(10)); // Largest element below 11 + } } From 2393c00c977f4b4a9acaba5063c30f671b6b4645 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Mon, 28 Apr 2025 09:59:02 +0000 Subject: [PATCH 20/32] . --- src/btreeset.rs | 57 ++++++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index b61ae6ec..3ea25972 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -1,33 +1,3 @@ -//! This module implements a set based on a B-Tree -//! in stable memory. -//! -//! # Overview -//! -//! A `BTreeSet` is a "stable" set based on a B-tree. It is designed to work directly in stable memory, -//! allowing it to persist across upgrades and scale to large sizes. -//! -//! The implementation is based on the algorithm outlined in "Introduction to Algorithms" -//! by Cormen et al. -//! -//! ## Features -//! -//! - **Efficient Operations**: Provides efficient insertion, deletion, and lookup operations. -//! - **Persistence**: Works directly in stable memory, persisting across upgrades. -//! - **Scalability**: Can scale to gigabytes in size. -//! -//! ## Example -//! -//! ```rust -//! use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; -//! -//! let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); -//! -//! set.insert(42); -//! assert!(set.contains(&42)); -//! assert_eq!(set.pop_first(), Some(42)); -//! assert!(set.is_empty()); -//! ``` - use crate::{btreemap::Iter as IterMap, BTreeMap, Memory, Storable}; use core::ops::RangeBounds; @@ -67,10 +37,35 @@ where } } -/// A "stable" set based on a B-tree. +/// This module implements a set based on a B-Tree +/// in stable memory. +/// +/// # Overview +/// +/// A `BTreeSet` is a "stable" set based on a B-tree. It is designed to work directly in stable memory, +/// allowing it to persist across upgrades and scale to large sizes. /// /// The implementation is based on the algorithm outlined in "Introduction to Algorithms" /// by Cormen et al. +/// +/// ## Features +/// +/// - **Efficient Operations**: Provides efficient insertion, deletion, and lookup operations. +/// - **Persistence**: Works directly in stable memory, persisting across upgrades. +/// - **Scalability**: Can scale to gigabytes in size. +/// +/// ## Example +/// +/// ```rust +/// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; +/// +/// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); +/// +/// set.insert(42); +/// assert!(set.contains(&42)); +/// assert_eq!(set.pop_first(), Some(42)); +/// assert!(set.is_empty()); +/// ``` pub struct BTreeSet where K: Storable + Ord + Clone, From 308e1d972f59d42ad9ae7f2306f599bd11e1b520 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Mon, 28 Apr 2025 10:03:28 +0000 Subject: [PATCH 21/32] . --- src/btreeset.rs | 100 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 11 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index 3ea25972..4410f6b9 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -37,24 +37,30 @@ where } } -/// This module implements a set based on a B-Tree -/// in stable memory. +/// This module implements a set based on a B-Tree in stable memory. /// /// # Overview /// -/// A `BTreeSet` is a "stable" set based on a B-tree. It is designed to work directly in stable memory, -/// allowing it to persist across upgrades and scale to large sizes. +/// A `BTreeSet` is a "stable" set implementation based on a B-tree, designed to work directly in stable memory. /// -/// The implementation is based on the algorithm outlined in "Introduction to Algorithms" -/// by Cormen et al. +/// # Memory Implementations /// -/// ## Features +/// `BTreeSet` works with any memory implementation that satisfies the [`Memory`] trait: /// -/// - **Efficient Operations**: Provides efficient insertion, deletion, and lookup operations. -/// - **Persistence**: Works directly in stable memory, persisting across upgrades. -/// - **Scalability**: Can scale to gigabytes in size. +/// - [`Ic0StableMemory`](crate::Ic0StableMemory): Stores data in the Internet Computer's stable memory. +/// - [`VectorMemory`](crate::VectorMemory): In-memory implementation backed by a Rust `Vec`. +/// - [`FileMemory`](crate::FileMemory): Persists data to disk using a file. +/// - [`DefaultMemoryImpl`](crate::DefaultMemoryImpl): Automatically selects the appropriate memory backend +/// based on the environment: +/// - Uses `Ic0StableMemory` when running in an Internet Computer canister (wasm32 target). +/// - Falls back to `VectorMemory` in other environments (like tests or non-IC contexts). /// -/// ## Example +/// For most use cases, [`DefaultMemoryImpl`](crate::DefaultMemoryImpl) is recommended as it provides +/// the right implementation based on the runtime context. +/// +/// # Examples +/// +/// ## Basic Usage /// /// ```rust /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; @@ -66,6 +72,78 @@ where /// assert_eq!(set.pop_first(), Some(42)); /// assert!(set.is_empty()); /// ``` +/// +/// ## Range Queries +/// +/// ```rust +/// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; +/// +/// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); +/// set.insert(1); +/// set.insert(2); +/// set.insert(3); +/// +/// let range: Vec<_> = set.range(2..).collect(); +/// assert_eq!(range, vec![2, 3]); +/// ``` +/// +/// ## Custom Types +/// +/// You can store custom types in a `BTreeSet` by implementing the `Storable` trait: +/// +/// ```rust +/// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl, Storable}; +/// use std::borrow::Cow; +/// +/// #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +/// struct CustomType { +/// id: u64, +/// } +/// +/// impl Storable for CustomType { +/// fn to_bytes(&self) -> Cow<[u8]> { +/// Cow::Owned(self.id.to_le_bytes().to_vec()) +/// } +/// +/// fn from_bytes(bytes: Cow<[u8]>) -> Self { +/// let id = u64::from_le_bytes(bytes.as_ref().try_into().unwrap()); +/// CustomType { id } +/// } +/// +/// const BOUND: ic_stable_structures::storable::Bound = +/// ic_stable_structures::storable::Bound::Bounded { +/// max_size: 8, +/// is_fixed_size: true, +/// }; +/// } +/// +/// let mut set: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); +/// set.insert(CustomType { id: 42 }); +/// assert!(set.contains(&CustomType { id: 42 })); +/// ``` +/// +/// ### Bounded vs Unbounded Types +/// +/// When implementing `Storable`, you must specify whether your type is bounded or unbounded: +/// +/// - **Unbounded (`Bound::Unbounded`)**: +/// - Use when your type's serialized size can vary or has no fixed maximum. +/// - Recommended for most custom types, especially those containing Strings or Vecs. +/// - Example: `const BOUND: Bound = Bound::Unbounded;` +/// +/// - **Bounded (`Bound::Bounded{ max_size, is_fixed_size }`)**: +/// - Use when you know the maximum serialized size of your type. +/// - Enables memory optimizations in the `BTreeSet`. +/// - Example: `const BOUND: Bound = Bound::Bounded { max_size: 100, is_fixed_size: false };` +/// - For types with truly fixed size (like primitive types), set `is_fixed_size: true`. +/// +/// If unsure, use `Bound::Unbounded` as it's the safer choice. +/// +/// # Warning +/// +/// Once you've deployed with a bounded type, you cannot increase its `max_size` in +/// future versions without risking data corruption. You can, however, migrate from a bounded type +/// to an unbounded type if needed. For evolving data structures, prefer `Bound::Unbounded`. pub struct BTreeSet where K: Storable + Ord + Clone, From 7b55e60fc3b24813603f71640e4c2605e7724aea Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Tue, 29 Apr 2025 09:04:46 +0000 Subject: [PATCH 22/32] . --- src/btreeset.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index 4410f6b9..e1ad2833 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -1,3 +1,5 @@ +//! This module implements a set based on a B-Tree in stable memory. + use crate::{btreemap::Iter as IterMap, BTreeMap, Memory, Storable}; use core::ops::RangeBounds; @@ -37,7 +39,7 @@ where } } -/// This module implements a set based on a B-Tree in stable memory. +/// A B-Tree set implementation that stores its data into a designated memory. /// /// # Overview /// From 481741e107b93d37dd59f5c520fc28d8d5e3f631 Mon Sep 17 00:00:00 2001 From: Dragoljub Djuric Date: Tue, 29 Apr 2025 11:10:03 +0200 Subject: [PATCH 23/32] Update src/btreeset.rs Co-authored-by: Dimitris Sarlis --- src/btreeset.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index e1ad2833..e061e24c 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -192,7 +192,7 @@ where } } - /// Loads the set from memory. + /// Loads the `BTreeSet` from memory. /// /// # Example /// From 147d88cb678154b622f8a627e94ba264215182e5 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Tue, 29 Apr 2025 09:51:53 +0000 Subject: [PATCH 24/32] . --- src/btreeset.rs | 231 ++++++++++++++++++++++++++++++++++++++ src/btreeset/proptests.rs | 58 ++++++++++ 2 files changed, 289 insertions(+) diff --git a/src/btreeset.rs b/src/btreeset.rs index e061e24c..8ab8ba09 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -445,6 +445,82 @@ where pub fn iter_upper_bound(&self, bound: &K) -> Iter { Iter::new(self.map.iter_upper_bound(bound)) } + + /// Returns an iterator over the union of this set and another. + /// + /// The union of two sets is a set containing all elements that are in either set. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set1: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// let mut set2: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// + /// set1.insert(1); + /// set1.insert(2); + /// set2.insert(2); + /// set2.insert(3); + /// + /// let union: Vec<_> = set1.union(&set2).collect(); + /// assert_eq!(union, vec![1, 2, 3]); + /// ``` + pub fn union<'a>(&'a self, other: &'a BTreeSet) -> impl Iterator + 'a { + let mut iter_self = self.iter(); + let mut iter_other = other.iter(); + let mut next_self = iter_self.next(); + let mut next_other = iter_other.next(); + + std::iter::from_fn(move || match (next_self.clone(), next_other.clone()) { + (Some(ref a), Some(ref b)) => { + if a < b { + next_self = iter_self.next(); + Some(a.clone()) + } else if a > b { + next_other = iter_other.next(); + Some(b.clone()) + } else { + next_self = iter_self.next(); + next_other = iter_other.next(); + Some(a.clone()) + } + } + (Some(ref a), None) => { + next_self = iter_self.next(); + Some(a.clone()) + } + (None, Some(ref b)) => { + next_other = iter_other.next(); + Some(b.clone()) + } + (None, None) => None, + }) + } + + /// Returns an iterator over the intersection of this set and another. + /// + /// The intersection of two sets is a set containing only the elements that are in both sets. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set1: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// let mut set2: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// + /// set1.insert(1); + /// set1.insert(2); + /// set2.insert(2); + /// set2.insert(3); + /// + /// let intersection: Vec<_> = set1.intersection(&set2).collect(); + /// assert_eq!(intersection, vec![2]); + /// ``` + pub fn intersection<'a>(&'a self, other: &'a BTreeSet) -> impl Iterator + 'a { + self.iter().filter(move |item| other.contains(item)) + } } #[cfg(test)] @@ -475,6 +551,161 @@ mod test { f(btree); } + #[test] + fn test_union_disjoint_sets() { + let mem1 = make_memory(); + let mem2 = make_memory(); + let mut set1: BTreeSet = BTreeSet::new(mem1); + let mut set2: BTreeSet = BTreeSet::new(mem2); + + set1.insert(1); + set1.insert(2); + + set2.insert(3); + set2.insert(4); + + let union: Vec<_> = set1.union(&set2).collect(); + assert_eq!(union, vec![1, 2, 3, 4]); + } + + #[test] + fn test_intersection_disjoint_sets() { + let mem1 = make_memory(); + let mem2 = make_memory(); + let mut set1: BTreeSet = BTreeSet::new(mem1); + let mut set2: BTreeSet = BTreeSet::new(mem2); + + set1.insert(1); + set1.insert(2); + + set2.insert(3); + set2.insert(4); + + let intersection: Vec<_> = set1.intersection(&set2).collect(); + assert!(intersection.is_empty()); + } + + #[test] + fn test_union_with_duplicates() { + let mem1 = make_memory(); + let mem2 = make_memory(); + let mut set1: BTreeSet = BTreeSet::new(mem1); + let mut set2: BTreeSet = BTreeSet::new(mem2); + + set1.insert(1); + set1.insert(2); + set1.insert(3); + + set2.insert(2); + set2.insert(3); + set2.insert(4); + + let union: Vec<_> = set1.union(&set2).collect(); + assert_eq!(union, vec![1, 2, 3, 4]); + } + + #[test] + fn test_intersection_with_duplicates() { + let mem1 = make_memory(); + let mem2 = make_memory(); + let mut set1: BTreeSet = BTreeSet::new(mem1); + let mut set2: BTreeSet = BTreeSet::new(mem2); + + set1.insert(1); + set1.insert(2); + set1.insert(3); + + set2.insert(2); + set2.insert(3); + set2.insert(4); + + let intersection: Vec<_> = set1.intersection(&set2).collect(); + assert_eq!(intersection, vec![2, 3]); + } + + #[test] + fn test_union_and_intersection_with_identical_sets() { + let mem1 = Rc::new(RefCell::new(Vec::new())); + let mem2 = Rc::new(RefCell::new(Vec::new())); + let mut set1: BTreeSet = BTreeSet::new(mem1); + let mut set2: BTreeSet = BTreeSet::new(mem2); + + for i in 0..100 { + set1.insert(i); + set2.insert(i); + } + + let union: Vec<_> = set1.union(&set2).collect(); + assert_eq!(union.len(), 100); + assert_eq!(union, (0..100).collect::>()); + + let intersection: Vec<_> = set1.intersection(&set2).collect(); + assert_eq!(intersection.len(), 100); + assert_eq!(intersection, (0..100).collect::>()); + } + + #[test] + fn test_union_and_intersection_with_non_overlapping_sets() { + let mem1 = Rc::new(RefCell::new(Vec::new())); + let mem2 = Rc::new(RefCell::new(Vec::new())); + let mut set1: BTreeSet = BTreeSet::new(mem1); + let mut set2: BTreeSet = BTreeSet::new(mem2); + + for i in 0..50 { + set1.insert(i); + } + for i in 50..100 { + set2.insert(i); + } + + let union: Vec<_> = set1.union(&set2).collect(); + assert_eq!(union.len(), 100); + assert_eq!(union, (0..100).collect::>()); + + let intersection: Vec<_> = set1.intersection(&set2).collect(); + assert!(intersection.is_empty()); + } + + #[test] + fn test_union_with_large_sets() { + let mem1 = Rc::new(RefCell::new(Vec::new())); + let mem2 = Rc::new(RefCell::new(Vec::new())); + let mut set1: BTreeSet = BTreeSet::new(mem1); + let mut set2: BTreeSet = BTreeSet::new(mem2); + + for i in 0..1000 { + set1.insert(i); + } + for i in 500..1500 { + set2.insert(i); + } + + let union: Vec<_> = set1.union(&set2).collect(); + assert_eq!(union.len(), 1500); + assert_eq!(union[0], 0); + assert_eq!(union[1499], 1499); + } + + #[test] + fn test_intersection_with_large_sets() { + let mem1 = Rc::new(RefCell::new(Vec::new())); + let mem2 = Rc::new(RefCell::new(Vec::new())); + let mut set1: BTreeSet = BTreeSet::new(mem1); + let mut set2: BTreeSet = BTreeSet::new(mem2); + + for i in 0..1000 { + set1.insert(i); + } + for i in 500..1500 { + set2.insert(i); + } + + let intersection: Vec<_> = set1.intersection(&set2).collect(); + assert_eq!(intersection.len(), 500); + assert_eq!(intersection[0], 500); + assert_eq!(intersection[499], 999); + } + #[test] fn init_preserves_data_set() { run_btree_test(|mut btree| { diff --git a/src/btreeset/proptests.rs b/src/btreeset/proptests.rs index 5a8a9f1a..8e8dd91f 100644 --- a/src/btreeset/proptests.rs +++ b/src/btreeset/proptests.rs @@ -173,3 +173,61 @@ fn execute_operation( } }; } + +#[proptest] +fn test_union( + #[strategy(pvec(any::(), 1..100))] keys1: Vec, + #[strategy(pvec(any::(), 1..100))] keys2: Vec, +) { + crate::btreeset::test::run_btree_test(|mut set1| { + let mut set2 = BTreeSet::new(crate::btreeset::test::make_memory()); + let mut std_set1 = StdBTreeSet::new(); + let mut std_set2 = StdBTreeSet::new(); + + for key in &keys1 { + set1.insert(*key); + std_set1.insert(*key); + } + + for key in &keys2 { + set2.insert(*key); + std_set2.insert(*key); + } + + let union: Vec<_> = set1.union(&set2).collect(); + let std_union: Vec<_> = std_set1.union(&std_set2).cloned().collect(); + + prop_assert_eq!(union, std_union); + + Ok(()) + }); +} + +#[proptest] +fn test_intersection( + #[strategy(pvec(any::(), 1..100))] keys1: Vec, + #[strategy(pvec(any::(), 1..100))] keys2: Vec, +) { + crate::btreeset::test::run_btree_test(|mut set1| { + let mut set2 = BTreeSet::new(crate::btreeset::test::make_memory()); + let mut std_set1 = StdBTreeSet::new(); + let mut std_set2 = StdBTreeSet::new(); + + for key in &keys1 { + set1.insert(*key); + std_set1.insert(*key); + } + + for key in &keys2 { + set2.insert(*key); + std_set2.insert(*key); + } + + let intersection: Vec<_> = set1.intersection(&set2).collect(); + let std_intersection: Vec<_> = std_set1.intersection(&std_set2).cloned().collect(); + + prop_assert_eq!(intersection, std_intersection); + + Ok(()) + }); +} From 176192e699934efa9cbe0b9cd8929dfcabd224b3 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Tue, 29 Apr 2025 09:53:51 +0000 Subject: [PATCH 25/32] Revert canbench_results.yml change. --- canbench_results.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/canbench_results.yml b/canbench_results.yml index da2706af..fd1265e1 100644 --- a/canbench_results.yml +++ b/canbench_results.yml @@ -157,7 +157,7 @@ benches: scopes: {} btreemap_insert_10mib_values: total: - instructions: 5239412179 + instructions: 5239421355 heap_increase: 0 stable_memory_increase: 3613 scopes: {} From cd4735ef869b393fc8755115de13ae9f1b8a7402 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Tue, 29 Apr 2025 09:58:07 +0000 Subject: [PATCH 26/32] Add comment explaining relation of BTreeSet and BTreeMap. --- src/btreeset.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/btreeset.rs b/src/btreeset.rs index 8ab8ba09..059bdcde 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -151,6 +151,10 @@ where K: Storable + Ord + Clone, M: Memory, { + // The underlying implementation uses a BTreeMap with unit values. + // This design allows us to reuse the existing BTreeMap implementation. + // However, if needed, this could be optimized in the future to avoid + // the overhead of storing unit values. map: BTreeMap, } From ab02321ad1ca4c46d109824947c2478d915cc116 Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Tue, 29 Apr 2025 10:04:50 +0000 Subject: [PATCH 27/32] Optimize intersection function. --- src/btreeset.rs | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index 059bdcde..7c553923 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -516,14 +516,35 @@ where /// /// set1.insert(1); /// set1.insert(2); + /// set1.insert(3); + /// /// set2.insert(2); /// set2.insert(3); + /// set2.insert(4); /// /// let intersection: Vec<_> = set1.intersection(&set2).collect(); - /// assert_eq!(intersection, vec![2]); + /// assert_eq!(intersection, vec![2, 3]); /// ``` pub fn intersection<'a>(&'a self, other: &'a BTreeSet) -> impl Iterator + 'a { - self.iter().filter(move |item| other.contains(item)) + let mut iter_self = self.iter(); + let mut iter_other = other.iter(); + let mut next_self = iter_self.next(); + let mut next_other = iter_other.next(); + + std::iter::from_fn(move || { + while let (Some(ref a), Some(ref b)) = (next_self.clone(), next_other.clone()) { + if a < b { + next_self = iter_self.next(); + } else if a > b { + next_other = iter_other.next(); + } else { + next_self = iter_self.next(); + next_other = iter_other.next(); + return Some(a.clone()); + } + } + None + }) } } From f4bf556294a9af45a5575b36ec4df67a26de56da Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Tue, 29 Apr 2025 10:07:45 +0000 Subject: [PATCH 28/32] Comment union and intersection functions. --- src/btreeset.rs | 49 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index 7c553923..1a51cfce 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -476,29 +476,39 @@ where let mut next_self = iter_self.next(); let mut next_other = iter_other.next(); - std::iter::from_fn(move || match (next_self.clone(), next_other.clone()) { - (Some(ref a), Some(ref b)) => { - if a < b { + // Use a closure to merge the two iterators while maintaining sorted order. + std::iter::from_fn(move || { + match (next_self.clone(), next_other.clone()) { + // If both iterators have elements, compare the current elements. + (Some(ref a), Some(ref b)) => { + if a < b { + // If the element from `self` is smaller, yield it and advance `self`. + next_self = iter_self.next(); + Some(a.clone()) + } else if a > b { + // If the element from `other` is smaller, yield it and advance `other`. + next_other = iter_other.next(); + Some(b.clone()) + } else { + // If the elements are equal, yield one and advance both iterators. + next_self = iter_self.next(); + next_other = iter_other.next(); + Some(a.clone()) + } + } + // If only `self` has elements remaining, yield them. + (Some(ref a), None) => { next_self = iter_self.next(); Some(a.clone()) - } else if a > b { + } + // If only `other` has elements remaining, yield them. + (None, Some(ref b)) => { next_other = iter_other.next(); Some(b.clone()) - } else { - next_self = iter_self.next(); - next_other = iter_other.next(); - Some(a.clone()) } + // If both iterators are exhausted, stop the iteration. + (None, None) => None, } - (Some(ref a), None) => { - next_self = iter_self.next(); - Some(a.clone()) - } - (None, Some(ref b)) => { - next_other = iter_other.next(); - Some(b.clone()) - } - (None, None) => None, }) } @@ -531,18 +541,23 @@ where let mut next_self = iter_self.next(); let mut next_other = iter_other.next(); + // Use a closure to find common elements by traversing both iterators simultaneously. std::iter::from_fn(move || { while let (Some(ref a), Some(ref b)) = (next_self.clone(), next_other.clone()) { if a < b { + // If the element from `self` is smaller, advance `self`. next_self = iter_self.next(); } else if a > b { + // If the element from `other` is smaller, advance `other`. next_other = iter_other.next(); } else { + // If the elements are equal, yield one and advance both iterators. next_self = iter_self.next(); next_other = iter_other.next(); return Some(a.clone()); } } + // Stop the iteration when either iterator is exhausted. None }) } From 8ea02aa03289c0d975b2d6b4e55285ba37553b5e Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Tue, 29 Apr 2025 10:13:33 +0000 Subject: [PATCH 29/32] fix clippy error. --- src/btreeset.rs | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index 1a51cfce..1bf6277f 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -480,22 +480,24 @@ where std::iter::from_fn(move || { match (next_self.clone(), next_other.clone()) { // If both iterators have elements, compare the current elements. - (Some(ref a), Some(ref b)) => { - if a < b { + (Some(ref a), Some(ref b)) => match a.cmp(b) { + std::cmp::Ordering::Less => { // If the element from `self` is smaller, yield it and advance `self`. next_self = iter_self.next(); Some(a.clone()) - } else if a > b { + } + std::cmp::Ordering::Greater => { // If the element from `other` is smaller, yield it and advance `other`. next_other = iter_other.next(); Some(b.clone()) - } else { + } + std::cmp::Ordering::Equal => { // If the elements are equal, yield one and advance both iterators. next_self = iter_self.next(); next_other = iter_other.next(); Some(a.clone()) } - } + }, // If only `self` has elements remaining, yield them. (Some(ref a), None) => { next_self = iter_self.next(); @@ -544,17 +546,21 @@ where // Use a closure to find common elements by traversing both iterators simultaneously. std::iter::from_fn(move || { while let (Some(ref a), Some(ref b)) = (next_self.clone(), next_other.clone()) { - if a < b { - // If the element from `self` is smaller, advance `self`. - next_self = iter_self.next(); - } else if a > b { - // If the element from `other` is smaller, advance `other`. - next_other = iter_other.next(); - } else { - // If the elements are equal, yield one and advance both iterators. - next_self = iter_self.next(); - next_other = iter_other.next(); - return Some(a.clone()); + match a.cmp(b) { + std::cmp::Ordering::Less => { + // If the element from `self` is smaller, advance `self`. + next_self = iter_self.next(); + } + std::cmp::Ordering::Greater => { + // If the element from `other` is smaller, advance `other`. + next_other = iter_other.next(); + } + std::cmp::Ordering::Equal => { + // If the elements are equal, yield one and advance both iterators. + next_self = iter_self.next(); + next_other = iter_other.next(); + return Some(a.clone()); + } } } // Stop the iteration when either iterator is exhausted. From 62a7e5b0a488a18c60fb78901c3e6f74c4863e1f Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Tue, 29 Apr 2025 11:42:27 +0000 Subject: [PATCH 30/32] . --- src/btreeset.rs | 172 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) diff --git a/src/btreeset.rs b/src/btreeset.rs index 1bf6277f..a43c3783 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -567,6 +567,178 @@ where None }) } + + /// Returns `true` if this set has no elements in common with another set. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set1: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// let mut set2: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// + /// set1.insert(1); + /// set1.insert(2); + /// set2.insert(3); + /// set2.insert(4); + /// + /// assert!(set1.is_disjoint(&set2)); + /// set2.insert(2); + /// assert!(!set1.is_disjoint(&set2)); + /// ``` + pub fn is_disjoint(&self, other: &BTreeSet) -> bool { + self.intersection(other).next().is_none() + } + + /// Returns `true` if this set is a subset of another set. + /// + /// A set `A` is a subset of a set `B` if all elements of `A` are also elements of `B`. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set1: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// let mut set2: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// + /// set1.insert(1); + /// set1.insert(2); + /// set2.insert(1); + /// set2.insert(2); + /// set2.insert(3); + /// + /// assert!(set1.is_subset(&set2)); + /// assert!(!set2.is_subset(&set1)); + /// ``` + pub fn is_subset(&self, other: &BTreeSet) -> bool { + let mut self_iter = self.iter(); + let mut other_iter = other.iter(); + + let mut self_next = self_iter.next(); + let mut other_next = other_iter.next(); + + while let Some(ref self_key) = self_next { + match other_next { + Some(ref other_key) => match self_key.cmp(other_key) { + std::cmp::Ordering::Equal => { + // Keys match, advance both iterators. + self_next = self_iter.next(); + other_next = other_iter.next(); + } + std::cmp::Ordering::Greater => { + // Advance the `other` iterator if its key is smaller. + other_next = other_iter.next(); + } + std::cmp::Ordering::Less => { + // If `self_key` is not found in `other`, return false. + return false; + } + }, + None => { + // If `other` is exhausted but `self` is not, return false. + return false; + } + } + } + + // If we exhaust `self`, it is a subset of `other`. + true + } + + /// Returns `true` if this set is a superset of another set. + /// + /// A set `A` is a superset of a set `B` if all elements of `B` are also elements of `A`. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set1: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// let mut set2: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// + /// set1.insert(1); + /// set1.insert(2); + /// set1.insert(3); + /// set2.insert(1); + /// set2.insert(2); + /// + /// assert!(set1.is_superset(&set2)); + /// assert!(!set2.is_superset(&set1)); + /// ``` + pub fn is_superset(&self, other: &BTreeSet) -> bool { + other.is_subset(self) + } + + /// Returns an iterator over the symmetric difference of this set and another. + /// + /// The symmetric difference of two sets is the set of elements that are in either of the sets, + /// but not in their intersection. + /// + /// # Example + /// + /// ```rust + /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; + /// + /// let mut set1: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// let mut set2: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); + /// + /// set1.insert(1); + /// set1.insert(2); + /// set2.insert(2); + /// set2.insert(3); + /// + /// let symmetric_diff: Vec<_> = set1.symmetric_difference(&set2).collect(); + /// assert_eq!(symmetric_diff, vec![1, 3]); + /// ``` + pub fn symmetric_difference<'a>( + &'a self, + other: &'a BTreeSet, + ) -> impl Iterator + 'a { + let mut iter_self = self.iter(); + let mut iter_other = other.iter(); + let mut next_self = iter_self.next(); + let mut next_other = iter_other.next(); + + std::iter::from_fn(move || { + match (next_self.clone(), next_other.clone()) { + (Some(ref a), Some(ref b)) => match a.cmp(b) { + std::cmp::Ordering::Less => { + // If the current element in `self` is smaller, it is part of the symmetric difference. + // Advance the `self` iterator. + next_self = iter_self.next(); + Some(a.clone()) + } + std::cmp::Ordering::Greater => { + // If the current element in `other` is smaller, it is part of the symmetric difference. + // Advance the `other` iterator. + next_other = iter_other.next(); + Some(b.clone()) + } + std::cmp::Ordering::Equal => { + // If the elements are equal, they are part of the intersection and should be skipped. + // Advance both iterators. + next_self = iter_self.next(); + next_other = iter_other.next(); + None + } + }, + (Some(ref a), None) => { + // If `other` is exhausted, all remaining elements in `self` are part of the symmetric difference. + next_self = iter_self.next(); + Some(a.clone()) + } + (None, Some(ref b)) => { + // If `self` is exhausted, all remaining elements in `other` are part of the symmetric difference. + next_other = iter_other.next(); + Some(b.clone()) + } + (None, None) => None, // Both iterators are exhausted. + } + }) + } } #[cfg(test)] From 666c15399f219dd50d713cde237595fea9a22acd Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Tue, 29 Apr 2025 11:46:13 +0000 Subject: [PATCH 31/32] Revert "." This reverts commit 62a7e5b0a488a18c60fb78901c3e6f74c4863e1f. --- src/btreeset.rs | 172 ------------------------------------------------ 1 file changed, 172 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index a43c3783..1bf6277f 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -567,178 +567,6 @@ where None }) } - - /// Returns `true` if this set has no elements in common with another set. - /// - /// # Example - /// - /// ```rust - /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; - /// - /// let mut set1: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); - /// let mut set2: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); - /// - /// set1.insert(1); - /// set1.insert(2); - /// set2.insert(3); - /// set2.insert(4); - /// - /// assert!(set1.is_disjoint(&set2)); - /// set2.insert(2); - /// assert!(!set1.is_disjoint(&set2)); - /// ``` - pub fn is_disjoint(&self, other: &BTreeSet) -> bool { - self.intersection(other).next().is_none() - } - - /// Returns `true` if this set is a subset of another set. - /// - /// A set `A` is a subset of a set `B` if all elements of `A` are also elements of `B`. - /// - /// # Example - /// - /// ```rust - /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; - /// - /// let mut set1: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); - /// let mut set2: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); - /// - /// set1.insert(1); - /// set1.insert(2); - /// set2.insert(1); - /// set2.insert(2); - /// set2.insert(3); - /// - /// assert!(set1.is_subset(&set2)); - /// assert!(!set2.is_subset(&set1)); - /// ``` - pub fn is_subset(&self, other: &BTreeSet) -> bool { - let mut self_iter = self.iter(); - let mut other_iter = other.iter(); - - let mut self_next = self_iter.next(); - let mut other_next = other_iter.next(); - - while let Some(ref self_key) = self_next { - match other_next { - Some(ref other_key) => match self_key.cmp(other_key) { - std::cmp::Ordering::Equal => { - // Keys match, advance both iterators. - self_next = self_iter.next(); - other_next = other_iter.next(); - } - std::cmp::Ordering::Greater => { - // Advance the `other` iterator if its key is smaller. - other_next = other_iter.next(); - } - std::cmp::Ordering::Less => { - // If `self_key` is not found in `other`, return false. - return false; - } - }, - None => { - // If `other` is exhausted but `self` is not, return false. - return false; - } - } - } - - // If we exhaust `self`, it is a subset of `other`. - true - } - - /// Returns `true` if this set is a superset of another set. - /// - /// A set `A` is a superset of a set `B` if all elements of `B` are also elements of `A`. - /// - /// # Example - /// - /// ```rust - /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; - /// - /// let mut set1: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); - /// let mut set2: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); - /// - /// set1.insert(1); - /// set1.insert(2); - /// set1.insert(3); - /// set2.insert(1); - /// set2.insert(2); - /// - /// assert!(set1.is_superset(&set2)); - /// assert!(!set2.is_superset(&set1)); - /// ``` - pub fn is_superset(&self, other: &BTreeSet) -> bool { - other.is_subset(self) - } - - /// Returns an iterator over the symmetric difference of this set and another. - /// - /// The symmetric difference of two sets is the set of elements that are in either of the sets, - /// but not in their intersection. - /// - /// # Example - /// - /// ```rust - /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; - /// - /// let mut set1: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); - /// let mut set2: BTreeSet = BTreeSet::new(DefaultMemoryImpl::default()); - /// - /// set1.insert(1); - /// set1.insert(2); - /// set2.insert(2); - /// set2.insert(3); - /// - /// let symmetric_diff: Vec<_> = set1.symmetric_difference(&set2).collect(); - /// assert_eq!(symmetric_diff, vec![1, 3]); - /// ``` - pub fn symmetric_difference<'a>( - &'a self, - other: &'a BTreeSet, - ) -> impl Iterator + 'a { - let mut iter_self = self.iter(); - let mut iter_other = other.iter(); - let mut next_self = iter_self.next(); - let mut next_other = iter_other.next(); - - std::iter::from_fn(move || { - match (next_self.clone(), next_other.clone()) { - (Some(ref a), Some(ref b)) => match a.cmp(b) { - std::cmp::Ordering::Less => { - // If the current element in `self` is smaller, it is part of the symmetric difference. - // Advance the `self` iterator. - next_self = iter_self.next(); - Some(a.clone()) - } - std::cmp::Ordering::Greater => { - // If the current element in `other` is smaller, it is part of the symmetric difference. - // Advance the `other` iterator. - next_other = iter_other.next(); - Some(b.clone()) - } - std::cmp::Ordering::Equal => { - // If the elements are equal, they are part of the intersection and should be skipped. - // Advance both iterators. - next_self = iter_self.next(); - next_other = iter_other.next(); - None - } - }, - (Some(ref a), None) => { - // If `other` is exhausted, all remaining elements in `self` are part of the symmetric difference. - next_self = iter_self.next(); - Some(a.clone()) - } - (None, Some(ref b)) => { - // If `self` is exhausted, all remaining elements in `other` are part of the symmetric difference. - next_other = iter_other.next(); - Some(b.clone()) - } - (None, None) => None, // Both iterators are exhausted. - } - }) - } } #[cfg(test)] From 75b962bde32bbf7e3b22763eaf1d7444d9117e3d Mon Sep 17 00:00:00 2001 From: Dragoljub Duric Date: Tue, 29 Apr 2025 12:23:18 +0000 Subject: [PATCH 32/32] remove redundant tests --- src/btreeset.rs | 36 +----------------------------------- 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/src/btreeset.rs b/src/btreeset.rs index 1bf6277f..63133c2e 100644 --- a/src/btreeset.rs +++ b/src/btreeset.rs @@ -597,40 +597,6 @@ mod test { f(btree); } - #[test] - fn test_union_disjoint_sets() { - let mem1 = make_memory(); - let mem2 = make_memory(); - let mut set1: BTreeSet = BTreeSet::new(mem1); - let mut set2: BTreeSet = BTreeSet::new(mem2); - - set1.insert(1); - set1.insert(2); - - set2.insert(3); - set2.insert(4); - - let union: Vec<_> = set1.union(&set2).collect(); - assert_eq!(union, vec![1, 2, 3, 4]); - } - - #[test] - fn test_intersection_disjoint_sets() { - let mem1 = make_memory(); - let mem2 = make_memory(); - let mut set1: BTreeSet = BTreeSet::new(mem1); - let mut set2: BTreeSet = BTreeSet::new(mem2); - - set1.insert(1); - set1.insert(2); - - set2.insert(3); - set2.insert(4); - - let intersection: Vec<_> = set1.intersection(&set2).collect(); - assert!(intersection.is_empty()); - } - #[test] fn test_union_with_duplicates() { let mem1 = make_memory(); @@ -691,7 +657,7 @@ mod test { } #[test] - fn test_union_and_intersection_with_non_overlapping_sets() { + fn test_union_and_intersection_with_disjoin_sets() { let mem1 = Rc::new(RefCell::new(Vec::new())); let mem2 = Rc::new(RefCell::new(Vec::new())); let mut set1: BTreeSet = BTreeSet::new(mem1);