From 3a069cdc50d8ef710b52b8054821e6b4d4031cbd Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Wed, 15 Oct 2025 01:36:22 +0400 Subject: [PATCH 01/11] Speed up cosmetic filtering --- Cargo.lock | 11 +- Cargo.toml | 1 + src/cosmetic_filter_cache.rs | 33 +-- src/cosmetic_filter_cache_builder.rs | 42 +++- src/flatbuffers/containers/fb_index.rs | 44 ++++ src/flatbuffers/containers/flat_multimap.rs | 3 - src/flatbuffers/containers/flat_serialize.rs | 7 +- src/flatbuffers/containers/hash_index.rs | 205 ++++++++++++++++++ src/flatbuffers/containers/hash_map.rs | 88 ++++++++ src/flatbuffers/containers/hash_set.rs | 57 +++++ src/flatbuffers/containers/mod.rs | 4 + src/flatbuffers/containers/sorted_index.rs | 26 +-- src/flatbuffers/fb_network_filter.fbs | 8 +- .../fb_network_filter_generated.rs | 194 +++++++++++++++-- tests/unit/engine.rs | 8 +- 15 files changed, 658 insertions(+), 73 deletions(-) create mode 100644 src/flatbuffers/containers/fb_index.rs create mode 100644 src/flatbuffers/containers/hash_index.rs create mode 100644 src/flatbuffers/containers/hash_map.rs create mode 100644 src/flatbuffers/containers/hash_set.rs diff --git a/Cargo.lock b/Cargo.lock index 3d64b3eb..614389af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,6 +23,7 @@ dependencies = [ "regex", "reqwest", "rmp-serde", + "rustc-hash 1.1.0", "seahash", "selectors", "serde", @@ -1311,7 +1312,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "socket2", "thiserror 2.0.12", @@ -1331,7 +1332,7 @@ dependencies = [ "lru-slab", "rand 0.9.1", "ring", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "rustls-pki-types", "slab", @@ -1586,6 +1587,12 @@ version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc-hash" version = "2.1.1" diff --git a/Cargo.toml b/Cargo.toml index 16f60c38..dfc99cf6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ idna = "1.0.3" serde = { workspace = true } serde_json = { workspace = true } seahash = "4.1.0" +rustc-hash = { version = "1.1.0", default-features = false } memchr = "2.4" base64 = "0.22" rmp-serde = "0.15" diff --git a/src/cosmetic_filter_cache.rs b/src/cosmetic_filter_cache.rs index ca39a8b1..034eef0d 100644 --- a/src/cosmetic_filter_cache.rs +++ b/src/cosmetic_filter_cache.rs @@ -16,8 +16,9 @@ use crate::filters::cosmetic::{CosmeticFilterAction, CosmeticFilterOperator}; use crate::filters::filter_data_context::FilterDataContextRef; use crate::flatbuffers::containers::flat_map::FlatMapView; -use crate::flatbuffers::containers::flat_multimap::{FlatMapStringView, FlatMultiMapView}; -use crate::flatbuffers::containers::flat_set::FlatSetView; +use crate::flatbuffers::containers::flat_multimap::FlatMultiMapView; +use crate::flatbuffers::containers::hash_map::HashMapStringView; +use crate::flatbuffers::containers::hash_set::HashSetView; use crate::resources::{PermissionMask, ResourceStorage}; use crate::utils::Hash; @@ -169,13 +170,13 @@ impl CosmeticFilterCache { let mut selectors = vec![]; let cosmetic_filters = self.filter_data_context.memory.root().cosmetic_filters(); - let simple_class_rules = FlatSetView::new(cosmetic_filters.simple_class_rules()); - let simple_id_rules = FlatSetView::new(cosmetic_filters.simple_id_rules()); - let complex_class_rules = FlatMapStringView::new( + let simple_class_rules = HashSetView::new(cosmetic_filters.simple_class_rules()); + let simple_id_rules = HashSetView::new(cosmetic_filters.simple_id_rules()); + let complex_class_rules = HashMapStringView::new( cosmetic_filters.complex_class_rules_index(), cosmetic_filters.complex_class_rules_values(), ); - let complex_id_rules = 
FlatMapStringView::new( + let complex_id_rules = HashMapStringView::new( cosmetic_filters.complex_id_rules_index(), cosmetic_filters.complex_id_rules_values(), ); @@ -185,10 +186,12 @@ impl CosmeticFilterCache { if simple_class_rules.contains(class) && !exceptions.contains(&format!(".{}", class)) { selectors.push(format!(".{}", class)); } - if let Some(bucket) = complex_class_rules.get(class) { - for (_, sel) in bucket { - if !exceptions.contains(sel) { - selectors.push(sel.to_string()); + if let Some(values) = complex_class_rules.get(class) { + { + for sel in values.data() { + if !exceptions.contains(sel) { + selectors.push(sel.to_string()); + } } } } @@ -198,10 +201,12 @@ impl CosmeticFilterCache { if simple_id_rules.contains(id) && !exceptions.contains(&format!("#{}", id)) { selectors.push(format!("#{}", id)); } - if let Some(bucket) = complex_id_rules.get(id) { - for (_, sel) in bucket { - if !exceptions.contains(sel) { - selectors.push(sel.to_string()); + if let Some(values) = complex_id_rules.get(id) { + { + for sel in values.data() { + if !exceptions.contains(sel) { + selectors.push(sel.to_string()); + } } } } diff --git a/src/cosmetic_filter_cache_builder.rs b/src/cosmetic_filter_cache_builder.rs index 2673107f..59294964 100644 --- a/src/cosmetic_filter_cache_builder.rs +++ b/src/cosmetic_filter_cache_builder.rs @@ -9,6 +9,8 @@ use crate::filters::cosmetic::{CosmeticFilter, CosmeticFilterMask, CosmeticFilte use crate::filters::flatbuffer_generated::fb; use crate::flatbuffers::containers::flat_map::FlatMapBuilder; use crate::flatbuffers::containers::flat_multimap::FlatMultiMapBuilder; +use crate::flatbuffers::containers::hash_map::HashMapBuilder; +use crate::flatbuffers::containers::hash_set::HashSetBuilder; use crate::flatbuffers::containers::flat_serialize::{ serialize_vec_opt, FlatBuilder, FlatSerialize, @@ -56,13 +58,18 @@ impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for HostnameRule { } } +#[derive(Default, Clone)] +struct StringVector { + data: 
Vec, +} + #[derive(Default)] pub(crate) struct CosmeticFilterCacheBuilder { - simple_class_rules: HashSet, - simple_id_rules: HashSet, + simple_class_rules: HashSetBuilder, + simple_id_rules: HashSetBuilder, misc_generic_selectors: HashSet, - complex_class_rules: FlatMultiMapBuilder, - complex_id_rules: FlatMultiMapBuilder, + complex_class_rules: HashMapBuilder, + complex_id_rules: HashMapBuilder, hostname_hide: FlatMultiMapBuilder, hostname_inject_script: FlatMultiMapBuilder, @@ -110,7 +117,10 @@ impl CosmeticFilterCacheBuilder { if key == selector { self.simple_class_rules.insert(class); } else { - self.complex_class_rules.insert(class, selector); + let selectors = self + .complex_class_rules + .get_or_insert(class, StringVector::default()); + selectors.data.push(selector); } } } else if selector.starts_with('#') { @@ -120,7 +130,10 @@ impl CosmeticFilterCacheBuilder { if key == selector { self.simple_id_rules.insert(id); } else { - self.complex_id_rules.insert(id, selector); + let selectors = self + .complex_id_rules + .get_or_insert(id, StringVector::default()); + selectors.data.push(selector); } } } else { @@ -204,11 +217,24 @@ impl CosmeticFilterCacheBuilder { } } +impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for StringVector { + type Output = WIPOffset>; + + fn serialize(value: Self, builder: &mut B) -> WIPOffset> { + let v = FlatSerialize::serialize(value.data, builder); + fb::StringVector::create( + builder.raw_builder(), + &fb::StringVectorArgs { data: Some(v) }, + ) + } +} + impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for CosmeticFilterCacheBuilder { type Output = WIPOffset>; + fn serialize(value: Self, builder: &mut B) -> WIPOffset> { - let complex_class_rules = FlatMultiMapBuilder::finish(value.complex_class_rules, builder); - let complex_id_rules = FlatMultiMapBuilder::finish(value.complex_id_rules, builder); + let complex_class_rules = HashMapBuilder::finish(value.complex_class_rules, builder); + let complex_id_rules = 
HashMapBuilder::finish(value.complex_id_rules, builder); // Handle top-level hostname hide and inject_script for better deduplication let hostname_hide = FlatMultiMapBuilder::finish(value.hostname_hide, builder); diff --git a/src/flatbuffers/containers/fb_index.rs b/src/flatbuffers/containers/fb_index.rs new file mode 100644 index 00000000..2524dfc4 --- /dev/null +++ b/src/flatbuffers/containers/fb_index.rs @@ -0,0 +1,44 @@ +use flatbuffers::{Follow, Vector}; + +/// A trait to access indexed data in a flatbuffer. +/// It has two implementations: +/// 1. a faster &[I] for slices; +/// 2. a slower for flatbuffers::Vector, that uses Follow() internally. +/// Note: it intentally returns values using a copy, because it's faster +/// than by reference. +pub(crate) trait FbIndex { + fn len(&self) -> usize; + fn get(&self, index: usize) -> I; +} + +impl FbIndex for &[I] { + #[inline(always)] + fn len(&self) -> usize { + <[I]>::len(self) + } + + #[inline(always)] + fn get(&self, index: usize) -> I { + self[index] + } +} + +impl FbIndex<()> for () { + #[inline(always)] + fn len(&self) -> usize { + 0 + } + fn get(&self, _index: usize) {} +} + +impl<'a, T: Follow<'a>> FbIndex for Vector<'a, T> { + #[inline(always)] + fn len(&self) -> usize { + Vector::len(self) + } + + #[inline(always)] + fn get(&self, index: usize) -> T::Inner { + Vector::get(self, index) + } +} diff --git a/src/flatbuffers/containers/flat_multimap.rs b/src/flatbuffers/containers/flat_multimap.rs index 6ccb28a7..99b6255f 100644 --- a/src/flatbuffers/containers/flat_multimap.rs +++ b/src/flatbuffers/containers/flat_multimap.rs @@ -129,9 +129,6 @@ impl FlatMultiMapBuilder { } } -pub(crate) type FlatMapStringView<'a, V> = - FlatMultiMapView<'a, &'a str, V, Vector<'a, flatbuffers::ForwardsUOffset<&'a str>>>; - #[cfg(test)] #[path = "../../../tests/unit/flatbuffers/containers/flat_multimap.rs"] mod unit_tests; diff --git a/src/flatbuffers/containers/flat_serialize.rs b/src/flatbuffers/containers/flat_serialize.rs 
index 09ab1234..dbf1e3da 100644 --- a/src/flatbuffers/containers/flat_serialize.rs +++ b/src/flatbuffers/containers/flat_serialize.rs @@ -27,8 +27,13 @@ pub trait FlatSerialize<'b, B: FlatBuilder<'b>>: Sized { impl<'b> FlatBuilder<'b> for flatbuffers::FlatBufferBuilder<'b> { fn create_string(&mut self, s: &str) -> WIPOffset<&'b str> { - self.create_string(s) + if s.is_empty() { + self.create_shared_string(s) + } else { + self.create_string(s) + } } + fn raw_builder(&mut self) -> &mut flatbuffers::FlatBufferBuilder<'b> { self } diff --git a/src/flatbuffers/containers/hash_index.rs b/src/flatbuffers/containers/hash_index.rs new file mode 100644 index 00000000..f2bf61b9 --- /dev/null +++ b/src/flatbuffers/containers/hash_index.rs @@ -0,0 +1,205 @@ +/// An inner implementation of a HashMap-like container with open addressing. +/// Designed to be used in HashMap, HashSet, HashMultiMap. +/// The load factor is 25%-50%. +/// Uses RustC FxHasher as a hash function. +/// A default value is used to mark empty slots, so it can't be used as a key. 
+use std::marker::PhantomData; + +use crate::flatbuffers::containers::fb_index::FbIndex; + +pub(crate) trait HashKey: Eq + std::hash::Hash + Default + Clone { + fn is_empty(&self) -> bool; +} + +pub(crate) trait FbHashKey: Eq + std::hash::Hash { + fn is_empty(&self) -> bool; +} + +impl HashKey for String { + fn is_empty(&self) -> bool { + self.is_empty() + } +} + +impl FbHashKey for &str { + fn is_empty(&self) -> bool { + str::is_empty(self) + } +} + +#[inline(always)] +fn next_bucket(mut bucket: usize, capacity: usize, step: &mut usize) -> usize { + bucket += *step * *step; + *step += 1; + bucket % capacity +} + +fn find_matching_bucket>( + indexes: &Keys, + mut bucket: usize, + key: I, + capacity: usize, + step: &mut usize, +) -> Option { + debug_assert!(bucket < capacity); + debug_assert!(*step > 0); + debug_assert!(indexes.len() == capacity); + loop { + let data = indexes.get(bucket); + if FbHashKey::is_empty(&data) { + return None; + } + + if data == key { + return Some(bucket); + } + + bucket = next_bucket(bucket, capacity, step); + } +} + +pub(crate) struct HashIndexView, Values: FbIndex> { + indexes: Keys, + values: Values, + _phantom_i: PhantomData, + _phantom_v: PhantomData, +} + +impl, Values: FbIndex> HashIndexView { + pub fn new(indexes: Keys, values: Values) -> Self { + Self { + indexes, + values, + _phantom_i: PhantomData, + _phantom_v: PhantomData, + } + } + + pub fn get_single(&self, key: I) -> Option { + let bucket = self.find_single_bucket(key); + bucket.map(|idx| self.values.get(idx)) + } + + fn find_single_bucket(&self, key: I) -> Option { + let capacity = self.indexes.len(); + let bucket = get_hash(&key) % capacity; + find_matching_bucket(&self.indexes, bucket, key, capacity, &mut 1) + } +} + +pub(crate) struct HashIndexBuilder { + indexes: Vec, + values: Vec, + size: usize, +} + +fn get_hash(key: &I) -> usize { + // RustC Hash is 2x faster than DefaultHasher. 
+ use rustc_hash::FxHasher; + use std::hash::Hasher; + let mut hasher = FxHasher::default(); + key.hash(&mut hasher); + hasher.finish() as usize +} + +impl Default for HashIndexBuilder { + fn default() -> Self { + Self::new_with_capacity(4) + } +} + +impl HashIndexBuilder { + pub fn new_with_capacity(capacity: usize) -> Self { + debug_assert!(capacity >= 4); + let self_ = Self { + size: 0, + indexes: vec![I::default(); capacity], + values: vec![V::default(); capacity], + }; + debug_assert_eq!(self_.indexes.len(), capacity); + debug_assert_eq!(self_.capacity(), capacity); + self_ + } + + pub fn insert(&mut self, key: I, value: V, allow_duplicates: bool) -> (usize, &mut V) { + debug_assert!(!HashKey::is_empty(&key), "Key is empty"); + let target_hash = get_hash(&key); + + let capacity = self.capacity(); + assert!(capacity >= 4); + let mut bucket = target_hash % capacity; + + let mut step = 1; + + loop { + if HashKey::is_empty(&self.indexes[bucket]) { + // Found an empty bucket, take it and insert new key-value pair. + self.indexes[bucket] = key; + self.values[bucket] = value; + self.size += 1; + self.maybe_increase_capacity(allow_duplicates); + return (bucket, &mut self.values[bucket]); + } + + if self.indexes[bucket] == key && !allow_duplicates { + // Update the value for an existing key. 
+ self.values[bucket] = value; + return (bucket, &mut self.values[bucket]); + } + + bucket = next_bucket(bucket, capacity, &mut step); + } + } + + fn capacity(&self) -> usize { + self.indexes.len() + } + + pub fn find_single_bucket(&mut self, key: &I) -> Option { + let capacity = self.indexes.len(); + let mut bucket = get_hash(key) % capacity; + let mut step = 1; + loop { + let data = &self.indexes[bucket]; + if HashKey::is_empty(data) { + return None; + } + + if data == key { + return Some(bucket); + } + + bucket = next_bucket(bucket, capacity, &mut step); + } + } + + pub fn get_or_insert(&mut self, key: I, value: V) -> &mut V { + if let Some(existing_bucket) = self.find_single_bucket(&key) { + return &mut self.values[existing_bucket]; + } + let (_, new_value) = self.insert(key, value, false); + new_value + } + + fn maybe_increase_capacity(&mut self, allow_duplicates: bool) { + // Use 50% load factor. + if self.size * 2 > self.capacity() { + self.size = 0; + let new_capacity = self.capacity() * 2; + let old_indexes = std::mem::take(&mut self.indexes); + let old_values = std::mem::take(&mut self.values); + self.indexes = vec![I::default(); new_capacity]; + self.values = vec![V::default(); new_capacity]; + + for (key, value) in old_indexes.into_iter().zip(old_values.into_iter()) { + if !HashKey::is_empty(&key) { + self.insert(key, value, allow_duplicates); + } + } + } + } + + pub fn consume(value: Self) -> (Vec, Vec) { + (value.indexes, value.values) + } +} diff --git a/src/flatbuffers/containers/hash_map.rs b/src/flatbuffers/containers/hash_map.rs new file mode 100644 index 00000000..1bd39e05 --- /dev/null +++ b/src/flatbuffers/containers/hash_map.rs @@ -0,0 +1,88 @@ +/// A HashMap implementation backed by a HashIndex. +/// Uses more memory than FlatMap, but gives faster lookup. 
+use crate::flatbuffers::containers::{ + fb_index::FbIndex, + flat_serialize::{FlatBuilder, FlatMapBuilderOutput, FlatSerialize}, + hash_index::{FbHashKey, HashIndexBuilder, HashIndexView, HashKey}, +}; + +/// A builder for a HashMap that can be serialized into a flatbuffer. +/// A default key is used to mark empty slots, so (default_key, _) pair +/// can't be added. +#[derive(Default)] +pub(crate) struct HashMapBuilder { + builder: HashIndexBuilder, +} + +impl HashMapBuilder { + #[allow(unused)] + pub fn insert(&mut self, key: I, value: V) { + self.builder.insert(key, value, false /* allow_duplicate */); + } + + pub fn get_or_insert(&mut self, key: I, value: V) -> &mut V { + self.builder.get_or_insert(key, value) + } + + pub fn finish<'b, B: FlatBuilder<'b>>( + value: Self, + builder: &mut B, + ) -> FlatMapBuilderOutput<'b, I, V, B> + where + I: FlatSerialize<'b, B>, + V: FlatSerialize<'b, B>, + { + let (indexes, values) = HashIndexBuilder::consume(value.builder); + + let keys = indexes + .into_iter() + .map(|i| FlatSerialize::serialize(i, builder)) + .collect::>(); + let values = values + .into_iter() + .map(|v| FlatSerialize::serialize(v, builder)) + .collect::>(); + + let keys = builder.raw_builder().create_vector(&keys); + let values = builder.raw_builder().create_vector(&values); + + FlatMapBuilderOutput { keys, values } + } +} + +/// A view of a HashMap stored in a flatbuffer. +/// The default key is considered as an empty slot, `get(default_key)` always +/// returns None. 
+pub(crate) struct HashMapView +where + I: FbHashKey, + Keys: FbIndex, + Values: FbIndex, +{ + view: HashIndexView, +} + +impl HashMapView +where + I: FbHashKey, + Keys: FbIndex, + Values: FbIndex, +{ + pub fn new(keys: Keys, values: Values) -> Self { + assert_eq!(keys.len(), values.len()); + Self { + view: HashIndexView::new(keys, values), + } + } + + pub fn get(&self, key: I) -> Option { + self.view.get_single(key) + } +} + +pub type HashMapStringView<'a, V> = HashMapView< + &'a str, + V, + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>>, + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<>::Inner>>, +>; diff --git a/src/flatbuffers/containers/hash_set.rs b/src/flatbuffers/containers/hash_set.rs new file mode 100644 index 00000000..7c567e6a --- /dev/null +++ b/src/flatbuffers/containers/hash_set.rs @@ -0,0 +1,57 @@ +/// A HashSet implementation backed by a HashIndex. +/// Uses more memory than FlatSet, but gives faster lookup. +use crate::flatbuffers::containers::{ + fb_index::FbIndex, + flat_serialize::{FlatBuilder, FlatSerialize, WIPFlatVec}, + hash_index::{FbHashKey, HashIndexBuilder, HashIndexView, HashKey}, +}; + +/// A builder for a HashSet that can be serialized into a flatbuffer. +/// A default value is used to mark empty slots, so it can't be added. 
+#[derive(Default)] +pub(crate) struct HashSetBuilder { + builder: HashIndexBuilder, +} + +impl HashSetBuilder { + pub fn insert(&mut self, key: I) { + self.builder.insert(key, (), false /* allow_duplicate */); + } +} + +impl<'b, B: FlatBuilder<'b>, I: FlatSerialize<'b, B> + HashKey> FlatSerialize<'b, B> + for HashSetBuilder +{ + type Output = WIPFlatVec<'b, I, B>; + + fn serialize(value: Self, builder: &mut B) -> Self::Output + where + I: FlatSerialize<'b, B>, + { + let (indexes, _) = HashIndexBuilder::consume(value.builder); + let v = indexes + .into_iter() + .map(|x| FlatSerialize::serialize(x, builder)) + .collect::>(); + builder.raw_builder().create_vector(&v) + } +} + +/// A view of a HashSet stored in a flatbuffer. +/// The default value is considered as an empty slot, `contains(default_value)` +/// always returns false. +pub(crate) struct HashSetView> { + view: HashIndexView, +} + +impl> HashSetView { + pub fn new(keys: Keys) -> Self { + Self { + view: HashIndexView::new(keys, ()), + } + } + + pub fn contains(&self, key: I) -> bool { + self.view.get_single(key).is_some() + } +} diff --git a/src/flatbuffers/containers/mod.rs b/src/flatbuffers/containers/mod.rs index 20eb251d..18eb9202 100644 --- a/src/flatbuffers/containers/mod.rs +++ b/src/flatbuffers/containers/mod.rs @@ -1,5 +1,9 @@ +pub(crate) mod fb_index; pub(crate) mod flat_map; pub(crate) mod flat_multimap; pub(crate) mod flat_serialize; pub(crate) mod flat_set; +pub(crate) mod hash_index; +pub(crate) mod hash_map; +pub(crate) mod hash_set; pub(crate) mod sorted_index; diff --git a/src/flatbuffers/containers/sorted_index.rs b/src/flatbuffers/containers/sorted_index.rs index 166f491f..8335eafc 100644 --- a/src/flatbuffers/containers/sorted_index.rs +++ b/src/flatbuffers/containers/sorted_index.rs @@ -1,9 +1,9 @@ use flatbuffers::{Follow, Vector}; +use crate::flatbuffers::containers::fb_index::FbIndex; + // Represents sorted sequence to perform the binary search. 
-pub(crate) trait SortedIndex { - fn len(&self) -> usize; - fn get(&self, index: usize) -> I; +pub(crate) trait SortedIndex: FbIndex { fn partition_point(&self, predicate: F) -> usize where F: FnMut(&I) -> bool; @@ -13,16 +13,6 @@ pub(crate) trait SortedIndex { // if possible, because it faster than getting values with flatbuffer's // get method. impl SortedIndex for &[I] { - #[inline(always)] - fn len(&self) -> usize { - <[I]>::len(self) - } - - #[inline(always)] - fn get(&self, index: usize) -> I { - self[index] - } - #[inline(always)] fn partition_point(&self, predicate: F) -> usize where @@ -39,16 +29,6 @@ impl<'a, T: Follow<'a>> SortedIndex for Vector<'a, T> where T::Inner: Ord, { - #[inline(always)] - fn len(&self) -> usize { - Vector::len(self) - } - - #[inline(always)] - fn get(&self, index: usize) -> T::Inner { - Vector::get(self, index) - } - fn partition_point(&self, mut predicate: F) -> usize where F: FnMut(&T::Inner) -> bool, diff --git a/src/flatbuffers/fb_network_filter.fbs b/src/flatbuffers/fb_network_filter.fbs index c85698c0..c4b7ed01 100644 --- a/src/flatbuffers/fb_network_filter.fbs +++ b/src/flatbuffers/fb_network_filter.fbs @@ -52,6 +52,10 @@ table HostnameSpecificRules { procedural_action_exception: [string]; } +table StringVector { + data: [string] (required); +} + /// A table to store cosmetic filter rules (including supported structures). table CosmeticFilters { /// Rules that are just the CSS class of an element to be hidden on all sites, e.g. `##.ad`. @@ -71,13 +75,13 @@ table CosmeticFilters { /// class, e.g. `##.ad image`. /// Stored as a multi-map `hostname_hash` => `selector` complex_class_rules_index: [string] (required); - complex_class_rules_values: [string] (required); + complex_class_rules_values: [StringVector] (required); /// Rules that are the CSS selector of an element to be hidden on all sites, starting with an /// id, e.g. `###banner > .text a`. 
/// Stored as a multi-map `hostname_hash` => `selector` complex_id_rules_index: [string] (required); - complex_id_rules_values: [string] (required); + complex_id_rules_values: [StringVector] (required); /// Simple hostname-specific hide rules, e.g. `example.com##.ad`. /// Stored as a multi-map `hostname_hash` => `selector`. diff --git a/src/flatbuffers/fb_network_filter_generated.rs b/src/flatbuffers/fb_network_filter_generated.rs index 35e98f26..631f285b 100644 --- a/src/flatbuffers/fb_network_filter_generated.rs +++ b/src/flatbuffers/fb_network_filter_generated.rs @@ -987,6 +987,164 @@ pub mod fb { ) } } + pub enum StringVectorOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct StringVector<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for StringVector<'a> { + type Inner = StringVector<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } + } + + impl<'a> StringVector<'a> { + pub const VT_DATA: flatbuffers::VOffsetT = 4; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + StringVector { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args StringVectorArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = StringVectorBuilder::new(_fbb); + if let Some(x) = args.data { + builder.add_data(x); + } + builder.finish() + } + + pub fn unpack(&self) -> StringVectorT { + let data = { + let x = self.data(); + x.iter().map(|s| s.to_string()).collect() + }; + StringVectorT { data } + } + + #[inline] + pub fn data(&self) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + 
>>(StringVector::VT_DATA, None) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for StringVector<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>, + >>("data", Self::VT_DATA, true)? + .finish(); + Ok(()) + } + } + pub struct StringVectorArgs<'a> { + pub data: Option< + flatbuffers::WIPOffset>>, + >, + } + impl<'a> Default for StringVectorArgs<'a> { + #[inline] + fn default() -> Self { + StringVectorArgs { + data: None, // required field + } + } + } + + pub struct StringVectorBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> StringVectorBuilder<'a, 'b, A> { + #[inline] + pub fn add_data( + &mut self, + data: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_ + .push_slot_always::>(StringVector::VT_DATA, data); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> StringVectorBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + StringVectorBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, StringVector::VT_DATA, "data"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for StringVector<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("StringVector"); + ds.field("data", &self.data()); + ds.finish() + } + } + #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] + pub struct StringVectorT { + pub data: Vec, + } + impl Default for StringVectorT { + fn default() -> Self { + Self { + data: Default::default(), + } + } + } + impl StringVectorT { 
+ pub fn pack<'b, A: flatbuffers::Allocator + 'b>( + &self, + _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, + ) -> flatbuffers::WIPOffset> { + let data = Some({ + let x = &self.data; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + StringVector::create(_fbb, &StringVectorArgs { data }) + } + } pub enum CosmeticFiltersOffset {} #[derive(Copy, Clone, PartialEq)] @@ -1096,7 +1254,7 @@ pub mod fb { }; let complex_class_rules_values = { let x = self.complex_class_rules_values(); - x.iter().map(|s| s.to_string()).collect() + x.iter().map(|t| t.unpack()).collect() }; let complex_id_rules_index = { let x = self.complex_id_rules_index(); @@ -1104,7 +1262,7 @@ pub mod fb { }; let complex_id_rules_values = { let x = self.complex_id_rules_values(); - x.iter().map(|s| s.to_string()).collect() + x.iter().map(|t| t.unpack()).collect() }; let hostname_hide_index = { let x = self.hostname_hide_index(); @@ -1220,14 +1378,14 @@ pub mod fb { #[inline] pub fn complex_class_rules_values( &self, - ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> { // Safety: // Created from valid Table for this object // which contains a valid value in this slot unsafe { self._tab .get::>, + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>, >>(CosmeticFilters::VT_COMPLEX_CLASS_RULES_VALUES, None) .unwrap() } @@ -1253,14 +1411,14 @@ pub mod fb { #[inline] pub fn complex_id_rules_values( &self, - ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> { // Safety: // Created from valid Table for this object // which contains a valid value in this slot unsafe { self._tab .get::>, + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>, >>(CosmeticFilters::VT_COMPLEX_ID_RULES_VALUES, None) .unwrap() } @@ -1397,7 +1555,7 @@ pub mod fb { true, )? 
.visit_field::>, + flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset>, >>( "complex_class_rules_values", Self::VT_COMPLEX_CLASS_RULES_VALUES, @@ -1411,7 +1569,7 @@ pub mod fb { true, )? .visit_field::>, + flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset>, >>( "complex_id_rules_values", Self::VT_COMPLEX_ID_RULES_VALUES, @@ -1463,13 +1621,17 @@ pub mod fb { flatbuffers::WIPOffset>>, >, pub complex_class_rules_values: Option< - flatbuffers::WIPOffset>>, + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, >, pub complex_id_rules_index: Option< flatbuffers::WIPOffset>>, >, pub complex_id_rules_values: Option< - flatbuffers::WIPOffset>>, + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, >, pub hostname_hide_index: Option>>, pub hostname_hide_values: Option< @@ -1565,7 +1727,7 @@ pub mod fb { pub fn add_complex_class_rules_values( &mut self, complex_class_rules_values: flatbuffers::WIPOffset< - flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, >, ) { self.fbb_.push_slot_always::>( @@ -1589,7 +1751,7 @@ pub mod fb { pub fn add_complex_id_rules_values( &mut self, complex_id_rules_values: flatbuffers::WIPOffset< - flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, >, ) { self.fbb_.push_slot_always::>( @@ -1774,9 +1936,9 @@ pub mod fb { pub simple_id_rules: Vec, pub misc_generic_selectors: Vec, pub complex_class_rules_index: Vec, - pub complex_class_rules_values: Vec, + pub complex_class_rules_values: Vec, pub complex_id_rules_index: Vec, - pub complex_id_rules_values: Vec, + pub complex_id_rules_values: Vec, pub hostname_hide_index: Vec, pub hostname_hide_values: Vec, pub hostname_inject_script_index: Vec, @@ -1830,7 +1992,7 @@ pub mod fb { }); let complex_class_rules_values = Some({ let x = &self.complex_class_rules_values; - let w: Vec<_> = 
x.iter().map(|s| _fbb.create_string(s)).collect(); + let w: Vec<_> = x.iter().map(|t| t.pack(_fbb)).collect(); _fbb.create_vector(&w) }); let complex_id_rules_index = Some({ @@ -1840,7 +2002,7 @@ pub mod fb { }); let complex_id_rules_values = Some({ let x = &self.complex_id_rules_values; - let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + let w: Vec<_> = x.iter().map(|t| t.pack(_fbb)).collect(); _fbb.create_vector(&w) }); let hostname_hide_index = Some({ diff --git a/tests/unit/engine.rs b/tests/unit/engine.rs index ea3883c2..1e2741c6 100644 --- a/tests/unit/engine.rs +++ b/tests/unit/engine.rs @@ -183,7 +183,7 @@ mod tests { fn deserialization_generate_simple() { let mut engine = Engine::from_rules(["ad-banner"], Default::default()); let data = engine.serialize().to_vec(); - const EXPECTED_HASH: u64 = 15201305923211912617; + const EXPECTED_HASH: u64 = 884296823183764168; assert_eq!(hash(&data), EXPECTED_HASH, "{}", HASH_MISMATCH_MSG); engine.deserialize(&data).unwrap(); } @@ -193,7 +193,7 @@ mod tests { let mut engine = Engine::from_rules(["ad-banner$tag=abc"], Default::default()); engine.use_tags(&["abc"]); let data = engine.serialize().to_vec(); - const EXPECTED_HASH: u64 = 5114301339390262037; + const EXPECTED_HASH: u64 = 7887643884738497753; assert_eq!(hash(&data), EXPECTED_HASH, "{}", HASH_MISMATCH_MSG); engine.deserialize(&data).unwrap(); } @@ -227,9 +227,9 @@ mod tests { } let expected_hash: u64 = if cfg!(feature = "css-validation") { - 2942520321544562177 + 16474517741373816646 } else { - 17713004238689548675 + 18073139195397769096 }; assert_eq!(hash(&data), expected_hash, "{}", HASH_MISMATCH_MSG); From 31ef2cace498430937ef62810b976228c163e084 Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Wed, 15 Oct 2025 02:13:49 +0400 Subject: [PATCH 02/11] Fix comments --- src/flatbuffers/containers/fb_index.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/flatbuffers/containers/fb_index.rs b/src/flatbuffers/containers/fb_index.rs index 
2524dfc4..29f03e9e 100644 --- a/src/flatbuffers/containers/fb_index.rs +++ b/src/flatbuffers/containers/fb_index.rs @@ -4,6 +4,7 @@ use flatbuffers::{Follow, Vector}; /// It has two implementations: /// 1. a faster &[I] for slices; /// 2. a slower for flatbuffers::Vector, that uses Follow() internally. +/// /// Note: it intentally returns values using a copy, because it's faster /// than by reference. pub(crate) trait FbIndex { From 28c32d8c77f265fb2b4fc86311df168be603334e Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Wed, 15 Oct 2025 19:57:22 +0400 Subject: [PATCH 03/11] bucket to slot rename --- src/flatbuffers/containers/hash_index.rs | 62 ++++++++++++------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/flatbuffers/containers/hash_index.rs b/src/flatbuffers/containers/hash_index.rs index f2bf61b9..c3f7d439 100644 --- a/src/flatbuffers/containers/hash_index.rs +++ b/src/flatbuffers/containers/hash_index.rs @@ -28,33 +28,33 @@ impl FbHashKey for &str { } #[inline(always)] -fn next_bucket(mut bucket: usize, capacity: usize, step: &mut usize) -> usize { - bucket += *step * *step; +fn next_slot(mut slot: usize, capacity: usize, step: &mut usize) -> usize { + slot += *step * *step; *step += 1; - bucket % capacity + slot % capacity } -fn find_matching_bucket>( +fn find_matching_slot>( indexes: &Keys, - mut bucket: usize, + mut slot: usize, key: I, capacity: usize, step: &mut usize, ) -> Option { - debug_assert!(bucket < capacity); + debug_assert!(slot < capacity); debug_assert!(*step > 0); debug_assert!(indexes.len() == capacity); loop { - let data = indexes.get(bucket); + let data = indexes.get(slot); if FbHashKey::is_empty(&data) { return None; } if data == key { - return Some(bucket); + return Some(slot); } - bucket = next_bucket(bucket, capacity, step); + slot = next_slot(slot, capacity, step); } } @@ -76,14 +76,14 @@ impl, Values: FbIndex> HashIndexView Option { - let bucket = self.find_single_bucket(key); - bucket.map(|idx| 
self.values.get(idx)) + let slot = self.find_single_slot(key); + slot.map(|idx| self.values.get(idx)) } - fn find_single_bucket(&self, key: I) -> Option { + fn find_single_slot(&self, key: I) -> Option { let capacity = self.indexes.len(); - let bucket = get_hash(&key) % capacity; - find_matching_bucket(&self.indexes, bucket, key, capacity, &mut 1) + let slot = get_hash(&key) % capacity; + find_matching_slot(&self.indexes, slot, key, capacity, &mut 1) } } @@ -127,27 +127,27 @@ impl HashIndexBuilder { let capacity = self.capacity(); assert!(capacity >= 4); - let mut bucket = target_hash % capacity; + let mut slot = target_hash % capacity; let mut step = 1; loop { - if HashKey::is_empty(&self.indexes[bucket]) { - // Found an empty bucket, take it and insert new key-value pair. - self.indexes[bucket] = key; - self.values[bucket] = value; + if HashKey::is_empty(&self.indexes[slot]) { + // Found an empty slot, take it and insert new key-value pair. + self.indexes[slot] = key; + self.values[slot] = value; self.size += 1; self.maybe_increase_capacity(allow_duplicates); - return (bucket, &mut self.values[bucket]); + return (slot, &mut self.values[slot]); } - if self.indexes[bucket] == key && !allow_duplicates { + if self.indexes[slot] == key && !allow_duplicates { // Update the value for an existing key. 
- self.values[bucket] = value; - return (bucket, &mut self.values[bucket]); + self.values[slot] = value; + return (slot, &mut self.values[slot]); } - bucket = next_bucket(bucket, capacity, &mut step); + slot = next_slot(slot, capacity, &mut step); } } @@ -155,27 +155,27 @@ impl HashIndexBuilder { self.indexes.len() } - pub fn find_single_bucket(&mut self, key: &I) -> Option { + pub fn find_single_slot(&mut self, key: &I) -> Option { let capacity = self.indexes.len(); - let mut bucket = get_hash(key) % capacity; + let mut slot = get_hash(key) % capacity; let mut step = 1; loop { - let data = &self.indexes[bucket]; + let data = &self.indexes[slot]; if HashKey::is_empty(data) { return None; } if data == key { - return Some(bucket); + return Some(slot); } - bucket = next_bucket(bucket, capacity, &mut step); + slot = next_slot(slot, capacity, &mut step); } } pub fn get_or_insert(&mut self, key: I, value: V) -> &mut V { - if let Some(existing_bucket) = self.find_single_bucket(&key) { - return &mut self.values[existing_bucket]; + if let Some(existing_slot) = self.find_single_slot(&key) { + return &mut self.values[existing_slot]; } let (_, new_value) = self.insert(key, value, false); new_value From 3eca26207cd187f647b232790af9dad00314a43b Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Wed, 15 Oct 2025 21:25:05 +0400 Subject: [PATCH 04/11] Cleaup hash_index.rs --- src/flatbuffers/containers/hash_index.rs | 153 +++++++++-------------- tests/unit/engine.rs | 4 +- 2 files changed, 64 insertions(+), 93 deletions(-) diff --git a/src/flatbuffers/containers/hash_index.rs b/src/flatbuffers/containers/hash_index.rs index c3f7d439..4c1fe298 100644 --- a/src/flatbuffers/containers/hash_index.rs +++ b/src/flatbuffers/containers/hash_index.rs @@ -3,6 +3,7 @@ /// The load factor is 25%-50%. /// Uses RustC FxHasher as a hash function. /// A default value is used to mark empty slots, so it can't be used as a key. 
+/// Inspired by https://source.chromium.org/chromium/chromium/src/+/main:components/url_pattern_index/closed_hash_map.h use std::marker::PhantomData; use crate::flatbuffers::containers::fb_index::FbIndex; @@ -27,34 +28,21 @@ impl FbHashKey for &str { } } -#[inline(always)] -fn next_slot(mut slot: usize, capacity: usize, step: &mut usize) -> usize { - slot += *step * *step; - *step += 1; - slot % capacity -} - -fn find_matching_slot>( - indexes: &Keys, - mut slot: usize, - key: I, - capacity: usize, - step: &mut usize, -) -> Option { - debug_assert!(slot < capacity); - debug_assert!(*step > 0); - debug_assert!(indexes.len() == capacity); +pub fn find_slot( + key: &I, + table_size: usize, + probe: impl Fn(usize) -> bool, +) -> usize { + debug_assert!(table_size.is_power_of_two()); + let table_mask = table_size - 1; + let mut slot = get_hash(&key) & table_mask; + let mut step = 1; loop { - let data = indexes.get(slot); - if FbHashKey::is_empty(&data) { - return None; - } - - if data == key { - return Some(slot); + if probe(slot) { + return slot; } - - slot = next_slot(slot, capacity, step); + slot = (slot + step) & table_mask; + step += 1; } } @@ -75,15 +63,19 @@ impl, Values: FbIndex> HashIndexView Option { - let slot = self.find_single_slot(key); - slot.map(|idx| self.values.get(idx)) + fn capacity(&self) -> usize { + self.indexes.len() } - fn find_single_slot(&self, key: I) -> Option { - let capacity = self.indexes.len(); - let slot = get_hash(&key) % capacity; - find_matching_slot(&self.indexes, slot, key, capacity, &mut 1) + pub fn get_single(&self, key: I) -> Option { + let slot = find_slot(&key, self.capacity(), |slot| -> bool { + FbHashKey::is_empty(&self.indexes.get(slot)) || self.indexes.get(slot) == key + }); + if FbHashKey::is_empty(&self.indexes.get(slot)) { + None + } else { + Some(self.values.get(slot)) + } } } @@ -123,31 +115,21 @@ impl HashIndexBuilder { pub fn insert(&mut self, key: I, value: V, allow_duplicates: bool) -> (usize, &mut V) { 
debug_assert!(!HashKey::is_empty(&key), "Key is empty"); - let target_hash = get_hash(&key); - - let capacity = self.capacity(); - assert!(capacity >= 4); - let mut slot = target_hash % capacity; - - let mut step = 1; - - loop { - if HashKey::is_empty(&self.indexes[slot]) { - // Found an empty slot, take it and insert new key-value pair. - self.indexes[slot] = key; - self.values[slot] = value; - self.size += 1; - self.maybe_increase_capacity(allow_duplicates); - return (slot, &mut self.values[slot]); - } - if self.indexes[slot] == key && !allow_duplicates { - // Update the value for an existing key. - self.values[slot] = value; - return (slot, &mut self.values[slot]); - } - - slot = next_slot(slot, capacity, &mut step); + let slot = find_slot(&key, self.capacity(), |slot| -> bool { + HashKey::is_empty(&self.indexes[slot]) + || (self.indexes[slot] == key && !allow_duplicates) + }); + + if HashKey::is_empty(&self.indexes[slot]) { + self.indexes[slot] = key; + self.values[slot] = value; + self.size += 1; + self.maybe_increase_capacity(); + (slot, &mut self.values[slot]) + } else { + self.values[slot] = value; + (slot, &mut self.values[slot]) } } @@ -155,46 +137,35 @@ impl HashIndexBuilder { self.indexes.len() } - pub fn find_single_slot(&mut self, key: &I) -> Option { - let capacity = self.indexes.len(); - let mut slot = get_hash(key) % capacity; - let mut step = 1; - loop { - let data = &self.indexes[slot]; - if HashKey::is_empty(data) { - return None; - } - - if data == key { - return Some(slot); - } - - slot = next_slot(slot, capacity, &mut step); - } - } - pub fn get_or_insert(&mut self, key: I, value: V) -> &mut V { - if let Some(existing_slot) = self.find_single_slot(&key) { - return &mut self.values[existing_slot]; + let slot = find_slot(&key, self.capacity(), |slot| -> bool { + HashKey::is_empty(&self.indexes[slot]) || self.indexes[slot] == key + }); + if !HashKey::is_empty(&self.indexes[slot]) { + return &mut self.values[slot]; } let (_, new_value) = 
self.insert(key, value, false); new_value } - fn maybe_increase_capacity(&mut self, allow_duplicates: bool) { - // Use 50% load factor. - if self.size * 2 > self.capacity() { - self.size = 0; - let new_capacity = self.capacity() * 2; - let old_indexes = std::mem::take(&mut self.indexes); - let old_values = std::mem::take(&mut self.values); - self.indexes = vec![I::default(); new_capacity]; - self.values = vec![V::default(); new_capacity]; - - for (key, value) in old_indexes.into_iter().zip(old_values.into_iter()) { - if !HashKey::is_empty(&key) { - self.insert(key, value, allow_duplicates); - } + fn maybe_increase_capacity(&mut self) { + if self.size * 2 <= self.capacity() { // Use 50% load factor. + return; + } + + let new_capacity = (self.capacity() * 2).next_power_of_two(); + let old_indexes = std::mem::take(&mut self.indexes); + let old_values = std::mem::take(&mut self.values); + self.indexes = vec![I::default(); new_capacity]; + self.values = vec![V::default(); new_capacity]; + + for (key, value) in old_indexes.into_iter().zip(old_values.into_iter()) { + if !HashKey::is_empty(&key) { + let slot = find_slot(&key, new_capacity, |slot| -> bool { + HashKey::is_empty(&self.indexes[slot]) + }); + self.indexes[slot] = key; + self.values[slot] = value; } } } diff --git a/tests/unit/engine.rs b/tests/unit/engine.rs index 1e2741c6..d3f74cb1 100644 --- a/tests/unit/engine.rs +++ b/tests/unit/engine.rs @@ -227,9 +227,9 @@ mod tests { } let expected_hash: u64 = if cfg!(feature = "css-validation") { - 16474517741373816646 + 15959922653220214643 } else { - 18073139195397769096 + 16953879754096715156 }; assert_eq!(hash(&data), expected_hash, "{}", HASH_MISMATCH_MSG); From 88585bdda7af7069d5eba05a75ef5ba438b0b9a9 Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Wed, 15 Oct 2025 23:55:00 +0400 Subject: [PATCH 05/11] Add unit tests --- src/data_format/mod.rs | 6 +- src/flatbuffers/containers/hash_index.rs | 52 +++++++++--- src/flatbuffers/containers/hash_map.rs | 14 ++++ 
src/flatbuffers/containers/hash_set.rs | 14 ++++ tests/unit/flatbuffers/containers/hash_map.rs | 82 +++++++++++++++++++ tests/unit/flatbuffers/containers/hash_set.rs | 76 +++++++++++++++++ 6 files changed, 228 insertions(+), 16 deletions(-) create mode 100644 tests/unit/flatbuffers/containers/hash_map.rs create mode 100644 tests/unit/flatbuffers/containers/hash_set.rs diff --git a/src/data_format/mod.rs b/src/data_format/mod.rs index 98b980e0..d62dca13 100644 --- a/src/data_format/mod.rs +++ b/src/data_format/mod.rs @@ -17,7 +17,7 @@ const ADBLOCK_RUST_DAT_MAGIC: [u8; 4] = [0xd1, 0xd9, 0x3a, 0xaf]; /// The version of the data format. /// If the data format version is incremented, the data is considered as incompatible. -const ADBLOCK_FLATBUFFER_VERSION: u8 = 2; +const ADBLOCK_RUST_DAT_VERSION: u8 = 2; /// The total length of the header prefix (magic + version + seahash) const HEADER_PREFIX_LENGTH: usize = 4 + 1 + 8; @@ -35,7 +35,7 @@ pub(crate) fn serialize_dat_file(data: &[u8]) -> Vec { let mut serialized = Vec::with_capacity(data.len() + HEADER_PREFIX_LENGTH); let hash = seahash::hash(data).to_le_bytes(); serialized.extend_from_slice(&ADBLOCK_RUST_DAT_MAGIC); - serialized.push(ADBLOCK_FLATBUFFER_VERSION); + serialized.push(ADBLOCK_RUST_DAT_VERSION); serialized.extend_from_slice(&hash); assert_eq!(serialized.len(), HEADER_PREFIX_LENGTH); @@ -49,7 +49,7 @@ pub(crate) fn deserialize_dat_file(serialized: &[u8]) -> Result<&[u8], Deseriali } let version = serialized[ADBLOCK_RUST_DAT_MAGIC.len()]; - if version != ADBLOCK_FLATBUFFER_VERSION { + if version != ADBLOCK_RUST_DAT_VERSION { return Err(DeserializationError::VersionMismatch(version)); } let data = &serialized[HEADER_PREFIX_LENGTH..]; diff --git a/src/flatbuffers/containers/hash_index.rs b/src/flatbuffers/containers/hash_index.rs index 4c1fe298..b3779e16 100644 --- a/src/flatbuffers/containers/hash_index.rs +++ b/src/flatbuffers/containers/hash_index.rs @@ -63,7 +63,7 @@ impl, Values: FbIndex> HashIndexView usize 
{ + pub fn capacity(&self) -> usize { self.indexes.len() } @@ -77,6 +77,17 @@ impl, Values: FbIndex> HashIndexView usize { + let mut len = 0; + for i in 0..self.capacity() { + if !FbHashKey::is_empty(&self.indexes.get(i)) { + len += 1; + } + } + len + } } pub(crate) struct HashIndexBuilder { @@ -102,15 +113,11 @@ impl Default for HashIndexBuilder { impl HashIndexBuilder { pub fn new_with_capacity(capacity: usize) -> Self { - debug_assert!(capacity >= 4); - let self_ = Self { + Self { size: 0, indexes: vec![I::default(); capacity], values: vec![V::default(); capacity], - }; - debug_assert_eq!(self_.indexes.len(), capacity); - debug_assert_eq!(self_.capacity(), capacity); - self_ + } } pub fn insert(&mut self, key: I, value: V, allow_duplicates: bool) -> (usize, &mut V) { @@ -149,7 +156,8 @@ impl HashIndexBuilder { } fn maybe_increase_capacity(&mut self) { - if self.size * 2 <= self.capacity() { // Use 50% load factor. + if self.size * 2 <= self.capacity() { + // Use 50% load factor. return; } @@ -161,11 +169,11 @@ impl HashIndexBuilder { for (key, value) in old_indexes.into_iter().zip(old_values.into_iter()) { if !HashKey::is_empty(&key) { - let slot = find_slot(&key, new_capacity, |slot| -> bool { - HashKey::is_empty(&self.indexes[slot]) - }); - self.indexes[slot] = key; - self.values[slot] = value; + let slot = find_slot(&key, new_capacity, |slot| -> bool { + HashKey::is_empty(&self.indexes[slot]) + }); + self.indexes[slot] = key; + self.values[slot] = value; } } } @@ -174,3 +182,21 @@ impl HashIndexBuilder { (value.indexes, value.values) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_hash() { + // Verify get_hash is stable. + // If the value changes, update ADBLOCK_RUST_DAT_VERSION. 
+ let message = "If the value changes, update ADBLOCK_RUST_DAT_VERSION."; + assert_eq!( + get_hash(&"adblock-rust"), + 15102204115509201409, + "{}", + message + ); + } +} diff --git a/src/flatbuffers/containers/hash_map.rs b/src/flatbuffers/containers/hash_map.rs index 1bd39e05..865e5b8f 100644 --- a/src/flatbuffers/containers/hash_map.rs +++ b/src/flatbuffers/containers/hash_map.rs @@ -78,6 +78,16 @@ where pub fn get(&self, key: I) -> Option { self.view.get_single(key) } + + #[cfg(test)] + pub fn capacity(&self) -> usize { + self.view.capacity() + } + + #[cfg(test)] + pub fn len(&self) -> usize { + self.view.len() + } } pub type HashMapStringView<'a, V> = HashMapView< @@ -86,3 +96,7 @@ pub type HashMapStringView<'a, V> = HashMapView< flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>>, flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<>::Inner>>, >; + +#[cfg(test)] +#[path = "../../../tests/unit/flatbuffers/containers/hash_map.rs"] +mod unit_tests; diff --git a/src/flatbuffers/containers/hash_set.rs b/src/flatbuffers/containers/hash_set.rs index 7c567e6a..ffe77963 100644 --- a/src/flatbuffers/containers/hash_set.rs +++ b/src/flatbuffers/containers/hash_set.rs @@ -54,4 +54,18 @@ impl> HashSetView { pub fn contains(&self, key: I) -> bool { self.view.get_single(key).is_some() } + + #[cfg(test)] + pub fn len(&self) -> usize { + self.view.len() + } + + #[cfg(test)] + pub fn capacity(&self) -> usize { + self.view.capacity() + } } + +#[cfg(test)] +#[path = "../../../tests/unit/flatbuffers/containers/hash_set.rs"] +mod unit_tests; diff --git a/tests/unit/flatbuffers/containers/hash_map.rs b/tests/unit/flatbuffers/containers/hash_map.rs new file mode 100644 index 00000000..a6265878 --- /dev/null +++ b/tests/unit/flatbuffers/containers/hash_map.rs @@ -0,0 +1,82 @@ +#[allow(unknown_lints)] +#[allow( + dead_code, + clippy::all, + unused_imports, + unsafe_code, + mismatched_lifetime_syntaxes +)] +#[path = "./test_containers_generated.rs"] +pub mod flat; 
+#[cfg(test)] +mod tests { + use super::super::*; + use super::flat::fb_test; + + fn serialize_map(values: Vec<(&str, &str)>) -> Vec { + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let mut map = HashMapBuilder::default(); + for (key, value) in values { + map.insert(key.to_string(), value.to_string()); + } + let map = HashMapBuilder::finish(map, &mut builder); + let map_serialized = fb_test::TestStringMap::create( + &mut builder, + &fb_test::TestStringMapArgs { + keys: Some(map.keys), + values: Some(map.values), + }, + ); + let root = fb_test::TestRoot::create( + &mut builder, + &fb_test::TestRootArgs { + test_string_map: Some(map_serialized), + ..Default::default() + }, + ); + builder.finish(root, None); + builder.finished_data().to_vec() + } + + fn load_map<'a>(data: &'a [u8]) -> HashMapStringView<'a, &'a str> { + let root = fb_test::root_as_test_root(data).unwrap(); + let flat_map = root.test_string_map().unwrap(); + HashMapView::new(flat_map.keys(), flat_map.values()) + } + + #[test] + fn test_empty_map() { + let values = vec![]; + let data = serialize_map(values); + let map = load_map(&data); + assert_eq!(map.len(), 0); + assert_eq!(map.capacity(), 4); + assert!(map.get("a").is_none()); + } + + #[test] + fn test_duplicate_keys() { + let values = vec![("b", "20"), ("a", "10"), ("b", "30")]; + let data = serialize_map(values); + let map = load_map(&data); + assert_eq!(map.len(), 2); + assert_eq!(map.capacity(), 4); + assert_eq!(map.get("a").unwrap(), "10"); + assert_eq!(map.get("b").unwrap(), "30"); + } + + // TODO: test get_or_insert + + #[test] + fn test_string_builder() { + let values = vec![("b", "20"), ("a", "10"), ("c", "30")]; + let data = serialize_map(values); + let map = load_map(&data); + + assert_eq!(map.get("a").unwrap(), "10"); + assert_eq!(map.get("b").unwrap(), "20"); + assert_eq!(map.get("c").unwrap(), "30"); + assert!(map.get("d").is_none()); + assert!(map.get("").is_none()); + } +} diff --git 
a/tests/unit/flatbuffers/containers/hash_set.rs b/tests/unit/flatbuffers/containers/hash_set.rs new file mode 100644 index 00000000..47b8fce2 --- /dev/null +++ b/tests/unit/flatbuffers/containers/hash_set.rs @@ -0,0 +1,76 @@ +#[allow(unknown_lints)] +#[allow( + dead_code, + clippy::all, + unused_imports, + unsafe_code, + mismatched_lifetime_syntaxes +)] +#[path = "./test_containers_generated.rs"] +pub mod flat; +#[cfg(test)] +mod tests { + use super::super::*; + use super::flat::fb_test; + + fn serialize_set(values: Vec<&str>) -> Vec { + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let mut set = HashSetBuilder::default(); + for value in values { + set.insert(value.to_string()); + } + let test_string_set = Some(FlatSerialize::serialize(set, &mut builder)); + + let root = fb_test::TestRoot::create( + &mut builder, + &fb_test::TestRootArgs { + test_string_set, + ..Default::default() + }, + ); + builder.finish(root, None); + builder.finished_data().to_vec() + } + + fn load_set<'a>( + data: &'a [u8], + ) -> HashSetView<&'a str, flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>>> { + let root = fb_test::root_as_test_root(data).unwrap(); + let flat_set = root.test_string_set().unwrap(); + HashSetView::new(flat_set) + } + + #[test] + fn test_empty_map() { + let values = vec![]; + let data = serialize_set(values); + let set = load_set(&data); + assert_eq!(set.len(), 0); + assert_eq!(set.capacity(), 4); + assert!(!set.contains("a")); + } + + #[test] + fn test_duplicate_keys() { + let values = vec!["b", "a", "b"]; + let data = serialize_set(values); + let set = load_set(&data); + assert_eq!(set.len(), 2); + assert_eq!(set.capacity(), 4); + assert!(set.contains("a")); + assert!(set.contains("b")); + } + + #[test] + fn test_string_builder() { + let values = vec!["b", "a", "c"]; + let data = serialize_set(values); + let set = load_set(&data); + + assert!(set.contains("a")); + assert!(set.contains("b")); + assert!(set.contains("c")); + 
assert!(!set.contains("d")); + assert!(!set.contains("")); + } +} From f405b34546ef3dbf1e9a3b275d8f9507c6b9e85c Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Thu, 16 Oct 2025 00:05:20 +0400 Subject: [PATCH 06/11] Fix copilot notes --- src/flatbuffers/containers/fb_index.rs | 2 +- src/flatbuffers/containers/flat_serialize.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/flatbuffers/containers/fb_index.rs b/src/flatbuffers/containers/fb_index.rs index 29f03e9e..dc0f0ffb 100644 --- a/src/flatbuffers/containers/fb_index.rs +++ b/src/flatbuffers/containers/fb_index.rs @@ -5,7 +5,7 @@ use flatbuffers::{Follow, Vector}; /// 1. a faster &[I] for slices; /// 2. a slower for flatbuffers::Vector, that uses Follow() internally. /// -/// Note: it intentally returns values using a copy, because it's faster +/// Note: it intentionally returns values using a copy, because it's faster /// than by reference. pub(crate) trait FbIndex { fn len(&self) -> usize; diff --git a/src/flatbuffers/containers/flat_serialize.rs b/src/flatbuffers/containers/flat_serialize.rs index dbf1e3da..a341d7a0 100644 --- a/src/flatbuffers/containers/flat_serialize.rs +++ b/src/flatbuffers/containers/flat_serialize.rs @@ -28,9 +28,9 @@ pub trait FlatSerialize<'b, B: FlatBuilder<'b>>: Sized { impl<'b> FlatBuilder<'b> for flatbuffers::FlatBufferBuilder<'b> { fn create_string(&mut self, s: &str) -> WIPOffset<&'b str> { if s.is_empty() { - self.create_shared_string(s) + flatbuffers::FlatBufferBuilder::create_shared_string(self, s) } else { - self.create_string(s) + flatbuffers::FlatBufferBuilder::create_string(self, s) } } From 2e8637d333a098103c4ba048ea57b134b078692e Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Thu, 16 Oct 2025 00:30:17 +0400 Subject: [PATCH 07/11] Fix format --- src/cosmetic_filter_cache.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/cosmetic_filter_cache.rs b/src/cosmetic_filter_cache.rs index 
034eef0d..546666d7 100644 --- a/src/cosmetic_filter_cache.rs +++ b/src/cosmetic_filter_cache.rs @@ -187,11 +187,9 @@ impl CosmeticFilterCache { selectors.push(format!(".{}", class)); } if let Some(values) = complex_class_rules.get(class) { - { - for sel in values.data() { - if !exceptions.contains(sel) { - selectors.push(sel.to_string()); - } + for sel in values.data() { + if !exceptions.contains(sel) { + selectors.push(sel.to_string()); } } } @@ -202,11 +200,9 @@ impl CosmeticFilterCache { selectors.push(format!("#{}", id)); } if let Some(values) = complex_id_rules.get(id) { - { - for sel in values.data() { - if !exceptions.contains(sel) { - selectors.push(sel.to_string()); - } + for sel in values.data() { + if !exceptions.contains(sel) { + selectors.push(sel.to_string()); } } } From df21416fedcfeeb9aa723b69aa6d9c1fc5037a08 Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Thu, 16 Oct 2025 12:00:47 +0400 Subject: [PATCH 08/11] Review from @anton-edm --- src/cosmetic_filter_cache_builder.rs | 10 ++++------ src/flatbuffers/containers/fb_index.rs | 4 ++++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/cosmetic_filter_cache_builder.rs b/src/cosmetic_filter_cache_builder.rs index 59294964..9e0c3e4e 100644 --- a/src/cosmetic_filter_cache_builder.rs +++ b/src/cosmetic_filter_cache_builder.rs @@ -59,9 +59,7 @@ impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for HostnameRule { } #[derive(Default, Clone)] -struct StringVector { - data: Vec, -} +struct StringVector(Vec); #[derive(Default)] pub(crate) struct CosmeticFilterCacheBuilder { @@ -120,7 +118,7 @@ impl CosmeticFilterCacheBuilder { let selectors = self .complex_class_rules .get_or_insert(class, StringVector::default()); - selectors.data.push(selector); + selectors.0.push(selector); } } } else if selector.starts_with('#') { @@ -133,7 +131,7 @@ impl CosmeticFilterCacheBuilder { let selectors = self .complex_id_rules .get_or_insert(id, StringVector::default()); - selectors.data.push(selector); + 
selectors.0.push(selector); } } } else { @@ -221,7 +219,7 @@ impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for StringVector { type Output = WIPOffset>; fn serialize(value: Self, builder: &mut B) -> WIPOffset> { - let v = FlatSerialize::serialize(value.data, builder); + let v = FlatSerialize::serialize(value.0, builder); fb::StringVector::create( builder.raw_builder(), &fb::StringVectorArgs { data: Some(v) }, diff --git a/src/flatbuffers/containers/fb_index.rs b/src/flatbuffers/containers/fb_index.rs index dc0f0ffb..2398a6f1 100644 --- a/src/flatbuffers/containers/fb_index.rs +++ b/src/flatbuffers/containers/fb_index.rs @@ -8,7 +8,11 @@ use flatbuffers::{Follow, Vector}; /// Note: it intentionally returns values using a copy, because it's faster /// than by reference. pub(crate) trait FbIndex { + /// Returns the number of elements. fn len(&self) -> usize; + + /// Returns a copy of the value at the given index. + /// 'index' must be in range [0, len()), otherwise panics. fn get(&self, index: usize) -> I; } From ccd4376ac19a5272431727cc298cb348dbe34c30 Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Thu, 16 Oct 2025 23:03:00 +0400 Subject: [PATCH 09/11] Improve the comments --- Cargo.toml | 1 + src/flatbuffers/containers/hash_index.rs | 33 +++++++++++++++++++----- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index dfc99cf6..96820f9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ idna = "1.0.3" serde = { workspace = true } serde_json = { workspace = true } seahash = "4.1.0" +# rustc-hash v1.1.0 provides better performance than 2.x; Chromium pins the same version.
rustc-hash = { version = "1.1.0", default-features = false } memchr = "2.4" base64 = "0.22" diff --git a/src/flatbuffers/containers/hash_index.rs b/src/flatbuffers/containers/hash_index.rs index b3779e16..9c967e6c 100644 --- a/src/flatbuffers/containers/hash_index.rs +++ b/src/flatbuffers/containers/hash_index.rs @@ -8,26 +8,36 @@ use std::marker::PhantomData; use crate::flatbuffers::containers::fb_index::FbIndex; +/// A trait for hash table builder keys, e.g. String. +/// The default value is used to mark empty slots. pub(crate) trait HashKey: Eq + std::hash::Hash + Default + Clone { + /// Returns true if the key is empty. fn is_empty(&self) -> bool; } -pub(crate) trait FbHashKey: Eq + std::hash::Hash { - fn is_empty(&self) -> bool; -} - -impl HashKey for String { +impl HashKey for T { fn is_empty(&self) -> bool { - self.is_empty() + self == &T::default() } } +/// A trait for hash table view keys that can be used in flatbuffers, e.g. &str. +/// The implementation must be kept in sync with the matching HashKey trait. +pub(crate) trait FbHashKey: Eq + std::hash::Hash { + /// Returns true if the key is empty. + fn is_empty(&self) -> bool; +} + impl FbHashKey for &str { fn is_empty(&self) -> bool { str::is_empty(self) } } +/// An internal function to find a slot in the hash table for the given key. +/// Returns the slot index. +/// 'table_size' is the table size. It must be a power of two. +/// 'probe' must return true for at least one slot (assuming the table isn't full). pub fn find_slot( key: &I, table_size: usize, @@ -46,6 +56,9 @@ pub fn find_slot( } } +/// A flatbuffer-compatible view of a hash table. +/// It's used to access the hash table without copying the keys and values. +/// It is loaded from HashIndexBuilder data that was serialized into a flatbuffer.
pub(crate) struct HashIndexView, Values: FbIndex> { indexes: Keys, values: Values, @@ -79,6 +92,8 @@ impl, Values: FbIndex> HashIndexView usize { let mut len = 0; for i in 0..self.capacity() { @@ -90,12 +105,18 @@ impl, Values: FbIndex> HashIndexView { indexes: Vec, values: Vec, size: usize, } +/// An internal function to hash a key. +/// The hash must be persistent across different runs of the program. fn get_hash(key: &I) -> usize { // RustC Hash is 2x faster than DefaultHasher. use rustc_hash::FxHasher; From a567b9427c1d2673e9d50d91577a55c733c88da7 Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Thu, 16 Oct 2025 23:12:17 +0400 Subject: [PATCH 10/11] Add a unit test --- tests/unit/flatbuffers/containers/hash_map.rs | 39 ++++++++++++++----- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/tests/unit/flatbuffers/containers/hash_map.rs b/tests/unit/flatbuffers/containers/hash_map.rs index a6265878..c38c012d 100644 --- a/tests/unit/flatbuffers/containers/hash_map.rs +++ b/tests/unit/flatbuffers/containers/hash_map.rs @@ -14,28 +14,32 @@ mod tests { use super::flat::fb_test; fn serialize_map(values: Vec<(&str, &str)>) -> Vec { - let mut builder = flatbuffers::FlatBufferBuilder::new(); - let mut map = HashMapBuilder::default(); + let mut builder = HashMapBuilder::default(); for (key, value) in values { - map.insert(key.to_string(), value.to_string()); + builder.insert(key.to_string(), value.to_string()); } - let map = HashMapBuilder::finish(map, &mut builder); + serialize_builder(builder) + } + + fn serialize_builder(builder: HashMapBuilder) -> Vec { + let mut flat_builder = flatbuffers::FlatBufferBuilder::new(); + let map = HashMapBuilder::finish(builder, &mut flat_builder); let map_serialized = fb_test::TestStringMap::create( - &mut builder, + &mut flat_builder, &fb_test::TestStringMapArgs { keys: Some(map.keys), values: Some(map.values), }, ); let root = fb_test::TestRoot::create( - &mut builder, + &mut flat_builder, &fb_test::TestRootArgs { 
test_string_map: Some(map_serialized), ..Default::default() }, ); - builder.finish(root, None); - builder.finished_data().to_vec() + flat_builder.finish(root, None); + flat_builder.finished_data().to_vec() } fn load_map<'a>(data: &'a [u8]) -> HashMapStringView<'a, &'a str> { @@ -65,7 +69,24 @@ mod tests { assert_eq!(map.get("b").unwrap(), "30"); } - // TODO: test get_or_insert + #[test] + fn test_builder_getters() { + let mut builder = HashMapBuilder::default(); + builder.insert("a".to_string(), "10".to_string()); + assert_eq!( + builder.get_or_insert("a".to_string(), "20".to_string()), + "10" + ); + assert_eq!( + builder.get_or_insert("b".to_string(), "20".to_string()), + "20" + ); + let data = serialize_builder(builder); + let map = load_map(&data); + assert_eq!(map.get("a").unwrap(), "10"); + assert_eq!(map.get("b").unwrap(), "20"); + assert!(map.get("c").is_none()); + } #[test] fn test_string_builder() { From 388c759fe920ff81de4eb8d866dd0b18bb7c84b4 Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Thu, 16 Oct 2025 23:48:52 +0400 Subject: [PATCH 11/11] update memory expectations --- tests/unit/engine.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/engine.rs b/tests/unit/engine.rs index d3f74cb1..9c6fc1bb 100644 --- a/tests/unit/engine.rs +++ b/tests/unit/engine.rs @@ -221,7 +221,7 @@ mod tests { #[cfg(feature = "debug-info")] { let debug_info = engine.get_debug_info(); - let expected_size = 8_527_344_f32; + let expected_size = 8_963_552_f32; assert!(debug_info.flatbuffer_size >= (expected_size * 0.99) as usize); assert!(debug_info.flatbuffer_size <= (expected_size * 1.01) as usize); }