diff --git a/Cargo.lock b/Cargo.lock index 3d64b3eb..614389af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,6 +23,7 @@ dependencies = [ "regex", "reqwest", "rmp-serde", + "rustc-hash 1.1.0", "seahash", "selectors", "serde", @@ -1311,7 +1312,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "socket2", "thiserror 2.0.12", @@ -1331,7 +1332,7 @@ dependencies = [ "lru-slab", "rand 0.9.1", "ring", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "rustls-pki-types", "slab", @@ -1586,6 +1587,12 @@ version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc-hash" version = "2.1.1" diff --git a/Cargo.toml b/Cargo.toml index 16f60c38..96820f9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,8 @@ idna = "1.0.3" serde = { workspace = true } serde_json = { workspace = true } seahash = "4.1.0" +# rustc-hash v1.1.0 provides a better performance than 2.x, chromium pins the same version. 
+rustc-hash = { version = "1.1.0", default-features = false } memchr = "2.4" base64 = "0.22" rmp-serde = "0.15" diff --git a/src/cosmetic_filter_cache.rs b/src/cosmetic_filter_cache.rs index ca39a8b1..546666d7 100644 --- a/src/cosmetic_filter_cache.rs +++ b/src/cosmetic_filter_cache.rs @@ -16,8 +16,9 @@ use crate::filters::cosmetic::{CosmeticFilterAction, CosmeticFilterOperator}; use crate::filters::filter_data_context::FilterDataContextRef; use crate::flatbuffers::containers::flat_map::FlatMapView; -use crate::flatbuffers::containers::flat_multimap::{FlatMapStringView, FlatMultiMapView}; -use crate::flatbuffers::containers::flat_set::FlatSetView; +use crate::flatbuffers::containers::flat_multimap::FlatMultiMapView; +use crate::flatbuffers::containers::hash_map::HashMapStringView; +use crate::flatbuffers::containers::hash_set::HashSetView; use crate::resources::{PermissionMask, ResourceStorage}; use crate::utils::Hash; @@ -169,13 +170,13 @@ impl CosmeticFilterCache { let mut selectors = vec![]; let cosmetic_filters = self.filter_data_context.memory.root().cosmetic_filters(); - let simple_class_rules = FlatSetView::new(cosmetic_filters.simple_class_rules()); - let simple_id_rules = FlatSetView::new(cosmetic_filters.simple_id_rules()); - let complex_class_rules = FlatMapStringView::new( + let simple_class_rules = HashSetView::new(cosmetic_filters.simple_class_rules()); + let simple_id_rules = HashSetView::new(cosmetic_filters.simple_id_rules()); + let complex_class_rules = HashMapStringView::new( cosmetic_filters.complex_class_rules_index(), cosmetic_filters.complex_class_rules_values(), ); - let complex_id_rules = FlatMapStringView::new( + let complex_id_rules = HashMapStringView::new( cosmetic_filters.complex_id_rules_index(), cosmetic_filters.complex_id_rules_values(), ); @@ -185,8 +186,8 @@ impl CosmeticFilterCache { if simple_class_rules.contains(class) && !exceptions.contains(&format!(".{}", class)) { selectors.push(format!(".{}", class)); } - if let 
Some(bucket) = complex_class_rules.get(class) { - for (_, sel) in bucket { + if let Some(values) = complex_class_rules.get(class) { + for sel in values.data() { if !exceptions.contains(sel) { selectors.push(sel.to_string()); } @@ -198,8 +199,8 @@ impl CosmeticFilterCache { if simple_id_rules.contains(id) && !exceptions.contains(&format!("#{}", id)) { selectors.push(format!("#{}", id)); } - if let Some(bucket) = complex_id_rules.get(id) { - for (_, sel) in bucket { + if let Some(values) = complex_id_rules.get(id) { + for sel in values.data() { if !exceptions.contains(sel) { selectors.push(sel.to_string()); } diff --git a/src/cosmetic_filter_cache_builder.rs b/src/cosmetic_filter_cache_builder.rs index 2673107f..9e0c3e4e 100644 --- a/src/cosmetic_filter_cache_builder.rs +++ b/src/cosmetic_filter_cache_builder.rs @@ -9,6 +9,8 @@ use crate::filters::cosmetic::{CosmeticFilter, CosmeticFilterMask, CosmeticFilte use crate::filters::flatbuffer_generated::fb; use crate::flatbuffers::containers::flat_map::FlatMapBuilder; use crate::flatbuffers::containers::flat_multimap::FlatMultiMapBuilder; +use crate::flatbuffers::containers::hash_map::HashMapBuilder; +use crate::flatbuffers::containers::hash_set::HashSetBuilder; use crate::flatbuffers::containers::flat_serialize::{ serialize_vec_opt, FlatBuilder, FlatSerialize, @@ -56,13 +58,16 @@ impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for HostnameRule { } } +#[derive(Default, Clone)] +struct StringVector(Vec); + #[derive(Default)] pub(crate) struct CosmeticFilterCacheBuilder { - simple_class_rules: HashSet, - simple_id_rules: HashSet, + simple_class_rules: HashSetBuilder, + simple_id_rules: HashSetBuilder, misc_generic_selectors: HashSet, - complex_class_rules: FlatMultiMapBuilder, - complex_id_rules: FlatMultiMapBuilder, + complex_class_rules: HashMapBuilder, + complex_id_rules: HashMapBuilder, hostname_hide: FlatMultiMapBuilder, hostname_inject_script: FlatMultiMapBuilder, @@ -110,7 +115,10 @@ impl CosmeticFilterCacheBuilder 
{ if key == selector { self.simple_class_rules.insert(class); } else { - self.complex_class_rules.insert(class, selector); + let selectors = self + .complex_class_rules + .get_or_insert(class, StringVector::default()); + selectors.0.push(selector); } } } else if selector.starts_with('#') { @@ -120,7 +128,10 @@ impl CosmeticFilterCacheBuilder { if key == selector { self.simple_id_rules.insert(id); } else { - self.complex_id_rules.insert(id, selector); + let selectors = self + .complex_id_rules + .get_or_insert(id, StringVector::default()); + selectors.0.push(selector); } } } else { @@ -204,11 +215,24 @@ impl CosmeticFilterCacheBuilder { } } +impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for StringVector { + type Output = WIPOffset>; + + fn serialize(value: Self, builder: &mut B) -> WIPOffset> { + let v = FlatSerialize::serialize(value.0, builder); + fb::StringVector::create( + builder.raw_builder(), + &fb::StringVectorArgs { data: Some(v) }, + ) + } +} + impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for CosmeticFilterCacheBuilder { type Output = WIPOffset>; + fn serialize(value: Self, builder: &mut B) -> WIPOffset> { - let complex_class_rules = FlatMultiMapBuilder::finish(value.complex_class_rules, builder); - let complex_id_rules = FlatMultiMapBuilder::finish(value.complex_id_rules, builder); + let complex_class_rules = HashMapBuilder::finish(value.complex_class_rules, builder); + let complex_id_rules = HashMapBuilder::finish(value.complex_id_rules, builder); // Handle top-level hostname hide and inject_script for better deduplication let hostname_hide = FlatMultiMapBuilder::finish(value.hostname_hide, builder); diff --git a/src/data_format/mod.rs b/src/data_format/mod.rs index 98b980e0..d62dca13 100644 --- a/src/data_format/mod.rs +++ b/src/data_format/mod.rs @@ -17,7 +17,7 @@ const ADBLOCK_RUST_DAT_MAGIC: [u8; 4] = [0xd1, 0xd9, 0x3a, 0xaf]; /// The version of the data format. 
/// If the data format version is incremented, the data is considered as incompatible. -const ADBLOCK_FLATBUFFER_VERSION: u8 = 2; +const ADBLOCK_RUST_DAT_VERSION: u8 = 2; /// The total length of the header prefix (magic + version + seahash) const HEADER_PREFIX_LENGTH: usize = 4 + 1 + 8; @@ -35,7 +35,7 @@ pub(crate) fn serialize_dat_file(data: &[u8]) -> Vec { let mut serialized = Vec::with_capacity(data.len() + HEADER_PREFIX_LENGTH); let hash = seahash::hash(data).to_le_bytes(); serialized.extend_from_slice(&ADBLOCK_RUST_DAT_MAGIC); - serialized.push(ADBLOCK_FLATBUFFER_VERSION); + serialized.push(ADBLOCK_RUST_DAT_VERSION); serialized.extend_from_slice(&hash); assert_eq!(serialized.len(), HEADER_PREFIX_LENGTH); @@ -49,7 +49,7 @@ pub(crate) fn deserialize_dat_file(serialized: &[u8]) -> Result<&[u8], Deseriali } let version = serialized[ADBLOCK_RUST_DAT_MAGIC.len()]; - if version != ADBLOCK_FLATBUFFER_VERSION { + if version != ADBLOCK_RUST_DAT_VERSION { return Err(DeserializationError::VersionMismatch(version)); } let data = &serialized[HEADER_PREFIX_LENGTH..]; diff --git a/src/flatbuffers/containers/fb_index.rs b/src/flatbuffers/containers/fb_index.rs new file mode 100644 index 00000000..2398a6f1 --- /dev/null +++ b/src/flatbuffers/containers/fb_index.rs @@ -0,0 +1,49 @@ +use flatbuffers::{Follow, Vector}; + +/// A trait to access indexed data in a flatbuffer. +/// It has two implementations: +/// 1. a faster one for slices (&[I]); +/// 2. a slower one for flatbuffers::Vector, which uses Follow internally. +/// +/// Note: it intentionally returns values by copy, because that is faster +/// than returning them by reference. +pub(crate) trait FbIndex { + /// Returns the number of elements. + fn len(&self) -> usize; + + /// Returns a copy of the value at the given index. + /// 'index' must be in range [0, len()), otherwise panics. 
+ fn get(&self, index: usize) -> I; +} + +impl FbIndex for &[I] { + #[inline(always)] + fn len(&self) -> usize { + <[I]>::len(self) + } + + #[inline(always)] + fn get(&self, index: usize) -> I { + self[index] + } +} + +impl FbIndex<()> for () { + #[inline(always)] + fn len(&self) -> usize { + 0 + } + fn get(&self, _index: usize) {} +} + +impl<'a, T: Follow<'a>> FbIndex for Vector<'a, T> { + #[inline(always)] + fn len(&self) -> usize { + Vector::len(self) + } + + #[inline(always)] + fn get(&self, index: usize) -> T::Inner { + Vector::get(self, index) + } +} diff --git a/src/flatbuffers/containers/flat_multimap.rs b/src/flatbuffers/containers/flat_multimap.rs index 6ccb28a7..99b6255f 100644 --- a/src/flatbuffers/containers/flat_multimap.rs +++ b/src/flatbuffers/containers/flat_multimap.rs @@ -129,9 +129,6 @@ impl FlatMultiMapBuilder { } } -pub(crate) type FlatMapStringView<'a, V> = - FlatMultiMapView<'a, &'a str, V, Vector<'a, flatbuffers::ForwardsUOffset<&'a str>>>; - #[cfg(test)] #[path = "../../../tests/unit/flatbuffers/containers/flat_multimap.rs"] mod unit_tests; diff --git a/src/flatbuffers/containers/flat_serialize.rs b/src/flatbuffers/containers/flat_serialize.rs index 09ab1234..a341d7a0 100644 --- a/src/flatbuffers/containers/flat_serialize.rs +++ b/src/flatbuffers/containers/flat_serialize.rs @@ -27,8 +27,13 @@ pub trait FlatSerialize<'b, B: FlatBuilder<'b>>: Sized { impl<'b> FlatBuilder<'b> for flatbuffers::FlatBufferBuilder<'b> { fn create_string(&mut self, s: &str) -> WIPOffset<&'b str> { - self.create_string(s) + if s.is_empty() { + flatbuffers::FlatBufferBuilder::create_shared_string(self, s) + } else { + flatbuffers::FlatBufferBuilder::create_string(self, s) + } } + fn raw_builder(&mut self) -> &mut flatbuffers::FlatBufferBuilder<'b> { self } diff --git a/src/flatbuffers/containers/hash_index.rs b/src/flatbuffers/containers/hash_index.rs new file mode 100644 index 00000000..9c967e6c --- /dev/null +++ b/src/flatbuffers/containers/hash_index.rs @@ -0,0 
+1,223 @@ +/// An inner implementation of a HashMap-like container with open addressing. +/// Designed to be used in HashMap, HashSet, HashMultiMap. +/// The load factor is 25%-50%. +/// Uses FxHasher from the rustc-hash crate as the hash function. +/// A default value is used to mark empty slots, so it can't be used as a key. +/// Inspired by https://source.chromium.org/chromium/chromium/src/+/main:components/url_pattern_index/closed_hash_map.h +use std::marker::PhantomData; + +use crate::flatbuffers::containers::fb_index::FbIndex; + +/// A trait for hash table builder keys, e.g. String. +/// The default value is used to mark empty slots. +pub(crate) trait HashKey: Eq + std::hash::Hash + Default + Clone { + /// Returns true if the key is empty. + fn is_empty(&self) -> bool; +} + +impl HashKey for T { + fn is_empty(&self) -> bool { + self == &T::default() + } +} + +/// A trait for hash table view keys that can be used in flatbuffers, e.g. &str. +/// The implementation must be kept in sync with the matching HashKey trait. +pub(crate) trait FbHashKey: Eq + std::hash::Hash { + /// Returns true if the key is empty. + fn is_empty(&self) -> bool; +} + +impl FbHashKey for &str { + fn is_empty(&self) -> bool { + str::is_empty(self) + } +} + +/// An internal function to find a slot in the hash table for the given key. +/// Returns the slot index. +/// 'table_size' is the table size. It must be a power of two. +/// 'probe' must return true for at least one slot (assuming the table isn't full). +pub fn find_slot( + key: &I, + table_size: usize, + probe: impl Fn(usize) -> bool, +) -> usize { + debug_assert!(table_size.is_power_of_two()); + let table_mask = table_size - 1; + let mut slot = get_hash(&key) & table_mask; + let mut step = 1; + loop { + if probe(slot) { + return slot; + } + slot = (slot + step) & table_mask; + step += 1; + } +} + +/// A flatbuffer-compatible view of a hash table. +/// It's used to access the hash table without copying the keys and values. 
+/// It is loaded from HashIndexBuilder data that was serialized into a flatbuffer. +pub(crate) struct HashIndexView, Values: FbIndex> { + indexes: Keys, + values: Values, + _phantom_i: PhantomData, + _phantom_v: PhantomData, +} + +impl, Values: FbIndex> HashIndexView { + pub fn new(indexes: Keys, values: Values) -> Self { + Self { + indexes, + values, + _phantom_i: PhantomData, + _phantom_v: PhantomData, + } + } + + pub fn capacity(&self) -> usize { + self.indexes.len() + } + + pub fn get_single(&self, key: I) -> Option { + let slot = find_slot(&key, self.capacity(), |slot| -> bool { + FbHashKey::is_empty(&self.indexes.get(slot)) || self.indexes.get(slot) == key + }); + if FbHashKey::is_empty(&self.indexes.get(slot)) { + None + } else { + Some(self.values.get(slot)) + } + } + + #[cfg(test)] + /// Returns the number of non-empty slots in the hash table. + /// Slow, use only for tests. + pub fn len(&self) -> usize { + let mut len = 0; + for i in 0..self.capacity() { + if !FbHashKey::is_empty(&self.indexes.get(i)) { + len += 1; + } + } + len + } +} + +/// A builder for a hash table. +/// The default value is used to mark empty slots. +/// `consume()` output is supposed to be serialized into a flatbuffer and +/// used as a HashIndexView. +pub(crate) struct HashIndexBuilder { + indexes: Vec, + values: Vec, + size: usize, +} + +/// An internal function to hash a key. +/// The hash must be stable across different runs of the program. +fn get_hash(key: &I) -> usize { + // RustC Hash is 2x faster than DefaultHasher. 
+ use rustc_hash::FxHasher; + use std::hash::Hasher; + let mut hasher = FxHasher::default(); + key.hash(&mut hasher); + hasher.finish() as usize +} + +impl Default for HashIndexBuilder { + fn default() -> Self { + Self::new_with_capacity(4) + } +} + +impl HashIndexBuilder { + pub fn new_with_capacity(capacity: usize) -> Self { + Self { + size: 0, + indexes: vec![I::default(); capacity], + values: vec![V::default(); capacity], + } + } + + pub fn insert(&mut self, key: I, value: V, allow_duplicates: bool) -> (usize, &mut V) { + debug_assert!(!HashKey::is_empty(&key), "Key is empty"); + + let mut slot = find_slot(&key, self.capacity(), |slot| -> bool { + HashKey::is_empty(&self.indexes[slot]) + || (self.indexes[slot] == key && !allow_duplicates) + }); + + if HashKey::is_empty(&self.indexes[slot]) { + // New key: count it and grow first. Growing rehashes every entry, so the slot + // found above may be stale; probe again for an empty slot before storing the key. + self.size += 1; + self.maybe_increase_capacity(); + slot = find_slot(&key, self.capacity(), |s| HashKey::is_empty(&self.indexes[s])); + self.indexes[slot] = key; + } + self.values[slot] = value; + (slot, &mut self.values[slot]) + } + + fn capacity(&self) -> usize { + self.indexes.len() + } + + pub fn get_or_insert(&mut self, key: I, value: V) -> &mut V { + let slot = find_slot(&key, self.capacity(), |slot| -> bool { + HashKey::is_empty(&self.indexes[slot]) || self.indexes[slot] == key + }); + if !HashKey::is_empty(&self.indexes[slot]) { + return &mut self.values[slot]; + } + let (_, new_value) = self.insert(key, value, false); + new_value + } + + fn maybe_increase_capacity(&mut self) { + if self.size * 2 <= self.capacity() { + // Use 50% load factor. 
+ return; + } + + let new_capacity = (self.capacity() * 2).next_power_of_two(); + let old_indexes = std::mem::take(&mut self.indexes); + let old_values = std::mem::take(&mut self.values); + self.indexes = vec![I::default(); new_capacity]; + self.values = vec![V::default(); new_capacity]; + + for (key, value) in old_indexes.into_iter().zip(old_values.into_iter()) { + if !HashKey::is_empty(&key) { + let slot = find_slot(&key, new_capacity, |slot| -> bool { + HashKey::is_empty(&self.indexes[slot]) + }); + self.indexes[slot] = key; + self.values[slot] = value; + } + } + } + + pub fn consume(value: Self) -> (Vec, Vec) { + (value.indexes, value.values) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_hash() { + // Verify get_hash is stable. + // If the value changes, update ADBLOCK_RUST_DAT_VERSION. + let message = "If the value changes, update ADBLOCK_RUST_DAT_VERSION."; + assert_eq!( + get_hash(&"adblock-rust"), + 15102204115509201409, + "{}", + message + ); + } +} diff --git a/src/flatbuffers/containers/hash_map.rs b/src/flatbuffers/containers/hash_map.rs new file mode 100644 index 00000000..865e5b8f --- /dev/null +++ b/src/flatbuffers/containers/hash_map.rs @@ -0,0 +1,102 @@ +/// A HashMap implementation backed by a HashIndex. +/// Uses more memory than FlatMap, but gives faster lookup. +use crate::flatbuffers::containers::{ + fb_index::FbIndex, + flat_serialize::{FlatBuilder, FlatMapBuilderOutput, FlatSerialize}, + hash_index::{FbHashKey, HashIndexBuilder, HashIndexView, HashKey}, +}; + +/// A builder for a HashMap that can be serialized into a flatbuffer. +/// A default key is used to mark empty slots, so (default_key, _) pair +/// can't be added. 
+#[derive(Default)] +pub(crate) struct HashMapBuilder { + builder: HashIndexBuilder, +} + +impl HashMapBuilder { + #[allow(unused)] + pub fn insert(&mut self, key: I, value: V) { + self.builder.insert(key, value, false /* allow_duplicate */); + } + + pub fn get_or_insert(&mut self, key: I, value: V) -> &mut V { + self.builder.get_or_insert(key, value) + } + + pub fn finish<'b, B: FlatBuilder<'b>>( + value: Self, + builder: &mut B, + ) -> FlatMapBuilderOutput<'b, I, V, B> + where + I: FlatSerialize<'b, B>, + V: FlatSerialize<'b, B>, + { + let (indexes, values) = HashIndexBuilder::consume(value.builder); + + let keys = indexes + .into_iter() + .map(|i| FlatSerialize::serialize(i, builder)) + .collect::>(); + let values = values + .into_iter() + .map(|v| FlatSerialize::serialize(v, builder)) + .collect::>(); + + let keys = builder.raw_builder().create_vector(&keys); + let values = builder.raw_builder().create_vector(&values); + + FlatMapBuilderOutput { keys, values } + } +} + +/// A view of a HashMap stored in a flatbuffer. +/// The default key is considered as an empty slot, `get(default_key)` always +/// returns None. 
+pub(crate) struct HashMapView +where + I: FbHashKey, + Keys: FbIndex, + Values: FbIndex, +{ + view: HashIndexView, +} + +impl HashMapView +where + I: FbHashKey, + Keys: FbIndex, + Values: FbIndex, +{ + pub fn new(keys: Keys, values: Values) -> Self { + assert_eq!(keys.len(), values.len()); + Self { + view: HashIndexView::new(keys, values), + } + } + + pub fn get(&self, key: I) -> Option { + self.view.get_single(key) + } + + #[cfg(test)] + pub fn capacity(&self) -> usize { + self.view.capacity() + } + + #[cfg(test)] + pub fn len(&self) -> usize { + self.view.len() + } +} + +pub type HashMapStringView<'a, V> = HashMapView< + &'a str, + V, + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>>, + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<>::Inner>>, +>; + +#[cfg(test)] +#[path = "../../../tests/unit/flatbuffers/containers/hash_map.rs"] +mod unit_tests; diff --git a/src/flatbuffers/containers/hash_set.rs b/src/flatbuffers/containers/hash_set.rs new file mode 100644 index 00000000..ffe77963 --- /dev/null +++ b/src/flatbuffers/containers/hash_set.rs @@ -0,0 +1,71 @@ +/// A HashSet implementation backed by a HashIndex. +/// Uses more memory than FlatSet, but gives faster lookup. +use crate::flatbuffers::containers::{ + fb_index::FbIndex, + flat_serialize::{FlatBuilder, FlatSerialize, WIPFlatVec}, + hash_index::{FbHashKey, HashIndexBuilder, HashIndexView, HashKey}, +}; + +/// A builder for a HashSet that can be serialized into a flatbuffer. +/// A default value is used to mark empty slots, so it can't be added. 
+#[derive(Default)] +pub(crate) struct HashSetBuilder { + builder: HashIndexBuilder, +} + +impl HashSetBuilder { + pub fn insert(&mut self, key: I) { + self.builder.insert(key, (), false /* allow_duplicate */); + } +} + +impl<'b, B: FlatBuilder<'b>, I: FlatSerialize<'b, B> + HashKey> FlatSerialize<'b, B> + for HashSetBuilder +{ + type Output = WIPFlatVec<'b, I, B>; + + fn serialize(value: Self, builder: &mut B) -> Self::Output + where + I: FlatSerialize<'b, B>, + { + let (indexes, _) = HashIndexBuilder::consume(value.builder); + let v = indexes + .into_iter() + .map(|x| FlatSerialize::serialize(x, builder)) + .collect::>(); + builder.raw_builder().create_vector(&v) + } +} + +/// A view of a HashSet stored in a flatbuffer. +/// The default value is considered as an empty slot, `contains(default_value)` +/// always returns false. +pub(crate) struct HashSetView> { + view: HashIndexView, +} + +impl> HashSetView { + pub fn new(keys: Keys) -> Self { + Self { + view: HashIndexView::new(keys, ()), + } + } + + pub fn contains(&self, key: I) -> bool { + self.view.get_single(key).is_some() + } + + #[cfg(test)] + pub fn len(&self) -> usize { + self.view.len() + } + + #[cfg(test)] + pub fn capacity(&self) -> usize { + self.view.capacity() + } +} + +#[cfg(test)] +#[path = "../../../tests/unit/flatbuffers/containers/hash_set.rs"] +mod unit_tests; diff --git a/src/flatbuffers/containers/mod.rs b/src/flatbuffers/containers/mod.rs index 20eb251d..18eb9202 100644 --- a/src/flatbuffers/containers/mod.rs +++ b/src/flatbuffers/containers/mod.rs @@ -1,5 +1,9 @@ +pub(crate) mod fb_index; pub(crate) mod flat_map; pub(crate) mod flat_multimap; pub(crate) mod flat_serialize; pub(crate) mod flat_set; +pub(crate) mod hash_index; +pub(crate) mod hash_map; +pub(crate) mod hash_set; pub(crate) mod sorted_index; diff --git a/src/flatbuffers/containers/sorted_index.rs b/src/flatbuffers/containers/sorted_index.rs index 166f491f..8335eafc 100644 --- a/src/flatbuffers/containers/sorted_index.rs +++ 
b/src/flatbuffers/containers/sorted_index.rs @@ -1,9 +1,9 @@ use flatbuffers::{Follow, Vector}; +use crate::flatbuffers::containers::fb_index::FbIndex; + // Represents sorted sequence to perform the binary search. -pub(crate) trait SortedIndex { - fn len(&self) -> usize; - fn get(&self, index: usize) -> I; +pub(crate) trait SortedIndex: FbIndex { fn partition_point(&self, predicate: F) -> usize where F: FnMut(&I) -> bool; @@ -13,16 +13,6 @@ pub(crate) trait SortedIndex { // if possible, because it faster than getting values with flatbuffer's // get method. impl SortedIndex for &[I] { - #[inline(always)] - fn len(&self) -> usize { - <[I]>::len(self) - } - - #[inline(always)] - fn get(&self, index: usize) -> I { - self[index] - } - #[inline(always)] fn partition_point(&self, predicate: F) -> usize where @@ -39,16 +29,6 @@ impl<'a, T: Follow<'a>> SortedIndex for Vector<'a, T> where T::Inner: Ord, { - #[inline(always)] - fn len(&self) -> usize { - Vector::len(self) - } - - #[inline(always)] - fn get(&self, index: usize) -> T::Inner { - Vector::get(self, index) - } - fn partition_point(&self, mut predicate: F) -> usize where F: FnMut(&T::Inner) -> bool, diff --git a/src/flatbuffers/fb_network_filter.fbs b/src/flatbuffers/fb_network_filter.fbs index c85698c0..c4b7ed01 100644 --- a/src/flatbuffers/fb_network_filter.fbs +++ b/src/flatbuffers/fb_network_filter.fbs @@ -52,6 +52,10 @@ table HostnameSpecificRules { procedural_action_exception: [string]; } +table StringVector { + data: [string] (required); +} + /// A table to store cosmetic filter rules (including supported structures). table CosmeticFilters { /// Rules that are just the CSS class of an element to be hidden on all sites, e.g. `##.ad`. @@ -71,13 +75,13 @@ table CosmeticFilters { /// class, e.g. `##.ad image`. 
/// Stored as a multi-map `hostname_hash` => `selector` complex_class_rules_index: [string] (required); - complex_class_rules_values: [string] (required); + complex_class_rules_values: [StringVector] (required); /// Rules that are the CSS selector of an element to be hidden on all sites, starting with an /// id, e.g. `###banner > .text a`. /// Stored as a multi-map `hostname_hash` => `selector` complex_id_rules_index: [string] (required); - complex_id_rules_values: [string] (required); + complex_id_rules_values: [StringVector] (required); /// Simple hostname-specific hide rules, e.g. `example.com##.ad`. /// Stored as a multi-map `hostname_hash` => `selector`. diff --git a/src/flatbuffers/fb_network_filter_generated.rs b/src/flatbuffers/fb_network_filter_generated.rs index 35e98f26..631f285b 100644 --- a/src/flatbuffers/fb_network_filter_generated.rs +++ b/src/flatbuffers/fb_network_filter_generated.rs @@ -987,6 +987,164 @@ pub mod fb { ) } } + pub enum StringVectorOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct StringVector<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for StringVector<'a> { + type Inner = StringVector<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } + } + + impl<'a> StringVector<'a> { + pub const VT_DATA: flatbuffers::VOffsetT = 4; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + StringVector { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args StringVectorArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = StringVectorBuilder::new(_fbb); + if let Some(x) = args.data { + builder.add_data(x); + } + builder.finish() + } + + pub fn unpack(&self) -> StringVectorT { + let data = { + let 
x = self.data(); + x.iter().map(|s| s.to_string()).collect() + }; + StringVectorT { data } + } + + #[inline] + pub fn data(&self) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(StringVector::VT_DATA, None) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for StringVector<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>, + >>("data", Self::VT_DATA, true)? + .finish(); + Ok(()) + } + } + pub struct StringVectorArgs<'a> { + pub data: Option< + flatbuffers::WIPOffset>>, + >, + } + impl<'a> Default for StringVectorArgs<'a> { + #[inline] + fn default() -> Self { + StringVectorArgs { + data: None, // required field + } + } + } + + pub struct StringVectorBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> StringVectorBuilder<'a, 'b, A> { + #[inline] + pub fn add_data( + &mut self, + data: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_ + .push_slot_always::>(StringVector::VT_DATA, data); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> StringVectorBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + StringVectorBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, StringVector::VT_DATA, "data"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for StringVector<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + 
let mut ds = f.debug_struct("StringVector"); + ds.field("data", &self.data()); + ds.finish() + } + } + #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] + pub struct StringVectorT { + pub data: Vec, + } + impl Default for StringVectorT { + fn default() -> Self { + Self { + data: Default::default(), + } + } + } + impl StringVectorT { + pub fn pack<'b, A: flatbuffers::Allocator + 'b>( + &self, + _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, + ) -> flatbuffers::WIPOffset> { + let data = Some({ + let x = &self.data; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + StringVector::create(_fbb, &StringVectorArgs { data }) + } + } pub enum CosmeticFiltersOffset {} #[derive(Copy, Clone, PartialEq)] @@ -1096,7 +1254,7 @@ pub mod fb { }; let complex_class_rules_values = { let x = self.complex_class_rules_values(); - x.iter().map(|s| s.to_string()).collect() + x.iter().map(|t| t.unpack()).collect() }; let complex_id_rules_index = { let x = self.complex_id_rules_index(); @@ -1104,7 +1262,7 @@ pub mod fb { }; let complex_id_rules_values = { let x = self.complex_id_rules_values(); - x.iter().map(|s| s.to_string()).collect() + x.iter().map(|t| t.unpack()).collect() }; let hostname_hide_index = { let x = self.hostname_hide_index(); @@ -1220,14 +1378,14 @@ pub mod fb { #[inline] pub fn complex_class_rules_values( &self, - ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> { // Safety: // Created from valid Table for this object // which contains a valid value in this slot unsafe { self._tab .get::>, + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>, >>(CosmeticFilters::VT_COMPLEX_CLASS_RULES_VALUES, None) .unwrap() } @@ -1253,14 +1411,14 @@ pub mod fb { #[inline] pub fn complex_id_rules_values( &self, - ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> { // 
Safety: // Created from valid Table for this object // which contains a valid value in this slot unsafe { self._tab .get::>, + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>, >>(CosmeticFilters::VT_COMPLEX_ID_RULES_VALUES, None) .unwrap() } @@ -1397,7 +1555,7 @@ pub mod fb { true, )? .visit_field::>, + flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset>, >>( "complex_class_rules_values", Self::VT_COMPLEX_CLASS_RULES_VALUES, @@ -1411,7 +1569,7 @@ pub mod fb { true, )? .visit_field::>, + flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset>, >>( "complex_id_rules_values", Self::VT_COMPLEX_ID_RULES_VALUES, @@ -1463,13 +1621,17 @@ pub mod fb { flatbuffers::WIPOffset>>, >, pub complex_class_rules_values: Option< - flatbuffers::WIPOffset>>, + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, >, pub complex_id_rules_index: Option< flatbuffers::WIPOffset>>, >, pub complex_id_rules_values: Option< - flatbuffers::WIPOffset>>, + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, >, pub hostname_hide_index: Option>>, pub hostname_hide_values: Option< @@ -1565,7 +1727,7 @@ pub mod fb { pub fn add_complex_class_rules_values( &mut self, complex_class_rules_values: flatbuffers::WIPOffset< - flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, >, ) { self.fbb_.push_slot_always::>( @@ -1589,7 +1751,7 @@ pub mod fb { pub fn add_complex_id_rules_values( &mut self, complex_id_rules_values: flatbuffers::WIPOffset< - flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, >, ) { self.fbb_.push_slot_always::>( @@ -1774,9 +1936,9 @@ pub mod fb { pub simple_id_rules: Vec, pub misc_generic_selectors: Vec, pub complex_class_rules_index: Vec, - pub complex_class_rules_values: Vec, + pub complex_class_rules_values: Vec, pub complex_id_rules_index: Vec, - pub 
complex_id_rules_values: Vec, + pub complex_id_rules_values: Vec, pub hostname_hide_index: Vec, pub hostname_hide_values: Vec, pub hostname_inject_script_index: Vec, @@ -1830,7 +1992,7 @@ pub mod fb { }); let complex_class_rules_values = Some({ let x = &self.complex_class_rules_values; - let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + let w: Vec<_> = x.iter().map(|t| t.pack(_fbb)).collect(); _fbb.create_vector(&w) }); let complex_id_rules_index = Some({ @@ -1840,7 +2002,7 @@ pub mod fb { }); let complex_id_rules_values = Some({ let x = &self.complex_id_rules_values; - let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + let w: Vec<_> = x.iter().map(|t| t.pack(_fbb)).collect(); _fbb.create_vector(&w) }); let hostname_hide_index = Some({ diff --git a/tests/unit/engine.rs b/tests/unit/engine.rs index ea3883c2..9c6fc1bb 100644 --- a/tests/unit/engine.rs +++ b/tests/unit/engine.rs @@ -183,7 +183,7 @@ mod tests { fn deserialization_generate_simple() { let mut engine = Engine::from_rules(["ad-banner"], Default::default()); let data = engine.serialize().to_vec(); - const EXPECTED_HASH: u64 = 15201305923211912617; + const EXPECTED_HASH: u64 = 884296823183764168; assert_eq!(hash(&data), EXPECTED_HASH, "{}", HASH_MISMATCH_MSG); engine.deserialize(&data).unwrap(); } @@ -193,7 +193,7 @@ mod tests { let mut engine = Engine::from_rules(["ad-banner$tag=abc"], Default::default()); engine.use_tags(&["abc"]); let data = engine.serialize().to_vec(); - const EXPECTED_HASH: u64 = 5114301339390262037; + const EXPECTED_HASH: u64 = 7887643884738497753; assert_eq!(hash(&data), EXPECTED_HASH, "{}", HASH_MISMATCH_MSG); engine.deserialize(&data).unwrap(); } @@ -221,15 +221,15 @@ mod tests { #[cfg(feature = "debug-info")] { let debug_info = engine.get_debug_info(); - let expected_size = 8_527_344_f32; + let expected_size = 8_963_552_f32; assert!(debug_info.flatbuffer_size >= (expected_size * 0.99) as usize); assert!(debug_info.flatbuffer_size <= 
(expected_size * 1.01) as usize); } let expected_hash: u64 = if cfg!(feature = "css-validation") { - 2942520321544562177 + 15959922653220214643 } else { - 17713004238689548675 + 16953879754096715156 }; assert_eq!(hash(&data), expected_hash, "{}", HASH_MISMATCH_MSG); diff --git a/tests/unit/flatbuffers/containers/hash_map.rs b/tests/unit/flatbuffers/containers/hash_map.rs new file mode 100644 index 00000000..c38c012d --- /dev/null +++ b/tests/unit/flatbuffers/containers/hash_map.rs @@ -0,0 +1,103 @@ +#[allow(unknown_lints)] +#[allow( + dead_code, + clippy::all, + unused_imports, + unsafe_code, + mismatched_lifetime_syntaxes +)] +#[path = "./test_containers_generated.rs"] +pub mod flat; +#[cfg(test)] +mod tests { + use super::super::*; + use super::flat::fb_test; + + fn serialize_map(values: Vec<(&str, &str)>) -> Vec { + let mut builder = HashMapBuilder::default(); + for (key, value) in values { + builder.insert(key.to_string(), value.to_string()); + } + serialize_builder(builder) + } + + fn serialize_builder(builder: HashMapBuilder) -> Vec { + let mut flat_builder = flatbuffers::FlatBufferBuilder::new(); + let map = HashMapBuilder::finish(builder, &mut flat_builder); + let map_serialized = fb_test::TestStringMap::create( + &mut flat_builder, + &fb_test::TestStringMapArgs { + keys: Some(map.keys), + values: Some(map.values), + }, + ); + let root = fb_test::TestRoot::create( + &mut flat_builder, + &fb_test::TestRootArgs { + test_string_map: Some(map_serialized), + ..Default::default() + }, + ); + flat_builder.finish(root, None); + flat_builder.finished_data().to_vec() + } + + fn load_map<'a>(data: &'a [u8]) -> HashMapStringView<'a, &'a str> { + let root = fb_test::root_as_test_root(data).unwrap(); + let flat_map = root.test_string_map().unwrap(); + HashMapView::new(flat_map.keys(), flat_map.values()) + } + + #[test] + fn test_empty_map() { + let values = vec![]; + let data = serialize_map(values); + let map = load_map(&data); + assert_eq!(map.len(), 0); + 
assert_eq!(map.capacity(), 4); + assert!(map.get("a").is_none()); + } + + #[test] + fn test_duplicate_keys() { + let values = vec![("b", "20"), ("a", "10"), ("b", "30")]; + let data = serialize_map(values); + let map = load_map(&data); + assert_eq!(map.len(), 2); + assert_eq!(map.capacity(), 4); + assert_eq!(map.get("a").unwrap(), "10"); + assert_eq!(map.get("b").unwrap(), "30"); + } + + #[test] + fn test_builder_getters() { + let mut builder = HashMapBuilder::default(); + builder.insert("a".to_string(), "10".to_string()); + assert_eq!( + builder.get_or_insert("a".to_string(), "20".to_string()), + "10" + ); + assert_eq!( + builder.get_or_insert("b".to_string(), "20".to_string()), + "20" + ); + let data = serialize_builder(builder); + let map = load_map(&data); + assert_eq!(map.get("a").unwrap(), "10"); + assert_eq!(map.get("b").unwrap(), "20"); + assert!(map.get("c").is_none()); + } + + #[test] + fn test_string_builder() { + let values = vec![("b", "20"), ("a", "10"), ("c", "30")]; + let data = serialize_map(values); + let map = load_map(&data); + + assert_eq!(map.get("a").unwrap(), "10"); + assert_eq!(map.get("b").unwrap(), "20"); + assert_eq!(map.get("c").unwrap(), "30"); + assert!(map.get("d").is_none()); + assert!(map.get("").is_none()); + } +} diff --git a/tests/unit/flatbuffers/containers/hash_set.rs b/tests/unit/flatbuffers/containers/hash_set.rs new file mode 100644 index 00000000..47b8fce2 --- /dev/null +++ b/tests/unit/flatbuffers/containers/hash_set.rs @@ -0,0 +1,76 @@ +#[allow(unknown_lints)] +#[allow( + dead_code, + clippy::all, + unused_imports, + unsafe_code, + mismatched_lifetime_syntaxes +)] +#[path = "./test_containers_generated.rs"] +pub mod flat; +#[cfg(test)] +mod tests { + use super::super::*; + use super::flat::fb_test; + + fn serialize_set(values: Vec<&str>) -> Vec { + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let mut set = HashSetBuilder::default(); + for value in values { + set.insert(value.to_string()); + } + let 
test_string_set = Some(FlatSerialize::serialize(set, &mut builder)); + + let root = fb_test::TestRoot::create( + &mut builder, + &fb_test::TestRootArgs { + test_string_set, + ..Default::default() + }, + ); + builder.finish(root, None); + builder.finished_data().to_vec() + } + + fn load_set<'a>( + data: &'a [u8], + ) -> HashSetView<&'a str, flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>>> { + let root = fb_test::root_as_test_root(data).unwrap(); + let flat_set = root.test_string_set().unwrap(); + HashSetView::new(flat_set) + } + + #[test] + fn test_empty_map() { + let values = vec![]; + let data = serialize_set(values); + let set = load_set(&data); + assert_eq!(set.len(), 0); + assert_eq!(set.capacity(), 4); + assert!(!set.contains("a")); + } + + #[test] + fn test_duplicate_keys() { + let values = vec!["b", "a", "b"]; + let data = serialize_set(values); + let set = load_set(&data); + assert_eq!(set.len(), 2); + assert_eq!(set.capacity(), 4); + assert!(set.contains("a")); + assert!(set.contains("b")); + } + + #[test] + fn test_string_builder() { + let values = vec!["b", "a", "c"]; + let data = serialize_set(values); + let set = load_set(&data); + + assert!(set.contains("a")); + assert!(set.contains("b")); + assert!(set.contains("c")); + assert!(!set.contains("d")); + assert!(!set.contains("")); + } +}