diff --git a/Cargo.lock b/Cargo.lock index 904eae6b..66e53155 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,6 +24,7 @@ dependencies = [ "regex", "reqwest", "rmp-serde", + "rustc-hash 1.1.0", "seahash", "selectors", "serde", @@ -1269,7 +1270,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "socket2", "thiserror 2.0.12", @@ -1289,7 +1290,7 @@ dependencies = [ "lru-slab", "rand 0.9.1", "ring", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "rustls-pki-types", "slab", @@ -1508,6 +1509,12 @@ version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc-hash" version = "2.1.1" diff --git a/Cargo.toml b/Cargo.toml index d51ff37e..918b665f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,8 @@ idna = "1.0.3" serde = { workspace = true } serde_json = { workspace = true } seahash = "4.1.0" +# rustc-hash v1.1.0 provides better performance than 2.x; Chromium pins the same version. +rustc-hash = { version = "1.1.0", default-features = false } memchr = "2.4" base64 = "0.22" rmp-serde = "0.15" @@ -92,10 +94,10 @@ harness = false [features] # If disabling default features, consider explicitly re-enabling the # "embedded-domain-resolver" feature. -default = ["embedded-domain-resolver", "full-regex-handling", "unsync-regex-caching"] +default = ["embedded-domain-resolver", "full-regex-handling", "single-thread"] full-regex-handling = [] -unsync-regex-caching = [] # disables `Send` and `Sync` on `Engine`. -regex-debug-info = [] +single-thread = [] # disables `Send` and `Sync` on `Engine`. 
+debug-info = [] css-validation = ["cssparser", "selectors"] content-blocking = [] embedded-domain-resolver = ["addr"] # Requires setting an external domain resolver if disabled. diff --git a/README.md b/README.md index 79f98227..db33ffcf 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ By default, `adblock-rust` ships with a built-in domain resolution implementatio `adblock-rust` uses uBlock Origin-compatible resources for scriptlet injection and redirect rules. The `resource-assembler` feature allows `adblock-rust` to parse these resources directly from the file formats used by the uBlock Origin repository. -#### Thread safety (`unsync-regex-caching`) +#### Thread safety (`single-thread`) -The `unsync-regex-caching` feature enables optimizations for rule matching speed and the amount of memory used by the engine. +The `single-thread` feature enables optimizations for rule matching speed and the amount of memory used by the engine. This feature can be disabled to make the engine `Send + Sync`, although it is recommended to only access the engine on a single thread to maintain optimal performance. 
diff --git a/benches/bench_matching.rs b/benches/bench_matching.rs index 178f3d04..516374ac 100644 --- a/benches/bench_matching.rs +++ b/benches/bench_matching.rs @@ -2,11 +2,9 @@ use criterion::*; use serde::{Deserialize, Serialize}; -use adblock::blocker::{Blocker, BlockerOptions}; use adblock::request::Request; -use adblock::resources::ResourceStorage; use adblock::url_parser::parse_url; -use adblock::Engine; +use adblock::{Engine, FilterSet}; #[path = "../tests/test_utils.rs"] mod test_utils; @@ -36,14 +34,13 @@ fn load_requests() -> Vec { reqs } -fn get_blocker(rules: impl IntoIterator>) -> Blocker { +fn get_engine(rules: impl IntoIterator>) -> Engine { let (network_filters, _) = adblock::lists::parse_filters(rules, false, Default::default()); - let blocker_options = BlockerOptions { - enable_optimizations: true, - }; - - Blocker::new(network_filters, &blocker_options) + Engine::from_filter_set( + FilterSet::new_with_rules(network_filters, vec![], false), + true, + ) } fn bench_rule_matching(engine: &Engine, requests: &[TestRequest]) -> (u32, u32) { @@ -61,15 +58,11 @@ fn bench_rule_matching(engine: &Engine, requests: &[TestRequest]) -> (u32, u32) (matches, passes) } -fn bench_matching_only( - blocker: &Blocker, - resources: &ResourceStorage, - requests: &[Request], -) -> (u32, u32) { +fn bench_matching_only(engine: &Engine, requests: &[Request]) -> (u32, u32) { let mut matches = 0; let mut passes = 0; requests.iter().for_each(|parsed| { - let check = blocker.check(parsed, resources); + let check = engine.check_network_request(parsed); if check.matched { matches += 1; } else { @@ -150,14 +143,13 @@ fn rule_match_parsed_el(c: &mut Criterion) { .filter_map(Result::ok) .collect(); let requests_len = requests_parsed.len() as u64; - let blocker = get_blocker(rules); - let resources = ResourceStorage::default(); + let engine = get_engine(rules); group.throughput(Throughput::Elements(requests_len)); group.sample_size(10); group.bench_function("easylist", move |b| { - 
b.iter(|| bench_matching_only(&blocker, &resources, &requests_parsed)) + b.iter(|| bench_matching_only(&engine, &requests_parsed)) }); group.finish(); @@ -170,8 +162,7 @@ fn rule_match_parsed_elep_slimlist(c: &mut Criterion) { "data/easylist.to/easylist/easylist.txt", "data/easylist.to/easylist/easyprivacy.txt", ]); - let blocker = get_blocker(full_rules); - let resources = ResourceStorage::default(); + let engine = get_engine(full_rules); let requests = load_requests(); let requests_parsed: Vec<_> = requests @@ -182,7 +173,7 @@ fn rule_match_parsed_elep_slimlist(c: &mut Criterion) { let requests_len = requests_parsed.len() as u64; let slim_rules = rules_from_lists(&["data/slim-list.txt"]); - let slim_blocker = get_blocker(slim_rules); + let slim_engine = get_engine(slim_rules); let requests_copy = load_requests(); let requests_parsed_copy: Vec<_> = requests_copy @@ -195,11 +186,10 @@ fn rule_match_parsed_elep_slimlist(c: &mut Criterion) { group.sample_size(10); group.bench_function("el+ep", move |b| { - b.iter(|| bench_matching_only(&blocker, &resources, &requests_parsed)) + b.iter(|| bench_matching_only(&engine, &requests_parsed)) }); - let resources = ResourceStorage::default(); group.bench_function("slimlist", move |b| { - b.iter(|| bench_matching_only(&slim_blocker, &resources, &requests_parsed_copy)) + b.iter(|| bench_matching_only(&slim_engine, &requests_parsed_copy)) }); group.finish(); diff --git a/benches/bench_redirect_performance.rs b/benches/bench_redirect_performance.rs index 2ed4f2a4..71f09e09 100644 --- a/benches/bench_redirect_performance.rs +++ b/benches/bench_redirect_performance.rs @@ -1,10 +1,10 @@ +use adblock::{Engine, FilterSet}; use criterion::*; use tokio::runtime::Runtime; -use adblock::blocker::{Blocker, BlockerOptions}; use adblock::filters::network::{NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper}; use adblock::request::Request; -use adblock::resources::ResourceStorage; +use adblock::resources::Resource; const 
DEFAULT_LISTS_URL: &str = "https://raw.githubusercontent.com/brave/adblock-resources/master/filter_lists/list_catalog.json"; @@ -84,18 +84,13 @@ fn get_redirect_rules() -> Vec { .collect() } -/// Loads the supplied rules, and the test set of resources, into a Blocker -fn get_preloaded_blocker(rules: Vec) -> Blocker { - let blocker_options = BlockerOptions { - enable_optimizations: true, - }; - - Blocker::new(rules, &blocker_options) +/// Loads the supplied rules, and the test set of resources, into an Engine +fn get_preloaded_engine(rules: Vec) -> Engine { + let filter_set = FilterSet::new_with_rules(rules, vec![], false); + Engine::from_filter_set(filter_set, true /* optimize */) } -fn build_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> ResourceStorage { - let mut resources = ResourceStorage::default(); - +fn get_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> Vec { #[cfg(feature = "resource-assembler")] { use adblock::resources::resource_assembler::assemble_web_accessible_resources; @@ -111,10 +106,7 @@ fn build_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> Re "data/test/fake-uBO-files/scriptlets.js", )), ); - - resource_data.into_iter().for_each(|resource| { - let _res = resources.add_resource(resource); - }); + resource_data } #[cfg(not(feature = "resource-assembler"))] @@ -141,12 +133,8 @@ fn build_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> Re permission: Default::default(), } }) - .for_each(|resource| { - let _res = resources.add_resource(resource); - }); + .collect() } - - resources } /// Maps network filter rules into `Request`s that would trigger those rules @@ -211,9 +199,9 @@ pub fn build_custom_requests(rules: Vec) -> Vec { .collect::>() } -fn bench_fn(blocker: &Blocker, resources: &ResourceStorage, requests: &[Request]) { +fn bench_fn(engine: &Engine, requests: &[Request]) { requests.iter().for_each(|request| { - let block_result = blocker.check(request, 
resources); + let block_result = engine.check_network_request(request); assert!( block_result.redirect.is_some(), "{:?}, {:?}", @@ -228,8 +216,10 @@ fn redirect_performance(c: &mut Criterion) { let rules = get_redirect_rules(); - let blocker = get_preloaded_blocker(rules.clone()); - let resources = build_resources_for_filters(&rules); + let mut engine = get_preloaded_engine(rules.clone()); + let resources = get_resources_for_filters(&rules); + engine.use_resources(resources); + let requests = build_custom_requests(rules.clone()); let requests_len = requests.len() as u64; @@ -237,7 +227,7 @@ fn redirect_performance(c: &mut Criterion) { group.sample_size(10); group.bench_function("without_alias_lookup", move |b| { - b.iter(|| bench_fn(&blocker, &resources, &requests)) + b.iter(|| bench_fn(&engine, &requests)) }); group.finish(); diff --git a/benches/bench_rules.rs b/benches/bench_rules.rs index ce2a17e1..4136bbde 100644 --- a/benches/bench_rules.rs +++ b/benches/bench_rules.rs @@ -1,8 +1,7 @@ use criterion::*; use once_cell::sync::Lazy; -use adblock::blocker::{Blocker, BlockerOptions}; -use adblock::Engine; +use adblock::{Engine, FilterSet}; #[path = "../tests/test_utils.rs"] mod test_utils; @@ -79,14 +78,13 @@ fn list_parse(c: &mut Criterion) { group.finish(); } -fn get_blocker(rules: impl IntoIterator>) -> Blocker { +fn get_engine(rules: impl IntoIterator>) -> Engine { let (network_filters, _) = adblock::lists::parse_filters(rules, false, Default::default()); - let blocker_options = BlockerOptions { - enable_optimizations: true, - }; - - Blocker::new(network_filters, &blocker_options) + Engine::from_filter_set( + FilterSet::new_with_rules(network_filters, vec![], false), + true, + ) } fn blocker_new(c: &mut Criterion) { @@ -102,11 +100,11 @@ fn blocker_new(c: &mut Criterion) { .collect(); let brave_list_rules: Vec<_> = rules_from_lists(&["data/brave/brave-main-list.txt"]).collect(); let engine = Engine::from_rules(&brave_list_rules, Default::default()); - let 
engine_serialized = engine.serialize().unwrap(); + let engine_serialized = engine.serialize().to_vec(); - group.bench_function("el+ep", move |b| b.iter(|| get_blocker(&easylist_rules))); + group.bench_function("el+ep", move |b| b.iter(|| get_engine(&easylist_rules))); group.bench_function("brave-list", move |b| { - b.iter(|| get_blocker(&brave_list_rules)) + b.iter(|| get_engine(&brave_list_rules)) }); group.bench_function("brave-list-deserialize", move |b| { b.iter(|| { diff --git a/benches/bench_serialization.rs b/benches/bench_serialization.rs index 8076eb5f..ff84d17a 100644 --- a/benches/bench_serialization.rs +++ b/benches/bench_serialization.rs @@ -18,19 +18,19 @@ fn serialization(c: &mut Criterion) { ]); let engine = Engine::from_rules(full_rules, Default::default()); - b.iter(|| assert!(!engine.serialize().unwrap().is_empty())) + b.iter(|| assert!(!engine.serialize().to_vec().is_empty())) }); group.bench_function("el", move |b| { let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); let engine = Engine::from_rules(full_rules, Default::default()); - b.iter(|| assert!(!engine.serialize().unwrap().is_empty())) + b.iter(|| assert!(!engine.serialize().to_vec().is_empty())) }); group.bench_function("slimlist", move |b| { let full_rules = rules_from_lists(&["data/slim-list.txt"]); let engine = Engine::from_rules(full_rules, Default::default()); - b.iter(|| assert!(!engine.serialize().unwrap().is_empty())) + b.iter(|| assert!(!engine.serialize().to_vec().is_empty())) }); group.finish(); @@ -48,7 +48,7 @@ fn deserialization(c: &mut Criterion) { ]); let engine = Engine::from_rules(full_rules, Default::default()); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); b.iter(|| { let mut deserialized = Engine::default(); @@ -59,7 +59,7 @@ fn deserialization(c: &mut Criterion) { let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); let engine = Engine::from_rules(full_rules, 
Default::default()); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); b.iter(|| { let mut deserialized = Engine::default(); @@ -70,7 +70,7 @@ fn deserialization(c: &mut Criterion) { let full_rules = rules_from_lists(&["data/slim-list.txt"]); let engine = Engine::from_rules(full_rules, Default::default()); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); b.iter(|| { let mut deserialized = Engine::default(); diff --git a/examples/generate-dat.rs b/examples/generate-dat.rs index 86845c5f..513db3f7 100644 --- a/examples/generate-dat.rs +++ b/examples/generate-dat.rs @@ -21,7 +21,7 @@ fn main() { ) .unwrap(); assert!(engine.check_network_request(&request).exception.is_some()); - let serialized = engine.serialize().expect("Could not serialize!"); + let serialized = engine.serialize().to_vec(); // Write to file let mut file = File::create("engine.dat").expect("Could not create serialization file"); diff --git a/js/src/lib.rs b/js/src/lib.rs index 7396162a..99517824 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -248,7 +248,7 @@ fn engine_url_cosmetic_resources(mut cx: FunctionContext) -> JsResult { fn engine_serialize(mut cx: FunctionContext) -> JsResult { let this = cx.argument::>(0)?; let serialized = if let Ok(engine) = this.0.lock() { - engine.serialize().unwrap() + engine.serialize().to_vec() } else { cx.throw_error("Failed to acquire lock on engine")? }; @@ -323,20 +323,6 @@ fn engine_clear_tags(mut cx: FunctionContext) -> JsResult { Ok(JsNull::new(&mut cx)) } -fn engine_add_resource(mut cx: FunctionContext) -> JsResult { - let this = cx.argument::>(0)?; - - let resource_arg = cx.argument::(1)?; - let resource: Resource = json_ffi::from_js(&mut cx, resource_arg)?; - - let success = if let Ok(mut engine) = this.0.lock() { - engine.add_resource(resource).is_ok() - } else { - cx.throw_error("Failed to acquire lock on engine")? 
- }; - Ok(cx.boolean(success)) -} - fn validate_request(mut cx: FunctionContext) -> JsResult { let url: String = cx.argument::(0)?.value(&mut cx); let source_url: String = cx.argument::(1)?.value(&mut cx); @@ -424,7 +410,6 @@ register_module!(mut m, { m.export_function("Engine_useResources", engine_use_resources)?; m.export_function("Engine_tagExists", engine_tag_exists)?; m.export_function("Engine_clearTags", engine_clear_tags)?; - m.export_function("Engine_addResource", engine_add_resource)?; m.export_function("validateRequest", validate_request)?; m.export_function("uBlockResources", ublock_resources)?; diff --git a/src/blocker.rs b/src/blocker.rs index c84efd8a..dfb8d62d 100644 --- a/src/blocker.rs +++ b/src/blocker.rs @@ -6,12 +6,13 @@ use serde::Serialize; use std::collections::HashSet; use std::ops::DerefMut; -use crate::filters::network::{NetworkFilter, NetworkFilterMaskHelper}; +use crate::filters::fb_network_builder::NetworkFilterListId; +use crate::filters::filter_data_context::FilterDataContextRef; +use crate::filters::network::NetworkFilterMaskHelper; use crate::network_filter_list::NetworkFilterList; use crate::regex_manager::{RegexManager, RegexManagerDiscardPolicy}; use crate::request::Request; use crate::resources::ResourceStorage; -use crate::utils::Hash; /// Options used when constructing a [`Blocker`]. pub struct BlockerOptions { @@ -66,26 +67,16 @@ static NO_TAGS: Lazy> = Lazy::new(HashSet::new); /// Stores network filters for efficient querying. 
pub struct Blocker { - pub(crate) csp: NetworkFilterList, - pub(crate) exceptions: NetworkFilterList, - pub(crate) importants: NetworkFilterList, - pub(crate) redirects: NetworkFilterList, - pub(crate) removeparam: NetworkFilterList, - pub(crate) filters: NetworkFilterList, - pub(crate) generic_hide: NetworkFilterList, - // Enabled tags are not serialized - when deserializing, tags of the existing // instance (the one we are recreating lists into) are maintained pub(crate) tags_enabled: HashSet, - pub(crate) tagged_filters_all: NetworkFilterList, - - pub(crate) enable_optimizations: bool, - // Not serialized - #[cfg(feature = "unsync-regex-caching")] + #[cfg(feature = "single-thread")] pub(crate) regex_manager: std::cell::RefCell, - #[cfg(not(feature = "unsync-regex-caching"))] + #[cfg(not(feature = "single-thread"))] pub(crate) regex_manager: std::sync::Mutex, + + pub(crate) filter_data_context: FilterDataContextRef, } impl Blocker { @@ -95,7 +86,51 @@ impl Blocker { self.check_parameterised(request, resources, false, false) } - #[cfg(feature = "unsync-regex-caching")] + pub(crate) fn get_list(&self, id: NetworkFilterListId) -> NetworkFilterList<'_> { + NetworkFilterList { + list: self + .filter_data_context + .memory + .root() + .network_rules() + .get(id as usize), + filter_data_context: &self.filter_data_context, + } + } + + pub(crate) fn csp(&self) -> NetworkFilterList<'_> { + self.get_list(NetworkFilterListId::Csp) + } + + pub(crate) fn exceptions(&self) -> NetworkFilterList<'_> { + self.get_list(NetworkFilterListId::Exceptions) + } + + pub(crate) fn importants(&self) -> NetworkFilterList<'_> { + self.get_list(NetworkFilterListId::Importants) + } + + pub(crate) fn redirects(&self) -> NetworkFilterList<'_> { + self.get_list(NetworkFilterListId::Redirects) + } + + pub(crate) fn removeparam(&self) -> NetworkFilterList<'_> { + self.get_list(NetworkFilterListId::RemoveParam) + } + + pub(crate) fn filters(&self) -> NetworkFilterList<'_> { + 
self.get_list(NetworkFilterListId::Filters) + } + + pub(crate) fn generic_hide(&self) -> NetworkFilterList<'_> { + self.get_list(NetworkFilterListId::GenericHide) + } + + pub(crate) fn tagged_filters_all(&self) -> NetworkFilterList<'_> { + self.get_list(NetworkFilterListId::TaggedFiltersAll) + } + + #[cfg(feature = "single-thread")] fn borrow_regex_manager(&self) -> std::cell::RefMut<'_, RegexManager> { #[allow(unused_mut)] let mut manager = self.regex_manager.borrow_mut(); @@ -106,7 +141,7 @@ impl Blocker { manager } - #[cfg(not(feature = "unsync-regex-caching"))] + #[cfg(not(feature = "single-thread"))] fn borrow_regex_manager(&self) -> std::sync::MutexGuard<'_, RegexManager> { let mut manager = self.regex_manager.lock().unwrap(); manager.update_time(); @@ -115,7 +150,7 @@ impl Blocker { pub fn check_generic_hide(&self, hostname_request: &Request) -> bool { let mut regex_manager = self.borrow_regex_manager(); - self.generic_hide + self.generic_hide() .check(hostname_request, &HashSet::new(), &mut regex_manager) .is_some() } @@ -139,13 +174,15 @@ impl Blocker { // 4. 
exceptions - if any non-important match of forced // Always check important filters - let important_filter = self.importants.check(request, &NO_TAGS, &mut regex_manager); + let important_filter = self + .importants() + .check(request, &NO_TAGS, &mut regex_manager); // only check the rest of the rules if not previously matched let filter = if important_filter.is_none() && !matched_rule { - self.tagged_filters_all + self.tagged_filters_all() .check(request, &self.tags_enabled, &mut regex_manager) - .or_else(|| self.filters.check(request, &NO_TAGS, &mut regex_manager)) + .or_else(|| self.filters().check(request, &NO_TAGS, &mut regex_manager)) } else { important_filter }; @@ -153,19 +190,19 @@ impl Blocker { let exception = match filter.as_ref() { // if no other rule matches, only check exceptions if forced to None if matched_rule || force_check_exceptions => { - self.exceptions + self.exceptions() .check(request, &self.tags_enabled, &mut regex_manager) } None => None, // If matched an important filter, exceptions don't atter Some(f) if f.is_important() => None, Some(_) => self - .exceptions + .exceptions() .check(request, &self.tags_enabled, &mut regex_manager), }; let redirect_filters = - self.redirects + self.redirects() .check_all(request, &NO_TAGS, regex_manager.deref_mut()); // Extract the highest priority redirect directive. 
@@ -231,7 +268,7 @@ impl Blocker { let rewritten_url = if important { None } else { - Self::apply_removeparam(&self.removeparam, request, regex_manager.deref_mut()) + Self::apply_removeparam(&self.removeparam(), request, regex_manager.deref_mut()) }; // If something has already matched before but we don't know what, still return a match @@ -346,7 +383,7 @@ impl Blocker { let mut regex_manager = self.borrow_regex_manager(); let filters = self - .csp + .csp() .check_all(request, &self.tags_enabled, &mut regex_manager); if filters.is_empty() { @@ -390,96 +427,28 @@ impl Blocker { Some(merged) } - pub fn new(network_filters: Vec, options: &BlockerOptions) -> Self { - // Capacity of filter subsets estimated based on counts in EasyList and EasyPrivacy - if necessary - // the Vectors will grow beyond the pre-set capacity, but it is more efficient to allocate all at once - // $csp= - let mut csp = Vec::with_capacity(200); - // @@filter - let mut exceptions = Vec::with_capacity(network_filters.len() / 8); - // $important - let mut importants = Vec::with_capacity(200); - // $redirect, $redirect-rule - let mut redirects = Vec::with_capacity(200); - // $removeparam - let mut removeparam = Vec::with_capacity(60); - // $tag= - let mut tagged_filters_all = Vec::with_capacity(200); - // $badfilter - let mut badfilters = Vec::with_capacity(100); - // $generichide - let mut generic_hide = Vec::with_capacity(4000); - // All other filters - let mut filters = Vec::with_capacity(network_filters.len()); - - // Injections - // TODO: resource handling - - if !network_filters.is_empty() { - for filter in network_filters.iter() { - if filter.is_badfilter() { - badfilters.push(filter); - } - } - let badfilter_ids: HashSet = badfilters - .iter() - .map(|f| f.get_id_without_badfilter()) - .collect(); - for filter in network_filters { - // skip any bad filters - let filter_id = filter.get_id(); - if badfilter_ids.contains(&filter_id) || filter.is_badfilter() { - continue; - } - - // Redirects 
are independent of blocking behavior. - if filter.is_redirect() { - redirects.push(filter.clone()); - } - - if filter.is_csp() { - csp.push(filter); - } else if filter.is_removeparam() { - removeparam.push(filter); - } else if filter.is_generic_hide() { - generic_hide.push(filter); - } else if filter.is_exception() { - exceptions.push(filter); - } else if filter.is_important() { - importants.push(filter); - } else if filter.tag.is_some() && !filter.is_redirect() { - // `tag` + `redirect` is unsupported for now. - tagged_filters_all.push(filter); - } else if (filter.is_redirect() && filter.also_block_redirect()) - || !filter.is_redirect() - { - filters.push(filter); - } - } - } - + pub(crate) fn from_context(filter_data_context: FilterDataContextRef) -> Self { Self { - csp: NetworkFilterList::new(csp, options.enable_optimizations), - exceptions: NetworkFilterList::new(exceptions, options.enable_optimizations), - importants: NetworkFilterList::new(importants, options.enable_optimizations), - redirects: NetworkFilterList::new(redirects, options.enable_optimizations), - // Don't optimize removeparam, since it can fuse filters without respecting distinct - // queryparam values - removeparam: NetworkFilterList::new(removeparam, false), - filters: NetworkFilterList::new(filters, options.enable_optimizations), - generic_hide: NetworkFilterList::new(generic_hide, options.enable_optimizations), - // Tags special case for enabling/disabling them dynamically + filter_data_context, tags_enabled: HashSet::new(), - tagged_filters_all: NetworkFilterList::new( - tagged_filters_all, - options.enable_optimizations, - ), - // Options - enable_optimizations: options.enable_optimizations, regex_manager: Default::default(), } } + #[cfg(test)] + pub fn new( + network_filters: Vec, + options: &BlockerOptions, + ) -> Self { + use crate::engine::Engine; + use crate::FilterSet; + + let mut filter_set = FilterSet::new(true); + filter_set.network_filters = network_filters; + let engine = 
Engine::from_filter_set(filter_set, options.enable_optimizations); + Self::from_context(engine.filter_data_context()) + } + pub fn use_tags(&mut self, tags: &[&str]) { let tag_set: HashSet = tags.iter().map(|&t| String::from(t)).collect(); self.tags_with_set(tag_set); @@ -518,13 +487,13 @@ impl Blocker { regex_manager.set_discard_policy(new_discard_policy); } - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub fn discard_regex(&self, regex_id: u64) { let mut regex_manager = self.borrow_regex_manager(); regex_manager.discard_regex(regex_id); } - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub fn get_regex_debug_info(&self) -> crate::regex_manager::RegexDebugInfo { let regex_manager = self.borrow_regex_manager(); regex_manager.get_debug_info() diff --git a/src/cosmetic_filter_cache.rs b/src/cosmetic_filter_cache.rs index aef3bff7..546666d7 100644 --- a/src/cosmetic_filter_cache.rs +++ b/src/cosmetic_filter_cache.rs @@ -7,16 +7,24 @@ //! The primary API exposed by this module is the `CosmeticFilterCache` struct, which stores //! cosmetic filters and allows them to be queried efficiently at runtime for any which may be //! relevant to a particular page. +//! To build `CosmeticFilterCache`, use `CosmeticFilterCacheBuilder`. 
-use crate::filters::cosmetic::{ - CosmeticFilter, CosmeticFilterAction, CosmeticFilterMask, CosmeticFilterOperator, -}; +use crate::cosmetic_filter_utils::decode_script_with_permission; +#[cfg(test)] +use crate::filters::cosmetic::CosmeticFilter; +use crate::filters::cosmetic::{CosmeticFilterAction, CosmeticFilterOperator}; +use crate::filters::filter_data_context::FilterDataContextRef; + +use crate::flatbuffers::containers::flat_map::FlatMapView; +use crate::flatbuffers::containers::flat_multimap::FlatMultiMapView; +use crate::flatbuffers::containers::hash_map::HashMapStringView; +use crate::flatbuffers::containers::hash_set::HashSetView; use crate::resources::{PermissionMask, ResourceStorage}; + use crate::utils::Hash; use std::collections::{HashMap, HashSet}; -use memchr::memchr as find_char; use serde::{Deserialize, Serialize}; /// Contains cosmetic filter information intended to be used on a particular URL. @@ -63,106 +71,76 @@ impl UrlSpecificResources { /// will be blocked on any particular page, although when used correctly, all provided rules and /// scriptlets should be safe to apply. pub(crate) struct CosmeticFilterCache { - /// Rules that are just the CSS class of an element to be hidden on all sites, e.g. `##.ad`. - pub(crate) simple_class_rules: HashSet, - /// Rules that are just the CSS id of an element to be hidden on all sites, e.g. `###banner`. - pub(crate) simple_id_rules: HashSet, - /// Rules that are the CSS selector of an element to be hidden on all sites, starting with a - /// class, e.g. `##.ad image`. - pub(crate) complex_class_rules: HashMap>, - /// Rules that are the CSS selector of an element to be hidden on all sites, starting with an - /// id, e.g. `###banner > .text a`. - pub(crate) complex_id_rules: HashMap>, - - pub(crate) specific_rules: HostnameRuleDb, - - /// Rules that are the CSS selector of an element to be hidden on all sites that do not fit - /// into any of the class or id buckets above, e.g. 
`##a[href="https://malware.com"]` - pub(crate) misc_generic_selectors: HashSet, + filter_data_context: FilterDataContextRef, } -impl CosmeticFilterCache { - pub fn new() -> Self { - Self { - simple_class_rules: HashSet::new(), - simple_id_rules: HashSet::new(), - complex_class_rules: HashMap::new(), - complex_id_rules: HashMap::new(), - - specific_rules: HostnameRuleDb::default(), +/// Representations of filters with complex behavior that relies on in-page JS logic. +/// +/// These get stored in-memory as JSON and should be deserialized/acted on by a content script. +/// JSON is pragmatic here since there are relatively fewer of these type of rules, and they will +/// be handled by in-page JS anyways. +#[derive(Deserialize, Serialize, Clone)] +pub struct ProceduralOrActionFilter { + /// A selector for elements that this filter applies to. + /// This may be a plain CSS selector, or it can consist of multiple procedural operators. + pub selector: Vec, + /// An action to apply to matching elements. + /// If no action is present, the filter assumes default behavior of hiding the element with + /// a style of `display: none !important`. + #[serde(skip_serializing_if = "Option::is_none")] + pub action: Option, +} - misc_generic_selectors: HashSet::new(), +impl ProceduralOrActionFilter { + /// Returns `(selector, style)` if the filter can be expressed in pure CSS. 
+ pub fn as_css(&self) -> Option<(String, String)> { + match (&self.selector[..], &self.action) { + ([CosmeticFilterOperator::CssSelector(selector)], None) => { + Some((selector.to_string(), "display: none !important".to_string())) + } + ( + [CosmeticFilterOperator::CssSelector(selector)], + Some(CosmeticFilterAction::Style(style)), + ) => Some((selector.to_string(), style.to_string())), + _ => None, } } - pub fn from_rules(rules: Vec) -> Self { - let mut self_ = Self { - simple_class_rules: HashSet::with_capacity(rules.len() / 2), - simple_id_rules: HashSet::with_capacity(rules.len() / 2), - complex_class_rules: HashMap::with_capacity(rules.len() / 2), - complex_id_rules: HashMap::with_capacity(rules.len() / 2), - - specific_rules: HostnameRuleDb::default(), - - misc_generic_selectors: HashSet::with_capacity(rules.len() / 30), - }; - - for rule in rules { - self_.add_filter(rule) + /// Convenience constructor for pure CSS style filters. + #[cfg(test)] + pub(crate) fn from_css(selector: String, style: String) -> Self { + Self { + selector: vec![CosmeticFilterOperator::CssSelector(selector)], + action: Some(CosmeticFilterAction::Style(style)), } - - self_ } +} - pub fn add_filter(&mut self, rule: CosmeticFilter) { - if rule.has_hostname_constraint() { - if let Some(generic_rule) = rule.hidden_generic_rule() { - self.add_generic_filter(generic_rule); - } - self.specific_rules.store_rule(rule); - } else { - self.add_generic_filter(rule); +fn hostname_domain_hashes(hostname: &str, domain: &str) -> (Vec, Vec) { + let request_entities = + crate::filters::cosmetic::get_entity_hashes_from_labels(hostname, domain); + let request_hostnames = + crate::filters::cosmetic::get_hostname_hashes_from_labels(hostname, domain); + + (request_entities, request_hostnames) +} + +impl CosmeticFilterCache { + pub fn from_context(filter_data_context: FilterDataContextRef) -> Self { + Self { + filter_data_context, } } - /// Add a filter, assuming it has already been determined to be a 
generic rule - fn add_generic_filter(&mut self, rule: CosmeticFilter) { - let selector = match rule.plain_css_selector() { - Some(s) => s.to_string(), - None => { - // Procedural cosmetic filters cannot be generic. - // Silently ignoring this filter. - return; - } - }; + #[cfg(test)] + pub fn from_rules(rules: Vec) -> Self { + use crate::engine::Engine; + use crate::FilterSet; - if selector.starts_with('.') { - if let Some(key) = key_from_selector(&selector) { - assert!(key.starts_with('.')); - let class = key[1..].to_string(); - if key == selector { - self.simple_class_rules.insert(class); - } else if let Some(bucket) = self.complex_class_rules.get_mut(&class) { - bucket.push(selector); - } else { - self.complex_class_rules.insert(class, vec![selector]); - } - } - } else if selector.starts_with('#') { - if let Some(key) = key_from_selector(&selector) { - assert!(key.starts_with('#')); - let id = key[1..].to_string(); - if key == selector { - self.simple_id_rules.insert(id); - } else if let Some(bucket) = self.complex_id_rules.get_mut(&id) { - bucket.push(selector); - } else { - self.complex_id_rules.insert(id, vec![selector]); - } - } - } else { - self.misc_generic_selectors.insert(selector); - } + let mut filter_set = FilterSet::new(true); + filter_set.cosmetic_filters = rules; + let engine = Engine::from_filter_set(filter_set, true); + engine.cosmetic_cache() } /// Generic class/id rules are by far the most common type of cosmetic filtering rule, and they @@ -191,34 +169,42 @@ impl CosmeticFilterCache { ) -> Vec { let mut selectors = vec![]; + let cosmetic_filters = self.filter_data_context.memory.root().cosmetic_filters(); + let simple_class_rules = HashSetView::new(cosmetic_filters.simple_class_rules()); + let simple_id_rules = HashSetView::new(cosmetic_filters.simple_id_rules()); + let complex_class_rules = HashMapStringView::new( + cosmetic_filters.complex_class_rules_index(), + cosmetic_filters.complex_class_rules_values(), + ); + let complex_id_rules = 
HashMapStringView::new( + cosmetic_filters.complex_id_rules_index(), + cosmetic_filters.complex_id_rules_values(), + ); + classes.into_iter().for_each(|class| { let class = class.as_ref(); - if self.simple_class_rules.contains(class) - && !exceptions.contains(&format!(".{}", class)) - { + if simple_class_rules.contains(class) && !exceptions.contains(&format!(".{}", class)) { selectors.push(format!(".{}", class)); } - if let Some(bucket) = self.complex_class_rules.get(class) { - selectors.extend( - bucket - .iter() - .filter(|sel| !exceptions.contains(*sel)) - .map(|s| s.to_owned()), - ); + if let Some(values) = complex_class_rules.get(class) { + for sel in values.data() { + if !exceptions.contains(sel) { + selectors.push(sel.to_string()); + } + } } }); ids.into_iter().for_each(|id| { let id = id.as_ref(); - if self.simple_id_rules.contains(id) && !exceptions.contains(&format!("#{}", id)) { + if simple_id_rules.contains(id) && !exceptions.contains(&format!("#{}", id)) { selectors.push(format!("#{}", id)); } - if let Some(bucket) = self.complex_id_rules.get(id) { - selectors.extend( - bucket - .iter() - .filter(|sel| !exceptions.contains(*sel)) - .map(|s| s.to_owned()), - ); + if let Some(values) = complex_id_rules.get(id) { + for sel in values.data() { + if !exceptions.contains(sel) { + selectors.push(sel.to_string()); + } + } } }); @@ -258,75 +244,82 @@ impl CosmeticFilterCache { .chain(request_hostnames.iter()) .collect(); - fn populate_set( - hash: &Hash, - source_bin: &HostnameFilterBin, - dest_set: &mut HashSet, - ) { - if let Some(s) = source_bin.get(hash) { - s.iter().for_each(|s| { - dest_set.insert(s.to_owned()); - }); - } - } + let cosmetic_filters = self.filter_data_context.memory.root().cosmetic_filters(); + let hostname_rules_view = FlatMapView::new( + cosmetic_filters.hostname_index(), + cosmetic_filters.hostname_values(), + ); + let hostname_hide_view = FlatMultiMapView::new( + cosmetic_filters.hostname_hide_index(), + 
cosmetic_filters.hostname_hide_values(), + ); + let hostname_inject_script_view = FlatMultiMapView::new( + cosmetic_filters.hostname_inject_script_index(), + cosmetic_filters.hostname_inject_script_values(), + ); + for hash in hashes.iter() { - populate_set( - hash, - &self.specific_rules.hide, - &mut specific_hide_selectors, - ); - populate_set( - hash, - &self.specific_rules.procedural_action, - &mut procedural_actions, - ); - // special behavior: `script_injections` doesn't have to own the strings yet, since the - // scripts need to be fetched and templated later - if let Some(s) = self.specific_rules.inject_script.get(hash) { - s.iter().for_each(|(s, mask)| { + // Handle top-level hide selectors + if let Some(hide_iterator) = hostname_hide_view.get(**hash) { + for (_, hide_selector) in hide_iterator { + if !exceptions.contains(hide_selector) { + specific_hide_selectors.insert(hide_selector.to_owned()); + } + } + } + + // Handle top-level inject scripts with encoded permissions + if let Some(script_iterator) = hostname_inject_script_view.get(**hash) { + for (_, encoded_script) in script_iterator { + let (permission, script) = decode_script_with_permission(encoded_script); script_injections - .entry(s) - .and_modify(|entry| *entry |= *mask) - .or_insert(*mask); - }); + .entry(script) + .and_modify(|entry| *entry |= permission) + .or_insert(permission); + } } - } - fn prune_set( - hash: &Hash, - source_bin: &HostnameFilterBin, - dest_set: &mut HashSet, - ) { - if let Some(s) = source_bin.get(hash) { - s.iter().for_each(|s| { - dest_set.remove(s); - }); + // Handle remaining rule types from HostnameSpecificRules + if let Some(hostname_rules) = hostname_rules_view.get(**hash) { + // Process procedural actions + if let Some(procedural_actions_rules) = hostname_rules.procedural_action() { + for action in procedural_actions_rules.iter() { + procedural_actions.insert(action.to_owned()); + } + } } } + + // Process unhide/exception filters for hash in hashes.iter() { - // 
special behavior: unhide rules need to go in `exceptions` as well - if let Some(s) = self.specific_rules.unhide.get(hash) { - s.iter().for_each(|s| { - specific_hide_selectors.remove(s); - exceptions.insert(s.to_owned()); - }); - } - prune_set( - hash, - &self.specific_rules.procedural_action_exception, - &mut procedural_actions, - ); - // same logic but not using prune_set since strings are unowned, (see above) - if let Some(s) = self.specific_rules.uninject_script.get(hash) { - for s in s { - if s.is_empty() { - except_all_scripts = true; - script_injections.clear(); + if let Some(hostname_rules) = hostname_rules_view.get(**hash) { + // Process unhide selectors (special behavior: they also go in exceptions) + if let Some(unhide_rules) = hostname_rules.unhide() { + for selector in unhide_rules.iter() { + specific_hide_selectors.remove(selector); + exceptions.insert(selector.to_owned()); + } + } + + // Process procedural action exceptions + if let Some(procedural_exceptions) = hostname_rules.procedural_action_exception() { + for action in procedural_exceptions.iter() { + procedural_actions.remove(action); } - if except_all_scripts { - continue; + } + + // Process script uninjects + if let Some(uninject_scripts) = hostname_rules.uninject_script() { + for script in uninject_scripts.iter() { + if script.is_empty() { + except_all_scripts = true; + script_injections.clear(); + } + if except_all_scripts { + continue; + } + script_injections.remove(script); } - script_injections.remove(s.as_str()); } } } @@ -334,11 +327,16 @@ impl CosmeticFilterCache { let hide_selectors = if generichide { specific_hide_selectors } else { - let mut hide_selectors = self - .misc_generic_selectors - .difference(&exceptions) - .cloned() - .collect::>(); + let cosmetic_filters = self.filter_data_context.memory.root().cosmetic_filters(); + let misc_generic_selectors_vector = cosmetic_filters.misc_generic_selectors(); + + // Calculate the intersection of the two sets, O(n * log m) time + let 
mut hide_selectors = HashSet::new(); + for selector in misc_generic_selectors_vector.iter() { + if !exceptions.contains(selector) { + hide_selectors.insert(selector.to_string()); + } + } specific_hide_selectors.into_iter().for_each(|sel| { hide_selectors.insert(sel); }); @@ -357,257 +355,6 @@ impl CosmeticFilterCache { } } -/// Each hostname-specific filter can be pointed to by several different hostnames, and each -/// hostname can correspond to several different filters. To effectively store and access those -/// filters by hostname, all the non-hostname information for filters is stored in per-hostname -/// "buckets" within a Vec, and each bucket is identified by its index. Hostname hashes are used as -/// keys to get the indices of relevant buckets, which are in turn used to retrieve all the filters -/// that apply. -#[derive(Default)] -pub(crate) struct HostnameFilterBin(pub HashMap>); - -impl HostnameFilterBin { - pub fn insert(&mut self, token: &Hash, filter: T) { - if let Some(bucket) = self.0.get_mut(token) { - bucket.push(filter); - } else { - self.0.insert(*token, vec![filter]); - } - } - - fn get(&self, token: &Hash) -> Option<&Vec> { - self.0.get(token) - } -} - -impl HostnameFilterBin { - /// Convenience method that serializes to JSON - pub fn insert_procedural_action_filter(&mut self, token: &Hash, f: &ProceduralOrActionFilter) { - self.insert(token, serde_json::to_string(f).unwrap()); - } -} - -/// Holds filter bins categorized by filter type. -#[derive(Default)] -pub(crate) struct HostnameRuleDb { - /// Simple hostname-specific hide rules, e.g. `example.com##.ad`. - /// - /// The parameter is the rule's CSS selector. - pub hide: HostnameFilterBin, - /// Simple hostname-specific hide exception rules, e.g. `example.com#@#.ad`. - /// - /// The parameter is the rule's CSS selector. - pub unhide: HostnameFilterBin, - /// Hostname-specific rules with a scriptlet to inject along with any arguments, e.g. - /// `example.com##+js(acis, Number.isNan)`. 
- /// - /// The parameter is the contents of the `+js(...)` syntax construct. - pub inject_script: HostnameFilterBin<(String, PermissionMask)>, - /// Hostname-specific rules to except a scriptlet to inject along with any arguments, e.g. - /// `example.com#@#+js(acis, Number.isNan)`. - /// - /// The parameter is the contents of the `+js(...)` syntax construct. - /// - /// In practice, these rules are extremely rare in filter lists. - pub uninject_script: HostnameFilterBin, - /// Procedural filters and/or filters with a [`CosmeticFilterAction`]. - /// - /// Each is a [`ProceduralOrActionFilter`] struct serialized as JSON. - pub procedural_action: HostnameFilterBin, - /// Exceptions for procedural filters and/or filters with a [`CosmeticFilterAction`]. - /// - /// Each is a [`ProceduralOrActionFilter`] struct serialized as JSON. - pub procedural_action_exception: HostnameFilterBin, -} - -/// Representations of filters with complex behavior that relies on in-page JS logic. -/// -/// These get stored in-memory as JSON and should be deserialized/acted on by a content script. -/// JSON is pragmatic here since there are relatively fewer of these type of rules, and they will -/// be handled by in-page JS anyways. -#[derive(Deserialize, Serialize, Clone)] -pub struct ProceduralOrActionFilter { - /// A selector for elements that this filter applies to. - /// This may be a plain CSS selector, or it can consist of multiple procedural operators. - pub selector: Vec, - /// An action to apply to matching elements. - /// If no action is present, the filter assumes default behavior of hiding the element with - /// a style of `display: none !important`. - #[serde(skip_serializing_if = "Option::is_none")] - pub action: Option, -} - -impl ProceduralOrActionFilter { - /// Returns `(selector, style)` if the filter can be expressed in pure CSS. 
- pub fn as_css(&self) -> Option<(String, String)> { - match (&self.selector[..], &self.action) { - ([CosmeticFilterOperator::CssSelector(selector)], None) => { - Some((selector.to_string(), "display: none !important".to_string())) - } - ( - [CosmeticFilterOperator::CssSelector(selector)], - Some(CosmeticFilterAction::Style(style)), - ) => Some((selector.to_string(), style.to_string())), - _ => None, - } - } - - /// Convenience constructor for pure CSS style filters. - pub(crate) fn from_css(selector: String, style: String) -> Self { - Self { - selector: vec![CosmeticFilterOperator::CssSelector(selector)], - action: Some(CosmeticFilterAction::Style(style)), - } - } -} - -impl HostnameRuleDb { - pub fn store_rule(&mut self, rule: CosmeticFilter) { - use SpecificFilterType::*; - - let unhide = rule.mask.contains(CosmeticFilterMask::UNHIDE); - let script_inject = rule.mask.contains(CosmeticFilterMask::SCRIPT_INJECT); - - let kind = match ( - script_inject, - rule.plain_css_selector().map(|s| s.to_string()), - rule.action, - ) { - (false, Some(selector), None) => Hide(selector), - (true, Some(selector), None) => InjectScript((selector, rule.permission)), - (false, selector, action) => ProceduralOrAction( - serde_json::to_string(&ProceduralOrActionFilter { - selector: selector - .map(|selector| vec![CosmeticFilterOperator::CssSelector(selector)]) - .unwrap_or(rule.selector), - action, - }) - .unwrap(), - ), - (true, _, Some(_)) => return, // script injection with action - shouldn't be possible - (true, None, _) => return, // script injection without plain CSS selector - shouldn't be possible - }; - - let kind = if unhide { kind.negated() } else { kind }; - - let tokens_to_insert = std::iter::empty() - .chain(rule.hostnames.unwrap_or_default()) - .chain(rule.entities.unwrap_or_default()); - - tokens_to_insert.for_each(|t| self.store(&t, kind.clone())); - - let tokens_to_insert_negated = std::iter::empty() - .chain(rule.not_hostnames.unwrap_or_default()) - 
.chain(rule.not_entities.unwrap_or_default()); - - let negated = kind.negated(); - - tokens_to_insert_negated.for_each(|t| self.store(&t, negated.clone())); - } - - fn store(&mut self, token: &Hash, kind: SpecificFilterType) { - use SpecificFilterType::*; - - match kind { - Hide(s) => self.hide.insert(token, s), - Unhide(s) => self.unhide.insert(token, s), - InjectScript(s) => self.inject_script.insert(token, s), - UninjectScript((s, _)) => self.uninject_script.insert(token, s), - ProceduralOrAction(s) => self.procedural_action.insert(token, s), - ProceduralOrActionException(s) => self.procedural_action_exception.insert(token, s), - } - } -} - -/// Exists to use common logic for binning filters correctly -#[derive(Clone)] -enum SpecificFilterType { - Hide(String), - Unhide(String), - InjectScript((String, PermissionMask)), - UninjectScript((String, PermissionMask)), - ProceduralOrAction(String), - ProceduralOrActionException(String), -} - -impl SpecificFilterType { - fn negated(self) -> Self { - match self { - Self::Hide(s) => Self::Unhide(s), - Self::Unhide(s) => Self::Hide(s), - Self::InjectScript(s) => Self::UninjectScript(s), - Self::UninjectScript(s) => Self::InjectScript(s), - Self::ProceduralOrAction(s) => Self::ProceduralOrActionException(s), - Self::ProceduralOrActionException(s) => Self::ProceduralOrAction(s), - } - } -} - -fn hostname_domain_hashes(hostname: &str, domain: &str) -> (Vec, Vec) { - let request_entities = - crate::filters::cosmetic::get_entity_hashes_from_labels(hostname, domain); - let request_hostnames = - crate::filters::cosmetic::get_hostname_hashes_from_labels(hostname, domain); - - (request_entities, request_hostnames) -} - -/// Returns the first token of a CSS selector. -/// -/// This should only be called once `selector` has been verified to start with either a "#" or "." -/// character. 
-fn key_from_selector(selector: &str) -> Option { - use once_cell::sync::Lazy; - use regex::Regex; - - static RE_PLAIN_SELECTOR: Lazy = Lazy::new(|| Regex::new(r"^[#.][\w\\-]+").unwrap()); - static RE_PLAIN_SELECTOR_ESCAPED: Lazy = - Lazy::new(|| Regex::new(r"^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+").unwrap()); - static RE_ESCAPE_SEQUENCE: Lazy = - Lazy::new(|| Regex::new(r"\\([0-9A-Fa-f]+ |.)").unwrap()); - - // If there are no escape characters in the selector, just take the first class or id token. - let mat = RE_PLAIN_SELECTOR.find(selector); - if let Some(location) = mat { - let key = &location.as_str(); - if find_char(b'\\', key.as_bytes()).is_none() { - return Some((*key).into()); - } - } else { - return None; - } - - // Otherwise, the characters in the selector must be escaped. - let mat = RE_PLAIN_SELECTOR_ESCAPED.find(selector); - if let Some(location) = mat { - let mut key = String::with_capacity(selector.len()); - let escaped = &location.as_str(); - let mut beginning = 0; - let mat = RE_ESCAPE_SEQUENCE.captures_iter(escaped); - for capture in mat { - // Unwrap is safe because the 0th capture group is the match itself - let location = capture.get(0).unwrap(); - key += &escaped[beginning..location.start()]; - beginning = location.end(); - // Unwrap is safe because there is a capture group specified in the regex - let capture = capture.get(1).unwrap().as_str(); - if capture.chars().count() == 1 { - // Check number of unicode characters rather than byte length - key += capture; - } else { - // This u32 conversion can overflow - let codepoint = u32::from_str_radix(&capture[..capture.len() - 1], 16).ok()?; - - // Not all u32s are valid Unicode codepoints - key += &core::char::from_u32(codepoint)?.to_string(); - } - } - Some(key + &escaped[beginning..]) - } else { - None - } -} - #[cfg(test)] #[path = "../tests/unit/cosmetic_filter_cache.rs"] mod unit_tests; diff --git a/src/cosmetic_filter_cache_builder.rs b/src/cosmetic_filter_cache_builder.rs new file mode 
100644 index 00000000..9e0c3e4e --- /dev/null +++ b/src/cosmetic_filter_cache_builder.rs @@ -0,0 +1,271 @@ +//! Provides API to prepare and serialize cosmetic filter rules to a flatbuffer. +//! To build the struct, use `CosmeticFilterCacheBuilder`. +//! To use the serialized rules, use `CosmeticFilterCache`. + +use crate::cosmetic_filter_cache::ProceduralOrActionFilter; +use crate::cosmetic_filter_utils::SpecificFilterType; +use crate::cosmetic_filter_utils::{encode_script_with_permission, key_from_selector}; +use crate::filters::cosmetic::{CosmeticFilter, CosmeticFilterMask, CosmeticFilterOperator}; +use crate::filters::flatbuffer_generated::fb; +use crate::flatbuffers::containers::flat_map::FlatMapBuilder; +use crate::flatbuffers::containers::flat_multimap::FlatMultiMapBuilder; +use crate::flatbuffers::containers::hash_map::HashMapBuilder; +use crate::flatbuffers::containers::hash_set::HashSetBuilder; + +use crate::flatbuffers::containers::flat_serialize::{ + serialize_vec_opt, FlatBuilder, FlatSerialize, +}; + +use crate::utils::Hash; + +use std::collections::{HashMap, HashSet}; + +use flatbuffers::WIPOffset; + +/// Accumulates hostname-specific rules for a single domain before building HostnameSpecificRules +/// Note: hide and inject_script are now handled separately at the top level +/// See HostnameSpecificRules declaration for more details. 
+#[derive(Default)] +struct HostnameRule { + unhide: Vec, + uninject_script: Vec, + procedural_action: Vec, + procedural_action_exception: Vec, +} + +impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for HostnameRule { + type Output = WIPOffset>; + + fn serialize( + value: Self, + builder: &mut B, + ) -> flatbuffers::WIPOffset> { + let unhide = serialize_vec_opt(value.unhide, builder); + let uninject_script = serialize_vec_opt(value.uninject_script, builder); + let procedural_action = serialize_vec_opt(value.procedural_action, builder); + let procedural_action_exception = + serialize_vec_opt(value.procedural_action_exception, builder); + + fb::HostnameSpecificRules::create( + builder.raw_builder(), + &fb::HostnameSpecificRulesArgs { + unhide, + uninject_script, + procedural_action, + procedural_action_exception, + }, + ) + } +} + +#[derive(Default, Clone)] +struct StringVector(Vec); + +#[derive(Default)] +pub(crate) struct CosmeticFilterCacheBuilder { + simple_class_rules: HashSetBuilder, + simple_id_rules: HashSetBuilder, + misc_generic_selectors: HashSet, + complex_class_rules: HashMapBuilder, + complex_id_rules: HashMapBuilder, + + hostname_hide: FlatMultiMapBuilder, + hostname_inject_script: FlatMultiMapBuilder, + + specific_rules: HashMap, +} + +impl CosmeticFilterCacheBuilder { + pub fn from_rules(rules: Vec) -> Self { + let mut self_ = Self::default(); + + for rule in rules { + self_.add_filter(rule) + } + + self_ + } + + pub fn add_filter(&mut self, rule: CosmeticFilter) { + if rule.has_hostname_constraint() { + if let Some(generic_rule) = rule.hidden_generic_rule() { + self.add_generic_filter(generic_rule); + } + self.store_hostname_rule(rule); + } else { + self.add_generic_filter(rule); + } + } + + /// Add a filter, assuming it has already been determined to be a generic rule + fn add_generic_filter(&mut self, rule: CosmeticFilter) { + let selector = match rule.plain_css_selector() { + Some(s) => s.to_string(), + None => { + // Procedural cosmetic filters 
cannot be generic. + // Silently ignoring this filter. + return; + } + }; + + if selector.starts_with('.') { + if let Some(key) = key_from_selector(&selector) { + assert!(key.starts_with('.')); + let class = key[1..].to_string(); + if key == selector { + self.simple_class_rules.insert(class); + } else { + let selectors = self + .complex_class_rules + .get_or_insert(class, StringVector::default()); + selectors.0.push(selector); + } + } + } else if selector.starts_with('#') { + if let Some(key) = key_from_selector(&selector) { + assert!(key.starts_with('#')); + let id = key[1..].to_string(); + if key == selector { + self.simple_id_rules.insert(id); + } else { + let selectors = self + .complex_id_rules + .get_or_insert(id, StringVector::default()); + selectors.0.push(selector); + } + } + } else { + self.misc_generic_selectors.insert(selector); + } + } + + fn store_hostname_rule(&mut self, rule: CosmeticFilter) { + use SpecificFilterType::*; + + let unhide = rule.mask.contains(CosmeticFilterMask::UNHIDE); + let script_inject = rule.mask.contains(CosmeticFilterMask::SCRIPT_INJECT); + + let kind = match ( + script_inject, + rule.plain_css_selector().map(|s| s.to_string()), + rule.action, + ) { + (false, Some(selector), None) => Hide(selector), + (true, Some(selector), None) => InjectScript((selector, rule.permission)), + (false, selector, action) => ProceduralOrAction( + serde_json::to_string(&ProceduralOrActionFilter { + selector: selector + .map(|selector| vec![CosmeticFilterOperator::CssSelector(selector)]) + .unwrap_or(rule.selector), + action, + }) + .unwrap(), + ), + (true, _, Some(_)) => return, // script injection with action - shouldn't be possible + (true, None, _) => return, // script injection without plain CSS selector - shouldn't be possible + }; + + let kind = if unhide { kind.negated() } else { kind }; + + let tokens_to_insert = std::iter::empty() + .chain(rule.hostnames.unwrap_or_default()) + .chain(rule.entities.unwrap_or_default()); + + 
tokens_to_insert.for_each(|t| self.store_hostname_filter(&t, kind.clone())); + + let tokens_to_insert_negated = std::iter::empty() + .chain(rule.not_hostnames.unwrap_or_default()) + .chain(rule.not_entities.unwrap_or_default()); + + let negated = kind.negated(); + + tokens_to_insert_negated.for_each(|t| self.store_hostname_filter(&t, negated.clone())); + } + + fn store_hostname_filter(&mut self, token: &Hash, kind: SpecificFilterType) { + use SpecificFilterType::*; + + match kind { + // Handle hide and inject_script at top level for better deduplication + Hide(s) => { + self.hostname_hide.insert(*token, s); + } + InjectScript((s, permission)) => { + let encoded_script = encode_script_with_permission(s, permission); + self.hostname_inject_script.insert(*token, encoded_script); + } + // Handle remaining types through HostnameRule + Unhide(s) => { + let entry = self.specific_rules.entry(*token).or_default(); + entry.unhide.push(s); + } + UninjectScript((s, _)) => { + let entry = self.specific_rules.entry(*token).or_default(); + entry.uninject_script.push(s); + } + ProceduralOrAction(s) => { + let entry = self.specific_rules.entry(*token).or_default(); + entry.procedural_action.push(s); + } + ProceduralOrActionException(s) => { + let entry = self.specific_rules.entry(*token).or_default(); + entry.procedural_action_exception.push(s); + } + } + } +} + +impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for StringVector { + type Output = WIPOffset>; + + fn serialize(value: Self, builder: &mut B) -> WIPOffset> { + let v = FlatSerialize::serialize(value.0, builder); + fb::StringVector::create( + builder.raw_builder(), + &fb::StringVectorArgs { data: Some(v) }, + ) + } +} + +impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for CosmeticFilterCacheBuilder { + type Output = WIPOffset>; + + fn serialize(value: Self, builder: &mut B) -> WIPOffset> { + let complex_class_rules = HashMapBuilder::finish(value.complex_class_rules, builder); + let complex_id_rules = 
HashMapBuilder::finish(value.complex_id_rules, builder); + + // Handle top-level hostname hide and inject_script for better deduplication + let hostname_hide = FlatMultiMapBuilder::finish(value.hostname_hide, builder); + let hostname_inject_script = + FlatMultiMapBuilder::finish(value.hostname_inject_script, builder); + + // Handle remaining rule types through HostnameSpecificRules + let hostname_specific_rules = FlatMapBuilder::finish(value.specific_rules, builder); + + let simple_class_rules = Some(FlatSerialize::serialize(value.simple_class_rules, builder)); + let simple_id_rules = Some(FlatSerialize::serialize(value.simple_id_rules, builder)); + let misc_generic_selectors = Some(FlatSerialize::serialize( + value.misc_generic_selectors, + builder, + )); + + fb::CosmeticFilters::create( + builder.raw_builder(), + &fb::CosmeticFiltersArgs { + simple_class_rules, + simple_id_rules, + misc_generic_selectors, + complex_class_rules_index: Some(complex_class_rules.keys), + complex_class_rules_values: Some(complex_class_rules.values), + complex_id_rules_index: Some(complex_id_rules.keys), + complex_id_rules_values: Some(complex_id_rules.values), + hostname_hide_index: Some(hostname_hide.keys), + hostname_hide_values: Some(hostname_hide.values), + hostname_inject_script_index: Some(hostname_inject_script.keys), + hostname_inject_script_values: Some(hostname_inject_script.values), + hostname_index: Some(hostname_specific_rules.keys), + hostname_values: Some(hostname_specific_rules.values), + }, + ) + } +} diff --git a/src/cosmetic_filter_utils.rs b/src/cosmetic_filter_utils.rs new file mode 100644 index 00000000..8df17b0e --- /dev/null +++ b/src/cosmetic_filter_utils.rs @@ -0,0 +1,109 @@ +//! Some utility functions for manipulating cosmetic filter rules. +//! Used by `CosmeticFilterCacheBuilder` and `CosmeticFilterCache`. + +use crate::resources::PermissionMask; +use memchr::memchr as find_char; + +/// Returns the first token of a CSS selector. 
+/// +/// This should only be called once `selector` has been verified to start with either a "#" or "." +/// character. +pub(crate) fn key_from_selector(selector: &str) -> Option { + use once_cell::sync::Lazy; + use regex::Regex; + + static RE_PLAIN_SELECTOR: Lazy = Lazy::new(|| Regex::new(r"^[#.][\w\\-]+").unwrap()); + static RE_PLAIN_SELECTOR_ESCAPED: Lazy = + Lazy::new(|| Regex::new(r"^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+").unwrap()); + static RE_ESCAPE_SEQUENCE: Lazy = + Lazy::new(|| Regex::new(r"\\([0-9A-Fa-f]+ |.)").unwrap()); + + // If there are no escape characters in the selector, just take the first class or id token. + let mat = RE_PLAIN_SELECTOR.find(selector); + if let Some(location) = mat { + let key = &location.as_str(); + if find_char(b'\\', key.as_bytes()).is_none() { + return Some((*key).into()); + } + } else { + return None; + } + + // Otherwise, the characters in the selector must be escaped. + let mat = RE_PLAIN_SELECTOR_ESCAPED.find(selector); + if let Some(location) = mat { + let mut key = String::with_capacity(selector.len()); + let escaped = &location.as_str(); + let mut beginning = 0; + let mat = RE_ESCAPE_SEQUENCE.captures_iter(escaped); + for capture in mat { + // Unwrap is safe because the 0th capture group is the match itself + let location = capture.get(0).unwrap(); + key += &escaped[beginning..location.start()]; + beginning = location.end(); + // Unwrap is safe because there is a capture group specified in the regex + let capture = capture.get(1).unwrap().as_str(); + if capture.chars().count() == 1 { + // Check number of unicode characters rather than byte length + key += capture; + } else { + // This u32 conversion can overflow + let codepoint = u32::from_str_radix(&capture[..capture.len() - 1], 16).ok()?; + + // Not all u32s are valid Unicode codepoints + key += &core::char::from_u32(codepoint)?.to_string(); + } + } + Some(key + &escaped[beginning..]) + } else { + None + } +} + +/// Exists to use common logic for binning filters 
correctly +#[derive(Clone)] +pub(crate) enum SpecificFilterType { + Hide(String), + Unhide(String), + InjectScript((String, PermissionMask)), + UninjectScript((String, PermissionMask)), + ProceduralOrAction(String), + ProceduralOrActionException(String), +} + +impl SpecificFilterType { + pub(crate) fn negated(self) -> Self { + match self { + Self::Hide(s) => Self::Unhide(s), + Self::Unhide(s) => Self::Hide(s), + Self::InjectScript(s) => Self::UninjectScript(s), + Self::UninjectScript(s) => Self::InjectScript(s), + Self::ProceduralOrAction(s) => Self::ProceduralOrActionException(s), + Self::ProceduralOrActionException(s) => Self::ProceduralOrAction(s), + } + } +} + +/// Encodes permission bits in the last byte of a script string +/// Returns the script with permission byte prepended +pub(crate) fn encode_script_with_permission( + mut script: String, + permission: PermissionMask, +) -> String { + script.push(permission.to_bits() as char); + script +} + +/// Decodes permission bits from the last byte of a script string +/// Returns (permission, script) tuple +pub(crate) fn decode_script_with_permission(encoded_script: &str) -> (PermissionMask, &str) { + if encoded_script.is_empty() { + return (PermissionMask::default(), encoded_script); + } + + let last_char = encoded_script.chars().last().unwrap(); + let permission_bits = last_char as u8; + let permission = PermissionMask::from_bits(permission_bits); + let script = &encoded_script[..encoded_script.len() - 1]; + (permission, script) +} diff --git a/src/data_format/mod.rs b/src/data_format/mod.rs index 489c8ab5..d62dca13 100644 --- a/src/data_format/mod.rs +++ b/src/data_format/mod.rs @@ -4,92 +4,67 @@ //! In order to support multiple format versions simultaneously, this module wraps around different //! serialization/deserialization implementations and can automatically dispatch to the appropriate //! one. 
- -mod storage; - -pub(crate) mod utils; - -use crate::blocker::Blocker; -use crate::cosmetic_filter_cache::CosmeticFilterCache; -use crate::network_filter_list::NetworkFilterListParsingError; +//! +//! The current .dat file format: +//! 1. magic (4 bytes) +//! 2. version (1 byte) +//! 3. seahash of the data (8 bytes) +//! 4. data (the rest of the file) /// Newer formats start with this magic byte sequence. /// Calculated as the leading 4 bytes of `echo -n 'brave/adblock-rust' | sha512sum`. const ADBLOCK_RUST_DAT_MAGIC: [u8; 4] = [0xd1, 0xd9, 0x3a, 0xaf]; -const ADBLOCK_RUST_DAT_VERSION: u8 = 1; -#[derive(Debug)] -pub enum SerializationError { - RmpSerdeError(rmp_serde::encode::Error), -} +/// The version of the data format. +/// If the data format version is incremented, the data is considered as incompatible. +const ADBLOCK_RUST_DAT_VERSION: u8 = 2; -impl From for SerializationError { - fn from(e: rmp_serde::encode::Error) -> Self { - Self::RmpSerdeError(e) - } -} +/// The total length of the header prefix (magic + version + seahash) +const HEADER_PREFIX_LENGTH: usize = 4 + 1 + 8; -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum DeserializationError { - RmpSerdeError(rmp_serde::decode::Error), - UnsupportedFormatVersion(u8), - NoHeaderFound, + BadHeader, + BadChecksum, + VersionMismatch(u8), FlatBufferParsingError(flatbuffers::InvalidFlatbuffer), ValidationError, } -impl From for DeserializationError { - fn from(x: std::convert::Infallible) -> Self { - match x {} - } -} +pub(crate) fn serialize_dat_file(data: &[u8]) -> Vec { + let mut serialized = Vec::with_capacity(data.len() + HEADER_PREFIX_LENGTH); + let hash = seahash::hash(data).to_le_bytes(); + serialized.extend_from_slice(&ADBLOCK_RUST_DAT_MAGIC); + serialized.push(ADBLOCK_RUST_DAT_VERSION); + serialized.extend_from_slice(&hash); + assert_eq!(serialized.len(), HEADER_PREFIX_LENGTH); -impl From for DeserializationError { - fn from(e: rmp_serde::decode::Error) -> Self { - Self::RmpSerdeError(e) - } + 
serialized.extend_from_slice(data); + serialized } -impl From for DeserializationError { - fn from(e: NetworkFilterListParsingError) -> Self { - match e { - NetworkFilterListParsingError::InvalidFlatbuffer(invalid_flatbuffer) => { - Self::FlatBufferParsingError(invalid_flatbuffer) - } - NetworkFilterListParsingError::UniqueDomainsOutOfBounds(_) => Self::ValidationError, - } +pub(crate) fn deserialize_dat_file(serialized: &[u8]) -> Result<&[u8], DeserializationError> { + if serialized.len() < HEADER_PREFIX_LENGTH || !serialized.starts_with(&ADBLOCK_RUST_DAT_MAGIC) { + return Err(DeserializationError::BadHeader); } -} - -pub(crate) fn serialize_engine( - blocker: &Blocker, - cfc: &CosmeticFilterCache, -) -> Result, SerializationError> { - let serialize_format = storage::SerializeFormat::from((blocker, cfc)); - serialize_format.serialize() -} -pub(crate) fn deserialize_engine( - serialized: &[u8], -) -> Result<(Blocker, CosmeticFilterCache), DeserializationError> { - let deserialize_format = storage::DeserializeFormat::deserialize(serialized)?; - deserialize_format.try_into() -} - -// Verify the header (MAGIC + VERSION) and return the data after the header. -pub fn parse_dat_header(serialized: &[u8]) -> Result<&[u8], DeserializationError> { - if !serialized.starts_with(&ADBLOCK_RUST_DAT_MAGIC) { - return Err(DeserializationError::NoHeaderFound); - } - if serialized.len() < ADBLOCK_RUST_DAT_MAGIC.len() + 1 { - return Err(DeserializationError::NoHeaderFound); - } let version = serialized[ADBLOCK_RUST_DAT_MAGIC.len()]; if version != ADBLOCK_RUST_DAT_VERSION { - return Err(DeserializationError::UnsupportedFormatVersion(version)); + return Err(DeserializationError::VersionMismatch(version)); } - - Ok(&serialized[ADBLOCK_RUST_DAT_MAGIC.len() + 1..]) + let data = &serialized[HEADER_PREFIX_LENGTH..]; + + // Check the hash to ensure the data isn't corrupted. 
+ let expected_hash = &serialized[(ADBLOCK_RUST_DAT_MAGIC.len() + 1)..HEADER_PREFIX_LENGTH]; + if expected_hash != seahash::hash(data).to_le_bytes() { + println!( + "Expected hash: {:?}, actual hash: {:?}", + expected_hash, + seahash::hash(data).to_le_bytes() + ); + return Err(DeserializationError::BadChecksum); + } + Ok(data) } #[cfg(test)] @@ -108,4 +83,24 @@ mod tests { assert!(result.starts_with(&ADBLOCK_RUST_DAT_MAGIC)); } + + #[test] + fn serialize_deserialize_test() { + let data = b"test"; + let serialized = serialize_dat_file(data); + let deserialized = deserialize_dat_file(&serialized).unwrap(); + assert_eq!(data, deserialized); + } + + #[test] + fn corrupted_data_test() { + let data = b"test"; + let serialized = serialize_dat_file(data); + let mut corrupted_serialized = serialized.clone(); + corrupted_serialized[HEADER_PREFIX_LENGTH] = 0; + assert_eq!( + Err(DeserializationError::BadChecksum), + deserialize_dat_file(&corrupted_serialized) + ); + } } diff --git a/src/data_format/storage.rs b/src/data_format/storage.rs deleted file mode 100644 index 56ef244c..00000000 --- a/src/data_format/storage.rs +++ /dev/null @@ -1,395 +0,0 @@ -//! Contains representations of data from the adblocking engine in a -//! forwards-and-backwards-compatible format, as well as utilities for converting these to and from -//! the actual `Engine` components. -//! -//! Any new fields should be added to the _end_ of both `SerializeFormat` and `DeserializeFormat`. 
- -use std::collections::{HashMap, HashSet}; - -use rmp_serde as rmps; -use serde::{Deserialize, Serialize}; - -use crate::blocker::Blocker; -use crate::cosmetic_filter_cache::{CosmeticFilterCache, HostnameRuleDb, ProceduralOrActionFilter}; -use crate::network_filter_list::NetworkFilterList; -use crate::utils::Hash; - -use super::utils::{stabilize_hashmap_serialization, stabilize_hashset_serialization}; -use super::{DeserializationError, SerializationError}; - -/// Each variant describes a single rule that is specific to a particular hostname. -#[derive(Clone, Debug, Deserialize, Serialize)] -enum LegacySpecificFilterType { - Hide(String), - Unhide(String), - Style(String, String), - UnhideStyle(String, String), - ScriptInject(String), - UnhideScriptInject(String), -} - -#[derive(Deserialize, Serialize, Default)] -pub(crate) struct LegacyHostnameRuleDb { - #[serde(serialize_with = "stabilize_hashmap_serialization")] - db: HashMap>, -} - -impl From<&HostnameRuleDb> for LegacyHostnameRuleDb { - fn from(v: &HostnameRuleDb) -> Self { - let mut db = HashMap::>::new(); - for (hash, bin) in v.hide.0.iter() { - for f in bin { - db.entry(*hash) - .and_modify(|v| v.push(LegacySpecificFilterType::Hide(f.to_owned()))) - .or_insert_with(|| vec![LegacySpecificFilterType::Hide(f.to_owned())]); - } - } - for (hash, bin) in v.unhide.0.iter() { - for f in bin { - db.entry(*hash) - .and_modify(|v| v.push(LegacySpecificFilterType::Unhide(f.to_owned()))) - .or_insert_with(|| vec![LegacySpecificFilterType::Unhide(f.to_owned())]); - } - } - for (hash, bin) in v.inject_script.0.iter() { - for (f, _mask) in bin { - db.entry(*hash) - .and_modify(|v| v.push(LegacySpecificFilterType::ScriptInject(f.to_owned()))) - .or_insert_with(|| vec![LegacySpecificFilterType::ScriptInject(f.to_owned())]); - } - } - for (hash, bin) in v.uninject_script.0.iter() { - for f in bin { - db.entry(*hash) - .and_modify(|v| { - v.push(LegacySpecificFilterType::UnhideScriptInject(f.to_owned())) - }) - 
.or_insert_with(|| { - vec![LegacySpecificFilterType::UnhideScriptInject(f.to_owned())] - }); - } - } - for (hash, bin) in v.procedural_action.0.iter() { - for f in bin { - if let Ok(f) = serde_json::from_str::(f) { - if let Some((selector, style)) = f.as_css() { - db.entry(*hash) - .and_modify(|v| { - v.push(LegacySpecificFilterType::Style( - selector.clone(), - style.clone(), - )) - }) - .or_insert_with(|| { - vec![LegacySpecificFilterType::Style(selector, style)] - }); - } - } - } - } - for (hash, bin) in v.procedural_action_exception.0.iter() { - for f in bin { - if let Ok(f) = serde_json::from_str::(f) { - if let Some((selector, style)) = f.as_css() { - db.entry(*hash) - .and_modify(|v| { - v.push(LegacySpecificFilterType::UnhideStyle( - selector.to_owned(), - style.to_owned(), - )) - }) - .or_insert_with(|| { - vec![LegacySpecificFilterType::UnhideStyle( - selector.to_owned(), - style.to_owned(), - )] - }); - } - } - } - } - LegacyHostnameRuleDb { db } - } -} - -impl From for HostnameRuleDb { - fn from(val: LegacyHostnameRuleDb) -> Self { - use crate::cosmetic_filter_cache::HostnameFilterBin; - - let mut hide = HostnameFilterBin::default(); - let mut unhide = HostnameFilterBin::default(); - let mut procedural_action = HostnameFilterBin::default(); - let mut procedural_action_exception = HostnameFilterBin::default(); - let mut inject_script = HostnameFilterBin::default(); - let mut uninject_script = HostnameFilterBin::default(); - - for (hash, bin) in val.db.into_iter() { - for rule in bin.into_iter() { - match rule { - LegacySpecificFilterType::Hide(s) => hide.insert(&hash, s), - LegacySpecificFilterType::Unhide(s) => unhide.insert(&hash, s), - LegacySpecificFilterType::Style(s, st) => procedural_action - .insert_procedural_action_filter( - &hash, - &ProceduralOrActionFilter::from_css(s, st), - ), - LegacySpecificFilterType::UnhideStyle(s, st) => procedural_action_exception - .insert_procedural_action_filter( - &hash, - &ProceduralOrActionFilter::from_css(s, 
st), - ), - LegacySpecificFilterType::ScriptInject(s) => { - inject_script.insert(&hash, (s, Default::default())) - } - LegacySpecificFilterType::UnhideScriptInject(s) => { - uninject_script.insert(&hash, s) - } - } - } - } - HostnameRuleDb { - hide, - unhide, - inject_script, - uninject_script, - procedural_action, - procedural_action_exception, - } - } -} - -#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] -pub(crate) struct LegacyRedirectResource { - pub content_type: String, - pub data: String, -} - -#[derive(Serialize, Deserialize, Debug, PartialEq, Default)] -pub(crate) struct LegacyRedirectResourceStorage { - #[serde(serialize_with = "stabilize_hashmap_serialization")] - pub resources: HashMap, -} - -#[derive(Clone, Deserialize, Serialize)] -pub(crate) struct LegacyScriptletResource { - scriptlet: String, -} - -#[derive(Default, Deserialize, Serialize)] -pub(crate) struct LegacyScriptletResourceStorage { - #[serde(serialize_with = "stabilize_hashmap_serialization")] - resources: HashMap, -} - -/// Forces a `NetworkFilterList` to be serialized by converting to an -/// intermediate representation that is constructed with `NetworkFilterFmt` instead. -fn serialize_network_filter_list(list: &NetworkFilterList, s: S) -> Result -where - S: serde::Serializer, -{ - #[derive(Serialize, Default)] - struct NetworkFilterListSerializeFmt { - flatbuffer_memory: Vec, - } - - let storage_list = NetworkFilterListSerializeFmt { - flatbuffer_memory: list.memory.data().to_vec(), - }; - - storage_list.serialize(s) -} - -/// Provides structural aggregration of referenced adblock engine data to allow for allocation-free -/// serialization. 
-#[derive(Serialize)] -pub(crate) struct SerializeFormat<'a> { - #[serde(serialize_with = "serialize_network_filter_list")] - csp: &'a NetworkFilterList, - #[serde(serialize_with = "serialize_network_filter_list")] - exceptions: &'a NetworkFilterList, - #[serde(serialize_with = "serialize_network_filter_list")] - importants: &'a NetworkFilterList, - #[serde(serialize_with = "serialize_network_filter_list")] - redirects: &'a NetworkFilterList, - #[serde(serialize_with = "serialize_network_filter_list")] - filters: &'a NetworkFilterList, - #[serde(serialize_with = "serialize_network_filter_list")] - generic_hide: &'a NetworkFilterList, - - #[serde(serialize_with = "serialize_network_filter_list")] - tagged_filters_all: &'a NetworkFilterList, - - enable_optimizations: bool, - - resources: LegacyRedirectResourceStorage, - - #[serde(serialize_with = "stabilize_hashset_serialization")] - simple_class_rules: &'a HashSet, - #[serde(serialize_with = "stabilize_hashset_serialization")] - simple_id_rules: &'a HashSet, - #[serde(serialize_with = "stabilize_hashmap_serialization")] - complex_class_rules: &'a HashMap>, - #[serde(serialize_with = "stabilize_hashmap_serialization")] - complex_id_rules: &'a HashMap>, - - specific_rules: LegacyHostnameRuleDb, - - #[serde(serialize_with = "stabilize_hashset_serialization")] - misc_generic_selectors: &'a HashSet, - - scriptlets: LegacyScriptletResourceStorage, - - #[serde(serialize_with = "stabilize_hashmap_serialization")] - procedural_action: &'a HashMap>, - #[serde(serialize_with = "stabilize_hashmap_serialization")] - procedural_action_exception: &'a HashMap>, - - #[serde(serialize_with = "serialize_network_filter_list")] - removeparam: &'a NetworkFilterList, -} - -impl SerializeFormat<'_> { - pub fn serialize(&self) -> Result, SerializationError> { - let mut output = super::ADBLOCK_RUST_DAT_MAGIC.to_vec(); - output.push(super::ADBLOCK_RUST_DAT_VERSION); - rmps::encode::write(&mut output, &self)?; - Ok(output) - } -} - 
-#[derive(Debug, Deserialize, Default)] -pub(crate) struct NetworkFilterListDeserializeFmt { - pub flatbuffer_memory: Vec, -} - -impl TryFrom for NetworkFilterList { - fn try_from(v: NetworkFilterListDeserializeFmt) -> Result { - Ok(NetworkFilterList::try_from_unverified_memory( - v.flatbuffer_memory, - )?) - } - - type Error = DeserializationError; -} - -/// Structural representation of adblock engine data that can be built up from deserialization and -/// used directly to construct new `Engine` components without unnecessary allocation. -#[derive(Deserialize)] -pub(crate) struct DeserializeFormat { - csp: NetworkFilterListDeserializeFmt, - exceptions: NetworkFilterListDeserializeFmt, - importants: NetworkFilterListDeserializeFmt, - redirects: NetworkFilterListDeserializeFmt, - filters: NetworkFilterListDeserializeFmt, - generic_hide: NetworkFilterListDeserializeFmt, - - tagged_filters_all: NetworkFilterListDeserializeFmt, - - enable_optimizations: bool, - - _resources: LegacyRedirectResourceStorage, - - simple_class_rules: HashSet, - simple_id_rules: HashSet, - complex_class_rules: HashMap>, - complex_id_rules: HashMap>, - - specific_rules: LegacyHostnameRuleDb, - - misc_generic_selectors: HashSet, - - _scriptlets: LegacyScriptletResourceStorage, - - #[serde(default)] - procedural_action: HashMap>, - #[serde(default)] - procedural_action_exception: HashMap>, - - #[serde(default)] - removeparam: NetworkFilterListDeserializeFmt, -} - -impl DeserializeFormat { - pub fn deserialize(serialized: &[u8]) -> Result { - let data = super::parse_dat_header(serialized)?; - let format: Self = rmps::decode::from_read(data)?; - Ok(format) - } -} - -impl<'a> From<(&'a Blocker, &'a CosmeticFilterCache)> for SerializeFormat<'a> { - fn from(v: (&'a Blocker, &'a CosmeticFilterCache)) -> Self { - let (blocker, cfc) = v; - Self { - csp: &blocker.csp, - exceptions: &blocker.exceptions, - importants: &blocker.importants, - redirects: &blocker.redirects, - filters: &blocker.filters, - 
generic_hide: &blocker.generic_hide, - - tagged_filters_all: &blocker.tagged_filters_all, - - enable_optimizations: blocker.enable_optimizations, - - resources: LegacyRedirectResourceStorage::default(), - - simple_class_rules: &cfc.simple_class_rules, - simple_id_rules: &cfc.simple_id_rules, - complex_class_rules: &cfc.complex_class_rules, - complex_id_rules: &cfc.complex_id_rules, - - specific_rules: (&cfc.specific_rules).into(), - - misc_generic_selectors: &cfc.misc_generic_selectors, - - scriptlets: LegacyScriptletResourceStorage::default(), - - procedural_action: &cfc.specific_rules.procedural_action.0, - procedural_action_exception: &cfc.specific_rules.procedural_action_exception.0, - - removeparam: &blocker.removeparam, - } - } -} - -impl TryFrom for (Blocker, CosmeticFilterCache) { - fn try_from(v: DeserializeFormat) -> Result { - use crate::cosmetic_filter_cache::HostnameFilterBin; - - let mut specific_rules: HostnameRuleDb = v.specific_rules.into(); - specific_rules.procedural_action = HostnameFilterBin(v.procedural_action); - specific_rules.procedural_action_exception = - HostnameFilterBin(v.procedural_action_exception); - - Ok(( - Blocker { - csp: v.csp.try_into()?, - exceptions: v.exceptions.try_into()?, - importants: v.importants.try_into()?, - redirects: v.redirects.try_into()?, - removeparam: v.removeparam.try_into()?, - filters: v.filters.try_into()?, - generic_hide: v.generic_hide.try_into()?, - - tags_enabled: Default::default(), - tagged_filters_all: v.tagged_filters_all.try_into()?, - - enable_optimizations: v.enable_optimizations, - regex_manager: Default::default(), - }, - CosmeticFilterCache { - simple_class_rules: v.simple_class_rules, - simple_id_rules: v.simple_id_rules, - complex_class_rules: v.complex_class_rules, - complex_id_rules: v.complex_id_rules, - - specific_rules, - - misc_generic_selectors: v.misc_generic_selectors, - }, - )) - } - - type Error = DeserializationError; -} diff --git a/src/data_format/utils.rs 
b/src/data_format/utils.rs deleted file mode 100644 index 3b3b3e81..00000000 --- a/src/data_format/utils.rs +++ /dev/null @@ -1,32 +0,0 @@ -//! Common utilities associated with serialization and deserialization of the `Engine` data into -//! binary formats. - -use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; - -use serde::{Serialize, Serializer}; - -/// Forces a `HashSet` to be serialized with a stable ordering by temporarily representing it as a -/// `BTreeSet`. -pub fn stabilize_hashset_serialization(set: &HashSet, s: S) -> Result -where - S: Serializer, - V: Ord + serde::Serialize, -{ - let stabilized: BTreeSet<&V> = set.iter().collect(); - stabilized.serialize(s) -} - -/// Forces a `HashMap` to be serialized with a stable ordering by temporarily representing it as a -/// `BTreeMap`. -pub fn stabilize_hashmap_serialization( - set: &HashMap, - s: S, -) -> Result -where - S: Serializer, - K: Ord + Serialize, - V: Serialize, -{ - let stabilized: BTreeMap<&K, &V> = set.iter().collect(); - stabilized.serialize(s) -} diff --git a/src/engine.rs b/src/engine.rs index 1fe39093..6037b05a 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -1,11 +1,20 @@ //! The adblock [`Engine`] is the primary interface for adblocking. 
-use crate::blocker::{Blocker, BlockerOptions, BlockerResult}; +use crate::blocker::{Blocker, BlockerResult}; use crate::cosmetic_filter_cache::{CosmeticFilterCache, UrlSpecificResources}; +use crate::cosmetic_filter_cache_builder::CosmeticFilterCacheBuilder; +use crate::data_format::{deserialize_dat_file, serialize_dat_file, DeserializationError}; +use crate::filters::cosmetic::CosmeticFilter; +use crate::filters::fb_builder::EngineFlatBuilder; +use crate::filters::fb_network_builder::NetworkRulesBuilder; +use crate::filters::filter_data_context::{FilterDataContext, FilterDataContextRef}; +use crate::filters::network::NetworkFilter; +use crate::flatbuffers::containers::flat_serialize::FlatSerialize; +use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory; use crate::lists::{FilterSet, ParseOptions}; use crate::regex_manager::RegexManagerDiscardPolicy; use crate::request::Request; -use crate::resources::{Resource, ResourceStorage}; +use crate::resources::{Resource, ResourceStorage, ResourceStorageBackend}; use std::collections::HashSet; @@ -46,32 +55,22 @@ pub struct Engine { blocker: Blocker, cosmetic_cache: CosmeticFilterCache, resources: ResourceStorage, + filter_data_context: FilterDataContextRef, +} + +#[cfg(feature = "debug-info")] +pub struct EngineDebugInfo { + pub regex_debug_info: crate::regex_manager::RegexDebugInfo, + pub flatbuffer_size: usize, } impl Default for Engine { - /// Equivalent to `Engine::new(true)`. fn default() -> Self { - Self::new(true) + Self::from_filter_set(FilterSet::new(false), false) } } impl Engine { - /// Creates a new adblocking `Engine`. `Engine`s created without rules should generally only be - /// used with deserialization. - /// - `optimize` specifies whether or not to attempt to compress the internal representation by - /// combining similar rules. 
- pub fn new(optimize: bool) -> Self { - let blocker_options = BlockerOptions { - enable_optimizations: optimize, - }; - - Self { - blocker: Blocker::new(vec![], &blocker_options), - cosmetic_cache: CosmeticFilterCache::new(), - resources: ResourceStorage::default(), - } - } - /// Loads rules in a single format, enabling optimizations and discarding debug information. pub fn from_rules( rules: impl IntoIterator>, @@ -101,6 +100,16 @@ impl Engine { Self::from_filter_set(filter_set, optimize) } + #[cfg(test)] + pub(crate) fn cosmetic_cache(self) -> CosmeticFilterCache { + self.cosmetic_cache + } + + #[cfg(test)] + pub(crate) fn filter_data_context(self) -> FilterDataContextRef { + self.filter_data_context + } + /// Loads rules from the given `FilterSet`. It is recommended to use a `FilterSet` when adding /// rules from multiple sources. pub fn from_filter_set(set: FilterSet, optimize: bool) -> Self { @@ -110,14 +119,17 @@ impl Engine { .. } = set; - let blocker_options = BlockerOptions { - enable_optimizations: optimize, - }; + let memory = make_flatbuffer(network_filters, cosmetic_filters, optimize); + + let filter_data_context = FilterDataContext::new(memory); Self { - blocker: Blocker::new(network_filters, &blocker_options), - cosmetic_cache: CosmeticFilterCache::from_rules(cosmetic_filters), + blocker: Blocker::from_context(FilterDataContextRef::clone(&filter_data_context)), + cosmetic_cache: CosmeticFilterCache::from_context(FilterDataContextRef::clone( + &filter_data_context, + )), resources: ResourceStorage::default(), + filter_data_context, } } @@ -181,17 +193,36 @@ impl Engine { self.blocker.tags_enabled().contains(&tag.to_owned()) } - /// Sets this engine's resources to be _only_ the ones provided in `resources`. + /// Sets this engine's [Resource]s to be _only_ the ones provided in `resources`. + /// + /// The resources will be held in-memory. 
If you have special caching, management, or sharing + /// requirements, consider [Engine::use_resource_storage] instead. pub fn use_resources(&mut self, resources: impl IntoIterator) { - self.resources = ResourceStorage::from_resources(resources); + let storage = crate::resources::InMemoryResourceStorage::from_resources(resources); + self.use_resource_storage(storage); } - /// Sets this engine's resources to additionally include `resource`. - pub fn add_resource( + /// Sets this engine's backend for [Resource] storage to a custom implementation of + /// [ResourceStorageBackend]. + /// + /// If you're okay with the [Engine] holding these resources in-memory, use + /// [Engine::use_resources] instead. + #[cfg(not(feature = "single-thread"))] + pub fn use_resource_storage( &mut self, - resource: Resource, - ) -> Result<(), crate::resources::AddResourceError> { - self.resources.add_resource(resource) + resources: R, + ) { + self.resources = ResourceStorage::from_backend(resources); + } + + /// Sets this engine's backend for [Resource] storage to a custom implementation of + /// [ResourceStorageBackend]. + /// + /// If you're okay with the [Engine] holding these resources in-memory, use + /// [Engine::use_resources] instead. 
+ #[cfg(feature = "single-thread")] + pub fn use_resource_storage(&mut self, resources: R) { + self.resources = ResourceStorage::from_backend(resources); } // Cosmetic filter functionality @@ -235,19 +266,23 @@ impl Engine { self.blocker.set_regex_discard_policy(new_discard_policy); } - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub fn discard_regex(&mut self, regex_id: u64) { self.blocker.discard_regex(regex_id); } - #[cfg(feature = "regex-debug-info")] - pub fn get_regex_debug_info(&self) -> crate::regex_manager::RegexDebugInfo { - self.blocker.get_regex_debug_info() + #[cfg(feature = "debug-info")] + pub fn get_debug_info(&self) -> EngineDebugInfo { + EngineDebugInfo { + regex_debug_info: self.blocker.get_regex_debug_info(), + flatbuffer_size: self.filter_data_context.memory.data().len(), + } } /// Serializes the `Engine` into a binary format so that it can be quickly reloaded later. - pub fn serialize(&self) -> Result, crate::data_format::SerializationError> { - crate::data_format::serialize_engine(&self.blocker, &self.cosmetic_cache) + pub fn serialize(&self) -> Vec { + let data = self.filter_data_context.memory.data(); + serialize_dat_file(data) } /// Deserialize the `Engine` from the binary format generated by `Engine::serialize`. @@ -255,22 +290,28 @@ impl Engine { /// Note that the binary format has a built-in version number that may be incremented. There is /// no guarantee that later versions of the format will be deserializable across minor versions /// of adblock-rust; the format is provided only as a caching optimization. 
- pub fn deserialize( - &mut self, - serialized: &[u8], - ) -> Result<(), crate::data_format::DeserializationError> { + pub fn deserialize(&mut self, serialized: &[u8]) -> Result<(), DeserializationError> { let current_tags = self.blocker.tags_enabled(); - let (blocker, cosmetic_cache) = crate::data_format::deserialize_engine(serialized)?; - self.blocker = blocker; + + let data = deserialize_dat_file(serialized)?; + let memory = VerifiedFlatbufferMemory::from_raw(data) + .map_err(DeserializationError::FlatBufferParsingError)?; + + let context = FilterDataContext::new(memory); + self.filter_data_context = context; + self.blocker = + Blocker::from_context(FilterDataContextRef::clone(&self.filter_data_context)); self.blocker .use_tags(¤t_tags.iter().map(|s| &**s).collect::>()); - self.cosmetic_cache = cosmetic_cache; + self.cosmetic_cache = CosmeticFilterCache::from_context(FilterDataContextRef::clone( + &self.filter_data_context, + )); Ok(()) } } /// Static assertions for `Engine: Send + Sync` traits. 
-#[cfg(not(feature = "unsync-regex-caching"))] +#[cfg(not(feature = "single-thread"))] fn _assertions() { fn _assert_send() {} fn _assert_sync() {} @@ -279,6 +320,19 @@ fn _assertions() { _assert_sync::(); } +fn make_flatbuffer( + network_filters: Vec, + cosmetic_filters: Vec, + optimize: bool, +) -> VerifiedFlatbufferMemory { + let mut builder = EngineFlatBuilder::default(); + let network_rules_builder = NetworkRulesBuilder::from_rules(network_filters, optimize); + let network_rules = FlatSerialize::serialize(network_rules_builder, &mut builder); + let cosmetic_rules = CosmeticFilterCacheBuilder::from_rules(cosmetic_filters); + let cosmetic_rules = FlatSerialize::serialize(cosmetic_rules, &mut builder); + builder.finish(network_rules, cosmetic_rules) +} + #[cfg(test)] #[path = "../tests/unit/engine.rs"] mod unit_tests; diff --git a/src/filters/fb_builder.rs b/src/filters/fb_builder.rs new file mode 100644 index 00000000..6ea470dd --- /dev/null +++ b/src/filters/fb_builder.rs @@ -0,0 +1,60 @@ +//! Builder for creating flatbuffer with serialized engine. 
+ +use std::collections::HashMap; + +use flatbuffers::WIPOffset; + +use crate::filters::fb_network_builder::NetworkFilterListBuilder; +use crate::flatbuffers::containers::flat_serialize::{FlatBuilder, WIPFlatVec}; +use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory; +use crate::utils::Hash; + +use super::flat::fb; + +#[derive(Default)] +pub(crate) struct EngineFlatBuilder<'a> { + fb_builder: flatbuffers::FlatBufferBuilder<'a>, + unique_domains_hashes: Vec, + unique_domains_hashes_map: HashMap, +} + +impl<'a> EngineFlatBuilder<'a> { + pub fn get_or_insert_unique_domain_hash(&mut self, h: &Hash) -> u32 { + if let Some(&index) = self.unique_domains_hashes_map.get(h) { + return index; + } + let index = self.unique_domains_hashes.len() as u32; + self.unique_domains_hashes.push(*h); + self.unique_domains_hashes_map.insert(*h, index); + index + } + + pub fn finish( + &mut self, + network_rules: WIPFlatVec<'a, NetworkFilterListBuilder, EngineFlatBuilder<'a>>, + cosmetic_rules: WIPOffset>, + ) -> VerifiedFlatbufferMemory { + let unique_domains_hashes = + Some(self.fb_builder.create_vector(&self.unique_domains_hashes)); + let engine = fb::Engine::create( + self.raw_builder(), + &fb::EngineArgs { + network_rules: Some(network_rules), + unique_domains_hashes, + cosmetic_filters: Some(cosmetic_rules), + }, + ); + self.raw_builder().finish(engine, None); + VerifiedFlatbufferMemory::from_builder(self.raw_builder()) + } +} + +impl<'a> FlatBuilder<'a> for EngineFlatBuilder<'a> { + fn create_string(&mut self, s: &str) -> WIPOffset<&'a str> { + self.fb_builder.create_string(s) + } + + fn raw_builder(&mut self) -> &mut flatbuffers::FlatBufferBuilder<'a> { + &mut self.fb_builder + } +} diff --git a/src/filters/fb_network.rs b/src/filters/fb_network.rs index 2b467e1e..4d5aebee 100644 --- a/src/filters/fb_network.rs +++ b/src/filters/fb_network.rs @@ -1,171 +1,13 @@ //! Flatbuffer-compatible versions of [NetworkFilter] and related functionality. 
-use std::collections::HashMap; -use std::vec; +use crate::filters::filter_data_context::FilterDataContext; +use crate::filters::network::{NetworkFilterMask, NetworkFilterMaskHelper, NetworkMatchable}; +use crate::flatbuffers::unsafe_tools::fb_vector_to_slice; -use flatbuffers::WIPOffset; - -use crate::filters::network::{ - NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper, NetworkMatchable, -}; -use crate::filters::unsafe_tools::{fb_vector_to_slice, VerifiedFlatFilterListMemory}; - -use crate::network_filter_list::NetworkFilterList; use crate::regex_manager::RegexManager; use crate::request::Request; -use crate::utils::{Hash, ShortHash}; - -#[allow(unknown_lints)] -#[allow( - dead_code, - clippy::all, - unused_imports, - unsafe_code, - mismatched_lifetime_syntaxes -)] -#[path = "../flatbuffers/fb_network_filter_generated.rs"] -pub mod flat; -use flat::fb; - -/// Builder for [NetworkFilterList]. -pub(crate) struct FlatNetworkFiltersListBuilder<'a> { - builder: flatbuffers::FlatBufferBuilder<'a>, - filters: Vec>>, - - unique_domains_hashes: Vec, - unique_domains_hashes_map: HashMap, -} - -impl FlatNetworkFiltersListBuilder<'_> { - pub fn new() -> Self { - Self { - builder: flatbuffers::FlatBufferBuilder::new(), - filters: vec![], - unique_domains_hashes: vec![], - unique_domains_hashes_map: HashMap::new(), - } - } - - fn get_or_insert_unique_domain_hash(&mut self, h: &Hash) -> u32 { - if let Some(&index) = self.unique_domains_hashes_map.get(h) { - return index; - } - let index = self.unique_domains_hashes.len() as u32; - self.unique_domains_hashes.push(*h); - self.unique_domains_hashes_map.insert(*h, index); - index - } - - pub fn add(&mut self, network_filter: &NetworkFilter) -> u32 { - let opt_domains = network_filter.opt_domains.as_ref().map(|v| { - let mut o: Vec = v - .iter() - .map(|x| self.get_or_insert_unique_domain_hash(x)) - .collect(); - o.sort_unstable(); - o.dedup(); - self.builder.create_vector(&o) - }); - - let opt_not_domains = 
network_filter.opt_not_domains.as_ref().map(|v| { - let mut o: Vec = v - .iter() - .map(|x| self.get_or_insert_unique_domain_hash(x)) - .collect(); - o.sort_unstable(); - o.dedup(); - self.builder.create_vector(&o) - }); - - let modifier_option = network_filter - .modifier_option - .as_ref() - .map(|s| self.builder.create_string(s)); - - let hostname = network_filter - .hostname - .as_ref() - .map(|s| self.builder.create_string(s)); - - let tag = network_filter - .tag - .as_ref() - .map(|s| self.builder.create_string(s)); - - let patterns = if network_filter.filter.iter().len() > 0 { - let offsets: Vec> = network_filter - .filter - .iter() - .map(|s| self.builder.create_string(s)) - .collect(); - Some(self.builder.create_vector(&offsets)) - } else { - None - }; - - let raw_line = network_filter - .raw_line - .as_ref() - .map(|v| self.builder.create_string(v.as_str())); - - let filter = fb::NetworkFilter::create( - &mut self.builder, - &fb::NetworkFilterArgs { - mask: network_filter.mask.bits(), - patterns, - modifier_option, - opt_domains, - opt_not_domains, - hostname, - tag, - raw_line, - }, - ); - - self.filters.push(filter); - u32::try_from(self.filters.len() - 1).expect("< u32::MAX") - } - - pub fn finish( - &mut self, - mut filter_map: HashMap>, - ) -> VerifiedFlatFilterListMemory { - let unique_domains_hashes = self.builder.create_vector(&self.unique_domains_hashes); - - let len = filter_map.len(); - - // Convert filter_map keys to a sorted vector of (hash, filter_indices). - let mut entries: Vec<_> = filter_map.drain().collect(); - entries.sort_unstable_by_key(|(k, _)| *k); - - // Convert sorted_entries to two flatbuffers vectors. 
- let mut flat_index: Vec = Vec::with_capacity(len); - let mut flat_values: Vec<_> = Vec::with_capacity(len); - for (key, filter_indices) in entries { - for &filter_index in &filter_indices { - flat_index.push(key); - flat_values.push(self.filters[filter_index as usize]); - } - } - - let filter_map_index = self.builder.create_vector(&flat_index); - let filter_map_values = self.builder.create_vector(&flat_values); - - let storage = fb::NetworkFilterList::create( - &mut self.builder, - &fb::NetworkFilterListArgs { - filter_map_index: Some(filter_map_index), - filter_map_values: Some(filter_map_values), - unique_domains_hashes: Some(unique_domains_hashes), - }, - ); - self.builder.finish(storage, None); - - // TODO: consider using builder.collapse() to avoid reallocating memory. - VerifiedFlatFilterListMemory::from_builder(&self.builder) - } -} +use crate::filters::flatbuffer_generated::fb; /// A list of string parts that can be matched against a URL. pub(crate) struct FlatPatterns<'a> { patterns: Option>>, @@ -222,7 +64,7 @@ impl ExactSizeIterator for FlatPatternsIterator<'_> { /// Internal implementation of [NetworkFilter] that is compatible with flatbuffers. 
pub(crate) struct FlatNetworkFilter<'a> { key: u64, - owner: &'a NetworkFilterList, + filter_data_context: &'a FilterDataContext, fb_filter: &'a fb::NetworkFilter<'a>, pub(crate) mask: NetworkFilterMask, @@ -233,15 +75,13 @@ impl<'a> FlatNetworkFilter<'a> { pub fn new( filter: &'a fb::NetworkFilter<'a>, index: usize, - owner: &'a NetworkFilterList, + filter_data_context: &'a FilterDataContext, ) -> Self { - let list_address: *const NetworkFilterList = owner as *const NetworkFilterList; - Self { fb_filter: filter, - key: index as u64 | (((list_address) as u64) << 32), + key: index as u64, mask: NetworkFilterMask::from_bits_retain(filter.mask()), - owner, + filter_data_context, } } @@ -308,14 +148,14 @@ impl NetworkMatchable for FlatNetworkFilter<'_> { if !check_included_domains_mapped( self.include_domains(), request, - &self.owner.unique_domains_hashes_map, + &self.filter_data_context.unique_domains_hashes_map, ) { return false; } if !check_excluded_domains_mapped( self.exclude_domains(), request, - &self.owner.unique_domains_hashes_map, + &self.filter_data_context.unique_domains_hashes_map, ) { return false; } diff --git a/src/filters/fb_network_builder.rs b/src/filters/fb_network_builder.rs new file mode 100644 index 00000000..63e0f52a --- /dev/null +++ b/src/filters/fb_network_builder.rs @@ -0,0 +1,291 @@ +//! 
Structures to store network filters to flatbuffer + +use std::collections::{HashMap, HashSet}; + +use flatbuffers::WIPOffset; + +use crate::filters::fb_builder::EngineFlatBuilder; +use crate::filters::network::NetworkFilter; + +use crate::filters::network::NetworkFilterMaskHelper; +use crate::flatbuffers::containers::flat_multimap::FlatMultiMapBuilder; +use crate::flatbuffers::containers::flat_serialize::{FlatBuilder, FlatSerialize, WIPFlatVec}; +use crate::network_filter_list::token_histogram; +use crate::optimizer; +use crate::utils::{to_short_hash, Hash, ShortHash}; + +use super::flat::fb; + +pub(crate) enum NetworkFilterListId { + Csp = 0, + Exceptions = 1, + Importants = 2, + Redirects = 3, + RemoveParam = 4, + Filters = 5, + GenericHide = 6, + TaggedFiltersAll = 7, + Size = 8, +} + +#[derive(Default, Clone)] +pub(crate) struct NetworkFilterListBuilder { + filters: Vec, + optimize: bool, +} + +pub(crate) struct NetworkRulesBuilder { + lists: Vec, +} + +impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for &NetworkFilter { + type Output = WIPOffset>; + + fn serialize( + network_filter: &NetworkFilter, + builder: &mut EngineFlatBuilder<'a>, + ) -> WIPOffset> { + let opt_domains = network_filter.opt_domains.as_ref().map(|v| { + let mut o: Vec = v + .iter() + .map(|x| builder.get_or_insert_unique_domain_hash(x)) + .collect(); + o.sort_unstable(); + o.dedup(); + FlatSerialize::serialize(o, builder) + }); + + let opt_not_domains = network_filter.opt_not_domains.as_ref().map(|v| { + let mut o: Vec = v + .iter() + .map(|x| builder.get_or_insert_unique_domain_hash(x)) + .collect(); + o.sort_unstable(); + o.dedup(); + FlatSerialize::serialize(o, builder) + }); + + let modifier_option = network_filter + .modifier_option + .as_ref() + .map(|s| builder.create_string(s)); + + let hostname = network_filter + .hostname + .as_ref() + .map(|s| builder.create_string(s)); + + let tag = network_filter + .tag + .as_ref() + .map(|s| builder.create_string(s)); + + let patterns = if 
network_filter.filter.iter().len() > 0 { + let offsets: Vec> = network_filter + .filter + .iter() + .map(|s| builder.create_string(s)) + .collect(); + Some(FlatSerialize::serialize(offsets, builder)) + } else { + None + }; + + let raw_line = network_filter + .raw_line + .as_ref() + .map(|v| builder.create_string(v.as_str())); + + let network_filter = fb::NetworkFilter::create( + builder.raw_builder(), + &fb::NetworkFilterArgs { + mask: network_filter.mask.bits(), + patterns, + modifier_option, + opt_domains, + opt_not_domains, + hostname, + tag, + raw_line, + }, + ); + + network_filter + } +} + +impl NetworkFilterListBuilder { + fn new(optimize: bool) -> Self { + Self { + filters: vec![], + optimize, + } + } +} + +impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder { + type Output = WIPOffset>; + fn serialize( + rule_list: Self, + builder: &mut EngineFlatBuilder<'a>, + ) -> WIPOffset> { + let mut filter_map = HashMap::>>>::new(); + + let mut optimizable = HashMap::>::new(); + + // Compute tokens for all filters + let filter_tokens: Vec<_> = rule_list + .filters + .into_iter() + .map(|filter| { + let tokens = filter.get_tokens(); + (filter, tokens) + }) + .collect(); + + // compute the tokens' frequency histogram + let (total_number_of_tokens, tokens_histogram) = token_histogram(&filter_tokens); + + { + for (network_filter, multi_tokens) in filter_tokens.into_iter() { + let flat_filter = if !rule_list.optimize + || !optimizer::is_filter_optimizable_by_patterns(&network_filter) + { + Some(FlatSerialize::serialize(&network_filter, builder)) + } else { + None + }; + + for tokens in multi_tokens { + let mut best_token: ShortHash = 0; + let mut min_count = total_number_of_tokens + 1; + for token in tokens { + let token = to_short_hash(token); + match tokens_histogram.get(&token) { + None => { + min_count = 0; + best_token = token + } + Some(&count) if count < min_count => { + min_count = count; + best_token = token + } + _ => {} + } + } + + if 
let Some(flat_filter) = flat_filter { + filter_map.entry(best_token).or_default().push(flat_filter); + } else { + optimizable + .entry(best_token) + .or_default() + .push(network_filter.clone()); + } + } // tokens + } + } + + if rule_list.optimize { + // Sort the entries to ensure deterministic iteration order + let mut optimizable_entries: Vec<_> = optimizable.drain().collect(); + optimizable_entries.sort_unstable_by_key(|(token, _)| *token); + + for (token, v) in optimizable_entries { + let optimized = optimizer::optimize(v); + + for filter in optimized { + let flat_filter = FlatSerialize::serialize(&filter, builder); + filter_map.entry(token).or_default().push(flat_filter); + } + } + } else { + debug_assert!( + optimizable.is_empty(), + "Should be empty if optimization is off" + ); + } + + let flat_filter_map_builder = FlatMultiMapBuilder::from_filter_map(filter_map); + let flat_filter_map = FlatMultiMapBuilder::finish(flat_filter_map_builder, builder); + + fb::NetworkFilterList::create( + builder.raw_builder(), + &fb::NetworkFilterListArgs { + filter_map_index: Some(flat_filter_map.keys), + filter_map_values: Some(flat_filter_map.values), + }, + ) + } +} + +impl NetworkRulesBuilder { + pub fn from_rules(network_filters: Vec, optimize: bool) -> Self { + let mut lists = vec![]; + for list_id in 0..NetworkFilterListId::Size as usize { + // Don't optimize removeparam, since it can fuse filters without respecting distinct + let optimize = optimize && list_id != NetworkFilterListId::RemoveParam as usize; + lists.push(NetworkFilterListBuilder::new(optimize)); + } + let mut self_ = Self { lists }; + + let mut badfilter_ids: HashSet = HashSet::new(); + + // Collect badfilter ids in advance. 
+ for filter in network_filters.iter() { + if filter.is_badfilter() { + badfilter_ids.insert(filter.get_id_without_badfilter()); + } + } + + for filter in network_filters.into_iter() { + // skip any bad filters + let filter_id = filter.get_id(); + if badfilter_ids.contains(&filter_id) || filter.is_badfilter() { + continue; + } + + // Redirects are independent of blocking behavior. + if filter.is_redirect() { + self_.add_filter(filter.clone(), NetworkFilterListId::Redirects); + } + type FilterId = NetworkFilterListId; + + let list_id: FilterId = if filter.is_csp() { + FilterId::Csp + } else if filter.is_removeparam() { + FilterId::RemoveParam + } else if filter.is_generic_hide() { + FilterId::GenericHide + } else if filter.is_exception() { + FilterId::Exceptions + } else if filter.is_important() { + FilterId::Importants + } else if filter.tag.is_some() && !filter.is_redirect() { + // `tag` + `redirect` is unsupported for now. + FilterId::TaggedFiltersAll + } else if (filter.is_redirect() && filter.also_block_redirect()) + || !filter.is_redirect() + { + FilterId::Filters + } else { + continue; + }; + + self_.add_filter(filter, list_id); + } + + self_ + } + + fn add_filter(&mut self, network_filter: NetworkFilter, list_id: NetworkFilterListId) { + self.lists[list_id as usize].filters.push(network_filter); + } +} + +impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkRulesBuilder { + type Output = WIPFlatVec<'a, NetworkFilterListBuilder, EngineFlatBuilder<'a>>; + fn serialize(value: Self, builder: &mut EngineFlatBuilder<'a>) -> Self::Output { + FlatSerialize::serialize(value.lists, builder) + } +} diff --git a/src/filters/filter_data_context.rs b/src/filters/filter_data_context.rs new file mode 100644 index 00000000..985ef1eb --- /dev/null +++ b/src/filters/filter_data_context.rs @@ -0,0 +1,31 @@ +use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory; +use crate::utils::Hash; +use std::collections::HashMap; + +#[cfg(feature = "single-thread")] 
+pub(crate) type FilterDataContextRef = std::rc::Rc; +#[cfg(not(feature = "single-thread"))] +pub(crate) type FilterDataContextRef = std::sync::Arc; + +// The struct is used to store the flatbuffer and supporting data +// for both network filter and cosmetic filters. +// Supposed to be stored via FilterDataContextRef to avoid copying the data. +pub(crate) struct FilterDataContext { + pub(crate) memory: VerifiedFlatbufferMemory, + pub(crate) unique_domains_hashes_map: HashMap, +} + +impl FilterDataContext { + pub(crate) fn new(memory: VerifiedFlatbufferMemory) -> FilterDataContextRef { + // Reconstruct the unique_domains_hashes_map from the flatbuffer data + let root = memory.root(); + let mut unique_domains_hashes_map: HashMap = HashMap::new(); + for (index, hash) in root.unique_domains_hashes().iter().enumerate() { + unique_domains_hashes_map.insert(hash, index as u32); + } + FilterDataContextRef::new(Self { + memory, + unique_domains_hashes_map, + }) + } +} diff --git a/src/filters/flat_filter_map.rs b/src/filters/flat_filter_map.rs deleted file mode 100644 index 40df1958..00000000 --- a/src/filters/flat_filter_map.rs +++ /dev/null @@ -1,73 +0,0 @@ -//! Holds the implementation of [FlatFilterMap]. - -use flatbuffers::{Follow, ForwardsUOffset, Vector}; -use std::cmp::PartialOrd; - -/// A map-like container that uses flatbuffer references. -/// Provides O(log n) lookup time using binary search on the sorted index. 
-pub(crate) struct FlatFilterMap<'a, I: PartialOrd + Copy, V> { - index: &'a [I], - values: Vector<'a, ForwardsUOffset>, -} - -/// Iterator over NetworkFilter objects from [FlatFilterMap] -pub(crate) struct FlatFilterMapIterator<'a, I: PartialOrd + Copy, V> { - current_index: usize, - key: I, - indexes: &'a [I], - values: Vector<'a, ForwardsUOffset>, -} - -impl<'a, I, V> Iterator for FlatFilterMapIterator<'a, I, V> -where - I: PartialOrd + Copy, - V: Follow<'a>, -{ - type Item = (usize, >::Inner); - - fn next(&mut self) -> Option { - if self.current_index < self.indexes.len() { - if self.indexes[self.current_index] != self.key { - return None; - } - let index = self.current_index; - let filter = self.values.get(self.current_index); - self.current_index += 1; - Some((index, filter)) - } else { - None - } - } -} - -impl<'a, I: PartialOrd + Copy, V> FlatFilterMap<'a, I, V> { - /// Construct [FlatFilterMap] from two vectors: - /// - index: sorted array of keys - /// - values: array of values, same length as index - pub fn new(index: &'a [I], values: Vector<'a, ForwardsUOffset>) -> Self { - // Sanity check the size are equal. Note: next() will handle |values| correctly. - debug_assert!(index.len() == values.len()); - - debug_assert!(index.is_sorted()); - - Self { index, values } - } - - /// Get an iterator over NetworkFilter objects with the given hash key. 
- pub fn get(&self, key: I) -> FlatFilterMapIterator<'a, I, V> { - let start = self.index.partition_point(|x| *x < key); - FlatFilterMapIterator { - current_index: start, - key, - indexes: self.index, - values: self.values, - } - } -} - -impl FlatFilterMap<'_, I, V> { - #[cfg(test)] - pub fn total_size(&self) -> usize { - self.index.len() - } -} diff --git a/src/filters/mod.rs b/src/filters/mod.rs index 1e25e7ad..9d3394f8 100644 --- a/src/filters/mod.rs +++ b/src/filters/mod.rs @@ -4,7 +4,23 @@ mod abstract_network; mod network_matchers; pub mod cosmetic; +pub(crate) mod fb_builder; pub(crate) mod fb_network; -pub(crate) mod flat_filter_map; +pub(crate) mod fb_network_builder; +pub(crate) mod filter_data_context; pub mod network; -pub(crate) mod unsafe_tools; + +#[allow(unknown_lints)] +#[allow( + dead_code, + clippy::all, + unused_imports, + unsafe_code, + mismatched_lifetime_syntaxes +)] +#[path = "../flatbuffers/fb_network_filter_generated.rs"] +mod flat; + +pub(crate) mod flatbuffer_generated { + pub use super::flat::fb; +} diff --git a/src/flatbuffers/containers/fb_index.rs b/src/flatbuffers/containers/fb_index.rs new file mode 100644 index 00000000..2398a6f1 --- /dev/null +++ b/src/flatbuffers/containers/fb_index.rs @@ -0,0 +1,49 @@ +use flatbuffers::{Follow, Vector}; + +/// A trait to access indexed data in a flatbuffer. +/// It has two implementations: +/// 1. a faster &[I] for slices; +/// 2. a slower for flatbuffers::Vector, that uses Follow() internally. +/// +/// Note: it intentionally returns values using a copy, because it's faster +/// than by reference. +pub(crate) trait FbIndex { + /// Returns the number of elements. + fn len(&self) -> usize; + + /// Returns a copy of the value at the given index. + /// 'index' must be in range [0, len()), otherwise panics. 
+ fn get(&self, index: usize) -> I; +} + +impl FbIndex for &[I] { + #[inline(always)] + fn len(&self) -> usize { + <[I]>::len(self) + } + + #[inline(always)] + fn get(&self, index: usize) -> I { + self[index] + } +} + +impl FbIndex<()> for () { + #[inline(always)] + fn len(&self) -> usize { + 0 + } + fn get(&self, _index: usize) {} +} + +impl<'a, T: Follow<'a>> FbIndex for Vector<'a, T> { + #[inline(always)] + fn len(&self) -> usize { + Vector::len(self) + } + + #[inline(always)] + fn get(&self, index: usize) -> T::Inner { + Vector::get(self, index) + } +} diff --git a/src/flatbuffers/containers/flat_map.rs b/src/flatbuffers/containers/flat_map.rs new file mode 100644 index 00000000..aaabd7db --- /dev/null +++ b/src/flatbuffers/containers/flat_map.rs @@ -0,0 +1,78 @@ +use std::marker::PhantomData; + +use crate::flatbuffers::containers; +use containers::flat_serialize::{FlatBuilder, FlatMapBuilderOutput, FlatSerialize}; +use containers::sorted_index::SortedIndex; +use flatbuffers::{Follow, Vector}; + +pub(crate) struct FlatMapView<'a, I: Ord, V, Keys> +where + Keys: SortedIndex, + V: Follow<'a>, +{ + keys: Keys, + values: Vector<'a, V>, + _phantom: PhantomData, +} + +impl<'a, I: Ord + Copy, V, Keys> FlatMapView<'a, I, V, Keys> +where + Keys: SortedIndex + Clone, + V: flatbuffers::Follow<'a>, +{ + pub fn new(keys: Keys, values: Vector<'a, V>) -> Self { + debug_assert!(keys.len() == values.len()); + Self { + keys, + values, + _phantom: PhantomData, + } + } + + #[cfg(test)] + pub fn len(&self) -> usize { + self.keys.len() + } + + pub fn get(&self, key: I) -> Option<>::Inner> { + let index = self.keys.partition_point(|x| *x < key); + if index < self.keys.len() && self.keys.get(index) == key { + Some(self.values.get(index)) + } else { + None + } + } +} + +pub(crate) struct FlatMapBuilder; + +impl FlatMapBuilder { + pub fn finish<'a, I, V, B: FlatBuilder<'a>>( + value: std::collections::HashMap, + builder: &mut B, + ) -> FlatMapBuilderOutput<'a, I, V, B> + where + I: 
FlatSerialize<'a, B> + Ord, + V: FlatSerialize<'a, B>, + { + let mut entries: Vec<_> = value.into_iter().collect(); + entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); + + let mut indexes = Vec::with_capacity(entries.len()); + let mut values = Vec::with_capacity(entries.len()); + + for (key, value) in entries.into_iter() { + indexes.push(FlatSerialize::serialize(key, builder)); + values.push(FlatSerialize::serialize(value, builder)); + } + + FlatMapBuilderOutput { + keys: builder.raw_builder().create_vector(&indexes), + values: builder.raw_builder().create_vector(&values), + } + } +} + +#[cfg(test)] +#[path = "../../../tests/unit/flatbuffers/containers/flat_map.rs"] +mod unit_tests; diff --git a/src/flatbuffers/containers/flat_multimap.rs b/src/flatbuffers/containers/flat_multimap.rs new file mode 100644 index 00000000..99b6255f --- /dev/null +++ b/src/flatbuffers/containers/flat_multimap.rs @@ -0,0 +1,134 @@ +use std::{collections::HashMap, marker::PhantomData}; + +use crate::flatbuffers::containers; +use containers::flat_serialize::{FlatBuilder, FlatMapBuilderOutput, FlatSerialize}; +use containers::sorted_index::SortedIndex; +use flatbuffers::{Follow, Vector}; + +/// A map-like container that uses flatbuffer references. +/// Provides O(log n) lookup time using binary search on the sorted index. +/// I is a key type, Keys is specific container of keys, &[I] for fast indexing (u32, u64) +/// and flatbuffers::Vector if there is no conversion from Vector (str) to slice. 
+pub(crate) struct FlatMultiMapView<'a, I: Ord, V, Keys> +where + Keys: SortedIndex, + V: Follow<'a>, +{ + keys: Keys, + values: Vector<'a, V>, + _phantom: PhantomData, +} + +impl<'a, I: Ord + Copy, V, Keys> FlatMultiMapView<'a, I, V, Keys> +where + Keys: SortedIndex + Clone, + V: Follow<'a>, +{ + pub fn new(keys: Keys, values: Vector<'a, V>) -> Self { + debug_assert!(keys.len() == values.len()); + + Self { + keys, + values, + _phantom: PhantomData, + } + } + + pub fn get(&self, key: I) -> Option> { + let index = self.keys.partition_point(|x| *x < key); + if index < self.keys.len() && self.keys.get(index) == key { + Some(FlatMultiMapViewIterator { + index, + key, + keys: self.keys.clone(), // Cloning is 3-4% faster than & in benchmarks + values: self.values, + }) + } else { + None + } + } + + #[cfg(test)] + pub fn total_size(&self) -> usize { + self.keys.len() + } +} + +pub(crate) struct FlatMultiMapViewIterator<'a, I: Ord + Copy, V, Keys> +where + Keys: SortedIndex, + V: Follow<'a>, +{ + index: usize, + key: I, + keys: Keys, + values: Vector<'a, V>, +} + +impl<'a, I, V, Keys> Iterator for FlatMultiMapViewIterator<'a, I, V, Keys> +where + I: Ord + Copy, + V: Follow<'a>, + Keys: SortedIndex, +{ + type Item = (usize, >::Inner); + + fn next(&mut self) -> Option { + if self.index < self.keys.len() && self.keys.get(self.index) == self.key { + self.index += 1; + Some((self.index - 1, self.values.get(self.index - 1))) + } else { + None + } + } +} + +#[derive(Default)] +pub(crate) struct FlatMultiMapBuilder { + map: HashMap>, +} + +impl FlatMultiMapBuilder { + pub fn from_filter_map(map: HashMap>) -> Self { + Self { map } + } + + #[allow(dead_code)] // Unused code is allowed during cosmetic filter migration + pub fn insert(&mut self, key: I, value: V) { + self.map.entry(key).or_default().push(value); + } + + pub fn finish<'a, B: FlatBuilder<'a>>( + value: Self, + builder: &mut B, + ) -> FlatMapBuilderOutput<'a, I, V, B> + where + I: FlatSerialize<'a, B>, + V: 
FlatSerialize<'a, B>, + { + let mut entries: Vec<_> = value.map.into_iter().collect(); + entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); + let mut indexes = Vec::with_capacity(entries.len()); + let mut values = Vec::with_capacity(entries.len()); + + for (key, mv) in entries.into_iter() { + let index = FlatSerialize::serialize(key, builder); + for value in mv.into_iter() { + indexes.push(index.clone()); + values.push(FlatSerialize::serialize(value, builder)); + } + } + + let indexes_vec = builder.raw_builder().create_vector(&indexes); + let values_vec = builder.raw_builder().create_vector(&values); + + FlatMapBuilderOutput { + keys: indexes_vec, + values: values_vec, + } + } +} + +#[cfg(test)] +#[path = "../../../tests/unit/flatbuffers/containers/flat_multimap.rs"] +mod unit_tests; diff --git a/src/flatbuffers/containers/flat_serialize.rs b/src/flatbuffers/containers/flat_serialize.rs new file mode 100644 index 00000000..a341d7a0 --- /dev/null +++ b/src/flatbuffers/containers/flat_serialize.rs @@ -0,0 +1,114 @@ +use flatbuffers::{Vector, WIPOffset}; + +// A generic builder trait that is used to serialize flatbuffers. +// flatbuffers::FlatBufferBuilder can be used as Builder. +// Although, you can extend it to create a custom builder. +pub trait FlatBuilder<'a> { + fn create_string(&mut self, s: &str) -> WIPOffset<&'a str>; + fn raw_builder(&mut self) -> &mut flatbuffers::FlatBufferBuilder<'a>; +} + +// The trait to serialize structure into flatbuffer. +// * Implement the traits directly if the structure has a direct representation +// as a flatbuffer. I.e. for HashSet, Vec, etc. +// * Prefer implementing the traits for MyStruct instead of &MyStruct to +// prevent cloning. I.e. FlatMultiMapBuilder (wrapping HashMap<I, Vec<V>>). +// * Make MyStructBuilder implement FlatSerialize, if extra work is required +// during building. +// * Make MyStructBuilder with finish() INSTEAD of implementing FlatSerialize +// if the output doesn't fit the trait. I.e. 
FlatMapBuilder, that returns +// a pair of vectors. +// * Auxiliary functions (i.e. string deduplication) can be implemented +// in a custom builder. +pub trait FlatSerialize<'b, B: FlatBuilder<'b>>: Sized { + type Output: Sized + Clone + flatbuffers::Push + 'b; + fn serialize(value: Self, builder: &mut B) -> Self::Output; +} + +impl<'b> FlatBuilder<'b> for flatbuffers::FlatBufferBuilder<'b> { + fn create_string(&mut self, s: &str) -> WIPOffset<&'b str> { + if s.is_empty() { + flatbuffers::FlatBufferBuilder::create_shared_string(self, s) + } else { + flatbuffers::FlatBufferBuilder::create_string(self, s) + } + } + + fn raw_builder(&mut self) -> &mut flatbuffers::FlatBufferBuilder<'b> { + self + } +} + +impl<'b, B: FlatBuilder<'b>> FlatSerialize<'b, B> for String { + type Output = WIPOffset<&'b str>; + fn serialize(value: Self, builder: &mut B) -> Self::Output { + builder.create_string(&value) + } +} + +impl<'b, B: FlatBuilder<'b>> FlatSerialize<'b, B> for &str { + type Output = WIPOffset<&'b str>; + fn serialize(value: Self, builder: &mut B) -> Self::Output { + builder.create_string(value) + } +} + +impl<'b, B: FlatBuilder<'b>> FlatSerialize<'b, B> for u32 { + type Output = u32; + fn serialize(value: Self, _builder: &mut B) -> Self::Output { + value + } +} + +impl<'b, B: FlatBuilder<'b>> FlatSerialize<'b, B> for u64 { + type Output = u64; + fn serialize(value: Self, _builder: &mut B) -> Self::Output { + value + } +} + +impl<'b, B: FlatBuilder<'b>, T: 'b> FlatSerialize<'b, B> for WIPOffset { + type Output = WIPOffset; + fn serialize(value: Self, _builder: &mut B) -> Self::Output { + value + } +} + +pub(crate) type WIPFlatVec<'b, T, B> = + WIPOffset>::Output as flatbuffers::Push>::Output>>; + +// Serialize a vector of items if T implements FlatSerialize. +// It puts the items first, then puts the vector of offsets. 
+impl<'b, B: FlatBuilder<'b>, T: FlatSerialize<'b, B>> FlatSerialize<'b, B> for Vec { + type Output = WIPFlatVec<'b, T, B>; + fn serialize(value: Self, builder: &mut B) -> Self::Output { + let v = value + .into_iter() + .map(|x| FlatSerialize::serialize(x, builder)) + .collect::>(); + builder.raw_builder().create_vector(&v) + } +} + +pub(crate) struct FlatMapBuilderOutput<'b, I, V, B: FlatBuilder<'b>> +where + I: FlatSerialize<'b, B>, + V: FlatSerialize<'b, B>, +{ + pub(crate) keys: WIPFlatVec<'b, I, B>, + pub(crate) values: WIPFlatVec<'b, V, B>, +} + +// A helper function to serialize a vector of items if T implements FlatSerialize. +// It returns None if the vector is empty, otherwise it returns the vector of offsets. +#[allow(dead_code)] // Unused code is allowed during cosmetic filter migration +pub(crate) fn serialize_vec_opt<'b, B: FlatBuilder<'b>, T: FlatSerialize<'b, B>>( + value: Vec, + builder: &mut B, +) -> Option> { + if value.is_empty() { + None + } else { + Some(FlatSerialize::serialize(value, builder)) + } +} diff --git a/src/flatbuffers/containers/flat_set.rs b/src/flatbuffers/containers/flat_set.rs new file mode 100644 index 00000000..a6c4d771 --- /dev/null +++ b/src/flatbuffers/containers/flat_set.rs @@ -0,0 +1,68 @@ +#![allow(dead_code)] + +use std::marker::PhantomData; + +use crate::flatbuffers::containers; +use containers::flat_serialize::{FlatBuilder, FlatSerialize, WIPFlatVec}; +use containers::sorted_index::SortedIndex; + +/// A set-like container that uses flatbuffer references. +/// Provides O(log n) lookup time using binary search on the sorted data. +/// I is a key type, Keys is specific container of keys, &[I] for fast indexing (u32, u64) +/// and flatbuffers::Vector if there is no conversion from Vector (str) to slice. 
+pub(crate) struct FlatSetView +where + Keys: SortedIndex, +{ + keys: Keys, + _phantom: PhantomData, +} + +impl FlatSetView +where + I: Ord, + Keys: SortedIndex, +{ + pub fn new(keys: Keys) -> Self { + Self { + keys, + _phantom: PhantomData, + } + } + + pub fn contains(&self, key: I) -> bool { + let index = self.keys.partition_point(|x| *x < key); + index < self.keys.len() && self.keys.get(index) == key + } + + #[inline(always)] + pub fn len(&self) -> usize { + self.keys.len() + } + + #[inline(always)] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl<'b, B: FlatBuilder<'b>, T: FlatSerialize<'b, B> + std::hash::Hash + Ord> FlatSerialize<'b, B> + for std::collections::HashSet +{ + type Output = WIPFlatVec<'b, T, B>; + + fn serialize(value: Self, builder: &mut B) -> Self::Output { + let mut items = value.into_iter().collect::>(); + items.sort_unstable(); + let v = items + .into_iter() + .map(|x| FlatSerialize::serialize(x, builder)) + .collect::>(); + + builder.raw_builder().create_vector(&v) + } +} + +#[cfg(test)] +#[path = "../../../tests/unit/flatbuffers/containers/flat_set.rs"] +mod unit_tests; diff --git a/src/flatbuffers/containers/hash_index.rs b/src/flatbuffers/containers/hash_index.rs new file mode 100644 index 00000000..9c967e6c --- /dev/null +++ b/src/flatbuffers/containers/hash_index.rs @@ -0,0 +1,223 @@ +/// An inner implementation of a HashMap-like container with open addressing. +/// Designed to be used in HashMap, HashSet, HashMultiMap. +/// The load factor is 25%-50%. +/// Uses RustC FxHasher as a hash function. +/// A default value is used to mark empty slots, so it can't be used as a key. +/// Inspired by https://source.chromium.org/chromium/chromium/src/+/main:components/url_pattern_index/closed_hash_map.h +use std::marker::PhantomData; + +use crate::flatbuffers::containers::fb_index::FbIndex; + +/// A trait for hash table builder keys, i.e. String. +/// The default value is used to mark empty slots. 
+pub(crate) trait HashKey: Eq + std::hash::Hash + Default + Clone { + /// Returns true if the key is empty. + fn is_empty(&self) -> bool; +} + +impl HashKey for T { + fn is_empty(&self) -> bool { + self == &T::default() + } +} + +/// A trait for hash table view keys that can be used in flatbuffers, i.e. &str. +/// The implementation must be synchronized with the matching HashKey trait. +pub(crate) trait FbHashKey: Eq + std::hash::Hash { + /// Returns true if the key is empty. + fn is_empty(&self) -> bool; +} + +impl FbHashKey for &str { + fn is_empty(&self) -> bool { + str::is_empty(self) + } +} + +/// An internal function to find a slot in the hash table for the given key. +/// Returns the slot index. +/// 'table_size' is the table size. It must be a power of two. +/// 'probe' must return true at least for one slot (supposing the table isn't full). +pub fn find_slot( + key: &I, + table_size: usize, + probe: impl Fn(usize) -> bool, +) -> usize { + debug_assert!(table_size.is_power_of_two()); + let table_mask = table_size - 1; + let mut slot = get_hash(&key) & table_mask; + let mut step = 1; + loop { + if probe(slot) { + return slot; + } + slot = (slot + step) & table_mask; + step += 1; + } +} + +/// A flatbuffer-compatible view of a hash table. +/// It's used to access the hash table without copying the keys and values. +/// Is loaded from HashIndexBuilder data, serialized into a flatbuffer. 
+pub(crate) struct HashIndexView, Values: FbIndex> { + indexes: Keys, + values: Values, + _phantom_i: PhantomData, + _phantom_v: PhantomData, +} + +impl, Values: FbIndex> HashIndexView { + pub fn new(indexes: Keys, values: Values) -> Self { + Self { + indexes, + values, + _phantom_i: PhantomData, + _phantom_v: PhantomData, + } + } + + pub fn capacity(&self) -> usize { + self.indexes.len() + } + + pub fn get_single(&self, key: I) -> Option { + let slot = find_slot(&key, self.capacity(), |slot| -> bool { + FbHashKey::is_empty(&self.indexes.get(slot)) || self.indexes.get(slot) == key + }); + if FbHashKey::is_empty(&self.indexes.get(slot)) { + None + } else { + Some(self.values.get(slot)) + } + } + + #[cfg(test)] + /// Returns the number of non-empty slots in the hash table. + /// Slow, use only for tests. + pub fn len(&self) -> usize { + let mut len = 0; + for i in 0..self.capacity() { + if !FbHashKey::is_empty(&self.indexes.get(i)) { + len += 1; + } + } + len + } +} + +/// A builder for a hash table. +/// The default value is used to mark empty slots. +/// `consume()` output is supposed to be serialized into a flatbuffer and +/// used as a HashIndexView. +pub(crate) struct HashIndexBuilder { + indexes: Vec, + values: Vec, + size: usize, +} + +/// An internal function to hash a key. +/// The hash must be persistent across different runs of the program. +fn get_hash(key: &I) -> usize { + // RustC Hash is 2x faster than DefaultHasher. 
+ use rustc_hash::FxHasher; + use std::hash::Hasher; + let mut hasher = FxHasher::default(); + key.hash(&mut hasher); + hasher.finish() as usize +} + +impl Default for HashIndexBuilder { + fn default() -> Self { + Self::new_with_capacity(4) + } +} + +impl HashIndexBuilder { + pub fn new_with_capacity(capacity: usize) -> Self { + Self { + size: 0, + indexes: vec![I::default(); capacity], + values: vec![V::default(); capacity], + } + } + + pub fn insert(&mut self, key: I, value: V, allow_duplicates: bool) -> (usize, &mut V) { + debug_assert!(!HashKey::is_empty(&key), "Key is empty"); + + let slot = find_slot(&key, self.capacity(), |slot| -> bool { + HashKey::is_empty(&self.indexes[slot]) + || (self.indexes[slot] == key && !allow_duplicates) + }); + + if HashKey::is_empty(&self.indexes[slot]) { + self.indexes[slot] = key; + self.values[slot] = value; + self.size += 1; + self.maybe_increase_capacity(); + (slot, &mut self.values[slot]) + } else { + self.values[slot] = value; + (slot, &mut self.values[slot]) + } + } + + fn capacity(&self) -> usize { + self.indexes.len() + } + + pub fn get_or_insert(&mut self, key: I, value: V) -> &mut V { + let slot = find_slot(&key, self.capacity(), |slot| -> bool { + HashKey::is_empty(&self.indexes[slot]) || self.indexes[slot] == key + }); + if !HashKey::is_empty(&self.indexes[slot]) { + return &mut self.values[slot]; + } + let (_, new_value) = self.insert(key, value, false); + new_value + } + + fn maybe_increase_capacity(&mut self) { + if self.size * 2 <= self.capacity() { + // Use 50% load factor. 
+ return; + } + + let new_capacity = (self.capacity() * 2).next_power_of_two(); + let old_indexes = std::mem::take(&mut self.indexes); + let old_values = std::mem::take(&mut self.values); + self.indexes = vec![I::default(); new_capacity]; + self.values = vec![V::default(); new_capacity]; + + for (key, value) in old_indexes.into_iter().zip(old_values.into_iter()) { + if !HashKey::is_empty(&key) { + let slot = find_slot(&key, new_capacity, |slot| -> bool { + HashKey::is_empty(&self.indexes[slot]) + }); + self.indexes[slot] = key; + self.values[slot] = value; + } + } + } + + pub fn consume(value: Self) -> (Vec, Vec) { + (value.indexes, value.values) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_hash() { + // Verify get_hash is stable. + // If the value changes, update ADBLOCK_RUST_DAT_VERSION. + let message = "If the value changes, update ADBLOCK_RUST_DAT_VERSION."; + assert_eq!( + get_hash(&"adblock-rust"), + 15102204115509201409, + "{}", + message + ); + } +} diff --git a/src/flatbuffers/containers/hash_map.rs b/src/flatbuffers/containers/hash_map.rs new file mode 100644 index 00000000..865e5b8f --- /dev/null +++ b/src/flatbuffers/containers/hash_map.rs @@ -0,0 +1,102 @@ +/// A HashMap implementation backed by a HashIndex. +/// Uses more memory than FlatMap, but gives faster lookup. +use crate::flatbuffers::containers::{ + fb_index::FbIndex, + flat_serialize::{FlatBuilder, FlatMapBuilderOutput, FlatSerialize}, + hash_index::{FbHashKey, HashIndexBuilder, HashIndexView, HashKey}, +}; + +/// A builder for a HashMap that can be serialized into a flatbuffer. +/// A default key is used to mark empty slots, so (default_key, _) pair +/// can't be added. 
+#[derive(Default)] +pub(crate) struct HashMapBuilder { + builder: HashIndexBuilder, +} + +impl HashMapBuilder { + #[allow(unused)] + pub fn insert(&mut self, key: I, value: V) { + self.builder.insert(key, value, false /* allow_duplicate */); + } + + pub fn get_or_insert(&mut self, key: I, value: V) -> &mut V { + self.builder.get_or_insert(key, value) + } + + pub fn finish<'b, B: FlatBuilder<'b>>( + value: Self, + builder: &mut B, + ) -> FlatMapBuilderOutput<'b, I, V, B> + where + I: FlatSerialize<'b, B>, + V: FlatSerialize<'b, B>, + { + let (indexes, values) = HashIndexBuilder::consume(value.builder); + + let keys = indexes + .into_iter() + .map(|i| FlatSerialize::serialize(i, builder)) + .collect::>(); + let values = values + .into_iter() + .map(|v| FlatSerialize::serialize(v, builder)) + .collect::>(); + + let keys = builder.raw_builder().create_vector(&keys); + let values = builder.raw_builder().create_vector(&values); + + FlatMapBuilderOutput { keys, values } + } +} + +/// A view of a HashMap stored in a flatbuffer. +/// The default key is considered as an empty slot, `get(default_key)` always +/// returns None. 
+pub(crate) struct HashMapView +where + I: FbHashKey, + Keys: FbIndex, + Values: FbIndex, +{ + view: HashIndexView, +} + +impl HashMapView +where + I: FbHashKey, + Keys: FbIndex, + Values: FbIndex, +{ + pub fn new(keys: Keys, values: Values) -> Self { + assert_eq!(keys.len(), values.len()); + Self { + view: HashIndexView::new(keys, values), + } + } + + pub fn get(&self, key: I) -> Option { + self.view.get_single(key) + } + + #[cfg(test)] + pub fn capacity(&self) -> usize { + self.view.capacity() + } + + #[cfg(test)] + pub fn len(&self) -> usize { + self.view.len() + } +} + +pub type HashMapStringView<'a, V> = HashMapView< + &'a str, + V, + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>>, + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<>::Inner>>, +>; + +#[cfg(test)] +#[path = "../../../tests/unit/flatbuffers/containers/hash_map.rs"] +mod unit_tests; diff --git a/src/flatbuffers/containers/hash_set.rs b/src/flatbuffers/containers/hash_set.rs new file mode 100644 index 00000000..ffe77963 --- /dev/null +++ b/src/flatbuffers/containers/hash_set.rs @@ -0,0 +1,71 @@ +/// A HashSet implementation backed by a HashIndex. +/// Uses more memory than FlatSet, but gives faster lookup. +use crate::flatbuffers::containers::{ + fb_index::FbIndex, + flat_serialize::{FlatBuilder, FlatSerialize, WIPFlatVec}, + hash_index::{FbHashKey, HashIndexBuilder, HashIndexView, HashKey}, +}; + +/// A builder for a HashSet that can be serialized into a flatbuffer. +/// A default value is used to mark empty slots, so it can't be added. 
+#[derive(Default)] +pub(crate) struct HashSetBuilder { + builder: HashIndexBuilder, +} + +impl HashSetBuilder { + pub fn insert(&mut self, key: I) { + self.builder.insert(key, (), false /* allow_duplicate */); + } +} + +impl<'b, B: FlatBuilder<'b>, I: FlatSerialize<'b, B> + HashKey> FlatSerialize<'b, B> + for HashSetBuilder +{ + type Output = WIPFlatVec<'b, I, B>; + + fn serialize(value: Self, builder: &mut B) -> Self::Output + where + I: FlatSerialize<'b, B>, + { + let (indexes, _) = HashIndexBuilder::consume(value.builder); + let v = indexes + .into_iter() + .map(|x| FlatSerialize::serialize(x, builder)) + .collect::>(); + builder.raw_builder().create_vector(&v) + } +} + +/// A view of a HashSet stored in a flatbuffer. +/// The default value is considered as an empty slot, `contains(default_value)` +/// always returns false. +pub(crate) struct HashSetView> { + view: HashIndexView, +} + +impl> HashSetView { + pub fn new(keys: Keys) -> Self { + Self { + view: HashIndexView::new(keys, ()), + } + } + + pub fn contains(&self, key: I) -> bool { + self.view.get_single(key).is_some() + } + + #[cfg(test)] + pub fn len(&self) -> usize { + self.view.len() + } + + #[cfg(test)] + pub fn capacity(&self) -> usize { + self.view.capacity() + } +} + +#[cfg(test)] +#[path = "../../../tests/unit/flatbuffers/containers/hash_set.rs"] +mod unit_tests; diff --git a/src/flatbuffers/containers/mod.rs b/src/flatbuffers/containers/mod.rs new file mode 100644 index 00000000..18eb9202 --- /dev/null +++ b/src/flatbuffers/containers/mod.rs @@ -0,0 +1,9 @@ +pub(crate) mod fb_index; +pub(crate) mod flat_map; +pub(crate) mod flat_multimap; +pub(crate) mod flat_serialize; +pub(crate) mod flat_set; +pub(crate) mod hash_index; +pub(crate) mod hash_map; +pub(crate) mod hash_set; +pub(crate) mod sorted_index; diff --git a/src/flatbuffers/containers/sorted_index.rs b/src/flatbuffers/containers/sorted_index.rs new file mode 100644 index 00000000..8335eafc --- /dev/null +++ 
b/src/flatbuffers/containers/sorted_index.rs @@ -0,0 +1,53 @@ +use flatbuffers::{Follow, Vector}; + +use crate::flatbuffers::containers::fb_index::FbIndex; + +// Represents a sorted sequence to perform the binary search. +pub(crate) trait SortedIndex: FbIndex { + fn partition_point(&self, predicate: F) -> usize + where + F: FnMut(&I) -> bool; +} + +// Implementation for slices. Prefer using this with fb_vector_to_slice +// if possible, because it is faster than getting values with flatbuffer's +// get method. +impl SortedIndex for &[I] { + #[inline(always)] + fn partition_point(&self, predicate: F) -> usize + where + F: FnMut(&I) -> bool, + { + debug_assert!(self.is_sorted()); + <[I]>::partition_point(self, predicate) + } +} + +// General implementation for flatbuffers::Vector, it uses get to +// obtain values. +impl<'a, T: Follow<'a>> SortedIndex for Vector<'a, T> +where + T::Inner: Ord, +{ + fn partition_point(&self, mut predicate: F) -> usize + where + F: FnMut(&T::Inner) -> bool, + { + debug_assert!(self.iter().is_sorted()); + + let mut left = 0; + let mut right = self.len(); + + while left < right { + let mid = left + (right - left) / 2; + let value = self.get(mid); + if predicate(&value) { + left = mid + 1; + } else { + right = mid; + } + } + + left + } +} diff --git a/src/flatbuffers/fb_network_filter.fbs b/src/flatbuffers/fb_network_filter.fbs index cc5d0eb8..c4b7ed01 100644 --- a/src/flatbuffers/fb_network_filter.fbs +++ b/src/flatbuffers/fb_network_filter.fbs @@ -9,9 +9,9 @@ namespace fb; table NetworkFilter { mask: uint32; // NetworkFilterMask (network.rs) - // These arrays contain sorted (ascending) indices in the |unique_domains_hashes| - // instead of the hashes themselves. This approach saves memory, as there - // typically aren’t many unique hashes + /// These arrays contain sorted (ascending) indices in the |unique_domains_hashes| + /// instead of the hashes themselves.
This approach saves memory, as there + /// typically aren’t many unique hashes opt_domains: [uint32]; opt_not_domains: [uint32]; @@ -27,7 +27,93 @@ table NetworkFilter { table NetworkFilterList { filter_map_index: [uint32] (required); filter_map_values: [NetworkFilter] (required); +} + +/// A table to store the most host-specific cosmetic rules. +/// Although, the most common kind of rule (see hostname_inject_script_* +/// and hostname_hide_*) are stored separately to save memory. +table HostnameSpecificRules { + /// Simple hide exception rules, e.g. `example.com#@#.ad`. + /// The content is the rule's CSS selector. + unhide: [string]; + + /// Rules to except a scriptlet to inject along with any arguments, e.g. + /// `example.com#@#+js(acis, Number.isNan)`. + /// The content is the contents of the `+js(...)` syntax construct. + /// In practice, these rules are extremely rare in filter lists. + uninject_script: [string]; + + /// Procedural filters and/or filters with a [`CosmeticFilterAction`]. + /// Each is a [`ProceduralOrActionFilter`] struct serialized as JSON. + procedural_action: [string]; + + /// Exceptions for procedural filters and/or filters with a [`CosmeticFilterAction`]. + /// Each is a [`ProceduralOrActionFilter`] struct serialized as JSON. + procedural_action_exception: [string]; +} + +table StringVector { + data: [string] (required); +} + +/// A table to store cosmetic filter rules (including supported structures). +table CosmeticFilters { + /// Rules that are just the CSS class of an element to be hidden on all sites, e.g. `##.ad`. + /// Stored as a flat_set. + simple_class_rules: [string] (required); + + /// Rules that are just the CSS id of an element to be hidden on all sites, e.g. `###banner`. + /// Stored as a flat_set. + simple_id_rules: [string] (required); + + /// Rules that are the CSS selector of an element to be hidden on all sites that do not fit + /// into any of the class or id buckets, e.g. 
`##a[href="https://malware.com"]` + /// Stored as a flat_set. + misc_generic_selectors: [string] (required); + + /// Rules that are the CSS selector of an element to be hidden on all sites, starting with a + /// class, e.g. `##.ad image`. + /// Stored as a map `class` => `selectors` (string keys, each value is a StringVector). + complex_class_rules_index: [string] (required); + complex_class_rules_values: [StringVector] (required); + + /// Rules that are the CSS selector of an element to be hidden on all sites, starting with an + /// id, e.g. `###banner > .text a`. + /// Stored as a map `id` => `selectors` (string keys, each value is a StringVector). + complex_id_rules_index: [string] (required); + complex_id_rules_values: [StringVector] (required); + + /// Simple hostname-specific hide rules, e.g. `example.com##.ad`. + /// Stored as a multi-map `hostname_hash` => `selector`. + /// Doesn't belong to HostnameSpecificRules for performance reasons. + hostname_hide_index: [uint64] (required); + hostname_hide_values: [string] (required); + + /// Rules with a scriptlet to inject along with any arguments, e.g. + /// `example.com##+js(acis, Number.isNan)`. + /// Stored as a multi-map `hostname_hash` => `script_plus_permission_byte`. + /// The content is the contents of the `+js(...)` syntax construct, followed by + /// one last byte that stores the permission (kept inline to save memory). + /// Doesn't belong to HostnameSpecificRules for performance reasons. + hostname_inject_script_index: [uint64] (required); + hostname_inject_script_values: [string] (required); + + /// A map to store the other host-specific cosmetic rules. + hostname_index: [uint64] (required); + hostname_values: [HostnameSpecificRules] (required); +} + +/// A root type containing a serialized Engine. +table Engine { + /// Contains several NetworkFilterList matching to different kinds of lists. + /// The indexes are matching NetworkFilterListId. + /// The size must be NetworkFilterListId::Size. + network_rules: [NetworkFilterList] (required); + + /// Contains hashes for opt_(not)_domains.
See opt_domains for details. unique_domains_hashes: [uint64] (required); + + cosmetic_filters: CosmeticFilters (required); } -root_type NetworkFilterList; +root_type Engine; diff --git a/src/flatbuffers/fb_network_filter_generated.rs b/src/flatbuffers/fb_network_filter_generated.rs index 5dd1757d..631f285b 100644 --- a/src/flatbuffers/fb_network_filter_generated.rs +++ b/src/flatbuffers/fb_network_filter_generated.rs @@ -118,6 +118,9 @@ pub mod fb { .unwrap() } } + /// These arrays contain sorted (ascending) indices in the |unique_domains_hashes| + /// instead of the hashes themselves. This approach saves memory, as there + /// typically aren’t many unique hashes #[inline] pub fn opt_domains(&self) -> Option> { // Safety: @@ -446,7 +449,6 @@ pub mod fb { impl<'a> NetworkFilterList<'a> { pub const VT_FILTER_MAP_INDEX: flatbuffers::VOffsetT = 4; pub const VT_FILTER_MAP_VALUES: flatbuffers::VOffsetT = 6; - pub const VT_UNIQUE_DOMAINS_HASHES: flatbuffers::VOffsetT = 8; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { @@ -463,9 +465,6 @@ pub mod fb { args: &'args NetworkFilterListArgs<'args>, ) -> flatbuffers::WIPOffset> { let mut builder = NetworkFilterListBuilder::new(_fbb); - if let Some(x) = args.unique_domains_hashes { - builder.add_unique_domains_hashes(x); - } if let Some(x) = args.filter_map_values { builder.add_filter_map_values(x); } @@ -484,14 +483,9 @@ pub mod fb { let x = self.filter_map_values(); x.iter().map(|t| t.unpack()).collect() }; - let unique_domains_hashes = { - let x = self.unique_domains_hashes(); - x.into_iter().collect() - }; NetworkFilterListT { filter_map_index, filter_map_values, - unique_domains_hashes, } } @@ -524,20 +518,6 @@ pub mod fb { .unwrap() } } - #[inline] - pub fn unique_domains_hashes(&self) -> flatbuffers::Vector<'a, u64> { - // Safety: - // Created from valid Table for this object - // which contains a valid value in this slot - unsafe { - self._tab - .get::>>( - 
NetworkFilterList::VT_UNIQUE_DOMAINS_HASHES, - None, - ) - .unwrap() - } - } } impl flatbuffers::Verifiable for NetworkFilterList<'_> { @@ -556,11 +536,6 @@ pub mod fb { .visit_field::>, >>("filter_map_values", Self::VT_FILTER_MAP_VALUES, true)? - .visit_field::>>( - "unique_domains_hashes", - Self::VT_UNIQUE_DOMAINS_HASHES, - true, - )? .finish(); Ok(()) } @@ -572,15 +547,13 @@ pub mod fb { flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, >, >, - pub unique_domains_hashes: Option>>, } impl<'a> Default for NetworkFilterListArgs<'a> { #[inline] fn default() -> Self { NetworkFilterListArgs { - filter_map_index: None, // required field - filter_map_values: None, // required field - unique_domains_hashes: None, // required field + filter_map_index: None, // required field + filter_map_values: None, // required field } } } @@ -613,16 +586,6 @@ pub mod fb { ); } #[inline] - pub fn add_unique_domains_hashes( - &mut self, - unique_domains_hashes: flatbuffers::WIPOffset>, - ) { - self.fbb_.push_slot_always::>( - NetworkFilterList::VT_UNIQUE_DOMAINS_HASHES, - unique_domains_hashes, - ); - } - #[inline] pub fn new( _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, ) -> NetworkFilterListBuilder<'a, 'b, A> { @@ -645,11 +608,6 @@ pub mod fb { NetworkFilterList::VT_FILTER_MAP_VALUES, "filter_map_values", ); - self.fbb_.required( - o, - NetworkFilterList::VT_UNIQUE_DOMAINS_HASHES, - "unique_domains_hashes", - ); flatbuffers::WIPOffset::new(o.value()) } } @@ -659,7 +617,6 @@ pub mod fb { let mut ds = f.debug_struct("NetworkFilterList"); ds.field("filter_map_index", &self.filter_map_index()); ds.field("filter_map_values", &self.filter_map_values()); - ds.field("unique_domains_hashes", &self.unique_domains_hashes()); ds.finish() } } @@ -668,14 +625,12 @@ pub mod fb { pub struct NetworkFilterListT { pub filter_map_index: Vec, pub filter_map_values: Vec, - pub unique_domains_hashes: Vec, } impl Default for NetworkFilterListT { fn default() -> Self { Self { filter_map_index: 
Default::default(), filter_map_values: Default::default(), - unique_domains_hashes: Default::default(), } } } @@ -693,102 +648,1762 @@ pub mod fb { let w: Vec<_> = x.iter().map(|t| t.pack(_fbb)).collect(); _fbb.create_vector(&w) }); - let unique_domains_hashes = Some({ - let x = &self.unique_domains_hashes; - _fbb.create_vector(x) - }); NetworkFilterList::create( _fbb, &NetworkFilterListArgs { filter_map_index, filter_map_values, - unique_domains_hashes, }, ) } } - #[inline] - /// Verifies that a buffer of bytes contains a `NetworkFilterList` - /// and returns it. - /// Note that verification is still experimental and may not - /// catch every error, or be maximally performant. For the - /// previous, unchecked, behavior use - /// `root_as_network_filter_list_unchecked`. - pub fn root_as_network_filter_list( - buf: &[u8], - ) -> Result { - flatbuffers::root::(buf) + pub enum HostnameSpecificRulesOffset {} + #[derive(Copy, Clone, PartialEq)] + + /// A table to store the most host-specific cosmetic rules. + /// Although, the most common kind of rule (see hostname_inject_script_* + /// and hostname_hide_*) are stored separately to save memory. + pub struct HostnameSpecificRules<'a> { + pub _tab: flatbuffers::Table<'a>, } - #[inline] - /// Verifies that a buffer of bytes contains a size prefixed - /// `NetworkFilterList` and returns it. - /// Note that verification is still experimental and may not - /// catch every error, or be maximally performant. For the - /// previous, unchecked, behavior use - /// `size_prefixed_root_as_network_filter_list_unchecked`. 
- pub fn size_prefixed_root_as_network_filter_list( - buf: &[u8], - ) -> Result { - flatbuffers::size_prefixed_root::(buf) + + impl<'a> flatbuffers::Follow<'a> for HostnameSpecificRules<'a> { + type Inner = HostnameSpecificRules<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } } - #[inline] - /// Verifies, with the given options, that a buffer of bytes - /// contains a `NetworkFilterList` and returns it. - /// Note that verification is still experimental and may not - /// catch every error, or be maximally performant. For the - /// previous, unchecked, behavior use - /// `root_as_network_filter_list_unchecked`. - pub fn root_as_network_filter_list_with_opts<'b, 'o>( - opts: &'o flatbuffers::VerifierOptions, - buf: &'b [u8], - ) -> Result, flatbuffers::InvalidFlatbuffer> { - flatbuffers::root_with_opts::>(opts, buf) + + impl<'a> HostnameSpecificRules<'a> { + pub const VT_UNHIDE: flatbuffers::VOffsetT = 4; + pub const VT_UNINJECT_SCRIPT: flatbuffers::VOffsetT = 6; + pub const VT_PROCEDURAL_ACTION: flatbuffers::VOffsetT = 8; + pub const VT_PROCEDURAL_ACTION_EXCEPTION: flatbuffers::VOffsetT = 10; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + HostnameSpecificRules { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args HostnameSpecificRulesArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = HostnameSpecificRulesBuilder::new(_fbb); + if let Some(x) = args.procedural_action_exception { + builder.add_procedural_action_exception(x); + } + if let Some(x) = args.procedural_action { + builder.add_procedural_action(x); + } + if let Some(x) = args.uninject_script { + builder.add_uninject_script(x); + } + if let Some(x) = args.unhide { + builder.add_unhide(x); + 
} + builder.finish() + } + + pub fn unpack(&self) -> HostnameSpecificRulesT { + let unhide = self + .unhide() + .map(|x| x.iter().map(|s| s.to_string()).collect()); + let uninject_script = self + .uninject_script() + .map(|x| x.iter().map(|s| s.to_string()).collect()); + let procedural_action = self + .procedural_action() + .map(|x| x.iter().map(|s| s.to_string()).collect()); + let procedural_action_exception = self + .procedural_action_exception() + .map(|x| x.iter().map(|s| s.to_string()).collect()); + HostnameSpecificRulesT { + unhide, + uninject_script, + procedural_action, + procedural_action_exception, + } + } + + /// Simple hide exception rules, e.g. `example.com#@#.ad`. + /// The content is the rule's CSS selector. + #[inline] + pub fn unhide( + &self, + ) -> Option>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>, + >>(HostnameSpecificRules::VT_UNHIDE, None) + } + } + /// Rules to except a scriptlet to inject along with any arguments, e.g. + /// `example.com#@#+js(acis, Number.isNan)`. + /// The content is the contents of the `+js(...)` syntax construct. + /// In practice, these rules are extremely rare in filter lists. + #[inline] + pub fn uninject_script( + &self, + ) -> Option>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>, + >>(HostnameSpecificRules::VT_UNINJECT_SCRIPT, None) + } + } + /// Procedural filters and/or filters with a [`CosmeticFilterAction`]. + /// Each is a [`ProceduralOrActionFilter`] struct serialized as JSON. 
+ #[inline] + pub fn procedural_action( + &self, + ) -> Option>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>, + >>(HostnameSpecificRules::VT_PROCEDURAL_ACTION, None) + } + } + /// Exceptions for procedural filters and/or filters with a [`CosmeticFilterAction`]. + /// Each is a [`ProceduralOrActionFilter`] struct serialized as JSON. + #[inline] + pub fn procedural_action_exception( + &self, + ) -> Option>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>, + >>( + HostnameSpecificRules::VT_PROCEDURAL_ACTION_EXCEPTION, None + ) + } + } } - #[inline] - /// Verifies, with the given verifier options, that a buffer of - /// bytes contains a size prefixed `NetworkFilterList` and returns - /// it. Note that verification is still experimental and may not - /// catch every error, or be maximally performant. For the - /// previous, unchecked, behavior use - /// `root_as_network_filter_list_unchecked`. - pub fn size_prefixed_root_as_network_filter_list_with_opts<'b, 'o>( - opts: &'o flatbuffers::VerifierOptions, - buf: &'b [u8], - ) -> Result, flatbuffers::InvalidFlatbuffer> { - flatbuffers::size_prefixed_root_with_opts::>(opts, buf) + + impl flatbuffers::Verifiable for HostnameSpecificRules<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>, + >>("unhide", Self::VT_UNHIDE, false)? + .visit_field::>, + >>("uninject_script", Self::VT_UNINJECT_SCRIPT, false)? + .visit_field::>, + >>("procedural_action", Self::VT_PROCEDURAL_ACTION, false)? + .visit_field::>, + >>( + "procedural_action_exception", + Self::VT_PROCEDURAL_ACTION_EXCEPTION, + false, + )? 
+ .finish(); + Ok(()) + } } - #[inline] - /// Assumes, without verification, that a buffer of bytes contains a NetworkFilterList and returns it. - /// # Safety - /// Callers must trust the given bytes do indeed contain a valid `NetworkFilterList`. - pub unsafe fn root_as_network_filter_list_unchecked(buf: &[u8]) -> NetworkFilterList { - flatbuffers::root_unchecked::(buf) + pub struct HostnameSpecificRulesArgs<'a> { + pub unhide: Option< + flatbuffers::WIPOffset>>, + >, + pub uninject_script: Option< + flatbuffers::WIPOffset>>, + >, + pub procedural_action: Option< + flatbuffers::WIPOffset>>, + >, + pub procedural_action_exception: Option< + flatbuffers::WIPOffset>>, + >, } - #[inline] - /// Assumes, without verification, that a buffer of bytes contains a size prefixed NetworkFilterList and returns it. - /// # Safety - /// Callers must trust the given bytes do indeed contain a valid size prefixed `NetworkFilterList`. - pub unsafe fn size_prefixed_root_as_network_filter_list_unchecked( - buf: &[u8], - ) -> NetworkFilterList { - flatbuffers::size_prefixed_root_unchecked::(buf) + impl<'a> Default for HostnameSpecificRulesArgs<'a> { + #[inline] + fn default() -> Self { + HostnameSpecificRulesArgs { + unhide: None, + uninject_script: None, + procedural_action: None, + procedural_action_exception: None, + } + } + } + + pub struct HostnameSpecificRulesBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> HostnameSpecificRulesBuilder<'a, 'b, A> { + #[inline] + pub fn add_unhide( + &mut self, + unhide: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + HostnameSpecificRules::VT_UNHIDE, + unhide, + ); + } + #[inline] + pub fn add_uninject_script( + &mut self, + uninject_script: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, 
flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + HostnameSpecificRules::VT_UNINJECT_SCRIPT, + uninject_script, + ); + } + #[inline] + pub fn add_procedural_action( + &mut self, + procedural_action: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + HostnameSpecificRules::VT_PROCEDURAL_ACTION, + procedural_action, + ); + } + #[inline] + pub fn add_procedural_action_exception( + &mut self, + procedural_action_exception: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + HostnameSpecificRules::VT_PROCEDURAL_ACTION_EXCEPTION, + procedural_action_exception, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> HostnameSpecificRulesBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + HostnameSpecificRulesBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for HostnameSpecificRules<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("HostnameSpecificRules"); + ds.field("unhide", &self.unhide()); + ds.field("uninject_script", &self.uninject_script()); + ds.field("procedural_action", &self.procedural_action()); + ds.field( + "procedural_action_exception", + &self.procedural_action_exception(), + ); + ds.finish() + } + } + #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] + pub struct HostnameSpecificRulesT { + pub unhide: Option>, + pub uninject_script: Option>, + pub procedural_action: Option>, + pub procedural_action_exception: Option>, + } + impl Default for HostnameSpecificRulesT { + fn default() -> Self { + Self { + unhide: None, + uninject_script: None, + procedural_action: None, + 
procedural_action_exception: None, + } + } + } + impl HostnameSpecificRulesT { + pub fn pack<'b, A: flatbuffers::Allocator + 'b>( + &self, + _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, + ) -> flatbuffers::WIPOffset> { + let unhide = self.unhide.as_ref().map(|x| { + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let uninject_script = self.uninject_script.as_ref().map(|x| { + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let procedural_action = self.procedural_action.as_ref().map(|x| { + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let procedural_action_exception = self.procedural_action_exception.as_ref().map(|x| { + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + HostnameSpecificRules::create( + _fbb, + &HostnameSpecificRulesArgs { + unhide, + uninject_script, + procedural_action, + procedural_action_exception, + }, + ) + } + } + pub enum StringVectorOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct StringVector<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for StringVector<'a> { + type Inner = StringVector<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } + } + + impl<'a> StringVector<'a> { + pub const VT_DATA: flatbuffers::VOffsetT = 4; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + StringVector { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args StringVectorArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = StringVectorBuilder::new(_fbb); + if let Some(x) = args.data { + 
builder.add_data(x); + } + builder.finish() + } + + pub fn unpack(&self) -> StringVectorT { + let data = { + let x = self.data(); + x.iter().map(|s| s.to_string()).collect() + }; + StringVectorT { data } + } + + #[inline] + pub fn data(&self) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(StringVector::VT_DATA, None) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for StringVector<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>, + >>("data", Self::VT_DATA, true)? + .finish(); + Ok(()) + } + } + pub struct StringVectorArgs<'a> { + pub data: Option< + flatbuffers::WIPOffset>>, + >, + } + impl<'a> Default for StringVectorArgs<'a> { + #[inline] + fn default() -> Self { + StringVectorArgs { + data: None, // required field + } + } + } + + pub struct StringVectorBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> StringVectorBuilder<'a, 'b, A> { + #[inline] + pub fn add_data( + &mut self, + data: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_ + .push_slot_always::>(StringVector::VT_DATA, data); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> StringVectorBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + StringVectorBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, StringVector::VT_DATA, "data"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl 
core::fmt::Debug for StringVector<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("StringVector"); + ds.field("data", &self.data()); + ds.finish() + } + } + #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] + pub struct StringVectorT { + pub data: Vec, + } + impl Default for StringVectorT { + fn default() -> Self { + Self { + data: Default::default(), + } + } + } + impl StringVectorT { + pub fn pack<'b, A: flatbuffers::Allocator + 'b>( + &self, + _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, + ) -> flatbuffers::WIPOffset> { + let data = Some({ + let x = &self.data; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + StringVector::create(_fbb, &StringVectorArgs { data }) + } + } + pub enum CosmeticFiltersOffset {} + #[derive(Copy, Clone, PartialEq)] + + /// A table to store cosmetic filter rules (including supported structures). + pub struct CosmeticFilters<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for CosmeticFilters<'a> { + type Inner = CosmeticFilters<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } + } + + impl<'a> CosmeticFilters<'a> { + pub const VT_SIMPLE_CLASS_RULES: flatbuffers::VOffsetT = 4; + pub const VT_SIMPLE_ID_RULES: flatbuffers::VOffsetT = 6; + pub const VT_MISC_GENERIC_SELECTORS: flatbuffers::VOffsetT = 8; + pub const VT_COMPLEX_CLASS_RULES_INDEX: flatbuffers::VOffsetT = 10; + pub const VT_COMPLEX_CLASS_RULES_VALUES: flatbuffers::VOffsetT = 12; + pub const VT_COMPLEX_ID_RULES_INDEX: flatbuffers::VOffsetT = 14; + pub const VT_COMPLEX_ID_RULES_VALUES: flatbuffers::VOffsetT = 16; + pub const VT_HOSTNAME_HIDE_INDEX: flatbuffers::VOffsetT = 18; + pub const VT_HOSTNAME_HIDE_VALUES: flatbuffers::VOffsetT = 20; + pub const VT_HOSTNAME_INJECT_SCRIPT_INDEX: flatbuffers::VOffsetT = 22; + pub const 
VT_HOSTNAME_INJECT_SCRIPT_VALUES: flatbuffers::VOffsetT = 24; + pub const VT_HOSTNAME_INDEX: flatbuffers::VOffsetT = 26; + pub const VT_HOSTNAME_VALUES: flatbuffers::VOffsetT = 28; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + CosmeticFilters { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args CosmeticFiltersArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = CosmeticFiltersBuilder::new(_fbb); + if let Some(x) = args.hostname_values { + builder.add_hostname_values(x); + } + if let Some(x) = args.hostname_index { + builder.add_hostname_index(x); + } + if let Some(x) = args.hostname_inject_script_values { + builder.add_hostname_inject_script_values(x); + } + if let Some(x) = args.hostname_inject_script_index { + builder.add_hostname_inject_script_index(x); + } + if let Some(x) = args.hostname_hide_values { + builder.add_hostname_hide_values(x); + } + if let Some(x) = args.hostname_hide_index { + builder.add_hostname_hide_index(x); + } + if let Some(x) = args.complex_id_rules_values { + builder.add_complex_id_rules_values(x); + } + if let Some(x) = args.complex_id_rules_index { + builder.add_complex_id_rules_index(x); + } + if let Some(x) = args.complex_class_rules_values { + builder.add_complex_class_rules_values(x); + } + if let Some(x) = args.complex_class_rules_index { + builder.add_complex_class_rules_index(x); + } + if let Some(x) = args.misc_generic_selectors { + builder.add_misc_generic_selectors(x); + } + if let Some(x) = args.simple_id_rules { + builder.add_simple_id_rules(x); + } + if let Some(x) = args.simple_class_rules { + builder.add_simple_class_rules(x); + } + builder.finish() + } + + pub fn unpack(&self) -> CosmeticFiltersT { + let simple_class_rules = { + let x = self.simple_class_rules(); + x.iter().map(|s| 
s.to_string()).collect() + }; + let simple_id_rules = { + let x = self.simple_id_rules(); + x.iter().map(|s| s.to_string()).collect() + }; + let misc_generic_selectors = { + let x = self.misc_generic_selectors(); + x.iter().map(|s| s.to_string()).collect() + }; + let complex_class_rules_index = { + let x = self.complex_class_rules_index(); + x.iter().map(|s| s.to_string()).collect() + }; + let complex_class_rules_values = { + let x = self.complex_class_rules_values(); + x.iter().map(|t| t.unpack()).collect() + }; + let complex_id_rules_index = { + let x = self.complex_id_rules_index(); + x.iter().map(|s| s.to_string()).collect() + }; + let complex_id_rules_values = { + let x = self.complex_id_rules_values(); + x.iter().map(|t| t.unpack()).collect() + }; + let hostname_hide_index = { + let x = self.hostname_hide_index(); + x.into_iter().collect() + }; + let hostname_hide_values = { + let x = self.hostname_hide_values(); + x.iter().map(|s| s.to_string()).collect() + }; + let hostname_inject_script_index = { + let x = self.hostname_inject_script_index(); + x.into_iter().collect() + }; + let hostname_inject_script_values = { + let x = self.hostname_inject_script_values(); + x.iter().map(|s| s.to_string()).collect() + }; + let hostname_index = { + let x = self.hostname_index(); + x.into_iter().collect() + }; + let hostname_values = { + let x = self.hostname_values(); + x.iter().map(|t| t.unpack()).collect() + }; + CosmeticFiltersT { + simple_class_rules, + simple_id_rules, + misc_generic_selectors, + complex_class_rules_index, + complex_class_rules_values, + complex_id_rules_index, + complex_id_rules_values, + hostname_hide_index, + hostname_hide_values, + hostname_inject_script_index, + hostname_inject_script_values, + hostname_index, + hostname_values, + } + } + + /// Rules that are just the CSS class of an element to be hidden on all sites, e.g. `##.ad`. + /// Stored as a flat_set. 
+ #[inline] + pub fn simple_class_rules( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_SIMPLE_CLASS_RULES, None) + .unwrap() + } + } + /// Rules that are just the CSS id of an element to be hidden on all sites, e.g. `###banner`. + /// Stored as a flat_set. + #[inline] + pub fn simple_id_rules( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_SIMPLE_ID_RULES, None) + .unwrap() + } + } + /// Rules that are the CSS selector of an element to be hidden on all sites that do not fit + /// into any of the class or id buckets, e.g. `##a[href="https://malware.com"]` + /// Stored as a flat_set. + #[inline] + pub fn misc_generic_selectors( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_MISC_GENERIC_SELECTORS, None) + .unwrap() + } + } + /// Rules that are the CSS selector of an element to be hidden on all sites, starting with a + /// class, e.g. `##.ad image`. 
+ /// Stored as a multi-map `hostname_hash` => `selector` + #[inline] + pub fn complex_class_rules_index( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_COMPLEX_CLASS_RULES_INDEX, None) + .unwrap() + } + } + #[inline] + pub fn complex_class_rules_values( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_COMPLEX_CLASS_RULES_VALUES, None) + .unwrap() + } + } + /// Rules that are the CSS selector of an element to be hidden on all sites, starting with an + /// id, e.g. `###banner > .text a`. + /// Stored as a multi-map `hostname_hash` => `selector` + #[inline] + pub fn complex_id_rules_index( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_COMPLEX_ID_RULES_INDEX, None) + .unwrap() + } + } + #[inline] + pub fn complex_id_rules_values( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_COMPLEX_ID_RULES_VALUES, None) + .unwrap() + } + } + /// Simple hostname-specific hide rules, e.g. `example.com##.ad`. + /// Stored as a multi-map `hostname_hash` => `selector`. + /// Doesn't belong to HostnameSpecificRules for performance reasons. 
+ #[inline] + pub fn hostname_hide_index(&self) -> flatbuffers::Vector<'a, u64> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + CosmeticFilters::VT_HOSTNAME_HIDE_INDEX, + None, + ) + .unwrap() + } + } + #[inline] + pub fn hostname_hide_values( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_HOSTNAME_HIDE_VALUES, None) + .unwrap() + } + } + /// Rules with a scriptlet to inject along with any arguments, e.g. + /// `example.com##+js(acis, Number.isNan)`. + /// Stored as a multi-map `hostname_hash` => `script_plus_permission_byte` + /// The content is the contents of the `+js(...)` syntax construct plus + /// last byte stores permission to save memory. + /// Doesn't belong to HostnameSpecificRules for performance reasons. + #[inline] + pub fn hostname_inject_script_index(&self) -> flatbuffers::Vector<'a, u64> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_INDEX, + None, + ) + .unwrap() + } + } + #[inline] + pub fn hostname_inject_script_values( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_VALUES, None) + .unwrap() + } + } + /// A map to store the other host-specific cosmetic rules. 
+ #[inline] + pub fn hostname_index(&self) -> flatbuffers::Vector<'a, u64> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + CosmeticFilters::VT_HOSTNAME_INDEX, + None, + ) + .unwrap() + } + } + #[inline] + pub fn hostname_values( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> + { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::, + >, + >>(CosmeticFilters::VT_HOSTNAME_VALUES, None) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for CosmeticFilters<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>, + >>("simple_class_rules", Self::VT_SIMPLE_CLASS_RULES, true)? + .visit_field::>, + >>("simple_id_rules", Self::VT_SIMPLE_ID_RULES, true)? + .visit_field::>, + >>( + "misc_generic_selectors", + Self::VT_MISC_GENERIC_SELECTORS, + true, + )? + .visit_field::>, + >>( + "complex_class_rules_index", + Self::VT_COMPLEX_CLASS_RULES_INDEX, + true, + )? + .visit_field::>, + >>( + "complex_class_rules_values", + Self::VT_COMPLEX_CLASS_RULES_VALUES, + true, + )? + .visit_field::>, + >>( + "complex_id_rules_index", + Self::VT_COMPLEX_ID_RULES_INDEX, + true, + )? + .visit_field::>, + >>( + "complex_id_rules_values", + Self::VT_COMPLEX_ID_RULES_VALUES, + true, + )? + .visit_field::>>( + "hostname_hide_index", + Self::VT_HOSTNAME_HIDE_INDEX, + true, + )? + .visit_field::>, + >>("hostname_hide_values", Self::VT_HOSTNAME_HIDE_VALUES, true)? + .visit_field::>>( + "hostname_inject_script_index", + Self::VT_HOSTNAME_INJECT_SCRIPT_INDEX, + true, + )? + .visit_field::>, + >>( + "hostname_inject_script_values", + Self::VT_HOSTNAME_INJECT_SCRIPT_VALUES, + true, + )? 
+ .visit_field::>>( + "hostname_index", + Self::VT_HOSTNAME_INDEX, + true, + )? + .visit_field::>, + >>("hostname_values", Self::VT_HOSTNAME_VALUES, true)? + .finish(); + Ok(()) + } + } + pub struct CosmeticFiltersArgs<'a> { + pub simple_class_rules: Option< + flatbuffers::WIPOffset>>, + >, + pub simple_id_rules: Option< + flatbuffers::WIPOffset>>, + >, + pub misc_generic_selectors: Option< + flatbuffers::WIPOffset>>, + >, + pub complex_class_rules_index: Option< + flatbuffers::WIPOffset>>, + >, + pub complex_class_rules_values: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, + >, + pub complex_id_rules_index: Option< + flatbuffers::WIPOffset>>, + >, + pub complex_id_rules_values: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, + >, + pub hostname_hide_index: Option>>, + pub hostname_hide_values: Option< + flatbuffers::WIPOffset>>, + >, + pub hostname_inject_script_index: + Option>>, + pub hostname_inject_script_values: Option< + flatbuffers::WIPOffset>>, + >, + pub hostname_index: Option>>, + pub hostname_values: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, + >, + } + impl<'a> Default for CosmeticFiltersArgs<'a> { + #[inline] + fn default() -> Self { + CosmeticFiltersArgs { + simple_class_rules: None, // required field + simple_id_rules: None, // required field + misc_generic_selectors: None, // required field + complex_class_rules_index: None, // required field + complex_class_rules_values: None, // required field + complex_id_rules_index: None, // required field + complex_id_rules_values: None, // required field + hostname_hide_index: None, // required field + hostname_hide_values: None, // required field + hostname_inject_script_index: None, // required field + hostname_inject_script_values: None, // required field + hostname_index: None, // required field + hostname_values: None, // required field + } + } + } + + 
pub struct CosmeticFiltersBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> CosmeticFiltersBuilder<'a, 'b, A> { + #[inline] + pub fn add_simple_class_rules( + &mut self, + simple_class_rules: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_SIMPLE_CLASS_RULES, + simple_class_rules, + ); + } + #[inline] + pub fn add_simple_id_rules( + &mut self, + simple_id_rules: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_SIMPLE_ID_RULES, + simple_id_rules, + ); + } + #[inline] + pub fn add_misc_generic_selectors( + &mut self, + misc_generic_selectors: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_MISC_GENERIC_SELECTORS, + misc_generic_selectors, + ); + } + #[inline] + pub fn add_complex_class_rules_index( + &mut self, + complex_class_rules_index: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_COMPLEX_CLASS_RULES_INDEX, + complex_class_rules_index, + ); + } + #[inline] + pub fn add_complex_class_rules_values( + &mut self, + complex_class_rules_values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_COMPLEX_CLASS_RULES_VALUES, + complex_class_rules_values, + ); + } + #[inline] + pub fn add_complex_id_rules_index( + &mut self, + complex_id_rules_index: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + 
CosmeticFilters::VT_COMPLEX_ID_RULES_INDEX, + complex_id_rules_index, + ); + } + #[inline] + pub fn add_complex_id_rules_values( + &mut self, + complex_id_rules_values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_COMPLEX_ID_RULES_VALUES, + complex_id_rules_values, + ); + } + #[inline] + pub fn add_hostname_hide_index( + &mut self, + hostname_hide_index: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_HIDE_INDEX, + hostname_hide_index, + ); + } + #[inline] + pub fn add_hostname_hide_values( + &mut self, + hostname_hide_values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_HIDE_VALUES, + hostname_hide_values, + ); + } + #[inline] + pub fn add_hostname_inject_script_index( + &mut self, + hostname_inject_script_index: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_INDEX, + hostname_inject_script_index, + ); + } + #[inline] + pub fn add_hostname_inject_script_values( + &mut self, + hostname_inject_script_values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_VALUES, + hostname_inject_script_values, + ); + } + #[inline] + pub fn add_hostname_index( + &mut self, + hostname_index: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_INDEX, + hostname_index, + ); + } + #[inline] + pub fn add_hostname_values( + &mut self, + hostname_values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_VALUES, + hostname_values, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut 
flatbuffers::FlatBufferBuilder<'a, A>, + ) -> CosmeticFiltersBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + CosmeticFiltersBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required( + o, + CosmeticFilters::VT_SIMPLE_CLASS_RULES, + "simple_class_rules", + ); + self.fbb_ + .required(o, CosmeticFilters::VT_SIMPLE_ID_RULES, "simple_id_rules"); + self.fbb_.required( + o, + CosmeticFilters::VT_MISC_GENERIC_SELECTORS, + "misc_generic_selectors", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_COMPLEX_CLASS_RULES_INDEX, + "complex_class_rules_index", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_COMPLEX_CLASS_RULES_VALUES, + "complex_class_rules_values", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_COMPLEX_ID_RULES_INDEX, + "complex_id_rules_index", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_COMPLEX_ID_RULES_VALUES, + "complex_id_rules_values", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_HOSTNAME_HIDE_INDEX, + "hostname_hide_index", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_HOSTNAME_HIDE_VALUES, + "hostname_hide_values", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_INDEX, + "hostname_inject_script_index", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_VALUES, + "hostname_inject_script_values", + ); + self.fbb_ + .required(o, CosmeticFilters::VT_HOSTNAME_INDEX, "hostname_index"); + self.fbb_ + .required(o, CosmeticFilters::VT_HOSTNAME_VALUES, "hostname_values"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for CosmeticFilters<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("CosmeticFilters"); + ds.field("simple_class_rules", &self.simple_class_rules()); + ds.field("simple_id_rules", &self.simple_id_rules()); + 
ds.field("misc_generic_selectors", &self.misc_generic_selectors()); + ds.field( + "complex_class_rules_index", + &self.complex_class_rules_index(), + ); + ds.field( + "complex_class_rules_values", + &self.complex_class_rules_values(), + ); + ds.field("complex_id_rules_index", &self.complex_id_rules_index()); + ds.field("complex_id_rules_values", &self.complex_id_rules_values()); + ds.field("hostname_hide_index", &self.hostname_hide_index()); + ds.field("hostname_hide_values", &self.hostname_hide_values()); + ds.field( + "hostname_inject_script_index", + &self.hostname_inject_script_index(), + ); + ds.field( + "hostname_inject_script_values", + &self.hostname_inject_script_values(), + ); + ds.field("hostname_index", &self.hostname_index()); + ds.field("hostname_values", &self.hostname_values()); + ds.finish() + } + } + #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] + pub struct CosmeticFiltersT { + pub simple_class_rules: Vec, + pub simple_id_rules: Vec, + pub misc_generic_selectors: Vec, + pub complex_class_rules_index: Vec, + pub complex_class_rules_values: Vec, + pub complex_id_rules_index: Vec, + pub complex_id_rules_values: Vec, + pub hostname_hide_index: Vec, + pub hostname_hide_values: Vec, + pub hostname_inject_script_index: Vec, + pub hostname_inject_script_values: Vec, + pub hostname_index: Vec, + pub hostname_values: Vec, + } + impl Default for CosmeticFiltersT { + fn default() -> Self { + Self { + simple_class_rules: Default::default(), + simple_id_rules: Default::default(), + misc_generic_selectors: Default::default(), + complex_class_rules_index: Default::default(), + complex_class_rules_values: Default::default(), + complex_id_rules_index: Default::default(), + complex_id_rules_values: Default::default(), + hostname_hide_index: Default::default(), + hostname_hide_values: Default::default(), + hostname_inject_script_index: Default::default(), + hostname_inject_script_values: Default::default(), + hostname_index: Default::default(), + 
hostname_values: Default::default(), + } + } + } + impl CosmeticFiltersT { + pub fn pack<'b, A: flatbuffers::Allocator + 'b>( + &self, + _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, + ) -> flatbuffers::WIPOffset> { + let simple_class_rules = Some({ + let x = &self.simple_class_rules; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let simple_id_rules = Some({ + let x = &self.simple_id_rules; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let misc_generic_selectors = Some({ + let x = &self.misc_generic_selectors; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let complex_class_rules_index = Some({ + let x = &self.complex_class_rules_index; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let complex_class_rules_values = Some({ + let x = &self.complex_class_rules_values; + let w: Vec<_> = x.iter().map(|t| t.pack(_fbb)).collect(); + _fbb.create_vector(&w) + }); + let complex_id_rules_index = Some({ + let x = &self.complex_id_rules_index; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let complex_id_rules_values = Some({ + let x = &self.complex_id_rules_values; + let w: Vec<_> = x.iter().map(|t| t.pack(_fbb)).collect(); + _fbb.create_vector(&w) + }); + let hostname_hide_index = Some({ + let x = &self.hostname_hide_index; + _fbb.create_vector(x) + }); + let hostname_hide_values = Some({ + let x = &self.hostname_hide_values; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let hostname_inject_script_index = Some({ + let x = &self.hostname_inject_script_index; + _fbb.create_vector(x) + }); + let hostname_inject_script_values = Some({ + let x = &self.hostname_inject_script_values; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); 
+ _fbb.create_vector(&w) + }); + let hostname_index = Some({ + let x = &self.hostname_index; + _fbb.create_vector(x) + }); + let hostname_values = Some({ + let x = &self.hostname_values; + let w: Vec<_> = x.iter().map(|t| t.pack(_fbb)).collect(); + _fbb.create_vector(&w) + }); + CosmeticFilters::create( + _fbb, + &CosmeticFiltersArgs { + simple_class_rules, + simple_id_rules, + misc_generic_selectors, + complex_class_rules_index, + complex_class_rules_values, + complex_id_rules_index, + complex_id_rules_values, + hostname_hide_index, + hostname_hide_values, + hostname_inject_script_index, + hostname_inject_script_values, + hostname_index, + hostname_values, + }, + ) + } + } + pub enum EngineOffset {} + #[derive(Copy, Clone, PartialEq)] + + /// A root type containing a serialized Engine. + pub struct Engine<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for Engine<'a> { + type Inner = Engine<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } + } + + impl<'a> Engine<'a> { + pub const VT_NETWORK_RULES: flatbuffers::VOffsetT = 4; + pub const VT_UNIQUE_DOMAINS_HASHES: flatbuffers::VOffsetT = 6; + pub const VT_COSMETIC_FILTERS: flatbuffers::VOffsetT = 8; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + Engine { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args EngineArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = EngineBuilder::new(_fbb); + if let Some(x) = args.cosmetic_filters { + builder.add_cosmetic_filters(x); + } + if let Some(x) = args.unique_domains_hashes { + builder.add_unique_domains_hashes(x); + } + if let Some(x) = args.network_rules { + builder.add_network_rules(x); + } + builder.finish() + } + + pub 
fn unpack(&self) -> EngineT { + let network_rules = { + let x = self.network_rules(); + x.iter().map(|t| t.unpack()).collect() + }; + let unique_domains_hashes = { + let x = self.unique_domains_hashes(); + x.into_iter().collect() + }; + let cosmetic_filters = { + let x = self.cosmetic_filters(); + Box::new(x.unpack()) + }; + EngineT { + network_rules, + unique_domains_hashes, + cosmetic_filters, + } + } + + /// Contains several NetworkFilterList matching to different kinds of lists. + /// The indexes are matching NetworkFilterListId. + /// The size must be NetworkFilterListId::Size. + #[inline] + pub fn network_rules( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(Engine::VT_NETWORK_RULES, None) + .unwrap() + } + } + /// Contains hashes for opt_(not)_domains. See opt_domains for details. + #[inline] + pub fn unique_domains_hashes(&self) -> flatbuffers::Vector<'a, u64> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + Engine::VT_UNIQUE_DOMAINS_HASHES, + None, + ) + .unwrap() + } + } + #[inline] + pub fn cosmetic_filters(&self) -> CosmeticFilters<'a> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>( + Engine::VT_COSMETIC_FILTERS, + None, + ) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for Engine<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>, + >>("network_rules", Self::VT_NETWORK_RULES, true)? + .visit_field::>>( + "unique_domains_hashes", + Self::VT_UNIQUE_DOMAINS_HASHES, + true, + )? 
+ .visit_field::>( + "cosmetic_filters", + Self::VT_COSMETIC_FILTERS, + true, + )? + .finish(); + Ok(()) + } + } + pub struct EngineArgs<'a> { + pub network_rules: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, + >, + pub unique_domains_hashes: Option>>, + pub cosmetic_filters: Option>>, + } + impl<'a> Default for EngineArgs<'a> { + #[inline] + fn default() -> Self { + EngineArgs { + network_rules: None, // required field + unique_domains_hashes: None, // required field + cosmetic_filters: None, // required field + } + } + } + + pub struct EngineBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> EngineBuilder<'a, 'b, A> { + #[inline] + pub fn add_network_rules( + &mut self, + network_rules: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, + >, + ) { + self.fbb_.push_slot_always::>( + Engine::VT_NETWORK_RULES, + network_rules, + ); + } + #[inline] + pub fn add_unique_domains_hashes( + &mut self, + unique_domains_hashes: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + Engine::VT_UNIQUE_DOMAINS_HASHES, + unique_domains_hashes, + ); + } + #[inline] + pub fn add_cosmetic_filters( + &mut self, + cosmetic_filters: flatbuffers::WIPOffset>, + ) { + self.fbb_ + .push_slot_always::>( + Engine::VT_COSMETIC_FILTERS, + cosmetic_filters, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> EngineBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + EngineBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_ + .required(o, Engine::VT_NETWORK_RULES, "network_rules"); + self.fbb_ + .required(o, Engine::VT_UNIQUE_DOMAINS_HASHES, "unique_domains_hashes"); + self.fbb_ + .required(o, 
Engine::VT_COSMETIC_FILTERS, "cosmetic_filters"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for Engine<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("Engine"); + ds.field("network_rules", &self.network_rules()); + ds.field("unique_domains_hashes", &self.unique_domains_hashes()); + ds.field("cosmetic_filters", &self.cosmetic_filters()); + ds.finish() + } + } + #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] + pub struct EngineT { + pub network_rules: Vec, + pub unique_domains_hashes: Vec, + pub cosmetic_filters: Box, + } + impl Default for EngineT { + fn default() -> Self { + Self { + network_rules: Default::default(), + unique_domains_hashes: Default::default(), + cosmetic_filters: Default::default(), + } + } + } + impl EngineT { + pub fn pack<'b, A: flatbuffers::Allocator + 'b>( + &self, + _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, + ) -> flatbuffers::WIPOffset> { + let network_rules = Some({ + let x = &self.network_rules; + let w: Vec<_> = x.iter().map(|t| t.pack(_fbb)).collect(); + _fbb.create_vector(&w) + }); + let unique_domains_hashes = Some({ + let x = &self.unique_domains_hashes; + _fbb.create_vector(x) + }); + let cosmetic_filters = Some({ + let x = &self.cosmetic_filters; + x.pack(_fbb) + }); + Engine::create( + _fbb, + &EngineArgs { + network_rules, + unique_domains_hashes, + cosmetic_filters, + }, + ) + } + } + #[inline] + /// Verifies that a buffer of bytes contains a `Engine` + /// and returns it. + /// Note that verification is still experimental and may not + /// catch every error, or be maximally performant. For the + /// previous, unchecked, behavior use + /// `root_as_engine_unchecked`. + pub fn root_as_engine(buf: &[u8]) -> Result { + flatbuffers::root::(buf) + } + #[inline] + /// Verifies that a buffer of bytes contains a size prefixed + /// `Engine` and returns it. 
+ /// Note that verification is still experimental and may not + /// catch every error, or be maximally performant. For the + /// previous, unchecked, behavior use + /// `size_prefixed_root_as_engine_unchecked`. + pub fn size_prefixed_root_as_engine( + buf: &[u8], + ) -> Result { + flatbuffers::size_prefixed_root::(buf) + } + #[inline] + /// Verifies, with the given options, that a buffer of bytes + /// contains a `Engine` and returns it. + /// Note that verification is still experimental and may not + /// catch every error, or be maximally performant. For the + /// previous, unchecked, behavior use + /// `root_as_engine_unchecked`. + pub fn root_as_engine_with_opts<'b, 'o>( + opts: &'o flatbuffers::VerifierOptions, + buf: &'b [u8], + ) -> Result, flatbuffers::InvalidFlatbuffer> { + flatbuffers::root_with_opts::>(opts, buf) + } + #[inline] + /// Verifies, with the given verifier options, that a buffer of + /// bytes contains a size prefixed `Engine` and returns + /// it. Note that verification is still experimental and may not + /// catch every error, or be maximally performant. For the + /// previous, unchecked, behavior use + /// `root_as_engine_unchecked`. + pub fn size_prefixed_root_as_engine_with_opts<'b, 'o>( + opts: &'o flatbuffers::VerifierOptions, + buf: &'b [u8], + ) -> Result, flatbuffers::InvalidFlatbuffer> { + flatbuffers::size_prefixed_root_with_opts::>(opts, buf) + } + #[inline] + /// Assumes, without verification, that a buffer of bytes contains a Engine and returns it. + /// # Safety + /// Callers must trust the given bytes do indeed contain a valid `Engine`. + pub unsafe fn root_as_engine_unchecked(buf: &[u8]) -> Engine { + flatbuffers::root_unchecked::(buf) + } + #[inline] + /// Assumes, without verification, that a buffer of bytes contains a size prefixed Engine and returns it. + /// # Safety + /// Callers must trust the given bytes do indeed contain a valid size prefixed `Engine`. 
+ pub unsafe fn size_prefixed_root_as_engine_unchecked(buf: &[u8]) -> Engine { + flatbuffers::size_prefixed_root_unchecked::(buf) } #[inline] - pub fn finish_network_filter_list_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>( + pub fn finish_engine_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>( fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, - root: flatbuffers::WIPOffset>, + root: flatbuffers::WIPOffset>, ) { fbb.finish(root, None); } #[inline] - pub fn finish_size_prefixed_network_filter_list_buffer< - 'a, - 'b, - A: flatbuffers::Allocator + 'a, - >( + pub fn finish_size_prefixed_engine_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>( fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, - root: flatbuffers::WIPOffset>, + root: flatbuffers::WIPOffset>, ) { fbb.finish_size_prefixed(root, None); } diff --git a/src/flatbuffers/mod.rs b/src/flatbuffers/mod.rs new file mode 100644 index 00000000..61dc0bd6 --- /dev/null +++ b/src/flatbuffers/mod.rs @@ -0,0 +1,2 @@ +pub(crate) mod containers; +pub(crate) mod unsafe_tools; diff --git a/src/filters/unsafe_tools.rs b/src/flatbuffers/unsafe_tools.rs similarity index 72% rename from src/filters/unsafe_tools.rs rename to src/flatbuffers/unsafe_tools.rs index a10f6b60..7d3973c9 100644 --- a/src/filters/unsafe_tools.rs +++ b/src/flatbuffers/unsafe_tools.rs @@ -1,10 +1,9 @@ //! Unsafe utility functions for working with flatbuffers and other low-level operations. -use crate::filters::fb_network::flat::fb; +use crate::filters::flatbuffer_generated::fb; // Minimum alignment for the beginning of the flatbuffer data. -// Should be 4 while we support armv7 and x86_32. -const MIN_ALIGNMENT: usize = 4; +const MIN_ALIGNMENT: usize = 8; /// Converts a flatbuffers Vector to a slice. /// # Safety @@ -20,7 +19,7 @@ pub fn fb_vector_to_slice(vector: flatbuffers::Vector<'_, T>) -> &[T] { // the alignment of the data must be a divisor of MIN_ALIGNMENT. 
assert!(MIN_ALIGNMENT.is_multiple_of(std::mem::size_of::())); } - let _ = static_assert_alignment::; + const { static_assert_alignment::() }; assert!(bytes.len().is_multiple_of(std::mem::size_of::())); assert!((bytes.as_ptr() as usize).is_multiple_of(std::mem::align_of::())); @@ -36,10 +35,10 @@ pub fn fb_vector_to_slice(vector: flatbuffers::Vector<'_, T>) -> &[T] { // It could be constructed from raw data (includes the flatbuffer verification) // or from a builder that have just been used to construct the flatbuffer // Invariants: -// 1. self.data() is properly verified flatbuffer contains FilterList. +// 1. self.data() is properly verified flatbuffer contains the root object. // 2. self.data() is aligned to MIN_ALIGNMENT bytes. // This is necessary for fb_vector_to_slice. -pub(crate) struct VerifiedFlatFilterListMemory { +pub(crate) struct VerifiedFlatbufferMemory { // The buffer containing the flatbuffer data. raw_data: Vec, @@ -48,36 +47,40 @@ pub(crate) struct VerifiedFlatFilterListMemory { start: usize, } -impl VerifiedFlatFilterListMemory { - pub(crate) fn from_raw(data: Vec) -> Result { - let memory = Self::from_vec(data); +impl VerifiedFlatbufferMemory { + pub(crate) fn from_raw(data: &[u8]) -> Result { + let memory = Self::from_slice(data); // Verify that the data is a valid flatbuffer. - let _ = fb::root_as_network_filter_list(memory.data())?; + let _ = fb::root_as_engine(memory.data())?; Ok(memory) } - // Creates a new VerifiedFlatFilterListMemory from a builder. + // Creates a new VerifiedFlatbufferMemory from a builder. // Skip the verification, the builder must contains a valid FilterList. pub(crate) fn from_builder(builder: &flatbuffers::FlatBufferBuilder<'_>) -> Self { - let raw_data = builder.finished_data().to_vec(); - Self::from_vec(raw_data) + Self::from_slice(builder.finished_data()) } // Properly align the buffer to MIN_ALIGNMENT bytes. 
- pub(crate) fn from_vec(mut vec: Vec) -> Self { + pub(crate) fn from_slice(data: &[u8]) -> Self { + let mut vec = Vec::with_capacity(data.len() + MIN_ALIGNMENT); let shift = vec.as_ptr() as usize % MIN_ALIGNMENT; + let start = if shift == 0 { 0 } else { - vec.reserve(vec.len() + MIN_ALIGNMENT); // vec.as_ptr() is changed let shift = vec.as_ptr() as usize % MIN_ALIGNMENT; let padding = MIN_ALIGNMENT - shift; + assert!(vec.capacity() >= padding); vec.splice(0..0, vec![0u8; padding]); padding }; + vec.extend_from_slice(data); + assert!((vec.as_ptr() as usize + start).is_multiple_of(MIN_ALIGNMENT)); + let memory = Self { raw_data: vec, start, @@ -86,8 +89,8 @@ impl VerifiedFlatFilterListMemory { memory } - pub(crate) fn filter_list(&self) -> fb::NetworkFilterList<'_> { - unsafe { fb::root_as_network_filter_list_unchecked(self.data()) } + pub(crate) fn root(&self) -> fb::Engine<'_> { + unsafe { fb::root_as_engine_unchecked(self.data()) } } pub fn data(&self) -> &[u8] { diff --git a/src/lib.rs b/src/lib.rs index 1af625e4..2e9c84fa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,9 +20,12 @@ pub mod blocker; #[cfg(feature = "content-blocking")] pub mod content_blocking; pub mod cosmetic_filter_cache; +mod cosmetic_filter_cache_builder; +mod cosmetic_filter_utils; mod data_format; mod engine; pub mod filters; +mod flatbuffers; pub mod lists; mod network_filter_list; mod optimizer; @@ -51,7 +54,7 @@ mod sync_tests { } #[test] - #[cfg(not(feature = "unsync-regex-caching"))] + #[cfg(not(feature = "single-thread"))] fn assert_engine_sync() { static_assert_sync::(); } diff --git a/src/lists.rs b/src/lists.rs index 41d4746b..cc147db5 100644 --- a/src/lists.rs +++ b/src/lists.rs @@ -231,6 +231,20 @@ impl FilterSet { } } + // Used in benchmarks to avoid parsing the rules twice. 
+ #[doc(hidden)] + pub fn new_with_rules( + network_filters: Vec, + cosmetic_filters: Vec, + debug: bool, + ) -> Self { + Self { + debug, + network_filters, + cosmetic_filters, + } + } + /// Adds the contents of an entire filter list to this `FilterSet`. Filters that cannot be /// parsed successfully are ignored. Returns any discovered metadata about the list of rules /// added. diff --git a/src/network_filter_list.rs b/src/network_filter_list.rs index 058ca346..2b81ba4d 100644 --- a/src/network_filter_list.rs +++ b/src/network_filter_list.rs @@ -2,14 +2,16 @@ use std::{collections::HashMap, collections::HashSet, fmt}; -use crate::filters::fb_network::flat::fb; -use crate::filters::fb_network::{FlatNetworkFilter, FlatNetworkFiltersListBuilder}; -use crate::filters::flat_filter_map::FlatFilterMap; +use flatbuffers::ForwardsUOffset; + +use crate::filters::fb_network::FlatNetworkFilter; +use crate::filters::filter_data_context::FilterDataContext; +use crate::filters::flatbuffer_generated::fb; use crate::filters::network::{ NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper, NetworkMatchable, }; -use crate::filters::unsafe_tools::{fb_vector_to_slice, VerifiedFlatFilterListMemory}; -use crate::optimizer; +use crate::flatbuffers::containers::flat_multimap::FlatMultiMapView; +use crate::flatbuffers::unsafe_tools::fb_vector_to_slice; use crate::regex_manager::RegexManager; use crate::request::Request; use crate::utils::{fast_hash, to_short_hash, Hash, ShortHash}; @@ -49,148 +51,24 @@ impl NetworkFilterMaskHelper for CheckResult { } } -#[derive(Debug, Clone)] -pub enum NetworkFilterListParsingError { - InvalidFlatbuffer(flatbuffers::InvalidFlatbuffer), - UniqueDomainsOutOfBounds(usize), -} - /// Internal structure to keep track of a collection of network filters. 
-pub(crate) struct NetworkFilterList { - pub(crate) memory: VerifiedFlatFilterListMemory, - pub(crate) unique_domains_hashes_map: HashMap, +pub(crate) struct NetworkFilterList<'a> { + pub(crate) list: fb::NetworkFilterList<'a>, + pub(crate) filter_data_context: &'a FilterDataContext, } -impl Default for NetworkFilterList { - fn default() -> Self { - let mut builder = FlatNetworkFiltersListBuilder::new(); - let memory = builder.finish(HashMap::new()); - Self { - memory, - unique_domains_hashes_map: HashMap::new(), - } - } -} - -impl NetworkFilterList { - /// Create a new [NetworkFilterList] from raw memory (includes verification). - pub(crate) fn try_from_unverified_memory( - flatbuffer_memory: Vec, - ) -> Result { - let memory = VerifiedFlatFilterListMemory::from_raw(flatbuffer_memory) - .map_err(NetworkFilterListParsingError::InvalidFlatbuffer)?; - - Self::try_from_verified_memory(memory) - } - - pub(crate) fn try_from_verified_memory( - memory: VerifiedFlatFilterListMemory, - ) -> Result { - let root = memory.filter_list(); +type FlatNetworkFilterMap<'a> = + FlatMultiMapView<'a, ShortHash, ForwardsUOffset>, &'a [ShortHash]>; - // Reconstruct the unique_domains_hashes_map from the flatbuffer data - let len = root.unique_domains_hashes().len(); - let mut unique_domains_hashes_map: HashMap = - HashMap::with_capacity(len); - for (index, hash) in root.unique_domains_hashes().iter().enumerate() { - unique_domains_hashes_map.insert( - hash, - u32::try_from(index) - .map_err(|_| NetworkFilterListParsingError::UniqueDomainsOutOfBounds(index))?, - ); - } - - Ok(Self { - memory, - unique_domains_hashes_map, - }) - } - - pub fn get_filter_map(&self) -> FlatFilterMap<'_, ShortHash, fb::NetworkFilter<'_>> { - let filters_list = self.memory.filter_list(); - FlatFilterMap::new( +impl NetworkFilterList<'_> { + pub fn get_filter_map(&self) -> FlatNetworkFilterMap<'_> { + let filters_list = &self.list; + FlatNetworkFilterMap::new( 
fb_vector_to_slice(filters_list.filter_map_index()), filters_list.filter_map_values(), ) } - pub fn new(filters: Vec, optimize: bool) -> Self { - // Compute tokens for all filters - let filter_tokens: Vec<_> = filters - .into_iter() - .map(|filter| { - let tokens = filter.get_tokens(); - (filter, tokens) - }) - .collect(); - // compute the tokens' frequency histogram - let (total_number_of_tokens, tokens_histogram) = token_histogram(&filter_tokens); - - let mut flat_builder = FlatNetworkFiltersListBuilder::new(); - let mut filter_map = HashMap::>::new(); - - let mut optimizable = HashMap::>::new(); - { - for (network_filter, multi_tokens) in filter_tokens { - let index = if !optimize - || !optimizer::is_filter_optimizable_by_patterns(&network_filter) - { - Some(flat_builder.add(&network_filter)) - } else { - None - }; - - for tokens in multi_tokens { - let mut best_token: ShortHash = 0; - let mut min_count = total_number_of_tokens + 1; - for token in tokens { - let token = to_short_hash(token); - match tokens_histogram.get(&token) { - None => { - min_count = 0; - best_token = token - } - Some(&count) if count < min_count => { - min_count = count; - best_token = token - } - _ => {} - } - } - if let Some(index) = index { - insert_dup(&mut filter_map, best_token, index); - } else { - insert_dup(&mut optimizable, best_token, network_filter.clone()); - } - } // tokens - } - } - - if optimize { - // Sort the entries to ensure deterministic iteration order - let mut optimizable_entries: Vec<_> = optimizable.drain().collect(); - optimizable_entries.sort_unstable_by_key(|(token, _)| *token); - - for (token, v) in optimizable_entries { - let optimized = optimizer::optimize(v); - - for filter in optimized { - let index = flat_builder.add(&filter); - insert_dup(&mut filter_map, token, index); - } - } - } else { - debug_assert!( - optimizable.is_empty(), - "Should be empty if optimization is off" - ); - } - - let memory = flat_builder.finish(filter_map); - - 
Self::try_from_verified_memory(memory).unwrap_or_default() - } - /// Returns the first found filter, if any, that matches the given request. The backing storage /// has a non-deterministic order, so this should be used for any category of filters where a /// match from each would be functionally equivalent. For example, if two different exception @@ -202,7 +80,7 @@ impl NetworkFilterList { active_tags: &HashSet, regex_manager: &mut RegexManager, ) -> Option { - let filters_list = self.memory.filter_list(); + let filters_list = self.list; if filters_list.filter_map_index().is_empty() { return None; @@ -211,18 +89,21 @@ impl NetworkFilterList { let filter_map = self.get_filter_map(); for token in request.get_tokens_for_match() { - for (index, fb_filter) in filter_map.get(to_short_hash(*token)) { - let filter = FlatNetworkFilter::new(&fb_filter, index, self); - - // if matched, also needs to be tagged with an active tag (or not tagged at all) - if filter.matches(request, regex_manager) - && filter.tag().is_none_or(|t| active_tags.contains(t)) - { - return Some(CheckResult { - filter_mask: filter.mask, - modifier_option: filter.modifier_option(), - raw_line: filter.raw_line(), - }); + if let Some(iter) = filter_map.get(to_short_hash(*token)) { + for (index, fb_filter) in iter { + let filter = + FlatNetworkFilter::new(&fb_filter, index, self.filter_data_context); + + // if matched, also needs to be tagged with an active tag (or not tagged at all) + if filter.matches(request, regex_manager) + && filter.tag().is_none_or(|t| active_tags.contains(t)) + { + return Some(CheckResult { + filter_mask: filter.mask, + modifier_option: filter.modifier_option(), + raw_line: filter.raw_line(), + }); + } } } } @@ -242,7 +123,7 @@ impl NetworkFilterList { ) -> Vec { let mut filters: Vec = vec![]; - let filters_list = self.memory.filter_list(); + let filters_list = self.list; if filters_list.filter_map_index().is_empty() { return filters; @@ -251,18 +132,21 @@ impl NetworkFilterList { 
let filter_map = self.get_filter_map(); for token in request.get_tokens_for_match() { - for (index, fb_filter) in filter_map.get(to_short_hash(*token)) { - let filter = FlatNetworkFilter::new(&fb_filter, index, self); - - // if matched, also needs to be tagged with an active tag (or not tagged at all) - if filter.matches(request, regex_manager) - && filter.tag().is_none_or(|t| active_tags.contains(t)) - { - filters.push(CheckResult { - filter_mask: filter.mask, - modifier_option: filter.modifier_option(), - raw_line: filter.raw_line(), - }); + if let Some(iter) = filter_map.get(to_short_hash(*token)) { + for (index, fb_filter) in iter { + let filter = + FlatNetworkFilter::new(&fb_filter, index, self.filter_data_context); + + // if matched, also needs to be tagged with an active tag (or not tagged at all) + if filter.matches(request, regex_manager) + && filter.tag().is_none_or(|t| active_tags.contains(t)) + { + filters.push(CheckResult { + filter_mask: filter.mask, + modifier_option: filter.modifier_option(), + raw_line: filter.raw_line(), + }); + } } } } @@ -270,25 +154,6 @@ impl NetworkFilterList { } } -/// Inserts a value into the `Vec` under the specified key in the `HashMap`. The entry will be -/// created if it does not exist. If it already exists, it will be inserted in the `Vec` in a -/// sorted order. -pub(crate) fn insert_dup( - map: &mut HashMap, H>, - k: K, - v: V, -) where - K: std::cmp::Ord + std::hash::Hash, - V: PartialOrd, -{ - let entry = map.entry(k).or_default(); - - match entry.binary_search_by(|f| f.partial_cmp(&v).unwrap_or(std::cmp::Ordering::Equal)) { - Ok(_pos) => (), // Can occur if the exact same rule is inserted twice. No reason to add anything. 
- Err(slot) => entry.insert(slot, v), - } -} - pub(crate) fn token_histogram( filter_tokens: &[(T, Vec>)], ) -> (u32, HashMap) { diff --git a/src/regex_manager.rs b/src/regex_manager.rs index e54da304..20ff910e 100644 --- a/src/regex_manager.rs +++ b/src/regex_manager.rs @@ -39,7 +39,7 @@ const DEFAULT_DISCARD_UNUSED_TIME: Duration = Duration::from_secs(180); /// Reports [`RegexManager`] metrics that may be useful for creating an optimized /// [`RegexManagerDiscardPolicy`]. -#[cfg(feature = "regex-debug-info")] +#[cfg(feature = "debug-info")] pub struct RegexDebugInfo { /// Information about each regex contained in the [`RegexManager`]. pub regex_data: Vec, @@ -48,7 +48,7 @@ pub struct RegexDebugInfo { } /// Describes metrics about a single regex from the [`RegexManager`]. -#[cfg(feature = "regex-debug-info")] +#[cfg(feature = "debug-info")] pub struct RegexDebugEntry { /// Id for this particular regex, which is constant and unique for its lifetime. /// @@ -312,7 +312,7 @@ impl RegexManager { } /// Discard one regex, identified by its id from a [`RegexDebugEntry`]. - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub fn discard_regex(&mut self, regex_id: u64) { self.map .iter_mut() @@ -322,7 +322,7 @@ impl RegexManager { }); } - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub(crate) fn get_debug_regex_data(&self) -> Vec { use itertools::Itertools; self.map @@ -336,13 +336,13 @@ impl RegexManager { .collect_vec() } - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub(crate) fn get_compiled_regex_count(&self) -> usize { self.compiled_regex_count } /// Collect metrics that may be useful for creating an optimized [`RegexManagerDiscardPolicy`]. 
- #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub fn get_debug_info(&self) -> RegexDebugInfo { RegexDebugInfo { regex_data: self.get_debug_regex_data(), diff --git a/src/resources/mod.rs b/src/resources/mod.rs index 7fda7af2..6c2c5274 100644 --- a/src/resources/mod.rs +++ b/src/resources/mod.rs @@ -15,7 +15,10 @@ pub mod resource_assembler; mod resource_storage; pub(crate) use resource_storage::parse_scriptlet_args; #[doc(inline)] -pub use resource_storage::{AddResourceError, ResourceStorage, ScriptletResourceError}; +pub use resource_storage::{ + AddResourceError, InMemoryResourceStorage, ResourceStorage, ResourceStorageBackend, + ScriptletResourceError, +}; use memchr::memrchr as find_char_reverse; use serde::{Deserialize, Serialize}; @@ -34,7 +37,7 @@ use serde::{Deserialize, Serialize}; /// ``` /// # use adblock::Engine; /// # use adblock::lists::ParseOptions; -/// # use adblock::resources::{MimeType, PermissionMask, Resource, ResourceType}; +/// # use adblock::resources::{MimeType, PermissionMask, Resource, ResourceStorage, ResourceType}; /// # let mut filter_set = adblock::lists::FilterSet::default(); /// # let untrusted_filters = vec![""]; /// # let trusted_filters = vec![""]; @@ -59,14 +62,14 @@ use serde::{Deserialize, Serialize}; /// let mut engine = Engine::from_filter_set(filter_set, true); /// // The `trusted-set-cookie` scriptlet cannot be injected without `COOKIE_ACCESS` /// // permission. 
-/// engine.add_resource(Resource { +/// engine.use_resources([Resource { /// name: "trusted-set-cookie.js".to_string(), /// aliases: vec![], /// kind: ResourceType::Mime(MimeType::ApplicationJavascript), /// content: base64::encode("document.cookie = '...';"), /// dependencies: vec![], /// permission: COOKIE_ACCESS, -/// }); +/// }]); /// ``` #[derive(Serialize, Deserialize, Clone, Copy, Default)] #[repr(transparent)] @@ -101,6 +104,10 @@ impl PermissionMask { Self(bits) } + pub fn to_bits(&self) -> u8 { + self.0 + } + /// Can `filter_mask` authorize injecting a resource requiring `self` permissions? pub fn is_injectable_by(&self, filter_mask: PermissionMask) -> bool { // For any particular bit index, the scriptlet is injectable if: diff --git a/src/resources/resource_storage.rs b/src/resources/resource_storage.rs index 9109d884..b42dc8f7 100644 --- a/src/resources/resource_storage.rs +++ b/src/resources/resource_storage.rs @@ -35,7 +35,7 @@ impl ResourceContent { /// A internal representation of a Resource to store. Stores the content /// in the decoded form to use less memory. /// See [Resource] for details -struct ResourceImpl { +pub struct ResourceImpl { name: String, kind: ResourceType, content: ResourceContent, @@ -44,14 +44,134 @@ struct ResourceImpl { } /// Unified resource storage for both redirects and scriptlets. -#[derive(Default)] +/// +/// By default, this uses an in-memory storage implementation, however this can be changed using +/// a custom [ResourceStorageBackend] if desired. pub struct ResourceStorage { + #[cfg(not(feature = "single-thread"))] + backend: Box, + #[cfg(feature = "single-thread")] + backend: Box, +} + +/// Loads an empty `InMemoryResourceStorage` backend. 
+impl Default for ResourceStorage { + fn default() -> Self { + Self { + backend: Box::new(InMemoryResourceStorage::default()), + } + } +} + +impl ResourceStorage { + #[cfg(not(feature = "single-thread"))] + pub fn from_backend(backend: S) -> Self { + Self { + backend: Box::new(backend), + } + } + + #[cfg(feature = "single-thread")] + pub fn from_backend(backend: S) -> Self { + Self { + backend: Box::new(backend), + } + } + + /// Constructor using an `InMemoryResourceStorage` as the backend with the given resources. + #[cfg(test)] + pub fn in_memory_from_resources(resources: impl IntoIterator) -> Self { + Self::from_backend(InMemoryResourceStorage::from_resources(resources)) + } +} + +/// Customizable backend for [Resource] storage. +/// Custom implementations could be used to enable (for example) sharing of resources between +/// multiple [crate::Engine]s, an on-disk backend, or special caching behavior. +pub trait ResourceStorageBackend { + /// Gets the resource associated with `resource_ident`, respecting aliases if necessary. + fn get_resource(&self, resource_ident: &str) -> Option; +} + +/// Default implementation of [ResourceStorageBackend] that stores all resources in memory. +#[derive(Default)] +pub struct InMemoryResourceStorage { /// Stores each resource by its canonical name resources: HashMap, /// Stores mappings from aliases to their canonical resource names aliases: HashMap, } +impl ResourceStorageBackend for InMemoryResourceStorage { + fn get_resource(&self, resource_ident: &str) -> Option { + let resource = if let Some(resource) = self.resources.get(resource_ident) { + Some(resource) + } else if let Some(canonical_name) = self.aliases.get(resource_ident) { + self.resources.get(canonical_name) + } else { + None + }; + + resource.cloned() + } +} + +impl InMemoryResourceStorage { + /// Convenience constructor that allows building storage for many resources at once. Errors are + /// silently consumed. 
+ pub fn from_resources(resources: impl IntoIterator) -> Self { + let mut self_ = Self::default(); + + resources.into_iter().for_each(|resource| { + #[allow(clippy::unnecessary_lazy_evaluations)] + self_.add_resource(resource).unwrap_or_else(|_e| { + #[cfg(test)] + eprintln!("Failed to add resource: {:?}", _e) + }) + }); + + self_ + } + + /// Adds a resource to storage so that it can be retrieved later. + pub fn add_resource(&mut self, resource: Resource) -> Result<(), AddResourceError> { + let resource_content: ResourceContent; + + if let ResourceType::Mime(content_type) = &resource.kind { + if !resource.dependencies.is_empty() && !content_type.supports_dependencies() { + return Err(AddResourceError::ContentTypeDoesNotSupportDependencies); + } + + if content_type.is_textual() { + resource_content = ResourceContent::text_from_base64(&resource.content)?; + } else { + resource_content = ResourceContent::raw_from_base64(&resource.content)?; + } + } else { + resource_content = ResourceContent::text_from_base64(&resource.content)?; + } + + for ident in std::iter::once(&resource.name).chain(resource.aliases.iter()) { + if self.resources.contains_key(ident) || self.aliases.contains_key(ident) { + return Err(AddResourceError::NameAlreadyAdded); + } + } + + resource.aliases.iter().for_each(|alias| { + self.aliases.insert(alias.clone(), resource.name.clone()); + }); + let resource_impl = ResourceImpl { + name: resource.name.clone(), + kind: resource.kind, + content: resource_content, + dependencies: resource.dependencies, + permission: resource.permission, + }; + self.resources.insert(resource.name, resource_impl); + Ok(()) + } +} + /// Formats `arg` such that it either is a JSON string, or is safe to insert within a JSON string, /// depending on `QUOTED`. /// @@ -146,61 +266,6 @@ fn extract_function_name(fn_def: &str) -> Option<&str> { } impl ResourceStorage { - /// Convenience constructor that allows building storage for many resources at once. 
Errors are - /// silently consumed. - pub fn from_resources(resources: impl IntoIterator) -> Self { - let mut self_ = Self::default(); - - resources.into_iter().for_each(|resource| { - #[allow(clippy::unnecessary_lazy_evaluations)] - self_.add_resource(resource).unwrap_or_else(|_e| { - #[cfg(test)] - eprintln!("Failed to add resource: {:?}", _e) - }) - }); - - self_ - } - - /// Adds a resource to storage so that it can be retrieved later. - pub fn add_resource(&mut self, resource: Resource) -> Result<(), AddResourceError> { - let resource_content: ResourceContent; - - if let ResourceType::Mime(content_type) = &resource.kind { - if !resource.dependencies.is_empty() && !content_type.supports_dependencies() { - return Err(AddResourceError::ContentTypeDoesNotSupportDependencies); - } - - if content_type.is_textual() { - resource_content = ResourceContent::text_from_base64(&resource.content)?; - } else { - resource_content = ResourceContent::raw_from_base64(&resource.content)?; - } - } else { - resource_content = ResourceContent::text_from_base64(&resource.content)?; - } - - for ident in std::iter::once(&resource.name).chain(resource.aliases.iter()) { - if self.resources.contains_key(ident) || self.aliases.contains_key(ident) { - return Err(AddResourceError::NameAlreadyAdded); - } - } - - resource.aliases.iter().for_each(|alias| { - self.aliases.insert(alias.clone(), resource.name.clone()); - }); - let resource_impl = ResourceImpl { - name: resource.name.clone(), - kind: resource.kind, - content: resource_content, - dependencies: resource.dependencies, - permission: resource.permission, - }; - self.resources.insert(resource.name, resource_impl); - - Ok(()) - } - /// Given the contents of the `+js(...)` parts of multiple filters, return a script string /// appropriate for injection in a page. 
pub fn get_scriptlet_resources<'a>( @@ -237,10 +302,10 @@ impl ResourceStorage { /// /// Note that no ordering is guaranteed; function definitions in JS can appear after they are /// used. - fn recursive_dependencies<'a: 'b, 'b>( - &'a self, + fn recursive_dependencies( + &self, new_dep: &str, - prev_deps: &mut Vec<&'b ResourceImpl>, + prev_deps: &mut Vec, filter_permission: PermissionMask, ) -> Result<(), ScriptletResourceError> { if prev_deps.iter().any(|dep| dep.name == new_dep) { @@ -249,9 +314,10 @@ impl ResourceStorage { let resource = self.get_permissioned_resource(new_dep, filter_permission)?; + let deps = resource.dependencies.clone(); prev_deps.push(resource); - for dep in resource.dependencies.iter() { + for dep in deps.iter() { self.recursive_dependencies(dep, prev_deps, filter_permission)?; } @@ -260,11 +326,11 @@ impl ResourceStorage { /// Given the contents of a single `+js(...)` filter part, return a scriptlet string /// appropriate for injection in a page. - fn get_scriptlet_resource<'a: 'b, 'b>( - &'a self, + fn get_scriptlet_resource( + &self, scriptlet_args: &str, filter_permission: PermissionMask, - required_deps: &mut Vec<&'b ResourceImpl>, + required_deps: &mut Vec, ) -> Result { // `unwrap` is safe because these are guaranteed valid at filter parsing. let scriptlet_args = parse_scriptlet_args(scriptlet_args).unwrap(); @@ -322,7 +388,7 @@ impl ResourceStorage { /// Get a data-URL formatted resource appropriate for a `$redirect` response. pub fn get_redirect_resource(&self, resource_ident: &str) -> Option { - let resource = self.get_internal_resource(resource_ident); + let resource = self.backend.get_resource(resource_ident); resource.and_then(|resource| { if !resource.permission.is_default() { @@ -344,26 +410,14 @@ impl ResourceStorage { }) } - /// Gets the resource associated with `resource_ident`, respecting aliases if necessary. 
- fn get_internal_resource(&self, resource_ident: &str) -> Option<&ResourceImpl> { - let resource = if let Some(resource) = self.resources.get(resource_ident) { - Some(resource) - } else if let Some(canonical_name) = self.aliases.get(resource_ident) { - self.resources.get(canonical_name) - } else { - None - }; - - resource - } - fn get_permissioned_resource( &self, scriptlet_name: &str, filter_permission: PermissionMask, - ) -> Result<&ResourceImpl, ScriptletResourceError> { + ) -> Result { let resource = self - .get_internal_resource(scriptlet_name) + .backend + .get_resource(scriptlet_name) .ok_or(ScriptletResourceError::NoMatchingScriptlet)?; if !resource.permission.is_injectable_by(filter_permission) { diff --git a/tests/legacy_harness.rs b/tests/legacy_harness.rs index e87d9312..37cf5be8 100644 --- a/tests/legacy_harness.rs +++ b/tests/legacy_harness.rs @@ -330,7 +330,7 @@ mod legacy_check_match { let mut engine_deserialized = Engine::default(); // second empty engine_deserialized.use_tags(tags); { - let engine_serialized = engine.serialize().unwrap(); + let engine_serialized = engine.serialize().to_vec(); engine_deserialized.deserialize(&engine_serialized).unwrap(); // override from serialized copy } @@ -404,7 +404,7 @@ mod legacy_check_match { ); let mut engine_deserialized = Engine::default(); // second empty { - let engine_serialized = engine.serialize().unwrap(); + let engine_serialized = engine.serialize().to_vec(); engine_deserialized.deserialize(&engine_serialized).unwrap(); // override from serialized copy } @@ -898,8 +898,8 @@ mod legacy_misc_tests { false, ); // enable debugging and disable optimizations - let serialized = engine.serialize().unwrap(); - let mut engine2 = Engine::new(false); + let serialized = engine.serialize().to_vec(); + let mut engine2 = Engine::default(); engine2.deserialize(&serialized).unwrap(); assert!( diff --git a/tests/live.rs b/tests/live.rs index c79288dc..fc75f278 100644 --- a/tests/live.rs +++ b/tests/live.rs @@ -229,6 
+229,7 @@ fn check_live_from_filterlists() { #[cfg(feature = "resource-assembler")] #[test] +#[ignore = "issues/499"] fn check_live_redirects() { use adblock::resources::resource_assembler::assemble_web_accessible_resources; @@ -281,11 +282,11 @@ fn check_live_redirects() { /// deserializing from it. fn stable_serialization_through_load() { let engine1 = Engine::from_filter_set(ALL_FILTERS.lock().unwrap().clone(), true); - let ser1 = engine1.serialize().unwrap(); + let ser1 = engine1.serialize().to_vec(); - let mut engine2 = Engine::new(true); + let mut engine2 = Engine::default(); engine2.deserialize(&ser1).unwrap(); - let ser2 = engine2.serialize().unwrap(); + let ser2 = engine2.serialize().to_vec(); assert_eq!(ser1, ser2); } diff --git a/tests/ublock-coverage.rs b/tests/ublock-coverage.rs index 2c3b406a..94aa7aca 100644 --- a/tests/ublock-coverage.rs +++ b/tests/ublock-coverage.rs @@ -174,7 +174,7 @@ fn check_specifics_default() { #[test] fn check_basic_works_after_deserialization() { let engine = get_blocker_engine(); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); let mut deserialized_engine = Engine::default(); deserialized_engine.deserialize(&serialized).unwrap(); diff --git a/tests/unit/blocker.rs b/tests/unit/blocker.rs index c4b9f0f7..4bd952a3 100644 --- a/tests/unit/blocker.rs +++ b/tests/unit/blocker.rs @@ -4,7 +4,7 @@ mod blocker_tests { use super::super::*; use crate::lists::parse_filters; use crate::request::Request; - use crate::resources::Resource; + use crate::resources::{Resource, ResourceStorage}; use base64::{engine::Engine as _, prelude::BASE64_STANDARD}; use std::collections::HashSet; use std::iter::FromIterator; @@ -85,15 +85,11 @@ mod blocker_tests { }; let blocker = Blocker::new(network_filters, &blocker_options); - let mut resources = ResourceStorage::default(); - - resources - .add_resource(Resource::simple( - "noop-0.1s.mp3", - crate::resources::MimeType::AudioMp3, - "mp3", - )) - 
.unwrap(); + let resources = ResourceStorage::in_memory_from_resources([Resource::simple( + "noop-0.1s.mp3", + crate::resources::MimeType::AudioMp3, + "mp3", + )]); let matched_rule = blocker.check(&request, &resources); assert!(!matched_rule.matched); @@ -129,15 +125,11 @@ mod blocker_tests { }; let blocker = Blocker::new(network_filters, &blocker_options); - let mut resources = ResourceStorage::default(); - - resources - .add_resource(Resource::simple( - "noop-0.1s.mp3", - crate::resources::MimeType::AudioMp3, - "mp3", - )) - .unwrap(); + let resources = ResourceStorage::in_memory_from_resources([Resource::simple( + "noop-0.1s.mp3", + crate::resources::MimeType::AudioMp3, + "mp3", + )]); let matched_rule = blocker.check(&request, &resources); assert!(!matched_rule.matched); @@ -168,15 +160,11 @@ mod blocker_tests { }; let blocker = Blocker::new(network_filters, &blocker_options); - let mut resources = ResourceStorage::default(); - - resources - .add_resource(Resource::simple( - "noop.txt", - crate::resources::MimeType::TextPlain, - "noop", - )) - .unwrap(); + let resources = ResourceStorage::in_memory_from_resources([Resource::simple( + "noop.txt", + crate::resources::MimeType::TextPlain, + "noop", + )]); let matched_rule = blocker.check(&request, &resources); assert!(matched_rule.matched); @@ -514,15 +502,11 @@ mod blocker_tests { }; let blocker = Blocker::new(network_filters, &blocker_options); - let mut resources = ResourceStorage::default(); - - resources - .add_resource(Resource::simple( - "noopjs", - crate::resources::MimeType::ApplicationJavascript, - "(() => {})()", - )) - .unwrap(); + let resources = ResourceStorage::in_memory_from_resources([Resource::simple( + "noopjs", + crate::resources::MimeType::ApplicationJavascript, + "(() => {})()", + )]); let result = blocker.check( &Request::new( @@ -979,26 +963,28 @@ mod blocker_tests { }; let blocker = Blocker::new(network_filters, &blocker_options); - let mut resources = ResourceStorage::default(); - fn 
add_simple_resource( - resources: &mut ResourceStorage, - identifier: &str, - ) -> Option { - resources - .add_resource(Resource::simple( - identifier, - crate::resources::MimeType::TextPlain, - identifier, - )) - .unwrap(); - Some(format!( + fn simple_resource(identifier: &str) -> Resource { + Resource::simple( + identifier, + crate::resources::MimeType::TextPlain, + identifier, + ) + } + fn simple_redirect(identifier: &str) -> String { + format!( "data:text/plain;base64,{}", BASE64_STANDARD.encode(identifier) - )) + ) } - let a_redirect = add_simple_resource(&mut resources, "a"); - let b_redirect = add_simple_resource(&mut resources, "b"); - let c_redirect = add_simple_resource(&mut resources, "c"); + let test_cases = ["a", "b", "c"]; + let resources = ResourceStorage::in_memory_from_resources(test_cases.map(simple_resource)); + let redirects = test_cases + .into_iter() + .map(simple_redirect) + .collect::>(); + let a_redirect = Some(redirects[0].clone()); + let b_redirect = Some(redirects[1].clone()); + let c_redirect = Some(redirects[2].clone()); let result = blocker.check( &Request::new( @@ -1472,17 +1458,17 @@ mod legacy_rule_parsing_tests { // Some filters in the filter_map are pointed at by multiple tokens, increasing the total number of items assert!( - blocker.exceptions.get_filter_map().total_size() - + blocker.generic_hide.get_filter_map().total_size() + blocker.exceptions().get_filter_map().total_size() + + blocker.generic_hide().get_filter_map().total_size() >= expectation.exceptions, "Number of collected exceptions does not match expectation" ); assert!( - blocker.filters.get_filter_map().total_size() - + blocker.importants.get_filter_map().total_size() - + blocker.redirects.get_filter_map().total_size() - + blocker.csp.get_filter_map().total_size() + blocker.filters().get_filter_map().total_size() + + blocker.importants().get_filter_map().total_size() + + blocker.redirects().get_filter_map().total_size() + + 
blocker.csp().get_filter_map().total_size() >= expectation.filters - expectation.duplicates, "Number of collected network filters does not match expectation" ); diff --git a/tests/unit/cosmetic_filter_cache.rs b/tests/unit/cosmetic_filter_cache.rs index 7af960dc..7b937f46 100644 --- a/tests/unit/cosmetic_filter_cache.rs +++ b/tests/unit/cosmetic_filter_cache.rs @@ -1,6 +1,6 @@ #[cfg(test)] mod key_from_selector_tests { - use super::super::key_from_selector; + use crate::cosmetic_filter_utils::key_from_selector; #[test] fn no_escapes() { @@ -203,7 +203,7 @@ mod cosmetic_cache_tests { "c.g.cosmetic.net#@#+js(nowebrtc.js)", "d.g.cosmetic.net#@#+js()", ]); - let resources = ResourceStorage::from_resources([ + let resources = ResourceStorage::in_memory_from_resources([ Resource { name: "set-constant.js".into(), aliases: vec![], @@ -665,7 +665,7 @@ mod cosmetic_cache_tests { .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) .collect::>(), ); - let resources = ResourceStorage::from_resources([Resource { + let resources = ResourceStorage::in_memory_from_resources([Resource { name: "abort-on-property-read.js".into(), aliases: vec!["aopr".to_string()], kind: ResourceType::Template, diff --git a/tests/unit/engine.rs b/tests/unit/engine.rs index 0a03a80c..03e3c032 100644 --- a/tests/unit/engine.rs +++ b/tests/unit/engine.rs @@ -153,7 +153,7 @@ mod tests { let mut engine = Engine::from_rules(filters, Default::default()); engine.enable_tags(&["stuff"]); engine.enable_tags(&["brian"]); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize(); let mut deserialized_engine = Engine::default(); deserialized_engine.enable_tags(&["stuff"]); deserialized_engine.deserialize(&serialized).unwrap(); @@ -182,8 +182,8 @@ mod tests { #[test] fn deserialization_generate_simple() { let mut engine = Engine::from_rules(["ad-banner"], Default::default()); - let data = engine.serialize().unwrap(); - const EXPECTED_HASH: u64 = 9023363977439833140; + 
let data = engine.serialize().to_vec(); + const EXPECTED_HASH: u64 = 884296823183764168; assert_eq!(hash(&data), EXPECTED_HASH, "{}", HASH_MISMATCH_MSG); engine.deserialize(&data).unwrap(); } @@ -192,8 +192,8 @@ mod tests { fn deserialization_generate_tags() { let mut engine = Engine::from_rules(["ad-banner$tag=abc"], Default::default()); engine.use_tags(&["abc"]); - let data = engine.serialize().unwrap(); - const EXPECTED_HASH: u64 = 17490165506820084756; + let data = engine.serialize().to_vec(); + const EXPECTED_HASH: u64 = 7887643884738497753; assert_eq!(hash(&data), EXPECTED_HASH, "{}", HASH_MISMATCH_MSG); engine.deserialize(&data).unwrap(); } @@ -207,7 +207,7 @@ mod tests { Resource::simple("noopcss", MimeType::TextCss, ""), ]); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); println!("Engine serialized: {:?}", serialized); engine.deserialize(&serialized).unwrap(); } @@ -216,12 +216,30 @@ mod tests { fn deserialization_brave_list() { let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]); let mut engine = Engine::from_rules_parametrised(rules, Default::default(), false, true); - let data = engine.serialize().unwrap(); + let data = engine.serialize().to_vec(); - let expected_hash = if cfg!(feature = "css-validation") { - 11154262451234023377 + #[cfg(feature = "debug-info")] + { + let debug_info = engine.get_debug_info(); + let low_bound = 9_500_000; + let high_bound = 10_000_000; + assert!( + debug_info.flatbuffer_size >= low_bound, + "Expected size >= {} bytes, got {}", + low_bound, + debug_info.flatbuffer_size + ); + assert!( + debug_info.flatbuffer_size <= high_bound, + "Expected size <= {} bytes, got {}", + high_bound, + debug_info.flatbuffer_size + ); + } + let expected_hash: u64 = if cfg!(feature = "css-validation") { + 1870862363610703254 } else { - 48716029470216845 + 17169786507112655088 }; assert_eq!(hash(&data), expected_hash, "{}", HASH_MISMATCH_MSG); @@ -492,13 +510,11 @@ mod tests { ], 
Default::default()); let mut engine = Engine::from_filter_set(filter_set, false); - engine - .add_resource(Resource::simple( - "addthis.com/addthis_widget.js", - MimeType::ApplicationJavascript, - "window.addthis = undefined", - )) - .unwrap(); + engine.use_resources([Resource::simple( + "addthis.com/addthis_widget.js", + MimeType::ApplicationJavascript, + "window.addthis = undefined", + )]); let request = Request::new("https://s7.addthis.com/js/250/addthis_widget.js?pub=resto", "https://www.rhmodern.com/catalog/product/product.jsp?productId=prod14970086&categoryId=cat7150028", "script").unwrap(); let result = engine.check_network_request(&request); diff --git a/tests/unit/filters/network_matchers.rs b/tests/unit/filters/network_matchers.rs index 54392eab..96be7d58 100644 --- a/tests/unit/filters/network_matchers.rs +++ b/tests/unit/filters/network_matchers.rs @@ -678,7 +678,7 @@ mod match_tests { #[test] #[ignore] // Not going to handle lookaround regexes - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] fn check_lookaround_regex_handled() { { let filter = r#"/^https?:\/\/([0-9a-z\-]+\.)?(9anime|animeland|animenova|animeplus|animetoon|animewow|gamestorrent|goodanime|gogoanime|igg-games|kimcartoon|memecenter|readcomiconline|toonget|toonova|watchcartoononline)\.[a-z]{2,4}\/(?!([Ee]xternal|[Ii]mages|[Ss]cripts|[Uu]ploads|ac|ajax|assets|combined|content|cov|cover|(img\/bg)|(img\/icon)|inc|jwplayer|player|playlist-cat-rss|static|thumbs|wp-content|wp-includes)\/)(.*)/$image,other,script,~third-party,xmlhttprequest,domain=~animeland.hu"#; diff --git a/tests/unit/flatbuffers/containers/flat_map.rs b/tests/unit/flatbuffers/containers/flat_map.rs new file mode 100644 index 00000000..80be0cfb --- /dev/null +++ b/tests/unit/flatbuffers/containers/flat_map.rs @@ -0,0 +1,95 @@ +#[allow(unknown_lints)] +#[allow( + dead_code, + clippy::all, + unused_imports, + unsafe_code, + mismatched_lifetime_syntaxes +)] +#[path = "./test_containers_generated.rs"] +pub 
mod flat; +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use super::super::*; + use super::flat::fb_test; + + // Helper function to create a Vector from a slice + fn create_vector_u32<'a>( + builder: &'a mut flatbuffers::FlatBufferBuilder, + data: &'a [u32], + ) -> flatbuffers::Vector<'a, u32> { + let vec_offset = builder.create_vector(data); + builder.finish(vec_offset, None); + let buf = builder.finished_data(); + flatbuffers::root::>(buf).unwrap() + } + + #[test] + fn test_empty_map() { + let index: &[u32] = &[]; + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let values = create_vector_u32(&mut builder, &[]); + let map = FlatMapView::new(index, values); + + assert_eq!(map.len(), 0); + assert!(map.get(1).is_none()); + } + + #[test] + fn test_multiple_elements() { + let index: &[u32] = &[1, 2, 4, 6, 100, 102]; + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let values = create_vector_u32(&mut builder, &[10, 20, 30, 40, 50, 60]); + + let map = FlatMapView::new(index, values); + + assert_eq!(map.len(), 6); + + assert_eq!(map.get(2), Some(20)); + assert_eq!(map.get(4), Some(30)); + assert_eq!(map.get(100), Some(50)); + assert_eq!(map.get(102), Some(60)); + assert!(map.get(103).is_none()); + } + + #[test] + fn test_string_builder() { + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let mut map = HashMap::new(); + map.insert("b", "20"); + map.insert("a", "10"); + map.insert("c", "30"); + let map = FlatMapBuilder::finish(map, &mut builder); + + // Serialize to the test flatbuffer. + let test_map = fb_test::TestStringMap::create( + &mut builder, + &fb_test::TestStringMapArgs { + keys: Some(map.keys), + values: Some(map.values), + }, + ); + let root = fb_test::TestRoot::create( + &mut builder, + &fb_test::TestRootArgs { + test_string_map: Some(test_map), + ..Default::default() + }, + ); + builder.finish(root, None); + + // Load from the serialized test flatbuffer. 
+ let data = builder.finished_data(); + let root = fb_test::root_as_test_root(data).unwrap(); + let flat_map = root.test_string_map().unwrap(); + let map = FlatMapView::new(flat_map.keys(), flat_map.values()); + + assert_eq!(map.get("a").unwrap(), "10"); + assert_eq!(map.get("b").unwrap(), "20"); + assert_eq!(map.get("c").unwrap(), "30"); + assert!(map.get("d").is_none()); + assert!(map.get("").is_none()); + } +} diff --git a/tests/unit/flatbuffers/containers/flat_multimap.rs b/tests/unit/flatbuffers/containers/flat_multimap.rs new file mode 100644 index 00000000..a962c2ea --- /dev/null +++ b/tests/unit/flatbuffers/containers/flat_multimap.rs @@ -0,0 +1,202 @@ +#[allow(unknown_lints)] +#[allow( + dead_code, + clippy::all, + unused_imports, + unsafe_code, + mismatched_lifetime_syntaxes +)] +#[path = "./test_containers_generated.rs"] +pub mod flat; +#[cfg(test)] +mod tests { + use super::super::*; + use super::flat::fb_test; + + // Helper function to create a Vector from a slice + fn create_vector_u32<'a>( + builder: &'a mut flatbuffers::FlatBufferBuilder, + data: &'a [u32], + ) -> flatbuffers::Vector<'a, u32> { + let vec_offset = builder.create_vector(data); + builder.finish(vec_offset, None); + let buf = builder.finished_data(); + flatbuffers::root::>(buf).unwrap() + } + + #[test] + fn test_empty_map() { + let index: &[u32] = &[]; + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let values = create_vector_u32(&mut builder, &[]); + let map = FlatMultiMapView::new(index, values); + + assert_eq!(map.total_size(), 0); + assert!(map.get(1).is_none()); + } + + #[test] + fn test_single_element() { + let index: &[u32] = &[1]; + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let values = create_vector_u32(&mut builder, &[100]); + let map = FlatMultiMapView::new(index, values); + + assert_eq!(map.total_size(), 1); + + // Test existing key + let mut iter = map.get(1).unwrap(); + assert_eq!(iter.next(), Some((0, 100))); + assert_eq!(iter.next(), None); 
+ + // Test non-existing key + assert!(map.get(2).is_none()); + } + + #[test] + fn test_multiple_elements() { + let index: &[u32] = &[1, 1, 2, 2, 2, 3]; + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let values = create_vector_u32(&mut builder, &[10, 20, 30, 40, 50, 60]); + + let map = FlatMultiMapView::new(index, values); + + assert_eq!(map.total_size(), 6); + + // Test key with single value + let mut iter = map.get(3).unwrap(); + assert_eq!(iter.next(), Some((5, 60))); + assert_eq!(iter.next(), None); + + // Test key with multiple values + let mut iter = map.get(2).unwrap(); + assert_eq!(iter.next(), Some((2, 30))); + assert_eq!(iter.next(), Some((3, 40))); + assert_eq!(iter.next(), Some((4, 50))); + assert_eq!(iter.next(), None); + + // Test non-existing key + assert!(map.get(4).is_none()); + } + + #[test] + fn test_all_same_keys() { + let index: &[u32] = &[5, 5, 5]; + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let values = create_vector_u32(&mut builder, &[100, 200, 300]); + let map = FlatMultiMapView::new(index, values); + + assert_eq!(map.total_size(), 3); + + let mut iter = map.get(5).unwrap(); + assert_eq!(iter.next(), Some((0, 100))); + assert_eq!(iter.next(), Some((1, 200))); + assert_eq!(iter.next(), Some((2, 300))); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_non_contiguous_keys() { + let index: &[u32] = &[1, 3, 5]; + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let values = create_vector_u32(&mut builder, &[10, 30, 50]); + let map = FlatMultiMapView::new(index, values); + + assert_eq!(map.total_size(), 3); + + assert_eq!(map.get(1).unwrap().next(), Some((0, 10))); + assert_eq!(map.get(3).unwrap().next(), Some((1, 30))); + assert_eq!(map.get(5).unwrap().next(), Some((2, 50))); + assert!(map.get(2).is_none()); + assert!(map.get(4).is_none()); + } + + #[test] + fn test_uint_builder() { + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let mut map = FlatMultiMapBuilder::::default(); + 
map.insert(2, 20); + map.insert(1, 10); + map.insert(2, 30); + let map = FlatMultiMapBuilder::finish(map, &mut builder); + + // Serialize to the test flatbuffer. + let test_map = fb_test::TestUIntMap::create( + &mut builder, + &fb_test::TestUIntMapArgs { + keys: Some(map.keys), + values: Some(map.values), + }, + ); + + let root = fb_test::TestRoot::create( + &mut builder, + &fb_test::TestRootArgs { + test_uint_map: Some(test_map), + ..Default::default() + }, + ); + builder.finish(root, None); + + // Load from the serialized test flatbuffer. + use crate::flatbuffers::unsafe_tools::fb_vector_to_slice; + let data = builder.finished_data(); + let root = fb_test::root_as_test_root(data).unwrap(); + let flat_map = root.test_uint_map().unwrap(); + let map = FlatMultiMapView::::new( + fb_vector_to_slice(flat_map.keys()), + flat_map.values(), + ); + + assert_eq!(map.total_size(), 3); + assert_eq!(map.get(1).unwrap().collect::>(), vec![(0, 10)]); + assert_eq!( + map.get(2).unwrap().collect::>(), + vec![(1, 20), (2, 30)] + ); + assert!(map.get(0).is_none()); + assert!(map.get(3).is_none()); + } + + #[test] + fn test_string_builder() { + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let mut map = FlatMultiMapBuilder::<&str, &str>::default(); + map.insert("b", "20"); + map.insert("a", "10"); + map.insert("b", "30"); + let map = FlatMultiMapBuilder::finish(map, &mut builder); + + // Serialize to the test flatbuffer. + let test_map = fb_test::TestStringMap::create( + &mut builder, + &fb_test::TestStringMapArgs { + keys: Some(map.keys), + values: Some(map.values), + }, + ); + let root = fb_test::TestRoot::create( + &mut builder, + &fb_test::TestRootArgs { + test_string_map: Some(test_map), + ..Default::default() + }, + ); + builder.finish(root, None); + + // Load from the serialized test flatbuffer. 
+ let data = builder.finished_data(); + let root = fb_test::root_as_test_root(data).unwrap(); + let flat_map = root.test_string_map().unwrap(); + let map = FlatMultiMapView::new(flat_map.keys(), flat_map.values()); + + assert_eq!(map.total_size(), 3); + assert_eq!(map.get("a").unwrap().collect::>(), vec![(0, "10")]); + assert_eq!( + map.get("b").unwrap().collect::>(), + vec![(1, "20"), (2, "30")] + ); + assert!(map.get("c").is_none()); + assert!(map.get("d").is_none()); + } +} diff --git a/tests/unit/flatbuffers/containers/flat_set.rs b/tests/unit/flatbuffers/containers/flat_set.rs new file mode 100644 index 00000000..21d7145d --- /dev/null +++ b/tests/unit/flatbuffers/containers/flat_set.rs @@ -0,0 +1,94 @@ +#[allow(unknown_lints)] +#[allow( + dead_code, + clippy::all, + unused_imports, + unsafe_code, + mismatched_lifetime_syntaxes +)] +#[path = "./test_containers_generated.rs"] +pub mod flat; +#[cfg(test)] +mod tests { + use std::collections::HashSet; + + use super::super::*; + use super::flat::fb_test; + + #[test] + fn test_flat_set_view() { + let data = vec![1, 2, 2, 3, 4, 4, 4, 5]; + let set = FlatSetView::::new(&data); + + // Test contains + assert!(set.contains(1)); + assert!(set.contains(2)); + assert!(set.contains(4)); + assert!(!set.contains(6)); + + // Test len + assert_eq!(set.len(), 8); + } + + #[test] + fn test_uint_builder() { + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let mut set = HashSet::::default(); + set.insert(2); + set.insert(1); + set.insert(2); + let set = FlatSerialize::serialize(set, &mut builder); + + // Serialize to the test flatbuffer. + let root = fb_test::TestRoot::create( + &mut builder, + &fb_test::TestRootArgs { + test_uint_set: Some(set), + ..Default::default() + }, + ); + builder.finish(root, None); + + // Load from the serialized test flatbuffer. 
+ use crate::flatbuffers::unsafe_tools::fb_vector_to_slice; + let data = builder.finished_data(); + let root = fb_test::root_as_test_root(data).unwrap(); + let set = + FlatSetView::::new(fb_vector_to_slice(root.test_uint_set().unwrap())); + + assert_eq!(set.len(), 2); + assert!(set.contains(1)); + assert!(set.contains(2)); + assert!(!set.contains(3)); + } + + #[test] + fn test_string_builder() { + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let mut set = HashSet::<&str>::default(); + set.insert("b"); + set.insert("a"); + set.insert("b"); + let set = FlatSerialize::serialize(set, &mut builder); + + // Serialize to the test flatbuffer. + let root = fb_test::TestRoot::create( + &mut builder, + &fb_test::TestRootArgs { + test_string_set: Some(set), + ..Default::default() + }, + ); + builder.finish(root, None); + + // Load from the serialized test flatbuffer. + let data = builder.finished_data(); + let root = fb_test::root_as_test_root(data).unwrap(); + let set = FlatSetView::new(root.test_string_set().unwrap()); + + assert_eq!(set.len(), 2); + assert!(set.contains("a")); + assert!(set.contains("b")); + assert!(!set.contains("c")); + } +} diff --git a/tests/unit/flatbuffers/containers/hash_map.rs b/tests/unit/flatbuffers/containers/hash_map.rs new file mode 100644 index 00000000..c38c012d --- /dev/null +++ b/tests/unit/flatbuffers/containers/hash_map.rs @@ -0,0 +1,103 @@ +#[allow(unknown_lints)] +#[allow( + dead_code, + clippy::all, + unused_imports, + unsafe_code, + mismatched_lifetime_syntaxes +)] +#[path = "./test_containers_generated.rs"] +pub mod flat; +#[cfg(test)] +mod tests { + use super::super::*; + use super::flat::fb_test; + + fn serialize_map(values: Vec<(&str, &str)>) -> Vec { + let mut builder = HashMapBuilder::default(); + for (key, value) in values { + builder.insert(key.to_string(), value.to_string()); + } + serialize_builder(builder) + } + + fn serialize_builder(builder: HashMapBuilder) -> Vec { + let mut flat_builder = 
flatbuffers::FlatBufferBuilder::new(); + let map = HashMapBuilder::finish(builder, &mut flat_builder); + let map_serialized = fb_test::TestStringMap::create( + &mut flat_builder, + &fb_test::TestStringMapArgs { + keys: Some(map.keys), + values: Some(map.values), + }, + ); + let root = fb_test::TestRoot::create( + &mut flat_builder, + &fb_test::TestRootArgs { + test_string_map: Some(map_serialized), + ..Default::default() + }, + ); + flat_builder.finish(root, None); + flat_builder.finished_data().to_vec() + } + + fn load_map<'a>(data: &'a [u8]) -> HashMapStringView<'a, &'a str> { + let root = fb_test::root_as_test_root(data).unwrap(); + let flat_map = root.test_string_map().unwrap(); + HashMapView::new(flat_map.keys(), flat_map.values()) + } + + #[test] + fn test_empty_map() { + let values = vec![]; + let data = serialize_map(values); + let map = load_map(&data); + assert_eq!(map.len(), 0); + assert_eq!(map.capacity(), 4); + assert!(map.get("a").is_none()); + } + + #[test] + fn test_duplicate_keys() { + let values = vec![("b", "20"), ("a", "10"), ("b", "30")]; + let data = serialize_map(values); + let map = load_map(&data); + assert_eq!(map.len(), 2); + assert_eq!(map.capacity(), 4); + assert_eq!(map.get("a").unwrap(), "10"); + assert_eq!(map.get("b").unwrap(), "30"); + } + + #[test] + fn test_builder_getters() { + let mut builder = HashMapBuilder::default(); + builder.insert("a".to_string(), "10".to_string()); + assert_eq!( + builder.get_or_insert("a".to_string(), "20".to_string()), + "10" + ); + assert_eq!( + builder.get_or_insert("b".to_string(), "20".to_string()), + "20" + ); + let data = serialize_builder(builder); + let map = load_map(&data); + assert_eq!(map.get("a").unwrap(), "10"); + assert_eq!(map.get("b").unwrap(), "20"); + assert!(map.get("c").is_none()); + } + + #[test] + fn test_string_builder() { + let values = vec![("b", "20"), ("a", "10"), ("c", "30")]; + let data = serialize_map(values); + let map = load_map(&data); + + 
assert_eq!(map.get("a").unwrap(), "10"); + assert_eq!(map.get("b").unwrap(), "20"); + assert_eq!(map.get("c").unwrap(), "30"); + assert!(map.get("d").is_none()); + assert!(map.get("").is_none()); + } +} diff --git a/tests/unit/flatbuffers/containers/hash_set.rs b/tests/unit/flatbuffers/containers/hash_set.rs new file mode 100644 index 00000000..47b8fce2 --- /dev/null +++ b/tests/unit/flatbuffers/containers/hash_set.rs @@ -0,0 +1,76 @@ +#[allow(unknown_lints)] +#[allow( + dead_code, + clippy::all, + unused_imports, + unsafe_code, + mismatched_lifetime_syntaxes +)] +#[path = "./test_containers_generated.rs"] +pub mod flat; +#[cfg(test)] +mod tests { + use super::super::*; + use super::flat::fb_test; + + fn serialize_set(values: Vec<&str>) -> Vec { + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let mut set = HashSetBuilder::default(); + for value in values { + set.insert(value.to_string()); + } + let test_string_set = Some(FlatSerialize::serialize(set, &mut builder)); + + let root = fb_test::TestRoot::create( + &mut builder, + &fb_test::TestRootArgs { + test_string_set, + ..Default::default() + }, + ); + builder.finish(root, None); + builder.finished_data().to_vec() + } + + fn load_set<'a>( + data: &'a [u8], + ) -> HashSetView<&'a str, flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>>> { + let root = fb_test::root_as_test_root(data).unwrap(); + let flat_set = root.test_string_set().unwrap(); + HashSetView::new(flat_set) + } + + #[test] + fn test_empty_map() { + let values = vec![]; + let data = serialize_set(values); + let set = load_set(&data); + assert_eq!(set.len(), 0); + assert_eq!(set.capacity(), 4); + assert!(!set.contains("a")); + } + + #[test] + fn test_duplicate_keys() { + let values = vec!["b", "a", "b"]; + let data = serialize_set(values); + let set = load_set(&data); + assert_eq!(set.len(), 2); + assert_eq!(set.capacity(), 4); + assert!(set.contains("a")); + assert!(set.contains("b")); + } + + #[test] + fn test_string_builder() { 
+ let values = vec!["b", "a", "c"]; + let data = serialize_set(values); + let set = load_set(&data); + + assert!(set.contains("a")); + assert!(set.contains("b")); + assert!(set.contains("c")); + assert!(!set.contains("d")); + assert!(!set.contains("")); + } +} diff --git a/tests/unit/flatbuffers/containers/test_containers.fbs b/tests/unit/flatbuffers/containers/test_containers.fbs new file mode 100644 index 00000000..f44d519d --- /dev/null +++ b/tests/unit/flatbuffers/containers/test_containers.fbs @@ -0,0 +1,25 @@ +// A test flatbuffer that is used in unit_tests. +// To build *_generated.rs run: +// 1. flatc --rust --gen-object-api -o tests/unit/flatbuffers/containers/ tests/unit/flatbuffers/containers/test_containers.fbs +// 2. cargo fmt +namespace fb_test; + +table TestUIntMap { + keys: [uint64] (required); + values: [uint32] (required); +} + +table TestStringMap { + keys: [string] (required); + values: [string] (required); +} + +table TestRoot { + test_uint_map: TestUIntMap; + test_string_map: TestStringMap; + + test_uint_set: [uint64]; + test_string_set: [string]; +} + +root_type TestRoot; diff --git a/tests/unit/flatbuffers/containers/test_containers_generated.rs b/tests/unit/flatbuffers/containers/test_containers_generated.rs new file mode 100644 index 00000000..75ee889d --- /dev/null +++ b/tests/unit/flatbuffers/containers/test_containers_generated.rs @@ -0,0 +1,785 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +// @generated + +use core::cmp::Ordering; +use core::mem; + +extern crate flatbuffers; +use self::flatbuffers::{EndianScalar, Follow}; + +#[allow(unused_imports, dead_code)] +pub mod fb_test { + + use core::cmp::Ordering; + use core::mem; + + extern crate flatbuffers; + use self::flatbuffers::{EndianScalar, Follow}; + + pub enum TestUIntMapOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct TestUIntMap<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for TestUIntMap<'a> { + 
type Inner = TestUIntMap<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } + } + + impl<'a> TestUIntMap<'a> { + pub const VT_KEYS: flatbuffers::VOffsetT = 4; + pub const VT_VALUES: flatbuffers::VOffsetT = 6; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + TestUIntMap { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args TestUIntMapArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = TestUIntMapBuilder::new(_fbb); + if let Some(x) = args.values { + builder.add_values(x); + } + if let Some(x) = args.keys { + builder.add_keys(x); + } + builder.finish() + } + + pub fn unpack(&self) -> TestUIntMapT { + let keys = { + let x = self.keys(); + x.into_iter().collect() + }; + let values = { + let x = self.values(); + x.into_iter().collect() + }; + TestUIntMapT { keys, values } + } + + #[inline] + pub fn keys(&self) -> flatbuffers::Vector<'a, u64> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + TestUIntMap::VT_KEYS, + None, + ) + .unwrap() + } + } + #[inline] + pub fn values(&self) -> flatbuffers::Vector<'a, u32> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + TestUIntMap::VT_VALUES, + None, + ) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for TestUIntMap<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>>( + "keys", + Self::VT_KEYS, + true, + )? 
+ .visit_field::>>( + "values", + Self::VT_VALUES, + true, + )? + .finish(); + Ok(()) + } + } + pub struct TestUIntMapArgs<'a> { + pub keys: Option>>, + pub values: Option>>, + } + impl<'a> Default for TestUIntMapArgs<'a> { + #[inline] + fn default() -> Self { + TestUIntMapArgs { + keys: None, // required field + values: None, // required field + } + } + } + + pub struct TestUIntMapBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> TestUIntMapBuilder<'a, 'b, A> { + #[inline] + pub fn add_keys(&mut self, keys: flatbuffers::WIPOffset>) { + self.fbb_ + .push_slot_always::>(TestUIntMap::VT_KEYS, keys); + } + #[inline] + pub fn add_values(&mut self, values: flatbuffers::WIPOffset>) { + self.fbb_ + .push_slot_always::>(TestUIntMap::VT_VALUES, values); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> TestUIntMapBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + TestUIntMapBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, TestUIntMap::VT_KEYS, "keys"); + self.fbb_.required(o, TestUIntMap::VT_VALUES, "values"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for TestUIntMap<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("TestUIntMap"); + ds.field("keys", &self.keys()); + ds.field("values", &self.values()); + ds.finish() + } + } + #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] + pub struct TestUIntMapT { + pub keys: Vec, + pub values: Vec, + } + impl Default for TestUIntMapT { + fn default() -> Self { + Self { + keys: Default::default(), + values: Default::default(), + } + } + } + impl TestUIntMapT { + pub fn pack<'b, A: flatbuffers::Allocator + 'b>( + &self, 
+ _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, + ) -> flatbuffers::WIPOffset> { + let keys = Some({ + let x = &self.keys; + _fbb.create_vector(x) + }); + let values = Some({ + let x = &self.values; + _fbb.create_vector(x) + }); + TestUIntMap::create(_fbb, &TestUIntMapArgs { keys, values }) + } + } + pub enum TestStringMapOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct TestStringMap<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for TestStringMap<'a> { + type Inner = TestStringMap<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } + } + + impl<'a> TestStringMap<'a> { + pub const VT_KEYS: flatbuffers::VOffsetT = 4; + pub const VT_VALUES: flatbuffers::VOffsetT = 6; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + TestStringMap { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args TestStringMapArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = TestStringMapBuilder::new(_fbb); + if let Some(x) = args.values { + builder.add_values(x); + } + if let Some(x) = args.keys { + builder.add_keys(x); + } + builder.finish() + } + + pub fn unpack(&self) -> TestStringMapT { + let keys = { + let x = self.keys(); + x.iter().map(|s| s.to_string()).collect() + }; + let values = { + let x = self.values(); + x.iter().map(|s| s.to_string()).collect() + }; + TestStringMapT { keys, values } + } + + #[inline] + pub fn keys(&self) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(TestStringMap::VT_KEYS, None) + .unwrap() + } + } + #[inline] + pub fn values(&self) -> 
flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(TestStringMap::VT_VALUES, None) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for TestStringMap<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>, + >>("keys", Self::VT_KEYS, true)? + .visit_field::>, + >>("values", Self::VT_VALUES, true)? + .finish(); + Ok(()) + } + } + pub struct TestStringMapArgs<'a> { + pub keys: Option< + flatbuffers::WIPOffset>>, + >, + pub values: Option< + flatbuffers::WIPOffset>>, + >, + } + impl<'a> Default for TestStringMapArgs<'a> { + #[inline] + fn default() -> Self { + TestStringMapArgs { + keys: None, // required field + values: None, // required field + } + } + } + + pub struct TestStringMapBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> TestStringMapBuilder<'a, 'b, A> { + #[inline] + pub fn add_keys( + &mut self, + keys: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_ + .push_slot_always::>(TestStringMap::VT_KEYS, keys); + } + #[inline] + pub fn add_values( + &mut self, + values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_ + .push_slot_always::>(TestStringMap::VT_VALUES, values); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> TestStringMapBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + TestStringMapBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = 
self.fbb_.end_table(self.start_); + self.fbb_.required(o, TestStringMap::VT_KEYS, "keys"); + self.fbb_.required(o, TestStringMap::VT_VALUES, "values"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for TestStringMap<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("TestStringMap"); + ds.field("keys", &self.keys()); + ds.field("values", &self.values()); + ds.finish() + } + } + #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] + pub struct TestStringMapT { + pub keys: Vec, + pub values: Vec, + } + impl Default for TestStringMapT { + fn default() -> Self { + Self { + keys: Default::default(), + values: Default::default(), + } + } + } + impl TestStringMapT { + pub fn pack<'b, A: flatbuffers::Allocator + 'b>( + &self, + _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, + ) -> flatbuffers::WIPOffset> { + let keys = Some({ + let x = &self.keys; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let values = Some({ + let x = &self.values; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + TestStringMap::create(_fbb, &TestStringMapArgs { keys, values }) + } + } + pub enum TestRootOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct TestRoot<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for TestRoot<'a> { + type Inner = TestRoot<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } + } + + impl<'a> TestRoot<'a> { + pub const VT_TEST_UINT_MAP: flatbuffers::VOffsetT = 4; + pub const VT_TEST_STRING_MAP: flatbuffers::VOffsetT = 6; + pub const VT_TEST_UINT_SET: flatbuffers::VOffsetT = 8; + pub const VT_TEST_STRING_SET: flatbuffers::VOffsetT = 10; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + TestRoot { _tab: table } 
+ } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args TestRootArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = TestRootBuilder::new(_fbb); + if let Some(x) = args.test_string_set { + builder.add_test_string_set(x); + } + if let Some(x) = args.test_uint_set { + builder.add_test_uint_set(x); + } + if let Some(x) = args.test_string_map { + builder.add_test_string_map(x); + } + if let Some(x) = args.test_uint_map { + builder.add_test_uint_map(x); + } + builder.finish() + } + + pub fn unpack(&self) -> TestRootT { + let test_uint_map = self.test_uint_map().map(|x| Box::new(x.unpack())); + let test_string_map = self.test_string_map().map(|x| Box::new(x.unpack())); + let test_uint_set = self.test_uint_set().map(|x| x.into_iter().collect()); + let test_string_set = self + .test_string_set() + .map(|x| x.iter().map(|s| s.to_string()).collect()); + TestRootT { + test_uint_map, + test_string_map, + test_uint_set, + test_string_set, + } + } + + #[inline] + pub fn test_uint_map(&self) -> Option> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>( + TestRoot::VT_TEST_UINT_MAP, + None, + ) + } + } + #[inline] + pub fn test_string_map(&self) -> Option> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>( + TestRoot::VT_TEST_STRING_MAP, + None, + ) + } + } + #[inline] + pub fn test_uint_set(&self) -> Option> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + TestRoot::VT_TEST_UINT_SET, + None, + ) + } + } + #[inline] + pub fn test_string_set( + &self, + ) -> Option>> { + // Safety: + // Created from valid Table for this object + // which 
contains a valid value in this slot + unsafe { + self._tab.get::>, + >>(TestRoot::VT_TEST_STRING_SET, None) + } + } + } + + impl flatbuffers::Verifiable for TestRoot<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>( + "test_uint_map", + Self::VT_TEST_UINT_MAP, + false, + )? + .visit_field::>( + "test_string_map", + Self::VT_TEST_STRING_MAP, + false, + )? + .visit_field::>>( + "test_uint_set", + Self::VT_TEST_UINT_SET, + false, + )? + .visit_field::>, + >>("test_string_set", Self::VT_TEST_STRING_SET, false)? + .finish(); + Ok(()) + } + } + pub struct TestRootArgs<'a> { + pub test_uint_map: Option>>, + pub test_string_map: Option>>, + pub test_uint_set: Option>>, + pub test_string_set: Option< + flatbuffers::WIPOffset>>, + >, + } + impl<'a> Default for TestRootArgs<'a> { + #[inline] + fn default() -> Self { + TestRootArgs { + test_uint_map: None, + test_string_map: None, + test_uint_set: None, + test_string_set: None, + } + } + } + + pub struct TestRootBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> TestRootBuilder<'a, 'b, A> { + #[inline] + pub fn add_test_uint_map( + &mut self, + test_uint_map: flatbuffers::WIPOffset>, + ) { + self.fbb_ + .push_slot_always::>( + TestRoot::VT_TEST_UINT_MAP, + test_uint_map, + ); + } + #[inline] + pub fn add_test_string_map( + &mut self, + test_string_map: flatbuffers::WIPOffset>, + ) { + self.fbb_ + .push_slot_always::>( + TestRoot::VT_TEST_STRING_MAP, + test_string_map, + ); + } + #[inline] + pub fn add_test_uint_set( + &mut self, + test_uint_set: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + TestRoot::VT_TEST_UINT_SET, + test_uint_set, + ); + } + #[inline] + pub fn add_test_string_set( + &mut self, + 
test_string_set: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + TestRoot::VT_TEST_STRING_SET, + test_string_set, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> TestRootBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + TestRootBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for TestRoot<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("TestRoot"); + ds.field("test_uint_map", &self.test_uint_map()); + ds.field("test_string_map", &self.test_string_map()); + ds.field("test_uint_set", &self.test_uint_set()); + ds.field("test_string_set", &self.test_string_set()); + ds.finish() + } + } + #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] + pub struct TestRootT { + pub test_uint_map: Option>, + pub test_string_map: Option>, + pub test_uint_set: Option>, + pub test_string_set: Option>, + } + impl Default for TestRootT { + fn default() -> Self { + Self { + test_uint_map: None, + test_string_map: None, + test_uint_set: None, + test_string_set: None, + } + } + } + impl TestRootT { + pub fn pack<'b, A: flatbuffers::Allocator + 'b>( + &self, + _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, + ) -> flatbuffers::WIPOffset> { + let test_uint_map = self.test_uint_map.as_ref().map(|x| x.pack(_fbb)); + let test_string_map = self.test_string_map.as_ref().map(|x| x.pack(_fbb)); + let test_uint_set = self.test_uint_set.as_ref().map(|x| _fbb.create_vector(x)); + let test_string_set = self.test_string_set.as_ref().map(|x| { + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + TestRoot::create( + _fbb, + &TestRootArgs { + test_uint_map, + 
test_string_map, + test_uint_set, + test_string_set, + }, + ) + } + } + #[inline] + /// Verifies that a buffer of bytes contains a `TestRoot` + /// and returns it. + /// Note that verification is still experimental and may not + /// catch every error, or be maximally performant. For the + /// previous, unchecked, behavior use + /// `root_as_test_root_unchecked`. + pub fn root_as_test_root(buf: &[u8]) -> Result { + flatbuffers::root::(buf) + } + #[inline] + /// Verifies that a buffer of bytes contains a size prefixed + /// `TestRoot` and returns it. + /// Note that verification is still experimental and may not + /// catch every error, or be maximally performant. For the + /// previous, unchecked, behavior use + /// `size_prefixed_root_as_test_root_unchecked`. + pub fn size_prefixed_root_as_test_root( + buf: &[u8], + ) -> Result { + flatbuffers::size_prefixed_root::(buf) + } + #[inline] + /// Verifies, with the given options, that a buffer of bytes + /// contains a `TestRoot` and returns it. + /// Note that verification is still experimental and may not + /// catch every error, or be maximally performant. For the + /// previous, unchecked, behavior use + /// `root_as_test_root_unchecked`. + pub fn root_as_test_root_with_opts<'b, 'o>( + opts: &'o flatbuffers::VerifierOptions, + buf: &'b [u8], + ) -> Result, flatbuffers::InvalidFlatbuffer> { + flatbuffers::root_with_opts::>(opts, buf) + } + #[inline] + /// Verifies, with the given verifier options, that a buffer of + /// bytes contains a size prefixed `TestRoot` and returns + /// it. Note that verification is still experimental and may not + /// catch every error, or be maximally performant. For the + /// previous, unchecked, behavior use + /// `root_as_test_root_unchecked`. 
+ pub fn size_prefixed_root_as_test_root_with_opts<'b, 'o>( + opts: &'o flatbuffers::VerifierOptions, + buf: &'b [u8], + ) -> Result, flatbuffers::InvalidFlatbuffer> { + flatbuffers::size_prefixed_root_with_opts::>(opts, buf) + } + #[inline] + /// Assumes, without verification, that a buffer of bytes contains a TestRoot and returns it. + /// # Safety + /// Callers must trust the given bytes do indeed contain a valid `TestRoot`. + pub unsafe fn root_as_test_root_unchecked(buf: &[u8]) -> TestRoot { + flatbuffers::root_unchecked::(buf) + } + #[inline] + /// Assumes, without verification, that a buffer of bytes contains a size prefixed TestRoot and returns it. + /// # Safety + /// Callers must trust the given bytes do indeed contain a valid size prefixed `TestRoot`. + pub unsafe fn size_prefixed_root_as_test_root_unchecked(buf: &[u8]) -> TestRoot { + flatbuffers::size_prefixed_root_unchecked::(buf) + } + #[inline] + pub fn finish_test_root_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>( + fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + root: flatbuffers::WIPOffset>, + ) { + fbb.finish(root, None); + } + + #[inline] + pub fn finish_size_prefixed_test_root_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>( + fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + root: flatbuffers::WIPOffset>, + ) { + fbb.finish_size_prefixed(root, None); + } +} // pub mod fb_test diff --git a/tests/unit/regex_manager.rs b/tests/unit/regex_manager.rs index 8ffe1ff4..fdd9d107 100644 --- a/tests/unit/regex_manager.rs +++ b/tests/unit/regex_manager.rs @@ -1,4 +1,4 @@ -#[cfg(all(test, feature = "regex-debug-info"))] +#[cfg(all(test, feature = "debug-info"))] mod tests { use super::super::*; diff --git a/tests/unit/resources/resource_storage.rs b/tests/unit/resources/resource_storage.rs index b2b00054..9a4dd65b 100644 --- a/tests/unit/resources/resource_storage.rs +++ b/tests/unit/resources/resource_storage.rs @@ -93,7 +93,7 @@ mod redirect_storage_tests { #[test] fn get_resource_by_name() { 
- let mut storage = ResourceStorage::default(); + let mut storage = InMemoryResourceStorage::default(); storage .add_resource(Resource::simple( "name.js", @@ -102,6 +102,10 @@ mod redirect_storage_tests { )) .unwrap(); + let storage = ResourceStorage { + backend: Box::new(storage), + }; + assert_eq!( storage.get_redirect_resource("name.js"), Some(format!( @@ -113,11 +117,15 @@ mod redirect_storage_tests { #[test] fn get_resource_by_alias() { - let mut storage = ResourceStorage::default(); + let mut storage = InMemoryResourceStorage::default(); let mut r = Resource::simple("name.js", MimeType::ApplicationJavascript, "resource data"); r.aliases.push("alias.js".to_string()); storage.add_resource(r).unwrap(); + let storage = ResourceStorage { + backend: Box::new(storage), + }; + assert_eq!( storage.get_redirect_resource("alias.js"), Some(format!( @@ -129,12 +137,16 @@ mod redirect_storage_tests { #[test] fn permissions() { - let mut storage = ResourceStorage::default(); + let mut storage = InMemoryResourceStorage::default(); let mut r = Resource::simple("name.js", MimeType::ApplicationJavascript, "resource data"); r.aliases.push("alias.js".to_string()); r.permission = PermissionMask::from_bits(0b00000001); storage.add_resource(r).unwrap(); + let storage = ResourceStorage { + backend: Box::new(storage), + }; + assert_eq!(storage.get_redirect_resource("name.js"), None,); assert_eq!(storage.get_redirect_resource("alias.js"), None,); } @@ -237,7 +249,7 @@ mod scriptlet_storage_tests { #[test] fn get_patched_scriptlets() { - let resources = ResourceStorage::from_resources([ + let resources = ResourceStorage::in_memory_from_resources([ Resource { name: "greet.js".to_string(), aliases: vec![], @@ -339,7 +351,7 @@ mod scriptlet_storage_tests { #[test] fn parse_template_file_format() { - let resources = ResourceStorage::from_resources([ + let resources = ResourceStorage::in_memory_from_resources([ Resource { name: "abort-current-inline-script.js".into(), aliases: 
vec!["acis.js".into()], @@ -446,7 +458,7 @@ mod scriptlet_storage_tests { /// cause a panic. #[test] fn patch_argslist_many_args() { - let resources = ResourceStorage::from_resources([Resource { + let resources = ResourceStorage::in_memory_from_resources([Resource { name: "abort-current-script.js".into(), aliases: vec!["acs.js".into()], kind: ResourceType::Mime(MimeType::ApplicationJavascript), @@ -477,7 +489,7 @@ mod scriptlet_storage_tests { const PERM01: PermissionMask = PermissionMask::from_bits(0b00000001); const PERM10: PermissionMask = PermissionMask::from_bits(0b00000010); const PERM11: PermissionMask = PermissionMask::from_bits(0b00000011); - let resources = ResourceStorage::from_resources([ + let resources = ResourceStorage::in_memory_from_resources([ Resource::simple( "default-perms.js", MimeType::ApplicationJavascript, @@ -566,7 +578,7 @@ mod scriptlet_storage_tests { #[test] fn dependencies() { const PERM01: PermissionMask = PermissionMask::from_bits(0b00000001); - let resources = ResourceStorage::from_resources([ + let resources = ResourceStorage::in_memory_from_resources([ Resource::simple("simple.fn", MimeType::FnJavascript, "simple"), Resource { name: "permissioned.fn".into(), @@ -793,3 +805,62 @@ mod scriptlet_storage_tests { assert_eq!(resources.get_scriptlet_resources([("test, 1", PERM01), ("test-wrapper, 2", PERM01), ("shared, 3", Default::default())]), "permissioned\na\ncommon\nb\nfunction test() {}\nfunction testWrapper() { test(arguments) }\nfunction shared() { }\ntry {\ntest(\"1\")\n} catch ( e ) { }\ntry {\ntestWrapper(\"2\")\n} catch ( e ) { }\ntry {\nshared(\"3\")\n} catch ( e ) { }\n"); } } + +#[cfg(all(test, feature = "single-thread"))] +mod shared_storage_tests { + use super::super::*; + use crate::resources::MimeType; + + use std::rc::Rc; + + #[derive(Clone)] + struct BraveCoreResourceStorage { + shared_storage: Rc, + } + + impl ResourceStorageBackend for BraveCoreResourceStorage { + fn get_resource(&self, resource_ident: &str) -> 
Option { + self.shared_storage.get_resource(resource_ident) + } + } + + #[test] + fn share_resources() { + let in_memory_storage = InMemoryResourceStorage::from_resources([Resource::simple( + "test-scriptlet.js", + MimeType::ApplicationJavascript, + "success!", + )]); + + let shared_storage = Rc::new(in_memory_storage); + + let mut engine1 = + crate::Engine::from_rules(["example1.com##+js(test-scriptlet)"], Default::default()); + engine1.use_resource_storage(BraveCoreResourceStorage { + shared_storage: Rc::clone(&shared_storage), + }); + + let mut engine2 = + crate::Engine::from_rules(["example2.com##+js(test-scriptlet)"], Default::default()); + engine2.use_resource_storage(BraveCoreResourceStorage { + shared_storage: Rc::clone(&shared_storage), + }); + + assert!(engine1 + .url_cosmetic_resources("https://example1.com") + .injected_script + .contains("success!")); + assert!(!engine1 + .url_cosmetic_resources("https://example2.com") + .injected_script + .contains("success!")); + assert!(!engine2 + .url_cosmetic_resources("https://example1.com") + .injected_script + .contains("success!")); + assert!(engine2 + .url_cosmetic_resources("https://example2.com") + .injected_script + .contains("success!")); + } +}