diff --git a/benches/bench_rules.rs b/benches/bench_rules.rs index f5b850ef..4136bbde 100644 --- a/benches/bench_rules.rs +++ b/benches/bench_rules.rs @@ -100,7 +100,7 @@ fn blocker_new(c: &mut Criterion) { .collect(); let brave_list_rules: Vec<_> = rules_from_lists(&["data/brave/brave-main-list.txt"]).collect(); let engine = Engine::from_rules(&brave_list_rules, Default::default()); - let engine_serialized = engine.serialize().unwrap(); + let engine_serialized = engine.serialize().to_vec(); group.bench_function("el+ep", move |b| b.iter(|| get_engine(&easylist_rules))); group.bench_function("brave-list", move |b| { diff --git a/benches/bench_serialization.rs b/benches/bench_serialization.rs index 8076eb5f..ff84d17a 100644 --- a/benches/bench_serialization.rs +++ b/benches/bench_serialization.rs @@ -18,19 +18,19 @@ fn serialization(c: &mut Criterion) { ]); let engine = Engine::from_rules(full_rules, Default::default()); - b.iter(|| assert!(!engine.serialize().unwrap().is_empty())) + b.iter(|| assert!(!engine.serialize().to_vec().is_empty())) }); group.bench_function("el", move |b| { let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); let engine = Engine::from_rules(full_rules, Default::default()); - b.iter(|| assert!(!engine.serialize().unwrap().is_empty())) + b.iter(|| assert!(!engine.serialize().to_vec().is_empty())) }); group.bench_function("slimlist", move |b| { let full_rules = rules_from_lists(&["data/slim-list.txt"]); let engine = Engine::from_rules(full_rules, Default::default()); - b.iter(|| assert!(!engine.serialize().unwrap().is_empty())) + b.iter(|| assert!(!engine.serialize().to_vec().is_empty())) }); group.finish(); @@ -48,7 +48,7 @@ fn deserialization(c: &mut Criterion) { ]); let engine = Engine::from_rules(full_rules, Default::default()); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); b.iter(|| { let mut deserialized = Engine::default(); @@ -59,7 +59,7 @@ fn 
deserialization(c: &mut Criterion) { let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); let engine = Engine::from_rules(full_rules, Default::default()); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); b.iter(|| { let mut deserialized = Engine::default(); @@ -70,7 +70,7 @@ fn deserialization(c: &mut Criterion) { let full_rules = rules_from_lists(&["data/slim-list.txt"]); let engine = Engine::from_rules(full_rules, Default::default()); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); b.iter(|| { let mut deserialized = Engine::default(); diff --git a/examples/generate-dat.rs b/examples/generate-dat.rs index 86845c5f..513db3f7 100644 --- a/examples/generate-dat.rs +++ b/examples/generate-dat.rs @@ -21,7 +21,7 @@ fn main() { ) .unwrap(); assert!(engine.check_network_request(&request).exception.is_some()); - let serialized = engine.serialize().expect("Could not serialize!"); + let serialized = engine.serialize().to_vec(); // Write to file let mut file = File::create("engine.dat").expect("Could not create serialization file"); diff --git a/js/src/lib.rs b/js/src/lib.rs index 7396162a..ee8a7add 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -248,7 +248,7 @@ fn engine_url_cosmetic_resources(mut cx: FunctionContext) -> JsResult { fn engine_serialize(mut cx: FunctionContext) -> JsResult { let this = cx.argument::>(0)?; let serialized = if let Ok(engine) = this.0.lock() { - engine.serialize().unwrap() + engine.serialize().to_vec() } else { cx.throw_error("Failed to acquire lock on engine")? 
}; diff --git a/src/blocker.rs b/src/blocker.rs index 46f21df6..d83861bf 100644 --- a/src/blocker.rs +++ b/src/blocker.rs @@ -6,8 +6,8 @@ use serde::Serialize; use std::collections::HashSet; use std::ops::DerefMut; -use crate::filters::fb_builder::NetworkFilterListId; -use crate::filters::fb_network::FilterDataContextRef; +use crate::filters::fb_network_builder::NetworkFilterListId; +use crate::filters::filter_data_context::FilterDataContextRef; use crate::filters::network::NetworkFilterMaskHelper; use crate::network_filter_list::NetworkFilterList; use crate::regex_manager::{RegexManager, RegexManagerDiscardPolicy}; @@ -440,11 +440,13 @@ impl Blocker { network_filters: Vec, options: &BlockerOptions, ) -> Self { - use crate::filters::{fb_builder::make_flatbuffer, fb_network::FilterDataContext}; + use crate::engine::Engine; + use crate::FilterSet; - let memory = make_flatbuffer(network_filters, options.enable_optimizations); - let filter_data_context = FilterDataContext::new(memory); - Self::from_context(filter_data_context) + let mut filter_set = FilterSet::new(true); + filter_set.network_filters = network_filters; + let engine = Engine::from_filter_set(filter_set, options.enable_optimizations); + Self::from_context(engine.filter_data_context()) } pub fn use_tags(&mut self, tags: &[&str]) { diff --git a/src/cosmetic_filter_cache.rs b/src/cosmetic_filter_cache.rs index aef3bff7..ca39a8b1 100644 --- a/src/cosmetic_filter_cache.rs +++ b/src/cosmetic_filter_cache.rs @@ -7,16 +7,23 @@ //! The primary API exposed by this module is the `CosmeticFilterCache` struct, which stores //! cosmetic filters and allows them to be queried efficiently at runtime for any which may be //! relevant to a particular page. +//! To build `CosmeticFilterCache`, use `CosmeticFilterCacheBuilder`. 
-use crate::filters::cosmetic::{ - CosmeticFilter, CosmeticFilterAction, CosmeticFilterMask, CosmeticFilterOperator, -}; +use crate::cosmetic_filter_utils::decode_script_with_permission; +#[cfg(test)] +use crate::filters::cosmetic::CosmeticFilter; +use crate::filters::cosmetic::{CosmeticFilterAction, CosmeticFilterOperator}; +use crate::filters::filter_data_context::FilterDataContextRef; + +use crate::flatbuffers::containers::flat_map::FlatMapView; +use crate::flatbuffers::containers::flat_multimap::{FlatMapStringView, FlatMultiMapView}; +use crate::flatbuffers::containers::flat_set::FlatSetView; use crate::resources::{PermissionMask, ResourceStorage}; + use crate::utils::Hash; use std::collections::{HashMap, HashSet}; -use memchr::memchr as find_char; use serde::{Deserialize, Serialize}; /// Contains cosmetic filter information intended to be used on a particular URL. @@ -63,106 +70,76 @@ impl UrlSpecificResources { /// will be blocked on any particular page, although when used correctly, all provided rules and /// scriptlets should be safe to apply. pub(crate) struct CosmeticFilterCache { - /// Rules that are just the CSS class of an element to be hidden on all sites, e.g. `##.ad`. - pub(crate) simple_class_rules: HashSet, - /// Rules that are just the CSS id of an element to be hidden on all sites, e.g. `###banner`. - pub(crate) simple_id_rules: HashSet, - /// Rules that are the CSS selector of an element to be hidden on all sites, starting with a - /// class, e.g. `##.ad image`. - pub(crate) complex_class_rules: HashMap>, - /// Rules that are the CSS selector of an element to be hidden on all sites, starting with an - /// id, e.g. `###banner > .text a`. - pub(crate) complex_id_rules: HashMap>, - - pub(crate) specific_rules: HostnameRuleDb, - - /// Rules that are the CSS selector of an element to be hidden on all sites that do not fit - /// into any of the class or id buckets above, e.g. 
`##a[href="https://malware.com"]` - pub(crate) misc_generic_selectors: HashSet, + filter_data_context: FilterDataContextRef, } -impl CosmeticFilterCache { - pub fn new() -> Self { - Self { - simple_class_rules: HashSet::new(), - simple_id_rules: HashSet::new(), - complex_class_rules: HashMap::new(), - complex_id_rules: HashMap::new(), - - specific_rules: HostnameRuleDb::default(), +/// Representations of filters with complex behavior that relies on in-page JS logic. +/// +/// These get stored in-memory as JSON and should be deserialized/acted on by a content script. +/// JSON is pragmatic here since there are relatively fewer of these type of rules, and they will +/// be handled by in-page JS anyways. +#[derive(Deserialize, Serialize, Clone)] +pub struct ProceduralOrActionFilter { + /// A selector for elements that this filter applies to. + /// This may be a plain CSS selector, or it can consist of multiple procedural operators. + pub selector: Vec, + /// An action to apply to matching elements. + /// If no action is present, the filter assumes default behavior of hiding the element with + /// a style of `display: none !important`. + #[serde(skip_serializing_if = "Option::is_none")] + pub action: Option, +} - misc_generic_selectors: HashSet::new(), +impl ProceduralOrActionFilter { + /// Returns `(selector, style)` if the filter can be expressed in pure CSS. 
+ pub fn as_css(&self) -> Option<(String, String)> { + match (&self.selector[..], &self.action) { + ([CosmeticFilterOperator::CssSelector(selector)], None) => { + Some((selector.to_string(), "display: none !important".to_string())) + } + ( + [CosmeticFilterOperator::CssSelector(selector)], + Some(CosmeticFilterAction::Style(style)), + ) => Some((selector.to_string(), style.to_string())), + _ => None, } } - pub fn from_rules(rules: Vec) -> Self { - let mut self_ = Self { - simple_class_rules: HashSet::with_capacity(rules.len() / 2), - simple_id_rules: HashSet::with_capacity(rules.len() / 2), - complex_class_rules: HashMap::with_capacity(rules.len() / 2), - complex_id_rules: HashMap::with_capacity(rules.len() / 2), - - specific_rules: HostnameRuleDb::default(), - - misc_generic_selectors: HashSet::with_capacity(rules.len() / 30), - }; - - for rule in rules { - self_.add_filter(rule) + /// Convenience constructor for pure CSS style filters. + #[cfg(test)] + pub(crate) fn from_css(selector: String, style: String) -> Self { + Self { + selector: vec![CosmeticFilterOperator::CssSelector(selector)], + action: Some(CosmeticFilterAction::Style(style)), } - - self_ } +} - pub fn add_filter(&mut self, rule: CosmeticFilter) { - if rule.has_hostname_constraint() { - if let Some(generic_rule) = rule.hidden_generic_rule() { - self.add_generic_filter(generic_rule); - } - self.specific_rules.store_rule(rule); - } else { - self.add_generic_filter(rule); +fn hostname_domain_hashes(hostname: &str, domain: &str) -> (Vec, Vec) { + let request_entities = + crate::filters::cosmetic::get_entity_hashes_from_labels(hostname, domain); + let request_hostnames = + crate::filters::cosmetic::get_hostname_hashes_from_labels(hostname, domain); + + (request_entities, request_hostnames) +} + +impl CosmeticFilterCache { + pub fn from_context(filter_data_context: FilterDataContextRef) -> Self { + Self { + filter_data_context, } } - /// Add a filter, assuming it has already been determined to be a 
generic rule - fn add_generic_filter(&mut self, rule: CosmeticFilter) { - let selector = match rule.plain_css_selector() { - Some(s) => s.to_string(), - None => { - // Procedural cosmetic filters cannot be generic. - // Silently ignoring this filter. - return; - } - }; + #[cfg(test)] + pub fn from_rules(rules: Vec) -> Self { + use crate::engine::Engine; + use crate::FilterSet; - if selector.starts_with('.') { - if let Some(key) = key_from_selector(&selector) { - assert!(key.starts_with('.')); - let class = key[1..].to_string(); - if key == selector { - self.simple_class_rules.insert(class); - } else if let Some(bucket) = self.complex_class_rules.get_mut(&class) { - bucket.push(selector); - } else { - self.complex_class_rules.insert(class, vec![selector]); - } - } - } else if selector.starts_with('#') { - if let Some(key) = key_from_selector(&selector) { - assert!(key.starts_with('#')); - let id = key[1..].to_string(); - if key == selector { - self.simple_id_rules.insert(id); - } else if let Some(bucket) = self.complex_id_rules.get_mut(&id) { - bucket.push(selector); - } else { - self.complex_id_rules.insert(id, vec![selector]); - } - } - } else { - self.misc_generic_selectors.insert(selector); - } + let mut filter_set = FilterSet::new(true); + filter_set.cosmetic_filters = rules; + let engine = Engine::from_filter_set(filter_set, true); + engine.cosmetic_cache() } /// Generic class/id rules are by far the most common type of cosmetic filtering rule, and they @@ -191,34 +168,42 @@ impl CosmeticFilterCache { ) -> Vec { let mut selectors = vec![]; + let cosmetic_filters = self.filter_data_context.memory.root().cosmetic_filters(); + let simple_class_rules = FlatSetView::new(cosmetic_filters.simple_class_rules()); + let simple_id_rules = FlatSetView::new(cosmetic_filters.simple_id_rules()); + let complex_class_rules = FlatMapStringView::new( + cosmetic_filters.complex_class_rules_index(), + cosmetic_filters.complex_class_rules_values(), + ); + let complex_id_rules = 
FlatMapStringView::new( + cosmetic_filters.complex_id_rules_index(), + cosmetic_filters.complex_id_rules_values(), + ); + classes.into_iter().for_each(|class| { let class = class.as_ref(); - if self.simple_class_rules.contains(class) - && !exceptions.contains(&format!(".{}", class)) - { + if simple_class_rules.contains(class) && !exceptions.contains(&format!(".{}", class)) { selectors.push(format!(".{}", class)); } - if let Some(bucket) = self.complex_class_rules.get(class) { - selectors.extend( - bucket - .iter() - .filter(|sel| !exceptions.contains(*sel)) - .map(|s| s.to_owned()), - ); + if let Some(bucket) = complex_class_rules.get(class) { + for (_, sel) in bucket { + if !exceptions.contains(sel) { + selectors.push(sel.to_string()); + } + } } }); ids.into_iter().for_each(|id| { let id = id.as_ref(); - if self.simple_id_rules.contains(id) && !exceptions.contains(&format!("#{}", id)) { + if simple_id_rules.contains(id) && !exceptions.contains(&format!("#{}", id)) { selectors.push(format!("#{}", id)); } - if let Some(bucket) = self.complex_id_rules.get(id) { - selectors.extend( - bucket - .iter() - .filter(|sel| !exceptions.contains(*sel)) - .map(|s| s.to_owned()), - ); + if let Some(bucket) = complex_id_rules.get(id) { + for (_, sel) in bucket { + if !exceptions.contains(sel) { + selectors.push(sel.to_string()); + } + } } }); @@ -258,75 +243,82 @@ impl CosmeticFilterCache { .chain(request_hostnames.iter()) .collect(); - fn populate_set( - hash: &Hash, - source_bin: &HostnameFilterBin, - dest_set: &mut HashSet, - ) { - if let Some(s) = source_bin.get(hash) { - s.iter().for_each(|s| { - dest_set.insert(s.to_owned()); - }); - } - } + let cosmetic_filters = self.filter_data_context.memory.root().cosmetic_filters(); + let hostname_rules_view = FlatMapView::new( + cosmetic_filters.hostname_index(), + cosmetic_filters.hostname_values(), + ); + let hostname_hide_view = FlatMultiMapView::new( + cosmetic_filters.hostname_hide_index(), + 
cosmetic_filters.hostname_hide_values(), + ); + let hostname_inject_script_view = FlatMultiMapView::new( + cosmetic_filters.hostname_inject_script_index(), + cosmetic_filters.hostname_inject_script_values(), + ); + for hash in hashes.iter() { - populate_set( - hash, - &self.specific_rules.hide, - &mut specific_hide_selectors, - ); - populate_set( - hash, - &self.specific_rules.procedural_action, - &mut procedural_actions, - ); - // special behavior: `script_injections` doesn't have to own the strings yet, since the - // scripts need to be fetched and templated later - if let Some(s) = self.specific_rules.inject_script.get(hash) { - s.iter().for_each(|(s, mask)| { + // Handle top-level hide selectors + if let Some(hide_iterator) = hostname_hide_view.get(**hash) { + for (_, hide_selector) in hide_iterator { + if !exceptions.contains(hide_selector) { + specific_hide_selectors.insert(hide_selector.to_owned()); + } + } + } + + // Handle top-level inject scripts with encoded permissions + if let Some(script_iterator) = hostname_inject_script_view.get(**hash) { + for (_, encoded_script) in script_iterator { + let (permission, script) = decode_script_with_permission(encoded_script); script_injections - .entry(s) - .and_modify(|entry| *entry |= *mask) - .or_insert(*mask); - }); + .entry(script) + .and_modify(|entry| *entry |= permission) + .or_insert(permission); + } } - } - fn prune_set( - hash: &Hash, - source_bin: &HostnameFilterBin, - dest_set: &mut HashSet, - ) { - if let Some(s) = source_bin.get(hash) { - s.iter().for_each(|s| { - dest_set.remove(s); - }); + // Handle remaining rule types from HostnameSpecificRules + if let Some(hostname_rules) = hostname_rules_view.get(**hash) { + // Process procedural actions + if let Some(procedural_actions_rules) = hostname_rules.procedural_action() { + for action in procedural_actions_rules.iter() { + procedural_actions.insert(action.to_owned()); + } + } } } + + // Process unhide/exception filters for hash in hashes.iter() { - // 
special behavior: unhide rules need to go in `exceptions` as well - if let Some(s) = self.specific_rules.unhide.get(hash) { - s.iter().for_each(|s| { - specific_hide_selectors.remove(s); - exceptions.insert(s.to_owned()); - }); - } - prune_set( - hash, - &self.specific_rules.procedural_action_exception, - &mut procedural_actions, - ); - // same logic but not using prune_set since strings are unowned, (see above) - if let Some(s) = self.specific_rules.uninject_script.get(hash) { - for s in s { - if s.is_empty() { - except_all_scripts = true; - script_injections.clear(); + if let Some(hostname_rules) = hostname_rules_view.get(**hash) { + // Process unhide selectors (special behavior: they also go in exceptions) + if let Some(unhide_rules) = hostname_rules.unhide() { + for selector in unhide_rules.iter() { + specific_hide_selectors.remove(selector); + exceptions.insert(selector.to_owned()); + } + } + + // Process procedural action exceptions + if let Some(procedural_exceptions) = hostname_rules.procedural_action_exception() { + for action in procedural_exceptions.iter() { + procedural_actions.remove(action); } - if except_all_scripts { - continue; + } + + // Process script uninjects + if let Some(uninject_scripts) = hostname_rules.uninject_script() { + for script in uninject_scripts.iter() { + if script.is_empty() { + except_all_scripts = true; + script_injections.clear(); + } + if except_all_scripts { + continue; + } + script_injections.remove(script); } - script_injections.remove(s.as_str()); } } } @@ -334,11 +326,16 @@ impl CosmeticFilterCache { let hide_selectors = if generichide { specific_hide_selectors } else { - let mut hide_selectors = self - .misc_generic_selectors - .difference(&exceptions) - .cloned() - .collect::>(); + let cosmetic_filters = self.filter_data_context.memory.root().cosmetic_filters(); + let misc_generic_selectors_vector = cosmetic_filters.misc_generic_selectors(); + + // Collect every generic selector that is not in `exceptions` (a set difference), O(n) expected time + let 
mut hide_selectors = HashSet::new(); + for selector in misc_generic_selectors_vector.iter() { + if !exceptions.contains(selector) { + hide_selectors.insert(selector.to_string()); + } + } specific_hide_selectors.into_iter().for_each(|sel| { hide_selectors.insert(sel); }); @@ -357,257 +354,6 @@ impl CosmeticFilterCache { } } -/// Each hostname-specific filter can be pointed to by several different hostnames, and each -/// hostname can correspond to several different filters. To effectively store and access those -/// filters by hostname, all the non-hostname information for filters is stored in per-hostname -/// "buckets" within a Vec, and each bucket is identified by its index. Hostname hashes are used as -/// keys to get the indices of relevant buckets, which are in turn used to retrieve all the filters -/// that apply. -#[derive(Default)] -pub(crate) struct HostnameFilterBin(pub HashMap>); - -impl HostnameFilterBin { - pub fn insert(&mut self, token: &Hash, filter: T) { - if let Some(bucket) = self.0.get_mut(token) { - bucket.push(filter); - } else { - self.0.insert(*token, vec![filter]); - } - } - - fn get(&self, token: &Hash) -> Option<&Vec> { - self.0.get(token) - } -} - -impl HostnameFilterBin { - /// Convenience method that serializes to JSON - pub fn insert_procedural_action_filter(&mut self, token: &Hash, f: &ProceduralOrActionFilter) { - self.insert(token, serde_json::to_string(f).unwrap()); - } -} - -/// Holds filter bins categorized by filter type. -#[derive(Default)] -pub(crate) struct HostnameRuleDb { - /// Simple hostname-specific hide rules, e.g. `example.com##.ad`. - /// - /// The parameter is the rule's CSS selector. - pub hide: HostnameFilterBin, - /// Simple hostname-specific hide exception rules, e.g. `example.com#@#.ad`. - /// - /// The parameter is the rule's CSS selector. - pub unhide: HostnameFilterBin, - /// Hostname-specific rules with a scriptlet to inject along with any arguments, e.g. - /// `example.com##+js(acis, Number.isNan)`. 
- /// - /// The parameter is the contents of the `+js(...)` syntax construct. - pub inject_script: HostnameFilterBin<(String, PermissionMask)>, - /// Hostname-specific rules to except a scriptlet to inject along with any arguments, e.g. - /// `example.com#@#+js(acis, Number.isNan)`. - /// - /// The parameter is the contents of the `+js(...)` syntax construct. - /// - /// In practice, these rules are extremely rare in filter lists. - pub uninject_script: HostnameFilterBin, - /// Procedural filters and/or filters with a [`CosmeticFilterAction`]. - /// - /// Each is a [`ProceduralOrActionFilter`] struct serialized as JSON. - pub procedural_action: HostnameFilterBin, - /// Exceptions for procedural filters and/or filters with a [`CosmeticFilterAction`]. - /// - /// Each is a [`ProceduralOrActionFilter`] struct serialized as JSON. - pub procedural_action_exception: HostnameFilterBin, -} - -/// Representations of filters with complex behavior that relies on in-page JS logic. -/// -/// These get stored in-memory as JSON and should be deserialized/acted on by a content script. -/// JSON is pragmatic here since there are relatively fewer of these type of rules, and they will -/// be handled by in-page JS anyways. -#[derive(Deserialize, Serialize, Clone)] -pub struct ProceduralOrActionFilter { - /// A selector for elements that this filter applies to. - /// This may be a plain CSS selector, or it can consist of multiple procedural operators. - pub selector: Vec, - /// An action to apply to matching elements. - /// If no action is present, the filter assumes default behavior of hiding the element with - /// a style of `display: none !important`. - #[serde(skip_serializing_if = "Option::is_none")] - pub action: Option, -} - -impl ProceduralOrActionFilter { - /// Returns `(selector, style)` if the filter can be expressed in pure CSS. 
- pub fn as_css(&self) -> Option<(String, String)> { - match (&self.selector[..], &self.action) { - ([CosmeticFilterOperator::CssSelector(selector)], None) => { - Some((selector.to_string(), "display: none !important".to_string())) - } - ( - [CosmeticFilterOperator::CssSelector(selector)], - Some(CosmeticFilterAction::Style(style)), - ) => Some((selector.to_string(), style.to_string())), - _ => None, - } - } - - /// Convenience constructor for pure CSS style filters. - pub(crate) fn from_css(selector: String, style: String) -> Self { - Self { - selector: vec![CosmeticFilterOperator::CssSelector(selector)], - action: Some(CosmeticFilterAction::Style(style)), - } - } -} - -impl HostnameRuleDb { - pub fn store_rule(&mut self, rule: CosmeticFilter) { - use SpecificFilterType::*; - - let unhide = rule.mask.contains(CosmeticFilterMask::UNHIDE); - let script_inject = rule.mask.contains(CosmeticFilterMask::SCRIPT_INJECT); - - let kind = match ( - script_inject, - rule.plain_css_selector().map(|s| s.to_string()), - rule.action, - ) { - (false, Some(selector), None) => Hide(selector), - (true, Some(selector), None) => InjectScript((selector, rule.permission)), - (false, selector, action) => ProceduralOrAction( - serde_json::to_string(&ProceduralOrActionFilter { - selector: selector - .map(|selector| vec![CosmeticFilterOperator::CssSelector(selector)]) - .unwrap_or(rule.selector), - action, - }) - .unwrap(), - ), - (true, _, Some(_)) => return, // script injection with action - shouldn't be possible - (true, None, _) => return, // script injection without plain CSS selector - shouldn't be possible - }; - - let kind = if unhide { kind.negated() } else { kind }; - - let tokens_to_insert = std::iter::empty() - .chain(rule.hostnames.unwrap_or_default()) - .chain(rule.entities.unwrap_or_default()); - - tokens_to_insert.for_each(|t| self.store(&t, kind.clone())); - - let tokens_to_insert_negated = std::iter::empty() - .chain(rule.not_hostnames.unwrap_or_default()) - 
.chain(rule.not_entities.unwrap_or_default()); - - let negated = kind.negated(); - - tokens_to_insert_negated.for_each(|t| self.store(&t, negated.clone())); - } - - fn store(&mut self, token: &Hash, kind: SpecificFilterType) { - use SpecificFilterType::*; - - match kind { - Hide(s) => self.hide.insert(token, s), - Unhide(s) => self.unhide.insert(token, s), - InjectScript(s) => self.inject_script.insert(token, s), - UninjectScript((s, _)) => self.uninject_script.insert(token, s), - ProceduralOrAction(s) => self.procedural_action.insert(token, s), - ProceduralOrActionException(s) => self.procedural_action_exception.insert(token, s), - } - } -} - -/// Exists to use common logic for binning filters correctly -#[derive(Clone)] -enum SpecificFilterType { - Hide(String), - Unhide(String), - InjectScript((String, PermissionMask)), - UninjectScript((String, PermissionMask)), - ProceduralOrAction(String), - ProceduralOrActionException(String), -} - -impl SpecificFilterType { - fn negated(self) -> Self { - match self { - Self::Hide(s) => Self::Unhide(s), - Self::Unhide(s) => Self::Hide(s), - Self::InjectScript(s) => Self::UninjectScript(s), - Self::UninjectScript(s) => Self::InjectScript(s), - Self::ProceduralOrAction(s) => Self::ProceduralOrActionException(s), - Self::ProceduralOrActionException(s) => Self::ProceduralOrAction(s), - } - } -} - -fn hostname_domain_hashes(hostname: &str, domain: &str) -> (Vec, Vec) { - let request_entities = - crate::filters::cosmetic::get_entity_hashes_from_labels(hostname, domain); - let request_hostnames = - crate::filters::cosmetic::get_hostname_hashes_from_labels(hostname, domain); - - (request_entities, request_hostnames) -} - -/// Returns the first token of a CSS selector. -/// -/// This should only be called once `selector` has been verified to start with either a "#" or "." -/// character. 
-fn key_from_selector(selector: &str) -> Option { - use once_cell::sync::Lazy; - use regex::Regex; - - static RE_PLAIN_SELECTOR: Lazy = Lazy::new(|| Regex::new(r"^[#.][\w\\-]+").unwrap()); - static RE_PLAIN_SELECTOR_ESCAPED: Lazy = - Lazy::new(|| Regex::new(r"^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+").unwrap()); - static RE_ESCAPE_SEQUENCE: Lazy = - Lazy::new(|| Regex::new(r"\\([0-9A-Fa-f]+ |.)").unwrap()); - - // If there are no escape characters in the selector, just take the first class or id token. - let mat = RE_PLAIN_SELECTOR.find(selector); - if let Some(location) = mat { - let key = &location.as_str(); - if find_char(b'\\', key.as_bytes()).is_none() { - return Some((*key).into()); - } - } else { - return None; - } - - // Otherwise, the characters in the selector must be escaped. - let mat = RE_PLAIN_SELECTOR_ESCAPED.find(selector); - if let Some(location) = mat { - let mut key = String::with_capacity(selector.len()); - let escaped = &location.as_str(); - let mut beginning = 0; - let mat = RE_ESCAPE_SEQUENCE.captures_iter(escaped); - for capture in mat { - // Unwrap is safe because the 0th capture group is the match itself - let location = capture.get(0).unwrap(); - key += &escaped[beginning..location.start()]; - beginning = location.end(); - // Unwrap is safe because there is a capture group specified in the regex - let capture = capture.get(1).unwrap().as_str(); - if capture.chars().count() == 1 { - // Check number of unicode characters rather than byte length - key += capture; - } else { - // This u32 conversion can overflow - let codepoint = u32::from_str_radix(&capture[..capture.len() - 1], 16).ok()?; - - // Not all u32s are valid Unicode codepoints - key += &core::char::from_u32(codepoint)?.to_string(); - } - } - Some(key + &escaped[beginning..]) - } else { - None - } -} - #[cfg(test)] #[path = "../tests/unit/cosmetic_filter_cache.rs"] mod unit_tests; diff --git a/src/cosmetic_filter_cache_builder.rs b/src/cosmetic_filter_cache_builder.rs new file mode 
100644 index 00000000..2673107f --- /dev/null +++ b/src/cosmetic_filter_cache_builder.rs @@ -0,0 +1,247 @@ +//! Provides API to prepare and serialize cosmetic filter rules to a flatbuffer. +//! To build the struct, use `CosmeticFilterCacheBuilder`. +//! To use the serialized rules, use `CosmeticFilterCache`. + +use crate::cosmetic_filter_cache::ProceduralOrActionFilter; +use crate::cosmetic_filter_utils::SpecificFilterType; +use crate::cosmetic_filter_utils::{encode_script_with_permission, key_from_selector}; +use crate::filters::cosmetic::{CosmeticFilter, CosmeticFilterMask, CosmeticFilterOperator}; +use crate::filters::flatbuffer_generated::fb; +use crate::flatbuffers::containers::flat_map::FlatMapBuilder; +use crate::flatbuffers::containers::flat_multimap::FlatMultiMapBuilder; + +use crate::flatbuffers::containers::flat_serialize::{ + serialize_vec_opt, FlatBuilder, FlatSerialize, +}; + +use crate::utils::Hash; + +use std::collections::{HashMap, HashSet}; + +use flatbuffers::WIPOffset; + +/// Accumulates hostname-specific rules for a single domain before building HostnameSpecificRules +/// Note: hide and inject_script are now handled separately at the top level +/// See HostnameSpecificRules declaration for more details. 
+/// Per-hostname rules that are not kept in the top-level multimaps: exceptions
+/// (`unhide`, `uninject_script`) and procedural/action filters plus their exceptions.
+/// Procedural/action entries are stored as JSON-encoded `ProceduralOrActionFilter`s.
+#[derive(Default)]
+struct HostnameRule {
+    unhide: Vec<String>,
+    uninject_script: Vec<String>,
+    procedural_action: Vec<String>,
+    procedural_action_exception: Vec<String>,
+}
+
+impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for HostnameRule {
+    type Output = WIPOffset<fb::HostnameSpecificRules<'a>>;
+
+    /// Writes the four rule lists into one `fb::HostnameSpecificRules` table.
+    fn serialize(
+        value: Self,
+        builder: &mut B,
+    ) -> flatbuffers::WIPOffset<fb::HostnameSpecificRules<'a>> {
+        let unhide = serialize_vec_opt(value.unhide, builder);
+        let uninject_script = serialize_vec_opt(value.uninject_script, builder);
+        let procedural_action = serialize_vec_opt(value.procedural_action, builder);
+        let procedural_action_exception =
+            serialize_vec_opt(value.procedural_action_exception, builder);
+
+        fb::HostnameSpecificRules::create(
+            builder.raw_builder(),
+            &fb::HostnameSpecificRulesArgs {
+                unhide,
+                uninject_script,
+                procedural_action,
+                procedural_action_exception,
+            },
+        )
+    }
+}
+
+/// Accumulates cosmetic filters into the containers that are later serialized into the
+/// `fb::CosmeticFilters` flatbuffer table.
+#[derive(Default)]
+pub(crate) struct CosmeticFilterCacheBuilder {
+    simple_class_rules: HashSet<String>,
+    simple_id_rules: HashSet<String>,
+    misc_generic_selectors: HashSet<String>,
+    complex_class_rules: FlatMultiMapBuilder<String, String>,
+    complex_id_rules: FlatMultiMapBuilder<String, String>,
+
+    hostname_hide: FlatMultiMapBuilder<Hash, String>,
+    hostname_inject_script: FlatMultiMapBuilder<Hash, String>,
+
+    specific_rules: HashMap<Hash, HostnameRule>,
+}
+
+impl CosmeticFilterCacheBuilder {
+    /// Builds a cache builder by feeding every rule through [`Self::add_filter`].
+    pub fn from_rules(rules: Vec<CosmeticFilter>) -> Self {
+        let mut self_ = Self::default();
+
+        for rule in rules {
+            self_.add_filter(rule)
+        }
+
+        self_
+    }
+
+    /// Routes a single filter. A rule with a hostname constraint is stored per hostname, after
+    /// first registering the result of `hidden_generic_rule()` (if any) as a generic rule.
+    /// Every other rule is treated as generic.
+    pub fn add_filter(&mut self, rule: CosmeticFilter) {
+        if rule.has_hostname_constraint() {
+            if let Some(generic_rule) = rule.hidden_generic_rule() {
+                self.add_generic_filter(generic_rule);
+            }
+            self.store_hostname_rule(rule);
+        } else {
+            self.add_generic_filter(rule);
+        }
+    }
+
+    /// Add a filter, assuming it has already been determined to be a generic rule
+    ///
+    /// Selectors starting with `.` or `#` are binned by the key returned from
+    /// `key_from_selector` (with the leading `.`/`#` removed): "simple" when the key is the
+    /// whole selector, "complex" otherwise. Such a selector is dropped if no key can be
+    /// extracted. All remaining selectors go to `misc_generic_selectors`.
+    fn add_generic_filter(&mut self, rule: CosmeticFilter) {
+        let selector = match rule.plain_css_selector() {
+            Some(s) => s.to_string(),
+            None => {
+                // Procedural cosmetic filters cannot be generic.
+                // Silently ignoring this filter.
+                return;
+            }
+        };
+
+        if selector.starts_with('.') {
+            if let Some(key) = key_from_selector(&selector) {
+                assert!(key.starts_with('.'));
+                let class = key[1..].to_string();
+                if key == selector {
+                    self.simple_class_rules.insert(class);
+                } else {
+                    self.complex_class_rules.insert(class, selector);
+                }
+            }
+        } else if selector.starts_with('#') {
+            if let Some(key) = key_from_selector(&selector) {
+                assert!(key.starts_with('#'));
+                let id = key[1..].to_string();
+                if key == selector {
+                    self.simple_id_rules.insert(id);
+                } else {
+                    self.complex_id_rules.insert(id, selector);
+                }
+            }
+        } else {
+            self.misc_generic_selectors.insert(selector);
+        }
+    }
+
+    /// Classifies a hostname-constrained rule as a `SpecificFilterType` and stores it under
+    /// every `hostnames`/`entities` token. The negated kind is stored under every
+    /// `not_hostnames`/`not_entities` token.
+    fn store_hostname_rule(&mut self, rule: CosmeticFilter) {
+        use SpecificFilterType::*;
+
+        let unhide = rule.mask.contains(CosmeticFilterMask::UNHIDE);
+        let script_inject = rule.mask.contains(CosmeticFilterMask::SCRIPT_INJECT);
+
+        let kind = match (
+            script_inject,
+            rule.plain_css_selector().map(|s| s.to_string()),
+            rule.action,
+        ) {
+            (false, Some(selector), None) => Hide(selector),
+            (true, Some(selector), None) => InjectScript((selector, rule.permission)),
+            (false, selector, action) => ProceduralOrAction(
+                serde_json::to_string(&ProceduralOrActionFilter {
+                    selector: selector
+                        .map(|selector| vec![CosmeticFilterOperator::CssSelector(selector)])
+                        .unwrap_or(rule.selector),
+                    action,
+                })
+                .unwrap(),
+            ),
+            (true, _, Some(_)) => return, // script injection with action - shouldn't be possible
+            (true, None, _) => return, // script injection without plain CSS selector - shouldn't be possible
+        };
+
+        // An exception rule (UNHIDE) flips the kind before it is stored.
+        let kind = if unhide { kind.negated() } else { kind };
+
+        let tokens_to_insert = std::iter::empty()
+            .chain(rule.hostnames.unwrap_or_default())
+            .chain(rule.entities.unwrap_or_default());
+
+        tokens_to_insert.for_each(|t| self.store_hostname_filter(&t, kind.clone()));
+
+        let tokens_to_insert_negated = std::iter::empty()
+            .chain(rule.not_hostnames.unwrap_or_default())
+            .chain(rule.not_entities.unwrap_or_default());
+
+        let negated = kind.negated();
+
+        tokens_to_insert_negated.for_each(|t| self.store_hostname_filter(&t, negated.clone()));
+    }
+
+    /// Stores one rule of the given kind under `token`.
+    fn store_hostname_filter(&mut self, token: &Hash, kind: SpecificFilterType) {
+        use SpecificFilterType::*;
+
+        match kind {
+            // Handle hide and inject_script at top level for better deduplication
+            Hide(s) => {
+                self.hostname_hide.insert(*token, s);
+            }
+            InjectScript((s, permission)) => {
+                let encoded_script = encode_script_with_permission(s, permission);
+                self.hostname_inject_script.insert(*token, encoded_script);
+            }
+            // Handle remaining types through HostnameRule
+            Unhide(s) => {
+                let entry = self.specific_rules.entry(*token).or_default();
+                entry.unhide.push(s);
+            }
+            // The permission mask is not stored for script-injection exceptions.
+            UninjectScript((s, _)) => {
+                let entry = self.specific_rules.entry(*token).or_default();
+                entry.uninject_script.push(s);
+            }
+            ProceduralOrAction(s) => {
+                let entry = self.specific_rules.entry(*token).or_default();
+                entry.procedural_action.push(s);
+            }
+            ProceduralOrActionException(s) => {
+                let entry = self.specific_rules.entry(*token).or_default();
+                entry.procedural_action_exception.push(s);
+            }
+        }
+    }
+}
+
+impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for CosmeticFilterCacheBuilder {
+    type Output = WIPOffset<fb::CosmeticFilters<'a>>;
+    /// Finishes every container and writes them into one `fb::CosmeticFilters` table.
+    fn serialize(value: Self, builder: &mut B) -> WIPOffset<fb::CosmeticFilters<'a>> {
+        let complex_class_rules = FlatMultiMapBuilder::finish(value.complex_class_rules, builder);
+        let complex_id_rules = FlatMultiMapBuilder::finish(value.complex_id_rules, builder);
+
+        // Handle top-level hostname hide and inject_script for better deduplication
+        let hostname_hide = FlatMultiMapBuilder::finish(value.hostname_hide, builder);
+        let hostname_inject_script =
+            FlatMultiMapBuilder::finish(value.hostname_inject_script, builder);
+
+        // Handle remaining rule types through HostnameSpecificRules
+        let hostname_specific_rules = FlatMapBuilder::finish(value.specific_rules, builder);
+
+        let simple_class_rules = Some(FlatSerialize::serialize(value.simple_class_rules, builder));
+        let simple_id_rules = Some(FlatSerialize::serialize(value.simple_id_rules, builder));
+        let misc_generic_selectors = Some(FlatSerialize::serialize(
+            value.misc_generic_selectors,
+            builder,
+        ));
+
+        fb::CosmeticFilters::create(
+            builder.raw_builder(),
+            &fb::CosmeticFiltersArgs {
+                simple_class_rules,
+                simple_id_rules,
+                misc_generic_selectors,
+                complex_class_rules_index: Some(complex_class_rules.keys),
+                complex_class_rules_values: Some(complex_class_rules.values),
+                complex_id_rules_index: Some(complex_id_rules.keys),
+                complex_id_rules_values: Some(complex_id_rules.values),
+                hostname_hide_index: Some(hostname_hide.keys),
+                hostname_hide_values: Some(hostname_hide.values),
+                hostname_inject_script_index: Some(hostname_inject_script.keys),
+                hostname_inject_script_values: Some(hostname_inject_script.values),
+                hostname_index: Some(hostname_specific_rules.keys),
+                hostname_values: Some(hostname_specific_rules.values),
+            },
+        )
+    }
+}
diff --git a/src/cosmetic_filter_utils.rs b/src/cosmetic_filter_utils.rs
new file mode 100644
index 00000000..8df17b0e
--- /dev/null
+++ b/src/cosmetic_filter_utils.rs
@@ -0,0 +1,109 @@
+//! Some utility functions for manipulating cosmetic filter rules.
+//! Used by `CosmeticFilterCacheBuilder` and `CosmeticFilterCache`.
+
+use crate::resources::PermissionMask;
+use memchr::memchr as find_char;
+
+/// Returns the first token of a CSS selector.
+///
+/// This should only be called once `selector` has been verified to start with either a "#" or "."
+/// character.
+pub(crate) fn key_from_selector(selector: &str) -> Option<String> {
+    use once_cell::sync::Lazy;
+    use regex::Regex;
+
+    static RE_PLAIN_SELECTOR: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[#.][\w\\-]+").unwrap());
+    static RE_PLAIN_SELECTOR_ESCAPED: Lazy<Regex> =
+        Lazy::new(|| Regex::new(r"^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+").unwrap());
+    static RE_ESCAPE_SEQUENCE: Lazy<Regex> =
+        Lazy::new(|| Regex::new(r"\\([0-9A-Fa-f]+ |.)").unwrap());
+
+    // If there are no escape characters in the selector, just take the first class or id token.
+    let mat = RE_PLAIN_SELECTOR.find(selector);
+    if let Some(location) = mat {
+        let key = &location.as_str();
+        if find_char(b'\\', key.as_bytes()).is_none() {
+            return Some((*key).into());
+        }
+    } else {
+        return None;
+    }
+
+    // Otherwise, the characters in the selector must be escaped.
+    let mat = RE_PLAIN_SELECTOR_ESCAPED.find(selector);
+    if let Some(location) = mat {
+        let mut key = String::with_capacity(selector.len());
+        let escaped = &location.as_str();
+        let mut beginning = 0;
+        let mat = RE_ESCAPE_SEQUENCE.captures_iter(escaped);
+        for capture in mat {
+            // Unwrap is safe because the 0th capture group is the match itself
+            let location = capture.get(0).unwrap();
+            key += &escaped[beginning..location.start()];
+            beginning = location.end();
+            // Unwrap is safe because there is a capture group specified in the regex
+            let capture = capture.get(1).unwrap().as_str();
+            if capture.chars().count() == 1 {
+                // Check number of unicode characters rather than byte length
+                key += capture;
+            } else {
+                // This u32 conversion can overflow
+                let codepoint = u32::from_str_radix(&capture[..capture.len() - 1], 16).ok()?;
+
+                // Not all u32s are valid Unicode codepoints
+                key += &core::char::from_u32(codepoint)?.to_string();
+            }
+        }
+        Some(key + &escaped[beginning..])
+    } else {
+        None
+    }
+}
+
+/// Kind of a hostname-specific cosmetic rule; exists so filter binning logic can be shared.
+#[derive(Clone)]
+pub(crate) enum SpecificFilterType {
+    Hide(String),
+    Unhide(String),
+    InjectScript((String, PermissionMask)),
+    UninjectScript((String, PermissionMask)),
+    ProceduralOrAction(String),
+    ProceduralOrActionException(String),
+}
+
+impl SpecificFilterType {
+    pub(crate) fn negated(self) -> Self {
+        match self {
+            Self::Hide(s) => Self::Unhide(s),
+            Self::Unhide(s) => Self::Hide(s),
+            Self::InjectScript(s) => Self::UninjectScript(s),
+            Self::UninjectScript(s) => Self::InjectScript(s),
+            Self::ProceduralOrAction(s) => Self::ProceduralOrActionException(s),
+            Self::ProceduralOrActionException(s) => Self::ProceduralOrAction(s),
+        }
+    }
+}
+
+/// Encodes permission bits as the last character of a script string.
+/// Returns the script with the permission character appended (two UTF-8 bytes if >= 0x80).
+pub(crate) fn encode_script_with_permission(
+    mut script: String,
+    permission: PermissionMask,
+) -> String {
+    script.push(permission.to_bits() as char);
+    script
+}
+
+/// Decodes permission bits from the last character of a script string.
+/// Returns (permission, script) tuple; an empty input yields the default permission.
+pub(crate) fn decode_script_with_permission(encoded_script: &str) -> (PermissionMask, &str) {
+    if encoded_script.is_empty() {
+        return (PermissionMask::default(), encoded_script);
+    }
+    // Unwrap is safe because the string was just checked to be non-empty
+    let last_char = encoded_script.chars().last().unwrap();
+    let permission = PermissionMask::from_bits(last_char as u8);
+    // `last_char` is two UTF-8 bytes wide when the bits are >= 0x80: cut at its real width.
+    let script = &encoded_script[..encoded_script.len() - last_char.len_utf8()];
+    (permission, script)
+}
diff --git a/src/data_format/mod.rs b/src/data_format/mod.rs
index 0f8f5048..98b980e0 100644
--- a/src/data_format/mod.rs
+++ b/src/data_format/mod.rs
@@ -4,92 +4,67 @@
 //! In order to support multiple format versions simultaneously, this module wraps around different
 //! serialization/deserialization implementations and can automatically dispatch to the appropriate
 //! one.
-
-mod storage;
-
-pub(crate) mod utils;
-
-use crate::cosmetic_filter_cache::CosmeticFilterCache;
-use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory;
-use crate::network_filter_list::NetworkFilterListParsingError;
+//!
+//! 
The current .dat file format:
+//! 1. magic (4 bytes)
+//! 2. version (1 byte)
+//! 3. seahash of the data (8 bytes)
+//! 4. data (the rest of the file)
 
 /// Newer formats start with this magic byte sequence.
 /// Calculated as the leading 4 bytes of `echo -n 'brave/adblock-rust' | sha512sum`.
 const ADBLOCK_RUST_DAT_MAGIC: [u8; 4] = [0xd1, 0xd9, 0x3a, 0xaf];
-const ADBLOCK_RUST_DAT_VERSION: u8 = 1;
 
-#[derive(Debug)]
-pub enum SerializationError {
-    RmpSerdeError(rmp_serde::encode::Error),
-}
+/// The version of the data format.
+/// If the data format version is incremented, the data is considered as incompatible.
+const ADBLOCK_FLATBUFFER_VERSION: u8 = 2;
 
-impl From<rmp_serde::encode::Error> for SerializationError {
-    fn from(e: rmp_serde::encode::Error) -> Self {
-        Self::RmpSerdeError(e)
-    }
-}
+/// The total length of the header prefix (magic + version + seahash)
+const HEADER_PREFIX_LENGTH: usize = 4 + 1 + 8;
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum DeserializationError {
-    RmpSerdeError(rmp_serde::decode::Error),
-    UnsupportedFormatVersion(u8),
-    NoHeaderFound,
+    BadHeader,
+    BadChecksum,
+    VersionMismatch(u8),
     FlatBufferParsingError(flatbuffers::InvalidFlatbuffer),
     ValidationError,
 }
 
-impl From<std::convert::Infallible> for DeserializationError {
-    fn from(x: std::convert::Infallible) -> Self {
-        match x {}
-    }
-}
+pub(crate) fn serialize_dat_file(data: &[u8]) -> Vec<u8> {
+    let mut serialized = Vec::with_capacity(data.len() + HEADER_PREFIX_LENGTH);
+    let hash = seahash::hash(data).to_le_bytes();
+    serialized.extend_from_slice(&ADBLOCK_RUST_DAT_MAGIC);
+    serialized.push(ADBLOCK_FLATBUFFER_VERSION);
+    serialized.extend_from_slice(&hash);
+    assert_eq!(serialized.len(), HEADER_PREFIX_LENGTH);
 
-impl From<rmp_serde::decode::Error> for DeserializationError {
-    fn from(e: rmp_serde::decode::Error) -> Self {
-        Self::RmpSerdeError(e)
-    }
+    serialized.extend_from_slice(data);
+    serialized
 }
 
-impl From<NetworkFilterListParsingError> for DeserializationError {
-    fn from(e: NetworkFilterListParsingError) -> Self {
-        match e {
-            NetworkFilterListParsingError::InvalidFlatbuffer(invalid_flatbuffer) => {
-                Self::FlatBufferParsingError(invalid_flatbuffer)
-            }
-            NetworkFilterListParsingError::UniqueDomainsOutOfBounds(_) => Self::ValidationError,
-        }
+/// Validates the magic, version and seahash of a `.dat` buffer and returns the payload
+/// that follows the header; every mismatch is reported as a `DeserializationError`.
+pub(crate) fn deserialize_dat_file(serialized: &[u8]) -> Result<&[u8], DeserializationError> {
+    if serialized.len() < HEADER_PREFIX_LENGTH || !serialized.starts_with(&ADBLOCK_RUST_DAT_MAGIC) {
+        return Err(DeserializationError::BadHeader);
     }
-}
-
-pub(crate) fn serialize_engine(
-    flatbuffer_memory: &VerifiedFlatbufferMemory,
-    cfc: &CosmeticFilterCache,
-) -> Result<Vec<u8>, SerializationError> {
-    let serialize_format = storage::SerializeFormat::from((flatbuffer_memory, cfc));
-    serialize_format.serialize()
-}
 
-pub(crate) fn deserialize_engine(
-    serialized: &[u8],
-) -> Result<(VerifiedFlatbufferMemory, CosmeticFilterCache), DeserializationError> {
-    let deserialize_format = storage::DeserializeFormat::deserialize(serialized)?;
-    deserialize_format.try_into()
-}
-
-// Verify the header (MAGIC + VERSION) and return the data after the header.
-pub fn parse_dat_header(serialized: &[u8]) -> Result<&[u8], DeserializationError> {
-    if !serialized.starts_with(&ADBLOCK_RUST_DAT_MAGIC) {
-        return Err(DeserializationError::NoHeaderFound);
-    }
-    if serialized.len() < ADBLOCK_RUST_DAT_MAGIC.len() + 1 {
-        return Err(DeserializationError::NoHeaderFound);
-    }
     let version = serialized[ADBLOCK_RUST_DAT_MAGIC.len()];
-    if version != ADBLOCK_RUST_DAT_VERSION {
-        return Err(DeserializationError::UnsupportedFormatVersion(version));
+    if version != ADBLOCK_FLATBUFFER_VERSION {
+        return Err(DeserializationError::VersionMismatch(version));
     }
-
-    Ok(&serialized[ADBLOCK_RUST_DAT_MAGIC.len() + 1..])
+    let data = &serialized[HEADER_PREFIX_LENGTH..];
+
+    // Check the hash to ensure the data isn't corrupted.
+    let expected_hash = &serialized[(ADBLOCK_RUST_DAT_MAGIC.len() + 1)..HEADER_PREFIX_LENGTH];
+    let actual_hash = seahash::hash(data).to_le_bytes();
+    if expected_hash != actual_hash {
+        // Report the mismatch through the returned error only: this is library code and
+        // must not write diagnostics to the embedding process's stdout.
+        return Err(DeserializationError::BadChecksum);
+    }
+    Ok(data)
 }
 
 #[cfg(test)]
@@ -108,4 +83,24 @@ mod tests {
 
         assert!(result.starts_with(&ADBLOCK_RUST_DAT_MAGIC));
     }
+
+    #[test]
+    fn serialize_deserialize_test() {
+        let data = b"test";
+        let serialized = serialize_dat_file(data);
+        let deserialized = deserialize_dat_file(&serialized).unwrap();
+        assert_eq!(data, deserialized);
+    }
+
+    #[test]
+    fn corrupted_data_test() {
+        let data = b"test";
+        let serialized = serialize_dat_file(data);
+        let mut corrupted_serialized = serialized.clone();
+        corrupted_serialized[HEADER_PREFIX_LENGTH] = 0;
+        assert_eq!(
+            Err(DeserializationError::BadChecksum),
+            deserialize_dat_file(&corrupted_serialized)
+        );
+    }
 }
diff --git a/src/data_format/storage.rs b/src/data_format/storage.rs
deleted file mode 100644
index 140d2bfc..00000000
--- a/src/data_format/storage.rs
+++ /dev/null
@@ -1,308 +0,0 @@
-//! Contains representations of data from the adblocking engine in a
-//! forwards-and-backwards-compatible format, as well as utilities for converting these to and from
-//! the actual `Engine` components.
-//!
-//! Any new fields should be added to the _end_ of both `SerializeFormat` and `DeserializeFormat`.
- -use std::collections::{HashMap, HashSet}; - -use rmp_serde as rmps; -use serde::{Deserialize, Serialize}; - -use crate::cosmetic_filter_cache::{CosmeticFilterCache, HostnameRuleDb, ProceduralOrActionFilter}; -use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory; -use crate::utils::Hash; - -use super::utils::{stabilize_hashmap_serialization, stabilize_hashset_serialization}; -use super::{DeserializationError, SerializationError}; - -/// Each variant describes a single rule that is specific to a particular hostname. -#[derive(Clone, Debug, Deserialize, Serialize)] -enum LegacySpecificFilterType { - Hide(String), - Unhide(String), - Style(String, String), - UnhideStyle(String, String), - ScriptInject(String), - UnhideScriptInject(String), -} - -#[derive(Deserialize, Serialize, Default)] -pub(crate) struct LegacyHostnameRuleDb { - #[serde(serialize_with = "stabilize_hashmap_serialization")] - db: HashMap>, -} - -impl From<&HostnameRuleDb> for LegacyHostnameRuleDb { - fn from(v: &HostnameRuleDb) -> Self { - let mut db = HashMap::>::new(); - for (hash, bin) in v.hide.0.iter() { - for f in bin { - db.entry(*hash) - .and_modify(|v| v.push(LegacySpecificFilterType::Hide(f.to_owned()))) - .or_insert_with(|| vec![LegacySpecificFilterType::Hide(f.to_owned())]); - } - } - for (hash, bin) in v.unhide.0.iter() { - for f in bin { - db.entry(*hash) - .and_modify(|v| v.push(LegacySpecificFilterType::Unhide(f.to_owned()))) - .or_insert_with(|| vec![LegacySpecificFilterType::Unhide(f.to_owned())]); - } - } - for (hash, bin) in v.inject_script.0.iter() { - for (f, _mask) in bin { - db.entry(*hash) - .and_modify(|v| v.push(LegacySpecificFilterType::ScriptInject(f.to_owned()))) - .or_insert_with(|| vec![LegacySpecificFilterType::ScriptInject(f.to_owned())]); - } - } - for (hash, bin) in v.uninject_script.0.iter() { - for f in bin { - db.entry(*hash) - .and_modify(|v| { - v.push(LegacySpecificFilterType::UnhideScriptInject(f.to_owned())) - }) - .or_insert_with(|| { - 
vec![LegacySpecificFilterType::UnhideScriptInject(f.to_owned())] - }); - } - } - for (hash, bin) in v.procedural_action.0.iter() { - for f in bin { - if let Ok(f) = serde_json::from_str::(f) { - if let Some((selector, style)) = f.as_css() { - db.entry(*hash) - .and_modify(|v| { - v.push(LegacySpecificFilterType::Style( - selector.clone(), - style.clone(), - )) - }) - .or_insert_with(|| { - vec![LegacySpecificFilterType::Style(selector, style)] - }); - } - } - } - } - for (hash, bin) in v.procedural_action_exception.0.iter() { - for f in bin { - if let Ok(f) = serde_json::from_str::(f) { - if let Some((selector, style)) = f.as_css() { - db.entry(*hash) - .and_modify(|v| { - v.push(LegacySpecificFilterType::UnhideStyle( - selector.to_owned(), - style.to_owned(), - )) - }) - .or_insert_with(|| { - vec![LegacySpecificFilterType::UnhideStyle( - selector.to_owned(), - style.to_owned(), - )] - }); - } - } - } - } - LegacyHostnameRuleDb { db } - } -} - -impl From for HostnameRuleDb { - fn from(val: LegacyHostnameRuleDb) -> Self { - use crate::cosmetic_filter_cache::HostnameFilterBin; - - let mut hide = HostnameFilterBin::default(); - let mut unhide = HostnameFilterBin::default(); - let mut procedural_action = HostnameFilterBin::default(); - let mut procedural_action_exception = HostnameFilterBin::default(); - let mut inject_script = HostnameFilterBin::default(); - let mut uninject_script = HostnameFilterBin::default(); - - for (hash, bin) in val.db.into_iter() { - for rule in bin.into_iter() { - match rule { - LegacySpecificFilterType::Hide(s) => hide.insert(&hash, s), - LegacySpecificFilterType::Unhide(s) => unhide.insert(&hash, s), - LegacySpecificFilterType::Style(s, st) => procedural_action - .insert_procedural_action_filter( - &hash, - &ProceduralOrActionFilter::from_css(s, st), - ), - LegacySpecificFilterType::UnhideStyle(s, st) => procedural_action_exception - .insert_procedural_action_filter( - &hash, - &ProceduralOrActionFilter::from_css(s, st), - ), - 
LegacySpecificFilterType::ScriptInject(s) => { - inject_script.insert(&hash, (s, Default::default())) - } - LegacySpecificFilterType::UnhideScriptInject(s) => { - uninject_script.insert(&hash, s) - } - } - } - } - HostnameRuleDb { - hide, - unhide, - inject_script, - uninject_script, - procedural_action, - procedural_action_exception, - } - } -} - -#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] -pub(crate) struct LegacyRedirectResource { - pub content_type: String, - pub data: String, -} - -#[derive(Serialize, Deserialize, Debug, PartialEq, Default)] -pub(crate) struct LegacyRedirectResourceStorage { - #[serde(serialize_with = "stabilize_hashmap_serialization")] - pub resources: HashMap, -} - -#[derive(Clone, Deserialize, Serialize)] -pub(crate) struct LegacyScriptletResource { - scriptlet: String, -} - -#[derive(Default, Deserialize, Serialize)] -pub(crate) struct LegacyScriptletResourceStorage { - #[serde(serialize_with = "stabilize_hashmap_serialization")] - resources: HashMap, -} - -/// Provides structural aggregration of referenced adblock engine data to allow for allocation-free -/// serialization. 
-#[derive(Serialize)] -pub(crate) struct SerializeFormat<'a> { - flatbuffer_memory: Vec, - - resources: LegacyRedirectResourceStorage, - - #[serde(serialize_with = "stabilize_hashset_serialization")] - simple_class_rules: &'a HashSet, - #[serde(serialize_with = "stabilize_hashset_serialization")] - simple_id_rules: &'a HashSet, - #[serde(serialize_with = "stabilize_hashmap_serialization")] - complex_class_rules: &'a HashMap>, - #[serde(serialize_with = "stabilize_hashmap_serialization")] - complex_id_rules: &'a HashMap>, - - specific_rules: LegacyHostnameRuleDb, - - #[serde(serialize_with = "stabilize_hashset_serialization")] - misc_generic_selectors: &'a HashSet, - - scriptlets: LegacyScriptletResourceStorage, - - #[serde(serialize_with = "stabilize_hashmap_serialization")] - procedural_action: &'a HashMap>, - #[serde(serialize_with = "stabilize_hashmap_serialization")] - procedural_action_exception: &'a HashMap>, -} - -impl SerializeFormat<'_> { - pub fn serialize(&self) -> Result, SerializationError> { - let mut output = super::ADBLOCK_RUST_DAT_MAGIC.to_vec(); - output.push(super::ADBLOCK_RUST_DAT_VERSION); - rmps::encode::write(&mut output, &self)?; - Ok(output) - } -} - -/// Structural representation of adblock engine data that can be built up from deserialization and -/// used directly to construct new `Engine` components without unnecessary allocation. 
-#[derive(Deserialize)] -pub(crate) struct DeserializeFormat { - flatbuffer_memory: Vec, - - _resources: LegacyRedirectResourceStorage, - - simple_class_rules: HashSet, - simple_id_rules: HashSet, - complex_class_rules: HashMap>, - complex_id_rules: HashMap>, - - specific_rules: LegacyHostnameRuleDb, - - misc_generic_selectors: HashSet, - - _scriptlets: LegacyScriptletResourceStorage, - - #[serde(default)] - procedural_action: HashMap>, - #[serde(default)] - procedural_action_exception: HashMap>, -} - -impl DeserializeFormat { - pub fn deserialize(serialized: &[u8]) -> Result { - let data = super::parse_dat_header(serialized)?; - let format: Self = rmps::decode::from_read(data)?; - Ok(format) - } -} - -impl<'a> From<(&'a VerifiedFlatbufferMemory, &'a CosmeticFilterCache)> for SerializeFormat<'a> { - fn from(v: (&'a VerifiedFlatbufferMemory, &'a CosmeticFilterCache)) -> Self { - let (memory, cfc) = v; - Self { - flatbuffer_memory: memory.data().to_vec(), - - resources: LegacyRedirectResourceStorage::default(), - - simple_class_rules: &cfc.simple_class_rules, - simple_id_rules: &cfc.simple_id_rules, - complex_class_rules: &cfc.complex_class_rules, - complex_id_rules: &cfc.complex_id_rules, - - specific_rules: (&cfc.specific_rules).into(), - - misc_generic_selectors: &cfc.misc_generic_selectors, - - scriptlets: LegacyScriptletResourceStorage::default(), - - procedural_action: &cfc.specific_rules.procedural_action.0, - procedural_action_exception: &cfc.specific_rules.procedural_action_exception.0, - } - } -} - -impl TryFrom for (VerifiedFlatbufferMemory, CosmeticFilterCache) { - fn try_from(v: DeserializeFormat) -> Result { - use crate::cosmetic_filter_cache::HostnameFilterBin; - - let mut specific_rules: HostnameRuleDb = v.specific_rules.into(); - specific_rules.procedural_action = HostnameFilterBin(v.procedural_action); - specific_rules.procedural_action_exception = - HostnameFilterBin(v.procedural_action_exception); - - let memory = 
VerifiedFlatbufferMemory::from_raw(v.flatbuffer_memory) - .map_err(DeserializationError::FlatBufferParsingError)?; - - Ok(( - memory, - CosmeticFilterCache { - simple_class_rules: v.simple_class_rules, - simple_id_rules: v.simple_id_rules, - complex_class_rules: v.complex_class_rules, - complex_id_rules: v.complex_id_rules, - - specific_rules, - - misc_generic_selectors: v.misc_generic_selectors, - }, - )) - } - - type Error = DeserializationError; -} diff --git a/src/data_format/utils.rs b/src/data_format/utils.rs deleted file mode 100644 index 3b3b3e81..00000000 --- a/src/data_format/utils.rs +++ /dev/null @@ -1,32 +0,0 @@ -//! Common utilities associated with serialization and deserialization of the `Engine` data into -//! binary formats. - -use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; - -use serde::{Serialize, Serializer}; - -/// Forces a `HashSet` to be serialized with a stable ordering by temporarily representing it as a -/// `BTreeSet`. -pub fn stabilize_hashset_serialization(set: &HashSet, s: S) -> Result -where - S: Serializer, - V: Ord + serde::Serialize, -{ - let stabilized: BTreeSet<&V> = set.iter().collect(); - stabilized.serialize(s) -} - -/// Forces a `HashMap` to be serialized with a stable ordering by temporarily representing it as a -/// `BTreeMap`. 
-pub fn stabilize_hashmap_serialization<S, K, V>(
-    set: &HashMap<K, V>,
-    s: S,
-) -> Result<S::Ok, S::Error>
-where
-    S: Serializer,
-    K: Ord + Serialize,
-    V: Serialize,
-{
-    let stabilized: BTreeMap<&K, &V> = set.iter().collect();
-    stabilized.serialize(s)
-}
diff --git a/src/engine.rs b/src/engine.rs
index ce1e6cd5..bc99908b 100644
--- a/src/engine.rs
+++ b/src/engine.rs
@@ -2,8 +2,15 @@
 
 use crate::blocker::{Blocker, BlockerResult};
 use crate::cosmetic_filter_cache::{CosmeticFilterCache, UrlSpecificResources};
-use crate::filters::fb_builder::make_flatbuffer;
-use crate::filters::fb_network::{FilterDataContext, FilterDataContextRef};
+use crate::cosmetic_filter_cache_builder::CosmeticFilterCacheBuilder;
+use crate::data_format::{deserialize_dat_file, serialize_dat_file, DeserializationError};
+use crate::filters::cosmetic::CosmeticFilter;
+use crate::filters::fb_builder::EngineFlatBuilder;
+use crate::filters::fb_network_builder::NetworkRulesBuilder;
+use crate::filters::filter_data_context::{FilterDataContext, FilterDataContextRef};
+use crate::filters::network::NetworkFilter;
+use crate::flatbuffers::containers::flat_serialize::FlatSerialize;
+use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory;
 use crate::lists::{FilterSet, ParseOptions};
 use crate::regex_manager::RegexManagerDiscardPolicy;
 use crate::request::Request;
@@ -53,14 +60,7 @@ pub struct Engine {
 
 impl Default for Engine {
     fn default() -> Self {
-        let filter_data_context = FilterDataContextRef::new(Default::default());
-
-        Self {
-            blocker: Blocker::from_context(FilterDataContextRef::clone(&filter_data_context)),
-            cosmetic_cache: CosmeticFilterCache::new(),
-            resources: ResourceStorage::default(),
-            filter_data_context,
-        }
+        Self::from_filter_set(FilterSet::new(false), false)
     }
 }
 
@@ -94,6 +94,16 @@ impl Engine {
         Self::from_filter_set(filter_set, optimize)
     }
 
+    #[cfg(test)]
+    pub(crate) fn cosmetic_cache(self) -> CosmeticFilterCache {
+        self.cosmetic_cache
+    }
+
+    #[cfg(test)]
+    pub(crate) fn filter_data_context(self) -> FilterDataContextRef {
+        self.filter_data_context
+    }
+
     /// Loads rules from the given `FilterSet`. It is recommended to use a `FilterSet` when adding
     /// rules from multiple sources.
     pub fn from_filter_set(set: FilterSet, optimize: bool) -> Self {
@@ -103,13 +113,15 @@ impl Engine {
             ..
         } = set;
 
-        let memory = make_flatbuffer(network_filters, optimize);
+        let memory = make_flatbuffer(network_filters, cosmetic_filters, optimize);
 
         let filter_data_context = FilterDataContext::new(memory);
 
         Self {
             blocker: Blocker::from_context(FilterDataContextRef::clone(&filter_data_context)),
-            cosmetic_cache: CosmeticFilterCache::from_rules(cosmetic_filters),
+            cosmetic_cache: CosmeticFilterCache::from_context(FilterDataContextRef::clone(
+                &filter_data_context,
+            )),
             resources: ResourceStorage::default(),
             filter_data_context,
         }
@@ -240,8 +252,9 @@ impl Engine {
     }
 
     /// Serializes the `Engine` into a binary format so that it can be quickly reloaded later.
-    pub fn serialize(&self) -> Result<Vec<u8>, crate::data_format::SerializationError> {
-        crate::data_format::serialize_engine(&self.filter_data_context.memory, &self.cosmetic_cache)
+    pub fn serialize(&self) -> Vec<u8> {
+        let data = self.filter_data_context.memory.data();
+        serialize_dat_file(data)
     }
 
     /// Deserialize the `Engine` from the binary format generated by `Engine::serialize`.
@@ -249,18 +262,22 @@ impl Engine {
     /// Note that the binary format has a built-in version number that may be incremented. There is
     /// no guarantee that later versions of the format will be deserializable across minor versions
     /// of adblock-rust; the format is provided only as a caching optimization.
-    pub fn deserialize(
-        &mut self,
-        serialized: &[u8],
-    ) -> Result<(), crate::data_format::DeserializationError> {
+    pub fn deserialize(&mut self, serialized: &[u8]) -> Result<(), DeserializationError> {
         let current_tags = self.blocker.tags_enabled();
-        let (memory, cosmetic_cache) = crate::data_format::deserialize_engine(serialized)?;
-        self.filter_data_context = FilterDataContext::new(memory);
+
+        let data = deserialize_dat_file(serialized)?;
+        let memory = VerifiedFlatbufferMemory::from_raw(data)
+            .map_err(DeserializationError::FlatBufferParsingError)?;
+
+        let context = FilterDataContext::new(memory);
+        self.filter_data_context = context;
         self.blocker =
             Blocker::from_context(FilterDataContextRef::clone(&self.filter_data_context));
         self.blocker
             .use_tags(&current_tags.iter().map(|s| &**s).collect::<Vec<_>>());
-        self.cosmetic_cache = cosmetic_cache;
+        self.cosmetic_cache = CosmeticFilterCache::from_context(FilterDataContextRef::clone(
+            &self.filter_data_context,
+        ));
         Ok(())
     }
 }
@@ -275,6 +292,21 @@ fn _assertions() {
     _assert_sync::<Engine>();
 }
 
+/// Serializes the network rules and then the cosmetic rules through one shared
+/// `EngineFlatBuilder`, and finishes them into a single flatbuffer.
+fn make_flatbuffer(
+    network_filters: Vec<NetworkFilter>,
+    cosmetic_filters: Vec<CosmeticFilter>,
+    optimize: bool,
+) -> VerifiedFlatbufferMemory {
+    let mut builder = EngineFlatBuilder::default();
+    let network_rules_builder = NetworkRulesBuilder::from_rules(network_filters, optimize);
+    let network_rules = FlatSerialize::serialize(network_rules_builder, &mut builder);
+    let cosmetic_rules = CosmeticFilterCacheBuilder::from_rules(cosmetic_filters);
+    let cosmetic_rules = FlatSerialize::serialize(cosmetic_rules, &mut builder);
+    builder.finish(network_rules, cosmetic_rules)
+}
+
 #[cfg(test)]
 #[path = "../tests/unit/engine.rs"]
 mod unit_tests;
diff --git a/src/filters/fb_builder.rs b/src/filters/fb_builder.rs
index 9775fa6b..6ea470dd 100644
--- a/src/filters/fb_builder.rs
+++ b/src/filters/fb_builder.rs
@@ -1,43 +1,18 @@
 //! Builder for creating flatbuffer with serialized engine.
-//! Currently the work in progress, therefore only some fields of Engine
-//! 
are serialized to flatbuffer. -//! The entry point is `FlatBufferBuilder::make_flatbuffer`. -use std::collections::{HashMap, HashSet}; -use std::vec; +use std::collections::HashMap; use flatbuffers::WIPOffset; -use crate::filters::network::{NetworkFilter, NetworkFilterMaskHelper}; -use crate::flatbuffers::containers::flat_multimap::FlatMultiMapBuilder; -use crate::flatbuffers::containers::flat_serialize::{FlatBuilder, FlatSerialize, WIPFlatVec}; +use crate::filters::fb_network_builder::NetworkFilterListBuilder; +use crate::flatbuffers::containers::flat_serialize::{FlatBuilder, WIPFlatVec}; use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory; -use crate::network_filter_list::token_histogram; -use crate::optimizer; -use crate::utils::{to_short_hash, Hash, ShortHash}; +use crate::utils::Hash; -use super::fb_network::flat::fb; - -pub(crate) enum NetworkFilterListId { - Csp = 0, - Exceptions = 1, - Importants = 2, - Redirects = 3, - RemoveParam = 4, - Filters = 5, - GenericHide = 6, - TaggedFiltersAll = 7, - Size = 8, -} - -#[derive(Default, Clone)] -struct NetworkFilterListBuilder { - filters: Vec, - optimize: bool, -} +use super::flat::fb; #[derive(Default)] -struct EngineFlatBuilder<'a> { +pub(crate) struct EngineFlatBuilder<'a> { fb_builder: flatbuffers::FlatBufferBuilder<'a>, unique_domains_hashes: Vec, unique_domains_hashes_map: HashMap, @@ -57,15 +32,16 @@ impl<'a> EngineFlatBuilder<'a> { pub fn finish( &mut self, network_rules: WIPFlatVec<'a, NetworkFilterListBuilder, EngineFlatBuilder<'a>>, + cosmetic_rules: WIPOffset>, ) -> VerifiedFlatbufferMemory { let unique_domains_hashes = Some(self.fb_builder.create_vector(&self.unique_domains_hashes)); - let network_rules = Some(network_rules); let engine = fb::Engine::create( self.raw_builder(), &fb::EngineArgs { - network_rules, + network_rules: Some(network_rules), unique_domains_hashes, + cosmetic_filters: Some(cosmetic_rules), }, ); self.raw_builder().finish(engine, None); @@ -82,269 +58,3 @@ impl<'a> 
FlatBuilder<'a> for EngineFlatBuilder<'a> { &mut self.fb_builder } } - -struct NetworkRulesBuilder { - lists: Vec, -} - -impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for &NetworkFilter { - type Output = WIPOffset>; - - fn serialize( - network_filter: &NetworkFilter, - builder: &mut EngineFlatBuilder<'a>, - ) -> WIPOffset> { - let opt_domains = network_filter.opt_domains.as_ref().map(|v| { - let mut o: Vec = v - .iter() - .map(|x| builder.get_or_insert_unique_domain_hash(x)) - .collect(); - o.sort_unstable(); - o.dedup(); - FlatSerialize::serialize(o, builder) - }); - - let opt_not_domains = network_filter.opt_not_domains.as_ref().map(|v| { - let mut o: Vec = v - .iter() - .map(|x| builder.get_or_insert_unique_domain_hash(x)) - .collect(); - o.sort_unstable(); - o.dedup(); - FlatSerialize::serialize(o, builder) - }); - - let modifier_option = network_filter - .modifier_option - .as_ref() - .map(|s| builder.create_string(s)); - - let hostname = network_filter - .hostname - .as_ref() - .map(|s| builder.create_string(s)); - - let tag = network_filter - .tag - .as_ref() - .map(|s| builder.create_string(s)); - - let patterns = if network_filter.filter.iter().len() > 0 { - let offsets: Vec> = network_filter - .filter - .iter() - .map(|s| builder.create_string(s)) - .collect(); - Some(FlatSerialize::serialize(offsets, builder)) - } else { - None - }; - - let raw_line = network_filter - .raw_line - .as_ref() - .map(|v| builder.create_string(v.as_str())); - - let network_filter = fb::NetworkFilter::create( - &mut builder.fb_builder, - &fb::NetworkFilterArgs { - mask: network_filter.mask.bits(), - patterns, - modifier_option, - opt_domains, - opt_not_domains, - hostname, - tag, - raw_line, - }, - ); - - network_filter - } -} - -impl NetworkFilterListBuilder { - fn new(optimize: bool) -> Self { - Self { - filters: vec![], - optimize, - } - } -} - -impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder { - type Output = WIPOffset>; - fn serialize( - 
rule_list: Self, - builder: &mut EngineFlatBuilder<'a>, - ) -> WIPOffset> { - let mut filter_map = HashMap::>>>::new(); - - let mut optimizable = HashMap::>::new(); - - // Compute tokens for all filters - let filter_tokens: Vec<_> = rule_list - .filters - .into_iter() - .map(|filter| { - let tokens = filter.get_tokens(); - (filter, tokens) - }) - .collect(); - - // compute the tokens' frequency histogram - let (total_number_of_tokens, tokens_histogram) = token_histogram(&filter_tokens); - - { - for (network_filter, multi_tokens) in filter_tokens.into_iter() { - let flat_filter = if !rule_list.optimize - || !optimizer::is_filter_optimizable_by_patterns(&network_filter) - { - Some(FlatSerialize::serialize(&network_filter, builder)) - } else { - None - }; - - for tokens in multi_tokens { - let mut best_token: ShortHash = 0; - let mut min_count = total_number_of_tokens + 1; - for token in tokens { - let token = to_short_hash(token); - match tokens_histogram.get(&token) { - None => { - min_count = 0; - best_token = token - } - Some(&count) if count < min_count => { - min_count = count; - best_token = token - } - _ => {} - } - } - - if let Some(flat_filter) = flat_filter { - filter_map.entry(best_token).or_default().push(flat_filter); - } else { - optimizable - .entry(best_token) - .or_default() - .push(network_filter.clone()); - } - } // tokens - } - } - - if rule_list.optimize { - // Sort the entries to ensure deterministic iteration order - let mut optimizable_entries: Vec<_> = optimizable.drain().collect(); - optimizable_entries.sort_unstable_by_key(|(token, _)| *token); - - for (token, v) in optimizable_entries { - let optimized = optimizer::optimize(v); - - for filter in optimized { - let flat_filter = FlatSerialize::serialize(&filter, builder); - filter_map.entry(token).or_default().push(flat_filter); - } - } - } else { - debug_assert!( - optimizable.is_empty(), - "Should be empty if optimization is off" - ); - } - - let flat_filter_map_builder = 
FlatMultiMapBuilder::from_filter_map(filter_map); - let flat_filter_map = FlatMultiMapBuilder::finish(flat_filter_map_builder, builder); - - fb::NetworkFilterList::create( - builder.raw_builder(), - &fb::NetworkFilterListArgs { - filter_map_index: Some(flat_filter_map.keys), - filter_map_values: Some(flat_filter_map.values), - }, - ) - } -} - -impl NetworkRulesBuilder { - pub fn from_rules(network_filters: Vec, optimize: bool) -> Self { - let mut lists = vec![]; - for list_id in 0..NetworkFilterListId::Size as usize { - // Don't optimize removeparam, since it can fuse filters without respecting distinct - let optimize = optimize && list_id != NetworkFilterListId::RemoveParam as usize; - lists.push(NetworkFilterListBuilder::new(optimize)); - } - let mut self_ = Self { lists }; - - let mut badfilter_ids: HashSet = HashSet::new(); - - // Collect badfilter ids in advance. - for filter in network_filters.iter() { - if filter.is_badfilter() { - badfilter_ids.insert(filter.get_id_without_badfilter()); - } - } - - for filter in network_filters.into_iter() { - // skip any bad filters - let filter_id = filter.get_id(); - if badfilter_ids.contains(&filter_id) || filter.is_badfilter() { - continue; - } - - // Redirects are independent of blocking behavior. - if filter.is_redirect() { - self_.add_filter(filter.clone(), NetworkFilterListId::Redirects); - } - type FilterId = NetworkFilterListId; - - let list_id: FilterId = if filter.is_csp() { - FilterId::Csp - } else if filter.is_removeparam() { - FilterId::RemoveParam - } else if filter.is_generic_hide() { - FilterId::GenericHide - } else if filter.is_exception() { - FilterId::Exceptions - } else if filter.is_important() { - FilterId::Importants - } else if filter.tag.is_some() && !filter.is_redirect() { - // `tag` + `redirect` is unsupported for now. 
- FilterId::TaggedFiltersAll - } else if (filter.is_redirect() && filter.also_block_redirect()) - || !filter.is_redirect() - { - FilterId::Filters - } else { - continue; - }; - - self_.add_filter(filter, list_id); - } - - self_ - } - - fn add_filter(&mut self, network_filter: NetworkFilter, list_id: NetworkFilterListId) { - self.lists[list_id as usize].filters.push(network_filter); - } -} - -impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkRulesBuilder { - type Output = WIPFlatVec<'a, NetworkFilterListBuilder, EngineFlatBuilder<'a>>; - fn serialize(value: Self, builder: &mut EngineFlatBuilder<'a>) -> Self::Output { - FlatSerialize::serialize(value.lists, builder) - } -} - -pub fn make_flatbuffer( - network_filters: Vec, - optimize: bool, -) -> VerifiedFlatbufferMemory { - let mut builder = EngineFlatBuilder::default(); - let network_rules_builder = NetworkRulesBuilder::from_rules(network_filters, optimize); - let network_rules = FlatSerialize::serialize(network_rules_builder, &mut builder); - builder.finish(network_rules) -} diff --git a/src/filters/fb_network.rs b/src/filters/fb_network.rs index 7e680502..4d5aebee 100644 --- a/src/filters/fb_network.rs +++ b/src/filters/fb_network.rs @@ -1,27 +1,13 @@ //! Flatbuffer-compatible versions of [NetworkFilter] and related functionality. 
-use std::collections::HashMap; - -use crate::filters::fb_builder::make_flatbuffer; +use crate::filters::filter_data_context::FilterDataContext; use crate::filters::network::{NetworkFilterMask, NetworkFilterMaskHelper, NetworkMatchable}; -use crate::flatbuffers::unsafe_tools::{fb_vector_to_slice, VerifiedFlatbufferMemory}; +use crate::flatbuffers::unsafe_tools::fb_vector_to_slice; use crate::regex_manager::RegexManager; use crate::request::Request; -use crate::utils::Hash; - -#[allow(unknown_lints)] -#[allow( - dead_code, - clippy::all, - unused_imports, - unsafe_code, - mismatched_lifetime_syntaxes -)] -#[path = "../flatbuffers/fb_network_filter_generated.rs"] -pub mod flat; -use flat::fb; +use crate::filters::flatbuffer_generated::fb; /// A list of string parts that can be matched against a URL. pub(crate) struct FlatPatterns<'a> { patterns: Option>>, @@ -75,43 +61,6 @@ impl ExactSizeIterator for FlatPatternsIterator<'_> { } } -#[cfg(feature = "single-thread")] -pub(crate) type FilterDataContextRef = std::rc::Rc; -#[cfg(not(feature = "single-thread"))] -pub(crate) type FilterDataContextRef = std::sync::Arc; - -// The struct is used to store the flatbuffer and supporting data -// for both network filter and cosmetic filters. -// Supposed to be stored via FilterDataContextRef to avoid copying the data. 
-pub(crate) struct FilterDataContext { - pub(crate) memory: VerifiedFlatbufferMemory, - pub(crate) unique_domains_hashes_map: HashMap, -} - -impl Default for FilterDataContext { - fn default() -> Self { - Self { - memory: make_flatbuffer(vec![], false), - unique_domains_hashes_map: HashMap::new(), - } - } -} - -impl FilterDataContext { - pub(crate) fn new(memory: VerifiedFlatbufferMemory) -> FilterDataContextRef { - // Reconstruct the unique_domains_hashes_map from the flatbuffer data - let root = memory.root(); - let mut unique_domains_hashes_map: HashMap = HashMap::new(); - for (index, hash) in root.unique_domains_hashes().iter().enumerate() { - unique_domains_hashes_map.insert(hash, index as u32); - } - FilterDataContextRef::new(Self { - memory, - unique_domains_hashes_map, - }) - } -} - /// Internal implementation of [NetworkFilter] that is compatible with flatbuffers. pub(crate) struct FlatNetworkFilter<'a> { key: u64, diff --git a/src/filters/fb_network_builder.rs b/src/filters/fb_network_builder.rs new file mode 100644 index 00000000..63e0f52a --- /dev/null +++ b/src/filters/fb_network_builder.rs @@ -0,0 +1,291 @@ +//! 
Structures to store network filters to flatbuffer + +use std::collections::{HashMap, HashSet}; + +use flatbuffers::WIPOffset; + +use crate::filters::fb_builder::EngineFlatBuilder; +use crate::filters::network::NetworkFilter; + +use crate::filters::network::NetworkFilterMaskHelper; +use crate::flatbuffers::containers::flat_multimap::FlatMultiMapBuilder; +use crate::flatbuffers::containers::flat_serialize::{FlatBuilder, FlatSerialize, WIPFlatVec}; +use crate::network_filter_list::token_histogram; +use crate::optimizer; +use crate::utils::{to_short_hash, Hash, ShortHash}; + +use super::flat::fb; + +pub(crate) enum NetworkFilterListId { + Csp = 0, + Exceptions = 1, + Importants = 2, + Redirects = 3, + RemoveParam = 4, + Filters = 5, + GenericHide = 6, + TaggedFiltersAll = 7, + Size = 8, +} + +#[derive(Default, Clone)] +pub(crate) struct NetworkFilterListBuilder { + filters: Vec, + optimize: bool, +} + +pub(crate) struct NetworkRulesBuilder { + lists: Vec, +} + +impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for &NetworkFilter { + type Output = WIPOffset>; + + fn serialize( + network_filter: &NetworkFilter, + builder: &mut EngineFlatBuilder<'a>, + ) -> WIPOffset> { + let opt_domains = network_filter.opt_domains.as_ref().map(|v| { + let mut o: Vec = v + .iter() + .map(|x| builder.get_or_insert_unique_domain_hash(x)) + .collect(); + o.sort_unstable(); + o.dedup(); + FlatSerialize::serialize(o, builder) + }); + + let opt_not_domains = network_filter.opt_not_domains.as_ref().map(|v| { + let mut o: Vec = v + .iter() + .map(|x| builder.get_or_insert_unique_domain_hash(x)) + .collect(); + o.sort_unstable(); + o.dedup(); + FlatSerialize::serialize(o, builder) + }); + + let modifier_option = network_filter + .modifier_option + .as_ref() + .map(|s| builder.create_string(s)); + + let hostname = network_filter + .hostname + .as_ref() + .map(|s| builder.create_string(s)); + + let tag = network_filter + .tag + .as_ref() + .map(|s| builder.create_string(s)); + + let patterns = if 
network_filter.filter.iter().len() > 0 { + let offsets: Vec> = network_filter + .filter + .iter() + .map(|s| builder.create_string(s)) + .collect(); + Some(FlatSerialize::serialize(offsets, builder)) + } else { + None + }; + + let raw_line = network_filter + .raw_line + .as_ref() + .map(|v| builder.create_string(v.as_str())); + + let network_filter = fb::NetworkFilter::create( + builder.raw_builder(), + &fb::NetworkFilterArgs { + mask: network_filter.mask.bits(), + patterns, + modifier_option, + opt_domains, + opt_not_domains, + hostname, + tag, + raw_line, + }, + ); + + network_filter + } +} + +impl NetworkFilterListBuilder { + fn new(optimize: bool) -> Self { + Self { + filters: vec![], + optimize, + } + } +} + +impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder { + type Output = WIPOffset>; + fn serialize( + rule_list: Self, + builder: &mut EngineFlatBuilder<'a>, + ) -> WIPOffset> { + let mut filter_map = HashMap::>>>::new(); + + let mut optimizable = HashMap::>::new(); + + // Compute tokens for all filters + let filter_tokens: Vec<_> = rule_list + .filters + .into_iter() + .map(|filter| { + let tokens = filter.get_tokens(); + (filter, tokens) + }) + .collect(); + + // compute the tokens' frequency histogram + let (total_number_of_tokens, tokens_histogram) = token_histogram(&filter_tokens); + + { + for (network_filter, multi_tokens) in filter_tokens.into_iter() { + let flat_filter = if !rule_list.optimize + || !optimizer::is_filter_optimizable_by_patterns(&network_filter) + { + Some(FlatSerialize::serialize(&network_filter, builder)) + } else { + None + }; + + for tokens in multi_tokens { + let mut best_token: ShortHash = 0; + let mut min_count = total_number_of_tokens + 1; + for token in tokens { + let token = to_short_hash(token); + match tokens_histogram.get(&token) { + None => { + min_count = 0; + best_token = token + } + Some(&count) if count < min_count => { + min_count = count; + best_token = token + } + _ => {} + } + } + + if 
let Some(flat_filter) = flat_filter { + filter_map.entry(best_token).or_default().push(flat_filter); + } else { + optimizable + .entry(best_token) + .or_default() + .push(network_filter.clone()); + } + } // tokens + } + } + + if rule_list.optimize { + // Sort the entries to ensure deterministic iteration order + let mut optimizable_entries: Vec<_> = optimizable.drain().collect(); + optimizable_entries.sort_unstable_by_key(|(token, _)| *token); + + for (token, v) in optimizable_entries { + let optimized = optimizer::optimize(v); + + for filter in optimized { + let flat_filter = FlatSerialize::serialize(&filter, builder); + filter_map.entry(token).or_default().push(flat_filter); + } + } + } else { + debug_assert!( + optimizable.is_empty(), + "Should be empty if optimization is off" + ); + } + + let flat_filter_map_builder = FlatMultiMapBuilder::from_filter_map(filter_map); + let flat_filter_map = FlatMultiMapBuilder::finish(flat_filter_map_builder, builder); + + fb::NetworkFilterList::create( + builder.raw_builder(), + &fb::NetworkFilterListArgs { + filter_map_index: Some(flat_filter_map.keys), + filter_map_values: Some(flat_filter_map.values), + }, + ) + } +} + +impl NetworkRulesBuilder { + pub fn from_rules(network_filters: Vec, optimize: bool) -> Self { + let mut lists = vec![]; + for list_id in 0..NetworkFilterListId::Size as usize { + // Don't optimize removeparam, since it can fuse filters without respecting distinct + let optimize = optimize && list_id != NetworkFilterListId::RemoveParam as usize; + lists.push(NetworkFilterListBuilder::new(optimize)); + } + let mut self_ = Self { lists }; + + let mut badfilter_ids: HashSet = HashSet::new(); + + // Collect badfilter ids in advance. 
+ for filter in network_filters.iter() { + if filter.is_badfilter() { + badfilter_ids.insert(filter.get_id_without_badfilter()); + } + } + + for filter in network_filters.into_iter() { + // skip any bad filters + let filter_id = filter.get_id(); + if badfilter_ids.contains(&filter_id) || filter.is_badfilter() { + continue; + } + + // Redirects are independent of blocking behavior. + if filter.is_redirect() { + self_.add_filter(filter.clone(), NetworkFilterListId::Redirects); + } + type FilterId = NetworkFilterListId; + + let list_id: FilterId = if filter.is_csp() { + FilterId::Csp + } else if filter.is_removeparam() { + FilterId::RemoveParam + } else if filter.is_generic_hide() { + FilterId::GenericHide + } else if filter.is_exception() { + FilterId::Exceptions + } else if filter.is_important() { + FilterId::Importants + } else if filter.tag.is_some() && !filter.is_redirect() { + // `tag` + `redirect` is unsupported for now. + FilterId::TaggedFiltersAll + } else if (filter.is_redirect() && filter.also_block_redirect()) + || !filter.is_redirect() + { + FilterId::Filters + } else { + continue; + }; + + self_.add_filter(filter, list_id); + } + + self_ + } + + fn add_filter(&mut self, network_filter: NetworkFilter, list_id: NetworkFilterListId) { + self.lists[list_id as usize].filters.push(network_filter); + } +} + +impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkRulesBuilder { + type Output = WIPFlatVec<'a, NetworkFilterListBuilder, EngineFlatBuilder<'a>>; + fn serialize(value: Self, builder: &mut EngineFlatBuilder<'a>) -> Self::Output { + FlatSerialize::serialize(value.lists, builder) + } +} diff --git a/src/filters/filter_data_context.rs b/src/filters/filter_data_context.rs new file mode 100644 index 00000000..985ef1eb --- /dev/null +++ b/src/filters/filter_data_context.rs @@ -0,0 +1,31 @@ +use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory; +use crate::utils::Hash; +use std::collections::HashMap; + +#[cfg(feature = "single-thread")] 
+pub(crate) type FilterDataContextRef = std::rc::Rc; +#[cfg(not(feature = "single-thread"))] +pub(crate) type FilterDataContextRef = std::sync::Arc; + +// The struct is used to store the flatbuffer and supporting data +// for both network filter and cosmetic filters. +// Supposed to be stored via FilterDataContextRef to avoid copying the data. +pub(crate) struct FilterDataContext { + pub(crate) memory: VerifiedFlatbufferMemory, + pub(crate) unique_domains_hashes_map: HashMap, +} + +impl FilterDataContext { + pub(crate) fn new(memory: VerifiedFlatbufferMemory) -> FilterDataContextRef { + // Reconstruct the unique_domains_hashes_map from the flatbuffer data + let root = memory.root(); + let mut unique_domains_hashes_map: HashMap = HashMap::new(); + for (index, hash) in root.unique_domains_hashes().iter().enumerate() { + unique_domains_hashes_map.insert(hash, index as u32); + } + FilterDataContextRef::new(Self { + memory, + unique_domains_hashes_map, + }) + } +} diff --git a/src/filters/mod.rs b/src/filters/mod.rs index c1702a2d..9d3394f8 100644 --- a/src/filters/mod.rs +++ b/src/filters/mod.rs @@ -6,4 +6,21 @@ mod network_matchers; pub mod cosmetic; pub(crate) mod fb_builder; pub(crate) mod fb_network; +pub(crate) mod fb_network_builder; +pub(crate) mod filter_data_context; pub mod network; + +#[allow(unknown_lints)] +#[allow( + dead_code, + clippy::all, + unused_imports, + unsafe_code, + mismatched_lifetime_syntaxes +)] +#[path = "../flatbuffers/fb_network_filter_generated.rs"] +mod flat; + +pub(crate) mod flatbuffer_generated { + pub use super::flat::fb; +} diff --git a/src/flatbuffers/containers/flat_map.rs b/src/flatbuffers/containers/flat_map.rs new file mode 100644 index 00000000..aaabd7db --- /dev/null +++ b/src/flatbuffers/containers/flat_map.rs @@ -0,0 +1,78 @@ +use std::marker::PhantomData; + +use crate::flatbuffers::containers; +use containers::flat_serialize::{FlatBuilder, FlatMapBuilderOutput, FlatSerialize}; +use containers::sorted_index::SortedIndex; 
+use flatbuffers::{Follow, Vector}; + +pub(crate) struct FlatMapView<'a, I: Ord, V, Keys> +where + Keys: SortedIndex, + V: Follow<'a>, +{ + keys: Keys, + values: Vector<'a, V>, + _phantom: PhantomData, +} + +impl<'a, I: Ord + Copy, V, Keys> FlatMapView<'a, I, V, Keys> +where + Keys: SortedIndex + Clone, + V: flatbuffers::Follow<'a>, +{ + pub fn new(keys: Keys, values: Vector<'a, V>) -> Self { + debug_assert!(keys.len() == values.len()); + Self { + keys, + values, + _phantom: PhantomData, + } + } + + #[cfg(test)] + pub fn len(&self) -> usize { + self.keys.len() + } + + pub fn get(&self, key: I) -> Option<>::Inner> { + let index = self.keys.partition_point(|x| *x < key); + if index < self.keys.len() && self.keys.get(index) == key { + Some(self.values.get(index)) + } else { + None + } + } +} + +pub(crate) struct FlatMapBuilder; + +impl FlatMapBuilder { + pub fn finish<'a, I, V, B: FlatBuilder<'a>>( + value: std::collections::HashMap, + builder: &mut B, + ) -> FlatMapBuilderOutput<'a, I, V, B> + where + I: FlatSerialize<'a, B> + Ord, + V: FlatSerialize<'a, B>, + { + let mut entries: Vec<_> = value.into_iter().collect(); + entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); + + let mut indexes = Vec::with_capacity(entries.len()); + let mut values = Vec::with_capacity(entries.len()); + + for (key, value) in entries.into_iter() { + indexes.push(FlatSerialize::serialize(key, builder)); + values.push(FlatSerialize::serialize(value, builder)); + } + + FlatMapBuilderOutput { + keys: builder.raw_builder().create_vector(&indexes), + values: builder.raw_builder().create_vector(&values), + } + } +} + +#[cfg(test)] +#[path = "../../../tests/unit/flatbuffers/containers/flat_map.rs"] +mod unit_tests; diff --git a/src/flatbuffers/containers/flat_multimap.rs b/src/flatbuffers/containers/flat_multimap.rs index 99b6255f..6ccb28a7 100644 --- a/src/flatbuffers/containers/flat_multimap.rs +++ b/src/flatbuffers/containers/flat_multimap.rs @@ -129,6 +129,9 @@ impl FlatMultiMapBuilder { } } 
+pub(crate) type FlatMapStringView<'a, V> = + FlatMultiMapView<'a, &'a str, V, Vector<'a, flatbuffers::ForwardsUOffset<&'a str>>>; + #[cfg(test)] #[path = "../../../tests/unit/flatbuffers/containers/flat_multimap.rs"] mod unit_tests; diff --git a/src/flatbuffers/containers/mod.rs b/src/flatbuffers/containers/mod.rs index 50164fd2..20eb251d 100644 --- a/src/flatbuffers/containers/mod.rs +++ b/src/flatbuffers/containers/mod.rs @@ -1,3 +1,4 @@ +pub(crate) mod flat_map; pub(crate) mod flat_multimap; pub(crate) mod flat_serialize; pub(crate) mod flat_set; diff --git a/src/flatbuffers/fb_network_filter.fbs b/src/flatbuffers/fb_network_filter.fbs index 332a91ae..c85698c0 100644 --- a/src/flatbuffers/fb_network_filter.fbs +++ b/src/flatbuffers/fb_network_filter.fbs @@ -9,9 +9,9 @@ namespace fb; table NetworkFilter { mask: uint32; // NetworkFilterMask (network.rs) - // These arrays contain sorted (ascending) indices in the |unique_domains_hashes| - // instead of the hashes themselves. This approach saves memory, as there - // typically aren’t many unique hashes + /// These arrays contain sorted (ascending) indices in the |unique_domains_hashes| + /// instead of the hashes themselves. This approach saves memory, as there + /// typically aren’t many unique hashes opt_domains: [uint32]; opt_not_domains: [uint32]; @@ -29,17 +29,87 @@ table NetworkFilterList { filter_map_values: [NetworkFilter] (required); } -// A root type containing a serialized Engine. -// Currently it contains only some of engine fields: -// network filters and supporing struct. +/// A table to store the most host-specific cosmetic rules. +/// Although, the most common kind of rule (see hostname_inject_script_* +/// and hostname_hide_*) are stored separately to save memory. +table HostnameSpecificRules { + /// Simple hide exception rules, e.g. `example.com#@#.ad`. + /// The content is the rule's CSS selector. + unhide: [string]; + + /// Rules to except a scriptlet to inject along with any arguments, e.g. 
+ /// `example.com#@#+js(acis, Number.isNan)`. + /// The content is the contents of the `+js(...)` syntax construct. + /// In practice, these rules are extremely rare in filter lists. + uninject_script: [string]; + + /// Procedural filters and/or filters with a [`CosmeticFilterAction`]. + /// Each is a [`ProceduralOrActionFilter`] struct serialized as JSON. + procedural_action: [string]; + + /// Exceptions for procedural filters and/or filters with a [`CosmeticFilterAction`]. + /// Each is a [`ProceduralOrActionFilter`] struct serialized as JSON. + procedural_action_exception: [string]; +} + +/// A table to store cosmetic filter rules (including supported structures). +table CosmeticFilters { + /// Rules that are just the CSS class of an element to be hidden on all sites, e.g. `##.ad`. + /// Stored as a flat_set. + simple_class_rules: [string] (required); + + /// Rules that are just the CSS id of an element to be hidden on all sites, e.g. `###banner`. + /// Stored as a flat_set. + simple_id_rules: [string] (required); + + /// Rules that are the CSS selector of an element to be hidden on all sites that do not fit + /// into any of the class or id buckets, e.g. `##a[href="https://malware.com"]` + /// Stored as a flat_set. + misc_generic_selectors: [string] (required); + + /// Rules that are the CSS selector of an element to be hidden on all sites, starting with a + /// class, e.g. `##.ad image`. + /// Stored as a multi-map `hostname_hash` => `selector` + complex_class_rules_index: [string] (required); + complex_class_rules_values: [string] (required); + + /// Rules that are the CSS selector of an element to be hidden on all sites, starting with an + /// id, e.g. `###banner > .text a`. + /// Stored as a multi-map `hostname_hash` => `selector` + complex_id_rules_index: [string] (required); + complex_id_rules_values: [string] (required); + + /// Simple hostname-specific hide rules, e.g. `example.com##.ad`. + /// Stored as a multi-map `hostname_hash` => `selector`. 
+ /// Doesn't belong to HostnameSpecificRules for performance reasons. + hostname_hide_index: [uint64] (required); + hostname_hide_values: [string] (required); + + /// Rules with a scriptlet to inject along with any arguments, e.g. + /// `example.com##+js(acis, Number.isNan)`. + /// Stored as a multi-map `hostname_hash` => `script_plus_permission_byte` + /// The content is the contents of the `+js(...)` syntax construct plus + /// last byte stores permission to save memory. + /// Doesn't belong to HostnameSpecificRules for performance reasons. + hostname_inject_script_index: [uint64] (required); + hostname_inject_script_values: [string] (required); + + /// A map to store the other host-specific cosmetic rules. + hostname_index: [uint64] (required); + hostname_values: [HostnameSpecificRules] (required); +} + +/// A root type containing a serialized Engine. table Engine { - // Contains several NetworkFilterList matching to different kinds of lists. - // The indexes are matching NetworkFilterListId. - // The size must be NetworkFilterListId::Size. + /// Contains several NetworkFilterList matching to different kinds of lists. + /// The indexes are matching NetworkFilterListId. + /// The size must be NetworkFilterListId::Size. network_rules: [NetworkFilterList] (required); - // Contains hashes for opt_(not)_domains. See opt_domains for details. + /// Contains hashes for opt_(not)_domains. See opt_domains for details. unique_domains_hashes: [uint64] (required); + + cosmetic_filters: CosmeticFilters (required); } root_type Engine; diff --git a/src/flatbuffers/fb_network_filter_generated.rs b/src/flatbuffers/fb_network_filter_generated.rs index 5b1e7ece..35e98f26 100644 --- a/src/flatbuffers/fb_network_filter_generated.rs +++ b/src/flatbuffers/fb_network_filter_generated.rs @@ -118,6 +118,9 @@ pub mod fb { .unwrap() } } + /// These arrays contain sorted (ascending) indices in the |unique_domains_hashes| + /// instead of the hashes themselves. 
This approach saves memory, as there + /// typically aren’t many unique hashes #[inline] pub fn opt_domains(&self) -> Option> { // Safety: @@ -654,9 +657,1243 @@ pub mod fb { ) } } + pub enum HostnameSpecificRulesOffset {} + #[derive(Copy, Clone, PartialEq)] + + /// A table to store the most host-specific cosmetic rules. + /// Although, the most common kind of rule (see hostname_inject_script_* + /// and hostname_hide_*) are stored separately to save memory. + pub struct HostnameSpecificRules<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for HostnameSpecificRules<'a> { + type Inner = HostnameSpecificRules<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } + } + + impl<'a> HostnameSpecificRules<'a> { + pub const VT_UNHIDE: flatbuffers::VOffsetT = 4; + pub const VT_UNINJECT_SCRIPT: flatbuffers::VOffsetT = 6; + pub const VT_PROCEDURAL_ACTION: flatbuffers::VOffsetT = 8; + pub const VT_PROCEDURAL_ACTION_EXCEPTION: flatbuffers::VOffsetT = 10; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + HostnameSpecificRules { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args HostnameSpecificRulesArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = HostnameSpecificRulesBuilder::new(_fbb); + if let Some(x) = args.procedural_action_exception { + builder.add_procedural_action_exception(x); + } + if let Some(x) = args.procedural_action { + builder.add_procedural_action(x); + } + if let Some(x) = args.uninject_script { + builder.add_uninject_script(x); + } + if let Some(x) = args.unhide { + builder.add_unhide(x); + } + builder.finish() + } + + pub fn unpack(&self) -> HostnameSpecificRulesT { + let unhide = self + .unhide() + .map(|x| 
x.iter().map(|s| s.to_string()).collect()); + let uninject_script = self + .uninject_script() + .map(|x| x.iter().map(|s| s.to_string()).collect()); + let procedural_action = self + .procedural_action() + .map(|x| x.iter().map(|s| s.to_string()).collect()); + let procedural_action_exception = self + .procedural_action_exception() + .map(|x| x.iter().map(|s| s.to_string()).collect()); + HostnameSpecificRulesT { + unhide, + uninject_script, + procedural_action, + procedural_action_exception, + } + } + + /// Simple hide exception rules, e.g. `example.com#@#.ad`. + /// The content is the rule's CSS selector. + #[inline] + pub fn unhide( + &self, + ) -> Option>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>, + >>(HostnameSpecificRules::VT_UNHIDE, None) + } + } + /// Rules to except a scriptlet to inject along with any arguments, e.g. + /// `example.com#@#+js(acis, Number.isNan)`. + /// The content is the contents of the `+js(...)` syntax construct. + /// In practice, these rules are extremely rare in filter lists. + #[inline] + pub fn uninject_script( + &self, + ) -> Option>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>, + >>(HostnameSpecificRules::VT_UNINJECT_SCRIPT, None) + } + } + /// Procedural filters and/or filters with a [`CosmeticFilterAction`]. + /// Each is a [`ProceduralOrActionFilter`] struct serialized as JSON. + #[inline] + pub fn procedural_action( + &self, + ) -> Option>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>, + >>(HostnameSpecificRules::VT_PROCEDURAL_ACTION, None) + } + } + /// Exceptions for procedural filters and/or filters with a [`CosmeticFilterAction`]. + /// Each is a [`ProceduralOrActionFilter`] struct serialized as JSON. 
+ #[inline] + pub fn procedural_action_exception( + &self, + ) -> Option>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>, + >>( + HostnameSpecificRules::VT_PROCEDURAL_ACTION_EXCEPTION, None + ) + } + } + } + + impl flatbuffers::Verifiable for HostnameSpecificRules<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>, + >>("unhide", Self::VT_UNHIDE, false)? + .visit_field::>, + >>("uninject_script", Self::VT_UNINJECT_SCRIPT, false)? + .visit_field::>, + >>("procedural_action", Self::VT_PROCEDURAL_ACTION, false)? + .visit_field::>, + >>( + "procedural_action_exception", + Self::VT_PROCEDURAL_ACTION_EXCEPTION, + false, + )? + .finish(); + Ok(()) + } + } + pub struct HostnameSpecificRulesArgs<'a> { + pub unhide: Option< + flatbuffers::WIPOffset>>, + >, + pub uninject_script: Option< + flatbuffers::WIPOffset>>, + >, + pub procedural_action: Option< + flatbuffers::WIPOffset>>, + >, + pub procedural_action_exception: Option< + flatbuffers::WIPOffset>>, + >, + } + impl<'a> Default for HostnameSpecificRulesArgs<'a> { + #[inline] + fn default() -> Self { + HostnameSpecificRulesArgs { + unhide: None, + uninject_script: None, + procedural_action: None, + procedural_action_exception: None, + } + } + } + + pub struct HostnameSpecificRulesBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> HostnameSpecificRulesBuilder<'a, 'b, A> { + #[inline] + pub fn add_unhide( + &mut self, + unhide: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + HostnameSpecificRules::VT_UNHIDE, + unhide, + ); + } + #[inline] + pub 
fn add_uninject_script( + &mut self, + uninject_script: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + HostnameSpecificRules::VT_UNINJECT_SCRIPT, + uninject_script, + ); + } + #[inline] + pub fn add_procedural_action( + &mut self, + procedural_action: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + HostnameSpecificRules::VT_PROCEDURAL_ACTION, + procedural_action, + ); + } + #[inline] + pub fn add_procedural_action_exception( + &mut self, + procedural_action_exception: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + HostnameSpecificRules::VT_PROCEDURAL_ACTION_EXCEPTION, + procedural_action_exception, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> HostnameSpecificRulesBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + HostnameSpecificRulesBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for HostnameSpecificRules<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("HostnameSpecificRules"); + ds.field("unhide", &self.unhide()); + ds.field("uninject_script", &self.uninject_script()); + ds.field("procedural_action", &self.procedural_action()); + ds.field( + "procedural_action_exception", + &self.procedural_action_exception(), + ); + ds.finish() + } + } + #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] + pub struct HostnameSpecificRulesT { + pub unhide: Option>, + pub uninject_script: Option>, + pub procedural_action: Option>, + pub procedural_action_exception: Option>, + } + impl Default for HostnameSpecificRulesT 
{ + fn default() -> Self { + Self { + unhide: None, + uninject_script: None, + procedural_action: None, + procedural_action_exception: None, + } + } + } + impl HostnameSpecificRulesT { + pub fn pack<'b, A: flatbuffers::Allocator + 'b>( + &self, + _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, + ) -> flatbuffers::WIPOffset> { + let unhide = self.unhide.as_ref().map(|x| { + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let uninject_script = self.uninject_script.as_ref().map(|x| { + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let procedural_action = self.procedural_action.as_ref().map(|x| { + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let procedural_action_exception = self.procedural_action_exception.as_ref().map(|x| { + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + HostnameSpecificRules::create( + _fbb, + &HostnameSpecificRulesArgs { + unhide, + uninject_script, + procedural_action, + procedural_action_exception, + }, + ) + } + } + pub enum CosmeticFiltersOffset {} + #[derive(Copy, Clone, PartialEq)] + + /// A table to store cosmetic filter rules (including supported structures). 
+ pub struct CosmeticFilters<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for CosmeticFilters<'a> { + type Inner = CosmeticFilters<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { + _tab: flatbuffers::Table::new(buf, loc), + } + } + } + + impl<'a> CosmeticFilters<'a> { + pub const VT_SIMPLE_CLASS_RULES: flatbuffers::VOffsetT = 4; + pub const VT_SIMPLE_ID_RULES: flatbuffers::VOffsetT = 6; + pub const VT_MISC_GENERIC_SELECTORS: flatbuffers::VOffsetT = 8; + pub const VT_COMPLEX_CLASS_RULES_INDEX: flatbuffers::VOffsetT = 10; + pub const VT_COMPLEX_CLASS_RULES_VALUES: flatbuffers::VOffsetT = 12; + pub const VT_COMPLEX_ID_RULES_INDEX: flatbuffers::VOffsetT = 14; + pub const VT_COMPLEX_ID_RULES_VALUES: flatbuffers::VOffsetT = 16; + pub const VT_HOSTNAME_HIDE_INDEX: flatbuffers::VOffsetT = 18; + pub const VT_HOSTNAME_HIDE_VALUES: flatbuffers::VOffsetT = 20; + pub const VT_HOSTNAME_INJECT_SCRIPT_INDEX: flatbuffers::VOffsetT = 22; + pub const VT_HOSTNAME_INJECT_SCRIPT_VALUES: flatbuffers::VOffsetT = 24; + pub const VT_HOSTNAME_INDEX: flatbuffers::VOffsetT = 26; + pub const VT_HOSTNAME_VALUES: flatbuffers::VOffsetT = 28; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + CosmeticFilters { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args CosmeticFiltersArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = CosmeticFiltersBuilder::new(_fbb); + if let Some(x) = args.hostname_values { + builder.add_hostname_values(x); + } + if let Some(x) = args.hostname_index { + builder.add_hostname_index(x); + } + if let Some(x) = args.hostname_inject_script_values { + builder.add_hostname_inject_script_values(x); + } + if let Some(x) = args.hostname_inject_script_index { + 
builder.add_hostname_inject_script_index(x); + } + if let Some(x) = args.hostname_hide_values { + builder.add_hostname_hide_values(x); + } + if let Some(x) = args.hostname_hide_index { + builder.add_hostname_hide_index(x); + } + if let Some(x) = args.complex_id_rules_values { + builder.add_complex_id_rules_values(x); + } + if let Some(x) = args.complex_id_rules_index { + builder.add_complex_id_rules_index(x); + } + if let Some(x) = args.complex_class_rules_values { + builder.add_complex_class_rules_values(x); + } + if let Some(x) = args.complex_class_rules_index { + builder.add_complex_class_rules_index(x); + } + if let Some(x) = args.misc_generic_selectors { + builder.add_misc_generic_selectors(x); + } + if let Some(x) = args.simple_id_rules { + builder.add_simple_id_rules(x); + } + if let Some(x) = args.simple_class_rules { + builder.add_simple_class_rules(x); + } + builder.finish() + } + + pub fn unpack(&self) -> CosmeticFiltersT { + let simple_class_rules = { + let x = self.simple_class_rules(); + x.iter().map(|s| s.to_string()).collect() + }; + let simple_id_rules = { + let x = self.simple_id_rules(); + x.iter().map(|s| s.to_string()).collect() + }; + let misc_generic_selectors = { + let x = self.misc_generic_selectors(); + x.iter().map(|s| s.to_string()).collect() + }; + let complex_class_rules_index = { + let x = self.complex_class_rules_index(); + x.iter().map(|s| s.to_string()).collect() + }; + let complex_class_rules_values = { + let x = self.complex_class_rules_values(); + x.iter().map(|s| s.to_string()).collect() + }; + let complex_id_rules_index = { + let x = self.complex_id_rules_index(); + x.iter().map(|s| s.to_string()).collect() + }; + let complex_id_rules_values = { + let x = self.complex_id_rules_values(); + x.iter().map(|s| s.to_string()).collect() + }; + let hostname_hide_index = { + let x = self.hostname_hide_index(); + x.into_iter().collect() + }; + let hostname_hide_values = { + let x = self.hostname_hide_values(); + x.iter().map(|s| 
s.to_string()).collect() + }; + let hostname_inject_script_index = { + let x = self.hostname_inject_script_index(); + x.into_iter().collect() + }; + let hostname_inject_script_values = { + let x = self.hostname_inject_script_values(); + x.iter().map(|s| s.to_string()).collect() + }; + let hostname_index = { + let x = self.hostname_index(); + x.into_iter().collect() + }; + let hostname_values = { + let x = self.hostname_values(); + x.iter().map(|t| t.unpack()).collect() + }; + CosmeticFiltersT { + simple_class_rules, + simple_id_rules, + misc_generic_selectors, + complex_class_rules_index, + complex_class_rules_values, + complex_id_rules_index, + complex_id_rules_values, + hostname_hide_index, + hostname_hide_values, + hostname_inject_script_index, + hostname_inject_script_values, + hostname_index, + hostname_values, + } + } + + /// Rules that are just the CSS class of an element to be hidden on all sites, e.g. `##.ad`. + /// Stored as a flat_set. + #[inline] + pub fn simple_class_rules( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_SIMPLE_CLASS_RULES, None) + .unwrap() + } + } + /// Rules that are just the CSS id of an element to be hidden on all sites, e.g. `###banner`. + /// Stored as a flat_set. + #[inline] + pub fn simple_id_rules( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_SIMPLE_ID_RULES, None) + .unwrap() + } + } + /// Rules that are the CSS selector of an element to be hidden on all sites that do not fit + /// into any of the class or id buckets, e.g. `##a[href="https://malware.com"]` + /// Stored as a flat_set. 
+ #[inline] + pub fn misc_generic_selectors( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_MISC_GENERIC_SELECTORS, None) + .unwrap() + } + } + /// Rules that are the CSS selector of an element to be hidden on all sites, starting with a + /// class, e.g. `##.ad image`. + /// Stored as a multi-map `hostname_hash` => `selector` + #[inline] + pub fn complex_class_rules_index( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_COMPLEX_CLASS_RULES_INDEX, None) + .unwrap() + } + } + #[inline] + pub fn complex_class_rules_values( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_COMPLEX_CLASS_RULES_VALUES, None) + .unwrap() + } + } + /// Rules that are the CSS selector of an element to be hidden on all sites, starting with an + /// id, e.g. `###banner > .text a`. 
+ /// Stored as a multi-map `hostname_hash` => `selector` + #[inline] + pub fn complex_id_rules_index( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_COMPLEX_ID_RULES_INDEX, None) + .unwrap() + } + } + #[inline] + pub fn complex_id_rules_values( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_COMPLEX_ID_RULES_VALUES, None) + .unwrap() + } + } + /// Simple hostname-specific hide rules, e.g. `example.com##.ad`. + /// Stored as a multi-map `hostname_hash` => `selector`. + /// Doesn't belong to HostnameSpecificRules for performance reasons. + #[inline] + pub fn hostname_hide_index(&self) -> flatbuffers::Vector<'a, u64> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + CosmeticFilters::VT_HOSTNAME_HIDE_INDEX, + None, + ) + .unwrap() + } + } + #[inline] + pub fn hostname_hide_values( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_HOSTNAME_HIDE_VALUES, None) + .unwrap() + } + } + /// Rules with a scriptlet to inject along with any arguments, e.g. + /// `example.com##+js(acis, Number.isNan)`. + /// Stored as a multi-map `hostname_hash` => `script_plus_permission_byte` + /// The content is the contents of the `+js(...)` syntax construct plus + /// last byte stores permission to save memory. + /// Doesn't belong to HostnameSpecificRules for performance reasons. 
+ #[inline] + pub fn hostname_inject_script_index(&self) -> flatbuffers::Vector<'a, u64> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_INDEX, + None, + ) + .unwrap() + } + } + #[inline] + pub fn hostname_inject_script_values( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_VALUES, None) + .unwrap() + } + } + /// A map to store the other host-specific cosmetic rules. + #[inline] + pub fn hostname_index(&self) -> flatbuffers::Vector<'a, u64> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + CosmeticFilters::VT_HOSTNAME_INDEX, + None, + ) + .unwrap() + } + } + #[inline] + pub fn hostname_values( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> + { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::, + >, + >>(CosmeticFilters::VT_HOSTNAME_VALUES, None) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for CosmeticFilters<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>, + >>("simple_class_rules", Self::VT_SIMPLE_CLASS_RULES, true)? + .visit_field::>, + >>("simple_id_rules", Self::VT_SIMPLE_ID_RULES, true)? + .visit_field::>, + >>( + "misc_generic_selectors", + Self::VT_MISC_GENERIC_SELECTORS, + true, + )? + .visit_field::>, + >>( + "complex_class_rules_index", + Self::VT_COMPLEX_CLASS_RULES_INDEX, + true, + )? 
+ .visit_field::>, + >>( + "complex_class_rules_values", + Self::VT_COMPLEX_CLASS_RULES_VALUES, + true, + )? + .visit_field::>, + >>( + "complex_id_rules_index", + Self::VT_COMPLEX_ID_RULES_INDEX, + true, + )? + .visit_field::>, + >>( + "complex_id_rules_values", + Self::VT_COMPLEX_ID_RULES_VALUES, + true, + )? + .visit_field::>>( + "hostname_hide_index", + Self::VT_HOSTNAME_HIDE_INDEX, + true, + )? + .visit_field::>, + >>("hostname_hide_values", Self::VT_HOSTNAME_HIDE_VALUES, true)? + .visit_field::>>( + "hostname_inject_script_index", + Self::VT_HOSTNAME_INJECT_SCRIPT_INDEX, + true, + )? + .visit_field::>, + >>( + "hostname_inject_script_values", + Self::VT_HOSTNAME_INJECT_SCRIPT_VALUES, + true, + )? + .visit_field::>>( + "hostname_index", + Self::VT_HOSTNAME_INDEX, + true, + )? + .visit_field::>, + >>("hostname_values", Self::VT_HOSTNAME_VALUES, true)? + .finish(); + Ok(()) + } + } + pub struct CosmeticFiltersArgs<'a> { + pub simple_class_rules: Option< + flatbuffers::WIPOffset>>, + >, + pub simple_id_rules: Option< + flatbuffers::WIPOffset>>, + >, + pub misc_generic_selectors: Option< + flatbuffers::WIPOffset>>, + >, + pub complex_class_rules_index: Option< + flatbuffers::WIPOffset>>, + >, + pub complex_class_rules_values: Option< + flatbuffers::WIPOffset>>, + >, + pub complex_id_rules_index: Option< + flatbuffers::WIPOffset>>, + >, + pub complex_id_rules_values: Option< + flatbuffers::WIPOffset>>, + >, + pub hostname_hide_index: Option>>, + pub hostname_hide_values: Option< + flatbuffers::WIPOffset>>, + >, + pub hostname_inject_script_index: + Option>>, + pub hostname_inject_script_values: Option< + flatbuffers::WIPOffset>>, + >, + pub hostname_index: Option>>, + pub hostname_values: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, + >, + } + impl<'a> Default for CosmeticFiltersArgs<'a> { + #[inline] + fn default() -> Self { + CosmeticFiltersArgs { + simple_class_rules: None, // required field + simple_id_rules: 
None, // required field + misc_generic_selectors: None, // required field + complex_class_rules_index: None, // required field + complex_class_rules_values: None, // required field + complex_id_rules_index: None, // required field + complex_id_rules_values: None, // required field + hostname_hide_index: None, // required field + hostname_hide_values: None, // required field + hostname_inject_script_index: None, // required field + hostname_inject_script_values: None, // required field + hostname_index: None, // required field + hostname_values: None, // required field + } + } + } + + pub struct CosmeticFiltersBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> CosmeticFiltersBuilder<'a, 'b, A> { + #[inline] + pub fn add_simple_class_rules( + &mut self, + simple_class_rules: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_SIMPLE_CLASS_RULES, + simple_class_rules, + ); + } + #[inline] + pub fn add_simple_id_rules( + &mut self, + simple_id_rules: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_SIMPLE_ID_RULES, + simple_id_rules, + ); + } + #[inline] + pub fn add_misc_generic_selectors( + &mut self, + misc_generic_selectors: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_MISC_GENERIC_SELECTORS, + misc_generic_selectors, + ); + } + #[inline] + pub fn add_complex_class_rules_index( + &mut self, + complex_class_rules_index: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_COMPLEX_CLASS_RULES_INDEX, + 
complex_class_rules_index, + ); + } + #[inline] + pub fn add_complex_class_rules_values( + &mut self, + complex_class_rules_values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_COMPLEX_CLASS_RULES_VALUES, + complex_class_rules_values, + ); + } + #[inline] + pub fn add_complex_id_rules_index( + &mut self, + complex_id_rules_index: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_COMPLEX_ID_RULES_INDEX, + complex_id_rules_index, + ); + } + #[inline] + pub fn add_complex_id_rules_values( + &mut self, + complex_id_rules_values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_COMPLEX_ID_RULES_VALUES, + complex_id_rules_values, + ); + } + #[inline] + pub fn add_hostname_hide_index( + &mut self, + hostname_hide_index: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_HIDE_INDEX, + hostname_hide_index, + ); + } + #[inline] + pub fn add_hostname_hide_values( + &mut self, + hostname_hide_values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_HIDE_VALUES, + hostname_hide_values, + ); + } + #[inline] + pub fn add_hostname_inject_script_index( + &mut self, + hostname_inject_script_index: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_INDEX, + hostname_inject_script_index, + ); + } + #[inline] + pub fn add_hostname_inject_script_values( + &mut self, + hostname_inject_script_values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, + >, + ) { + self.fbb_.push_slot_always::>( + 
CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_VALUES, + hostname_inject_script_values, + ); + } + #[inline] + pub fn add_hostname_index( + &mut self, + hostname_index: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_INDEX, + hostname_index, + ); + } + #[inline] + pub fn add_hostname_values( + &mut self, + hostname_values: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, + >, + ) { + self.fbb_.push_slot_always::>( + CosmeticFilters::VT_HOSTNAME_VALUES, + hostname_values, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> CosmeticFiltersBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + CosmeticFiltersBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required( + o, + CosmeticFilters::VT_SIMPLE_CLASS_RULES, + "simple_class_rules", + ); + self.fbb_ + .required(o, CosmeticFilters::VT_SIMPLE_ID_RULES, "simple_id_rules"); + self.fbb_.required( + o, + CosmeticFilters::VT_MISC_GENERIC_SELECTORS, + "misc_generic_selectors", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_COMPLEX_CLASS_RULES_INDEX, + "complex_class_rules_index", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_COMPLEX_CLASS_RULES_VALUES, + "complex_class_rules_values", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_COMPLEX_ID_RULES_INDEX, + "complex_id_rules_index", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_COMPLEX_ID_RULES_VALUES, + "complex_id_rules_values", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_HOSTNAME_HIDE_INDEX, + "hostname_hide_index", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_HOSTNAME_HIDE_VALUES, + "hostname_hide_values", + ); + self.fbb_.required( + o, + CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_INDEX, + "hostname_inject_script_index", + ); + self.fbb_.required( + o, + 
CosmeticFilters::VT_HOSTNAME_INJECT_SCRIPT_VALUES, + "hostname_inject_script_values", + ); + self.fbb_ + .required(o, CosmeticFilters::VT_HOSTNAME_INDEX, "hostname_index"); + self.fbb_ + .required(o, CosmeticFilters::VT_HOSTNAME_VALUES, "hostname_values"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for CosmeticFilters<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("CosmeticFilters"); + ds.field("simple_class_rules", &self.simple_class_rules()); + ds.field("simple_id_rules", &self.simple_id_rules()); + ds.field("misc_generic_selectors", &self.misc_generic_selectors()); + ds.field( + "complex_class_rules_index", + &self.complex_class_rules_index(), + ); + ds.field( + "complex_class_rules_values", + &self.complex_class_rules_values(), + ); + ds.field("complex_id_rules_index", &self.complex_id_rules_index()); + ds.field("complex_id_rules_values", &self.complex_id_rules_values()); + ds.field("hostname_hide_index", &self.hostname_hide_index()); + ds.field("hostname_hide_values", &self.hostname_hide_values()); + ds.field( + "hostname_inject_script_index", + &self.hostname_inject_script_index(), + ); + ds.field( + "hostname_inject_script_values", + &self.hostname_inject_script_values(), + ); + ds.field("hostname_index", &self.hostname_index()); + ds.field("hostname_values", &self.hostname_values()); + ds.finish() + } + } + #[non_exhaustive] + #[derive(Debug, Clone, PartialEq)] + pub struct CosmeticFiltersT { + pub simple_class_rules: Vec, + pub simple_id_rules: Vec, + pub misc_generic_selectors: Vec, + pub complex_class_rules_index: Vec, + pub complex_class_rules_values: Vec, + pub complex_id_rules_index: Vec, + pub complex_id_rules_values: Vec, + pub hostname_hide_index: Vec, + pub hostname_hide_values: Vec, + pub hostname_inject_script_index: Vec, + pub hostname_inject_script_values: Vec, + pub hostname_index: Vec, + pub hostname_values: Vec, + } + impl Default for 
CosmeticFiltersT { + fn default() -> Self { + Self { + simple_class_rules: Default::default(), + simple_id_rules: Default::default(), + misc_generic_selectors: Default::default(), + complex_class_rules_index: Default::default(), + complex_class_rules_values: Default::default(), + complex_id_rules_index: Default::default(), + complex_id_rules_values: Default::default(), + hostname_hide_index: Default::default(), + hostname_hide_values: Default::default(), + hostname_inject_script_index: Default::default(), + hostname_inject_script_values: Default::default(), + hostname_index: Default::default(), + hostname_values: Default::default(), + } + } + } + impl CosmeticFiltersT { + pub fn pack<'b, A: flatbuffers::Allocator + 'b>( + &self, + _fbb: &mut flatbuffers::FlatBufferBuilder<'b, A>, + ) -> flatbuffers::WIPOffset> { + let simple_class_rules = Some({ + let x = &self.simple_class_rules; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let simple_id_rules = Some({ + let x = &self.simple_id_rules; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let misc_generic_selectors = Some({ + let x = &self.misc_generic_selectors; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let complex_class_rules_index = Some({ + let x = &self.complex_class_rules_index; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let complex_class_rules_values = Some({ + let x = &self.complex_class_rules_values; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let complex_id_rules_index = Some({ + let x = &self.complex_id_rules_index; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let complex_id_rules_values = Some({ + let x = &self.complex_id_rules_values; + let w: Vec<_> = x.iter().map(|s| 
_fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let hostname_hide_index = Some({ + let x = &self.hostname_hide_index; + _fbb.create_vector(x) + }); + let hostname_hide_values = Some({ + let x = &self.hostname_hide_values; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let hostname_inject_script_index = Some({ + let x = &self.hostname_inject_script_index; + _fbb.create_vector(x) + }); + let hostname_inject_script_values = Some({ + let x = &self.hostname_inject_script_values; + let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); + _fbb.create_vector(&w) + }); + let hostname_index = Some({ + let x = &self.hostname_index; + _fbb.create_vector(x) + }); + let hostname_values = Some({ + let x = &self.hostname_values; + let w: Vec<_> = x.iter().map(|t| t.pack(_fbb)).collect(); + _fbb.create_vector(&w) + }); + CosmeticFilters::create( + _fbb, + &CosmeticFiltersArgs { + simple_class_rules, + simple_id_rules, + misc_generic_selectors, + complex_class_rules_index, + complex_class_rules_values, + complex_id_rules_index, + complex_id_rules_values, + hostname_hide_index, + hostname_hide_values, + hostname_inject_script_index, + hostname_inject_script_values, + hostname_index, + hostname_values, + }, + ) + } + } pub enum EngineOffset {} #[derive(Copy, Clone, PartialEq)] + /// A root type containing a serialized Engine. 
pub struct Engine<'a> { pub _tab: flatbuffers::Table<'a>, } @@ -674,6 +1911,7 @@ pub mod fb { impl<'a> Engine<'a> { pub const VT_NETWORK_RULES: flatbuffers::VOffsetT = 4; pub const VT_UNIQUE_DOMAINS_HASHES: flatbuffers::VOffsetT = 6; + pub const VT_COSMETIC_FILTERS: flatbuffers::VOffsetT = 8; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { @@ -690,6 +1928,9 @@ pub mod fb { args: &'args EngineArgs<'args>, ) -> flatbuffers::WIPOffset> { let mut builder = EngineBuilder::new(_fbb); + if let Some(x) = args.cosmetic_filters { + builder.add_cosmetic_filters(x); + } if let Some(x) = args.unique_domains_hashes { builder.add_unique_domains_hashes(x); } @@ -708,12 +1949,20 @@ pub mod fb { let x = self.unique_domains_hashes(); x.into_iter().collect() }; + let cosmetic_filters = { + let x = self.cosmetic_filters(); + Box::new(x.unpack()) + }; EngineT { network_rules, unique_domains_hashes, + cosmetic_filters, } } + /// Contains several NetworkFilterList matching to different kinds of lists. + /// The indexes are matching NetworkFilterListId. + /// The size must be NetworkFilterListId::Size. #[inline] pub fn network_rules( &self, @@ -729,6 +1978,7 @@ pub mod fb { .unwrap() } } + /// Contains hashes for opt_(not)_domains. See opt_domains for details. #[inline] pub fn unique_domains_hashes(&self) -> flatbuffers::Vector<'a, u64> { // Safety: @@ -743,6 +1993,20 @@ pub mod fb { .unwrap() } } + #[inline] + pub fn cosmetic_filters(&self) -> CosmeticFilters<'a> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>( + Engine::VT_COSMETIC_FILTERS, + None, + ) + .unwrap() + } + } } impl flatbuffers::Verifiable for Engine<'_> { @@ -761,6 +2025,11 @@ pub mod fb { Self::VT_UNIQUE_DOMAINS_HASHES, true, )? + .visit_field::>( + "cosmetic_filters", + Self::VT_COSMETIC_FILTERS, + true, + )? 
.finish(); Ok(()) } @@ -772,6 +2041,7 @@ pub mod fb { >, >, pub unique_domains_hashes: Option>>, + pub cosmetic_filters: Option>>, } impl<'a> Default for EngineArgs<'a> { #[inline] @@ -779,6 +2049,7 @@ pub mod fb { EngineArgs { network_rules: None, // required field unique_domains_hashes: None, // required field + cosmetic_filters: None, // required field } } } @@ -811,6 +2082,17 @@ pub mod fb { ); } #[inline] + pub fn add_cosmetic_filters( + &mut self, + cosmetic_filters: flatbuffers::WIPOffset>, + ) { + self.fbb_ + .push_slot_always::>( + Engine::VT_COSMETIC_FILTERS, + cosmetic_filters, + ); + } + #[inline] pub fn new( _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, ) -> EngineBuilder<'a, 'b, A> { @@ -827,6 +2109,8 @@ pub mod fb { .required(o, Engine::VT_NETWORK_RULES, "network_rules"); self.fbb_ .required(o, Engine::VT_UNIQUE_DOMAINS_HASHES, "unique_domains_hashes"); + self.fbb_ + .required(o, Engine::VT_COSMETIC_FILTERS, "cosmetic_filters"); flatbuffers::WIPOffset::new(o.value()) } } @@ -836,6 +2120,7 @@ pub mod fb { let mut ds = f.debug_struct("Engine"); ds.field("network_rules", &self.network_rules()); ds.field("unique_domains_hashes", &self.unique_domains_hashes()); + ds.field("cosmetic_filters", &self.cosmetic_filters()); ds.finish() } } @@ -844,12 +2129,14 @@ pub mod fb { pub struct EngineT { pub network_rules: Vec, pub unique_domains_hashes: Vec, + pub cosmetic_filters: Box, } impl Default for EngineT { fn default() -> Self { Self { network_rules: Default::default(), unique_domains_hashes: Default::default(), + cosmetic_filters: Default::default(), } } } @@ -867,11 +2154,16 @@ pub mod fb { let x = &self.unique_domains_hashes; _fbb.create_vector(x) }); + let cosmetic_filters = Some({ + let x = &self.cosmetic_filters; + x.pack(_fbb) + }); Engine::create( _fbb, &EngineArgs { network_rules, unique_domains_hashes, + cosmetic_filters, }, ) } diff --git a/src/flatbuffers/unsafe_tools.rs b/src/flatbuffers/unsafe_tools.rs index 15156546..3ce79137 100644 --- 
a/src/flatbuffers/unsafe_tools.rs +++ b/src/flatbuffers/unsafe_tools.rs @@ -1,6 +1,6 @@ //! Unsafe utility functions for working with flatbuffers and other low-level operations. -use crate::filters::fb_network::flat::fb; +use crate::filters::flatbuffer_generated::fb; // Minimum alignment for the beginning of the flatbuffer data. const MIN_ALIGNMENT: usize = 8; @@ -48,8 +48,8 @@ pub(crate) struct VerifiedFlatbufferMemory { } impl VerifiedFlatbufferMemory { - pub(crate) fn from_raw(data: Vec) -> Result { - let memory = Self::from_vec(data); + pub(crate) fn from_raw(data: &[u8]) -> Result { + let memory = Self::from_slice(data); // Verify that the data is a valid flatbuffer. let _ = fb::root_as_engine(memory.data())?; @@ -60,23 +60,27 @@ impl VerifiedFlatbufferMemory { // Creates a new VerifiedFlatbufferMemory from a builder. // Skip the verification, the builder must contains a valid FilterList. pub(crate) fn from_builder(builder: &flatbuffers::FlatBufferBuilder<'_>) -> Self { - let raw_data = builder.finished_data().to_vec(); - Self::from_vec(raw_data) + Self::from_slice(builder.finished_data()) } // Properly align the buffer to MIN_ALIGNMENT bytes. 
- pub(crate) fn from_vec(mut vec: Vec) -> Self { + pub(crate) fn from_slice(data: &[u8]) -> Self { + let mut vec = Vec::with_capacity(data.len() + MIN_ALIGNMENT); let shift = vec.as_ptr() as usize % MIN_ALIGNMENT; + let start = if shift == 0 { 0 } else { - vec.reserve(vec.len() + MIN_ALIGNMENT); // vec.as_ptr() is changed let shift = vec.as_ptr() as usize % MIN_ALIGNMENT; let padding = MIN_ALIGNMENT - shift; + assert!(vec.capacity() >= padding); vec.splice(0..0, vec![0u8; padding]); padding }; + vec.extend_from_slice(data); + assert!((vec.as_ptr() as usize + start) % MIN_ALIGNMENT == 0); + let memory = Self { raw_data: vec, start, diff --git a/src/lib.rs b/src/lib.rs index d6327d2d..2e9c84fa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,6 +20,8 @@ pub mod blocker; #[cfg(feature = "content-blocking")] pub mod content_blocking; pub mod cosmetic_filter_cache; +mod cosmetic_filter_cache_builder; +mod cosmetic_filter_utils; mod data_format; mod engine; pub mod filters; diff --git a/src/network_filter_list.rs b/src/network_filter_list.rs index b4d1c761..2b81ba4d 100644 --- a/src/network_filter_list.rs +++ b/src/network_filter_list.rs @@ -4,8 +4,9 @@ use std::{collections::HashMap, collections::HashSet, fmt}; use flatbuffers::ForwardsUOffset; -use crate::filters::fb_network::flat::fb; -use crate::filters::fb_network::{FilterDataContext, FlatNetworkFilter}; +use crate::filters::fb_network::FlatNetworkFilter; +use crate::filters::filter_data_context::FilterDataContext; +use crate::filters::flatbuffer_generated::fb; use crate::filters::network::{ NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper, NetworkMatchable, }; @@ -50,12 +51,6 @@ impl NetworkFilterMaskHelper for CheckResult { } } -#[derive(Debug, Clone)] -pub enum NetworkFilterListParsingError { - InvalidFlatbuffer(flatbuffers::InvalidFlatbuffer), - UniqueDomainsOutOfBounds(usize), -} - /// Internal structure to keep track of a collection of network filters. 
pub(crate) struct NetworkFilterList<'a> { pub(crate) list: fb::NetworkFilterList<'a>, diff --git a/src/resources/mod.rs b/src/resources/mod.rs index 7fda7af2..eb7c2321 100644 --- a/src/resources/mod.rs +++ b/src/resources/mod.rs @@ -101,6 +101,10 @@ impl PermissionMask { Self(bits) } + pub fn to_bits(&self) -> u8 { + self.0 + } + /// Can `filter_mask` authorize injecting a resource requiring `self` permissions? pub fn is_injectable_by(&self, filter_mask: PermissionMask) -> bool { // For any particular bit index, the scriptlet is injectable if: diff --git a/tests/legacy_harness.rs b/tests/legacy_harness.rs index a11d5449..37cf5be8 100644 --- a/tests/legacy_harness.rs +++ b/tests/legacy_harness.rs @@ -330,7 +330,7 @@ mod legacy_check_match { let mut engine_deserialized = Engine::default(); // second empty engine_deserialized.use_tags(tags); { - let engine_serialized = engine.serialize().unwrap(); + let engine_serialized = engine.serialize().to_vec(); engine_deserialized.deserialize(&engine_serialized).unwrap(); // override from serialized copy } @@ -404,7 +404,7 @@ mod legacy_check_match { ); let mut engine_deserialized = Engine::default(); // second empty { - let engine_serialized = engine.serialize().unwrap(); + let engine_serialized = engine.serialize().to_vec(); engine_deserialized.deserialize(&engine_serialized).unwrap(); // override from serialized copy } @@ -898,7 +898,7 @@ mod legacy_misc_tests { false, ); // enable debugging and disable optimizations - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); let mut engine2 = Engine::default(); engine2.deserialize(&serialized).unwrap(); diff --git a/tests/live.rs b/tests/live.rs index 402fd89c..fc75f278 100644 --- a/tests/live.rs +++ b/tests/live.rs @@ -282,11 +282,11 @@ fn check_live_redirects() { /// deserializing from it. 
fn stable_serialization_through_load() { let engine1 = Engine::from_filter_set(ALL_FILTERS.lock().unwrap().clone(), true); - let ser1 = engine1.serialize().unwrap(); + let ser1 = engine1.serialize().to_vec(); let mut engine2 = Engine::default(); engine2.deserialize(&ser1).unwrap(); - let ser2 = engine2.serialize().unwrap(); + let ser2 = engine2.serialize().to_vec(); assert_eq!(ser1, ser2); } diff --git a/tests/ublock-coverage.rs b/tests/ublock-coverage.rs index 2c3b406a..94aa7aca 100644 --- a/tests/ublock-coverage.rs +++ b/tests/ublock-coverage.rs @@ -174,7 +174,7 @@ fn check_specifics_default() { #[test] fn check_basic_works_after_deserialization() { let engine = get_blocker_engine(); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); let mut deserialized_engine = Engine::default(); deserialized_engine.deserialize(&serialized).unwrap(); diff --git a/tests/unit/cosmetic_filter_cache.rs b/tests/unit/cosmetic_filter_cache.rs index 7af960dc..7d2be339 100644 --- a/tests/unit/cosmetic_filter_cache.rs +++ b/tests/unit/cosmetic_filter_cache.rs @@ -1,6 +1,6 @@ #[cfg(test)] mod key_from_selector_tests { - use super::super::key_from_selector; + use crate::cosmetic_filter_utils::key_from_selector; #[test] fn no_escapes() { diff --git a/tests/unit/engine.rs b/tests/unit/engine.rs index ae7b6cea..674384bc 100644 --- a/tests/unit/engine.rs +++ b/tests/unit/engine.rs @@ -153,7 +153,7 @@ mod tests { let mut engine = Engine::from_rules(filters, Default::default()); engine.enable_tags(&["stuff"]); engine.enable_tags(&["brian"]); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize(); let mut deserialized_engine = Engine::default(); deserialized_engine.enable_tags(&["stuff"]); deserialized_engine.deserialize(&serialized).unwrap(); @@ -182,8 +182,8 @@ mod tests { #[test] fn deserialization_generate_simple() { let mut engine = Engine::from_rules(["ad-banner"], Default::default()); - let data = 
engine.serialize().unwrap(); - const EXPECTED_HASH: u64 = 14059407383857257100; + let data = engine.serialize().to_vec(); + const EXPECTED_HASH: u64 = 15201305923211912617; assert_eq!(hash(&data), EXPECTED_HASH, "{}", HASH_MISMATCH_MSG); engine.deserialize(&data).unwrap(); } @@ -192,8 +192,8 @@ mod tests { fn deserialization_generate_tags() { let mut engine = Engine::from_rules(["ad-banner$tag=abc"], Default::default()); engine.use_tags(&["abc"]); - let data = engine.serialize().unwrap(); - const EXPECTED_HASH: u64 = 1772924818985173219; + let data = engine.serialize().to_vec(); + const EXPECTED_HASH: u64 = 5114301339390262037; assert_eq!(hash(&data), EXPECTED_HASH, "{}", HASH_MISMATCH_MSG); engine.deserialize(&data).unwrap(); } @@ -207,7 +207,7 @@ mod tests { Resource::simple("noopcss", MimeType::TextCss, ""), ]); - let serialized = engine.serialize().unwrap(); + let serialized = engine.serialize().to_vec(); println!("Engine serialized: {:?}", serialized); engine.deserialize(&serialized).unwrap(); } @@ -216,12 +216,12 @@ mod tests { fn deserialization_brave_list() { let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]); let mut engine = Engine::from_rules_parametrised(rules, Default::default(), false, true); - let data = engine.serialize().unwrap(); + let data = engine.serialize().to_vec(); - let expected_hash = if cfg!(feature = "css-validation") { - 12046041060659687422 + let expected_hash: u64 = if cfg!(feature = "css-validation") { + 2942520321544562177 } else { - 11420623023091203502 + 17713004238689548675 }; assert_eq!(hash(&data), expected_hash, "{}", HASH_MISMATCH_MSG); diff --git a/tests/unit/flatbuffers/containers/flat_map.rs b/tests/unit/flatbuffers/containers/flat_map.rs new file mode 100644 index 00000000..80be0cfb --- /dev/null +++ b/tests/unit/flatbuffers/containers/flat_map.rs @@ -0,0 +1,95 @@ +#[allow(unknown_lints)] +#[allow( + dead_code, + clippy::all, + unused_imports, + unsafe_code, + mismatched_lifetime_syntaxes +)] +#[path = 
"./test_containers_generated.rs"] +pub mod flat; +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use super::super::*; + use super::flat::fb_test; + + // Helper function to create a Vector from a slice + fn create_vector_u32<'a>( + builder: &'a mut flatbuffers::FlatBufferBuilder, + data: &'a [u32], + ) -> flatbuffers::Vector<'a, u32> { + let vec_offset = builder.create_vector(data); + builder.finish(vec_offset, None); + let buf = builder.finished_data(); + flatbuffers::root::>(buf).unwrap() + } + + #[test] + fn test_empty_map() { + let index: &[u32] = &[]; + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let values = create_vector_u32(&mut builder, &[]); + let map = FlatMapView::new(index, values); + + assert_eq!(map.len(), 0); + assert!(map.get(1).is_none()); + } + + #[test] + fn test_multiple_elements() { + let index: &[u32] = &[1, 2, 4, 6, 100, 102]; + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let values = create_vector_u32(&mut builder, &[10, 20, 30, 40, 50, 60]); + + let map = FlatMapView::new(index, values); + + assert_eq!(map.len(), 6); + + assert_eq!(map.get(2), Some(20)); + assert_eq!(map.get(4), Some(30)); + assert_eq!(map.get(100), Some(50)); + assert_eq!(map.get(102), Some(60)); + assert!(map.get(103).is_none()); + } + + #[test] + fn test_string_builder() { + let mut builder = flatbuffers::FlatBufferBuilder::new(); + let mut map = HashMap::new(); + map.insert("b", "20"); + map.insert("a", "10"); + map.insert("c", "30"); + let map = FlatMapBuilder::finish(map, &mut builder); + + // Serialize to the test flatbuffer. + let test_map = fb_test::TestStringMap::create( + &mut builder, + &fb_test::TestStringMapArgs { + keys: Some(map.keys), + values: Some(map.values), + }, + ); + let root = fb_test::TestRoot::create( + &mut builder, + &fb_test::TestRootArgs { + test_string_map: Some(test_map), + ..Default::default() + }, + ); + builder.finish(root, None); + + // Load from the serialized test flatbuffer. 
+ let data = builder.finished_data(); + let root = fb_test::root_as_test_root(data).unwrap(); + let flat_map = root.test_string_map().unwrap(); + let map = FlatMapView::new(flat_map.keys(), flat_map.values()); + + assert_eq!(map.get("a").unwrap(), "10"); + assert_eq!(map.get("b").unwrap(), "20"); + assert_eq!(map.get("c").unwrap(), "30"); + assert!(map.get("d").is_none()); + assert!(map.get("").is_none()); + } +}