diff --git a/Cargo.toml b/Cargo.toml index 13e04cba..c8e8e3f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -78,11 +78,12 @@ harness = false [features] # If disabling default features, consider explicitly re-enabling the # "embedded-domain-resolver" feature. -default = ["embedded-domain-resolver", "full-regex-handling", "object-pooling"] +default = ["embedded-domain-resolver", "full-regex-handling", "object-pooling", "unsync-regex-caching"] full-domain-matching = [] metrics = [] full-regex-handling = [] -object-pooling = ["lifeguard"] +object-pooling = ["lifeguard"] # disables `Send` and `Sync` on `Engine`. +unsync-regex-caching = [] # disables `Send` and `Sync` on `Engine`. css-validation = ["cssparser", "selectors"] content-blocking = ["serde_json"] embedded-domain-resolver = ["addr"] # Requires setting an external domain resolver if disabled. diff --git a/README.md b/README.md index ffe9b957..abbfeb0f 100644 --- a/README.md +++ b/README.md @@ -88,3 +88,8 @@ By default, `adblock-rust` ships with a built-in domain resolution implementatio `adblock-rust` uses uBlock Origin-compatible resources for scriptlet injection and redirect rules. The `resource-assembler` feature allows `adblock-rust` to parse these resources directly from the file formats used by the uBlock Origin repository. + +### Thread safety + +The `object-pooling` and `unsync-regex-caching` features enable optimizations for rule matching speed and the amount of memory used by the engine. +These features can be disabled to make the engine `Send + Sync`, although it is recommended to only access the engine on a single thread to maintain optimal performance. diff --git a/src/data_format/legacy.rs b/src/data_format/legacy.rs index 70b2f138..91e069b6 100644 --- a/src/data_format/legacy.rs +++ b/src/data_format/legacy.rs @@ -195,7 +195,7 @@ impl From for NetworkFilter { id: v.id, opt_domains_union: v.opt_domains_union, opt_not_domains_union: v.opt_not_domains_union, - regex: std::sync::Arc::new(std::sync::RwLock::new(None)), + regex: crate::filters::network::RegexStorage::default(), } } } diff --git a/src/engine.rs b/src/engine.rs index a362f85c..7bdf16f4 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -268,6 +268,15 @@ impl Engine { } } +/// Static assertions for `Engine: Send + Sync` traits. +#[cfg(not(any(feature = "object-pooling", feature = "optimized-regex-cache")))] +fn _assertions() { + fn _assert_send() {} + fn _assert_sync() {} + + _assert_send::(); + _assert_sync::(); +} #[cfg(test)] mod tests { diff --git a/src/filters/network.rs b/src/filters/network.rs index 65a51dd7..f206950d 100644 --- a/src/filters/network.rs +++ b/src/filters/network.rs @@ -4,7 +4,7 @@ use once_cell::sync::Lazy; use crate::url_parser::parse_url; use std::fmt; -use std::sync::{Arc, RwLock}; +use std::sync::Arc; use crate::request; use crate::utils; @@ -425,6 +425,51 @@ fn parse_filter_options(raw_options: &str, opts: ParseOptions) -> Result>>>, + + /// RefCell allows for cloned NetworkFilters to point to the same Option and what is inside. + /// When the Regex hasn't been compiled, is stored, afterwards Arc to CompiledRegex + /// to avoid expensive cloning of the Regex itself. + /// Non-thread safe, should be used from a single thread. + #[cfg(feature = "unsync-regex-caching")] + pub(crate) regex: std::cell::RefCell>>, +} + +impl RegexStorage { + pub fn get(&self) -> Option> { + #[cfg(not(feature = "unsync-regex-caching"))] + if let Some(cache) = &*self.regex.read().unwrap() { + return Some(Arc::clone(cache)); + } + + #[cfg(feature = "unsync-regex-caching")] + if let Some(cache) = &*self.regex.borrow() { + return Some(Arc::clone(cache)); + } + None + } + + pub fn set(&self, regex: Arc) { + #[cfg(not(feature = "unsync-regex-caching"))] + { + *self.regex.write().unwrap() = Some(regex); + } + + #[cfg(feature = "unsync-regex-caching")] + { + *self.regex.borrow_mut() = Some(regex); + } + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct NetworkFilter { pub mask: NetworkFilterMask, @@ -445,15 +490,9 @@ pub struct NetworkFilter { pub opt_domains_union: Option, pub opt_not_domains_union: Option, - // Regex compild lazily, using "Interior Mutability" - // Arc (Atomic Reference Counter) allows for cloned NetworkFilters - // to point to the same RwLock and what is inside. - // RwLock allows for concurrent access when reading as well as writing - // from the inside. - // When the Regex hasn't been compiled, is stored, afterwards Arc to Some - // to avoid expensive cloning of the Regex itself. + // Regex compiled lazily and cached using interior mutability. #[serde(skip_serializing, skip_deserializing)] - pub(crate) regex: Arc>>> + pub(crate) regex: RegexStorage, } // TODO - restrict the API so that this is always true - i.e. lazy-calculate IDs from actual data, @@ -831,7 +870,7 @@ impl NetworkFilter { id: utils::fast_hash(&line), opt_domains_union, opt_not_domains_union, - regex: Arc::new(RwLock::new(None)) + regex: RegexStorage::default(), }) } @@ -1057,15 +1096,9 @@ impl NetworkMatchable for NetworkFilter { if !self.is_regex() && !self.is_complete_regex() { return Arc::new(CompiledRegex::MatchAll); } - // Create a new scope to contain the lifetime of the - // dynamic read borrow - { - let cache = self.regex.as_ref().read().unwrap(); - if cache.is_some() { - return cache.as_ref().unwrap().clone(); - } + if let Some(cache) = self.regex.get() { + return cache; } - let mut cache = self.regex.as_ref().write().unwrap(); let regex = compile_regex( &self.filter, self.is_right_anchor(), @@ -1073,7 +1106,7 @@ impl NetworkMatchable for NetworkFilter { self.is_complete_regex(), ); let arc_regex = Arc::new(regex); - *cache = Some(arc_regex.clone()); + self.regex.set(arc_regex.clone()); arc_regex } } diff --git a/tests/live.rs b/tests/live.rs index b1a19fca..cb8b4d1c 100644 --- a/tests/live.rs +++ b/tests/live.rs @@ -52,7 +52,7 @@ pub struct RemoteFilterSource { /// Fetch all filters once and store them in a lazy-loaded static variable to avoid unnecessary /// network traffic. -static ALL_FILTERS: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { +static ALL_FILTERS: once_cell::sync::Lazy> = once_cell::sync::Lazy::new(|| { async fn get_all_filters() -> adblock::lists::FilterSet { use futures::FutureExt; @@ -91,11 +91,11 @@ static ALL_FILTERS: once_cell::sync::Lazy = once_cell } let async_runtime = Runtime::new().expect("Could not start Tokio runtime"); - async_runtime.block_on(get_all_filters()) + std::sync::Mutex::new(async_runtime.block_on(get_all_filters())) }); fn get_blocker_engine() -> Engine { - let mut engine = Engine::from_filter_set(ALL_FILTERS.clone(), true); + let mut engine = Engine::from_filter_set(ALL_FILTERS.lock().unwrap().clone(), true); engine.use_tags(&["fb-embeds", "twitter-embeds"]); @@ -134,7 +134,7 @@ fn get_blocker_engine_deserialized_ios() -> Engine { .lines() .map(|s| s.to_owned()) .collect(); - + let engine = Engine::from_rules_parametrised(&filters, Default::default(), true, false); engine } @@ -204,7 +204,7 @@ fn check_live_deserialized_specific_urls() { fn check_live_from_filterlists() { let engine = get_blocker_engine(); let requests = load_requests(); - + for req in requests { let checked = engine.check_network_urls(&req.url, &req.sourceUrl, &req.r#type); assert_eq!(checked.matched, req.blocked, @@ -217,7 +217,7 @@ fn check_live_from_filterlists() { fn check_live_deserialized_file() { let engine = get_blocker_engine_deserialized(); let requests = load_requests(); - + for req in requests { println!("Checking {:?}", req); let checked = engine.check_network_urls(&req.url, &req.sourceUrl, &req.r#type); @@ -232,7 +232,7 @@ fn check_live_deserialized_file() { fn check_live_deserialized_ios() { let engine = get_blocker_engine_deserialized_ios(); let requests = load_requests(); - + for req in requests { let checked = engine.check_network_urls(&req.url, &req.sourceUrl, &req.r#type); assert_eq!(checked.matched, req.blocked, @@ -252,7 +252,7 @@ fn check_live_redirects() { let resources = assemble_web_accessible_resources(war_dir, redirect_engine_path); engine.use_resources(&resources); - { + { let checked = engine.check_network_urls( "https://c.amazon-adsystem.com/aax2/amzn_ads.js", "https://aussieexotics.com/", @@ -274,17 +274,17 @@ fn check_live_redirects() { checked.filter, checked.exception); assert!(checked.redirect.is_some()); } - + } #[test] /// Ensure that two different engines loaded from the same textual filter set serialize to /// identical buffers. fn stable_serialization() { - let engine1 = Engine::from_filter_set(ALL_FILTERS.clone(), true); + let engine1 = Engine::from_filter_set(ALL_FILTERS.lock().unwrap().clone(), true); let ser1 = engine1.serialize_raw().unwrap(); - let engine2 = Engine::from_filter_set(ALL_FILTERS.clone(), true); + let engine2 = Engine::from_filter_set(ALL_FILTERS.lock().unwrap().clone(), true); let ser2 = engine2.serialize_raw().unwrap(); assert_eq!(ser1, ser2); @@ -294,7 +294,7 @@ fn stable_serialization() { /// Ensure that one engine's serialization result can be exactly reproduced by another engine after /// deserializing from it. fn stable_serialization_through_load() { - let engine1 = Engine::from_filter_set(ALL_FILTERS.clone(), true); + let engine1 = Engine::from_filter_set(ALL_FILTERS.lock().unwrap().clone(), true); let ser1 = engine1.serialize_raw().unwrap(); let mut engine2 = Engine::new(true);