From 25f77e9521a88201ca1463762fabf1260a56d190 Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Fri, 19 Sep 2025 18:07:10 +0400 Subject: [PATCH 1/4] Allow to share resources using ResourceStorageBackend trait --- js/src/lib.rs | 15 -- src/engine.rs | 23 +-- src/resources/mod.rs | 11 +- src/resources/resource_storage.rs | 180 ++++++++++++++--------- tests/unit/blocker.rs | 94 +++++------- tests/unit/cosmetic_filter_cache.rs | 4 +- tests/unit/engine.rs | 12 +- tests/unit/resources/resource_storage.rs | 28 +++- 8 files changed, 200 insertions(+), 167 deletions(-) diff --git a/js/src/lib.rs b/js/src/lib.rs index ee8a7add..99517824 100644 --- a/js/src/lib.rs +++ b/js/src/lib.rs @@ -323,20 +323,6 @@ fn engine_clear_tags(mut cx: FunctionContext) -> JsResult { Ok(JsNull::new(&mut cx)) } -fn engine_add_resource(mut cx: FunctionContext) -> JsResult { - let this = cx.argument::>(0)?; - - let resource_arg = cx.argument::(1)?; - let resource: Resource = json_ffi::from_js(&mut cx, resource_arg)?; - - let success = if let Ok(mut engine) = this.0.lock() { - engine.add_resource(resource).is_ok() - } else { - cx.throw_error("Failed to acquire lock on engine")? - }; - Ok(cx.boolean(success)) -} - fn validate_request(mut cx: FunctionContext) -> JsResult { let url: String = cx.argument::(0)?.value(&mut cx); let source_url: String = cx.argument::(1)?.value(&mut cx); @@ -424,7 +410,6 @@ register_module!(mut m, { m.export_function("Engine_useResources", engine_use_resources)?; m.export_function("Engine_tagExists", engine_tag_exists)?; m.export_function("Engine_clearTags", engine_clear_tags)?; - m.export_function("Engine_addResource", engine_add_resource)?; m.export_function("validateRequest", validate_request)?; m.export_function("uBlockResources", ublock_resources)?; diff --git a/src/engine.rs b/src/engine.rs index bc99908b..7f52a9a1 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -14,7 +14,7 @@ use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory; use crate::lists::{FilterSet, ParseOptions}; use crate::regex_manager::RegexManagerDiscardPolicy; use crate::request::Request; -use crate::resources::{Resource, ResourceStorage}; +use crate::resources::{Resource, ResourceStorage, ResourceStorageBackend}; use std::collections::HashSet; @@ -187,17 +187,22 @@ impl Engine { self.blocker.tags_enabled().contains(&tag.to_owned()) } - /// Sets this engine's resources to be _only_ the ones provided in `resources`. + /// Sets this engine's [Resource]s to be _only_ the ones provided in `resources`. + /// + /// The resources will be held in-memory. If you have special caching, management, or sharing + /// requirements, consider [Engine::use_resource_storage] instead. pub fn use_resources(&mut self, resources: impl IntoIterator) { - self.resources = ResourceStorage::from_resources(resources); + let storage = crate::resources::InMemoryResourceStorage::from_resources(resources); + self.use_resource_storage(storage); } - /// Sets this engine's resources to additionally include `resource`. - pub fn add_resource( - &mut self, - resource: Resource, - ) -> Result<(), crate::resources::AddResourceError> { - self.resources.add_resource(resource) + /// Sets this engine's backend for [Resource] storage to a custom implementation of + /// [ResourceStorageBackend]. + /// + /// If you're okay with the [Engine] holding these resources in-memory, use + /// [Engine::use_resources] instead. + pub fn use_resource_storage(&mut self, resources: R) { + self.resources = ResourceStorage::from_backend(resources); } // Cosmetic filter functionality diff --git a/src/resources/mod.rs b/src/resources/mod.rs index eb7c2321..6c2c5274 100644 --- a/src/resources/mod.rs +++ b/src/resources/mod.rs @@ -15,7 +15,10 @@ pub mod resource_assembler; mod resource_storage; pub(crate) use resource_storage::parse_scriptlet_args; #[doc(inline)] -pub use resource_storage::{AddResourceError, ResourceStorage, ScriptletResourceError}; +pub use resource_storage::{ + AddResourceError, InMemoryResourceStorage, ResourceStorage, ResourceStorageBackend, + ScriptletResourceError, +}; use memchr::memrchr as find_char_reverse; use serde::{Deserialize, Serialize}; @@ -34,7 +37,7 @@ use serde::{Deserialize, Serialize}; /// ``` /// # use adblock::Engine; /// # use adblock::lists::ParseOptions; -/// # use adblock::resources::{MimeType, PermissionMask, Resource, ResourceType}; +/// # use adblock::resources::{MimeType, PermissionMask, Resource, ResourceStorage, ResourceType}; /// # let mut filter_set = adblock::lists::FilterSet::default(); /// # let untrusted_filters = vec![""]; /// # let trusted_filters = vec![""]; @@ -59,14 +62,14 @@ use serde::{Deserialize, Serialize}; /// let mut engine = Engine::from_filter_set(filter_set, true); /// // The `trusted-set-cookie` scriptlet cannot be injected without `COOKIE_ACCESS` /// // permission. -/// engine.add_resource(Resource { +/// engine.use_resources([Resource { /// name: "trusted-set-cookie.js".to_string(), /// aliases: vec![], /// kind: ResourceType::Mime(MimeType::ApplicationJavascript), /// content: base64::encode("document.cookie = '...';"), /// dependencies: vec![], /// permission: COOKIE_ACCESS, -/// }); +/// }]); /// ``` #[derive(Serialize, Deserialize, Clone, Copy, Default)] #[repr(transparent)] diff --git a/src/resources/resource_storage.rs b/src/resources/resource_storage.rs index 39150000..d8393dc3 100644 --- a/src/resources/resource_storage.rs +++ b/src/resources/resource_storage.rs @@ -10,14 +10,113 @@ use thiserror::Error; use super::{PermissionMask, Resource, ResourceType}; /// Unified resource storage for both redirects and scriptlets. -#[derive(Default)] +/// +/// By default, this uses an in-memory storage implementation, however this can be changed using +/// a custom [ResourceStorageBackend] if desired. pub struct ResourceStorage { + backend: Box, +} + +/// Loads an empty `InMemoryResourceStorage` backend. +impl Default for ResourceStorage { + fn default() -> Self { + Self { + backend: Box::new(InMemoryResourceStorage::default()), + } + } +} + +impl ResourceStorage { + pub fn from_backend(backend: S) -> Self { + Self { + backend: Box::new(backend), + } + } + + /// Constructor using an `InMemoryResourceStorage` as the backend with the given resources. + #[cfg(test)] + pub fn in_memory_from_resources(resources: impl IntoIterator) -> Self { + Self::from_backend(InMemoryResourceStorage::from_resources(resources)) + } +} + +/// Customizable backend for [Resource] storage. +/// Custom implementations could be used to enable (for example) sharing of resources between +/// multiple [crate::Engine]s, an on-disk backend, or special caching behavior. +pub trait ResourceStorageBackend { + /// Gets the resource associated with `resource_ident`, respecting aliases if necessary. + fn get_resource(&self, resource_ident: &str) -> Option; +} + +/// Default implementation of [ResourceStorageBackend] that stores all resources in memory. +#[derive(Default)] +pub struct InMemoryResourceStorage { /// Stores each resource by its canonical name resources: HashMap, /// Stores mappings from aliases to their canonical resource names aliases: HashMap, } +impl ResourceStorageBackend for InMemoryResourceStorage { + fn get_resource(&self, resource_ident: &str) -> Option { + let resource = if let Some(resource) = self.resources.get(resource_ident) { + Some(resource) + } else if let Some(canonical_name) = self.aliases.get(resource_ident) { + self.resources.get(canonical_name) + } else { + None + }; + + resource.cloned() + } +} + +impl InMemoryResourceStorage { + /// Convenience constructor that allows building storage for many resources at once. Errors are + /// silently consumed. + pub fn from_resources(resources: impl IntoIterator) -> Self { + let mut self_ = Self::default(); + + resources.into_iter().for_each(|resource| { + #[allow(clippy::unnecessary_lazy_evaluations)] + self_.add_resource(resource).unwrap_or_else(|_e| { + #[cfg(test)] + eprintln!("Failed to add resource: {:?}", _e) + }) + }); + + self_ + } + + /// Adds a resource to storage so that it can be retrieved later. + pub fn add_resource(&mut self, resource: Resource) -> Result<(), AddResourceError> { + if let ResourceType::Mime(content_type) = &resource.kind { + if !resource.dependencies.is_empty() && !content_type.supports_dependencies() { + return Err(AddResourceError::ContentTypeDoesNotSupportDependencies); + } + + // Ensure the resource contents are valid base64 (and utf8 if applicable) + let decoded = BASE64_STANDARD.decode(&resource.content)?; + if content_type.is_textual() { + let _ = String::from_utf8(decoded)?; + } + } + + for ident in std::iter::once(&resource.name).chain(resource.aliases.iter()) { + if self.resources.contains_key(ident) || self.aliases.contains_key(ident) { + return Err(AddResourceError::NameAlreadyAdded); + } + } + + resource.aliases.iter().for_each(|alias| { + self.aliases.insert(alias.clone(), resource.name.clone()); + }); + self.resources.insert(resource.name.clone(), resource); + + Ok(()) + } +} + /// Formats `arg` such that it either is a JSON string, or is safe to insert within a JSON string, /// depending on `QUOTED`. /// @@ -112,50 +211,6 @@ fn extract_function_name(fn_def: &str) -> Option<&str> { } impl ResourceStorage { - /// Convenience constructor that allows building storage for many resources at once. Errors are - /// silently consumed. - pub fn from_resources(resources: impl IntoIterator) -> Self { - let mut self_ = Self::default(); - - resources.into_iter().for_each(|resource| { - #[allow(clippy::unnecessary_lazy_evaluations)] - self_.add_resource(resource).unwrap_or_else(|_e| { - #[cfg(test)] - eprintln!("Failed to add resource: {:?}", _e) - }) - }); - - self_ - } - - /// Adds a resource to storage so that it can be retrieved later. - pub fn add_resource(&mut self, resource: Resource) -> Result<(), AddResourceError> { - if let ResourceType::Mime(content_type) = &resource.kind { - if !resource.dependencies.is_empty() && !content_type.supports_dependencies() { - return Err(AddResourceError::ContentTypeDoesNotSupportDependencies); - } - - // Ensure the resource contents are valid base64 (and utf8 if applicable) - let decoded = BASE64_STANDARD.decode(&resource.content)?; - if content_type.is_textual() { - let _ = String::from_utf8(decoded)?; - } - } - - for ident in std::iter::once(&resource.name).chain(resource.aliases.iter()) { - if self.resources.contains_key(ident) || self.aliases.contains_key(ident) { - return Err(AddResourceError::NameAlreadyAdded); - } - } - - resource.aliases.iter().for_each(|alias| { - self.aliases.insert(alias.clone(), resource.name.clone()); - }); - self.resources.insert(resource.name.clone(), resource); - - Ok(()) - } - /// Given the contents of the `+js(...)` parts of multiple filters, return a script string /// appropriate for injection in a page. pub fn get_scriptlet_resources<'a>( @@ -194,10 +249,10 @@ impl ResourceStorage { /// /// Note that no ordering is guaranteed; function definitions in JS can appear after they are /// used. - fn recursive_dependencies<'a: 'b, 'b>( - &'a self, + fn recursive_dependencies( + &self, new_dep: &str, - prev_deps: &mut Vec<&'b Resource>, + prev_deps: &mut Vec, filter_permission: PermissionMask, ) -> Result<(), ScriptletResourceError> { if prev_deps.iter().any(|dep| dep.name == new_dep) { @@ -206,9 +261,10 @@ impl ResourceStorage { let resource = self.get_permissioned_resource(new_dep, filter_permission)?; + let deps = resource.dependencies.clone(); prev_deps.push(resource); - for dep in resource.dependencies.iter() { + for dep in deps.iter() { self.recursive_dependencies(dep, prev_deps, filter_permission)?; } @@ -217,11 +273,11 @@ impl ResourceStorage { /// Given the contents of a single `+js(...)` filter part, return a scriptlet string /// appropriate for injection in a page. - fn get_scriptlet_resource<'a: 'b, 'b>( - &'a self, + fn get_scriptlet_resource( + &self, scriptlet_args: &str, filter_permission: PermissionMask, - required_deps: &mut Vec<&'b Resource>, + required_deps: &mut Vec, ) -> Result { // `unwrap` is safe because these are guaranteed valid at filter parsing. let scriptlet_args = parse_scriptlet_args(scriptlet_args).unwrap(); @@ -274,7 +330,7 @@ impl ResourceStorage { /// Get a data-URL formatted resource appropriate for a `$redirect` response. pub fn get_redirect_resource(&self, resource_ident: &str) -> Option { - let resource = self.get_internal_resource(resource_ident); + let resource = self.backend.get_resource(resource_ident); resource.and_then(|resource| { if !resource.permission.is_default() { @@ -291,26 +347,14 @@ impl ResourceStorage { }) } - /// Gets the resource associated with `resource_ident`, respecting aliases if necessary. - fn get_internal_resource(&self, resource_ident: &str) -> Option<&Resource> { - let resource = if let Some(resource) = self.resources.get(resource_ident) { - Some(resource) - } else if let Some(canonical_name) = self.aliases.get(resource_ident) { - self.resources.get(canonical_name) - } else { - None - }; - - resource - } - fn get_permissioned_resource( &self, scriptlet_name: &str, filter_permission: PermissionMask, - ) -> Result<&Resource, ScriptletResourceError> { + ) -> Result { let resource = self - .get_internal_resource(scriptlet_name) + .backend + .get_resource(scriptlet_name) .ok_or(ScriptletResourceError::NoMatchingScriptlet)?; if !resource.permission.is_injectable_by(filter_permission) { diff --git a/tests/unit/blocker.rs b/tests/unit/blocker.rs index 04dd76f6..7d0d8ebc 100644 --- a/tests/unit/blocker.rs +++ b/tests/unit/blocker.rs @@ -4,7 +4,7 @@ mod blocker_tests { use super::super::*; use crate::lists::parse_filters; use crate::request::Request; - use crate::resources::Resource; + use crate::resources::{Resource, ResourceStorage}; use base64::{engine::Engine as _, prelude::BASE64_STANDARD}; use std::collections::HashSet; use std::iter::FromIterator; @@ -85,15 +85,11 @@ mod blocker_tests { }; let blocker = Blocker::new(network_filters, &blocker_options); - let mut resources = ResourceStorage::default(); - - resources - .add_resource(Resource::simple( - "noop-0.1s.mp3", - crate::resources::MimeType::AudioMp3, - "mp3", - )) - .unwrap(); + let resources = ResourceStorage::in_memory_from_resources([Resource::simple( + "noop-0.1s.mp3", + crate::resources::MimeType::AudioMp3, + "mp3", + )]); let matched_rule = blocker.check(&request, &resources); assert!(!matched_rule.matched); @@ -129,15 +125,11 @@ mod blocker_tests { }; let blocker = Blocker::new(network_filters, &blocker_options); - let mut resources = ResourceStorage::default(); - - resources - .add_resource(Resource::simple( - "noop-0.1s.mp3", - crate::resources::MimeType::AudioMp3, - "mp3", - )) - .unwrap(); + let resources = ResourceStorage::in_memory_from_resources([Resource::simple( + "noop-0.1s.mp3", + crate::resources::MimeType::AudioMp3, + "mp3", + )]); let matched_rule = blocker.check(&request, &resources); assert!(!matched_rule.matched); @@ -168,15 +160,11 @@ mod blocker_tests { }; let blocker = Blocker::new(network_filters, &blocker_options); - let mut resources = ResourceStorage::default(); - - resources - .add_resource(Resource::simple( - "noop.txt", - crate::resources::MimeType::TextPlain, - "noop", - )) - .unwrap(); + let resources = ResourceStorage::in_memory_from_resources([Resource::simple( + "noop.txt", + crate::resources::MimeType::TextPlain, + "noop", + )]); let matched_rule = blocker.check(&request, &resources); assert!(matched_rule.matched); @@ -514,15 +502,11 @@ mod blocker_tests { }; let blocker = Blocker::new(network_filters, &blocker_options); - let mut resources = ResourceStorage::default(); - - resources - .add_resource(Resource::simple( - "noopjs", - crate::resources::MimeType::ApplicationJavascript, - "(() => {})()", - )) - .unwrap(); + let resources = ResourceStorage::in_memory_from_resources([Resource::simple( + "noopjs", + crate::resources::MimeType::ApplicationJavascript, + "(() => {})()", + )]); let result = blocker.check( &Request::new( @@ -979,26 +963,28 @@ mod blocker_tests { }; let blocker = Blocker::new(network_filters, &blocker_options); - let mut resources = ResourceStorage::default(); - fn add_simple_resource( - resources: &mut ResourceStorage, - identifier: &str, - ) -> Option { - resources - .add_resource(Resource::simple( - identifier, - crate::resources::MimeType::TextPlain, - identifier, - )) - .unwrap(); - Some(format!( + fn simple_resource(identifier: &str) -> Resource { + Resource::simple( + identifier, + crate::resources::MimeType::TextPlain, + identifier, + ) + } + fn simple_redirect(identifier: &str) -> String { + format!( "data:text/plain;base64,{}", BASE64_STANDARD.encode(identifier) - )) + ) } - let a_redirect = add_simple_resource(&mut resources, "a"); - let b_redirect = add_simple_resource(&mut resources, "b"); - let c_redirect = add_simple_resource(&mut resources, "c"); + let test_cases = ["a", "b", "c"]; + let resources = ResourceStorage::in_memory_from_resources(test_cases.map(simple_resource)); + let redirects = test_cases + .into_iter() + .map(simple_redirect) + .collect::>(); + let a_redirect = Some(redirects[0].clone()); + let b_redirect = Some(redirects[1].clone()); + let c_redirect = Some(redirects[2].clone()); let result = blocker.check( &Request::new( diff --git a/tests/unit/cosmetic_filter_cache.rs b/tests/unit/cosmetic_filter_cache.rs index 7d2be339..7b937f46 100644 --- a/tests/unit/cosmetic_filter_cache.rs +++ b/tests/unit/cosmetic_filter_cache.rs @@ -203,7 +203,7 @@ mod cosmetic_cache_tests { "c.g.cosmetic.net#@#+js(nowebrtc.js)", "d.g.cosmetic.net#@#+js()", ]); - let resources = ResourceStorage::from_resources([ + let resources = ResourceStorage::in_memory_from_resources([ Resource { name: "set-constant.js".into(), aliases: vec![], @@ -665,7 +665,7 @@ mod cosmetic_cache_tests { .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) .collect::>(), ); - let resources = ResourceStorage::from_resources([Resource { + let resources = ResourceStorage::in_memory_from_resources([Resource { name: "abort-on-property-read.js".into(), aliases: vec!["aopr".to_string()], kind: ResourceType::Template, diff --git a/tests/unit/engine.rs b/tests/unit/engine.rs index 674384bc..7ad00a6c 100644 --- a/tests/unit/engine.rs +++ b/tests/unit/engine.rs @@ -492,13 +492,11 @@ mod tests { ], Default::default()); let mut engine = Engine::from_filter_set(filter_set, false); - engine - .add_resource(Resource::simple( - "addthis.com/addthis_widget.js", - MimeType::ApplicationJavascript, - "window.addthis = undefined", - )) - .unwrap(); + engine.use_resources([Resource::simple( + "addthis.com/addthis_widget.js", + MimeType::ApplicationJavascript, + "window.addthis = undefined", + )]); let request = Request::new("https://s7.addthis.com/js/250/addthis_widget.js?pub=resto", "https://www.rhmodern.com/catalog/product/product.jsp?productId=prod14970086&categoryId=cat7150028", "script").unwrap(); let result = engine.check_network_request(&request); diff --git a/tests/unit/resources/resource_storage.rs b/tests/unit/resources/resource_storage.rs index b2b00054..4bc7309f 100644 --- a/tests/unit/resources/resource_storage.rs +++ b/tests/unit/resources/resource_storage.rs @@ -93,7 +93,7 @@ mod redirect_storage_tests { #[test] fn get_resource_by_name() { - let mut storage = ResourceStorage::default(); + let mut storage = InMemoryResourceStorage::default(); storage .add_resource(Resource::simple( "name.js", @@ -102,6 +102,10 @@ mod redirect_storage_tests { )) .unwrap(); + let storage = ResourceStorage { + backend: Box::new(storage), + }; + assert_eq!( storage.get_redirect_resource("name.js"), Some(format!( @@ -113,11 +117,15 @@ mod redirect_storage_tests { #[test] fn get_resource_by_alias() { - let mut storage = ResourceStorage::default(); + let mut storage = InMemoryResourceStorage::default(); let mut r = Resource::simple("name.js", MimeType::ApplicationJavascript, "resource data"); r.aliases.push("alias.js".to_string()); storage.add_resource(r).unwrap(); + let storage = ResourceStorage { + backend: Box::new(storage), + }; + assert_eq!( storage.get_redirect_resource("alias.js"), Some(format!( @@ -129,12 +137,16 @@ mod redirect_storage_tests { #[test] fn permissions() { - let mut storage = ResourceStorage::default(); + let mut storage = InMemoryResourceStorage::default(); let mut r = Resource::simple("name.js", MimeType::ApplicationJavascript, "resource data"); r.aliases.push("alias.js".to_string()); r.permission = PermissionMask::from_bits(0b00000001); storage.add_resource(r).unwrap(); + let storage = ResourceStorage { + backend: Box::new(storage), + }; + assert_eq!(storage.get_redirect_resource("name.js"), None,); assert_eq!(storage.get_redirect_resource("alias.js"), None,); } @@ -237,7 +249,7 @@ mod scriptlet_storage_tests { #[test] fn get_patched_scriptlets() { - let resources = ResourceStorage::from_resources([ + let resources = ResourceStorage::in_memory_from_resources([ Resource { name: "greet.js".to_string(), aliases: vec![], @@ -339,7 +351,7 @@ mod scriptlet_storage_tests { #[test] fn parse_template_file_format() { - let resources = ResourceStorage::from_resources([ + let resources = ResourceStorage::in_memory_from_resources([ Resource { name: "abort-current-inline-script.js".into(), aliases: vec!["acis.js".into()], @@ -446,7 +458,7 @@ mod scriptlet_storage_tests { /// cause a panic. #[test] fn patch_argslist_many_args() { - let resources = ResourceStorage::from_resources([Resource { + let resources = ResourceStorage::in_memory_from_resources([Resource { name: "abort-current-script.js".into(), aliases: vec!["acs.js".into()], kind: ResourceType::Mime(MimeType::ApplicationJavascript), @@ -477,7 +489,7 @@ mod scriptlet_storage_tests { const PERM01: PermissionMask = PermissionMask::from_bits(0b00000001); const PERM10: PermissionMask = PermissionMask::from_bits(0b00000010); const PERM11: PermissionMask = PermissionMask::from_bits(0b00000011); - let resources = ResourceStorage::from_resources([ + let resources = ResourceStorage::in_memory_from_resources([ Resource::simple( "default-perms.js", MimeType::ApplicationJavascript, @@ -566,7 +578,7 @@ mod scriptlet_storage_tests { #[test] fn dependencies() { const PERM01: PermissionMask = PermissionMask::from_bits(0b00000001); - let resources = ResourceStorage::from_resources([ + let resources = ResourceStorage::in_memory_from_resources([ Resource::simple("simple.fn", MimeType::FnJavascript, "simple"), Resource { name: "permissioned.fn".into(), From 4f400b8d4360dab5f3da0878f22ab34c7a9aa49f Mon Sep 17 00:00:00 2001 From: Anton Lazarev Date: Tue, 30 Sep 2025 22:04:15 -0700 Subject: [PATCH 2/4] support compilation without `single-thread` feature unfortunately this requires duplicating some definitions to support additional `Send + Sync` trait bounds, since Rust does not natively support conditional supertraits. --- src/engine.rs | 14 ++++++++++++++ src/resources/resource_storage.rs | 11 +++++++++++ 2 files changed, 25 insertions(+) diff --git a/src/engine.rs b/src/engine.rs index 7f52a9a1..ec46c8b3 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -201,6 +201,20 @@ impl Engine { /// /// If you're okay with the [Engine] holding these resources in-memory, use /// [Engine::use_resources] instead. + #[cfg(not(feature = "single-thread"))] + pub fn use_resource_storage( + &mut self, + resources: R, + ) { + self.resources = ResourceStorage::from_backend(resources); + } + + /// Sets this engine's backend for [Resource] storage to a custom implementation of + /// [ResourceStorageBackend]. + /// + /// If you're okay with the [Engine] holding these resources in-memory, use + /// [Engine::use_resources] instead. + #[cfg(feature = "single-thread")] pub fn use_resource_storage(&mut self, resources: R) { self.resources = ResourceStorage::from_backend(resources); } diff --git a/src/resources/resource_storage.rs b/src/resources/resource_storage.rs index d8393dc3..bf0d15fa 100644 --- a/src/resources/resource_storage.rs +++ b/src/resources/resource_storage.rs @@ -14,6 +14,9 @@ use super::{PermissionMask, Resource, ResourceType}; /// By default, this uses an in-memory storage implementation, however this can be changed using /// a custom [ResourceStorageBackend] if desired. pub struct ResourceStorage { + #[cfg(not(feature = "single-thread"))] + backend: Box, + #[cfg(feature = "single-thread")] backend: Box, } @@ -27,6 +30,14 @@ impl Default for ResourceStorage { } impl ResourceStorage { + #[cfg(not(feature = "single-thread"))] + pub fn from_backend(backend: S) -> Self { + Self { + backend: Box::new(backend), + } + } + + #[cfg(feature = "single-thread")] pub fn from_backend(backend: S) -> Self { Self { backend: Box::new(backend), From b949fc8046e31f68b933dd6ee69d373a4446e5c7 Mon Sep 17 00:00:00 2001 From: Anton Lazarev Date: Tue, 30 Sep 2025 21:34:12 -0700 Subject: [PATCH 3/4] regex-debug-info => debug-info --- Cargo.toml | 2 +- src/blocker.rs | 4 ++-- src/engine.rs | 17 +++++++++++++---- src/regex_manager.rs | 12 ++++++------ tests/unit/engine.rs | 8 ++++++++ tests/unit/filters/network_matchers.rs | 2 +- tests/unit/regex_manager.rs | 2 +- 7 files changed, 32 insertions(+), 15 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index cc0023bd..16f60c38 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -94,7 +94,7 @@ harness = false default = ["embedded-domain-resolver", "full-regex-handling", "single-thread"] full-regex-handling = [] single-thread = [] # disables `Send` and `Sync` on `Engine`. -regex-debug-info = [] +debug-info = [] css-validation = ["cssparser", "selectors"] content-blocking = [] embedded-domain-resolver = ["addr"] # Requires setting an external domain resolver if disabled. diff --git a/src/blocker.rs b/src/blocker.rs index d83861bf..dfb8d62d 100644 --- a/src/blocker.rs +++ b/src/blocker.rs @@ -487,13 +487,13 @@ impl Blocker { regex_manager.set_discard_policy(new_discard_policy); } - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub fn discard_regex(&self, regex_id: u64) { let mut regex_manager = self.borrow_regex_manager(); regex_manager.discard_regex(regex_id); } - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub fn get_regex_debug_info(&self) -> crate::regex_manager::RegexDebugInfo { let regex_manager = self.borrow_regex_manager(); regex_manager.get_debug_info() diff --git a/src/engine.rs b/src/engine.rs index ec46c8b3..6037b05a 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -58,6 +58,12 @@ pub struct Engine { filter_data_context: FilterDataContextRef, } +#[cfg(feature = "debug-info")] +pub struct EngineDebugInfo { + pub regex_debug_info: crate::regex_manager::RegexDebugInfo, + pub flatbuffer_size: usize, +} + impl Default for Engine { fn default() -> Self { Self::from_filter_set(FilterSet::new(false), false) @@ -260,14 +266,17 @@ impl Engine { self.blocker.set_regex_discard_policy(new_discard_policy); } - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub fn discard_regex(&mut self, regex_id: u64) { self.blocker.discard_regex(regex_id); } - #[cfg(feature = "regex-debug-info")] - pub fn get_regex_debug_info(&self) -> crate::regex_manager::RegexDebugInfo { - self.blocker.get_regex_debug_info() + #[cfg(feature = "debug-info")] + pub fn get_debug_info(&self) -> EngineDebugInfo { + EngineDebugInfo { + regex_debug_info: self.blocker.get_regex_debug_info(), + flatbuffer_size: self.filter_data_context.memory.data().len(), + } } /// Serializes the `Engine` into a binary format so that it can be quickly reloaded later. diff --git a/src/regex_manager.rs b/src/regex_manager.rs index e54da304..20ff910e 100644 --- a/src/regex_manager.rs +++ b/src/regex_manager.rs @@ -39,7 +39,7 @@ const DEFAULT_DISCARD_UNUSED_TIME: Duration = Duration::from_secs(180); /// Reports [`RegexManager`] metrics that may be useful for creating an optimized /// [`RegexManagerDiscardPolicy`]. -#[cfg(feature = "regex-debug-info")] +#[cfg(feature = "debug-info")] pub struct RegexDebugInfo { /// Information about each regex contained in the [`RegexManager`]. pub regex_data: Vec, @@ -48,7 +48,7 @@ pub struct RegexDebugInfo { } /// Describes metrics about a single regex from the [`RegexManager`]. -#[cfg(feature = "regex-debug-info")] +#[cfg(feature = "debug-info")] pub struct RegexDebugEntry { /// Id for this particular regex, which is constant and unique for its lifetime. /// @@ -312,7 +312,7 @@ impl RegexManager { } /// Discard one regex, identified by its id from a [`RegexDebugEntry`]. - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub fn discard_regex(&mut self, regex_id: u64) { self.map .iter_mut() @@ -322,7 +322,7 @@ impl RegexManager { }); } - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub(crate) fn get_debug_regex_data(&self) -> Vec { use itertools::Itertools; self.map @@ -336,13 +336,13 @@ impl RegexManager { .collect_vec() } - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub(crate) fn get_compiled_regex_count(&self) -> usize { self.compiled_regex_count } /// Collect metrics that may be useful for creating an optimized [`RegexManagerDiscardPolicy`]. - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] pub fn get_debug_info(&self) -> RegexDebugInfo { RegexDebugInfo { regex_data: self.get_debug_regex_data(), diff --git a/tests/unit/engine.rs b/tests/unit/engine.rs index 7ad00a6c..ea3883c2 100644 --- a/tests/unit/engine.rs +++ b/tests/unit/engine.rs @@ -218,6 +218,14 @@ mod tests { let mut engine = Engine::from_rules_parametrised(rules, Default::default(), false, true); let data = engine.serialize().to_vec(); + #[cfg(feature = "debug-info")] + { + let debug_info = engine.get_debug_info(); + let expected_size = 8_527_344_f32; + assert!(debug_info.flatbuffer_size >= (expected_size * 0.99) as usize); + assert!(debug_info.flatbuffer_size <= (expected_size * 1.01) as usize); + } + let expected_hash: u64 = if cfg!(feature = "css-validation") { 2942520321544562177 } else { diff --git a/tests/unit/filters/network_matchers.rs b/tests/unit/filters/network_matchers.rs index 54392eab..96be7d58 100644 --- a/tests/unit/filters/network_matchers.rs +++ b/tests/unit/filters/network_matchers.rs @@ -678,7 +678,7 @@ mod match_tests { #[test] #[ignore] // Not going to handle lookaround regexes - #[cfg(feature = "regex-debug-info")] + #[cfg(feature = "debug-info")] fn check_lookaround_regex_handled() { { let filter = r#"/^https?:\/\/([0-9a-z\-]+\.)?(9anime|animeland|animenova|animeplus|animetoon|animewow|gamestorrent|goodanime|gogoanime|igg-games|kimcartoon|memecenter|readcomiconline|toonget|toonova|watchcartoononline)\.[a-z]{2,4}\/(?!([Ee]xternal|[Ii]mages|[Ss]cripts|[Uu]ploads|ac|ajax|assets|combined|content|cov|cover|(img\/bg)|(img\/icon)|inc|jwplayer|player|playlist-cat-rss|static|thumbs|wp-content|wp-includes)\/)(.*)/$image,other,script,~third-party,xmlhttprequest,domain=~animeland.hu"#; diff --git a/tests/unit/regex_manager.rs b/tests/unit/regex_manager.rs index 8ffe1ff4..fdd9d107 100644 --- a/tests/unit/regex_manager.rs +++ b/tests/unit/regex_manager.rs @@ -1,4 +1,4 @@ -#[cfg(all(test, feature = "regex-debug-info"))] +#[cfg(all(test, feature = "debug-info"))] mod tests { use super::super::*; From 21ffdbbbb2e272756d65ba8822dbe8ffaec40f78 Mon Sep 17 00:00:00 2001 From: Anton Lazarev Date: Thu, 9 Oct 2025 11:13:58 -0700 Subject: [PATCH 4/4] add test for resource storage sharing --- tests/unit/resources/resource_storage.rs | 68 ++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/tests/unit/resources/resource_storage.rs b/tests/unit/resources/resource_storage.rs index 4bc7309f..2c597a91 100644 --- a/tests/unit/resources/resource_storage.rs +++ b/tests/unit/resources/resource_storage.rs @@ -805,3 +805,71 @@ mod scriptlet_storage_tests { assert_eq!(resources.get_scriptlet_resources([("test, 1", PERM01), ("test-wrapper, 2", PERM01), ("shared, 3", Default::default())]), "permissioned\na\ncommon\nb\nfunction test() {}\nfunction testWrapper() { test(arguments) }\nfunction shared() { }\ntry {\ntest(\"1\")\n} catch ( e ) { }\ntry {\ntestWrapper(\"2\")\n} catch ( e ) { }\ntry {\nshared(\"3\")\n} catch ( e ) { }\n"); } } + +#[cfg(all(test, feature = "single-thread"))] +mod shared_storage_tests { + use super::super::*; + use crate::resources::MimeType; + + use std::rc::Rc; + + /// To be wrapped in [Rc] for shared access across engines. + struct BraveCoreResourceStorageInner { + /// Stores each resource by its canonical name + resources: HashMap, + } + + #[derive(Clone)] + struct BraveCoreResourceStorage { + shared_storage: Rc, + } + + impl ResourceStorageBackend for BraveCoreResourceStorage { + fn get_resource(&self, resource_ident: &str) -> Option { + self.shared_storage.resources.get(resource_ident).cloned() + } + } + + #[test] + fn share_resources() { + let shared_storage = Rc::new(BraveCoreResourceStorageInner { + resources: HashMap::from_iter([( + "test-scriptlet.js".to_string(), + Resource::simple( + "test-scriptlet", + MimeType::ApplicationJavascript, + "success!", + ), + )]), + }); + + let mut engine1 = + crate::Engine::from_rules(["example1.com##+js(test-scriptlet)"], Default::default()); + engine1.use_resource_storage(BraveCoreResourceStorage { + shared_storage: Rc::clone(&shared_storage), + }); + + let mut engine2 = + crate::Engine::from_rules(["example2.com##+js(test-scriptlet)"], Default::default()); + engine2.use_resource_storage(BraveCoreResourceStorage { + shared_storage: Rc::clone(&shared_storage), + }); + + assert!(engine1 + .url_cosmetic_resources("https://example1.com") + .injected_script + .contains("success!")); + assert!(!engine1 + .url_cosmetic_resources("https://example2.com") + .injected_script + .contains("success!")); + assert!(!engine2 + .url_cosmetic_resources("https://example1.com") + .injected_script + .contains("success!")); + assert!(engine2 + .url_cosmetic_resources("https://example2.com") + .injected_script + .contains("success!")); + } +}