Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "adblock"
version = "0.11.1"
version = "0.12.0"
authors = ["Anton Lazarev <alazarev@brave.com>", "Andrius Aucinas"]
edition = "2021"

Expand Down Expand Up @@ -39,6 +39,7 @@ rustc-hash = { version = "1.1.0", default-features = false }
memchr = "2.4"
base64 = "0.22"
rmp-serde = "0.15"
arrayvec = "0.7"
cssparser = { version = "0.34", optional = true }
selectors = { version = "0.26", optional = true }
precomputed-hash = "0.1"
Expand Down
2 changes: 1 addition & 1 deletion js/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "adblock-rs"
version = "0.11.1"
version = "0.12.0"
authors = ["Anton Lazarev <alazarev@brave.com>", "Andrius Aucinas"]
edition = "2021"
license = "MPL-2.0"
Expand Down
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "adblock-rs",
"version": "0.11.1",
"version": "0.12.0",
"description": "Very fast, Rust-based, native implementation of ad-blocker engine for Node",
"keywords": [
"adblock",
Expand Down
28 changes: 19 additions & 9 deletions src/blocker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ pub struct Blocker {
pub(crate) filter_data_context: FilterDataContextRef,
}

/// Guard type handed out by `borrow_regex_manager`; the concrete type is
/// selected at compile time by the `single-thread` feature.
///
/// With `single-thread` enabled it is a `std::cell::RefMut`.
#[cfg(feature = "single-thread")]
pub(crate) type RegexManagerRef<'a> = std::cell::RefMut<'a, RegexManager>;
/// Without `single-thread` it is a `std::sync::MutexGuard`.
#[cfg(not(feature = "single-thread"))]
pub(crate) type RegexManagerRef<'a> = std::sync::MutexGuard<'a, RegexManager>;

impl Blocker {
/// Decide if a network request (usually from WebRequest API) should be
/// blocked, redirected or allowed.
Expand Down Expand Up @@ -130,31 +135,36 @@ impl Blocker {
self.get_list(NetworkFilterListId::TaggedFiltersAll)
}

#[cfg(feature = "single-thread")]
fn borrow_regex_manager(&self) -> std::cell::RefMut<'_, RegexManager> {
/// Borrow a mutable reference to the regex manager for the [`Blocker`].
/// Only one caller can borrow the regex manager at a time.
pub(crate) fn borrow_regex_manager(&self) -> RegexManagerRef<'_> {
#[cfg(feature = "single-thread")]
#[allow(unused_mut)]
let mut manager = self.regex_manager.borrow_mut();
#[cfg(not(feature = "single-thread"))]
let mut manager = self.regex_manager.lock().unwrap();

#[cfg(not(target_arch = "wasm32"))]
manager.update_time();

manager
}

#[cfg(not(feature = "single-thread"))]
fn borrow_regex_manager(&self) -> std::sync::MutexGuard<'_, RegexManager> {
let mut manager = self.regex_manager.lock().unwrap();
manager.update_time();
manager
}

/// Returns `true` when the list returned by `generic_hide()` produces a
/// match for `hostname_request`.
///
/// The regex manager is borrowed for the duration of the check.
pub fn check_generic_hide(&self, hostname_request: &Request) -> bool {
let mut regex_manager = self.borrow_regex_manager();
// An empty set is passed as the second argument.
// NOTE(review): presumably this is the set of active tags — confirm against `check`.
self.generic_hide()
.check(hostname_request, &HashSet::new(), &mut regex_manager)
.is_some()
}

/// Test-only helper: returns `true` when the list returned by `exceptions()`
/// produces a match for `request`.
///
/// The regex manager is borrowed for the duration of the check.
#[cfg(test)]
pub(crate) fn check_exceptions(&self, request: &Request) -> bool {
let mut regex_manager = self.borrow_regex_manager();
// An empty set is passed as the second argument.
// NOTE(review): presumably this is the set of active tags — confirm against `check`.
self.exceptions()
.check(request, &HashSet::new(), &mut regex_manager)
.is_some()
}

pub fn check_parameterised(
&self,
request: &Request,
Expand Down
10 changes: 10 additions & 0 deletions src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,11 @@ impl Engine {
self.blocker.check(request, &self.resources)
}

/// Test-only helper that forwards `request` to `check_exceptions` on this
/// engine's blocker and returns its result unchanged.
#[cfg(test)]
pub(crate) fn check_network_request_exceptions(&self, request: &Request) -> bool {
self.blocker.check_exceptions(request)
}

pub fn check_network_request_subset(
&self,
request: &Request,
Expand Down Expand Up @@ -266,6 +271,11 @@ impl Engine {
self.blocker.set_regex_discard_policy(new_discard_policy);
}

/// Test-only accessor that returns the guard produced by
/// `borrow_regex_manager` on this engine's blocker.
#[cfg(test)]
pub fn borrow_regex_manager(&self) -> crate::blocker::RegexManagerRef<'_> {
self.blocker.borrow_regex_manager()
}

#[cfg(feature = "debug-info")]
pub fn discard_regex(&mut self, regex_id: u64) {
self.blocker.discard_regex(regex_id);
Expand Down
5 changes: 0 additions & 5 deletions src/filters/fb_network.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,4 @@ impl NetworkMatchable for FlatNetworkFilter<'_> {
regex_manager,
)
}

#[cfg(test)]
fn matches_test(&self, request: &Request) -> bool {
self.matches(request, &mut RegexManager::default())
}
}
6 changes: 4 additions & 2 deletions src/filters/fb_network_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use flatbuffers::WIPOffset;
use crate::filters::fb_builder::EngineFlatBuilder;
use crate::filters::network::{FilterTokens, NetworkFilter};
use crate::filters::token_selector::TokenSelector;
use crate::utils::TokensBuffer;

use crate::filters::network::NetworkFilterMaskHelper;
use crate::flatbuffers::containers::flat_multimap::FlatMultiMapBuilder;
Expand Down Expand Up @@ -134,6 +135,7 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder {
let mut optimizable = HashMap::<ShortHash, Vec<NetworkFilter>>::new();

let mut token_frequencies = TokenSelector::new(rule_list.filters.len());
let mut tokens_buffer = TokensBuffer::default();

{
for network_filter in rule_list.filters {
Expand All @@ -157,7 +159,7 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder {
}
};

let multi_tokens = network_filter.get_tokens_optimized();
let multi_tokens = network_filter.get_tokens(&mut tokens_buffer);
match multi_tokens {
FilterTokens::Empty => {
// No tokens, add to fallback bucket (token 0)
Expand All @@ -171,7 +173,7 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder {
}
}
FilterTokens::Other(tokens) => {
let best_token = token_frequencies.select_least_used_token(&tokens);
let best_token = token_frequencies.select_least_used_token(tokens);
token_frequencies.record_usage(best_token);
store_filter(best_token);
}
Expand Down
95 changes: 34 additions & 61 deletions src/filters/network.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@ use crate::filters::abstract_network::{
use crate::lists::ParseOptions;
use crate::regex_manager::RegexManager;
use crate::request;
use crate::utils::{self, Hash};

pub(crate) const TOKENS_BUFFER_SIZE: usize = 200;
use crate::utils::{self, Hash, TokensBuffer};

/// For now, only support `$removeparam` with simple alphanumeric/dash/underscore patterns.
static VALID_PARAM: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_\-]+$").unwrap());
Expand Down Expand Up @@ -312,10 +310,10 @@ pub enum FilterPart {
}

#[derive(Debug, PartialEq)]
pub enum FilterTokens {
pub(crate) enum FilterTokens<'a> {
Empty,
OptDomains(Vec<Hash>),
Other(Vec<Hash>),
OptDomains(&'a [Hash]),
Other(&'a [Hash]),
}

pub struct FilterPartIterator<'a> {
Expand Down Expand Up @@ -883,19 +881,11 @@ impl NetworkFilter {
)
}

#[deprecated(since = "0.11.1", note = "use get_tokens_optimized instead")]
pub fn get_tokens(&self) -> Vec<Vec<Hash>> {
match self.get_tokens_optimized() {
FilterTokens::OptDomains(domains) => {
domains.into_iter().map(|domain| vec![domain]).collect()
}
FilterTokens::Other(tokens) => vec![tokens],
FilterTokens::Empty => vec![],
}
}

pub fn get_tokens_optimized(&self) -> FilterTokens {
let mut tokens: Vec<Hash> = Vec::with_capacity(TOKENS_BUFFER_SIZE);
pub(crate) fn get_tokens<'a>(
&'a self,
tokens_buffer: &'a mut TokensBuffer,
) -> FilterTokens<'a> {
tokens_buffer.clear();

// If there is only one domain and no domain negation, we also use this
// domain as a token.
Expand All @@ -905,7 +895,7 @@ impl NetworkFilter {
{
if let Some(domains) = self.opt_domains.as_ref() {
if let Some(domain) = domains.first() {
tokens.push(*domain)
tokens_buffer.push(*domain);
}
}
}
Expand All @@ -918,7 +908,7 @@ impl NetworkFilter {
(self.is_plain() || self.is_regex()) && !self.is_right_anchor();
let skip_first_token = self.is_right_anchor();

utils::tokenize_filter_to(f, skip_first_token, skip_last_token, &mut tokens);
utils::tokenize_filter_to(f, skip_first_token, skip_last_token, tokens_buffer);
}
}
FilterPart::AnyOf(_) => (), // across AnyOf set of filters no single token is guaranteed to match to a request
Expand All @@ -928,45 +918,55 @@ impl NetworkFilter {
// Append tokens from hostname, if any
if !self.mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX) {
if let Some(hostname) = self.hostname.as_ref() {
utils::tokenize_to(hostname, &mut tokens);
utils::tokenize_to(hostname, tokens_buffer);
}
} else if let Some(hostname) = self.hostname.as_ref() {
// Find last dot to tokenize the prefix
let last_dot_pos = hostname.rfind('.');
if let Some(last_dot_pos) = last_dot_pos {
utils::tokenize_to(&hostname[..last_dot_pos], &mut tokens);
utils::tokenize_to(&hostname[..last_dot_pos], tokens_buffer);
}
}

if tokens.is_empty() && self.mask.contains(NetworkFilterMask::IS_REMOVEPARAM) {
if tokens_buffer.is_empty() && self.mask.contains(NetworkFilterMask::IS_REMOVEPARAM) {
if let Some(removeparam) = &self.modifier_option {
if VALID_PARAM.is_match(removeparam) {
utils::tokenize_to(&removeparam.to_ascii_lowercase(), &mut tokens);
utils::tokenize_to(&removeparam.to_ascii_lowercase(), tokens_buffer);
}
}
}

// If we got no tokens for the filter/hostname part, then we will dispatch
// this filter in multiple buckets based on the domains option.
if tokens.is_empty() && self.opt_domains.is_some() && self.opt_not_domains.is_none() {
if tokens_buffer.is_empty() && self.opt_domains.is_some() && self.opt_not_domains.is_none()
{
if let Some(opt_domains) = self.opt_domains.as_ref() {
if !opt_domains.is_empty() {
return FilterTokens::OptDomains(opt_domains.clone());
return FilterTokens::OptDomains(opt_domains);
}
}
FilterTokens::Empty
} else {
// Add optional token for protocol
if self.for_http() && !self.for_https() {
tokens.push(utils::fast_hash("http"));
tokens_buffer.push(utils::fast_hash("http"));
} else if self.for_https() && !self.for_http() {
tokens.push(utils::fast_hash("https"));
tokens_buffer.push(utils::fast_hash("https"));
}

// Remake a vector to drop extra capacity.
let mut t = Vec::with_capacity(tokens.len());
t.extend(tokens);
FilterTokens::Other(t)
FilterTokens::Other(tokens_buffer.as_slice())
}
}

/// Test-only helper: checks `request` against an engine that contains only
/// this filter.
///
/// A `FilterSet` is built from a clone of `self` (with an empty second rule
/// list) and turned into an `Engine`. Exception filters are evaluated via
/// `check_network_request_exceptions`; all other filters via
/// `check_network_request(..).matched`.
#[cfg(test)]
pub(crate) fn matches_test(&self, request: &request::Request) -> bool {
// NOTE(review): the meaning of the trailing `true` flags is not visible here —
// confirm against `FilterSet::new_with_rules` and `Engine::from_filter_set`.
let filter_set = crate::FilterSet::new_with_rules(vec![self.clone()], vec![], true);
let engine = crate::Engine::from_filter_set(filter_set, true);

if self.is_exception() {
engine.check_network_request_exceptions(request)
} else {
engine.check_network_request(request).matched
}
}
}
Expand All @@ -986,35 +986,8 @@ impl fmt::Display for NetworkFilter {
}
}

pub trait NetworkMatchable {
pub(crate) trait NetworkMatchable {
fn matches(&self, request: &request::Request, regex_manager: &mut RegexManager) -> bool;

#[cfg(test)]
fn matches_test(&self, request: &request::Request) -> bool;
}

impl NetworkMatchable for NetworkFilter {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can also make NetworkMatchable into a pub(crate) trait since it's only implemented for FlatNetworkFilter now

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. I also removed the unused matches_test.
Maybe it makes sense to remove the NetworkMatchable trait completely, but since it is non-public, we can do that later.

fn matches(&self, request: &request::Request, regex_manager: &mut RegexManager) -> bool {
use crate::filters::network_matchers::{
check_excluded_domains, check_included_domains, check_options, check_pattern,
};
check_options(self.mask, request)
&& check_included_domains(self.opt_domains.as_deref(), request)
&& check_excluded_domains(self.opt_not_domains.as_deref(), request)
&& check_pattern(
self.mask,
self.filter.iter(),
self.hostname.as_deref(),
(self as *const NetworkFilter) as u64,
request,
regex_manager,
)
}

#[cfg(test)]
fn matches_test(&self, request: &request::Request) -> bool {
self.matches(request, &mut RegexManager::default())
}
}

// ---------------------------------------------------------------------------
Expand Down
Loading