Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimisations and `document` request and rule handling fixes #35

Merged
merged 10 commits into from Jun 28, 2019

adds rule hit logging behind feature flag, some inlining optimisations, disables ineffective filter optimisation
  • Loading branch information
AndriusA committed Jun 14, 2019
commit f13103e1ade609ff72002e8c34f1c57112bf34d3
@@ -66,3 +66,4 @@ harness = false
[features]

full-domain-matching = [] # feature has no explicit dependencies
metrics = []
@@ -27,6 +27,18 @@ pub struct BlockerResult {
pub filter: Option<String>,
}

impl Default for BlockerResult {
fn default() -> BlockerResult {
BlockerResult {
matched: false,
explicit_cancel: false,
redirect: None,
exception: None,
filter: None
}
}
}

#[derive(Debug, PartialEq)]
pub enum BlockerError {
SerializationError,
@@ -58,6 +70,9 @@ pub struct Blocker {
tags_enabled: HashSet<String>,
tagged_filters_all: Vec<NetworkFilter>,

#[serde(skip_serializing, skip_deserializing)]
hot_filters: NetworkFilterList,

debug: bool,
enable_optimizations: bool,
load_cosmetic_filters: bool,
@@ -74,31 +89,42 @@ impl Blocker {
*/
pub fn check(&self, request: &Request) -> BlockerResult {
if !self.load_network_filters || !request.is_supported {
return BlockerResult {
matched: false,
explicit_cancel: false,
redirect: None,
exception: None,
filter: None,
};
return BlockerResult::default();
}

// Check the filters in the following order:
// 1. $important (not subject to exceptions)
// 2. redirection ($redirect=resource)
// 3. normal filters
// 4. exceptions
#[cfg(feature = "metrics")]
print!("importants\t");

let filter = self
.importants
// .filters
.check(request)
.or_else(|| self.filters_tagged.check(request))
.or_else(|| self.redirects.check(request))
.or_else(|| self.filters.check(request));
.or_else(|| {
#[cfg(feature = "metrics")]
print!("tagged\t");
self.filters_tagged.check(request)
})
.or_else(|| {
#[cfg(feature = "metrics")]
print!("redirects\t");
self.redirects.check(request)
})
.or_else(|| {
#[cfg(feature = "metrics")]
print!("filters\t");
self.filters.check(request)
});

let exception = filter.as_ref().and_then(|f| {
// Set `bug` of request
// TODO - avoid mutability
if !f.is_important() {
#[cfg(feature = "metrics")]
print!("exceptions\t");
if f.has_bug() {
let mut request_bug = request.clone();
request_bug.bug = f.bug;
@@ -110,6 +136,9 @@ impl Blocker {
None
}
});

#[cfg(feature = "metrics")]
println!("");

// only match redirects if we have them set up
let redirect: Option<String> = filter.as_ref().and_then(|f| {
@@ -221,6 +250,7 @@ impl Blocker {
// Tags special case for enabling/disabling them dynamically
tags_enabled: HashSet::new(),
tagged_filters_all,
hot_filters: NetworkFilterList::default(),
// Options
debug: options.debug,
enable_optimizations: options.enable_optimizations,
@@ -326,7 +356,7 @@ impl Blocker {
}
}

#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Default)]
struct NetworkFilterList {
filter_map: HashMap<Hash, Vec<Arc<NetworkFilter>>>,
// optimized: Option<bool>
@@ -462,28 +492,67 @@ impl NetworkFilterList {
}

/// Returns the first filter in this list that matches `request`, or `None`.
///
/// The lookup makes two passes over `filter_map`:
/// 1. keyed by each of the request's source hostname hashes, when present;
/// 2. keyed by each token yielded by `request.get_tokens()`.
///
/// Within a bucket the filters are tried in order and the first one whose
/// `matches(request)` returns `true` is returned immediately.
///
/// With the `metrics` feature enabled, tab-separated hit/miss flags and the
/// running bucket/filter counters are printed to stdout. The empty-map
/// shortcut is only active without `metrics` (presumably so every call still
/// emits its metric columns).
pub fn check(&self, request: &Request) -> Option<&NetworkFilter> {
    #[cfg(feature = "metrics")]
    let mut filters_checked = 0;
    #[cfg(feature = "metrics")]
    let mut filter_buckets = 0;

    // Nothing to look up in an empty list (shortcut disabled under `metrics`).
    if cfg!(not(feature = "metrics")) && self.filter_map.is_empty() {
        return None;
    }

    // Pass 1: buckets keyed by the source hostname hashes.
    if let Some(hostname_hashes) = request.source_hostname_hashes.as_ref() {
        for hash in hostname_hashes {
            let bucket = match self.filter_map.get(hash) {
                Some(bucket) => bucket,
                None => continue,
            };
            #[cfg(feature = "metrics")]
            {
                filter_buckets += 1;
            }

            for candidate in bucket {
                #[cfg(feature = "metrics")]
                {
                    filters_checked += 1;
                }
                if !candidate.matches(request) {
                    continue;
                }
                #[cfg(feature = "metrics")]
                print!("true\t{}\t{}\tskipped\t{}\t{}\t", filter_buckets, filters_checked, filter_buckets, filters_checked);
                return Some(candidate);
            }
        }
    }

    #[cfg(feature = "metrics")]
    print!("false\t{}\t{}\t", filter_buckets, filters_checked);

    // Pass 2: buckets keyed by the request tokens.
    for token in request.get_tokens() {
        let bucket = match self.filter_map.get(token) {
            Some(bucket) => bucket,
            None => continue,
        };
        #[cfg(feature = "metrics")]
        {
            filter_buckets += 1;
        }
        for candidate in bucket {
            #[cfg(feature = "metrics")]
            {
                filters_checked += 1;
            }
            if !candidate.matches(request) {
                continue;
            }
            #[cfg(feature = "metrics")]
            print!("true\t{}\t{}\t", filter_buckets, filters_checked);
            return Some(candidate);
        }
    }

    #[cfg(feature = "metrics")]
    print!("false\t{}\t{}\t", filter_buckets, filters_checked);

    None
}
}
@@ -1004,7 +1004,7 @@ fn is_anchored_by_hostname(filter_hostname: &str, hostname: &str, wildcard_filte
}
}


#[inline]
fn get_url_after_hostname<'a>(url: &'a str, hostname: &str) -> &'a str {
let start = twoway::find_str(url, hostname).unwrap_or_else(|| url.len());
&url[start + hostname.len()..]
@@ -1015,6 +1015,7 @@ fn get_url_after_hostname<'a>(url: &'a str, hostname: &str) -> &'a str {
// ---------------------------------------------------------------------------

// pattern$fuzzy
#[inline]
fn check_pattern_fuzzy_filter(filter: &NetworkFilter, request: &request::Request) -> bool {
filter
.fuzzy_signature
@@ -1040,6 +1041,7 @@ fn check_pattern_fuzzy_filter(filter: &NetworkFilter, request: &request::Request
}

// pattern
#[inline]
fn check_pattern_plain_filter_filter(filter: &NetworkFilter, request: &request::Request) -> bool {
match &filter.filter {
FilterPart::Empty => true,
@@ -1056,6 +1058,7 @@ fn check_pattern_plain_filter_filter(filter: &NetworkFilter, request: &request::
}

// pattern|
#[inline]
fn check_pattern_right_anchor_filter(filter: &NetworkFilter, request: &request::Request) -> bool {
match &filter.filter {
FilterPart::Empty => true,
@@ -1072,6 +1075,7 @@ fn check_pattern_right_anchor_filter(filter: &NetworkFilter, request: &request::
}

// |pattern
#[inline]
fn check_pattern_left_anchor_filter(filter: &NetworkFilter, request: &request::Request) -> bool {
match &filter.filter {
FilterPart::Empty => true,
@@ -1088,6 +1092,7 @@ fn check_pattern_left_anchor_filter(filter: &NetworkFilter, request: &request::R
}

// |pattern|
#[inline]
fn check_pattern_left_right_anchor_filter(
filter: &NetworkFilter,
request: &request::Request,
@@ -1107,6 +1112,7 @@ fn check_pattern_left_right_anchor_filter(
}

// pattern*^
#[inline]
fn check_pattern_regex_filter_at(
filter: &NetworkFilter,
request: &request::Request,
@@ -1121,6 +1127,7 @@ fn check_pattern_regex_filter(filter: &NetworkFilter, request: &request::Request
}

// ||pattern*^
#[inline]
fn check_pattern_hostname_anchor_regex_filter(
filter: &NetworkFilter,
request: &request::Request,
@@ -1143,6 +1150,7 @@ fn check_pattern_hostname_anchor_regex_filter(
}

// ||pattern|
#[inline]
fn check_pattern_hostname_right_anchor_filter(
filter: &NetworkFilter,
request: &request::Request,
@@ -1171,6 +1179,7 @@ fn check_pattern_hostname_right_anchor_filter(
}

// |||pattern|
#[inline]
fn check_pattern_hostname_left_right_anchor_filter(
filter: &NetworkFilter,
request: &request::Request,
@@ -1210,6 +1219,7 @@ fn check_pattern_hostname_left_right_anchor_filter(

// ||pattern + left-anchor => This means that a plain pattern needs to appear
// exactly after the hostname, with nothing in between.
#[inline]
fn check_pattern_hostname_left_anchor_filter(
filter: &NetworkFilter,
request: &request::Request,
@@ -1246,6 +1256,7 @@ fn check_pattern_hostname_left_anchor_filter(
}

// ||pattern
#[inline]
fn check_pattern_hostname_anchor_filter(
filter: &NetworkFilter,
request: &request::Request,
@@ -1281,6 +1292,7 @@ fn check_pattern_hostname_anchor_filter(
}

// ||pattern$fuzzy
#[inline]
fn check_pattern_hostname_anchor_fuzzy_filter(
filter: &NetworkFilter,
request: &request::Request,
@@ -1332,6 +1344,7 @@ fn check_pattern(filter: &NetworkFilter, request: &request::Request) -> bool {
}
}

#[inline]
pub fn check_cpt_allowed(filter: &NetworkFilter, cpt: &request::RequestType) -> bool {
match NetworkFilterMask::from(cpt) {
NetworkFilterMask::UNMATCHED => filter.cpt_any(),
@@ -13,12 +13,14 @@ trait Optimization {
* Fuse a set of `filters` by applying optimizations sequentially.
*/
pub fn optimize(filters: Vec<NetworkFilter>) -> Vec<NetworkFilter> {
let simple_pattern_group = SimplePatternGroup {};
let union_domain_group = UnionDomainGroup {};
let mut optimized: Vec<NetworkFilter> = Vec::new();
let (mut fused, unfused) = apply_optimisation(&union_domain_group, filters);
optimized.append(&mut fused);
let (mut fused, mut unfused) = apply_optimisation(&simple_pattern_group, unfused);

// let union_domain_group = UnionDomainGroup {};
// let (mut fused, unfused) = apply_optimisation(&union_domain_group, filters);
// optimized.append(&mut fused);

let simple_pattern_group = SimplePatternGroup {};
let (mut fused, mut unfused) = apply_optimisation(&simple_pattern_group, filters);
optimized.append(&mut fused);

// Append whatever is still left unfused
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.