Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimisations and `document` request and rule handling fixes #35

Merged
merged 10 commits into from Jun 28, 2019

Removes some inlining annotations again, as benchmarks don't indicate any improvement; removes obsolete util functions
  • Loading branch information
AndriusA committed Jun 20, 2019
commit 7e03530dac0906279279ad863392eaab0b31633b
@@ -122,7 +122,7 @@ fn rule_match(c: &mut Criterion) {
b.iter(|| bench_rule_matching(&engine, &slim_req))
},)
.throughput(Throughput::Elements(requests_len))
.sample_size(10)
.sample_size(20)
);
}

@@ -354,7 +354,7 @@ fn rule_match_browserlike_comparable(c: &mut Criterion) {
b.iter(|| bench_rule_matching_browserlike(&engine, &slim))
},)
.throughput(Throughput::Elements(requests_len))
.sample_size(10)
.sample_size(20)
);
}

@@ -18,7 +18,7 @@ struct TestRequest {
}

fn load_requests() -> Vec<TestRequest> {
adblock::utils::read_rules("data/requests.json")
adblock::utils::read_file_lines("data/requests.json")
.into_iter()
.map(|r| serde_json::from_str(&r))
.filter_map(Result::ok)
@@ -1,6 +1,6 @@
use hashbrown::HashMap;
use std::sync::Arc;
use serde::{Deserialize, Serialize, Deserializer};
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::iter::FromIterator;
use object_pool::Pool;
@@ -1012,7 +1012,6 @@ fn is_anchored_by_hostname(filter_hostname: &str, hostname: &str, wildcard_filte
}
}

#[inline]
fn get_url_after_hostname<'a>(url: &'a str, hostname: &str) -> &'a str {
let start = twoway::find_str(url, hostname).unwrap_or_else(|| url.len());
&url[start + hostname.len()..]
@@ -1023,7 +1022,6 @@ fn get_url_after_hostname<'a>(url: &'a str, hostname: &str) -> &'a str {
// ---------------------------------------------------------------------------

// pattern$fuzzy
#[inline]
fn check_pattern_fuzzy_filter(filter: &NetworkFilter, request: &request::Request) -> bool {
filter
.fuzzy_signature
@@ -1049,7 +1047,6 @@ fn check_pattern_fuzzy_filter(filter: &NetworkFilter, request: &request::Request
}

// pattern
#[inline]
fn check_pattern_plain_filter_filter(filter: &NetworkFilter, request: &request::Request) -> bool {
match &filter.filter {
FilterPart::Empty => true,
@@ -1066,7 +1063,6 @@ fn check_pattern_plain_filter_filter(filter: &NetworkFilter, request: &request::
}

// pattern|
#[inline]
fn check_pattern_right_anchor_filter(filter: &NetworkFilter, request: &request::Request) -> bool {
match &filter.filter {
FilterPart::Empty => true,
@@ -1083,7 +1079,6 @@ fn check_pattern_right_anchor_filter(filter: &NetworkFilter, request: &request::
}

// |pattern
#[inline]
fn check_pattern_left_anchor_filter(filter: &NetworkFilter, request: &request::Request) -> bool {
match &filter.filter {
FilterPart::Empty => true,
@@ -1100,7 +1095,6 @@ fn check_pattern_left_anchor_filter(filter: &NetworkFilter, request: &request::R
}

// |pattern|
#[inline]
fn check_pattern_left_right_anchor_filter(
filter: &NetworkFilter,
request: &request::Request,
@@ -1120,7 +1114,6 @@ fn check_pattern_left_right_anchor_filter(
}

// pattern*^
#[inline]
fn check_pattern_regex_filter_at(
filter: &NetworkFilter,
request: &request::Request,
@@ -1135,7 +1128,6 @@ fn check_pattern_regex_filter(filter: &NetworkFilter, request: &request::Request
}

// ||pattern*^
#[inline]
fn check_pattern_hostname_anchor_regex_filter(
filter: &NetworkFilter,
request: &request::Request,
@@ -1158,7 +1150,6 @@ fn check_pattern_hostname_anchor_regex_filter(
}

// ||pattern|
#[inline]
fn check_pattern_hostname_right_anchor_filter(
filter: &NetworkFilter,
request: &request::Request,
@@ -1187,7 +1178,6 @@ fn check_pattern_hostname_right_anchor_filter(
}

// |||pattern|
#[inline]
fn check_pattern_hostname_left_right_anchor_filter(
filter: &NetworkFilter,
request: &request::Request,
@@ -1227,7 +1217,6 @@ fn check_pattern_hostname_left_right_anchor_filter(

// ||pattern + left-anchor => This means that a plain pattern needs to appear
// exactly after the hostname, with nothing in between.
#[inline]
fn check_pattern_hostname_left_anchor_filter(
filter: &NetworkFilter,
request: &request::Request,
@@ -1264,7 +1253,6 @@ fn check_pattern_hostname_left_anchor_filter(
}

// ||pattern
#[inline]
fn check_pattern_hostname_anchor_filter(
filter: &NetworkFilter,
request: &request::Request,
@@ -1300,7 +1288,6 @@ fn check_pattern_hostname_anchor_filter(
}

// ||pattern$fuzzy
#[inline]
fn check_pattern_hostname_anchor_fuzzy_filter(
filter: &NetworkFilter,
request: &request::Request,
@@ -1352,7 +1339,6 @@ fn check_pattern(filter: &NetworkFilter, request: &request::Request) -> bool {
}
}

#[inline]
pub fn check_cpt_allowed(filter: &NetworkFilter, cpt: &request::RequestType) -> bool {
match NetworkFilterMask::from(cpt) {
NetworkFilterMask::UNMATCHED => filter.cpt_any(),
@@ -12,12 +12,12 @@ pub fn fast_hash(input: &str) -> Hash {
hash(input.as_bytes()) as Hash
}


/// Reports whether `ch` may appear inside a filter token: any character for
/// which `char::is_alphanumeric` holds, or the literal percent sign `%`.
#[inline]
fn is_allowed_filter(ch: char) -> bool {
    match ch {
        '%' => true,
        other => other.is_alphanumeric(),
    }
}


/// Reports whether `ch` may appear inside a hostname token: an underscore,
/// a hyphen, or anything accepted by `is_allowed_filter`.
#[inline]
fn is_allowed_hostname(ch: char) -> bool {
    match ch {
        '_' | '-' => true,
        other => is_allowed_filter(other),
    }
}
@@ -45,7 +45,7 @@ fn fast_tokenizer_no_regex(
if is_allowed_code(c) {
if !inside {
inside = true;
start = i
start = i;
}
} else if inside {
inside = false;
@@ -57,11 +57,10 @@ fn fast_tokenizer_no_regex(
{
let hash = fast_hash(&pattern[start..i]);
tokens_buffer.push(hash);

}
preceding_ch = Some(c)
preceding_ch = Some(c);
} else {
preceding_ch = Some(c)
preceding_ch = Some(c);
}

}
@@ -152,30 +151,10 @@ pub fn create_combined_fuzzy_signature(patterns: &[String]) -> Vec<Hash> {
tokens
}

/// Binary-searches `arr` for `elt` using `slice::binary_search`.
///
/// Returns `Some(index)` of a matching element, or `None` when no element
/// compares equal. As with any binary search, `arr` is expected to be sorted;
/// when several elements match, any one of their indices may be returned.
pub fn bin_search<T: Ord>(arr: &[T], elt: &T) -> Option<usize> {
    match arr.binary_search(elt) {
        Ok(position) => Some(position),
        Err(_) => None,
    }
}

/// Membership test on a slice via `slice::binary_search`.
///
/// Returns `true` when some element of `arr` compares equal to `elt`.
/// `arr` is expected to be sorted.
pub fn bin_lookup<T: Ord>(arr: &[T], elt: T) -> bool {
    let needle = &elt;
    match arr.binary_search(needle) {
        Ok(_) => true,
        Err(_) => false,
    }
}

/// Membership test on a slice for an optional needle.
///
/// Returns `false` when `elt` is `None`; otherwise returns whether
/// `slice::binary_search` finds the wrapped value in `arr`.
/// `arr` is expected to be sorted.
pub fn bin_lookup_optional<T: Ord>(arr: &[T], elt: Option<T>) -> bool {
    match elt {
        Some(needle) => arr.binary_search(&needle).is_ok(),
        None => false,
    }
}

/// Reports whether `pattern` contains at least one non-ASCII character.
///
/// Returns `false` for the empty string and for strings made up solely of
/// ASCII characters; returns `true` as soon as any character falls outside
/// the ASCII range.
pub fn has_unicode(pattern: &str) -> bool {
    // `str::is_ascii` is the standard-library form of the hand-written
    // "any char is not ASCII" loop; negating it gives the same answer.
    !pattern.is_ascii()
}

const EXPECTED_RULES: usize = 75000;
#[cfg(not(target_arch = "wasm32"))]
pub fn read_file_lines(filename: &str) -> Vec<String> {
@@ -329,44 +308,4 @@ mod tests {
assert_eq!(bin_lookup(&vec![1, 2, 3, 4, 42], 5), false);
}

#[test]
fn bin_search_works() {
    // Slices of length 0, 1 and 2, each probed for the needle 42.
    let small_cases: Vec<(Vec<i32>, Option<usize>)> = vec![
        (Vec::new(), None),
        (vec![1], None),
        (vec![42], Some(0)),
        (vec![0, 1], None),
        (vec![1, 42], Some(1)),
        (vec![42, 45], Some(0)),
    ];
    for (haystack, expected) in &small_cases {
        assert_eq!(bin_search(haystack, &42), *expected);
    }

    // With duplicates either index is acceptable; it only has to be found.
    assert_ne!(bin_search(&vec![42, 42], &42), None);

    // A larger sorted input: the first 1000 perfect squares.
    let squares: Vec<Hash> = (1..=1000).map(|n| n * n).collect();
    let large_cases: [(Hash, Option<usize>); 4] = [
        (42, None),
        (1, Some(0)),
        (4, Some(1)),
        (1000 * 1000, Some(1000 - 1)),
    ];
    for (needle, expected) in large_cases.iter() {
        assert_eq!(bin_search(&squares, needle), *expected);
    }
}

// Checks `has_unicode` against an all-ASCII string (expected `false`) and a
// series of strings that each contain non-ASCII characters (expected `true`).
#[test]
fn has_unicode_works() {
// Every byte from `!` through `~`, converted to `char` and collected.
let ascii: String = (b'!'..=b'~') // Start as u8
.map(|c| c as char)
.collect();

// Pure ASCII input must not be reported as containing unicode.
assert_eq!(has_unicode(&ascii), false);
// Each of the following literals includes at least one non-ASCII character.
assert_eq!(has_unicode("。◕ ∀ ◕。)"), true);
assert_eq!(has_unicode("`ィ(´∀`∩"), true);
assert_eq!(has_unicode("__ロ(,_,*)"), true);
assert_eq!(has_unicode("・( ̄∀ ̄)・:*:"), true);
assert_eq!(has_unicode("゚・✿ヾ╲(。◕‿◕。)╱✿・゚"), true);
assert_eq!(has_unicode(",。・:*:・゜’( ☻ ω ☻ )。・:*:・゜’"), true);
assert_eq!(has_unicode("(╯°□°)╯︵ ┻━┻)"), true);
assert_eq!(has_unicode("(ノಥ益ಥ)ノ ┻━┻"), true);
assert_eq!(has_unicode("┬─┬ノ( º _ ºノ)"), true);
assert_eq!(has_unicode("( ͡° ͜ʖ ͡°)"), true);
assert_eq!(has_unicode("¯_(ツ)_/¯"), true);
}
}
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.