Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimisations and `document` request and rule handling fixes #35

Merged
merged 10 commits on Jun 28, 2019

Large diffs are not rendered by default.

@@ -35,6 +35,8 @@ seahash = "3"
twoway = "0.2"
base64 = "0.10"
rmp-serde = "0.13.7"
object-pool = "0.3"
hashbrown = { version = "0.3", features = ["serde"], default-features = false }

# [target.'cfg(any(unix, windows))'.dependencies]
# rayon = "1.0"
@@ -64,5 +66,7 @@ name = "bench_rules"
harness = false

[features]

default = ["full-regex-handling"]
full-domain-matching = [] # feature has no explicit dependencies
metrics = []
full-regex-handling = []
@@ -13,7 +13,7 @@ use adblock::url_parser::UrlParser;
use adblock::engine::Engine;

#[allow(non_snake_case)]
#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Clone)]
struct TestRequest {
frameUrl: String,
url: String,
@@ -39,22 +39,21 @@ fn get_blocker(rules: &Vec<String>) -> Blocker {
Blocker::new(network_filters, &blocker_options)
}

fn bench_rule_matching(blocker: &Blocker, requests: &Vec<TestRequest>) -> (u32, u32, u32) {
fn bench_rule_matching(engine: &Engine, requests: &Vec<TestRequest>) -> (u32, u32) {
let mut matches = 0;
let mut passes = 0;
let mut errors = 0;
requests
.iter()
.for_each(|r| {
let req: Result<Request, _> = Request::from_urls(&r.url, &r.frameUrl, &r.cpt);
match req.map(|parsed| blocker.check(&parsed)).as_ref() {
Ok(check) if check.matched => matches += 1,
Ok(_) => passes += 1,
Err(_) => errors += 1
let res = engine.check_network_urls(&r.url, &r.frameUrl, &r.cpt);
if res.matched {
matches += 1;
} else {
passes += 1;
}
});
// println!("Got {} matches, {} passes, {} errors", matches, passes, errors);
(matches, passes, errors)
(matches, passes)
}

fn bench_matching_only(blocker: &Blocker, requests: &Vec<Request>) -> (u32, u32) {
@@ -70,7 +69,7 @@ fn bench_matching_only(blocker: &Blocker, requests: &Vec<Request>) -> (u32, u32)
passes += 1;
}
});
println!("Got {} matches, {} passes", matches, passes);
// println!("Got {} matches, {} passes", matches, passes);
(matches, passes)
}

@@ -87,72 +86,47 @@ fn bench_rule_matching_browserlike(blocker: &Engine, requests: &Vec<(String, Str
passes += 1;
}
});
println!("Got {} matches, {} passes", matches, passes);
// println!("Got {} matches, {} passes", matches, passes);
(matches, passes)
}

fn rule_match(c: &mut Criterion) {

let rules = rules_from_lists(&vec![
String::from("data/easylist.to/easylist/easylist.txt"),
]);
let requests = load_requests();
let elep_req = requests.clone();
let el_req = requests.clone();
let slim_req = requests.clone();
let requests_len = requests.len() as u32;
c.bench(
"rule-match",
Benchmark::new(
"el",
move |b| {
let blocker = get_blocker(&rules);
b.iter(|| bench_rule_matching(&blocker, &requests))
},
).throughput(Throughput::Elements(requests_len))
.sample_size(10)
);
}

fn rule_match_elep(c: &mut Criterion) {

let rules = rules_from_lists(&vec![
String::from("data/easylist.to/easylist/easylist.txt"),
String::from("data/easylist.to/easylist/easyprivacy.txt"),
]);
let requests = load_requests();
let requests_len = requests.len() as u32;
c.bench(
"rule-match",
Benchmark::new(
"el+ep",
move |b| {
let blocker = get_blocker(&rules);
b.iter(|| bench_rule_matching(&blocker, &requests))
},
).throughput(Throughput::Elements(requests_len))
.sample_size(10)
);
}

fn rule_match_slim(c: &mut Criterion) {
let rules = rules_from_lists(&vec![
String::from("data/slim-list.txt"),
]);
let requests = load_requests();
let requests_len = requests.len() as u32;

c.bench(
"rule-match",
Benchmark::new(
"slim",
move |b| {
let blocker = get_blocker(&rules);
b.iter(|| bench_rule_matching(&blocker, &requests))
},
).throughput(Throughput::Elements(requests_len))
.sample_size(10)
Benchmark::new("el+ep", move |b| {
let rules = rules_from_lists(&vec![
"data/easylist.to/easylist/easylist.txt".to_owned(),
"data/easylist.to/easylist/easyprivacy.txt".to_owned()
]);
let engine = Engine::from_rules(&rules);
b.iter(|| bench_rule_matching(&engine, &elep_req))
},)
.with_function("easylist", move |b| {
let rules = rules_from_lists(&vec![
"data/easylist.to/easylist/easylist.txt".to_owned(),
]);
let engine = Engine::from_rules(&rules);
b.iter(|| bench_rule_matching(&engine, &el_req))
},)
.with_function("slimlist", move |b| {
let rules = rules_from_lists(&vec![
"data/slim-list.txt".to_owned()
]);
let engine = Engine::from_rules(&rules);
b.iter(|| bench_rule_matching(&engine, &slim_req))
},)
.throughput(Throughput::Elements(requests_len))
.sample_size(20)
);
}

fn rule_match_only_el(c: &mut Criterion) {
fn rule_match_parsed_el(c: &mut Criterion) {

let rules = rules_from_lists(&vec![
String::from("data/easylist.to/easylist/easylist.txt"),
@@ -164,7 +138,7 @@ fn rule_match_only_el(c: &mut Criterion) {
c.bench(
"rule-match-parsed",
Benchmark::new(
"el",
"easylist",
move |b| {
b.iter(|| bench_matching_only(&blocker, &requests_parsed))
},
@@ -173,7 +147,7 @@ fn rule_match_only_el(c: &mut Criterion) {
);
}

fn rule_match_slimlist_comparable(c: &mut Criterion) {
fn rule_match_parsed_elep_slimlist(c: &mut Criterion) {

let full_rules = rules_from_lists(&vec![
String::from("data/easylist.to/easylist/easylist.txt"),
@@ -302,16 +276,12 @@ fn deserialization(c: &mut Criterion) {
);
}

fn rule_match_browserlike_elep(c: &mut Criterion) {

let rules = rules_from_lists(&vec![
String::from("data/easylist.to/easylist/easylist.txt"),
String::from("data/easylist.to/easylist/easyprivacy.txt"),
]);
fn rule_match_browserlike_comparable(c: &mut Criterion) {
let requests = load_requests();
let requests_len = requests.len() as u32;

let requests_parsed: Vec<(String, String, String, String, Option<bool>)> = requests.iter().map(|r| {
fn requests_parsed(requests: &[TestRequest]) -> Vec<(String, String, String, String, Option<bool>)> {
requests.iter().map(|r| {
let url_norm = r.url.to_ascii_lowercase();
let source_url_norm = r.frameUrl.to_ascii_lowercase();

@@ -343,32 +313,57 @@ fn rule_match_browserlike_elep(c: &mut Criterion) {
}
})
.filter_map(Result::ok)
.collect();
.collect::<Vec<_>>()
}

let elep_req = requests_parsed(&requests);
let el_req = elep_req.clone();
let slim = elep_req.clone();

c.bench(
"rule-match-browserlike",
Benchmark::new(
"el+ep",
move |b| {
let blocker = get_blocker(&rules);
let engine = Engine {
blocker
};
b.iter(|| bench_rule_matching_browserlike(&engine, &requests_parsed))
},
).throughput(Throughput::Elements(requests_len))
.sample_size(10)
Benchmark::new("el+ep", move |b| {
let rules = rules_from_lists(&vec![
"data/easylist.to/easylist/easylist.txt".to_owned(),
"data/easylist.to/easylist/easyprivacy.txt".to_owned()
]);
let blocker = get_blocker(&rules);
let engine = Engine {
blocker
};
b.iter(|| bench_rule_matching_browserlike(&engine, &elep_req))
},)
.with_function("el", move |b| {
let rules = rules_from_lists(&vec![
"data/easylist.to/easylist/easylist.txt".to_owned(),
]);
let blocker = get_blocker(&rules);
let engine = Engine {
blocker
};
b.iter(|| bench_rule_matching_browserlike(&engine, &el_req))
},)
.with_function("slimlist", move |b| {
let rules = rules_from_lists(&vec![
"data/slim-list.txt".to_owned()
]);
let blocker = get_blocker(&rules);
let engine = Engine {
blocker
};
b.iter(|| bench_rule_matching_browserlike(&engine, &slim))
},)
.throughput(Throughput::Elements(requests_len))
.sample_size(20)
);
}

criterion_group!(
benches,
rule_match_elep,
rule_match_only_el,
rule_match_slimlist_comparable,
rule_match,
rule_match_slim,
rule_match_browserlike_elep,
rule_match_parsed_el,
rule_match_parsed_elep_slimlist,
rule_match_browserlike_comparable,
serialization,
deserialization
);
@@ -10,21 +10,19 @@ use adblock::url_parser::UrlParser;
use adblock::request::Request;

#[allow(non_snake_case)]
#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Clone)]
struct TestRequest {
frameUrl: String,
url: String,
cpt: String,
}

fn load_requests() -> Vec<TestRequest> {
let requests_str = adblock::utils::read_file_lines("data/requests.json");
let reqs: Vec<TestRequest> = requests_str
adblock::utils::read_file_lines("data/requests.json")
.into_iter()
.map(|r| serde_json::from_str(&r))
.filter_map(Result::ok)
.collect();
reqs
.collect::<Vec<_>>()
}

fn request_parsing_throughput(c: &mut Criterion) {
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.