Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implements uBO style polyfills for requests redirects #29

Merged
merged 9 commits into from Jun 18, 2019
Next

Implements uBO style polyfills for requests matched on rules with `re…

…direct` option

Closes #27
  • Loading branch information
AndriusA committed Jun 5, 2019
commit f1cae2f37ed467b1468e2ae8e21f71243208d3f5

Some generated files are not rendered by default. Learn more.

@@ -33,6 +33,7 @@ bincode = "1.1"
flate2 = "1"
seahash = "3"
twoway = "0.2"
base64 = "0.10"

# [target.'cfg(any(unix, windows))'.dependencies]
# rayon = "1.0"
@@ -8,6 +8,8 @@ use crate::filters::network::{NetworkFilter, NetworkMatchable};
use crate::request::Request;
use crate::utils::{fast_hash, Hash};
use crate::optimizer;
use crate::resources::{Resources};
use base64;

pub struct BlockerOptions {
pub debug: bool,
@@ -16,6 +18,7 @@ pub struct BlockerOptions {
pub load_network_filters: bool,
}

#[derive(Debug)]
pub struct BlockerResult {
pub matched: bool,
pub explicit_cancel: bool,
@@ -49,6 +52,8 @@ pub struct Blocker {
enable_optimizations: bool,
load_cosmetic_filters: bool,
load_network_filters: bool,

resources: Option<Resources>
}

impl Blocker {
@@ -95,11 +100,28 @@ impl Blocker {
}
});

// If there is a match
// only match redirects if we have them set up
let redirect: Option<String> = filter.as_ref().and_then(|f| {
if f.is_redirect() {
// TODO: build up redirect URL from matching resource
unimplemented!()
// If there is a match
if let Some(blocker_redirects) = self.resources.as_ref() {
// Filter redirect option is set
if let Some(redirect) = f.redirect.as_ref() {
// And we have a matching redirect resource
if let Some(resource) = blocker_redirects.get_resource(redirect) {
let mut data_url: String;
if resource.content_type.contains(';') {
data_url = format!("data:{},{}", resource.content_type, resource.data);
} else {
data_url = format!("data:{};base64,{}", resource.content_type, base64::encode(&resource.data));
}
Some(data_url.trim().to_owned())
} else {
// TODO: handle error - throw?

This comment has been minimized.

Copy link
@pes10k

pes10k Jun 6, 2019

Collaborator

Could this be logged somehow, even if it's just mirroring the "didn't understand filter: X" stuff the current lib does? It would be a nice, noisy reminder if there is some new filter format we don't support, or something like that.

None
}
} else {
None
}
} else {
None
}
@@ -195,6 +217,8 @@ impl Blocker {
enable_optimizations: options.enable_optimizations,
load_cosmetic_filters: options.load_cosmetic_filters,
load_network_filters: options.load_network_filters,

resources: None
}
}

@@ -232,6 +256,12 @@ impl Blocker {
/// Returns an owned copy of every entry in `self.tags_enabled`.
///
/// The entries are cloned, so the returned vector is independent of the
/// blocker's internal state; they appear in whatever order the field iterates.
pub fn tags_enabled(&self) -> Vec<String> {
    let mut enabled: Vec<String> = Vec::new();
    enabled.extend(self.tags_enabled.iter().cloned());
    enabled
}

/// Parses `resources` with `Resources::parse` and stores the result on this
/// blocker, replacing any resources that were set before.
///
/// Returns `self` so further calls can be chained. The input string is only
/// borrowed for the duration of the call: the parsed data is owned, so the
/// returned reference does not keep `resources` borrowed.
pub fn with_resources(&mut self, resources: &str) -> &mut Blocker {
    self.resources = Some(Resources::parse(resources));
    self
}
}

#[derive(Serialize, Deserialize)]
@@ -60,8 +60,20 @@ impl Engine {
}

/// Builds a request from `url`, `source_url` and `request_type` and runs it
/// through the blocker.
///
/// If `Request::from_urls` returns an error, the request is reported as not
/// matched (all result fields cleared) instead of panicking.
pub fn check_network_urls(&self, url: &str, source_url: &str, request_type: &str) -> BlockerResult {
    match Request::from_urls(url, source_url, request_type) {
        Ok(request) => self.blocker.check(&request),
        // A request that cannot be constructed cannot match any rule, so
        // hand back a neutral result rather than unwrapping the error.
        Err(_) => BlockerResult {
            matched: false,
            explicit_cancel: false,
            redirect: None,
            exception: None,
            filter: None,
        },
    }
}

pub fn check_network_urls_with_hostnames(&self, url: &str, hostname: &str, source_hostname: &str, request_type: &str, third_party_request: Option<bool>) -> BlockerResult {
@@ -81,6 +93,11 @@ impl Engine {
/// Forwards `tags` to the underlying blocker's `tags_disable`.
pub fn tags_disable(&mut self, tags: &[&str]) {
    self.blocker.tags_disable(tags);
}

/// Passes the textual `resources` definition on to the underlying blocker's
/// `with_resources`.
///
/// Returns `self` so further calls can be chained. The input string is only
/// borrowed for the duration of the call, not for the lifetime of the
/// returned reference.
pub fn with_resources(&mut self, resources: &str) -> &mut Engine {
    self.blocker.with_resources(resources);
    self
}
}


@@ -14,6 +14,7 @@ extern crate bincode; // binary serialization/deserialization
extern crate flate2;
extern crate regex;
extern crate idna; // utf domain handling
extern crate base64;

#[cfg(test)]
extern crate csv; // csv handling library used for processing test data
@@ -28,3 +29,4 @@ pub mod optimizer;
pub mod url_parser;
pub mod engine;
pub mod filter_lists;
pub mod resources;
@@ -0,0 +1,195 @@
use std::collections::HashMap;
use regex::Regex;
use serde::{Deserialize, Serialize};

/// A single named resource as produced by `Resources::parse`.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub struct Resource {
    /// Second whitespace-separated token of the resource's header line,
    /// e.g. `application/javascript` or `image/png;base64`.
    pub content_type: String,
    /// Everything after the header line, with surrounding whitespace trimmed.
    /// Empty for header-only entries.
    pub data: String,
}

/// A collection of parsed resources, looked up by name.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub struct Resources {
    /// Maps each resource name (first token of its header line) to its
    /// content type and body.
    pub resources: HashMap<String, Resource>,
}

impl Resources {
    /// Parses a textual resource list into a name -> resource map.
    ///
    /// The input is split into entries on blank lines (`"\n\n"`). Within each
    /// entry:
    ///
    /// * lines whose first non-whitespace character is `#` are treated as
    ///   comments and blanked out;
    /// * the first remaining line is the header, `<name> <content type>`,
    ///   separated by ASCII whitespace;
    /// * everything after the header is the body, stored trimmed.
    ///
    /// Entries that cannot be interpreted are skipped silently: empty entries,
    /// headers with fewer than two tokens, and header-only entries unless the
    /// header contains a space and a `/` and does not contain `;base64`
    /// (such entries are kept with an empty body).
    ///
    /// When the same name is defined more than once, the definition that
    /// appears last in `data` wins.
    pub fn parse(data: &str) -> Resources {
        lazy_static! {
            static ref COMMENTS_RE: Regex = Regex::new(r"(?m:^\s*#.*$)").unwrap();
        }

        let mut resources: HashMap<String, Resource> = HashMap::new();

        for chunk in data.split("\n\n") {
            let without_comments = COMMENTS_RE.replace_all(chunk, "");
            let entry = without_comments.trim();
            if entry.is_empty() {
                continue;
            }

            let (header, body) = match entry.find('\n') {
                Some(pos) => entry.split_at(pos),
                // No new line, but the entry appears to encode a mime type and
                // the content is not base64, so the body is allowed to be empty.
                None if entry.contains(' ') && entry.contains('/') && !entry.contains(";base64") => {
                    (entry, "")
                }
                // Header without a body that we cannot accept as empty.
                None => continue,
            };

            let mut header_items = header.split_ascii_whitespace();
            let (name, content_type) = match (header_items.next(), header_items.next()) {
                (Some(name), Some(content_type)) => (name, content_type),
                // Name or type missing.
                _ => continue,
            };

            // Insert straight into the result map so that duplicate names are
            // resolved by position in the input rather than by the iteration
            // order of an intermediate per-type map.
            resources.insert(
                name.to_owned(),
                Resource {
                    content_type: content_type.to_owned(),
                    data: body.trim().to_owned(),
                },
            );
        }

        Resources { resources }
    }

    /// Looks up a resource by `name`, returning `None` when no resource with
    /// that name was parsed.
    pub fn get_resource(&self, name: &str) -> Option<&Resource> {
        self.resources.get(name)
    }
}

#[cfg(test)]
mod tests {
    // Unit tests for `Resources::parse`. Entries in the fixtures are separated
    // by a blank line, because the parser splits its input on "\n\n".

    use super::*;
    use crate::utils;

    #[test]
    fn parses_empty_resources() {
        let resources = Resources::parse("");
        assert!(resources.resources.is_empty());
    }

    #[test]
    fn parses_one_resource() {
        let resources_str = "foo application/javascript\ncontent";
        let resources = Resources::parse(resources_str);
        assert!(!resources.resources.is_empty());
        let mut expected = HashMap::new();
        expected.insert("foo".to_owned(), Resource {
            content_type: "application/javascript".to_owned(),
            data: "content".to_owned(),
        });
        assert_eq!(resources.resources, expected);
    }

    #[test]
    fn parses_two_resources() {
        let resources_str = r###"
foo application/javascript
content1

pixel.png image/png;base64
content2"###;
        let resources = Resources::parse(resources_str);
        assert!(!resources.resources.is_empty());
        let mut expected = HashMap::new();
        expected.insert("foo".to_owned(), Resource {
            content_type: "application/javascript".to_owned(),
            data: "content1".to_owned(),
        });
        expected.insert("pixel.png".to_owned(), Resource {
            content_type: "image/png;base64".to_owned(),
            data: "content2".to_owned(),
        });
        assert_eq!(resources.resources, expected);
    }

    #[test]
    fn robust_to_weird_format() {
        // Comments, an entry without a type and a base64 entry without content
        // must all be ignored; only the two well-formed entries survive.
        let resources_str = r###"
# Comment
# Comment 2
foo application/javascript
content1
# Comment 3

# Type missing
pixel.png
content

# Content missing
pixel.png image/png;base64

# This one is good!
pixel.png image/png;base64
content2
"###;

        let resources = Resources::parse(resources_str);
        assert!(!resources.resources.is_empty());
        let mut expected = HashMap::new();
        expected.insert("foo".to_owned(), Resource {
            content_type: "application/javascript".to_owned(),
            data: "content1".to_owned(),
        });
        expected.insert("pixel.png".to_owned(), Resource {
            content_type: "image/png;base64".to_owned(),
            data: "content2".to_owned(),
        });
        assert_eq!(resources.resources, expected);
    }

    #[test]
    fn parses_noop_resources() {
        // Header-only, non-base64 entries are accepted with an empty body.
        let resources_str = r###"
nooptext text/plain

noopcss text/css
"###;
        let resources = Resources::parse(resources_str);
        assert!(!resources.resources.is_empty());
        let mut expected = HashMap::new();
        expected.insert("nooptext".to_owned(), Resource {
            content_type: "text/plain".to_owned(),
            data: "".to_owned(),
        });
        expected.insert("noopcss".to_owned(), Resource {
            content_type: "text/css".to_owned(),
            data: "".to_owned(),
        });
        assert_eq!(resources.resources, expected);
    }

    #[test]
    fn handles_ubo_resources() {
        let resources_lines = utils::read_file_lines("data/uBlockOrigin/resources.txt");
        let resources_str = resources_lines.join("\n");
        assert!(!resources_str.is_empty());
        let resources = Resources::parse(&resources_str);
        assert!(!resources.resources.is_empty());
        assert_eq!(resources.resources.len(), 110);
    }
}
@@ -177,7 +177,7 @@ pub fn has_unicode(pattern: &str) -> bool {

const EXPECTED_RULES: usize = 75000;
#[cfg(not(target_arch = "wasm32"))]
pub fn read_rules(filename: &str) -> Vec<String> {
pub fn read_file_lines(filename: &str) -> Vec<String> {
let f = File::open(filename).unwrap_or_else(|_| panic!("File {} not found", filename));
let reader = BufReader::new(f);
let mut rules: Vec<String> = Vec::with_capacity(EXPECTED_RULES);
@@ -192,7 +192,7 @@ pub fn read_rules(filename: &str) -> Vec<String> {
pub fn rules_from_lists(lists: &[String]) -> Vec<String> {
let mut rules: Vec<String> = Vec::with_capacity(EXPECTED_RULES);
for filename in lists {
let mut list_rules = read_rules(filename);
let mut list_rules = read_file_lines(filename);
rules.append(&mut list_rules);
}
rules.shrink_to_fit();
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.