Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 64 additions & 31 deletions src/cosmetic_filter_cache_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::cosmetic_filter_cache::ProceduralOrActionFilter;
use crate::cosmetic_filter_utils::SpecificFilterType;
use crate::cosmetic_filter_utils::{encode_script_with_permission, key_from_selector};
use crate::filters::cosmetic::{CosmeticFilter, CosmeticFilterMask, CosmeticFilterOperator};
use crate::filters::fb_builder::{EngineFlatBuilder, ShareableString};
use crate::filters::flatbuffer_generated::fb;
use crate::flatbuffers::containers::flat_map::FlatMapBuilder;
use crate::flatbuffers::containers::flat_multimap::FlatMultiMapBuilder;
Expand All @@ -27,18 +28,18 @@ use flatbuffers::WIPOffset;
/// See HostnameSpecificRules declaration for more details.
#[derive(Default)]
struct HostnameRule {
unhide: Vec<String>,
uninject_script: Vec<String>,
procedural_action: Vec<String>,
procedural_action_exception: Vec<String>,
unhide: Vec<ShareableString>,
uninject_script: Vec<ShareableString>,
procedural_action: Vec<ShareableString>,
procedural_action_exception: Vec<ShareableString>,
}

impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for HostnameRule {
impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for HostnameRule {
type Output = WIPOffset<fb::HostnameSpecificRules<'a>>;

fn serialize(
value: Self,
builder: &mut B,
builder: &mut EngineFlatBuilder<'a>,
) -> flatbuffers::WIPOffset<fb::HostnameSpecificRules<'a>> {
let unhide = serialize_vec_opt(value.unhide, builder);
let uninject_script = serialize_vec_opt(value.uninject_script, builder);
Expand Down Expand Up @@ -69,29 +70,29 @@ pub(crate) struct CosmeticFilterCacheBuilder {
complex_class_rules: HashMapBuilder<String, StringVector>,
complex_id_rules: HashMapBuilder<String, StringVector>,

hostname_hide: FlatMultiMapBuilder<Hash, String>,
hostname_inject_script: FlatMultiMapBuilder<Hash, String>,
hostname_hide: FlatMultiMapBuilder<Hash, ShareableString>,
hostname_inject_script: FlatMultiMapBuilder<Hash, ShareableString>,

specific_rules: HashMap<Hash, HostnameRule>,
}

impl CosmeticFilterCacheBuilder {
pub fn from_rules(rules: Vec<CosmeticFilter>) -> Self {
pub fn from_rules(rules: Vec<CosmeticFilter>, builder: &mut EngineFlatBuilder) -> Self {
let mut self_ = Self::default();

for rule in rules {
self_.add_filter(rule)
self_.add_filter(rule, builder);
}

self_
}

pub fn add_filter(&mut self, rule: CosmeticFilter) {
pub fn add_filter(&mut self, rule: CosmeticFilter, builder: &mut EngineFlatBuilder) {
if rule.has_hostname_constraint() {
if let Some(generic_rule) = rule.hidden_generic_rule() {
self.add_generic_filter(generic_rule);
}
self.store_hostname_rule(rule);
self.store_hostname_rule(rule, builder);
} else {
self.add_generic_filter(rule);
}
Expand Down Expand Up @@ -139,7 +140,7 @@ impl CosmeticFilterCacheBuilder {
}
}

fn store_hostname_rule(&mut self, rule: CosmeticFilter) {
fn store_hostname_rule(&mut self, rule: CosmeticFilter, builder: &mut EngineFlatBuilder) {
use SpecificFilterType::*;

let unhide = rule.mask.contains(CosmeticFilterMask::UNHIDE);
Expand Down Expand Up @@ -171,45 +172,74 @@ impl CosmeticFilterCacheBuilder {
.chain(rule.hostnames.unwrap_or_default())
.chain(rule.entities.unwrap_or_default());

tokens_to_insert.for_each(|t| self.store_hostname_filter(&t, kind.clone()));
self.store_hostname_filter(tokens_to_insert, &kind, builder);

let negated = kind.negated();
let tokens_to_insert_negated = std::iter::empty()
.chain(rule.not_hostnames.unwrap_or_default())
.chain(rule.not_entities.unwrap_or_default());

let negated = kind.negated();

tokens_to_insert_negated.for_each(|t| self.store_hostname_filter(&t, negated.clone()));
self.store_hostname_filter(tokens_to_insert_negated, &negated, builder);
}

fn store_hostname_filter(&mut self, token: &Hash, kind: SpecificFilterType) {
fn store_hostname_filter(
&mut self,
tokens: impl IntoIterator<Item = Hash>,
kind: &SpecificFilterType,
builder: &mut EngineFlatBuilder,
) {
use SpecificFilterType::*;

match kind {
// Handle hide and inject_script at top level for better deduplication
Hide(s) => {
self.hostname_hide.insert(*token, s);
let mut shareable_string = None;
for token in tokens {
let s = shareable_string.get_or_insert_with(|| builder.add_shareable_string(s));
self.hostname_hide.insert(token, s.clone());
}
}
InjectScript((s, permission)) => {
let encoded_script = encode_script_with_permission(s, permission);
self.hostname_inject_script.insert(*token, encoded_script);
let mut shareable_string = None;
for token in tokens {
let s = shareable_string.get_or_insert_with(|| {
builder.add_shareable_string(&encode_script_with_permission(s, permission))
});
self.hostname_inject_script.insert(token, s.clone());
}
}
// Handle remaining types through HostnameRule
Unhide(s) => {
let entry = self.specific_rules.entry(*token).or_default();
entry.unhide.push(s);
let mut shareable_string = None;
for token in tokens {
let s = shareable_string.get_or_insert_with(|| builder.add_shareable_string(s));
let entry = self.specific_rules.entry(token).or_default();
entry.unhide.push(s.clone());
}
}
UninjectScript((s, _)) => {
let entry = self.specific_rules.entry(*token).or_default();
entry.uninject_script.push(s);
let mut shareable_string = None;
for token in tokens {
let s = shareable_string.get_or_insert_with(|| builder.add_shareable_string(s));
let entry = self.specific_rules.entry(token).or_default();
entry.uninject_script.push(s.clone());
}
}
ProceduralOrAction(s) => {
let entry = self.specific_rules.entry(*token).or_default();
entry.procedural_action.push(s);
let mut shareable_string = None;
for token in tokens {
let s = shareable_string.get_or_insert_with(|| builder.add_shareable_string(s));
let entry = self.specific_rules.entry(token).or_default();
entry.procedural_action.push(s.clone());
}
}
ProceduralOrActionException(s) => {
let entry = self.specific_rules.entry(*token).or_default();
entry.procedural_action_exception.push(s);
let mut shareable_string = None;
for token in tokens {
let s = shareable_string.get_or_insert_with(|| builder.add_shareable_string(s));
let entry = self.specific_rules.entry(token).or_default();
entry.procedural_action_exception.push(s.clone());
}
}
}
}
Expand All @@ -227,10 +257,13 @@ impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for StringVector {
}
}

impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for CosmeticFilterCacheBuilder {
impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for CosmeticFilterCacheBuilder {
type Output = WIPOffset<fb::CosmeticFilters<'a>>;

fn serialize(value: Self, builder: &mut B) -> WIPOffset<fb::CosmeticFilters<'a>> {
fn serialize(
value: Self,
builder: &mut EngineFlatBuilder<'a>,
) -> WIPOffset<fb::CosmeticFilters<'a>> {
let complex_class_rules = HashMapBuilder::finish(value.complex_class_rules, builder);
let complex_id_rules = HashMapBuilder::finish(value.complex_id_rules, builder);

Expand Down
15 changes: 7 additions & 8 deletions src/cosmetic_filter_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,17 +86,16 @@ impl SpecificFilterType {

/// Encodes permission bits in the last 2 ascii chars of a script string
/// Returns the script with permission appended
pub(crate) fn encode_script_with_permission(
mut script: String,
permission: PermissionMask,
) -> String {
pub(crate) fn encode_script_with_permission(script: &str, permission: &PermissionMask) -> String {
const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";
let high = (permission.to_bits() >> 4) as usize;
let low = (permission.to_bits() & 0x0f) as usize;

script.push(HEX_CHARS[high] as char);
script.push(HEX_CHARS[low] as char);
script
let mut encoded_script = String::with_capacity(script.len() + 2);
encoded_script.push_str(script);
encoded_script.push(HEX_CHARS[high] as char);
encoded_script.push(HEX_CHARS[low] as char);
encoded_script
}

/// Decodes permission bits from the last 2 ascii chars of a script string
Expand Down Expand Up @@ -133,7 +132,7 @@ mod tests {
let script = "console.log('测试 🚀 emoji')".to_string();
let permission = PermissionMask::from_bits(permission);

let encoded = encode_script_with_permission(script.clone(), permission);
let encoded = encode_script_with_permission(&script, &permission);
let (decoded_permission, decoded_script) = decode_script_with_permission(&encoded);

assert_eq!(decoded_permission.to_bits(), permission.to_bits());
Expand Down
2 changes: 1 addition & 1 deletion src/data_format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ const ADBLOCK_RUST_DAT_MAGIC: [u8; 4] = [0xd1, 0xd9, 0x3a, 0xaf];

/// The version of the data format.
/// If the data format version is incremented, the data is considered as incompatible.
const ADBLOCK_RUST_DAT_VERSION: u8 = 2;
const ADBLOCK_RUST_DAT_VERSION: u8 = 3;

/// The total length of the header prefix (magic + version + seahash)
const HEADER_PREFIX_LENGTH: usize = 4 + 1 + 8;
Expand Down
2 changes: 1 addition & 1 deletion src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ fn make_flatbuffer(
let mut builder = EngineFlatBuilder::default();
let network_rules_builder = NetworkRulesBuilder::from_rules(network_filters, optimize);
let network_rules = FlatSerialize::serialize(network_rules_builder, &mut builder);
let cosmetic_rules = CosmeticFilterCacheBuilder::from_rules(cosmetic_filters);
let cosmetic_rules = CosmeticFilterCacheBuilder::from_rules(cosmetic_filters, &mut builder);
let cosmetic_rules = FlatSerialize::serialize(cosmetic_rules, &mut builder);
builder.finish(network_rules, cosmetic_rules)
}
Expand Down
29 changes: 28 additions & 1 deletion src/filters/fb_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,24 @@ use std::collections::HashMap;
use flatbuffers::WIPOffset;

use crate::filters::fb_network_builder::NetworkFilterListBuilder;
use crate::flatbuffers::containers::flat_serialize::{FlatBuilder, WIPFlatVec};
use crate::flatbuffers::containers::flat_serialize::{FlatBuilder, FlatSerialize, WIPFlatVec};
use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory;
use crate::utils::Hash;

use super::flat::fb;

/// Handle to a string registered through `EngineFlatBuilder::add_shareable_string`.
///
/// Cloning the handle copies only the index, so the same registered string can
/// be referenced from several places without being written again.
#[derive(Clone, Default)]
pub(crate) struct ShareableString {
// Position of the string in `EngineFlatBuilder::shared_strings`.
// `None` (the `Default` value) is serialized as an empty string.
index: Option<usize>,
}

/// Builder state used while assembling the engine flatbuffer.
#[derive(Default)]
pub(crate) struct EngineFlatBuilder<'a> {
// Underlying flatbuffers builder that the data is written into.
fb_builder: flatbuffers::FlatBufferBuilder<'a>,
// NOTE(review): presumably the distinct domain hashes in insertion order;
// the code that fills this is outside this view, confirm.
unique_domains_hashes: Vec<Hash>,
// NOTE(review): presumably maps a domain hash to its index in
// `unique_domains_hashes`; confirm against the method that uses it.
unique_domains_hashes_map: HashMap<Hash, u32>,
// Offsets of the strings created by `add_shareable_string`; the index held by
// a `ShareableString` points into this vector.
shared_strings: Vec<WIPOffset<&'a str>>,
// Owned copies of the strings passed to `add_shareable_string`, pushed in the
// same order as `shared_strings`.
shared_strings_original: Vec<String>,
}

impl<'a> EngineFlatBuilder<'a> {
Expand All @@ -29,6 +36,15 @@ impl<'a> EngineFlatBuilder<'a> {
index
}

/// Writes `s` into the flatbuffer and registers it as a shareable string.
///
/// The returned handle stores the position of the new entry in
/// `shared_strings`; serializing the handle (or any clone of it) yields the
/// offset recorded here. Every call creates a new entry, even when the same
/// text was registered before.
pub fn add_shareable_string(&mut self, s: &str) -> ShareableString {
    // The new entry is appended, so its index is the current table length.
    let slot = self.shared_strings.len();

    let offset = self.fb_builder.create_string(s);
    self.shared_strings.push(offset);
    // Keep an owned copy of the text alongside the offset, in the same order.
    self.shared_strings_original.push(s.to_owned());

    ShareableString { index: Some(slot) }
}

pub fn finish(
&mut self,
network_rules: WIPFlatVec<'a, NetworkFilterListBuilder, EngineFlatBuilder<'a>>,
Expand Down Expand Up @@ -58,3 +74,14 @@ impl<'a> FlatBuilder<'a> for EngineFlatBuilder<'a> {
&mut self.fb_builder
}
}

/// Serializes a `ShareableString` by resolving it to an already-written string.
impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for ShareableString {
    type Output = WIPOffset<&'a str>;

    /// Returns the offset stored for this handle in `builder.shared_strings`.
    ///
    /// A handle without an index (the `Default` value) resolves to an empty
    /// string created through the raw builder's `create_shared_string`.
    ///
    /// # Panics
    ///
    /// Panics if the handle's index is out of bounds for
    /// `builder.shared_strings`.
    fn serialize(value: Self, builder: &mut EngineFlatBuilder<'a>) -> Self::Output {
        match value.index {
            Some(slot) => builder.shared_strings[slot],
            None => builder.raw_builder().create_shared_string(""),
        }
    }
}
12 changes: 6 additions & 6 deletions tests/unit/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ mod tests {
fn deserialization_generate_simple() {
let mut engine = Engine::from_rules(["ad-banner"], Default::default());
let data = engine.serialize().to_vec();
const EXPECTED_HASH: u64 = 884296823183764168;
const EXPECTED_HASH: u64 = 10945714988765761881;
assert_eq!(hash(&data), EXPECTED_HASH, "{HASH_MISMATCH_MSG}");
engine.deserialize(&data).unwrap();
}
Expand All @@ -193,7 +193,7 @@ mod tests {
let mut engine = Engine::from_rules(["ad-banner$tag=abc"], Default::default());
engine.use_tags(&["abc"]);
let data = engine.serialize().to_vec();
const EXPECTED_HASH: u64 = 7887643884738497753;
const EXPECTED_HASH: u64 = 4608037684406751718;
assert_eq!(hash(&data), EXPECTED_HASH, "{HASH_MISMATCH_MSG}");
engine.deserialize(&data).unwrap();
}
Expand Down Expand Up @@ -221,8 +221,8 @@ mod tests {
#[cfg(feature = "debug-info")]
{
let debug_info = engine.get_debug_info();
let low_bound = 9_500_000;
let high_bound = 10_000_000;
let low_bound = 8_000_000;
let high_bound = 8_500_000;
assert!(
debug_info.flatbuffer_size >= low_bound,
"Expected size >= {} bytes, got {}",
Expand All @@ -237,9 +237,9 @@ mod tests {
);
}
let expected_hash: u64 = if cfg!(feature = "css-validation") {
18094146314477408965
9439492009815519037
} else {
8215024964158872824
14803842039735157685
};

assert_eq!(hash(&data), expected_hash, "{HASH_MISMATCH_MSG}");
Expand Down