Skip to content

Commit

Permalink
Exclude data type (i.e. csp=) from bidi-trie
Browse files Browse the repository at this point in the history
We need a `matchAll()` method on the bidi-trie before
we can store filters of type `data` in it.

Related issue:
- uBlockOrigin/uBlock-issues#761

Related commit:
- 7971b22
  • Loading branch information
gorhill committed Oct 22, 2019
1 parent 2681209 commit 30393fd
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 24 deletions.
102 changes: 80 additions & 22 deletions src/js/static-net-filtering.js
Expand Up @@ -161,6 +161,8 @@ const toNormalizedType = {

const BlockImportant = BlockAction | Important;

// Extract the 5-bit type value held in bits 4 through 8 of a category bit
// field: the field is shifted right (unsigned) by 4, then masked to 5 bits.
const typeValueFromCatBits = catBits => {
    const aligned = catBits >>> 4;
    return aligned & 0x1F;
};

/******************************************************************************/

// See the following as short-lived registers, used during evaluation. They are
Expand All @@ -183,12 +185,10 @@ let $requestTypeBit = 0;
const restrSeparator = '(?:[^%.0-9a-z_-]|$)';

// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
const reEscape = /[.*+?^${}()|[\]\\]/g;

// Convert a plain string (devoid of special characters) into a regex.
const restrFromPlainPattern = function(s) {
return s.replace(restrFromPlainPattern.escape, '\\$&');
};
restrFromPlainPattern.escape = /[.*+?^${}()|[\]\\]/g;
// Turn a plain string into regex source text: every character matched by
// `reEscape` is prefixed with a backslash, all other characters are kept.
const restrFromPlainPattern = s => {
    const escaped = s.replace(reEscape, special => `\\${special}`);
    return escaped;
};

const restrFromGenericPattern = function(s, anchor = 0) {
let reStr = s.replace(restrFromGenericPattern.rePlainChars, '\\$&')
Expand Down Expand Up @@ -270,9 +270,7 @@ const CHAR_CLASS_SEPARATOR = 0b00000001;
}
}

const isSeparatorChar = function(c) {
return (charClassMap[c] & CHAR_CLASS_SEPARATOR) !== 0;
};
// Report whether the `charClassMap` entry at index `c` has the
// CHAR_CLASS_SEPARATOR bit set.
const isSeparatorChar = c => {
    const separatorBit = charClassMap[c] & CHAR_CLASS_SEPARATOR;
    return separatorBit !== 0;
};

/******************************************************************************/

Expand Down Expand Up @@ -494,8 +492,8 @@ registerFilterClass(FilterTrue);

const FilterPatternPlain = class {
constructor(i, n) {
this.i = i;
this.n = n;
this.i = i | 0;
this.n = n | 0;
}

match() {
Expand Down Expand Up @@ -596,8 +594,8 @@ const FilterPatternPlainX = class extends FilterPatternPlain {

const FilterPatternLeft = class {
constructor(i, n) {
this.i = i;
this.n = n;
this.i = i | 0;
this.n = n | 0;
}

match() {
Expand Down Expand Up @@ -682,8 +680,8 @@ registerFilterClass(FilterPatternLeftEx);

const FilterPatternRight = class {
constructor(i, n) {
this.i = i;
this.n = n;
this.i = i | 0;
this.n = n | 0;
}

match() {
Expand Down Expand Up @@ -1497,8 +1495,8 @@ const FilterDataHolderResult = class {

const FilterCollection = class {
constructor(i = 0, n = 0) {
this.i = i;
this.n = n;
this.i = i | 0;
this.n = n | 0;
}

get size() {
Expand Down Expand Up @@ -1796,6 +1794,47 @@ registerFilterClass(FilterHTTPJustOrigin);

/******************************************************************************/

// Filter unit which wraps a trie of plain patterns, exposing it through the
// same methods (match, logData, toSelfie, fromSelfie) as other filter units.
const FilterPlainTrie = class {
    // `trie` is stored as-is in `plainTrie`; it is the object queried by
    // match() and serialized by toSelfie().
    constructor(trie) {
        this.plainTrie = trie;
    }

    // Query the trie at the current token position ($tokenBeg). On a hit,
    // record the unit index the trie exposes as `$iu`, and report success.
    match() {
        const trie = this.plainTrie;
        if ( !trie.matches($tokenBeg) ) { return false; }
        this.$matchedUnit = trie.$iu;
        return true;
    }

    // Not implemented yet: does nothing and returns undefined.
    matchAndFetchData(/* type, out */) {
        // TODO
    }

    // Append to `details` the portion of the request URL delimited by the
    // trie's `$l` and `$r` offsets, both verbatim and as regex source, then
    // let the last matched unit add its own logging data.
    logData(details) {
        const { $l, $r } = this.plainTrie;
        const matched = $requestURL.slice($l, $r);
        details.pattern.push(matched);
        details.regex.push(restrFromPlainPattern(matched));
        // NOTE(review): the prototype default for $matchedUnit (assigned
        // right after this class) is 0, not -1; presumably -1 is what the
        // trie reports when no unit is attached to the match -- confirm.
        const iunit = this.$matchedUnit;
        if ( iunit === -1 ) { return; }
        filterUnits[iunit].logData(details);
    }

    // Serialized form: [ filter class id, compiled trie ].
    toSelfie() {
        const { fid, plainTrie } = this;
        return [ fid, bidiTrie.compileOne(plainTrie) ];
    }

    // Rebuild an instance from the array produced by toSelfie(); only the
    // second entry (the compiled trie) is used.
    static fromSelfie(args) {
        const trie = bidiTrie.createOne(args[1]);
        return new FilterPlainTrie(trie);
    }
};

FilterPlainTrie.prototype.$matchedUnit = 0;

registerFilterClass(FilterPlainTrie);

/******************************************************************************/

const FilterBucket = class extends FilterCollection {
match() {
if ( this.plainTrie !== null ) {
Expand Down Expand Up @@ -1847,6 +1886,7 @@ const FilterBucket = class extends FilterCollection {
}

optimize() {
if ( this.n < 3 ) { return; }
const units = filterUnits;
const trieables = new Set();
let i = this.i;
Expand All @@ -1858,7 +1898,7 @@ const FilterBucket = class extends FilterCollection {
i = filterSequences[i+1];
if ( i === 0 ) { break; }
}
if ( trieables.size <= 2 ) { return; }
if ( trieables.size < 3 ) { return; }
if ( this.plainTrie === null ) {
this.plainTrie = bidiTrie.createOne();
}
Expand All @@ -1880,6 +1920,9 @@ const FilterBucket = class extends FilterCollection {
if ( inext === 0 ) { break; }
i = inext;
}
if ( this.i === 0 ) {
return new FilterPlainTrie(this.plainTrie);
}
}

_addToTrie(iunit) {
Expand Down Expand Up @@ -2707,11 +2750,17 @@ FilterContainer.prototype.freeze = function() {
this.badFilters.clear();
this.goodFilters.clear();

for ( const bucket of this.categories.values() ) {
// Skip 'data' type since bidi-trie does not (yet) support matchAll().
const dataTypeValue = typeValueFromCatBits(typeNameToTypeValue['data']);
for ( const [ catBits, bucket ] of this.categories ) {
if ( typeValueFromCatBits(catBits) === dataTypeValue ) { continue; }
for ( const iunit of bucket.values() ) {
const f = units[iunit];
if ( f instanceof FilterBucket === false ) { continue; }
f.optimize();
const g = f.optimize();
if ( g !== undefined ) {
units[iunit] = g;
}
}
}

Expand Down Expand Up @@ -3589,8 +3638,9 @@ FilterContainer.prototype.filterClassHistogram = function() {
for ( const fclass of filterClasses ) {
filterClassDetails.set(fclass.fid, { name: fclass.name, count: 0, });
}
// Artificial classes to report content of tries
filterClassDetails.set(1000, { name: 'FilterPlainTrie', count: 0, });
// Artificial classes to report content counts
filterClassDetails.set(1000, { name: 'FilterPlainTrie Content', count: 0, });
filterClassDetails.set(1001, { name: 'FilterHostnameDict Content', count: 0, });

const countFilter = function(f) {
if ( f instanceof Object === false ) { return; }
Expand All @@ -3600,17 +3650,21 @@ FilterContainer.prototype.filterClassHistogram = function() {
for ( const f of filterUnits ) {
if ( f === null ) { continue; }
countFilter(f);
if ( f instanceof FilterBucket ) {
if ( f instanceof FilterCollection ) {
let i = f.i;
while ( i !== 0 ) {
countFilter(filterUnits[filterSequences[i+0]]);
i = filterSequences[i+1];
}
if ( f.plainTrie !== null ) {
if ( f.plainTrie ) {
filterClassDetails.get(1000).count += f.plainTrie.size;
}
continue;
}
if ( f instanceof FilterHostnameDict ) {
filterClassDetails.get(1001).count += f.size;
continue;
}
if ( f instanceof FilterComposite ) {
let i = f.i;
while ( i !== 0 ) {
Expand All @@ -3619,6 +3673,10 @@ FilterContainer.prototype.filterClassHistogram = function() {
}
continue;
}
if ( f instanceof FilterPlainTrie ) {
filterClassDetails.get(1000).count += f.plainTrie.size;
continue;
}
}
const results = Array.from(filterClassDetails.values()).sort((a, b) => {
return b.count - a.count;
Expand Down
3 changes: 1 addition & 2 deletions src/js/strie.js
Expand Up @@ -101,9 +101,8 @@
*/

const PAGE_SIZE = 65536*2;
// i32 / i8
const HAYSTACK_START = 0;
const HAYSTACK_SIZE = 2048;
const HAYSTACK_SIZE = 2048; // i32 / i8
const HAYSTACK_SIZE_SLOT = HAYSTACK_SIZE >>> 2; // 512 / 2048
const TRIE0_SLOT = HAYSTACK_SIZE_SLOT + 1; // 512 / 2052
const TRIE1_SLOT = HAYSTACK_SIZE_SLOT + 2; // 513 / 2056
Expand Down

0 comments on commit 30393fd

Please sign in to comment.