Skip to content

Commit

Permalink
Increase resolution of known-token lookup table
Browse files Browse the repository at this point in the history
Related commit:
- 69a43e0

Using 32 bits of token hash rather than just the 16 lower
bits does help discard more unknown tokens.

Using the default filter lists, the known-token lookup
table is populated with 12,276 entries out of 65,536,
which suggests that, in theory, a large number of
tokens can be discarded.

In practice, running the built-in
staticNetFilteringEngine.benchmark() with default filter
lists, I find that 1,518,929 tokens were skipped out of
4,441,891 extracted tokens, or 34%.
  • Loading branch information
gorhill committed Apr 27, 2019
1 parent 6093845 commit 96dce22
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 10 deletions.
2 changes: 1 addition & 1 deletion src/js/background.js
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ const µBlock = (function() { // jshint ignore:line
// Read-only
systemSettings: {
compiledMagic: 12, // Increase when compiled format changes
selfieMagic: 12 // Increase when selfie format changes
selfieMagic: 13 // Increase when selfie format changes
},

restoreBackupSettings: {
Expand Down
5 changes: 2 additions & 3 deletions src/js/static-net-filtering.js
Original file line number Diff line number Diff line change
Expand Up @@ -2326,7 +2326,6 @@ FilterContainer.prototype.freeze = function() {
const filterDataHolderId = FilterDataHolder.fid;
const redirectTypeValue = typeNameToTypeValue.redirect;
const unserialize = µb.CompiledLineIO.unserialize;
const knownTokens = this.urlTokenizer.knownTokens;

for ( const line of this.goodFilters ) {
if ( this.badFilters.has(line) ) {
Expand Down Expand Up @@ -2358,7 +2357,7 @@ FilterContainer.prototype.freeze = function() {
entry.next = bucket;
}
this.dataFilters.set(tokenHash, entry);
knownTokens[tokenHash & 0xFFFF] = 1;
this.urlTokenizer.addKnownToken(tokenHash);
continue;
}

Expand Down Expand Up @@ -2405,7 +2404,7 @@ FilterContainer.prototype.freeze = function() {
continue;
}

knownTokens[tokenHash & 0xFFFF] = 1;
this.urlTokenizer.addKnownToken(tokenHash);

if ( entry === undefined ) {
bucket.set(tokenHash, filterFromCompiledData(fdata));
Expand Down
16 changes: 10 additions & 6 deletions src/js/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,15 @@

resetKnownTokens() {
this.knownTokens.fill(0);
this.knownTokens[this.dotTokenHash & 0xFFFF] = 1;
this.knownTokens[this.anyTokenHash & 0xFFFF] = 1;
this.knownTokens[this.anyHTTPSTokenHash & 0xFFFF] = 1;
this.knownTokens[this.anyHTTPTokenHash & 0xFFFF] = 1;
this.knownTokens[this.noTokenHash & 0xFFFF] = 1;
this.addKnownToken(this.dotTokenHash);
this.addKnownToken(this.anyTokenHash);
this.addKnownToken(this.anyHTTPSTokenHash);
this.addKnownToken(this.anyHTTPTokenHash);
this.addKnownToken(this.noTokenHash);
}

addKnownToken(th) {
this.knownTokens[th & 0xFFFF ^ th >>> 16] = 1;
}

// Tokenize on demand.
Expand Down Expand Up @@ -172,7 +176,7 @@
th = th * 64 + v;
n += 1;
}
if ( knownTokens[th & 0xFFFF] !== 0 ) {
if ( knownTokens[th & 0xFFFF ^ th >>> 16] !== 0 ) {
tokens[j+0] = th;
tokens[j+1] = ti;
j += 2;
Expand Down

0 comments on commit 96dce22

Please sign in to comment.