From 96dce2221821615022c7197543f1ce0575b8f9eb Mon Sep 17 00:00:00 2001
From: Raymond Hill
Date: Sat, 27 Apr 2019 08:18:01 -0400
Subject: [PATCH] Increase resolution of known-token lookup table

Related commit:
- https://github.com/gorhill/uBlock/commit/69a43e07c4bc017f3320a669c1e80147c17dddcf

Using 32 bits of token hash rather than just the 16 lower bits does
help discard more unknown tokens.

Using the default filter lists, the known-token lookup table is
populated with 12,276 entries out of 65,536, thus making the case
that, theoretically, there are many possible tokens which can be
discarded.

In practice, running the built-in staticNetFilteringEngine.benchmark()
with default filter lists, I find that 1,518,929 tokens were skipped
out of 4,441,891 extracted tokens, or 34%.
---
 src/js/background.js           |  2 +-
 src/js/static-net-filtering.js |  5 ++---
 src/js/utils.js                | 16 ++++++++++------
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/js/background.js b/src/js/background.js
index 7a0c45089cd52..aa427a7720f25 100644
--- a/src/js/background.js
+++ b/src/js/background.js
@@ -138,7 +138,7 @@ const µBlock = (function() { // jshint ignore:line
     // Read-only
     systemSettings: {
         compiledMagic: 12,  // Increase when compiled format changes
-        selfieMagic: 12     // Increase when selfie format changes
+        selfieMagic: 13     // Increase when selfie format changes
     },
 
     restoreBackupSettings: {
diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js
index 7a8e72db8209a..7747f5ee41825 100644
--- a/src/js/static-net-filtering.js
+++ b/src/js/static-net-filtering.js
@@ -2326,7 +2326,6 @@ FilterContainer.prototype.freeze = function() {
     const filterDataHolderId = FilterDataHolder.fid;
     const redirectTypeValue = typeNameToTypeValue.redirect;
     const unserialize = µb.CompiledLineIO.unserialize;
-    const knownTokens = this.urlTokenizer.knownTokens;
 
     for ( const line of this.goodFilters ) {
         if ( this.badFilters.has(line) ) {
@@ -2358,7 +2357,7 @@ FilterContainer.prototype.freeze = function() {
                 entry.next = bucket;
             }
             this.dataFilters.set(tokenHash, entry);
-            knownTokens[tokenHash & 0xFFFF] = 1;
+            this.urlTokenizer.addKnownToken(tokenHash);
             continue;
         }
 
@@ -2405,7 +2404,7 @@ FilterContainer.prototype.freeze = function() {
             continue;
         }
 
-        knownTokens[tokenHash & 0xFFFF] = 1;
+        this.urlTokenizer.addKnownToken(tokenHash);
 
         if ( entry === undefined ) {
             bucket.set(tokenHash, filterFromCompiledData(fdata));
diff --git a/src/js/utils.js b/src/js/utils.js
index 696a0b6967fc8..b3ec4b29b2631 100644
--- a/src/js/utils.js
+++ b/src/js/utils.js
@@ -81,11 +81,15 @@
 
     resetKnownTokens() {
         this.knownTokens.fill(0);
-        this.knownTokens[this.dotTokenHash & 0xFFFF] = 1;
-        this.knownTokens[this.anyTokenHash & 0xFFFF] = 1;
-        this.knownTokens[this.anyHTTPSTokenHash & 0xFFFF] = 1;
-        this.knownTokens[this.anyHTTPTokenHash & 0xFFFF] = 1;
-        this.knownTokens[this.noTokenHash & 0xFFFF] = 1;
+        this.addKnownToken(this.dotTokenHash);
+        this.addKnownToken(this.anyTokenHash);
+        this.addKnownToken(this.anyHTTPSTokenHash);
+        this.addKnownToken(this.anyHTTPTokenHash);
+        this.addKnownToken(this.noTokenHash);
+    }
+
+    addKnownToken(th) {
+        this.knownTokens[th & 0xFFFF ^ th >>> 16] = 1;
     }
 
     // Tokenize on demand.
@@ -172,7 +176,7 @@
                 th = th * 64 + v;
                 n += 1;
             }
-            if ( knownTokens[th & 0xFFFF] !== 0 ) {
+            if ( knownTokens[th & 0xFFFF ^ th >>> 16] !== 0 ) {
                 tokens[j+0] = th;
                 tokens[j+1] = ti;
                 j += 2;
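
A minimal standalone sketch of the folding trick this patch introduces,
for readers who want to experiment with it. Only the fold expression
`th & 0xFFFF ^ th >>> 16` is taken verbatim from the patch; the table
setup, the toy token hash, and the helper names around it are
stand-ins, not uBlock's actual code.

    // A 65,536-slot membership table, one byte per slot, matching the
    // "out of 65,536" figure in the commit message.
    const knownTokens = new Uint8Array(65536);

    // Fold a 32-bit token hash into a 16-bit table index. Since `>>>`
    // binds tighter than `&`, which binds tighter than `^`, this is
    // (th & 0xFFFF) ^ (th >>> 16): the high 16 bits are XOR-ed into
    // the low 16 bits, so all 32 bits influence which slot is used,
    // instead of only the low 16 bits as before the patch.
    const fold = th => th & 0xFFFF ^ th >>> 16;

    // Toy 32-bit token hash, loosely modeled on the `th = th * 64 + v`
    // accumulation in the tokenizer; not uBlock's actual hash.
    const tokenHash = s => {
        let th = 0;
        for ( let i = 0; i < s.length; i++ ) {
            th = (th * 64 + s.charCodeAt(i)) >>> 0;
        }
        return th;
    };

    // Filter side: mark tokens seen in filters as known.
    const addKnownToken = th => { knownTokens[fold(th)] = 1; };

    // URL side: tokens whose slot is still 0 can be skipped outright.
    const mayMatch = th => knownTokens[fold(th)] !== 0;

    for ( const t of [ 'ads', 'banner', 'track' ] ) {
        addKnownToken(tokenHash(t));
    }
    console.log(mayMatch(tokenHash('banner'))); // true
    console.log(mayMatch(tokenHash('zzqx')));   // false, unless it collides

Note that the table can yield false positives (an unknown token whose
folded hash collides with a known one merely costs a wasted bucket
lookup) but never false negatives, which is why folding in the high
bits is a pure win: it lowers the collision rate without risking
missed filters.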