diff --git a/platform/common/vapi-background.js b/platform/common/vapi-background.js index 54148039eb6af..08cfd58720470 100644 --- a/platform/common/vapi-background.js +++ b/platform/common/vapi-background.js @@ -1671,10 +1671,7 @@ vAPI.cloud = (( ) => { const push = async function(details) { const { datakey, data, encode } = details; - if ( - data === undefined || - typeof data === 'string' && data === '' - ) { + if ( data === undefined || typeof data === 'string' && data === '' ) { return deleteChunks(datakey, 0); } const item = { @@ -1682,10 +1679,9 @@ vAPI.cloud = (( ) => { tstamp: Date.now(), data, }; - const json = JSON.stringify(item); const encoded = encode instanceof Function - ? await encode(json) - : json; + ? await encode(item) + : JSON.stringify(item); // Chunkify taking into account QUOTA_BYTES_PER_ITEM: // https://developer.chrome.com/extensions/storage#property-sync @@ -1750,13 +1746,16 @@ vAPI.cloud = (( ) => { i += 1; } encoded = encoded.join(''); - const json = decode instanceof Function - ? await decode(encoded) - : encoded; + let entry = null; try { - entry = JSON.parse(json); - } catch(ex) { + if ( decode instanceof Function ) { + entry = await decode(encoded) || null; + } + if ( typeof entry === 'string' ) { + entry = JSON.parse(entry); + } + } catch(_) { } return entry; }; diff --git a/src/js/assets.js b/src/js/assets.js index 5a550dfb4643c..6484d289d9b8e 100644 --- a/src/js/assets.js +++ b/src/js/assets.js @@ -528,12 +528,12 @@ function getAssetSourceRegistry() { assetSourceRegistryPromise = cacheStorage.get( 'assetSourceRegistry' ).then(bin => { - if ( - bin instanceof Object && - bin.assetSourceRegistry instanceof Object - ) { - assetSourceRegistry = bin.assetSourceRegistry; - return assetSourceRegistry; + if ( bin instanceof Object ) { + if ( bin.assetSourceRegistry instanceof Object ) { + assetSourceRegistry = bin.assetSourceRegistry; + ubolog('Loaded assetSourceRegistry'); + return assetSourceRegistry; + } } return assets.fetchText( µb.assetsBootstrapLocation || µb.assetsJsonPath @@ -543,6 +543,7 @@ function getAssetSourceRegistry() { : assets.fetchText(µb.assetsJsonPath); }).then(details => { updateAssetSourceRegistry(details.content, true); + ubolog('Loaded assetSourceRegistry'); return assetSourceRegistry; }); }); @@ -673,39 +674,27 @@ let assetCacheRegistryPromise; let assetCacheRegistry = {}; function getAssetCacheRegistry() { - if ( assetCacheRegistryPromise === undefined ) { - assetCacheRegistryPromise = cacheStorage.get( - 'assetCacheRegistry' - ).then(bin => { - if ( - bin instanceof Object && - bin.assetCacheRegistry instanceof Object - ) { - if ( Object.keys(assetCacheRegistry).length === 0 ) { - assetCacheRegistry = bin.assetCacheRegistry; - } else { - console.error( - 'getAssetCacheRegistry(): assetCacheRegistry reassigned!' - ); - if ( - Object.keys(bin.assetCacheRegistry).sort().join() !== - Object.keys(assetCacheRegistry).sort().join() - ) { - console.error( - 'getAssetCacheRegistry(): assetCacheRegistry changes overwritten!' - ); - } - } - } - return assetCacheRegistry; - }); + if ( assetCacheRegistryPromise !== undefined ) { + return assetCacheRegistryPromise; } - + assetCacheRegistryPromise = cacheStorage.get( + 'assetCacheRegistry' + ).then(bin => { + if ( bin instanceof Object === false ) { return; } + if ( bin.assetCacheRegistry instanceof Object === false ) { return; } + if ( Object.keys(assetCacheRegistry).length !== 0 ) { + return console.error('getAssetCacheRegistry(): assetCacheRegistry reassigned!'); + } + ubolog('Loaded assetCacheRegistry'); + assetCacheRegistry = bin.assetCacheRegistry; + }).then(( ) => + assetCacheRegistry + ); return assetCacheRegistryPromise; } const saveAssetCacheRegistry = (( ) => { - const save = function() { + const save = ( ) => { timer.off(); cacheStorage.set({ assetCacheRegistry }); }; @@ -726,7 +715,9 @@ async function assetCacheRead(assetKey, updateReadTime = false) { const reportBack = function(content) { if ( content instanceof Blob ) { content = ''; } const details = { assetKey, content }; - if ( content === '' ) { details.error = 'ENOTFOUND'; } + if ( content === '' || content === undefined ) { + details.error = 'ENOTFOUND'; + } return details; }; @@ -742,17 +733,11 @@ async function assetCacheRead(assetKey, updateReadTime = false) { ) + ' ms'; } - if ( - bin instanceof Object === false || - bin.hasOwnProperty(internalKey) === false - ) { - return reportBack(''); - } + if ( bin instanceof Object === false ) { return reportBack(''); } + if ( bin.hasOwnProperty(internalKey) === false ) { return reportBack(''); } const entry = assetCacheRegistry[assetKey]; - if ( entry === undefined ) { - return reportBack(''); - } + if ( entry === undefined ) { return reportBack(''); } entry.readTime = Date.now(); if ( updateReadTime ) { @@ -762,34 +747,22 @@ async function assetCacheRead(assetKey, updateReadTime = false) { return reportBack(bin[internalKey]); } -async function assetCacheWrite(assetKey, details) { - let content = ''; - let options = {}; - if ( typeof details === 'string' ) { - content = details; - } else if ( details instanceof Object ) { - content = details.content || ''; - options = details; - } - - if ( content === '' ) { +async function assetCacheWrite(assetKey, content, options = {}) { + if ( content === '' || content === undefined ) { return assetCacheRemove(assetKey); } - const cacheDict = await getAssetCacheRegistry(); + const { resourceTime, url } = options; - let entry = cacheDict[assetKey]; - if ( entry === undefined ) { - entry = cacheDict[assetKey] = {}; - } - entry.writeTime = entry.readTime = Date.now(); - entry.resourceTime = options.resourceTime || 0; - if ( typeof options.url === 'string' ) { - entry.remoteURL = options.url; - } - cacheStorage.set({ - assetCacheRegistry, - [`cache/${assetKey}`]: content + getAssetCacheRegistry().then(cacheDict => { + const entry = cacheDict[assetKey] || {}; + cacheDict[assetKey] = entry; + entry.writeTime = entry.readTime = Date.now(); + entry.resourceTime = resourceTime || 0; + if ( typeof url === 'string' ) { + entry.remoteURL = url; + } + cacheStorage.set({ assetCacheRegistry, [`cache/${assetKey}`]: content }); }); const result = { assetKey, content }; @@ -800,21 +773,31 @@ async function assetCacheWrite(assetKey, details) { return result; } -async function assetCacheRemove(pattern) { +async function assetCacheRemove(pattern, options = {}) { const cacheDict = await getAssetCacheRegistry(); const removedEntries = []; const removedContent = []; for ( const assetKey in cacheDict ) { - if ( pattern instanceof RegExp && !pattern.test(assetKey) ) { - continue; - } - if ( typeof pattern === 'string' && assetKey !== pattern ) { - continue; + if ( pattern instanceof RegExp ) { + if ( pattern.test(assetKey) === false ) { continue; } + } else if ( typeof pattern === 'string' ) { + if ( assetKey !== pattern ) { continue; } } removedEntries.push(assetKey); - removedContent.push('cache/' + assetKey); + removedContent.push(`cache/${assetKey}`); delete cacheDict[assetKey]; } + if ( options.janitor && pattern instanceof RegExp ) { + const re = new RegExp( + pattern.source.replace(/^\^/, 'cache\/'), + pattern.flags + ); + const keys = await cacheStorage.keys(re); + for ( const key of keys ) { + removedContent.push(key); + ubolog(`Removing stray ${key}`); + } + } if ( removedContent.length !== 0 ) { await Promise.all([ cacheStorage.remove(removedContent), @@ -980,8 +963,7 @@ assets.get = async function(assetKey, options = {}) { } if ( details.content === '' ) { continue; } if ( reIsExternalPath.test(contentURL) && options.dontCache !== true ) { - assetCacheWrite(assetKey, { - content: details.content, + assetCacheWrite(assetKey, details.content, { url: contentURL, silent: options.silent === true, }); @@ -1057,8 +1039,7 @@ async function getRemote(assetKey, options = {}) { } // Success - assetCacheWrite(assetKey, { - content: result.content, + assetCacheWrite(assetKey, result.content, { url: contentURL, resourceTime: result.resourceTime || 0, }); @@ -1101,6 +1082,17 @@ assets.put = async function(assetKey, content) { /******************************************************************************/ +assets.toCache = async function(assetKey, content) { + return assetCacheWrite(assetKey, content); +}; + +assets.fromCache = async function(assetKey) { + const details = await assetCacheRead(assetKey); + return details && details.content; +}; + +/******************************************************************************/ + assets.metadata = async function() { await Promise.all([ getAssetSourceRegistry(), @@ -1147,8 +1139,8 @@ assets.metadata = async function() { assets.purge = assetCacheMarkAsDirty; -assets.remove = function(pattern) { - return assetCacheRemove(pattern); +assets.remove = function(...args) { + return assetCacheRemove(...args); }; assets.rmrf = function() { @@ -1300,8 +1292,7 @@ async function diffUpdater() { 'Diff-Path', 'Diff-Expires', ]); - assetCacheWrite(data.assetKey, { - content: data.text, + assetCacheWrite(data.assetKey, data.text, { resourceTime: metadata.lastModified || 0, }); metadata.diffUpdated = true; diff --git a/src/js/background.js b/src/js/background.js index 470a13177f827..80bab5a95168e 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -56,6 +56,7 @@ const hiddenSettingsDefault = { blockingProfiles: '11111/#F00 11010/#C0F 11001/#00F 00001', cacheStorageAPI: 'unset', cacheStorageCompression: true, + cacheStorageMultithread: 2, cacheControlForFirefox1376932: 'no-cache, no-store, must-revalidate', cloudStorageCompression: true, cnameIgnoreList: 'unset', @@ -181,7 +182,7 @@ const µBlock = { // jshint ignore:line // Read-only systemSettings: { compiledMagic: 57, // Increase when compiled format changes - selfieMagic: 57, // Increase when selfie format changes + selfieMagic: 58, // Increase when selfie format changes }, // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 diff --git a/src/js/base64-custom.js b/src/js/base64-custom.js index 34141b8a0f512..0d9a43fa2275d 100644 --- a/src/js/base64-custom.js +++ b/src/js/base64-custom.js @@ -46,105 +46,6 @@ const digitToVal = new Uint8Array(128); } } -// The sparse base64 codec is best for buffers which contains a lot of -// small u32 integer values. Those small u32 integer values are better -// represented with stringified integers, because small values can be -// represented with fewer bits than the usual base64 codec. For example, -// 0 become '0 ', i.e. 16 bits instead of 48 bits with official base64 -// codec. - -const sparseBase64 = { - magic: 'Base64_1', - - encode: function(arrbuf, arrlen) { - const inputLength = (arrlen + 3) >>> 2; - const inbuf = new Uint32Array(arrbuf, 0, inputLength); - const outputLength = this.magic.length + 7 + inputLength * 7; - const outbuf = new Uint8Array(outputLength); - // magic bytes - let j = 0; - for ( let i = 0; i < this.magic.length; i++ ) { - outbuf[j++] = this.magic.charCodeAt(i); - } - // array size - let v = inputLength; - do { - outbuf[j++] = valToDigit[v & 0b111111]; - v >>>= 6; - } while ( v !== 0 ); - outbuf[j++] = 0x20 /* ' ' */; - // array content - for ( let i = 0; i < inputLength; i++ ) { - v = inbuf[i]; - do { - outbuf[j++] = valToDigit[v & 0b111111]; - v >>>= 6; - } while ( v !== 0 ); - outbuf[j++] = 0x20 /* ' ' */; - } - if ( typeof TextDecoder === 'undefined' ) { - return JSON.stringify( - Array.from(new Uint32Array(outbuf.buffer, 0, j >>> 2)) - ); - } - const textDecoder = new TextDecoder(); - return textDecoder.decode(new Uint8Array(outbuf.buffer, 0, j)); - }, - - decode: function(instr, arrbuf) { - if ( instr.charCodeAt(0) === 0x5B /* '[' */ ) { - const inbuf = JSON.parse(instr); - if ( arrbuf instanceof ArrayBuffer === false ) { - return new Uint32Array(inbuf); - } - const outbuf = new Uint32Array(arrbuf); - outbuf.set(inbuf); - return outbuf; - } - if ( instr.startsWith(this.magic) === false ) { - throw new Error('Invalid µBlock.base64 encoding'); - } - const inputLength = instr.length; - const outputLength = this.decodeSize(instr) >> 2; - const outbuf = arrbuf instanceof ArrayBuffer === false - ? new Uint32Array(outputLength) - : new Uint32Array(arrbuf); - let i = instr.indexOf(' ', this.magic.length) + 1; - if ( i === -1 ) { - throw new Error('Invalid µBlock.base64 encoding'); - } - // array content - let j = 0; - for (;;) { - if ( j === outputLength || i >= inputLength ) { break; } - let v = 0, l = 0; - for (;;) { - const c = instr.charCodeAt(i++); - if ( c === 0x20 /* ' ' */ ) { break; } - v += digitToVal[c] << l; - l += 6; - } - outbuf[j++] = v; - } - if ( i < inputLength || j < outputLength ) { - throw new Error('Invalid µBlock.base64 encoding'); - } - return outbuf; - }, - - decodeSize: function(instr) { - if ( instr.startsWith(this.magic) === false ) { return 0; } - let v = 0, l = 0, i = this.magic.length; - for (;;) { - const c = instr.charCodeAt(i++); - if ( c === 0x20 /* ' ' */ ) { break; } - v += digitToVal[c] << l; - l += 6; - } - return v << 2; - }, -}; - // The dense base64 codec is best for typed buffers which values are // more random. For example, buffer contents as a result of compression // contain less repetitive values and thus the content is more @@ -154,7 +55,7 @@ const sparseBase64 = { // ArrayBuffer fails, the content of the resulting Uint8Array is // non-sensical. WASM-related? -const denseBase64 = { +export const denseBase64 = { magic: 'DenseBase64_1', encode: function(input) { @@ -242,5 +143,3 @@ const denseBase64 = { }; /******************************************************************************/ - -export { denseBase64, sparseBase64 }; diff --git a/src/js/biditrie.js b/src/js/biditrie.js index d0f64ee5b54b6..1329316384d89 100644 --- a/src/js/biditrie.js +++ b/src/js/biditrie.js @@ -576,34 +576,19 @@ class BidiTrieContainer { }; } - serialize(encoder) { - if ( encoder instanceof Object ) { - return encoder.encode( - this.buf32.buffer, - this.buf32[CHAR1_SLOT] - ); - } - return Array.from( - new Uint32Array( - this.buf32.buffer, - 0, - this.buf32[CHAR1_SLOT] + 3 >>> 2 - ) + toSelfie() { + return this.buf32.subarray( + 0, + this.buf32[CHAR1_SLOT] + 3 >>> 2 ); } - unserialize(selfie, decoder) { - const shouldDecode = typeof selfie === 'string'; - let byteLength = shouldDecode - ? decoder.decodeSize(selfie) - : selfie.length << 2; + fromSelfie(selfie) { + if ( selfie instanceof Uint32Array === false ) { return false; } + let byteLength = selfie.length << 2; if ( byteLength === 0 ) { return false; } this.reallocateBuf(byteLength); - if ( shouldDecode ) { - decoder.decode(selfie, this.buf8.buffer); - } else { - this.buf32.set(selfie); - } + this.buf32.set(selfie); return true; } diff --git a/src/js/cachestorage.js b/src/js/cachestorage.js index ef056af95eb14..e70fc322996a0 100644 --- a/src/js/cachestorage.js +++ b/src/js/cachestorage.js @@ -19,179 +19,362 @@ Home: https://github.com/gorhill/uBlock */ -/* global browser, IDBDatabase, indexedDB */ +/* global browser, indexedDB */ 'use strict'; /******************************************************************************/ import lz4Codec from './lz4.js'; -import µb from './background.js'; import webext from './webext.js'; +import µb from './background.js'; +import { ubolog } from './console.js'; +import * as scuo from './scuo-serializer.js'; /******************************************************************************/ -// The code below has been originally manually imported from: -// Commit: https://github.com/nikrolls/uBlock-Edge/commit/d1538ea9bea89d507219d3219592382eee306134 -// Commit date: 29 October 2016 -// Commit author: https://github.com/nikrolls -// Commit message: "Implement cacheStorage using IndexedDB" - -// The original imported code has been subsequently modified as it was not -// compatible with Firefox. -// (a Promise thing, see https://github.com/dfahlander/Dexie.js/issues/317) -// Furthermore, code to migrate from browser.storage.local to vAPI.storage -// has been added, for seamless migration of cache-related entries into -// indexedDB. - -// https://bugzilla.mozilla.org/show_bug.cgi?id=1371255 -// Firefox-specific: we use indexedDB because browser.storage.local() has -// poor performance in Firefox. -// https://github.com/uBlockOrigin/uBlock-issues/issues/328 -// Use IndexedDB for Chromium as well, to take advantage of LZ4 -// compression. -// https://github.com/uBlockOrigin/uBlock-issues/issues/399 -// Revert Chromium support of IndexedDB, use advanced setting to force -// IndexedDB. -// https://github.com/uBlockOrigin/uBlock-issues/issues/409 -// Allow forcing the use of webext storage on Firefox. - const STORAGE_NAME = 'uBlock0CacheStorage'; +const extensionStorage = webext.storage.local; + +const keysFromGetArg = arg => { + if ( arg === null || arg === undefined ) { return []; } + const type = typeof arg; + if ( type === 'string' ) { return [ arg ]; } + if ( Array.isArray(arg) ) { return arg; } + if ( type !== 'object' ) { return; } + return Object.keys(arg); +}; + +// Cache API is subject to quota so we will use it only for what is key +// performance-wise +const shouldCache = bin => { + const out = {}; + for ( const key of Object.keys(bin) ) { + if ( key.startsWith('cache/') ) { + if ( /^cache\/(compiled|selfie)\//.test(key) === false ) { continue; } + } + out[key] = bin[key]; + } + return out; +}; + +/******************************************************************************* + * + * Extension storage + * + * Always available. + * + * */ + +const cacheStorage = (( ) => { + + const LARGE = 65536; + + const compress = async (key, data) => { + const isLarge = typeof data === 'string' && data.length >= LARGE; + const µbhs = µb.hiddenSettings; + const after = await scuo.serializeAsync(data, { + compress: isLarge && µbhs.cacheStorageCompression, + multithreaded: isLarge && µbhs.cacheStorageMultithread || 0, + }); + return { key, data: after }; + }; -// Default to webext storage. -const storageLocal = webext.storage.local; - -let storageReadyResolve; -const storageReadyPromise = new Promise(resolve => { - storageReadyResolve = resolve; -}); - -const cacheStorage = { - name: 'browser.storage.local', - get(...args) { - return storageReadyPromise.then(( ) => - storageLocal.get(...args).catch(reason => { - console.log(reason); - }) - ); - }, - set(...args) { - return storageReadyPromise.then(( ) => - storageLocal.set(...args).catch(reason => { - console.log(reason); - }) - ); - }, - remove(...args) { - return storageReadyPromise.then(( ) => - storageLocal.remove(...args).catch(reason => { - console.log(reason); - }) - ); - }, - clear(...args) { - return storageReadyPromise.then(( ) => - storageLocal.clear(...args).catch(reason => { - console.log(reason); - }) - ); - }, - select: function(selectedBackend) { - let actualBackend = selectedBackend; - if ( actualBackend === undefined || actualBackend === 'unset' ) { - actualBackend = vAPI.webextFlavor.soup.has('firefox') - ? 'indexedDB' - : 'browser.storage.local'; + const decompress = async (key, data) => { + if ( scuo.canDeserialize(data) === false ) { + return { key, data }; } - if ( actualBackend === 'indexedDB' ) { - return selectIDB().then(success => { - if ( success || selectedBackend === 'indexedDB' ) { - clearWebext(); - storageReadyResolve(); - return 'indexedDB'; + const isLarge = data.length >= LARGE; + const after = await scuo.deserializeAsync(data, { + multithreaded: isLarge && µb.hiddenSettings.cacheStorageMultithread || 0, + }); + return { key, data: after }; + }; + + return { + name: 'browser.storage.local', + + get(arg) { + const keys = arg; + return cacheAPI.get(keysFromGetArg(arg)).then(bin => { + if ( bin !== undefined ) { return bin; } + return extensionStorage.get(keys).catch(reason => { + ubolog(reason); + }); + }).then(bin => { + if ( bin instanceof Object === false ) { return bin; } + const promises = []; + for ( const key of Object.keys(bin) ) { + promises.push(decompress(key, bin[key])); + } + return Promise.all(promises); + }).then(results => { + const bin = {}; + for ( const { key, data } of results ) { + bin[key] = data; } - clearIDB(); - storageReadyResolve(); - return 'browser.storage.local'; + return bin; + }).catch(reason => { + ubolog(reason); }); - } - if ( actualBackend === 'browser.storage.local' ) { - clearIDB(); - } - storageReadyResolve(); - return Promise.resolve('browser.storage.local'); - - }, - error: undefined -}; + }, + + async keys(regex) { + const results = await Promise.all([ + cacheAPI.keys(regex), + extensionStorage.get(null).catch(( ) => {}), + ]); + const keys = new Set(results[0]); + const bin = results[1] || {}; + for ( const key of Object.keys(bin) ) { + if ( regex && regex.test(key) === false ) { continue; } + keys.add(key); + } + return keys; + }, + + async set(keyvalStore) { + const keys = Object.keys(keyvalStore); + if ( keys.length === 0 ) { return; } + const promises = []; + for ( const key of keys ) { + promises.push(compress(key, keyvalStore[key])); + } + const results = await Promise.all(promises); + const serializedStore = {}; + for ( const { key, data } of results ) { + serializedStore[key] = data; + } + cacheAPI.set(shouldCache(serializedStore)); + return extensionStorage.set(serializedStore).catch(reason => { + ubolog(reason); + }); + }, + + remove(...args) { + cacheAPI.remove(...args); + return extensionStorage.remove(...args).catch(reason => { + ubolog(reason); + }); + }, + + clear(...args) { + cacheAPI.clear(...args); + return extensionStorage.clear(...args).catch(reason => { + ubolog(reason); + }); + }, + + async migrate(cacheAPI) { + if ( cacheAPI === 'browser.storage.local' ) { return; } + if ( cacheAPI !== 'indexedDB' ) { + if ( vAPI.webextFlavor.soup.has('firefox') === false ) { return; } + } + if ( browser.extension.inIncognitoContext ) { return; } + // Copy all items to new cache storage + const bin = await idbStorage.get(null); + if ( typeof bin !== 'object' || bin === null ) { return; } + const toMigrate = []; + for ( const key of Object.keys(bin) ) { + if ( key.startsWith('cache/selfie/') ) { continue; } + ubolog(`Migrating ${key}=${JSON.stringify(bin[key]).slice(0,32)}`); + toMigrate.push(cacheStorage.set({ [key]: bin[key] })); + } + idbStorage.clear(); + return Promise.all(toMigrate); + }, + + error: undefined + }; +})(); // Not all platforms support getBytesInUse -if ( storageLocal.getBytesInUse instanceof Function ) { +if ( extensionStorage.getBytesInUse instanceof Function ) { cacheStorage.getBytesInUse = function(...args) { - return storageLocal.getBytesInUse(...args).catch(reason => { - console.log(reason); + return extensionStorage.getBytesInUse(...args).catch(reason => { + ubolog(reason); }); }; } -// Reassign API entries to that of indexedDB-based ones -const selectIDB = async function() { - let db; - let dbPromise; +/******************************************************************************* + * + * Cache API + * + * Purpose is to mirror cache-related items from extension storage, as its + * read/write operations are faster. May not be available/populated in + * private/incognito mode. + * + * */ + +const cacheAPI = (( ) => { + const caches = globalThis.caches; + const cacheStoragePromise = new Promise(resolve => { + if ( typeof caches !== 'object' || caches === null ) { + ubolog('CacheStorage API not available'); + resolve(null); + return; + } + resolve(caches.open(STORAGE_NAME).catch(reason => { + ubolog(reason); + })); + }); - const noopfn = function () { + const urlPrefix = 'https://ublock0.invalid/'; + + const keyToURL = key => + `${urlPrefix}${encodeURIComponent(key)}`; + + const urlToKey = url => + decodeURIComponent(url.slice(urlPrefix.length)); + + const getOne = async key => { + const cache = await cacheStoragePromise; + if ( cache === null ) { return; } + return cache.match(keyToURL(key)).then(response => { + if ( response instanceof Response === false ) { return; } + return response.text(); + }).then(text => { + if ( text === undefined ) { return; } + return { key, text }; + }).catch(reason => { + ubolog(reason); + }); }; - const disconnect = function() { - dbTimer.off(); - if ( db instanceof IDBDatabase ) { - db.close(); - db = undefined; - } + const getAll = async ( ) => { + const cache = await cacheStoragePromise; + if ( cache === null ) { return; } + return cache.keys().then(requests => { + const promises = []; + for ( const request of requests ) { + promises.push(getOne(urlToKey(request.url))); + } + return Promise.all(promises); + }).then(responses => { + const bin = {}; + for ( const response of responses ) { + if ( response === undefined ) { continue; } + bin[response.key] = response.text; + } + return bin; + }).catch(reason => { + ubolog(reason); + }); }; - const dbTimer = vAPI.defer.create(( ) => { - disconnect(); - }); + const setOne = async (key, text) => { + if ( text === undefined ) { return removeOne(key); } + const blob = new Blob([ text ], { type: 'text/plain;charset=utf-8'}); + const cache = await cacheStoragePromise; + if ( cache === null ) { return; } + return cache + .put(keyToURL(key), new Response(blob)) + .catch(reason => { + ubolog(reason); + }); + }; + + const removeOne = async key => { + const cache = await cacheStoragePromise; + if ( cache === null ) { return; } + return cache.delete(keyToURL(key)).catch(reason => { + ubolog(reason); + }); + }; + + return { + async get(arg) { + const keys = keysFromGetArg(arg); + if ( keys === undefined ) { return; } + if ( keys.length === 0 ) { + return getAll(); + } + const bin = {}; + const toFetch = keys.slice(); + const hasDefault = typeof arg === 'object' && Array.isArray(arg) === false; + for ( let i = 0; i < toFetch.length; i++ ) { + const key = toFetch[i]; + if ( hasDefault && arg[key] !== undefined ) { + bin[key] = arg[key]; + } + toFetch[i] = getOne(key); + } + const responses = await Promise.all(toFetch); + for ( const response of responses ) { + if ( response instanceof Object === false ) { continue; } + const { key, text } = response; + if ( typeof key !== 'string' ) { continue; } + if ( typeof text !== 'string' ) { continue; } + bin[key] = text; + } + if ( Object.keys(bin).length === 0 ) { return; } + return bin; + }, + + async keys(regex) { + const cache = await cacheStoragePromise; + if ( cache === null ) { return []; } + return cache.keys().then(requests => + requests.map(r => urlToKey(r.url)) + .filter(k => regex === undefined || regex.test(k)) + ).catch(( ) => []); + }, + + async set(keyvalStore) { + const keys = Object.keys(keyvalStore); + if ( keys.length === 0 ) { return; } + const promises = []; + for ( const key of keys ) { + promises.push(setOne(key, keyvalStore[key])); + } + return Promise.all(promises); + }, + + async remove(keys) { + const toRemove = []; + if ( typeof keys === 'string' ) { + toRemove.push(removeOne(keys)); + } else if ( Array.isArray(keys) ) { + for ( const key of keys ) { + toRemove.push(removeOne(key)); + } + } + return Promise.all(toRemove); + }, - const keepAlive = function() { - dbTimer.offon(Math.max( - µb.hiddenSettings.autoUpdateAssetFetchPeriod * 2 * 1000, - 180000 - )); + async clear() { + return globalThis.caches.delete(STORAGE_NAME).catch(reason => { + ubolog(reason); + }); + }, }; +})(); - // https://github.com/gorhill/uBlock/issues/3156 - // I have observed that no event was fired in Tor Browser 7.0.7 + - // medium security level after the request to open the database was - // created. When this occurs, I have also observed that the `error` - // property was already set, so this means uBO can detect here whether - // the database can be opened successfully. A try-catch block is - // necessary when reading the `error` property because we are not - // allowed to read this property outside of event handlers in newer - // implementation of IDBRequest (my understanding). +/******************************************************************************* + * + * IndexedDB + * + * Deprecated, exists only for the purpose of migrating from older versions. + * + * */ + +const idbStorage = (( ) => { + let dbPromise; const getDb = function() { - keepAlive(); - if ( db !== undefined ) { - return Promise.resolve(db); - } - if ( dbPromise !== undefined ) { - return dbPromise; - } + if ( dbPromise !== undefined ) { return dbPromise; } dbPromise = new Promise(resolve => { let req; try { req = indexedDB.open(STORAGE_NAME, 1); if ( req.error ) { - console.log(req.error); + ubolog(req.error); req = undefined; } } catch(ex) { } if ( req === undefined ) { - db = null; - dbPromise = undefined; return resolve(null); } req.onupgradeneeded = function(ev) { @@ -215,24 +398,16 @@ const selectIDB = async function() { req.onsuccess = function(ev) { if ( resolve === undefined ) { return; } req = undefined; - db = ev.target.result; - dbPromise = undefined; - resolve(db); + resolve(ev.target.result); resolve = undefined; }; req.onerror = req.onblocked = function() { if ( resolve === undefined ) { return; } - req = undefined; - console.log(this.error); - db = null; - dbPromise = undefined; resolve(null); resolve = undefined; }; vAPI.defer.once(5000).then(( ) => { if ( resolve === undefined ) { return; } - db = null; - dbPromise = undefined; resolve(null); resolve = undefined; }); @@ -253,60 +428,12 @@ const selectIDB = async function() { }); }; - const toBlob = function(data) { - const value = data instanceof Uint8Array - ? new Blob([ data ]) - : data; - return Promise.resolve(value); - }; - - const compress = function(store, key, data) { - return lz4Codec.encode(data, toBlob).then(value => { - store.push({ key, value }); - }); - }; - const decompress = function(store, key, data) { return lz4Codec.decode(data, fromBlob).then(data => { store[key] = data; }); }; - const getFromDb = async function(keys, keyvalStore, callback) { - if ( typeof callback !== 'function' ) { return; } - if ( keys.length === 0 ) { return callback(keyvalStore); } - const promises = []; - const gotOne = function() { - if ( typeof this.result !== 'object' ) { return; } - const { key, value } = this.result; - keyvalStore[key] = value; - if ( value instanceof Blob === false ) { return; } - promises.push(decompress(keyvalStore, key, value)); - }; - try { - const db = await getDb(); - if ( !db ) { return callback(); } - const transaction = db.transaction(STORAGE_NAME, 'readonly'); - transaction.oncomplete = - transaction.onerror = - transaction.onabort = ( ) => { - Promise.all(promises).then(( ) => { - callback(keyvalStore); - }); - }; - const table = transaction.objectStore(STORAGE_NAME); - for ( const key of keys ) { - const req = table.get(key); - req.onsuccess = gotOne; - req.onerror = noopfn; - } - } - catch(reason) { - console.info(`cacheStorage.getFromDb() failed: ${reason}`); - callback(); - } - }; - const visitAllFromDb = async function(visitFn) { const db = await getDb(); if ( !db ) { return visitFn(); } @@ -341,190 +468,40 @@ const selectIDB = async function() { if ( entry.value instanceof Blob === false ) { return; } promises.push(decompress(keyvalStore, key, value)); }).catch(reason => { - console.info(`cacheStorage.getAllFromDb() failed: ${reason}`); + ubolog(`cacheStorage.getAllFromDb() failed: ${reason}`); callback(); }); }; - // https://github.com/uBlockOrigin/uBlock-issues/issues/141 - // Mind that IDBDatabase.transaction() and IDBObjectStore.put() - // can throw: - // https://developer.mozilla.org/en-US/docs/Web/API/IDBDatabase/transaction - // https://developer.mozilla.org/en-US/docs/Web/API/IDBObjectStore/put - - const putToDb = async function(keyvalStore, callback) { - if ( typeof callback !== 'function' ) { - callback = noopfn; - } - const keys = Object.keys(keyvalStore); - if ( keys.length === 0 ) { return callback(); } - const promises = [ getDb() ]; - const entries = []; - const dontCompress = - µb.hiddenSettings.cacheStorageCompression !== true; - for ( const key of keys ) { - const value = keyvalStore[key]; - const isString = typeof value === 'string'; - if ( isString === false || dontCompress ) { - entries.push({ key, value }); - continue; - } - promises.push(compress(entries, key, value)); - } - const finish = ( ) => { - if ( callback === undefined ) { return; } - let cb = callback; - callback = undefined; - cb(); - }; - try { - const results = await Promise.all(promises); - const db = results[0]; - if ( !db ) { return callback(); } - const transaction = db.transaction( - STORAGE_NAME, - 'readwrite' - ); - transaction.oncomplete = - transaction.onerror = - transaction.onabort = finish; - const table = transaction.objectStore(STORAGE_NAME); - for ( const entry of entries ) { - table.put(entry); - } - } catch (ex) { - finish(); - } - }; - - const deleteFromDb = async function(input, callback) { - if ( typeof callback !== 'function' ) { - callback = noopfn; - } - const keys = Array.isArray(input) ? input.slice() : [ input ]; - if ( keys.length === 0 ) { return callback(); } - const finish = ( ) => { - if ( callback === undefined ) { return; } - let cb = callback; - callback = undefined; - cb(); - }; - try { - const db = await getDb(); - if ( !db ) { return callback(); } - const transaction = db.transaction(STORAGE_NAME, 'readwrite'); - transaction.oncomplete = - transaction.onerror = - transaction.onabort = finish; - const table = transaction.objectStore(STORAGE_NAME); - for ( const key of keys ) { - table.delete(key); - } - } catch (ex) { - finish(); - } - }; - const clearDb = async function(callback) { if ( typeof callback !== 'function' ) { - callback = noopfn; + callback = ()=>{}; } try { const db = await getDb(); if ( !db ) { return callback(); } - const transaction = db.transaction(STORAGE_NAME, 'readwrite'); - transaction.oncomplete = - transaction.onerror = - transaction.onabort = ( ) => { - callback(); - }; - transaction.objectStore(STORAGE_NAME).clear(); + db.close(); + indexedDB.deleteDatabase(STORAGE_NAME); + callback(); } catch(reason) { - console.info(`cacheStorage.clearDb() failed: ${reason}`); callback(); } }; - await getDb(); - if ( !db ) { return false; } - - cacheStorage.name = 'indexedDB'; - cacheStorage.get = function get(keys) { - return storageReadyPromise.then(( ) => - new Promise(resolve => { - if ( keys === null ) { - return getAllFromDb(bin => resolve(bin)); - } - let toRead, output = {}; - if ( typeof keys === 'string' ) { - toRead = [ keys ]; - } else if ( Array.isArray(keys) ) { - toRead = keys; - } else /* if ( typeof keys === 'object' ) */ { - toRead = Object.keys(keys); - output = keys; - } - getFromDb(toRead, output, bin => resolve(bin)); - }) - ); - }; - cacheStorage.set = function set(keys) { - return storageReadyPromise.then(( ) => - new Promise(resolve => { - putToDb(keys, details => resolve(details)); - }) - ); - }; - cacheStorage.remove = function remove(keys) { - return storageReadyPromise.then(( ) => - new Promise(resolve => { - deleteFromDb(keys, ( ) => resolve()); - }) - ); - }; - cacheStorage.clear = function clear() { - return storageReadyPromise.then(( ) => - new Promise(resolve => { + return { + get: function get() { + return new Promise(resolve => { + return getAllFromDb(bin => resolve(bin)); + }); + }, + clear: function clear() { + return new Promise(resolve => { clearDb(( ) => resolve()); - }) - ); - }; - cacheStorage.getBytesInUse = function getBytesInUse() { - return Promise.resolve(0); + }); + }, }; - return true; -}; - -// https://github.com/uBlockOrigin/uBlock-issues/issues/328 -// Delete cache-related entries from webext storage. -const clearWebext = async function() { - let bin; - try { - bin = await webext.storage.local.get('assetCacheRegistry'); - } catch(ex) { - console.error(ex); - } - if ( bin instanceof Object === false ) { return; } - if ( bin.assetCacheRegistry instanceof Object === false ) { return; } - const toRemove = [ - 'assetCacheRegistry', - 'assetSourceRegistry', - ]; - for ( const key in bin.assetCacheRegistry ) { - if ( bin.assetCacheRegistry.hasOwnProperty(key) ) { - toRemove.push('cache/' + key); - } - } - webext.storage.local.remove(toRemove); -}; - -const clearIDB = function() { - try { - indexedDB.deleteDatabase(STORAGE_NAME); - } catch(ex) { - } -}; +})(); /******************************************************************************/ diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js index f4782bc375407..04fc93a8fa28f 100644 --- a/src/js/cosmetic-filtering.js +++ b/src/js/cosmetic-filtering.js @@ -292,7 +292,7 @@ FilterContainer.prototype.reset = function() { this.highlyGeneric.complex.str = ''; this.highlyGeneric.complex.mru.reset(); - this.selfieVersion = 1; + this.selfieVersion = 2; }; /******************************************************************************/ @@ -576,9 +576,11 @@ FilterContainer.prototype.toSelfie = function() { acceptedCount: this.acceptedCount, discardedCount: this.discardedCount, specificFilters: this.specificFilters.toSelfie(), - lowlyGeneric: Array.from(this.lowlyGeneric), - highSimpleGenericHideArray: Array.from(this.highlyGeneric.simple.dict), - highComplexGenericHideArray: Array.from(this.highlyGeneric.complex.dict), + lowlyGeneric: this.lowlyGeneric, + highSimpleGenericHideDict: this.highlyGeneric.simple.dict, + highSimpleGenericHideStr: this.highlyGeneric.simple.str, + highComplexGenericHideDict: this.highlyGeneric.complex.dict, + highComplexGenericHideStr: this.highlyGeneric.complex.str, }; }; @@ -593,11 +595,11 @@ FilterContainer.prototype.fromSelfie = function(selfie) { this.acceptedCount = selfie.acceptedCount; this.discardedCount = selfie.discardedCount; this.specificFilters.fromSelfie(selfie.specificFilters); - this.lowlyGeneric = new Map(selfie.lowlyGeneric); - this.highlyGeneric.simple.dict = new Set(selfie.highSimpleGenericHideArray); - this.highlyGeneric.simple.str = selfie.highSimpleGenericHideArray.join(',\n'); - this.highlyGeneric.complex.dict = new Set(selfie.highComplexGenericHideArray); - this.highlyGeneric.complex.str = selfie.highComplexGenericHideArray.join(',\n'); + this.lowlyGeneric = selfie.lowlyGeneric; + this.highlyGeneric.simple.dict = selfie.highSimpleGenericHideDict; + this.highlyGeneric.simple.str = selfie.highSimpleGenericHideStr; + this.highlyGeneric.complex.dict = selfie.highComplexGenericHideDict; + this.highlyGeneric.complex.str = selfie.highComplexGenericHideStr; this.frozen = true; }; diff --git a/src/js/hntrie.js b/src/js/hntrie.js index e8031a651bfcd..cc726db5d87bb 100644 --- a/src/js/hntrie.js +++ b/src/js/hntrie.js @@ -445,28 +445,17 @@ class HNTrieContainer { }; } - serialize(encoder) { - if ( encoder instanceof Object ) { - return encoder.encode( - this.buf32.buffer, - this.buf32[CHAR1_SLOT] - ); - } - return Array.from( - new Uint32Array( - this.buf32.buffer, - 0, - this.buf32[CHAR1_SLOT] + 3 >>> 2 - ) + toSelfie() { + return this.buf32.subarray( + 0, + this.buf32[CHAR1_SLOT] + 3 >>> 2 ); } - unserialize(selfie, decoder) { + fromSelfie(selfie) { + if ( selfie instanceof Uint32Array === false ) { return false; } this.needle = ''; - const shouldDecode = typeof selfie === 'string'; - let byteLength = shouldDecode - ? decoder.decodeSize(selfie) - : selfie.length << 2; + let byteLength = selfie.length << 2; if ( byteLength === 0 ) { return false; } byteLength = roundToPageSize(byteLength); if ( this.wasmMemory !== null ) { @@ -477,14 +466,10 @@ class HNTrieContainer { this.buf = new Uint8Array(this.wasmMemory.buffer); this.buf32 = new Uint32Array(this.buf.buffer); } - } else if ( byteLength > this.buf.length ) { - this.buf = new Uint8Array(byteLength); - this.buf32 = new Uint32Array(this.buf.buffer); - } - if ( shouldDecode ) { - decoder.decode(selfie, this.buf.buffer); - } else { this.buf32.set(selfie); + } else { + this.buf32 = selfie; + this.buf = new Uint8Array(this.buf32.buffer); } // https://github.com/uBlockOrigin/uBlock-issues/issues/2925 this.buf[255] = 0; diff --git a/src/js/messaging.js b/src/js/messaging.js index ec3f0f4e5ff24..38b03a4093460 100644 --- a/src/js/messaging.js +++ b/src/js/messaging.js @@ -45,6 +45,7 @@ import { dnrRulesetFromRawLists } from './static-dnr-filtering.js'; import { i18n$ } from './i18n.js'; import { redirectEngine } from './redirect-engine.js'; import * as sfp from './static-filtering-parser.js'; +import * as scuo from './scuo-serializer.js'; import { permanentFirewall, @@ -925,21 +926,6 @@ const fromBase64 = function(encoded) { return Promise.resolve(u8array !== undefined ? u8array : encoded); }; -const toBase64 = function(data) { - const value = data instanceof Uint8Array - ? denseBase64.encode(data) - : data; - return Promise.resolve(value); -}; - -const compress = function(json) { - return lz4Codec.encode(json, toBase64); -}; - -const decompress = function(encoded) { - return lz4Codec.decode(encoded, fromBase64); -}; - const onMessage = function(request, sender, callback) { // Cloud storage support is optional. if ( µb.cloudStorageSupported !== true ) { @@ -961,15 +947,25 @@ const onMessage = function(request, sender, callback) { return; case 'cloudPull': - request.decode = decompress; + request.decode = encoded => { + if ( scuo.canDeserialize(encoded) ) { + return scuo.deserializeAsync(encoded, { thread: true }); + } + // Legacy decoding: needs to be kept around for the foreseeable future. + return lz4Codec.decode(encoded, fromBase64); + }; return vAPI.cloud.pull(request).then(result => { callback(result); }); case 'cloudPush': - if ( µb.hiddenSettings.cloudStorageCompression ) { - request.encode = compress; - } + request.encode = data => { + const options = { + compress: µb.hiddenSettings.cloudStorageCompression, + thread: true, + }; + return scuo.serializeAsync(data, options); + }; return vAPI.cloud.push(request).then(result => { callback(result); }); diff --git a/src/js/redirect-engine.js b/src/js/redirect-engine.js index 2f58066039a8d..7d70e35ee3c11 100644 --- a/src/js/redirect-engine.js +++ b/src/js/redirect-engine.js @@ -24,11 +24,7 @@ /******************************************************************************/ import redirectableResources from './redirect-resources.js'; - -import { - LineIterator, - orphanizeString, -} from './text-utils.js'; +import { LineIterator, orphanizeString } from './text-utils.js'; /******************************************************************************/ @@ -448,33 +444,22 @@ class RedirectEngine { } selfieFromResources(storage) { - storage.put( - RESOURCES_SELFIE_NAME, - JSON.stringify({ - version: RESOURCES_SELFIE_VERSION, - aliases: Array.from(this.aliases), - resources: Array.from(this.resources), - }) - ); + return storage.toCache(RESOURCES_SELFIE_NAME, { + version: RESOURCES_SELFIE_VERSION, + aliases: this.aliases, + resources: this.resources, + }); } async resourcesFromSelfie(storage) { - const result = await storage.get(RESOURCES_SELFIE_NAME); - let selfie; - try { - selfie = JSON.parse(result.content); - } catch(ex) { - } - if ( - selfie instanceof Object === false || - selfie.version !== RESOURCES_SELFIE_VERSION || - Array.isArray(selfie.resources) === false - ) { - return false; - } - this.aliases = new Map(selfie.aliases); - this.resources = new Map(); - for ( const [ token, entry ] of selfie.resources ) { + const selfie = await storage.fromCache(RESOURCES_SELFIE_NAME); + if ( selfie instanceof Object === false ) { return false; } + if ( selfie.version !== RESOURCES_SELFIE_VERSION ) { return false; } + if ( selfie.aliases instanceof Map === false ) { return false; } + if ( selfie.resources instanceof Map === false ) { return false; } + this.aliases = selfie.aliases; + this.resources = selfie.resources; + for ( const [ token, entry ] of this.resources ) { this.resources.set(token, RedirectEntry.fromDetails(entry)); } return true; diff --git a/src/js/reverselookup.js b/src/js/reverselookup.js index c21ca4bb15d9b..e7bf24e94aa35 100644 --- a/src/js/reverselookup.js +++ b/src/js/reverselookup.js @@ -62,7 +62,7 @@ const stopWorker = function() { }; const workerTTLTimer = vAPI.defer.create(stopWorker); -const workerTTL = { min: 5 }; +const workerTTL = { min: 1.5 }; const initWorker = function() { if ( worker === null ) { diff --git a/src/js/scriptlet-filtering-core.js b/src/js/scriptlet-filtering-core.js index 75818eb976e6f..907844fbc198a 100644 --- a/src/js/scriptlet-filtering-core.js +++ b/src/js/scriptlet-filtering-core.js @@ -200,7 +200,7 @@ export class ScriptletFilteringEngine { } fromSelfie(selfie) { - if ( selfie instanceof Object === false ) { return false; } + if ( typeof selfie !== 'object' || selfie === null ) { return false; } if ( selfie.version !== VERSION ) { return false; } this.scriptletDB.fromSelfie(selfie); return true; diff --git a/src/js/scuo-serializer.js b/src/js/scuo-serializer.js new file mode 100644 index 0000000000000..1ffffa6bec600 --- /dev/null +++ b/src/js/scuo-serializer.js @@ -0,0 +1,1307 @@ +/******************************************************************************* + + uBlock Origin - a browser extension to block requests. + Copyright (C) 2024-present Raymond Hill + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. + + Home: https://github.com/gorhill/uBlock +*/ + +'use strict'; + +/******************************************************************************* + * + * Structured-Cloneable to Unicode-Only SERIALIZER + * + * Purpose: + * + * Serialize/deserialize arbitrary JS data to/from well-formed Unicode strings. + * + * The browser does not expose an API to serialize structured-cloneable types + * into a single string. JSON.stringify() does not support complex JavaScript + * objects, and does not support references to composite types. Unless the + * data to serialize is only JS strings, it is difficult to easily switch + * from one type of storage to another. + * + * Serializing to a well-formed Unicode string allows to store structured- + * cloneable data to any storage. Not all storages support storing binary data, + * but all storages support storing Unicode strings. + * + * Structured-cloneable types: + * https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm#supported_types + * + * ----------------+------------------+------------------+---------------------- + * Data types | String | JSONable | structured-cloneable + * ================+============================================================ + * document.cookie | Yes | No | No + * ----------------+------------------+------------------+---------------------- + * localStorage | Yes | No | No + * ----------------+------------------+------------------+---------------------- + * IndexedDB | Yes | Yes | Yes + * ----------------+------------------+------------------+---------------------- + * browser.storage | Yes | Yes | No + * ----------------+------------------+------------------+---------------------- + * Cache API | Yes | No | No + * ----------------+------------------+------------------+---------------------- + * + * The above table shows that only JS strings can be persisted natively to all + * types of storage. The purpose of this library is to convert + * structure-cloneable data (which is a superset of JSONable data) into a + * single JS string. The resulting string is meant to be as small as possible. + * As a result, it is not human-readable, though it contains only printable + * ASCII characters -- and possibly Unicode characters beyond ASCII. + * + * The resulting JS string will not contain characters which require escaping + * should it be converted to a JSON value. However it may contain characters + * which require escaping should it be converted to a URI component. + * + * Characteristics: + * + * - Serializes/deserializes data to/from a single well-formed Unicode string + * - Strings do not require escaping, i.e. they are stored as-is + * - Supports multiple references to same object + * - Supports reference cycles + * - Supports synchronous and asynchronous API + * - Supports usage of Worker + * - Optionally supports LZ4 compression + * + * TODO: + * + * - Harden against unexpected conditions, such as corrupted string during + * deserialization. + * - Evaluate supporting checksum. + * + * */ + +const VERSION = 1; +const SEPARATORCHAR = ' '; +const SEPARATORCHARCODE = SEPARATORCHAR.charCodeAt(0); +const SENTINELCHAR = '!'; +const SENTINELCHARCODE = SENTINELCHAR.charCodeAt(0); +const MAGICPREFIX = `UOSC_${VERSION}${SEPARATORCHAR}`; +const MAGICLZ4PREFIX = `UOSC/lz4_${VERSION}${SEPARATORCHAR}`; +const FAILMARK = Number.MAX_SAFE_INTEGER; +// Avoid characters which require escaping when serialized to JSON: +const SAFECHARS = "&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~"; +const NUMSAFECHARS = SAFECHARS.length; +const BITS_PER_SAFECHARS = Math.log2(NUMSAFECHARS); + +const { intToChar, intToCharCode, charCodeToInt } = (( ) => { + const intToChar = []; + const intToCharCode = []; + const charCodeToInt = []; + for ( let i = 0; i < NUMSAFECHARS; i++ ) { + intToChar[i] = SAFECHARS.charAt(i); + intToCharCode[i] = SAFECHARS.charCodeAt(i); + charCodeToInt[i] = 0; + } + for ( let i = NUMSAFECHARS; i < 128; i++ ) { + intToChar[i] = ''; + intToCharCode[i] = 0; + charCodeToInt[i] = 0; + } + for ( let i = 0; i < SAFECHARS.length; i++ ) { + charCodeToInt[SAFECHARS.charCodeAt(i)] = i; + } + return { intToChar, intToCharCode, charCodeToInt }; +})(); + +let iota = 1; +const I_STRING_SMALL = iota++; +const I_STRING_LARGE = iota++; +const I_ZERO = iota++; +const I_INTEGER_SMALL_POS = iota++; +const I_INTEGER_SMALL_NEG = iota++; +const I_INTEGER_LARGE_POS = iota++; +const I_INTEGER_LARGE_NEG = iota++; +const I_BOOL_FALSE = iota++; +const I_BOOL_TRUE = iota++; +const I_NULL = iota++; +const I_UNDEFINED = iota++; +const I_FLOAT = iota++; +const I_REGEXP = iota++; +const I_DATE = iota++; +const I_REFERENCE = iota++; +const I_SMALL_OBJECT = iota++; +const I_LARGE_OBJECT = iota++; +const I_ARRAY_SMALL = iota++; +const I_ARRAY_LARGE = iota++; +const I_SET_SMALL = iota++; +const I_SET_LARGE = iota++; +const I_MAP_SMALL = iota++; +const I_MAP_LARGE = iota++; +const I_ARRAYBUFFER = iota++; +const I_INT8ARRAY = iota++; +const I_UINT8ARRAY = iota++; +const I_UINT8CLAMPEDARRAY = iota++; +const I_INT16ARRAY = iota++; +const I_UINT16ARRAY = iota++; +const I_INT32ARRAY = iota++; +const I_UINT32ARRAY = iota++; +const I_FLOAT32ARRAY = iota++; +const I_FLOAT64ARRAY = iota++; +const I_DATAVIEW = iota++; + +const C_STRING_SMALL = intToChar[I_STRING_SMALL]; +const C_STRING_LARGE = intToChar[I_STRING_LARGE]; +const C_ZERO = intToChar[I_ZERO]; +const C_INTEGER_SMALL_POS = intToChar[I_INTEGER_SMALL_POS]; +const C_INTEGER_SMALL_NEG = intToChar[I_INTEGER_SMALL_NEG]; +const C_INTEGER_LARGE_POS = intToChar[I_INTEGER_LARGE_POS]; +const C_INTEGER_LARGE_NEG = intToChar[I_INTEGER_LARGE_NEG]; +const C_BOOL_FALSE = intToChar[I_BOOL_FALSE]; +const C_BOOL_TRUE = intToChar[I_BOOL_TRUE]; +const C_NULL = intToChar[I_NULL]; +const C_UNDEFINED = intToChar[I_UNDEFINED]; +const C_FLOAT = intToChar[I_FLOAT]; +const C_REGEXP = intToChar[I_REGEXP]; +const C_DATE = intToChar[I_DATE]; +const C_REFERENCE = intToChar[I_REFERENCE]; +const C_SMALL_OBJECT = intToChar[I_SMALL_OBJECT]; +const C_LARGE_OBJECT = intToChar[I_LARGE_OBJECT]; +const C_ARRAY_SMALL = intToChar[I_ARRAY_SMALL]; +const C_ARRAY_LARGE = intToChar[I_ARRAY_LARGE]; +const C_SET_SMALL = intToChar[I_SET_SMALL]; +const C_SET_LARGE = intToChar[I_SET_LARGE]; +const C_MAP_SMALL = intToChar[I_MAP_SMALL]; +const C_MAP_LARGE = intToChar[I_MAP_LARGE]; +const C_ARRAYBUFFER = intToChar[I_ARRAYBUFFER]; +const C_INT8ARRAY = intToChar[I_INT8ARRAY]; +const C_UINT8ARRAY = intToChar[I_UINT8ARRAY]; +const C_UINT8CLAMPEDARRAY = intToChar[I_UINT8CLAMPEDARRAY]; +const C_INT16ARRAY = intToChar[I_INT16ARRAY]; +const C_UINT16ARRAY = intToChar[I_UINT16ARRAY]; +const C_INT32ARRAY = intToChar[I_INT32ARRAY]; +const C_UINT32ARRAY = intToChar[I_UINT32ARRAY]; +const C_FLOAT32ARRAY = intToChar[I_FLOAT32ARRAY]; +const C_FLOAT64ARRAY = intToChar[I_FLOAT64ARRAY]; +const C_DATAVIEW = intToChar[I_DATAVIEW]; + +// Just reuse already defined constants, we just need distinct values +const I_STRING = I_STRING_SMALL; +const I_NUMBER = I_FLOAT; +const I_BOOL = I_BOOL_FALSE; +const I_OBJECT = I_SMALL_OBJECT; +const I_ARRAY = I_ARRAY_SMALL; +const I_SET = I_SET_SMALL; +const I_MAP = I_MAP_SMALL; + +const typeToSerializedInt = { + 'string': I_STRING, + 'number': I_NUMBER, + 'boolean': I_BOOL, + 'object': I_OBJECT, +}; + +const xtypeToSerializedInt = { + '[object RegExp]': I_REGEXP, + '[object Date]': I_DATE, + '[object Array]': I_ARRAY, + '[object Set]': I_SET, + '[object Map]': I_MAP, + '[object ArrayBuffer]': I_ARRAYBUFFER, + '[object Int8Array]': I_INT8ARRAY, + '[object Uint8Array]': I_UINT8ARRAY, + '[object Uint8ClampedArray]': I_UINT8CLAMPEDARRAY, + '[object Int16Array]': I_INT16ARRAY, + '[object Uint16Array]': I_UINT16ARRAY, + '[object Int32Array]': I_INT32ARRAY, + '[object Uint32Array]': I_UINT32ARRAY, + '[object Float32Array]': I_FLOAT32ARRAY, + '[object Float64Array]': I_FLOAT64ARRAY, + '[object DataView]': I_DATAVIEW, +}; + +const typeToSerializedChar = { + '[object Int8Array]': C_INT8ARRAY, + '[object Uint8Array]': C_UINT8ARRAY, + '[object Uint8ClampedArray]': C_UINT8CLAMPEDARRAY, + '[object Int16Array]': C_INT16ARRAY, + '[object Uint16Array]': C_UINT16ARRAY, + '[object Int32Array]': C_INT32ARRAY, + '[object Uint32Array]': C_UINT32ARRAY, + '[object Float32Array]': C_FLOAT32ARRAY, + '[object Float64Array]': C_FLOAT64ARRAY, +}; + +const toArrayBufferViewConstructor = { + [`${I_INT8ARRAY}`]: Int8Array, + [`${I_UINT8ARRAY}`]: Uint8Array, + [`${I_UINT8CLAMPEDARRAY}`]: Uint8ClampedArray, + [`${I_INT16ARRAY}`]: Int16Array, + [`${I_UINT16ARRAY}`]: Uint16Array, + [`${I_INT32ARRAY}`]: Int32Array, + [`${I_UINT32ARRAY}`]: Uint32Array, + [`${I_FLOAT32ARRAY}`]: Float32Array, + [`${I_FLOAT64ARRAY}`]: Float64Array, + [`${I_DATAVIEW}`]: DataView, +}; + +/******************************************************************************/ + +const textDecoder = new TextDecoder(); +const textEncoder = new TextEncoder(); +const isInteger = Number.isInteger; + +const writeRefs = new Map(); +const writeBuffer = []; + +const readRefs = new Map(); +let readStr = ''; +let readPtr = 0; +let readEnd = 0; + +let refCounter = 1; + +let uint8Input = null; + +const uint8InputFromAsciiStr = s => { + if ( uint8Input === null || uint8Input.length < s.length ) { + uint8Input = new Uint8Array(s.length + 0x03FF & ~0x03FF); + } + textEncoder.encodeInto(s, uint8Input); + return uint8Input; +}; + +const isInstanceOf = (o, s) => { + return typeof o === 'object' && o !== null && ( + s === 'Object' || Object.prototype.toString.call(o) === `[object ${s}]` + ); +}; + +/******************************************************************************* + * + * A large Uint is always a positive integer (can be zero), assumed to be + * large, i.e. > NUMSAFECHARS -- but not necessarily. The serialized value has + * always at least one digit, and is always followed by a separator. + * + * */ + +const strFromLargeUint = i => { + let r = 0, s = ''; + for (;;) { + r = i % NUMSAFECHARS; + s += intToChar[r]; + i -= r; + if ( i === 0 ) { break; } + i /= NUMSAFECHARS; + } + return s + SEPARATORCHAR; +}; + +const deserializeLargeUint = ( ) => { + let c = readStr.charCodeAt(readPtr++); + let n = charCodeToInt[c]; + let m = 1; + while ( (c = readStr.charCodeAt(readPtr++)) !== SEPARATORCHARCODE ) { + m *= NUMSAFECHARS; + n += m * charCodeToInt[c]; + } + return n; +}; + +/******************************************************************************* + * + * Methods specific to ArrayBuffer objects to serialize optimally according to + * the content of the buffer. + * + * In sparse mode, number of output bytes per input int32 (4-byte) value: + * [v === zero]: 1 byte (separator) + * [v !== zero]: n digits + 1 byte (separator) + * + * */ + +const sparseValueLen = v => v !== 0 + ? (Math.log2(v) / BITS_PER_SAFECHARS | 0) + 2 + : 1; + +const analyzeArrayBuffer = arrbuf => { + const byteLength = arrbuf.byteLength; + const uint32len = byteLength >>> 2; + const uint32arr = new Uint32Array(arrbuf, 0, uint32len); + let notzeroCount = 0; + for ( let i = uint32len-1; i >= 0; i-- ) { + if ( uint32arr[i] === 0 ) { continue; } + notzeroCount = i + 1; + break; + } + const end = notzeroCount + 1 <= uint32len ? notzeroCount << 2 : byteLength; + const endUint32 = end >>> 2; + const remUint8 = end & 0b11; + const denseSize = endUint32 * 5 + (remUint8 ? remUint8 + 1 : 0); + let sparseSize = 0; + for ( let i = 0; i < endUint32; i++ ) { + sparseSize += sparseValueLen(uint32arr[i]); + if ( sparseSize > denseSize ) { + return { end, dense: true, denseSize }; + } + } + if ( remUint8 !== 0 ) { + sparseSize += 1; // sentinel + const uint8arr = new Uint8Array(arrbuf, endUint32 << 2); + for ( let i = 0; i < remUint8; i++ ) { + sparseSize += sparseValueLen(uint8arr[i]); + } + } + return { end, dense: false, sparseSize }; +}; + +const denseArrayBufferToStr = (arrbuf, details) => { + const end = details.end; + const m = end % 4; + const n = end - m; + const uin32len = n >>> 2; + const uint32arr = new Uint32Array(arrbuf, 0, uin32len); + const output = new Uint8Array(details.denseSize); + let j = 0, v = 0; + for ( let i = 0; i < uin32len; i++ ) { + v = uint32arr[i]; + output[j+0] = intToCharCode[v % NUMSAFECHARS]; + v = v / NUMSAFECHARS | 0; + output[j+1] = intToCharCode[v % NUMSAFECHARS]; + v = v / NUMSAFECHARS | 0; + output[j+2] = intToCharCode[v % NUMSAFECHARS]; + v = v / NUMSAFECHARS | 0; + output[j+3] = intToCharCode[v % NUMSAFECHARS]; + v = v / NUMSAFECHARS | 0; + output[j+4] = intToCharCode[v]; + j += 5; + } + if ( m !== 0 ) { + const uint8arr = new Uint8Array(arrbuf, n); + v = uint8arr[0]; + if ( m > 1 ) { + v += uint8arr[1] << 8; + if ( m > 2 ) { + v += uint8arr[2] << 16; + } + } + output[j+0] = intToCharCode[v % NUMSAFECHARS]; + v = v / NUMSAFECHARS | 0; + output[j+1] = intToCharCode[v % NUMSAFECHARS]; + if ( m > 1 ) { + v = v / NUMSAFECHARS | 0; + output[j+2] = intToCharCode[v % NUMSAFECHARS]; + if ( m > 2 ) { + v = v / NUMSAFECHARS | 0; + output[j+3] = intToCharCode[v % NUMSAFECHARS]; + } + } + } + return textDecoder.decode(output); +}; + +const BASE88_POW1 = NUMSAFECHARS; +const BASE88_POW2 = NUMSAFECHARS * BASE88_POW1; +const BASE88_POW3 = NUMSAFECHARS * BASE88_POW2; +const BASE88_POW4 = NUMSAFECHARS * BASE88_POW3; + +const denseArrayBufferFromStr = (denseStr, arrbuf) => { + const input = uint8InputFromAsciiStr(denseStr); + const end = denseStr.length; + const m = end % 5; + const n = end - m; + const uin32len = n / 5 * 4 >>> 2; + const uint32arr = new Uint32Array(arrbuf, 0, uin32len); + let j = 0, v = 0; + for ( let i = 0; i < n; i += 5 ) { + v = charCodeToInt[input[i+0]]; + v += charCodeToInt[input[i+1]] * BASE88_POW1; + v += charCodeToInt[input[i+2]] * BASE88_POW2; + v += charCodeToInt[input[i+3]] * BASE88_POW3; + v += charCodeToInt[input[i+4]] * BASE88_POW4; + uint32arr[j++] = v; + } + if ( m === 0 ) { return; } + v = charCodeToInt[input[n+0]] + + charCodeToInt[input[n+1]] * BASE88_POW1; + if ( m > 2 ) { + v += charCodeToInt[input[n+2]] * BASE88_POW2; + if ( m > 3 ) { + v += charCodeToInt[input[n+3]] * BASE88_POW3; + } + } + const uint8arr = new Uint8Array(arrbuf, j << 2); + uint8arr[0] = v & 255; + if ( v !== 0 ) { + v >>>= 8; + uint8arr[1] = v & 255; + if ( v !== 0 ) { + v >>>= 8; + uint8arr[2] = v & 255; + } + } +}; + +const sparseArrayBufferToStr = (arrbuf, details) => { + const end = details.end; + const uint8out = new Uint8Array(details.sparseSize); + const uint32len = end >>> 2; + const uint32arr = new Uint32Array(arrbuf, 0, uint32len); + let j = 0, n = 0, r = 0; + for ( let i = 0; i < uint32len; i++ ) { + n = uint32arr[i]; + if ( n !== 0 ) { + for (;;) { + r = n % NUMSAFECHARS; + uint8out[j++] = intToCharCode[r]; + n -= r; + if ( n === 0 ) { break; } + n /= NUMSAFECHARS; + } + } + uint8out[j++] = SEPARATORCHARCODE; + } + const uint8rem = end & 0b11; + if ( uint8rem !== 0 ) { + uint8out[j++] = SENTINELCHARCODE; + const uint8arr = new Uint8Array(arrbuf, end - uint8rem, uint8rem); + for ( let i = 0; i < uint8rem; i++ ) { + n = uint8arr[i]; + if ( n !== 0 ) { + for (;;) { + r = n % NUMSAFECHARS; + uint8out[j++] = intToCharCode[r]; + n -= r; + if ( n === 0 ) { break; } + n /= NUMSAFECHARS; + } + } + uint8out[j++] = SEPARATORCHARCODE; + } + } + return textDecoder.decode(uint8out); +}; + +const sparseArrayBufferFromStr = (sparseStr, arrbuf) => { + const sparseLen = sparseStr.length; + const input = uint8InputFromAsciiStr(sparseStr); + const end = arrbuf.byteLength; + const uint32len = end >>> 2; + const uint32arr = new Uint32Array(arrbuf, 0, uint32len); + let i = 0, j = 0, c = 0, n = 0, m = 0; + for ( ; j < sparseLen; i++ ) { + c = input[j++]; + if ( c === SEPARATORCHARCODE ) { continue; } + if ( c === SENTINELCHARCODE ) { break; } + n = charCodeToInt[c]; + m = 1; + for (;;) { + c = input[j++]; + if ( c === SEPARATORCHARCODE ) { break; } + m *= NUMSAFECHARS; + n += m * charCodeToInt[c]; + } + uint32arr[i] = n; + } + if ( c === SENTINELCHARCODE ) { + i <<= 2; + const uint8arr = new Uint8Array(arrbuf, i); + for ( ; j < sparseLen; i++ ) { + c = input[j++]; + if ( c === SEPARATORCHARCODE ) { continue; } + n = charCodeToInt[c]; + m = 1; + for (;;) { + c = input[j++]; + if ( c === SEPARATORCHARCODE ) { break; } + m *= NUMSAFECHARS; + n += m * charCodeToInt[c]; + } + uint8arr[i] = n; + } + } +}; + +/******************************************************************************/ + +const _serialize = data => { + // Primitive types + if ( data === 0 ) { + writeBuffer.push(C_ZERO); + return; + } + if ( data === null ) { + writeBuffer.push(C_NULL); + return; + } + if ( data === undefined ) { + writeBuffer.push(C_UNDEFINED); + return; + } + // Type name + switch ( typeToSerializedInt[typeof data] ) { + case I_STRING: { + const length = data.length; + if ( length < NUMSAFECHARS ) { + writeBuffer.push(C_STRING_SMALL + intToChar[length], data); + } else { + writeBuffer.push(C_STRING_LARGE + strFromLargeUint(length), data); + } + return; + } + case I_NUMBER: + if ( isInteger(data) ) { + if ( data >= NUMSAFECHARS ) { + writeBuffer.push(C_INTEGER_LARGE_POS + strFromLargeUint(data)); + } else if ( data > 0 ) { + writeBuffer.push(C_INTEGER_SMALL_POS + intToChar[data]); + } else if ( data > -NUMSAFECHARS ) { + writeBuffer.push(C_INTEGER_SMALL_NEG + intToChar[-data]); + } else { + writeBuffer.push(C_INTEGER_LARGE_NEG + strFromLargeUint(-data)); + } + } else { + const s = `${data}`; + writeBuffer.push(C_FLOAT + strFromLargeUint(s.length) + s); + } + return; + case I_BOOL: + writeBuffer.push(data ? C_BOOL_TRUE : C_BOOL_FALSE); + return; + case I_OBJECT: + break; + default: + return; + } + const xtypeName = Object.prototype.toString.call(data); + const xtypeInt = xtypeToSerializedInt[xtypeName]; + if ( xtypeInt === I_REGEXP ) { + writeBuffer.push(C_REGEXP); + _serialize(data.source); + _serialize(data.flags); + return; + } + if ( xtypeInt === I_DATE ) { + writeBuffer.push(C_DATE + _serialize(data.getTime())); + return; + } + // Reference to composite types + const ref = writeRefs.get(data); + if ( ref !== undefined ) { + writeBuffer.push(C_REFERENCE + strFromLargeUint(ref)); + return; + } + // Remember reference + writeRefs.set(data, refCounter++); + // Extended type name + switch ( xtypeInt ) { + case I_ARRAY: { + const size = data.length; + if ( size < NUMSAFECHARS ) { + writeBuffer.push(C_ARRAY_SMALL + intToChar[size]); + } else { + writeBuffer.push(C_ARRAY_LARGE + strFromLargeUint(size)); + } + for ( const v of data ) { + _serialize(v); + } + return; + } + case I_SET: { + const size = data.size; + if ( size < NUMSAFECHARS ) { + writeBuffer.push(C_SET_SMALL + intToChar[size]); + } else { + writeBuffer.push(C_SET_LARGE + strFromLargeUint(size)); + } + for ( const v of data ) { + _serialize(v); + } + return; + } + case I_MAP: { + const size = data.size; + if ( size < NUMSAFECHARS ) { + writeBuffer.push(C_MAP_SMALL + intToChar[size]); + } else { + writeBuffer.push(C_MAP_LARGE + strFromLargeUint(size)); + } + for ( const [ k, v ] of data ) { + _serialize(k); + _serialize(v); + } + return; + } + case I_ARRAYBUFFER: { + const byteLength = data.byteLength; + writeBuffer.push(C_ARRAYBUFFER + strFromLargeUint(byteLength)); + _serialize(data.maxByteLength); + const arrbuffDetails = analyzeArrayBuffer(data); + _serialize(arrbuffDetails.dense); + const str = arrbuffDetails.dense + ? denseArrayBufferToStr(data, arrbuffDetails) + : sparseArrayBufferToStr(data, arrbuffDetails); + _serialize(str); + //console.log(`arrbuf size=${byteLength} content size=${arrbuffDetails.end} dense=${arrbuffDetails.dense} array size=${arrbuffDetails.dense ? arrbuffDetails.denseSize : arrbuffDetails.sparseSize} serialized size=${str.length}`); + return; + } + case I_INT8ARRAY: + case I_UINT8ARRAY: + case I_UINT8CLAMPEDARRAY: + case I_INT16ARRAY: + case I_UINT16ARRAY: + case I_INT32ARRAY: + case I_UINT32ARRAY: + case I_FLOAT32ARRAY: + case I_FLOAT64ARRAY: + writeBuffer.push( + typeToSerializedChar[xtypeName], + strFromLargeUint(data.byteOffset), + strFromLargeUint(data.length) + ); + _serialize(data.buffer); + return; + case I_DATAVIEW: + writeBuffer.push(C_DATAVIEW, strFromLargeUint(data.byteOffset), strFromLargeUint(data.byteLength)); + _serialize(data.buffer); + return; + default: { + const keys = Object.keys(data); + const size = keys.length; + if ( size < NUMSAFECHARS ) { + writeBuffer.push(C_SMALL_OBJECT + intToChar[size]); + } else { + writeBuffer.push(C_LARGE_OBJECT + strFromLargeUint(size)); + } + for ( const key of keys ) { + _serialize(key); + _serialize(data[key]); + } + break; + } + } +}; + +/******************************************************************************/ + +const _deserialize = ( ) => { + if ( readPtr >= readEnd ) { return; } + const type = charCodeToInt[readStr.charCodeAt(readPtr++)]; + switch ( type ) { + // Primitive types + case I_STRING_SMALL: + case I_STRING_LARGE: { + const size = type === I_STRING_SMALL + ? charCodeToInt[readStr.charCodeAt(readPtr++)] + : deserializeLargeUint(); + const beg = readPtr; + readPtr += size; + return readStr.slice(beg, readPtr); + } + case I_ZERO: + return 0; + case I_INTEGER_SMALL_POS: + return charCodeToInt[readStr.charCodeAt(readPtr++)]; + case I_INTEGER_SMALL_NEG: + return -charCodeToInt[readStr.charCodeAt(readPtr++)]; + case I_INTEGER_LARGE_POS: + return deserializeLargeUint(); + case I_INTEGER_LARGE_NEG: + return -deserializeLargeUint(); + case I_BOOL_FALSE: + return false; + case I_BOOL_TRUE: + return true; + case I_NULL: + return null; + case I_UNDEFINED: + return; + case I_FLOAT: { + const size = deserializeLargeUint(); + const beg = readPtr; + readPtr += size; + return parseFloat(readStr.slice(beg, readPtr)); + } + case I_REGEXP: { + const source = _deserialize(); + const flags = _deserialize(); + return new RegExp(source, flags); + } + case I_DATE: { + const time = _deserialize(); + return new Date(time); + } + case I_REFERENCE: { + const ref = deserializeLargeUint(); + return readRefs.get(ref); + } + case I_SMALL_OBJECT: + case I_LARGE_OBJECT: { + const entries = []; + const size = type === I_SMALL_OBJECT + ? charCodeToInt[readStr.charCodeAt(readPtr++)] + : deserializeLargeUint(); + for ( let i = 0; i < size; i++ ) { + const k = _deserialize(); + const v = _deserialize(); + entries.push([ k, v ]); + } + const out = Object.fromEntries(entries); + readRefs.set(refCounter++, out); + return out; + } + case I_ARRAY_SMALL: + case I_ARRAY_LARGE: { + const out = []; + const size = type === I_ARRAY_SMALL + ? charCodeToInt[readStr.charCodeAt(readPtr++)] + : deserializeLargeUint(); + for ( let i = 0; i < size; i++ ) { + out.push(_deserialize()); + } + readRefs.set(refCounter++, out); + return out; + } + case I_SET_SMALL: + case I_SET_LARGE: { + const entries = []; + const size = type === I_SET_SMALL + ? charCodeToInt[readStr.charCodeAt(readPtr++)] + : deserializeLargeUint(); + for ( let i = 0; i < size; i++ ) { + entries.push(_deserialize()); + } + const out = new Set(entries); + readRefs.set(refCounter++, out); + return out; + } + case I_MAP_SMALL: + case I_MAP_LARGE: { + const entries = []; + const size = type === I_MAP_SMALL + ? charCodeToInt[readStr.charCodeAt(readPtr++)] + : deserializeLargeUint(); + for ( let i = 0; i < size; i++ ) { + const k = _deserialize(); + const v = _deserialize(); + entries.push([ k, v ]); + } + const out = new Map(entries); + readRefs.set(refCounter++, out); + return out; + } + case I_ARRAYBUFFER: { + const byteLength = deserializeLargeUint(); + const maxByteLength = _deserialize(); + let options; + if ( maxByteLength !== 0 && maxByteLength !== byteLength ) { + options = { maxByteLength }; + } + const arrbuf = new ArrayBuffer(byteLength, options); + const dense = _deserialize(); + const str = _deserialize(); + if ( dense ) { + denseArrayBufferFromStr(str, arrbuf); + } else { + sparseArrayBufferFromStr(str, arrbuf); + } + readRefs.set(refCounter++, arrbuf); + return arrbuf; + } + case I_INT8ARRAY: + case I_UINT8ARRAY: + case I_UINT8CLAMPEDARRAY: + case I_INT16ARRAY: + case I_UINT16ARRAY: + case I_INT32ARRAY: + case I_UINT32ARRAY: + case I_FLOAT32ARRAY: + case I_FLOAT64ARRAY: + case I_DATAVIEW: { + const byteOffset = deserializeLargeUint(); + const length = deserializeLargeUint(); + const arrayBuffer = _deserialize(); + const ctor = toArrayBufferViewConstructor[`${type}`]; + const out = new ctor(arrayBuffer, byteOffset, length); + readRefs.set(refCounter++, out); + return out; + } + default: + break; + } + readPtr = FAILMARK; +}; + +/******************************************************************************* + * + * LZ4 block compression/decompression + * + * Imported from: + * https://github.com/gorhill/lz4-wasm/blob/8995cdef7b/dist/lz4-block-codec-js.js + * + * Customized to avoid external dependencies as I entertain the idea of + * spinning off the serializer as a standalone utility for all to use. + * + * */ + +class LZ4BlockJS { + constructor() { + this.hashTable = undefined; + this.outputBuffer = undefined; + } + reset() { + this.hashTable = undefined; + this.outputBuffer = undefined; + } + growOutputBuffer(size) { + if ( this.outputBuffer !== undefined ) { + if ( this.outputBuffer.byteLength >= size ) { return; } + } + this.outputBuffer = new ArrayBuffer(size + 0xFFFF & 0x7FFF0000); + } + encodeBound(size) { + return size > 0x7E000000 ? 0 : size + (size / 255 | 0) + 16; + } + encodeBlock(iBuf, oOffset) { + const iLen = iBuf.byteLength; + if ( iLen >= 0x7E000000 ) { throw new RangeError(); } + // "The last match must start at least 12 bytes before end of block" + const lastMatchPos = iLen - 12; + // "The last 5 bytes are always literals" + const lastLiteralPos = iLen - 5; + if ( this.hashTable === undefined ) { + this.hashTable = new Int32Array(65536); + } + this.hashTable.fill(-65536); + if ( isInstanceOf(iBuf, 'ArrayBuffer') ) { + iBuf = new Uint8Array(iBuf); + } + const oLen = oOffset + this.encodeBound(iLen); + this.growOutputBuffer(oLen); + const oBuf = new Uint8Array(this.outputBuffer, 0, oLen); + let iPos = 0; + let oPos = oOffset; + let anchorPos = 0; + // sequence-finding loop + for (;;) { + let refPos; + let mOffset; + let sequence = iBuf[iPos] << 8 | iBuf[iPos+1] << 16 | iBuf[iPos+2] << 24; + // match-finding loop + while ( iPos <= lastMatchPos ) { + sequence = sequence >>> 8 | iBuf[iPos+3] << 24; + const hash = (sequence * 0x9E37 & 0xFFFF) + (sequence * 0x79B1 >>> 16) & 0xFFFF; + refPos = this.hashTable[hash]; + this.hashTable[hash] = iPos; + mOffset = iPos - refPos; + if ( + mOffset < 65536 && + iBuf[refPos+0] === ((sequence ) & 0xFF) && + iBuf[refPos+1] === ((sequence >>> 8) & 0xFF) && + iBuf[refPos+2] === ((sequence >>> 16) & 0xFF) && + iBuf[refPos+3] === ((sequence >>> 24) & 0xFF) + ) { + break; + } + iPos += 1; + } + // no match found + if ( iPos > lastMatchPos ) { break; } + // match found + let lLen = iPos - anchorPos; + let mLen = iPos; + iPos += 4; refPos += 4; + while ( iPos < lastLiteralPos && iBuf[iPos] === iBuf[refPos] ) { + iPos += 1; refPos += 1; + } + mLen = iPos - mLen; + const token = mLen < 19 ? mLen - 4 : 15; + // write token, length of literals if needed + if ( lLen >= 15 ) { + oBuf[oPos++] = 0xF0 | token; + let l = lLen - 15; + while ( l >= 255 ) { + oBuf[oPos++] = 255; + l -= 255; + } + oBuf[oPos++] = l; + } else { + oBuf[oPos++] = (lLen << 4) | token; + } + // write literals + while ( lLen-- ) { + oBuf[oPos++] = iBuf[anchorPos++]; + } + if ( mLen === 0 ) { break; } + // write offset of match + oBuf[oPos+0] = mOffset; + oBuf[oPos+1] = mOffset >>> 8; + oPos += 2; + // write length of match if needed + if ( mLen >= 19 ) { + let l = mLen - 19; + while ( l >= 255 ) { + oBuf[oPos++] = 255; + l -= 255; + } + oBuf[oPos++] = l; + } + anchorPos = iPos; + } + // last sequence is literals only + let lLen = iLen - anchorPos; + if ( lLen >= 15 ) { + oBuf[oPos++] = 0xF0; + let l = lLen - 15; + while ( l >= 255 ) { + oBuf[oPos++] = 255; + l -= 255; + } + oBuf[oPos++] = l; + } else { + oBuf[oPos++] = lLen << 4; + } + while ( lLen-- ) { + oBuf[oPos++] = iBuf[anchorPos++]; + } + return new Uint8Array(oBuf.buffer, 0, oPos); + } + decodeBlock(iBuf, iOffset, oLen) { + const iLen = iBuf.byteLength; + this.growOutputBuffer(oLen); + const oBuf = new Uint8Array(this.outputBuffer, 0, oLen); + let iPos = iOffset, oPos = 0; + while ( iPos < iLen ) { + const token = iBuf[iPos++]; + // literals + let clen = token >>> 4; + // length of literals + if ( clen !== 0 ) { + if ( clen === 15 ) { + let l; + for (;;) { + l = iBuf[iPos++]; + if ( l !== 255 ) { break; } + clen += 255; + } + clen += l; + } + // copy literals + const end = iPos + clen; + while ( iPos < end ) { + oBuf[oPos++] = iBuf[iPos++]; + } + if ( iPos === iLen ) { break; } + } + // match + const mOffset = iBuf[iPos+0] | (iBuf[iPos+1] << 8); + if ( mOffset === 0 || mOffset > oPos ) { return; } + iPos += 2; + // length of match + clen = (token & 0x0F) + 4; + if ( clen === 19 ) { + let l; + for (;;) { + l = iBuf[iPos++]; + if ( l !== 255 ) { break; } + clen += 255; + } + clen += l; + } + // copy match + const end = oPos + clen; + let mPos = oPos - mOffset; + while ( oPos < end ) { + oBuf[oPos++] = oBuf[mPos++]; + } + } + return oBuf; + } + encode(input, outputOffset) { + if ( isInstanceOf(input, 'ArrayBuffer') ) { + input = new Uint8Array(input); + } else if ( isInstanceOf(input, 'Uint8Array') === false ) { + throw new TypeError(); + } + return this.encodeBlock(input, outputOffset); + } + decode(input, inputOffset, outputSize) { + if ( isInstanceOf(input, 'ArrayBuffer') ) { + input = new Uint8Array(input); + } else if ( isInstanceOf(input, 'Uint8Array') === false ) { + throw new TypeError(); + } + return this.decodeBlock(input, inputOffset, outputSize); + } +} + +/******************************************************************************* + * + * Synchronous APIs + * + * */ + +export const serialize = (data, options = {}) => { + refCounter = 1; + _serialize(data); + writeBuffer.unshift(MAGICPREFIX); + const s = writeBuffer.join(''); + writeRefs.clear(); + writeBuffer.length = 0; + if ( options.compress !== true ) { return s; } + const lz4Util = new LZ4BlockJS(); + const encoder = new TextEncoder(); + const uint8ArrayBefore = encoder.encode(s); + const uint8ArrayAfter = lz4Util.encode(uint8ArrayBefore, 0); + const lz4 = { + size: uint8ArrayBefore.length, + data: new Uint8Array(uint8ArrayAfter), + }; + refCounter = 1; + _serialize(lz4); + writeBuffer.unshift(MAGICLZ4PREFIX); + const t = writeBuffer.join(''); + writeRefs.clear(); + writeBuffer.length = 0; + const ratio = t.length / s.length; + return ratio <= 0.85 ? t : s; +}; + +export const deserialize = s => { + if ( s.startsWith(MAGICLZ4PREFIX) ) { + refCounter = 1; + readStr = s; + readEnd = s.length; + readPtr = MAGICLZ4PREFIX.length; + const lz4 = _deserialize(); + readRefs.clear(); + readStr = ''; + const lz4Util = new LZ4BlockJS(); + const uint8ArrayAfter = lz4Util.decode(lz4.data, 0, lz4.size); + s = textDecoder.decode(new Uint8Array(uint8ArrayAfter)); + } + if ( s.startsWith(MAGICPREFIX) === false ) { return; } + refCounter = 1; + readStr = s; + readEnd = s.length; + readPtr = MAGICPREFIX.length; + const data = _deserialize(); + readRefs.clear(); + readStr = ''; + uint8Input = null; + if ( readPtr === FAILMARK ) { return; } + return data; +}; + +export const canDeserialize = s => + typeof s === 'string' && + (s.startsWith(MAGICLZ4PREFIX) || s.startsWith(MAGICPREFIX)); + +/******************************************************************************* + * + * Configuration + * + * */ + +const defaultConfig = { + threadTTL: 5000, +}; + +const validateConfig = { + threadTTL: val => val > 0, +}; + +const currentConfig = Object.assign({}, defaultConfig); + +export const getConfig = ( ) => Object.assign({}, currentConfig); + +export const setConfig = config => { + for ( const key in Object.keys(config) ) { + if ( defaultConfig.hasOwnProperty(key) === false ) { continue; } + const val = config[key]; + if ( typeof val !== typeof defaultConfig[key] ) { continue; } + if ( (validateConfig[key])(val) === false ) { continue; } + currentConfig[key] = val; + } +}; + +/******************************************************************************* + * + * Asynchronous APIs + * + * Being asynchronous allows to support workers and future features such as + * checksums. + * + * */ + +class Thread { + constructor(gcer) { + this.jobs = new Map(); + this.jobIdGenerator = 1; + this.workerAccessTime = 0; + this.workerTimer = undefined; + this.gcer = gcer; + this.workerPromise = new Promise(resolve => { + let worker = null; + try { + worker = new Worker('js/scuo-serializer.js', { type: 'module' }); + worker.onmessage = ev => { + const msg = ev.data; + if ( isInstanceOf(msg, 'Object') === false ) { return; } + if ( msg.what === 'ready!' ) { + worker.onmessage = ev => { this.onmessage(ev); }; + worker.onerror = null; + resolve(worker); + } + }; + worker.onerror = ( ) => { + worker.onmessage = worker.onerror = null; + resolve(null); + }; + worker.postMessage({ what: 'ready?', config: currentConfig }); + } catch(ex) { + console.info(ex); + worker.onmessage = worker.onerror = null; + resolve(null); + } + }); + } + + countdownWorker() { + if ( this.workerTimer !== undefined ) { return; } + this.workerTimer = setTimeout(async ( ) => { + this.workerTimer = undefined; + if ( this.jobs.size !== 0 ) { return; } + const idleTime = Date.now() - this.workerAccessTime; + if ( idleTime < currentConfig.threadTTL ) { + return this.countdownWorker(); + } + const worker = await this.workerPromise; + if ( this.jobs.size !== 0 ) { return; } + this.gcer(this); + if ( worker === null ) { return; } + worker.onmessage = worker.onerror = null; + worker.terminate(); + }, currentConfig.threadTTL); + } + + onmessage(ev) { + const job = ev.data; + const resolve = this.jobs.get(job.id); + if ( resolve === undefined ) { return; } + this.jobs.delete(job.id); + resolve(job.result); + if ( this.jobs.size !== 0 ) { return; } + this.countdownWorker(); + } + + async serialize(data, options) { + this.workerAccessTime = Date.now(); + const worker = await this.workerPromise; + if ( worker === null ) { + const result = serialize(data, options); + this.countdownWorker(); + return result; + } + const id = this.jobIdGenerator++; + return new Promise(resolve => { + const job = { what: 'serialize', id, data, options }; + this.jobs.set(job.id, resolve); + worker.postMessage(job); + }); + } + + async deserialize(data, options) { + this.workerAccessTime = Date.now(); + const worker = await this.workerPromise; + if ( worker === null ) { + const result = deserialize(data, options); + this.countdownWorker(); + return result; + } + const id = this.jobIdGenerator++; + return new Promise(resolve => { + const job = { what: 'deserialize', id, data, options }; + this.jobs.set(job.id, resolve); + worker.postMessage(job); + }); + } +} + +const threads = { + pool: [], + thread(maxPoolSize) { + for ( const thread of this.pool ) { + if ( thread.jobs.size === 0 ) { return thread; } + } + const len = this.pool.length; + if ( len !== 0 && len >= maxPoolSize ) { + if ( len === 1 ) { return this.pool[0]; } + return this.pool.reduce((best, candidate) => + candidate.jobs.size < best.jobs.size ? candidate : best + ); + } + const thread = new Thread(thread => { + const pos = this.pool.indexOf(thread); + if ( pos === -1 ) { return; } + this.pool.splice(pos, 1); + }); + this.pool.push(thread); + return thread; + }, +}; + +export async function serializeAsync(data, options = {}) { + const maxThreadCount = options.multithreaded || 0; + if ( maxThreadCount === 0 ) { + return serialize(data, options); + } + const result = await threads + .thread(maxThreadCount) + .serialize(data, options); + if ( result !== undefined ) { return result; } + return serialize(data, options); +} + +export async function deserializeAsync(data, options = {}) { + const maxThreadCount = options.multithreaded || 0; + if ( maxThreadCount === 0 ) { + return deserialize(data, options); + } + const result = await threads + .thread(maxThreadCount) + .deserialize(data, options); + if ( result !== undefined ) { return result; } + return deserialize(data, options); +} + +/******************************************************************************* + * + * Worker-only code + * + * */ + +if ( isInstanceOf(globalThis, 'DedicatedWorkerGlobalScope') ) { + globalThis.onmessage = ev => { + const msg = ev.data; + switch ( msg.what ) { + case 'ready?': + setConfig(msg.config); + globalThis.postMessage({ what: 'ready!' }); + break; + case 'serialize': + case 'deserialize': { + const result = msg.what === 'serialize' + ? serialize(msg.data, msg.options) + : deserialize(msg.data); + globalThis.postMessage({ id: msg.id, result }); + break; + } + } + }; +} + +/******************************************************************************/ diff --git a/src/js/start.js b/src/js/start.js index 5762619b6d2a4..877d909c4d92d 100644 --- a/src/js/start.js +++ b/src/js/start.js @@ -139,7 +139,7 @@ const initializeTabs = async ( ) => { // https://www.reddit.com/r/uBlockOrigin/comments/s7c9go/ // Abort suspending network requests when uBO is merely being installed. -const onVersionReady = lastVersion => { +const onVersionReady = async lastVersion => { if ( lastVersion === vAPI.app.version ) { return; } vAPI.storage.set({ @@ -155,6 +155,11 @@ const onVersionReady = lastVersion => { return; } + // Migrate cache storage + if ( lastVersionInt < vAPI.app.intFromVersion('1.56.1b1') ) { + await cacheStorage.migrate(µb.hiddenSettings.cacheStorageAPI); + } + // Since built-in resources may have changed since last version, we // force a reload of all resources. redirectEngine.invalidateResourcesSelfie(io); @@ -252,19 +257,19 @@ const onUserSettingsReady = fetched => { // Wait for removal of invalid cached data to be completed. const onCacheSettingsReady = async (fetched = {}) => { + let selfieIsInvalid = false; if ( fetched.compiledMagic !== µb.systemSettings.compiledMagic ) { µb.compiledFormatChanged = true; - µb.selfieIsInvalid = true; + selfieIsInvalid = true; ubolog(`Serialized format of static filter lists changed`); } if ( fetched.selfieMagic !== µb.systemSettings.selfieMagic ) { - µb.selfieIsInvalid = true; + selfieIsInvalid = true; ubolog(`Serialized format of selfie changed`); } - if ( µb.selfieIsInvalid ) { - µb.selfieManager.destroy(); - cacheStorage.set(µb.systemSettings); - } + if ( selfieIsInvalid === false ) { return; } + µb.selfieManager.destroy({ janitor: true }); + cacheStorage.set(µb.systemSettings); }; /******************************************************************************/ @@ -305,10 +310,7 @@ const onHiddenSettingsReady = async ( ) => { } // Maybe override default cache storage - µb.supportStats.cacheBackend = await cacheStorage.select( - µb.hiddenSettings.cacheStorageAPI - ); - ubolog(`Backend storage for cache will be ${µb.supportStats.cacheBackend}`); + µb.supportStats.cacheBackend = 'browser.storage.local'; }; /******************************************************************************/ @@ -333,7 +335,6 @@ const onFirstFetchReady = (fetched, adminExtra) => { sessionSwitches.assign(permanentSwitches); onNetWhitelistReady(fetched.netWhitelist, adminExtra); - onVersionReady(fetched.version); }; /******************************************************************************/ @@ -389,23 +390,20 @@ try { const adminExtra = await vAPI.adminStorage.get('toAdd'); ubolog(`Extra admin settings ready ${Date.now()-vAPI.T0} ms after launch`); - // https://github.com/uBlockOrigin/uBlock-issues/issues/1365 - // Wait for onCacheSettingsReady() to be fully ready. - const [ , , lastVersion ] = await Promise.all([ + const lastVersion = await vAPI.storage.get(createDefaultProps()).then(async fetched => { + ubolog(`Version ready ${Date.now()-vAPI.T0} ms after launch`); + await onVersionReady(fetched.version); + return fetched; + }).then(fetched => { + ubolog(`First fetch ready ${Date.now()-vAPI.T0} ms after launch`); + onFirstFetchReady(fetched, adminExtra); + return fetched.version; + }); + + await Promise.all([ µb.loadSelectedFilterLists().then(( ) => { ubolog(`List selection ready ${Date.now()-vAPI.T0} ms after launch`); }), - cacheStorage.get( - { compiledMagic: 0, selfieMagic: 0 } - ).then(fetched => { - ubolog(`Cache magic numbers ready ${Date.now()-vAPI.T0} ms after launch`); - onCacheSettingsReady(fetched); - }), - vAPI.storage.get(createDefaultProps()).then(fetched => { - ubolog(`First fetch ready ${Date.now()-vAPI.T0} ms after launch`); - onFirstFetchReady(fetched, adminExtra); - return fetched.version; - }), µb.loadUserSettings().then(fetched => { ubolog(`User settings ready ${Date.now()-vAPI.T0} ms after launch`); onUserSettingsReady(fetched); @@ -413,6 +411,10 @@ try { µb.loadPublicSuffixList().then(( ) => { ubolog(`PSL ready ${Date.now()-vAPI.T0} ms after launch`); }), + cacheStorage.get({ compiledMagic: 0, selfieMagic: 0 }).then(bin => { + ubolog(`Cache magic numbers ready ${Date.now()-vAPI.T0} ms after launch`); + onCacheSettingsReady(bin); + }), ]); // https://github.com/uBlockOrigin/uBlock-issues/issues/1547 diff --git a/src/js/static-ext-filtering-db.js b/src/js/static-ext-filtering-db.js index 64a9c8df0177e..e669c1e11a3ce 100644 --- a/src/js/static-ext-filtering-db.js +++ b/src/js/static-ext-filtering-db.js @@ -141,8 +141,8 @@ const StaticExtFilteringHostnameDB = class { toSelfie() { return { version: this.version, - hostnameToSlotIdMap: Array.from(this.hostnameToSlotIdMap), - regexToSlotIdMap: Array.from(this.regexToSlotIdMap), + hostnameToSlotIdMap: this.hostnameToSlotIdMap, + regexToSlotIdMap: this.regexToSlotIdMap, hostnameSlots: this.hostnameSlots, strSlots: this.strSlots, size: this.size @@ -150,11 +150,11 @@ const StaticExtFilteringHostnameDB = class { } fromSelfie(selfie) { - if ( selfie === undefined ) { return; } - this.hostnameToSlotIdMap = new Map(selfie.hostnameToSlotIdMap); + if ( typeof selfie !== 'object' || selfie === null ) { return; } + this.hostnameToSlotIdMap = selfie.hostnameToSlotIdMap; // Regex-based lookup available in uBO 1.47.0 and above - if ( Array.isArray(selfie.regexToSlotIdMap) ) { - this.regexToSlotIdMap = new Map(selfie.regexToSlotIdMap); + if ( selfie.regexToSlotIdMap ) { + this.regexToSlotIdMap = selfie.regexToSlotIdMap; } this.hostnameSlots = selfie.hostnameSlots; this.strSlots = selfie.strSlots; diff --git a/src/js/static-ext-filtering.js b/src/js/static-ext-filtering.js index 8a2905eb6969c..e616e6350e927 100644 --- a/src/js/static-ext-filtering.js +++ b/src/js/static-ext-filtering.js @@ -26,9 +26,8 @@ import cosmeticFilteringEngine from './cosmetic-filtering.js'; import htmlFilteringEngine from './html-filtering.js'; import httpheaderFilteringEngine from './httpheader-filtering.js'; -import io from './assets.js'; -import logger from './logger.js'; import scriptletFilteringEngine from './scriptlet-filtering.js'; +import logger from './logger.js'; /******************************************************************************* @@ -147,34 +146,24 @@ staticExtFilteringEngine.fromCompiledContent = function(reader, options) { htmlFilteringEngine.fromCompiledContent(reader, options); }; -staticExtFilteringEngine.toSelfie = function(path) { - return io.put( - `${path}/main`, - JSON.stringify({ - cosmetic: cosmeticFilteringEngine.toSelfie(), - scriptlets: scriptletFilteringEngine.toSelfie(), - httpHeaders: httpheaderFilteringEngine.toSelfie(), - html: htmlFilteringEngine.toSelfie(), - }) - ); +staticExtFilteringEngine.toSelfie = function() { + return { + cosmetic: cosmeticFilteringEngine.toSelfie(), + scriptlets: scriptletFilteringEngine.toSelfie(), + httpHeaders: httpheaderFilteringEngine.toSelfie(), + html: htmlFilteringEngine.toSelfie(), + }; }; -staticExtFilteringEngine.fromSelfie = function(path) { - return io.get(`${path}/main`).then(details => { - let selfie; - try { - selfie = JSON.parse(details.content); - } catch (ex) { - } - if ( selfie instanceof Object === false ) { return false; } - cosmeticFilteringEngine.fromSelfie(selfie.cosmetic); - httpheaderFilteringEngine.fromSelfie(selfie.httpHeaders); - htmlFilteringEngine.fromSelfie(selfie.html); - if ( scriptletFilteringEngine.fromSelfie(selfie.scriptlets) === false ) { - return false; - } - return true; - }); +staticExtFilteringEngine.fromSelfie = async function(selfie) { + if ( typeof selfie !== 'object' || selfie === null ) { return false; } + cosmeticFilteringEngine.fromSelfie(selfie.cosmetic); + httpheaderFilteringEngine.fromSelfie(selfie.httpHeaders); + htmlFilteringEngine.fromSelfie(selfie.html); + if ( scriptletFilteringEngine.fromSelfie(selfie.scriptlets) === false ) { + return false; + } + return true; }; /******************************************************************************/ diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 9189c01bc9604..86d042248c253 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -28,7 +28,6 @@ import { queueTask, dropTask } from './tasks.js'; import BidiTrieContainer from './biditrie.js'; import HNTrieContainer from './hntrie.js'; -import { sparseBase64 } from './base64-custom.js'; import { CompiledListReader } from './static-filtering-io.js'; import * as sfp from './static-filtering-parser.js'; @@ -493,17 +492,13 @@ const filterDataReset = ( ) => { filterData.fill(0); filterDataWritePtr = 2; }; -const filterDataToSelfie = ( ) => { - return JSON.stringify(Array.from(filterData.subarray(0, filterDataWritePtr))); -}; +const filterDataToSelfie = ( ) => + filterData.subarray(0, filterDataWritePtr); + const filterDataFromSelfie = selfie => { - if ( typeof selfie !== 'string' || selfie === '' ) { return false; } - const data = JSON.parse(selfie); - if ( Array.isArray(data) === false ) { return false; } - filterDataGrow(data.length); - filterDataWritePtr = data.length; - filterData.set(data); - filterDataShrink(); + if ( selfie instanceof Int32Array === false ) { return false; } + filterData = selfie; + filterDataWritePtr = selfie.length; return true; }; @@ -519,53 +514,15 @@ const filterRefsReset = ( ) => { filterRefs.fill(null); filterRefsWritePtr = 1; }; -const filterRefsToSelfie = ( ) => { - const refs = []; - for ( let i = 0; i < filterRefsWritePtr; i++ ) { - const v = filterRefs[i]; - if ( v instanceof RegExp ) { - refs.push({ t: 1, s: v.source, f: v.flags }); - continue; - } - if ( Array.isArray(v) ) { - refs.push({ t: 2, v }); - continue; - } - if ( typeof v !== 'object' || v === null ) { - refs.push({ t: 0, v }); - continue; - } - const out = Object.create(null); - for ( const prop of Object.keys(v) ) { - const value = v[prop]; - out[prop] = prop.startsWith('$') - ? (typeof value === 'string' ? '' : null) - : value; - } - refs.push({ t: 3, v: out }); - } - return JSON.stringify(refs); -}; +const filterRefsToSelfie = ( ) => + filterRefs.slice(0, filterRefsWritePtr); + const filterRefsFromSelfie = selfie => { - if ( typeof selfie !== 'string' || selfie === '' ) { return false; } - const refs = JSON.parse(selfie); - if ( Array.isArray(refs) === false ) { return false; } - for ( let i = 0; i < refs.length; i++ ) { - const v = refs[i]; - switch ( v.t ) { - case 0: - case 2: - case 3: - filterRefs[i] = v.v; - break; - case 1: - filterRefs[i] = new RegExp(v.s, v.f); - break; - default: - throw new Error('Unknown filter reference!'); - } + if ( Array.isArray(selfie) === false ) { return false; } + for ( let i = 0, n = selfie.length; i < n; i++ ) { + filterRefs[i] = selfie[i]; } - filterRefsWritePtr = refs.length; + filterRefsWritePtr = selfie.length; return true; }; @@ -3121,14 +3078,11 @@ const urlTokenizer = new (class { } toSelfie() { - return sparseBase64.encode( - this.knownTokens.buffer, - this.knownTokens.byteLength - ); + return this.knownTokens; } fromSelfie(selfie) { - return sparseBase64.decode(selfie, this.knownTokens.buffer); + this.knownTokens = selfie; } // https://github.com/chrisaljoudi/uBlock/issues/1118 @@ -4674,52 +4628,24 @@ FilterContainer.prototype.optimize = function(throttle = 0) { /******************************************************************************/ -FilterContainer.prototype.toSelfie = async function(storage, path) { - if ( typeof storage !== 'object' || storage === null ) { return; } - if ( typeof storage.put !== 'function' ) { return; } - +FilterContainer.prototype.toSelfie = function() { bidiTrieOptimize(true); - keyvalStore.setItem( - 'SNFE.origHNTrieContainer.trieDetails', + keyvalStore.setItem('SNFE.origHNTrieContainer.trieDetails', origHNTrieContainer.optimize() ); - - return Promise.all([ - storage.put( - `${path}/destHNTrieContainer`, - destHNTrieContainer.serialize(sparseBase64) - ), - storage.put( - `${path}/origHNTrieContainer`, - origHNTrieContainer.serialize(sparseBase64) - ), - storage.put( - `${path}/bidiTrie`, - bidiTrie.serialize(sparseBase64) - ), - storage.put( - `${path}/filterData`, - filterDataToSelfie() - ), - storage.put( - `${path}/filterRefs`, - filterRefsToSelfie() - ), - storage.put( - `${path}/main`, - JSON.stringify({ - version: this.selfieVersion, - processedFilterCount: this.processedFilterCount, - acceptedCount: this.acceptedCount, - discardedCount: this.discardedCount, - bitsToBucket: Array.from(this.bitsToBucket).map(kv => { - kv[1] = Array.from(kv[1]); - return kv; - }), - urlTokenizer: urlTokenizer.toSelfie(), - }) - ) - ]); + return { + version: this.selfieVersion, + processedFilterCount: this.processedFilterCount, + acceptedCount: this.acceptedCount, + discardedCount: this.discardedCount, + bitsToBucket: this.bitsToBucket, + urlTokenizer: urlTokenizer.toSelfie(), + destHNTrieContainer: destHNTrieContainer.toSelfie(), + origHNTrieContainer: origHNTrieContainer.toSelfie(), + bidiTrie: bidiTrie.toSelfie(), + filterData: filterDataToSelfie(), + filterRefs: filterRefsToSelfie(), + }; }; FilterContainer.prototype.serialize = async function() { @@ -4735,53 +4661,27 @@ FilterContainer.prototype.serialize = async function() { /******************************************************************************/ -FilterContainer.prototype.fromSelfie = async function(storage, path) { - if ( typeof storage !== 'object' || storage === null ) { return; } - if ( typeof storage.get !== 'function' ) { return; } +FilterContainer.prototype.fromSelfie = async function(selfie) { + if ( typeof selfie !== 'object' || selfie === null ) { return; } this.reset(); this.notReady = true; - const results = await Promise.all([ - storage.get(`${path}/main`), - storage.get(`${path}/destHNTrieContainer`).then(details => - destHNTrieContainer.unserialize(details.content, sparseBase64) - ), - storage.get(`${path}/origHNTrieContainer`).then(details => - origHNTrieContainer.unserialize(details.content, sparseBase64) - ), - storage.get(`${path}/bidiTrie`).then(details => - bidiTrie.unserialize(details.content, sparseBase64) - ), - storage.get(`${path}/filterData`).then(details => - filterDataFromSelfie(details.content) - ), - storage.get(`${path}/filterRefs`).then(details => - filterRefsFromSelfie(details.content) - ), - ]); - + const results = [ + destHNTrieContainer.fromSelfie(selfie.destHNTrieContainer), + origHNTrieContainer.fromSelfie(selfie.origHNTrieContainer), + bidiTrie.fromSelfie(selfie.bidiTrie), + filterDataFromSelfie(selfie.filterData), + filterRefsFromSelfie(selfie.filterRefs), + ]; if ( results.slice(1).every(v => v === true) === false ) { return false; } - const details = results[0]; - if ( typeof details !== 'object' || details === null ) { return false; } - if ( typeof details.content !== 'string' ) { return false; } - if ( details.content === '' ) { return false; } - let selfie; - try { - selfie = JSON.parse(details.content); - } catch (ex) { - } - if ( typeof selfie !== 'object' || selfie === null ) { return false; } if ( selfie.version !== this.selfieVersion ) { return false; } this.processedFilterCount = selfie.processedFilterCount; this.acceptedCount = selfie.acceptedCount; this.discardedCount = selfie.discardedCount; - this.bitsToBucket = new Map(selfie.bitsToBucket.map(kv => { - kv[1] = new Map(kv[1]); - return kv; - })); + this.bitsToBucket = selfie.bitsToBucket; urlTokenizer.fromSelfie(selfie.urlTokenizer); // If this point is never reached, it means the internal state is diff --git a/src/js/storage.js b/src/js/storage.js index 5325a200f92c1..68b52209da820 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -38,7 +38,6 @@ import µb from './background.js'; import { hostnameFromURI } from './uri-utils.js'; import { i18n, i18n$ } from './i18n.js'; import { redirectEngine } from './redirect-engine.js'; -import { sparseBase64 } from './base64-custom.js'; import { ubolog, ubologSet } from './console.js'; import * as sfp from './static-filtering-parser.js'; @@ -974,7 +973,7 @@ onBroadcast(msg => { /******************************************************************************/ µb.getCompiledFilterList = async function(assetKey) { - const compiledPath = 'compiled/' + assetKey; + const compiledPath = `compiled/${assetKey}`; // https://github.com/uBlockOrigin/uBlock-issues/issues/1365 // Verify that the list version matches that of the current compiled @@ -983,11 +982,10 @@ onBroadcast(msg => { this.compiledFormatChanged === false && this.badLists.has(assetKey) === false ) { - const compiledDetails = await io.get(compiledPath); + const content = await io.fromCache(compiledPath); const compilerVersion = `${this.systemSettings.compiledMagic}\n`; - if ( compiledDetails.content.startsWith(compilerVersion) ) { - compiledDetails.assetKey = assetKey; - return compiledDetails; + if ( content.startsWith(compilerVersion) ) { + return { assetKey, content }; } } @@ -1017,7 +1015,7 @@ onBroadcast(msg => { assetKey, trustedSource: this.isTrustedList(assetKey), }); - io.put(compiledPath, compiledContent); + io.toCache(compiledPath, compiledContent); return { assetKey, content: compiledContent }; }; @@ -1046,7 +1044,7 @@ onBroadcast(msg => { /******************************************************************************/ µb.removeCompiledFilterList = function(assetKey) { - io.remove('compiled/' + assetKey); + io.remove(`compiled/${assetKey}`); }; µb.removeFilterList = function(assetKey) { @@ -1173,20 +1171,17 @@ onBroadcast(msg => { const results = await Promise.all(fetchPromises); if ( Array.isArray(results) === false ) { return results; } - let content = ''; + const content = []; for ( let i = 1; i < results.length; i++ ) { const result = results[i]; - if ( - result instanceof Object === false || - typeof result.content !== 'string' || - result.content === '' - ) { - continue; - } - content += '\n\n' + result.content; + if ( result instanceof Object === false ) { continue; } + if ( typeof result.content !== 'string' ) { continue; } + if ( result.content === '' ) { continue; } + content.push(result.content); + } + if ( content.length !== 0 ) { + redirectEngine.resourcesFromString(content.join('\n\n')); } - - redirectEngine.resourcesFromString(content); redirectEngine.selfieFromResources(io); } catch(ex) { ubolog(ex); @@ -1225,8 +1220,8 @@ onBroadcast(msg => { } try { - const result = await io.get(`compiled/${this.pslAssetKey}`); - if ( psl.fromSelfie(result.content, sparseBase64) ) { return; } + const selfie = await io.fromCache(`compiled/${this.pslAssetKey}`); + if ( psl.fromSelfie(selfie) ) { return; } } catch (reason) { ubolog(reason); } @@ -1240,7 +1235,7 @@ onBroadcast(msg => { µb.compilePublicSuffixList = function(content) { const psl = publicSuffixList; psl.parse(content, punycode.toASCII); - io.put(`compiled/${this.pslAssetKey}`, psl.toSelfie(sparseBase64)); + return io.toCache(`compiled/${this.pslAssetKey}`, psl.toSelfie()); }; /******************************************************************************/ @@ -1260,39 +1255,24 @@ onBroadcast(msg => { if ( µb.inMemoryFilters.length !== 0 ) { return; } if ( Object.keys(µb.availableFilterLists).length === 0 ) { return; } await Promise.all([ - io.put( - 'selfie/main', - JSON.stringify({ - magic: µb.systemSettings.selfieMagic, - availableFilterLists: µb.availableFilterLists, - }) - ), - redirectEngine.toSelfie('selfie/redirectEngine'), - staticExtFilteringEngine.toSelfie( - 'selfie/staticExtFilteringEngine' + io.toCache('selfie/main', { + magic: µb.systemSettings.selfieMagic, + availableFilterLists: µb.availableFilterLists, + }), + io.toCache('selfie/staticExtFilteringEngine', + staticExtFilteringEngine.toSelfie() ), - staticNetFilteringEngine.toSelfie(io, - 'selfie/staticNetFilteringEngine' + io.toCache('selfie/staticNetFilteringEngine', + staticNetFilteringEngine.toSelfie() ), ]); lz4Codec.relinquish(); µb.selfieIsInvalid = false; + ubolog(`Selfie was created`); }; const loadMain = async function() { - const details = await io.get('selfie/main'); - if ( - details instanceof Object === false || - typeof details.content !== 'string' || - details.content === '' - ) { - return false; - } - let selfie; - try { - selfie = JSON.parse(details.content); - } catch(ex) { - } + const selfie = await io.fromCache('selfie/main'); if ( selfie instanceof Object === false ) { return false; } if ( selfie.magic !== µb.systemSettings.selfieMagic ) { return false; } if ( selfie.availableFilterLists instanceof Object === false ) { return false; } @@ -1306,12 +1286,11 @@ onBroadcast(msg => { try { const results = await Promise.all([ loadMain(), - redirectEngine.fromSelfie('selfie/redirectEngine'), - staticExtFilteringEngine.fromSelfie( - 'selfie/staticExtFilteringEngine' + io.fromCache('selfie/staticExtFilteringEngine').then(selfie => + staticExtFilteringEngine.fromSelfie(selfie) ), - staticNetFilteringEngine.fromSelfie(io, - 'selfie/staticNetFilteringEngine' + io.fromCache('selfie/staticNetFilteringEngine').then(selfie => + staticNetFilteringEngine.fromSelfie(selfie) ), ]); if ( results.every(v => v) ) { @@ -1325,10 +1304,11 @@ onBroadcast(msg => { return false; }; - const destroy = function() { + const destroy = function(options = {}) { if ( µb.selfieIsInvalid === false ) { - io.remove(/^selfie\//); + io.remove(/^selfie\//, options); µb.selfieIsInvalid = true; + ubolog(`Selfie was removed`); } if ( µb.wakeupReason === 'createSelfie' ) { µb.wakeupReason = ''; @@ -1594,8 +1574,7 @@ onBroadcast(msg => { if ( topic === 'after-asset-updated' ) { // Skip selfie-related content. if ( details.assetKey.startsWith('selfie/') ) { return; } - const cached = typeof details.content === 'string' && - details.content !== ''; + const cached = typeof details.content === 'string' && details.content !== ''; if ( this.availableFilterLists.hasOwnProperty(details.assetKey) ) { if ( cached ) { if ( this.selectedFilterLists.indexOf(details.assetKey) !== -1 ) { @@ -1604,8 +1583,7 @@ onBroadcast(msg => { details.content ); if ( this.badLists.has(details.assetKey) === false ) { - io.put( - 'compiled/' + details.assetKey, + io.toCache(`compiled/${details.assetKey}`, this.compileFilters(details.content, { assetKey: details.assetKey, trustedSource: this.isTrustedList(details.assetKey), diff --git a/src/lib/publicsuffixlist/publicsuffixlist.js b/src/lib/publicsuffixlist/publicsuffixlist.js index 6483c89e2321c..87910d4b0bbdf 100644 --- a/src/lib/publicsuffixlist/publicsuffixlist.js +++ b/src/lib/publicsuffixlist/publicsuffixlist.js @@ -13,8 +13,6 @@ /*! Home: https://github.com/gorhill/publicsuffixlist.js -- GPLv3 APLv2 */ -/* globals WebAssembly, exports:true, module */ - 'use strict'; /******************************************************************************* @@ -70,7 +68,7 @@ const RULES_PTR_SLOT = 100; // 100 / 400 (400-256=144 => 144>128) const SUFFIX_NOT_FOUND_SLOT = 399; // -- / 399 (safe, see above) const CHARDATA_PTR_SLOT = 101; // 101 / 404 const EMPTY_STRING = ''; -const SELFIE_MAGIC = 2; +const SELFIE_MAGIC = 3; let wasmMemory; let pslBuffer32; @@ -499,9 +497,7 @@ const toSelfie = function(encoder) { } return { magic: SELFIE_MAGIC, - buf32: Array.from( - new Uint32Array(pslBuffer8.buffer, 0, pslByteLength >>> 2) - ), + buf32: pslBuffer32.subarray(0, pslByteLength >> 2), }; }; @@ -524,7 +520,7 @@ const fromSelfie = function(selfie, decoder) { } else if ( selfie instanceof Object && selfie.magic === SELFIE_MAGIC && - Array.isArray(selfie.buf32) + selfie.buf32 instanceof Uint32Array ) { byteLength = selfie.buf32.length << 2; allocateBuffers(byteLength);