Skip to content

Commit

Permalink
* Added DSL to describe extraction rules
Browse files Browse the repository at this point in the history
* Allow to overwrite headers in double-fetch to bypass consent dialogs
  • Loading branch information
philipp-classen committed Aug 4, 2022
1 parent 011edac commit 70272c3
Show file tree
Hide file tree
Showing 20 changed files with 1,672 additions and 140 deletions.
8 changes: 6 additions & 2 deletions configs/ghostery.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ module.exports = {
publish: publish.toEdge('browser-core', 'ghostery'),
sourceMaps: false,
format: 'common',
settings: { ...urls,
settings: {
...urls,
channel: 'CH80',
MSGCHANNEL: 'web-extension',
URL_CHANGED_EVENT_DEBOUNCE: 500,
Expand All @@ -33,7 +34,10 @@ module.exports = {
},
},
HUMAN_WEB_LITE_COLLECTOR_VIA_PROXY: 'https://collector-hpn.ghostery.net',
HUMAN_WEB_LITE_COLLECTOR_DIRECT: 'https://collector-hpn.ghostery.net' },
HUMAN_WEB_LITE_COLLECTOR_DIRECT: 'https://collector-hpn.ghostery.net',
HUMAN_WEB_LITE_PATTERNS: 'https://cdn2.ghostery.com/human-web-android/patterns.json',
HUMAN_WEB_LITE_AUTO_TRIGGER: true
},
default_prefs: {
'modules.human-web.enabled': true,
'modules.antitracking.enabled': true,
Expand Down
17 changes: 2 additions & 15 deletions modules/human-web-lite/sources/html-parser.es
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,10 @@
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

// TODO: here we need something that works in react-native.
// The following code is copied from human-web and probably
// will not work outside of the browser.
//
// If there are problems, maybe one of the libraries mentioned here
// can be used on Mobile:
// * https://stackoverflow.com/q/38343951/783510
//
// Note: In jsdom, this implementation should work:
//
// return new JSDOM(html).window.document;
//
// However, it is hard to use jsdom outside of NodeJs.
// I failed to get it working in the Browser, for example.
//
import window from '../core/globals-window';

// TODO: consider using linkedom here as well
// (Note: we need something that works in react-native)
export default function parseHtml(html) {
if (!parseHtml.domParser) {
parseHtml.domParser = new window.DOMParser();
Expand Down
85 changes: 76 additions & 9 deletions modules/human-web-lite/sources/http.es
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,90 @@

const SECOND = 1000;

// TODO: Can we use the "AbortController" API on Mobile? Otherwise,
// running jobs in background might be difficult because we risk that
// the app will be killed.
export async function anonymousHttpGet(url, { timeout = 15 * SECOND } = {}) {
/**
* Performs an HTTP GET request.
*
* Optional:
* - headers: object mapping header names to the values that should overwrite them
* - redirect: 'follow' or 'manual' (default; i.e. redirects are errors)
*/
export async function anonymousHttpGet(url, {
headers = null,
redirect = null,
timeout = 15 * SECOND,
} = {}) {
const options = {
credentials: 'omit',
mode: 'no-cors',
redirect: 'manual',
redirect: redirect || 'manual',

// TODO: Or maybe this does work? It is not part of the fetch standard,
// but I have seen it in some react-native examples.
// If it works, it could be used if AbortController is not available.
timeout,
};
const response = await fetch(url, options);
if (!response.ok) {
throw new Error(`Failed to fetch url ${url}: ${response.statusText}`);

// The following code overwrites the headers of the request.
// Note that "fetch" allows overwriting headers in a simple declarative way,
// but unfortunately it is limited. For example, it is not possible to
// overwrite the cookie headers. The following code will work for all
// types of headers.
//
// The matching logic is not perfect but should be fairly accurate.
// Ideally, we would want to run the handler only for the request that we
// are about to trigger, but not for any other requests to avoid unintended
// side-effects. To mitigate the risk, uninstall the handler at the first
// opportunity: either when it is called or when the request has finished
// (and we know the handler will never be called).
let webRequestHandler;
const uninstallHandler = () => {
if (webRequestHandler) {
chrome.webRequest.onBeforeSendHeaders.removeListener(webRequestHandler);
webRequestHandler = null;
}
};
const headerNames = Object.keys(headers || {});
if (headerNames.length > 0) {
webRequestHandler = (details) => {
if (details.url !== url || details.type !== 'xmlhttprequest' || details.method !== 'GET') {
// this is not the request that we intended to trigger: leave it unchanged
return {};
}

// match: now we can already deregister the listener
// (it should not be executed multiple times)
uninstallHandler();
const normalizedHeaders = headerNames.map(x => x.toLowerCase());

/* eslint-disable no-param-reassign */
details.requestHeaders = details.requestHeaders.filter(
header => !normalizedHeaders.includes(header.name.toLowerCase())
);

headerNames.forEach((name) => {
details.requestHeaders.push({
name,
value: headers[name],
});
});

return {
requestHeaders: details.requestHeaders
};
};
chrome.webRequest.onBeforeSendHeaders.addListener(webRequestHandler, {
urls: [url]
}, ['blocking', 'requestHeaders']);
}

try {
const response = await fetch(url, options);
if (!response.ok) {
throw new Error(`Failed to fetch url ${url}: ${response.statusText}`);
}
uninstallHandler();
return response.text();
} finally {
uninstallHandler();
}
return response.text();
}
31 changes: 26 additions & 5 deletions modules/human-web-lite/sources/human-web-lite.es
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

import Patterns from './patterns';
import PatternsUpdater from './patterns-updater';
import Sanitizer from './sanitizer';
import UrlAnalyzer from './url-analyzer';
import MessageSender from './message-sender';
Expand All @@ -21,8 +23,16 @@ export default class HumanWebLite {
// to collect data.
this.isActive = false;

this.patterns = new Patterns();
this.patternsUpdater = new PatternsUpdater({
config,
patterns: this.patterns,
storage,
storageKey: 'patterns',
});

this.sanitizer = new Sanitizer(config);
this.urlAnalyzer = new UrlAnalyzer();
this.urlAnalyzer = new UrlAnalyzer(this.patterns);
this.persistedHashes = new PersistedHashes({
storage,
storageKey: 'deduplication_hashes',
Expand All @@ -32,16 +42,15 @@ export default class HumanWebLite {
this.messageSender = new MessageSender(this.duplicateDetector, hpn);
this.searchExtractor = new SearchExtractor({
config,
patterns: this.patterns,
sanitizer: this.sanitizer,
persistedHashes: this.persistedHashes,
});
this.jobScheduler = new JobScheduler(this.messageSender, this.searchExtractor);
}

async init() {
// TODO: In a feature-complete implementation, you would need
// to have a mechanism to keep the extraction patterns up-to-date.
// As we have hard-coded patterns, there is nothing to do here.
await this.patternsUpdater.init();
this.isActive = true;
}

Expand Down Expand Up @@ -78,7 +87,19 @@ export default class HumanWebLite {
return true;
}

processPendingJobs() {
async processPendingJobs() {
await this._ensurePatternsAreUpToDate();
return this.jobScheduler.processPendingJobs();
}

async _ensurePatternsAreUpToDate() {
// Asks the PatternsUpdater to update and waits for it to finish.
//
// Currently, the PatternsUpdater needs to be triggered externally.
// This implementation detail could be avoided if the PatternsUpdater
// could use a browser API like timers in persistent background pages
// or the Alarms API (Manifest V3).
// The "update" function is a quick operation, except in the rare
// situation where the patterns are outdated and need to be fetched.
// Thus, there should be no harm in calling it here.
await this.patternsUpdater.update();
}
}
Loading

0 comments on commit 70272c3

Please sign in to comment.