-
Notifications
You must be signed in to change notification settings - Fork 93
/
adblocker.ts
676 lines (592 loc) · 21.4 KB
/
adblocker.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
/*!
* Copyright (c) 2017-present Cliqz GmbH. All rights reserved.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
import { Browser, Runtime, WebRequest, WebNavigation } from 'webextension-polyfill-ts';
import { parse } from 'tldts-experimental';
import {
FiltersEngine,
HTMLSelector,
isUTF8,
Request,
StreamingHtmlFilter,
} from '@cliqz/adblocker';
import { IBackgroundCallback, IMessageFromBackground } from '@cliqz/adblocker-content';
/**
 * Subset of the `webRequest.onBeforeRequest` details consumed by this module,
 * extended with the optional `initiator` field.
 */
export type OnBeforeRequestDetailsType = Pick<
WebRequest.OnBeforeRequestDetailsType,
'url' | 'type' | 'requestId' | 'tabId' | 'originUrl' | 'documentUrl'
> & {
initiator?: string; // Chromium only
};
/**
 * Subset of the `webRequest.onHeadersReceived` details consumed by this module.
 */
type OnHeadersReceivedDetailsType = Pick<
WebRequest.OnHeadersReceivedDetailsType,
'responseHeaders' | 'url' | 'type' | 'tabId' | 'requestId'
>;
/**
 * `WebRequest.StreamFilter` with explicitly typed event handler properties.
 * Only `ondata` declares the shape of its event (an `ArrayBuffer` payload in
 * `data`); the other handlers accept any event.
 */
type StreamFilter = WebRequest.StreamFilter & {
onstart: (event: any) => void;
ondata: (event: { data: ArrayBuffer }) => void;
onstop: (event: any) => void;
onerror: (event: any) => void;
};
/**
 * Detect Firefox by looking for the `Firefox` token in the user agent string.
 *
 * Any failure while reading the user agent (for instance when `navigator` is
 * not defined in the current environment) is treated as "not Firefox".
 */
function isFirefox(): boolean {
  try {
    const { userAgent } = navigator;
    return userAgent.includes('Firefox');
  } catch {
    return false;
  }
}
/**
 * Scriptlets can be injected in two ways ("push" vs "pull"); this function
 * picks one based on the environment:
 *
 * 1) "Push": the adblocker listens to "onCommitted" events and executes the
 *    scripts through the tabs.executeScript API.
 * 2) "Pull": the adblocker relies on a content script, which runs before the
 *    page loads (and on DOM changes), fetches scriptlets from the background
 *    and runs them.
 *
 * Note:
 * - the "push" model requires permission to the webNavigation API. When it is
 *   not available, the implementation falls back to the "pull" model, which
 *   does not have this requirement.
 *
 * @returns `true` when the "push" model should be used.
 */
function usePushScriptsInjection(): boolean {
  // Nothing fundamental prevents "push" from working on Firefox. But since
  // there are no known issues with Firefox, the old, proven technique is kept
  // there until there is evidence that changes are needed.
  //
  // Take YouTube as an example: on Chrome (or forks like Edge), the adblocker
  // will sometimes fail to block ads if you reload the page multiple times;
  // on Firefox, the same steps do not seem to trigger any ads.
  const runningOnFirefox = isFirefox();
  return !runningOnFirefox;
}

// Evaluated once, when the module is loaded.
const USE_PUSH_SCRIPTS_INJECTION = usePushScriptsInjection();
/**
 * Build a `Request` out of the details object handed over by the WebRequest
 * API.
 *
 * The source URL is the first non-empty value among `initiator`, `originUrl`
 * and `documentUrl`. When none is available, the `sourceUrl` key is left out
 * of the raw details entirely.
 *
 * @param details - details of the request, as received from a WebRequest listener.
 * @returns the result of `Request.fromRawDetails` for these details.
 */
export function fromWebRequestDetails(details: OnBeforeRequestDetailsType): Request {
  const { requestId, tabId, type, url } = details;
  const sourceUrl = details.initiator || details.originUrl || details.documentUrl;

  if (!sourceUrl) {
    return Request.fromRawDetails({
      _originalRequestDetails: details,
      requestId,
      tabId,
      type,
      url,
    });
  }

  return Request.fromRawDetails({
    _originalRequestDetails: details,
    requestId,
    sourceUrl,
    tabId,
    type,
    url,
  });
}
/**
 * Helper used when injecting custom CSP headers to update `responseHeaders`.
 *
 * All existing `content-security-policy` headers carrying a textual `value`
 * are removed and their policies appended (separated by `; `) after
 * `policies` in a single new header, which is added at the end of the list.
 *
 * CSP headers without a textual `value` (e.g. only a `binaryValue`) cannot be
 * merged as text. They are left in place untouched instead of being replaced
 * by the literal text "undefined".
 *
 * @param details - details received from `webRequest.onHeadersReceived`;
 *   `details.responseHeaders` is not mutated.
 * @param policies - CSP directives to inject, or `undefined` if there are none.
 * @returns an empty response when `policies` is `undefined`, otherwise a
 *   response carrying the updated `responseHeaders`.
 */
export function updateResponseHeadersWithCSP(
  details: OnHeadersReceivedDetailsType,
  policies: string | undefined,
): WebRequest.BlockingResponse {
  if (policies === undefined) {
    return {};
  }

  const CSP_HEADER_NAME = 'content-security-policy';
  const originalHeaders = details.responseHeaders || [];

  // Injected policies come first, followed by the ones sent by the server.
  const combinedPolicies: string[] = [policies];
  const responseHeaders: typeof originalHeaders = [];

  for (const header of originalHeaders) {
    if (header.name.toLowerCase() === CSP_HEADER_NAME && typeof header.value === 'string') {
      // Textual CSP header: fold it into the combined policy and drop it.
      combinedPolicies.push(header.value);
    } else {
      // Unrelated header, or CSP header which cannot be merged as text.
      responseHeaders.push(header);
    }
  }

  // Add updated CSP header
  responseHeaders.push({ name: CSP_HEADER_NAME, value: combinedPolicies.join('; ') });

  return { responseHeaders };
}
/**
 * Enable stream HTML filter on request `id` using `rules`.
 *
 * A stream filter is obtained by calling `filterResponseData(id)`. Every chunk
 * of the response is decoded to text, passed through a `StreamingHtmlFilter`
 * created from `rules`, encoded again and written back to the stream filter.
 * If a chunk fails the UTF-8 check or processing throws, filtering stops:
 * pending output is flushed, the current chunk is written unmodified and the
 * stream filter is disconnected.
 *
 * @param filterResponseData - the `webRequest.filterResponseData` function to
 *   use to create the stream filter.
 * @param request - object whose `id` identifies the request to filter (only
 *   `id` is read).
 * @param rules - HTML selectors handed over to `StreamingHtmlFilter`.
 */
export function filterRequestHTML(
filterResponseData: Browser['webRequest']['filterResponseData'],
{ id }: { id: string },
rules: HTMLSelector[],
): void {
// Create filter to observe loading of resource
const filter = filterResponseData(id) as StreamFilter;
const decoder = new TextDecoder();
const encoder = new TextEncoder();
const htmlFilter = new StreamingHtmlFilter(rules);
// Flush whatever is still buffered, forward the data of `event` (if any)
// and disconnect the stream filter.
const teardown = (event: { data?: ArrayBuffer }) => {
// Before disconnecting our streaming filter, we need to be extra careful
// and make sure that no data remains in either our streaming `TextDecoder`
// instance or the HTML filterer.
//
// In case any data remains, we write it to filter.
try {
const remaining = htmlFilter.write(decoder.decode()) + htmlFilter.flush();
if (remaining.length !== 0) {
filter.write(encoder.encode(remaining));
}
} catch (ex) {
// If we reach this point, there is probably no way we can recover...
console.error('Failed to flush HTML filterer', ex);
}
// If latest event had some data attached (i.e. 'ondata' event), we write
// it to the filter as-is (it does not go through the HTML filterer) before
// disconnecting.
if (event.data !== undefined) {
filter.write(event.data);
}
// Disconnect streaming filter.
filter.disconnect();
};
// NOTE(review): only `ondata` and `onstop` are assigned below; no `onerror`
// handler is installed here.
filter.ondata = (event) => {
// On any chunk of data we implement a very fast UTF-8 validity check to make
// sure that we will be able to decode it. Note that in theory it should be
// possible that a chunk ends on the boundary of a multi-byte UTF-8 code and
// this check would fail?
if (isUTF8(new Uint8Array(event.data)) === false) {
return teardown(event);
}
try {
// Decode in streaming mode, filter the HTML, then encode and forward it.
filter.write(encoder.encode(htmlFilter.write(decoder.decode(event.data, { stream: true }))));
} catch (ex) {
// If we fail to decode a chunk, we need to be extra conservative and stop
// listening to streaming response. Teardown takes care of flushing any
// data remaining in the pipeline and disconnecting the listener.
return teardown(event);
}
};
// End of the response: flush what is left (there is no chunk attached).
filter.onstop = () => {
teardown({});
};
}
/**
 * Binds one `browser` object (in practice the `chrome` or `browser` global of
 * a WebExtension) to one `WebExtensionBlocker`, and takes care of registering
 * and unregistering the listeners needed to block in that browser.
 */
export class BlockingContext {
  // Listeners are created once in the constructor so that the exact same
  // function references are used for `addListener` and `removeListener`.
  private readonly onBeforeRequest: (
    details: OnBeforeRequestDetailsType,
  ) => WebRequest.BlockingResponse;

  private readonly onHeadersReceived: (
    details: OnHeadersReceivedDetailsType,
  ) => WebRequest.BlockingResponse;

  private readonly onRuntimeMessage: (
    msg: IBackgroundCallback & { action?: string },
    sender: Runtime.MessageSender,
  ) => Promise<IMessageFromBackground | {}>;

  // Only defined when scriptlets are "pushed" on navigation events.
  private readonly onCommittedHandler:
    | ((details: WebNavigation.OnCommittedDetailsType) => void)
    | undefined;

  /**
   * @param browser - WebExtension API object in which blocking takes place.
   * @param blocker - engine to which all events are delegated.
   */
  constructor(
    private readonly browser: Browser,
    private readonly blocker: WebExtensionBlocker,
  ) {
    this.onBeforeRequest = (details) => blocker.onBeforeRequest(browser, details);
    this.onHeadersReceived = (details) => blocker.onHeadersReceived(browser, details);
    this.onRuntimeMessage = (msg, sender) => blocker.onRuntimeMessage(browser, msg, sender);

    const pushRequested =
      this.blocker.config.enablePushInjectionsOnNavigationEvents === true &&
      USE_PUSH_SCRIPTS_INJECTION;
    if (!pushRequested) {
      return;
    }

    // Push injection was requested but the webNavigation API is not usable.
    if (!this.browser.webNavigation?.onCommitted) {
      console.warn(
        'Consider adding the "webNavigation" permission in the manifest to improve the reliability of the adblocker. ' +
          'If you do not want to see this warning, turn off the "enablePushInjectionsOnNavigationEvents" flag.',
      );
      return;
    }

    this.onCommittedHandler = (details) => blocker.onCommittedHandler(browser, details);
  }

  /**
   * Register listeners according to the configuration of the blocker and the
   * APIs exposed by the browser.
   */
  public enable(): void {
    const { browser, blocker } = this;

    // Network filtering: blocking/redirecting requests and CSP injection.
    if (blocker.config.loadNetworkFilters === true) {
      const webRequest = browser.webRequest;
      if (webRequest !== undefined) {
        webRequest.onBeforeRequest.addListener(this.onBeforeRequest, { urls: ['<all_urls>'] }, [
          'blocking',
        ]);
        webRequest.onHeadersReceived.addListener(
          this.onHeadersReceived,
          { urls: ['<all_urls>'], types: ['main_frame'] },
          ['blocking', 'responseHeaders'],
        );
      }
    }

    // Cosmetic filtering: messages coming from the content-script.
    if (blocker.config.loadCosmeticFilters === true) {
      const runtime = browser.runtime;
      if (runtime !== undefined && runtime.onMessage !== undefined) {
        runtime.onMessage.addListener(this.onRuntimeMessage);
      }
    }

    // Scriptlets pushed on navigation events, when active.
    const committedListener = this.onCommittedHandler;
    if (committedListener) {
      browser.webNavigation.onCommitted.addListener(committedListener);
    }
  }

  /**
   * Unregister every listener, for each API exposed by the browser
   * (independently of the configuration of the blocker).
   */
  public disable(): void {
    const { browser } = this;

    const webRequest = browser.webRequest;
    if (webRequest !== undefined) {
      webRequest.onBeforeRequest.removeListener(this.onBeforeRequest);
      webRequest.onHeadersReceived.removeListener(this.onHeadersReceived);
    }

    const runtime = browser.runtime;
    if (runtime !== undefined && runtime.onMessage !== undefined) {
      runtime.onMessage.removeListener(this.onRuntimeMessage);
    }

    const committedListener = this.onCommittedHandler;
    if (committedListener) {
      browser.webNavigation.onCommitted.removeListener(committedListener);
    }
  }

  /**
   * `true` when a navigation listener was created, i.e. scriptlets are pushed
   * on "onCommitted" events.
   */
  get pushInjectionsActive(): boolean {
    return this.onCommittedHandler !== undefined;
  }
}
/**
 * Wrap `FiltersEngine` into a WebExtension-friendly helper class. It exposes
 * methods to interface with WebExtension APIs needed to block ads.
 */
export class WebExtensionBlocker extends FiltersEngine {
  // One blocking context per `browser` object in which blocking was enabled.
  private readonly contexts: WeakMap<Browser, BlockingContext> = new WeakMap();

  // ----------------------------------------------------------------------- //
  // Helpers to enable and disable blocking for 'browser'
  // ----------------------------------------------------------------------- //

  /**
   * Start blocking in `browser`. Calling this again for the same `browser`
   * returns the existing context without registering listeners a second time.
   *
   * @param browser - WebExtension API object in which to enable blocking.
   * @returns the blocking context bound to `browser`.
   */
  public enableBlockingInBrowser(browser: Browser): BlockingContext {
    let context: undefined | BlockingContext = this.contexts.get(browser);
    if (context !== undefined) {
      return context;
    }

    // Create new blocking context for `browser`
    context = new BlockingContext(browser, this);
    this.contexts.set(browser, context);
    context.enable();

    return context;
  }

  /**
   * Stop blocking in `browser`.
   *
   * @param browser - WebExtension API object in which to disable blocking.
   * @throws Error if blocking was not enabled for `browser`.
   */
  public disableBlockingInBrowser(browser: Browser): void {
    const context: undefined | BlockingContext = this.contexts.get(browser);
    if (context === undefined) {
      throw new Error('Trying to disable blocking which was not enabled');
    }

    this.contexts.delete(browser);
    context.disable();
  }

  /**
   * Handle a `webNavigation.onCommitted` event by "pushing" the scriptlets
   * matching the hostname of the committed URL into the frame which navigated.
   *
   * @param browser - WebExtension API object used to execute the scripts.
   * @param details - details of the navigation event.
   */
  public onCommittedHandler(
    browser: Browser,
    details: WebNavigation.OnCommittedDetailsType,
  ): void {
    const { hostname, domain } = parse(details.url);
    if (!hostname) {
      return;
    }

    // Find the scriptlets to run and execute them as soon as possible.
    //
    // If possible, everything in this path should be kept synchronously,
    // since the scriptlets will attempt to patch the website while it is
    // already loading. Every additional asynchronous step increases the risk
    // of losing the race (i.e. that the patching is too late to have an effect)
    const { active, scripts } = this.getCosmeticsFilters({
      url: details.url,
      hostname,
      domain: domain || '',
      getBaseRules: false,
      getInjectionRules: true,
      getExtendedRules: false,
      getRulesFromDOM: false,
      getRulesFromHostname: true,
    });

    if (active === false) {
      return;
    }

    if (scripts.length > 0) {
      // The scriptlets were selected for the hostname of the frame which just
      // committed, so they must only run in that frame. This event is also
      // emitted for sub-frames; injecting in all frames would run scriptlets
      // of a sub-frame's hostname in the top frame and in sibling frames.
      this.executeScriptlets(browser, details.tabId, scripts, details.frameId);
    }
  }

  /**
   * @returns `true` if blocking was enabled for `browser` through
   *   `enableBlockingInBrowser` (and not disabled since).
   */
  public isBlockingEnabled(browser: Browser): boolean {
    return this.contexts.has(browser);
  }

  /**
   * @returns `true` if scriptlets are pushed on navigation events for `browser`.
   */
  private pushInjectionsActive(browser: Browser): boolean {
    const context = this.contexts.get(browser);
    if (!context) {
      // This means the browser instance is not controlled by the library directly.
      // For instance, if there is another wrapping layer on top (e.g. Ghostery).
      return false;
    }

    return context.pushInjectionsActive;
  }

  // ----------------------------------------------------------------------- //
  // WebExtensionBlocker-specific additions to FiltersEngine
  //
  // Note: some of these methods internally require access to the 'browser'
  // global in order to perform their function. Because WebExtensionBlocker can
  // be registered in multiple ones (in theory), we do not want to depend either
  // on the global object, or a single instance of 'browser' stored internally
  // (except as part of a BlockingContext which binds one 'browser' object with
  // a WebExtensionBlocker object to perform blocking in this context), so an
  // extra 'browser' argument is often needed.
  // ----------------------------------------------------------------------- //

  /**
   * This method takes care of optionally performing HTML filtering.
   *
   * This can only be done if:
   * 1. Request is 'main_frame'
   * 2. `enableHtmlFiltering` is set to `true`.
   * 3. `browser.webRequest.filterResponseData` (Firefox only!).
   * 4. `TextEncoder` and `TextDecoder` are available.
   *
   * @param browser - WebExtension API object providing `filterResponseData`.
   * @param request - request whose response should be filtered.
   */
  public performHTMLFiltering(browser: Browser, request: Request): void {
    if (
      this.config.enableHtmlFiltering === true &&
      browser.webRequest !== undefined &&
      browser.webRequest.filterResponseData !== undefined &&
      request.isMainFrame() === true &&
      typeof TextDecoder !== 'undefined' &&
      typeof TextEncoder !== 'undefined'
    ) {
      const htmlFilters = this.getHtmlFilters(request);
      if (htmlFilters.length !== 0) {
        filterRequestHTML(browser.webRequest.filterResponseData, request, htmlFilters);
      }
    }
  }

  /**
   * Answer a `getCosmeticsFilters` message coming from the content-script:
   * inject stylesheets in the sender's tab/frame and, when scriptlets are not
   * pushed on navigation events, send scripts back through `sendResponse`.
   * Messages with any other `action` are ignored.
   *
   * @param browser - WebExtension API object used to inject stylesheets.
   * @param msg - message received from the content-script.
   * @param sender - sender of the message; `tab`, `tab.id` and `frameId` are
   *   required.
   * @param sendResponse - callback used to send cosmetics to the content-script.
   * @throws Error if required `sender` information is missing.
   */
  private handleRuntimeMessage = async (
    browser: Browser,
    msg: IBackgroundCallback & { action?: string },
    sender: Runtime.MessageSender,
    sendResponse: (response: IMessageFromBackground) => void,
  ): Promise<void> => {
    const promises: Promise<void>[] = [];

    // Make sure we only listen to messages coming from our content-script
    // based on the value of `action`.
    if (msg.action !== 'getCosmeticsFilters') {
      return;
    }

    if (sender.tab === undefined) {
      throw new Error('required "sender.tab" information is not available');
    }

    if (sender.tab.id === undefined) {
      throw new Error('required "sender.tab.id" information is not available');
    }

    if (sender.frameId === undefined) {
      throw new Error('required "sender.frameId" information is not available');
    }

    // Extract hostname from sender's URL
    const { url = '', frameId } = sender;
    const parsed = parse(url);
    const hostname = parsed.hostname || '';
    const domain = parsed.domain || '';

    // Once per tab/page load we inject base stylesheets. These are always
    // the same for all frames of a given page because they do not depend on
    // a particular domain and cannot be cancelled using unhide rules.
    // Because of this, we specify `allFrames: true` when injecting them so
    // that we do not need to perform this operation for sub-frames.
    if (frameId === 0 && msg.lifecycle === 'start') {
      const { active, styles } = this.getCosmeticsFilters({
        domain,
        hostname,
        url,

        classes: msg.classes,
        hrefs: msg.hrefs,
        ids: msg.ids,

        // This needs to be done only once per tab
        getBaseRules: true,
        getInjectionRules: false,
        getExtendedRules: false,
        getRulesFromDOM: false,
        getRulesFromHostname: false,
      });

      if (active === false) {
        return;
      }

      promises.push(
        this.injectStylesWebExtension(browser, styles, {
          tabId: sender.tab.id,
          allFrames: true,
        }),
      );
    }

    // Separately, requests cosmetics which depend on the page itself
    // (either because of the hostname or content of the DOM). Content script
    // logic is responsible for returning information about lists of classes,
    // ids and hrefs observed in the DOM. MutationObserver is also used to
    // make sure we can react to changes.
    {
      const { active, styles, scripts, extended } = this.getCosmeticsFilters({
        domain,
        hostname,
        url,

        classes: msg.classes,
        hrefs: msg.hrefs,
        ids: msg.ids,

        // This needs to be done only once per frame
        getBaseRules: false,
        getInjectionRules: msg.lifecycle === 'start',
        getExtendedRules: msg.lifecycle === 'start',
        getRulesFromHostname: msg.lifecycle === 'start',

        // This will be done every time we get information about DOM mutation
        getRulesFromDOM: msg.lifecycle === 'dom-update',
      });

      if (active === false) {
        return;
      }

      promises.push(
        this.injectStylesWebExtension(browser, styles, { tabId: sender.tab.id, frameId }),
      );

      // Inject scripts from content script
      //
      // NOTE(review): `extended` is only sent along with scripts, i.e. it is
      // not delivered when there are no scripts or when push injections are
      // active. Confirm this is intended.
      if (scripts.length !== 0 && !this.pushInjectionsActive(browser)) {
        sendResponse({
          active,
          extended,
          scripts,
          styles: '',
        });
      }
    }

    await Promise.all(promises);
  };

  /**
   * Deal with request cancellation (`{ cancel: true }`) and redirection (`{ redirectUrl: '...' }`).
   *
   * Main frame requests are never cancelled or redirected; they only go
   * through (optional) HTML filtering.
   */
  public onBeforeRequest = (
    browser: Browser,
    details: OnBeforeRequestDetailsType,
  ): WebRequest.BlockingResponse => {
    const request = fromWebRequestDetails(details);
    if (this.config.guessRequestTypeFromUrl === true && request.type === 'other') {
      request.guessTypeOfRequest();
    }

    if (request.isMainFrame()) {
      this.performHTMLFiltering(browser, request);
      return {};
    }

    const { redirect, match } = this.match(request);

    if (redirect !== undefined) {
      return { redirectUrl: redirect.dataUrl };
    } else if (match === true) {
      return { cancel: true };
    }

    return {};
  };

  /**
   * Inject the CSP directives matching the request (if any) into the headers
   * of the response.
   */
  public onHeadersReceived = (
    _: Browser,
    details: OnHeadersReceivedDetailsType,
  ): WebRequest.BlockingResponse => {
    return updateResponseHeadersWithCSP(
      details,
      this.getCSPDirectives(fromWebRequestDetails(details)),
    );
  };

  /**
   * Promise-based wrapper around `handleRuntimeMessage`. The returned promise
   * resolves with the response sent by the handler if there is one, with `{}`
   * once the handler completed without sending any, and rejects if the
   * handler failed before a response was sent.
   */
  public onRuntimeMessage = (
    browser: Browser,
    msg: IBackgroundCallback & { action?: string },
    sender: Runtime.MessageSender,
  ): Promise<IMessageFromBackground | {}> => {
    return new Promise((resolve, reject) => {
      this.handleRuntimeMessage(browser, msg, sender, resolve)
        .catch(reject)
        .finally(() => resolve({}));
    });
  };

  /**
   * Inject `styles` as a user stylesheet using `tabs.insertCSS`.
   *
   * @param browser - WebExtension API object providing the `tabs` API.
   * @param styles - stylesheet to inject; nothing happens if it is empty.
   * @param target - tab to inject into, optionally restricted to `frameId`
   *   or extended to all frames with `allFrames`.
   * @throws Error if `tabs` or `tabs.insertCSS` is not available.
   */
  private async injectStylesWebExtension(
    browser: Browser,
    styles: string,
    {
      tabId,
      frameId,
      allFrames = false,
    }: {
      tabId: number;
      frameId?: number;
      allFrames?: boolean;
    },
  ): Promise<void> {
    // Abort if stylesheet is empty.
    if (styles.length === 0) {
      return;
    }

    // Abort if `this.browser.tabs` is not available.
    if (browser.tabs === undefined) {
      throw new Error('required "tabs" API is not defined');
    }

    // Abort if `this.browser.tabs.insertCSS` is not available.
    if (browser.tabs.insertCSS === undefined) {
      throw new Error('required "tabs.insertCSS" API is not defined');
    }

    // Proceed with stylesheet injection. `frameId` is only specified when it
    // is set and different from 0.
    return browser.tabs.insertCSS(
      tabId,
      frameId
        ? {
            allFrames,
            code: styles,
            cssOrigin: 'user',
            frameId,
            matchAboutBlank: true,
            runAt: 'document_start',
          }
        : {
            allFrames,
            code: styles,
            cssOrigin: 'user',
            matchAboutBlank: true,
            runAt: 'document_start',
          },
    );
  }

  /**
   * Run `scripts` in the page context of tab `tabId` using
   * `tabs.executeScript`. Failures reported by the API are logged.
   *
   * @param browser - WebExtension API object providing the `tabs` API.
   * @param tabId - tab in which to run the scriptlets.
   * @param scripts - source code of the scriptlets.
   * @param frameId - frame in which to run the scriptlets. When omitted, they
   *   are injected in all frames of the tab.
   */
  private executeScriptlets(
    browser: Browser,
    tabId: number,
    scripts: string[],
    frameId?: number,
  ): void {
    // Dynamically injected scripts can be difficult to find later in the
    // debugger. Console logs simplify setting up breakpoints if needed.
    const debugMarker: (text: string) => string = this.config.debug
      ? (text) => `console.log('[ADBLOCKER-DEBUG]:', ${JSON.stringify(text)});`
      : () => '';

    // the scriptlet code that contains patches for the website
    const codeRunningInPage = [
      '(function(){',
      debugMarker('run scriptlets (executing in "page world")'),
      `${scripts.join('\n\n')}}`,
      ')()',
    ].join('\n');

    // wrapper to break the "isolated world" so that the patching operates
    // on the website, not on the content script's isolated environment.
    // The page code is URI-encoded to be safely embedded in a template literal.
    const codeRunningInContentScript = [
      '',
      '(function(code) {',
      debugMarker('run injection wrapper (executing in "content script world")'),
      'var script;',
      'try {',
      "script = document.createElement('script');",
      'script.appendChild(document.createTextNode(decodeURIComponent(code)));',
      '(document.head || document.documentElement).appendChild(script);',
      '} catch (ex) {',
      "console.error('Failed to run script', ex);",
      '}',
      'if (script) {',
      'if (script.parentNode) {',
      'script.parentNode.removeChild(script);',
      '}',
      "script.textContent = '';",
      '}',
      `})(\`${encodeURIComponent(codeRunningInPage)}\`);`,
    ].join('\n');

    browser.tabs
      .executeScript(
        tabId,
        frameId !== undefined
          ? {
              code: codeRunningInContentScript,
              runAt: 'document_start',
              frameId,
              matchAboutBlank: true,
            }
          : {
              code: codeRunningInContentScript,
              runAt: 'document_start',
              allFrames: true,
              matchAboutBlank: true,
            },
      )
      .catch((err) => {
        console.error('Failed to inject scriptlets', err);
      });
  }
}
// Re-export symbols from @cliqz/adblocker
export * from '@cliqz/adblocker';