Skip to content

Commit

Permalink
Remove tldts dependency and allow to plug any implementation instead
Browse files Browse the repository at this point in the history
* Adblocker does not include tldts by default anymore
* API now expects either an already constructed Request as argument, or both
  hostname and domain when needed (e.g.: getCosmeticsFilters)
* A makeRequest helper is provided to construct Request objects
* [BREAKING] engine.match expects a `Request` as argument
* [BREAKING] engine.matchAll expects a `Request` as argument
* [BREAKING] engine.getCSPDirectives expects a `Request` as argument
* [BREAKING] engine.getCosmeticsFilters expects a new `domain` argument
* [BREAKING] `Request`'s constructor does not apply default values
  anymore and expects all arguments to be provided and initialized. You
  can now use `makeRequest` to reproduce the previous behavior of `new
  Request`.
  • Loading branch information
remusao committed Jan 8, 2019
1 parent 730f0d4 commit 86a7eeb
Show file tree
Hide file tree
Showing 11 changed files with 266 additions and 114 deletions.
19 changes: 13 additions & 6 deletions example/background.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { getDomain, getHostname } from 'tldts';
import * as adblocker from '../index';

/**
Expand Down Expand Up @@ -96,11 +97,14 @@ function requestFromDetails({
if (tabs.has(tabId)) {
source = tabs.get(tabId).source;
}
return {
sourceUrl: source,
type,
url,
};
return adblocker.makeRequest(
{
sourceUrl: source,
type,
url,
},
{ getDomain, getHostname },
);
}

loadAdblocker().then((engine) => {
Expand Down Expand Up @@ -157,7 +161,10 @@ loadAdblocker().then((engine) => {

// Answer to content-script with a list of nodes
if (msg.action === 'getCosmeticsFilters') {
const { active, blockedScripts, styles, scripts } = engine.getCosmeticsFilters(hostname);
const { active, blockedScripts, styles, scripts } = engine.getCosmeticsFilters(
hostname,
getDomain(hostname) || '',
);
if (active === false) {
return;
}
Expand Down
2 changes: 1 addition & 1 deletion index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export { default as CosmeticsInjection } from './src/cosmetics-injection';
// Blocking
export { default as FiltersEngine } from './src/engine/engine';
export { default as ReverseIndex } from './src/engine/reverse-index';
export { default as Request } from './src/request';
export { default as Request, makeRequest } from './src/request';
export { deserializeEngine } from './src/serialization';

export { default as matchCosmeticFilter } from './src/matching/cosmetics';
Expand Down
10 changes: 4 additions & 6 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,13 @@
"rollup": "^1.0.1",
"rollup-plugin-commonjs": "^9.2.0",
"rollup-plugin-node-resolve": "^4.0.0",
"tldts": "^4.0.0",
"ts-jest": "^23.10.5",
"tslint": "^5.12.0",
"typescript": "^3.2.2"
},
"dependencies": {
"punycode": "^2.1.1",
"tldts": "^3.1.1",
"tslib": "^1.9.3"
}
}
2 changes: 1 addition & 1 deletion rollup.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ export default [
// Commonjs and ES module bundles (without third-party deps)
{
input: './build/index.js',
external: ['tldts', 'tslib', 'punycode'],
external: ['tslib', 'punycode'],
output: [
{ file: pkg.module, format: 'es' },
{ file: pkg.main, format: 'cjs' },
Expand Down
4 changes: 2 additions & 2 deletions src/engine/bucket/cosmetics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ export default class CosmeticFilterBucket {
this.size = this.hostnameIndex.size + this.genericRules.length;
}

public getCosmeticsFilters(hostname: string) {
public getCosmeticsFilters(hostname: string, domain: string) {
const disabledRules = new Set();
const rules: CosmeticFilter[] = [];

// Collect rules specifying a domain
this.hostnameIndex.iterMatchingFilters(tokenizeHostnames(hostname), (rule: CosmeticFilter) => {
if (matchCosmeticFilter(rule, hostname)) {
if (matchCosmeticFilter(rule, hostname, domain)) {
if (rule.isUnhide()) {
disabledRules.add(rule.getSelector());
} else {
Expand Down
20 changes: 6 additions & 14 deletions src/engine/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { CosmeticFilter } from '../parsing/cosmetic-filter';
import IFilter from '../parsing/interface';
import { parseJSResource, parseList } from '../parsing/list';
import { NetworkFilter } from '../parsing/network-filter';
import Request, { IRequestInitialization, RequestType } from '../request';
import Request, { RequestType } from '../request';
import { serializeEngine } from '../serialization';

import CosmeticFilterBucket from './bucket/cosmetics';
Expand Down Expand Up @@ -246,13 +246,13 @@ export default class FilterEngine {
// this.cosmetics.optimizeAheadOfTime();
}

public getCosmeticsFilters(hostname: string) {
public getCosmeticsFilters(hostname: string, domain: string | null | undefined) {
const styles: string[] = [];
const scripts: string[] = [];
const blockedScripts: string[] = [];

if (this.loadCosmeticFilters) {
const rules = this.cosmetics.getCosmeticsFilters(hostname);
const rules = this.cosmetics.getCosmeticsFilters(hostname, domain || '');
for (let i = 0; i < rules.length; i += 1) {
const rule: CosmeticFilter = rules[i];

Expand All @@ -277,9 +277,7 @@ export default class FilterEngine {
};
}

public matchAll(rawRequest: Partial<IRequestInitialization>): Set<NetworkFilter> {
const request = new Request(rawRequest);

public matchAll(request: Request): Set<NetworkFilter> {
const filters: NetworkFilter[] = [];
if (request.isSupported) {
filters.push(...this.importants.matchAll(request));
Expand All @@ -292,12 +290,11 @@ export default class FilterEngine {
return new Set(filters);
}

public getCSPDirectives(rawRequest: Partial<IRequestInitialization>): string | undefined {
public getCSPDirectives(request: Request): string | undefined {
if (!this.loadNetworkFilters) {
return undefined;
}

const request = new Request(rawRequest);
if (request.isSupported !== true || request.type !== RequestType.document) {
return undefined;
}
Expand Down Expand Up @@ -325,7 +322,7 @@ export default class FilterEngine {
}

public match(
rawRequest: Partial<IRequestInitialization>,
request: Request,
): {
match: boolean;
redirect?: string;
Expand All @@ -336,11 +333,6 @@ export default class FilterEngine {
return { match: false };
}

// Transforms { url, sourceUrl, cpt } into a more complete request context
// containing domains, general domains and tokens for this request. This
// context will be used during the matching in the engine.
const request = new Request(rawRequest);

let filter: NetworkFilter | undefined;
let exception: NetworkFilter | undefined;
let redirect: string | undefined;
Expand Down
61 changes: 38 additions & 23 deletions src/matching/cosmetics.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
import { getPublicSuffix } from 'tldts';

import { CosmeticFilter } from '../parsing/cosmetic-filter';

/* Checks that hostnamePattern matches at the end of the hostname.
* Partial matches are allowed, but hostname should be a valid
* subdomain of hostnamePattern.
*/
function checkHostnamesPartialMatch(
hostname: string,
hostnamePattern: string,
): boolean {
function checkHostnamesPartialMatch(hostname: string, hostnamePattern: string): boolean {
if (hostname.endsWith(hostnamePattern)) {
const patternIndex = hostname.length - hostnamePattern.length;
if (patternIndex === 0 || hostname[patternIndex - 1] === '.') {
Expand All @@ -28,23 +23,13 @@ function checkHostnamesPartialMatch(
*/
function matchHostname(
hostname: string,
hostnameWithoutPublicSuffix: string | null,
hostnamePattern: string,
): boolean {
if (hostnamePattern.endsWith('.*')) {
// Match entity:
const entity = hostnamePattern.slice(0, -2);

// Ignore TLDs suffix
const publicSuffix = getPublicSuffix(hostname);
if (publicSuffix === null) {
return false;
}

const hostnameWithoutSuffix = hostname.substr(0, hostname.length - publicSuffix.length - 1);

if (hostnameWithoutSuffix.length > 0) {
// Check if we have a match
return checkHostnamesPartialMatch(hostnameWithoutSuffix, entity);
// Check if we have an entity match
if (hostnameWithoutPublicSuffix !== null) {
return checkHostnamesPartialMatch(hostnameWithoutPublicSuffix, hostnamePattern.slice(0, -2));
}

return false;
Expand All @@ -53,7 +38,31 @@ function matchHostname(
return checkHostnamesPartialMatch(hostname, hostnamePattern);
}

export default function matchCosmeticFilter(filter: CosmeticFilter, hostname: string): boolean {
/**
 * Strip the public suffix from `hostname`, using `domain` to find out how long
 * that suffix is.
 *
 * Everything following the first dot of `domain` is taken to be the public
 * suffix; that many characters, plus the dot preceding them, are removed from
 * the end of `hostname`.
 *
 * @param hostname - hostname to strip the suffix from.
 * @param domain - domain of `hostname`; assumed to be consistent with
 * `hostname` (no check is made that `hostname` actually ends with it).
 * @returns the leading part of `hostname`, or `null` when `domain` contains no
 * dot and thus no suffix can be derived from it.
 */
export function getHostnameWithoutPublicSuffix(hostname: string, domain: string): string | null {
  const firstDot = domain.indexOf('.');
  if (firstDot === -1) {
    return null;
  }

  // Number of characters of `domain` located after its first dot.
  const suffixLength = domain.length - (firstDot + 1);

  // Drop the suffix and the dot which precedes it.
  return hostname.slice(0, -(suffixLength + 1));
}

export default function matchCosmeticFilter(
filter: CosmeticFilter,
hostname: string,
domain: string,
): boolean {
const hostnameWithoutPublicSuffix = getHostnameWithoutPublicSuffix(hostname, domain);

// Check hostnames
if (filter.hasHostnames()) {
if (hostname) {
Expand All @@ -62,15 +71,21 @@ export default function matchCosmeticFilter(filter: CosmeticFilter, hostname: st
// Check for exceptions
for (let i = 0; i < hostnames.length; i += 1) {
const filterHostname = hostnames[i];
if (filterHostname[0] === '~' && matchHostname(hostname, filterHostname.slice(1))) {
if (
filterHostname[0] === '~' &&
matchHostname(hostname, hostnameWithoutPublicSuffix, filterHostname.slice(1))
) {
return false;
}
}

// Check for positive matches
for (let i = 0; i < hostnames.length; i += 1) {
const filterHostname = hostnames[i];
if (filterHostname[0] !== '~' && matchHostname(hostname, filterHostname)) {
if (
filterHostname[0] !== '~' &&
matchHostname(hostname, hostnameWithoutPublicSuffix, filterHostname)
) {
return true;
}
}
Expand Down
87 changes: 70 additions & 17 deletions src/request.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { getDomain, getHostname } from 'tldts';
import { createFuzzySignature, fastHash, tokenize } from './utils';

// TODO - add unit tests (for initialization with existing domain, hostname, etc.)
Expand Down Expand Up @@ -113,28 +112,25 @@ export default class Request {
private fuzzySignature?: Uint32Array;

constructor({
type = 'document',
url = '',
hostname,
type,

domain,
hostname,
url,

sourceUrl = '',
sourceHostname,
sourceDomain,
}: Partial<IRequestInitialization> = {}) {
sourceHostname,
sourceUrl,
}: IRequestInitialization) {
this.type = CPT_TO_TYPE[type] || RequestType.other;

this.url = url.toLowerCase();

// Optionally extract hostname and domain of url
this.hostname = hostname || getHostname(this.url) || '';
this.domain = domain || getDomain(this.hostname) || '';

this.sourceUrl = sourceUrl.toLowerCase();
this.url = url;
this.hostname = hostname;
this.domain = domain;

// Optionally extract hostname and domain of sourceUrl
this.sourceHostname = sourceHostname || getHostname(this.sourceUrl) || '';
this.sourceDomain = sourceDomain || getDomain(this.sourceHostname) || '';
this.sourceUrl = sourceUrl;
this.sourceHostname = sourceHostname;
this.sourceDomain = sourceDomain;

this.sourceHostnameHash = fastHash(this.sourceHostname);
this.sourceDomainHash = fastHash(this.sourceDomain);
Expand Down Expand Up @@ -198,3 +194,60 @@ export default class Request {
return this.fuzzySignature;
}
}

/**
 * Build a `Request` from partial information.
 *
 * This library no longer ships a URL parser, yet matching still needs the
 * hostname and domain of both the request URL and (optionally) the source URL.
 * Callers therefore supply the two extraction functions themselves: `tldts`
 * can be used, as can any other implementation based on public suffix lists.
 *
 * Missing values are filled in as follows:
 *  - `url` and `sourceUrl` default to '' and are lower-cased.
 *  - `hostname` / `sourceHostname`, when not given (or empty), are obtained
 *    with `getHostname` from the corresponding lower-cased URL.
 *  - `domain` / `sourceDomain`, when not given (or empty), are obtained with
 *    `getDomain` from the corresponding hostname.
 *  - any value which still cannot be determined becomes ''.
 *  - `type` defaults to 'document'.
 *
 * Example of usage:
 *
 *   import * as tldts from 'tldts';
 *
 *   makeRequest({ url: 'https://foo.com', type: 'script' }, tldts);
 */
export function makeRequest(
  {
    url = '',
    hostname,
    domain,
    sourceUrl = '',
    sourceHostname,
    sourceDomain,
    type = 'document',
  }: Partial<IRequestInitialization>,
  {
    getHostname,
    getDomain,
  }: {
    getHostname: (url: string) => string | null;
    getDomain: (url: string) => string | null;
  },
): Request {
  // Request URL: normalize, then derive whatever was not provided.
  const normalizedUrl = url.toLowerCase();
  const resolvedHostname = hostname || getHostname(normalizedUrl) || '';
  const resolvedDomain = domain || getDomain(resolvedHostname) || '';

  // Source URL: same treatment.
  const normalizedSourceUrl = sourceUrl.toLowerCase();
  const resolvedSourceHostname = sourceHostname || getHostname(normalizedSourceUrl) || '';
  const resolvedSourceDomain = sourceDomain || getDomain(resolvedSourceHostname) || '';

  return new Request({
    domain: resolvedDomain,
    hostname: resolvedHostname,
    url: normalizedUrl,

    sourceDomain: resolvedSourceDomain,
    sourceHostname: resolvedSourceHostname,
    sourceUrl: normalizedSourceUrl,

    type,
  });
}
Loading

0 comments on commit 86a7eeb

Please sign in to comment.