Skip to content

Commit

Permalink
Implement network filter option replace=
Browse files Browse the repository at this point in the history
Reference documentation:
https://adguard.com/kb/general/ad-filtering/create-own-filters/#replace-modifier

This is a network filter option which can only be loaded from a
trusted source.

Since this filter is about modifying the response body, it currently
only works in Firefox.

As discussed with filter list maintainers.
  • Loading branch information
gorhill committed Nov 3, 2023
1 parent aeff955 commit 7c3e060
Show file tree
Hide file tree
Showing 7 changed files with 537 additions and 444 deletions.
4 changes: 2 additions & 2 deletions src/js/background.js
Expand Up @@ -184,8 +184,8 @@ const µBlock = { // jshint ignore:line

// Read-only
systemSettings: {
compiledMagic: 56, // Increase when compiled format changes
selfieMagic: 56, // Increase when selfie format changes
compiledMagic: 57, // Increase when compiled format changes
selfieMagic: 57, // Increase when selfie format changes
},

// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501
Expand Down
5 changes: 5 additions & 0 deletions src/js/benchmarks.js
Expand Up @@ -174,6 +174,7 @@ const loadBenchmarkDataset = (( ) => {
let removeparamCount = 0;
let cspCount = 0;
let permissionsCount = 0;
let replaceCount = 0;
for ( let i = 0; i < requests.length; i++ ) {
const request = requests[i];
fctxt.setURL(request.url);
Expand Down Expand Up @@ -202,6 +203,9 @@ const loadBenchmarkDataset = (( ) => {
}
}
staticNetFilteringEngine.matchHeaders(fctxt, []);
if ( staticNetFilteringEngine.matchAndFetchModifiers(fctxt, 'replace') ) {
replaceCount += 1;
}
} else if ( redirectEngine !== undefined ) {
if ( staticNetFilteringEngine.redirectRequest(redirectEngine, fctxt) ) {
redirectCount += 1;
Expand All @@ -222,6 +226,7 @@ const loadBenchmarkDataset = (( ) => {
`\tremoveparam=: ${removeparamCount}`,
`\tcsp=: ${cspCount}`,
`\tpermissions=: ${permissionsCount}`,
`\treplace=: ${replaceCount}`,
];
const s = output.join('\n');
console.info(s);
Expand Down
1 change: 1 addition & 0 deletions src/js/filtering-context.js
Expand Up @@ -175,6 +175,7 @@ const FilteringContext = class {

fromFilteringContext(other) {
this.realm = other.realm;
this.id = other.id;
this.type = other.type;
this.method = other.method;
this.url = other.url;
Expand Down
6 changes: 3 additions & 3 deletions src/js/html-filtering.js
Expand Up @@ -429,15 +429,15 @@ htmlFilteringEngine.retrieve = function(details) {
}
};

htmlFilteringEngine.apply = function(doc, details) {
htmlFilteringEngine.apply = function(doc, details, selectors) {
docRegister = doc;
let modified = false;
for ( const selector of details.selectors.plains ) {
for ( const selector of selectors.plains ) {
if ( applyCSSSelector(details, selector) ) {
modified = true;
}
}
for ( const selector of details.selectors.procedurals ) {
for ( const selector of selectors.procedurals ) {
if ( applyProceduralSelector(details, selector) ) {
modified = true;
}
Expand Down
75 changes: 65 additions & 10 deletions src/js/static-filtering-parser.js
Expand Up @@ -187,6 +187,7 @@ export const NODE_TYPE_NET_OPTION_NAME_POPUP = iota++;
export const NODE_TYPE_NET_OPTION_NAME_REDIRECT = iota++;
export const NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE = iota++;
export const NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM = iota++;
export const NODE_TYPE_NET_OPTION_NAME_REPLACE = iota++;
export const NODE_TYPE_NET_OPTION_NAME_SCRIPT = iota++;
export const NODE_TYPE_NET_OPTION_NAME_SHIDE = iota++;
export const NODE_TYPE_NET_OPTION_NAME_TO = iota++;
Expand Down Expand Up @@ -265,6 +266,7 @@ export const nodeTypeFromOptionName = new Map([
/* synonym */ [ 'rewrite', NODE_TYPE_NET_OPTION_NAME_REDIRECT ],
[ 'redirect-rule', NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE ],
[ 'removeparam', NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM ],
[ 'replace', NODE_TYPE_NET_OPTION_NAME_REPLACE ],
/* synonym */ [ 'queryprune', NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM ],
[ 'script', NODE_TYPE_NET_OPTION_NAME_SCRIPT ],
[ 'shide', NODE_TYPE_NET_OPTION_NAME_SHIDE ],
Expand Down Expand Up @@ -597,9 +599,14 @@ const exCharCodeAt = (s, i) => {
return pos >= 0 ? s.charCodeAt(pos) : -1;
};

// Build a global regex which matches an escaped occurrence of character `c`,
// i.e. `c` immediately preceded by a backslash which is not itself escaped.
// Capture group 1 holds whatever precedes the escaping backslash (the
// previous non-backslash character, if any, plus any pairs of backslashes),
// so that callers can restore it with `$1` in their replacement string.
const toEscapedCharRegex = c => {
    // Neutralize `c` in case it is itself a regex metacharacter.
    const literal = c.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Start of string or a non-backslash, then zero or more backslash pairs.
    const leadIn = String.raw`((?:^|[^\\])(?:\\\\)*)`;
    return new RegExp(`${leadIn}\\\\${literal}`, 'g');
};

/******************************************************************************/

class argListParser {
class ArgListParser {
constructor(separatorChar = ',', mustQuote = false) {
this.separatorChar = this.actualSeparatorChar = separatorChar;
this.separatorCode = this.actualSeparatorCode = separatorChar.charCodeAt(0);
Expand All @@ -612,10 +619,10 @@ class argListParser {
this.reWhitespaceStart = /^\s+/;
this.reWhitespaceEnd = /\s+$/;
this.reOddTrailingEscape = /(?:^|[^\\])(?:\\\\)*\\$/;
this.reEscapedDoubleQuote = /((?:^|[^\\])(?:\\\\)*)\\"/g;
this.reEscapedSingleQuote = /((?:^|[^\\])(?:\\\\)*)\\'/g;
this.reEscapedBacktick = /((?:^|[^\\])(?:\\\\)*)\\`/g;
this.reEscapedSeparator = new RegExp(`((?:^|[^\\\\])(?:\\\\\\\\)*)\\\\${this.separatorChar}`, 'g');
this.reEscapedDoubleQuote = toEscapedCharRegex('"');
this.reEscapedSingleQuote = toEscapedCharRegex("'");
this.reEscapedBacktick = toEscapedCharRegex('`');
this.reEscapedSeparator = toEscapedCharRegex(this.separatorChar);
this.unescapedSeparator = `$1${this.separatorChar}`;
}
nextArg(pattern, beg = 0) {
Expand Down Expand Up @@ -871,7 +878,7 @@ export class AstFilterParser {
this.rePlainEntity = /^(?:[\da-z][\da-z_-]*\.)+\*$/;
this.reHostsSink = /^[\w%.:\[\]-]+\s+/;
this.reHostsRedirect = /(?:0\.0\.0\.0|broadcasthost|local|localhost(?:\.localdomain)?|ip6-\w+)(?:[^\w.-]|$)/;
this.reNetOptionComma = /,(?!\d*\})/g;
this.reNetOptionComma = /,(?:~?[13a-z-]+(?:=.*?)?|_+)(?:,|$)/;
this.rePointlessLeftAnchor = /^\|\|?\*+/;
this.reIsTokenChar = /^[%0-9A-Za-z]/;
this.rePointlessLeadingWildcards = /^(\*+)[^%0-9A-Za-z\u{a0}-\u{10FFFF}]/u;
Expand All @@ -898,7 +905,7 @@ export class AstFilterParser {
this.reGoodRegexToken = /[^\x01%0-9A-Za-z][%0-9A-Za-z]{7,}|[^\x01%0-9A-Za-z][%0-9A-Za-z]{1,6}[^\x01%0-9A-Za-z]/;
this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/;
this.reNoopOption = /^_+$/;
this.scriptletArgListParser = new argListParser(',');
this.scriptletArgListParser = new ArgListParser(',');
}

parse(raw) {
Expand Down Expand Up @@ -1414,6 +1421,7 @@ export class AstFilterParser {
break;
case NODE_TYPE_NET_OPTION_NAME_REDIRECT:
case NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE:
case NODE_TYPE_NET_OPTION_NAME_REPLACE:
case NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM:
realBad = isNegated || (isException || hasValue) === false ||
modifierType !== 0;
Expand Down Expand Up @@ -1474,6 +1482,20 @@ export class AstFilterParser {
realBad = abstractTypeCount || behaviorTypeCount || unredirectableTypeCount;
break;
}
case NODE_TYPE_NET_OPTION_NAME_REPLACE: {
realBad = abstractTypeCount || behaviorTypeCount || unredirectableTypeCount;
if ( realBad ) { break; }
if ( this.options.trustedSource !== true ) {
this.astError = AST_ERROR_UNTRUSTED_SOURCE;
realBad = true;
break;
}
if ( this.interactive ) {
const value = this.getNetOptionValue(NODE_TYPE_NET_OPTION_NAME_REPLACE);
realBad = parseReplaceValue(value) === undefined;
}
break;
}
case NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM:
realBad = abstractTypeCount || behaviorTypeCount || unredirectableTypeCount;
if ( realBad ) { break; }
Expand Down Expand Up @@ -1959,9 +1981,8 @@ export class AstFilterParser {
}

endOfNetOption(s, beg) {
this.reNetOptionComma.lastIndex = beg;
const match = this.reNetOptionComma.exec(s);
return match !== null ? match.index : s.length;
const match = this.reNetOptionComma.exec(s.slice(beg));
return match !== null ? beg + match.index : s.length;
}

parseNetOption(parent) {
Expand Down Expand Up @@ -2975,6 +2996,39 @@ export function parseHeaderValue(arg) {
return out;
}


// https://adguard.com/kb/general/ad-filtering/create-own-filters/#replace-modifier

/**
 * Parse the value of a `replace=` network filter option.
 *
 * The value is expected to start with `/` and to be made of three
 * `/`-separated parts: a regex pattern, a replacement string, and regex
 * flags. Escaped `$` and `,` characters in the pattern and the replacement
 * are unescaped before use.
 *
 * @param {string} s - The raw option value.
 * @returns {{re: RegExp, replacement: string}|undefined} The compiled regex
 *   and its replacement string; `undefined` when the value does not start
 *   with `/`, when no separator is found after the replacement part, or when
 *   the pattern/flags cannot be compiled into a RegExp.
 */
export function parseReplaceValue(s) {
    if ( s.charCodeAt(0) !== 0x2F /* / */ ) { return; }
    const reDollar = parseReplaceValue.reEscapedDollarSign;
    const reComma = parseReplaceValue.reEscapedComma;
    const parser = new ArgListParser('/');

    // First part: the regex pattern, starting right after the leading `/`.
    parser.nextArg(s, 1);
    let pattern = s.slice(parser.argBeg, parser.argEnd);
    if ( parser.transform ) {
        pattern = parser.normalizeArg(pattern);
    }
    pattern = pattern.replace(reDollar, '$1$$$');
    pattern = pattern.replace(reComma, '$1,');

    // Second part: the replacement string.
    parser.nextArg(s, parser.separatorEnd);
    let replacement = s.slice(parser.argBeg, parser.argEnd);
    // Bail out when the parser reports an empty separator span after the
    // replacement part.
    if ( parser.separatorEnd === parser.separatorBeg ) { return; }
    if ( parser.transform ) {
        replacement = parser.normalizeArg(replacement);
    }
    replacement = replacement.replace(reDollar, '$1$$');
    replacement = replacement.replace(reComma, '$1,');

    // Third part: everything past the last separator is taken as the flags.
    const flags = s.slice(parser.separatorEnd);
    let re;
    try {
        re = new RegExp(pattern, flags);
    } catch(_) {
        // Invalid pattern or flags: report failure by returning `undefined`.
        return;
    }
    return { re, replacement };
}
parseReplaceValue.reEscapedDollarSign = toEscapedCharRegex('$');
parseReplaceValue.reEscapedComma = toEscapedCharRegex(',');

/******************************************************************************/

export const netOptionTokenDescriptors = new Map([
Expand Down Expand Up @@ -3025,6 +3079,7 @@ export const netOptionTokenDescriptors = new Map([
/* synonym */ [ 'rewrite', { mustAssign: true } ],
[ 'redirect-rule', { mustAssign: true } ],
[ 'removeparam', { } ],
[ 'replace', { mustAssign: true } ],
/* synonym */ [ 'queryprune', { } ],
[ 'script', { canNegate: true } ],
[ 'shide', { } ],
Expand Down

23 comments on commit 7c3e060

@uBlock-user
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

filters with simple regex are shown as invalid - ||example.org^$replace=/^uBlockOrigin$/i

but complex regex are shown valid - ||example.org^$replace=/(<VAST[\s\S]*?>)[\s\S]*<\/VAST>/\$1<\/VAST>/i

a limitation ?

@gwarser
Copy link
Contributor

@gwarser gwarser commented on 7c3e060 Nov 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

replace = "/" regexp "/" replacement "/" modifiers

Missing the "replacement" part?

Try

||example.org^$replace=/^uBlockOrigin$//i

@uBlock-user
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, didn't realise replacement was compulsory for removal, thanks.

@peace2000
Copy link
Contributor

@peace2000 peace2000 commented on 7c3e060 Nov 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gorhill Is this only for replacing content in page source code but it cannot replace content in external scripts the page loads?

@gorhill
Copy link
Owner Author

@gorhill gorhill commented on 7c3e060 Nov 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The documentation linked in the commit says:

$replace rules apply to any text response, but will not apply to binary (media, image, object, etc.).

@uBlock-user
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is .m3u8 file an object or a text response ?

@gorhill
Copy link
Owner Author

@gorhill gorhill commented on 7c3e060 Nov 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You tell me. If you want me to look into it, having a URL which I can readily paste in the address bar would be nice.

@gwarser
Copy link
Contributor

@gwarser gwarser commented on 7c3e060 Nov 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resource response content-type must start with text/ or be one of
application/javascript,
application/json,
application/xml,
application/xhtml+xml,

Is the change visible in dev tools debugger [edit: or it was "Network" tab where it does not show even now?]? If it is, it did not work for me with this commit on application/javascript files on AG doc page. Will test more recent version.


It's working in 1.53.1b5, but no way to preview the effect in dev tools. If you omit the type, you can open in tab, and the changes are visible there. Nightly 121.0a1.


Dev tools debugger shows the change.


More types added later in ae24032

@uBlock-user
Copy link
Contributor

@uBlock-user uBlock-user commented on 7c3e060 Nov 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You tell me.

If I knew, we wouldn't be having this conversation.

If you want me to look into it, having a URL which I can readily paste in the address bar would be nice.

None readily available on my end. Take Twitch for example.

@gorhill
Copy link
Owner Author

@gorhill gorhill commented on 7c3e060 Nov 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I knew

It's matter of opening the browser dev tools, then Network pane, then select one of those m3u8 resource, and look-up content type.

Take twitch for example

Now with that repro case in mind, I just did the described steps above, and I get application/vnd.apple.mpegurl. Looking at the content, it appears to be text. So I will add application/vnd.apple.mpegurl as a valid target for response filtering.

@uBlock-user
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gwarser response content type is application/vnd.apple.mpegurl, so no chance huh ?

@uBlock-user
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

then select one of those m3u8 resource, and look-up content type.

Ah, you meant that. That I know.

So I will add application/vnd.apple.mpegurl as a valid target for response filtering.

great, thanks!!

@uBlock-user
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mentioned twitch because of uBlockOrigin/uBlock-issues#2758, and now it seems $replace remains the only possibility.

@gorhill
Copy link
Owner Author

@gorhill gorhill commented on 7c3e060 Nov 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@peace2000
Copy link
Contributor

@peace2000 peace2000 commented on 7c3e060 Nov 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can't I replace the text loadChartbeat with test?

Page: tivi.fi

Filter: ||tivi.fi^$replace=/loadChartbeat/test/

That string is in the page source code.

@gwarser
Copy link
Contributor

@gwarser gwarser commented on 7c3e060 Nov 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tivi.fi##^script does not work. ?

@peace2000
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tivi.fi##^script does not work. ?

That's true also.

@gorhill
Copy link
Owner Author

@gorhill gorhill commented on 7c3e060 Nov 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Found a mistake, there was an extraneous j += 1 left in there. This will be fixed in next build.

@peace2000
Copy link
Contributor

@peace2000 peace2000 commented on 7c3e060 Nov 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That fixed it.

However I found a new link where some texts can't be replaced.

https://www.is.fi/seksi-parisuhde/art-2000008547493.html

Replace text authenticated with free in the page source code.

||is.fi^$replace=/authenticated/free/

Out of 13 instances, only one is replaced, 12 aren't.

@gwarser
Copy link
Contributor

@gwarser gwarser commented on 7c3e060 Nov 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use regex flags. g perhaps?


This works:

 ||is.fi^$replace=/authenticated/free/g

@peace2000
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that was it. Now it works.

@peace2000
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about this url?:

https://app-conf.almamedia.net/v3/conf.json

Trying to replace global with test but doesn't work.

||app-conf.almamedia.net^$replace=/global/test/

@gorhill
Copy link
Owner Author

@gorhill gorhill commented on 7c3e060 Nov 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Content type is application/octet-stream, so this can't go through a filterer which can just handle text-based content.

Please sign in to comment.