From 636437b3bb582d5d6d430ccf0a2a47e5dccfbe72 Mon Sep 17 00:00:00 2001 From: Krzysztof Modras Date: Fri, 27 Oct 2023 16:51:34 +0200 Subject: [PATCH 1/3] Example of breaking scriptlet --- packages/adblocker/test/parsing.test.ts | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/packages/adblocker/test/parsing.test.ts b/packages/adblocker/test/parsing.test.ts index 0684ba7c3d..c66e5729ee 100644 --- a/packages/adblocker/test/parsing.test.ts +++ b/packages/adblocker/test/parsing.test.ts @@ -2146,6 +2146,17 @@ describe('scriptlets arguments parsing', () => { it('complex', () => { for (const [scriptlet, expected] of [ + [ + 'www.amazon.de#@#+js(xml-prune, xpath(//*[name()="Period"][.//*[@value="Ad"]] | //*[name()="Period"]/@start), [value="Ad"], .mpd)', + { + name: 'xml-prune', + args: [ + 'xpath(//*[name()="Period"][.//*[@value="Ad"]] | //*[name()="Period"]/@start)', + '[value="Ad"]', + '.mpd', + ], + }, + ], [ 'acs, Math, /\\}\\s*\\(.*?\\b(self|this|window)\\b.*?\\)/', { @@ -2227,7 +2238,11 @@ describe('scriptlets arguments parsing', () => { 'trusted-replace-fetch-response, /\\"adPlacements.*?\\"\\}\\}\\}\\]\\,/, , url:player?key= method:/post/i bodyUsed:true', { name: 'trusted-replace-fetch-response', - args: ['/\\"adPlacements.*?\\"\\}\\}\\}\\]\\,/', '', 'url:player?key= method:/post/i bodyUsed:true'], + args: [ + '/\\"adPlacements.*?\\"\\}\\}\\}\\]\\,/', + '', + 'url:player?key= method:/post/i bodyUsed:true', + ], }, ], [ From dbeddfa6988ca3dbc5b281d8a3a9252a8a7e105f Mon Sep 17 00:00:00 2001 From: Krzysztof Modras Date: Fri, 27 Oct 2023 18:19:48 +0200 Subject: [PATCH 2/3] Attempt for a fix --- packages/adblocker/src/filters/cosmetic.ts | 27 ++++++++++++++-------- packages/adblocker/test/parsing.test.ts | 9 +++++++- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/packages/adblocker/src/filters/cosmetic.ts b/packages/adblocker/src/filters/cosmetic.ts index 4d815d5f8f..7c29676647 100644 --- a/packages/adblocker/src/filters/cosmetic.ts +++ b/packages/adblocker/src/filters/cosmetic.ts @@ -687,6 +687,7 @@ export default class CosmeticFilter implements IFilter { let inRegexp = false; let objectNesting = 0; let lastCharIsBackslash = false; + let inArgument = false; for (; index < selector.length; index += 1) { const char = selector[index]; @@ -715,17 +716,25 @@ export default class CosmeticFilter implements IFilter { inRegexp = false; } } else { - if (char === '"') { - inDoubleQuotes = true; - } else if (char === "'") { - inSingleQuotes = true; - } else if (char === '{') { - objectNesting += 1; - } else if (char === '/') { - inRegexp = true; - } else if (char === ',') { + if (inArgument === false) { + if (char === ' ') { + // ignore + } else if (char === '"') { + inDoubleQuotes = true; + } else if (char === "'") { + inSingleQuotes = true; + } else if (char === '{') { + objectNesting += 1; + } else if (char === '/') { + inRegexp = true; + } else { + inArgument = true; + } + } + if (char === ',') { parts.push(selector.slice(lastComaIndex + 1, index).trim()); lastComaIndex = index; + inArgument = false; } } } diff --git a/packages/adblocker/test/parsing.test.ts b/packages/adblocker/test/parsing.test.ts index c66e5729ee..9bce477423 100644 --- a/packages/adblocker/test/parsing.test.ts +++ b/packages/adblocker/test/parsing.test.ts @@ -2147,7 +2147,14 @@ describe('scriptlets arguments parsing', () => { it('complex', () => { for (const [scriptlet, expected] of [ [ - 'www.amazon.de#@#+js(xml-prune, xpath(//*[name()="Period"][.//*[@value="Ad"]] | //*[name()="Period"]/@start), [value="Ad"], .mpd)', + 'xml-prune,a/b,,c', + { + name: 'xml-prune', + args: ['a/b', '', 'c'], + }, + ], + [ + 'xml-prune, xpath(//*[name()="Period"][.//*[@value="Ad"]] | //*[name()="Period"]/@start), [value="Ad"], .mpd', { name: 'xml-prune', args: [ From 103f5e9895d21247e45e5a133574e9772f83d21e Mon Sep 17 00:00:00 2001 From: Krzysztof Modras Date: Fri, 27 Oct 2023 21:35:22 +0200 Subject: [PATCH 3/3] Heuristic to prevent too eager argument type classification --- packages/adblocker/src/filters/cosmetic.ts | 8 ++-- packages/adblocker/test/parsing.test.ts | 46 ++++++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/packages/adblocker/src/filters/cosmetic.ts b/packages/adblocker/src/filters/cosmetic.ts index 7c29676647..dfaaeec932 100644 --- a/packages/adblocker/src/filters/cosmetic.ts +++ b/packages/adblocker/src/filters/cosmetic.ts @@ -719,13 +719,13 @@ export default class CosmeticFilter implements IFilter { if (inArgument === false) { if (char === ' ') { // ignore - } else if (char === '"') { + } else if (char === '"' && selector.indexOf('"', index + 1) > 0) { inDoubleQuotes = true; - } else if (char === "'") { + } else if (char === "'" && selector.indexOf("'", index + 1) > 0) { inSingleQuotes = true; - } else if (char === '{') { + } else if (char === '{' && selector.indexOf('}', index + 1) > 0) { objectNesting += 1; - } else if (char === '/') { + } else if (char === '/' && selector.indexOf('/', index + 1) > 0) { inRegexp = true; } else { inArgument = true; diff --git a/packages/adblocker/test/parsing.test.ts b/packages/adblocker/test/parsing.test.ts index 9bce477423..b26e9b1bfd 100644 --- a/packages/adblocker/test/parsing.test.ts +++ b/packages/adblocker/test/parsing.test.ts @@ -2146,6 +2146,52 @@ describe('scriptlets arguments parsing', () => { it('complex', () => { for (const [scriptlet, expected] of [ + [ + 'script-name, {x, y', + { + name: 'script-name', + args: ['{x', 'y'], + }, + ], + [ + 'script-name, "x, y', + { + name: 'script-name', + args: ['"x', 'y'], + }, + ], + [ + "script-name, 'x, y", + { + name: 'script-name', + args: ["'x", 'y'], + }, + ], + [ + 'script-name, /x, y', + { + name: 'script-name', + args: ['/x', 'y'], + }, + ], + [ + 'xml-prune,a/b,///,,c', + { + name: 'xml-prune', + args: ['a/b', '///', '', 'c'], + }, + ], + [ + `xml-prune, xpath(//*[name()="MPD"]/@mediaPresentationDuration | //*[name()="Period"][.//*[name()="BaseURL" and contains(text()\\,'/ads-')]] | //*[name()="Period"]/@start), Period[id^="Ad"i], .mpd`, + { + name: 'xml-prune', + args: [ + `xpath(//*[name()="MPD"]/@mediaPresentationDuration | //*[name()="Period"][.//*[name()="BaseURL" and contains(text()\\,'/ads-')]] | //*[name()="Period"]/@start)`, + 'Period[id^="Ad"i]', + '.mpd', + ], + }, + ], [ 'xml-prune,a/b,,c', {