darkreader · Gusted · May 14, 2020 · May 14, 2020 · May 14, 2020 · May 14, 2020
diff --git a/src/utils/ipv6.ts b/src/utils/ipv6.ts
diff --git a/src/utils/matching.ts b/src/utils/matching.ts
@@ -0,0 +1,120 @@
+const regexpCache = new Map(); // Compiled patterns, read and filled by makeRegexp().
+
+interface MatchInterface { // Shape of the value returned by makeRegexp().
+    regexp: RegExp; // Regular expression built from the pattern text.
+    negated: boolean; // True when the pattern started with "!".
+}
+
+/**
+ * Compiles a URL pattern into a regexp, memoized by the exact string passed in.
+ * A leading "!" sets `negated`; "/source/flags" is compiled as a raw regexp;
+ * anything else is matched literally from the start, with "*" as a wildcard.
+ */
+function makeRegexp(pattern: string): MatchInterface {
+    const cached = regexpCache.get(pattern);
+    if (cached) {
+        // A "g" or "y" flag makes the regexp stateful, so rewind it before reuse.
+        cached.regexp.lastIndex = 0;
+        return cached;
+    }
+
+    const negated = pattern[0] === '!';
+    const body = negated ? pattern.slice(1) : pattern;
+    let regexp: RegExp;
+    if (body[0] === '/') {
+        // Flags follow the last "/"; the source sits between the first and last "/".
+        const closing = body.lastIndexOf('/');
+        regexp = new RegExp(body.slice(1, closing), body.slice(closing + 1));
+    } else {
+        // Escape regexp metacharacters, then turn each escaped "*" into a wildcard.
+        const escaped = body
+            .replace(/[|\\{}()[\]^$+*?.]/g, '\\$&')
+            .replace(/-/g, '\\x2d')
+            .replace(/\\\*/g, '[\\s\\S]*');
+        // The lookahead keeps "example.com" from matching "example.com.evil.org".
+        regexp = new RegExp(`^${escaped}(?![A-Za-z0-9.])`, 'i');
+    }
+
+    // Key by the caller's string, "!" included, so a pattern and its negation stay apart.
+    const compiled: MatchInterface = {regexp, negated};
+    regexpCache.set(pattern, compiled);
+    return compiled;
+}
+
+/**
+ * Normalizes a URL or URL pattern so that different spellings of the
+ * same location reduce to one string: a leading "^", a trailing "$",
+ * the query, everything up to the scheme slashes and a final slash go.
+ */
+function sanitazeInput(input: string) {
+    const discarded = [
+        /^\^/, /\$$/, /\?.*$/,
+        // Scheme ends with two slashes, or three for local files:
+        // file:///C:/path/to/file
+        /^.*?\/{2,3}/, /\/$/,
+    ];
+    return discarded.reduce((text, part) => text.replace(part, ''), input);
+}
+
+/**
+ * Tells whether `input` (a URL) matches a single `pattern`.
+ * A leading "!" inverts the result; "/source/flags" patterns are run as
+ * regexps against the raw input; other patterns are compared after both
+ * sides went through sanitazeInput(). Empty arguments never match.
+ */
+export function isMatch(input: string, pattern: string): boolean {
+    if (input === '' || pattern === '') {
+        return false;
+    }
+
+    // Split off "!" before sanitizing: sanitazeInput() drops everything up to
+    // the scheme slashes, which would silently discard the negation.
+    const negated = pattern[0] === '!';
+    const body = negated ? pattern.slice(1) : pattern;
+    const isRegexp = body[0] === '/';
+
+    const compiled = makeRegexp(isRegexp ? body : sanitazeInput(body));
+    const matched = compiled.regexp.test(isRegexp ? input : sanitazeInput(input));
+    return negated ? !matched : matched;
+}
+
+/**
+ * Tells whether `input` (a URL) is covered by a list of patterns.
+ *
+ * Patterns are evaluated in order and the last one that matches decides:
+ * a plain pattern includes the input, a "!"-prefixed one excludes it.
+ * When nothing matches, the input counts as included only if every
+ * pattern in the list is negated (an "everything except ..." list).
+ *
+ * @param input URL to look up; an empty string is never in a list.
+ * @param patterns Pattern strings; an empty list contains nothing.
+ * @returns `true` when the input is included by the list.
+ */
+export function isInPattern(input: string, patterns: any[]): boolean {
+    if (input === '' || patterns.length === 0) {
+        return false;
+    }
+
+    const sanitizedInput = sanitazeInput(input);
+    let everyNegated = true;
+    // Outcome dictated by the most recent matching pattern, if any.
+    let verdict: boolean | null = null;
+
+    for (const entry of patterns) {
+        const raw: string = entry;
+        // Split off "!" before sanitizing, otherwise stripping the scheme
+        // would also strip the negation marker.
+        const negated = raw[0] === '!';
+        const body = negated ? raw.slice(1) : raw;
+        // "/source/flags" patterns are regexps: they must not be rewritten
+        // by sanitazeInput() and, as in isMatch(), they see the raw input.
+        const isRegexp = body[0] === '/';
+        const compiled = makeRegexp(isRegexp ? body : sanitazeInput(body));
+        everyNegated = everyNegated && negated;
+        if (compiled.regexp.test(isRegexp ? input : sanitizedInput)) {
+            verdict = !negated;
+        }
+    }
+
+    return verdict === null ? everyNegated : verdict;
+}
diff --git a/src/utils/url.ts b/src/utils/url.ts
@@ -1,5 +1,5 @@
 import type {UserSettings} from '../definitions';
-import {isIPV6, compareIPV6} from './ipv6';
+import {isInPattern, isMatch} from './matching';
 import {isThunderbird} from './platform';
 
 let anchor: HTMLAnchorElement;
@@ -88,12 +88,7 @@ export function compareURLPatterns(a: string, b: string) {
  * @paramlist List to search into.
  */
 export function isURLInList(url: string, list: string[]) {
-    for (let i = 0; i < list.length; i++) {
-        if (isURLMatched(url, list[i])) {
-            return true;
-        }
-    }
-    return false;
+    return isInPattern(url, list);
 }
 
 /**
@@ -102,79 +97,7 @@ export function isURLInList(url: string, list: string[]) {
  * @param urlTemplate URL template ("google.*", "youtube.com" etc).
  */
 export function isURLMatched(url: string, urlTemplate: string): boolean {
-    const isFirstIPV6 = isIPV6(url);
-    const isSecondIPV6 = isIPV6(urlTemplate);
-    if (isFirstIPV6 && isSecondIPV6) {
-        return compareIPV6(url, urlTemplate);
-    } else if (!isFirstIPV6 && !isSecondIPV6) {
-        const regex = createUrlRegex(urlTemplate);
-        return Boolean(url.match(regex));
-    }
-    return false;
-}
-
-function createUrlRegex(urlTemplate: string): RegExp {
-    urlTemplate = urlTemplate.trim();
-    const exactBeginning = (urlTemplate[0] === '^');
-    const exactEnding = (urlTemplate[urlTemplate.length - 1] === '$');
-
-    urlTemplate = (urlTemplate
-        .replace(/^\^/, '') // Remove ^ at start
-        .replace(/\$$/, '') // Remove $ at end
-        .replace(/^.*?\/{2,3}/, '') // Remove scheme
-        .replace(/\?.*$/, '') // Remove query
-        .replace(/\/$/, '') // Remove last slash
-    );
-
-    let slashIndex: number;
-    let beforeSlash: string;
-    let afterSlash: string;
-    if ((slashIndex = urlTemplate.indexOf('/')) >= 0) {
-        beforeSlash = urlTemplate.substring(0, slashIndex); // google.*
-        afterSlash = urlTemplate.replace(/\$/g, '').substring(slashIndex); // /login/abc
-    } else {
-        beforeSlash = urlTemplate.replace(/\$/g, '');
-    }
-
-    //
-    // SCHEME and SUBDOMAINS
-
-    let result = (exactBeginning ?
-        '^(.*?\\:\\/{2,3})?' // Scheme
-        : '^(.*?\\:\\/{2,3})?([^\/]*?\\.)?' // Scheme and subdomains
-    );
-
-    //
-    // HOST and PORT
-
-    const hostParts = beforeSlash.split('.');
-    result += '(';
-    for (let i = 0; i < hostParts.length; i++) {
-        if (hostParts[i] === '*') {
-            hostParts[i] = '[^\\.\\/]+?';
-        }
-    }
-    result += hostParts.join('\\.');
-    result += ')';
-
-    //
-    // PATH and QUERY
-
-    if (afterSlash) {
-        result += '(';
-        result += afterSlash.replace('/', '\\/');
-        result += ')';
-    }
-
-    result += (exactEnding ?
-        '(\\/?(\\?[^\/]*?)?)$' // All following queries
-        : '(\\/?.*?)$' // All following paths and queries
-    );
-
-    //
-    // Result
-
-    return new RegExp(result, 'i');
+    return isMatch(url, urlTemplate);
 }
 
 export function isPDF(url: string) {

diff --git a/tests/generators/utils/parse.tests.ts b/tests/generators/utils/parse.tests.ts
@@ -296,9 +296,6 @@ test('The generic fix appears first', () => {
         }, {
             'url': ['long.sub.example.com'],
             'directive':'long'
-        }, {
-            'url': ['sub.example.com'],
-            'directive':'sub'
         }]);
 });
 
@@ -349,7 +346,7 @@ test('Fixes appear only once', () => {
         }]);
 });
 
-test('Implied wildcards', () => {
+test('No implied wildcards', () => {
     interface TestFix {
         url: string[];
         directive: string[];
@@ -386,10 +383,5 @@ test('Implied wildcards', () => {
         {
             'url': ['*'],
             'directive': 'hello world'
-        }, {
-            'url': [
-                'example.com',
-            ],
-            'directive': 'one'
         }]);
 });