From 4663fd4a1bde5e57e8474f35146eb52c2f7ba72e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Berson?= <remi@cliqz.com>
Date: Wed, 24 Apr 2019 11:12:46 +0200
Subject: [PATCH 1/4] clean-up filters parsing benchmark

---
 bench/micro.js         | 21 ++++++---------------
 bench/run_benchmark.js |  4 +++-
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/bench/micro.js b/bench/micro.js
index ad85db32dd..8e605e500e 100644
--- a/bench/micro.js
+++ b/bench/micro.js
@@ -36,28 +36,19 @@ function benchStringTokenize({ filters }) {
   return dummy;
 }
 
-function benchParsingImpl(lists, { loadNetworkFilters, loadCosmeticFilters }) {
-  let dummy = 0;
-
-  for (let i = 0; i < lists.length; i += 1) {
-    dummy = (dummy + adblocker.parseFilters(lists[i], {
-      loadNetworkFilters,
-      loadCosmeticFilters,
-    }).networkFilters.length) >>> 0;
-  }
-
-  return dummy;
+function benchParsingImpl(lists, options) {
+  return adblocker.parseFilters(lists, options);
 }
 
-function benchCosmeticsFiltersParsing({ lists }) {
-  return benchParsingImpl(lists, {
+function benchCosmeticsFiltersParsing({ combinedLists }) {
+  return benchParsingImpl(combinedLists, {
     loadCosmeticFilters: true,
     loadNetworkFilters: false,
   });
 }
 
-function benchNetworkFiltersParsing({ lists }) {
-  return benchParsingImpl(lists, {
+function benchNetworkFiltersParsing({ combinedLists }) {
+  return benchParsingImpl(combinedLists, {
     loadCosmeticFilters: false,
     loadNetworkFilters: true,
   });
diff --git a/bench/run_benchmark.js b/bench/run_benchmark.js
index 70c378ecfe..86c56066b8 100644
--- a/bench/run_benchmark.js
+++ b/bench/run_benchmark.js
@@ -80,7 +80,8 @@ function runMicroBenchmarks(lists, resources) {
   }, true /* Also serialize engine */);
 
   const filters = getFiltersFromLists(lists);
-  const { networkFilters, cosmeticFilters } = parseFilters(filters.join('\n'));
+  const combinedLists = filters.join('\n');
+  const { networkFilters, cosmeticFilters } = parseFilters(combinedLists);
   const results = {};
 
   // Arguments shared among benchmarks
@@ -92,6 +93,7 @@ function runMicroBenchmarks(lists, resources) {
     serialized,
     networkFilters,
     cosmeticFilters,
+    combinedLists,
   };
 
   [

From fdf00d23e06e2e6a06a1e73b49dd492ff0436c2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Berson?= <remi@cliqz.com>
Date: Wed, 24 Apr 2019 11:45:28 +0200
Subject: [PATCH 2/4] speed-up parseFilters and detectFilterType (10-15% gain)

---
 bench/micro.js |  10 ++--
 src/lists.ts   | 124 +++++++++++++++++++++++++++++++++----------------
 2 files changed, 88 insertions(+), 46 deletions(-)

diff --git a/bench/micro.js b/bench/micro.js
index 8e605e500e..c7d936a069 100644
--- a/bench/micro.js
+++ b/bench/micro.js
@@ -1,6 +1,6 @@
 /* eslint-disable no-bitwise */
 
-const adblocker = require('../');
+const { FiltersEngine, fastHash, tokenize, parseFilters } = require('../');
 const { createEngine } = require('./utils');
 
 
@@ -17,13 +17,13 @@ function benchEngineSerialization({ engine }) {
 }
 
 function benchEngineDeserialization({ serialized }) {
-  return adblocker.FiltersEngine.deserialize(serialized);
+  return FiltersEngine.deserialize(serialized);
 }
 
 function benchStringHashing({ filters }) {
   let dummy = 0;
   for (let i = 0; i < filters.length; i += 1) {
-    dummy = (dummy + adblocker.fastHash(filters[i])) % 1000000000;
+    dummy = (dummy + fastHash(filters[i])) >>> 0;
   }
   return dummy;
 }
@@ -31,13 +31,13 @@ function benchStringHashing({ filters }) {
 function benchStringTokenize({ filters }) {
   let dummy = 0;
   for (let i = 0; i < filters.length; i += 1) {
-    dummy = (dummy + adblocker.tokenize(filters[i]).length) % 1000000000;
+    dummy = (dummy + tokenize(filters[i]).length) >>> ;
   }
   return dummy;
 }
 
 function benchParsingImpl(lists, options) {
-  return adblocker.parseFilters(lists, options);
+  return parseFilters(lists, options);
 }
 
 function benchCosmeticsFiltersParsing({ combinedLists }) {
diff --git a/src/lists.ts b/src/lists.ts
index b4ab5a1051..930abe791f 100644
--- a/src/lists.ts
+++ b/src/lists.ts
@@ -3,8 +3,6 @@ import CosmeticFilter from './filters/cosmetic';
 import NetworkFilter from './filters/network';
 import { fastStartsWith, fastStartsWithFrom } from './utils';
 
-const SPACE = /\s/;
-
 const enum FilterType {
   NOT_SUPPORTED,
   NETWORK,
@@ -18,50 +16,89 @@ const enum FilterType {
  * `NetworkFilter` or `CosmeticFilter`.
  */
 function detectFilterType(line: string): FilterType {
+  // Ignore empty and single-character lines
+  if (line.length === 0 || line.length === 1) {
+    return FilterType.NOT_SUPPORTED;
+  }
+
   // Ignore comments
+  const firstCharCode: number = line.charCodeAt(0);
+  const secondCharCode: number = line.charCodeAt(1);
   if (
-    line.length === 1 ||
-    line.charAt(0) === '!' ||
-    (line.charAt(0) === '#' && SPACE.test(line.charAt(1))) ||
-    fastStartsWith(line, '[Adblock')
+    firstCharCode === 33 /* '!' */ ||
+    (firstCharCode === 35 /* '#' */ && secondCharCode <= 32) ||
+    (firstCharCode === 91 /* '[' */ && fastStartsWith(line, '[Adblock'))
   ) {
     return FilterType.NOT_SUPPORTED;
   }
 
-  if (fastStartsWith(line, '|') || fastStartsWith(line, '@@|')) {
+  // Fast heuristics to detect network filters
+  const lastCharCode: number = line.charCodeAt(line.length - 1);
+  if (
+    firstCharCode === 36 /* '$' */ ||
+    firstCharCode === 38 /* '&' */ ||
+    firstCharCode === 42 /* '*' */ ||
+    firstCharCode === 45 /* '-' */ ||
+    firstCharCode === 46 /* '.' */ ||
+    firstCharCode === 47 /* '/' */ ||
+    firstCharCode === 58 /* ':' */ ||
+    firstCharCode === 61 /* '=' */ ||
+    firstCharCode === 63 /* '?' */ ||
+    firstCharCode === 64 /* '@' */ ||
+    firstCharCode === 95 /* '_' */ ||
+    firstCharCode === 124 /* '|' */ ||
+    lastCharCode === 124 /* '|' */
+  ) {
     return FilterType.NETWORK;
   }
 
   // Ignore Adguard cosmetics
-  // `$$`
-  if (line.indexOf('$$') !== -1) {
-    return FilterType.NOT_SUPPORTED;
+  // `$$` and `$@$` = HTML filtering rules
+  const dollarIndex: number = line.indexOf('$');
+  if (dollarIndex !== -1 && dollarIndex !== line.length - 1) {
+    const afterDollarIndex = dollarIndex + 1;
+    const afterDollarCharCode = line.charCodeAt(afterDollarIndex);
+
+    // Ignore Adguard HTML rewrite rules
+    if (
+      afterDollarCharCode === 36 /* '$' */ ||
+      (afterDollarCharCode === 64 /* '@' */ &&
+        fastStartsWithFrom(line, /* $@$ */ '@$', afterDollarIndex))
+    ) {
+      return FilterType.NOT_SUPPORTED;
+    }
   }
 
   // Check if filter is cosmetics
-  const sharpIndex = line.indexOf('#');
-  if (sharpIndex !== -1) {
+  const sharpIndex: number = line.indexOf('#');
+  if (sharpIndex !== -1 && sharpIndex !== line.length - 1) {
     const afterSharpIndex = sharpIndex + 1;
+    const afterSharpCharCode = line.charCodeAt(afterSharpIndex);
 
-    // Ignore Adguard cosmetics
-    // `#$#` `#@$#`
-    // `#%#` `#@%#`
-    // `#?#`
     if (
-      fastStartsWithFrom(line, /* #@$# */ '@$#', afterSharpIndex) ||
-      fastStartsWithFrom(line, /* #@%# */ '@%#', afterSharpIndex) ||
-      fastStartsWithFrom(line, /* #%# */ '%#', afterSharpIndex) ||
-      fastStartsWithFrom(line, /* #$# */ '$#', afterSharpIndex) ||
-      fastStartsWithFrom(line, /* #?# */ '?#', afterSharpIndex)
-    ) {
-      return FilterType.NOT_SUPPORTED;
-    } else if (
-      fastStartsWithFrom(line, /* ## */ '#', afterSharpIndex) ||
-      fastStartsWithFrom(line, /* #@# */ '@#', afterSharpIndex)
+      afterSharpCharCode === 35 /* '#'*/ ||
+      (afterSharpCharCode === 64 /* '@' */ &&
+        fastStartsWithFrom(line, /* #@# */ '@#', afterSharpIndex))
     ) {
       // Parse supported cosmetic filter
       // `##` `#@#`
       return FilterType.COSMETIC;
+    } else if (
+      (afterSharpCharCode === 64 /* '@'*/ &&
+        (fastStartsWithFrom(line, /* #@$# */ '@$#', afterSharpIndex) ||
+          fastStartsWithFrom(line, /* #@%# */ '@%#', afterSharpIndex))) ||
+      (afterSharpCharCode === 37 /* '%' */ &&
+        fastStartsWithFrom(line, /* #%# */ '%#', afterSharpIndex)) ||
+      (afterSharpCharCode === 36 /* '$' */ &&
+        fastStartsWithFrom(line, /* #$# */ '$#', afterSharpIndex)) ||
+      (afterSharpCharCode === 63 /* '?' */ &&
+        fastStartsWithFrom(line, /* #?# */ '?#', afterSharpIndex))
+    ) {
+      // Ignore Adguard cosmetics
+      // `#$#` `#@$#`
+      // `#%#` `#@%#`
+      // `#?#`
+      return FilterType.NOT_SUPPORTED;
     }
   }
 
@@ -95,26 +132,31 @@ export function parseFilters(
 
   const networkFilters: NetworkFilter[] = [];
   const cosmeticFilters: CosmeticFilter[] = [];
-
   const lines = list.split('\n');
 
   for (let i = 0; i < lines.length; i += 1) {
-    const line = lines[i].trim();
+    let line = lines[i];
 
-    if (line.length > 0) {
-      const filterType = detectFilterType(line);
+    // Check if `line` should be trimmed before parsing
+    const isTrimmingNeeded =
+      line.length > 1 && (line.charCodeAt(0) <= 32 || line.charCodeAt(line.length - 1) <= 32);
+    if (isTrimmingNeeded) {
+      line = line.trim();
+    }
 
-      if (filterType === FilterType.NETWORK && config.loadNetworkFilters) {
-        const filter = NetworkFilter.parse(line, config.debug);
-        if (filter !== null) {
-          networkFilters.push(filter);
-        }
-      } else if (filterType === FilterType.COSMETIC && config.loadCosmeticFilters) {
-        const filter = CosmeticFilter.parse(line, config.debug);
-        if (filter !== null) {
-          if (config.loadGenericCosmeticsFilters === true || filter.isGenericHide() === false) {
-            cosmeticFilters.push(filter);
-          }
+    // Detect whether the filter is unsupported, network or cosmetic
+    const filterType = detectFilterType(line);
+
+    if (filterType === FilterType.NETWORK && config.loadNetworkFilters === true) {
+      const filter = NetworkFilter.parse(line, config.debug);
+      if (filter !== null) {
+        networkFilters.push(filter);
+      }
+    } else if (filterType === FilterType.COSMETIC && config.loadCosmeticFilters === true) {
+      const filter = CosmeticFilter.parse(line, config.debug);
+      if (filter !== null) {
+        if (config.loadGenericCosmeticsFilters === true || filter.isGenericHide() === false) {
+          cosmeticFilters.push(filter);
         }
       }
     }

From 67181cd82537ab58305b0c7552f46830c3c26d79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Berson?= <remi@cliqz.com>
Date: Wed, 24 Apr 2019 11:59:59 +0200
Subject: [PATCH 3/4] speed-up CosmeticFilter.parse (15% gain)

---
 bench/micro.js          |  2 +-
 src/filters/cosmetic.ts | 78 +++++++++++++++++++++++------------------
 2 files changed, 44 insertions(+), 36 deletions(-)

diff --git a/bench/micro.js b/bench/micro.js
index c7d936a069..f688c38f9b 100644
--- a/bench/micro.js
+++ b/bench/micro.js
@@ -31,7 +31,7 @@ function benchStringHashing({ filters }) {
 function benchStringTokenize({ filters }) {
   let dummy = 0;
   for (let i = 0; i < filters.length; i += 1) {
-    dummy = (dummy + tokenize(filters[i]).length) >>> ;
+    dummy = (dummy + tokenize(filters[i]).length) >>> 0;
   }
   return dummy;
 }
diff --git a/src/filters/cosmetic.ts b/src/filters/cosmetic.ts
index ca1ccae395..f72cd4d919 100644
--- a/src/filters/cosmetic.ts
+++ b/src/filters/cosmetic.ts
@@ -179,10 +179,9 @@ export default class CosmeticFilter implements IFilter {
    * used to parse tens of thousands of lines.
    */
   public static parse(line: string, debug: boolean = false): CosmeticFilter | null {
-    // Mask to store attributes
-    // Each flag (unhide, scriptInject, etc.) takes only 1 bit
-    // at a specific offset defined in COSMETICS_MASK.
-    // cf: COSMETICS_MASK for the offset of each property
+    // Mask to store attributes. Each flag (unhide, scriptInject, etc.) takes
+    // only 1 bit, at a specific offset; see COSMETICS_MASK for the offset of
+    // each property.
     let mask = 0;
     let selector: string | undefined;
     let hostnames: Uint32Array | undefined;
@@ -225,38 +224,40 @@ export default class CosmeticFilter implements IFilter {
       const hostnamesArray: number[] = [];
       const notHostnamesArray: number[] = [];
 
-      // TODO - this could be done without any string copy
-      line
-        .slice(0, sharpIndex)
-        .split(',')
-        .forEach((hostname) => {
-          if (hasUnicode(hostname)) {
-            hostname = toASCII(hostname);
-            mask = setBit(mask, COSMETICS_MASK.isUnicode);
-          }
+      const parts = line.slice(0, sharpIndex).split(',');
+      for (let i = 0; i < parts.length; i += 1) {
+        let hostname = parts[i];
+        if (hasUnicode(hostname)) {
+          hostname = toASCII(hostname);
+          mask = setBit(mask, COSMETICS_MASK.isUnicode);
+        }
 
-          const negation: boolean = hostname[0] === '~';
-          const entity: boolean = hostname.endsWith('.*');
+        const negation: boolean = hostname.charCodeAt(0) === 126 /* '~' */;
+        const entity: boolean =
+            hostname.charCodeAt(hostname.length - 1) === 42 /* '*' */ &&
+            hostname.charCodeAt(hostname.length - 2) === 46 /* '.' */;
 
-          const start: number = negation ? 1 : 0;
-          const end: number = entity ? hostname.length - 2 : hostname.length;
+        const start: number = negation ? 1 : 0;
+        const end: number = entity ? hostname.length - 2 : hostname.length;
 
-          const hash = hashHostnameBackward(hostname.slice(start, end));
+        const hash = hashHostnameBackward(
+          negation === true || entity === true ? hostname.slice(start, end) : hostname,
+        );
 
-          if (negation) {
-            if (entity) {
-              notEntitiesArray.push(hash);
-            } else {
-              notHostnamesArray.push(hash);
-            }
+        if (negation) {
+          if (entity) {
+            notEntitiesArray.push(hash);
           } else {
-            if (entity) {
-              entitiesArray.push(hash);
-            } else {
-              hostnamesArray.push(hash);
-            }
+            notHostnamesArray.push(hash);
           }
-        });
+        } else {
+          if (entity) {
+            entitiesArray.push(hash);
+          } else {
+            hostnamesArray.push(hash);
+          }
+        }
+      }
 
       if (entitiesArray.length !== 0) {
         entities = new Uint32Array(entitiesArray).sort();
@@ -276,14 +277,16 @@ export default class CosmeticFilter implements IFilter {
     }
 
     // We should not have unhide without any hostname
-    // NOTE: it does not make sense either to only have a negated domain or
-    // entity (e.g.: ~domain.com or ~entity.*), these are thus ignored.
     if (getBit(mask, COSMETICS_MASK.unhide) && hostnames === undefined && entities === undefined) {
       return null;
     }
 
     // Deal with script:inject and script:contains
-    if (fastStartsWithFrom(line, 'script:', suffixStartIndex)) {
+    if (
+      line.length - suffixStartIndex > 7 &&
+      line.charCodeAt(suffixStartIndex) === 115 /* 's' */ &&
+      fastStartsWithFrom(line, 'script:', suffixStartIndex)
+    ) {
       //      script:inject(.......)
       //                    ^      ^
       //   script:contains(/......./)
@@ -306,7 +309,11 @@ export default class CosmeticFilter implements IFilter {
       }
 
       selector = line.slice(scriptSelectorIndexStart, scriptSelectorIndexEnd);
-    } else if (fastStartsWithFrom(line, '+js(', suffixStartIndex)) {
+    } else if (
+      line.length - suffixStartIndex > 4 &&
+      line.charCodeAt(suffixStartIndex) === 43 /* '+' */ &&
+      fastStartsWithFrom(line, '+js(', suffixStartIndex)
+    ) {
       mask = setBit(mask, COSMETICS_MASK.scriptInject);
       selector = line.slice(suffixStartIndex + 4, line.length - 1);
     } else {
@@ -663,7 +670,8 @@ export default class CosmeticFilter implements IFilter {
 
     // Note, we do not need to use negated domains or entities as tokens here
     // since they will by definition not match on their own, unless accompanied
-    // by a domain or entity.
+    // by a domain or entity. Instead, they are handled in
+    // `CosmeticFilterBucket.getCosmeticsFilters`.
 
     if (this.hostnames !== undefined) {
       for (let i = 0; i < this.hostnames.length; i += 1) {

From e0ba6ed23895b0d6fe47d9e10474ce2161f2d23f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Berson?= <remi@cliqz.com>
Date: Wed, 24 Apr 2019 15:01:51 +0200
Subject: [PATCH 4/4] speed-up NetworkFilter.parse (10% gain)

---
 src/filters/network.ts | 165 +++++++++++++++++++++--------------------
 src/utils.ts           |   4 +-
 2 files changed, 87 insertions(+), 82 deletions(-)

diff --git a/src/filters/network.ts b/src/filters/network.ts
index 7605c67f25..396c911cb1 100644
--- a/src/filters/network.ts
+++ b/src/filters/network.ts
@@ -13,6 +13,8 @@ import {
   fastStartsWithFrom,
   getBit,
   hasUnicode,
+  isAlpha,
+  isDigit,
   setBit,
   tokenizeFilterInPlace,
   tokenizeInPlace,
@@ -23,6 +25,12 @@ const TOKENS_BUFFER = new TokensBuffer(200);
 const HTTP_HASH = fastHash('http');
 const HTTPS_HASH = fastHash('https');
 
+function isAllowedHostname(ch: number): boolean {
+  return (
+    isDigit(ch) || isAlpha(ch) || ch === 95 /* '_' */ || ch === 45 /* '-' */ || ch === 46 /* '.' */
+  );
+}
+
 /**
  * Masks used to store options of network filters in a bitmask.
  */
@@ -142,8 +150,6 @@ function computeFilterId(
   return hash >>> 0;
 }
 
-const SEPARATOR = /[/^*]/;
-
 /**
  * Compiles a filter pattern to a regex. This is only performed *lazily* for
  * filters containing at least a * or ^ symbol. Because Regexes are expansive,
@@ -208,7 +214,7 @@ export default class NetworkFilter implements IFilter {
     let filterIndexEnd: number = line.length;
 
     // @@filter == Exception
-    if (fastStartsWith(line, '@@')) {
+    if (line.charCodeAt(0) === 64 /* '@' */ && line.charCodeAt(1) === 64 /* '@' */) {
       filterIndexStart += 2;
       mask = setBit(mask, NETWORK_FILTER_MASK.isException);
     }
@@ -225,29 +231,19 @@ export default class NetworkFilter implements IFilter {
 
       // --------------------------------------------------------------------- //
       // parseOptions
-      // TODO: This could be implemented without string copy,
-      // using indices, like in main parsing functions.
-      const rawOptions = line.slice(optionsIndex + 1);
-      const options = rawOptions.split(',');
+      // --------------------------------------------------------------------- //
+      const options = line.slice(optionsIndex + 1).split(',');
       for (let i = 0; i < options.length; i += 1) {
         const rawOption = options[i];
-        let negation = false;
-        let option = rawOption;
-
-        // Check for negation: ~option
-        if (fastStartsWith(option, '~')) {
-          negation = true;
-          option = option.slice(1);
-        } else {
-          negation = false;
-        }
+        const negation = rawOption.charCodeAt(0) === 126 /* '~' */;
+        let option = negation === true ? rawOption.slice(1) : rawOption;
 
         // Check for options: option=value1|value2
         let optionValue: string = '';
-        if (option.indexOf('=') !== -1) {
-          const optionAndValues = option.split('=', 2);
-          option = optionAndValues[0];
-          optionValue = optionAndValues[1];
+        const indexOfEqual: number = option.indexOf('=');
+        if (indexOfEqual !== -1) {
+          optionValue = option.slice(indexOfEqual + 1);
+          option = option.slice(0, indexOfEqual);
         }
 
         switch (option) {
@@ -259,7 +255,7 @@ export default class NetworkFilter implements IFilter {
             for (let j = 0; j < optionValues.length; j += 1) {
               const value: string = optionValues[j];
               if (value) {
-                if (fastStartsWith(value, '~')) {
+                if (value.charCodeAt(0) === 126 /* '~' */) {
                   optNotDomainsArray.push(fastHash(value.slice(1)));
                 } else {
                   optDomainsArray.push(fastHash(value));
@@ -417,72 +413,77 @@ export default class NetworkFilter implements IFilter {
     // Identify kind of pattern
 
     // Deal with hostname pattern
-    if (line[filterIndexEnd - 1] === '|') {
+    if (line.charCodeAt(filterIndexEnd - 1) === 124 /* '|' */) {
       mask = setBit(mask, NETWORK_FILTER_MASK.isRightAnchor);
       filterIndexEnd -= 1;
     }
 
-    if (fastStartsWithFrom(line, '||', filterIndexStart)) {
-      mask = setBit(mask, NETWORK_FILTER_MASK.isHostnameAnchor);
-      filterIndexStart += 2;
-    } else if (line[filterIndexStart] === '|') {
-      mask = setBit(mask, NETWORK_FILTER_MASK.isLeftAnchor);
-      filterIndexStart += 1;
+    if (line.charCodeAt(filterIndexStart) === 124 /* '|' */) {
+      if (line.charCodeAt(filterIndexStart + 1) === 124 /* '|' */) {
+        mask = setBit(mask, NETWORK_FILTER_MASK.isHostnameAnchor);
+        filterIndexStart += 2;
+      } else {
+        mask = setBit(mask, NETWORK_FILTER_MASK.isLeftAnchor);
+        filterIndexStart += 1;
+      }
     }
 
-    const isRegex = checkIsRegex(line, filterIndexStart, filterIndexEnd);
-    mask = setNetworkMask(mask, NETWORK_FILTER_MASK.isRegex, isRegex);
+    // const isRegex = checkIsRegex(line, filterIndexStart, filterIndexEnd);
+    // mask = setNetworkMask(mask, NETWORK_FILTER_MASK.isRegex, isRegex);
 
     if (getBit(mask, NETWORK_FILTER_MASK.isHostnameAnchor)) {
-      if (isRegex) {
-        // Split at the first '/', '*' or '^' character to get the hostname
-        // and then the pattern.
-        // TODO - this could be made more efficient if we could match between two
-        // indices. Once again, we have to do more work than is really needed.
-        const firstSeparator = line.search(SEPARATOR);
-        // NOTE: `firstSeparator` shall never be -1 here since `isRegex` is true.
-        // This means there must be at least an occurrence of `*` or `^`
-        // somewhere.
+      // Split at the first character which is not allowed in a hostname
+      let firstSeparator = filterIndexStart;
+      while (
+        firstSeparator < filterIndexEnd &&
+        isAllowedHostname(line.charCodeAt(firstSeparator)) === true
+      ) {
+        firstSeparator += 1;
+      }
 
+      // No separator found: the hostname spans the rest of the pattern
+      if (firstSeparator === filterIndexEnd) {
+        hostname = line.slice(filterIndexStart, filterIndexEnd);
+        filterIndexStart = filterIndexEnd;
+        // mask = setBit(mask, NETWORK_FILTER_MASK.isLeftAnchor);
+      } else {
+        // Found a separator
         hostname = line.slice(filterIndexStart, firstSeparator);
         filterIndexStart = firstSeparator;
-
-        // If the only symbol remaining for the selector is '^' then ignore it
-        // but set the filter as right anchored since there should not be any
-        // other label on the right
-        if (filterIndexEnd - filterIndexStart === 1 && line[filterIndexStart] === '^') {
-          mask = clearBit(mask, NETWORK_FILTER_MASK.isRegex);
-          filterIndexStart = filterIndexEnd;
-          mask = setNetworkMask(mask, NETWORK_FILTER_MASK.isRightAnchor, true);
+        const separatorCode = line.charCodeAt(firstSeparator);
+
+        if (separatorCode === 94 /* '^' */) {
+          // If the only symbol remaining for the selector is '^' then ignore it
+          // but set the filter as right anchored since there should not be any
+          // other label on the right
+          if (filterIndexEnd - filterIndexStart === 1) {
+            filterIndexStart = filterIndexEnd;
+            mask = setBit(mask, NETWORK_FILTER_MASK.isRightAnchor);
+          } else {
+            mask = setBit(mask, NETWORK_FILTER_MASK.isRegex);
+            mask = setBit(mask, NETWORK_FILTER_MASK.isLeftAnchor);
+          }
+        } else if (separatorCode === 42 /* '*' */) {
+          mask = setBit(mask, NETWORK_FILTER_MASK.isRegex);
         } else {
-          mask = setNetworkMask(mask, NETWORK_FILTER_MASK.isLeftAnchor, true);
-          mask = setNetworkMask(
-            mask,
-            NETWORK_FILTER_MASK.isRegex,
-            checkIsRegex(line, filterIndexStart, filterIndexEnd),
-          );
-        }
-      } else {
-        // Look for next /
-        const slashIndex = line.indexOf('/', filterIndexStart);
-        if (slashIndex !== -1) {
-          hostname = line.slice(filterIndexStart, slashIndex);
-          filterIndexStart = slashIndex;
           mask = setBit(mask, NETWORK_FILTER_MASK.isLeftAnchor);
-        } else {
-          hostname = line.slice(filterIndexStart, filterIndexEnd);
-          filterIndexStart = filterIndexEnd;
         }
       }
     }
 
     // Remove trailing '*'
-    if (filterIndexEnd - filterIndexStart > 0 && line[filterIndexEnd - 1] === '*') {
+    if (
+      filterIndexEnd - filterIndexStart > 0 &&
+      line.charCodeAt(filterIndexEnd - 1) === 42 /* '*' */
+    ) {
       filterIndexEnd -= 1;
     }
 
     // Remove leading '*' if the filter is not hostname anchored.
-    if (filterIndexEnd - filterIndexStart > 0 && line[filterIndexStart] === '*') {
+    if (
+      filterIndexEnd - filterIndexStart > 0 &&
+      line.charCodeAt(filterIndexStart) === 42 /* '*' */
+    ) {
       mask = clearBit(mask, NETWORK_FILTER_MASK.isLeftAnchor);
       filterIndexStart += 1;
     }
@@ -527,11 +528,13 @@ export default class NetworkFilter implements IFilter {
     if (filterIndexEnd - filterIndexStart > 0) {
       filter = line.slice(filterIndexStart, filterIndexEnd).toLowerCase();
       mask = setNetworkMask(mask, NETWORK_FILTER_MASK.isUnicode, hasUnicode(filter));
-      mask = setNetworkMask(
-        mask,
-        NETWORK_FILTER_MASK.isRegex,
-        checkIsRegex(filter, 0, filter.length),
-      );
+      if (getBit(mask, NETWORK_FILTER_MASK.isRegex) === false) {
+        mask = setNetworkMask(
+          mask,
+          NETWORK_FILTER_MASK.isRegex,
+          checkIsRegex(filter, 0, filter.length),
+        );
+      }
     }
 
     // TODO
@@ -1233,15 +1236,16 @@ function setNetworkMask(mask: number, m: number, value: boolean): number {
 
 /**
  * Check if the sub-string contained between the indices start and end is a
- * regex filter (it contains a '*' or '^' char). Here we are limited by the
- * capability of javascript to check the presence of a pattern between two
- * indices (same for Regex...).
- * // TODO - we could use sticky regex here
+ * regex filter (it contains a '*' or '^' char).
  */
 function checkIsRegex(filter: string, start: number, end: number): boolean {
-  const starIndex = filter.indexOf('*', start);
-  const separatorIndex = filter.indexOf('^', start);
-  return (starIndex !== -1 && starIndex < end) || (separatorIndex !== -1 && separatorIndex < end);
+  const indexOfSeparator = filter.indexOf('^', start);
+  if (indexOfSeparator !== -1 && indexOfSeparator < end) {
+    return true;
+  }
+
+  const indexOfWildcard = filter.indexOf('*', start);
+  return indexOfWildcard !== -1 && indexOfWildcard < end;
 }
 
 /**
@@ -1428,14 +1432,15 @@ function checkPatternHostnameLeftRightAnchorFilter(
 // ||pattern + left-anchor => This means that a plain pattern needs to appear
 // exactly after the hostname, with nothing in between.
 function checkPatternHostnameLeftAnchorFilter(filter: NetworkFilter, request: Request): boolean {
-  if (isAnchoredByHostname(filter.getHostname(), request.hostname)) {
+  const filterHostname = filter.getHostname();
+  if (isAnchoredByHostname(filterHostname, request.hostname)) {
     // Since this is not a regex, the filter pattern must follow the hostname
     // with nothing in between. So we extract the part of the URL following
     // after hostname and will perform the matching on it.
     return fastStartsWithFrom(
       request.url,
       filter.getFilter(),
-      request.url.indexOf(filter.getHostname()) + filter.getHostname().length,
+      request.url.indexOf(filterHostname) + filterHostname.length,
     );
   }
 
diff --git a/src/utils.ts b/src/utils.ts
index 2e895b00cb..ca6fc259d3 100644
--- a/src/utils.ts
+++ b/src/utils.ts
@@ -73,13 +73,13 @@ export function fastStartsWithFrom(haystack: string, needle: string, start: numb
 }
 
 // Efficient manuel lexer
-function isDigit(ch: number): boolean {
+export function isDigit(ch: number): boolean {
   // 48 == '0'
   // 57 == '9'
   return ch >= 48 && ch <= 57;
 }
 
-function isAlpha(ch: number): boolean {
+export function isAlpha(ch: number): boolean {
   // Force to lower-case
   ch |= 32;
   // 65 == 'A'