Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(ext/url): improve URLPattern perf #21488

Merged
merged 2 commits into from
Dec 8, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
189 changes: 146 additions & 43 deletions ext/url/01_urlpattern.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,20 @@ import * as webidl from "ext:deno_webidl/00_webidl.js";
import { createFilteredInspectProxy } from "ext:deno_console/01_console.js";
const primordials = globalThis.__bootstrap.primordials;
const {
ArrayPrototypePop,
ArrayPrototypePush,
MathRandom,
ObjectAssign,
ObjectCreate,
ObjectPrototypeIsPrototypeOf,
RegExpPrototypeExec,
RegExpPrototypeTest,
ObjectPrototypeIsPrototypeOf,
SafeMap,
SafeRegExp,
Symbol,
SymbolFor,
TypeError,
} = primordials;

const EMPTY_MATCH = [""];
const _components = Symbol("components");

/**
Expand Down Expand Up @@ -55,10 +58,88 @@ const COMPONENTS_KEYS = [
* @property {string[]} groupNameList
*/

/**
* This implements a least-recently-used cache that has a pseudo-"young
* generation" by using sampling. The idea is that we want to keep the most
* recently used items in the cache, but we don't want to pay the cost of
* updating the cache on every access. This relies on the fact that the data
* we're caching is not uniformly distributed, and that the most recently used
* items are more likely to be used again soon (long tail distribution).
*
* The LRU cache is implemented as a Map, with the key being the cache key and
* the value being the cache value. When an item is accessed, it is moved to the
* end of the Map. When an item is inserted, if the Map is at capacity, the
* first item in the Map is deleted. Because maps iterate using insertion order,
* this means that the oldest item is always the first.
*
* The sampling is implemented by using a random number generator to decide
* whether to update the cache on each access. This means that the cache will
* not be updated on every access, but will be updated on a random subset of
* accesses.
*
* @template K
* @template V
*/
class SampledLRUCache {
/** @type {SafeMap<K, V>} */
#map = new SafeMap();
#capacity = 0;
#sampleRate = 0.1;

/** @type {K} */
#lastUsedKey = undefined;
/** @type {V} */
#lastUsedValue = undefined;

/** @param {number} capacity */
constructor(capacity) {
this.#capacity = capacity;
}

/**
* @param {K} key
* @param {(key: K) => V} factory
* @return {V}
*/
getOrInsert(key, factory) {
if (this.#lastUsedKey === key) return this.#lastUsedValue;
const value = this.#map.get(key);
if (value !== undefined) {
if (MathRandom() < this.#sampleRate) {
// put the item into the map
this.#map.delete(key);
this.#map.set(key, value);
}
this.#lastUsedKey = key;
this.#lastUsedValue = value;
return value;
} else {
// value doesn't exist yet, create
const value = factory(key);
if (MathRandom() < this.#sampleRate) {
// if the map is at capacity, delete the oldest (first) element
if (this.#map.size > this.#capacity) {
// deno-lint-ignore prefer-primordials
this.#map.delete(this.#map.keys().next().value);
}
// insert the new value
this.#map.set(key, value);
}
this.#lastUsedKey = key;
this.#lastUsedValue = value;
return value;
}
}
}

const matchInputCache = new SampledLRUCache(4096);

class URLPattern {
/** @type {Components} */
[_components];

#reusedResult;

/**
* @param {URLPatternInput} input
* @param {string} [baseURL]
Expand All @@ -81,9 +162,6 @@ class URLPattern {
components[key].regexpString,
"u",
);
// used for fast path
components[key].matchOnEmptyInput =
components[key].regexpString === "^$";
} catch (e) {
throw new TypeError(`${prefix}: ${key} is invalid; ${e.message}`);
}
Expand Down Expand Up @@ -145,20 +223,28 @@ class URLPattern {
baseURL = webidl.converters.USVString(baseURL, prefix, "Argument 2");
}

const res = ops.op_urlpattern_process_match_input(
input,
baseURL,
);
if (res === null) {
return false;
}
const res = baseURL === undefined
? matchInputCache.getOrInsert(
input,
ops.op_urlpattern_process_match_input,
)
: ops.op_urlpattern_process_match_input(input, baseURL);
if (res === null) return false;

const values = res[0];

for (let i = 0; i < COMPONENTS_KEYS.length; ++i) {
const key = COMPONENTS_KEYS[i];
if (!RegExpPrototypeTest(this[_components][key].regexp, values[key])) {
return false;
const component = this[_components][key];
switch (component.regexpString) {
case "^$":
if (values[key] !== "") return false;
break;
case "^(.*)$":
break;
default: {
if (!RegExpPrototypeTest(component.regexp, values[key])) return false;
}
}
}

Expand All @@ -179,48 +265,65 @@ class URLPattern {
baseURL = webidl.converters.USVString(baseURL, prefix, "Argument 2");
}

const res = ops.op_urlpattern_process_match_input(
input,
baseURL,
);
const res = baseURL === undefined
? matchInputCache.getOrInsert(
input,
ops.op_urlpattern_process_match_input,
)
: ops.op_urlpattern_process_match_input(input, baseURL);
if (res === null) {
return null;
}

const { 0: values, 1: inputs } = res;
if (inputs[1] === null) {
ArrayPrototypePop(inputs);
}
const { 0: values, 1: inputs } = res; /** @type {URLPatternResult} */

// globalThis.allocAttempt++;
this.#reusedResult ??= { inputs: [undefined] };
const result = this.#reusedResult;
// We don't construct the `inputs` until after the matching is done under
// the assumption that most patterns do not match.

/** @type {URLPatternResult} */
const result = { inputs };
const components = this[_components];

for (let i = 0; i < COMPONENTS_KEYS.length; ++i) {
const key = COMPONENTS_KEYS[i];
/** @type {Component} */
const component = this[_components][key];
const input = values[key];

const match = component.matchOnEmptyInput && input === ""
? EMPTY_MATCH // fast path
: RegExpPrototypeExec(component.regexp, input);
const component = components[key];

if (match === null) {
return null;
}
const res = result[key] ??= {
input: values[key],
groups: component.regexpString === "^(.*)$" ? { "0": values[key] } : {},
};

const groups = {};
const groupList = component.groupNameList;
for (let i = 0; i < groupList.length; ++i) {
groups[groupList[i]] = match[i + 1] ?? "";
switch (component.regexpString) {
case "^$":
if (values[key] !== "") return null;
break;
case "^(.*)$":
res.groups["0"] = values[key];
break;
default: {
const match = RegExpPrototypeExec(component.regexp, values[key]);
if (match === null) return null;
const groupList = component.groupNameList;
const groups = res.groups;
for (let i = 0; i < groupList.length; ++i) {
// TODO(lucacasonato): this is vulnerable to override mistake
groups[groupList[i]] = match[i + 1] ?? "";
}
break;
}
}

result[key] = {
input,
groups,
};
res.input = values[key];
}

// Now populate result.inputs
result.inputs[0] = typeof inputs[0] === "string"
? inputs[0]
: ObjectAssign(ObjectCreate(null), inputs[0]);
if (inputs[1] !== null) ArrayPrototypePush(result.inputs, inputs[1]);

this.#reusedResult = undefined;
return result;
}

Expand Down