Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(ext/url): improve URLPattern perf #21488

Merged
merged 2 commits into from
Dec 8, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
169 changes: 126 additions & 43 deletions ext/url/01_urlpattern.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,20 @@ import * as webidl from "ext:deno_webidl/00_webidl.js";
import { createFilteredInspectProxy } from "ext:deno_console/01_console.js";
const primordials = globalThis.__bootstrap.primordials;
const {
ArrayPrototypePop,
ArrayPrototypePush,
MathRandom,
ObjectAssign,
ObjectCreate,
ObjectPrototypeIsPrototypeOf,
RegExpPrototypeExec,
RegExpPrototypeTest,
ObjectPrototypeIsPrototypeOf,
SafeMap,
SafeRegExp,
Symbol,
SymbolFor,
TypeError,
} = primordials;

const EMPTY_MATCH = [""];
const _components = Symbol("components");

/**
Expand Down Expand Up @@ -55,10 +58,68 @@ const COMPONENTS_KEYS = [
* @property {string[]} groupNameList
*/

/**
 * Bounded key/value cache with *sampled* least-recently-used eviction.
 *
 * Design notes:
 * - The backing map iterates in insertion order, so its first key is the
 *   "cold" end of the LRU and the most recently (re-)inserted key is the
 *   "hot" end.
 * - Refreshing or inserting an entry is comparatively expensive (the author
 *   measured it at roughly half the cost of the operation being cached), so
 *   it is only done for a random sample of calls. This acts as a very cheap
 *   pseudo "young space": a key has to be requested about `1 / sampleRate`
 *   times on average before it graduates into the map, which keeps a long
 *   tail of cold keys from churning the cache. Because it is random, it
 *   occasionally misses the mark.
 * - The most recent key/value pair is always remembered in a one-entry fast
 *   path, independently of the sampling.
 *
 * @template K
 * @template V
 */
class LRUCache {
  /** @type {Map<K, V>} */
  #map = new SafeMap();
  #capacity = 0;
  /** Probability that a hit is refreshed or a miss is stored in the map. */
  #sampleRate = 0.1;

  /**
   * Starts out as a private sentinel that can never be equal to a
   * caller-supplied key, so the very first lookup (even for the key
   * `undefined`) is not mistaken for a fast-path hit.
   * @type {K | symbol}
   */
  #lastUsedKey = Symbol("LRUCache.empty");
  /** @type {V} */
  #lastUsedValue = undefined;

  /** @param {number} capacity Maximum number of entries kept in the map. */
  constructor(capacity) {
    this.#capacity = capacity;
  }

  /**
   * Returns the cached value for `key`, calling `factory(key)` to create it
   * when it is not cached.
   *
   * A value of `undefined` is indistinguishable from a miss, so it is never
   * stored in the map (it is still served by the last-used fast path).
   *
   * @param {K} key
   * @param {(key: K) => V} factory
   * @return {V}
   */
  getOrInsert(key, factory) {
    // Fast path: same key as the previous call.
    if (this.#lastUsedKey === key) return this.#lastUsedValue;

    let value = this.#map.get(key);
    if (value !== undefined) {
      if (MathRandom() < this.#sampleRate) {
        // Sampled refresh: re-insert to move the entry to the hot end.
        this.#map.delete(key);
        this.#map.set(key, value);
      }
    } else {
      // Value doesn't exist yet, create it.
      value = factory(key);
      if (
        value !== undefined &&
        this.#capacity > 0 &&
        MathRandom() < this.#sampleRate
      ) {
        // If the map is at capacity, evict the oldest (first) entry so the
        // map never holds more than `capacity` entries.
        if (this.#map.size >= this.#capacity) {
          this.#map.delete(this.#map.keys().next().value);
        }
        this.#map.set(key, value);
      }
    }

    this.#lastUsedKey = key;
    this.#lastUsedValue = value;
    return value;
  }
}

// Module-level cache of processed match inputs, created with a capacity of
// 4096. The visible call sites look an input up here (computing it with
// `ops.op_urlpattern_process_match_input` on a miss) only when no `baseURL`
// is supplied; when a `baseURL` is given they call the op directly.
const matchInputCache = new LRUCache(4096);

class URLPattern {
/** @type {Components} */
[_components];

#reusedResult;

/**
* @param {URLPatternInput} input
* @param {string} [baseURL]
Expand All @@ -81,9 +142,6 @@ class URLPattern {
components[key].regexpString,
"u",
);
// used for fast path
components[key].matchOnEmptyInput =
components[key].regexpString === "^$";
} catch (e) {
throw new TypeError(`${prefix}: ${key} is invalid; ${e.message}`);
}
Expand Down Expand Up @@ -145,20 +203,28 @@ class URLPattern {
baseURL = webidl.converters.USVString(baseURL, prefix, "Argument 2");
}

const res = ops.op_urlpattern_process_match_input(
input,
baseURL,
);
if (res === null) {
return false;
}
const res = baseURL === undefined
? matchInputCache.getOrInsert(
input,
ops.op_urlpattern_process_match_input,
)
: ops.op_urlpattern_process_match_input(input, baseURL);
if (res === null) return false;

const values = res[0];

for (let i = 0; i < COMPONENTS_KEYS.length; ++i) {
const key = COMPONENTS_KEYS[i];
if (!RegExpPrototypeTest(this[_components][key].regexp, values[key])) {
return false;
const component = this[_components][key];
switch (component.regexpString) {
case "^$":
if (values[key] !== "") return false;
break;
case "^(.*)$":
break;
default: {
if (!RegExpPrototypeTest(component.regexp, values[key])) return false;
}
}
}

Expand All @@ -179,48 +245,65 @@ class URLPattern {
baseURL = webidl.converters.USVString(baseURL, prefix, "Argument 2");
}

const res = ops.op_urlpattern_process_match_input(
input,
baseURL,
);
const res = baseURL === undefined
? matchInputCache.getOrInsert(
input,
ops.op_urlpattern_process_match_input,
)
: ops.op_urlpattern_process_match_input(input, baseURL);
if (res === null) {
return null;
}

const { 0: values, 1: inputs } = res;
if (inputs[1] === null) {
ArrayPrototypePop(inputs);
}
const { 0: values, 1: inputs } = res; /** @type {URLPatternResult} */

// globalThis.allocAttempt++;
this.#reusedResult ??= { inputs: [undefined] };
const result = this.#reusedResult;
// We don't construct the `inputs` until after the matching is done under
// the assumption that most patterns do not match.

/** @type {URLPatternResult} */
const result = { inputs };
const components = this[_components];

for (let i = 0; i < COMPONENTS_KEYS.length; ++i) {
const key = COMPONENTS_KEYS[i];
/** @type {Component} */
const component = this[_components][key];
const input = values[key];

const match = component.matchOnEmptyInput && input === ""
? EMPTY_MATCH // fast path
: RegExpPrototypeExec(component.regexp, input);
const component = components[key];

if (match === null) {
return null;
}
const res = result[key] ??= {
input: values[key],
groups: component.regexpString === "^(.*)$" ? { "0": values[key] } : {},
};

const groups = {};
const groupList = component.groupNameList;
for (let i = 0; i < groupList.length; ++i) {
groups[groupList[i]] = match[i + 1] ?? "";
switch (component.regexpString) {
case "^$":
if (values[key] !== "") return null;
break;
case "^(.*)$":
res.groups["0"] = values[key];
break;
default: {
const match = RegExpPrototypeExec(component.regexp, values[key]);
if (match === null) return null;
const groupList = component.groupNameList;
const groups = res.groups;
for (let i = 0; i < groupList.length; ++i) {
// TODO: this should use ObjectDefineProperty
groups[groupList[i]] = match[i + 1] ?? "";
}
break;
}
}

result[key] = {
input,
groups,
};
res.input = values[key];
}

// Now populate result.inputs
result.inputs[0] = typeof inputs[0] === "string"
? inputs[0]
: ObjectAssign(ObjectCreate(null), inputs[0]);
if (inputs[1] !== null) ArrayPrototypePush(result.inputs, inputs[1]);

this.#reusedResult = undefined;
return result;
}

Expand Down