Skip to content

Commit

Permalink
perf(ext/url): cleanup and optimize url parsing op args (#11763)
Browse files Browse the repository at this point in the history
This splits the previous `op_url_parse` into:
- `op_url_parse`: parses a href with an optional base
- `op_url_reparse`: reparses a href with a modifier

This is a cleaner separation of concerns, and it allows us to optimize & simplify the args passed, resulting in a 25% reduction in call overhead (~5000ns/call => ~3700ns/call in the url_ops bench on my M1 Air).
  • Loading branch information
AaronO committed Aug 18, 2021
1 parent e554546 commit bf0bacb
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 97 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

75 changes: 28 additions & 47 deletions ext/url/00_url.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,22 @@
const _list = Symbol("list");
const _urlObject = Symbol("url object");

// Numeric setter codes sent to the "op_url_reparse" op to select which URL
// component should be replaced.
// WARNING: must match rust code's UrlSetter::* (the values are the enum's
// discriminants, so changing one side without the other breaks the setters).
const SET_HASH = 1;
const SET_HOST = 2;
const SET_HOSTNAME = 3;
const SET_PASSWORD = 4;
const SET_PATHNAME = 5;
const SET_PORT = 6;
const SET_PROTOCOL = 7;
const SET_SEARCH = 8;
const SET_USERNAME = 9;

/**
 * Thin wrapper around the "op_url_reparse" op.
 *
 * @param href   URL string to reparse.
 * @param setter One of the numeric SET_* codes selecting the component to set.
 * @param value  New value for the selected component.
 * @returns Whatever the op returns (callers store it as the parsed URL parts).
 */
function opUrlReparse(href, setter, value) {
  // The op takes the setter code and its value bundled as a 2-element array.
  const setterArgs = [setter, value];
  return core.opSync("op_url_reparse", href, setterArgs);
}

class URLSearchParams {
[_list];
[_urlObject] = null;
Expand Down Expand Up @@ -78,11 +94,7 @@
if (url === null) {
return;
}
const parts = core.opSync("op_url_parse", {
href: url.href,
setSearch: this.toString(),
});
url[_url] = parts;
url[_url] = opUrlReparse(url.href, SET_SEARCH, this.toString());
}

/**
Expand Down Expand Up @@ -277,9 +289,7 @@
});
}
this[webidl.brand] = webidl.brand;

const parts = core.opSync("op_url_parse", { href: url, baseHref: base });
this[_url] = parts;
this[_url] = core.opSync("op_url_parse", url, base);
}

[SymbolFor("Deno.privateCustomInspect")](inspect) {
Expand Down Expand Up @@ -326,10 +336,7 @@
context: "Argument 1",
});
try {
this[_url] = core.opSync("op_url_parse", {
href: this[_url].href,
setHash: value,
});
this[_url] = opUrlReparse(this[_url].href, SET_HASH, value);
} catch {
/* pass */
}
Expand All @@ -351,10 +358,7 @@
context: "Argument 1",
});
try {
this[_url] = core.opSync("op_url_parse", {
href: this[_url].href,
setHost: value,
});
this[_url] = opUrlReparse(this[_url].href, SET_HOST, value);
} catch {
/* pass */
}
Expand All @@ -376,10 +380,7 @@
context: "Argument 1",
});
try {
this[_url] = core.opSync("op_url_parse", {
href: this[_url].href,
setHostname: value,
});
this[_url] = opUrlReparse(this[_url].href, SET_HOSTNAME, value);
} catch {
/* pass */
}
Expand All @@ -400,9 +401,7 @@
prefix,
context: "Argument 1",
});
this[_url] = core.opSync("op_url_parse", {
href: value,
});
this[_url] = core.opSync("op_url_parse", value);
this.#updateSearchParams();
}

Expand All @@ -428,10 +427,7 @@
context: "Argument 1",
});
try {
this[_url] = core.opSync("op_url_parse", {
href: this[_url].href,
setPassword: value,
});
this[_url] = opUrlReparse(this[_url].href, SET_PASSWORD, value);
} catch {
/* pass */
}
Expand All @@ -453,10 +449,7 @@
context: "Argument 1",
});
try {
this[_url] = core.opSync("op_url_parse", {
href: this[_url].href,
setPathname: value,
});
this[_url] = opUrlReparse(this[_url].href, SET_PATHNAME, value);
} catch {
/* pass */
}
Expand All @@ -478,10 +471,7 @@
context: "Argument 1",
});
try {
this[_url] = core.opSync("op_url_parse", {
href: this[_url].href,
setPort: value,
});
this[_url] = opUrlReparse(this[_url].href, SET_PORT, value);
} catch {
/* pass */
}
Expand All @@ -503,10 +493,7 @@
context: "Argument 1",
});
try {
this[_url] = core.opSync("op_url_parse", {
href: this[_url].href,
setProtocol: value,
});
this[_url] = opUrlReparse(this[_url].href, SET_PROTOCOL, value);
} catch {
/* pass */
}
Expand All @@ -528,10 +515,7 @@
context: "Argument 1",
});
try {
this[_url] = core.opSync("op_url_parse", {
href: this[_url].href,
setSearch: value,
});
this[_url] = opUrlReparse(this[_url].href, SET_SEARCH, value);
this.#updateSearchParams();
} catch {
/* pass */
Expand All @@ -554,10 +538,7 @@
context: "Argument 1",
});
try {
this[_url] = core.opSync("op_url_parse", {
href: this[_url].href,
setUsername: value,
});
this[_url] = opUrlReparse(this[_url].href, SET_USERNAME, value);
} catch {
/* pass */
}
Expand Down
1 change: 1 addition & 0 deletions ext/url/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ deno_core = { version = "0.97.0", path = "../../core" }
idna = "0.2.3"
percent-encoding = "2.1.0"
serde = { version = "1.0.126", features = ["derive"] }
serde_repr = "0.1.7"

[dev-dependencies]
deno_bench_util = { version = "0.9.0", path = "../../bench_util" }
Expand Down
114 changes: 64 additions & 50 deletions ext/url/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ use deno_core::url::quirks;
use deno_core::url::Url;
use deno_core::Extension;
use deno_core::ZeroCopyBuf;
use serde::Deserialize;
use serde::Serialize;
use std::panic::catch_unwind;
use std::path::PathBuf;
Expand All @@ -24,6 +23,7 @@ pub fn init() -> Extension {
))
.ops(vec![
("op_url_parse", op_sync(op_url_parse)),
("op_url_reparse", op_sync(op_url_reparse)),
(
"op_url_parse_search_params",
op_sync(op_url_parse_search_params),
Expand All @@ -36,24 +36,6 @@ pub fn init() -> Extension {
.build()
}

/// Arguments accepted by `op_url_parse`, deserialized from an object whose
/// keys are the camelCase forms of the field names (e.g. `baseHref`,
/// `setHash`).
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct UrlParseArgs {
// The URL string to parse.
href: String,
// Optional base URL that `href` is parsed against.
base_href: Option<String>,
// If one of the following is present, this is a setter call. Apply the
// proper `Url::set_*()` method after (re)parsing `href`.
// Only the first field that is present (in the order listed here) is applied.
set_hash: Option<String>,
set_host: Option<String>,
set_hostname: Option<String>,
set_password: Option<String>,
set_pathname: Option<String>,
set_port: Option<String>,
set_protocol: Option<String>,
set_search: Option<String>,
set_username: Option<String>,
}

#[derive(Serialize)]
pub struct UrlParts {
href: String,
Expand All @@ -73,56 +55,88 @@ pub struct UrlParts {
/// optional part to "set" after parsing. Return `UrlParts`.
pub fn op_url_parse(
_state: &mut deno_core::OpState,
args: UrlParseArgs,
_: (),
href: String,
base_href: Option<String>,
) -> Result<UrlParts, AnyError> {
let base_url = args
.base_href
let base_url = base_href
.as_ref()
.map(|b| Url::parse(b).map_err(|_| type_error("Invalid base URL")))
.transpose()?;
let mut url = Url::options()
let url = Url::options()
.base_url(base_url.as_ref())
.parse(&args.href)
.parse(&href)
.map_err(|_| type_error("Invalid URL"))?;

if let Some(hash) = args.set_hash.as_ref() {
quirks::set_hash(&mut url, hash);
} else if let Some(host) = args.set_host.as_ref() {
quirks::set_host(&mut url, host).map_err(|_| uri_error("Invalid host"))?;
} else if let Some(hostname) = args.set_hostname.as_ref() {
quirks::set_hostname(&mut url, hostname)
.map_err(|_| uri_error("Invalid hostname"))?;
} else if let Some(password) = args.set_password.as_ref() {
quirks::set_password(&mut url, password)
.map_err(|_| uri_error("Invalid password"))?;
} else if let Some(pathname) = args.set_pathname.as_ref() {
quirks::set_pathname(&mut url, pathname);
} else if let Some(port) = args.set_port.as_ref() {
quirks::set_port(&mut url, port).map_err(|_| uri_error("Invalid port"))?;
} else if let Some(protocol) = args.set_protocol.as_ref() {
quirks::set_protocol(&mut url, protocol)
.map_err(|_| uri_error("Invalid protocol"))?;
} else if let Some(search) = args.set_search.as_ref() {
quirks::set_search(&mut url, search);
} else if let Some(username) = args.set_username.as_ref() {
quirks::set_username(&mut url, username)
.map_err(|_| uri_error("Invalid username"))?;
url_result(url, href, base_href)
}

/// Selects which component of a URL `op_url_reparse` should replace.
///
/// The enum is (de)serialized as its `u8` discriminant via `serde_repr`, so
/// the explicit values below are part of the wire format. They must stay in
/// sync with the `SET_*` constants in `00_url.js`.
#[derive(
serde_repr::Serialize_repr, serde_repr::Deserialize_repr, PartialEq, Debug,
)]
#[repr(u8)]
pub enum UrlSetter {
Hash = 1,
Host = 2,
Hostname = 3,
Password = 4,
Pathname = 5,
Port = 6,
Protocol = 7,
Search = 8,
Username = 9,
}

/// Re-parses `href`, applies a single component setter to the parsed URL and
/// returns the resulting `UrlParts`.
///
/// `setter_opts` is the pair `(component to set, new value)`.
///
/// # Errors
/// - a type error (`"Invalid URL"`) when `href` does not parse;
/// - a URI error (`"Invalid host"`, `"Invalid port"`, ...) when the selected
///   setter rejects the value; the hash, pathname and search setters do not
///   report failures;
/// - any error produced by `url_result`.
pub fn op_url_reparse(
  _state: &mut deno_core::OpState,
  href: String,
  setter_opts: (UrlSetter, String),
) -> Result<UrlParts, AnyError> {
  let (component, raw_value) = setter_opts;
  let new_value = raw_value.as_str();

  let mut parsed = match Url::options().parse(&href) {
    Ok(parsed) => parsed,
    Err(_) => return Err(type_error("Invalid URL")),
  };

  // Each arm yields `Err(message)` only when the underlying setter refused
  // the value; infallible setters always yield `Ok(())`.
  let applied: Result<(), &'static str> = match component {
    UrlSetter::Hash => {
      quirks::set_hash(&mut parsed, new_value);
      Ok(())
    }
    UrlSetter::Host => {
      quirks::set_host(&mut parsed, new_value).map_err(|_| "Invalid host")
    }
    UrlSetter::Hostname => quirks::set_hostname(&mut parsed, new_value)
      .map_err(|_| "Invalid hostname"),
    UrlSetter::Password => quirks::set_password(&mut parsed, new_value)
      .map_err(|_| "Invalid password"),
    UrlSetter::Pathname => {
      quirks::set_pathname(&mut parsed, new_value);
      Ok(())
    }
    UrlSetter::Port => {
      quirks::set_port(&mut parsed, new_value).map_err(|_| "Invalid port")
    }
    UrlSetter::Protocol => quirks::set_protocol(&mut parsed, new_value)
      .map_err(|_| "Invalid protocol"),
    UrlSetter::Search => {
      quirks::set_search(&mut parsed, new_value);
      Ok(())
    }
    UrlSetter::Username => quirks::set_username(&mut parsed, new_value)
      .map_err(|_| "Invalid username"),
  };
  if let Err(message) = applied {
    return Err(uri_error(message));
  }

  url_result(parsed, href, None)
}

fn url_result(
url: Url,
href: String,
base_href: Option<String>,
) -> Result<UrlParts, AnyError> {
// TODO(nayeemrmn): Panic that occurs in rust-url for the `non-spec:`
// url-constructor wpt tests: https://github.com/servo/rust-url/issues/670.
let username = catch_unwind(|| quirks::username(&url)).map_err(|_| {
generic_error(format!(
"Internal error while parsing \"{}\"{}, \
see https://github.com/servo/rust-url/issues/670",
args.href,
args
.base_href
href,
base_href
.map(|b| format!(" against \"{}\"", b))
.unwrap_or_default()
))
})?;

Ok(UrlParts {
href: quirks::href(&url).to_string(),
hash: quirks::hash(&url).to_string(),
Expand Down

0 comments on commit bf0bacb

Please sign in to comment.