Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Merged by Bors] - Optimize String.prototype.normalize #2848

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
34 changes: 8 additions & 26 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 6 additions & 4 deletions boa_engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ rust-version.workspace = true
profiler = ["boa_profiler/profiler"]
deser = ["boa_interner/serde", "boa_ast/serde"]
intl = [
"dep:boa_icu_provider",
"boa_icu_provider/full",
"icu_normalizer/serde",
"icu_normalizer/std",
"dep:icu_locid_transform",
"dep:icu_locid",
"dep:icu_datetime",
Expand Down Expand Up @@ -53,6 +55,7 @@ boa_profiler.workspace = true
boa_macros.workspace = true
boa_ast.workspace = true
boa_parser.workspace = true
boa_icu_provider.workspace = true
serde = { version = "1.0.160", features = ["derive", "rc"] }
serde_json = "1.0.96"
rand = "0.8.5"
Expand All @@ -66,7 +69,6 @@ indexmap = "1.9.3"
ryu-js = "0.2.2"
chrono = { version = "0.4.24", default-features = false, features = ["clock", "std"] }
fast-float = "0.2.0"
unicode-normalization = "0.1.22"
once_cell = "1.17.1"
tap = "1.0.1"
sptr = "0.3.2"
Expand All @@ -77,10 +79,10 @@ num_enum = "0.6.1"
pollster = "0.3.0"
thin-vec = "0.2.12"
itertools = { version = "0.10.5", default-features = false }
icu_normalizer = "1.2.0"

# intl deps
boa_icu_provider = { workspace = true, optional = true }
icu_locid_transform = { version = "1.2.1", features = ["serde"], optional = true }
icu_locid_transform = { version = "1.2.1", features = ["std", "serde"], optional = true }
icu_locid = { version = "1.2.0", features = ["serde"], optional = true }
icu_datetime = { version = "1.2.0", features = ["serde", "experimental"], optional = true }
icu_calendar = { version = "1.2.0", optional = true }
Expand Down
122 changes: 62 additions & 60 deletions boa_engine/src/builtins/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,23 @@ use crate::{
Context, JsArgs, JsResult, JsString, JsValue,
};
use boa_profiler::Profiler;
use icu_normalizer::{ComposingNormalizer, DecomposingNormalizer};
use std::cmp::{max, min};

use super::{BuiltInBuilder, BuiltInConstructor, IntrinsicObject};

mod string_iterator;
pub(crate) use string_iterator::StringIterator;

/// The set of normalizers required for the `String.prototype.normalize` function.
///
/// Holds one normalizer per Unicode normalization form accepted by
/// `String.prototype.normalize` (`"NFC"`, `"NFD"`, `"NFKC"`, `"NFKD"`), so the
/// caller can select the matching one and call `normalize_utf16` on it.
#[derive(Debug)]
pub(crate) struct StringNormalizers {
// Normalizer applied for the `"NFC"` form (also the form used when the
// `form` argument is `undefined`).
pub(crate) nfc: ComposingNormalizer,
// Normalizer applied for the `"NFKC"` form.
pub(crate) nfkc: ComposingNormalizer,
// Normalizer applied for the `"NFD"` form.
pub(crate) nfd: DecomposingNormalizer,
// Normalizer applied for the `"NFKD"` form.
pub(crate) nfkd: DecomposingNormalizer,
}

#[cfg(test)]
mod tests;

Expand Down Expand Up @@ -2024,7 +2034,6 @@ impl String {
args: &[JsValue],
context: &mut Context<'_>,
) -> JsResult<JsValue> {
use unicode_normalization::UnicodeNormalization;
/// Represents the type of normalization applied to a [`JsString`]
#[derive(Clone, Copy)]
pub(crate) enum Normalization {
Expand All @@ -2033,79 +2042,72 @@ impl String {
Nfkc,
Nfkd,
}

// 1. Let O be ? RequireObjectCoercible(this value).
let this = this.require_object_coercible()?;

// 2. Let S be ? ToString(O).
let s = this.to_string(context)?;

let f = match args.get_or_undefined(0) {
// 3. If form is undefined, let f be "NFC".
&JsValue::Undefined => js_string!("NFC"),
// 4. Else, let f be ? ToString(form).
form => form.to_string(context)?,
};

// 6. Let ns be the String value that is the result of normalizing S
// into the normalization form named by f as specified in
// https://unicode.org/reports/tr15/.
let normalization = match f {
ntype if &ntype == utf16!("NFC") => Normalization::Nfc,
ntype if &ntype == utf16!("NFD") => Normalization::Nfd,
ntype if &ntype == utf16!("NFKC") => Normalization::Nfkc,
ntype if &ntype == utf16!("NFKD") => Normalization::Nfkd,
// 5. If f is not one of "NFC", "NFD", "NFKC", or "NFKD", throw a RangeError exception.
_ => {
return Err(JsNativeError::range()
.with_message("The normalization form should be one of NFC, NFD, NFKC, NFKD.")
.into());
}
};

let mut code_points = s.code_points();
let mut result = Vec::with_capacity(s.len());

let mut next_unpaired_surrogate = None;
let mut buf = [0; 2];

loop {
let only_chars = code_points.by_ref().map_while(|cpoint| match cpoint {
CodePoint::Unicode(c) => Some(c),
CodePoint::UnpairedSurrogate(s) => {
next_unpaired_surrogate = Some(s);
None
let normalization = match args.get_or_undefined(0) {
// 3. If form is undefined, let f be "NFC".
&JsValue::Undefined => Normalization::Nfc,
// 4. Else, let f be ? ToString(form).
f => match f.to_string(context)? {
ntype if &ntype == utf16!("NFC") => Normalization::Nfc,
ntype if &ntype == utf16!("NFD") => Normalization::Nfd,
ntype if &ntype == utf16!("NFKC") => Normalization::Nfkc,
ntype if &ntype == utf16!("NFKD") => Normalization::Nfkd,
// 5. If f is not one of "NFC", "NFD", "NFKC", or "NFKD", throw a RangeError exception.
_ => {
return Err(JsNativeError::range()
.with_message(
"The normalization form should be one of NFC, NFD, NFKC, NFKD.",
)
.into());
}
});
},
};

match normalization {
Normalization::Nfc => {
for mapped in only_chars.nfc() {
result.extend_from_slice(mapped.encode_utf16(&mut buf));
}
}
Normalization::Nfd => {
for mapped in only_chars.nfd() {
result.extend_from_slice(mapped.encode_utf16(&mut buf));
}
}
Normalization::Nfkc => {
for mapped in only_chars.nfkc() {
result.extend_from_slice(mapped.encode_utf16(&mut buf));
}
}
Normalization::Nfkd => {
for mapped in only_chars.nfkd() {
result.extend_from_slice(mapped.encode_utf16(&mut buf));
let normalizers = {
#[cfg(not(feature = "intl"))]
{
use once_cell::sync::Lazy;
static NORMALIZERS: Lazy<StringNormalizers> = Lazy::new(|| {
let provider = &boa_icu_provider::minimal();
let nfc = ComposingNormalizer::try_new_nfc_unstable(provider)
.expect("minimal data should always be updated");
let nfkc = ComposingNormalizer::try_new_nfkc_unstable(provider)
.expect("minimal data should always be updated");
let nfd = DecomposingNormalizer::try_new_nfd_unstable(provider)
.expect("minimal data should always be updated");
let nfkd = DecomposingNormalizer::try_new_nfkd_unstable(provider)
.expect("minimal data should always be updated");

StringNormalizers {
nfc,
nfkc,
nfd,
nfkd,
}
}
});
&*NORMALIZERS
}

if let Some(surr) = next_unpaired_surrogate.take() {
result.push(surr);
} else {
break;
#[cfg(feature = "intl")]
{
context.icu().string_normalizers()
}
}
};

let result = match normalization {
Normalization::Nfc => normalizers.nfc.normalize_utf16(&s),
Normalization::Nfd => normalizers.nfd.normalize_utf16(&s),
Normalization::Nfkc => normalizers.nfkc.normalize_utf16(&s),
Normalization::Nfkd => normalizers.nfkd.normalize_utf16(&s),
};

// 7. Return ns.
Ok(js_string!(result).into())
Expand Down