From 6bb998b1a73b1420bea8282ff770876ee001e91f Mon Sep 17 00:00:00 2001 From: Monti-27 Date: Thu, 19 Mar 2026 19:26:54 +0530 Subject: [PATCH 1/2] fix textdecoder labels --- core/runtime/src/text/mod.rs | 62 ++++++++++++++++++++++++++++------ core/runtime/src/text/tests.rs | 60 ++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 11 deletions(-) diff --git a/core/runtime/src/text/mod.rs b/core/runtime/src/text/mod.rs index f8a501cda41..863e3702216 100644 --- a/core/runtime/src/text/mod.rs +++ b/core/runtime/src/text/mod.rs @@ -34,6 +34,51 @@ pub enum Encoding { Utf16Be, } +const UTF_8_LABELS: &[&str] = &[ + "unicode-1-1-utf-8", + "unicode11utf8", + "unicode20utf8", + "utf-8", + "utf8", + "x-unicode20utf8", +]; + +const UTF_16BE_LABELS: &[&str] = &["unicodefffe", "utf-16be"]; + +const UTF_16LE_LABELS: &[&str] = &[ + "csunicode", + "iso-10646-ucs-2", + "ucs-2", + "unicode", + "unicodefeff", + "utf-16", + "utf-16le", +]; + +#[inline] +fn resolve_text_decoder_label(label: &str) -> Option<Encoding> { + let label = label.trim_matches(['\u{0009}', '\u{000A}', '\u{000C}', '\u{000D}', '\u{0020}']); + + if UTF_8_LABELS + .iter() + .any(|supported| label.eq_ignore_ascii_case(supported)) + { + Some(Encoding::Utf8) + } else if UTF_16LE_LABELS + .iter() + .any(|supported| label.eq_ignore_ascii_case(supported)) + { + Some(Encoding::Utf16Le) + } else if UTF_16BE_LABELS + .iter() + .any(|supported| label.eq_ignore_ascii_case(supported)) + { + Some(Encoding::Utf16Be) + } else { + None + } +} + /// The [`TextDecoder`][mdn] class represents an encoder for a specific method, that is /// a specific character encoding, like `utf-8`. /// @@ -62,17 +107,12 @@ impl TextDecoder { let ignore_bom = options.and_then(|o| o.ignore_bom).unwrap_or(false); let encoding = match encoding { - Some(enc) => match enc.to_std_string_lossy().as_str() { - "utf-8" => Encoding::Utf8, - // Default encoding is Little Endian. 
- "utf-16" | "utf-16le" => Encoding::Utf16Le, - "utf-16be" => Encoding::Utf16Be, - e => { - return Err( - js_error!(RangeError: "The given encoding '{}' is not supported.", e), - ); - } - }, + Some(enc) => { + let label = enc.to_std_string_lossy(); + resolve_text_decoder_label(&label).ok_or_else( + || js_error!(RangeError: "The given encoding '{}' is not supported.", label), + )? + } None => Encoding::default(), }; diff --git a/core/runtime/src/text/tests.rs b/core/runtime/src/text/tests.rs index f5a1dade51e..8b59d23b874 100644 --- a/core/runtime/src/text/tests.rs +++ b/core/runtime/src/text/tests.rs @@ -288,6 +288,66 @@ fn decoder_bom_ignore_bom_false(encoding: &'static str, bytes: &'static [u8]) { ); } +#[test_case("UTF-8", "utf-8"; "uppercase utf8")] +#[test_case(" utf-8 ", "utf-8"; "spaced utf8")] +#[test_case("\nutf-16\t", "utf-16le"; "spaced utf16")] +#[test_case("UTF-16BE", "utf-16be"; "uppercase utf16be")] +#[test_case("utf8", "utf-8"; "utf8 alias")] +#[test_case("Unicode-1-1-UTF-8", "utf-8"; "unicode alias")] +#[test_case("csUnicode", "utf-16le"; "csunicode alias")] +#[test_case(" unicodefeff ", "utf-16le"; "unicodefeff alias")] +#[test_case("UnicodeFFFE", "utf-16be"; "unicodefffe alias")] +fn decoder_normalizes_supported_labels(label: &'static str, expected: &'static str) { + let context = &mut Context::default(); + text::register(None, context).unwrap(); + + run_test_actions_with( + [ + TestAction::run(format!( + r#" + const d = new TextDecoder({label:?}); + encoding = d.encoding; + "# + )), + TestAction::inspect_context(move |context| { + let encoding = context + .global_object() + .get(js_str!("encoding"), context) + .unwrap(); + assert_eq!(encoding.as_string(), Some(JsString::from(expected))); + }), + ], + context, + ); +} + +#[test] +fn decoder_rejects_unsupported_label_after_normalization() { + let context = &mut Context::default(); + text::register(None, context).unwrap(); + + run_test_actions_with( + [ + TestAction::run(indoc! 
{r#" + threw = false; + try { + new TextDecoder(" utf-32 "); + } catch (e) { + threw = e instanceof RangeError; + } + "#}), + TestAction::inspect_context(|context| { + let threw = context + .global_object() + .get(js_str!("threw"), context) + .unwrap(); + assert_eq!(threw.as_boolean(), Some(true)); + }), + ], + context, + ); +} + #[test] fn decoder_ignore_bom_getter() { let context = &mut Context::default(); From 495d948fd3881052eb332f6bb865ea1fbc31d5bf Mon Sep 17 00:00:00 2001 From: Monti-27 Date: Thu, 19 Mar 2026 21:08:23 +0530 Subject: [PATCH 2/2] clean up decoder labels --- core/runtime/src/text/mod.rs | 56 ++++++++++++---------------------- core/runtime/src/text/tests.rs | 19 ++++-------- 2 files changed, 26 insertions(+), 49 deletions(-) diff --git a/core/runtime/src/text/mod.rs b/core/runtime/src/text/mod.rs index 863e3702216..821151c6410 100644 --- a/core/runtime/src/text/mod.rs +++ b/core/runtime/src/text/mod.rs @@ -34,49 +34,33 @@ pub enum Encoding { Utf16Be, } -const UTF_8_LABELS: &[&str] = &[ - "unicode-1-1-utf-8", - "unicode11utf8", - "unicode20utf8", - "utf-8", - "utf8", - "x-unicode20utf8", -]; - -const UTF_16BE_LABELS: &[&str] = &["unicodefffe", "utf-16be"]; - -const UTF_16LE_LABELS: &[&str] = &[ - "csunicode", - "iso-10646-ucs-2", - "ucs-2", - "unicode", - "unicodefeff", - "utf-16", - "utf-16le", +const TEXT_DECODER_LABELS: &[(&str, Encoding)] = &[ + ("unicode-1-1-utf-8", Encoding::Utf8), + ("unicode11utf8", Encoding::Utf8), + ("unicode20utf8", Encoding::Utf8), + ("utf-8", Encoding::Utf8), + ("utf8", Encoding::Utf8), + ("x-unicode20utf8", Encoding::Utf8), + ("unicodefffe", Encoding::Utf16Be), + ("utf-16be", Encoding::Utf16Be), + ("csunicode", Encoding::Utf16Le), + ("iso-10646-ucs-2", Encoding::Utf16Le), + ("ucs-2", Encoding::Utf16Le), + ("unicode", Encoding::Utf16Le), + ("unicodefeff", Encoding::Utf16Le), + ("utf-16", Encoding::Utf16Le), + ("utf-16le", Encoding::Utf16Le), ]; #[inline] fn resolve_text_decoder_label(label: &str) -> Option<Encoding> { let label 
= label.trim_matches(['\u{0009}', '\u{000A}', '\u{000C}', '\u{000D}', '\u{0020}']); - if UTF_8_LABELS - .iter() - .any(|supported| label.eq_ignore_ascii_case(supported)) - { - Some(Encoding::Utf8) - } else if UTF_16LE_LABELS + TEXT_DECODER_LABELS .iter() - .any(|supported| label.eq_ignore_ascii_case(supported)) - { - Some(Encoding::Utf16Le) - } else if UTF_16BE_LABELS - .iter() - .any(|supported| label.eq_ignore_ascii_case(supported)) - { - Some(Encoding::Utf16Be) - } else { - None - } + .find_map(|(supported, encoding)| { + label.eq_ignore_ascii_case(supported).then_some(*encoding) + }) } /// The [`TextDecoder`][mdn] class represents an encoder for a specific method, that is diff --git a/core/runtime/src/text/tests.rs b/core/runtime/src/text/tests.rs index 8b59d23b874..730128bfe94 100644 --- a/core/runtime/src/text/tests.rs +++ b/core/runtime/src/text/tests.rs @@ -327,23 +327,16 @@ fn decoder_rejects_unsupported_label_after_normalization() { text::register(None, context).unwrap(); run_test_actions_with( - [ - TestAction::run(indoc! {r#" - threw = false; + [TestAction::run(indoc! {r#" try { new TextDecoder(" utf-32 "); + throw new Error("expected RangeError"); } catch (e) { - threw = e instanceof RangeError; + if (!(e instanceof RangeError)) { + throw e; + } } - "#}), - TestAction::inspect_context(|context| { - let threw = context - .global_object() - .get(js_str!("threw"), context) - .unwrap(); - assert_eq!(threw.as_boolean(), Some(true)); - }), - ], + "#})], context, ); }