Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 35 additions & 11 deletions core/runtime/src/text/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,35 @@ pub enum Encoding {
Utf16Be,
}

/// Lookup table pairing each accepted encoding label with the [`Encoding`] it selects.
///
/// Every label is stored in lowercase; `resolve_text_decoder_label` compares against
/// these entries ASCII case-insensitively after trimming surrounding whitespace.
// NOTE(review): the label set looks like the UTF-8 / UTF-16BE / UTF-16LE rows of the
// WHATWG Encoding Standard label table — confirm against the spec when adding entries.
const TEXT_DECODER_LABELS: &[(&str, Encoding)] = &[
// Labels that select UTF-8.
("unicode-1-1-utf-8", Encoding::Utf8),
("unicode11utf8", Encoding::Utf8),
("unicode20utf8", Encoding::Utf8),
("utf-8", Encoding::Utf8),
("utf8", Encoding::Utf8),
("x-unicode20utf8", Encoding::Utf8),
// Labels that select UTF-16 big endian.
("unicodefffe", Encoding::Utf16Be),
("utf-16be", Encoding::Utf16Be),
// Labels that select UTF-16 little endian; note that the bare "utf-16" label
// lands here as well.
("csunicode", Encoding::Utf16Le),
("iso-10646-ucs-2", Encoding::Utf16Le),
("ucs-2", Encoding::Utf16Le),
("unicode", Encoding::Utf16Le),
("unicodefeff", Encoding::Utf16Le),
("utf-16", Encoding::Utf16Le),
("utf-16le", Encoding::Utf16Le),
];

/// Maps a user-supplied encoding label to the [`Encoding`] it names.
///
/// Leading and trailing ASCII whitespace (tab, line feed, form feed, carriage return,
/// space) is stripped, then the remainder is compared ASCII case-insensitively against
/// each entry of `TEXT_DECODER_LABELS`.
///
/// Returns `None` when no entry matches the trimmed label.
#[inline]
fn resolve_text_decoder_label(label: &str) -> Option<Encoding> {
    // Rust has no `\f` escape, so form feed keeps its `\u{000C}` spelling.
    let trimmed =
        label.trim_matches(|c: char| matches!(c, '\t' | '\n' | '\u{000C}' | '\r' | ' '));

    TEXT_DECODER_LABELS
        .iter()
        .find(|(candidate, _)| trimmed.eq_ignore_ascii_case(candidate))
        .map(|&(_, encoding)| encoding)
}

/// The [`TextDecoder`][mdn] class represents a decoder for a specific text encoding,
/// such as `utf-8`.
///
Expand Down Expand Up @@ -62,17 +91,12 @@ impl TextDecoder {
let ignore_bom = options.and_then(|o| o.ignore_bom).unwrap_or(false);

let encoding = match encoding {
Some(enc) => match enc.to_std_string_lossy().as_str() {
"utf-8" => Encoding::Utf8,
// Default encoding is Little Endian.
"utf-16" | "utf-16le" => Encoding::Utf16Le,
"utf-16be" => Encoding::Utf16Be,
e => {
return Err(
js_error!(RangeError: "The given encoding '{}' is not supported.", e),
);
}
},
Some(enc) => {
let label = enc.to_std_string_lossy();
resolve_text_decoder_label(&label).ok_or_else(
|| js_error!(RangeError: "The given encoding '{}' is not supported.", label),
)?
}
None => Encoding::default(),
};

Expand Down
53 changes: 53 additions & 0 deletions core/runtime/src/text/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,59 @@ fn decoder_bom_ignore_bom_false(encoding: &'static str, bytes: &'static [u8]) {
);
}

// Each case constructs a `TextDecoder` from a label that differs from the canonical
// name by ASCII case, surrounding whitespace, or by being an alias, and checks the
// value reported by the decoder's `encoding` property.
#[test_case("UTF-8", "utf-8"; "uppercase utf8")]
#[test_case(" utf-8 ", "utf-8"; "spaced utf8")]
#[test_case("\nutf-16\t", "utf-16le"; "spaced utf16")]
#[test_case("UTF-16BE", "utf-16be"; "uppercase utf16be")]
#[test_case("utf8", "utf-8"; "utf8 alias")]
#[test_case("Unicode-1-1-UTF-8", "utf-8"; "unicode alias")]
#[test_case("csUnicode", "utf-16le"; "csunicode alias")]
#[test_case(" unicodefeff ", "utf-16le"; "unicodefeff alias")]
#[test_case("UnicodeFFFE", "utf-16be"; "unicodefffe alias")]
fn decoder_normalizes_supported_labels(label: &'static str, expected: &'static str) {
    let context = &mut Context::default();
    text::register(None, context).unwrap();

    // `{label:?}` uses Rust's `Debug` formatting, so the label is embedded as a quoted,
    // escaped string literal and whitespace such as `\n` reaches the script as an escape.
    let script = format!(
        r#"
const d = new TextDecoder({label:?});
encoding = d.encoding;
"#
    );

    // Reads back the global the script assigned and compares it with the expected name.
    let check_encoding = TestAction::inspect_context(move |ctx| {
        let reported = ctx.global_object().get(js_str!("encoding"), ctx).unwrap();
        assert_eq!(reported.as_string(), Some(JsString::from(expected)));
    });

    run_test_actions_with([TestAction::run(script), check_encoding], context);
}

// An unsupported label must still be rejected with a `RangeError` even when it is
// padded with whitespace: the script rethrows anything that is not a `RangeError`,
// including the sentinel `Error` raised when the constructor unexpectedly succeeds.
#[test]
fn decoder_rejects_unsupported_label_after_normalization() {
    let context = &mut Context::default();
    text::register(None, context).unwrap();

    let script = indoc! {r#"
try {
new TextDecoder(" utf-32 ");
throw new Error("expected RangeError");
} catch (e) {
if (!(e instanceof RangeError)) {
throw e;
}
}
"#};

    run_test_actions_with([TestAction::run(script)], context);
}

#[test]
fn decoder_ignore_bom_getter() {
let context = &mut Context::default();
Expand Down
Loading