Skip to content

Commit

Permalink
Fix more Annex B tests (#2841)
Browse files Browse the repository at this point in the history
This Pull Request fixes some additional Annex B tests.

It changes the following:

- Fixes bugs related to parsing HTML closing comments (`-->`).
- Implements `RegExp::compile` behind the `annex-b` feature.
- Ignores the test262 tests that require the `legacy-regexp` feature, since that proposal is still at stage 3.
  • Loading branch information
jedel1043 committed Apr 19, 2023
1 parent 95a8198 commit 40a5ae0
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 38 deletions.
14 changes: 9 additions & 5 deletions boa_engine/src/builtins/function/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ use boa_parser::{Parser, Source};
use boa_profiler::Profiler;
use thin_vec::ThinVec;

use std::fmt;
use std::{fmt, io::Read};

use super::{promise::PromiseCapability, BuiltInBuilder, BuiltInConstructor, IntrinsicObject};

Expand Down Expand Up @@ -642,12 +642,16 @@ impl BuiltInFunctionObject {
.into());
}

let body_arg = body_arg.to_string(context)?;
// 11. Let bodyString be the string-concatenation of 0x000A (LINE FEED), ? ToString(bodyArg), and 0x000A (LINE FEED).
let body_arg = body_arg.to_string(context)?.to_std_string_escaped();
let body = b"\n".chain(body_arg.as_bytes()).chain(b"\n".as_slice());

// TODO: make parser generic to u32 iterators
let body = match Parser::new(Source::from_bytes(&body_arg.to_std_string_escaped()))
.parse_function_body(context.interner_mut(), generator, r#async)
{
let body = match Parser::new(Source::from_reader(body, None)).parse_function_body(
context.interner_mut(),
generator,
r#async,
) {
Ok(statement_list) => statement_list,
Err(e) => {
return Err(JsNativeError::syntax()
Expand Down
100 changes: 76 additions & 24 deletions boa_engine/src/builtins/regexp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use crate::{
};
use boa_parser::lexer::regex::RegExpFlags;
use boa_profiler::Profiler;
use regress::Regex;
use regress::{Flags, Regex};
use std::str::FromStr;

use super::{BuiltInBuilder, BuiltInConstructor, IntrinsicObject};
Expand Down Expand Up @@ -94,7 +94,7 @@ impl IntrinsicObject for RegExp {
.callable(Self::get_source)
.name("get source")
.build();
BuiltInBuilder::from_standard_constructor::<Self>(realm)
let regexp = BuiltInBuilder::from_standard_constructor::<Self>(realm)
.static_accessor(
JsSymbol::species(),
Some(get_species),
Expand Down Expand Up @@ -137,8 +137,12 @@ impl IntrinsicObject for RegExp {
.accessor(utf16!("unicode"), Some(get_unicode), None, flag_attributes)
.accessor(utf16!("sticky"), Some(get_sticky), None, flag_attributes)
.accessor(utf16!("flags"), Some(get_flags), None, flag_attributes)
.accessor(utf16!("source"), Some(get_source), None, flag_attributes)
.build();
.accessor(utf16!("source"), Some(get_source), None, flag_attributes);

#[cfg(feature = "annex-b")]
let regexp = regexp.method(Self::compile, "compile", 2);

regexp.build();
}

fn get(intrinsics: &Intrinsics) -> JsObject {
Expand Down Expand Up @@ -288,26 +292,29 @@ impl RegExp {
Ok(result) => result,
};

// TODO: Correct UTF-16 handling in 6. - 8.

// 9. Let parseResult be ParsePattern(patternText, u).
// 10. If parseResult is a non-empty List of SyntaxError objects, throw a SyntaxError exception.
// 11. Assert: parseResult is a Pattern Parse Node.
// 12. Set obj.[[OriginalSource]] to P.
// 13. Set obj.[[OriginalFlags]] to F.
// 14. NOTE: The definitions of DotAll, IgnoreCase, Multiline, and Unicode in 22.2.2.1 refer to this value of obj.[[OriginalFlags]].
// 15. Set obj.[[RegExpMatcher]] to CompilePattern of parseResult.
// TODO: add support for utf16 regex to remove these conversions.
let ps = p.to_std_string_escaped();
let fs = f.to_std_string_escaped();
let matcher = match Regex::with_flags(&ps, fs.as_ref()) {
Err(error) => {
return Err(JsNativeError::syntax()
.with_message(format!("failed to create matcher: {}", error.text))
.into());
}
Ok(val) => val,
};
// 10. If u is true, then
// a. Let patternText be StringToCodePoints(P).
// 11. Else,
// a. Let patternText be the result of interpreting each of P's 16-bit elements as a Unicode BMP code point. UTF-16 decoding is not applied to the elements.
// 12. Let parseResult be ParsePattern(patternText, u).
// 13. If parseResult is a non-empty List of SyntaxError objects, throw a SyntaxError exception.
// 14. Assert: parseResult is a Pattern Parse Node.
// 15. Set obj.[[OriginalSource]] to P.
// 16. Set obj.[[OriginalFlags]] to F.
// 17. Let capturingGroupsCount be CountLeftCapturingParensWithin(parseResult).
// 18. Let rer be the RegExp Record { [[IgnoreCase]]: i, [[Multiline]]: m, [[DotAll]]: s, [[Unicode]]: u, [[CapturingGroupsCount]]: capturingGroupsCount }.
// 19. Set obj.[[RegExpRecord]] to rer.
// 20. Set obj.[[RegExpMatcher]] to CompilePattern of parseResult with argument rer.
let matcher =
match Regex::from_unicode(p.code_points().map(CodePoint::as_u32), Flags::from(flags)) {
Err(error) => {
return Err(JsNativeError::syntax()
.with_message(format!("failed to create matcher: {}", error.text))
.into());
}
Ok(val) => val,
};

let regexp = Self {
matcher,
flags,
Expand Down Expand Up @@ -1659,6 +1666,51 @@ impl RegExp {
// 22. Return A.
Ok(a.into())
}

/// [`RegExp.prototype.compile ( pattern, flags )`][spec]
///
/// Annex B method that forwards the `this` RegExp object, together with a pattern and
/// flags, to `Self::initialize`.
///
/// When `pattern` is itself a RegExp object, its original source and original flags are
/// used and `flags` must be `undefined`. Otherwise `pattern` and `flags` are passed on
/// unchanged.
///
/// # Errors
///
/// Returns a `TypeError` if `this` is not a RegExp object, or if `pattern` is a RegExp
/// object and `flags` is not `undefined`. Any error returned by `Self::initialize` is
/// propagated.
///
/// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.compile
#[cfg(feature = "annex-b")]
fn compile(this: &JsValue, args: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> {
    // 1. Let O be the this value.
    // 2. Perform ? RequireInternalSlot(O, [[RegExpMatcher]]).
    // The guard rejects primitives *and* objects that are not RegExps, so the message
    // talks about RegExp objects rather than objects in general.
    let this = this
        .as_object()
        .filter(|o| o.borrow().is_regexp())
        .cloned()
        .ok_or_else(|| {
            JsNativeError::typ()
                .with_message("`RegExp.prototype.compile` must be called on a RegExp object")
        })?;
    let pattern = args.get_or_undefined(0);
    let flags = args.get_or_undefined(1);
    // 3. If pattern is an Object and pattern has a [[RegExpMatcher]] internal slot, then
    // The source and flags are cloned out inside the closure so the borrow of the pattern
    // object ends before `Self::initialize` is called.
    let (pattern, flags) = if let Some((p, f)) = pattern.as_object().and_then(|o| {
        let o = o.borrow();
        o.as_regexp()
            .map(|rx| (rx.original_source.clone(), rx.original_flags.clone()))
    }) {
        // a. If flags is not undefined, throw a TypeError exception.
        if !flags.is_undefined() {
            return Err(JsNativeError::typ()
                .with_message(
                    "`RegExp.prototype.compile` cannot be \
called with both a RegExp initializer and new flags",
                )
                .into());
        }
        // b. Let P be pattern.[[OriginalSource]].
        // c. Let F be pattern.[[OriginalFlags]].
        (p.into(), f.into())
    } else {
        // 4. Else,
        // a. Let P be pattern.
        // b. Let F be flags.
        (pattern.clone(), flags.clone())
    };
    // 5. Return ? RegExpInitialize(O, P, F).
    Self::initialize(this, &pattern, &flags, context)
}
}

/// `22.2.5.2.3 AdvanceStringIndex ( S, index, unicode )`
Expand Down
2 changes: 1 addition & 1 deletion boa_engine/src/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ impl JsString {
}

/// Gets an iterator of all the Unicode codepoints of a [`JsString`].
pub fn code_points(&self) -> impl Iterator<Item = CodePoint> + '_ {
pub fn code_points(&self) -> impl Iterator<Item = CodePoint> + Clone + '_ {
char::decode_utf16(self.iter().copied()).map(|res| match res {
Ok(c) => CodePoint::Unicode(c),
Err(e) => CodePoint::UnpairedSurrogate(e.unpaired_surrogate()),
Expand Down
7 changes: 1 addition & 6 deletions boa_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,12 +318,7 @@ impl<R> Lexer<R> {
}
}?;

if token.kind() == &TokenKind::Comment {
// Skip comment
self.next(interner)
} else {
Ok(Some(token))
}
Ok(Some(token))
} else {
Err(Error::syntax(
format!(
Expand Down
14 changes: 13 additions & 1 deletion boa_parser/src/lexer/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use bitflags::bitflags;
use boa_ast::Position;
use boa_interner::{Interner, Sym};
use boa_profiler::Profiler;
use regress::Regex;
use regress::{Flags, Regex};
use std::{
io::{self, ErrorKind, Read},
str::{self, FromStr},
Expand Down Expand Up @@ -237,3 +237,15 @@ impl ToString for RegExpFlags {
s
}
}

/// Converts the lexer's [`RegExpFlags`] bit set into a `regress` [`Flags`] value.
///
/// Only the ignore-case, multiline, dot-all and unicode bits are carried over; every
/// other field of [`Flags`] keeps its default value.
impl From<RegExpFlags> for Flags {
    fn from(value: RegExpFlags) -> Self {
        // Read each relevant bit once, then assemble the struct with field shorthand.
        let icase = value.contains(RegExpFlags::IGNORE_CASE);
        let multiline = value.contains(RegExpFlags::MULTILINE);
        let dot_all = value.contains(RegExpFlags::DOT_ALL);
        let unicode = value.contains(RegExpFlags::UNICODE);

        Self {
            icase,
            multiline,
            dot_all,
            unicode,
            ..Self::default()
        }
    }
}
2 changes: 1 addition & 1 deletion boa_parser/src/parser/cursor/buffered_lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ where
} else {
self.peeked[self.write_index] = self.lexer.next(interner)?;
}
self.write_index = (self.write_index + 1) % PEEK_BUF_SIZE;

self.write_index = (self.write_index + 1) % PEEK_BUF_SIZE;
debug_assert_ne!(
self.read_index, self.write_index,
"we reached the read index with the write index"
Expand Down
1 change: 1 addition & 0 deletions test_ignore.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ features = [
"decorators",
"array-grouping",
"IsHTMLDDA",
"legacy-regexp",

# Non-implemented Intl features
"intl-normative-optional",
Expand Down

0 comments on commit 40a5ae0

Please sign in to comment.