Skip to content

Commit

Permalink
Allow unicode escaped characters in identifiers that are keywords (#2021)
Browse files Browse the repository at this point in the history

This Pull Request changes the following:

- Remove the lexer's syntax error for unicode escaped characters in keywords.
- Adjust the lexer tokens for keywords to indicate whether they contain unicode escaped characters.
- Throw syntax errors in the parser where keywords must not contain unicode escaped characters.
  • Loading branch information
raskad committed Apr 9, 2022
1 parent 2e0254b commit 2a63de3
Show file tree
Hide file tree
Showing 41 changed files with 403 additions and 267 deletions.
9 changes: 1 addition & 8 deletions boa_engine/src/syntax/lexer/identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,6 @@ impl<R> Tokenizer<R> for Identifier {
Self::take_identifier_name(cursor, start_pos, self.init)?;

let token_kind = if let Ok(keyword) = identifier_name.parse() {
if contains_escaped_chars {
return Err(Error::Syntax(
"unicode escaped characters are not allowed in keyword".into(),
start_pos,
));
}

if cursor.strict_mode() && keyword == Keyword::With {
return Err(Error::Syntax(
"using 'with' statement not allowed in strict mode".into(),
Expand All @@ -108,7 +101,7 @@ impl<R> Tokenizer<R> for Identifier {
Keyword::True => TokenKind::BooleanLiteral(true),
Keyword::False => TokenKind::BooleanLiteral(false),
Keyword::Null => TokenKind::NullLiteral,
_ => TokenKind::Keyword(keyword),
_ => TokenKind::Keyword((keyword, contains_escaped_chars)),
}
} else {
if cursor.strict_mode()
Expand Down
76 changes: 38 additions & 38 deletions boa_engine/src/syntax/lexer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ fn check_single_line_comment() {
let mut interner = Interner::default();

let expected = [
TokenKind::Keyword(Keyword::Var),
TokenKind::Keyword((Keyword::Var, false)),
TokenKind::LineTerminator,
TokenKind::LineTerminator,
TokenKind::BooleanLiteral(true),
Expand All @@ -52,7 +52,7 @@ fn check_single_line_comment_with_crlf_ending() {
let mut interner = Interner::default();

let expected = [
TokenKind::Keyword(Keyword::Var),
TokenKind::Keyword((Keyword::Var, false)),
TokenKind::LineTerminator,
TokenKind::LineTerminator,
TokenKind::BooleanLiteral(true),
Expand All @@ -69,7 +69,7 @@ fn check_multi_line_comment() {

let sym = interner.get_or_intern_static("x");
let expected = [
TokenKind::Keyword(Keyword::Var),
TokenKind::Keyword((Keyword::Var, false)),
TokenKind::LineTerminator,
TokenKind::identifier(sym),
];
Expand Down Expand Up @@ -251,40 +251,40 @@ fn check_keywords() {
let mut interner = Interner::default();

let expected = [
TokenKind::Keyword(Keyword::Await),
TokenKind::Keyword(Keyword::Break),
TokenKind::Keyword(Keyword::Case),
TokenKind::Keyword(Keyword::Catch),
TokenKind::Keyword(Keyword::Class),
TokenKind::Keyword(Keyword::Const),
TokenKind::Keyword(Keyword::Continue),
TokenKind::Keyword(Keyword::Debugger),
TokenKind::Keyword(Keyword::Default),
TokenKind::Keyword(Keyword::Delete),
TokenKind::Keyword(Keyword::Do),
TokenKind::Keyword(Keyword::Else),
TokenKind::Keyword(Keyword::Export),
TokenKind::Keyword(Keyword::Extends),
TokenKind::Keyword(Keyword::Finally),
TokenKind::Keyword(Keyword::For),
TokenKind::Keyword(Keyword::Function),
TokenKind::Keyword(Keyword::If),
TokenKind::Keyword(Keyword::Import),
TokenKind::Keyword(Keyword::In),
TokenKind::Keyword(Keyword::InstanceOf),
TokenKind::Keyword(Keyword::New),
TokenKind::Keyword(Keyword::Return),
TokenKind::Keyword(Keyword::Super),
TokenKind::Keyword(Keyword::Switch),
TokenKind::Keyword(Keyword::This),
TokenKind::Keyword(Keyword::Throw),
TokenKind::Keyword(Keyword::Try),
TokenKind::Keyword(Keyword::TypeOf),
TokenKind::Keyword(Keyword::Var),
TokenKind::Keyword(Keyword::Void),
TokenKind::Keyword(Keyword::While),
TokenKind::Keyword(Keyword::With),
TokenKind::Keyword(Keyword::Yield),
TokenKind::Keyword((Keyword::Await, false)),
TokenKind::Keyword((Keyword::Break, false)),
TokenKind::Keyword((Keyword::Case, false)),
TokenKind::Keyword((Keyword::Catch, false)),
TokenKind::Keyword((Keyword::Class, false)),
TokenKind::Keyword((Keyword::Const, false)),
TokenKind::Keyword((Keyword::Continue, false)),
TokenKind::Keyword((Keyword::Debugger, false)),
TokenKind::Keyword((Keyword::Default, false)),
TokenKind::Keyword((Keyword::Delete, false)),
TokenKind::Keyword((Keyword::Do, false)),
TokenKind::Keyword((Keyword::Else, false)),
TokenKind::Keyword((Keyword::Export, false)),
TokenKind::Keyword((Keyword::Extends, false)),
TokenKind::Keyword((Keyword::Finally, false)),
TokenKind::Keyword((Keyword::For, false)),
TokenKind::Keyword((Keyword::Function, false)),
TokenKind::Keyword((Keyword::If, false)),
TokenKind::Keyword((Keyword::Import, false)),
TokenKind::Keyword((Keyword::In, false)),
TokenKind::Keyword((Keyword::InstanceOf, false)),
TokenKind::Keyword((Keyword::New, false)),
TokenKind::Keyword((Keyword::Return, false)),
TokenKind::Keyword((Keyword::Super, false)),
TokenKind::Keyword((Keyword::Switch, false)),
TokenKind::Keyword((Keyword::This, false)),
TokenKind::Keyword((Keyword::Throw, false)),
TokenKind::Keyword((Keyword::Try, false)),
TokenKind::Keyword((Keyword::TypeOf, false)),
TokenKind::Keyword((Keyword::Var, false)),
TokenKind::Keyword((Keyword::Void, false)),
TokenKind::Keyword((Keyword::While, false)),
TokenKind::Keyword((Keyword::With, false)),
TokenKind::Keyword((Keyword::Yield, false)),
];

expect_tokens(&mut lexer, &expected, &mut interner);
Expand All @@ -299,7 +299,7 @@ fn check_variable_definition_tokens() {
let a_sym = interner.get_or_intern_static("a");
let hello_sym = interner.get_or_intern_static("hello");
let expected = [
TokenKind::Keyword(Keyword::Let),
TokenKind::Keyword((Keyword::Let, false)),
TokenKind::identifier(a_sym),
TokenKind::Punctuator(Punctuator::Assign),
TokenKind::string_literal(hello_sym),
Expand Down
15 changes: 5 additions & 10 deletions boa_engine/src/syntax/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ pub enum TokenKind {
/// A private identifier.
PrivateIdentifier(Sym),

/// A keyword.
Keyword(Keyword),
/// A keyword and a flag if the keyword contains unicode escaped chars.
Keyword((Keyword, bool)),

/// A `null` literal.
NullLiteral,
Expand Down Expand Up @@ -142,8 +142,8 @@ impl From<bool> for TokenKind {
}
}

impl From<Keyword> for TokenKind {
fn from(kw: Keyword) -> Self {
impl From<(Keyword, bool)> for TokenKind {
fn from(kw: (Keyword, bool)) -> Self {
Self::Keyword(kw)
}
}
Expand Down Expand Up @@ -176,11 +176,6 @@ impl TokenKind {
Self::Identifier(ident)
}

/// Creates a `Keyword` token kind.
pub fn keyword(keyword: Keyword) -> Self {
Self::Keyword(keyword)
}

/// Creates a `NumericLiteral` token kind.
pub fn numeric_literal<L>(lit: L) -> Self
where
Expand Down Expand Up @@ -229,7 +224,7 @@ impl TokenKind {
Self::EOF => "end of file".to_owned(),
Self::Identifier(ident) => interner.resolve_expect(ident).to_owned(),
Self::PrivateIdentifier(ident) => format!("#{}", interner.resolve_expect(ident)),
Self::Keyword(word) => word.to_string(),
Self::Keyword((word, _)) => word.to_string(),
Self::NullLiteral => "null".to_owned(),
Self::NumericLiteral(Numeric::Rational(num)) => num.to_string(),
Self::NumericLiteral(Numeric::Integer(num)) => num.to_string(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ where
Ok(if let Some(tok) = cursor.peek(0, interner)? {
matches!(
tok.kind(),
TokenKind::Keyword(Keyword::Delete | Keyword::Void | Keyword::TypeOf)
TokenKind::Keyword((Keyword::Delete | Keyword::Void | Keyword::TypeOf, _))
| TokenKind::Punctuator(
Punctuator::Add | Punctuator::Sub | Punctuator::Not | Punctuator::Neg
)
Expand Down
4 changes: 2 additions & 2 deletions boa_engine/src/syntax/parser/expression/assignment/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,12 @@ where
.kind()
{
// [+Yield]YieldExpression[?In, ?Await]
TokenKind::Keyword(Keyword::Yield) if self.allow_yield.0 => {
TokenKind::Keyword((Keyword::Yield, _)) if self.allow_yield.0 => {
return YieldExpression::new(self.allow_in, self.allow_await)
.parse(cursor, interner)
}
// ArrowFunction[?In, ?Yield, ?Await] -> ArrowParameters[?Yield, ?Await] -> BindingIdentifier[?Yield, ?Await]
TokenKind::Identifier(_) | TokenKind::Keyword(Keyword::Yield | Keyword::Await) => {
TokenKind::Identifier(_) | TokenKind::Keyword((Keyword::Yield | Keyword::Await, _)) => {
if let Ok(tok) =
cursor.peek_expect_no_lineterminator(1, "assignment expression", interner)
{
Expand Down
7 changes: 4 additions & 3 deletions boa_engine/src/syntax/parser/expression/assignment/yield.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ where
let _timer = Profiler::global().start_event("YieldExpression", "Parsing");

cursor.expect(
TokenKind::Keyword(Keyword::Yield),
TokenKind::Keyword((Keyword::Yield, false)),
"yield expression",
interner,
)?;
Expand Down Expand Up @@ -87,7 +87,7 @@ where
| Punctuator::OpenBlock
| Punctuator::Div,
)
| TokenKind::Keyword(
| TokenKind::Keyword((
Keyword::Yield
| Keyword::Await
| Keyword::Delete
Expand All @@ -98,7 +98,8 @@ where
| Keyword::Function
| Keyword::Class
| Keyword::Async,
)
_,
))
| TokenKind::BooleanLiteral(_)
| TokenKind::NullLiteral
| TokenKind::StringLiteral(_)
Expand Down
2 changes: 1 addition & 1 deletion boa_engine/src/syntax/parser/expression/await_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ where
interner: &mut Interner,
) -> Result<Self::Output, ParseError> {
cursor.expect(
TokenKind::Keyword(Keyword::Await),
TokenKind::Keyword((Keyword::Await, false)),
"Await expression parsing",
interner,
)?;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ where
TokenKind::Identifier(name) => {
lhs = GetConstField::new(lhs, *name).into();
}
TokenKind::Keyword(kw) => {
TokenKind::Keyword((kw, _)) => {
lhs = GetConstField::new(lhs, kw.to_sym(interner)).into();
}
_ => {
Expand Down
45 changes: 25 additions & 20 deletions boa_engine/src/syntax/parser/expression/left_hand_side/member.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,27 +64,32 @@ where
fn parse(self, cursor: &mut Cursor<R>, interner: &mut Interner) -> ParseResult {
let _timer = Profiler::global().start_event("MemberExpression", "Parsing");

let mut lhs = if cursor
.peek(0, interner)?
.ok_or(ParseError::AbruptEnd)?
.kind()
== &TokenKind::Keyword(Keyword::New)
{
let _next = cursor.next(interner).expect("new keyword disappeared");
let lhs = self.parse(cursor, interner)?;
let args = match cursor.peek(0, interner)? {
Some(next) if next.kind() == &TokenKind::Punctuator(Punctuator::OpenParen) => {
Arguments::new(self.allow_yield, self.allow_await).parse(cursor, interner)?
}
_ => Box::new([]),
};
let call_node = Call::new(lhs, args);
let token = cursor.peek(0, interner)?.ok_or(ParseError::AbruptEnd)?;
let mut lhs = match token.kind() {
TokenKind::Keyword((Keyword::New, true)) => {
return Err(ParseError::general(
"keyword must not contain escaped characters",
token.span().start(),
));
}
TokenKind::Keyword((Keyword::New, false)) => {
let _next = cursor.next(interner).expect("new keyword disappeared");
let lhs = self.parse(cursor, interner)?;
let args = match cursor.peek(0, interner)? {
Some(next) if next.kind() == &TokenKind::Punctuator(Punctuator::OpenParen) => {
Arguments::new(self.allow_yield, self.allow_await)
.parse(cursor, interner)?
}
_ => Box::new([]),
};
let call_node = Call::new(lhs, args);

Node::from(New::from(call_node))
} else {
PrimaryExpression::new(self.name, self.allow_yield, self.allow_await)
.parse(cursor, interner)?
Node::from(New::from(call_node))
}
_ => PrimaryExpression::new(self.name, self.allow_yield, self.allow_await)
.parse(cursor, interner)?,
};

while let Some(tok) = cursor.peek(0, interner)? {
match tok.kind() {
TokenKind::Punctuator(Punctuator::Dot) => {
Expand All @@ -96,7 +101,7 @@ where

match token.kind() {
TokenKind::Identifier(name) => lhs = GetConstField::new(lhs, *name).into(),
TokenKind::Keyword(kw) => {
TokenKind::Keyword((kw, _)) => {
lhs = GetConstField::new(lhs, kw.to_sym(interner)).into();
}
TokenKind::BooleanLiteral(bool) => {
Expand Down
10 changes: 8 additions & 2 deletions boa_engine/src/syntax/parser/expression/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ macro_rules! expression { ($name:ident, $lower:ident, [$( $op:path ),*], [$( $lo
$lower::new($( self.$low_param ),*).parse(cursor, interner)?
).into();
}
TokenKind::Keyword(op) if $( op == $op )||* => {
TokenKind::Keyword((op, false)) if $( op == $op )||* => {
let _next = cursor.next(interner).expect("token disappeared");
lhs = BinOp::new(
op.as_binop().expect("Could not get binary operation."),
Expand Down Expand Up @@ -540,7 +540,13 @@ where
)
.into();
}
TokenKind::Keyword(op)
TokenKind::Keyword((Keyword::InstanceOf | Keyword::In, true)) => {
return Err(ParseError::general(
"Keyword must not contain escaped characters",
tok.span().start(),
));
}
TokenKind::Keyword((op, false))
if op == Keyword::InstanceOf
|| (op == Keyword::In && self.allow_in == AllowIn(true)) =>
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,11 @@ where
) -> Result<Self::Output, ParseError> {
let _timer = Profiler::global().start_event("AsyncFunctionExpression", "Parsing");
cursor.peek_expect_no_lineterminator(0, "async function expression", interner)?;
cursor.expect(Keyword::Function, "async function expression", interner)?;
cursor.expect(
(Keyword::Function, false),
"async function expression",
interner,
)?;

let name = match cursor
.peek(0, interner)?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,11 @@ where
let _timer = Profiler::global().start_event("AsyncGeneratorExpression", "Parsing");

cursor.peek_expect_no_lineterminator(0, "async generator expression", interner)?;
cursor.expect(Keyword::Function, "async generator expression", interner)?;
cursor.expect(
(Keyword::Function, false),
"async generator expression",
interner,
)?;
cursor.expect(
TokenKind::Punctuator(Punctuator::Mul),
"async generator expression",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ where

let token = cursor.peek(0, interner)?.ok_or(ParseError::AbruptEnd)?;
let name = match token.kind() {
TokenKind::Identifier(_) | TokenKind::Keyword(Keyword::Yield | Keyword::Await) => {
TokenKind::Identifier(_) | TokenKind::Keyword((Keyword::Yield | Keyword::Await, _)) => {
BindingIdentifier::new(self.allow_yield, self.allow_await)
.parse(cursor, interner)?
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ where
.ok_or(ParseError::AbruptEnd)?
.kind()
{
TokenKind::Identifier(_) | TokenKind::Keyword(Keyword::Yield | Keyword::Await) => {
TokenKind::Identifier(_) | TokenKind::Keyword((Keyword::Yield | Keyword::Await, _)) => {
Some(BindingIdentifier::new(false, false).parse(cursor, interner)?)
}
_ => self.name,
Expand Down

0 comments on commit 2a63de3

Please sign in to comment.