Skip to content

Commit

Permalink
Merge b58365a into 2a509de
Browse files Browse the repository at this point in the history
  • Loading branch information
Paul Lancaster committed Oct 4, 2020
2 parents 2a509de + b58365a commit b359035
Show file tree
Hide file tree
Showing 48 changed files with 601 additions and 264 deletions.
129 changes: 129 additions & 0 deletions boa/src/exec/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1383,3 +1383,132 @@ fn test_identifier_op() {
let scenario = "break = 1";
assert_eq!(&exec(scenario), "\"SyntaxError\": \"expected token \'identifier\', got \'=\' in binding identifier at line 1, col 7\"");
}

#[test]
fn test_strict_mode_octal() {
    // Per https://tc39.es/ecma262/#sec-literals-numeric-literals, a numeric
    // literal written with a leading-zero octal prefix is a syntax error in
    // strict mode code.

    let mut engine = Context::new();

    let src = r#"
'use strict';
var n = 023;
"#;

    let result = dbg!(forward(&mut engine, src));

    assert!(result.starts_with("Uncaught \"SyntaxError\": "));
    assert!(result.contains("1:3"));
}

#[test]
fn test_strict_mode_with() {
    // Per https://tc39.es/ecma262/#sec-with-statement-static-semantics-early-errors,
    // a `with` statement occurring in strict mode code is an early error.

    let mut engine = Context::new();

    let src = r#"
'use strict';
function f(x, o) {
with (o) {
console.log(x);
}
}
"#;

    let result = dbg!(forward(&mut engine, src));

    assert!(result.starts_with("Uncaught \"SyntaxError\": "));
    assert!(result.contains("3:5"));
}

#[test]
fn test_strict_mode_delete() {
    // Per https://tc39.es/ecma262/#sec-delete-operator-static-semantics-early-errors,
    // applying `delete` to a plain variable name is an early error in strict
    // mode code.

    let mut engine = Context::new();

    let src = r#"
'use strict';
let x = 10;
delete x;
"#;

    let output = dbg!(forward(&mut engine, src));

    assert!(output.starts_with("Uncaught \"SyntaxError\": "));
    assert!(output.contains("3:1"));
}

#[test]
fn test_strict_mode_reserved_name() {
    // Checks that use of a future reserved word (or `eval`/`arguments`) as a
    // binding identifier is an error in strict mode code, as per
    // https://tc39.es/ecma262/#sec-strict-mode-of-ecmascript.

    let mut engine = Context::new();

    let test_cases = [
        "var implements = 10;",
        "var interface = 10;",
        "var let = 10;",
        "var package = 10;",
        "var private = 10;",
        "var protected = 10;",
        "var public = 10;",
        "var static = 10;",
        "var yield = 10;",
        "var eval = 10;",
        "var arguments = 10;",
    ];

    for case in test_cases.iter() {
        let scenario = format!("'use strict'; \n {}", case);

        let string = dbg!(forward(&mut engine, &scenario));

        // Attach the offending case to each assertion so that a failure in
        // this loop reports WHICH reserved name was mishandled, instead of a
        // bare boolean panic.
        assert!(
            string.starts_with("Uncaught \"SyntaxError\": "),
            "expected a strict-mode syntax error for case: {}",
            case
        );
        assert!(
            string.contains("2:1"),
            "wrong error position reported for case: {}",
            case
        );
    }
}

#[test]
fn test_strict_mode_func_decl_in_block() {
// Checks that a function declaration inside a block is rejected as a
// syntax error by the engine when in strict mode code.
// NOTE(review): ES2015+ actually permits block-level function declarations
// in strict mode (they are block-scoped; see
// https://tc39.es/ecma262/#sec-function-definitions); this test therefore
// pins current engine behavior rather than a spec early error — confirm
// the intended semantics before relying on it.

let scenario = r#"
'use strict';
let a = 4;
let b = 5;
if (a < b) { function f() {} }
"#;

let mut engine = Context::new();

let string = dbg!(forward(&mut engine, scenario));

// Expect an uncaught SyntaxError whose reported position is line 4, col 1
// (the `if` statement containing the block-level function declaration).
assert!(string.starts_with("Uncaught \"SyntaxError\": "));
assert!(string.contains("4:1"));
}

#[test]
fn test_strict_mode_dup_func_parameters() {
    // Duplicate parameter names in a function definition are an early error
    // in strict mode code.

    let mut engine = Context::new();

    let src = r#"
'use strict';
function f(a, b, b) {}
"#;

    let output = dbg!(forward(&mut engine, src));

    assert!(output.starts_with("Uncaught \"SyntaxError\": "));
    assert!(output.contains("2:1"));
}
14 changes: 12 additions & 2 deletions boa/src/syntax/lexer/comment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,12 @@ use std::io::Read;
pub(super) struct SingleLineComment;

impl<R> Tokenizer<R> for SingleLineComment {
fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position) -> Result<Token, Error>
fn lex(
&mut self,
cursor: &mut Cursor<R>,
start_pos: Position,
strict_mode: bool,
) -> Result<Token, Error>
where
R: Read,
{
Expand Down Expand Up @@ -58,7 +63,12 @@ impl<R> Tokenizer<R> for SingleLineComment {
pub(super) struct MultiLineComment;

impl<R> Tokenizer<R> for MultiLineComment {
fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position) -> Result<Token, Error>
fn lex(
&mut self,
cursor: &mut Cursor<R>,
start_pos: Position,
strict_mode: bool,
) -> Result<Token, Error>
where
R: Read,
{
Expand Down
39 changes: 37 additions & 2 deletions boa/src/syntax/lexer/identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,26 @@ use super::{Cursor, Error, Tokenizer};
use crate::{
profiler::BoaProfiler,
syntax::{
ast::{Position, Span},
ast::{Keyword, Position, Span},
lexer::{Token, TokenKind},
},
};
use std::io::Read;

/// Names that may not be used as identifiers in strict mode code: the future
/// reserved words plus `eval` and `arguments`, per
/// https://tc39.es/ecma262/#sec-strict-mode-of-ecmascript.
/// `with` is not listed here because it parses as a proper keyword and is
/// rejected separately in the keyword branch of the identifier lexer.
const STRICT_FORBIDDEN_IDENTIFIERS: [&str; 11] = [
"eval",
"arguments",
"implements",
"interface",
"let",
"package",
"private",
"protected",
"public",
"static",
"yield",
];

/// Identifier lexing.
///
/// More information:
Expand All @@ -31,7 +45,12 @@ impl Identifier {
}

impl<R> Tokenizer<R> for Identifier {
fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position) -> Result<Token, Error>
fn lex(
&mut self,
cursor: &mut Cursor<R>,
start_pos: Position,
strict_mode: bool,
) -> Result<Token, Error>
where
R: Read,
{
Expand All @@ -49,8 +68,24 @@ impl<R> Tokenizer<R> for Identifier {
"null" => TokenKind::NullLiteral,
slice => {
if let Ok(keyword) = slice.parse() {
if strict_mode && keyword == Keyword::With {
return Err(Error::Syntax(
"using 'with' statement not allowed in strict mode".into(),
start_pos,
));
}
TokenKind::Keyword(keyword)
} else {
if strict_mode && STRICT_FORBIDDEN_IDENTIFIERS.contains(&slice) {
return Err(Error::Syntax(
format!(
"using future reserved keyword '{}' not allowed in strict mode",
slice
)
.into(),
start_pos,
));
}
TokenKind::identifier(slice)
}
}
Expand Down
33 changes: 21 additions & 12 deletions boa/src/syntax/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,12 @@ pub use token::{Token, TokenKind};

trait Tokenizer<R> {
/// Lexes the next token.
fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position) -> Result<Token, Error>
fn lex(
&mut self,
cursor: &mut Cursor<R>,
start_pos: Position,
strict_mode: bool,
) -> Result<Token, Error>
where
R: Read;
}
Expand Down Expand Up @@ -109,7 +114,11 @@ impl<R> Lexer<R> {
// that means it could be multiple different tokens depending on the input token.
//
// As per https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar
pub(crate) fn lex_slash_token(&mut self, start: Position) -> Result<Token, Error>
pub(crate) fn lex_slash_token(
&mut self,
start: Position,
strict_mode: bool,
) -> Result<Token, Error>
where
R: Read,
{
Expand All @@ -119,11 +128,11 @@ impl<R> Lexer<R> {
match c {
'/' => {
self.cursor.next_char()?.expect("/ token vanished"); // Consume the '/'
SingleLineComment.lex(&mut self.cursor, start)
SingleLineComment.lex(&mut self.cursor, start, strict_mode)
}
'*' => {
self.cursor.next_char()?.expect("* token vanished"); // Consume the '*'
MultiLineComment.lex(&mut self.cursor, start)
MultiLineComment.lex(&mut self.cursor, start, strict_mode)
}
ch => {
match self.get_goal() {
Expand All @@ -146,7 +155,7 @@ impl<R> Lexer<R> {
}
InputElement::RegExp | InputElement::RegExpOrTemplateTail => {
// Can be a regular expression.
RegexLiteral.lex(&mut self.cursor, start)
RegexLiteral.lex(&mut self.cursor, start, strict_mode)
}
}
}
Expand Down Expand Up @@ -188,13 +197,13 @@ impl<R> Lexer<R> {
TokenKind::LineTerminator,
Span::new(start, self.cursor.pos()),
)),
'"' | '\'' => StringLiteral::new(next_chr).lex(&mut self.cursor, start),
'`' => TemplateLiteral.lex(&mut self.cursor, start),
'"' | '\'' => StringLiteral::new(next_chr).lex(&mut self.cursor, start, strict_mode),
'`' => TemplateLiteral.lex(&mut self.cursor, start, strict_mode),
_ if next_chr.is_digit(10) => {
NumberLiteral::new(next_chr, strict_mode).lex(&mut self.cursor, start)
NumberLiteral::new(next_chr).lex(&mut self.cursor, start, strict_mode)
}
_ if next_chr.is_alphabetic() || next_chr == '$' || next_chr == '_' => {
Identifier::new(next_chr).lex(&mut self.cursor, start)
Identifier::new(next_chr).lex(&mut self.cursor, start, strict_mode)
}
';' => Ok(Token::new(
Punctuator::Semicolon.into(),
Expand All @@ -204,7 +213,7 @@ impl<R> Lexer<R> {
Punctuator::Colon.into(),
Span::new(start, self.cursor.pos()),
)),
'.' => SpreadLiteral::new().lex(&mut self.cursor, start),
'.' => SpreadLiteral::new().lex(&mut self.cursor, start, strict_mode),
'(' => Ok(Token::new(
Punctuator::OpenParen.into(),
Span::new(start, self.cursor.pos()),
Expand Down Expand Up @@ -237,9 +246,9 @@ impl<R> Lexer<R> {
Punctuator::Question.into(),
Span::new(start, self.cursor.pos()),
)),
'/' => self.lex_slash_token(start),
'/' => self.lex_slash_token(start, strict_mode),
'=' | '*' | '+' | '-' | '%' | '|' | '&' | '^' | '<' | '>' | '!' | '~' => {
Operator::new(next_chr).lex(&mut self.cursor, start)
Operator::new(next_chr).lex(&mut self.cursor, start, strict_mode)
}
_ => {
let details = format!(
Expand Down
16 changes: 10 additions & 6 deletions boa/src/syntax/lexer/number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,12 @@ use std::{io::Read, str::FromStr};
#[derive(Debug, Clone, Copy)]
pub(super) struct NumberLiteral {
init: char,
strict_mode: bool,
}

impl NumberLiteral {
/// Creates a new string literal lexer.
pub(super) fn new(init: char, strict_mode: bool) -> Self {
Self { init, strict_mode }
pub(super) fn new(init: char) -> Self {
Self { init }
}
}

Expand Down Expand Up @@ -135,7 +134,12 @@ where
}

impl<R> Tokenizer<R> for NumberLiteral {
fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position) -> Result<Token, Error>
fn lex(
&mut self,
cursor: &mut Cursor<R>,
start_pos: Position,
strict_mode: bool,
) -> Result<Token, Error>
where
R: Read,
{
Expand Down Expand Up @@ -187,7 +191,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
ch => {
if ch.is_digit(8) {
// LegacyOctalIntegerLiteral
if self.strict_mode {
if strict_mode {
// LegacyOctalIntegerLiteral is forbidden with strict mode true.
return Err(Error::syntax(
"implicit octal literals are not allowed in strict mode",
Expand All @@ -205,7 +209,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
// Indicates a numerical digit comes after then 0 but it isn't an octal digit
// so therefore this must be a number with an unneeded leading 0. This is
// forbidden in strict mode.
if self.strict_mode {
if strict_mode {
return Err(Error::syntax(
"leading 0's are not allowed in strict mode",
start_pos,
Expand Down
7 changes: 6 additions & 1 deletion boa/src/syntax/lexer/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,12 @@ impl Operator {
}

impl<R> Tokenizer<R> for Operator {
fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position) -> Result<Token, Error>
fn lex(
&mut self,
cursor: &mut Cursor<R>,
start_pos: Position,
strict_mode: bool,
) -> Result<Token, Error>
where
R: Read,
{
Expand Down
7 changes: 6 additions & 1 deletion boa/src/syntax/lexer/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,12 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
pub(super) struct RegexLiteral;

impl<R> Tokenizer<R> for RegexLiteral {
fn lex(&mut self, cursor: &mut Cursor<R>, start_pos: Position) -> Result<Token, Error>
fn lex(
&mut self,
cursor: &mut Cursor<R>,
start_pos: Position,
strict_mode: bool,
) -> Result<Token, Error>
where
R: Read,
{
Expand Down

0 comments on commit b359035

Please sign in to comment.