Merge 9fd68d7 into 9755795

calmjs · Apr 22, 2018 · 874176c · 874176c
2 parents 9755795 + 9fd68d7
commit 874176c
Show file tree

Hide file tree

Showing 3 changed files with 245 additions and 0 deletions.
diff --git a/src/calmjs/parse/lexers/es2015.py b/src/calmjs/parse/lexers/es2015.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+"""
+ES2015 (ECMAScript 6th Edition/ES6) lexer.
+"""
+
+import re
+import ply
+
+from calmjs.parse.lexers.es5 import Lexer as ES5Lexer
+
+# Classification table for raw template tokens: each entry pairs a
+# compiled pattern with the token type it identifies.  The patterns are
+# applied with ``match`` (anchored at the start only), so every pattern
+# carries an explicit ``\Z`` to anchor the end as well; without it an
+# escaped backtick or an escaped ``${`` inside the token lets a pattern
+# match a mere prefix and the token gets the wrong type.  re.S is needed
+# because template text may span multiple lines.
+template_token_types = (
+    (re.compile(r'`.*`\Z', re.S),
+        'TEMPLATE_NOSUB'),
+    (re.compile(r'`.*\$\{\Z', re.S),
+        'TEMPLATE_HEAD'),
+    (re.compile(r'}.*\$\{\Z', re.S),
+        'TEMPLATE_MIDDLE'),
+    (re.compile(r'}.*`\Z', re.S),
+        'TEMPLATE_TAIL'),
+)
+
+
+class Lexer(ES5Lexer):
+    """
+    ES2015 lexer.
+
+    Extends the ES5 lexer with the ``=>`` and ``...`` punctuators and
+    with four token types covering the pieces of a template literal.
+    """
+
+    # Punctuators (ES6)
+    # No dedicated ``${`` punctuator is defined: that sequence is
+    # consumed as the closing delimiter of a template token below.
+    # t_DOLLAR_LBRACE  = r'${'
+    # this is also a right brace punctuator...
+    # t_RBRACE        = r'}'
+    t_ARROW          = r'=>'
+    t_SPREAD         = r'\.\.\.'
+
+    # Token names of the parent lexer plus the ES2015 additions.
+    tokens = ES5Lexer.tokens + (
+        # ES2015 punctuators
+        'ARROW', 'SPREAD',    # => ...
+
+        # ES2015 terminal types
+        'TEMPLATE_NOSUB', 'TEMPLATE_HEAD', 'TEMPLATE_MIDDLE', 'TEMPLATE_TAIL',
+    )
+
+    # Pattern for one raw chunk of a template literal: it opens with a
+    # backtick or ``}`` and runs lazily up to the nearest unescaped
+    # backtick or ``${``.  The inline ``#`` remarks imply the pattern is
+    # compiled in verbose mode - presumably by ply; TODO confirm.
+    template = r"""
+    (?:(?:`|})                         # opening ` or }
+        (?: [^`\\]                     # no \, or `
+            | \\[a-zA-Z!-\/:-@\[-`{-~] # or escaped characters
+            | \\(\n|\r(?!\n)|\u2028|\u2029|\r\n)  # or line continuation
+            | \\x[0-9a-fA-F]{2}        # or hex_escape_sequence
+            | \\u[0-9a-fA-F]{4}        # or unicode_escape_sequence
+            | \\(?:[1-7][0-7]{0,2}|[0-7]{2,3}) # or octal_escape_sequence
+            | \\0                      # or <NUL> (15.10.2.11)
+        )*?                            # zero or many times
+    (?:`|\${))                         # closing ` or ${
+    """  # `
+
+    # NOTE(review): ``ply.lex`` is reached through a bare ``import ply``
+    # in this module; presumably the submodule has already been imported
+    # elsewhere (e.g. by the ES5 lexer module) - confirm.
+    @ply.lex.TOKEN(template)
+    def t_TEMPLATE_RAW(self, token):
+        # Refine the raw template match into a specific token type.
+        # Every entry of template_token_types is tried in order with no
+        # early exit, so when several patterns match the token text the
+        # last matching entry decides the type.
+        for patt, token_type in template_token_types:
+            if patt.match(token.value):
+                token.type = token_type
+        return token
diff --git a/src/calmjs/parse/tests/lexer.py b/src/calmjs/parse/tests/lexer.py
@@ -458,6 +458,108 @@
     )
 ]
 
+# Token-stream expectations for ES2015 source snippets.  Each entry is
+# ``(label, (source, expected))`` where ``expected`` lists the tokens in
+# order, written as ``'<TYPE> <value>'`` strings.
+es2015_cases = [
+    (
+        'const_keyword',
+        ('const c',
+         ['CONST const', 'ID c']),
+    ), (
+        'punctuators',
+        ('=> ...',
+         ['ARROW =>', 'SPREAD ...']),
+    ), (
+        'arrow_functions',
+        ('const c = (name) => { return name; }',
+         ['CONST const', 'ID c', 'EQ =', 'LPAREN (', 'ID name', 'RPAREN )',
+          'ARROW =>', 'LBRACE {', 'RETURN return', 'ID name', 'SEMI ;',
+          'RBRACE }']),
+    ), (
+        'spread',
+        ('[...spring, ...summer]',
+         ['LBRACKET [', 'SPREAD ...', 'ID spring', 'COMMA ,', 'SPREAD ...',
+          'ID summer', 'RBRACKET ]']),
+    ), (
+        # template literals without any substitution
+        'template_literal',
+        ('`foo`',
+         ['TEMPLATE_NOSUB `foo`']),
+    ), (
+        'template_multiline',
+        ('`foo\nbar\r\nfoo`',
+         ['TEMPLATE_NOSUB `foo\nbar\r\nfoo`']),
+    ), (
+        'template_other_newlines',
+        ('`foo\u2028\u2029foo`',
+         ['TEMPLATE_NOSUB `foo\u2028\u2029foo`']),
+    ), (
+        # a ``$`` that is not followed by ``{`` is ordinary template text
+        'template_literal_with_dollar',
+        ('`foo$`',
+         ['TEMPLATE_NOSUB `foo$`']),
+    ), (
+        # templates with substitutions are split at ``${`` and ``}``
+        'template_head_tail',
+        (r'`hello ${name} while this`',
+         ['TEMPLATE_HEAD `hello ${', 'ID name', 'TEMPLATE_TAIL } while this`']),
+    ), (
+        'template_empty_head_tail',
+        (r'`${name}`',
+         ['TEMPLATE_HEAD `${', 'ID name', 'TEMPLATE_TAIL }`']),
+    ), (
+        'template_nested',
+        (r'`${`${a * 2}`} ${b}`',
+         ['TEMPLATE_HEAD `${', 'TEMPLATE_HEAD `${', 'ID a', 'MULT *',
+          'NUMBER 2', 'TEMPLATE_TAIL }`', 'TEMPLATE_MIDDLE } ${', 'ID b',
+          'TEMPLATE_TAIL }`']),
+    ), (
+        'template_some_keywords',
+        (r'`this -> ${this}.`',
+         ['TEMPLATE_HEAD `this -> ${', 'THIS this', 'TEMPLATE_TAIL }.`']),
+    ), (
+        # an escaped backtick does not terminate the template
+        'template_literal_escape',
+        (r'`f\`o`',
+         [r'TEMPLATE_NOSUB `f\`o`']),
+    ), (
+        'template_literal_assignment',
+        ('s = `hello world`',
+         ['ID s', 'EQ =', 'TEMPLATE_NOSUB `hello world`']),
+    )
+]
+
+# Position expectations for ES2015 source snippets.  Each entry is
+# ``(label, source, (first, second))``; both lists hold one
+# ``'<token text> <line>:<offset>'`` string per token.  The first list
+# carries zero-based offsets that keep increasing across lines, the
+# second carries one-based offsets that restart on each line -
+# presumably lexpos and column respectively; TODO confirm against
+# run_lexer_pos.
+es2015_pos_cases = [
+    (
+        'single_line_template',
+        """
+        var foo = `bar`;
+        """, ([
+            'var 1:0', 'foo 1:4', '= 1:8', '`bar` 1:10', '; 1:15'
+        ], [
+            'var 1:1', 'foo 1:5', '= 1:9', '`bar` 1:11', '; 1:16',
+        ])
+    ), (
+        'multi_line',
+        """
+        var foo = `bar
+        ${1}`;
+        """, ([
+            'var 1:0', 'foo 1:4', '= 1:8', '`bar\n${ 1:10',
+            '1 2:17', '}` 2:18', '; 2:20',
+        ], [
+            'var 1:1', 'foo 1:5', '= 1:9', '`bar\n${ 1:11',
+            '1 2:3', '}` 2:4', '; 2:6',
+        ])
+    ), (
+        # raw source: the first template line ends with a backslash
+        'multi_line_joined',
+        r"""
+        var foo = `bar\
+        ${1}`;
+        """, ([
+            'var 1:0', 'foo 1:4', '= 1:8', '`bar\\\n${ 1:10',
+            '1 2:18', '}` 2:19', '; 2:21',
+        ], [
+            # NOTE(review): the template text below lacks the backslash
+            # that the same token carries in the list above; if both
+            # lists are built from the same token value this entry looks
+            # like it should read '`bar\\\n${ 1:11' - confirm.
+            'var 1:1', 'foo 1:5', '= 1:9', '`bar\n${ 1:11',
+            '1 2:3', '}` 2:4', '; 2:6',
+        ])
+    )
+]
+
 
 def run_lexer(value, lexer_cls):
     lexer = lexer_cls()

diff --git a/src/calmjs/parse/tests/test_es2015_lexer.py b/src/calmjs/parse/tests/test_es2015_lexer.py
@@ -0,0 +1,82 @@
+###############################################################################
+# encoding: utf-8
+#
+# Copyright (c) 2011 Ruslan Spivak
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+###############################################################################
+
+__author__ = 'Ruslan Spivak <ruslan.spivak@gmail.com>'
+
+import unittest
+from functools import partial
+
+from calmjs.parse.lexers.es2015 import Lexer
+from calmjs.parse.exceptions import ECMASyntaxError
+
+from calmjs.parse.testing.util import build_equality_testcase
+from calmjs.parse.tests.lexer import (
+    run_lexer,
+    run_lexer_pos,
+    es5_cases,
+    es5_pos_cases,
+    es2015_cases,
+    es2015_pos_cases,
+)
+
+
+class LexerFailureTestCase(unittest.TestCase):
+    """Inputs that the ES2015 lexer is expected to reject."""
+
+    def test_initial_template_character(self):
+        # A lone backtick has no closing delimiter, so draining the
+        # lexer must raise with the position of that character.
+        expected_message = "Illegal character '`' at 1:1"
+        lexer = Lexer()
+        lexer.input('`')
+        with self.assertRaises(ECMASyntaxError) as caught:
+            list(lexer)
+        self.assertEqual(str(caught.exception), expected_message)
+
+
+# Single generated case: every keyword known to the lexer, lowercased
+# and space-joined, must come back as one '<KEYWORD> <keyword>' token
+# each, e.g. ('break case ...', ['BREAK break', 'CASE case', ...]).
+# The generated case name now matches the name it is bound to, in line
+# with the other suites in this module.
+LexerKeywordTestCase = build_equality_testcase(
+    'LexerKeywordTestCase', partial(run_lexer, lexer_cls=Lexer), [(
+        'keywords_all',
+        ' '.join(kw.lower() for kw in Lexer.keywords),
+        ['%s %s' % (kw, kw.lower()) for kw in Lexer.keywords],
+    )]
+)
+
+# The ES5 case tables are run against the ES2015 lexer as well.
+LexerES5TestCase = build_equality_testcase(
+    'LexerES5TestCase',
+    partial(run_lexer, lexer_cls=Lexer),
+    ((name, pair[0], pair[1]) for name, pair in es5_cases),
+)
+
+LexerES5PosTestCase = build_equality_testcase(
+    'LexerES5PosTestCase',
+    partial(run_lexer_pos, lexer_cls=Lexer),
+    es5_pos_cases,
+)
+
+# Case tables specific to ES2015 syntax.
+LexerES2015TestCase = build_equality_testcase(
+    'LexerES2015TestCase',
+    partial(run_lexer, lexer_cls=Lexer),
+    ((name, pair[0], pair[1]) for name, pair in es2015_cases),
+)
+
+LexerES2015PosTestCase = build_equality_testcase(
+    'LexerES2015PosTestCase',
+    partial(run_lexer_pos, lexer_cls=Lexer),
+    es2015_pos_cases,
+)