Skip to content

Commit

Permalink
Merge 9fd68d7 into 9755795
Browse files Browse the repository at this point in the history
  • Loading branch information
metatoaster committed Apr 22, 2018
2 parents 9755795 + 9fd68d7 commit 874176c
Show file tree
Hide file tree
Showing 3 changed files with 245 additions and 0 deletions.
61 changes: 61 additions & 0 deletions src/calmjs/parse/lexers/es2015.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
"""
ES2015 (ECMAScript 6th Edition/ES6) lexer.
"""

import re
import ply

from calmjs.parse.lexers.es5 import Lexer as ES5Lexer

# Classifiers for a raw template token, as (compiled pattern, token type)
# pairs.  ``Lexer.t_TEMPLATE_RAW`` applies them with ``.match`` in order, so
# each pattern is anchored at the start implicitly and at the end with
# ``\Z``: the token type is decided solely by the token's first character
# (a backtick or ``}``) and its final delimiter (a backtick or ``${``).
# Without the end anchor an escaped delimiter inside the token, such as the
# ``\``` in `` `f\`o${ ``, would satisfy the wrong pattern.
# ``re.S`` lets ``.`` span the line terminators a template may contain.
template_token_types = (
    (re.compile(r'`.*`\Z', re.S), 'TEMPLATE_NOSUB'),
    (re.compile(r'`.*\$\{\Z', re.S), 'TEMPLATE_HEAD'),
    (re.compile(r'\}.*\$\{\Z', re.S), 'TEMPLATE_MIDDLE'),
    (re.compile(r'\}.*`\Z', re.S), 'TEMPLATE_TAIL'),
)


class Lexer(ES5Lexer):
    """
    ES2015 lexer.

    Extends the ES5 lexer with the ``=>`` and ``...`` punctuators and with
    the four template literal token types.
    """

    # Punctuators (ES6)
    # There is deliberately no standalone rule for ``${``, nor a new one for
    # the ``}`` that ends a substitution: both are consumed as the closing
    # or opening delimiter of the raw template token matched by ``template``
    # below.
    # NOTE(review): this means a ``}`` that is followed, anywhere later in
    # the input, by an unescaped backtick or ``${`` can match the template
    # rule instead of the plain right brace rule - confirm against the rule
    # ordering of the base lexer.
    t_ARROW = r'=>'
    t_SPREAD = r'\.\.\.'

    tokens = ES5Lexer.tokens + (
        # ES2015 punctuators
        'ARROW', 'SPREAD',  # => ...

        # ES2015 terminal types
        'TEMPLATE_NOSUB', 'TEMPLATE_HEAD', 'TEMPLATE_MIDDLE', 'TEMPLATE_TAIL',
    )

    # One raw template chunk: an opening backtick or ``}``, the shortest run
    # of template characters / escape sequences, then a closing backtick or
    # ``${``.  The pattern carries inline ``#`` comments and layout
    # whitespace, so it only works when compiled in verbose mode
    # (presumably ply's default regex flags - confirm the base lexer does
    # not override them).
    template = r"""
(?:(?:`|}) # opening ` or }
(?: [^`\\] # no \, or `
| \\[a-zA-Z!-\/:-@\[-`{-~] # or escaped characters
| \\(\n|\r(?!\n)|\u2028|\u2029|\r\n) # or line continuation
| \\x[0-9a-fA-F]{2} # or hex_escape_sequence
| \\u[0-9a-fA-F]{4} # or unicode_escape_sequence
| \\(?:[1-7][0-7]{0,2}|[0-7]{2,3}) # or octal_escape_sequence
| \\0 # or <NUL> (15.10.2.11)
)*? # zero or many times
(?:`|\${)) # closing ` or ${
"""  # `

    @ply.lex.TOKEN(template)
    def t_TEMPLATE_RAW(self, token):
        # 'TEMPLATE_RAW' itself is not a declared token type; every match
        # is re-typed to one of the four TEMPLATE_* types according to the
        # first classifier in ``template_token_types`` that accepts it.
        for patt, token_type in template_token_types:
            if patt.match(token.value):
                token.type = token_type
                return token
        # The classifiers are meant to cover every string ``template`` can
        # match, so reaching this point signals that the two have drifted
        # apart.  Fail loudly instead of silently mishandling the token.
        raise ValueError(
            'unable to classify template token %r' % (token.value,))
102 changes: 102 additions & 0 deletions src/calmjs/parse/tests/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,108 @@
)
]

# Token-stream expectations for ES2015 input.  Each entry is
# (label, (source text, expected tokens)), where every expected token is
# written as 'TYPE value'.
es2015_cases = [
    ('const_keyword', (
        'const c',
        ['CONST const', 'ID c'],
    )),
    ('punctuators', (
        '=> ...',
        ['ARROW =>', 'SPREAD ...'],
    )),
    ('arrow_functions', (
        'const c = (name) => { return name; }',
        [
            'CONST const', 'ID c', 'EQ =',
            'LPAREN (', 'ID name', 'RPAREN )',
            'ARROW =>',
            'LBRACE {', 'RETURN return', 'ID name', 'SEMI ;', 'RBRACE }',
        ],
    )),
    ('spread', (
        '[...spring, ...summer]',
        [
            'LBRACKET [',
            'SPREAD ...', 'ID spring', 'COMMA ,',
            'SPREAD ...', 'ID summer',
            'RBRACKET ]',
        ],
    )),

    # Templates without substitutions.
    ('template_literal', (
        '`foo`',
        ['TEMPLATE_NOSUB `foo`'],
    )),
    ('template_multiline', (
        '`foo\nbar\r\nfoo`',
        ['TEMPLATE_NOSUB `foo\nbar\r\nfoo`'],
    )),
    ('template_other_newlines', (
        '`foo\u2028\u2029foo`',
        ['TEMPLATE_NOSUB `foo\u2028\u2029foo`'],
    )),
    ('template_literal_with_dollar', (
        '`foo$`',
        ['TEMPLATE_NOSUB `foo$`'],
    )),

    # Templates with substitutions.
    ('template_head_tail', (
        r'`hello ${name} while this`',
        [
            'TEMPLATE_HEAD `hello ${',
            'ID name',
            'TEMPLATE_TAIL } while this`',
        ],
    )),
    ('template_empty_head_tail', (
        r'`${name}`',
        ['TEMPLATE_HEAD `${', 'ID name', 'TEMPLATE_TAIL }`'],
    )),
    ('template_nested', (
        r'`${`${a * 2}`} ${b}`',
        [
            'TEMPLATE_HEAD `${',
            'TEMPLATE_HEAD `${', 'ID a', 'MULT *', 'NUMBER 2',
            'TEMPLATE_TAIL }`',
            'TEMPLATE_MIDDLE } ${',
            'ID b',
            'TEMPLATE_TAIL }`',
        ],
    )),
    ('template_some_keywords', (
        r'`this -> ${this}.`',
        ['TEMPLATE_HEAD `this -> ${', 'THIS this', 'TEMPLATE_TAIL }.`'],
    )),

    # Escapes and surrounding context.
    ('template_literal_escape', (
        r'`f\`o`',
        [r'TEMPLATE_NOSUB `f\`o`'],
    )),
    ('template_literal_assignment', (
        's = `hello world`',
        ['ID s', 'EQ =', 'TEMPLATE_NOSUB `hello world`'],
    )),
]

# Position expectations for ES2015 input.  Each entry is
# (label, source text, (first list, second list)); both lists describe the
# same token sequence as 'value line:position'.
# NOTE(review): the first list looks like a 0-based offset into the whole
# input and the second like a 1-based column within the line - confirm
# against run_lexer_pos.
# Because both lists come from the same input, the token values in them
# must agree item for item.
es2015_pos_cases = [
    (
        'single_line_template',
        """
var foo = `bar`;
""", ([
            'var 1:0', 'foo 1:4', '= 1:8', '`bar` 1:10', '; 1:15'
        ], [
            'var 1:1', 'foo 1:5', '= 1:9', '`bar` 1:11', '; 1:16',
        ])
    ), (
        'multi_line',
        """
var foo = `bar
${1}`;
""", ([
            'var 1:0', 'foo 1:4', '= 1:8', '`bar\n${ 1:10',
            '1 2:17', '}` 2:18', '; 2:20',
        ], [
            'var 1:1', 'foo 1:5', '= 1:9', '`bar\n${ 1:11',
            '1 2:3', '}` 2:4', '; 2:6',
        ])
    ), (
        'multi_line_joined',
        r"""
var foo = `bar\
${1}`;
""", ([
            'var 1:0', 'foo 1:4', '= 1:8', '`bar\\\n${ 1:10',
            '1 2:18', '}` 2:19', '; 2:21',
        ], [
            # The head token keeps its backslash line continuation, exactly
            # as in the list above.
            'var 1:1', 'foo 1:5', '= 1:9', '`bar\\\n${ 1:11',
            '1 2:3', '}` 2:4', '; 2:6',
        ])
    )
]


def run_lexer(value, lexer_cls):
lexer = lexer_cls()
Expand Down
82 changes: 82 additions & 0 deletions src/calmjs/parse/tests/test_es2015_lexer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
###############################################################################
# encoding: utf-8
#
# Copyright (c) 2011 Ruslan Spivak
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
###############################################################################

__author__ = 'Ruslan Spivak <ruslan.spivak@gmail.com>'

import unittest
from functools import partial

from calmjs.parse.lexers.es2015 import Lexer
from calmjs.parse.exceptions import ECMASyntaxError

from calmjs.parse.testing.util import build_equality_testcase
from calmjs.parse.tests.lexer import (
run_lexer,
run_lexer_pos,
es5_cases,
es5_pos_cases,
es2015_cases,
es2015_pos_cases,
)


class LexerFailureTestCase(unittest.TestCase):
    """Inputs that the ES2015 lexer is expected to reject."""

    def test_initial_template_character(self):
        # A lone backtick opens a template literal that is never closed, so
        # draining the lexer must fail on that very first character.
        lexer = Lexer()
        lexer.input('`')
        with self.assertRaises(ECMASyntaxError) as caught:
            for _ in lexer:
                pass
        message = str(caught.exception)
        self.assertEqual(message, "Illegal character '`' at 1:1")


# A single generated case: every keyword known to the lexer, lower-cased and
# joined by spaces, must come back as one 'KEYWORD keyword' token each.
# The first argument (presumably the name given to the generated class) now
# matches the variable it is bound to, as it does for the other generated
# test cases in this module.
LexerKeywordTestCase = build_equality_testcase(
    'LexerKeywordTestCase', partial(run_lexer, lexer_cls=Lexer), (
        (label, data[0], data[1],) for label, data in [(
            # Keywords
            # ('break case ...', ['BREAK break', 'CASE case', ...])
            'keywords_all',
            (' '.join(kw.lower() for kw in Lexer.keywords),
             ['%s %s' % (kw, kw.lower()) for kw in Lexer.keywords]
             ),
        )]
    )
)

# The shared ES5 expectations are run against the ES2015 lexer as well.
LexerES5TestCase = build_equality_testcase(
    'LexerES5TestCase',
    partial(run_lexer, lexer_cls=Lexer),
    [(name, case[0], case[1]) for name, case in es5_cases],
)

LexerES5PosTestCase = build_equality_testcase(
    'LexerES5PosTestCase',
    partial(run_lexer_pos, lexer_cls=Lexer),
    es5_pos_cases,
)

# ES2015-only expectations: new punctuators and template literals.
LexerES2015TestCase = build_equality_testcase(
    'LexerES2015TestCase',
    partial(run_lexer, lexer_cls=Lexer),
    [(name, case[0], case[1]) for name, case in es2015_cases],
)

LexerES2015PosTestCase = build_equality_testcase(
    'LexerES2015PosTestCase',
    partial(run_lexer_pos, lexer_cls=Lexer),
    es2015_pos_cases,
)

0 comments on commit 874176c

Please sign in to comment.