Skip to content

Commit

Permalink
Merge branch 'rowcol' into 0.9.x-rowcol
Browse files Browse the repository at this point in the history
- bring in (mostly) correct line/column number reporting
  • Loading branch information
metatoaster committed Jun 13, 2017
2 parents e988052 + a6c5eaf commit 124ab8a
Show file tree
Hide file tree
Showing 10 changed files with 1,320 additions and 766 deletions.
19 changes: 19 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,25 @@
Changelog
=========

0.9.1 - Unreleased
------------------

- Corrected the line number reporting for the lexer, and corrected the
propagation of that to the parser and the Node subclasses. Fixes the
incorrect implementation added by `moses-palmer/slimit@8f9a39c7769
<https://github.com/moses-palmer/slimit/commit/8f9a39c7769>`_ (where
the line numbers are tabulated incorrectly when comments are present),
and also the yacc tracking added by `moses-palmer/slimit@6aa92d68e0
<https://github.com/moses-palmer/slimit/commit/6aa92d68e0>`_ (where
the custom lexer class does not provide the position attributes
required by ply).
- Implemented bookkeeping of column number.
- The repr form of Node now shows the line/col number info by default;
the visit method of the ReprVisitor class has not been changed, only
its invocation via the callable form has, as that is the call target
for __repr__. This is a good time to mention that named methods
afford the most control for usage, as already documented.

0.9.0 - 2017-06-09
------------------

Expand Down
39 changes: 29 additions & 10 deletions src/calmjs/parse/asttypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,34 @@


class Node(object):
def __init__(self, children=None, p=None):
def __init__(self, children=None):
self._children_list = [] if children is None else children
self.setpos(p)

def setpos(self, p):
self.lexpos = None if p is None else p.lexpos(1)
self.lineno = None if p is None else p.lineno(1)
# print 'setpos', self, p, self.lexpos, self.lineno
self.lexpos = self.lineno = self.colno = None

def setpos(self, p, idx=1):
self.lexpos = p.lexpos(idx)
self.lineno = p.lineno(idx)
# YaccProduction does not provide helpers for colno, so query
# for a helper out of class and see if it helps...
self.colno = (
p.lexer.lookup_colno(self.lineno, self.lexpos) if callable(
getattr(p.lexer, 'lookup_colno', None)) else 0
)

# the very ugly debugger invocation for locating the special
# cases that are required

# if not self.lexpos and not self.lineno:
# print('setpos', self.__class__.__name__, p.stack,
# self.lexpos, self.lineno, self.colno)
# # uncomment when yacc_tracking is True
# # import pdb;pdb.set_trace()
# # uncomment when yacc_tracking is False
# # import sys
# # from traceback import extract_stack
# # _src = extract_stack(sys._getframe(1), 1)[0].line
# # if '# require yacc_tracking' not in _src:
# # import pdb;pdb.set_trace()

def __iter__(self):
for child in self.children():
Expand Down Expand Up @@ -175,11 +195,10 @@ class VarStatement(Node):


class VarDecl(Node):
def __init__(self, identifier, initializer=None, p=None):
def __init__(self, identifier, initializer=None):
self.identifier = identifier
self.identifier._mangle_candidate = True
self.initializer = initializer
self.setpos(p)

def children(self):
return [self.identifier, self.initializer]
Expand Down Expand Up @@ -368,7 +387,7 @@ def __init__(self, elements):
self.elements = elements

def children(self):
return self.elements
return [self.elements]


class Debugger(Node):
Expand Down
76 changes: 66 additions & 10 deletions src/calmjs/parse/lexers/es5.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
COMBINING_MARK,
CONNECTOR_PUNCTUATION,
)
from calmjs.parse.utils import format_lex_token

# See "Regular Expression Literals" at
# http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html
Expand Down Expand Up @@ -109,20 +110,43 @@ class Lexer(object):
http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-262.pdf
"""
def __init__(self):
    """
    Reset all token bookkeeping state, then build the ply lexer.
    """

    # placeholder until build() assigns the real ply lexer below
    self.lexer = None
    # previous / current token tracking, all unset to begin with
    self.prev_token = self.cur_token = self.cur_token_real = None
    self.next_tokens = []
    self.token_stack = [[None, []]]
    # offsets recorded for line starts; the first line starts at 0
    self.newline_idx = [0]
    self.build()

@property
def lineno(self):
    """
    Line number reported by the wrapped lexer, or 0 if there is none.
    """

    if not self.lexer:
        return 0
    return self.lexer.lineno

@property
def lexpos(self):
    """
    Lexing position reported by the wrapped lexer, or 0 if there is
    none.
    """

    if not self.lexer:
        return 0
    return self.lexer.lexpos

@property
def last_newline_lexpos(self):
    """
    The most recently recorded entry of newline_idx.
    """

    recorded = self.newline_idx
    return recorded[-1]

def build(self, **kwargs):
    """
    Build the lexer.

    This instance is handed to ply.lex.lex as its ``object`` argument
    and any keyword arguments are forwarded unchanged; the result is
    stored on self.lexer.
    """

    make_lexer = ply.lex.lex
    self.lexer = make_lexer(object=self, **kwargs)

def input(self, text):
    """
    Hand the source text over to the wrapped lexer.
    """

    feed = self.lexer.input
    feed(text)

def _set_pos(self, token):
    """
    Update line bookkeeping for a token that may span several lines.

    For every fragment of the token value that ends with a carriage
    return or line feed, the lexer line number is incremented and the
    offset just past that fragment is appended to newline_idx.
    """

    lexpos = token.lexpos
    for line in token.value.splitlines(True):
        # Always advance the running offset: str.splitlines also breaks
        # on characters other than \r and \n (form feed, U+2028, ...),
        # and skipping those fragments would make every later recorded
        # offset too small.
        lexpos += len(line)
        if line.endswith(('\r', '\n')):
            self.lexer.lineno += 1
            self.newline_idx.append(lexpos)

def token(self):
if self.next_tokens:
return self.next_tokens.pop()
Expand All @@ -139,15 +163,15 @@ def token(self):
except IndexError:
tok = self._get_update_token()
if tok is not None and tok.type == 'LINE_TERMINATOR':
lexer.lineno += len(tok.value.splitlines())
self._set_pos(tok)
continue
else:
return tok

if char != '/' or (char == '/' and next_char in ('/', '*')):
tok = self._get_update_token()
if tok.type in DIVISION_SYNTAX_MARKERS:
lexer.lineno += len(tok.value.splitlines())
self._set_pos(tok)
continue
else:
return tok
Expand Down Expand Up @@ -227,8 +251,11 @@ def _get_update_token(self):
# TODO actually give up earlier than this with the first
# mismatch.
raise ECMASyntaxError(
"Mismatched '%s' at line %d" % (
self.cur_token.value, self.cur_token.lineno)
"Mismatched '%s' at %d:%d" % (
self.cur_token.value,
self.cur_token.lineno,
self._get_colno(self.cur_token),
)
)

# insert semicolon before restricted tokens
Expand All @@ -240,18 +267,44 @@ def _get_update_token(self):
'RETURN', 'THROW']):
return self._create_semi_token(self.cur_token)

return self.cur_token
return self._set_colno(self.cur_token)

def _set_colno(self, token):
    """
    Attach a colno attribute to the token, if there is one, and hand
    the same token back.
    """

    if not token:
        # nothing to annotate; pass the falsy value straight through
        return token
    token.colno = self._get_colno(token)
    return token

def _get_colno(self, token):
    """
    Column number of the token relative to the last recorded newline.
    """

    distance = token.lexpos - self.last_newline_lexpos
    # columns are 1-based so they map nicer to commonly used/configured
    # text editors.
    return distance + 1

def lookup_colno(self, lineno, lexpos):
    """
    Look up a colno from the lineno and lexpos.

    The offset recorded in newline_idx for the given (1-based) line is
    subtracted from lexpos to produce the column.
    """

    line_start = self.newline_idx[lineno - 1]
    # columns are 1-based so they map nicer to commonly used/configured
    # text editors.
    return lexpos - line_start + 1

def _create_semi_token(self, orig_token):
    """
    Produce a synthetic SEMI token, positioned at orig_token when one
    is supplied and at line 0 / position 0 otherwise.
    """

    semi = ply.lex.LexToken()
    semi.type, semi.value = 'SEMI', ';'
    # TODO figure out whether/how to normalize this with the actual
    # length of the original token...
    # Though, if actual use case boils down to error reporting, line
    # number is sufficient, and leaving it as 0 means it shouldn't get
    # dealt with during source map generation.
    semi.colno = 0
    if orig_token is None:
        semi.lineno = semi.lexpos = 0
    else:
        semi.lineno = orig_token.lineno
        semi.lexpos = orig_token.lexpos
    return semi

# iterator protocol
Expand Down Expand Up @@ -350,8 +403,8 @@ def next(self):

def t_regex_error(self, token):
raise ECMARegexSyntaxError(
"Error parsing regular expression '%s' at %s" % (
token.value, token.lineno)
"Error parsing regular expression '%s' at %s:%s" % (
token.value, token.lineno, self._get_colno(token))
)

# Punctuators
Expand Down Expand Up @@ -517,6 +570,9 @@ def t_ID(self, token):
return token

def t_error(self, token):
# TODO figure out how to report column instead of lexpos.
raise ECMASyntaxError('Illegal character %r at %s:%s after %s' % (
token.value[0], token.lineno, token.lexpos, self.cur_token))
raise ECMASyntaxError(
'Illegal character %r at %s:%s after %s' % (
token.value[0], token.lineno, self._get_colno(token),
format_lex_token(self.cur_token),
)
)

0 comments on commit 124ab8a

Please sign in to comment.