New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Revise comma handling on templates #2213
Changes from all commits
518c6b8
657a711
3e82007
c5da629
550206a
0eb0353
f0a14bf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -311,33 +311,51 @@ class Parser(object): | |
replaced with a real, accepted parsing technique (PEG, parser | ||
generator, etc.). | ||
""" | ||
def __init__(self, string): | ||
def __init__(self, string, in_argument=False): | ||
""" Create a new parser. | ||
:param in_arguments: boolean that indicates the parser is to be | ||
used for parsing function arguments, ie. considering commas | ||
(`ARG_SEP`) a special character | ||
""" | ||
self.string = string | ||
self.in_argument = in_argument | ||
self.pos = 0 | ||
self.parts = [] | ||
|
||
# Common parsing resources. | ||
special_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_OPEN, GROUP_CLOSE, | ||
ARG_SEP, ESCAPE_CHAR) | ||
ESCAPE_CHAR) | ||
special_char_re = re.compile(r'[%s]|$' % | ||
u''.join(re.escape(c) for c in special_chars)) | ||
escapable_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP) | ||
terminator_chars = (GROUP_CLOSE,) | ||
|
||
def parse_expression(self): | ||
"""Parse a template expression starting at ``pos``. Resulting | ||
components (Unicode strings, Symbols, and Calls) are added to | ||
the ``parts`` field, a list. The ``pos`` field is updated to be | ||
the next character after the expression. | ||
""" | ||
# Append comma (ARG_SEP) to the list of special characters only when | ||
# parsing function arguments. | ||
extra_special_chars = () | ||
special_char_re = self.special_char_re | ||
if self.in_argument: | ||
extra_special_chars = (ARG_SEP,) | ||
special_char_re = re.compile( | ||
r'[%s]|$' % u''.join(re.escape(c) for c in | ||
self.special_chars + extra_special_chars)) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Take 2, using an `in_argument` parameter. The alternative would be overriding the class variables in the constructor:
def __init__(self, string, in_argument=False):
...
if in_argument:
self.special_chars = ...
self.special_char_re = ...
It might be quite a minor concern, as the Parsers that are used for list arguments only get a single call to parse_expression (and none to the other methods) in practice, so I'd be up for another refactoring if you think the trade-off makes sense.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes; this looks great! The small bit of extra cruft is a little bit annoying, but I agree this is the right direction. It's preferable, as you note, to overriding those class variables. |
||
|
||
text_parts = [] | ||
|
||
while self.pos < len(self.string): | ||
char = self.string[self.pos] | ||
|
||
if char not in self.special_chars: | ||
if char not in self.special_chars + extra_special_chars: | ||
# A non-special character. Skip to the next special | ||
# character, treating the interstice as literal text. | ||
next_pos = ( | ||
self.special_char_re.search( | ||
special_char_re.search( | ||
self.string[self.pos:]).start() + self.pos | ||
) | ||
text_parts.append(self.string[self.pos:next_pos]) | ||
|
@@ -348,14 +366,14 @@ def parse_expression(self): | |
# The last character can never begin a structure, so we | ||
# just interpret it as a literal character (unless it | ||
# terminates the expression, as with , and }). | ||
if char not in (GROUP_CLOSE, ARG_SEP): | ||
if char not in self.terminator_chars + extra_special_chars: | ||
text_parts.append(char) | ||
self.pos += 1 | ||
break | ||
|
||
next_char = self.string[self.pos + 1] | ||
if char == ESCAPE_CHAR and next_char in \ | ||
(SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP): | ||
if char == ESCAPE_CHAR and next_char in (self.escapable_chars + | ||
extra_special_chars): | ||
# An escaped special character ($$, $}, etc.). Note that | ||
# ${ is not an escape sequence: this is ambiguous with | ||
# the start of a symbol and it's not necessary (just | ||
|
@@ -375,7 +393,7 @@ def parse_expression(self): | |
elif char == FUNC_DELIM: | ||
# Parse a function call. | ||
self.parse_call() | ||
elif char in (GROUP_CLOSE, ARG_SEP): | ||
elif char in self.terminator_chars + extra_special_chars: | ||
# Template terminated. | ||
break | ||
elif char == GROUP_OPEN: | ||
|
@@ -483,7 +501,7 @@ def parse_argument_list(self): | |
expressions = [] | ||
|
||
while self.pos < len(self.string): | ||
subparser = Parser(self.string[self.pos:]) | ||
subparser = Parser(self.string[self.pos:], in_argument=True) | ||
subparser.parse_expression() | ||
|
||
# Extract and advance past the parsed expression. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I left `ARG_SEP` deliberately in the list of escapable chars in order to allow both escaped and unescaped commas outside a function (`-f 'foo, $bar'` would work exactly the same as `-f 'foo$, $bar'`).
I'm undecided about it, though: on one hand, it keeps the syntax backwards compatible-ish, but on the other hand it introduces some ambiguity.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I agree here—I think it's more predictable to make $, work the same way—as an escape—both inside and outside of function arguments.