Skip to content

Commit

Permalink
Added optional support for defaulted states
Browse files Browse the repository at this point in the history
  • Loading branch information
dabeaz committed Apr 21, 2015
1 parent aa8f06c commit af65167
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 63 deletions.
29 changes: 29 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,5 +1,34 @@
Version 3.5
---------------------
04/21/15: beazley
Added optional support for defaulted_states in the parser.
A defaulted_state is a state where the only legal action is
a reduction of a single grammar rule across all valid input
tokens. For such states, the rule is reduced and the
reading of the next lookahead token is delayed until it is
actually needed at a later point in time.

This delay in consuming the next lookahead token is a potentially
important feature in advanced parsing applications that require tight
interaction between the lexer and the parser. For example, a grammar
rule change modify the lexer state upon reduction and have such changes
take effect before the next input token is read.

One potential danger of defaulted_states is that syntax errors might
be deferred to a a later point of processing than where they were detected
in the past. Thus, it's possible that your error handling could change
slightly over previous versions of PLY.

In order to enable defaulted_state support. You need to activate it
explicitly. For example:

parser = yacc.yacc()
parser.use_defaulted_states()

04/21/15: beazley
Fixed debug logging in the parser. It wasn't properly reporting goto states
on grammar rule reductions.

04/20/15: beazley
Added actions to be defined to character literals (Issue #32). For example:

Expand Down
2 changes: 1 addition & 1 deletion ply/lex.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def __call__(self, *args, **kwargs):
class Lexer:
def __init__(self):
self.lexre = None # Master regular expression. This is a list of
# tuples (re,findex) where re is a compiled
# tuples (re, findex) where re is a compiled
# regular expression and findex is a list
# mapping regex group numbers to rules
self.lexretext = None # Current regular expression strings
Expand Down
157 changes: 95 additions & 62 deletions ply/yacc.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ def __init__(self, lrtab, errorf):
self.action = lrtab.lr_action
self.goto = lrtab.lr_goto
self.errorfunc = errorf
self.defaulted_states = { }

def errok(self):
self.errorok = True
Expand All @@ -300,6 +301,20 @@ def restart(self):
self.symstack.append(sym)
self.statestack.append(0)

# Defaulted state support (Experimental)
# This method identifies parser states where there is only one possible reduction action.
# For such states, the parser can make a choose to make a rule reduction without consuming
# the next look-ahead token. This delayed invocation of the tokenizer can be useful in
# certain kinds of advanced parsing situations where the lexer and parser interact with
# each other or change states (i.e., manipulation of scope, lexer states, etc.).
#
# See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
def use_defaulted_states(self):
for state, actions in self.action.items():
rules = list(actions.values())
if rules and rules[0] < 0 and all(rules[0] == rule for rule in rules):
self.defaulted_states[state] = rules[0]

def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
if debug or yaccdevel:
if isinstance(debug, int):
Expand Down Expand Up @@ -327,13 +342,14 @@ def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=N

def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
#--! parsedebug-start
lookahead = None # Current lookahead symbol
lookaheadstack = [] # Stack of lookahead symbols
actions = self.action # Local reference to action table (to avoid lookup on self.)
goto = self.goto # Local reference to goto table (to avoid lookup on self.)
prod = self.productions # Local reference to production list (to avoid lookup on self.)
pslice = YaccProduction(None) # Production object passed to grammar rules
errorcount = 0 # Used during error recovery
lookahead = None # Current lookahead symbol
lookaheadstack = [] # Stack of lookahead symbols
actions = self.action # Local reference to action table (to avoid lookup on self.)
goto = self.goto # Local reference to goto table (to avoid lookup on self.)
prod = self.productions # Local reference to production list (to avoid lookup on self.)
defaulted_states = self.defaulted_states # Local reference to defaulted states
pslice = YaccProduction(None) # Production object passed to grammar rules
errorcount = 0 # Used during error recovery

#--! DEBUG
debug.info('PLY: PARSE DEBUG START')
Expand Down Expand Up @@ -388,24 +404,30 @@ def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenf
debug.debug('State : %s', state)
#--! DEBUG

if not lookahead:
if not lookaheadstack:
lookahead = get_token() # Get the next token
else:
lookahead = lookaheadstack.pop()
if state not in defaulted_states:
if not lookahead:
lookahead = YaccSymbol()
lookahead.type = '$end'
if not lookaheadstack:
lookahead = get_token() # Get the next token
else:
lookahead = lookaheadstack.pop()
if not lookahead:
lookahead = YaccSymbol()
lookahead.type = '$end'

# Check the action table
ltype = lookahead.type
t = actions[state].get(ltype)
else:
t = defaulted_states[state]
#--! DEBUG
debug.debug('Defaulted state %s: Reduce using %d', state, -t)
#--! DEBUG

#--! DEBUG
debug.debug('Stack : %s',
('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
#--! DEBUG

# Check the action table
ltype = lookahead.type
t = actions[state].get(ltype)

if t is not None:
if t > 0:
# shift a symbol on the stack
Expand Down Expand Up @@ -437,10 +459,12 @@ def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenf

#--! DEBUG
if plen:
debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
'['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', -t)
debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
'['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
goto[statestack[-1-plen]][pname])
else:
debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], -t)
debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
goto[statestack[-1]][pname])

#--! DEBUG

Expand Down Expand Up @@ -652,13 +676,14 @@ def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenf

def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
#--! parseopt-start
lookahead = None # Current lookahead symbol
lookaheadstack = [] # Stack of lookahead symbols
actions = self.action # Local reference to action table (to avoid lookup on self.)
goto = self.goto # Local reference to goto table (to avoid lookup on self.)
prod = self.productions # Local reference to production list (to avoid lookup on self.)
pslice = YaccProduction(None) # Production object passed to grammar rules
errorcount = 0 # Used during error recovery
lookahead = None # Current lookahead symbol
lookaheadstack = [] # Stack of lookahead symbols
actions = self.action # Local reference to action table (to avoid lookup on self.)
goto = self.goto # Local reference to goto table (to avoid lookup on self.)
prod = self.productions # Local reference to production list (to avoid lookup on self.)
defaulted_states = self.defaulted_states # Local reference to defaulted states
pslice = YaccProduction(None) # Production object passed to grammar rules
errorcount = 0 # Used during error recovery


# If no lexer was given, we will try to use the lex module
Expand Down Expand Up @@ -706,19 +731,22 @@ def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfun
# the next token off of the lookaheadstack or from the lexer


if not lookahead:
if not lookaheadstack:
lookahead = get_token() # Get the next token
else:
lookahead = lookaheadstack.pop()
if state not in defaulted_states:
if not lookahead:
lookahead = YaccSymbol()
lookahead.type = '$end'

if not lookaheadstack:
lookahead = get_token() # Get the next token
else:
lookahead = lookaheadstack.pop()
if not lookahead:
lookahead = YaccSymbol()
lookahead.type = '$end'

# Check the action table
ltype = lookahead.type
t = actions[state].get(ltype)
else:
t = defaulted_states[state]

# Check the action table
ltype = lookahead.type
t = actions[state].get(ltype)

if t is not None:
if t > 0:
Expand Down Expand Up @@ -941,13 +969,14 @@ def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfun

def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
#--! parseopt-notrack-start
lookahead = None # Current lookahead symbol
lookaheadstack = [] # Stack of lookahead symbols
actions = self.action # Local reference to action table (to avoid lookup on self.)
goto = self.goto # Local reference to goto table (to avoid lookup on self.)
prod = self.productions # Local reference to production list (to avoid lookup on self.)
pslice = YaccProduction(None) # Production object passed to grammar rules
errorcount = 0 # Used during error recovery
lookahead = None # Current lookahead symbol
lookaheadstack = [] # Stack of lookahead symbols
actions = self.action # Local reference to action table (to avoid lookup on self.)
goto = self.goto # Local reference to goto table (to avoid lookup on self.)
prod = self.productions # Local reference to production list (to avoid lookup on self.)
defaulted_states = self.defaulted_states # Local reference to defaulted states
pslice = YaccProduction(None) # Production object passed to grammar rules
errorcount = 0 # Used during error recovery


# If no lexer was given, we will try to use the lex module
Expand Down Expand Up @@ -995,19 +1024,22 @@ def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False,
# the next token off of the lookaheadstack or from the lexer


if not lookahead:
if not lookaheadstack:
lookahead = get_token() # Get the next token
else:
lookahead = lookaheadstack.pop()
if state not in defaulted_states:
if not lookahead:
lookahead = YaccSymbol()
lookahead.type = '$end'

if not lookaheadstack:
lookahead = get_token() # Get the next token
else:
lookahead = lookaheadstack.pop()
if not lookahead:
lookahead = YaccSymbol()
lookahead.type = '$end'

# Check the action table
ltype = lookahead.type
t = actions[state].get(ltype)
else:
t = defaulted_states[state]

# Check the action table
ltype = lookahead.type
t = actions[state].get(ltype)

if t is not None:
if t > 0:
Expand Down Expand Up @@ -1495,7 +1527,7 @@ def add_production(self, prodname, syms, func=None, file='', line=0):
try:
c = eval(s)
if (len(c) > 1):
raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
(file, line, s, prodname))
if c not in self.Terminals:
self.Terminals[c] = []
Expand All @@ -1511,7 +1543,7 @@ def add_production(self, prodname, syms, func=None, file='', line=0):
if syms[-1] == '%prec':
raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
if syms[-2] != '%prec':
raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
(file, line))
precname = syms[-1]
prodprec = self.Precedence.get(precname)
Expand Down Expand Up @@ -1962,6 +1994,7 @@ def bind_callables(self, pdict):
for p in self.lr_productions:
p.bind(pdict)


# -----------------------------------------------------------------------------
# === LR Generator ===
#
Expand Down Expand Up @@ -2550,7 +2583,7 @@ def lr_parse_table(self):
else:
chosenp, rejectp = oldp, pp
self.rr_conflicts.append((st, chosenp, rejectp))
log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)',
log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)',
a, st_actionp[a].number, st_actionp[a])
else:
raise LALRError('Unknown conflict in state %d' % st)
Expand Down Expand Up @@ -2943,7 +2976,7 @@ def validate_modules(self):
counthash[name] = linen
else:
filename = inspect.getsourcefile(module)
self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
filename, linen, name, prev)

# Get the start symbol
Expand Down Expand Up @@ -3089,7 +3122,7 @@ def validate_pfunctions(self):
self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
self.error = True
elif not func.__doc__:
self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
file, line, func.__name__)
else:
try:
Expand Down

0 comments on commit af65167

Please sign in to comment.