From 3c0603d38ef46a4b7dc175420276abab35386c2f Mon Sep 17 00:00:00 2001 From: Loren Gordon Date: Mon, 3 Apr 2017 09:42:48 -0400 Subject: [PATCH] Updates vendored PLY library to v3.10 (#184) Fixes #175 --- pycparser/ply/LICENSE | 4 +-- pycparser/ply/cpp.py | 61 ++++++++++++++++++------------------------- pycparser/ply/lex.py | 20 +++++++------- pycparser/ply/yacc.py | 41 +++++++++++++++-------------- 4 files changed, 60 insertions(+), 66 deletions(-) diff --git a/pycparser/ply/LICENSE b/pycparser/ply/LICENSE index 21d5c35e..bac0d9a5 100644 --- a/pycparser/ply/LICENSE +++ b/pycparser/ply/LICENSE @@ -1,6 +1,6 @@ -PLY (Python Lex-Yacc) Version 3.9 +PLY (Python Lex-Yacc) Version 3.10 -Copyright (C) 2001-2016, +Copyright (C) 2001-2017 David M. Beazley (Dabeaz LLC) All rights reserved. diff --git a/pycparser/ply/cpp.py b/pycparser/ply/cpp.py index 7288ddd6..bd287218 100644 --- a/pycparser/ply/cpp.py +++ b/pycparser/ply/cpp.py @@ -3,9 +3,10 @@ # # Author: David Beazley (http://www.dabeaz.com) # Eli Bendersky [http://eli.thegreenplace.net] +# Copyright (C) 2017 # All rights reserved # -# This module implements an ANSI-C style lexical preprocessor for PLY. +# This module implements an ANSI-C style lexical preprocessor for PLY. # ----------------------------------------------------------------------------- from __future__ import generators @@ -77,7 +78,8 @@ def t_CPP_COMMENT2(t): r'(//.*?(\n|$))' # replace with '/n' t.type = 'CPP_WS'; t.value = '\n' - + return t + def t_error(t): t.type = t.value[0] t.value = t.value[0] @@ -91,8 +93,8 @@ def t_error(t): # ----------------------------------------------------------------------------- # trigraph() -# -# Given an input string, this function replaces all trigraph sequences. +# +# Given an input string, this function replaces all trigraph sequences. # The following mapping is used: # # ??= # @@ -262,7 +264,7 @@ def lexprobe(self): # ---------------------------------------------------------------------- # add_path() # - # Adds a search path to the preprocessor. + # Adds a search path to the preprocessor. # ---------------------------------------------------------------------- def add_path(self,path): @@ -306,7 +308,7 @@ def group_lines(self,input): # ---------------------------------------------------------------------- # tokenstrip() - # + # # Remove leading/trailing whitespace tokens from a token list # ---------------------------------------------------------------------- @@ -332,7 +334,7 @@ def tokenstrip(self,tokens): # argument. Each argument is represented by a list of tokens. # # When collecting arguments, leading and trailing whitespace is removed - # from each argument. + # from each argument. # # This function properly handles nested parenthesis and commas---these do not # define new arguments. @@ -344,7 +346,7 @@ def collect_args(self,tokenlist): current_arg = [] nesting = 1 tokenlen = len(tokenlist) - + # Search for the opening '('. 
i = 0 while (i < tokenlen) and (tokenlist[i].type in self.t_WS): @@ -378,7 +380,7 @@ def collect_args(self,tokenlist): else: current_arg.append(t) i += 1 - + # Missing end argument self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments") return 0, [],[] @@ -390,9 +392,9 @@ def collect_args(self,tokenlist): # This is used to speed up macro expansion later on---we'll know # right away where to apply patches to the value to form the expansion # ---------------------------------------------------------------------- - + def macro_prescan(self,macro): - macro.patch = [] # Standard macro arguments + macro.patch = [] # Standard macro arguments macro.str_patch = [] # String conversion expansion macro.var_comma_patch = [] # Variadic macro comma patch i = 0 @@ -439,7 +441,7 @@ def macro_expand_args(self,macro,args): rep = [copy.copy(_x) for _x in macro.value] # Make string expansion patches. These do not alter the length of the replacement sequence - + str_expansion = {} for argnum, i in macro.str_patch: if argnum not in str_expansion: @@ -457,7 +459,7 @@ def macro_expand_args(self,macro,args): # Make all other patches. The order of these matters. It is assumed that the patch list # has been sorted in reverse order of patch location since replacements will cause the # size of the replacement sequence to expand from the patch point. - + expanded = { } for ptype, argnum, i in macro.patch: # Concatenation. Argument is left unexpanded @@ -494,7 +496,7 @@ def expand_macros(self,tokens,expanded=None): if t.value in self.macros and t.value not in expanded: # Yes, we found a macro match expanded[t.value] = True - + m = self.macros[t.value] if not m.arglist: # A simple macro @@ -526,7 +528,7 @@ def expand_macros(self,tokens,expanded=None): else: args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1] del args[len(m.arglist):] - + # Get macro replacement text rep = self.macro_expand_args(m,args) rep = self.expand_macros(rep,expanded) @@ -539,13 +541,13 @@ def expand_macros(self,tokens,expanded=None): elif t.value == '__LINE__': t.type = self.t_INTEGER t.value = self.t_INTEGER_TYPE(t.lineno) - + i += 1 return tokens - # ---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # evalexpr() - # + # # Evaluate an expression token sequence for the purposes of evaluating # integral expressions. 
# ---------------------------------------------------------------------- @@ -592,7 +594,7 @@ def evalexpr(self,tokens): tokens[i].value = str(tokens[i].value) while tokens[i].value[-1] not in "0123456789abcdefABCDEF": tokens[i].value = tokens[i].value[:-1] - + expr = "".join([str(x.value) for x in tokens]) expr = expr.replace("&&"," and ") expr = expr.replace("||"," or ") @@ -617,7 +619,7 @@ def parsegen(self,input,source=None): if not source: source = "" - + self.define("__FILE__ \"%s\"" % source) self.source = source @@ -636,7 +638,7 @@ def parsegen(self,input,source=None): for tok in x: if tok.type in self.t_WS and '\n' in tok.value: chunk.append(tok) - + dirtokens = self.tokenstrip(x[i+1:]) if dirtokens: name = dirtokens[0].value @@ -644,7 +646,7 @@ def parsegen(self,input,source=None): else: name = "" args = [] - + if name == 'define': if enable: for tok in self.expand_macros(chunk): @@ -704,7 +706,7 @@ def parsegen(self,input,source=None): iftrigger = True else: self.error(self.source,dirtokens[0].lineno,"Misplaced #elif") - + elif name == 'else': if ifstack: if ifstack[-1][0]: @@ -874,7 +876,7 @@ def undef(self,tokens): def parse(self,input,source=None,ignore={}): self.ignore = ignore self.parser = self.parsegen(input,source) - + # ---------------------------------------------------------------------- # token() # @@ -904,14 +906,3 @@ def token(self): tok = p.token() if not tok: break print(p.source, tok) - - - - - - - - - - - diff --git a/pycparser/ply/lex.py b/pycparser/ply/lex.py index c2d49cc9..bb291284 100644 --- a/pycparser/ply/lex.py +++ b/pycparser/ply/lex.py @@ -1,6 +1,7 @@ # ----------------------------------------------------------------------------- # ply: lex.py # +# Copyright (C) 2001-2017 # Eli Bendersky [http://eli.thegreenplace.net] # David M. Beazley (Dabeaz LLC) # All rights reserved. @@ -31,8 +32,8 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ----------------------------------------------------------------------------- -__version__ = '3.9' -__tabversion__ = '3.8' +__version__ = '3.10' +__tabversion__ = '3.10' import re import sys @@ -184,7 +185,7 @@ def writetab(self, lextab, outputdir=''): tf.write('_lexliterals = %s\n' % repr(self.lexliterals)) tf.write('_lexstateinfo = %s\n' % repr(self.lexstateinfo)) - # Rewrite the lexstatere table, replacing function objects with function names + # Rewrite the lexstatere table, replacing function objects with function names tabre = {} for statename, lre in self.lexstatere.items(): titem = [] @@ -230,7 +231,7 @@ def readtab(self, tabfile, fdict): titem = [] txtitem = [] for pat, func_name in lre: - titem.append((re.compile(pat, lextab._lexreflags | re.VERBOSE), _names_to_funcs(func_name, fdict))) + titem.append((re.compile(pat, lextab._lexreflags), _names_to_funcs(func_name, fdict))) self.lexstatere[statename] = titem self.lexstateretext[statename] = txtitem @@ -495,7 +496,7 @@ def _form_master_re(relist, reflags, ldict, toknames): return [] regex = '|'.join(relist) try: - lexre = re.compile(regex, re.VERBOSE | reflags) + lexre = re.compile(regex, reflags) # Build the index to function map for the matching engine lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) @@ -536,7 +537,7 @@ def _statetoken(s, names): for i, part in enumerate(parts[1:], 1): if part not in names and part != 'ANY': break - + if i > 1: states = tuple(parts[1:i]) else: @@ -758,7 +759,7 @@ def validate_rules(self): continue try: - c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), re.VERBOSE | self.reflags) + c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) if c.match(''): self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__) self.error = True @@ -782,7 +783,7 @@ def validate_rules(self): continue try: - c = re.compile('(?P<%s>%s)' % (name, r), re.VERBOSE | self.reflags) + c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) if (c.match('')): self.log.error("Regular expression for rule '%s' matches empty string", name) self.error = True @@ -861,7 +862,7 @@ def validate_module(self, module): # Build all of the regular expression rules from definitions in the supplied module # ----------------------------------------------------------------------------- def lex(module=None, object=None, debug=False, optimize=False, lextab='lextab', - reflags=0, nowarn=False, outputdir=None, debuglog=None, errorlog=None): + reflags=int(re.VERBOSE), nowarn=False, outputdir=None, debuglog=None, errorlog=None): if lextab is None: lextab = 'lextab' @@ -1097,4 +1098,3 @@ def set_regex(f): # Alternative spelling of the TOKEN decorator Token = TOKEN - diff --git a/pycparser/ply/yacc.py b/pycparser/ply/yacc.py index 539da066..39892f81 100644 --- a/pycparser/ply/yacc.py +++ b/pycparser/ply/yacc.py @@ -1,6 +1,7 @@ # ----------------------------------------------------------------------------- # ply: yacc.py # +# Copyright (C) 2001-2017 # Eli Bendersky [http://eli.thegreenplace.net] # David M. Beazley (Dabeaz LLC) # All rights reserved. @@ -67,8 +68,8 @@ import base64 import warnings -__version__ = '3.9' -__tabversion__ = '3.8' +__version__ = '3.10' +__tabversion__ = '3.10' #----------------------------------------------------------------------------- # === User configurable parameters === @@ -2585,8 +2586,13 @@ def lr_parse_table(self): # Need to decide on shift or reduce here # By default we favor shifting. Need to add # some precedence rules here. 
- sprec, slevel = Productions[st_actionp[a].number].prec - rprec, rlevel = Precedence.get(a, ('right', 0)) + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from rule being reduced (p) + rprec, rlevel = Productions[p.number].prec + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): # We really need to reduce here. st_action[a] = -p.number @@ -2644,8 +2650,13 @@ def lr_parse_table(self): # - if precedence of reduce rule is higher, we reduce. # - if precedence of reduce is same and left assoc, we reduce. # - otherwise we shift - rprec, rlevel = Productions[st_actionp[a].number].prec + + # Shift precedence comes from the token sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from the rule that could have been reduced + rprec, rlevel = Productions[st_actionp[a].number].prec + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): # We decide to shift here... highest precedence to shift Productions[st_actionp[a].number].reduced -= 1 @@ -2958,28 +2969,20 @@ def validate_all(self): # Compute a signature over the grammar def signature(self): + parts = [] try: - from hashlib import md5 - except ImportError: - from md5 import md5 - try: - sig = md5() if self.start: - sig.update(self.start.encode('latin-1')) + parts.append(self.start) if self.prec: - sig.update(''.join([''.join(p) for p in self.prec]).encode('latin-1')) + parts.append(''.join([''.join(p) for p in self.prec])) if self.tokens: - sig.update(' '.join(self.tokens).encode('latin-1')) + parts.append(' '.join(self.tokens)) for f in self.pfuncs: if f[3]: - sig.update(f[3].encode('latin-1')) + parts.append(f[3]) except (TypeError, ValueError): pass - - digest = base64.b16encode(sig.digest()) - if sys.version_info[0] >= 3: - digest = digest.decode('latin-1') - return digest + return ''.join(parts) # ----------------------------------------------------------------------------- # validate_modules()
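
Reviewer note on the cpp.py hunks: the added "return t" in t_CPP_COMMENT2 is the one functional fix in that file. A PLY token function that falls off the end without returning discards the match, so a "//" comment previously vanished together with the newline it was rewritten into. A minimal standalone sketch of the mechanism, using ordinary ply.lex rules rather than cpp.py itself (token names here are illustrative):

    import ply.lex as lex

    tokens = ('COMMENT', 'WS', 'WORD')

    def t_COMMENT(t):
        r'//.*'
        t.type = 'WS'    # reclassify the comment as whitespace, as cpp.py does
        t.value = '\n'
        return t         # without this return, the token is silently dropped

    t_WS   = r'[ \t\n]+'
    t_WORD = r'\w+'

    def t_error(t):
        t.lexer.skip(1)

    lexer = lex.lex()
    lexer.input('foo // trailing comment')
    for tok in lexer:
        print(tok.type, repr(tok.value))
    # WORD 'foo'
    # WS   ' '
    # WS   '\n'    <- emitted only because the rule now returns the token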
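
On the lex.py hunks: the hardwired re.VERBOSE is removed from every re.compile() call and becomes the default value of the reflags parameter instead (wrapped in int(), presumably so the value reprs cleanly into generated lextab files). Callers that pass their own reflags now get exactly the flags they ask for, so re.VERBOSE must be spelled out when combining flags. A sketch of the 3.10 calling convention, again with illustrative rule names:

    import re
    import ply.lex as lex

    tokens = ('NUMBER', 'WORD')

    t_NUMBER = r'\d+'
    t_WORD   = r'[a-z]+'
    t_ignore = ' \t'

    def t_error(t):
        t.lexer.skip(1)

    # PLY <= 3.9 compiled rules with re.VERBOSE | reflags; PLY 3.10 uses
    # reflags as-is (defaulting to re.VERBOSE), so include it explicitly
    # whenever adding other flags.
    lexer = lex.lex(reflags=re.VERBOSE | re.IGNORECASE)
    lexer.input('Foo 42')
    for tok in lexer:
        print(tok.type, tok.value)
    # WORD Foo
    # NUMBER 42

The matching change in readtab() keeps previously written lextab files consistent with whatever flags the caller chose.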
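
On the two lr_parse_table() hunks: they fix which side of a shift/reduce conflict each precedence belongs to. The shift precedence comes from the lookahead token's entry in the precedence table; the reduce precedence comes from the rule being reduced. Previously the two were crossed in the first branch. A standalone restatement of the decision rule with a hypothetical helper name (not PLY's own code, and eliding the nonassoc/error cases):

    # (assoc, level) pairs as stored in PLY's Precedence table; tokens with
    # no declared precedence default to ('right', 0), exactly as in the patch.
    def resolve_shift_reduce(token_prec=('right', 0), rule_prec=('right', 0)):
        sprec, slevel = token_prec   # shift precedence: the lookahead token
        rprec, rlevel = rule_prec    # reduce precedence: the rule being reduced
        if (slevel < rlevel) or (slevel == rlevel and rprec == 'left'):
            return 'reduce'
        return 'shift'

    # Same level, left-associative: reduce, so 1 - 2 - 3 groups as (1 - 2) - 3.
    print(resolve_shift_reduce(('left', 1), ('left', 1)))   # reduce
    # Higher-precedence lookahead ('*' above '+'): shift, so 1 + 2 * 3
    # groups as 1 + (2 * 3).
    print(resolve_shift_reduce(('left', 2), ('left', 1)))   # shift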
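
On the signature() rewrite: the md5 import fallback, the latin-1 encoding, and the base16/Python-3 decode dance are all dropped in favor of plain string concatenation, which is sufficient because the result is only compared for equality against the signature stored in the parser table. A sketch of what the new version computes; the function and argument names here are made up for illustration:

    def grammar_signature(start, prec, tokens, pfunc_docs):
        # Mirrors the patched Grammar.signature(): concatenate the start
        # symbol, flattened precedence declarations, token list, and rule
        # docstrings.
        parts = []
        if start:
            parts.append(start)
        if prec:
            parts.append(''.join(''.join(p) for p in prec))
        if tokens:
            parts.append(' '.join(tokens))
        parts.extend(doc for doc in pfunc_docs if doc)
        return ''.join(parts)

    sig = grammar_signature(
        start='expression',
        prec=[('left', 'PLUS', 'MINUS'), ('left', 'TIMES', 'DIVIDE')],
        tokens=['NUMBER', 'PLUS', 'MINUS', 'TIMES', 'DIVIDE'],
        pfunc_docs=['expression : expression PLUS expression'],
    )
    # Any change to the grammar changes the string, which is all the
    # staleness check needs.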