Permalink
Browse files

Initial commit.

  • Loading branch information...
0 parents commit 7d0485494eaa8fe27a14526d890a5ff2030adc9d @alex committed Aug 6, 2012
Showing with 676 additions and 0 deletions.
  1. +1 −0 .gitignore
  2. 0 README.rst
  3. +2 −0 rply/__init__.py
  4. +2 −0 rply/errors.py
  5. +183 −0 rply/grammar.py
  6. +38 −0 rply/parser.py
  7. +380 −0 rply/parsergenerator.py
  8. +13 −0 rply/token.py
  9. +25 −0 rply/utils.py
  10. 0 tests/__init__.py
  11. +14 −0 tests/test_basic.py
  12. +18 −0 tests/test_parserbuilder.py
@@ -0,0 +1 @@
+*.py[co]
No changes.
@@ -0,0 +1,2 @@
+from rply.parsergenerator import ParserGenerator
+from rply.token import Token
@@ -0,0 +1,2 @@
+class ParserGeneratorError(Exception):
+ pass
@@ -0,0 +1,183 @@
+from rply.errors import ParserGeneratorError
+
+
+def rightmost_terminal(symbols, terminals):
+ for sym in reversed(symbols):
+ if sym in terminals:
+ return sym
+ return None
+
+
+class Grammar(object):
+ def __init__(self, terminals):
+ # A list of all the productions
+ self.productions = [None]
+ # A dictionary mapping the names of non-terminals to a list of all
+ # productions of that nonterminal
+ self.prod_names = {}
+ # A dictionary mapping the names of terminals to a list of the rules
+ # where they are used
+ self.terminals = dict.fromkeys(terminals, [])
+ # A dictionary mapping names of nonterminals to a list of rule numbers
+ # where they are used
+ self.nonterminals = {}
+ self.first = {}
+ self.follow = {}
+ self.precedence = {}
+ self.start = None
+
+ def add_production(self, prod_name, syms, func):
+ if prod_name in self.terminals:
+ raise ParserGeneratorError("Illegal rule name %s" % prod_name)
+
+ precname = rightmost_terminal(syms, self.terminals)
+ prod_prec = self.precedence.get(precname, ("right", 0))
+
+ pnumber = len(self.productions)
+ self.nonterminals.setdefault(prod_name, [])
+
+ for t in syms:
+ if t in self.terminals:
+ self.terminals[t].append(pnumber)
+ else:
+ self.nonterminals.setdefault(t, []).append(pnumber)
+
+ p = Production(pnumber, prod_name, syms, prod_prec, func)
+ self.productions.append(p)
+
+ self.prod_names.setdefault(prod_name, []).append(p)
+
+ def set_start(self):
+ start = self.productions[1].name
+ self.productions[0] = Production(0, "S'", [start], ("right", 0), None)
+ self.nonterminals[start].append(0)
+ self.start = start
+
+ def build_lritems(self):
+ """
+ Walks the list of productions and builds a complete set of the LR
+ items.
+ """
+ for p in self.productions:
+ lastlri = p
+ i = 0
+ lr_items = []
+ while True:
+ if i > p.getlength():
+ lri = None
+ else:
+ try:
+ before = p.prod[i - 1]
+ except IndexError:
+ before = None
+ try:
+ after = self.prod_names[p.prod[i + 1]]
+ except (IndexError, KeyError):
+ after = []
+ lri = LRItem(p, i, before, after)
+ lastlri.lr_next = lri
+ if lri is None:
+ break
+ lr_items.append(lri)
+ lastlri = lri
+ i += 1
+ p.lr_items = lr_items
+
+ def _first(self, beta):
+ result = []
+ for x in beta:
+ x_produces_empty = False
+ for f in self.first[x]:
+ if f == "<empty>":
+ x_produces_empty = True
+ else:
+ if f not in result:
+ result.append(f)
+ if not x_produces_empty:
+ break
+ else:
+ result.append("<empty>")
+ return result
+
+ def compute_first(self):
+ for t in self.terminals:
+ self.first[t] = [t]
+
+ self.first["$end"] = ["$end"]
+
+ for n in self.nonterminals:
+ self.first[n] = []
+
+ changed = True
+ while changed:
+ changed = False
+ for n in self.nonterminals:
+ for p in self.prod_names[n]:
+ for f in self._first(p.prod):
+ if f not in self.first[n]:
+ self.first[n].append(f)
+ changed = True
+
+ def compute_follow(self):
+ for k in self.nonterminals:
+ self.follow[k] = []
+
+ start = self.start
+ self.follow[start] = ["$end"]
+
+ added = True
+ while added:
+ added = False
+ for p in self.productions[1:]:
+ for i, B in enumerate(p.prod):
+ if B in self.nonterminals:
+ fst = self._first(p.prod[i + 1:])
+ has_empty = False
+ for f in fst:
+ if f != "<empty>" and f not in self.follow[B]:
+ self.follow[B].append(f)
+ added = True
+ if f == "<empty>":
+ has_empty = True
+ if has_empty or i == (len(p.prod) - 1):
+ for f in self.follow[p.name]:
+ if f not in self.follow[B]:
+ self.follow[B].append(f)
+ added = True
+
+
+
+class Production(object):
+ def __init__(self, num, name, prod, precedence, func):
+ self.name = name
+ self.prod = prod
+ self.number = num
+ self.func = func
+ self.prec = precedence
+
+ self.unique_syms = []
+ for s in self.prod:
+ if s not in self.unique_syms:
+ self.unique_syms.append(s)
+
+ self.lr_items = []
+ self.lr_next = None
+
+ def getlength(self):
+ return len(self.prod)
+
+
+class LRItem(object):
+ def __init__(self, p, n, before, after):
+ self.name = p.name
+ self.prod = p.prod[:]
+ self.prod.insert(n, ".")
+ self.number = p.number
+ self.lr_index = n
+ self.lookaheads = {}
+ self.unique_syms = p.unique_syms
+ self.lr_before = before
+ self.lr_after = after
+
+ def getlength(self):
+ return len(self.prod)
@@ -0,0 +1,38 @@
+class LRParser(object):
+ def __init__(self, lr_table):
+ self.lr_table = lr_table
+
+ def parse(self, tokenizer):
+ from rply import Token
+
+ lookahead = None
+ lookaheadstack = []
+ error_count = 0
+
+ statestack = []
+ symstack = []
+
+ statestack.append(0)
+ symstack.append(Token("$end", None))
+ state = 0
+ while True:
+ if lookahead is None:
+ if lookaheadstack:
+ lookahead = lookaheadstack.pop()
+ else:
+ lookahead = tokenizer.next()
+
+ if lookahead is None:
+ lookahead = Token("$end", None)
+
+ ltype = lookahead.gettokentype()
+ if ltype in self.lr_table.lr_action[state]:
+ t = self.lr_table.lr_action[state][ltype]
+ if t > 0:
+ raise NotImplementedError
+ elif t < 0:
+ raise NotImplementedError
+ else:
+ raise NotImplementedError
+ else:
+ raise NotImplementedError
Oops, something went wrong.

0 comments on commit 7d04854

Please sign in to comment.