Skip to content

Commit

Permalink
Restructure; format grammars; values on defns
Browse files Browse the repository at this point in the history
And add SymbolTable class
  • Loading branch information
goodmami committed Mar 31, 2020
1 parent 49dcff8 commit 28f261f
Show file tree
Hide file tree
Showing 12 changed files with 419 additions and 363 deletions.
48 changes: 31 additions & 17 deletions pe/_definition.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@

from typing import Tuple, Optional, Any
import re

from pe._errors import Error
from pe._constants import Operator
from pe._constants import Operator, Value
from pe._escape import escape


Expand All @@ -26,54 +27,66 @@

class Definition:
"""An abstract definition of a parsing expression."""
__slots__ = 'op', 'args',
__slots__ = 'op', 'args', 'value',

def __init__(self, op: Operator, args: Tuple[Any, ...]):
def __init__(self, op: Operator, args: Tuple[Any, ...], value: Value):
self.op = op
self.args = args
self.value = value

def __repr__(self):
return f'({self.op}, {self.args!r})'
return f'({self.op}, {self.args!r}, {self.value!r})'

def __str__(self):
return _format(self, 0, None)

def __eq__(self, other: object):
if not isinstance(other, Definition):
return NotImplemented
return (self.op == other.op) and (self.args == other.args)
return (self.op == other.op
and self.args == other.args
and self.value == other.value)

def format(self, indent=0):
return _format(self, indent, None)


def _format(defn: Definition,
indent: int,
prev_op: Optional[Operator]) -> str:
op = defn.op
args = defn.args
fmt = '({})' if prev_op and op.precedence >= prev_op.precedence else '{}'
fmt = '({})' if prev_op and op.precedence <= prev_op.precedence else '{}'
if op == DOT:
return '.'
elif op == LIT:
return f'"{escape(args[0])}"'
return f'''"{escape(args[0], ignore="'-[]")}"'''
elif op == CLS:
return f'[{escape(args[0])}]'
inner = ... # TODO: properly escape classes
return f'[{args[0]}]'
elif op == RGX:
raise Error('no syntax exists for regular expressions')
return f'`{args[0]}`' # temporary syntax
elif op == SYM:
return args[0]
# quantified expressions don't need to be grouped because there
# cannot be multiple quantifiers (e.g., ("a"*)*) and nothing of a
# higher precedence can take subexpressions
elif op == OPT:
return fmt.format(_format(args[0], indent, op)) + '?'
return _format(args[0], indent, op) + '?'
elif op == STR:
return fmt.format(_format(args[0], indent, op)) + '*'
return _format(args[0], indent, op) + '*'
elif op == PLS:
return fmt.format(_format(args[0], indent, op)) + '+'
return _format(args[0], indent, op) + '+'
# valued expressions and those of lower precedence may need to be
# grouped to be valid
elif op == AND:
return '&' + fmt.format(_format(args[0], indent, op))
return fmt.format('&' + _format(args[0], indent, op))
elif op == NOT:
return '!' + fmt.format(_format(args[0], indent, op))
return fmt.format('!' + _format(args[0], indent, op))
elif op == RAW:
return '~' + fmt.format(_format(args[0], indent, op))
return fmt.format('~' + _format(args[0], indent, op))
elif op == DIS:
return ':' + fmt.format(_format(args[0], indent, op))
return fmt.format(':' + _format(args[0], indent, op))
elif op == BND:
d, name = args
return f'{name}:' + fmt.format(_format(args[0], indent, op))
Expand All @@ -84,4 +97,5 @@ def _format(defn: Definition,
return fmt.format(' / '.join(_format(d, indent, op)
for d in args[0]))
elif op == RUL:
raise Error('no syntax exists for rules')
return fmt.format(_format(args[0], indent, op))
# raise Error('no syntax exists for rules')
17 changes: 13 additions & 4 deletions pe/_escape.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
'\\' : '\\\\',
']' : '\\]',
}
_escape_re = re.compile(
'({})'.format(
'|'.join(map(re.escape, _escapes))))
_codepoint_escapes: List[str] = [
'\\\\[0-7]{1,3}', # oct
'\\\\x[0-9a-fA-F]{2}', # hex
Expand All @@ -29,11 +32,17 @@
+ _codepoint_escapes)))


def escape(string: str):
def escape(string: str, ignore=''):
"""Escape special characters for literals and character classes."""
return re.sub('(' + '|'.join(map(re.escape, _escapes)) + ')',
lambda m: _escapes.get(m.group(0), m.group(0)),
string)

def _escape(m: reMatch):
c = m.group(0)
if c in ignore:
return c
else:
return _escapes.get(m.group(0), m.group(0))

return _escape_re.sub(_escape, string)


def _unescape(m: reMatch):
Expand Down
3 changes: 1 addition & 2 deletions pe/_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@ def compile(source,
p = parser_class(g, flags=flags)

if flags & Flag.DEBUG:
for name, defn in g.definitions.items():
print(name, defn)
print(g)

return p

Expand Down
79 changes: 60 additions & 19 deletions pe/_grammar.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@

from typing import Dict, Callable

from pe._constants import Operator
from pe._constants import Operator, Value
from pe._errors import Error
from pe._definition import Definition
from pe.operators import Rule

from pe.operators import Rule, Nonterminal


class Grammar:
Expand All @@ -16,7 +15,7 @@ def __init__(self,
actions: Dict[str, Callable] = None,
start: str = 'Start'):
self.start = start
self.definitions = definitions or {}
self.definitions = dict(definitions or [])
self.actions = actions or {}
self.final = False

Expand All @@ -25,11 +24,22 @@ def __repr__(self):
f'actions={self.actions!r}, '
f'start={self.start!r})')

def __str__(self):
defs = []
width = max(len(name) for name in self.definitions)
for name in self.definitions:
defn = self[name].format(len(name) + 2)
defs.append(f'{name:{width}} <- {defn}')
return '\n'.join(defs)

def __setitem__(self, name: str, definition: Definition):
self.definitions[name] = definition

def __getitem__(self, name):
return self.definitions[name]
if name not in self.definitions:
return Nonterminal(name)
else:
return self.definitions[name]

def __eq__(self, other: object):
if not isinstance(other, Grammar):
Expand All @@ -41,30 +51,61 @@ def __eq__(self, other: object):
def finalize(self):
if self.final:
raise Error('grammar is already finalized')
defs = self.definitions
acts = self.actions
for name in defs:
expr = defs[name]
if name in acts:
if expr.op == Operator.RUL:
# check if name is same or None?
expr = expr.args[0]
expr = Rule(expr, acts[name], name=name)
defs[name] = expr
_check_closed(expr, defs)
defs = _insert_rules(self.definitions, self.actions)
_resolve_deferred(defs)
# now recursively finalize expressions
for expr in defs.values():
_finalize(expr, defs, True)
self.final = True


def _check_closed(expr, defs):
def _insert_rules(defs, acts):
for name in defs:
expr = defs[name]
if name in acts:
if expr.op == Operator.RUL:
# check if name is same or None?
expr = expr.args[0]
expr = Rule(expr, acts[name], name=name)
defs[name] = expr
return defs


def _resolve_deferred(defs):
resolved = {}
for name, expr in defs.items():
if expr.value != Value.DEFERRED:
resolved[name] = expr.value
to_resolve = set(defs).difference(resolved)
while to_resolve:
found = False
for name in to_resolve:
expr = defs[name]
if expr.op == Operator.SYM and expr.args[0] in resolved:
expr.value = resolved[expr.args[0]]
resolved[name] = expr.value
to_resolve.remove(name)
found = True
break
if not found:
raise GrammarError('could not resolve expressions: {}'
.format(', '.join(to_resolve)))


def _finalize(expr, defs, structured):
op = expr.op
args = expr.args
if not structured:
expr.value = Value.EMPTY
if op == Operator.SYM:
if args[0] not in defs:
raise Error(f'undefined nonterminal: {args[0]}')
elif op in (Operator.DOT, Operator.LIT, Operator.CLS, Operator.RGX):
pass
elif op in (Operator.SEQ, Operator.CHC):
for term in args[0]:
_check_closed(term, defs)
_finalize(term, defs, structured)
elif op in (Operator.DIS, Operator.RAW):
_finalize(args[0], defs, False)
else:
_check_closed(args[0], defs)
_finalize(args[0], defs, structured)
12 changes: 6 additions & 6 deletions pe/_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ def groupdict(self):
return dict(self._kwargs or ())

def value(self):
return evaluate(self._args, self.pe.value_type)
return evaluate(self._args, self.pe.value)


def evaluate(args, value_type: Value):
if value_type == Value.ITERABLE:
def evaluate(args, value: Value):
if value == Value.ITERABLE:
return args
elif value_type == Value.ATOMIC:
elif value == Value.ATOMIC:
return args[0]
elif value_type == Value.EMPTY:
elif value == Value.EMPTY:
return None
else:
raise Error(f'invalid value type: {value_type!r}')
raise Error(f'invalid value type: {value!r}')
Loading

0 comments on commit 28f261f

Please sign in to comment.