import tokenizer

doc = path("doc:/chartparser")

# The upgraded chart parser is layout-sensitive without needing any
# preprocessing in the token stream. Therefore the tokenizer no longer
# needs feedback from the parser to parse these languages.
# This example is too simple to trigger any of the potential bugs in
# the grammar, but it is enough to give an idea of the usage.
main = ():
    file = Nonterminal('file')
    symbol = Terminal('symbol')
    on_indent = (bb, cc):
        return bb.indent < cc.start.col and cc.start.col == cc.indent
    on_newline = (bb, cc):
        return bb.indent == cc.start.col and cc.start.col == cc.indent
    user_grammar = [
        Rule(file, [symbol, Condition(symbol, [on_indent])]),
        Rule(file, [symbol, Condition(symbol, [on_newline])]),
    ]
    parsergen = preprocess(user_grammar, file)
    parser = parsergen()
    parser.step(symbol, "hello", (lno=0, col=0), (lno=0, col=5))
    parser.step(symbol, "world", (lno=1, col=2), (lno=1, col=7))
    print("accepted?", parser.accepted)
    print(repr(parser.traverse(
        ((x, a, start, stop):
            return a),
        (symbol):
            return "")))

# The interface for constructing a grammar consists of rules,
# terminals, nonterminals and conditions.
# A rule has a nonterminal on its left side and a list of nonterminals
# and terminals on its right side.
# Rules may be annotated to help with reading parse trees.
class Rule
    +init = (self, lhs, rhs, annotation=null):
        self.lhs = lhs
        self.rhs = rhs
        self.annotation = annotation
    +repr = (self):
        rhs = []
        for item in self.rhs
            rhs.append(repr(item))
        out = repr(self.lhs) ++ " -> " ++ " ".join(rhs)
        return out

# Earlier I did not separate terminals from nonterminals because it
# was not strictly necessary. That turned out to be confusing when
# designing grammars.
class Terminal
    +init = (self, name):
        self.name = name
    +repr = (self):
        if self.name
            return "@" ++ self.name
        return "<Terminal>"
    # .getsym makes it easier to extract the symbol from an rhs item.
    getsym = (self):
        return self

class Nonterminal
    +init = (self, name):
        self.name = name
    +repr = (self):
        if self.name
            return self.name
        return "<Nonterminal>"
    getsym = (self):
        return self

class Condition
    +init = (self, symbol, constraints):
        self.symbol = symbol
        self.constraints = constraints
    getsym = (self):
        return self.symbol
    +repr = (self):
        cons = []
        for c in self.constraints
            cons.append(repr(c))
        return "{" ++ ", ".join(cons) ++ "}" ++ repr(self.symbol)

# To reduce work during parsing, the grammar is preprocessed:
# the user calls preprocess(user_grammar, accept_symbol) and gets an
# initiator back. The initiator can be used to initialize parsers.
# The preprocessing transforms the grammar into NNF, which divides the
# grammar into null and non-null rules. The null rules are not used in
# the middle of parsing, but they are useful when interpreting the
# parse tree.
# TODO: It would be preferable to allow serialization of the
#       preprocessed grammar and the annotations attached to it.
preprocess = (user_grammar, default_accept):
    nullable = find_nullable(user_grammar)
    grammar = {}
    blankset = {}
    rules = build_nnf(user_grammar, nullable)
    for rule in rules
        if rule.rhs.length == 0
            try
                blankset[rule.lhs].append(rule)
            except KeyError as _
                blankset[rule.lhs] = [rule]
        else
            try
                grammar[rule.lhs].append(rule)
            except KeyError as _
                grammar[rule.lhs] = [rule]
    right_recursive = find_right_recursive(rules)
    return Initiator(grammar, blankset,
        right_recursive, default_accept)

# Earley-based parsing would suffer from nullable rules. The parsing
# step ends up being simple when the grammar does not contain any of
# them, so they are rewritten away. The result is a "nihilist normal
# form" (NNF). Further reasoning about this can be found in the paper
# "Practical Earley Parsing" by Aycock & Horspool.
find_nullable = (grammar):
    nullable = set()
    queue = []
    new_nullable = (symbol):
        if symbol not in nullable
            nullable.add(symbol)
            queue.append(symbol)
    inverse_lookup = {}
    new_lookup = (index, symbol):
        try
            inverse_lookup[symbol].append(index)
        except KeyError as _
            inverse_lookup[symbol] = [index]
    nonterminals = []
    nonnullables = []
    for rule in grammar
        if rule.rhs.length == 0
            new_nullable(rule.lhs)
        elif all_nonterminals(rule.rhs)
            index = nonnullables.length
            for x in rule.rhs
                x = x.getsym()
                if x != rule.lhs
                    new_lookup(index, x)
            nonterminals.append(rule.lhs)
            nonnullables.append(count_nonrec(rule))
    for n in queue
        for i in inverse_lookup.get(n, [])
            nonnullables[i] -= 1
            if nonnullables[i] == 0
                new_nullable(nonterminals[i])
    return nullable

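# For example, with the rules  a ->  and  b -> a a  the loop above
# marks 'a' nullable directly and records that 'b' waits on two
# occurrences of 'a'; the queue pass then decrements that count twice
# and marks 'b' nullable as well.
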
all_nonterminals = (rhs):
    for x in rhs
        if not isinstance(x.getsym(), Nonterminal)
            return false
    return true

all_nullable = (rhs, nullable):
    for x in rhs
        if x.getsym() not in nullable
            return false
    return true

count_nonrec = (rule):
    s = 0
    for x in rule.rhs
        s += int(x.getsym() != rule.lhs)
    return s

# Counting through n bits in binary produces every combination of the
# nullable fields being present or absent.
build_nnf = (grammar, nullable):
    result = []
    for rule in grammar
        order = 0
        for x in rule.rhs
            order += int(x.getsym() in nullable)
        for i in range(1 << order)
            result.append(nihilist_rule(rule, i, nullable))
    return result

nihilist_rule = (rule, index, nullable):
    present = []
    rhs = []
    for x in rule.rhs
        shift = true
        if x.getsym() in nullable
            if index & 1 == 0
                shift = false
            index >>= 1
        present.append(shift)
        if shift
            rhs.append(x)
    return Rule(rule.lhs, rhs, NNF(rule, present))

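# With two nullable symbols on the rhs, the indices 0b00..0b11 above
# yield all four variants of the rule; nihilist_rule consumes one bit
# per nullable field to decide whether that field is kept.
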
# The nihilist normal form rules are annotated with NNF nodes.
class NNF
    +init = (self, rule, present):
        self.rule = rule       # the original rule
        self.present = present # tells which fields are present in this rule

# An item is Leo-eligible when:
#   its rule is right recursive,
#   it is quasi-complete,
#   it is postdot-unique.
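# For instance, in  list -> item list  the rule is right recursive, and
# an item with the dot before the final 'list' is quasi-complete; if no
# other transition waits on 'list' at that point, it is also
# postdot-unique, so a Leo item can stand in for the whole chain.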
find_right_recursive = (grammar):
    edges = []
    for rule in grammar
        if rule.rhs.length > 0
            right = rule.rhs[rule.rhs.length - 1]
            row = []
            for other in grammar
                row.append(other.lhs == right.getsym())
            edges.append(row)
        else
            row = []
            for other in grammar
                row.append(false)
            edges.append(row)
    warshall_transitive_closure(edges)
    right_recursive = set()
    i = 0
    for rule in grammar
        if edges[i][i] and rule.rhs.length >= 2 # Excluding rules that have only one rhs symbol:
            right_recursive.add(rule)           # Leo items caused problems when the rule was a prediction.
        i += 1
    return right_recursive

warshall_transitive_closure = (a):
    n = a.length
    for k in range(n)
        for i in range(n)
            if not a[i][k]
                continue
            for j in range(n)
                if not a[k][j]
                    continue
                a[i][j] = true
    return a

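# E.g. if rule 0 ends in the lhs of rule 1 and rule 1 ends in the lhs
# of rule 0, the closure sets both diagonal entries, so edges[i][i] is
# true exactly when rule i can reach itself through rightmost symbols,
# i.e. when it is right recursive.
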
# The nullable set presents the same information as the blankset,
# so we can discard it.
class Initiator
    +init = (self, grammar, blankset, right_recursive, default_accept):
        self.grammar = grammar
        self.blankset = blankset
        self.right_recursive = right_recursive
        self.right_recursive = set() # disable LEO
        self.default_accept = default_accept
    # TODO: Fix up the performance issue in REPL startup by allowing
    #       caching of a grammar.
    +call = (self, accept=self.default_accept):
        parser = Parser(self, accept, [])
        # In an Earley parser that uses NNF, empty input is a special
        # case that is taken care of here.
        if accept in self.blankset
            parser.output.append(SPPF(null, null, null, null, 0))
        # The first chart column.
        nodes = {}
        current = []
        leims = {}
        prediction(current, nodes, self.grammar, parser.chart, accept)
        for eim in current
            prediction(current, nodes, self.grammar, parser.chart, eim.postdot())
            cache_transitions(parser.chart, eim, null, leims)
            if isinstance(eim.postdot(), Nonterminal) and eim.postdot_constraint()
                parser.must_check_layout = true
        return parser

class Parser
    +init = (self, init, accept, output):
        self.chart = self.first = {}
        self.init = init
        self.accept = accept
        self.output = output
        self.lno = null # Previous lno
        self.indent = 0
        self.must_check_layout = false
    step = (self, term, token, start=null, stop=null):
        if self.lno != start.lno # Record the indentation level.
            self.lno = start.lno
            self.indent = start.col
        init = self.init
        current = []
        leims = {}
        transitions = {}
        nodes = {}
        output = []
        bottom = SPPF(start, stop, token, null, self.indent)
        # If a layout check is necessary, it happens here: starting
        # from the transitions in self.chart[term], do a DFS through
        # the chart, trying to find a position where the layout is
        # satisfied. Completions proceed in a non-deterministic manner
        # until everything has been completed.
        edges = self.chart[term]
        if self.must_check_layout
            edges = filter_by_layout(edges, term, bottom)
            assert edges.length > 0
                "layout violation at line " ++ start.lno.to_string() ++ ", come up with a better error message"
            self.must_check_layout = false
        shift_eims(current, nodes, edges, bottom, init.right_recursive, leims)
        for eim in current
            # reduction
            cc = nodes[eim]
            if eim.is_completed()
                shift_eims(current, nodes, eim.origin.get(eim.rule.lhs, []), cc, init.right_recursive, leims)
                if eim.rule.lhs == self.accept and eim.origin == self.first
                    output.append(cc)
            prediction(current, nodes, init.grammar, transitions, eim.postdot())
            cache_transitions(transitions, eim, cc, leims)
            if isinstance(eim.postdot(), Nonterminal) and eim.postdot_constraint()
                self.must_check_layout = true
        self.chart = transitions
        self.output = output
    accepted = property();
        get = (self):
            return self.output.length > 0
    expect = property();
        get = (self):
            return self.chart.keys()
    expecting = (self, symbol):
        return symbol in self.chart
    traverse = (self, postorder_cb,
            blank_cb=make_default_blank(self, postorder_cb),
            resolve_ambiguity=self.default_ambiguity_resolution):
        if self.output.length > 1
            # This is really weird in the current context. I should
            # probably rethink this whole ambiguity resolution thing.
            sppf = resolve_ambiguity(null, self.output)
        else
            sppf = self.output[0]
        if isinstance(sppf, SPPF) and sppf.cell == null
            return blank_cb(self.accept.getsym())
        res = traverse_sppf([sppf], postorder_cb, blank_cb, resolve_ambiguity)
        assert res.length == 1, "broken parse traverse"
        return res[0]
    default_ambiguity_resolution = (self, sppf):
        raise Ambiguity(sppf)

make_default_blank = (parser, postorder_cb):
    blank_cb = (symbol):
        blanks = parser.init.blankset[symbol]
        if blanks.length != 1
            raise Exception("default_blank ambiguity")
        cell = blanks[0]
        return postorder_cb(expand(null, null, cell, blank_cb, iter([]))...)
    return blank_cb

prediction = (current, nodes, grammar, transitions, postdot):
    if isinstance(postdot, Nonterminal)
        for rule in grammar.get(postdot, [])
            eim = EIM(rule, 0, transitions)
            if eim not in nodes
                nodes[eim] = null
                current.append(eim)

cache_transitions = (transitions, eim, cc, leims):
    if not eim.is_completed()
        postdot = eim.postdot()
        trans = object();
            eim = eim
            cc = cc
            leo = null
        try
            transitions[postdot].append(trans)
        except KeyError as _
            if eim.rule in leims            # If the item is not postdot-unique,
                trans.leo = leims[eim.rule] # then .leo is never read anyway.
            transitions[postdot] = [trans]

filter_by_layout = (edges, symbol, cc):
    out = []
    for trans in edges
        if check_layout(trans, cc, set([symbol]))
            out.append(trans)
    return out

check_layout = (trans, cc, visited):
    if trans.eim.is_confirmed()
        return trans.eim.check_postdot_constraint(trans.cc, cc)
    else
        lhs = trans.eim.rule.lhs
        return false if lhs in visited
        visited.add(lhs)
        edges = trans.eim.origin.get(lhs, [])
        for trans in edges
            return true if check_layout(trans, cc, visited)
        # Includes the condition that there are no edges for the
        # transition; that should only happen if we reach the accept
        # symbol.
        return edges.length == 0

# TODO: The ideas and the approach we take with Leo items should be
#       reviewed again, so that we can confirm there are no bugs there.
shift_eims = (current, nodes, edges, cc, right_recursive, leims):
    if is_leo_eligible(edges, right_recursive)
        trans = edges[0]
        if trans.leo
            if trans.eim.check_postdot_constraint(trans.cc, cc)
                link = LEOLink(trans.leo.link, trans.eim.rule, trans.cc)
                leims[trans.eim.rule] = object();
                    trans = trans.leo.trans
                    link = link
                eim = trans.leo.trans.eim.next()
                assert eim not in nodes
                    "assumption that a postdot-unique eim does not appear twice"
                nodes[eim] = LEO(link, cc, trans.cc.start, trans.cc.indent)
                current.append(eim)
        else
            leims[trans.eim.rule] = object();
                trans = trans
                link = LEOLink(null, trans.eim.rule, trans.cc)
            shift_eim(current, nodes, trans.eim, trans.cc, cc)
    else
        for trans in edges
            shift_eim(current, nodes, trans.eim, trans.cc, cc)

is_leo_eligible = (edges, right_recursive):
    if edges.length != 1 # must be postdot-unique
        return false
    eim = edges[0].eim
    return eim.rule in right_recursive and eim.pos == eim.rule.rhs.length - 1 # quasi-complete

shift_eim = (current, nodes, eim, bb, cc):
    # We have to prevent link buildup if the condition is unsatisfied.
    return if not eim.check_postdot_constraint(bb, cc)
    eim = eim.next()
    try
        sppf = nodes[eim]
        if bb
            start = bb.start
        else
            start = cc.start
        assert start == sppf.start
            "sppf tree corruption (parsing bug)"
        sppf.insert(bb, cc)
    except KeyError as _
        if bb
            start = bb.start
            indent = bb.indent
        else
            start = cc.start
            indent = cc.indent
        nodes[eim] = sppf = SPPF(start, cc.stop, eim.rule, Link(bb, cc), indent)
        current.append(eim)

class EIM
    +init = (self, rule, pos, origin):
        self.rule = rule
        self.pos = pos
        self.origin = origin
        # assert 0 <= pos <= rule.rhs.length
    postdot = (self):
        if self.pos < self.rule.rhs.length
            return self.rule.rhs[self.pos].getsym()
        return null
    postdot_constraint = (self):
        if self.pos < self.rule.rhs.length
            x = self.rule.rhs[self.pos]
            if isinstance(x, Condition)
                return x.constraints
        return null
    check_postdot_constraint = (self, bb, cc):
        constraints = self.postdot_constraint()
        if constraints
            for fn in constraints
                if not fn(bb, cc)
                    return false
        return true
    next = (self):
        if self.postdot()
            return EIM(self.rule, self.pos + 1, self.origin)
        return null
    penult = (self):
        if self.pos + 1 == self.rule.rhs.length
            return self.postdot()
    is_predicted = (self):
        return self.pos == 0
    is_confirmed = (self):
        return self.pos > 0
    is_completed = (self):
        return self.pos == self.rule.rhs.length
    +hash = (self):
        return hash([self.rule, self.pos, self.origin])
    # Sometimes, to resolve bugs, we need to see what's going on.
    +repr = (self):
        return repr(self.origin) ++ ":" ++
            repr(self.pos) ++
            ":" ++ repr(self.rule)
    # TODO: String formatting
    #     if isinstance(self.rule, Rule):
    #         lhs = repr(self.rule.lhs)
    #         pre = ' '.join(map(repr, self.rule.rhs[:self.pos]))
    #         pos = ' '.join(map(repr, self.rule.rhs[self.pos:]))
    #         return "{} -> {} * {} : {}".format(lhs, pre, pos, self.origin)
    #     return object.__repr__(self)

%"=="[[EIM, EIM]] = (a, b): | |
if a.rule != b.rule | |
return false | |
if a.origin != b.origin | |
return false | |
if a.pos != b.pos | |
return false | |
return true | |
class LEO
    +init = (self, left, cc, start, indent):
        self.left = left
        self.cc = cc
        self.start = start
        self.indent = indent
    stop = property();
        get = (self):
            return self.cc.stop
    to_sppf = (self):
        left = self.left
        cc = self.cc
        while left
            bb = left.sppf
            if bb
                start = bb.start
                indent = bb.indent
            else
                start = cc.start
                indent = cc.indent
            cc = SPPF(start, cc.stop, left.rule, Link(bb, cc), indent)
            left = left.left
        return cc

class LEOLink
    +init = (self, left, rule, sppf):
        self.left = left
        self.rule = rule
        self.sppf = sppf

class SPPF # Shared packed parse forest
    +init = (self, start, stop, cell, link, indent):
        self.start = start
        self.stop = stop
        self.cell = cell
        self.link = link
        self.indent = indent # TODO: consider whether this belongs in the location data.
    to_sppf = (self):
        return self
    is_leaf = (self):
        return self.link == null
    insert = (self, left, right):
        if self.link == null
            self.link = Link(left, right)
            return self.link
        link = self.link
        while true
            if link.left == left and link.right == right
                return link
            if link.link == null
                link.link = Link(left, right)
                return link.link
            link = link.link
    single = (self):
        result = []
        link = self.link
        while link.left
            if link.link
                return null
            result.append(link.right)
            link = link.left.link
        if link.link # Fixed samples/grammar_bug_0.
            return null
        result.append(link.right)
        result.reverse()
        return result
    +iter = (self):
        # TODO: should probably be incremental?
        output = []
        finger = []
        # To produce all parses, the sppf is walked with a finger list.
        link = self.link
        while finger.length > 0 or link
            while link.left
                finger.append(link)
                link = link.left.link
            # Now the link contains the head, while the tail is in the
            # finger list.
            while link
                result = [link.right]
                for x in reversed(finger)
                    result.append(x.right)
                output.append(result)
                link = link.link
            # Some portion of the finger has already been iterated and
            # should be removed.
            while finger.length > 0 and not link
                link = finger.pop().link
        return iter(output)
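    # For an unambiguous node the finger never branches, so +iter
    # yields exactly one row, in the same child order that .single()
    # returns; an ambiguous node yields one row per packed derivation.
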
    # TODO: add a string formatter to lever:
    #     return "[{}:{}] {}".format(self.start, self.stop, self.cell)

class Link
    +init = (self, left, right, link=null):
        self.left = left
        self.right = right
        self.link = link

traverse_sppf = (stack, postorder_cb, blank_cb, resolve_ambiguity):
    rcount = 1
    sstack = []
    rstack = []
    while stack.length > 0
        sppf = stack.pop().to_sppf()
        if sppf.is_leaf()
            sstack.append(sppf.cell)
            rcount -= 1
        else
            result = sppf.single()
            if result == null
                result = resolve_ambiguity(sppf, ambiguity_traverser(sppf,
                    postorder_cb, blank_cb, resolve_ambiguity))
            if isinstance(result, Resolve)
                sstack.append(result.value)
                rcount -= 1
            else
                rstack.append(object();
                    rcount = rcount - 1
                    rlen = result.length
                    sppf = sppf)
                rcount = result.length
                stack.extend(reversed(result))
        while rcount == 0 and rstack.length > 0
            s = rstack.pop()
            rcount = s.rcount
            rlen = s.rlen
            sppf = s.sppf
            a = []
            for i in range(rlen)
                a.append(sstack.pop(sstack.length + i - rlen))
            # TODO: Here we do not really identify where the blank rule
            #       appears. That feature could be really useful
            #       sometimes; the information is available in the sppf.
            sstack.append(postorder_cb(expand(
                sppf.start, sppf.stop, sppf.cell, blank_cb, iter(a))...))
    sstack.reverse() # won't hurt.
    return sstack

ambiguity_traverser = (sppf, postorder_cb, blank_cb, resolve_ambiguity):
    return (stack):
        seq = traverse_sppf(stack,
            postorder_cb,
            blank_cb,
            resolve_ambiguity)
        return postorder_cb(expand(
            sppf.start, sppf.stop, sppf.cell, blank_cb, iter(seq))...)

class Resolve
    +init = (self, value):
        self.value = value

expand = (start, stop, cell, blank_callback, seq):
    if isinstance(cell.annotation, NNF)
        nnf = cell.annotation
        result = []
        i = 0
        for p in nnf.present
            if p
                result.append(seq.next())
            else
                result.append(blank_callback(nnf.rule.rhs[i].getsym()))
            i += 1
        return [nnf.rule, result, start, stop]
    return [cell, list(seq), start, stop]

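# For instance, with present = [true, false] the original rule had two
# rhs fields: the first value is drawn from seq and the second is
# synthesized by blank_callback, so postorder_cb always receives as
# many children as the original rule has rhs symbols.
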
# TODO: 'format_origin' depends on the lno/col positioning information.
#       We may want to change this, as parsing is not necessarily
#       constrained to text files.
class SyntaxError extends Exception
    +init = (self, message, location, source, at_eof=false):
        self.message = message
        self.location = location
        self.source = source
        self.traceback = null
        self.at_eof = at_eof
    +repr = (self):
        return format_origin(self.source, self.location, self.message)

class Ambiguity extends SyntaxError
    +init = (self, sppf):
        self.sppf = sppf
        self.traceback = null
    +repr = (self):
        start = self.sppf.start
        stop = self.sppf.stop
        msg = ["Ambiguity in " ++ format_loc(start) ++ "-" ++ format_loc(stop)]
        sppf_group = set([self.sppf])
        while sppf_group.length == 1
            sppf = sppf_group.pop()
            rows = list(sppf)
            col_count = null
            col_count = min(col_count, row.length) for row in rows
            for j in range(col_count)
                items = set()
                items.add(rows[i][j]) for i in range(rows.length)
                if items.length > 1
                    sppf_group = items
                    break
            if sppf_group.length == 0
                sppf_group.add(sppf)
        for sppf in sppf_group
            for row in sppf
                cols = []
                for item in row
                    item = item.to_sppf()
                    start = item.start
                    stop = item.stop
                    if isinstance(item.cell, Rule)
                        cols.append("(" ++ format_loc(start) ++ "-" ++ format_loc(stop) ++ ")")
                        cols.append(repr(item.cell.lhs))
                    else
                        cols.append(repr(item.cell))
                msg.append(" ".join(cols))
        return "\n".join(msg)

class SyntaxErrorExpected extends SyntaxError
    +init = (self, expect, location, source, at_eof=false):
        self.expect = list(expect)
        self.location = location
        self.source = source
        self.traceback = null
        self.at_eof = at_eof
    +repr = (self):
        msg = [format_origin(self.source, self.location, " expected some of:")]
        expect = []
        for e in self.expect
            if e.name
                expect.append(e)
        expect.sort(symbol_lt)
        for symbol in expect
            msg.append(" " ++ symbol.name)
        return "\n".join(msg)

class SyntaxErrorExpected2 extends SyntaxError
    +init = (self, value, expect, location, source, at_eof=false):
        self.value = value
        self.expect = list(expect)
        self.location = location
        self.source = source
        self.traceback = null
        self.at_eof = at_eof
    +repr = (self):
        msg = [format_origin(self.source, self.location, " expected some of:")]
        if self.value
            msg.insert(0, "got: " ++ repr(self.value))
        expect = []
        for e in self.expect
            if e.name
                expect.append(e)
        expect.sort(symbol_lt)
        for symbol in expect
            msg.append(" " ++ symbol.name)
        return "\n".join(msg)

format_origin = (source, location, message=null):
    loc = [repr(location.lno), repr(location.col)]
    if message
        loc.append(message)
    if isinstance(source, path)
        loc.insert(0, source.to_string())
    elif source
        loc.insert(0, source)
    else
        loc.insert(0, "")
    return ":".join(loc)

format_loc = (location):
    return repr(location.lno) ++ ":" ++ repr(location.col)

symbol_lt = multimethod(2)
symbol_lt[[Terminal, Nonterminal]] = (a, b):
    return false
symbol_lt[[Nonterminal, Terminal]] = (a, b):
    return true
symbol_lt[[Nonterminal, Nonterminal]] = (a, b):
    return a.name < b.name
symbol_lt[[Terminal, Terminal]] = (a, b):
    return a.name < b.name

# There used to be an indent parser here; it was removed because it
# was no longer needed.
# TODO: Remove this one once grammar.lc doesn't need it anymore.
class IndentParser
    +init = (self, pos=tokenizer.Position(0, 1), indent=null, dedent=null, newline=null):
        self.stack = []
        self.level = pos.col
        self.line = pos.lno
        self.indent = indent
        self.dedent = dedent
        self.newline = newline
    step = (self, parser, pos, source):
        if self.line < pos.lno
            while pos.col < self.level and parser.expecting(self.dedent)
                parser.step(self.dedent, null, pos, pos)
                self.level = self.stack.pop()
            if pos.col < self.level
                raise SyntaxError("uneven indent", pos, source)
            if pos.col == self.level and parser.expecting(self.newline)
                parser.step(self.newline, null, pos, pos)
            if pos.col > self.level and parser.expecting(self.indent)
                parser.step(self.indent, null, pos, pos)
                self.stack.append(self.level)
                self.level = pos.col
            self.line = pos.lno
    # This can be used to terminate a dedent if the parsing cannot
    # continue otherwise. Note that this function, and to some extent
    # the whole indent parser in its current form, biases the parse
    # toward certain interpretations of the input.
    slip = (self, parser, pos, source):
        while parser.expecting(self.dedent)
            parser.step(self.dedent, null, pos, pos)
            self.level = self.stack.pop()
    # Most languages have a bug if this function returns false.
    finish = (self, parser, pos):
        while self.stack.length > 0 and parser.expecting(self.dedent)
            parser.step(self.dedent, null, pos, pos)
            self.level = self.stack.pop()
        return self.stack.length == 0
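
# A minimal usage sketch (illustrative; 'indent', 'dedent' and
# 'newline' would be Terminals appearing in the user grammar, and the
# token objects are assumed to carry start/stop positions):
#     ip = IndentParser(indent=indent, dedent=dedent, newline=newline)
#     for token in tokens
#         ip.step(parser, token.start, source)
#         parser.step(token.terminal, token.string, token.start, token.stop)
#     ip.finish(parser, eof_position)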