# lever/lib/chartparser.lc

import tokenizer

doc = path("doc:/chartparser")

# The upgraded chart parser is layout sensitive without needing
# preprocessing in the token stream. Therefore the tokenizer no longer
# needs any feedback from the parser to parse these languages.
# This example is too simple to trigger any of the potential bugs in
# the grammar, but it's enough to give an idea of the usage.
main = ():
    file = Nonterminal('file')
    symbol = Terminal('symbol')
    on_indent = (bb, cc):
        return bb.indent < cc.start.col and cc.start.col == cc.indent
    on_newline = (bb, cc):
        return bb.indent == cc.start.col and cc.start.col == cc.indent
    user_grammar = [
        Rule(file, [symbol, Condition(symbol, [on_indent])]),
        Rule(file, [symbol, Condition(symbol, [on_newline])]),
    ]
    parsergen = preprocess(user_grammar, file)
    parser = parsergen()
    parser.step(symbol, "hello", (lno=0, col=0), (lno=0, col=5))
    parser.step(symbol, "world", (lno=1, col=2), (lno=1, col=7))
    print("accepted?", parser.accepted)
    print(repr(parser.traverse(
        ((x, a):
            return a),
        (x):
            return "")))

# The interface to construct a grammar consists of rules, terminals,
# nonterminals and conditions. Rules have a nonterminal on the left
# side and a list of nonterminals and terminals on the right side.
# The rules may be annotated to help with reading the parse trees.
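# For example, a hypothetical arithmetic rule (not part of this
# module) could carry a tag for its tree node as the annotation; the
# annotation is stored on the rule and can be read back through
# rule.annotation in the traverse callbacks:
#
#     Rule(expr, [expr, plus, term], "add")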
class Rule
    +init = (self, lhs, rhs, annotation=null):
        self.lhs = lhs
        self.rhs = rhs
        self.annotation = annotation
    +repr = (self):
        rhs = []
        for item in self.rhs
            rhs.append(repr(item))
        out = repr(self.lhs) ++ " -> " ++ " ".join(rhs)
        return out

# Earlier I did not separate terminals from non-terminals because it
# was not strictly necessary. That turned out to be confusing when
# designing grammars.
class Terminal
    +init = (self, name):
        self.name = name
    +repr = (self):
        if self.name
            return "@" ++ self.name
        return "<Terminal>"
    # .getsym makes it easier to extract the symbol from an rhs item.
    getsym = (self):
        return self

class Nonterminal
    +init = (self, name):
        self.name = name
    +repr = (self):
        if self.name
            return self.name
        return "<Nonterminal>"
    getsym = (self):
        return self

class Condition
    +init = (self, symbol, constraints):
        self.symbol = symbol
        self.constraints = constraints
    getsym = (self):
        return self.symbol
    +repr = (self):
        cons = []
        for c in self.constraints
            cons.append(repr(c))
        return "{" ++ ", ".join(cons) ++ "}" ++ repr(self.symbol)

# To reduce the work during parsing, the grammar is preprocessed.
# The user calls preprocess(user_grammar, accept_symbol) and gets an
# initiator back. The initiator can be used to initialize parsers.
# The preprocessing transforms the grammar into NNF, which divides the
# grammar into null and non-null rules. The null rules are not used in
# the middle of parsing, but they are useful when interpreting the
# parse tree.
# TODO: It would be preferable to allow serialization of the
#       preprocessed grammar and the annotations attached to it.
preprocess = (user_grammar, default_accept):
    nullable = find_nullable(user_grammar)
    grammar = {}
    blankset = {}
    rules = build_nnf(user_grammar, nullable)
    for rule in rules
        if rule.rhs.length == 0
            try
                blankset[rule.lhs].append(rule)
            except KeyError as _
                blankset[rule.lhs] = [rule]
        else
            try
                grammar[rule.lhs].append(rule)
            except KeyError as _
                grammar[rule.lhs] = [rule]
    right_recursive = find_right_recursive(rules)
    return Initiator(grammar, blankset,
        right_recursive, default_accept)

# Earley-based parsing would suffer from nullable rules. The parsing
# step ends up being simple when the grammar does not contain any of
# them, so they are rewritten away. The result is a "nihilist normal
# form" (NNF). Further reasoning about this can be found in the paper
# "Practical Earley Parsing" by Aycock & Horspool.
find_nullable = (grammar):
    nullable = set()
    queue = []
    new_nullable = (symbol):
        if symbol not in nullable
            nullable.add(symbol)
            queue.append(symbol)
    inverse_lookup = {}
    new_lookup = (index, symbol):
        try
            inverse_lookup[symbol].append(index)
        except KeyError as _
            inverse_lookup[symbol] = [index]
    nonterminals = []
    nonnullables = []
    for rule in grammar
        if rule.rhs.length == 0
            new_nullable(rule.lhs)
        elif all_nonterminals(rule.rhs)
            index = nonnullables.length
            for x in rule.rhs
                x = x.getsym()
                if x != rule.lhs
                    new_lookup(index, x)
            nonterminals.append(rule.lhs)
            nonnullables.append(count_nonrec(rule))
    for n in queue
        for i in inverse_lookup.get(n, [])
            nonnullables[i] -= 1
            if nonnullables[i] == 0
                new_nullable(nonterminals[i])
    return nullable

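# A small worked example (hypothetical grammar, not part of this
# module): given
#
#     a = Nonterminal("a")
#     b = Nonterminal("b")
#     find_nullable([Rule(a, []), Rule(b, [a, a])])
#
# the empty rule marks 'a' nullable directly, and the queue pass then
# counts b's two non-nullable slots down to zero, so the result is the
# set {a, b}.
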
all_nonterminals = (rhs):
    for x in rhs
        if not isinstance(x.getsym(), Nonterminal)
            return false
    return true

all_nullable = (rhs, nullable):
    for x in rhs
        if x.getsym() not in nullable
            return false
    return true

count_nonrec = (rule):
    s = 0
    for x in rule.rhs
        s += int(x.getsym() != rule.lhs)
    return s

# Enumerating all n-bit binary numbers produces every combination of
# the nullable fields being present or absent.
build_nnf = (grammar, nullable):
    result = []
    for rule in grammar
        order = 0
        for x in rule.rhs
            order += int(x.getsym() in nullable)
        for i in range(1 << order)
            result.append(nihilist_rule(rule, i, nullable))
    return result

nihilist_rule = (rule, index, nullable):
    present = []
    rhs = []
    for x in rule.rhs
        shift = true
        if x.getsym() in nullable
            if index & 1 == 0
                shift = false
            index >>= 1
        present.append(shift)
        if shift
            rhs.append(x)
    return Rule(rule.lhs, rhs, NNF(rule, present))

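# Continuing the hypothetical example above: with 'a' nullable,
# build_nnf expands Rule(b, [a, a]) into four rules, one per
# present/absent combination of its nullable symbols:
#
#     b ->          present = [false, false]
#     b -> a        present = [true,  false]
#     b -> a        present = [false, true]
#     b -> a a      present = [true,  true]
#
# During preprocess() the empty rule lands in the blankset, the rest
# in the grammar proper.
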
# The nihilist normal form rules are annotated with NNF nodes.
class NNF
    +init = (self, rule, present):
        self.rule = rule       # the original rule
        self.present = present # tells which fields are present in this rule

# An item is Leo-eligible when all of the following hold:
#   * its rule is right recursive
#   * it is quasi-complete
#   * it is postdot-unique
find_right_recursive = (grammar):
    edges = []
    for rule in grammar
        if rule.rhs.length > 0
            right = rule.rhs[rule.rhs.length - 1]
            row = []
            for other in grammar
                row.append(other.lhs == right.getsym())
            edges.append(row)
        else
            row = []
            for other in grammar
                row.append(false)
            edges.append(row)
    warshall_transitive_closure(edges)
    right_recursive = set()
    i = 0
    for rule in grammar
        if edges[i][i] and rule.rhs.length >= 2 # Excluding rules that have only one rhs symbol.
            right_recursive.add(rule)           # Leo items caused problems if the rule was a prediction.
        i += 1
    return right_recursive

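# For instance, a hypothetical rule such as  list -> item list  is
# right recursive: its last rhs symbol derives back to its own lhs.
# The transitive closure (computed by warshall_transitive_closure
# below) catches this even when the recursion goes through several
# intermediate rules.
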
warshall_transitive_closure = (a):
    n = a.length
    for k in range(n)
        for i in range(n)
            if not a[i][k]
                continue
            for j in range(n)
                if not a[k][j]
                    continue
                a[i][j] = true
    return a

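# A minimal sketch of the closure (hypothetical input): with edges
# 0 -> 1 and 1 -> 2,
#
#     a = [[false, true,  false],
#          [false, false, true],
#          [false, false, false]]
#     warshall_transitive_closure(a)
#
# also sets a[0][2] to true, marking the derived edge 0 -> 2.
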
# The nullable set presents the same information as the blankset,
# so we can discard it.
class Initiator
    +init = (self, grammar, blankset, right_recursive, default_accept):
        self.grammar = grammar
        self.blankset = blankset
        self.right_recursive = right_recursive
        self.right_recursive = set() # Leo items are disabled for now; this overrides the line above.
        self.default_accept = default_accept
    # TODO: Fix up the performance issue in REPL startup by allowing
    #       the caching of a grammar.
    +call = (self, accept=self.default_accept):
        parser = Parser(self, accept, [])
        # In an Earley parser that uses NNF, empty input is a special
        # case that is taken care of here.
        if accept in self.blankset
            parser.output.append(SPPF(null, null, null, null, 0))
        # The first chart column.
        nodes = {}
        current = []
        leims = {}
        prediction(current, nodes, self.grammar, parser.chart, accept)
        for eim in current
            prediction(current, nodes, self.grammar, parser.chart, eim.postdot())
            cache_transitions(parser.chart, eim, null, leims)
            if isinstance(eim.postdot(), Nonterminal) and eim.postdot_constraint()
                parser.must_check_layout = true
        return parser

class Parser
    +init = (self, init, accept, output):
        self.chart = self.first = {}
        self.init = init
        self.accept = accept
        self.output = output
        self.lno = null # Previous lno
        self.indent = 0
        self.must_check_layout = false
    step = (self, term, token, start=null, stop=null):
        if self.lno != start.lno # Record the indentation level.
            self.lno = start.lno
            self.indent = start.col
        init = self.init
        current = []
        leims = {}
        transitions = {}
        nodes = {}
        output = []
        bottom = SPPF(start, stop, token, null, self.indent)
        # If a layout check is necessary, it happens here: do a DFS
        # through the chart edges (self.chart[term]), trying to find a
        # position where the layout is satisfied. Completions proceed
        # in a non-deterministic manner until everything has been
        # completed.
        edges = self.chart[term]
        if self.must_check_layout
            edges = filter_by_layout(edges, term, bottom)
            assert edges.length > 0
                "layout violation at line " ++ start.lno.to_string() ++ ", come up with a better error message"
            self.must_check_layout = false
        shift_eims(current, nodes, edges, bottom, init.right_recursive, leims)
        for eim in current
            # reduction
            cc = nodes[eim]
            if eim.is_completed()
                shift_eims(current, nodes, eim.origin.get(eim.rule.lhs, []), cc, init.right_recursive, leims)
                if eim.rule.lhs == self.accept and eim.origin == self.first
                    output.append(cc)
            prediction(current, nodes, init.grammar, transitions, eim.postdot())
            cache_transitions(transitions, eim, cc, leims)
            if isinstance(eim.postdot(), Nonterminal) and eim.postdot_constraint()
                self.must_check_layout = true
        self.chart = transitions
        self.output = output
    accepted = property();
        get = (self):
            return self.output.length > 0
    expect = property();
        get = (self):
            return self.chart.keys()
    expecting = (self, symbol):
        return symbol in self.chart
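    # A sketch (not part of the original file) of how .expect and
    # .expecting can drive error reporting; 'term', 'token', 'start',
    # 'stop' and 'source' are hypothetical names from a tokenizing loop:
    #
    #     if not parser.expecting(term)
    #         raise SyntaxErrorExpected2(token, parser.expect, start, source)
    #     parser.step(term, token, start, stop)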
    traverse = (self, postorder_cb,
            blank_cb=make_default_blank(self, postorder_cb),
            resolve_ambiguity=self.default_ambiguity_resolution):
        if self.output.length > 1
            # This is really weird in the current context. I should
            # probably rethink this whole ambiguity resolution thing.
            sppf = resolve_ambiguity(null, self.output)
        else
            sppf = self.output[0]
        if isinstance(sppf, SPPF) and sppf.cell == null
            return blank_cb(self.accept.getsym())
        res = traverse_sppf([sppf], postorder_cb, blank_cb, resolve_ambiguity)
        assert res.length == 1, "broken parse traverse"
        return res[0]
    default_ambiguity_resolution = (self, sppf):
        raise Ambiguity(sppf)

make_default_blank = (parser, postorder_cb):
    blank_cb = (symbol):
        blanks = parser.init.blankset[symbol]
        if blanks.length != 1
            raise Exception("default_blank ambiguity")
        cell = blanks[0]
        return postorder_cb(expand(null, null, cell, blank_cb, iter([]))...)
    return blank_cb

prediction = (current, nodes, grammar, transitions, postdot):
    if isinstance(postdot, Nonterminal)
        for rule in grammar.get(postdot, [])
            eim = EIM(rule, 0, transitions)
            if eim not in nodes
                nodes[eim] = null
                current.append(eim)

cache_transitions = (transitions, eim, cc, leims):
    if not eim.is_completed()
        postdot = eim.postdot()
        trans = object();
            eim = eim
            cc = cc
            leo = null
        try
            transitions[postdot].append(trans)
        except KeyError as _
            if eim.rule in leims        # If the item is not postdot unique, then
                trans.leo = leims[eim.rule] # .leo is never read anyway.
            transitions[postdot] = [trans]

filter_by_layout = (edges, symbol, cc):
    out = []
    for trans in edges
        if check_layout(trans, cc, set([symbol]))
            out.append(trans)
    return out

check_layout = (trans, cc, visited):
    if trans.eim.is_confirmed()
        return trans.eim.check_postdot_constraint(trans.cc, cc)
    else
        lhs = trans.eim.rule.lhs
        return false if lhs in visited
        visited.add(lhs)
        edges = trans.eim.origin.get(lhs, [])
        for trans in edges
            return true if check_layout(trans, cc, visited)
        # Covers the case where there are no edges for the transition,
        # which should only happen when we reach the accept symbol.
        return edges.length == 0

# TODO: The ideas and the approach we take with LEO items should be
#       reviewed again, so that we can confirm there are no bugs there.
shift_eims = (current, nodes, edges, cc, right_recursive, leims):
    if is_leo_eligible(edges, right_recursive)
        trans = edges[0]
        if trans.leo
            if trans.eim.check_postdot_constraint(trans.cc, cc)
                link = LEOLink(trans.leo.link, trans.eim.rule, trans.cc)
                leims[trans.eim.rule] = object();
                    trans = trans.leo.trans
                    link = link
                eim = trans.leo.trans.eim.next()
                assert eim not in nodes
                    "assumption that a postdot unique eim does not appear twice"
                nodes[eim] = LEO(link, cc, trans.cc.start, trans.cc.indent)
                current.append(eim)
        else
            leims[trans.eim.rule] = object();
                trans = trans
                link = LEOLink(null, trans.eim.rule, trans.cc)
            shift_eim(current, nodes, trans.eim, trans.cc, cc)
    else
        for trans in edges
            shift_eim(current, nodes, trans.eim, trans.cc, cc)

is_leo_eligible = (edges, right_recursive):
    if edges.length != 1 # must be postdot-unique
        return false
    eim = edges[0].eim
    return eim.rule in right_recursive and eim.pos == eim.rule.rhs.length - 1 # quasi-complete

shift_eim = (current, nodes, eim, bb, cc):
    # We have to prevent Link buildup if the condition is unsatisfied.
    return if not eim.check_postdot_constraint(bb, cc)
    eim = eim.next()
    try
        sppf = nodes[eim]
        if bb
            start = bb.start
        else
            start = cc.start
        assert start == sppf.start
            "sppf tree corruption (parsing bug)"
        sppf.insert(bb, cc)
    except KeyError as _
        if bb
            start = bb.start
            indent = bb.indent
        else
            start = cc.start
            indent = cc.indent
        nodes[eim] = sppf = SPPF(start, cc.stop, eim.rule, Link(bb, cc), indent)
        current.append(eim)

class EIM
    +init = (self, rule, pos, origin):
        self.rule = rule
        self.pos = pos
        self.origin = origin
        # assert 0 <= pos <= rule.rhs.length
    postdot = (self):
        if self.pos < self.rule.rhs.length
            return self.rule.rhs[self.pos].getsym()
        return null
    postdot_constraint = (self):
        if self.pos < self.rule.rhs.length
            x = self.rule.rhs[self.pos]
            if isinstance(x, Condition)
                return x.constraints
        return null
    check_postdot_constraint = (self, bb, cc):
        constraints = self.postdot_constraint()
        if constraints
            for fn in constraints
                if not fn(bb, cc)
                    return false
        return true
    next = (self):
        if self.postdot()
            return EIM(self.rule, self.pos + 1, self.origin)
        return null
    penult = (self):
        if self.pos + 1 == self.rule.rhs.length
            return self.postdot()
    is_predicted = (self):
        return self.pos == 0
    is_confirmed = (self):
        return self.pos > 0
    is_completed = (self):
        return self.pos == self.rule.rhs.length
    +hash = (self):
        return hash([self.rule, self.pos, self.origin])
    # Sometimes, to resolve bugs, we need to see what's going on.
    +repr = (self):
        return repr(self.origin) ++ ":" ++
            repr(self.pos) ++
            ":" ++ repr(self.rule)
    # TODO: String formatting
    #     if isinstance(self.rule, Rule):
    #         lhs = repr(self.rule.lhs)
    #         pre = ' '.join(map(repr, self.rule.rhs[:self.pos]))
    #         pos = ' '.join(map(repr, self.rule.rhs[self.pos:]))
    #         return "{} -> {} * {} : {}".format(lhs, pre, pos, self.origin)
    #     return object.__repr__(self)

%"=="[[EIM, EIM]] = (a, b):
    if a.rule != b.rule
        return false
    if a.origin != b.origin
        return false
    if a.pos != b.pos
        return false
    return true

class LEO
    +init = (self, left, cc, start, indent):
        self.left = left
        self.cc = cc
        self.start = start
        self.indent = indent
    stop = property();
        get = (self):
            return self.cc.stop
    to_sppf = (self):
        left = self.left
        cc = self.cc
        while left
            bb = left.sppf
            if bb
                start = bb.start
                indent = bb.indent
            else
                start = cc.start
                indent = cc.indent
            cc = SPPF(start, cc.stop, left.rule, Link(bb, cc), indent)
            left = left.left
        return cc

class LEOLink
    +init = (self, left, rule, sppf):
        self.left = left
        self.rule = rule
        self.sppf = sppf

class SPPF # Shared packed parse forest
    +init = (self, start, stop, cell, link, indent):
        self.start = start
        self.stop = stop
        self.cell = cell
        self.link = link
        self.indent = indent # TODO: consider whether this belongs into the location data.
    to_sppf = (self):
        return self
    is_leaf = (self):
        return self.link == null
    insert = (self, left, right):
        if self.link == null
            self.link = Link(left, right)
            return self.link
        link = self.link
        while true
            if link.left == left and link.right == right
                return link
            if link.link == null
                link.link = Link(left, right)
                return link.link
            link = link.link
    single = (self):
        result = []
        link = self.link
        while link.left
            if link.link
                return null
            result.append(link.right)
            link = link.left.link
        if link.link # Fixed the samples/grammar_bug_0
            return null
        result.append(link.right)
        result.reverse()
        return result
    +iter = (self):
        # TODO: should probably be incremental?
        output = []
        finger = []
        # To produce all parses, the sppf is walked through with the
        # finger list.
        link = self.link
        while finger.length > 0 or link
            while link.left
                finger.append(link)
                link = link.left.link
            # Now the link contains the head, while the tail is in the
            # finger list.
            while link
                result = [link.right]
                for x in reversed(finger)
                    result.append(x.right)
                output.append(result)
                link = link.link
            # Some portion of the finger has already been iterated and
            # should be removed.
            while finger.length > 0 and not link
                link = finger.pop().link
        return iter(output)
    # TODO: add a string formatter to lever:
    #     return "[{}:{}] {}".format(self.start, self.stop, self.cell)

class Link
    +init = (self, left, right, link=null):
        self.left = left
        self.right = right
        self.link = link

traverse_sppf = (stack, postorder_cb, blank_cb, resolve_ambiguity):
    rcount = 1
    sstack = []
    rstack = []
    while stack.length > 0
        sppf = stack.pop().to_sppf()
        if sppf.is_leaf()
            sstack.append(sppf.cell)
            rcount -= 1
        else
            result = sppf.single()
            if result == null
                result = resolve_ambiguity(sppf, ambiguity_traverser(sppf,
                    postorder_cb, blank_cb, resolve_ambiguity))
            if isinstance(result, Resolve)
                sstack.append(result.value)
                rcount -= 1
            else
                rstack.append(object();
                    rcount = rcount - 1
                    rlen = result.length
                    sppf = sppf)
                rcount = result.length
                stack.extend(reversed(result))
        while rcount == 0 and rstack.length > 0
            s = rstack.pop()
            rcount = s.rcount
            rlen = s.rlen
            sppf = s.sppf
            a = []
            for i in range(rlen)
                a.append(sstack.pop(sstack.length+i-rlen))
            # TODO: Here we do not really identify where the blank rule
            #       appears. That feature could be really useful
            #       sometimes. That information is available in the sppf.
            sstack.append(postorder_cb(expand(
                sppf.start, sppf.stop, sppf.cell, blank_cb, iter(a))...))
    sstack.reverse() # won't hurt.
    return sstack

ambiguity_traverser = (sppf, postorder_cb, blank_cb, resolve_ambiguity):
    return (stack):
        seq = traverse_sppf(stack,
            postorder_cb,
            blank_cb,
            resolve_ambiguity)
        return postorder_cb(expand(
            sppf.start, sppf.stop, sppf.cell, blank_cb, iter(seq))...)

class Resolve
    +init = (self, value):
        self.value = value

expand = (start, stop, cell, blank_callback, seq):
    if isinstance(cell.annotation, NNF)
        nnf = cell.annotation
        result = []
        i = 0
        for p in nnf.present
            if p
                result.append(seq.next())
            else
                result.append(blank_callback(nnf.rule.rhs[i].getsym()))
            i += 1
        return [nnf.rule, result, start, stop]
    return [cell, list(seq), start, stop]

# TODO: 'format_origin' depends on the lno/col positioning information.
#       We may want to change this, as parsing is not necessarily
#       constrained to text files.
class SyntaxError extends Exception
    +init = (self, message, location, source, at_eof=false):
        self.message = message
        self.location = location
        self.source = source
        self.traceback = null
        self.at_eof = at_eof
    +repr = (self):
        return format_origin(self.source, self.location, self.message)

class Ambiguity extends SyntaxError
    +init = (self, sppf):
        self.sppf = sppf
        self.traceback = null
    +repr = (self):
        start = self.sppf.start
        stop = self.sppf.stop
        msg = ["Ambiguity in " ++ format_loc(start) ++ "-" ++ format_loc(stop)]
        sppf_group = set([self.sppf])
        while sppf_group.length == 1
            sppf = sppf_group.pop()
            rows = list(sppf)
            col_count = null
            col_count = min(col_count, row.length) for row in rows
            for j in range(col_count)
                items = set()
                items.add(rows[i][j]) for i in range(rows.length)
                if items.length > 1
                    sppf_group = items
                    break
        if sppf_group.length == 0
            sppf_group.add(sppf)
        for sppf in sppf_group
            for row in sppf
                cols = []
                for item in row
                    item = item.to_sppf()
                    start = item.start
                    stop = item.stop
                    if isinstance(item.cell, Rule)
                        cols.append("(" ++ format_loc(start) ++ "-" ++ format_loc(stop) ++ ")")
                        cols.append(repr(item.cell.lhs))
                    else
                        cols.append(repr(item.cell))
                msg.append(" ".join(cols))
        return "\n".join(msg)

class SyntaxErrorExpected extends SyntaxError
    +init = (self, expect, location, source, at_eof=false):
        self.expect = list(expect)
        self.location = location
        self.source = source
        self.traceback = null
        self.at_eof = at_eof
    +repr = (self):
        msg = [format_origin(self.source, self.location, " expected some of:")]
        expect = []
        for e in self.expect
            if e.name
                expect.append(e)
        expect.sort(symbol_lt)
        for symbol in expect
            msg.append(" " ++ symbol.name)
        return "\n".join(msg)

class SyntaxErrorExpected2 extends SyntaxError
    +init = (self, value, expect, location, source, at_eof=false):
        self.value = value
        self.expect = list(expect)
        self.location = location
        self.source = source
        self.traceback = null
        self.at_eof = at_eof
    +repr = (self):
        msg = [format_origin(self.source, self.location, " expected some of:")]
        if self.value
            msg.insert(0, "got:" ++ repr(self.value))
        expect = []
        for e in self.expect
            if e.name
                expect.append(e)
        expect.sort(symbol_lt)
        for symbol in expect
            msg.append(" " ++ symbol.name)
        return "\n".join(msg)

format_origin = (source, location, message=null):
    loc = [repr(location.lno), repr(location.col)]
    if message
        loc.append(message)
    if isinstance(source, path)
        loc.insert(0, source.to_string())
    elif source
        loc.insert(0, source)
    else
        loc.insert(0, "")
    return ":".join(loc)

format_loc = (location):
    return repr(location.lno) ++ ":" ++ repr(location.col)

symbol_lt = multimethod(2)
symbol_lt[[Terminal, Nonterminal]] = (a, b):
    return false
symbol_lt[[Nonterminal, Terminal]] = (a, b):
    return true
symbol_lt[[Nonterminal, Nonterminal]] = (a, b):
    return a.name < b.name
symbol_lt[[Terminal, Terminal]] = (a, b):
    return a.name < b.name

# The indent parser that used to live here is no longer needed by the
# chart parser itself; the class below is kept only as a legacy helper.
# TODO: Remove this once grammar.lc doesn't need it anymore.
class IndentParser
    +init = (self, pos=tokenizer.Position(0, 1), indent=null, dedent=null, newline=null):
        self.stack = []
        self.level = pos.col
        self.line = pos.lno
        self.indent = indent
        self.dedent = dedent
        self.newline = newline
    step = (self, parser, pos, source):
        if self.line < pos.lno
            while pos.col < self.level and parser.expecting(self.dedent)
                parser.step(self.dedent, null, pos, pos)
                self.level = self.stack.pop()
            if pos.col < self.level
                raise SyntaxError("uneven indent", pos, source)
            if pos.col == self.level and parser.expecting(self.newline)
                parser.step(self.newline, null, pos, pos)
            if pos.col > self.level and parser.expecting(self.indent)
                parser.step(self.indent, null, pos, pos)
                self.stack.append(self.level)
                self.level = pos.col
            self.line = pos.lno
    # This can be used to terminate a dedent if the parsing cannot
    # continue otherwise. Note that this function, and to some extent
    # the whole indent parser in its current form, biases the parse
    # toward certain interpretations of the input.
    slip = (self, parser, pos, source):
        while parser.expecting(self.dedent)
            parser.step(self.dedent, null, pos, pos)
            self.level = self.stack.pop()
    # Most languages have a bug if this function returns false.
    finish = (self, parser, pos):
        while self.stack.length > 0 and parser.expecting(self.dedent)
            parser.step(self.dedent, null, pos, pos)
            self.level = self.stack.pop()
        return self.stack.length == 0

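# A sketch (hypothetical driver, with assumed token fields) of running
# the legacy IndentParser alongside the chart parser; indent, dedent
# and newline are terminals from the user's grammar:
#
#     ip = IndentParser(tokenizer.Position(0, 1), indent, dedent, newline)
#     for token in tokens
#         ip.step(parser, token.start, source)
#         parser.step(token.term, token.string, token.start, token.stop)
#     ip.finish(parser, eof_position)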