Fix line-by-line parsing

evhub committed on Dec 22, 2023
commit d8941a6 · 1 parent d6d9e51
Show file tree

Hide file tree

Showing 6 changed files with 62 additions and 57 deletions.
diff --git a/coconut/compiler/compiler.py b/coconut/compiler/compiler.py
@@ -151,7 +151,6 @@
     match_in,
     transform,
     parse,
-    all_matches,
     get_target_info_smart,
     split_leading_comments,
     compile_regex,
@@ -1210,9 +1209,9 @@ def make_err(self, errtype, message, original, loc=0, ln=None, extra=None, refor
             causes = dictset()
             for check_loc in dictset((loc, endpoint, startpoint)):
                 if check_loc is not None:
-                    for cause, _, _ in all_matches(self.parse_err_msg, original[check_loc:], inner=True):
-                        if cause:
-                            causes.add(cause)
+                    cause = try_parse(self.parse_err_msg, original[check_loc:], inner=True)
+                    if cause:
+                        causes.add(cause)
             if causes:
                 extra = "possible cause{s}: {causes}".format(
                     s="s" if len(causes) > 1 else "",
@@ -1263,10 +1262,18 @@ def make_err(self, errtype, message, original, loc=0, ln=None, extra=None, refor
         # build the error
         if extra is not None:
             kwargs["extra"] = extra
-        err = errtype(message, snippet, loc_in_snip, ln, endpoint=endpt_in_snip, filename=self.filename, **kwargs)
-        if use_startpoint:
-            err = err.set_formatting(point_to_endpoint=True, max_err_msg_lines=2)
-        return err
+        return errtype(
+            message,
+            snippet,
+            loc_in_snip,
+            ln,
+            endpoint=endpt_in_snip,
+            filename=self.filename,
+            **kwargs,
+        ).set_formatting(
+            point_to_endpoint=True if use_startpoint else None,
+            max_err_msg_lines=2 if use_startpoint else None,
+        )
 
     def make_syntax_err(self, err, original, after_parsing=False):
         """Make a CoconutSyntaxError from a CoconutDeferredSyntaxError."""
@@ -1375,9 +1382,7 @@ def parse_line_by_line(self, init_parser, line_parser, original):
                     got, got_loc = results
                     out_parts.append(got)
                 got_loc = int(got_loc)
-                internal_assert(got_loc >= cur_loc, "invalid line by line parse", (cur_loc, results), extra=lambda: "in: " + repr(self.remaining_original.split("\n", 1)[0]))
-                if not init and got_loc == cur_loc:
-                    raise self.make_err(CoconutParseError, "parsing could not continue", original, cur_loc, include_causes=True)
+                internal_assert(got_loc >= cur_loc and (init or got_loc > cur_loc), "invalid line by line parse", (cur_loc, results), extra=lambda: "in: " + repr(self.remaining_original.split("\n", 1)[0]))
                 cur_loc = got_loc
                 init = False
         return "".join(out_parts)

diff --git a/coconut/compiler/grammar.py b/coconut/compiler/grammar.py
@@ -2646,8 +2646,9 @@ class Grammar(object):
 
         parse_err_msg = start_marker + (
             # should be in order of most likely to actually be the source of the error first
-            ZeroOrMore(~questionmark + ~Literal("\n") + any_char) + fixto(
-                questionmark
+            fixto(
+                ZeroOrMore(~questionmark + ~Literal("\n") + any_char)
+                + questionmark
                 + ~dollar
                 + ~lparen
                 + ~lbrack

diff --git a/coconut/compiler/util.py b/coconut/compiler/util.py
@@ -454,12 +454,6 @@ def attach(item, action, ignore_no_tokens=None, ignore_one_token=None, ignore_ar
     return add_action(item, action, make_copy)
 
 
-def final_evaluate_tokens(tokens):
-    """Same as evaluate_tokens but should only be used once a parse is assured."""
-    clear_packrat_cache()
-    return evaluate_tokens(tokens, is_final=True)
-
-
 @contextmanager
 def adaptive_manager(original, loc, item, reparse=False):
     """Manage the use of MatchFirst.setAdaptiveMode."""
@@ -489,6 +483,14 @@ def adaptive_manager(original, loc, item, reparse=False):
             MatchFirst.setAdaptiveMode(False)
 
 
+def final_evaluate_tokens(tokens):
+    """Same as evaluate_tokens but should only be used once a parse is assured."""
+    result = evaluate_tokens(tokens, is_final=True)
+    # clear packrat cache after evaluating tokens so error creation gets to see the cache
+    clear_packrat_cache()
+    return result
+
+
 def final(item):
     """Collapse the computation graph upon parsing the given item."""
     if SUPPORTS_ADAPTIVE and use_adaptive_if_available:
@@ -530,9 +532,12 @@ def force_reset_packrat_cache():
 @contextmanager
 def parsing_context(inner_parse=None):
     """Context to manage the packrat cache across parse calls."""
-    current_cache_matters = ParserElement._packratEnabled
+    current_cache_matters = (
+        inner_parse is not False
+        and ParserElement._packratEnabled
+    )
     new_cache_matters = (
-        not inner_parse
+        inner_parse is not True
         and ParserElement._incrementalEnabled
         and not ParserElement._incrementalWithResets
     )
@@ -542,7 +547,17 @@ def parsing_context(inner_parse=None):
     )
     if (
         current_cache_matters
-        and not new_cache_matters
+        and new_cache_matters
+        and ParserElement._incrementalWithResets
+    ):
+        incrementalWithResets, ParserElement._incrementalWithResets = ParserElement._incrementalWithResets, False
+        try:
+            yield
+        finally:
+            ParserElement._incrementalWithResets = incrementalWithResets
+    elif (
+        current_cache_matters
+        and will_clear_cache
     ):
         # store old packrat cache
         old_cache = ParserElement.packrat_cache
@@ -557,16 +572,6 @@ def parsing_context(inner_parse=None):
             if logger.verbose:
                 ParserElement.packrat_cache_stats[0] += old_cache_stats[0]
                 ParserElement.packrat_cache_stats[1] += old_cache_stats[1]
-    elif (
-        current_cache_matters
-        and new_cache_matters
-        and will_clear_cache
-    ):
-        incrementalWithResets, ParserElement._incrementalWithResets = ParserElement._incrementalWithResets, False
-        try:
-            yield
-        finally:
-            ParserElement._incrementalWithResets = incrementalWithResets
     else:
         yield
 
@@ -806,7 +811,7 @@ def should_clear_cache(force=False):
         return True
     elif not ParserElement._packratEnabled:
         return False
-    elif SUPPORTS_INCREMENTAL and ParserElement._incrementalEnabled:
+    elif ParserElement._incrementalEnabled:
         if not in_incremental_mode():
             return repeatedly_clear_incremental_cache
         if (
@@ -897,18 +902,11 @@ def get_cache_items_for(original, only_useful=False, exclude_stale=True):
             yield lookup, value
 
 
-def get_highest_parse_loc(original, only_successes=False):
-    """Get the highest observed parse location."""
+def get_highest_parse_loc(original):
+    """Get the highest observed parse location.
+    Note that there's no point in filtering for successes/failures, since we always see both at the same locations."""
     highest_loc = 0
     for lookup, _ in get_cache_items_for(original):
-        if only_successes:
-            if SUPPORTS_INCREMENTAL and ParserElement._incrementalEnabled:
-                # parseIncremental failure
-                if lookup[1] is True:
-                    continue
-            # parseCache failure
-            elif not isinstance(lookup, tuple):
-                continue
         loc = lookup[2]
         if loc > highest_loc:
             highest_loc = loc

diff --git a/coconut/exceptions.py b/coconut/exceptions.py
@@ -22,7 +22,6 @@
 import traceback
 
 from coconut._pyparsing import (
-    USE_LINE_BY_LINE,
     lineno,
     col as getcol,
 )
@@ -169,8 +168,8 @@ def message(self, message, source, point, ln, extra=None, endpoint=None, filenam
                     message_parts += ["\n", " " * taberrfmt, highlight(part)]
 
                     # add squiggles to message
-                    if point_ind > 0 or endpoint_ind > 0:
-                        err_len = endpoint_ind - point_ind
+                    err_len = endpoint_ind - point_ind
+                    if (point_ind > 0 or endpoint_ind > 0) and err_len < len(part):
                         message_parts += ["\n", " " * (taberrfmt + point_ind)]
                         if err_len <= min_squiggles_in_err_msg:
                             if not self.point_to_endpoint:
@@ -274,7 +273,7 @@ def message(self, message, source, point, ln, target, endpoint, filename):
 
 class CoconutParseError(CoconutSyntaxError):
     """Coconut ParseError."""
-    point_to_endpoint = not USE_LINE_BY_LINE
+    point_to_endpoint = True
 
 
 class CoconutWarning(CoconutException):

diff --git a/coconut/tests/main_test.py b/coconut/tests/main_test.py
@@ -148,9 +148,8 @@
     "INTERNAL ERROR",
 )
 ignore_error_lines_with = (
-    # ignore SyntaxWarnings containing assert_raises
-    "assert_raises(",
-    " raise ",
+    # ignore SyntaxWarnings containing assert_raises or raise
+    "raise",
 )
 
 mypy_snip = "a: str = count()[0]"

diff --git a/coconut/tests/src/extras.coco b/coconut/tests/src/extras.coco
@@ -208,11 +208,11 @@ cannot reassign type variable 'T' (use explicit '\T' syntax if intended) (line 1
     assert_raises(-> parse("$"), CoconutParseError)
     assert_raises(-> parse("@"), CoconutParseError)
     assert_raises(-> parse("range(1,10) |> reduce$(*, initializer = 1000) |> print"), CoconutParseError, err_has=(
-        "\n                       \~~^",
+        "\n                       \\~~^",
     ))
     assert_raises(-> parse("a := b"), CoconutParseError, err_has="\n    ^")
     assert_raises(-> parse("1 + return"), CoconutParseError, err_has=(
-        "\n    \~~^",
+        "\n    \\~~^",
     ))
     assert_raises(-> parse("""
 def f() =
@@ -227,21 +227,24 @@ def f() =
         """
   assert 2
          ~^
-        """.strip()
+        """.strip(),
     ))
     assert_raises(-> parse('b"abc" "def"'), CoconutParseError, err_has="\n         ^")
     assert_raises(-> parse('"abc" b"def"'), CoconutParseError, err_has="\n        ^")
     assert_raises(-> parse('"a" 10'), CoconutParseError, err_has="\n      ^")
-    assert_raises(-> parse("A. ."), CoconutParseError, err_has="\n   \~^")
+    assert_raises(-> parse("A. ."), CoconutParseError, err_has="\n   \\~^")
     assert_raises(-> parse('''f"""{
 }"""'''), CoconutSyntaxError, err_has="parsing failed for format string expression")
-    assert_raises(-> parse("f([] {})"), CoconutParseError, err_has="\n   \~~~^")
+    assert_raises(-> parse("f([] {})"), CoconutParseError, err_has="\n   \\~~~^")
 
     assert_raises(-> parse("return = 1"), CoconutParseError, err_has='invalid use of the keyword "return"')
     assert_raises(-> parse("if a = b: pass"), CoconutParseError, err_has="misplaced assignment")
     assert_raises(-> parse("while a == b"), CoconutParseError, err_has="misplaced newline")
-    assert_raises(-> parse("0xfgf"), CoconutParseError, err_has=r"\n     ^")
-    assert_raises(-> parse("(. if 1)"), CoconutParseError, err_has=r"\n     ^")
+    assert_raises(-> parse("0xfgf"), CoconutParseError, err_has=(
+        "\n     ^",
+        "\n     \\~^",
+    ))
+    assert_raises(-> parse("(. if 1)"), CoconutParseError, err_has="\n     ^")
 
     try:
         parse("""