Skip to content

Commit

Permalink
Merge PR #18893: [lexer] Fix handling of stream current location in e…
Browse files Browse the repository at this point in the history
…rror case.

Reviewed-by: ppedrot
Co-authored-by: ppedrot <ppedrot@users.noreply.github.com>
  • Loading branch information
coqbot-app[bot] and ppedrot committed Apr 11, 2024
2 parents 470d2e2 + 1ffe2f5 commit fd07071
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 3 deletions.
7 changes: 7 additions & 0 deletions lib/cErrors.ml
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,10 @@ let noncritical = function
| Invalid_argument "equal: functional value" -> false
| _ -> true
[@@@ocaml.warning "+52"]

(* This should be in exninfo, but noncritical is here... *)
let to_result ~f x =
try Ok (f x)
with exn when noncritical exn ->
let iexn = Exninfo.capture exn in
Error iexn
4 changes: 4 additions & 0 deletions lib/cErrors.mli
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,7 @@ val noncritical : exn -> bool
val register_additional_error_info
: (Exninfo.info -> Pp.t option)
-> unit

(** [to_result ~f x] reifies (non-critical) exceptions into a [('a,
iexn) Result.t] type *)
val to_result : f:('a -> 'b) -> 'a -> ('b, Exninfo.iexn) Result.t
22 changes: 19 additions & 3 deletions parsing/cLexer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,12 @@ let rec next_token ~diff_mode ttree loc s =
between_commands := false;
(EOI, set_loc_pos loc bp (bp+1))

(* Reify exceptions for next_token, it would be really nice if we
could actually delimit what this function raises, for now Error.E *)
let next_token ~diff_mode ttree loc s : (Tok.t * Loc.t, Exninfo.iexn) Result.t =
let f (diff_mode, ttree, loc, s) = next_token ~diff_mode ttree loc s in
CErrors.to_result ~f (diff_mode, ttree, loc, s)

(** {6 The lexer of Coq} *)

module MakeLexer (Diff : sig val mode : bool end)
Expand All @@ -764,9 +770,19 @@ module MakeLexer (Diff : sig val mode : bool end)
let cur_loc = ref loc in
Gramlib.LStream.from ~loc
(fun ttree ->
let (tok, loc) = next_token ~diff_mode:Diff.mode ttree !cur_loc cs in
cur_loc := after loc;
Some (tok,loc))
match next_token ~diff_mode:Diff.mode ttree !cur_loc cs with
| Ok (tok, loc) ->
cur_loc := after loc;
Some (tok,loc)
| Error (exn, info) ->
(* If the error contains the location of the failing token,
we still need to update [cur_loc] as to resume parsing
correctly after the error. See
https://github.com/ejgallego/coq-lsp/issues/633 for an
example. *)
let loc = Loc.get_loc info in
Option.iter (fun loc -> cur_loc := after loc) loc;
Exninfo.iraise (exn, info))
let tok_match = Tok.match_pattern
let tok_text = Tok.token_text

Expand Down
62 changes: 62 additions & 0 deletions test-suite/unit-tests/parsing/lexer_recovery.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
(* The idea of this test is to have the lexer crash at a new line,
with the ∀ symbol in line 2; then we test that we can recover correctly. *)
let doc =
"Definition map_union_weak `{β A, Insert K A (M A),
∀ A, Empty (M A),
∀ A, Lookup K A (M A),
∀ A, FinMapToList K A (M A)} {A} (m1 m2 : M A) :=
map_imap (λ l v, Some (default v (m1 !! l))) m2."

let parse pa n =
let entry = Pvernac.Vernac_.main_entry in
let rec loop res n =
if n = 0 then res else
match Pcoq.Entry.parse entry pa with
| None -> res
| Some r -> loop (r :: res) (n-1)
in
loop [] n |> List.rev

let raw_pr_loc fmt (l : Loc.t) =
let { Loc.fname=_; line_nb; bol_pos; line_nb_last; bol_pos_last; bp; ep } = l in
Format.fprintf fmt "| line_nb: %d | bol_pos: %d | line_nb_last: %d | bol_pos_last: %d | bp: %d | ep: %d |"
line_nb bol_pos line_nb_last bol_pos_last bp ep

let print_locs fmt { CAst.loc; _ } =
Option.iter (Format.fprintf fmt "@[%a@]" raw_pr_loc) loc

let setup_pa () =
let text = doc in
Pcoq.Parsable.make (Gramlib.Stream.of_string text)

let parse_whole pa =
parse pa 10

(* Use junk *)
let log_file = __FILE__ ^ ".log"

let main () =
let pa = setup_pa () in
let res, loc =
try
let _ = parse_whole pa in
false, Pcoq.Parsable.loc pa
with
(* should be `E Undefined_token` but type is private *)
| CLexer.Error.E _ ->
(* We now consume a single token and check that the location is
correct for "A" *)
let () = Pcoq.Parsable.consume pa 1 in
let loc = Pcoq.Parsable.loc pa in
let res = (loc.line_nb = 2) && (loc.bol_pos = 52) && (loc.bp = 58) && (loc.ep = 59) in
res, loc
| _ -> false, Pcoq.Parsable.loc pa
in
let oc = Stdlib.open_out log_file in
let outf = Format.formatter_of_out_channel oc in
Format.fprintf outf "fail lexer test passed: %a@\n%!" raw_pr_loc loc;
Format.pp_print_flush outf ();
Stdlib.close_out oc;
if res then exit 0 else exit 1

let () = main ()

0 comments on commit fd07071

Please sign in to comment.