Skip to content

Commit

Permalink
XXX Add scanner DAG driver.
Browse files Browse the repository at this point in the history
  • Loading branch information
jasone committed Sep 16, 2020
1 parent d631e85 commit 296d021
Showing 1 changed file with 110 additions and 59 deletions.
169 changes: 110 additions & 59 deletions bootstrap/src/hlc/scan.ml
Original file line number Diff line number Diff line change
Expand Up @@ -87,60 +87,101 @@ let accept variant cursor t =
let loc = Location.init (Text.Cursor.pos t.cursor) (Text.Cursor.pos cursor) in
{t with cursor}, (Token.init variant loc)

let accept_dentation variant cursor t =
accept variant cursor {t with line_state=Line_body}
let accept_pred variant cursor t =
assert (Text.Cursor.index cursor > 0);
accept variant (Text.Cursor.pred cursor) t

let rec whitespace t cursor =
match Text.Cursor.next_opt cursor with
| None -> accept Whitespace t.cursor t
| Some (cp, cursor') -> begin
match cp with
| cp when Codepoint.(cp = of_char ' ') -> whitespace t cursor'
| cp when Codepoint.(cp = nl) ->
accept Whitespace cursor {t with line_state=Line_dentation}
| _ -> accept Whitespace cursor t
end
(* The scanner's directed acyclic subgraph is be expressed as a DAG of states
* with a unified state transition driver. *)
type action = Text.Cursor.t -> t -> t * Token.t
type state = {
edges: (codepoint, action, Codepoint.cmper_witness) Map.t;
eoi: action;
default: action;
}

let lbrack cursor t =
let act state cursor t =
match Text.Cursor.next_opt cursor with
| None -> accept Lbrack cursor t
| None -> state.eoi cursor t
| Some (cp, cursor') -> begin
match cp with
| cp when Codepoint.(cp = of_char '|') -> accept Larray cursor' t
| _ -> accept Lbrack cursor t
match Map.get cp state.edges with
| Some action' -> action' cursor' t
| None -> state.default cursor' t
end

let bar cursor t =
let rec whitespace cursor t =
match Text.Cursor.next_opt cursor with
| None -> accept Bar cursor t
| None -> accept Whitespace cursor t
| Some (cp, cursor') -> begin
match cp with
| cp when Codepoint.(cp = of_char ']') -> accept Rarray cursor' t
| _ -> accept Bar cursor t
| cp when Codepoint.(cp = of_char ' ') -> whitespace cursor' t
| _ -> accept Whitespace cursor t
end

let rec body t =
match Text.Cursor.next_opt t.cursor with
| None -> accept End_of_input t.cursor t
| Some (cp, cursor') -> begin
match cp with
| cp when Codepoint.(cp = of_char '[') -> lbrack cursor' t
| cp when Codepoint.(cp = of_char '|') -> bar cursor' t
| cp when Codepoint.(cp = of_char ' ') -> whitespace t cursor'
| cp when Codepoint.(cp = nl) ->
dentation t.cursor {t with line_state=Line_dentation}
(* XXX *)
| _ -> accept Error cursor' t
end
and dentation cursor t =
let other t cursor line_delim = begin
let lbrack_bar_state = {
edges=Map.empty (module Codepoint);
eoi=(accept Larray);
default=(accept_pred Larray);
}

let lbrack_state = {
edges=Map.singleton (module Codepoint)
~k:(Codepoint.of_char '|') ~v:(act lbrack_bar_state);
eoi=(accept Lbrack);
default=(accept_pred Lbrack);
}

let bar_rbrack_state = {
edges=Map.empty (module Codepoint);
eoi=(accept Rarray);
default=(accept_pred Rarray);
}

let rbrack_state = {
edges=Map.empty (module Codepoint);
eoi=(accept Rbrack);
default=(accept_pred Rbrack);
}

let bar_state = {
edges=Map.singleton (module Codepoint)
~k:(Codepoint.of_char ']') ~v:(act bar_rbrack_state);
eoi=(accept Bar);
default=(accept_pred Bar);
}

let error_state = {
edges=Map.empty (module Codepoint);
eoi=(accept Error);
default=(accept_pred Error);
}

let start_state = {
edges=Map.of_alist (module Codepoint) [
(Codepoint.of_char '[', (act lbrack_state));
(Codepoint.of_char ']', (act rbrack_state));
(Codepoint.of_char '|', (act bar_state));
(Codepoint.of_char ' ', whitespace);
];
eoi=(accept End_of_input);
default=(act error_state);
}

let start t =
act start_state t.cursor t

let accept_dentation variant cursor t =
accept variant cursor {t with line_state=Line_body}

let rec dentation_cont line_delim cursor t =
let other line_delim cursor t = begin
let col = Text.(Pos.col (Cursor.pos cursor)) in
let level = col / 4 in
let rem = col % 4 in
match rem, t.level, level with
| 0, t_level, level when t_level = level -> begin
match line_delim with
| false -> body {t with line_state=Line_body}
| false -> start {t with line_state=Line_body}
| true -> accept_dentation Line_delim cursor t
end
| 0, t_level, level when Uns.succ t_level = level ->
Expand All @@ -151,31 +192,32 @@ and dentation cursor t =
accept_dentation Whitespace cursor t
| _ -> accept_dentation Indent_error cursor t
end in
let rec cont t cursor line_delim = begin
match Text.Cursor.next_opt cursor with
| None -> other t cursor line_delim
| Some (cp, cursor') -> begin
match cp with
| cp when Codepoint.(cp = of_char ' ') -> cont t cursor' line_delim
| cp when Codepoint.(cp = nl) -> begin
(* Handle empty lines specially. *)
let col = Text.(Pos.col (Cursor.pos cursor)) in
match col = 0 with
| true -> accept Whitespace cursor t
| false -> accept_dentation Indent_error cursor' t
end
| _ -> other t cursor line_delim
end
end in
match Text.Cursor.next_opt cursor with
| None -> accept End_of_input t.cursor t
| None -> other line_delim cursor t
| Some (cp, cursor') -> begin
match cp with
| cp when Codepoint.(cp = of_char ' ') ->
dentation_cont line_delim cursor' t
| cp when Codepoint.(cp = nl) -> begin
(* Handle empty lines specially. *)
let col = Text.(Pos.col (Cursor.pos cursor)) in
match col = 0 with
| true -> accept Whitespace cursor t
| false -> accept_dentation Indent_error cursor' t
end
| _ -> other line_delim cursor t
end

let dentation cursor t =
match Text.Cursor.next_opt cursor with
| None -> accept End_of_input cursor t
| Some (cp, cursor') -> begin
match cp with
| cp when Codepoint.(cp = of_char ' ') -> cont t cursor' false
| cp when Codepoint.(cp = nl) -> cont t cursor' true
| cp when Codepoint.(cp = of_char ' ') -> dentation_cont false cursor' t
| cp when Codepoint.(cp = nl) -> dentation_cont true cursor' t
| _ -> begin
match t.level with
| 0 -> body {t with line_state=Line_body}
| 0 -> start {t with line_state=Line_body}
| 1 -> accept_dentation Dedent cursor {t with level=0}
| _ -> accept_dentation Indent_error cursor t
end
Expand All @@ -184,7 +226,16 @@ and dentation cursor t =
let next t =
match t.line_state with
| Line_dentation-> dentation t.cursor t
| Line_body -> body t
| Line_body -> begin
match Text.Cursor.next_opt t.cursor with
| None -> accept End_of_input t.cursor t
| Some (cp, cursor') -> begin
match cp with
| cp when Codepoint.(cp = nl) ->
dentation_cont true cursor' {t with line_state=Line_dentation}
| _ -> start t
end
end

(******************************************************************************)
(* Begin tests. *)
Expand Down Expand Up @@ -413,6 +464,6 @@ let%expect_test "symbols" =
1:1..1:3 : <Larray>
1:3..1:4 : <Bar>
1:4..1:6 : <Rarray>
1:6..1:7 : <Error>
1:6..1:7 : <Rbrack>
1:7..1:7 : <End_of_input>
|xxx}]

0 comments on commit 296d021

Please sign in to comment.