forked from BranchTaken/Hemlock
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
474 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,398 @@ | ||
open Rudiments | ||
|
||
(* Line/column position independent of previous lines' contents, with signed | ||
* column number to facilitate backward cursoring. *) | ||
module Spos = struct | ||
module T = struct | ||
type t = { | ||
line: uns; | ||
(* Column number if non-negative, in which case conversion to t is | ||
* trivial. If negative, the column number for the equivalent t must be | ||
* calculated via backward iteration. *) | ||
scol: sint; | ||
} | ||
|
||
let cmp t0 t1 = | ||
let open Cmp in | ||
match Uns.cmp t0.line t1.line with | ||
| Lt -> Lt | ||
| Eq -> Sint.cmp t0.scol t1.scol | ||
| Gt -> Gt | ||
|
||
let init ~line ~scol = | ||
{line; scol} | ||
|
||
let succ cp t = | ||
match cp with | ||
| cp when Codepoint.(cp = nl) -> {line=Uns.succ t.line; scol=Sint.kv 0} | ||
| _ -> {t with scol=Sint.succ t.scol} | ||
|
||
let pred cp t = | ||
match cp with | ||
| cp when Codepoint.(cp = nl) -> | ||
{line=Uns.pred t.line; scol=Sint.neg_one} | ||
| _ -> {t with scol=Sint.pred t.scol} | ||
|
||
let line t = | ||
t.line | ||
|
||
let scol t = | ||
t.scol | ||
end | ||
include T | ||
include Cmpable.Make(T) | ||
end | ||
|
||
(* Line/column position independent of previous lines' contents. *) | ||
module Pos = struct | ||
module T = struct | ||
type t = { | ||
line: uns; | ||
col: uns; | ||
} | ||
|
||
let cmp t0 t1 = | ||
let open Cmp in | ||
match Uns.cmp t0.line t1.line with | ||
| Lt -> Lt | ||
| Eq -> Uns.cmp t0.col t1.col | ||
| Gt -> Gt | ||
|
||
let init ~line ~col = | ||
{line; col} | ||
|
||
let line t = | ||
t.line | ||
|
||
let col t = | ||
t.col | ||
end | ||
include T | ||
include Cmpable.Make(T) | ||
end | ||
|
||
(* Absolute virtual index relative to beginning of text. *) | ||
module Vind = struct | ||
module T = struct | ||
type t = uns | ||
|
||
let cmp t0 t1 = | ||
Uns.cmp t0 t1 | ||
|
||
let init vind = | ||
vind | ||
|
||
let index t = | ||
t | ||
end | ||
include T | ||
include Cmpable.Make(T) | ||
end | ||
|
||
(* Text excerpt. *) | ||
module Excerpt = struct | ||
type t = { | ||
(* Excerpt index; base is 0. *) | ||
eind: uns; | ||
(* Raw excerpt. *) | ||
bytes: Bytes.Slice.t; | ||
} | ||
|
||
let cmp t0 t1 = | ||
Uns.cmp t0.eind t1.eind | ||
|
||
(* Base excerpt, always hd of excerpts maps. *) | ||
let base = {eind=0; bytes=Bytes.Slice.of_container [||]} | ||
|
||
(* val of_bytes_slice: t -> Bytes.Slice.t -> t *) | ||
let of_bytes_slice pred bytes = | ||
let eind = Uns.succ pred.eind in | ||
{eind; bytes} | ||
|
||
(* val of_string_slice: t -> Bytes.Slice.t -> t *) | ||
let of_string_slice pred slice = | ||
let eind = Uns.succ pred.eind in | ||
let bytes = Bytes.Slice.of_string_slice slice in | ||
{eind; bytes} | ||
|
||
let eind t = | ||
t.eind | ||
|
||
let bytes t = | ||
t.bytes | ||
end | ||
|
||
type t = { | ||
(* Filesystem path. *) | ||
path: string option; | ||
(* Excerpts already forced into text. The map is initialized with a base | ||
* excerpt, which simplifies various logic. *) | ||
excerpts: (uns, Excerpt.t, Uns.cmper_witness) Map.t; | ||
(* Lazy suspension which produces extended text. *) | ||
extend: t option Lazy.t; | ||
} | ||
|
||
let of_bytes_stream ?path stream = | ||
let rec susp_extend path pred_excerpt excerpts stream = lazy begin | ||
match Stream.is_empty stream with | ||
| true -> None | ||
| false -> begin | ||
let bytes, stream' = Stream.pop stream in | ||
let excerpt = Excerpt.of_bytes_slice pred_excerpt bytes in | ||
let excerpts' = | ||
Map.insert ~k:(Excerpt.eind excerpt) ~v:excerpt excerpts in | ||
let extend' = susp_extend path excerpt excerpts' stream' in | ||
let t' = {path; excerpts=excerpts'; extend=extend'} in | ||
Some t' | ||
end | ||
end in | ||
let excerpt = Excerpt.base in | ||
let excerpts = | ||
Map.singleton (module Uns) ~k:(Excerpt.eind excerpt) ~v:excerpt in | ||
let extend = susp_extend path excerpt excerpts stream in | ||
{path; excerpts; extend} | ||
|
||
let of_string_slice ?path slice = | ||
let susp_extend () = lazy None in | ||
let excerpt = Excerpt.(of_string_slice base slice) in | ||
let excerpts = | ||
Map.singleton (module Uns) ~k:Excerpt.base.eind ~v:Excerpt.base | ||
|> Map.insert ~k:excerpt.eind ~v:excerpt in | ||
let extend = susp_extend () in | ||
{path; excerpts; extend} | ||
|
||
let path t = | ||
t.path | ||
|
||
(* XXX | ||
let force t = | ||
let rec fn t = begin | ||
match Lazy.force (t.extend) with | ||
| None -> t | ||
| Some t' -> fn t' | ||
end in | ||
fn t | ||
*) | ||
|
||
module Cursor = struct | ||
module T = struct | ||
type container = t | ||
type elm = codepoint | ||
type t = { | ||
text: container; | ||
(* Virtual byte index, as if all encoding errors were replaced. *) | ||
vind: Vind.t; | ||
(* Line/column. *) | ||
spos: Spos.t; | ||
(* Excerpt which contains bcursor's bytes. *) | ||
excerpt: Excerpt.t; | ||
(* Bytes cursor, used for iterating over bytes within a single excerpt. | ||
* Note that for the positions between excerpts, there are two logically | ||
* equivalent cursors, one of which can only handle lget, and the other of | ||
* which can only handle rget. We make no effort to amortize repeated | ||
* conversions between equivalent cursors to transparently support | ||
* lget/rget, under the assumption that no reasonable use case suffers | ||
* more than constant additional overhead. *) | ||
bcursor: Bytes.Cursor.t; | ||
} | ||
|
||
let cmp t0 t1 = | ||
let open Cmp in | ||
match Excerpt.cmp t0.excerpt t1.excerpt with | ||
| Lt -> Lt | ||
| Eq -> Bytes.Cursor.cmp t0.bcursor t1.bcursor | ||
| Gt -> Gt | ||
|
||
let container t = | ||
t.text | ||
|
||
let index t = | ||
Vind.index t.vind | ||
|
||
let hd text = | ||
let excerpt = Map.get_hlt 0 text.excerpts in | ||
let bcursor = Bytes.Slice.base (Excerpt.bytes excerpt) in | ||
{ | ||
text; | ||
vind=Vind.init 0; | ||
spos=Spos.init ~line:1 ~scol:(Sint.kv 0); | ||
excerpt; | ||
bcursor; | ||
} | ||
|
||
module Codepoint_seq = struct | ||
module T = struct | ||
type nonrec t = t | ||
|
||
let init cursor = | ||
cursor | ||
|
||
let rec next t = | ||
match Bytes.Cursor.(t.bcursor < tl (container t.bcursor)) with | ||
| true -> begin | ||
let b, bcursor' = Bytes.Cursor.next t.bcursor in | ||
Some (b, {t with bcursor=bcursor'}) | ||
end | ||
| false -> begin | ||
match (Uns.succ (Excerpt.eind t.excerpt)) < | ||
(Map.length t.text.excerpts) with | ||
| true -> begin | ||
let excerpt' = | ||
Map.get_hlt (Uns.succ t.excerpt.eind) t.text.excerpts in | ||
let bcursor' = Bytes.Slice.base (Excerpt.bytes excerpt') in | ||
next {t with excerpt=excerpt'; bcursor=bcursor'} | ||
end | ||
| false -> begin | ||
match Lazy.force (t.text.extend) with | ||
| None -> None | ||
| Some text' -> next {t with text=text'} | ||
end | ||
end | ||
end | ||
include T | ||
include Codepoint.Seq.Make(T) | ||
end | ||
|
||
let next_opt t = | ||
match Codepoint_seq.(to_codepoint (init t)) with | ||
| None -> None | ||
| Some (Valid (cp, t')) -> begin | ||
let vincr = Codepoint.Utf8.length_of_codepoint cp in | ||
let vind' = Vind.(init ((index t.vind) + vincr)) in | ||
let spos' = Spos.succ cp t.spos in | ||
Some (cp, {t' with vind=vind'; spos=spos'}) | ||
end | ||
| Some (Invalid t') -> begin | ||
let cp = Codepoint.replacement in | ||
let vincr = Codepoint.Utf8.length_of_codepoint cp in | ||
let vind' = Vind.(init ((index t.vind) + vincr)) in | ||
let spos' = Spos.succ cp t.spos in | ||
Some (cp, {t' with vind=vind'; spos=spos'}) | ||
end | ||
|
||
let rget_opt t = | ||
match next_opt t with | ||
| None -> None | ||
| Some (cp, _) -> Some cp | ||
|
||
let rget t = | ||
match rget_opt t with | ||
| None -> halt "Out of bounds" | ||
| Some cp -> cp | ||
|
||
let next t = | ||
match next_opt t with | ||
| None -> halt "Out of bounds" | ||
| Some (cp, t') -> cp, t' | ||
|
||
let succ t = | ||
match next t with _, t' -> t' | ||
|
||
let seek_fwd offset t = | ||
let rec fn offset t = begin | ||
match offset with | ||
| 0 -> t | ||
| _ -> fn (Uns.pred offset) (succ t) | ||
end in | ||
fn offset t | ||
|
||
let tl text = | ||
let rec fn cursor = begin | ||
match next_opt cursor with | ||
| None -> cursor | ||
| Some (_, cursor') -> fn cursor' | ||
end in | ||
fn (hd text) | ||
|
||
module Codepoint_rev_seq = struct | ||
module T = struct | ||
type nonrec t = t | ||
|
||
let init cursor = | ||
cursor | ||
|
||
let rec next t = | ||
match Bytes.Cursor.(t.bcursor > hd (container t.bcursor)) with | ||
| true -> begin | ||
let b, bcursor' = Bytes.Cursor.prev t.bcursor in | ||
Some (b, {t with bcursor=bcursor'}) | ||
end | ||
| false -> begin | ||
match (Excerpt.eind t.excerpt) > 0 with | ||
| true -> begin | ||
let excerpt' = | ||
Map.get_hlt (Uns.pred t.excerpt.eind) t.text.excerpts in | ||
let bcursor' = Bytes.Slice.past (Excerpt.bytes excerpt') in | ||
next {t with excerpt=excerpt'; bcursor=bcursor'} | ||
end | ||
| false -> None | ||
end | ||
end | ||
include T | ||
include Codepoint.Seq.Make_rev(T) | ||
end | ||
|
||
let prev t = | ||
match Codepoint_rev_seq.(to_codepoint (init t)) with | ||
| None -> halt "Out of bounds" | ||
| Some (Valid (cp, t')) -> begin | ||
let vdecr = Codepoint.Utf8.length_of_codepoint cp in | ||
let vind' = Vind.(init ((index t.vind) - vdecr)) in | ||
let spos' = Spos.pred cp t.spos in | ||
cp, {t' with vind=vind'; spos=spos'} | ||
end | ||
| Some (Invalid t') -> begin | ||
let cp = Codepoint.replacement in | ||
let vdecr = Codepoint.Utf8.length_of_codepoint cp in | ||
let vind' = Vind.(init ((index t.vind) - vdecr)) in | ||
let spos' = Spos.pred cp t.spos in | ||
cp, {t' with vind=vind'; spos=spos'} | ||
end | ||
|
||
let lget t = | ||
match prev t with cp, _ -> cp | ||
|
||
let pred t = | ||
match prev t with _, t' -> t' | ||
|
||
let seek_rev offset t = | ||
let rec fn offset t = begin | ||
match offset with | ||
| 0 -> t | ||
| _ -> fn (Uns.pred offset) (pred t) | ||
end in | ||
fn offset t | ||
|
||
let seek offset t = | ||
if Sint.(offset < kv 0) then seek_rev (Uns.of_sint (Sint.neg offset)) t | ||
else seek_fwd (Uns.of_sint offset) t | ||
|
||
let pos t = | ||
let rec col0_delta t i = begin | ||
match t.vind = 0 with | ||
| true -> i | ||
| false -> begin | ||
let cp, t' = prev t in | ||
match cp with | ||
| cp when Codepoint.(cp = nl) -> i | ||
| _ -> col0_delta t' (Uns.succ i) | ||
end | ||
end in | ||
let col = match Sint.is_negative (Spos.scol t.spos) with | ||
| false -> Uns.of_sint (Spos.scol t.spos) | ||
| true -> col0_delta t 0 | ||
in | ||
{Pos.line=Spos.line t.spos; col} | ||
end | ||
include T | ||
include Cmpable.Make(T) | ||
end | ||
|
||
module Slice = struct | ||
include Slice.Make_mono(Cursor) | ||
end | ||
|
||
(******************************************************************************) | ||
(* Begin tests. *) | ||
|
||
(* XXX *) |
Oops, something went wrong.