forked from BranchTaken/Hemlock
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
XXX Simplify Text API, implement lazy extension.
- Loading branch information
Showing
2 changed files
with
128 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,122 @@ | ||
open Rudiments | ||
|
||
(* Lazily extended text. A value of this type is a snapshot: it records the
 * excerpts that have been forced so far, plus a suspension that produces the
 * next, longer snapshot on demand. The source stream is not stored in the
 * record; it is captured by the extend suspension created in init. *)
type t = {
  (* Optional path associated with the text, as passed to init. *)
  path: string option;
  (* Number of bytes currently in excerpts, *not* the text length unless no
   * further extensions can be forced. *)
  blength: uns;
  (* bindex->excerpt map for strings already forced into text. *)
  excerpts: (uns, string, Uns.cmper_witness) Ordmap.t;
  (* Lazy suspension which produces extended text, or None once the source
   * stream is exhausted. *)
  extend: t option Lazy.t;
}

(* Create a text from a stream of string excerpts. No excerpt is popped from
 * the stream here; the returned text has zero bytes and an empty excerpts map,
 * and each forcing of an extend suspension pulls excerpts from the stream until
 * one non-empty excerpt has been added (or the stream runs out). *)
let init ?path stream =
  let blength = 0 in
  let excerpts = Ordmap.empty (module Uns) in
  let rec susp_extend path blength excerpts stream = lazy begin
    match Stream.is_empty stream with
    | true -> None
    | false -> begin
        let excerpt, stream' = Stream.pop stream in
        match String.blength excerpt > 0 with
        | false ->
          (* An empty excerpt adds no bytes, so the next excerpt would be
           * inserted under the same bindex key, which insert_hlt is expected
           * to reject. Skip it and continue with the rest of the stream. *)
          Lazy.force (susp_extend path blength excerpts stream')
        | true -> begin
            (* Excerpts are keyed by the byte index at which they start. *)
            let blength' = blength + (String.blength excerpt) in
            let excerpts' = Ordmap.insert_hlt ~k:blength ~v:excerpt excerpts in
            let extend' = susp_extend path blength' excerpts' stream' in
            Some {path; blength=blength'; excerpts=excerpts'; extend=extend'}
          end
      end
  end in
  let extend = susp_extend path blength excerpts stream in
  {path; blength; excerpts; extend}
|
||
(* Path associated with the text, if one was supplied at creation. *)
let path {path; _} =
  path
|
||
(* Force the extend suspension repeatedly until it yields None, and return the
 * last text reached, i.e. the one that cannot be extended any further. *)
let rec force t =
  match Lazy.force t.extend with
  | Some extended -> force extended
  | None -> t
|
||
(* Byte length of the text once every extension has been forced. Note that this
 * forces the whole text. *)
let blength t =
  let complete = force t in
  complete.blength
|
||
(* Line/column position pair. *)
module Pos = struct
  type t = {
    line: uns;
    col: uns;
  }

  (* Build a position from its line and column. *)
  let init ~line ~col =
    {line=line; col=col}

  (* Line component of the position. *)
  let line {line; _} =
    line

  (* Column component of the position. *)
  let col {col; _} =
    col
end
|
||
(* Cursor over the codepoints of a text. Only cmp and hd are implemented here;
 * tl, succ, pred, lget, and rget are still XXX placeholders. The inner module T
 * is included and also passed to Cmpable.Make_mono to derive comparison
 * operations from cmp. *)
module Cursor = struct | ||
  module T = struct | ||
    type container = t | ||
    type elm = codepoint | ||
    type t = { | ||
      (* Text this cursor points into. *)
      text: container; | ||
      (* Index used by cmp to order cursors; presumably the byte index within
       * the text -- TODO confirm once succ/pred are implemented. *)
      bindex: uns; | ||
      (* Line/column position; hd starts at line 1, column 0. *)
      pos: Pos.t; | ||
      (* Presumably the codepoint index within the text; hd starts at 0 -- TODO
       * confirm. *)
      cindex: uns; | ||
      (* Excerpts cursor, used for iterating over excerpts. *) | ||
      ecursor: (uns, string, Uns.cmper_witness) Ordmap.Cursor.t; | ||
      (* String cursor, used for iterating over codepoints within a single | ||
       * excerpt. Note that for the positions between excerpts, there are two | ||
       * logically equivalent cursors -- one at (String.Cursor.tl | ||
       * (Ordmap.Cursor.lget ecursor)), and the other at (String.Cursor.hd | ||
       * (Ordmap.Cursor.rget ecursor)). In principle it is possible for accesses | ||
       * across excerpt boundaries to dominate performance. However, the Ordmap | ||
       * cursor provides constant-time access to both the left and right, which | ||
       * means that even if we have to access a codepoint in the excerpt | ||
       * adjacent to the one in which scursor resides, the additional overhead | ||
       * is constant. *) | ||
      scursor: String.Cursor.t; | ||
    } | ||
|
||
    (* Order cursors by bindex alone; the text field is not compared. *)
    let cmp t0 t1 = | ||
      Uns.cmp t0.bindex t1.bindex | ||
|
||
    (* Cursor at the start of text: bindex and cindex are 0 and the position is
     * line 1, column 0. The string cursor is placed at the head of the first
     * excerpt already present in text.excerpts, or at the head of the empty
     * string when the map has no excerpts.
     * NOTE(review): hd does not force text.extend, so a text whose excerpts
     * have not been forced yet gets a string cursor over the empty string --
     * confirm this is intended. *)
    let hd text = | ||
      let ecursor = Ordmap.Cursor.hd text.excerpts in | ||
      let scursor = String.Cursor.hd ( | ||
        match Ordmap.length text.excerpts > 0 with | ||
        | true -> (match Ordmap.Cursor.rget ecursor with _, s -> s) | ||
        | false -> "" | ||
      ) in | ||
      {text; bindex=0; cindex=0; pos=Pos.init ~line:1 ~col:0; ecursor; scursor} | ||
|
||
    (* NOTE(review): unimplemented placeholder; XXX is not a defined value. *)
    let tl text = | ||
      XXX | ||
|
||
    (* NOTE(review): unimplemented placeholder; XXX is not a defined value. *)
    let succ t = | ||
      XXX | ||
|
||
    (* NOTE(review): unimplemented placeholder; XXX is not a defined value. *)
    let pred t = | ||
      XXX | ||
|
||
    (* NOTE(review): unimplemented placeholder; XXX is not a defined value. *)
    let lget t = | ||
      XXX | ||
|
||
    (* NOTE(review): unimplemented placeholder; XXX is not a defined value. *)
    let rget t = | ||
      XXX | ||
|
||
  end | ||
  include T | ||
  include Cmpable.Make_mono(T) | ||
end | ||
|
||
(* Slices of a text, obtained by applying the Slice.Make_mono functor to
 * Cursor. Inside this definition, Slice on the right-hand side refers to the
 * Slice module already in scope (presumably from Rudiments), not to the module
 * being defined. *)
module Slice = struct | ||
  (* The container type is the enclosing text type. *)
  type container = t | ||
|
||
  include Slice.Make_mono(Cursor) | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters