Skip to content

Commit

Permalink
Implement Text (XXX untested).
Browse files Browse the repository at this point in the history
  • Loading branch information
jasone committed Aug 23, 2020
1 parent 9a41019 commit 7ccd66d
Show file tree
Hide file tree
Showing 2 changed files with 474 additions and 0 deletions.
398 changes: 398 additions & 0 deletions bootstrap/src/basis/text.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,398 @@
open Rudiments

(* Line/column position independent of previous lines' contents, with signed
* column number to facilitate backward cursoring. *)
module Spos = struct
module T = struct
type t = {
line: uns;
(* Column number if non-negative, in which case conversion to t is
* trivial. If negative, the column number for the equivalent t must be
* calculated via backward iteration. *)
scol: sint;
}

let cmp t0 t1 =
let open Cmp in
match Uns.cmp t0.line t1.line with
| Lt -> Lt
| Eq -> Sint.cmp t0.scol t1.scol
| Gt -> Gt

let init ~line ~scol =
{line; scol}

let succ cp t =
match cp with
| cp when Codepoint.(cp = nl) -> {line=Uns.succ t.line; scol=Sint.kv 0}
| _ -> {t with scol=Sint.succ t.scol}

let pred cp t =
match cp with
| cp when Codepoint.(cp = nl) ->
{line=Uns.pred t.line; scol=Sint.neg_one}
| _ -> {t with scol=Sint.pred t.scol}

let line t =
t.line

let scol t =
t.scol
end
include T
include Cmpable.Make(T)
end

(* Line/column position independent of previous lines' contents. *)
module Pos = struct
module T = struct
type t = {
line: uns;
col: uns;
}

let cmp t0 t1 =
let open Cmp in
match Uns.cmp t0.line t1.line with
| Lt -> Lt
| Eq -> Uns.cmp t0.col t1.col
| Gt -> Gt

let init ~line ~col =
{line; col}

let line t =
t.line

let col t =
t.col
end
include T
include Cmpable.Make(T)
end

(* Absolute virtual index relative to beginning of text. *)
module Vind = struct
module T = struct
type t = uns

let cmp t0 t1 =
Uns.cmp t0 t1

let init vind =
vind

let index t =
t
end
include T
include Cmpable.Make(T)
end

(* Text excerpt. *)
module Excerpt = struct
type t = {
(* Excerpt index; base is 0. *)
eind: uns;
(* Raw excerpt. *)
bytes: Bytes.Slice.t;
}

let cmp t0 t1 =
Uns.cmp t0.eind t1.eind

(* Base excerpt, always hd of excerpts maps. *)
let base = {eind=0; bytes=Bytes.Slice.of_container [||]}

(* val of_bytes_slice: t -> Bytes.Slice.t -> t *)
let of_bytes_slice pred bytes =
let eind = Uns.succ pred.eind in
{eind; bytes}

(* val of_string_slice: t -> Bytes.Slice.t -> t *)
let of_string_slice pred slice =
let eind = Uns.succ pred.eind in
let bytes = Bytes.Slice.of_string_slice slice in
{eind; bytes}

let eind t =
t.eind

let bytes t =
t.bytes
end

type t = {
(* Filesystem path. *)
path: string option;
(* Excerpts already forced into text. The map is initialized with a base
* excerpt, which simplifies various logic. *)
excerpts: (uns, Excerpt.t, Uns.cmper_witness) Map.t;
(* Lazy suspension which produces extended text. *)
extend: t option Lazy.t;
}

let of_bytes_stream ?path stream =
let rec susp_extend path pred_excerpt excerpts stream = lazy begin
match Stream.is_empty stream with
| true -> None
| false -> begin
let bytes, stream' = Stream.pop stream in
let excerpt = Excerpt.of_bytes_slice pred_excerpt bytes in
let excerpts' =
Map.insert ~k:(Excerpt.eind excerpt) ~v:excerpt excerpts in
let extend' = susp_extend path excerpt excerpts' stream' in
let t' = {path; excerpts=excerpts'; extend=extend'} in
Some t'
end
end in
let excerpt = Excerpt.base in
let excerpts =
Map.singleton (module Uns) ~k:(Excerpt.eind excerpt) ~v:excerpt in
let extend = susp_extend path excerpt excerpts stream in
{path; excerpts; extend}

let of_string_slice ?path slice =
let susp_extend () = lazy None in
let excerpt = Excerpt.(of_string_slice base slice) in
let excerpts =
Map.singleton (module Uns) ~k:Excerpt.base.eind ~v:Excerpt.base
|> Map.insert ~k:excerpt.eind ~v:excerpt in
let extend = susp_extend () in
{path; excerpts; extend}

let path t =
t.path

(* XXX
let force t =
let rec fn t = begin
match Lazy.force (t.extend) with
| None -> t
| Some t' -> fn t'
end in
fn t
*)

module Cursor = struct
module T = struct
type container = t
type elm = codepoint
type t = {
text: container;
(* Virtual byte index, as if all encoding errors were replaced. *)
vind: Vind.t;
(* Line/column. *)
spos: Spos.t;
(* Excerpt which contains bcursor's bytes. *)
excerpt: Excerpt.t;
(* Bytes cursor, used for iterating over bytes within a single excerpt.
* Note that for the positions between excerpts, there are two logically
* equivalent cursors, one of which can only handle lget, and the other of
* which can only handle rget. We make no effort to amortize repeated
* conversions between equivalent cursors to transparently support
* lget/rget, under the assumption that no reasonable use case suffers
* more than constant additional overhead. *)
bcursor: Bytes.Cursor.t;
}

let cmp t0 t1 =
let open Cmp in
match Excerpt.cmp t0.excerpt t1.excerpt with
| Lt -> Lt
| Eq -> Bytes.Cursor.cmp t0.bcursor t1.bcursor
| Gt -> Gt

let container t =
t.text

let index t =
Vind.index t.vind

let hd text =
let excerpt = Map.get_hlt 0 text.excerpts in
let bcursor = Bytes.Slice.base (Excerpt.bytes excerpt) in
{
text;
vind=Vind.init 0;
spos=Spos.init ~line:1 ~scol:(Sint.kv 0);
excerpt;
bcursor;
}

module Codepoint_seq = struct
module T = struct
type nonrec t = t

let init cursor =
cursor

let rec next t =
match Bytes.Cursor.(t.bcursor < tl (container t.bcursor)) with
| true -> begin
let b, bcursor' = Bytes.Cursor.next t.bcursor in
Some (b, {t with bcursor=bcursor'})
end
| false -> begin
match (Uns.succ (Excerpt.eind t.excerpt)) <
(Map.length t.text.excerpts) with
| true -> begin
let excerpt' =
Map.get_hlt (Uns.succ t.excerpt.eind) t.text.excerpts in
let bcursor' = Bytes.Slice.base (Excerpt.bytes excerpt') in
next {t with excerpt=excerpt'; bcursor=bcursor'}
end
| false -> begin
match Lazy.force (t.text.extend) with
| None -> None
| Some text' -> next {t with text=text'}
end
end
end
include T
include Codepoint.Seq.Make(T)
end

let next_opt t =
match Codepoint_seq.(to_codepoint (init t)) with
| None -> None
| Some (Valid (cp, t')) -> begin
let vincr = Codepoint.Utf8.length_of_codepoint cp in
let vind' = Vind.(init ((index t.vind) + vincr)) in
let spos' = Spos.succ cp t.spos in
Some (cp, {t' with vind=vind'; spos=spos'})
end
| Some (Invalid t') -> begin
let cp = Codepoint.replacement in
let vincr = Codepoint.Utf8.length_of_codepoint cp in
let vind' = Vind.(init ((index t.vind) + vincr)) in
let spos' = Spos.succ cp t.spos in
Some (cp, {t' with vind=vind'; spos=spos'})
end

let rget_opt t =
match next_opt t with
| None -> None
| Some (cp, _) -> Some cp

let rget t =
match rget_opt t with
| None -> halt "Out of bounds"
| Some cp -> cp

let next t =
match next_opt t with
| None -> halt "Out of bounds"
| Some (cp, t') -> cp, t'

let succ t =
match next t with _, t' -> t'

let seek_fwd offset t =
let rec fn offset t = begin
match offset with
| 0 -> t
| _ -> fn (Uns.pred offset) (succ t)
end in
fn offset t

let tl text =
let rec fn cursor = begin
match next_opt cursor with
| None -> cursor
| Some (_, cursor') -> fn cursor'
end in
fn (hd text)

module Codepoint_rev_seq = struct
module T = struct
type nonrec t = t

let init cursor =
cursor

let rec next t =
match Bytes.Cursor.(t.bcursor > hd (container t.bcursor)) with
| true -> begin
let b, bcursor' = Bytes.Cursor.prev t.bcursor in
Some (b, {t with bcursor=bcursor'})
end
| false -> begin
match (Excerpt.eind t.excerpt) > 0 with
| true -> begin
let excerpt' =
Map.get_hlt (Uns.pred t.excerpt.eind) t.text.excerpts in
let bcursor' = Bytes.Slice.past (Excerpt.bytes excerpt') in
next {t with excerpt=excerpt'; bcursor=bcursor'}
end
| false -> None
end
end
include T
include Codepoint.Seq.Make_rev(T)
end

let prev t =
match Codepoint_rev_seq.(to_codepoint (init t)) with
| None -> halt "Out of bounds"
| Some (Valid (cp, t')) -> begin
let vdecr = Codepoint.Utf8.length_of_codepoint cp in
let vind' = Vind.(init ((index t.vind) - vdecr)) in
let spos' = Spos.pred cp t.spos in
cp, {t' with vind=vind'; spos=spos'}
end
| Some (Invalid t') -> begin
let cp = Codepoint.replacement in
let vdecr = Codepoint.Utf8.length_of_codepoint cp in
let vind' = Vind.(init ((index t.vind) - vdecr)) in
let spos' = Spos.pred cp t.spos in
cp, {t' with vind=vind'; spos=spos'}
end

let lget t =
match prev t with cp, _ -> cp

let pred t =
match prev t with _, t' -> t'

let seek_rev offset t =
let rec fn offset t = begin
match offset with
| 0 -> t
| _ -> fn (Uns.pred offset) (pred t)
end in
fn offset t

let seek offset t =
if Sint.(offset < kv 0) then seek_rev (Uns.of_sint (Sint.neg offset)) t
else seek_fwd (Uns.of_sint offset) t

let pos t =
let rec col0_delta t i = begin
match t.vind = 0 with
| true -> i
| false -> begin
let cp, t' = prev t in
match cp with
| cp when Codepoint.(cp = nl) -> i
| _ -> col0_delta t' (Uns.succ i)
end
end in
let col = match Sint.is_negative (Spos.scol t.spos) with
| false -> Uns.of_sint (Spos.scol t.spos)
| true -> col0_delta t 0
in
{Pos.line=Spos.line t.spos; col}
end
include T
include Cmpable.Make(T)
end

module Slice = struct
include Slice.Make_mono(Cursor)
end

(******************************************************************************)
(* Begin tests. *)

(* XXX *)
Loading

0 comments on commit 7ccd66d

Please sign in to comment.