Skip to content

Commit d19a212

Browse files
committed
persistent bit vectors, wip
1 parent 3cd817e commit d19a212

File tree

4 files changed

+218
-7
lines changed

4 files changed

+218
-7
lines changed

bitv.mli

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ type t
3131

3232
val create : int -> bool -> t
3333
(** [(Bitv.create n b)] creates a new bit vector of length [n],
34-
initialized with [b]. *)
34+
initialized with [b]. *)
3535

3636
val init : int -> (int -> bool) -> t
3737
(** [(Bitv.init n f)] returns a fresh vector of length [n],

pbv.ml

Lines changed: 183 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,21 @@ module type S = sig
7878
val compare: t -> t -> int
7979
val equal: t -> t -> bool
8080
val hash: t -> int
81+
82+
val unsafe_get: t -> int -> bool
83+
val unsafe_set: t -> int -> bool -> t
84+
end
85+
86+
module SetOps(X: sig
87+
type t
88+
val length: t -> int
89+
val pop: t -> int
90+
val get: t -> int -> bool
91+
end) = struct
92+
let mem i v = X.get v i
93+
let cardinal = X.pop
94+
let find i s = if mem i s then i else raise Not_found
95+
let find_opt i s = if mem i s then Some i else None
8196
end
8297

8398
module Native = struct
@@ -175,8 +190,6 @@ module Native = struct
175190

176191
let empty _n = 0
177192
let full _n = -1
178-
let mem i v = get v i
179-
let cardinal = pop
180193
let singleton len i =
181194
if i < 0 || i >= len then invalid_arg "singleton";
182195
1 lsl i
@@ -192,8 +205,12 @@ module Native = struct
192205
let max_elt_opt v =
193206
if is_empty v then None else Some (Sys.int_size - 1 - nlz v)
194207

195-
let find i s = if mem i s then i else raise Not_found
196-
let find_opt i s = if mem i s then Some i else None
208+
include SetOps(struct
209+
type t_ = t type t = t_
210+
let length = length
211+
let pop = pop
212+
let get = get
213+
end)
197214

198215
let choose = min_elt
199216
let choose_opt = min_elt_opt
@@ -313,9 +330,170 @@ module Native = struct
313330

314331
end
315332

333+
module Large : S = struct
334+
335+
(* leaf
336+
62 56 50 44 38 32 31 0
337+
+-+------+------+------+------+------+------------------------------+
338+
|?| ?? | nlz | ntz | pop | size | bits |
339+
+-+------+------+------+------+------+------------------------------+
340+
node info
341+
62 61 31 30 0
342+
+-+----------------------------------+------------------------------+
343+
|?| ntz | size |
344+
+-+----------------------------------+------------------------------+
345+
*)
346+
347+
let ilen x = (x lsr 32) land 0x3F
348+
let ipop x = (x lsr 38) land 0x3F
349+
let intz x = (x lsr 44) land 0x3F
350+
let inlz x = (x lsr 50) land 0x3F
351+
let imk ~nlz ~ntz ~pop ~size bits =
352+
assert (bits < 0x1_0000_0000);
353+
(nlz lsl 50) lor (ntz lsl 44) lor (pop lsl 38) lor (size lsl 32) lor bits
354+
355+
let izeros size =
356+
assert (size <= 32);
357+
imk ~nlz:size ~ntz:size ~pop:0 ~size 0
358+
let iones size =
359+
assert (size <= 32);
360+
imk ~nlz:0 ~ntz:0 ~pop:size ~size 0xFFFF_FFFF
361+
let imake size b = if b then iones size else izeros size
362+
363+
let nlen i = i land 0x7FFF_FFFF
364+
let nntz i = i lsr 31
365+
366+
type t =
367+
| Leaf of int
368+
| Node of { info: int; high: t; low: t }
369+
370+
let length = function
371+
| Leaf x -> ilen x
372+
| Node {info;_} -> nlen info
373+
374+
let ntz = function
375+
| Leaf x -> intz x
376+
| Node {info;_} -> nntz info
377+
378+
let max_length = 1 lsl 31 - 1
379+
380+
let node ~low ~high =
381+
let lenl = length low and lenh = length high in
382+
let len = lenl + lenh in
383+
if len > max_length then invalid_arg "max length exceeded";
384+
let ntzl = ntz low and ntzh = ntz high in
385+
let ntz = if ntzl < lenl then ntzl else lenl + ntzh in
386+
let info = (ntz lsl 31) lor len in
387+
Node { info; high; low }
388+
389+
type elt = int
390+
type size = int
391+
392+
(* both size and size+1 *)
393+
let make2 size _b =
394+
if size < 32 then assert false (*TODO*) else assert false (*TODO*)
395+
396+
let make size b =
397+
if size <= 32 then Leaf (imake size b) else
398+
let v, _ = make2 size b in v
399+
400+
let rec unsafe_get v i = match v with
401+
| Leaf x ->
402+
(x lsr i) land 1 <> 0
403+
| Node {high; low; _} ->
404+
let ll = length low in
405+
if i < ll then unsafe_get low i else unsafe_get high (i - ll)
406+
407+
let unsafe_set _v _i _b =
408+
assert false (*TODO*)
409+
410+
let check_index s v i =
411+
if i < 0 || i >= length v then invalid_arg s
412+
413+
let get v i =
414+
check_index "get" v i;
415+
unsafe_get v i
416+
417+
let set v i b =
418+
check_index "set" v i;
419+
unsafe_set v i b
420+
421+
let is_empty v =
422+
ntz v = length v
423+
424+
let rec pop = function
425+
| Leaf x -> ipop x
426+
| Node { high; low; _ } -> pop high + pop low
427+
428+
include SetOps(struct
429+
type t_ = t type t = t_
430+
let length = length
431+
let pop = pop
432+
let get = get
433+
end)
434+
435+
let swap: t -> int -> t = fun _ -> assert false (*TODO*)
436+
437+
let bw_and: t -> t -> t = fun _ -> assert false (*TODO*)
438+
let bw_or: t -> t -> t = fun _ -> assert false (*TODO*)
439+
let bw_xor: t -> t -> t = fun _ -> assert false (*TODO*)
440+
let bw_not: t -> t = fun _ -> assert false (*TODO*)
441+
442+
let rec nlz = function
443+
| Leaf x -> inlz x
444+
| Node { high; low; _ } ->
445+
let nlzh = nlz high in
446+
if nlzh < length high then nlzh else nlzh + nlz low
447+
448+
let empty: size -> t = fun _ -> assert false (*TODO*)
449+
let full: size -> t = fun _ -> assert false (*TODO*)
450+
let mem: elt -> t -> bool = fun _ -> assert false (*TODO*)
451+
let singleton: size -> elt -> t = fun _ -> assert false (*TODO*)
452+
let min_elt: t -> elt = fun _ -> assert false (*TODO*)
453+
let min_elt_opt: t -> elt option = fun _ -> assert false (*TODO*)
454+
let max_elt: t -> elt = fun _ -> assert false (*TODO*)
455+
let max_elt_opt: t -> elt option = fun _ -> assert false (*TODO*)
456+
let add: elt -> t -> t = fun _ -> assert false (*TODO*)
457+
let remove: elt -> t -> t = fun _ -> assert false (*TODO*)
458+
let union: t -> t -> t = fun _ -> assert false (*TODO*)
459+
let inter: t -> t -> t = fun _ -> assert false (*TODO*)
460+
let diff: t -> t -> t = fun _ -> assert false (*TODO*)
461+
let subset: t -> t -> bool = fun _ -> assert false (*TODO*)
462+
let disjoint: t -> t -> bool = fun _ -> assert false (*TODO*)
463+
let iter: (elt -> unit) -> t -> unit = fun _ -> assert false (*TODO*)
464+
let map: (elt -> elt) -> t -> t = fun _ -> assert false (*TODO*)
465+
let fold: (elt -> 'a -> 'a) -> t -> 'a -> 'a = fun _ -> assert false (*TODO*)
466+
let for_all: (elt -> bool) -> t -> bool = fun _ -> assert false (*TODO*)
467+
let exists: (elt -> bool) -> t -> bool = fun _ -> assert false (*TODO*)
468+
let filter: (elt -> bool) -> t -> t = fun _ -> assert false (*TODO*)
469+
let filter_map: (elt -> elt option) -> t -> t = fun _ -> assert false (*TODO*)
470+
let partition: (elt -> bool) -> t -> t * t = fun _ -> assert false (*TODO*)
471+
let elements: t -> elt list = fun _ -> assert false (*TODO*)
472+
let choose: t -> elt = fun _ -> assert false (*TODO*)
473+
let choose_opt: t -> elt option = fun _ -> assert false (*TODO*)
474+
let split: elt -> t -> t * bool * t = fun _ -> assert false (*TODO*)
475+
let find: elt -> t -> elt = fun _ -> assert false (*TODO*)
476+
let find_opt: elt -> t -> elt option = fun _ -> assert false (*TODO*)
477+
let find_first: (elt -> bool) -> t -> elt = fun _ -> assert false (*TODO*)
478+
let find_first_opt: (elt -> bool) -> t -> elt option = fun _ -> assert false (*TODO*)
479+
let find_last: (elt -> bool) -> t -> elt = fun _ -> assert false (*TODO*)
480+
let find_last_opt: (elt -> bool) -> t -> elt option = fun _ -> assert false (*TODO*)
481+
let of_list: elt list -> t = fun _ -> assert false (*TODO*)
482+
let to_seq_from : elt -> t -> elt Seq.t = fun _ -> assert false (*TODO*)
483+
let to_seq : t -> elt Seq.t = fun _ -> assert false (*TODO*)
484+
let to_rev_seq : t -> elt Seq.t = fun _ -> assert false (*TODO*)
485+
let add_seq : elt Seq.t -> t -> t = fun _ -> assert false (*TODO*)
486+
let of_seq : elt Seq.t -> t = fun _ -> assert false (*TODO*)
487+
let print_set: Format.formatter -> t -> unit = fun _ -> assert false (*TODO*)
488+
489+
let compare: t -> t -> int = fun _ -> assert false (*TODO*)
490+
let equal: t -> t -> bool = fun _ -> assert false (*TODO*)
491+
let hash: t -> int = fun _ -> assert false (*TODO*)
492+
493+
end
494+
316495
let fixed_size n : (module S) =
317496
if n = Sys.int_size then
318497
(module Native)
319498
else
320499
assert false (*TODO*)
321-

pbv.mli

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
module type S = sig
2424
type t
25+
(** the type of persistent bit vectors *)
2526

2627
(** Array interface *)
2728

@@ -30,7 +31,7 @@ module type S = sig
3031
val make: int -> bool -> t
3132
val get: t -> int -> bool
3233
val set: t -> int -> bool -> t
33-
(* TODO: iter, print, fill, blit, sub, append *)
34+
(* TODO: init, iter, print, fill, blit, sub, append *)
3435

3536
(** Bit vector interface *)
3637

@@ -89,14 +90,20 @@ module type S = sig
8990
val add_seq : elt Seq.t -> t -> t
9091
val of_seq : elt Seq.t -> t
9192
val print_set: Format.formatter -> t -> unit
93+
(* TODO iter_subsets *)
9294

9395
val compare: t -> t -> int
9496
val equal: t -> t -> bool
9597
val hash: t -> int
98+
99+
val unsafe_get: t -> int -> bool
100+
val unsafe_set: t -> int -> bool -> t
96101
end
97102

98103
module Native : S
99104
(** Bit-vectors of size [Sys.int_size].
100105
Note: The size parameter of [empty] and [full] is ignored. *)
101106

107+
module Large : S
108+
102109
val fixed_size: int -> (module S)

test_pbv.ml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ let test (module X: S) (size: int) =
7070
assert (X.length b = size);
7171
assert (X.get b i);
7272
assert (X.pop b = 1);
73+
assert (X.ntz b = i);
74+
assert (b = X.singleton size i);
7375
assert (X.swap b i = v0);
7476
let v = X.set v1 i false in
7577
assert (X.length v = size);
@@ -84,7 +86,31 @@ let test (module X: S) (size: int) =
8486
assert (X.min_elt s = i);
8587
assert (X.max_elt s = i);
8688
done;
89+
let sieve (limit: int) =
90+
assert (limit > 1);
91+
let rec loop v n =
92+
if n > limit then v else
93+
if X.unsafe_get v n then (* n is prime *)
94+
let rec mark v i =
95+
if i > limit then v else
96+
let v = X.unsafe_set v i false in mark v (i + 2*n) in
97+
let v = if n <= limit/n then mark v (n * n) else v in
98+
loop v (n + 2)
99+
else
100+
loop v (n + 2) in
101+
let v = X.make (limit + 1) true in
102+
let v = X.unsafe_set v 0 false in
103+
let v = X.unsafe_set v 1 false in
104+
loop v 3
105+
in
106+
if size >= 101 then assert (X.pop (sieve 100) = 25);
107+
if size >= 1001 then assert (X.pop (sieve 1000) = 168);
87108
()
88109

89110
let () = test (module Native) Sys.int_size
111+
let () = test (module Large) 31
112+
let () = test (module Large) 32
113+
let () = test (module Large) Sys.int_size
114+
let () = test (module Large) 200
115+
90116

0 commit comments

Comments
 (0)