Skip to content

Commit 4285c88

Browse files
authored
Merge pull request #8 from dlesbre/hset-functions
Extra Hset functions
2 parents 215ddeb + 4e910e6 commit 4285c88

File tree

2 files changed

+144
-9
lines changed

2 files changed

+144
-9
lines changed

hashcons.ml

Lines changed: 104 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,9 @@ module Hset = struct
663663
| Leaf j -> k.tag == j.tag
664664
| Branch (_, m, l, r) -> mem k (if zero_bit k.tag m then l else r)
665665

666+
let find k s = if mem k s then k else raise Not_found
667+
let find_opt k s = if mem k s then Some k else None
668+
666669
(*s The following operation [join] will be used in both insertion and
667670
union. Given two non-empty trees [t0] and [t1] with longest common
668671
prefixes [p0] and [p1] respectively, which are supposed to
@@ -832,8 +835,8 @@ module Hset = struct
832835
s1
833836

834837
(*s All the following operations ([cardinal], [iter], [fold], [for_all],
835-
[exists], [filter], [partition], [choose], [elements]) are
836-
implemented as for any other kind of binary trees. *)
838+
[exists], [filter], [partition], [choose], [choose_opt], [elements],
839+
[to_seq]) are implemented as for any other kind of binary trees. *)
837840

838841
let rec cardinal = function
839842
| Empty -> 0
@@ -878,6 +881,11 @@ module Hset = struct
878881
| Leaf k -> k
879882
| Branch (_, _,t0,_) -> choose t0 (* we know that [t0] is non-empty *)
880883

884+
let rec choose_opt = function
885+
| Empty -> None
886+
| Leaf k -> Some k
887+
| Branch (_, _,t0,_) -> choose_opt t0 (* we know that [t0] is non-empty *)
888+
881889
let elements s =
882890
let rec elements_aux acc = function
883891
| Empty -> acc
@@ -886,6 +894,36 @@ module Hset = struct
886894
in
887895
elements_aux [] s
888896

897+
let to_seq s =
898+
let rec to_seq_aux acc = function
899+
| Empty -> acc
900+
| Leaf k -> Seq.cons k acc
901+
| Branch (_,_,l,r) -> to_seq_aux (to_seq_aux acc r) l
902+
in
903+
to_seq_aux Seq.empty s
904+
905+
let split elt s =
906+
fold (fun elt' (lt, present, gt) ->
907+
if elt'.tag < elt.tag then (add elt' lt, present, gt) else
908+
if elt'.tag > elt.tag then (lt, present, add elt' gt) else
909+
(lt, true, gt)
910+
) s (Empty, false, Empty)
911+
912+
(*s [map] and [filter_map] are implemented via [fold] and [add]
913+
since we can't relate the tag of [f elt] to that of [elt] *)
914+
let map f s = fold (fun elt s -> add (f elt) s) s Empty
915+
let filter_map f s = fold (fun elt s ->
916+
match f elt with
917+
| None -> s
918+
| Some elt' -> add elt' s)
919+
s Empty
920+
921+
let add_seq seq s = Seq.fold_left (fun s elt -> add elt s) s seq
922+
923+
let of_seq seq = add_seq seq Empty
924+
925+
let of_list list = List.fold_left (fun s elt -> add elt s) Empty list
926+
889927
(*s There is no way to give an efficient implementation of [min_elt]
890928
and [max_elt], as with binary search trees. The following
891929
implementation is a traversal of all elements, barely more
@@ -899,11 +937,53 @@ module Hset = struct
899937
| Leaf k -> k
900938
| Branch (_,_,s,t) -> min (min_elt s) (min_elt t)
901939

940+
let min_elt_opt = function
941+
| Empty -> None
942+
| x -> Some (min_elt x)
943+
902944
let rec max_elt = function
903945
| Empty -> raise Not_found
904946
| Leaf k -> k
905947
| Branch (_,_,s,t) -> max (max_elt s) (max_elt t)
906948

949+
let max_elt_opt = function
950+
| Empty -> None
951+
| x -> Some (max_elt x)
952+
953+
(*s [find_first], [find_last] and their opt versions are less efficient
954+
then with binary search trees. They are linear time and can call [f] an
955+
arbitrary number of times, and not necessarily on elements smaller/larger
956+
than the witness. *)
957+
let find_first_opt f s =
958+
fold
959+
(fun elt acc ->
960+
match acc with
961+
| None -> if f elt then Some elt else None
962+
| Some witness ->
963+
if witness.tag <= elt.tag then acc else
964+
if f elt then Some elt else acc)
965+
s None
966+
967+
let find_first f s =
968+
match find_first_opt f s with
969+
| Some elt -> elt
970+
| None -> raise Not_found
971+
972+
let find_last_opt f s =
973+
fold
974+
(fun elt acc ->
975+
match acc with
976+
| None -> if f elt then Some elt else None
977+
| Some witness ->
978+
if witness.tag >= elt.tag then acc else
979+
if f elt then Some elt else acc)
980+
s None
981+
982+
let find_last f s =
983+
match find_last_opt f s with
984+
| Some elt -> elt
985+
| None -> raise Not_found
986+
907987
(*s Another nice property of Patricia trees is to be independent of the
908988
order of insertion. As a consequence, two Patricia trees have the
909989
same elements if and only if they are structurally equal.
@@ -959,4 +1039,26 @@ module Hset = struct
9591039
intersect s1 (if zero_bit p1 m2 then l2 else r2)
9601040
else
9611041
false
1042+
1043+
let disjoint s1 s2 = not (intersect s1 s2)
1044+
1045+
let find_any (type a) f (s : a t) =
1046+
let exception Found of a elt in
1047+
try
1048+
iter (fun elt -> if f elt then raise (Found elt)) s;
1049+
raise Not_found
1050+
with Found elt -> elt
1051+
let find_any_opt (type a) f (s : a t) =
1052+
let exception Found of a elt in
1053+
try
1054+
iter (fun elt -> if f elt then raise (Found elt)) s;
1055+
None
1056+
with Found elt -> Some elt
1057+
1058+
let bind f s = fold (fun elt s -> union (f elt) s) s empty
1059+
1060+
let is_singleton = function
1061+
| Leaf elt -> Some elt
1062+
| _ -> None
1063+
9621064
end

hashcons.mli

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -169,27 +169,60 @@ module Hset : sig
169169
val diff : 'a t -> 'a t -> 'a t
170170
val equal : 'a t -> 'a t -> bool
171171
val compare : 'a t -> 'a t -> int
172-
val elements : 'a t -> 'a elt list
173172
val choose : 'a t -> 'a elt
173+
val choose_opt : 'a t -> 'a elt option
174174
val cardinal : 'a t -> int
175-
val iter : ('a elt -> unit) -> 'a t -> unit
176-
val fold : ('a elt -> 'b -> 'b) -> 'a t -> 'b -> 'b
177175
val for_all : ('a elt -> bool) -> 'a t -> bool
178176
val exists : ('a elt -> bool) -> 'a t -> bool
179-
val filter : ('a elt -> bool) -> 'a t -> 'a t
180177
val partition : ('a elt -> bool) -> 'a t -> 'a t * 'a t
178+
val disjoint : 'a t -> 'a t -> bool
179+
val find : 'a elt -> 'a t -> 'a elt
180+
val find_opt : 'a elt -> 'a t -> 'a elt option
181+
val add_seq : 'a elt Seq.t -> 'a t -> 'a t
182+
val of_seq : 'a elt Seq.t -> 'a t
183+
val of_list : 'a elt list -> 'a t
184+
val split : 'a elt -> 'a t -> 'a t * bool * 'a t
185+
186+
(*s Warning: [iter], [fold], [map], [filter] and [map_filter] do NOT iterate
187+
over element order. Similarly, [elements] and [to_seq] are not sorted. *)
188+
val iter : ('a elt -> unit) -> 'a t -> unit
189+
val fold : ('a elt -> 'b -> 'b) -> 'a t -> 'b -> 'b
190+
val map : ('a elt -> 'b elt) -> 'a t -> 'b t
191+
val filter : ('a elt -> bool) -> 'a t -> 'a t
192+
val filter_map : ('a elt -> 'b elt option) -> 'a t -> 'b t
193+
val elements : 'a t -> 'a elt list
194+
val to_seq : 'a t -> 'a elt Seq.t
181195

182-
(*s Warning: [min_elt] and [max_elt] are linear w.r.t. the size of the
183-
set. In other words, [min_elt t] is barely more efficient than [fold
184-
min t (choose t)]. *)
196+
(*s Warning: [min_elt], [max_elt] and the [_opt] versions are linear w.r.t.
197+
the size of the set. In other words, [min_elt t] is barely more efficient
198+
than [fold min t (choose t)]. *)
185199
val min_elt : 'a t -> 'a elt
200+
val min_elt_opt : 'a t -> 'a elt option
186201
val max_elt : 'a t -> 'a elt
202+
val max_elt_opt : 'a t -> 'a elt option
203+
204+
(*s [find_first], [find_last] are linear time and can call [f] an arbitrary
205+
number of times, and not necessarily on elements smaller/larger
206+
than the witness. *)
207+
val find_first : ('a elt -> bool) -> 'a t -> 'a elt
208+
val find_first_opt : ('a elt -> bool) -> 'a t -> 'a elt option
209+
val find_last : ('a elt -> bool) -> 'a t -> 'a elt
210+
val find_last_opt : ('a elt -> bool) -> 'a t -> 'a elt option
187211

188212
(*s Additional functions not appearing in the signature [Set.S] from ocaml
189213
standard library. *)
190214

191215
(* [intersect u v] determines if sets [u] and [v] have a non-empty
192216
intersection. *)
193217
val intersect : 'a t -> 'a t -> bool
218+
219+
(* Faster finds when order doesn't matter *)
220+
val find_any : ('a elt -> bool) -> 'a t -> 'a elt
221+
val find_any_opt : ('a elt -> bool) -> 'a t -> 'a elt option
222+
223+
val is_singleton : 'a t -> 'a elt option
224+
(* Check if the set is a singleton, if so return unique element *)
225+
226+
val bind : ('a elt -> 'b t) -> 'a t -> 'b t
194227
end
195228

0 commit comments

Comments
 (0)