Skip to content
Permalink
Browse files

Place module: added split_suburb,only_suburb,without_suburb,compare_p…

…laces functions.

Used compare_places and fixed autocompletion ordering
  • Loading branch information...
sagotch committed Oct 10, 2019
1 parent f2f6ada commit ab7f72330d33ed338c0f9803a5759f626e6a4cc1
Showing with 128 additions and 48 deletions.
  1. +5 −0 benchmark/bench.ml
  2. +5 −1 lib/api_saisie_autocomplete.ml
  3. +10 −10 lib/api_search.ml
  4. +51 −13 lib/place.ml
  5. +5 −24 lib/updateData.ml
  6. +11 −0 lib/util/mutil.ml
  7. +5 −0 lib/util/mutil.mli
  8. +36 −0 test/test_place.ml
@@ -24,6 +24,11 @@ let () =
[ "aaaaaaaaaa" ; "bbbbbbbbbb" ; "abbbbbbbb" ; "bbbbbbbbba" ; "ababababab" ]
; bench "Mutil.contains" (Mutil.contains "foobarbaz")
[ "foo" ; "bar" ; "baz" ; "foobarbaz!" ]
; bench "Place.compare_places" (Place.compare_places "[foo-bar] - baz, boobar")
[ "[foo-bar] - baz, boobar"
; "[foo-bar] - baz, boobar, barboo"
; "baz, boobar"
]

; begin match Sys.getenv "BENCH_BASE" with
| exception Not_found -> ()
@@ -51,7 +51,11 @@ let create_cache base mode cache_file =
(Gwdb.families base)
in
let cache = List.rev_map (sou base) (IstrSet.elements cache) in
let cache = List.sort Gutil.alphabetic_order cache in
let cache =
List.sort
(if mode = `place then Place.compare_places else Gutil.alphabetic_order)
cache
in
let oc = Secure.open_out_bin cache_file in
Marshal.to_channel oc cache [ Marshal.No_sharing ] ;
close_out oc
@@ -635,7 +635,7 @@ let get_field mode =
| _ -> failwith "get_field"

(** [ini] must be in the form of [Name.lower @@ Mutil.tr '_' ' ' ini]
Assume that [list] is already sorted.
Assume that [list] is already sorted, but reversed.
*)
let complete_with_dico conf nb max mode ini list =
let reduce_dico mode ignored format list =
@@ -644,7 +644,7 @@ let complete_with_dico conf nb max mode ini list =
| hd :: tl ->
let acc =
let k = Mutil.tr '_' ' ' hd in
let k = if mode <> `subdivision then UpdateData.remove_suburb k else k in
let k = if mode <> `subdivision then Place.without_suburb k else k in
if string_start_with ini (Name.lower k) then begin
let hd =
if format <> []
@@ -689,29 +689,29 @@ let complete_with_dico conf nb max mode ini list =
(String.split_on_char ',' s)
in
let dico_place = reduce_dico mode list format (load_dico_lieu conf mode) in
List.append list (List.sort Gutil.alphabetic_order dico_place)
| _ -> list
List.rev_append list (List.sort Place.compare_places dico_place)
| _ -> List.rev list

let search_auto_complete conf base mode place_mode max n =
let aux data =
let aux data compare =
let conf = { conf with env = ("data", data) :: conf.env } in
UpdateData.get_all_data conf base
|> List.rev_map (sou base)
|> List.sort Gutil.alphabetic_order
|> List.sort compare
in
match mode with

| `place ->
let list = aux "place" in
let list = aux "place" Place.compare_places in
let nb = ref 0 in
let ini = Name.lower @@ Mutil.tr '_' ' ' n in
let reduce_perso list =
let rec loop acc = function
| [] -> List.rev acc
| [] -> acc
| hd :: tl ->
let hd' =
if place_mode <> Some `subdivision
then UpdateData.remove_suburb hd
then Place.without_suburb hd
else hd
in
let acc =
@@ -726,7 +726,7 @@ let search_auto_complete conf base mode place_mode max n =
complete_with_dico conf nb max place_mode ini (reduce_perso list)

| `source ->
let list = aux "src" in
let list = aux "src" Gutil.alphabetic_order in
let nb = ref 0 in
let ini = Name.lower @@ Mutil.tr '_' ' ' n in
let rec reduce acc = function
@@ -3,14 +3,13 @@
open Gwdb
open Util

(** Transform ["[foo-bar] - boobar (baz)"] into ["foo-bar, boobar (baz)"] *)
let normalize s =
let suburb_aux sub nosub s =
let len = String.length s in
if len = 0 then ""
if len = 0 then nosub ""
else begin
if String.unsafe_get s 0 = '[' then begin
match String.index_opt s ']' with
| None -> s
| None -> nosub s
| Some i ->
match
let rec loop b i =
@@ -21,17 +20,56 @@ let normalize s =
| _ -> if b then Some i else None
in loop false (i + 1)
with
| None -> s
| Some j ->
let b = Bytes.create (len - j + i + 1) in
Bytes.blit_string s 1 b 0 (i - 1) ;
Bytes.unsafe_set b (i - 1) ',' ;
Bytes.unsafe_set b i ' ' ;
Bytes.blit_string s j b (i + 1) (len - j) ;
Bytes.unsafe_to_string b
end else s
| None -> nosub s
| Some j -> sub s len i j
end else nosub s
end

(** [split_suburb "[foo-bar] - boobar (baz)"] is [9"foo-bar", "boobar (baz)")] *)
let split_suburb =
suburb_aux
begin fun s len i j -> String.sub s 1 (i - 1), String.sub s j (len - j) end
begin fun s -> "", s end

(** [only_suburb "[foo-bar] - boobar (baz)"] is ["foo-bar"]
[only_suburb "boobar (baz)"] is [""] *)
let only_suburb =
suburb_aux
begin fun s _len i _j -> String.sub s 1 (i - 1) end
begin fun _ -> "" end

(** [without_suburb "[foo-bar] - boobar (baz)"] is ["boobar (baz)"]
[without_suburb "boobar (baz)"] is ["boobar (baz)"] *)
let without_suburb =
suburb_aux
begin fun s len _i j -> String.sub s j (len - j) end
begin fun s -> s end

(** Transform ["[foo-bar] - boobar (baz)"] into ["foo-bar, boobar (baz)"] *)
let normalize =
suburb_aux
begin fun s len i j ->
let b = Bytes.create (len - j + i + 1) in
Bytes.blit_string s 1 b 0 (i - 1) ;
Bytes.unsafe_set b (i - 1) ',' ;
Bytes.unsafe_set b i ' ' ;
Bytes.blit_string s j b (i + 1) (len - j) ;
Bytes.unsafe_to_string b
end
begin fun s -> s end

let compare_places s1 s2 =
let ss1, s1 = split_suburb s1 in
let ss2, s2 = split_suburb s2 in
match
Mutil.list_compare
Gutil.alphabetic_order
(String.split_on_char ',' s1)
(String.split_on_char ',' s2)
with
| 0 -> Gutil.alphabetic_order ss1 ss2
| x -> x

(* [String.length s > 0] is always true because we already tested [is_empty_string].
If it is not true, then the base should be cleaned. *)
let fold_place_long inverted s =
@@ -115,29 +115,10 @@ let get_person_from_data conf base =
(fun istr pset acc -> (istr, PersSet.elements pset) :: acc)
acc []


(* ********************************************************************* *)
(* [Fonc] remove_suburb : string -> string *)
(** [Description] : Enlève le lieu-dit (de la forme
"[Lieu-dit] - Commune...") d'une chaîne de caractères.
[Args] :
- s : chaîne de caractères contenant le lieu-dit.
[Retour] : Retourne la chaîne de caractères dont le lieu-dit a été
enlevé.
[Rem] : Non exporté en clair hors de ce module. *)
(* ********************************************************************* *)
let remove_suburb s =
let re = Str.regexp "^\\[.+\\] - " in
let matched = Str.string_match re s 0 in
if matched then
let sub_start = Str.match_end () in
String.sub s sub_start (String.length s - sub_start)
else s

let combine_by_ini ini list =
let len = Util.str_length ini + 1 in
Util.groupby
~key:(fun (_, s) -> AllnDisplay.ini len @@ remove_suburb s)
~key:(fun (_, s) -> AllnDisplay.ini len @@ Place.without_suburb s)
~value:(fun x -> x)
list

@@ -544,7 +525,7 @@ let build_list conf base =
if ini <> "" then
Util.filter_map begin fun istr ->
let str = sou base istr in
if Mutil.start_with ~wildcard:true ini 0 @@ remove_suburb str
if Mutil.start_with ~wildcard:true ini 0 @@ Place.without_suburb str
then Some (istr, str)
else None
end list
@@ -576,7 +557,7 @@ let build_list_short conf list =
let ini_list =
List.rev_map
(fun (_, s) ->
let s = remove_suburb s in
let s = Place.without_suburb s in
if String.length s > len then
String.sub s 0 (index_of_next_char s len)
else s ^ String.make (len + 1 - String.length s) '_')
@@ -719,8 +700,8 @@ let print_foreach conf print_ast _eval_expr =
Vlist_value l ->
List.sort
(fun (_, s1) (_, s2) ->
let rss1 = remove_suburb s1 in
let rss2 = remove_suburb s2 in
let rss1 = Place.without_suburb s1 in
let rss2 = Place.without_suburb s2 in
if rss1 = rss2 then Gutil.alphabetic_order s1 s2
else Gutil.alphabetic_order rss1 rss2)
l
@@ -425,3 +425,14 @@ let string_of_int_sep sep x =
(0, 0) digits
in
Bytes.unsafe_to_string s

let rec list_compare cmp l1 l2 =
match l1, l2 with
| x1 :: l1, x2 :: l2 -> begin
match cmp x1 x2 with
| 0 -> list_compare cmp l1 l2
| x -> x
end
| [], [] -> 0
| [], _ -> -1
| _, [] -> 1
@@ -90,3 +90,8 @@ val rm : string -> unit
(** [string_of_int_sep "," 1000000] is ["1,000,000"]
*)
val string_of_int_sep : string -> int -> string

(** [list_compare cmp l1 l2]
Comparison function for lists, using [cmp] to compare each elements
*)
val list_compare : ('a -> 'a -> int) -> 'a list -> 'a list -> int
@@ -11,5 +11,41 @@ let suite =
; test "[foo-bar - boobar (baz)" "[foo-bar - boobar (baz)"
; test "[foo-bar] boobar (baz)" "[foo-bar] boobar (baz)"
end
; "split_suburb" >:: begin fun _ ->
let test exp inp =
assert_equal
~printer:(fun (a,b) -> Printf.sprintf {|("%s","%s")|} a b)
exp (Place.split_suburb inp)
in
test ("foo-bar", "boobar (baz)") "[foo-bar] - boobar (baz)"
; test ("", "boobar (baz)") "boobar (baz)"
end
; "only_suburb" >:: begin fun _ ->
let test exp inp =
assert_equal ~printer:(fun s -> s) exp (Place.only_suburb inp)
in
test "foo-bar" "[foo-bar] - boobar (baz)"
; test "" "boobar (baz)"
end
; "without_suburb" >:: begin fun _ ->
let test exp inp =
assert_equal ~printer:(fun s -> s) exp (Place.without_suburb inp)
in
test "boobar (baz)" "[foo-bar] - boobar (baz)"
; test "boobar (baz)" "boobar (baz)"
end
; "compare_places" >:: begin fun _ ->
let test exp a b =
assert_equal ~printer:string_of_int exp (Place.compare_places a b)
; assert_equal ~printer:string_of_int (-exp) (Place.compare_places b a)
in
test 0 "boobar (baz)" "boobar (baz)"
; test (-1) "baz (boobar)" "boobar (baz)"
; test (-1) "baz (boobar)" "[foo-bar] - baz (boobar)"
; test (-1) "[bar-foo] - baz (boobar)" "[foo-bar] - baz (boobar)"
; test (-1) "[foo-bar] - baz (boobar)" "[bar-foo] - boobar (baz)"
; test (-1) "[foo-bar] - ebaz (boobar)" "[bar-foo] - éboobar (baz)"
; test (-1) "[foo-bar] - baz, boobar, barboo" "[foo-bar] - baz, boobar, barboo, bam"
end
]
]

0 comments on commit ab7f723

Please sign in to comment.
You can’t perform that action at this time.