Skip to content
Permalink
Browse files

Re-imported convert_dico_place.ml from old geneweb

  • Loading branch information...
sagotch committed Sep 3, 2019
1 parent 2436e14 commit 982200637eaef94665ff6bf37da3a5f5fe39cbda
Showing with 101 additions and 0 deletions.
  1. +19 −0 bin/distrib/dico_place/README.MD
  2. +71 −0 bin/distrib/dico_place/marshal_dico_place.ml
  3. +11 −0 bin/distrib/dune.in
@@ -0,0 +1,19 @@
# marshal_dico_place.exe

This tool takes CSV files as input, creates place lists from them, and
writes them to binary files meant to be used by the `gwd` API.

e.g. for a given language `LANG`, provide a `dico_place_LANG.csv` file
containing data as `town,area_code,county,region,country`, and it will
produce these files:

```
dico_place_town_LANG.list
dico_place_area_code_LANG.list
dico_place_county_LANG.list
dico_place_region_LANG.list
dico_place_country_LANG.list
```

Place these files in the `lang` folder used by your geneweb server,
and you are good to go.
@@ -0,0 +1,71 @@
open Geneweb

(* Set of strings; used below to deduplicate the place entries collected
   from the CSV file before they are sorted and written out. *)
module StrSet = Set.Make (String)

(** [rest str] returns the part of [str] located after its first comma,
    i.e. [str] with its first comma-separated field removed.
    @raise Not_found if [str] contains no comma. *)
let rest str =
  let after_comma = succ (String.index str ',') in
  String.sub str after_comma (String.length str - after_comma)

(** [write_dico_place_set lang] reads the CSV file [dico_place_LANG.csv]
    (looked up with [Util.search_in_lang_path]) and writes five files named
    [dico_place_<level>_LANG.list], where [<level>] is one of [town],
    [area_code], [county], [region], [country].

    Each CSV line is expected to hold exactly five comma-separated fields
    [town,area_code,county,region,country]; lines with any other number of
    fields are silently skipped.  For every level whose own field is
    non-empty, the entry recorded is the line starting at that field (the
    whole line for [town], the line minus its first field for [area_code],
    and so on).  Entries are deduplicated, sorted with [Gutil.alphabetic] and
    written with [output_value] as a single [string list].

    An existing output file is truncated, so running the tool again replaces
    the previous list instead of appending a second marshalled value after
    it.

    Any exception raised while opening, reading or writing the files is
    propagated after the channel in use has been closed. *)
let write_dico_place_set lang =
  let fname_csv = "dico_place_" ^ lang ^ ".csv" in
  let (towns, area_codes, countys, regions, countrys) =
    let ic = Secure.open_in (Util.search_in_lang_path fname_csv) in
    let string_set_town = ref StrSet.empty in
    let string_set_area_code = ref StrSet.empty in
    let string_set_county = ref StrSet.empty in
    let string_set_region = ref StrSet.empty in
    let string_set_country = ref StrSet.empty in
    begin try
      while true do
        let line = input_line ic in
        match String.split_on_char ',' line with
        | [ town ; area_code ; county ; region ; country ] ->
          (* [place] is shortened by one leading field at each level, so
             every set stores the line from its own field onwards. *)
          let place = line in
          if town <> "" then
            string_set_town := StrSet.add place !string_set_town;
          let place = rest place in
          if area_code <> "" then
            string_set_area_code := StrSet.add place !string_set_area_code;
          let place = rest place in
          if county <> "" then
            string_set_county := StrSet.add place !string_set_county;
          let place = rest place in
          if region <> "" then
            string_set_region := StrSet.add place !string_set_region;
          let place = rest place in
          if country <> "" then
            string_set_country := StrSet.add place !string_set_country
        | _ -> ()
      done
    with
    | End_of_file -> close_in ic
    | e ->
      (* Do not leak the input channel on an unexpected failure. *)
      close_in_noerr ic ;
      raise e
    end ;
    (List.sort Gutil.alphabetic (StrSet.elements !string_set_town),
     List.sort Gutil.alphabetic (StrSet.elements !string_set_area_code),
     List.sort Gutil.alphabetic (StrSet.elements !string_set_county),
     List.sort Gutil.alphabetic (StrSet.elements !string_set_region),
     List.sort Gutil.alphabetic (StrSet.elements !string_set_country))
  in
  (* Marshal [data] into the ".list" file for the level called [name]. *)
  let generate name data =
    let fname_set = "dico_place_" ^ name ^ "_" ^ lang ^ ".list" in
    (* [Open_trunc] rather than [Open_append]: the file must contain exactly
       one marshalled value, otherwise a reader keeps seeing the oldest one. *)
    let ext_flags =
      [ Open_wronly ; Open_trunc ; Open_creat ; Open_binary ; Open_nonblock ]
    in
    let oc = Secure.open_out_gen ext_flags 0o644 fname_set in
    begin
      try output_value oc (data : (string list))
      with e -> close_out_noerr oc ; raise e
    end ;
    close_out oc
  in
  generate "town" towns ;
  generate "area_code" area_codes ;
  generate "county" countys ;
  generate "region" regions ;
  generate "country" countrys

(* Entry point: parse the command line, then generate the place lists for
   every selected language.

   -add  <langs>  adds the given comma-separated languages to the current
                  list (the defaults, unless -lang was given earlier).
   -lang <langs>  replaces the list with the given comma-separated languages.

   Anonymous arguments are ignored. *)
let () =
  let lang = ref ["de"; "en"; "es"; "fi"; "fr"; "it"; "nl"; "no"; "pt"; "sv"] in
  let split_langs = String.split_on_char ',' in
  let speclist =
    [ ( "-add"
      (* Split the argument as the help text promises; a single language
         still ends up prepended exactly as before. *)
      , Arg.String (fun x -> lang := split_langs x @ !lang)
      , "<langs> comma-separated list of languages added to default ones: "
        ^ String.concat "," !lang ^ ".")
    ; ( "-lang"
      , Arg.String (fun x -> lang := split_langs x)
      , "<langs> comma-separated list of languages to process.")
    ]
  in
  let usage = "Usage: " ^ Sys.argv.(0) in
  Arg.parse speclist ignore usage;
  List.iter write_dico_place_set !lang
@@ -1,5 +1,16 @@
; In the dev profile, pass -w -33 to the compiler (turns off warning 33,
; unused open statement).
(env (dev (flags (-w -33) ) ) )

; The marshal_dico_place executable is only declared when API is defined at
; preprocessing time. Otherwise the dico_place directory is removed from the
; set of directories dune looks at.
#ifdef API
(executable
(name marshal_dico_place)
(public_name marshal_dico_place)
(libraries unix %%%GWDB_PKG%%% geneweb geneweb.wserver)
(modules marshal_dico_place)
)
#else
(dirs :standard \ dico_place)
#endif

; Modules located in subdirectories are treated as part of this directory.
(include_subdirs unqualified)

(executables

0 comments on commit 9822006

Please sign in to comment.
You can’t perform that action at this time.