Skip to content
This repository has been archived by the owner on Jul 14, 2022. It is now read-only.
Permalink
Browse files
Rethink the generation of combined names in version 2.2.0:
Putting them in () does not work well in bidi text, so parentheses are
no longer used.  Instead, names are now (by default) combined with dashes
for ways and with a newline for placenames.  The previous behaviour can be enabled if desired.
  • Loading branch information
giggls committed Nov 13, 2016
1 parent 0c37976 commit bc4e85c
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 65 deletions.
@@ -50,49 +50,68 @@ A convenient way of using these functions is to hide them behind a virtual colum
#### Old style
```
select osml10n_get_placename('Москва́','Moskau',NULL,'Moscow',true) as name;
---> "Москва́ (Moskau)"
--> Москва́
Moskau
select osml10n_get_placename('Москва́','Moskau',NULL,'Moscow',false) as name;
---> "Moskau (Москва́)"
--> Moskau
Москва́
select osml10n_get_placename('القاهرة','Kairo','Cairo','Cairo',false) as name;
--> "Kairo"
--> Kairo
القاهرة
select osml10n_get_placename('Brixen Bressanone','Brixen',NULL,NULL,false) as name;
--> "Brixen"
--> Brixen Bressanone
select osml10n_get_placename('Roma','Rom',NULL,NULL,false) as name;
--> Rom
Roma
select osml10n_get_streetname('Doktor-No-Straße',NULL,NULL,NULL,false) as name;
--> "Dr.-No-Str."
--> Dr.-No-Str.
select osml10n_get_streetname('Dr. No Street','Professor-Doktor-No-Straße',NULL,NULL,false) as name;
--> "Prof.-Dr.-No-Str. (Dr. No St.)"
--> Prof.-Dr.-No-Str. - Dr. No St.
select osml10n_get_name_without_brackets('Dr. No Street','Doktor-No-Straße',NULL,NULL) as name;
--> "Doktor-No-Straße"
select osml10n_get_streetname('улица Воздвиженка',NULL,NULL,'Vozdvizhenka Street',true,'de') as name;
--> "ул. Воздвиженка (Vozdvizhenka St.)"
select osml10n_get_streetname('улица Воздвиженка',NULL,NULL,NULL,true,'de') as name;
--> "ул. Воздвиженка (ul. Vozdviženka)"
select osml10n_get_streetname('вулиця Молока',NULL,NULL,NULL,true,'de') as name;
--> "вул. Молока (vul. Moloka)"
--> Doktor-No-Straße
select osml10n_get_streetname('улица Воздвиженка',NULL,NULL,'Vozdvizhenka Street',true,true,' ','de') as name;
--> ул. Воздвиженка (Vozdvizhenka St.)
select osml10n_get_streetname('улица Воздвиженка',NULL,NULL,NULL,true,true,' ','de') as name;
--> ул. Воздвиженка (ul. Vozdviženka)
select osml10n_get_streetname('вулиця Молока',NULL,NULL,NULL,true,false,' - ','de') as name;
--> вул. Молока - vul. Moloka
```

#### Using hstore column containing all name tags from Openstreetmap
```
select osml10n_get_placename_from_tags('"name"=>"Москва́","name:de"=>"Moskau","name:en"=>"Moscow"',true) as name;
---> "Москва́ (Moskau)"
--> Москва́
Moskau
select osml10n_get_placename_from_tags('"name"=>"Москва́","name:de"=>"Moskau","name:en"=>"Moscow"',false) as name;
---> "Moskau (Москва́)"
select osml10n_get_placename_from_tags('"name"=>"القاهرة","name:de"=>"Kairo","int_name"=>"Cairo","name:en"=>"Cairo"',true) as name;
--> "Kairo"
--> Moskau
Москва́
select osml10n_get_placename_from_tags('"name"=>"القاهرة","name:de"=>"Kairo","int_name"=>"Cairo","name:en"=>"Cairo"',false) as name;
--> Kairo
القاهرة
select osml10n_get_placename_from_tags('"name"=>"Brixen Bressanone","name:de"=>"Brixen"',false) as name;
--> "Brixen"
--> Brixen Bressanone
select osml10n_get_placename_from_tags('"name"=>"Roma","name:de"=>"Rom"',false) as name;
--> Rom
Roma
select osml10n_get_streetname_from_tags('"name"=>"Doktor-No-Straße"',false) as name;
--> "Dr.-No-Str."
--> Dr.-No-Str.
select osml10n_get_streetname_from_tags('"name"=>"Dr. No Street","name:de"=>"Professor-Doktor-No-Straße"',false) as name;
--> "Prof.-Dr.-No-Str. (Dr. No St.)"
select osml10n_get_name_without_brackets_from_tags('"name"=>"Dr. No Street","name:de"=>"Doktor-No-Straße") as name;
--> "Doktor-No-Straße"
select osml10n_get_streetname_from_tags('"name"=>"улица Воздвиженка","name:en"=>"Vozdvizhenka Street"',true) as name;
--> "ул. Воздвиженка (Vozdvizhenka St.)"
select osml10n_get_streetname_from_tags('"name"=>"улица Воздвиженка"',true) as name;
--> "ул. Воздвиженка (ul. Vozdviženka)"
select osml10n_get_streetname_from_tags('"name"=>"вулиця Молока"',true) as name;
--> "вул. Молока (vul. Moloka)"
select osml10n_get_placename_from_tags('"name"=>"주촌 Juchon", "name:ko"=>"주촌","name:ko_rm"=>"Juchon"',true) as name;
--> "Juchon"
--> Prof.-Dr.-No-Str. - Dr. No St.
select osml10n_get_name_without_brackets_from_tags('"name"=>"Dr. No Street","name:de"=>"Doktor-No-Straße"') as name;
--> Doktor-No-Straße
select osml10n_get_streetname_from_tags('"name"=>"улица Воздвиженка","name:en"=>"Vozdvizhenka Street"',true,true,' ','de') as name;
--> ул. Воздвиженка (Vozdvizhenka St.)
select osml10n_get_streetname_from_tags('"name"=>"улица Воздвиженка"',true,true,' ','de') as name;
--> ул. Воздвиженка (ul. Vozdviženka)
select osml10n_get_streetname_from_tags('"name"=>"вулиця Молока"',true,false,' - ','de') as name;
--> вул. Молока - vul. Moloka
select osml10n_get_placename_from_tags('"name"=>"주촌 Juchon", "name:ko"=>"주촌","name:ko_rm"=>"Juchon"',false) as name;
--> 주촌 Juchon
select osml10n_get_placename_from_tags('"name"=>"주촌", "name:ko"=>"주촌","name:ko_rm"=>"Juchon"',false) as name;
--> Juchon
주촌
select osml10n_get_country_name('"ISO3166-1:alpha2"=>"IN","name:de"=>"Indien","name:hi"=>"भारत","name:en"=>"India"') as name;
--> Indien
भारत
India
```
@@ -1,3 +1,13 @@
osml10n (2.2.0pre) unstable; urgency=medium

* rethink generation of combined names:
putting them in () will not work well in bidi text
thus we will not use them anymore
* generate country name from name:xx tags rather than the generic name
tags

-- Sven Geggus <sven-debian@geggus.net> Sun, 30 Oct 2016 13:56:52 +0100

osml10n (2.1.5) unstable; urgency=medium

* Bugfix for Bugfix :(
@@ -50,10 +50,10 @@ CREATE or REPLACE FUNCTION osml10n_contains_cjk(text) RETURNS BOOLEAN AS $$
$$ LANGUAGE 'plpgsql' IMMUTABLE;

/*
helper function "osml10n_gen_bracketed_name"
Will create a name (name in brackets) pair
helper function "osml10n_gen_combined_name"
Will create a name+local_name pair
*/
CREATE or REPLACE FUNCTION osml10n_gen_bracketed_name(local_name text, name text, loc_in_brackets boolean) RETURNS TEXT AS $$
CREATE or REPLACE FUNCTION osml10n_gen_combined_name(local_name text, name text, loc_in_brackets boolean, show_brackets boolean DEFAULT true, separator text DEFAULT ' ') RETURNS TEXT AS $combined$
DECLARE
nobrackets boolean;
regex text;
@@ -65,42 +65,62 @@ CREATE or REPLACE FUNCTION osml10n_gen_bracketed_name(local_name text, name text
END IF;
nobrackets=false;
/* Now we need to do some heuristic to check if the generation of a
bracketed name is a good idea.
combined name is a good idea.
Currently we do the following:
If local_name is part of name as a single word, not just as a substring
we return name and discard local_name.
Otherwise we return a combined bracketed name with name and local_name
Otherwise we return a combined name with name and local_name
*/
unacc = unaccent(name);
unacc_local = unaccent(local_name);
if (position(unacc_local in unacc) >0) THEN
/* the regexp_replace function below is a quotemeta equivalent
http://stackoverflow.com/questions/11442090/implementing-quotemeta-q-e-in-tcl/11442113
*/
regex = '[\s\(\)\-,;:/\[\]]('|| regexp_replace(unacc_local, '[][#$^*()+{}\\|.?-]', '\\\&', 'g') ||')[\s\(\)\-,;:/\[\]]';
regex = '[\s\(\)\-,;:/\[\]](' || regexp_replace(unacc_local, '[][#$^*()+{}\\|.?-]', '\\\&', 'g') ||')[\s\(\)\-,;:/\[\]]';
-- raise notice 'regex: %',regex;
IF regexp_matches(concat(' ',unacc,' '),regex) IS NOT NULL THEN
nobrackets=true;
END IF;
END IF;

-- raise notice 'nobrackets: %',nobrackets;
IF nobrackets THEN
return name;
ELSE
IF ( loc_in_brackets ) THEN
return name||' ('||local_name||')';
-- explicitly mark the whole string as LTR: chr(8237) is U+202D (LEFT-TO-RIGHT OVERRIDE), chr(8236) is U+202C (POP DIRECTIONAL FORMATTING)
IF ( show_brackets ) THEN
return chr(8237)||name||separator||'('||local_name||')'||chr(8236);
ELSE
return chr(8237)||name||separator||local_name||chr(8236);
END IF;
ELSE
return local_name||' ('||name||')';
-- explicitly mark the whole string as LTR: chr(8237) is U+202D (LEFT-TO-RIGHT OVERRIDE), chr(8236) is U+202C (POP DIRECTIONAL FORMATTING)
IF ( show_brackets ) THEN
return chr(8237)||local_name||separator||'('||name||')'||chr(8236);
ELSE
return chr(8237)||local_name||separator||name||chr(8236);
END IF;
END IF;
END IF;
END;
$$ LANGUAGE 'plpgsql' IMMUTABLE;
$combined$ LANGUAGE 'plpgsql' IMMUTABLE;


CREATE or REPLACE FUNCTION osml10n_get_placename(name text, local_name text, int_name text, name_en text, loc_in_brackets boolean, place geometry DEFAULT NULL) RETURNS TEXT AS $$
CREATE or REPLACE FUNCTION osml10n_get_placename(name text,
local_name text,
int_name text,
name_en text,
loc_in_brackets boolean,
show_brackets boolean DEFAULT false,
separator text DEFAULT chr(10),
place geometry DEFAULT NULL
) RETURNS TEXT AS $$
BEGIN
IF (local_name is not NULL) THEN
return osml10n_gen_bracketed_name(local_name,name,loc_in_brackets);
return osml10n_gen_combined_name(local_name,name,loc_in_brackets,show_brackets,separator);
END IF;
IF (name is not NULL) THEN
if (name = '') THEN
@@ -113,26 +133,34 @@ CREATE or REPLACE FUNCTION osml10n_get_placename(name text, local_name text, int
-- these are currently international and english names
IF (int_name is not NULL) THEN
if osml10n_is_latin(int_name) THEN
return osml10n_gen_bracketed_name(int_name,name,loc_in_brackets);
return osml10n_gen_combined_name(int_name,name,loc_in_brackets,show_brackets,separator);
END IF;
END IF;
IF (name_en is not NULL) THEN
return osml10n_gen_bracketed_name(name_en,name,loc_in_brackets);
return osml10n_gen_combined_name(name_en,name,loc_in_brackets,show_brackets,separator);
END IF;
-- transliteration as last resort
return osml10n_gen_bracketed_name(osml10n_geo_translit(name,place),name,loc_in_brackets);
return osml10n_gen_combined_name(osml10n_geo_translit(name,place),name,loc_in_brackets,show_brackets,separator);
ELSE
return NULL;
END IF;
END;
$$ LANGUAGE 'plpgsql' STABLE;

CREATE or REPLACE FUNCTION osml10n_get_streetname(name text, local_name text, int_name text, name_en text, loc_in_brackets boolean, langcode text DEFAULT 'de', place geometry DEFAULT NULL) RETURNS TEXT AS $$
CREATE or REPLACE FUNCTION osml10n_get_streetname(name text,
local_name text,
int_name text,
name_en text,
loc_in_brackets boolean,
show_brackets boolean DEFAULT false,
separator text DEFAULT ' - ',
langcode text DEFAULT 'de',
place geometry DEFAULT NULL) RETURNS TEXT AS $$
DECLARE
abbrev text;
BEGIN
IF (local_name is not NULL) THEN
return osml10n_gen_bracketed_name(osml10n_street_abbrev(local_name,langcode),osml10n_street_abbrev_all(name),loc_in_brackets);
return osml10n_gen_combined_name(osml10n_street_abbrev(local_name,langcode),osml10n_street_abbrev_all(name),loc_in_brackets,show_brackets,separator);
END IF;
IF (name is not NULL) THEN
if (name = '') THEN
@@ -145,15 +173,15 @@ CREATE or REPLACE FUNCTION osml10n_get_streetname(name text, local_name text, in
-- these are currently international and english names
IF (int_name is not NULL) THEN
if osml10n_is_latin(int_name) THEN
return osml10n_gen_bracketed_name(osml10n_street_abbrev_en(int_name),osml10n_street_abbrev_non_latin(name),loc_in_brackets);
return osml10n_gen_combined_name(osml10n_street_abbrev_en(int_name),osml10n_street_abbrev_non_latin(name),loc_in_brackets,show_brackets,separator);
END IF;
END IF;
IF (name_en is not NULL) THEN
return osml10n_gen_bracketed_name(osml10n_street_abbrev_en(name_en),osml10n_street_abbrev_non_latin(name),loc_in_brackets);
return osml10n_gen_combined_name(osml10n_street_abbrev_en(name_en),osml10n_street_abbrev_non_latin(name),loc_in_brackets,show_brackets,separator);
END IF;
-- transliteration as last resort
abbrev = osml10n_street_abbrev_non_latin(name);
return osml10n_gen_bracketed_name(osml10n_geo_translit(abbrev,place),abbrev,loc_in_brackets);
return osml10n_gen_combined_name(osml10n_geo_translit(abbrev,place),abbrev,loc_in_brackets,show_brackets,separator);
ELSE
return NULL;
END IF;

0 comments on commit bc4e85c

Please sign in to comment.