Skip to content

Commit

Permalink
Smarter PlaceNode.Country(). Fixes #143 (#147)
Browse files Browse the repository at this point in the history
For places that do not follow the strict format of four jurisdictional entities, the country can still be extracted by comparing the suffix of the place name against a list of known countries.

It can work around some basic punctuation, capitalization and whitespace. However, it is not as foolproof as it could be, yet.
  • Loading branch information
elliotchance committed Sep 27, 2018
1 parent 9c7165d commit 18e9024
Show file tree
Hide file tree
Showing 3 changed files with 261 additions and 2 deletions.
215 changes: 215 additions & 0 deletions countries.go
@@ -0,0 +1,215 @@
package gedcom

// Countries holds the names of known current and historical countries.
//
// The list exists so that a country can be recognised inside a free-form place
// name; see PlaceNode.Country(). Some entries are partial names or demonyms
// (for example "British" or "Czech") rather than full country names.
//
// PlaceNode.Country() walks the list in order when comparing suffixes, so a
// longer name must appear before any shorter name it ends with (for example
// "South Sudan" before "Sudan").
//
// Source: https://www.searchify.ca/list-of-countries/
var Countries = []string{
	"Afghanistan",
	"Albania",
	"Algeria",
	"America",
	"Andorra",
	"Angola",
	"Antigua",
	"Argentina",
	"Armenia",
	"Australia",
	"Austria",
	"Azerbaijan",
	"Bahamas",
	"Bahrain",
	"Bangladesh",
	"Barbados",
	"Belarus",
	"Belgium",
	"Belize",
	"Benin",
	"Bhutan",
	"Bissau",
	"Bolivia",
	"Bosnia",
	"Botswana",
	"Brazil",
	"British",
	"Brunei",
	"Bulgaria",
	"Burkina",
	"Burma",
	"Burundi",
	"Cambodia",
	"Cameroon",
	"Canada",
	"Cape Verde",
	"Central African Republic",
	"Chad",
	"Chile",
	"China",
	"Colombia",
	"Comoros",
	"Congo",
	"Costa Rica",
	"Croatia",
	"Cuba",
	"Cyprus",
	"Czech",
	"Denmark",
	"Djibouti",
	"Dominica",
	"East Timor",
	"Ecuador",
	"Egypt",
	"El Salvador",
	"Emirate",
	"England",
	"Eritrea",
	"Estonia",
	"Ethiopia",
	"Fiji",
	"Finland",
	"France",
	"Gabon",
	"Gambia",
	"Georgia",
	"Germany",
	"Ghana",
	"Great Britain",
	"Greece",
	"Grenada",
	"Grenadines",
	"Guatemala",
	"Guinea",
	"Guyana",
	"Haiti",
	"Herzegovina",
	"Holland",
	"Honduras",
	"Hungary",
	"Iceland",
	"India",
	"Indonesia",
	"Iran",
	"Iraq",
	"Ireland",
	"Israel",
	"Italy",
	"Ivory Coast",
	"Jamaica",
	"Japan",
	"Jordan",
	"Kazakhstan",
	"Kenya",
	"Kiribati",
	"Korea",
	"Kosovo",
	"Kuwait",
	"Kyrgyzstan",
	"Laos",
	"Latvia",
	"Lebanon",
	"Lesotho",
	"Liberia",
	"Libya",
	"Liechtenstein",
	"Lithuania",
	"Luxembourg",
	"Macedonia",
	"Madagascar",
	"Malawi",
	"Malaysia",
	"Maldives",
	"Mali",
	"Malta",
	"Marshall",
	"Mauritania",
	"Mauritius",
	"Mexico",
	"Micronesia",
	"Moldova",
	"Monaco",
	"Mongolia",
	"Montenegro",
	"Morocco",
	"Mozambique",
	"Myanmar",
	"Namibia",
	"Nauru",
	"Nepal",
	"Netherlands",
	"New Zealand",
	"Nicaragua",
	"Niger",
	"Nigeria",
	"Norway",
	"Oman",
	"Pakistan",
	"Palau",
	"Panama",
	"Papua",
	"Paraguay",
	"Peru",
	"Philippines",
	"Poland",
	"Portugal",
	"Qatar",
	"Romania",
	"Russia",
	"Rwanda",
	"Samoa",
	"San Marino",
	"Sao Tome",
	"Saudi Arabia",
	"Scotland",
	"Scottish",
	"Senegal",
	"Serbia",
	"Seychelles",
	"Sierra Leone",
	"Singapore",
	"Slovakia",
	"Slovenia",
	"Solomon",
	"Somalia",
	"South Africa",
	"South Sudan",
	"Spain",
	"Sri Lanka",
	"St Kitts",
	"St Lucia",
	"Sudan",
	"Suriname",
	"Swaziland",
	"Sweden",
	"Switzerland",
	"Syria",
	"Taiwan",
	"Tajikistan",
	"Tanzania",
	"Thailand",
	"Tobago",
	"Togo",
	"Tonga",
	"Trinidad",
	"Tunisia",
	"Turkey",
	"Turkmenistan",
	"Tuvalu",
	"Uganda",
	"Ukraine",
	"United Kingdom",
	"United States",
	"Uruguay",
	"USA",
	"Uzbekistan",
	"Vanuatu",
	"Vatican",
	"Venezuela",
	"Vietnam",
	"Wales",
	"Welsh",
	"Yemen",
	"Zambia",
	"Zimbabwe",
}
20 changes: 18 additions & 2 deletions place_node.go
Expand Up @@ -79,11 +79,27 @@ func (node *PlaceNode) State() string {
// Country is the fourth part of the JurisdictionalName().
//
// Country returns a non-empty value when the JurisdictionalName is exactly in
// the form of "Name,County,State,Country", or when the country can be
// identified from the list of Countries by matching the end of the place name.
//
// The fallback match ignores case as well as leading and trailing commas,
// periods and spaces. A country is only accepted when it is not glued to a
// preceding letter or digit, so that "Prussia" is not reported as "Russia".
// When the fallback matches, the spelling from Countries is returned. An empty
// string is returned if no country can be determined.
func (node *PlaceNode) Country() string {
	_, _, _, country := node.JurisdictionalEntities()
	if country != "" {
		return country
	}

	// If the country is empty it is likely because the place is not formatted
	// into four jurisdictional entities. In this case we will try to find the
	// country by looking at the suffix of the place name.
	name := strings.ToLower(strings.Trim(node.JurisdictionalName(), ",. "))

	for _, c := range Countries {
		suffix := strings.ToLower(c)
		if !strings.HasSuffix(name, suffix) {
			continue
		}

		// Require a word boundary in front of the match. The name is already
		// lower case, so only a-z, digits and non-ASCII bytes (which belong to
		// multi-byte characters) are treated as part of a preceding word.
		if start := len(name) - len(suffix); start > 0 {
			prev := name[start-1]
			if prev >= 0x80 || (prev >= 'a' && prev <= 'z') || (prev >= '0' && prev <= '9') {
				continue
			}
		}

		return c
	}

	return ""
}

// Format shows the jurisdictional entities that are named in a sequence from
Expand Down
28 changes: 28 additions & 0 deletions place_node_test.go
Expand Up @@ -50,6 +50,34 @@ var placeTests = []struct {
state: "Utah",
country: "USA.",
},
{
node: gedcom.NewPlaceNode(nil, "United States", "", nil),
name: "United States",
county: "",
state: "",
country: "United States",
},
{
node: gedcom.NewPlaceNode(nil, "Foo, australia.", "", nil),
name: "Foo, australia.",
county: "",
state: "",
country: "Australia",
},
{
node: gedcom.NewPlaceNode(nil, "Bar, Nashville, USA", "", nil),
name: "Bar, Nashville, USA",
county: "",
state: "",
country: "USA",
},
{
node: gedcom.NewPlaceNode(nil, "Hobbitown, New zealand ", "", nil),
name: "Hobbitown, New zealand",
county: "",
state: "",
country: "New Zealand",
},
}

func TestNewPlaceNode(t *testing.T) {
Expand Down

0 comments on commit 18e9024

Please sign in to comment.