Skip to content
This repository has been archived by the owner on Dec 18, 2019. It is now read-only.

Commit

Permalink
Work around states with dots; move "United States" code
Browse files Browse the repository at this point in the history
* Work around the appearance of periods in state abbreviations, like
  "S.C.", which GeoNames will not resolve.
* Move the code that normalizes "United States" and its variants into
  geocode.set_name().
  • Loading branch information
Mark Breedlove committed Feb 1, 2017
1 parent c23a221 commit cdbf723
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 6 deletions.
18 changes: 12 additions & 6 deletions lib/akamod/geocode.py
Expand Up @@ -168,10 +168,6 @@ def geocode_place(self, place):
are already present, will attempt to validate them before accepting
the new place.
"""
if place.name and re.search(ur" *(United States(?!-)|États-Unis|USA)",
place.name):
place.name = 'United States'

place.set_name()
params = {}

Expand Down Expand Up @@ -457,10 +453,20 @@ def map_fields(self):

def set_name(self):
"""
Returns the name property. If none is set, sets it to the smallest
available geographic division label.
Modify and return our name property, after cleaning it up. If none is
set, initialize it to the smallest available geographic division label.
"""
if self.name:
if re.search(ur" *(United States(?!-)|États-Unis|USA)", self.name):
self.name = 'United States'
# Kludge state abbreviations with periods (e.g. "S.C." or "CA.")
# into their equivalent official post-office abbreviations (e.g.
# "SC" or "CA"). GeoNames just doesn't resolve the ones with
# periods. We can get rid of this when we switch to Twofishes.
self.name = re.sub(r'([ACDFGHIKLMNOPRSTUVW])\.?'
r'([KLRZAOTEIDNSYCHJMVXV])\.',
r'\1\2',
self.name)
return self.name

prop_order = ["city", "county", "state", "country", "region"]
Expand Down
36 changes: 36 additions & 0 deletions test/test_geocode.py
Expand Up @@ -590,6 +590,42 @@ def test_geocode_geonames_name_search_context():
assert resp.status == 200
assert_same_jsons(EXPECTED, json.loads(content))

@attr(travis_exclude='yes')
def test_geocode_works_with_dotted_abbreviations():
    """
    A dotted state abbreviation such as "(S.C.)" geocodes like "(SC)".

    Posts a document whose spatial name is "Greenville (S.C.)" to the
    geocode endpoint and expects the enriched place to come back with the
    name rewritten to "Greenville (SC)".

    Note for the later Twofishes retrofit: Twofishes handles "(S.C.)" just
    fine, so most of the assertion below should be kept, but the code that
    works around this syntax should be altered.  With Twofishes the "S.C."
    spelling can be preserved in the "name" property, and for Ingestion 3
    with MAPv4 it can be preserved in the providedLabel property.
    """
    # The place as supplied by the provider, with the dotted abbreviation.
    dotted_place = {
        "name": "Greenville (S.C.)"
    }
    # The place we expect back once the abbreviation has been normalized
    # and the lookup has succeeded.
    resolved_place = {
        "county": "Greenville County",
        "country": "United States",
        "state": "South Carolina",
        "name": "Greenville (SC)",
        "coordinates": "34.85262, -82.39401"
    }
    request_doc = {
        "_id": "foo",
        "sourceResource": {
            "spatial": dotted_place
        }
    }
    # The response carries "spatial" as a list of places, not a single dict.
    expected_doc = {
        "_id": "foo",
        "sourceResource": {
            "spatial": [resolved_place]
        }
    }

    endpoint = server() + "geocode"
    response, body = H.request(endpoint, "POST",
                               body=json.dumps(request_doc))
    assert response.status == 200
    assert_same_jsons(expected_doc, json.loads(body))

@attr(travis_exclude='yes')
def test_geocode_geonames_name_search_failure():
"""Shouldn't fall down when nothing is returned.
Expand Down

0 comments on commit cdbf723

Please sign in to comment.