Skip to content

Commit

Permalink
Merge pull request #1 from victortrac/master
Browse files (browse the repository at this point in the history)
fixing bug with cleaving hyphens
  • Loading branch information
bidhan-a committed May 18, 2017
2 parents 48d3838 + 99a5460 commit 4613c0d
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 3 deletions.
6 changes: 4 additions & 2 deletions name_cleaver/names.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,12 @@ def without_extra_phrases(self):
if "-" in name:
hyphen_parts = name.rsplit("-", 1)
# if the part after the hyphen is shorter than the part before,
# AND isn't either a number (often occurs in Union names) or a single letter (e.g., Tech-X),
# AND the part after the hyphen either starts with whitespace or is at most four word characters long,
# AND isn't either a number (often occurs in Union names) or a single letter (e.g., Tech-X),
# discard the hyphen and whatever follows
if len(hyphen_parts[1]) < len(hyphen_parts[0]) and re.search(r'(\w{4,}|\s+)$', hyphen_parts[0]) and not re.match(r'^([a-zA-Z]|[0-9]+)$', hyphen_parts[1]):
if len(hyphen_parts[1]) < len(hyphen_parts[0]) \
and re.search(r'^(\s+)|^(\w{0,4})$', hyphen_parts[1]) \
and not re.match(r'^([a-zA-Z]|[0-9]+)$', hyphen_parts[1]):
name = hyphen_parts[0].strip()

return name
Expand Down
4 changes: 3 additions & 1 deletion name_cleaver/test_name_cleaver.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,13 +144,15 @@ def test_expand_with_two_tokens_to_expand(self):
def test_dont_strip_after_hyphens_too_soon_in_a_name(self):
self.assertEqual('US-Russia Business Council', OrganizationNameCleaver('US-Russia Business Council').parse().kernel())
self.assertEqual('Wal-Mart Stores', OrganizationNameCleaver('Wal-Mart Stores, Inc.').parse().kernel())
self.assertEqual('Williams-Sonoma', OrganizationNameCleaver('Williams-Sonoma, Inc.').parse().kernel())
self.assertEqual('Austin American-Statesman', OrganizationNameCleaver('Austin American-Statesman').parse().kernel())

# these were new after the hyphen rewrite
self.assertEqual('Coca-Cola Company', OrganizationNameCleaver('Coca-Cola Co').parse().expand()) # used to return 'Coca'
self.assertEqual('Rolls-Royce PLC', OrganizationNameCleaver('Rolls-Royce PLC').parse().expand()) # used to return 'Rolls'

def test_drop_postname_hyphen_phrases(self):
self.assertEqual('Lawyers For Better Government', OrganizationNameCleaver('LAWYERS FOR BETTER GOVERNMENT-ILLINOIS').parse().without_extra_phrases())
self.assertEqual('Lawyers For Better Government-Illinois', OrganizationNameCleaver('LAWYERS FOR BETTER GOVERNMENT-ILLINOIS').parse().without_extra_phrases())
self.assertEqual('Jobs Opportunity And Freedom Political Action Committee', OrganizationNameCleaver('JOBS OPPORTUNITY AND FREEDOM POLITICAL ACTION COMMITTEE - JOFPAC').parse().without_extra_phrases())

def test_kernel(self):
Expand Down

0 comments on commit 4613c0d

Please sign in to comment.