Skip to content
This repository has been archived by the owner on Sep 17, 2018. It is now read-only.

Commit

Permalink
Account for terms which contain other terms as a suffix
Browse files Browse the repository at this point in the history
  • Loading branch information
cmc333333 committed Jun 7, 2013
1 parent 361e955 commit badf900
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
2 changes: 1 addition & 1 deletion parser/layer/terms.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def calculate_offsets(self, text, applicable_terms):
larger (i.e. containing) terms."""

# longer terms first
applicable_terms.sort(key=lambda x: x[0], reverse=True)
applicable_terms.sort(key=lambda x: len(x[0]), reverse=True)

matches = []
existing_defs = []
Expand Down
13 changes: 11 additions & 2 deletions tests/layer_terms.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def test_has_definitions_p_marker(self):
struct.label('88-20-a', ['88', '20', 'a']))
self.assertTrue(t.has_definitions(node))


def test_node_definitions(self):
t = Terms(None)
text1 = u'This has a “worD” and then more'
Expand Down Expand Up @@ -124,7 +123,7 @@ def test_calculate_offsets(self):
matches = t.calculate_offsets(text, applicable_terms)
self.assertEqual(3, len(matches))
found = [False, False, False]
for term, ref, offsets in matches:
for _, ref, offsets in matches:
if ref == 'a' and offsets == [(10,19)]:
found[0] = True
if ref == 'b' and offsets == [(30,34)]:
Expand All @@ -133,6 +132,16 @@ def test_calculate_offsets(self):
found[2] = True
self.assertEqual([True,True,True], found)

def test_calculate_offsets_lexical_container(self):
    """A term that is the suffix of a longer term is not matched separately.

    'device' ends the longer term 'access device'. The sample sentence
    contains the longer phrase once, so exactly one match is expected:
    the one referencing 'a', covering characters 5 up to 18.
    """
    layer = Terms(None)
    sentence = "This access device is fantastic!"
    candidates = [('access device', 'a'), ('device', 'd')]

    results = layer.calculate_offsets(sentence, candidates)

    # Only the containing term may be reported.
    self.assertEqual(1, len(results))
    _, matched_ref, matched_offsets = results[0]
    self.assertEqual('a', matched_ref)
    self.assertEqual([(5, 18)], matched_offsets)

def test_calculate_offsets_word_part(self):
"""If a defined term is part of another word, don't include it"""
applicable_terms = [('act', 'a')]
Expand Down

0 comments on commit badf900

Please sign in to comment.