Skip to content
This repository has been archived by the owner on Sep 17, 2018. It is now read-only.

Commit

Permalink
Merge pull request #59 from cmc333333/missing-defs
Browse files Browse the repository at this point in the history
Fix for #54, missing defs
  • Loading branch information
khandelwal committed Jun 10, 2013
2 parents d03407a + badf900 commit 5765c83
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 3 deletions.
6 changes: 4 additions & 2 deletions parser/layer/terms.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from layer import Layer
from parser import utils
from parser.grammar.terms import term_parser
from parser.layer.paragraph_markers import ParagraphMarkers
from parser.tree import struct
import re

Expand Down Expand Up @@ -31,8 +32,9 @@ def has_definitions(self, node):
# Definitions are only in the reg text (not appendices/interprs)
if not node['label']['parts'][1].isdigit():
return False
stripped = node['text'].strip(ParagraphMarkers.marker(node)).strip()
return (
node['text'].lower().startswith('definition')
stripped.lower().startswith('definition')
or ('title' in node['label']
and 'definition' in node['label']['title'].lower()))

Expand Down Expand Up @@ -85,7 +87,7 @@ def calculate_offsets(self, text, applicable_terms):
larger (i.e. containing) terms."""

# longer terms first
applicable_terms.sort(key=lambda x: x[0], reverse=True)
applicable_terms.sort(key=lambda x: len(x[0]), reverse=True)

matches = []
existing_defs = []
Expand Down
19 changes: 18 additions & 1 deletion tests/layer_terms.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ def test_has_definitions(self):
label=struct.label("101-22-c", ["101", "22", "c"],
"But definition is in the title"))))

def test_has_definitions_p_marker(self):
    """Text that opens with a paragraph marker ("(a)") followed by
    "Definitions" is still reported as containing definitions."""
    layer = Terms(None)
    text = "(a) Definitions. For purposes of this section except blah"
    label = struct.label('88-20-a', ['88', '20', 'a'])
    paragraph = struct.node(text, [], label)
    self.assertTrue(layer.has_definitions(paragraph))

def test_node_definitions(self):
t = Terms(None)
Expand Down Expand Up @@ -116,7 +123,7 @@ def test_calculate_offsets(self):
matches = t.calculate_offsets(text, applicable_terms)
self.assertEqual(3, len(matches))
found = [False, False, False]
for term, ref, offsets in matches:
for _, ref, offsets in matches:
if ref == 'a' and offsets == [(10,19)]:
found[0] = True
if ref == 'b' and offsets == [(30,34)]:
Expand All @@ -125,6 +132,16 @@ def test_calculate_offsets(self):
found[2] = True
self.assertEqual([True,True,True], found)

def test_calculate_offsets_lexical_container(self):
    """When one term lexically contains another ('access device' vs.
    'device'), only the containing term is expected in the matches."""
    candidates = [('access device', 'a'), ('device', 'd')]
    sentence = "This access device is fantastic!"
    layer = Terms(None)
    found = layer.calculate_offsets(sentence, candidates)
    self.assertEqual(1, len(found))
    # Each match is a (term, reference, offsets) triple; the term text
    # itself is not checked here.
    _, reference, positions = found[0]
    self.assertEqual('a', reference)
    self.assertEqual([(5, 18)], positions)

def test_calculate_offsets_word_part(self):
"""If a defined term is part of another word, don't include it"""
applicable_terms = [('act', 'a')]
Expand Down

0 comments on commit 5765c83

Please sign in to comment.