Skip to content
This repository has been archived by the owner on Sep 17, 2018. It is now read-only.

Commit

Permalink
Merge pull request #55 from cmc333333/comments-citations
Browse files Browse the repository at this point in the history
Comments citations
  • Loading branch information
khandelwal committed Jun 7, 2013
2 parents 5e33847 + 4223517 commit d03407a
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 18 deletions.
32 changes: 25 additions & 7 deletions parser/grammar/internal_citations.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,33 @@ def __getattr__(self, attr):
)


# A literal "." followed by a run of upper-case ASCII letters, and a literal
# "." followed by a run of lower-case roman-numeral characters.
upper_dec = "." + Word(string.ascii_uppercase)
roman_dec = "." + Word("ivxlcdm")
# The same two names are bound again below; these later bindings are the ones
# left in effect. They differ only in giving the Word part a results name
# ('level3' for the upper-case run, 'level2' for the roman-numeral run).
upper_dec = "." + Word(string.ascii_uppercase).setResultsName('level3')
roman_dec = "." + Word("ivxlcdm").setResultsName('level2')


comment_citation = (
"comment"
+ (Word(string.digits) + depth1_p)
+ "-"
+ (Word(string.digits)
# One comment reference without any leading keyword:
#   - a run of digits, exposed as 'section'
#   - whatever depth1_p matches, exposed as 'p_head' (depth1_p is defined
#     elsewhere; presumably the parenthesised paragraph markers such as
#     "(b)(3)" -- TODO confirm)
#   - optionally "-" plus a run of digits ('level1'), which may in turn be
#     followed by roman_dec and then upper_dec
single_comment = (
Word(string.digits).setResultsName("section")
+ depth1_p.setResultsName('p_head')
+ Optional("-" + (
Word(string.digits).setResultsName('level1')
+ Optional(roman_dec + Optional(upper_dec))
).leaveWhitespace() # Exclude any period + space (end of sentence)
)
# keep_pos (defined elsewhere) runs as the parse action for every match.
).setParseAction(keep_pos)


# A single comment reference introduced by the literal word "comment". The
# keyword itself is suppressed from the results and the reference that follows
# is exposed under the name 'without_marker'.
single_comment_with_marker = (
Suppress("comment") + single_comment.setResultsName('without_marker')
)


# Several comment references introduced by the literal word "comments" (which
# is suppressed). The first reference is exposed as 'c_head'; every further
# reference must be preceded by conj_phrases (defined elsewhere) and is
# gathered under 'c_tail' -- listAllMatches=True keeps each match rather than
# only the last one.
multiple_comments = (
Suppress("comments")
+ single_comment.setResultsName("c_head")
+ OneOrMore(conj_phrases
+ single_comment.setResultsName("c_tail", listAllMatches=True)))

# Entry point for comment citations. The plural form is listed first so it is
# tried before the singular form; whichever alternative matches is exposed
# under its own results name ('multiple_comments' or 'single_comment').
comment_citation = (
multiple_comments.setResultsName("multiple_comments")
| single_comment_with_marker.setResultsName("single_comment")
)
49 changes: 39 additions & 10 deletions parser/layer/internal_citations.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,34 +10,63 @@ def parse(self, text, parts=None):
""" Parse the provided text, pulling out all the internal (self-referential)
citations. """

all_citations = []
all_citations = self.regtext_citations(text, parts)
all_citations.extend(self.comment_citations(text, parts))

for cit, start, end in grammar.appendix_citation.scanString(text):
label = [parts[0], cit.appendix, cit.section]
all_citations.extend(self.paragraph_list(cit, start, end,
label))

return self.strip_whitespace(text, all_citations)

def regtext_citations(self, text, parts):
    """Find all citations that refer to regtext.

    Scans ``text`` with ``grammar.regtext_citation`` and, for every match,
    delegates to ``self.paragraph_list`` to build the citation entries.

    ``parts`` is the label of the text being parsed; its first two entries
    are used as the label prefix for paragraph-only citations. Citations
    that name their own part and section use those values instead.

    Returns the accumulated list of everything ``self.paragraph_list``
    produced, in match order.
    """
    citations = []
    for match, _start, end in grammar.regtext_citation.scanString(text):
        # Paragraph-only references ("paragraph (b)(1)") are relative to the
        # section currently being parsed.
        paragraphs = match.single_paragraph or match.multiple_paragraphs
        if paragraphs:
            citations.extend(self.paragraph_list(
                paragraphs, paragraphs.p_head.pos[0], end, parts[0:2]))
        elif match.multiple_sections:
            # Each listed section carries its own position and label.
            sections = [match.s_head] + list(match.s_tail)
            for section in sections:
                citations.extend(self.paragraph_list(
                    section, section.pos[0], section.pos[1],
                    [section.part, section.section]))
        else:
            # A lone section reference with no leading marker word.
            bare = match.without_marker
            citations.extend(self.paragraph_list(
                bare, bare.pos[0], end, [bare.part, bare.section]))
    return citations

for cit, start, end in grammar.appendix_citation.scanString(text):
label = [parts[0], cit.appendix, cit.section]
all_citations.extend(self.paragraph_list(cit, start, end,
label))

return self.strip_whitespace(text, all_citations)
def comment_citations(self, text, parts):
    """Find all citations that refer to interpretations.

    Scans ``text`` with ``grammar.comment_citation``. A match is either a
    list of comments (``c_head`` plus ``c_tail``) or a single comment
    (``without_marker``); each comment becomes one entry in the result.

    ``parts`` is the label of the text being parsed; only ``parts[0]`` is
    used, as the first component of every citation label.

    Returns a list of dicts, each with:
      'offsets'  -- a one-element list holding the comment's (start, end)
      'citation' -- a list: parts[0], 'Interpretations', the section, the
                    parenthesised paragraph reference, then whichever of
                    level1/level2/level3 were present
    """
    citations = []
    for match, _start, _end in grammar.comment_citation.scanString(text):
        if match.multiple_comments:
            comments = [match.c_head] + list(match.c_tail)
        else:
            comments = [match.without_marker]
        for comment in comments:
            # Each comment carries its own span, so the offsets point at the
            # individual reference rather than at the whole scan match.
            start, end = comment.pos
            tokens = comment.tokens
            paragraph_ref = ')('.join([p for p in tokens.p_head if p])
            label = [
                parts[0], 'Interpretations', tokens.section,
                '(' + paragraph_ref + ')',
                tokens.level1, tokens.level2, tokens.level3,
            ]
            citations.append({
                'offsets': [(start, end)],
                # Build a real list: filter() is lazy on Python 3, and
                # callers compare this value against plain lists.
                'citation': [piece for piece in label if piece]
            })
    return citations

def strip_whitespace(self, text, citations):
"""Modifies the offsets to exclude any trailing whitespace. Modifies
Expand Down
2 changes: 1 addition & 1 deletion tests/grammar_internal_citations.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def test_comment_positive(self):
def test_comment_negative(self):
citations = [
"comment 10(5)-5",
"comment 10(b)"
"comment 10-b-q"
"comment 10"
"comment 8-b(1)"
]
Expand Down
41 changes: 41 additions & 0 deletions tests/internal_citation_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,3 +212,44 @@ def test_sections_verbose(self):
self.assertEqual(['321','11','h','4'], r321['citation'])
offsets = r321['offsets'][0]
self.assertEqual('321.11 (h)(4)', text[offsets[0]:offsets[1]])

def test_comment_header(self):
    """A comment reference with only paragraph markers yields one
    Interpretations citation covering just the reference text."""
    text = "See comment 32(b)(3) blah blah"
    found = self.parser.parse(text, parts=['222', '87'])
    self.assertEqual(1, len(found))
    only = found[0]
    self.assertEqual(['222', 'Interpretations', '32', '(b)(3)'],
                     only['citation'])
    span = only['offsets'][0]
    self.assertEqual('32(b)(3)', text[span[0]:span[1]])

def test_sub_comment(self):
    """A "-<number>" suffix is appended to the citation label and is
    included in the reported offsets."""
    text = "refer to comment 36(a)(2)-3 of thing"
    found = self.parser.parse(text, parts=['222', '87'])
    self.assertEqual(1, len(found))
    only = found[0]
    self.assertEqual(['222', 'Interpretations', '36', '(a)(2)', '3'],
                     only['citation'])
    span = only['offsets'][0]
    self.assertEqual('36(a)(2)-3', text[span[0]:span[1]])

def test_sub_comment2(self):
    """A roman-numeral sub-level after the comment number is captured,
    while a sentence-ending period is left out of the match."""
    text = "See comment 3(b)(1)-1.v."
    found = self.parser.parse(text, parts=['222', '87'])
    self.assertEqual(1, len(found))
    only = found[0]
    self.assertEqual(['222', 'Interpretations', '3', '(b)(1)', '1', 'v'],
                     only['citation'])
    span = only['offsets'][0]
    # The trailing period belongs to the sentence, not to the citation.
    self.assertEqual('3(b)(1)-1.v', text[span[0]:span[1]])

def test_multiple_comments(self):
    """A "comments X and Y" phrase produces one citation per reference,
    each with offsets covering only its own text."""
    text = "See, e.g., comments 31(b)(1)(iv)-1 and 31(b)(1)(vi)-1"
    found = self.parser.parse(text, parts=['222', '87'])
    self.assertEqual(2, len(found))
    expected = [
        (['222', 'Interpretations', '31', '(b)(1)(iv)', '1'],
         '31(b)(1)(iv)-1'),
        (['222', 'Interpretations', '31', '(b)(1)(vi)', '1'],
         '31(b)(1)(vi)-1'),
    ]
    for position, (label, cited_text) in enumerate(expected):
        entry = found[position]
        self.assertEqual(label, entry['citation'])
        span = entry['offsets'][0]
        self.assertEqual(cited_text, text[span[0]:span[1]])

0 comments on commit d03407a

Please sign in to comment.