Skip to content

Commit

Permalink
OPENNLP-509 opennlp.tools.parser.Parse.getParent() returning incorrec…
Browse files Browse the repository at this point in the history
…t object

- adds `setParents(..)` calls in `AbstractBottomUpParser#parse(Parse tokens, int numParses)` which now set the hierarchical (back) references that were missing before
- adds test to verify that a processed Parse instance's parent references are populated and _not_ `null`
  • Loading branch information
mawiesne authored and rzo1 committed Mar 9, 2023
1 parent e439624 commit 2511272
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -338,15 +338,30 @@ else if (1 == derivationStage) {
odh = ndh;
}
if (completeParses.size() == 0) {
if (guess != null) {
setParents(guess);
for (Parse childGuess: guess.getChildren()) {
setParents(childGuess);
}
}
return new Parse[] {guess};
}
else if (numParses == 1) {
return new Parse[] {completeParses.first()};
Parse best = completeParses.first();
setParents(best);
for (Parse childBest: best.getChildren()) {
setParents(childBest);
}
return new Parse[] {best};
}
else {
List<Parse> topParses = new ArrayList<>(numParses);
while (!completeParses.isEmpty() && topParses.size() < numParses) {
Parse tp = completeParses.first();
setParents(tp);
for (Parse childTp: tp.getChildren()) {
setParents(childTp);
}
completeParses.remove(tp);
topParses.add(tp);
//parses.remove(tp);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,57 @@ void testParsing(String input, String reference) {
Assertions.assertNotNull(s);
}

/**
 * Regression test for OPENNLP-509: after parsing, the nodes of every returned
 * parse must carry populated parent (back) references.
 * See: https://issues.apache.org/jira/projects/OPENNLP/issues/OPENNLP-509
 */
@Test
void testParsingCheckParentReferencesArePopulated() {
  // Fixture: a short sentence, normalized to single-space separated tokens.
  final String sentence = "Martin is testing.";
  final List<String> words = Arrays.asList(WhitespaceTokenizer.INSTANCE.tokenize(sentence));
  final String joined = String.join(" ", words);

  // Root node spanning the whole text; one token node is inserted per word.
  final Span fullSpan = new Span(0, joined.length());
  final Parse root = new Parse(joined, fullSpan, AbstractBottomUpParser.INC_NODE, 0, null);

  int offset = 0;
  int index = 0;
  for (String word : words) {
    final int end = offset + word.length();
    root.insert(new Parse(joined, new Span(offset, end),
        AbstractBottomUpParser.TOK_NODE, 0, index));
    // Skip the single blank that separates this word from the next one.
    offset = end + 1;
    index++;
  }

  final Parser parser = ParserFactory.create(getModel());
  Assertions.assertNotNull(parser);

  // Request the top-2 parses and verify the parent references of each of them.
  final Parse[] topParses = parser.parse(root, 2);
  Assertions.assertNotNull(topParses);
  Arrays.stream(topParses).forEach(this::checkParentsEqual);
}

/**
 * Recursively traverses the parse tree below {@code parent} and asserts that every
 * non-token child refers back to {@code parent} via {@link Parse#getParent()}.
 * Token (leaf) children are skipped, but their siblings are still verified.
 *
 * @param parent the node whose children are checked; must not be {@code null}
 */
private void checkParentsEqual(Parse parent) {
  for (Parse child : parent.getChildren()) {
    if (AbstractBottomUpParser.TOK_NODE.equals(child.getType())) {
      // Leaf node: nothing to descend into. Keep going so that the remaining
      // siblings are not silently left unchecked.
      continue;
    }
    Parse cParent = child.getParent();
    // Fail with an explicit message-less null check first: an unpopulated
    // back reference is exactly the defect this helper guards against.
    Assertions.assertNotNull(cParent);
    Assertions.assertEquals(parent, cParent);
    // Guard the recursion on the child's own children; the parent's children
    // are already known to be non-null because they are being iterated here.
    if (child.getChildren() != null) {
      checkParentsEqual(child);
    }
  }
}

/*
* Verifies changes in OPENNLP-1330 and addresses follow-up OPENNLP-1333
* See: https://issues.apache.org/jira/projects/OPENNLP/issues/OPENNLP-1333
Expand Down

0 comments on commit 2511272

Please sign in to comment.