Skip to content

Commit

Permalink
TokenStreamToAutomaton failed to handle certain holes correctly
Browse files Browse the repository at this point in the history
  • Loading branch information
mikemccand committed Jan 8, 2017
1 parent 1aa9c42 commit e64111c
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
Expand Up @@ -113,6 +113,7 @@ public Automaton toAutomaton(TokenStream in) throws IOException {
final RollingBuffer<Position> positions = new Positions();

int pos = -1;
int freedPos = 0;
Position posData = null;
int maxOffset = 0;
while (in.incrementToken()) {
Expand Down Expand Up @@ -150,7 +151,15 @@ public Automaton toAutomaton(TokenStream in) throws IOException {
addHoles(builder, positions, pos);
}
}
positions.freeBefore(pos);
while (freedPos <= pos) {
Position freePosData = positions.get(freedPos);
// don't free this position yet if we may still need to fill holes over it:
if (freePosData.arriving == -1 || freePosData.leaving == -1) {
break;
}
positions.freeBefore(freedPos);
freedPos++;
}
}

final int endPos = pos + posLengthAtt.getPositionLength();
Expand Down
Expand Up @@ -585,4 +585,16 @@ private void assertSameLanguage(Automaton expected, Automaton actual) {
Operations.determinize(Operations.removeDeadStates(expected), DEFAULT_MAX_DETERMINIZED_STATES),
Operations.determinize(Operations.removeDeadStates(actual), DEFAULT_MAX_DETERMINIZED_STATES)));
}

/**
 * Feeds a four-token canned stream ("abc", "xyz", "def", "ghi") through
 * {@code assertSameLanguage} and expects the resulting language to be the
 * union of two paths:
 * <ul>
 *   <li>{@code abc SEP xyz}</li>
 *   <li>{@code abc SEP HOLE SEP def SEP ghi}</li>
 * </ul>
 *
 * @throws Exception if building or comparing the automata fails
 */
public void testTokenStreamGraphWithHoles() throws Exception {
  // NOTE(review): the two ints passed to token(...) are presumably the
  // position increment and position length -- confirm against the helper.
  // "xyz" is the only token whose last argument is 8 rather than 1.
  final Token[] cannedTokens = {
    token("abc", 1, 1),
    token("xyz", 1, 8),
    token("def", 1, 1),
    token("ghi", 1, 1),
  };
  final TokenStream ts = new CannedTokenStream(cannedTokens);

  assertSameLanguage(
      Operations.union(
          // first alternative: "abc" followed directly by "xyz"
          join(s2a("abc"), SEP_A, s2a("xyz")),
          // second alternative: "abc", then a hole, then "def" and "ghi"
          join(s2a("abc"), SEP_A, HOLE_A, SEP_A, s2a("def"), SEP_A, s2a("ghi"))),
      ts);
}
}

0 comments on commit e64111c

Please sign in to comment.