Skip to content

Commit

Permalink
LUCENE-9402: Let MultiCollector handle minCompetitiveScore (#1567)
Browse files Browse the repository at this point in the history
  • Loading branch information
tflobbe committed Jun 18, 2020
1 parent 84729ed commit 4db1e38
Show file tree
Hide file tree
Showing 3 changed files with 251 additions and 34 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ Improvements

* LUCENE-9396: Improved truncation detection for points. (Adrien Grand, Robert Muir)

* LUCENE-9402: Let MultiCollector handle minCompetitiveScore (Tomás Fernández Löbbe, Adrien Grand)

Optimizations
---------------------

Expand Down
111 changes: 77 additions & 34 deletions lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ public ScoreMode scoreMode() {

@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
final List<LeafCollector> leafCollectors = new ArrayList<>();
final List<LeafCollector> leafCollectors = new ArrayList<>(collectors.length);
for (Collector collector : collectors) {
final LeafCollector leafCollector;
try {
Expand All @@ -134,69 +134,112 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept
case 1:
return leafCollectors.get(0);
default:
return new MultiLeafCollector(leafCollectors, cacheScores);
return new MultiLeafCollector(leafCollectors, cacheScores, scoreMode() == ScoreMode.TOP_SCORES);
}
}

private static class MultiLeafCollector implements LeafCollector {

private final boolean cacheScores;
private final LeafCollector[] collectors;
private int numCollectors;
private final float[] minScores;
private final boolean skipNonCompetitiveScores;

private MultiLeafCollector(List<LeafCollector> collectors, boolean cacheScores) {
private MultiLeafCollector(List<LeafCollector> collectors, boolean cacheScores, boolean skipNonCompetitive) {
this.collectors = collectors.toArray(new LeafCollector[collectors.size()]);
this.cacheScores = cacheScores;
this.numCollectors = this.collectors.length;
this.skipNonCompetitiveScores = skipNonCompetitive;
this.minScores = this.skipNonCompetitiveScores ? new float[this.collectors.length] : null;
}

@Override
public void setScorer(Scorable scorer) throws IOException {
if (cacheScores) {
scorer = new ScoreCachingWrappingScorer(scorer);
}
scorer = new FilterScorable(scorer) {
@Override
public void setMinCompetitiveScore(float minScore) {
// Ignore calls to setMinCompetitiveScore so that if we wrap two
// collectors and one of them wants to skip low-scoring hits, then
// the other collector still sees all hits. We could try to reconcile
// min scores and take the maximum min score across collectors, but
// this is very unlikely to be helpful in practice.
if (skipNonCompetitiveScores) {
for (int i = 0; i < collectors.length; ++i) {
final LeafCollector c = collectors[i];
if (c != null) {
c.setScorer(new MinCompetitiveScoreAwareScorable(scorer, i, minScores));
}
}
} else {
scorer = new FilterScorable(scorer) {
@Override
public void setMinCompetitiveScore(float minScore) throws IOException {
// Ignore calls to setMinCompetitiveScore so that if we wrap two
// collectors and one of them wants to skip low-scoring hits, then
// the other collector still sees all hits.
}

};
for (int i = 0; i < numCollectors; ++i) {
final LeafCollector c = collectors[i];
c.setScorer(scorer);
};
for (int i = 0; i < collectors.length; ++i) {
final LeafCollector c = collectors[i];
if (c != null) {
c.setScorer(scorer);
}
}
}
}

private void removeCollector(int i) {
System.arraycopy(collectors, i + 1, collectors, i, numCollectors - i - 1);
--numCollectors;
collectors[numCollectors] = null;
}

@Override
public void collect(int doc) throws IOException {
final LeafCollector[] collectors = this.collectors;
int numCollectors = this.numCollectors;
for (int i = 0; i < numCollectors; ) {
for (int i = 0; i < collectors.length; i++) {
final LeafCollector collector = collectors[i];
try {
collector.collect(doc);
++i;
} catch (CollectionTerminatedException e) {
removeCollector(i);
numCollectors = this.numCollectors;
if (numCollectors == 0) {
throw new CollectionTerminatedException();
if (collector != null) {
try {
collector.collect(doc);
} catch (CollectionTerminatedException e) {
collectors[i] = null;
if (allCollectorsTerminated()) {
throw new CollectionTerminatedException();
}
}
}
}
}

private boolean allCollectorsTerminated() {
for (int i = 0; i < collectors.length; i++) {
if (collectors[i] != null) {
return false;
}
}
return true;
}

}

final static class MinCompetitiveScoreAwareScorable extends FilterScorable {

private final int idx;
private final float[] minScores;

MinCompetitiveScoreAwareScorable(Scorable in, int idx, float[] minScores) {
super(in);
this.idx = idx;
this.minScores = minScores;
}

@Override
public void setMinCompetitiveScore(float minScore) throws IOException {
if (minScore > minScores[idx]) {
minScores[idx] = minScore;
in.setMinCompetitiveScore(minScore());
}
}

private float minScore() {
float min = Float.MAX_VALUE;
for (int i = 0; i < minScores.length; i++) {
if (minScores[i] < min) {
min = minScores[i];
}
}
return min;
}

}

}
172 changes: 172 additions & 0 deletions lucene/core/src/test/org/apache/lucene/search/MultiCollectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -163,4 +163,176 @@ public void testCacheScoresIfNecessary() throws IOException {
reader.close();
dir.close();
}

public void testScorerWrappingForTopScores() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
iw.addDocument(new Document());
DirectoryReader reader = iw.getReader();
iw.close();
final LeafReaderContext ctx = reader.leaves().get(0);
Collector c1 = collector(ScoreMode.TOP_SCORES, MultiCollector.MinCompetitiveScoreAwareScorable.class);
Collector c2 = collector(ScoreMode.TOP_SCORES, MultiCollector.MinCompetitiveScoreAwareScorable.class);
MultiCollector.wrap(c1, c2).getLeafCollector(ctx).setScorer(new ScoreAndDoc());

c1 = collector(ScoreMode.TOP_SCORES, ScoreCachingWrappingScorer.class);
c2 = collector(ScoreMode.COMPLETE, ScoreCachingWrappingScorer.class);
MultiCollector.wrap(c1, c2).getLeafCollector(ctx).setScorer(new ScoreAndDoc());

reader.close();
dir.close();
}

public void testMinCompetitiveScore() throws IOException {
float[] currentMinScores = new float[3];
float[] minCompetitiveScore = new float[1];
Scorable scorer = new Scorable() {

@Override
public float score() throws IOException {
return 0;
}

@Override
public int docID() {
return 0;
}

@Override
public void setMinCompetitiveScore(float minScore) throws IOException {
minCompetitiveScore[0] = minScore;
}
};
Scorable s0 = new MultiCollector.MinCompetitiveScoreAwareScorable(scorer, 0, currentMinScores);
Scorable s1 = new MultiCollector.MinCompetitiveScoreAwareScorable(scorer, 1, currentMinScores);
Scorable s2 = new MultiCollector.MinCompetitiveScoreAwareScorable(scorer, 2, currentMinScores);
assertEquals(0f, minCompetitiveScore[0], 0);
s0.setMinCompetitiveScore(0.5f);
assertEquals(0f, minCompetitiveScore[0], 0);
s1.setMinCompetitiveScore(0.8f);
assertEquals(0f, minCompetitiveScore[0], 0);
s2.setMinCompetitiveScore(0.3f);
assertEquals(0.3f, minCompetitiveScore[0], 0);
s2.setMinCompetitiveScore(0.1f);
assertEquals(0.3f, minCompetitiveScore[0], 0);
s1.setMinCompetitiveScore(Float.MAX_VALUE);
assertEquals(0.3f, minCompetitiveScore[0], 0);
s2.setMinCompetitiveScore(Float.MAX_VALUE);
assertEquals(0.5f, minCompetitiveScore[0], 0);
s0.setMinCompetitiveScore(Float.MAX_VALUE);
assertEquals(Float.MAX_VALUE, minCompetitiveScore[0], 0);
}

public void testCollectionTermination() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
iw.addDocument(new Document());
DirectoryReader reader = iw.getReader();
iw.close();
final LeafReaderContext ctx = reader.leaves().get(0);
DummyCollector c1 = new TerminatingDummyCollector(1, ScoreMode.COMPLETE);
DummyCollector c2 = new TerminatingDummyCollector(2, ScoreMode.COMPLETE);

Collector mc = MultiCollector.wrap(c1, c2);
LeafCollector lc = mc.getLeafCollector(ctx);
lc.setScorer(new ScoreAndDoc());
lc.collect(0); // OK
assertTrue("c1's collect should be called", c1.collectCalled);
assertTrue("c2's collect should be called", c2.collectCalled);
c1.collectCalled = false;
c2.collectCalled = false;
lc.collect(1); // OK, but c1 should terminate
assertFalse("c1 should be removed already", c1.collectCalled);
assertTrue("c2's collect should be called", c2.collectCalled);
c2.collectCalled = false;

expectThrows(CollectionTerminatedException.class, () -> {
lc.collect(2);
});
assertFalse("c1 should be removed already", c1.collectCalled);
assertFalse("c2 should be removed already", c2.collectCalled);

reader.close();
dir.close();
}

public void testSetScorerOnCollectionTerminationSkipNonCompetitive() throws IOException {
doTestSetScorerOnCollectionTermination(true);
}

public void testSetScorerOnCollectionTerminationSkipNoSkips() throws IOException {
doTestSetScorerOnCollectionTermination(false);
}

private void doTestSetScorerOnCollectionTermination(boolean allowSkipNonCompetitive) throws IOException {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
iw.addDocument(new Document());
DirectoryReader reader = iw.getReader();
iw.close();
final LeafReaderContext ctx = reader.leaves().get(0);

DummyCollector c1 = new TerminatingDummyCollector(1, allowSkipNonCompetitive? ScoreMode.TOP_SCORES : ScoreMode.COMPLETE);
DummyCollector c2 = new TerminatingDummyCollector(2, allowSkipNonCompetitive? ScoreMode.TOP_SCORES : ScoreMode.COMPLETE);

Collector mc = MultiCollector.wrap(c1, c2);
LeafCollector lc = mc.getLeafCollector(ctx);
assertFalse(c1.setScorerCalled);
assertFalse(c2.setScorerCalled);
lc.setScorer(new ScoreAndDoc());
assertTrue(c1.setScorerCalled);
assertTrue(c2.setScorerCalled);
c1.setScorerCalled = false;
c2.setScorerCalled = false;
lc.collect(0); // OK

lc.setScorer(new ScoreAndDoc());
assertTrue(c1.setScorerCalled);
assertTrue(c2.setScorerCalled);
c1.setScorerCalled = false;
c2.setScorerCalled = false;

lc.collect(1); // OK, but c1 should terminate
lc.setScorer(new ScoreAndDoc());
assertFalse(c1.setScorerCalled);
assertTrue(c2.setScorerCalled);
c2.setScorerCalled = false;

expectThrows(CollectionTerminatedException.class, () -> {
lc.collect(2);
});
lc.setScorer(new ScoreAndDoc());
assertFalse(c1.setScorerCalled);
assertFalse(c2.setScorerCalled);

reader.close();
dir.close();
}

private static class TerminatingDummyCollector extends DummyCollector {

private final int terminateOnDoc;
private final ScoreMode scoreMode;

public TerminatingDummyCollector(int terminateOnDoc, ScoreMode scoreMode) {
super();
this.terminateOnDoc = terminateOnDoc;
this.scoreMode = scoreMode;
}

@Override
public void collect(int doc) throws IOException {
if (doc == terminateOnDoc) {
throw new CollectionTerminatedException();
}
super.collect(doc);
}

@Override
public ScoreMode scoreMode() {
return scoreMode;
}

}

}

0 comments on commit 4db1e38

Please sign in to comment.