New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
LUCENE-8995: TopSuggestDocsCollector#collect should be able to signal rejection #913
base: master
Are you sure you want to change the base?
Changes from all commits
c0e2aad
8739bb0
d42a88c
3ffef37
3b4b816
78c40a2
5632320
bb1ece6
e72ac79
e51f262
81bc00c
2f3a1b9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ | |
import org.apache.lucene.search.TopDocs; | ||
import org.apache.lucene.search.TotalHits; | ||
import org.apache.lucene.search.suggest.Lookup; | ||
import org.apache.lucene.util.fst.Util.TopNSearcher; | ||
|
||
/** | ||
* {@link org.apache.lucene.search.TopDocs} wrapper with | ||
|
@@ -32,7 +33,8 @@ public class TopSuggestDocs extends TopDocs { | |
/** | ||
* Singleton for empty {@link TopSuggestDocs} | ||
*/ | ||
public final static TopSuggestDocs EMPTY = new TopSuggestDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new SuggestScoreDoc[0]); | ||
public final static TopSuggestDocs EMPTY = new TopSuggestDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), | ||
new SuggestScoreDoc[0], true); | ||
|
||
/** | ||
* {@link org.apache.lucene.search.ScoreDoc} with an | ||
|
@@ -88,13 +90,19 @@ public String toString() { | |
} | ||
} | ||
|
||
/** | ||
* Indicates that all possibilities for completions were exhausted | ||
*/ | ||
final boolean isComplete; | ||
|
||
/** | ||
* {@link org.apache.lucene.search.TopDocs} wrapper with | ||
* {@link TopSuggestDocs.SuggestScoreDoc} | ||
* instead of {@link org.apache.lucene.search.ScoreDoc} | ||
*/ | ||
public TopSuggestDocs(TotalHits totalHits, SuggestScoreDoc[] scoreDocs) { | ||
public TopSuggestDocs(TotalHits totalHits, SuggestScoreDoc[] scoreDocs, boolean isComplete) { | ||
super(totalHits, scoreDocs); | ||
this.isComplete = isComplete; | ||
} | ||
|
||
/** | ||
|
@@ -116,19 +124,29 @@ public SuggestScoreDoc[] scoreLookupDocs() { | |
*/ | ||
public static TopSuggestDocs merge(int topN, TopSuggestDocs[] shardHits) { | ||
SuggestScoreDocPriorityQueue priorityQueue = new SuggestScoreDocPriorityQueue(topN); | ||
boolean allComplete = true; | ||
for (TopSuggestDocs shardHit : shardHits) { | ||
for (SuggestScoreDoc scoreDoc : shardHit.scoreLookupDocs()) { | ||
if (scoreDoc == priorityQueue.insertWithOverflow(scoreDoc)) { | ||
break; | ||
} | ||
} | ||
allComplete &= shardHit.isComplete; | ||
} | ||
SuggestScoreDoc[] topNResults = priorityQueue.getResults(); | ||
if (topNResults.length > 0) { | ||
return new TopSuggestDocs(new TotalHits(topNResults.length, TotalHits.Relation.EQUAL_TO), topNResults); | ||
return new TopSuggestDocs(new TotalHits(topNResults.length, TotalHits.Relation.EQUAL_TO), topNResults, | ||
allComplete); | ||
} else { | ||
return TopSuggestDocs.EMPTY; | ||
} | ||
} | ||
|
||
/** | ||
* Indicates if the list of results is complete or not. Might be <code>false</code> if the {@link TopNSearcher} rejected | ||
* too many of the queued results. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't it sometimes … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The admissibility of the search is computed from the reject count, so a value of … |
||
*/ | ||
public boolean isComplete() { | ||
return this.isComplete; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,8 @@ | |
* <p> | ||
* Subclasses should only override | ||
* {@link TopSuggestDocsCollector#collect(int, CharSequence, CharSequence, float)}. | ||
* Overriding subclasses can opt to reject documents, in which case | ||
* they should return <code>false</code> to signal this back to the caller. | ||
* <p> | ||
* NOTE: {@link #setScorer(org.apache.lucene.search.Scorable)} and | ||
* {@link #collect(int)} are not used | ||
|
@@ -63,6 +65,8 @@ public class TopSuggestDocsCollector extends SimpleCollector { | |
/** Document base offset for the current Leaf */ | ||
protected int docBase; | ||
|
||
private boolean isComplete = true; | ||
|
||
/** | ||
* Sole constructor | ||
* | ||
|
@@ -113,10 +117,14 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { | |
* similar to {@link org.apache.lucene.search.LeafCollector#collect(int)} | ||
* but for completions. | ||
* | ||
* This implementation always returns <code>true</code> because it collects all documents, but | ||
* subclasses overriding this method can choose to reject documents, in which case they should | ||
* return <code>false</code> to signal this back to the caller. | ||
* | ||
* NOTE: collection at the leaf level is guaranteed to be in | ||
* descending order of score | ||
*/ | ||
cbuescher marked this conversation as resolved.
Show resolved
Hide resolved
|
||
public void collect(int docID, CharSequence key, CharSequence context, float score) throws IOException { | ||
public boolean collect(int docID, CharSequence key, CharSequence context, float score) throws IOException { | ||
SuggestScoreDoc current = new SuggestScoreDoc(docBase + docID, key, context, score); | ||
if (current == priorityQueue.insertWithOverflow(current)) { | ||
// if the current SuggestScoreDoc has overflowed from pq, | ||
|
@@ -125,6 +133,7 @@ public void collect(int docID, CharSequence key, CharSequence context, float sco | |
// TODO: reuse the overflow instance? | ||
throw new CollectionTerminatedException(); | ||
} | ||
return true; | ||
} | ||
|
||
/** | ||
|
@@ -179,7 +188,8 @@ public TopSuggestDocs get() throws IOException { | |
} | ||
|
||
if (suggestScoreDocs.length > 0) { | ||
return new TopSuggestDocs(new TotalHits(suggestScoreDocs.length, TotalHits.Relation.EQUAL_TO), suggestScoreDocs); | ||
return new TopSuggestDocs(new TotalHits(suggestScoreDocs.length, TotalHits.Relation.EQUAL_TO), | ||
suggestScoreDocs, this.isComplete); | ||
} else { | ||
return TopSuggestDocs.EMPTY; | ||
} | ||
|
@@ -201,4 +211,27 @@ public void collect(int doc) throws IOException { | |
public ScoreMode scoreMode() { | ||
return ScoreMode.COMPLETE; | ||
} | ||
|
||
/** | ||
* returns true if the collector clearly exhausted all possibilities to collect results | ||
*/ | ||
boolean isComplete() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this needed now that we provide the information in the … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not necessary, but since we need the underlying flag and it's currently private, I thought it's okay to have at least a package-private getter. Or does it bloat the class too much? Happy to remove either way, what do you think? |
||
return this.isComplete ; | ||
} | ||
|
||
/** | ||
* call to signal that during collection at least one segment might have returned incomplete results, e.g. because | ||
* of too many rejections | ||
*/ | ||
void setNotComplete() { | ||
this.isComplete = false; | ||
} | ||
|
||
/** | ||
* Indicates whether this collector's {@link #collect(int, CharSequence, CharSequence, float)} method potentially rejects | ||
* documents. This information can be used, e.g., to estimate the necessary queue sizes in the searcher. | ||
*/ | ||
protected boolean canReject() { | ||
return false; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Whoa, was this optimization breaking something? I guess if this final path is filtered out, we still need the queue? Have you run the suggest benchmarks to see if removing this optimization hurts performance?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As far as I understand, this optimization assumes we will surely accept (and collect) the path later in L516's acceptResult(), which always seems to be the case for collectors that don't reject. But if the collector that is eventually called via NRTSuggester's acceptResult() chooses to reject this option, we lose expected results. This surfaced in the prefix completion tests I added. @jimczi might be able to explain this a bit better than I can.
No, where are they and how can I run them?