Skip to content

Commit

Permalink
LUCENE-7369: Similarity.coord and BooleanQuery.disableCoord are removed.
Browse files Browse the repository at this point in the history
  • Loading branch information
jpountz committed Jul 7, 2016
1 parent 24d6b78 commit f1528bf
Show file tree
Hide file tree
Showing 77 changed files with 164 additions and 1,807 deletions.
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ API Changes
* LUCENE-2605: Classic QueryParser no longer splits on whitespace by default.
Use setSplitOnWhitespace(true) to get the old behavior. (Steve Rowe)

* LUCENE-7369: Similarity.coord and BooleanQuery.disableCoord are removed.
(Adrien Grand)

Bug Fixes

Improvements
Expand Down
11 changes: 11 additions & 0 deletions lucene/MIGRATE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,14 @@ classes from the java.util.zip package.

Clients wishing to render Explanations as HTML should implement their own
utilities for this.

## Similarity.coord and BooleanQuery.disableCoord removed (LUCENE-7369)

Coordination factors were a workaround for the fact that the ClassicSimilarity
does not have strong enough term frequency saturation. This causes disjunctions
to get better scores on documents that have many occurrences of a few query
terms than on documents that match most clauses, which is most of time
undesirable. The new BM25Similarity does not suffer from this problem since it
has better saturation for the contribution of the term frequency so the coord
factors have been removed from scores. Things now work as if coords were always
disabled when constructing boolean queries.
Original file line number Diff line number Diff line change
Expand Up @@ -125,16 +125,14 @@ protected RewriteMethod() {}
}

/**
* A {@link RewriteMethod} that adds all sub queries to a {@link BooleanQuery}
* which has {@link BooleanQuery#isCoordDisabled() coords disabled}. This
* {@link RewriteMethod} is useful when matching on several fields is
* A {@link RewriteMethod} that adds all sub queries to a {@link BooleanQuery}.
* This {@link RewriteMethod} is useful when matching on several fields is
* considered better than having a good match on a single field.
*/
public static final RewriteMethod BOOLEAN_REWRITE = new RewriteMethod() {
@Override
public Query rewrite(Query[] subQueries) {
BooleanQuery.Builder merged = new BooleanQuery.Builder();
merged.setDisableCoord(true);
for (Query query : subQueries) {
merged.add(query, Occur.SHOULD);
}
Expand Down
36 changes: 4 additions & 32 deletions lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.Similarity;

/** A Query that matches documents matching boolean combinations of other
* queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
Expand Down Expand Up @@ -74,24 +73,12 @@ public static void setMaxClauseCount(int maxClauseCount) {
/** A builder for boolean queries. */
public static class Builder {

private boolean disableCoord;
private int minimumNumberShouldMatch;
private final List<BooleanClause> clauses = new ArrayList<>();

/** Sole constructor. */
public Builder() {}

/**
* {@link Similarity#coord(int,int)} may be disabled in scoring, as
* appropriate. For example, this score factor does not make sense for most
* automatically generated queries, like {@link WildcardQuery} and {@link
* FuzzyQuery}.
*/
public Builder setDisableCoord(boolean disableCoord) {
this.disableCoord = disableCoord;
return this;
}

/**
* Specifies a minimum number of the optional BooleanClauses
* which must be satisfied.
Expand Down Expand Up @@ -142,19 +129,17 @@ public Builder add(Query query, Occur occur) {
/** Create a new {@link BooleanQuery} based on the parameters that have
* been set on this builder. */
public BooleanQuery build() {
return new BooleanQuery(disableCoord, minimumNumberShouldMatch, clauses.toArray(new BooleanClause[0]));
return new BooleanQuery(minimumNumberShouldMatch, clauses.toArray(new BooleanClause[0]));
}

}

private final boolean disableCoord;
private final int minimumNumberShouldMatch;
private final List<BooleanClause> clauses; // used for toString() and getClauses()
private final Map<Occur, Collection<Query>> clauseSets; // used for equals/hashcode

private BooleanQuery(boolean disableCoord, int minimumNumberShouldMatch,
private BooleanQuery(int minimumNumberShouldMatch,
BooleanClause[] clauses) {
this.disableCoord = disableCoord;
this.minimumNumberShouldMatch = minimumNumberShouldMatch;
this.clauses = Collections.unmodifiableList(Arrays.asList(clauses));
clauseSets = new EnumMap<>(Occur.class);
Expand All @@ -169,13 +154,6 @@ private BooleanQuery(boolean disableCoord, int minimumNumberShouldMatch,
}
}

/**
* Return whether the coord factor is disabled.
*/
public boolean isCoordDisabled() {
return disableCoord;
}

/**
* Gets the minimum number of the optional BooleanClauses
* which must be satisfied.
Expand Down Expand Up @@ -223,7 +201,7 @@ public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws I
if (needsScores == false) {
query = rewriteNoScoring();
}
return new BooleanWeight(query, searcher, needsScores, disableCoord);
return new BooleanWeight(query, searcher, needsScores);
}

@Override
Expand Down Expand Up @@ -258,7 +236,6 @@ public Query rewrite(IndexReader reader) throws IOException {
// recursively rewrite
{
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setDisableCoord(isCoordDisabled());
builder.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch());
boolean actuallyRewritten = false;
for (BooleanClause clause : this) {
Expand All @@ -284,7 +261,6 @@ public Query rewrite(IndexReader reader) throws IOException {
// since clauseSets implicitly deduplicates FILTER and MUST_NOT
// clauses, this means there were duplicates
BooleanQuery.Builder rewritten = new BooleanQuery.Builder();
rewritten.setDisableCoord(disableCoord);
rewritten.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
for (Map.Entry<Occur, Collection<Query>> entry : clauseSets.entrySet()) {
final Occur occur = entry.getKey();
Expand All @@ -304,7 +280,6 @@ public Query rewrite(IndexReader reader) throws IOException {
modified |= filters.removeAll(clauseSets.get(Occur.MUST));
if (modified) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setDisableCoord(isCoordDisabled());
builder.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch());
for (BooleanClause clause : clauses) {
if (clause.getOccur() != Occur.FILTER) {
Expand Down Expand Up @@ -355,7 +330,6 @@ public Query rewrite(IndexReader reader) throws IOException {

// now add back the SHOULD clauses
builder = new BooleanQuery.Builder()
.setDisableCoord(isCoordDisabled())
.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch())
.add(rewritten, Occur.MUST);
for (Query query : clauseSets.get(Occur.SHOULD)) {
Expand Down Expand Up @@ -414,7 +388,6 @@ public String toString(String field) {
* Compares the specified object with this boolean query for equality.
* Returns true if and only if the provided object<ul>
* <li>is also a {@link BooleanQuery},</li>
* <li>has the same value of {@link #isCoordDisabled()}</li>
* <li>has the same value of {@link #getMinimumNumberShouldMatch()}</li>
* <li>has the same {@link Occur#SHOULD} clauses, regardless of the order</li>
* <li>has the same {@link Occur#MUST} clauses, regardless of the order</li>
Expand All @@ -431,12 +404,11 @@ public boolean equals(Object o) {

private boolean equalsTo(BooleanQuery other) {
return getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch() &&
disableCoord == other.disableCoord &&
clauseSets.equals(other.clauseSets);
}

private int computeHashCode() {
int hashCode = Objects.hash(disableCoord, minimumNumberShouldMatch, clauseSets);
int hashCode = Objects.hash(minimumNumberShouldMatch, clauseSets);
if (hashCode == 0) {
hashCode = 1;
}
Expand Down
36 changes: 9 additions & 27 deletions lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ public BulkScorerAndDoc get(int i) {
// This is basically an inlined FixedBitSet... seems to help with bound checks
final long[] matching = new long[SET_SIZE];

final float[] coordFactors;
final BulkScorerAndDoc[] leads;
final HeadPriorityQueue head;
final TailPriorityQueue tail;
Expand Down Expand Up @@ -146,7 +145,7 @@ public void collect(int doc) throws IOException {

final OrCollector orCollector = new OrCollector();

BooleanScorer(BooleanWeight weight, boolean disableCoord, int maxCoord, Collection<BulkScorer> scorers, int minShouldMatch, boolean needsScores) {
BooleanScorer(BooleanWeight weight, Collection<BulkScorer> scorers, int minShouldMatch, boolean needsScores) {
if (minShouldMatch < 1 || minShouldMatch > scorers.size()) {
throw new IllegalArgumentException("minShouldMatch should be within 1..num_scorers. Got " + minShouldMatch);
}
Expand All @@ -172,11 +171,6 @@ public void collect(int doc) throws IOException {
}
}
this.cost = cost(scorers, minShouldMatch);

coordFactors = new float[scorers.size() + 1];
for (int i = 0; i < coordFactors.length; i++) {
coordFactors[i] = disableCoord ? 1.0f : weight.coord(i, maxCoord);
}
}

@Override
Expand All @@ -189,7 +183,7 @@ private void scoreDocument(LeafCollector collector, int base, int i) throws IOEx
final Bucket bucket = buckets[i];
if (bucket.freq >= minShouldMatch) {
fakeScorer.freq = bucket.freq;
fakeScorer.score = (float) bucket.score * coordFactors[bucket.freq];
fakeScorer.score = (float) bucket.score;
final int doc = base | i;
fakeScorer.doc = doc;
collector.collect(doc);
Expand Down Expand Up @@ -275,20 +269,20 @@ private void scoreWindowMultipleScorers(LeafCollector collector, Bits acceptDocs
}
}

private void scoreWindowSingleScorer(BulkScorerAndDoc bulkScorer, LeafCollector collector, LeafCollector singleClauseCollector,
private void scoreWindowSingleScorer(BulkScorerAndDoc bulkScorer, LeafCollector collector,
Bits acceptDocs, int windowMin, int windowMax, int max) throws IOException {
assert tail.size() == 0;
final int nextWindowBase = head.top().next & ~MASK;
final int end = Math.max(windowMax, Math.min(max, nextWindowBase));
bulkScorer.score(singleClauseCollector, acceptDocs, windowMin, end);

bulkScorer.score(collector, acceptDocs, windowMin, end);

// reset the scorer that should be used for the general case
collector.setScorer(fakeScorer);
}

private BulkScorerAndDoc scoreWindow(BulkScorerAndDoc top, LeafCollector collector,
LeafCollector singleClauseCollector, Bits acceptDocs, int min, int max) throws IOException {
Bits acceptDocs, int min, int max) throws IOException {
final int windowBase = top.next & ~MASK; // find the window that the next match belongs to
final int windowMin = Math.max(min, windowBase);
final int windowMax = Math.min(max, windowBase + SIZE);
Expand All @@ -304,7 +298,7 @@ private BulkScorerAndDoc scoreWindow(BulkScorerAndDoc top, LeafCollector collect
// special case: only one scorer can match in the current window,
// we can collect directly
final BulkScorerAndDoc bulkScorer = leads[0];
scoreWindowSingleScorer(bulkScorer, collector, singleClauseCollector, acceptDocs, windowMin, windowMax, max);
scoreWindowSingleScorer(bulkScorer, collector, acceptDocs, windowMin, windowMax, max);
return head.add(bulkScorer);
} else {
// general case, collect through a bit set first and then replay
Expand All @@ -318,21 +312,9 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr
fakeScorer.doc = -1;
collector.setScorer(fakeScorer);

final LeafCollector singleClauseCollector;
if (coordFactors[1] == 1f) {
singleClauseCollector = collector;
} else {
singleClauseCollector = new FilterLeafCollector(collector) {
@Override
public void setScorer(Scorer scorer) throws IOException {
super.setScorer(new BooleanTopLevelScorers.BoostedScorer(scorer, coordFactors[1]));
}
};
}

BulkScorerAndDoc top = advance(min);
while (top.next < max) {
top = scoreWindow(top, collector, singleClauseCollector, acceptDocs, min, max);
top = scoreWindow(top, collector, acceptDocs, min, max);
}

return top.next;
Expand Down
Loading

0 comments on commit f1528bf

Please sign in to comment.