Skip to content

Commit

Permalink
Fixed bool filter bugs:
Browse files Browse the repository at this point in the history
 * In the case only should clauses were specified with specific type of filters, the first clause determined which documents matched.
 * In some cases the minimum at least 1 should clause should match behaviour was broken.
  • Loading branch information
martijnvg committed Mar 29, 2013
1 parent b657bdf commit a89dde8
Show file tree
Hide file tree
Showing 4 changed files with 609 additions and 25 deletions.
Expand Up @@ -78,10 +78,13 @@ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws
List<ResultClause> results = new ArrayList<ResultClause>(clauses.size());
boolean hasShouldClauses = false;
boolean hasNonEmptyShouldClause = false;
boolean hasMustClauses = false;
boolean hasMustNotClauses = false;
for (int i = 0; i < clauses.size(); i++) {
FilterClause clause = clauses.get(i);
DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
if (clause.getOccur() == Occur.MUST) {
hasMustClauses = true;
if (DocIdSets.isEmpty(set)) {
return null;
}
Expand All @@ -92,6 +95,7 @@ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws
}
hasNonEmptyShouldClause = true;
} else if (clause.getOccur() == Occur.MUST_NOT) {
hasMustNotClauses = true;
if (DocIdSets.isEmpty(set)) {
// we mark empty ones as null for must_not, handle it in the next run...
results.add(new ResultClause(null, null, clause));
Expand All @@ -110,6 +114,7 @@ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws
}

// now, go over the clauses and apply the "fast" ones...
hasNonEmptyShouldClause = false;
boolean hasBits = false;
for (int i = 0; i < results.size(); i++) {
ResultClause clause = results.get(i);
Expand All @@ -123,6 +128,7 @@ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws
if (it == null) {
continue;
}
hasNonEmptyShouldClause = true;
if (res == null) {
res = new FixedBitSet(reader.maxDoc());
}
Expand Down Expand Up @@ -153,40 +159,40 @@ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws
}

if (!hasBits) {
return res;
if (hasShouldClauses && !hasNonEmptyShouldClause) {
return null;
} else {
return res;
}
}

// we have some clauses with bits, apply them...
// we let the "res" drive the computation, and check Bits for that
List<ResultClause> orClauses = new ArrayList<ResultClause>();
for (int i = 0; i < results.size(); i++) {
ResultClause clause = results.get(i);
// we apply bits in based ones (slow) in the second run
if (clause.bits == null) {
continue;
}
if (clause.clause.getOccur() == Occur.SHOULD) {
if (res == null) {
DocIdSetIterator it = clause.docIdSet.iterator();
if (it == null) {
continue;
}
res = new FixedBitSet(reader.maxDoc());
res.or(it);
if (hasMustClauses || hasMustNotClauses) {
orClauses.add(clause);
} else {
Bits bits = clause.bits;
// use the "res" to drive the iteration
int lastSetDoc = res.nextSetBit(0);
DocIdSetIterator it = res.iterator();
for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
int diff = setDoc - lastSetDoc;
if (diff > 1) {
for (int unsetDoc = lastSetDoc + 1; unsetDoc < setDoc; unsetDoc++) {
if (bits.get(unsetDoc)) {
res.set(unsetDoc);
}
if (res == null) {
DocIdSetIterator it = clause.docIdSet.iterator();
if (it == null) {
continue;
}
hasNonEmptyShouldClause = true;
res = new FixedBitSet(reader.maxDoc());
res.or(it);
} else if (!hasMustNotClauses && !hasMustClauses) {
for (int doc = 0; doc < reader.maxDoc(); doc++) {
if (!res.get(doc) && clause.bits.get(doc)) {
hasNonEmptyShouldClause = true;
res.set(doc);
}
}
lastSetDoc = setDoc;
}
}
} else if (clause.clause.getOccur() == Occur.MUST) {
Expand Down Expand Up @@ -229,7 +235,32 @@ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws
}
}

return res;
// From a boolean_logic behavior point of view a should clause doesn't have impact on a bool filter if there
// is already a must or must_not clause. However in the current ES bool filter behaviour all should clauses
// must match in order for a doc to be a match. What we do here is checking if matched docs match with any
// should filter. TODO: Add an option to have disable minimum_should_match=1 behaviour
if (!orClauses.isEmpty()) {
DocIdSetIterator it = res.iterator();
for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
boolean match = false;
for (ResultClause orClause : orClauses) {
if (orClause.bits.get(setDoc)) {
match = true;
hasNonEmptyShouldClause = true;
}
}
if (!match) {
res.clear(setDoc);
}
}
}

if (hasShouldClauses && !hasNonEmptyShouldClause) {
return null;
} else {
return res;
}

}

/**
Expand Down
Expand Up @@ -1087,4 +1087,61 @@ public void testNumericTermsAndRanges() throws Exception {
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1l));
assertThat(searchResponse.getHits().getAt(0).getId(), equalTo("1"));
}

@Test
public void testNumericRangeFilter_2826() throws Exception {
client.admin().indices().prepareDelete().execute().actionGet();
client.admin().indices().prepareCreate("test").setSettings(
ImmutableSettings.settingsBuilder()
.put("index.number_of_shards", 1)
.put("index.number_of_replicas", 0)
)
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
.startObject("num_byte").field("type", "byte").endObject()
.startObject("num_short").field("type", "short").endObject()
.startObject("num_integer").field("type", "integer").endObject()
.startObject("num_long").field("type", "long").endObject()
.startObject("num_float").field("type", "float").endObject()
.startObject("num_double").field("type", "double").endObject()
.endObject().endObject().endObject())
.execute().actionGet();

client.prepareIndex("test", "type1", "1").setSource(jsonBuilder().startObject()
.field("num_long", 1)
.endObject())
.execute().actionGet();

client.prepareIndex("test", "type1", "2").setSource(jsonBuilder().startObject()
.field("num_long", 2)
.endObject())
.execute().actionGet();

client.prepareIndex("test", "type1", "3").setSource(jsonBuilder().startObject()
.field("num_long", 3)
.endObject())
.execute().actionGet();

client.prepareIndex("test", "type1", "4").setSource(jsonBuilder().startObject()
.field("num_long", 4)
.endObject())
.execute().actionGet();

client.admin().indices().prepareRefresh().execute().actionGet();
SearchResponse response = client.prepareSearch("test").setFilter(
FilterBuilders.boolFilter()
.should(FilterBuilders.rangeFilter("num_long").from(1).to(2))
.should(FilterBuilders.rangeFilter("num_long").from(3).to(4))
).execute().actionGet();
assertThat(response.getHits().totalHits(), equalTo(4l));

// This made 2826 fail! (only with bit based filters)
response = client.prepareSearch("test").setFilter(
FilterBuilders.boolFilter()
.should(FilterBuilders.numericRangeFilter("num_long").from(1).to(2))
.should(FilterBuilders.numericRangeFilter("num_long").from(3).to(4))
).execute().actionGet();

assertThat(response.getHits().totalHits(), equalTo(4l));
}

}

0 comments on commit a89dde8

Please sign in to comment.