Skip to content

Commit

Permalink
use BooleanFilter instead of BooleanQuery to pre-filter in array equals case
Browse files Browse the repository at this point in the history

This way the execution order can be controlled: first the
BooleanFilter with TermFilters pre-filters the result, and
then the genericFunctionFilter is applied only to the
results that remain.

This is a lot faster and also has the advantage that
the "MaxBooleanClause" limit no longer applies.
  • Loading branch information
mfussenegger committed Feb 9, 2015
1 parent 012650a commit e13e690
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 23 deletions.
2 changes: 2 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ Changes for Crate
Unreleased
==========

- Improved performance of queries that use the equals operator on arrays

- COPY FROM now skips rows with the same primary key instead of overriding them

2015/02/06 0.47.0
Expand Down
2 changes: 1 addition & 1 deletion core/src/main/java/io/crate/Version.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public class Version {
// the (internal) format of the id is there so we can easily do after/before checks on the id


public static final boolean SNAPSHOT = false;
public static final boolean SNAPSHOT = true;
public static final Version CURRENT = new Version(470099, SNAPSHOT, org.elasticsearch.Version.V_1_4_2);

static {
Expand Down
40 changes: 18 additions & 22 deletions sql/src/main/java/io/crate/lucene/LuceneQueryBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -345,57 +345,53 @@ public Query apply(Function input, Context context) {
if (tuple == null) {
return null;
}

Reference reference = tuple.v1();
Literal literal = tuple.v2();
String columnName = reference.info().ident().columnIdent().fqn();
if (DataTypes.isCollectionType(reference.valueType()) && DataTypes.isCollectionType(literal.valueType())) {
// create terms query to utilize lucene index to pre-filter the result..
BooleanQuery booleanQuery = new BooleanQuery();

// create boolean filter with term filters to pre-filter the result before applying the functionQuery.
BooleanFilter boolTermsFilter = new BooleanFilter();
DataType type = literal.valueType();
while (DataTypes.isCollectionType(type)) {
type = ((CollectionType) type).innerType();
}
QueryBuilderHelper builder = QueryBuilderHelper.forType(type);
Object value = literal.value();
buildTermsQuery(booleanQuery, value, columnName, builder);
buildTermsQuery(boolTermsFilter, value, columnName, builder);

if (booleanQuery.clauses().isEmpty()) {
if (boolTermsFilter.clauses().isEmpty()) {
// all values are null...
return genericFunctionQuery(input);
}
// genericFunctionFilter will do the exact match, operating on the _DOC

// wrap boolTermsFilter and genericFunction filter in an additional BooleanFilter to control the ordering of the filters
// termsFilter is applied first
// afterwards the more expensive genericFunctionFilter
BooleanFilter filterClauses = new BooleanFilter();
filterClauses.add(new QueryWrapperFilter(booleanQuery), BooleanClause.Occur.MUST);
filterClauses.add(boolTermsFilter, BooleanClause.Occur.MUST);
filterClauses.add(genericFunctionFilter(input), BooleanClause.Occur.MUST);
return new FilteredQuery(Queries.newMatchAllQuery(), filterClauses);
}
QueryBuilderHelper builder = QueryBuilderHelper.forType(tuple.v1().valueType());
return builder.eq(columnName, tuple.v2().value());
}

private boolean buildTermsQuery(BooleanQuery booleanQuery,
private void buildTermsQuery(BooleanFilter booleanFilter,
Object value,
String columnName,
QueryBuilderHelper builder) {
if (value == null) {
return true;
return;
}
if (value.getClass().isArray()) {
Object[] array = (Object[]) value;
for (Object o : array) {
if (!buildTermsQuery(booleanQuery, o, columnName, builder)) {
return false;
}
buildTermsQuery(booleanFilter, o, columnName, builder);
}
} else {
try {
booleanQuery.add(builder.eq(columnName, value), BooleanClause.Occur.MUST);
} catch (BooleanQuery.TooManyClauses e) {
return false;
}
booleanFilter.add(builder.eqFilter(columnName, value), BooleanClause.Occur.MUST);
}
return true;
}
}

Expand Down Expand Up @@ -904,11 +900,11 @@ private Filter genericFunctionFilter(Function function) {
for (LuceneCollectorExpression expression : expressions) {
expression.startCollect(collectorContext);
}
Filter filter = new Filter() {
return new Filter() {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
for (LuceneCollectorExpression expression : expressions) {
expression.setNextReader(context);
expression.setNextReader(context.reader().getContext());
}
return BitsFilteredDocIdSet.wrap(
new FunctionDocSet(
Expand All @@ -919,10 +915,10 @@ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws
context.reader().maxDoc(),
acceptDocs
),
acceptDocs);
acceptDocs
);
}
};
return indexCache.filter().cache(filter);
}

private Query genericFunctionQuery(Function function) {
Expand Down
10 changes: 10 additions & 0 deletions sql/src/main/java/io/crate/lucene/QueryBuilderHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import io.crate.types.DataType;
import io.crate.types.DataTypes;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermFilter;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.BytesRefs;
Expand Down Expand Up @@ -51,6 +52,10 @@ public static QueryBuilderHelper forType(DataType dataType) {
public abstract Filter rangeFilter(String columnName, Object from, Object to, boolean includeLower, boolean includeUpper);
public abstract Query rangeQuery(String columnName, Object from, Object to, boolean includeLower, boolean includeUpper);

/**
 * Builds a filter that matches documents whose {@code columnName} equals {@code value}.
 *
 * This default implementation expresses equality as a range filter whose lower and
 * upper bound are both {@code value}, with both bounds inclusive.
 *
 * @param columnName name of the column (field) to filter on
 * @param value the value to match; passed unchanged as both range bounds
 * @return the filter produced by {@link #rangeFilter}
 */
public Filter eqFilter(String columnName, Object value) {
return rangeFilter(columnName, value, value, true, true);
}

/**
 * Builds a query that matches documents whose {@code columnName} equals {@code value}.
 *
 * This default implementation expresses equality as a range query whose lower and
 * upper bound are both {@code value}, with both bounds inclusive.
 *
 * @param columnName name of the column (field) to query
 * @param value the value to match; passed unchanged as both range bounds
 * @return the query produced by {@link #rangeQuery}
 */
public Query eq(String columnName, Object value) {
return rangeQuery(columnName, value, value, true, true);
}
Expand Down Expand Up @@ -185,6 +190,11 @@ public Query eq(String columnName, Object value) {
return new TermQuery(new Term(columnName, (BytesRef)value));
}

/**
 * Builds an exact-match filter as a {@link TermFilter} on a single term made of
 * {@code columnName} and {@code value}, instead of a range filter.
 *
 * @param columnName name of the column (field) the term belongs to
 * @param value the term value; it is cast to {@link BytesRef}, so a non-null value of
 *              any other type fails with a {@link ClassCastException}
 * @return a {@link TermFilter} for the given column and value
 */
@Override
public Filter eqFilter(String columnName, Object value) {
return new TermFilter(new Term(columnName, (BytesRef) value));
}

@Override
public Query like(String columnName, Object value) {
return new WildcardQuery(
Expand Down
5 changes: 5 additions & 0 deletions sql/src/test/java/io/crate/lucene/LuceneQueryBuilderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import io.crate.types.DataType;
import io.crate.types.DataTypes;
import io.crate.types.SetType;
import org.apache.lucene.queries.BooleanFilter;
import org.apache.lucene.sandbox.queries.regex.RegexQuery;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
Expand Down Expand Up @@ -99,6 +100,10 @@ public void testEqOnTwoArraysBecomesGenericFunctionQuery() throws Exception {
createReference("x", longArray),
Literal.newLiteral(longArray, new Object[] { 10L, null, 20L }))));
assertThat(query, instanceOf(FilteredQuery.class));
FilteredQuery filteredQuery = (FilteredQuery) query;

assertThat(filteredQuery.getFilter(), instanceOf(BooleanFilter.class));
assertThat(filteredQuery.getQuery(), instanceOf(FilteredQuery.class));
}

@Test
Expand Down

0 comments on commit e13e690

Please sign in to comment.