Skip to content

Commit

Permalink
change lucene query generation
Browse files Browse the repository at this point in the history
Previously, symbols were used to generate xcontent, which was then
parsed by ES components to generate Lucene query
classes. So the process was:

    symbols -> xcontent -> query

This commit removes the -> xcontent step for all SELECT
queries (DELETE / UPDATE still use xcontent) so now it is:

    symbols -> query

This also enables a generic "FunctionFilter" which is a
fallback that will use the Scalar functions to evaluate any
conditions in the where clause if it is not possible to
create optimized Lucene queries.

So with this commit it is possible to do stuff like

    where oneCol = anotherCol
  • Loading branch information
mfussenegger committed Sep 24, 2014
1 parent eb12b5e commit 61f0e8c
Show file tree
Hide file tree
Showing 30 changed files with 2,714 additions and 278 deletions.
7 changes: 7 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ Changes for Crate Data
Unreleased
==========

- All scalar functions can now be used in the WHERE clause in SELECT
statements and they may be nested without limitations. In addition it is now
possible to compare one scalar function to another.

- Added support to compare one column with another in the WHERE clause for
SELECT statements.

- Implemented regular expression scalar functions and query operator

- Support for accessing items in an array. Please refer to the
Expand Down
38 changes: 34 additions & 4 deletions core/src/main/java/io/crate/core/StringUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,7 @@
import com.google.common.base.Splitter;
import org.elasticsearch.common.Nullable;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

Expand Down Expand Up @@ -65,6 +62,39 @@ public static String sqlToDottedPath(String sqlPath) {
return PATH_JOINER.join(s);
}

/**
 * Return the common ancestors of a list of fields.<br />
 * A field is a string that can use the dotted-notation to indicate nesting.<br />
 * A field is dropped from the result if any of its proper dotted prefixes
 * (complete path elements only) is itself contained in <code>fields</code>.
 *
 * <pre>
 * fields: [ "a", "a.b", "b.c", "b.c.d"]
 * returns: [ "a", "b.c" ]
 * </pre>
 *
 * The passed list is only read; it is neither sorted nor otherwise modified,
 * so unmodifiable lists are accepted as well.
 *
 * @param fields a list of strings where each string may contain dots as its separator
 * @return a new, mutable set of strings with only the common ancestors.
 */
public static Set<String> commonAncestors(List<String> fields){
    // Lookup view of the input. Checking prefixes against this set (instead of
    // sorting and comparing with the previously kept entry) stays correct when a
    // sibling such as "a-b" sorts between "a" and "a.b", because '-' < '.'.
    Set<String> known = new HashSet<>(fields);
    Set<String> result = new HashSet<>(fields.size());

    for (String field : fields) {
        boolean hasListedAncestor = false;
        // Walk every proper prefix that ends right before a '.' separator.
        for (int dot = field.indexOf('.');
             dot >= 0 && !hasListedAncestor;
             dot = field.indexOf('.', dot + 1)) {
            hasListedAncestor = known.contains(field.substring(0, dot));
        }
        if (!hasListedAncestor) {
            result.add(field);
        }
    }
    return result;
}

/**
* check if a collection of Strings containing dotted paths contains at least one element
* beginning with <code>prefix</code>, which consists of one or more complete path elements
Expand Down
3 changes: 3 additions & 0 deletions core/src/main/java/io/crate/types/DoubleType.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ public Double value(Object value) {
if (value instanceof BytesRef) {
return Double.valueOf(((BytesRef)value).utf8ToString());
}
if (value instanceof Double) {
return (Double) value;
}
return ((Number)value).doubleValue();
}

Expand Down
12 changes: 12 additions & 0 deletions core/src/test/java/io/crate/core/StringUtilsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

package io.crate.core;

import com.google.common.collect.ImmutableSet;
import org.junit.Test;

import java.util.Arrays;
Expand Down Expand Up @@ -59,4 +60,15 @@ public void testGetPathListByPrefix() throws Exception {
assertEquals(StringUtils.getPathByPrefix(Arrays.asList("a", "b.c"), "b"), "b.c");
assertEquals(StringUtils.getPathByPrefix(Arrays.asList("a", "bc"), "b"), null);
}

/**
 * Fields whose parent path is also listed must be dropped from the result,
 * independent of the order in which the fields are passed in.
 */
@Test
public void testCommonAncestors() throws Exception {
    // a direct child collapses into its listed parent
    ImmutableSet<String> parentOnly = ImmutableSet.of("a");
    assertEquals(parentOnly, StringUtils.commonAncestors(Arrays.asList("a", "a.b")));

    // "b" is listed, so the deeper "b.c.d" is covered by it
    ImmutableSet<String> topLevelOnly = ImmutableSet.of("d", "a", "b");
    assertEquals(
            topLevelOnly,
            StringUtils.commonAncestors(Arrays.asList("a.c", "b", "b.c.d", "a", "a.b", "d")));

    // "b" is not listed, so "b.c" is the top-most listed path of that branch
    ImmutableSet<String> nestedAncestor = ImmutableSet.of("d", "a", "b.c");
    assertEquals(
            nestedAncestor,
            StringUtils.commonAncestors(Arrays.asList("a.c", "b.c", "b.c.d", "a", "a.b", "d")));
}
}
15 changes: 10 additions & 5 deletions docs/sql/scalar.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,16 @@ Scalar functions return a single data value (not a table).

.. note::

Scalar functions aren't available in all query clauses. For example if a
regular (non-system table) is queried the WHERE clause may not contain any
scalar functions unless otherwise described in a function description below.
While some scalar functions are supported within a WHERE clause, comparing
two scalar functions is never supported.
In UPDATE and DELETE statements scalar functions cannot be used in the WHERE
clause unless otherwise described in the function description below.

Even if a function description below states that the scalar function can
always be used in a WHERE clause, it is still not possible to compare one
scalar function with another in UPDATE and DELETE statements.

.. note::

Scalar functions cannot be used in the SET clause of an UPDATE statement.

See below for a list of available scalar functions.

Expand Down
84 changes: 0 additions & 84 deletions sql/src/main/java/io/crate/action/SQLXContentQueryParser.java

This file was deleted.

111 changes: 80 additions & 31 deletions sql/src/main/java/io/crate/action/sql/query/CrateSearchService.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,39 +21,37 @@

package io.crate.action.sql.query;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.crate.Constants;
import io.crate.executor.transport.task.elasticsearch.ESQueryBuilder;
import io.crate.core.StringUtils;
import io.crate.executor.transport.task.elasticsearch.SortOrder;
import io.crate.lucene.LuceneQueryBuilder;
import io.crate.metadata.ColumnIdent;
import io.crate.metadata.Functions;
import io.crate.metadata.Routing;
import io.crate.metadata.ReferenceInfo;
import io.crate.metadata.doc.DocSysColumns;
import io.crate.operation.Input;
import io.crate.operation.collect.CollectInputSymbolVisitor;
import io.crate.operation.reference.doc.lucene.CollectorContext;
import io.crate.operation.reference.doc.lucene.LuceneCollectorExpression;
import io.crate.operation.reference.doc.lucene.LuceneDocLevelReferenceResolver;
import io.crate.planner.node.dql.QueryThenFetchNode;
import io.crate.planner.symbol.*;
import io.crate.types.DataType;
import io.crate.types.DataTypes;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.*;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.cache.recycler.CacheRecycler;
import org.elasticsearch.cache.recycler.PageCacheRecycler;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.index.service.IndexService;
import org.elasticsearch.index.shard.service.IndexShard;
import org.elasticsearch.indices.IndicesLifecycle;
Expand All @@ -65,6 +63,7 @@
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.dfs.DfsPhase;
import org.elasticsearch.search.fetch.FetchPhase;
import org.elasticsearch.search.fetch.source.FetchSourceContext;
import org.elasticsearch.search.internal.DefaultSearchContext;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.internal.ShardSearchRequest;
Expand All @@ -78,11 +77,12 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class CrateSearchService extends InternalSearchService {

private static final ESQueryBuilder ESQueryBuilder = new ESQueryBuilder();
private final SortSymbolVisitor sortSymbolVisitor;
private final Functions functions;

@Inject
public CrateSearchService(Settings settings,
Expand All @@ -104,6 +104,7 @@ public CrateSearchService(Settings settings,
threadPool,
scriptService,
cacheRecycler, pageCacheRecycler, bigArrays, dfsPhase, queryPhase, fetchPhase);
this.functions = functions;
CollectInputSymbolVisitor<LuceneCollectorExpression<?>> inputSymbolVisitor =
new CollectInputSymbolVisitor<>(functions, LuceneDocLevelReferenceResolver.INSTANCE);
sortSymbolVisitor = new SortSymbolVisitor(inputSymbolVisitor);
Expand Down Expand Up @@ -190,33 +191,23 @@ private SearchContext createContext(QueryShardRequest request, @Nullable Engine.
SearchContext.setCurrent(context);

try {
QueryThenFetchNode searchNode = new QueryThenFetchNode(
new Routing(null),
request.outputs(),
// can omit sort because it's added below using generateLuceneSort
ImmutableList.<Symbol>of(),
new boolean[0],
new Boolean[0],
request.limit(),
request.offset(),
request.whereClause(),
request.partitionBy()
);
LuceneQueryBuilder builder = new LuceneQueryBuilder(functions, context, indexService.cache());
LuceneQueryBuilder.Context ctx = builder.convert(request.whereClause());
context.parsedQuery(new ParsedQuery(ctx.query(), ImmutableMap.<String, Filter>of()));
Float minScore = ctx.minScore();
if (minScore != null) {
context.minimumScore(minScore);
}

// TODO: remove xcontent
BytesReference source = ESQueryBuilder.convert(searchNode);
parseSource(context, source);
// the OUTPUTS_VISITOR sets the sourceFetchContext / version / minScore onto the SearchContext
OutputContext outputContext = new OutputContext(context, request.partitionBy());
OUTPUTS_VISITOR.process(request.outputs(), outputContext);

context.sort(generateLuceneSort(
context, request.orderBy(), request.reverseFlags(), request.nullsFirst()));

// if the from and size are still not set, default them
if (context.from() == -1) {
context.from(0);
}
if (context.size() == -1) {
context.size(10);
}
context.from(request.offset());
context.size(request.limit());

// pre process
dfsPhase.preProcess(context);
Expand All @@ -233,6 +224,64 @@ private SearchContext createContext(QueryShardRequest request, @Nullable Engine.
return context;
}

// Single shared visitor instance. OutputSymbolVisitor declares no fields of its own;
// everything it reads or writes is passed in through an OutputContext.
private static final OutputSymbolVisitor OUTPUTS_VISITOR = new OutputSymbolVisitor();

/**
 * State handed to {@link OutputSymbolVisitor} while it walks the output symbols:
 * the search context to configure, the partition columns, and what has been
 * collected so far.
 */
private static class OutputContext {
    /** Fully qualified names of the visited non-system, non-partition columns. */
    private final List<String> fields = new ArrayList<>();
    /** Columns that are never added to {@link #fields}. */
    private final List<ReferenceInfo> partitionBy;
    /** Search context on which the visitor sets the version flag and the source filter. */
    private final SearchContext searchContext;
    /** Set by the visitor when a system column other than the version column is requested. */
    public boolean needWholeSource;

    private OutputContext(SearchContext searchContext, List<ReferenceInfo> partitionBy) {
        this.partitionBy = partitionBy;
        this.searchContext = searchContext;
    }
}

/**
 * Walks the output symbols of a request and configures the search context
 * (version flag and source filtering) held by the given {@link OutputContext}.
 * Only references are accepted as outputs; any other symbol is rejected.
 */
private static class OutputSymbolVisitor extends SymbolVisitor<OutputContext, Void> {

    /**
     * Visits every output and afterwards installs a source filter, unless one of
     * the outputs flagged that the whole source is needed.
     *
     * @param outputs the symbols to visit
     * @param context the state that is updated while visiting
     */
    public void process(List<Symbol> outputs, OutputContext context) {
        for (Symbol symbol : outputs) {
            process(symbol, context);
        }
        if (context.needWholeSource) {
            // leave the search context's source handling untouched
            return;
        }

        FetchSourceContext fetchSourceContext;
        if (context.fields.isEmpty()) {
            // no column collected; presumably this turns source loading off -- confirm against ES
            fetchSourceContext = new FetchSourceContext(false);
        } else {
            // nested fields are already covered by their listed ancestors
            Set<String> ancestors = StringUtils.commonAncestors(context.fields);
            String[] includes = ancestors.toArray(new String[ancestors.size()]);
            fetchSourceContext = new FetchSourceContext(includes);
        }
        context.searchContext.fetchSourceContext(fetchSourceContext);
    }

    /**
     * Regular columns are recorded by name unless they are partition columns.
     * The version system column enables versioning on the search context; every
     * other system column marks the whole source as required.
     */
    @Override
    public Void visitReference(Reference symbol, OutputContext context) {
        ColumnIdent column = symbol.info().ident().columnIdent();
        if (!column.isSystemColumn()) {
            boolean isPartitionColumn = context.partitionBy.contains(symbol.info());
            if (!isPartitionColumn) {
                context.fields.add(column.fqn());
            }
            return null;
        }

        if (DocSysColumns.VERSION.equals(column)) {
            context.searchContext.version(true);
        } else {
            context.needWholeSource = true;
        }
        return null;
    }

    /** Dynamic references are handled exactly like regular references. */
    @Override
    public Void visitDynamicReference(DynamicReference symbol, OutputContext context) {
        return visitReference(symbol, context);
    }

    /** Fallback for every symbol type without a dedicated visit method: not usable as an output. */
    @Override
    protected Void visitSymbol(Symbol symbol, OutputContext context) {
        String message = SymbolFormatter.format("Can't use %s as an output", symbol);
        throw new UnsupportedOperationException(message);
    }
}

private static final Map<DataType, SortField.Type> luceneTypeMap = ImmutableMap.<DataType, SortField.Type>builder()
.put(DataTypes.STRING, SortField.Type.STRING)
.put(DataTypes.LONG, SortField.Type.LONG)
Expand Down
Loading

0 comments on commit 61f0e8c

Please sign in to comment.