Skip to content

Commit

Permalink
SOLR-15579: Re-configure calcite to allow more values in an IN clause (
Browse files Browse the repository at this point in the history
  • Loading branch information
thelabdude committed Aug 12, 2021
1 parent 48ce929 commit 1481f78
Show file tree
Hide file tree
Showing 7 changed files with 223 additions and 52 deletions.
3 changes: 3 additions & 0 deletions solr/CHANGES.txt
Expand Up @@ -70,6 +70,9 @@ Bug Fixes
* SOLR-14758: Fix NPE in QueryComponent.mergeIds when using timeAllowed and sorting
(Bram Van Dam, Uwe Schindler)

* SOLR-15579: Allow for many values in a SQL IN clause; previously using more than 19 would result in
the IN clause being ignored, now users are only limited by the `maxBooleanClauses` configured for a collection (Timothy Potter)

Other Changes
---------------------
* SOLR-15355: Upgrade Hadoop from 3.2.0 to 3.2.2. (Antoine Gruzelle via David Smiley)
Expand Down
Expand Up @@ -18,7 +18,10 @@

import org.apache.calcite.jdbc.CalciteConnection;
import org.apache.calcite.jdbc.Driver;
import org.apache.calcite.runtime.Hook;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.sql2rel.SqlToRelConverter;
import org.apache.calcite.util.Holder;
import org.apache.solr.client.solrj.io.SolrClientCache;

import java.sql.Connection;
Expand Down Expand Up @@ -46,6 +49,10 @@ private CalciteSolrDriver() {
INSTANCE.register();
}

static void subQueryThreshold(Holder<SqlToRelConverter.Config> configHolder) {
configHolder.accept(config -> config.withInSubQueryThreshold(Integer.MAX_VALUE));
}

@Override
protected String getConnectStringPrefix() {
return CONNECT_STRING_PREFIX;
Expand All @@ -57,8 +64,13 @@ public Connection connect(String url, Properties info) throws SQLException {
return null;
}

// Configure SqlToRelConverter to allow more values for an 'IN' clause,
// otherwise, Calcite will transform the query into a join with a static table of literals
Hook.SQL2REL_CONVERTER_CONFIG_BUILDER.addThread(CalciteSolrDriver::subQueryThreshold);

Connection connection = super.connect(url, info);
CalciteConnection calciteConnection = (CalciteConnection) connection;

final SchemaPlus rootSchema = calciteConnection.getRootSchema();

String schemaName = info.getProperty("zk");
Expand Down
149 changes: 104 additions & 45 deletions solr/core/src/java/org/apache/solr/handler/sql/SolrFilter.java
Expand Up @@ -22,6 +22,7 @@
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptCost;
Expand All @@ -31,11 +32,17 @@
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rex.*;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.Pair;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -79,15 +86,14 @@ public void implement(Implementor implementor) {
Translator translator = new Translator(SolrRules.solrFieldNames(getRowType()), builder);
String query = translator.translateMatch(condition);
implementor.addQuery(query);
implementor.setNegativeQuery(translator.negativeQuery);
implementor.setNegativeQuery(query.startsWith("-"));
}
}

private static class Translator {

protected final List<String> fieldNames;
private final RexBuilder builder;
public boolean negativeQuery = true;

Translator(List<String> fieldNames, RexBuilder builder) {
this.fieldNames = fieldNames;
Expand All @@ -102,52 +108,52 @@ protected String translateMatch(RexNode condition) {
final SqlKind kind = condition.getKind();

if (condition.isA(SqlKind.SEARCH)) {

return translateMatch(RexUtil.expandSearch(builder, null, condition));
}

if (kind.belongsTo(SqlKind.COMPARISON) || kind == SqlKind.NOT) {
return translateSearch(condition);
} else if (kind.belongsTo(SqlKind.COMPARISON) || kind == SqlKind.NOT) {
return translateComparison(condition);
} else if (condition.isA(SqlKind.AND)) {
// see if this is a translated range query of greater than or equals and less than or equal on same field
// if so, then collapse into a single range criteria, e.g. field:[gte TO lte] instead of two ranges AND'd together
RexCall call = (RexCall) condition;
List<RexNode> operands = call.getOperands();
String query = null;
if (operands.size() == 2) {
RexNode lhs = operands.get(0);
RexNode rhs = operands.get(1);
if (lhs.getKind() == SqlKind.GREATER_THAN_OR_EQUAL && rhs.getKind() == SqlKind.LESS_THAN_OR_EQUAL) {
query = translateBetween(lhs, rhs);
} else if (lhs.getKind() == SqlKind.LESS_THAN_OR_EQUAL && rhs.getKind() == SqlKind.GREATER_THAN_OR_EQUAL) {
// just swap the nodes
query = translateBetween(rhs, lhs);
}
}
query = query != null ? query : "(" + translateAnd(condition) + ")";
if (log.isDebugEnabled()) {
log.debug("translated query match={}", query);
}
return query;
return translateAndOrBetween(condition);
} else if (condition.isA(SqlKind.OR)) {
return "(" + translateOr(condition) + ")";
} else if (kind == SqlKind.LIKE) {
return translateLike(condition, false);
return translateLike(condition);
} else if (kind == SqlKind.IS_NOT_NULL || kind == SqlKind.IS_NULL) {
return translateIsNullOrIsNotNull(condition);
} else {
return null;
}
}

protected String translateAndOrBetween(RexNode condition) {
// see if this is a translated range query of greater than or equals and less than or equal on same field
// if so, then collapse into a single range criteria, e.g. field:[gte TO lte] instead of two ranges AND'd together
RexCall call = (RexCall) condition;
List<RexNode> operands = call.getOperands();
String query = null;
if (operands.size() == 2) {
RexNode lhs = operands.get(0);
RexNode rhs = operands.get(1);
if (lhs.getKind() == SqlKind.GREATER_THAN_OR_EQUAL && rhs.getKind() == SqlKind.LESS_THAN_OR_EQUAL) {
query = translateBetween(lhs, rhs);
} else if (lhs.getKind() == SqlKind.LESS_THAN_OR_EQUAL && rhs.getKind() == SqlKind.GREATER_THAN_OR_EQUAL) {
// just swap the nodes
query = translateBetween(rhs, lhs);
}
}
query = (query != null ? query : translateAnd(condition));
if (log.isDebugEnabled()) {
log.debug("translated query match={}", query);
}
return "(" + query + ")";
}

protected String translateBetween(RexNode gteNode, RexNode lteNode) {
Pair<String, RexLiteral> gte = getFieldValuePair(gteNode);
Pair<String, RexLiteral> lte = getFieldValuePair(lteNode);
String fieldName = gte.getKey();
String query = null;
if (fieldName.equals(lte.getKey()) && compareRexLiteral(gte.right, lte.right) < 0) {
query = fieldName + ":[" + toSolrLiteral(gte.getValue()) + " TO " + toSolrLiteral(lte.getValue()) + "]";
this.negativeQuery = false; // so we don't get *:* AND range
}

return query;
Expand All @@ -172,7 +178,6 @@ protected String translateIsNullOrIsNotNull(RexNode node) {
if (left instanceof RexInputRef) {
String name = fieldNames.get(((RexInputRef) left).getIndex());
SqlKind kind = node.getKind();
this.negativeQuery = false;
return kind == SqlKind.IS_NOT_NULL ? "+" + name + ":*" : "(*:* -" + name + ":*)";
}

Expand All @@ -182,7 +187,11 @@ protected String translateIsNullOrIsNotNull(RexNode node) {
protected String translateOr(RexNode condition) {
List<String> ors = new ArrayList<>();
for (RexNode node : RelOptUtil.disjunctions(condition)) {
ors.add(translateMatch(node));
String orQuery = translateMatch(node);
if (orQuery.startsWith("-")) {
orQuery = "(*:* "+orQuery+")";
}
ors.add(orQuery);
}
return String.join(" OR ", ors);
}
Expand All @@ -197,7 +206,11 @@ protected String translateAnd(RexNode node0) {


for (RexNode node : ands) {
andStrings.add(translateMatch(node));
String andQuery = translateMatch(node);
if (andQuery.startsWith("-")) {
andQuery = "(*:* "+andQuery+")";
}
andStrings.add(andQuery);
}

String andString = String.join(" AND ", andStrings);
Expand All @@ -213,7 +226,7 @@ protected String translateAnd(RexNode node0) {
}
}

protected String translateLike(RexNode like, boolean isNegativeQuery) {
protected String translateLike(RexNode like) {
Pair<String, RexLiteral> pair = getFieldValuePair(like);
String terms = pair.getValue().toString().trim();
terms = terms.replace("'", "").replace('%', '*').replace('_', '?');
Expand All @@ -224,7 +237,6 @@ protected String translateLike(RexNode like, boolean isNegativeQuery) {
wrappedQuotes = true;
}

this.negativeQuery = isNegativeQuery;
String query = pair.getKey() + ":" + terms;
return wrappedQuotes ? "{!complexphrase}" + query : query;
}
Expand All @@ -233,32 +245,27 @@ protected String translateComparison(RexNode node) {
final SqlKind kind = node.getKind();
if (kind == SqlKind.NOT) {
RexNode negated = ((RexCall) node).getOperands().get(0);
return "-" + (negated.getKind() == SqlKind.LIKE ? translateLike(negated, true) : translateComparison(negated));
return "-" + (negated.getKind() == SqlKind.LIKE ? translateLike(negated) : translateMatch(negated));
}

Pair<String, RexLiteral> binaryTranslated = getFieldValuePair(node);
final String key = binaryTranslated.getKey();
RexLiteral value = binaryTranslated.getValue();
switch (kind) {
case EQUALS:
this.negativeQuery = false;
return toEqualsClause(key, value, node);
case NOT_EQUALS:
return "-(" + toEqualsClause(key, value, node) + ")";
return "-" + toEqualsClause(key, value, node);
case LESS_THAN:
this.negativeQuery = false;
return "(" + key + ": [ * TO " + toSolrLiteral(value) + " })";
case LESS_THAN_OR_EQUAL:
this.negativeQuery = false;
return "(" + key + ": [ * TO " + toSolrLiteral(value) + " ])";
case GREATER_THAN:
this.negativeQuery = false;
return "(" + key + ": { " + toSolrLiteral(value) + " TO * ])";
case GREATER_THAN_OR_EQUAL:
this.negativeQuery = false;
return "(" + key + ": [ " + toSolrLiteral(value) + " TO * ])";
case LIKE:
return translateLike(node, false);
return translateLike(node);
case IS_NOT_NULL:
case IS_NULL:
return translateIsNullOrIsNotNull(node);
Expand Down Expand Up @@ -356,12 +363,12 @@ protected Pair<String, RexLiteral> translateBinary(RexCall call) {
}

if (left.getKind() == SqlKind.CAST && right.getKind() == SqlKind.CAST) {
return translateBinary2(((RexCall)left).operands.get(0), ((RexCall)right).operands.get(0));
return translateBinary2(((RexCall) left).operands.get(0), ((RexCall) right).operands.get(0));
}

// for WHERE clause like: pdatex >= '2021-07-13T15:12:10.037Z'
if (left.getKind() == SqlKind.INPUT_REF && right.getKind() == SqlKind.CAST) {
final RexCall cast = ((RexCall)right);
final RexCall cast = ((RexCall) right);
if (cast.operands.size() == 1 && cast.operands.get(0).getKind() == SqlKind.LITERAL) {
return translateBinary2(left, cast.operands.get(0));
}
Expand Down Expand Up @@ -401,6 +408,58 @@ protected Pair<String, RexLiteral> translateBinary2(RexNode left, RexNode right)
return null;
}
}

/**
* A search node can be an IN or NOT IN clause or a BETWEEN
*/
protected String translateSearch(RexNode condition) {
final String fieldName = getSolrFieldName(condition);

RexCall expanded = (RexCall) RexUtil.expandSearch(builder, null, condition);
final RexNode peekAt0 = !expanded.operands.isEmpty() ? expanded.operands.get(0) : null;
if (expanded.op.kind == SqlKind.AND) {
// See if NOT IN was translated into a big AND not
if (peekAt0 instanceof RexCall) {
RexCall op0 = (RexCall) peekAt0;
if (op0.op.kind == SqlKind.NOT_EQUALS) {
return "*:* -" + fieldName + ":" + toOrSetOnSameField(expanded);
}
}
} else if (expanded.op.kind == SqlKind.OR) {
if (peekAt0 instanceof RexCall) {
RexCall op0 = (RexCall) peekAt0;
if (op0.op.kind == SqlKind.EQUALS) {
return fieldName + ":" + toOrSetOnSameField(expanded);
}
}
}

if (expanded.getKind() != SqlKind.SEARCH) {
// passing a search back to translateMatch would lead to infinite recursion ...
return translateMatch(expanded);
}

// don't know how to handle this search!
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unsupported search filter: " + condition);
}

protected String toOrSetOnSameField(RexCall search) {
String orClause = search.operands.stream().map(n -> {
RexCall next = (RexCall) n;
RexLiteral lit = (RexLiteral) next.getOperands().get(1);
return "\"" + toSolrLiteral(lit) + "\"";
}).collect(Collectors.joining(" OR "));
return "(" + orClause + ")";
}

protected String getSolrFieldName(RexNode node) {
RexCall call = (RexCall) node;
final RexNode left = call.getOperands().get(0);
if (left instanceof RexInputRef) {
return fieldNames.get(((RexInputRef) left).getIndex());
}
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Expected Solr field name for " + call.getKind() + " but found " + left);
}
}

private static class HavingTranslator extends Translator {
Expand Down
Expand Up @@ -138,8 +138,10 @@ private LukeResponse getSchema(final String collection) {
}
}

private boolean isStoredOrDocValues(final EnumSet<FieldFlag> flags) {
return flags != null && (flags.contains(FieldFlag.STORED) || flags.contains(FieldFlag.DOC_VALUES));
private boolean isStoredIndexedOrDocValues(final EnumSet<FieldFlag> flags) {
// if a field is not stored but indexed, then we should still include it in the table schema so that users
// can filter on it, they just won't be able to return it as a field
return flags != null && (flags.contains(FieldFlag.DOC_VALUES) || flags.contains(FieldFlag.STORED) || flags.contains(FieldFlag.INDEXED));
}

private EnumSet<FieldFlag> getFieldFlags(final LukeResponse.FieldInfo luceneFieldInfo) {
Expand Down Expand Up @@ -172,13 +174,12 @@ RelDataType buildRowSchema(String collection) {
Map<String, LukeResponse.FieldInfo> fieldsInUseMap = getFieldInfo(collection);

LukeResponse schema = getSchema(collection);
// Only want fields that are stored or have docValues enabled
Map<String, LukeResponse.FieldInfo> storedFields = schema.getFieldInfo().entrySet().stream()
.filter(e -> isStoredOrDocValues(getFieldFlags(e.getValue())))
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
// merge the actual fields in use returned by Luke with the declared fields in the schema that are empty
Map<String, LukeResponse.FieldInfo> combinedFields = Stream.of(fieldsInUseMap, storedFields)
.flatMap(map -> map.entrySet().stream())
.filter(e -> isStoredIndexedOrDocValues(getFieldFlags(e.getValue()))) // Only want fields that are stored, indexed, or have docValues enabled
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (v1, v2) -> v1, TreeMap::new));

Map<String, Class<?>> javaClassForTypeMap = new HashMap<>(); // local cache for custom field types we've already resolved
Expand Down

0 comments on commit 1481f78

Please sign in to comment.