diff --git a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowFilter.java b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowFilter.java index 9774318ea925..9617ac6b3cb0 100644 --- a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowFilter.java +++ b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowFilter.java @@ -34,7 +34,7 @@ * relational expression in Arrow. */ class ArrowFilter extends Filter implements ArrowRel { - private final List match; + private final List> match; ArrowFilter(RelOptCluster cluster, RelTraitSet traitSet, RelNode input, RexNode condition) { super(cluster, traitSet, input, condition); diff --git a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRel.java b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRel.java index 5b002bdc2dcd..944c17d867dd 100644 --- a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRel.java +++ b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRel.java @@ -41,15 +41,24 @@ public interface ArrowRel extends RelNode { * {@link ArrowRel} nodes into a SQL query. */ class Implementor { @Nullable List selectFields; - final List whereClause = new ArrayList<>(); + final List> whereClause = new ArrayList<>(); @Nullable RelOptTable table; @Nullable ArrowTable arrowTable; /** Adds new predicates. * - * @param predicates Predicates + *

<p>The structure is two levels of nesting: + *
<ul> + *
  <li>Outer list: conjunction (AND) of clauses + *
  <li>Inner list: disjunction (OR) of conditions within a clause + *
</ul> + * + *
<p>
Each {@link ConditionToken} represents a single unary or binary + * predicate condition. + * + * @param predicates Predicates in CNF form */ - void addFilters(List predicates) { + void addFilters(List> predicates) { whereClause.addAll(predicates); } diff --git a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRules.java b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRules.java index b70e70964837..6e268d646928 100644 --- a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRules.java +++ b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRules.java @@ -29,6 +29,8 @@ import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.validate.SqlValidatorUtil; import com.google.common.collect.ImmutableList; @@ -97,9 +99,13 @@ protected ArrowFilterRule(Config config) { RelNode convert(Filter filter) { final RelTraitSet traitSet = filter.getTraitSet().replace(ArrowRel.CONVENTION); + // Expand SEARCH (e.g. IN, BETWEEN) before pushing to Arrow, + // since Gandiva does not support SEARCH natively. + final RexNode condition = + RexUtil.expandSearch(filter.getCluster().getRexBuilder(), null, filter.getCondition()); return new ArrowFilter(filter.getCluster(), traitSet, convert(filter.getInput(), ArrowRel.CONVENTION), - filter.getCondition()); + condition); } /** Rule configuration. 
*/ diff --git a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTable.java b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTable.java index ba459c7b48b8..d24ed04e2392 100644 --- a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTable.java +++ b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTable.java @@ -97,7 +97,7 @@ public class ArrowTable extends AbstractTable * {@link org.apache.calcite.adapter.arrow.ArrowMethod#ARROW_QUERY}. */ @SuppressWarnings("unused") public Enumerable query(DataContext root, ImmutableIntList fields, - List conditions) { + List>> conditions) { requireNonNull(fields, "fields"); final Projector projector; final Filter filter; @@ -119,30 +119,26 @@ public Enumerable query(DataContext root, ImmutableIntList fields, } else { projector = null; - final List conditionNodes = new ArrayList<>(conditions.size()); - for (String condition : conditions) { - String[] data = condition.split(" "); - List treeNodes = new ArrayList<>(2); - treeNodes.add( - TreeBuilder.makeField(schema.getFields() - .get(schema.getFields().indexOf(schema.findField(data[0]))))); - - // if the split condition has more than two parts it's a binary operator - // with an additional literal node - if (data.length > 2) { - treeNodes.add(makeLiteralNode(data[2], data[3])); + final List conjuncts = new ArrayList<>(conditions.size()); + for (List> orGroup : conditions) { + final List disjuncts = new ArrayList<>(orGroup.size()); + for (List conditionParts : orGroup) { + disjuncts.add( + parseSingleCondition( + ConditionToken.fromTokenList(conditionParts))); + } + if (disjuncts.size() == 1) { + conjuncts.add(disjuncts.get(0)); + } else { + conjuncts.add(TreeBuilder.makeOr(disjuncts)); } - - String operator = data[1]; - conditionNodes.add( - TreeBuilder.makeFunction(operator, treeNodes, new ArrowType.Bool())); } final Condition filterCondition; - if (conditionNodes.size() == 1) { - filterCondition = 
TreeBuilder.makeCondition(conditionNodes.get(0)); + if (conjuncts.size() == 1) { + filterCondition = TreeBuilder.makeCondition(conjuncts.get(0)); } else { - TreeNode treeNode = TreeBuilder.makeAnd(conditionNodes); - filterCondition = TreeBuilder.makeCondition(treeNode); + filterCondition = + TreeBuilder.makeCondition(TreeBuilder.makeAnd(conjuncts)); } try { @@ -184,6 +180,26 @@ private static RelDataType deduceRowType(Schema schema, return builder.build(); } + /** Parses a single {@link ConditionToken} into a Gandiva {@link TreeNode}. */ + private TreeNode parseSingleCondition(ConditionToken token) { + final List treeNodes = new ArrayList<>(2); + treeNodes.add( + TreeBuilder.makeField(schema.getFields() + .get( + schema.getFields().indexOf( + schema.findField(token.fieldName))))); + + if (token.isBinary()) { + treeNodes.add( + makeLiteralNode( + requireNonNull(token.value, "value"), + requireNonNull(token.valueType, "valueType"))); + } + + return TreeBuilder.makeFunction( + token.operator, treeNodes, new ArrowType.Bool()); + } + private static TreeNode makeLiteralNode(String literal, String type) { if (type.startsWith("decimal")) { String[] typeParts = diff --git a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowToEnumerableConverter.java b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowToEnumerableConverter.java index 3b90dfd890e3..bd0e2c2e8c10 100644 --- a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowToEnumerableConverter.java +++ b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowToEnumerableConverter.java @@ -35,6 +35,7 @@ import com.google.common.primitives.Ints; +import java.util.ArrayList; import java.util.List; import static java.util.Objects.requireNonNull; @@ -84,6 +85,23 @@ protected ArrowToEnumerableConverter(RelOptCluster cluster, : Expressions.call( BuiltInMethod.IMMUTABLE_INT_LIST_IDENTITY.method, Expressions.constant(fieldCount)), - Expressions.constant(arrowImplementor.whereClause)))); + 
Expressions.constant( + toTokenLists(arrowImplementor.whereClause))))); + } + + /** Converts structured {@link ConditionToken} conditions to nested string + * lists for serialization through {@link Expressions#constant}. */ + private static List>> toTokenLists( + List> conditions) { + final List>> result = + new ArrayList<>(conditions.size()); + for (List orGroup : conditions) { + final List> group = new ArrayList<>(orGroup.size()); + for (ConditionToken token : orGroup) { + group.add(token.toTokenList()); + } + result.add(group); + } + return result; } } diff --git a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTranslator.java b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTranslator.java index 1102ce205692..1bee64a0e4fc 100644 --- a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTranslator.java +++ b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTranslator.java @@ -41,7 +41,7 @@ import static java.util.Objects.requireNonNull; /** - * Translates a {@link RexNode} expression to a Gandiva string. + * Translates a {@link RexNode} expression to Gandiva predicate tokens. */ class ArrowTranslator { final RexBuilder rexBuilder; @@ -61,13 +61,30 @@ public static ArrowTranslator create(RexBuilder rexBuilder, return new ArrowTranslator(rexBuilder, rowType); } - List translateMatch(RexNode condition) { - List disjunctions = RelOptUtil.disjunctions(condition); - if (disjunctions.size() == 1) { - return translateAnd(disjunctions.get(0)); - } else { - throw new UnsupportedOperationException("Unsupported disjunctive condition " + condition); + /** The maximum number of nodes allowed during CNF conversion. + * + *

If exceeded, {@link RexUtil#toCnf(RexBuilder, int, RexNode)} returns + * the original expression unchanged, which may cause the subsequent + * translation to Gandiva predicates to fail with an + * {@link UnsupportedOperationException}. That exception is caught by + * {@link ArrowRules.ArrowFilterRule#onMatch}, which silently skips the + * Arrow convention and falls back to an Enumerable plan. */ + private static final int MAX_CNF_NODE_COUNT = 256; + + List> translateMatch(RexNode condition) { + // Convert to CNF; SEARCH nodes are already expanded + // by ArrowFilterRule before reaching here. + final RexNode cnf = RexUtil.toCnf(rexBuilder, MAX_CNF_NODE_COUNT, condition); + + final List> result = new ArrayList<>(); + for (RexNode conjunct : RelOptUtil.conjunctions(cnf)) { + final List orGroup = new ArrayList<>(); + for (RexNode disjunct : RelOptUtil.disjunctions(conjunct)) { + orGroup.add(translateMatch2(disjunct)); + } + result.add(orGroup); } + return result; } /** @@ -93,34 +110,14 @@ private static Object literalValue(RexLiteral literal) { } } - /** - * Translate a conjunctive predicate to a SQL string. - * - * @param condition A conjunctive predicate - * - * @return SQL string for the predicate - */ - private List translateAnd(RexNode condition) { - List predicates = new ArrayList<>(); - for (RexNode node : RelOptUtil.conjunctions(condition)) { - if (node.getKind() == SqlKind.SEARCH) { - final RexNode node2 = RexUtil.expandSearch(rexBuilder, null, node); - predicates.addAll(translateMatch(node2)); - } else { - predicates.add(translateMatch2(node)); - } - } - return predicates; - } - /** * Translates a binary or unary relation. * * @param node A RexNode that always evaluates to a boolean expression. * Currently, this method is only called from translateAnd. - * @return The translated SQL string for the relation. + * @return The translated condition token for the relation. 
*/ - private String translateMatch2(RexNode node) { + private ConditionToken translateMatch2(RexNode node) { switch (node.getKind()) { case EQUALS: return translateBinary("equal", "=", (RexCall) node); @@ -144,7 +141,7 @@ private String translateMatch2(RexNode node) { return translateUnary("isnotfalse", (RexCall) node); case INPUT_REF: final RexInputRef inputRef = (RexInputRef) node; - return fieldNames.get(inputRef.getIndex()) + " istrue"; + return ConditionToken.unary(fieldNames.get(inputRef.getIndex()), "istrue"); case NOT: return translateUnary("isfalse", (RexCall) node); default: @@ -156,10 +153,10 @@ private String translateMatch2(RexNode node) { * Translates a call to a binary operator, reversing arguments if * necessary. */ - private String translateBinary(String op, String rop, RexCall call) { + private ConditionToken translateBinary(String op, String rop, RexCall call) { final RexNode left = call.operands.get(0); final RexNode right = call.operands.get(1); - @Nullable String expression = translateBinary2(op, left, right); + @Nullable ConditionToken expression = translateBinary2(op, left, right); if (expression != null) { return expression; } @@ -171,7 +168,8 @@ private String translateBinary(String op, String rop, RexCall call) { } /** Translates a call to a binary operator. Returns null on failure. */ - private @Nullable String translateBinary2(String op, RexNode left, RexNode right) { + private @Nullable ConditionToken translateBinary2(String op, RexNode left, + RexNode right) { if (right.getKind() != SqlKind.LITERAL) { return null; } @@ -189,26 +187,29 @@ private String translateBinary(String op, String rop, RexCall call) { } } - /** Combines a field name, operator, and literal to produce a predicate string. */ - private String translateOp2(String op, String name, RexLiteral right) { + /** Combines a field name, operator, and literal to produce a binary + * condition token. 
*/ + private ConditionToken translateOp2(String op, String name, + RexLiteral right) { Object value = literalValue(right); String valueString = value.toString(); String valueType = getLiteralType(right.getType()); if (value instanceof String) { - final RelDataTypeField field = requireNonNull(rowType.getField(name, true, false), "field"); + final RelDataTypeField field = + requireNonNull(rowType.getField(name, true, false), "field"); SqlTypeName typeName = field.getType().getSqlTypeName(); if (typeName != SqlTypeName.CHAR) { valueString = "'" + valueString + "'"; } } - return name + " " + op + " " + valueString + " " + valueType; + return ConditionToken.binary(name, op, valueString, valueType); } /** Translates a call to a unary operator. */ - private String translateUnary(String op, RexCall call) { + private ConditionToken translateUnary(String op, RexCall call) { final RexNode opNode = call.operands.get(0); - @Nullable String expression = translateUnary2(op, opNode); + @Nullable ConditionToken expression = translateUnary2(op, opNode); if (expression != null) { return expression; @@ -218,21 +219,16 @@ private String translateUnary(String op, RexCall call) { } /** Translates a call to a unary operator. Returns null on failure. */ - private @Nullable String translateUnary2(String op, RexNode opNode) { + private @Nullable ConditionToken translateUnary2(String op, RexNode opNode) { if (opNode.getKind() == SqlKind.INPUT_REF) { final RexInputRef inputRef = (RexInputRef) opNode; final String name = fieldNames.get(inputRef.getIndex()); - return translateUnaryOp(op, name); + return ConditionToken.unary(name, op); } return null; } - /** Combines a field name and a unary operator to produce a predicate string. 
*/ - private static String translateUnaryOp(String op, String name) { - return name + " " + op; - } - private static String getLiteralType(RelDataType type) { if (type.getSqlTypeName() == SqlTypeName.DECIMAL) { return "decimal" + "(" + type.getPrecision() + "," + type.getScale() + ")"; diff --git a/arrow/src/main/java/org/apache/calcite/adapter/arrow/ConditionToken.java b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ConditionToken.java new file mode 100644 index 000000000000..cc4deff7c145 --- /dev/null +++ b/arrow/src/main/java/org/apache/calcite/adapter/arrow/ConditionToken.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.adapter.arrow; + +import com.google.common.collect.ImmutableList; + +import org.checkerframework.checker.nullness.qual.Nullable; + +import java.util.List; + +import static java.util.Objects.requireNonNull; + +/** + * A structured representation of a single Gandiva predicate condition. + * + *

A condition is either unary (e.g. {@code IS NULL}) or binary + * (e.g. {@code =}, {@code <}). Unary conditions have a field name + * and operator; binary conditions additionally have a literal value + * and its type. + * + *

This class replaces the raw {@code List} representation + * used in earlier versions, providing type safety and clarity. + * + * @see ArrowTranslator + */ +class ConditionToken { + final String fieldName; + final String operator; + final @Nullable String value; + final @Nullable String valueType; + + private ConditionToken(String fieldName, String operator, + @Nullable String value, @Nullable String valueType) { + this.fieldName = requireNonNull(fieldName, "fieldName"); + this.operator = requireNonNull(operator, "operator"); + this.value = value; + this.valueType = valueType; + } + + /** Creates a binary condition token + * (e.g. {@code intField equal 12 integer}). */ + static ConditionToken binary(String fieldName, String operator, + String value, String valueType) { + return new ConditionToken(fieldName, operator, + requireNonNull(value, "value"), + requireNonNull(valueType, "valueType")); + } + + /** Creates a unary condition token + * (e.g. {@code intField isnull}). */ + static ConditionToken unary(String fieldName, String operator) { + return new ConditionToken(fieldName, operator, null, null); + } + + /** Returns whether this is a binary condition. */ + boolean isBinary() { + return value != null; + } + + /** Converts this token to a string list for serialization + * through code generation. + * + *

The result is either {@code [fieldName, operator]} for unary + * conditions or {@code [fieldName, operator, value, valueType]} for + * binary conditions. */ + List toTokenList() { + if (isBinary()) { + return ImmutableList.of(fieldName, operator, + requireNonNull(value, "value"), + requireNonNull(valueType, "valueType")); + } + return ImmutableList.of(fieldName, operator); + } + + /** Creates a {@code ConditionToken} from a serialized string list. */ + static ConditionToken fromTokenList(List tokens) { + final int size = tokens.size(); + if (size == 4) { + return binary(tokens.get(0), tokens.get(1), + tokens.get(2), tokens.get(3)); + } else if (size == 2) { + return unary(tokens.get(0), tokens.get(1)); + } + throw new IllegalArgumentException("Invalid condition tokens: " + tokens); + } +} diff --git a/arrow/src/test/java/org/apache/calcite/adapter/arrow/ArrowAdapterTest.java b/arrow/src/test/java/org/apache/calcite/adapter/arrow/ArrowAdapterTest.java index 6027e2448803..13b79bb15dd3 100644 --- a/arrow/src/test/java/org/apache/calcite/adapter/arrow/ArrowAdapterTest.java +++ b/arrow/src/test/java/org/apache/calcite/adapter/arrow/ArrowAdapterTest.java @@ -22,7 +22,6 @@ import org.apache.calcite.rel.type.RelDataTypeSystem; import org.apache.calcite.schema.Table; import org.apache.calcite.test.CalciteAssert; -import org.apache.calcite.util.Bug; import org.apache.calcite.util.Sources; import com.google.common.collect.ImmutableMap; @@ -218,7 +217,7 @@ static void initializeArrowState(@TempDir Path sharedTempDir) + "where \"intField\" > 1 and \"intField\" < 4"; String plan = "PLAN=ArrowToEnumerableConverter\n" + " ArrowProject(intField=[$0], stringField=[$1])\n" - + " ArrowFilter(condition=[SEARCH($0, Sarg[(1..4)])])\n" + + " ArrowFilter(condition=[AND(>($0, 1), <($0, 4))])\n" + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n"; String result = "intField=2; stringField=2\n" + "intField=3; stringField=3\n"; @@ -251,20 +250,10 @@ static void 
initializeArrowState(@TempDir Path sharedTempDir) String sql = "select \"intField\", \"stringField\"\n" + "from arrowdata\n" + "where \"intField\"=12 or \"stringField\"='12'"; - String plan; - if (Bug.CALCITE_6293_FIXED) { - plan = "PLAN=ArrowToEnumerableConverter\n" - + " ArrowProject(intField=[$0], stringField=[$1])\n" - + " ArrowFilter(condition=[OR(=($0, 12), =($1, '12'))])\n" - + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n"; - } else { - plan = "PLAN=EnumerableCalc(expr#0..1=[{inputs}], expr#2=[12], " - + "expr#3=[=($t0, $t2)], expr#4=['12':VARCHAR], expr#5=[=($t1, $t4)], " - + "expr#6=[OR($t3, $t5)], proj#0..1=[{exprs}], $condition=[$t6])\n" - + " ArrowToEnumerableConverter\n" - + " ArrowProject(intField=[$0], stringField=[$1])\n" - + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n"; - } + String plan = "PLAN=ArrowToEnumerableConverter\n" + + " ArrowProject(intField=[$0], stringField=[$1])\n" + + " ArrowFilter(condition=[OR(=($0, 12), =($1, '12'))])\n" + + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n"; String result = "intField=12; stringField=12\n"; CalciteAssert.that() @@ -274,23 +263,84 @@ static void initializeArrowState(@TempDir Path sharedTempDir) .explainContains(plan); } + /** Test case for + * [CALCITE-6636] + * Support CNF condition of Arrow adapter. 
*/ + @Test void testArrowProjectFieldsWithCnfFilter() { + String sql = "select \"intField\", \"stringField\"\n" + + "from arrowdata\n" + + "where (\"intField\" > 1 and \"stringField\" = '2') or \"intField\" = 0"; + String plan = "PLAN=ArrowToEnumerableConverter\n" + + " ArrowProject(intField=[$0], stringField=[$1])\n" + + " ArrowFilter(condition=[OR(AND(>($0, 1), =($1, '2')), =($0, 0))])\n" + + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n"; + String result = "intField=0; stringField=0\n" + + "intField=2; stringField=2\n"; + + CalciteAssert.that() + .with(arrow) + .query(sql) + .returns(result) + .explainContains(plan); + } + + /** Test case for + * [CALCITE-6636] + * Support CNF condition of Arrow adapter. + * + *

Tests deeply nested conditions: {@code (A AND B) OR (C AND D)}, + * which in CNF becomes {@code (A OR C) AND (A OR D) AND (B OR C) AND (B OR D)}. */ + @Test void testArrowProjectFieldsWithDeepCnfFilter() { + String sql = "select \"intField\", \"stringField\"\n" + + "from arrowdata\n" + + "where (\"intField\" = 2 and \"stringField\" = '2')" + + " or (\"intField\" = 3 and \"stringField\" = '3')"; + String plan = "PLAN=ArrowToEnumerableConverter\n" + + " ArrowProject(intField=[$0], stringField=[$1])\n" + + " ArrowFilter(condition=[OR(AND(=($0, 2), =($1, '2')), AND(=($0, 3), =($1, '3')))])\n" + + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n"; + String result = "intField=2; stringField=2\n" + + "intField=3; stringField=3\n"; + + CalciteAssert.that() + .with(arrow) + .query(sql) + .returns(result) + .explainContains(plan); + } + + /** Test case for + * [CALCITE-6636] + * Support CNF condition of Arrow adapter. + * + *

Tests triple OR: {@code A OR B OR C}. */ + @Test void testArrowProjectFieldsWithTripleOrFilter() { + String sql = "select \"intField\", \"stringField\"\n" + + "from arrowdata\n" + + "where \"intField\" = 1 or \"intField\" = 2 or \"intField\" = 3"; + String plan = "PLAN=ArrowToEnumerableConverter\n" + + " ArrowProject(intField=[$0], stringField=[$1])\n" + + " ArrowFilter(condition=[OR(=($0, 1), =($0, 2), =($0, 3))])\n" + + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n"; + String result = "intField=1; stringField=1\n" + + "intField=2; stringField=2\n" + + "intField=3; stringField=3\n"; + + CalciteAssert.that() + .with(arrow) + .query(sql) + .returns(result) + .explainContains(plan); + } + @Test void testArrowProjectFieldsWithInFilter() { String sql = "select \"intField\", \"stringField\"\n" + "from arrowdata\n" + "where \"intField\" in (0, 1, 2)"; - String plan; - if (Bug.CALCITE_6294_FIXED) { - plan = "PLAN=ArrowToEnumerableConverter\n" - + " ArrowProject(intField=[$0], stringField=[$1])\n" - + " ArrowFilter(condition=[OR(=($0, 0), =($0, 1), =($0, 2))])\n" - + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n"; - } else { - plan = "PLAN=EnumerableCalc(expr#0..1=[{inputs}], expr#2=[Sarg[0, 1, 2]], " - + "expr#3=[SEARCH($t0, $t2)], proj#0..1=[{exprs}], $condition=[$t3])\n" - + " ArrowToEnumerableConverter\n" - + " ArrowProject(intField=[$0], stringField=[$1])\n" - + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n"; - } + String plan = "PLAN=ArrowToEnumerableConverter\n" + + " ArrowProject(intField=[$0], stringField=[$1])\n" + + " ArrowFilter(condition=[OR(=($0, 0), =($0, 1), =($0, 2))])\n" + + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n"; String result = "intField=0; stringField=0\n" + "intField=1; stringField=1\n" + "intField=2; stringField=2\n"; @@ -387,7 +437,7 @@ static void initializeArrowState(@TempDir Path sharedTempDir) + "where \"intField\" between 1 and 3"; 
String plan = "PLAN=ArrowToEnumerableConverter\n" + " ArrowProject(intField=[$0], stringField=[$1])\n" - + " ArrowFilter(condition=[SEARCH($0, Sarg[[1..3]])])\n" + + " ArrowFilter(condition=[AND(>=($0, 1), <=($0, 3))])\n" + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n"; String result = "intField=1; stringField=1\n" + "intField=2; stringField=2\n" @@ -530,14 +580,13 @@ static void initializeArrowState(@TempDir Path sharedTempDir) .explainContains(plan); } - @Disabled("literal with space is not supported") @Test void testLiteralWithSpace() { String sql = "select \"intField\", \"stringField\" as \"my Field\"\n" + "from arrowdata\n" + "where \"stringField\" = 'literal with space'"; String plan = "PLAN=ArrowToEnumerableConverter\n" - + " ArrowProject(intField=[$0], my Field=[$1])\n" - + " ArrowFilter(condition=[=($1, '2')])\n" + + " ArrowProject(intField=[$0], stringField=[$1])\n" + + " ArrowFilter(condition=[=($1, 'literal with space')])\n" + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n"; String result = ""; @@ -565,6 +614,23 @@ static void initializeArrowState(@TempDir Path sharedTempDir) .explainContains(plan); } + @Test void testLiteralWithEmptyString() { + String sql = "select \"intField\", \"stringField\"\n" + + "from arrowdata\n" + + "where \"stringField\" = ''"; + String plan = "PLAN=ArrowToEnumerableConverter\n" + + " ArrowProject(intField=[$0], stringField=[$1])\n" + + " ArrowFilter(condition=[=($1, '')])\n" + + " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n"; + String result = ""; + + CalciteAssert.that() + .with(arrow) + .query(sql) + .returns(result) + .explainContains(plan); + } + @Test void testTinyIntProject() { String sql = "select DEPTNO from DEPT"; String plan = "PLAN=ArrowToEnumerableConverter\n" @@ -962,6 +1028,34 @@ static void initializeArrowState(@TempDir Path sharedTempDir) .explainContains(plan); } + /** When a filter condition exceeds the CNF node limit, the Arrow 
adapter + * falls back to the Enumerable convention (EnumerableCalc) instead of + * using ArrowFilter. The query should still return correct results. */ + @Test void testCnfExceedsLimitFallsBackToEnumerable() { + StringBuilder sb = new StringBuilder(); + sb.append("select \"intField\", \"stringField\" from arrowdata\nwhere "); + for (int i = 0; i < 45; i++) { + if (i > 0) { + sb.append(" or "); + } + sb.append("(\"intField\" = ").append(i) + .append(" and \"stringField\" = '").append(i).append("')"); + } + String sql = sb.toString(); + + String planPrefix = "PLAN=EnumerableCalc("; + String arrowInputPlan = "ArrowToEnumerableConverter" + + "\n ArrowProject(intField=[$0], stringField=[$1])" + + "\n ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])"; + + CalciteAssert.that() + .with(arrow) + .query(sql) + .returnsCount(45) + .explainContains(planPrefix) + .explainContains(arrowInputPlan); + } + /** Test case for * [CALCITE-6684] * Arrow adapter should supports filter conditions of Decimal type. */ diff --git a/core/src/main/java/org/apache/calcite/util/Bug.java b/core/src/main/java/org/apache/calcite/util/Bug.java index 7eb756d5d24a..8aaebeb4af06 100644 --- a/core/src/main/java/org/apache/calcite/util/Bug.java +++ b/core/src/main/java/org/apache/calcite/util/Bug.java @@ -204,16 +204,6 @@ public abstract class Bug { * is fixed. */ public static final boolean CALCITE_6391_FIXED = false; - /** Whether - * - * [CALCITE-6293] Support OR condition in Arrow adapter is fixed. */ - public static final boolean CALCITE_6293_FIXED = false; - - /** Whether - * - * [CALCITE-6294] Support IN filter in Arrow adapter is fixed. */ - public static final boolean CALCITE_6294_FIXED = false; - /** Whether * [CALCITE-6328] * The BigQuery functions SAFE_* do not match the BigQuery specification