diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java index c8eafa2cf14188..5d38058dfc8a77 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java @@ -25,6 +25,8 @@ import org.apache.doris.nereids.trees.expressions.ArrayItemReference.ArrayItemSlot; import org.apache.doris.nereids.trees.expressions.Cast; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.IsNull; +import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.functions.scalar.ArrayCount; import org.apache.doris.nereids.trees.expressions.functions.scalar.ArrayExists; @@ -118,7 +120,14 @@ public Void visitSlotReference(SlotReference slotReference, CollectorContext con if (slotReference.hasSubColPath()) { path.addAll(slotReference.getSubPath()); } - path.addAll(context.accessPathBuilder.getPathList()); + // Strip NULL suffix for variant sub-column access — null-flag-only optimization + // does not apply to variant sub-column data layout. + List builderPath = context.accessPathBuilder.getPathList(); + if (builderPath.size() > 1 + && AccessPathInfo.ACCESS_NULL.equals(builderPath.get(builderPath.size() - 1))) { + builderPath = new ArrayList<>(builderPath.subList(0, builderPath.size() - 1)); + } + path.addAll(builderPath); int slotId = slotReference.getExprId().asInt(); slotToAccessPaths.put(slotId, new CollectAccessPathResult( path, context.bottomFilter, TAccessPathType.DATA)); @@ -133,8 +142,8 @@ public Void visitSlotReference(SlotReference slotReference, CollectorContext con if (dataType.isStringLikeType()) { int slotId = slotReference.getExprId().asInt(); if (!context.accessPathBuilder.isEmpty()) { - // Accessed via an offset-only function (e.g. length()). - // Builder already has "offset" at the tail; add the column name as prefix. + // Accessed via an offset-only function (e.g. length()) or null-check (IS NULL). + // Builder already has "OFFSET"/"NULL" at the tail; add the column name as prefix. context.accessPathBuilder.addPrefix(slotReference.getName()); ImmutableList path = ImmutableList.copyOf(context.accessPathBuilder.accessPath); slotToAccessPaths.put(slotId, @@ -146,6 +155,30 @@ public Void visitSlotReference(SlotReference slotReference, CollectorContext con slotToAccessPaths.put(slotId, new CollectAccessPathResult(path, context.bottomFilter, TAccessPathType.DATA)); } + return null; + } + // For any other nullable column type (e.g. INT, BIGINT) accessed via IS NULL / IS NOT NULL: + // record the [col_name, NULL] path so NestedColumnPruning can emit null-only access paths. + // Skip NestedColumnPrunable types (already handled above) and string types (handled above). + if (!(dataType instanceof NestedColumnPrunable) && !dataType.isStringLikeType() + && !context.accessPathBuilder.isEmpty() && slotReference.nullable()) { + context.accessPathBuilder.addPrefix(slotReference.getName()); + ImmutableList path = ImmutableList.copyOf(context.accessPathBuilder.accessPath); + int slotId = slotReference.getExprId().asInt(); + slotToAccessPaths.put(slotId, + new CollectAccessPathResult(path, context.bottomFilter, TAccessPathType.DATA)); + } + // For any other nullable column type accessed directly (not via IS NULL / length / etc.): + // record a [col_name] full-access path so that when the column is also used via IS NULL, + // stripNullSuffixPaths correctly suppresses the null-only optimization. + if (!(dataType instanceof NestedColumnPrunable) && !dataType.isStringLikeType() + && !(dataType instanceof VariantType) + && context.accessPathBuilder.isEmpty() && slotReference.nullable()) { + int slotId = slotReference.getExprId().asInt(); + slotToAccessPaths.put(slotId, + new CollectAccessPathResult( + ImmutableList.of(slotReference.getName()), + context.bottomFilter, TAccessPathType.DATA)); } return null; } @@ -491,14 +524,35 @@ public Void visitArraySortBy(ArraySortBy arraySortBy, CollectorContext context) return visit(arraySortBy, context); } - // @Override - // public Void visitIsNull(IsNull isNull, CollectorContext context) { - // if (context.accessPathBuilder.isEmpty()) { - // context.setType(TAccessPathType.META); - // return continueCollectAccessPath(isNull.child(), context); - // } - // return visit(isNull, context); - // } + @Override + public Void visitIsNull(IsNull isNull, CollectorContext context) { + Expression arg = isNull.child(); + // Skip variant sub-column paths (v['k'] IS NULL): the sub-column path is already baked + // into the SlotReference, so null-only access doesn't apply the same way. + if (arg instanceof SlotReference && ((SlotReference) arg).hasSubColPath()) { + return visit(isNull, context); + } + // Optimize IS NULL on nullable expressions: create a context with NULL suffix to indicate + // only the null flag is needed. Works for top-level columns (col IS NULL → [col, NULL]) + // and nested access (struct_element(s, 'city') IS NULL → [s, city, NULL]). + // For unrecognized expressions, the default visitor resets context, safely discarding NULL. + if (arg.nullable() && context.accessPathBuilder.isEmpty()) { + CollectorContext nullContext = + new CollectorContext(context.statementContext, context.bottomFilter); + nullContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_NULL); + return continueCollectAccessPath(arg, nullContext); + } + return visit(isNull, context); + } + + @Override + public Void visitNot(Not not, CollectorContext context) { + // NOT(IS NULL) == IS NOT NULL: same null-only access pattern + if (not.child() instanceof IsNull) { + return not.child().accept(this, context); + } + return visit(not, context); + } private Void collectArrayPathInLambda(Lambda lambda, CollectorContext context) { List arguments = lambda.getArguments(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java index 864d4d69f8e661..f033d967785ec4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java @@ -30,6 +30,9 @@ public class AccessPathInfo { // Suffix appended to a string-column path to indicate that only the offset array // (not the char data) is needed — agreed with BE as the special path component name. public static final String ACCESS_STRING_OFFSET = "OFFSET"; + // Suffix appended to a column path to indicate that only the null flag + // (not the actual data) is needed — used when the column is only accessed via IS NULL / IS NOT NULL. + public static final String ACCESS_NULL = "NULL"; private DataType prunedType; // allAccessPaths is used to record all access path include predicate access path and non-predicate access path, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java index c738790dc7d0bd..e109523e813221 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java @@ -72,7 +72,8 @@ public Map> collect(Plan root, StatementCont private boolean shouldCollectAccessPath(Slot slot) { return slot.getDataType() instanceof NestedColumnPrunable || slot.getDataType().isVariantType() - || slot.getDataType().isStringLikeType(); + || slot.getDataType().isStringLikeType() + || slot.nullable(); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java index 97370c261abf41..08206166cb93a5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java @@ -23,6 +23,8 @@ import org.apache.doris.nereids.jobs.JobContext; import org.apache.doris.nereids.rules.rewrite.AccessPathExpressionCollector.CollectAccessPathResult; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.IsNull; +import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.functions.scalar.Cardinality; @@ -51,6 +53,7 @@ import org.apache.logging.log4j.Logger; import java.util.ArrayList; +import java.util.Collection; import java.util.Comparator; import java.util.LinkedHashMap; import java.util.List; @@ -58,6 +61,7 @@ import java.util.Map.Entry; import java.util.Optional; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; /** *
  • 1. prune the data type of struct/map @@ -84,7 +88,8 @@ public Plan rewriteRoot(Plan plan, JobContext jobContext) { if (!sessionVariable.enablePruneNestedColumns || (!statementContext.hasNestedColumns() && !containsVariant(plan) - && !(containsStringLength(plan)))) { + && !containsStringLength(plan) + && !containsNullCheck(plan))) { return plan; } @@ -165,6 +170,40 @@ private static boolean containsVariant(Plan plan) { return hasVariant.get(); } + /** Returns true when the plan tree contains IS NULL or IS NOT NULL on a nullable slot. */ + private static boolean containsNullCheck(Plan plan) { + AtomicBoolean found = new AtomicBoolean(false); + plan.foreachUp(node -> { + if (found.get()) { + return; + } + Plan current = (Plan) node; + for (Expression expression : current.getExpressions()) { + if (expressionContainsNullCheck(expression)) { + found.set(true); + return; + } + } + }); + return found.get(); + } + + private static boolean expressionContainsNullCheck(Expression expr) { + if (expr instanceof IsNull && expr.child(0).nullable()) { + return true; + } + if (expr instanceof Not && expr.child(0) instanceof IsNull + && expr.child(0).child(0).nullable()) { + return true; + } + for (Expression child : expr.children()) { + if (expressionContainsNullCheck(child)) { + return true; + } + } + return false; + } + private static Map pruneDataType( Map> slotToAccessPaths) { Map result = new LinkedHashMap<>(); @@ -232,6 +271,12 @@ private static Map pruneDataType( List allPaths = buildColumnAccessPaths(slot, allAccessPaths); result.put(slot.getExprId().asInt(), new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>())); + } else if (accessTree.hasNullCheckOnlyAccess()) { + // Null-check-only access (e.g. str_col IS NULL): type stays varchar, + // but we send [col, NULL] access path so BE only reads the null flag. + List allPaths = buildColumnAccessPaths(slot, allAccessPaths); + result.put(slot.getExprId().asInt(), + new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>())); } // direct access (accessAll=true) or other: skip — no type change, no access paths needed. continue; @@ -247,6 +292,15 @@ private static Map pruneDataType( continue; } + // Null-check-only access (e.g. col IS NULL / col IS NOT NULL): type stays unchanged, + // but we must send the [col, NULL] access path to BE so it only reads the null flag. + if (accessTree.hasNullCheckOnlyAccess()) { + List allPaths = buildColumnAccessPaths(slot, allAccessPaths); + result.put(slot.getExprId().asInt(), + new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>())); + continue; + } + if (slot.getDataType().isMapType() && accessTree.hasMapValueOffsetOnlyAccess()) { // length(map_col['key']): keys read in full (element lookup) + values offset-only. // Emit [col, KEYS] and [col, VALUES, OFFSET] directly instead of the collected @@ -293,6 +347,10 @@ private static Map pruneDataType( }); } } + + // Strip NULL-suffix paths when a non-NULL path also exists for the same slot. + // E.g. `SELECT col FROM t WHERE col IS NULL` — full data is needed, NULL path is redundant. + stripNullSuffixPaths(slot, allAccessPaths); List allPaths = buildColumnAccessPaths(slot, allAccessPaths); result.put(slot.getExprId().asInt(), new AccessPathInfo(prunedDataType, allPaths, new ArrayList<>())); @@ -330,6 +388,28 @@ private static Map pruneDataType( return result; } + /** Strip NULL-suffix paths when a non-NULL path also exists for the same slot. */ + private static void stripNullSuffixPaths( + Slot slot, Multimap>> allAccessPaths) { + int slotId = slot.getExprId().asInt(); + Collection>> slotPaths = allAccessPaths.get(slotId); + boolean hasNonNullPath = slotPaths.stream().anyMatch(p -> { + List path = p.second; + return path.isEmpty() + || !AccessPathInfo.ACCESS_NULL.equals(path.get(path.size() - 1)); + }); + if (hasNonNullPath) { + List>> toRemove = slotPaths.stream() + .filter(p -> !p.second.isEmpty() + && AccessPathInfo.ACCESS_NULL.equals( + p.second.get(p.second.size() - 1))) + .collect(Collectors.toList()); + for (Pair> r : toRemove) { + allAccessPaths.remove(slotId, r); + } + } + } + private static List buildColumnAccessPaths( Slot slot, Multimap>> accessPaths) { List paths = new ArrayList<>(); @@ -390,6 +470,10 @@ public static class DataTypeAccessTree { // When this flag is set and accessAll is NOT set, pruneDataType() returns BigIntType // to signal that the BE only needs to read the offset array, not the chars data. private boolean isStringOffsetOnly; + // True when this column node is accessed ONLY via IS NULL / IS NOT NULL. + // When this flag is set and accessAll is NOT set, the BE only needs to read the null flag, + // not the actual column data. + private boolean isNullCheckOnly; // for the future, only access the meta of the column, // e.g. `is not null` can only access the column's offset, not need to read the data private TAccessPathType pathType; @@ -497,6 +581,17 @@ public boolean hasStringOffsetOnlyAccess() { return type.isStringLikeType() && isStringOffsetOnly && !accessAll; } + /** True when the column is accessed ONLY via IS NULL / IS NOT NULL, + * meaning the BE only needs to read the null flag, not the actual data. */ + public boolean hasNullCheckOnlyAccess() { + if (isRoot) { + DataTypeAccessTree child = children.values().iterator().next(); + return child.isNullCheckOnly && !child.accessAll + && !child.isStringOffsetOnly && !child.accessPartialChild; + } + return isNullCheckOnly && !accessAll && !isStringOffsetOnly && !accessPartialChild; + } + /** pruneCastType */ public DataType pruneCastType(DataTypeAccessTree origin, DataTypeAccessTree cast) { if (type instanceof StructType) { @@ -583,14 +678,22 @@ public void setAccessByPath(List path, int accessIndex, TAccessPathType if (accessIndex >= path.size()) { accessAll = true; return; - } else { - accessPartialChild = true; } if (pathType == TAccessPathType.DATA) { this.pathType = TAccessPathType.DATA; } + // NULL path component: the column is accessed only via IS NULL / IS NOT NULL. + // Mark null-check-only and return without setting accessAll or accessPartialChild, + // so that parent nodes can distinguish "null-only leaf" from "has real sub-access". + if (path.get(accessIndex).equals(AccessPathInfo.ACCESS_NULL)) { + isNullCheckOnly = true; + return; + } + + accessPartialChild = true; + if (this.type.isStructType()) { String fieldName = path.get(accessIndex).toLowerCase(); DataTypeAccessTree child = children.get(fieldName); @@ -690,9 +793,12 @@ public Optional pruneDataType() { } else if (accessAll) { return Optional.of(type); } else if (isStringOffsetOnly) { - // Only the offset array is accessed (e.g. length(str_col)). - // The slot type stays unchanged (varchar); the access path tells BE to skip char data. - return Optional.empty(); + return Optional.of(type); + } else if (isNullCheckOnly && !accessPartialChild) { + // Only the null flag is accessed (e.g. col IS NULL / struct_element(s,'f') IS NULL). + // Return the node's type so that parent nodes include this child in their pruned type, + // while the access path (ending in NULL) tells BE to skip actual data reading. + return Optional.of(type); } else if (!accessPartialChild) { return Optional.empty(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SlotTypeReplacer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SlotTypeReplacer.java index 068afd626ff6b6..d71d66ccb1f9e0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SlotTypeReplacer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SlotTypeReplacer.java @@ -725,7 +725,8 @@ private void tryRecordReplaceSlots(Plan plan, Object checkObj, Set shou int slotId = slot.getExprId().asInt(); if ((slot.getDataType() instanceof NestedColumnPrunable || slot.getDataType().isVariantType() - || slot.getDataType().isStringLikeType()) + || slot.getDataType().isStringLikeType() + || slot.nullable()) && replacedDataTypes.containsKey(slotId)) { shouldReplaceSlots.add(slotId); shouldPrune = true; diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java index ae4fe67a802983..ad028844341fd7 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java @@ -436,31 +436,35 @@ public void testProject() throws Exception { public void testFilter() throws Throwable { assertColumn("select 100 from tbl where s is not null", "struct>>>", - ImmutableList.of(path("s")), - ImmutableList.of(path("s")) + ImmutableList.of(path("s", "NULL")), + ImmutableList.of(path("s", "NULL")) ); + // After optimizer simplification, if(id=1, null, s) IS NOT NULL becomes s IS NOT NULL, + // which triggers null-only access. Combined with struct_element(s, 'city'), the struct + // is pruned to just the city field, and NULL path is stripped from allPaths. + // predicateAccessPaths retains [s, NULL] since it describes the predicate's specific access. assertColumn("select 100 from tbl where if(id = 1, null, s) is not null or struct_element(s, 'city') = 'beijing'", - "struct>>>", - ImmutableList.of(path("s")), - ImmutableList.of(path("s")) + "struct", + ImmutableList.of(path("s", "city")), + ImmutableList.of(path("s", "NULL"), path("s", "city")) ); assertColumn("select 100 from tbl where struct_element(s, 'city') is not null", "struct", - ImmutableList.of(path("s", "city")), - ImmutableList.of(path("s", "city")) + ImmutableList.of(path("s", "city", "NULL")), + ImmutableList.of(path("s", "city", "NULL")) ); assertColumn("select 100 from tbl where struct_element(s, 'data') is not null", "struct>>>", - ImmutableList.of(path("s", "data")), - ImmutableList.of(path("s", "data")) + ImmutableList.of(path("s", "data", "NULL")), + ImmutableList.of(path("s", "data", "NULL")) ); assertColumn("select 100 from tbl where struct_element(s, 'data')[1] is not null", "struct>>>", - ImmutableList.of(path("s", "data", "*")), - ImmutableList.of(path("s", "data", "*")) + ImmutableList.of(path("s", "data", "*", "NULL")), + ImmutableList.of(path("s", "data", "*", "NULL")) ); assertColumn("select 100 from tbl where map_keys(struct_element(s, 'data')[1]) is not null", "struct>>>", @@ -469,28 +473,28 @@ public void testFilter() throws Throwable { ); assertColumn("select 100 from tbl where map_values(struct_element(s, 'data')[1]) is not null", "struct>>>", - ImmutableList.of(path("s", "data", "*", "VALUES")), - ImmutableList.of(path("s", "data", "*", "VALUES")) + ImmutableList.of(path("s", "data", "*", "VALUES", "NULL")), + ImmutableList.of(path("s", "data", "*", "VALUES", "NULL")) ); assertColumn("select 100 from tbl where struct_element(map_values(struct_element(s, 'data')[1])[1], 'a') is not null", "struct>>>", - ImmutableList.of(path("s", "data", "*", "VALUES", "a")), - ImmutableList.of(path("s", "data", "*", "VALUES", "a")) + ImmutableList.of(path("s", "data", "*", "VALUES", "a", "NULL")), + ImmutableList.of(path("s", "data", "*", "VALUES", "a", "NULL")) ); assertColumn("select 100 from tbl where struct_element(s, 'data')[1][1] is not null", "struct>>>", - ImmutableList.of(path("s", "data", "*", "*")), - ImmutableList.of(path("s", "data", "*", "*")) + ImmutableList.of(path("s", "data", "*", "*", "NULL")), + ImmutableList.of(path("s", "data", "*", "*", "NULL")) ); assertColumn("select 100 from tbl where struct_element(struct_element(s, 'data')[1][1], 'a') is not null", "struct>>>", - ImmutableList.of(path("s", "data", "*", "*", "a")), - ImmutableList.of(path("s", "data", "*", "*", "a")) + ImmutableList.of(path("s", "data", "*", "*", "a", "NULL")), + ImmutableList.of(path("s", "data", "*", "*", "a", "NULL")) ); assertColumn("select 100 from tbl where struct_element(struct_element(s, 'data')[1][1], 'b') is not null", "struct>>>", - ImmutableList.of(path("s", "data", "*", "*", "b")), - ImmutableList.of(path("s", "data", "*", "*", "b")) + ImmutableList.of(path("s", "data", "*", "*", "b", "NULL")), + ImmutableList.of(path("s", "data", "*", "*", "b", "NULL")) ); } @@ -499,19 +503,19 @@ public void testProjectFilter() throws Throwable { assertColumn("select s from tbl where struct_element(s, 'city') is not null", "struct>>>", ImmutableList.of(path("s")), - ImmutableList.of(path("s", "city")) + ImmutableList.of(path("s", "city", "NULL")) ); assertColumn("select struct_element(s, 'data') from tbl where struct_element(s, 'city') is not null", "struct>>>", - ImmutableList.of(path("s", "data"), path("s", "city")), - ImmutableList.of(path("s", "city")) + ImmutableList.of(path("s", "data")), + ImmutableList.of(path("s", "city", "NULL")) ); assertColumn("select struct_element(s, 'data') from tbl where struct_element(s, 'city') is not null and struct_element(s, 'data') is not null", "struct>>>", - ImmutableList.of(path("s", "data"), path("s", "city")), - ImmutableList.of(path("s", "data"), path("s", "city")) + ImmutableList.of(path("s", "data")), + ImmutableList.of(path("s", "city", "NULL"), path("s", "data", "NULL")) ); } @@ -1185,6 +1189,36 @@ private void assertColumns(String sql, // @Test // public void testStringLengthPruning() { + + @Test + public void testStructIsNullPruning() throws Exception { + // struct column IS NULL → null-only access, emit [s, NULL] path, type stays struct + assertColumn("select 1 from tbl where s is null", + "struct>>>", + ImmutableList.of(path("s", "NULL")), + ImmutableList.of(path("s", "NULL"))); + } + + @Test + public void testStructIsNotNullPruning() throws Exception { + // struct column IS NOT NULL → same null-only access pattern + assertColumn("select 1 from tbl where s is not null", + "struct>>>", + ImmutableList.of(path("s", "NULL")), + ImmutableList.of(path("s", "NULL"))); + } + + @Test + public void testStructIsNullMixedAccess() throws Exception { + // struct column IS NULL + data access → NULL path stripped from allPaths, predPaths keeps [s, NULL] + assertColumn("select struct_element(s, 'city') from tbl where s is null", + "struct", + ImmutableList.of(path("s", "city")), + ImmutableList.of(path("s", "NULL"))); + } + + // @Test + // public void testStringLengthPruningOld() { // // ── Case 1: length(str_col) only ─ offset-only optimization applied ────────── // assertStringColumn( // "select length(str_col) from str_tbl", diff --git a/regression-test/suites/nereids_rules_p0/column_pruning/null_column_pruning.groovy b/regression-test/suites/nereids_rules_p0/column_pruning/null_column_pruning.groovy new file mode 100644 index 00000000000000..5378bf3dd41e79 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/column_pruning/null_column_pruning.groovy @@ -0,0 +1,187 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Regression tests for the IS NULL / IS NOT NULL column pruning optimization. +// +// When IS NULL (or IS NOT NULL) is the *only* use of a nullable column, the FE +// should emit a DATA access path with a "NULL" component so that the BE can +// satisfy the query by reading only the null flag instead of the full column data. +// The EXPLAIN plan should show: +// nested columns: : all access paths: [.NULL] +// +// When the same column is also accessed for data (e.g., projected or used in +// struct_element), the NULL-only path must be stripped from allAccessPaths but +// preserved in predicateAccessPaths. + +suite("null_column_pruning") { + sql """ DROP TABLE IF EXISTS ncp_tbl """ + sql """ + CREATE TABLE ncp_tbl ( + id INT, + str_col STRING NULL, + struct_col STRUCT NULL, + arr_col ARRAY NULL, + map_col MAP NULL, + int_col INT NULL + ) ENGINE = OLAP + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_allocation" = "tag.location.default: 1") + """ + + sql """ + INSERT INTO ncp_tbl VALUES + (1, 'hello', named_struct('city', null, 'zip', 10001), [1, 2, 3], {'a': 1, 'b': 2 }, 1) + """ + // ─── Struct IS NULL only ──────────────────────────────────────────────────── + // Only null check on struct_col → emit [struct_col, NULL] access path, + // type stays full struct (no pruning needed). + explain { + sql "select 1 from ncp_tbl where struct_col is null" + contains "nested columns" + contains "NULL" + } + + // ─── String IS NULL only ──────────────────────────────────────────────────── + // Only null check on str_col → emit [str_col, NULL] access path, + // type stays string (no pruning needed). + explain { + sql "select 1 from ncp_tbl where str_col is null" + contains "nested columns" + contains "NULL" + } + + // ─── String IS NOT NULL only ──────────────────────────────────────────────── + explain { + sql "select 1 from ncp_tbl where str_col is not null" + contains "nested columns" + contains "NULL" + } + + // ─── Struct IS NOT NULL only ──────────────────────────────────────────────── + // IS NOT NULL is the same optimization (only null flag needed). + explain { + sql "select 1 from ncp_tbl where struct_col is not null" + contains "nested columns" + contains "NULL" + } + + // ─── Struct IS NULL in aggregate ──────────────────────────────────────────── + explain { + sql "select count(*) from ncp_tbl where struct_col is null" + contains "nested columns" + contains "NULL" + } + + // ─── Array IS NULL only ───────────────────────────────────────────────────── + explain { + sql "select 1 from ncp_tbl where arr_col is null" + contains "nested columns" + contains "NULL" + } + + // ─── Map IS NULL only ─────────────────────────────────────────────────────── + explain { + sql "select 1 from ncp_tbl where map_col is null" + contains "nested columns" + contains "NULL" + } + + // ─── Int IS NULL only ─────────────────────────────────────────────────────── + // Nullable primitive type (INT) accessed only via IS NULL → emit [int_col, NULL] + // access path so BE only reads the null flag. + explain { + sql "select 1 from ncp_tbl where int_col is null" + contains "nested columns" + contains "NULL" + } + + // ─── Int IS NOT NULL only ─────────────────────────────────────────────────── + explain { + sql "select 1 from ncp_tbl where int_col is not null" + contains "nested columns" + contains "NULL" + } + + // ─── Mixed: int IS NULL + projected ──────────────────────────────────────── + // int_col IS NULL in WHERE + int_col in SELECT → data is also needed. + // allAccessPaths should be [int_col] (whole column), not null-only. + explain { + sql "select int_col from ncp_tbl where int_col is null" + contains "nested columns" + contains "all access paths: [int_col]" + contains "predicate access paths: [int_col.NULL]" + } + + // ─── Mixed: struct IS NULL + partial field access ─────────────────────────── + // struct_col IS NULL in WHERE + struct_element in SELECT → data is also needed. + // allAccessPaths should have field path (NULL stripped), predicateAccessPaths keeps NULL. + explain { + sql "select struct_element(struct_col, 'city') from ncp_tbl where struct_col is null" + contains "nested columns" + contains "predicate access paths" + contains "NULL" + } + + // ─── Non-optimizable: struct IS NULL + full struct projected ──────────────── + // Full struct access means accessAll=true; null-only optimization is suppressed. + explain { + sql "select struct_col from ncp_tbl where struct_col is null" + // The predicate path [struct_col.NULL] should still appear + contains "predicate access paths" + contains "NULL" + } + + // ─── Nested struct field IS NULL ──────────────────────────────────────────── + // struct_element(struct_col, 'city') IS NULL should produce a null-flag-only + // predicate path [struct_col.city.NULL] while the projection reads city data. + explain { + sql "select struct_element(struct_col, 'city') from ncp_tbl where struct_element(struct_col, 'city') is null" + contains "predicate access paths: [struct_col.city.NULL]" + } + + // ─── Non-nullable column IS NULL → no nested column pruning ───────────────── + // A NOT NULL column has no null flags; IS NULL is always false and the optimizer + // must NOT generate a .NULL access path for it. + sql """ DROP TABLE IF EXISTS ncp_tbl_nn """ + sql """ + CREATE TABLE ncp_tbl_nn ( + id INT NOT NULL, + str_col STRING NULL + ) ENGINE = OLAP + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ("replication_allocation" = "tag.location.default: 1") + """ + sql """ INSERT INTO ncp_tbl_nn VALUES (1, 'hello') """ + + explain { + sql "select 1 from ncp_tbl_nn where id is null" + notContains "nested columns" + } + + // ─── length(str_col) = 0 OR str_col IS NULL ──────────────────────────────── + // Both length and IS NULL are null-flag / offset-only accesses on str_col. + // The plan should show both OFFSET and NULL access paths without reading + // the full string data. + explain { + sql "select 1 from ncp_tbl where length(str_col) = 0 or str_col is null" + contains "nested columns" + contains "str_col.NULL" + contains "str_col.OFFSET" + } +}