Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import org.apache.doris.nereids.trees.expressions.ArrayItemReference.ArrayItemSlot;
import org.apache.doris.nereids.trees.expressions.Cast;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.IsNull;
import org.apache.doris.nereids.trees.expressions.Not;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.functions.scalar.ArrayCount;
import org.apache.doris.nereids.trees.expressions.functions.scalar.ArrayExists;
Expand Down Expand Up @@ -118,7 +120,14 @@ public Void visitSlotReference(SlotReference slotReference, CollectorContext con
if (slotReference.hasSubColPath()) {
path.addAll(slotReference.getSubPath());
}
path.addAll(context.accessPathBuilder.getPathList());
// Strip NULL suffix for variant sub-column access — null-flag-only optimization
// does not apply to variant sub-column data layout.
List<String> builderPath = context.accessPathBuilder.getPathList();
if (builderPath.size() > 1
&& AccessPathInfo.ACCESS_NULL.equals(builderPath.get(builderPath.size() - 1))) {
builderPath = new ArrayList<>(builderPath.subList(0, builderPath.size() - 1));
}
path.addAll(builderPath);
int slotId = slotReference.getExprId().asInt();
slotToAccessPaths.put(slotId, new CollectAccessPathResult(
path, context.bottomFilter, TAccessPathType.DATA));
Expand All @@ -133,8 +142,8 @@ public Void visitSlotReference(SlotReference slotReference, CollectorContext con
if (dataType.isStringLikeType()) {
int slotId = slotReference.getExprId().asInt();
if (!context.accessPathBuilder.isEmpty()) {
// Accessed via an offset-only function (e.g. length()).
// Builder already has "offset" at the tail; add the column name as prefix.
// Accessed via an offset-only function (e.g. length()) or null-check (IS NULL).
// Builder already has "OFFSET"/"NULL" at the tail; add the column name as prefix.
context.accessPathBuilder.addPrefix(slotReference.getName());
ImmutableList<String> path = ImmutableList.copyOf(context.accessPathBuilder.accessPath);
slotToAccessPaths.put(slotId,
Expand All @@ -146,6 +155,30 @@ public Void visitSlotReference(SlotReference slotReference, CollectorContext con
slotToAccessPaths.put(slotId,
new CollectAccessPathResult(path, context.bottomFilter, TAccessPathType.DATA));
}
return null;
}
// For any other nullable column type (e.g. INT, BIGINT) accessed via IS NULL / IS NOT NULL:
// record the [col_name, NULL] path so NestedColumnPruning can emit null-only access paths.
// Skip NestedColumnPrunable types (already handled above) and string types (handled above).
if (!(dataType instanceof NestedColumnPrunable) && !dataType.isStringLikeType()
&& !context.accessPathBuilder.isEmpty() && slotReference.nullable()) {
context.accessPathBuilder.addPrefix(slotReference.getName());
ImmutableList<String> path = ImmutableList.copyOf(context.accessPathBuilder.accessPath);
int slotId = slotReference.getExprId().asInt();
slotToAccessPaths.put(slotId,
new CollectAccessPathResult(path, context.bottomFilter, TAccessPathType.DATA));
}
// For any other nullable column type accessed directly (not via IS NULL / length / etc.):
// record a [col_name] full-access path so that when the column is also used via IS NULL,
// stripNullSuffixPaths correctly suppresses the null-only optimization.
if (!(dataType instanceof NestedColumnPrunable) && !dataType.isStringLikeType()
&& !(dataType instanceof VariantType)
&& context.accessPathBuilder.isEmpty() && slotReference.nullable()) {
int slotId = slotReference.getExprId().asInt();
slotToAccessPaths.put(slotId,
new CollectAccessPathResult(
ImmutableList.of(slotReference.getName()),
context.bottomFilter, TAccessPathType.DATA));
}
return null;
}
Expand Down Expand Up @@ -491,14 +524,35 @@ public Void visitArraySortBy(ArraySortBy arraySortBy, CollectorContext context)
return visit(arraySortBy, context);
}

// @Override
// public Void visitIsNull(IsNull isNull, CollectorContext context) {
// if (context.accessPathBuilder.isEmpty()) {
// context.setType(TAccessPathType.META);
// return continueCollectAccessPath(isNull.child(), context);
// }
// return visit(isNull, context);
// }
@Override
public Void visitIsNull(IsNull isNull, CollectorContext context) {
    Expression operand = isNull.child();

    // A variant sub-column reference (v['k'] IS NULL) already carries its sub-path inside the
    // SlotReference, so it is collected through the generic visitor rather than the null-only path.
    boolean variantSubColumn = operand instanceof SlotReference
            && ((SlotReference) operand).hasSubColPath();

    // The null-only path is used only for a nullable operand reached with an empty path builder,
    // i.e. no enclosing access has already contributed path components.
    boolean nullFlagOnly = !variantSubColumn
            && operand.nullable()
            && context.accessPathBuilder.isEmpty();
    if (!nullFlagOnly) {
        return visit(isNull, context);
    }

    // Start a fresh context whose path ends in the NULL marker and keep collecting below the
    // operand, e.g. col IS NULL -> [col, NULL]; struct_element(s, 'city') IS NULL -> [s, city, NULL].
    CollectorContext nullOnlyContext =
            new CollectorContext(context.statementContext, context.bottomFilter);
    nullOnlyContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_NULL);
    return continueCollectAccessPath(operand, nullOnlyContext);
}

@Override
public Void visitNot(Not not, CollectorContext context) {
    Expression operand = not.child();
    if (!(operand instanceof IsNull)) {
        // Any other negation goes through the generic visitor.
        return visit(not, context);
    }
    // NOT(x IS NULL) is x IS NOT NULL: dispatch straight to the IS NULL child so it is
    // collected with the same context as a plain IS NULL.
    return operand.accept(this, context);
}

private Void collectArrayPathInLambda(Lambda lambda, CollectorContext context) {
List<Expression> arguments = lambda.getArguments();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ public class AccessPathInfo {
// Suffix appended to a string-column path to indicate that only the offset array
// (not the char data) is needed — agreed with BE as the special path component name.
public static final String ACCESS_STRING_OFFSET = "OFFSET";
// Suffix appended to a column path to indicate that only the null flag
// (not the actual data) is needed — used when the column is only accessed via IS NULL / IS NOT NULL.
public static final String ACCESS_NULL = "NULL";

private DataType prunedType;
// allAccessPaths is used to record all access path include predicate access path and non-predicate access path,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ public Map<Slot, List<CollectAccessPathResult>> collect(Plan root, StatementCont
private boolean shouldCollectAccessPath(Slot slot) {
return slot.getDataType() instanceof NestedColumnPrunable
|| slot.getDataType().isVariantType()
|| slot.getDataType().isStringLikeType();
|| slot.getDataType().isStringLikeType()
|| slot.nullable();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import org.apache.doris.nereids.jobs.JobContext;
import org.apache.doris.nereids.rules.rewrite.AccessPathExpressionCollector.CollectAccessPathResult;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.IsNull;
import org.apache.doris.nereids.trees.expressions.Not;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Cardinality;
Expand Down Expand Up @@ -51,13 +53,15 @@
import org.apache.logging.log4j.Logger;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;

/**
* <li> 1. prune the data type of struct/map
Expand All @@ -84,7 +88,8 @@ public Plan rewriteRoot(Plan plan, JobContext jobContext) {
if (!sessionVariable.enablePruneNestedColumns
|| (!statementContext.hasNestedColumns()
&& !containsVariant(plan)
&& !(containsStringLength(plan)))) {
&& !containsStringLength(plan)
&& !containsNullCheck(plan))) {
return plan;
}

Expand Down Expand Up @@ -165,6 +170,40 @@ private static boolean containsVariant(Plan plan) {
return hasVariant.get();
}

/**
 * Returns true when any expression of any node in the plan tree contains
 * IS NULL or IS NOT NULL applied to a nullable operand.
 */
private static boolean containsNullCheck(Plan plan) {
    AtomicBoolean hit = new AtomicBoolean(false);
    plan.foreachUp(treeNode -> {
        // Once a match is recorded, remaining nodes are visited but not inspected.
        if (!hit.get()) {
            boolean matched = ((Plan) treeNode).getExpressions().stream()
                    .anyMatch(expression -> expressionContainsNullCheck(expression));
            if (matched) {
                hit.set(true);
            }
        }
    });
    return hit.get();
}

/**
 * Returns true when {@code expr}, or any expression nested below it, is an IS NULL
 * (optionally wrapped in NOT) whose operand is nullable.
 */
private static boolean expressionContainsNullCheck(Expression expr) {
    // Treat NOT(x IS NULL) like x IS NULL by looking through the negation.
    Expression nullCheck = expr;
    if (expr instanceof Not && expr.child(0) instanceof IsNull) {
        nullCheck = expr.child(0);
    }
    if (nullCheck instanceof IsNull && nullCheck.child(0).nullable()) {
        return true;
    }
    // Otherwise search the children depth-first, stopping at the first match.
    return expr.children().stream().anyMatch(child -> expressionContainsNullCheck(child));
}

private static Map<Integer, AccessPathInfo> pruneDataType(
Map<Slot, List<CollectAccessPathResult>> slotToAccessPaths) {
Map<Integer, AccessPathInfo> result = new LinkedHashMap<>();
Expand Down Expand Up @@ -232,6 +271,12 @@ private static Map<Integer, AccessPathInfo> pruneDataType(
List<TColumnAccessPath> allPaths = buildColumnAccessPaths(slot, allAccessPaths);
result.put(slot.getExprId().asInt(),
new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>()));
} else if (accessTree.hasNullCheckOnlyAccess()) {
// Null-check-only access (e.g. str_col IS NULL): type stays varchar,
// but we send [col, NULL] access path so BE only reads the null flag.
List<TColumnAccessPath> allPaths = buildColumnAccessPaths(slot, allAccessPaths);
result.put(slot.getExprId().asInt(),
new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>()));
}
// direct access (accessAll=true) or other: skip — no type change, no access paths needed.
continue;
Expand All @@ -247,6 +292,15 @@ private static Map<Integer, AccessPathInfo> pruneDataType(
continue;
}

// Null-check-only access (e.g. col IS NULL / col IS NOT NULL): type stays unchanged,
// but we must send the [col, NULL] access path to BE so it only reads the null flag.
if (accessTree.hasNullCheckOnlyAccess()) {
List<TColumnAccessPath> allPaths = buildColumnAccessPaths(slot, allAccessPaths);
result.put(slot.getExprId().asInt(),
new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>()));
continue;
}

if (slot.getDataType().isMapType() && accessTree.hasMapValueOffsetOnlyAccess()) {
// length(map_col['key']): keys read in full (element lookup) + values offset-only.
// Emit [col, KEYS] and [col, VALUES, OFFSET] directly instead of the collected
Expand Down Expand Up @@ -293,6 +347,10 @@ private static Map<Integer, AccessPathInfo> pruneDataType(
});
}
}

// Strip NULL-suffix paths when a non-NULL path also exists for the same slot.
// E.g. `SELECT col FROM t WHERE col IS NULL` — full data is needed, NULL path is redundant.
stripNullSuffixPaths(slot, allAccessPaths);
List<TColumnAccessPath> allPaths = buildColumnAccessPaths(slot, allAccessPaths);
result.put(slot.getExprId().asInt(),
new AccessPathInfo(prunedDataType, allPaths, new ArrayList<>()));
Expand Down Expand Up @@ -330,6 +388,28 @@ private static Map<Integer, AccessPathInfo> pruneDataType(
return result;
}

/**
 * Removes the NULL-suffixed access paths of {@code slot} from {@code allAccessPaths}
 * when the slot also has at least one path that does not end in the NULL marker.
 * If every path of the slot is NULL-suffixed (or the slot has no paths), nothing is removed.
 */
private static void stripNullSuffixPaths(
        Slot slot, Multimap<Integer, Pair<TAccessPathType, List<String>>> allAccessPaths) {
    int slotId = slot.getExprId().asInt();
    Collection<Pair<TAccessPathType, List<String>>> pathsOfSlot = allAccessPaths.get(slotId);

    // Snapshot the NULL-suffixed entries first so the multimap is not modified while streaming it.
    List<Pair<TAccessPathType, List<String>>> nullSuffixed = pathsOfSlot.stream()
            .filter(entry -> {
                List<String> components = entry.second;
                if (components.isEmpty()) {
                    return false;
                }
                String tail = components.get(components.size() - 1);
                return AccessPathInfo.ACCESS_NULL.equals(tail);
            })
            .collect(Collectors.toList());

    // Equal sizes mean no other kind of path exists for this slot: keep the null-only paths.
    if (nullSuffixed.size() == pathsOfSlot.size()) {
        return;
    }
    nullSuffixed.forEach(entry -> allAccessPaths.remove(slotId, entry));
}

private static List<TColumnAccessPath> buildColumnAccessPaths(
Slot slot, Multimap<Integer, Pair<TAccessPathType, List<String>>> accessPaths) {
List<TColumnAccessPath> paths = new ArrayList<>();
Expand Down Expand Up @@ -390,6 +470,10 @@ public static class DataTypeAccessTree {
// When this flag is set and accessAll is NOT set, pruneDataType() returns BigIntType
// to signal that the BE only needs to read the offset array, not the chars data.
private boolean isStringOffsetOnly;
// True when this column node is accessed ONLY via IS NULL / IS NOT NULL.
// When this flag is set and accessAll is NOT set, the BE only needs to read the null flag,
// not the actual column data.
private boolean isNullCheckOnly;
// for the future, only access the meta of the column,
// e.g. `is not null` can only access the column's offset, not need to read the data
private TAccessPathType pathType;
Expand Down Expand Up @@ -497,6 +581,17 @@ public boolean hasStringOffsetOnlyAccess() {
return type.isStringLikeType() && isStringOffsetOnly && !accessAll;
}

/**
 * True when the column is reached only through IS NULL / IS NOT NULL, i.e. the null-check flag
 * is set and none of the full-access, string-offset-only or partial-child flags are set.
 * On the root node the first child is inspected instead of the root itself.
 */
public boolean hasNullCheckOnlyAccess() {
    DataTypeAccessTree target = isRoot ? children.values().iterator().next() : this;
    boolean hasOtherAccess = target.accessAll
            || target.isStringOffsetOnly
            || target.accessPartialChild;
    return target.isNullCheckOnly && !hasOtherAccess;
}

/** pruneCastType */
public DataType pruneCastType(DataTypeAccessTree origin, DataTypeAccessTree cast) {
if (type instanceof StructType) {
Expand Down Expand Up @@ -583,14 +678,22 @@ public void setAccessByPath(List<String> path, int accessIndex, TAccessPathType
if (accessIndex >= path.size()) {
accessAll = true;
return;
} else {
accessPartialChild = true;
}

if (pathType == TAccessPathType.DATA) {
this.pathType = TAccessPathType.DATA;
}

// NULL path component: the column is accessed only via IS NULL / IS NOT NULL.
// Mark null-check-only and return without setting accessAll or accessPartialChild,
// so that parent nodes can distinguish "null-only leaf" from "has real sub-access".
if (path.get(accessIndex).equals(AccessPathInfo.ACCESS_NULL)) {
isNullCheckOnly = true;
return;
}

accessPartialChild = true;

if (this.type.isStructType()) {
String fieldName = path.get(accessIndex).toLowerCase();
DataTypeAccessTree child = children.get(fieldName);
Expand Down Expand Up @@ -690,9 +793,12 @@ public Optional<DataType> pruneDataType() {
} else if (accessAll) {
return Optional.of(type);
} else if (isStringOffsetOnly) {
// Only the offset array is accessed (e.g. length(str_col)).
// The slot type stays unchanged (varchar); the access path tells BE to skip char data.
return Optional.empty();
return Optional.of(type);
} else if (isNullCheckOnly && !accessPartialChild) {
// Only the null flag is accessed (e.g. col IS NULL / struct_element(s,'f') IS NULL).
// Return the node's type so that parent nodes include this child in their pruned type,
// while the access path (ending in NULL) tells BE to skip actual data reading.
return Optional.of(type);
} else if (!accessPartialChild) {
return Optional.empty();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,8 @@ private void tryRecordReplaceSlots(Plan plan, Object checkObj, Set<Integer> shou
int slotId = slot.getExprId().asInt();
if ((slot.getDataType() instanceof NestedColumnPrunable
|| slot.getDataType().isVariantType()
|| slot.getDataType().isStringLikeType())
|| slot.getDataType().isStringLikeType()
|| slot.nullable())
&& replacedDataTypes.containsKey(slotId)) {
shouldReplaceSlots.add(slotId);
shouldPrune = true;
Expand Down
Loading
Loading