diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/table/IntermediateRecord.java b/pinot-core/src/main/java/org/apache/pinot/core/data/table/IntermediateRecord.java
index 85288d49f4a6..8136293968fa 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/table/IntermediateRecord.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/table/IntermediateRecord.java
@@ -29,7 +29,7 @@ public class IntermediateRecord {
public final Record _record;
public final Comparable[] _values;
- IntermediateRecord(Key key, Record record, Comparable[] values) {
+ public IntermediateRecord(Key key, Record record, Comparable[] values) {
_key = key;
_record = record;
_values = values;
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonExtractIndexUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonExtractIndexUtils.java
new file mode 100644
index 000000000000..245264c6eb5f
--- /dev/null
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonExtractIndexUtils.java
@@ -0,0 +1,261 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.query;
+
+import java.util.List;
+import java.util.Optional;
+import javax.annotation.Nullable;
+import org.apache.pinot.common.function.JsonPathCache;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.FilterContext;
+import org.apache.pinot.common.request.context.RequestContextUtils;
+import org.apache.pinot.common.request.context.predicate.JsonMatchPredicate;
+import org.apache.pinot.common.request.context.predicate.Predicate;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.datasource.DataSource;
+import org.apache.pinot.segment.spi.index.IndexService;
+import org.apache.pinot.segment.spi.index.IndexType;
+import org.apache.pinot.segment.spi.index.reader.JsonIndexReader;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.sql.parsers.CalciteSqlParser;
+
+
+/**
+ * Shared parsing and filter-pushdown helpers for index-aware operators that consume a scalar
+ * {@code jsonExtractIndex(column, path, type[, defaultValue])} expression and a JSON index on the column.
+ */
+public final class JsonExtractIndexUtils {
+ private static final String FUNCTION_NAME_EXTRACT_INDEX = "jsonExtractIndex";
+
+ private JsonExtractIndexUtils() {
+ }
+
+ /**
+ * Parsed view of a {@code jsonExtractIndex} call.
+ */
+ public static final class ParsedJsonExtractIndex {
+ public final String _columnName;
+ public final String _jsonPathString;
+ public final FieldSpec.DataType _dataType;
+ @Nullable
+ public final String _defaultValueLiteral;
+
+ public ParsedJsonExtractIndex(String columnName, String jsonPathString, FieldSpec.DataType dataType,
+ @Nullable String defaultValueLiteral) {
+ _columnName = columnName;
+ _jsonPathString = jsonPathString;
+ _dataType = dataType;
+ _defaultValueLiteral = defaultValueLiteral;
+ }
+ }
+
+ /**
+ * Parses the given expression as a 3/4-arg scalar {@code jsonExtractIndex} call.
+ * Returns {@code null} when the expression has a different shape (wrong arity, non-literal args, MV result type,
+ * unsupported scalar type, {@code [*]} wildcard, or malformed JSON path).
+ *
+ *
This is a pure shape check; it does not consult segment metadata.
+ * Pair with {@link #canUseJsonIndex} to verify that the column has a usable JSON index on the path.
+ */
+ @Nullable
+ public static ParsedJsonExtractIndex parseJsonExtractIndex(ExpressionContext expr) {
+ if (expr.getType() != ExpressionContext.Type.FUNCTION) {
+ return null;
+ }
+ String functionName = expr.getFunction().getFunctionName();
+ if (!FUNCTION_NAME_EXTRACT_INDEX.equalsIgnoreCase(functionName)) {
+ return null;
+ }
+ List args = expr.getFunction().getArguments();
+ if (args.size() != 3 && args.size() != 4) {
+ return null;
+ }
+ if (args.get(0).getType() != ExpressionContext.Type.IDENTIFIER) {
+ return null;
+ }
+ if (args.get(1).getType() != ExpressionContext.Type.LITERAL
+ || args.get(2).getType() != ExpressionContext.Type.LITERAL
+ || (args.size() == 4 && args.get(3).getType() != ExpressionContext.Type.LITERAL)) {
+ return null;
+ }
+
+ String columnName = args.get(0).getIdentifier();
+ String jsonPathString = args.get(1).getLiteral().getStringValue();
+ String resultsType = args.get(2).getLiteral().getStringValue().toUpperCase();
+ // Only single-value types are supported; MV (_ARRAY) would have incorrect flattened-to-real
+ // docId intersection since convertFlattenedDocIdsToDocIds is skipped for MV.
+ if (resultsType.endsWith("_ARRAY")) {
+ return null;
+ }
+ if (jsonPathString.contains("[*]")) {
+ return null;
+ }
+
+ FieldSpec.DataType dataType;
+ try {
+ dataType = FieldSpec.DataType.valueOf(resultsType);
+ } catch (IllegalArgumentException e) {
+ return null;
+ }
+ switch (dataType) {
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ case BIG_DECIMAL:
+ case STRING:
+ break;
+ default:
+ return null;
+ }
+
+ try {
+ JsonPathCache.INSTANCE.getOrCompute(jsonPathString);
+ } catch (Exception e) {
+ return null;
+ }
+
+ String defaultValueLiteral = null;
+ if (args.size() == 4) {
+ defaultValueLiteral = args.get(3).getLiteral().getStringValue();
+ try {
+ dataType.convert(defaultValueLiteral);
+ } catch (Exception e) {
+ return null;
+ }
+ }
+
+ return new ParsedJsonExtractIndex(columnName, jsonPathString, dataType, defaultValueLiteral);
+ }
+
+ /**
+ * Returns the JSON index reader for the column if present, falling back to a composite JSON index when available.
+ */
+ @Nullable
+ public static JsonIndexReader getJsonIndexReader(DataSource dataSource) {
+ JsonIndexReader reader = dataSource.getJsonIndex();
+ if (reader == null) {
+ Optional> compositeIndex =
+ IndexService.getInstance().getOptional("composite_json_index");
+ if (compositeIndex.isPresent()) {
+ reader = (JsonIndexReader) dataSource.getIndex(compositeIndex.get());
+ }
+ }
+ return reader;
+ }
+
+ /**
+ * Returns {@code true} when the expression is a parseable {@code jsonExtractIndex} call and the referenced column
+ * has a usable JSON index covering the path.
+ */
+ public static boolean canUseJsonIndex(IndexSegment indexSegment, ExpressionContext expr) {
+ ParsedJsonExtractIndex parsed = parseJsonExtractIndex(expr);
+ if (parsed == null) {
+ return false;
+ }
+ DataSource dataSource = indexSegment.getDataSourceNullable(parsed._columnName);
+ if (dataSource == null) {
+ return false;
+ }
+ JsonIndexReader reader = getJsonIndexReader(dataSource);
+ if (reader == null) {
+ return false;
+ }
+ return reader.isPathIndexed(parsed._jsonPathString);
+ }
+
+ /**
+ * Walks the filter and returns the inner JSON-match string for a single same-column-same-path {@code JSON_MATCH}
+ * predicate that can be pushed into the JSON-index lookup. Returns {@code null} when no such predicate exists,
+ * or when more than one candidate is found and disambiguation would be unsafe.
+ */
+ @Nullable
+ public static String extractSamePathJsonMatchFilter(ParsedJsonExtractIndex parsed, @Nullable FilterContext filter) {
+ if (filter == null) {
+ return null;
+ }
+ switch (filter.getType()) {
+ case PREDICATE:
+ return extractSamePathJsonMatchFilter(parsed, filter.getPredicate());
+ case AND:
+ String matchingFilter = null;
+ for (FilterContext child : filter.getChildren()) {
+ String childFilter = extractSamePathJsonMatchFilter(parsed, child);
+ if (childFilter == null) {
+ continue;
+ }
+ if (matchingFilter != null) {
+ return null;
+ }
+ matchingFilter = childFilter;
+ }
+ return matchingFilter;
+ default:
+ return null;
+ }
+ }
+
+ /**
+ * Returns {@code true} when the filter is exactly a single same-path JSON_MATCH predicate (no other clauses).
+ */
+ public static boolean isOnlySamePathJsonMatchFilter(ParsedJsonExtractIndex parsed, @Nullable FilterContext filter) {
+ if (filter == null || filter.getType() != FilterContext.Type.PREDICATE) {
+ return false;
+ }
+ return extractSamePathJsonMatchFilter(parsed, filter.getPredicate()) != null;
+ }
+
+ /**
+ * Returns {@code true} when the filter string can match documents that lack the JSON path entirely (i.e. an
+ * {@code IS_NULL} on the path). Callers use this to decide whether they must still account for missing-path
+ * documents after running an index-side filter.
+ */
+ public static boolean jsonMatchFilterCanMatchMissingPath(String filterJsonString) {
+ try {
+ FilterContext filter = RequestContextUtils.getFilter(CalciteSqlParser.compileToExpression(filterJsonString));
+ return filter.getType() == FilterContext.Type.PREDICATE
+ && filter.getPredicate().getType() == Predicate.Type.IS_NULL;
+ } catch (Exception e) {
+ return false;
+ }
+ }
+
+ @Nullable
+ private static String extractSamePathJsonMatchFilter(ParsedJsonExtractIndex parsed, Predicate predicate) {
+ if (!(predicate instanceof JsonMatchPredicate)) {
+ return null;
+ }
+ ExpressionContext lhs = predicate.getLhs();
+ if (lhs.getType() != ExpressionContext.Type.IDENTIFIER
+ || !parsed._columnName.equals(lhs.getIdentifier())) {
+ return null;
+ }
+ String filterJsonString = ((JsonMatchPredicate) predicate).getValue();
+ int start = filterJsonString.indexOf('"');
+ if (start < 0) {
+ return null;
+ }
+ int end = filterJsonString.indexOf('"', start + 1);
+ if (end < 0) {
+ return null;
+ }
+ String filterPath = filterJsonString.substring(start + 1, end);
+ return parsed._jsonPathString.equals(filterPath) ? filterJsonString : null;
+ }
+}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperator.java
index 0970cac305e3..d9c1c8d595ba 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperator.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperator.java
@@ -23,16 +23,10 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
-import java.util.Optional;
import java.util.Set;
import javax.annotation.Nullable;
-import org.apache.pinot.common.function.JsonPathCache;
import org.apache.pinot.common.request.context.ExpressionContext;
-import org.apache.pinot.common.request.context.FilterContext;
import org.apache.pinot.common.request.context.OrderByExpressionContext;
-import org.apache.pinot.common.request.context.RequestContextUtils;
-import org.apache.pinot.common.request.context.predicate.JsonMatchPredicate;
-import org.apache.pinot.common.request.context.predicate.Predicate;
import org.apache.pinot.common.utils.DataSchema;
import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
import org.apache.pinot.core.common.Operator;
@@ -41,6 +35,7 @@
import org.apache.pinot.core.operator.ExplainAttributeBuilder;
import org.apache.pinot.core.operator.blocks.results.DistinctResultsBlock;
import org.apache.pinot.core.operator.filter.BaseFilterOperator;
+import org.apache.pinot.core.operator.query.JsonExtractIndexUtils.ParsedJsonExtractIndex;
import org.apache.pinot.core.query.distinct.table.BigDecimalDistinctTable;
import org.apache.pinot.core.query.distinct.table.DistinctTable;
import org.apache.pinot.core.query.distinct.table.DoubleDistinctTable;
@@ -52,12 +47,9 @@
import org.apache.pinot.segment.spi.IndexSegment;
import org.apache.pinot.segment.spi.SegmentContext;
import org.apache.pinot.segment.spi.datasource.DataSource;
-import org.apache.pinot.segment.spi.index.IndexService;
-import org.apache.pinot.segment.spi.index.IndexType;
import org.apache.pinot.segment.spi.index.reader.JsonIndexReader;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.query.QueryThreadContext;
-import org.apache.pinot.sql.parsers.CalciteSqlParser;
import org.roaringbitmap.RoaringBitmap;
import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
@@ -72,7 +64,6 @@
*/
public class JsonIndexDistinctOperator extends BaseOperator {
private static final String EXPLAIN_NAME = "DISTINCT_JSON_INDEX";
- private static final String FUNCTION_NAME = "jsonExtractIndex";
private final IndexSegment _indexSegment;
private final SegmentContext _segmentContext;
@@ -98,21 +89,22 @@ protected DistinctResultsBlock getNextBlock() {
}
ExpressionContext expr = expressions.get(0);
- ParsedJsonExtractIndex parsed = parseJsonExtractIndex(expr);
+ ParsedJsonExtractIndex parsed = JsonExtractIndexUtils.parseJsonExtractIndex(expr);
if (parsed == null) {
throw new IllegalStateException("Expected 3/4-arg scalar jsonExtractIndex expression");
}
DataSource dataSource = _indexSegment.getDataSource(parsed._columnName, _queryContext.getSchema());
- JsonIndexReader jsonIndexReader = getJsonIndexReader(dataSource);
+ JsonIndexReader jsonIndexReader = JsonExtractIndexUtils.getJsonIndexReader(dataSource);
if (jsonIndexReader == null) {
throw new IllegalStateException("Column " + parsed._columnName + " has no JSON index");
}
- String pushedDownFilterJson = extractSamePathJsonMatchFilter(parsed, _queryContext.getFilter());
+ String pushedDownFilterJson = JsonExtractIndexUtils.extractSamePathJsonMatchFilter(parsed,
+ _queryContext.getFilter());
boolean filterFullyPushedDown = pushedDownFilterJson != null
- && isOnlySamePathJsonMatchFilter(parsed, _queryContext.getFilter())
- && !jsonMatchFilterCanMatchMissingPath(pushedDownFilterJson);
+ && JsonExtractIndexUtils.isOnlySamePathJsonMatchFilter(parsed, _queryContext.getFilter())
+ && !JsonExtractIndexUtils.jsonMatchFilterCanMatchMissingPath(pushedDownFilterJson);
// Fast path: when the filter is fully pushed down into the JSON index, we only need the distinct value strings.
// This avoids reading posting lists, building per-value bitmaps, and converting flattened doc IDs.
@@ -251,72 +243,6 @@ private void handleMissingDocs(DistinctTable distinctTable, ParsedJsonExtractInd
}
}
- @Nullable
- private static String extractSamePathJsonMatchFilter(ParsedJsonExtractIndex parsed, @Nullable FilterContext filter) {
- if (filter == null) {
- return null;
- }
- switch (filter.getType()) {
- case PREDICATE:
- return extractSamePathJsonMatchFilter(parsed, filter.getPredicate());
- case AND:
- String matchingFilter = null;
- for (FilterContext child : filter.getChildren()) {
- String childFilter = extractSamePathJsonMatchFilter(parsed, child);
- if (childFilter == null) {
- continue;
- }
- if (matchingFilter != null) {
- return null;
- }
- matchingFilter = childFilter;
- }
- return matchingFilter;
- default:
- return null;
- }
- }
-
- private static boolean isOnlySamePathJsonMatchFilter(ParsedJsonExtractIndex parsed, @Nullable FilterContext filter) {
- if (filter == null || filter.getType() != FilterContext.Type.PREDICATE) {
- return false;
- }
- return extractSamePathJsonMatchFilter(parsed, filter.getPredicate()) != null;
- }
-
- private static boolean jsonMatchFilterCanMatchMissingPath(String filterJsonString) {
- try {
- FilterContext filter = RequestContextUtils.getFilter(CalciteSqlParser.compileToExpression(filterJsonString));
- return filter.getType() == FilterContext.Type.PREDICATE
- && filter.getPredicate().getType() == Predicate.Type.IS_NULL;
- } catch (Exception e) {
- return false;
- }
- }
-
- @Nullable
- private static String extractSamePathJsonMatchFilter(ParsedJsonExtractIndex parsed, Predicate predicate) {
- if (!(predicate instanceof JsonMatchPredicate)) {
- return null;
- }
- ExpressionContext lhs = predicate.getLhs();
- if (lhs.getType() != ExpressionContext.Type.IDENTIFIER
- || !parsed._columnName.equals(lhs.getIdentifier())) {
- return null;
- }
- String filterJsonString = ((JsonMatchPredicate) predicate).getValue();
- int start = filterJsonString.indexOf('"');
- if (start < 0) {
- return null;
- }
- int end = filterJsonString.indexOf('"', start + 1);
- if (end < 0) {
- return null;
- }
- String filterPath = filterJsonString.substring(start + 1, end);
- return parsed._jsonPathString.equals(filterPath) ? filterJsonString : null;
- }
-
private DistinctTable createDistinctTable(DataSchema dataSchema, FieldSpec.DataType dataType,
@Nullable OrderByExpressionContext orderByExpression) {
int limit = _queryContext.getLimit();
@@ -449,19 +375,6 @@ private static boolean addToTable(StringDistinctTable table, String value,
}
}
- @Nullable
- private static JsonIndexReader getJsonIndexReader(DataSource dataSource) {
- JsonIndexReader reader = dataSource.getJsonIndex();
- if (reader == null) {
- Optional> compositeIndex =
- IndexService.getInstance().getOptional("composite_json_index");
- if (compositeIndex.isPresent()) {
- reader = (JsonIndexReader) dataSource.getIndex(compositeIndex.get());
- }
- }
- return reader;
- }
-
@Nullable
private RoaringBitmap buildFilteredDocIds() {
BaseFilterOperator.FilteredDocIds filteredDocIds = _filterOperator.getFilteredDocIds();
@@ -470,93 +383,6 @@ private RoaringBitmap buildFilteredDocIds() {
return docIds != null ? docIds.toRoaringBitmap() : null;
}
- @Nullable
- private static ParsedJsonExtractIndex parseJsonExtractIndex(ExpressionContext expr) {
- if (expr.getType() != ExpressionContext.Type.FUNCTION) {
- return null;
- }
- if (!FUNCTION_NAME.equalsIgnoreCase(expr.getFunction().getFunctionName())) {
- return null;
- }
- List args = expr.getFunction().getArguments();
- if (args.size() != 3 && args.size() != 4) {
- return null;
- }
- if (args.get(0).getType() != ExpressionContext.Type.IDENTIFIER) {
- return null;
- }
- if (args.get(1).getType() != ExpressionContext.Type.LITERAL
- || args.get(2).getType() != ExpressionContext.Type.LITERAL
- || (args.size() == 4 && args.get(3).getType() != ExpressionContext.Type.LITERAL)) {
- return null;
- }
-
- String columnName = args.get(0).getIdentifier();
- String jsonPathString = args.get(1).getLiteral().getStringValue();
- String resultsType = args.get(2).getLiteral().getStringValue().toUpperCase();
- // Only single-value types are supported; MV (_ARRAY) would have incorrect flattened-to-real
- // docId intersection since convertFlattenedDocIdsToDocIds is skipped for MV.
- if (resultsType.endsWith("_ARRAY")) {
- return null;
- }
- if (jsonPathString.contains("[*]")) {
- return null;
- }
-
- FieldSpec.DataType dataType;
- try {
- dataType = FieldSpec.DataType.valueOf(resultsType);
- } catch (IllegalArgumentException e) {
- return null;
- }
- // Only types with a corresponding DistinctTable implementation are supported
- switch (dataType) {
- case INT:
- case LONG:
- case FLOAT:
- case DOUBLE:
- case BIG_DECIMAL:
- case STRING:
- break;
- default:
- return null;
- }
-
- try {
- JsonPathCache.INSTANCE.getOrCompute(jsonPathString);
- } catch (Exception e) {
- return null;
- }
-
- String defaultValueLiteral = null;
- if (args.size() == 4) {
- defaultValueLiteral = args.get(3).getLiteral().getStringValue();
- try {
- dataType.convert(defaultValueLiteral);
- } catch (Exception e) {
- return null;
- }
- }
-
- return new ParsedJsonExtractIndex(columnName, jsonPathString, dataType, defaultValueLiteral);
- }
-
- private static final class ParsedJsonExtractIndex {
- final String _columnName;
- final String _jsonPathString;
- final FieldSpec.DataType _dataType;
- @Nullable
- final String _defaultValueLiteral;
-
- ParsedJsonExtractIndex(String columnName, String jsonPathString, FieldSpec.DataType dataType,
- @Nullable String defaultValueLiteral) {
- _columnName = columnName;
- _jsonPathString = jsonPathString;
- _dataType = dataType;
- _defaultValueLiteral = defaultValueLiteral;
- }
- }
-
@Override
public List getChildOperators() {
return Collections.singletonList(_filterOperator);
@@ -597,26 +423,11 @@ protected void explainAttributes(ExplainAttributeBuilder attributeBuilder) {
}
/**
- * Returns true if the expression is the 3/4-arg scalar jsonExtractIndex form on a column with JSON index and the
- * path is indexed. For OSS JSON index all paths are indexed. For composite JSON index, only paths in
- * invertedIndexConfigs are indexed per key.
+ * Returns true if the expression is the 3/4-arg scalar {@code jsonExtractIndex} form on a column with a JSON index
+ * that covers the path. For OSS JSON index all paths are indexed; for the composite JSON index, only paths in
+ * {@code invertedIndexConfigs} are indexed per key.
*/
public static boolean canUseJsonIndexDistinct(IndexSegment indexSegment, ExpressionContext expr) {
- ParsedJsonExtractIndex parsed = parseJsonExtractIndex(expr);
- if (parsed == null) {
- return false;
- }
- DataSource dataSource = indexSegment.getDataSourceNullable(parsed._columnName);
- if (dataSource == null) {
- return false;
- }
- JsonIndexReader reader = getJsonIndexReader(dataSource);
- if (reader == null) {
- return false;
- }
- if (!reader.isPathIndexed(parsed._jsonPathString)) {
- return false;
- }
- return true;
+ return JsonExtractIndexUtils.canUseJsonIndex(indexSegment, expr);
}
}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexGroupByOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexGroupByOperator.java
new file mode 100644
index 000000000000..8244747e28d7
--- /dev/null
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexGroupByOperator.java
@@ -0,0 +1,331 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.query;
+
+import com.google.common.base.CaseFormat;
+import com.google.common.base.Preconditions;
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import javax.annotation.Nullable;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
+import org.apache.pinot.core.common.Operator;
+import org.apache.pinot.core.data.table.IntermediateRecord;
+import org.apache.pinot.core.data.table.Key;
+import org.apache.pinot.core.data.table.Record;
+import org.apache.pinot.core.operator.BaseOperator;
+import org.apache.pinot.core.operator.ExecutionStatistics;
+import org.apache.pinot.core.operator.ExplainAttributeBuilder;
+import org.apache.pinot.core.operator.blocks.results.GroupByResultsBlock;
+import org.apache.pinot.core.operator.filter.BaseFilterOperator;
+import org.apache.pinot.core.operator.query.JsonExtractIndexUtils.ParsedJsonExtractIndex;
+import org.apache.pinot.core.query.aggregation.function.AggregationFunction;
+import org.apache.pinot.core.query.request.context.QueryContext;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.SegmentContext;
+import org.apache.pinot.segment.spi.datasource.DataSource;
+import org.apache.pinot.segment.spi.index.reader.JsonIndexReader;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.query.QueryThreadContext;
+import org.roaringbitmap.RoaringBitmap;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+
+
+/**
+ * Group-by operator for {@code SELECT jsonExtractIndex(col, path, type[, default]), COUNT(*) ... GROUP BY 1}
+ * when {@code col} has a JSON index covering {@code path}.
+ *
+ *
Execution flow:
+ * 1. Push a same-path {@code JSON_MATCH} predicate into the JSON-index lookup when it cannot match missing paths.
+ * 2. For each distinct value at the path, count the source documents via posting-list cardinality, intersecting
+ * with the residual WHERE bitmap when one is needed.
+ * 3. Track documents that have no value at the path; emit a missing-path group consistent with the
+ * {@link JsonIndexDistinctOperator} contract (default literal / null / throw).
+ *
+ *
The output is a {@link GroupByResultsBlock} carrying one {@link IntermediateRecord} per emitted group, which the
+ * downstream combine operator merges across segments via {@code CountAggregationFunction.merge} (sums).
+ *
+ *
Semantic note on array-traversing paths. The JSON index flattens JSON arrays into one row per element,
+ * so a document with {@code {"items": [{"name":"a"},{"name":"b"}]}} contributes to two distinct values at the
+ * path {@code $.items.name}. This operator therefore reports per-element counts ("multikey" semantics) — the same
+ * doc is counted once per distinct value present in its array. For non-array-traversing paths this matches the
+ * scalar {@code jsonExtractScalar} evaluation exactly; for array-traversing paths it diverges. This matches the
+ * behavior already established by {@link JsonIndexDistinctOperator}.
+ */
+public class JsonIndexGroupByOperator extends BaseOperator {
+ private static final String EXPLAIN_NAME = "GROUP_BY_JSON_INDEX";
+ // Empty order-by-values array reused across IntermediateRecord instances. The combine path reads only key/record,
+ // not values, so a shared singleton is safe.
+ private static final Comparable>[] EMPTY_ORDER_BY_VALUES = new Comparable>[0];
+
+ private final IndexSegment _indexSegment;
+ private final QueryContext _queryContext;
+ private final BaseFilterOperator _filterOperator;
+ private final DataSchema _dataSchema;
+ private final ParsedJsonExtractIndex _parsed;
+
+ private int _numEntriesExamined = 0;
+ private long _numEntriesScannedInFilter = 0;
+
+ /**
+ * SegmentContext is accepted for parity with {@link JsonIndexDistinctOperator} and the regular
+ * {@code GroupByPlanNode} call site. It is intentionally not stored: the data source lookup goes through
+ * {@code IndexSegment#getDataSource}, and any upsert / valid-doc-id filtering flows through the
+ * {@code BaseFilterOperator} that {@code FilterPlanNode} produced for the caller.
+ */
+ public JsonIndexGroupByOperator(IndexSegment indexSegment, SegmentContext segmentContext,
+ QueryContext queryContext, BaseFilterOperator filterOperator) {
+ _indexSegment = indexSegment;
+ _queryContext = queryContext;
+ _filterOperator = filterOperator;
+
+ ExpressionContext groupByExpression = queryContext.getGroupByExpressions().get(0);
+ _parsed = JsonExtractIndexUtils.parseJsonExtractIndex(groupByExpression);
+ Preconditions.checkState(_parsed != null,
+ "Expected a parseable jsonExtractIndex group-by expression");
+
+ AggregationFunction, ?> countFn = queryContext.getAggregationFunctions()[0];
+ _dataSchema = new DataSchema(
+ new String[]{groupByExpression.toString(), countFn.getResultColumnName()},
+ new ColumnDataType[]{ColumnDataType.fromDataTypeSV(_parsed._dataType),
+ countFn.getIntermediateResultColumnType()});
+ }
+
+ /**
+ * Returns {@code true} when the query is a single-group-by, single-{@code COUNT(*)} aggregation over a JSON-indexed
+ * path, with no {@code HAVING} clause and no filtered aggregations. All other shapes fall back to
+ * {@link GroupByOperator}.
+ *
+ *
No cost-model gate. Bench results (see
+ * {@code BenchmarkJsonIndexGroupByCount} and the reference memo on this branch) show the dictionary scan wins on
+ * most realistic workloads, with the only loss quadrant at very-selective WHERE clauses ({@code M ≲ 1% of segment})
+ * combined with medium path cardinality ({@code D ∈ 1k..10k}), where it can be up to ~65% slower than the
+ * baseline. The wins (2-4× on loose WHEREs) dominate that worst case in aggregate, so the operator is always
+ * picked when the shape matches. Re-evaluate if production data shows the worst-case quadrant matters more than
+ * the bench suggested.
+ */
+ public static boolean canUse(IndexSegment indexSegment, QueryContext queryContext) {
+ List groupByExpressions = queryContext.getGroupByExpressions();
+ if (groupByExpressions == null || groupByExpressions.size() != 1) {
+ return false;
+ }
+ if (!JsonExtractIndexUtils.canUseJsonIndex(indexSegment, groupByExpressions.get(0))) {
+ return false;
+ }
+
+ AggregationFunction, ?>[] aggFns = queryContext.getAggregationFunctions();
+ if (aggFns == null || aggFns.length != 1) {
+ return false;
+ }
+ if (aggFns[0].getType() != AggregationFunctionType.COUNT) {
+ return false;
+ }
+ // CountAggregationFunction.getInputExpressions() returns the empty list for plain COUNT(*) (when null handling
+ // is disabled). COUNT(col) or null-handling COUNT(*) returns a non-empty list — both rejected for v1.
+ if (!aggFns[0].getInputExpressions().isEmpty()) {
+ return false;
+ }
+ if (queryContext.getHavingFilter() != null) {
+ return false;
+ }
+ if (queryContext.hasFilteredAggregations()) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ protected GroupByResultsBlock getNextBlock() {
+ DataSource dataSource = _indexSegment.getDataSource(_parsed._columnName, _queryContext.getSchema());
+ JsonIndexReader jsonIndexReader = JsonExtractIndexUtils.getJsonIndexReader(dataSource);
+ Preconditions.checkState(jsonIndexReader != null,
+ "Column %s has no JSON index", _parsed._columnName);
+
+ // Decide which slice of the WHERE clause can be evaluated entirely inside the JSON-index lookup.
+ String pushedDownFilterJson = JsonExtractIndexUtils.extractSamePathJsonMatchFilter(_parsed,
+ _queryContext.getFilter());
+ // Don't push a filter that can match missing-path docs into the JSON-index lookup. The JsonIndexReader SPI does
+ // not contractually guarantee how implementations represent missing-path matches in the returned map; relying on
+ // them to return an empty map for IS_NULL would tie correctness to an implementation detail. Fall back to the
+ // row-level filter, which evaluates IS_NULL correctly via the null-value vector / scan path.
+ if (pushedDownFilterJson != null
+ && JsonExtractIndexUtils.jsonMatchFilterCanMatchMissingPath(pushedDownFilterJson)) {
+ pushedDownFilterJson = null;
+ }
+ boolean filterFullyPushedDown = pushedDownFilterJson != null
+ && JsonExtractIndexUtils.isOnlySamePathJsonMatchFilter(_parsed, _queryContext.getFilter());
+
+ // When the filter is fully pushed down we trust the index-side filter and skip materializing the WHERE bitmap;
+ // otherwise we evaluate the full filter and intersect per-value posting lists with it.
+ RoaringBitmap fullFilterDocs = filterFullyPushedDown ? null : buildFilteredDocIds();
+ if (fullFilterDocs != null && fullFilterDocs.isEmpty()) {
+ return new GroupByResultsBlock(_dataSchema, Collections.emptyList(), _queryContext);
+ }
+
+ Map valueToDocs =
+ jsonIndexReader.getMatchingFlattenedDocsMap(_parsed._jsonPathString, pushedDownFilterJson);
+ jsonIndexReader.convertFlattenedDocIdsToDocIds(valueToDocs);
+
+ // When the filter is fully pushed down, every doc that lacks the path was already excluded by it, so we don't
+ // need to track covered docs or emit a missing-path group.
+ RoaringBitmap coveredDocs = filterFullyPushedDown ? null : new RoaringBitmap();
+
+ List results = new ArrayList<>(valueToDocs.size());
+ for (Map.Entry entry : valueToDocs.entrySet()) {
+ _numEntriesExamined++;
+ QueryThreadContext.checkTerminationAndSampleUsagePeriodically(_numEntriesExamined, EXPLAIN_NAME);
+
+ String value = entry.getKey();
+ RoaringBitmap docIds = entry.getValue();
+
+ long count;
+ if (fullFilterDocs == null) {
+ count = docIds.getLongCardinality();
+ if (coveredDocs != null) {
+ coveredDocs.or(docIds);
+ }
+ } else {
+ count = RoaringBitmap.andCardinality(docIds, fullFilterDocs);
+ if (count == 0) {
+ continue;
+ }
+ coveredDocs.or(docIds);
+ }
+ if (count == 0) {
+ continue;
+ }
+
+ Object groupKey = convertValue(value, _parsed._dataType);
+ results.add(buildRecord(groupKey, count));
+ }
+
+ // Account for documents that had no value at the path. When the filter was fully pushed down it can't match
+ // missing-path docs by construction (we required !jsonMatchFilterCanMatchMissingPath), so there are no missing
+ // docs to attribute.
+ if (!filterFullyPushedDown) {
+ long expectedTotal;
+ long coveredCard;
+ if (fullFilterDocs == null) {
+ expectedTotal = _indexSegment.getSegmentMetadata().getTotalDocs();
+ coveredCard = coveredDocs.getLongCardinality();
+ } else {
+ expectedTotal = fullFilterDocs.getLongCardinality();
+ // Restrict the "covered" set to docs that survived the WHERE.
+ coveredDocs.and(fullFilterDocs);
+ coveredCard = coveredDocs.getLongCardinality();
+ }
+ long missing = expectedTotal - coveredCard;
+ if (missing > 0) {
+ results.add(buildMissingPathRecord(missing));
+ }
+ }
+
+ return new GroupByResultsBlock(_dataSchema, results, _queryContext);
+ }
+
+ private IntermediateRecord buildMissingPathRecord(long missing) {
+ Object missingKey;
+ if (_parsed._defaultValueLiteral != null) {
+ missingKey = convertValue(_parsed._defaultValueLiteral, _parsed._dataType);
+ } else if (_queryContext.isNullHandlingEnabled()) {
+ missingKey = null;
+ } else {
+ throw new RuntimeException(
+ String.format("Illegal Json Path: [%s], for some docIds in segment [%s]",
+ _parsed._jsonPathString, _indexSegment.getSegmentName()));
+ }
+ return buildRecord(missingKey, missing);
+ }
+
+ private static IntermediateRecord buildRecord(@Nullable Object groupKey, long count) {
+ Object[] keyValues = {groupKey};
+ Object[] rowValues = {groupKey, count};
+ return new IntermediateRecord(new Key(keyValues), new Record(rowValues), EMPTY_ORDER_BY_VALUES);
+ }
+
+ private static Object convertValue(String stringValue, FieldSpec.DataType dataType) {
+ switch (dataType) {
+ case INT:
+ return Integer.parseInt(stringValue);
+ case LONG:
+ return Long.parseLong(stringValue);
+ case FLOAT:
+ return Float.parseFloat(stringValue);
+ case DOUBLE:
+ return Double.parseDouble(stringValue);
+ case BIG_DECIMAL:
+ return new BigDecimal(stringValue);
+ case STRING:
+ return stringValue;
+ default:
+ throw new IllegalStateException("Unsupported data type for JSON index group-by: " + dataType);
+ }
+ }
+
+ @Nullable
+ private RoaringBitmap buildFilteredDocIds() {
+ BaseFilterOperator.FilteredDocIds filteredDocIds = _filterOperator.getFilteredDocIds();
+ _numEntriesScannedInFilter = filteredDocIds.getNumEntriesScannedInFilter();
+ ImmutableRoaringBitmap docIds = filteredDocIds.getDocIds();
+ return docIds != null ? docIds.toRoaringBitmap() : null;
+ }
+
+ @Override
+ public List getChildOperators() {
+ return Collections.singletonList(_filterOperator);
+ }
+
+ @Override
+ public IndexSegment getIndexSegment() {
+ return _indexSegment;
+ }
+
+ @Override
+ public ExecutionStatistics getExecutionStatistics() {
+ int numTotalDocs = _indexSegment.getSegmentMetadata().getTotalDocs();
+ return new ExecutionStatistics(0, _numEntriesScannedInFilter, 0, numTotalDocs);
+ }
+
+ @Override
+ public String toExplainString() {
+ StringBuilder sb = new StringBuilder(EXPLAIN_NAME).append("(groupKey:");
+ sb.append(_queryContext.getGroupByExpressions().get(0).toString());
+ sb.append(", aggregations:").append(_queryContext.getAggregationFunctions()[0].toExplainString());
+ return sb.append(')').toString();
+ }
+
+ @Override
+ protected String getExplainName() {
+ return CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, EXPLAIN_NAME);
+ }
+
+ @Override
+ protected void explainAttributes(ExplainAttributeBuilder attributeBuilder) {
+ super.explainAttributes(attributeBuilder);
+ attributeBuilder.putStringList("groupKeys",
+ List.of(_queryContext.getGroupByExpressions().get(0).toString()));
+ attributeBuilder.putStringList("aggregations",
+ List.of(_queryContext.getAggregationFunctions()[0].toExplainString()));
+ }
+}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/plan/GroupByPlanNode.java b/pinot-core/src/main/java/org/apache/pinot/core/plan/GroupByPlanNode.java
index b44b8dc8ee4f..41dae5ef88c7 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/plan/GroupByPlanNode.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/plan/GroupByPlanNode.java
@@ -23,6 +23,7 @@
import org.apache.pinot.core.operator.filter.BaseFilterOperator;
import org.apache.pinot.core.operator.query.FilteredGroupByOperator;
import org.apache.pinot.core.operator.query.GroupByOperator;
+import org.apache.pinot.core.operator.query.JsonIndexGroupByOperator;
import org.apache.pinot.core.query.aggregation.function.AggregationFunctionUtils;
import org.apache.pinot.core.query.request.context.QueryContext;
import org.apache.pinot.segment.spi.IndexSegment;
@@ -46,6 +47,13 @@ public GroupByPlanNode(SegmentContext segmentContext, QueryContext queryContext)
@Override
public Operator run() {
assert _queryContext.getAggregationFunctions() != null && _queryContext.getGroupByExpressions() != null;
+ // Prefer the index-based GROUP BY when the shape is GROUP BY jsonExtractIndex(col, path, type[, default])
+ // + COUNT(*) on a column whose JSON index covers the path. canUse() already excludes filtered aggregations
+ // and HAVING.
+ if (JsonIndexGroupByOperator.canUse(_indexSegment, _queryContext)) {
+ BaseFilterOperator filterOperator = new FilterPlanNode(_segmentContext, _queryContext).run();
+ return new JsonIndexGroupByOperator(_indexSegment, _segmentContext, _queryContext, filterOperator);
+ }
return _queryContext.hasFilteredAggregations() ? buildFilteredGroupByPlan() : buildNonFilteredGroupByPlan();
}
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/query/JsonIndexGroupByOperatorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/query/JsonIndexGroupByOperatorTest.java
new file mode 100644
index 000000000000..67ce7e3d69b5
--- /dev/null
+++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/query/JsonIndexGroupByOperatorTest.java
@@ -0,0 +1,482 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.query;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.core.common.Operator;
+import org.apache.pinot.core.data.table.IntermediateRecord;
+import org.apache.pinot.core.operator.blocks.results.GroupByResultsBlock;
+import org.apache.pinot.core.operator.filter.BaseFilterOperator;
+import org.apache.pinot.core.operator.filter.BitmapCollection;
+import org.apache.pinot.core.query.request.context.QueryContext;
+import org.apache.pinot.core.query.request.context.utils.QueryContextConverterUtils;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.SegmentContext;
+import org.apache.pinot.segment.spi.SegmentMetadata;
+import org.apache.pinot.segment.spi.datasource.DataSource;
+import org.apache.pinot.segment.spi.index.reader.JsonIndexReader;
+import org.roaringbitmap.RoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+import org.testng.annotations.Test;
+
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.expectThrows;
+
+
+/**
+ * Unit tests for {@link JsonIndexGroupByOperator}.
+ */
+public class JsonIndexGroupByOperatorTest {
+ private static final String EXTRACT_INDEX = "JSON_EXTRACT_INDEX(tags, '$.instance', 'STRING')";
+ private static final String EXTRACT_INDEX_WITH_DEFAULT =
+ "JSON_EXTRACT_INDEX(tags, '$.instance', 'STRING', 'missing')";
+ private static final String SAME_PATH_FILTER = "REGEXP_LIKE(\"$.instance\", '.*test.*')";
+ private static final String CROSS_PATH_FILTER = "REGEXP_LIKE(\"$.env\", 'prod.*')";
+ private static final String SAME_PATH_IS_NULL_FILTER = "\"$.instance\" IS NULL";
+
+ @Test
+ public void testGroupByWithoutFilterCountsAllDocs() {
+ QueryContext queryContext = groupByQuery(EXTRACT_INDEX, null);
+
+ JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class);
+ Map flattenedDocsByValue = new HashMap<>();
+ flattenedDocsByValue.put("east", bitmap(100, 101));
+ flattenedDocsByValue.put("west", bitmap(200));
+ when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", null)).thenReturn(flattenedDocsByValue);
+ stubConvertedDocIds(jsonIndexReader, Map.of(
+ "east", bitmap(0, 1),
+ "west", bitmap(2)));
+
+ GroupByResultsBlock block = buildOperatorMatchAll(queryContext, jsonIndexReader, 3).nextBlock();
+
+ assertEquals(extractCounts(block), Map.of("east", 2L, "west", 1L));
+ }
+
+ @Test
+ public void testSamePathJsonMatchPushedDownNoMissingPathGroup() {
+ QueryContext queryContext = groupByQuery(EXTRACT_INDEX, SAME_PATH_FILTER);
+
+ JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class);
+ // The pushed-down filter restricts which flattened docs come back.
+ Map flattenedDocsByValue = new HashMap<>();
+ flattenedDocsByValue.put("test-east", bitmap(10));
+ flattenedDocsByValue.put("test-west", bitmap(20, 21));
+ when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", SAME_PATH_FILTER))
+ .thenReturn(flattenedDocsByValue);
+ stubConvertedDocIds(jsonIndexReader, Map.of(
+ "test-east", bitmap(0),
+ "test-west", bitmap(1, 2)));
+
+ GroupByResultsBlock block = buildOperator(queryContext, jsonIndexReader, bufferBitmap(0, 1, 2), 5).nextBlock();
+
+ // Even though only 3 of 5 docs are covered, no missing-path group is emitted because the same-path JSON_MATCH
+ // filter cannot match docs that lack the path.
+ assertEquals(extractCounts(block), Map.of("test-east", 1L, "test-west", 2L));
+ }
+
+ @Test
+ public void testCrossPathFilterAppliedAsResidual() {
+ QueryContext queryContext = groupByQuery(EXTRACT_INDEX, CROSS_PATH_FILTER);
+
+ JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class);
+ Map flattenedDocsByValue = new HashMap<>();
+ flattenedDocsByValue.put("prod-a", bitmap(100));
+ flattenedDocsByValue.put("prod-b", bitmap(200));
+ flattenedDocsByValue.put("other", bitmap(300));
+ // The cross-path filter is NOT pushed down so getMatchingFlattenedDocsMap is called with null.
+ when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", null)).thenReturn(flattenedDocsByValue);
+ stubConvertedDocIds(jsonIndexReader, Map.of(
+ "prod-a", bitmap(0),
+ "prod-b", bitmap(1),
+ "other", bitmap(2)));
+
+ // Filter operator says docs 0 and 1 pass the cross-path filter.
+ GroupByResultsBlock block = buildOperator(queryContext, jsonIndexReader, bufferBitmap(0, 1), 3).nextBlock();
+
+ // "other" is filtered out; both surviving docs have a value at the path so no missing-path group.
+ assertEquals(extractCounts(block), Map.of("prod-a", 1L, "prod-b", 1L));
+ verify(jsonIndexReader).getMatchingFlattenedDocsMap("$.instance", null);
+ }
+
+ @Test
+ public void testMissingPathDocsUseDefaultLiteral() {
+ QueryContext queryContext = groupByQuery(EXTRACT_INDEX_WITH_DEFAULT, null);
+
+ JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class);
+ Map flattenedDocsByValue = new HashMap<>();
+ flattenedDocsByValue.put("east", bitmap(0));
+ when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", null)).thenReturn(flattenedDocsByValue);
+ stubConvertedDocIds(jsonIndexReader, Map.of("east", bitmap(0)));
+
+ // 3 total docs, only doc 0 has a value at the path; docs 1 and 2 should land in the "missing" group.
+ GroupByResultsBlock block = buildOperatorMatchAll(queryContext, jsonIndexReader, 3).nextBlock();
+
+ assertEquals(extractCounts(block), Map.of("east", 1L, "missing", 2L));
+ }
+
+ @Test
+ public void testMissingPathDocsUseNullWhenNullHandlingEnabled() {
+ QueryContext queryContext = groupByQuery(EXTRACT_INDEX, null, true);
+
+ JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class);
+ Map flattenedDocsByValue = new HashMap<>();
+ flattenedDocsByValue.put("east", bitmap(0));
+ when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", null)).thenReturn(flattenedDocsByValue);
+ stubConvertedDocIds(jsonIndexReader, Map.of("east", bitmap(0)));
+
+ GroupByResultsBlock block = buildOperatorMatchAll(queryContext, jsonIndexReader, 3).nextBlock();
+
+ Map