diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/table/IntermediateRecord.java b/pinot-core/src/main/java/org/apache/pinot/core/data/table/IntermediateRecord.java index 85288d49f4a6..8136293968fa 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/data/table/IntermediateRecord.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/data/table/IntermediateRecord.java @@ -29,7 +29,7 @@ public class IntermediateRecord { public final Record _record; public final Comparable[] _values; - IntermediateRecord(Key key, Record record, Comparable[] values) { + public IntermediateRecord(Key key, Record record, Comparable[] values) { _key = key; _record = record; _values = values; diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonExtractIndexUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonExtractIndexUtils.java new file mode 100644 index 000000000000..245264c6eb5f --- /dev/null +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonExtractIndexUtils.java @@ -0,0 +1,261 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.query; + +import java.util.List; +import java.util.Optional; +import javax.annotation.Nullable; +import org.apache.pinot.common.function.JsonPathCache; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.FilterContext; +import org.apache.pinot.common.request.context.RequestContextUtils; +import org.apache.pinot.common.request.context.predicate.JsonMatchPredicate; +import org.apache.pinot.common.request.context.predicate.Predicate; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.segment.spi.datasource.DataSource; +import org.apache.pinot.segment.spi.index.IndexService; +import org.apache.pinot.segment.spi.index.IndexType; +import org.apache.pinot.segment.spi.index.reader.JsonIndexReader; +import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.sql.parsers.CalciteSqlParser; + + +/** + * Shared parsing and filter-pushdown helpers for index-aware operators that consume a scalar + * {@code jsonExtractIndex(column, path, type[, defaultValue])} expression and a JSON index on the column. + */ +public final class JsonExtractIndexUtils { + private static final String FUNCTION_NAME_EXTRACT_INDEX = "jsonExtractIndex"; + + private JsonExtractIndexUtils() { + } + + /** + * Parsed view of a {@code jsonExtractIndex} call. + */ + public static final class ParsedJsonExtractIndex { + public final String _columnName; + public final String _jsonPathString; + public final FieldSpec.DataType _dataType; + @Nullable + public final String _defaultValueLiteral; + + public ParsedJsonExtractIndex(String columnName, String jsonPathString, FieldSpec.DataType dataType, + @Nullable String defaultValueLiteral) { + _columnName = columnName; + _jsonPathString = jsonPathString; + _dataType = dataType; + _defaultValueLiteral = defaultValueLiteral; + } + } + + /** + * Parses the given expression as a 3/4-arg scalar {@code jsonExtractIndex} call. + * Returns {@code null} when the expression has a different shape (wrong arity, non-literal args, MV result type, + * unsupported scalar type, {@code [*]} wildcard, or malformed JSON path). + * + *

This is a pure shape check; it does not consult segment metadata. + * Pair with {@link #canUseJsonIndex} to verify that the column has a usable JSON index on the path. + */ + @Nullable + public static ParsedJsonExtractIndex parseJsonExtractIndex(ExpressionContext expr) { + if (expr.getType() != ExpressionContext.Type.FUNCTION) { + return null; + } + String functionName = expr.getFunction().getFunctionName(); + if (!FUNCTION_NAME_EXTRACT_INDEX.equalsIgnoreCase(functionName)) { + return null; + } + List args = expr.getFunction().getArguments(); + if (args.size() != 3 && args.size() != 4) { + return null; + } + if (args.get(0).getType() != ExpressionContext.Type.IDENTIFIER) { + return null; + } + if (args.get(1).getType() != ExpressionContext.Type.LITERAL + || args.get(2).getType() != ExpressionContext.Type.LITERAL + || (args.size() == 4 && args.get(3).getType() != ExpressionContext.Type.LITERAL)) { + return null; + } + + String columnName = args.get(0).getIdentifier(); + String jsonPathString = args.get(1).getLiteral().getStringValue(); + String resultsType = args.get(2).getLiteral().getStringValue().toUpperCase(); + // Only single-value types are supported; MV (_ARRAY) would have incorrect flattened-to-real + // docId intersection since convertFlattenedDocIdsToDocIds is skipped for MV. + if (resultsType.endsWith("_ARRAY")) { + return null; + } + if (jsonPathString.contains("[*]")) { + return null; + } + + FieldSpec.DataType dataType; + try { + dataType = FieldSpec.DataType.valueOf(resultsType); + } catch (IllegalArgumentException e) { + return null; + } + switch (dataType) { + case INT: + case LONG: + case FLOAT: + case DOUBLE: + case BIG_DECIMAL: + case STRING: + break; + default: + return null; + } + + try { + JsonPathCache.INSTANCE.getOrCompute(jsonPathString); + } catch (Exception e) { + return null; + } + + String defaultValueLiteral = null; + if (args.size() == 4) { + defaultValueLiteral = args.get(3).getLiteral().getStringValue(); + try { + dataType.convert(defaultValueLiteral); + } catch (Exception e) { + return null; + } + } + + return new ParsedJsonExtractIndex(columnName, jsonPathString, dataType, defaultValueLiteral); + } + + /** + * Returns the JSON index reader for the column if present, falling back to a composite JSON index when available. + */ + @Nullable + public static JsonIndexReader getJsonIndexReader(DataSource dataSource) { + JsonIndexReader reader = dataSource.getJsonIndex(); + if (reader == null) { + Optional> compositeIndex = + IndexService.getInstance().getOptional("composite_json_index"); + if (compositeIndex.isPresent()) { + reader = (JsonIndexReader) dataSource.getIndex(compositeIndex.get()); + } + } + return reader; + } + + /** + * Returns {@code true} when the expression is a parseable {@code jsonExtractIndex} call and the referenced column + * has a usable JSON index covering the path. + */ + public static boolean canUseJsonIndex(IndexSegment indexSegment, ExpressionContext expr) { + ParsedJsonExtractIndex parsed = parseJsonExtractIndex(expr); + if (parsed == null) { + return false; + } + DataSource dataSource = indexSegment.getDataSourceNullable(parsed._columnName); + if (dataSource == null) { + return false; + } + JsonIndexReader reader = getJsonIndexReader(dataSource); + if (reader == null) { + return false; + } + return reader.isPathIndexed(parsed._jsonPathString); + } + + /** + * Walks the filter and returns the inner JSON-match string for a single same-column-same-path {@code JSON_MATCH} + * predicate that can be pushed into the JSON-index lookup. Returns {@code null} when no such predicate exists, + * or when more than one candidate is found and disambiguation would be unsafe. + */ + @Nullable + public static String extractSamePathJsonMatchFilter(ParsedJsonExtractIndex parsed, @Nullable FilterContext filter) { + if (filter == null) { + return null; + } + switch (filter.getType()) { + case PREDICATE: + return extractSamePathJsonMatchFilter(parsed, filter.getPredicate()); + case AND: + String matchingFilter = null; + for (FilterContext child : filter.getChildren()) { + String childFilter = extractSamePathJsonMatchFilter(parsed, child); + if (childFilter == null) { + continue; + } + if (matchingFilter != null) { + return null; + } + matchingFilter = childFilter; + } + return matchingFilter; + default: + return null; + } + } + + /** + * Returns {@code true} when the filter is exactly a single same-path JSON_MATCH predicate (no other clauses). + */ + public static boolean isOnlySamePathJsonMatchFilter(ParsedJsonExtractIndex parsed, @Nullable FilterContext filter) { + if (filter == null || filter.getType() != FilterContext.Type.PREDICATE) { + return false; + } + return extractSamePathJsonMatchFilter(parsed, filter.getPredicate()) != null; + } + + /** + * Returns {@code true} when the filter string can match documents that lack the JSON path entirely (i.e. an + * {@code IS_NULL} on the path). Callers use this to decide whether they must still account for missing-path + * documents after running an index-side filter. + */ + public static boolean jsonMatchFilterCanMatchMissingPath(String filterJsonString) { + try { + FilterContext filter = RequestContextUtils.getFilter(CalciteSqlParser.compileToExpression(filterJsonString)); + return filter.getType() == FilterContext.Type.PREDICATE + && filter.getPredicate().getType() == Predicate.Type.IS_NULL; + } catch (Exception e) { + return false; + } + } + + @Nullable + private static String extractSamePathJsonMatchFilter(ParsedJsonExtractIndex parsed, Predicate predicate) { + if (!(predicate instanceof JsonMatchPredicate)) { + return null; + } + ExpressionContext lhs = predicate.getLhs(); + if (lhs.getType() != ExpressionContext.Type.IDENTIFIER + || !parsed._columnName.equals(lhs.getIdentifier())) { + return null; + } + String filterJsonString = ((JsonMatchPredicate) predicate).getValue(); + int start = filterJsonString.indexOf('"'); + if (start < 0) { + return null; + } + int end = filterJsonString.indexOf('"', start + 1); + if (end < 0) { + return null; + } + String filterPath = filterJsonString.substring(start + 1, end); + return parsed._jsonPathString.equals(filterPath) ? filterJsonString : null; + } +} diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperator.java index 0970cac305e3..d9c1c8d595ba 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperator.java @@ -23,16 +23,10 @@ import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; import javax.annotation.Nullable; -import org.apache.pinot.common.function.JsonPathCache; import org.apache.pinot.common.request.context.ExpressionContext; -import org.apache.pinot.common.request.context.FilterContext; import org.apache.pinot.common.request.context.OrderByExpressionContext; -import org.apache.pinot.common.request.context.RequestContextUtils; -import org.apache.pinot.common.request.context.predicate.JsonMatchPredicate; -import org.apache.pinot.common.request.context.predicate.Predicate; import org.apache.pinot.common.utils.DataSchema; import org.apache.pinot.common.utils.DataSchema.ColumnDataType; import org.apache.pinot.core.common.Operator; @@ -41,6 +35,7 @@ import org.apache.pinot.core.operator.ExplainAttributeBuilder; import org.apache.pinot.core.operator.blocks.results.DistinctResultsBlock; import org.apache.pinot.core.operator.filter.BaseFilterOperator; +import org.apache.pinot.core.operator.query.JsonExtractIndexUtils.ParsedJsonExtractIndex; import org.apache.pinot.core.query.distinct.table.BigDecimalDistinctTable; import org.apache.pinot.core.query.distinct.table.DistinctTable; import org.apache.pinot.core.query.distinct.table.DoubleDistinctTable; @@ -52,12 +47,9 @@ import org.apache.pinot.segment.spi.IndexSegment; import org.apache.pinot.segment.spi.SegmentContext; import org.apache.pinot.segment.spi.datasource.DataSource; -import org.apache.pinot.segment.spi.index.IndexService; -import org.apache.pinot.segment.spi.index.IndexType; import org.apache.pinot.segment.spi.index.reader.JsonIndexReader; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.query.QueryThreadContext; -import org.apache.pinot.sql.parsers.CalciteSqlParser; import org.roaringbitmap.RoaringBitmap; import org.roaringbitmap.buffer.ImmutableRoaringBitmap; @@ -72,7 +64,6 @@ */ public class JsonIndexDistinctOperator extends BaseOperator { private static final String EXPLAIN_NAME = "DISTINCT_JSON_INDEX"; - private static final String FUNCTION_NAME = "jsonExtractIndex"; private final IndexSegment _indexSegment; private final SegmentContext _segmentContext; @@ -98,21 +89,22 @@ protected DistinctResultsBlock getNextBlock() { } ExpressionContext expr = expressions.get(0); - ParsedJsonExtractIndex parsed = parseJsonExtractIndex(expr); + ParsedJsonExtractIndex parsed = JsonExtractIndexUtils.parseJsonExtractIndex(expr); if (parsed == null) { throw new IllegalStateException("Expected 3/4-arg scalar jsonExtractIndex expression"); } DataSource dataSource = _indexSegment.getDataSource(parsed._columnName, _queryContext.getSchema()); - JsonIndexReader jsonIndexReader = getJsonIndexReader(dataSource); + JsonIndexReader jsonIndexReader = JsonExtractIndexUtils.getJsonIndexReader(dataSource); if (jsonIndexReader == null) { throw new IllegalStateException("Column " + parsed._columnName + " has no JSON index"); } - String pushedDownFilterJson = extractSamePathJsonMatchFilter(parsed, _queryContext.getFilter()); + String pushedDownFilterJson = JsonExtractIndexUtils.extractSamePathJsonMatchFilter(parsed, + _queryContext.getFilter()); boolean filterFullyPushedDown = pushedDownFilterJson != null - && isOnlySamePathJsonMatchFilter(parsed, _queryContext.getFilter()) - && !jsonMatchFilterCanMatchMissingPath(pushedDownFilterJson); + && JsonExtractIndexUtils.isOnlySamePathJsonMatchFilter(parsed, _queryContext.getFilter()) + && !JsonExtractIndexUtils.jsonMatchFilterCanMatchMissingPath(pushedDownFilterJson); // Fast path: when the filter is fully pushed down into the JSON index, we only need the distinct value strings. // This avoids reading posting lists, building per-value bitmaps, and converting flattened doc IDs. @@ -251,72 +243,6 @@ private void handleMissingDocs(DistinctTable distinctTable, ParsedJsonExtractInd } } - @Nullable - private static String extractSamePathJsonMatchFilter(ParsedJsonExtractIndex parsed, @Nullable FilterContext filter) { - if (filter == null) { - return null; - } - switch (filter.getType()) { - case PREDICATE: - return extractSamePathJsonMatchFilter(parsed, filter.getPredicate()); - case AND: - String matchingFilter = null; - for (FilterContext child : filter.getChildren()) { - String childFilter = extractSamePathJsonMatchFilter(parsed, child); - if (childFilter == null) { - continue; - } - if (matchingFilter != null) { - return null; - } - matchingFilter = childFilter; - } - return matchingFilter; - default: - return null; - } - } - - private static boolean isOnlySamePathJsonMatchFilter(ParsedJsonExtractIndex parsed, @Nullable FilterContext filter) { - if (filter == null || filter.getType() != FilterContext.Type.PREDICATE) { - return false; - } - return extractSamePathJsonMatchFilter(parsed, filter.getPredicate()) != null; - } - - private static boolean jsonMatchFilterCanMatchMissingPath(String filterJsonString) { - try { - FilterContext filter = RequestContextUtils.getFilter(CalciteSqlParser.compileToExpression(filterJsonString)); - return filter.getType() == FilterContext.Type.PREDICATE - && filter.getPredicate().getType() == Predicate.Type.IS_NULL; - } catch (Exception e) { - return false; - } - } - - @Nullable - private static String extractSamePathJsonMatchFilter(ParsedJsonExtractIndex parsed, Predicate predicate) { - if (!(predicate instanceof JsonMatchPredicate)) { - return null; - } - ExpressionContext lhs = predicate.getLhs(); - if (lhs.getType() != ExpressionContext.Type.IDENTIFIER - || !parsed._columnName.equals(lhs.getIdentifier())) { - return null; - } - String filterJsonString = ((JsonMatchPredicate) predicate).getValue(); - int start = filterJsonString.indexOf('"'); - if (start < 0) { - return null; - } - int end = filterJsonString.indexOf('"', start + 1); - if (end < 0) { - return null; - } - String filterPath = filterJsonString.substring(start + 1, end); - return parsed._jsonPathString.equals(filterPath) ? filterJsonString : null; - } - private DistinctTable createDistinctTable(DataSchema dataSchema, FieldSpec.DataType dataType, @Nullable OrderByExpressionContext orderByExpression) { int limit = _queryContext.getLimit(); @@ -449,19 +375,6 @@ private static boolean addToTable(StringDistinctTable table, String value, } } - @Nullable - private static JsonIndexReader getJsonIndexReader(DataSource dataSource) { - JsonIndexReader reader = dataSource.getJsonIndex(); - if (reader == null) { - Optional> compositeIndex = - IndexService.getInstance().getOptional("composite_json_index"); - if (compositeIndex.isPresent()) { - reader = (JsonIndexReader) dataSource.getIndex(compositeIndex.get()); - } - } - return reader; - } - @Nullable private RoaringBitmap buildFilteredDocIds() { BaseFilterOperator.FilteredDocIds filteredDocIds = _filterOperator.getFilteredDocIds(); @@ -470,93 +383,6 @@ private RoaringBitmap buildFilteredDocIds() { return docIds != null ? docIds.toRoaringBitmap() : null; } - @Nullable - private static ParsedJsonExtractIndex parseJsonExtractIndex(ExpressionContext expr) { - if (expr.getType() != ExpressionContext.Type.FUNCTION) { - return null; - } - if (!FUNCTION_NAME.equalsIgnoreCase(expr.getFunction().getFunctionName())) { - return null; - } - List args = expr.getFunction().getArguments(); - if (args.size() != 3 && args.size() != 4) { - return null; - } - if (args.get(0).getType() != ExpressionContext.Type.IDENTIFIER) { - return null; - } - if (args.get(1).getType() != ExpressionContext.Type.LITERAL - || args.get(2).getType() != ExpressionContext.Type.LITERAL - || (args.size() == 4 && args.get(3).getType() != ExpressionContext.Type.LITERAL)) { - return null; - } - - String columnName = args.get(0).getIdentifier(); - String jsonPathString = args.get(1).getLiteral().getStringValue(); - String resultsType = args.get(2).getLiteral().getStringValue().toUpperCase(); - // Only single-value types are supported; MV (_ARRAY) would have incorrect flattened-to-real - // docId intersection since convertFlattenedDocIdsToDocIds is skipped for MV. - if (resultsType.endsWith("_ARRAY")) { - return null; - } - if (jsonPathString.contains("[*]")) { - return null; - } - - FieldSpec.DataType dataType; - try { - dataType = FieldSpec.DataType.valueOf(resultsType); - } catch (IllegalArgumentException e) { - return null; - } - // Only types with a corresponding DistinctTable implementation are supported - switch (dataType) { - case INT: - case LONG: - case FLOAT: - case DOUBLE: - case BIG_DECIMAL: - case STRING: - break; - default: - return null; - } - - try { - JsonPathCache.INSTANCE.getOrCompute(jsonPathString); - } catch (Exception e) { - return null; - } - - String defaultValueLiteral = null; - if (args.size() == 4) { - defaultValueLiteral = args.get(3).getLiteral().getStringValue(); - try { - dataType.convert(defaultValueLiteral); - } catch (Exception e) { - return null; - } - } - - return new ParsedJsonExtractIndex(columnName, jsonPathString, dataType, defaultValueLiteral); - } - - private static final class ParsedJsonExtractIndex { - final String _columnName; - final String _jsonPathString; - final FieldSpec.DataType _dataType; - @Nullable - final String _defaultValueLiteral; - - ParsedJsonExtractIndex(String columnName, String jsonPathString, FieldSpec.DataType dataType, - @Nullable String defaultValueLiteral) { - _columnName = columnName; - _jsonPathString = jsonPathString; - _dataType = dataType; - _defaultValueLiteral = defaultValueLiteral; - } - } - @Override public List getChildOperators() { return Collections.singletonList(_filterOperator); @@ -597,26 +423,11 @@ protected void explainAttributes(ExplainAttributeBuilder attributeBuilder) { } /** - * Returns true if the expression is the 3/4-arg scalar jsonExtractIndex form on a column with JSON index and the - * path is indexed. For OSS JSON index all paths are indexed. For composite JSON index, only paths in - * invertedIndexConfigs are indexed per key. + * Returns true if the expression is the 3/4-arg scalar {@code jsonExtractIndex} form on a column with a JSON index + * that covers the path. For OSS JSON index all paths are indexed; for the composite JSON index, only paths in + * {@code invertedIndexConfigs} are indexed per key. */ public static boolean canUseJsonIndexDistinct(IndexSegment indexSegment, ExpressionContext expr) { - ParsedJsonExtractIndex parsed = parseJsonExtractIndex(expr); - if (parsed == null) { - return false; - } - DataSource dataSource = indexSegment.getDataSourceNullable(parsed._columnName); - if (dataSource == null) { - return false; - } - JsonIndexReader reader = getJsonIndexReader(dataSource); - if (reader == null) { - return false; - } - if (!reader.isPathIndexed(parsed._jsonPathString)) { - return false; - } - return true; + return JsonExtractIndexUtils.canUseJsonIndex(indexSegment, expr); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexGroupByOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexGroupByOperator.java new file mode 100644 index 000000000000..8244747e28d7 --- /dev/null +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexGroupByOperator.java @@ -0,0 +1,331 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.query; + +import com.google.common.base.CaseFormat; +import com.google.common.base.Preconditions; +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import javax.annotation.Nullable; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.utils.DataSchema; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.core.common.Operator; +import org.apache.pinot.core.data.table.IntermediateRecord; +import org.apache.pinot.core.data.table.Key; +import org.apache.pinot.core.data.table.Record; +import org.apache.pinot.core.operator.BaseOperator; +import org.apache.pinot.core.operator.ExecutionStatistics; +import org.apache.pinot.core.operator.ExplainAttributeBuilder; +import org.apache.pinot.core.operator.blocks.results.GroupByResultsBlock; +import org.apache.pinot.core.operator.filter.BaseFilterOperator; +import org.apache.pinot.core.operator.query.JsonExtractIndexUtils.ParsedJsonExtractIndex; +import org.apache.pinot.core.query.aggregation.function.AggregationFunction; +import org.apache.pinot.core.query.request.context.QueryContext; +import org.apache.pinot.segment.spi.AggregationFunctionType; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.segment.spi.SegmentContext; +import org.apache.pinot.segment.spi.datasource.DataSource; +import org.apache.pinot.segment.spi.index.reader.JsonIndexReader; +import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.query.QueryThreadContext; +import org.roaringbitmap.RoaringBitmap; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; + + +/** + * Group-by operator for {@code SELECT jsonExtractIndex(col, path, type[, default]), COUNT(*) ... GROUP BY 1} + * when {@code col} has a JSON index covering {@code path}. + * + *

Execution flow: + * 1. Push a same-path {@code JSON_MATCH} predicate into the JSON-index lookup when it cannot match missing paths. + * 2. For each distinct value at the path, count the source documents via posting-list cardinality, intersecting + * with the residual WHERE bitmap when one is needed. + * 3. Track documents that have no value at the path; emit a missing-path group consistent with the + * {@link JsonIndexDistinctOperator} contract (default literal / null / throw). + * + *

The output is a {@link GroupByResultsBlock} carrying one {@link IntermediateRecord} per emitted group, which the + * downstream combine operator merges across segments via {@code CountAggregationFunction.merge} (sums). + * + *

Semantic note on array-traversing paths. The JSON index flattens JSON arrays into one row per element, + * so a document with {@code {"items": [{"name":"a"},{"name":"b"}]}} contributes to two distinct values at the + * path {@code $.items.name}. This operator therefore reports per-element counts ("multikey" semantics) — the same + * doc is counted once per distinct value present in its array. For non-array-traversing paths this matches the + * scalar {@code jsonExtractScalar} evaluation exactly; for array-traversing paths it diverges. This matches the + * behavior already established by {@link JsonIndexDistinctOperator}. + */ +public class JsonIndexGroupByOperator extends BaseOperator { + private static final String EXPLAIN_NAME = "GROUP_BY_JSON_INDEX"; + // Empty order-by-values array reused across IntermediateRecord instances. The combine path reads only key/record, + // not values, so a shared singleton is safe. + private static final Comparable[] EMPTY_ORDER_BY_VALUES = new Comparable[0]; + + private final IndexSegment _indexSegment; + private final QueryContext _queryContext; + private final BaseFilterOperator _filterOperator; + private final DataSchema _dataSchema; + private final ParsedJsonExtractIndex _parsed; + + private int _numEntriesExamined = 0; + private long _numEntriesScannedInFilter = 0; + + /** + * SegmentContext is accepted for parity with {@link JsonIndexDistinctOperator} and the regular + * {@code GroupByPlanNode} call site. It is intentionally not stored: the data source lookup goes through + * {@code IndexSegment#getDataSource}, and any upsert / valid-doc-id filtering flows through the + * {@code BaseFilterOperator} that {@code FilterPlanNode} produced for the caller. + */ + public JsonIndexGroupByOperator(IndexSegment indexSegment, SegmentContext segmentContext, + QueryContext queryContext, BaseFilterOperator filterOperator) { + _indexSegment = indexSegment; + _queryContext = queryContext; + _filterOperator = filterOperator; + + ExpressionContext groupByExpression = queryContext.getGroupByExpressions().get(0); + _parsed = JsonExtractIndexUtils.parseJsonExtractIndex(groupByExpression); + Preconditions.checkState(_parsed != null, + "Expected a parseable jsonExtractIndex group-by expression"); + + AggregationFunction countFn = queryContext.getAggregationFunctions()[0]; + _dataSchema = new DataSchema( + new String[]{groupByExpression.toString(), countFn.getResultColumnName()}, + new ColumnDataType[]{ColumnDataType.fromDataTypeSV(_parsed._dataType), + countFn.getIntermediateResultColumnType()}); + } + + /** + * Returns {@code true} when the query is a single-group-by, single-{@code COUNT(*)} aggregation over a JSON-indexed + * path, with no {@code HAVING} clause and no filtered aggregations. All other shapes fall back to + * {@link GroupByOperator}. + * + *

No cost-model gate. Bench results (see + * {@code BenchmarkJsonIndexGroupByCount} and the reference memo on this branch) show the dictionary scan wins on + * most realistic workloads, with the only loss quadrant at very-selective WHERE clauses ({@code M ≲ 1% of segment}) + * combined with medium path cardinality ({@code D ∈ 1k..10k}), where it can be up to ~65% slower than the + * baseline. The wins (2-4× on loose WHEREs) dominate that worst case in aggregate, so the operator is always + * picked when the shape matches. Re-evaluate if production data shows the worst-case quadrant matters more than + * the bench suggested. + */ + public static boolean canUse(IndexSegment indexSegment, QueryContext queryContext) { + List groupByExpressions = queryContext.getGroupByExpressions(); + if (groupByExpressions == null || groupByExpressions.size() != 1) { + return false; + } + if (!JsonExtractIndexUtils.canUseJsonIndex(indexSegment, groupByExpressions.get(0))) { + return false; + } + + AggregationFunction[] aggFns = queryContext.getAggregationFunctions(); + if (aggFns == null || aggFns.length != 1) { + return false; + } + if (aggFns[0].getType() != AggregationFunctionType.COUNT) { + return false; + } + // CountAggregationFunction.getInputExpressions() returns the empty list for plain COUNT(*) (when null handling + // is disabled). COUNT(col) or null-handling COUNT(*) returns a non-empty list — both rejected for v1. + if (!aggFns[0].getInputExpressions().isEmpty()) { + return false; + } + if (queryContext.getHavingFilter() != null) { + return false; + } + if (queryContext.hasFilteredAggregations()) { + return false; + } + return true; + } + + @Override + protected GroupByResultsBlock getNextBlock() { + DataSource dataSource = _indexSegment.getDataSource(_parsed._columnName, _queryContext.getSchema()); + JsonIndexReader jsonIndexReader = JsonExtractIndexUtils.getJsonIndexReader(dataSource); + Preconditions.checkState(jsonIndexReader != null, + "Column %s has no JSON index", _parsed._columnName); + + // Decide which slice of the WHERE clause can be evaluated entirely inside the JSON-index lookup. + String pushedDownFilterJson = JsonExtractIndexUtils.extractSamePathJsonMatchFilter(_parsed, + _queryContext.getFilter()); + // Don't push a filter that can match missing-path docs into the JSON-index lookup. The JsonIndexReader SPI does + // not contractually guarantee how implementations represent missing-path matches in the returned map; relying on + // them to return an empty map for IS_NULL would tie correctness to an implementation detail. Fall back to the + // row-level filter, which evaluates IS_NULL correctly via the null-value vector / scan path. + if (pushedDownFilterJson != null + && JsonExtractIndexUtils.jsonMatchFilterCanMatchMissingPath(pushedDownFilterJson)) { + pushedDownFilterJson = null; + } + boolean filterFullyPushedDown = pushedDownFilterJson != null + && JsonExtractIndexUtils.isOnlySamePathJsonMatchFilter(_parsed, _queryContext.getFilter()); + + // When the filter is fully pushed down we trust the index-side filter and skip materializing the WHERE bitmap; + // otherwise we evaluate the full filter and intersect per-value posting lists with it. + RoaringBitmap fullFilterDocs = filterFullyPushedDown ? null : buildFilteredDocIds(); + if (fullFilterDocs != null && fullFilterDocs.isEmpty()) { + return new GroupByResultsBlock(_dataSchema, Collections.emptyList(), _queryContext); + } + + Map valueToDocs = + jsonIndexReader.getMatchingFlattenedDocsMap(_parsed._jsonPathString, pushedDownFilterJson); + jsonIndexReader.convertFlattenedDocIdsToDocIds(valueToDocs); + + // When the filter is fully pushed down, every doc that lacks the path was already excluded by it, so we don't + // need to track covered docs or emit a missing-path group. + RoaringBitmap coveredDocs = filterFullyPushedDown ? null : new RoaringBitmap(); + + List results = new ArrayList<>(valueToDocs.size()); + for (Map.Entry entry : valueToDocs.entrySet()) { + _numEntriesExamined++; + QueryThreadContext.checkTerminationAndSampleUsagePeriodically(_numEntriesExamined, EXPLAIN_NAME); + + String value = entry.getKey(); + RoaringBitmap docIds = entry.getValue(); + + long count; + if (fullFilterDocs == null) { + count = docIds.getLongCardinality(); + if (coveredDocs != null) { + coveredDocs.or(docIds); + } + } else { + count = RoaringBitmap.andCardinality(docIds, fullFilterDocs); + if (count == 0) { + continue; + } + coveredDocs.or(docIds); + } + if (count == 0) { + continue; + } + + Object groupKey = convertValue(value, _parsed._dataType); + results.add(buildRecord(groupKey, count)); + } + + // Account for documents that had no value at the path. When the filter was fully pushed down it can't match + // missing-path docs by construction (we required !jsonMatchFilterCanMatchMissingPath), so there are no missing + // docs to attribute. + if (!filterFullyPushedDown) { + long expectedTotal; + long coveredCard; + if (fullFilterDocs == null) { + expectedTotal = _indexSegment.getSegmentMetadata().getTotalDocs(); + coveredCard = coveredDocs.getLongCardinality(); + } else { + expectedTotal = fullFilterDocs.getLongCardinality(); + // Restrict the "covered" set to docs that survived the WHERE. + coveredDocs.and(fullFilterDocs); + coveredCard = coveredDocs.getLongCardinality(); + } + long missing = expectedTotal - coveredCard; + if (missing > 0) { + results.add(buildMissingPathRecord(missing)); + } + } + + return new GroupByResultsBlock(_dataSchema, results, _queryContext); + } + + private IntermediateRecord buildMissingPathRecord(long missing) { + Object missingKey; + if (_parsed._defaultValueLiteral != null) { + missingKey = convertValue(_parsed._defaultValueLiteral, _parsed._dataType); + } else if (_queryContext.isNullHandlingEnabled()) { + missingKey = null; + } else { + throw new RuntimeException( + String.format("Illegal Json Path: [%s], for some docIds in segment [%s]", + _parsed._jsonPathString, _indexSegment.getSegmentName())); + } + return buildRecord(missingKey, missing); + } + + private static IntermediateRecord buildRecord(@Nullable Object groupKey, long count) { + Object[] keyValues = {groupKey}; + Object[] rowValues = {groupKey, count}; + return new IntermediateRecord(new Key(keyValues), new Record(rowValues), EMPTY_ORDER_BY_VALUES); + } + + private static Object convertValue(String stringValue, FieldSpec.DataType dataType) { + switch (dataType) { + case INT: + return Integer.parseInt(stringValue); + case LONG: + return Long.parseLong(stringValue); + case FLOAT: + return Float.parseFloat(stringValue); + case DOUBLE: + return Double.parseDouble(stringValue); + case BIG_DECIMAL: + return new BigDecimal(stringValue); + case STRING: + return stringValue; + default: + throw new IllegalStateException("Unsupported data type for JSON index group-by: " + dataType); + } + } + + @Nullable + private RoaringBitmap buildFilteredDocIds() { + BaseFilterOperator.FilteredDocIds filteredDocIds = _filterOperator.getFilteredDocIds(); + _numEntriesScannedInFilter = filteredDocIds.getNumEntriesScannedInFilter(); + ImmutableRoaringBitmap docIds = filteredDocIds.getDocIds(); + return docIds != null ? docIds.toRoaringBitmap() : null; + } + + @Override + public List getChildOperators() { + return Collections.singletonList(_filterOperator); + } + + @Override + public IndexSegment getIndexSegment() { + return _indexSegment; + } + + @Override + public ExecutionStatistics getExecutionStatistics() { + int numTotalDocs = _indexSegment.getSegmentMetadata().getTotalDocs(); + return new ExecutionStatistics(0, _numEntriesScannedInFilter, 0, numTotalDocs); + } + + @Override + public String toExplainString() { + StringBuilder sb = new StringBuilder(EXPLAIN_NAME).append("(groupKey:"); + sb.append(_queryContext.getGroupByExpressions().get(0).toString()); + sb.append(", aggregations:").append(_queryContext.getAggregationFunctions()[0].toExplainString()); + return sb.append(')').toString(); + } + + @Override + protected String getExplainName() { + return CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, EXPLAIN_NAME); + } + + @Override + protected void explainAttributes(ExplainAttributeBuilder attributeBuilder) { + super.explainAttributes(attributeBuilder); + attributeBuilder.putStringList("groupKeys", + List.of(_queryContext.getGroupByExpressions().get(0).toString())); + attributeBuilder.putStringList("aggregations", + List.of(_queryContext.getAggregationFunctions()[0].toExplainString())); + } +} diff --git a/pinot-core/src/main/java/org/apache/pinot/core/plan/GroupByPlanNode.java b/pinot-core/src/main/java/org/apache/pinot/core/plan/GroupByPlanNode.java index b44b8dc8ee4f..41dae5ef88c7 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/plan/GroupByPlanNode.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/plan/GroupByPlanNode.java @@ -23,6 +23,7 @@ import org.apache.pinot.core.operator.filter.BaseFilterOperator; import org.apache.pinot.core.operator.query.FilteredGroupByOperator; import org.apache.pinot.core.operator.query.GroupByOperator; +import org.apache.pinot.core.operator.query.JsonIndexGroupByOperator; import org.apache.pinot.core.query.aggregation.function.AggregationFunctionUtils; import org.apache.pinot.core.query.request.context.QueryContext; import org.apache.pinot.segment.spi.IndexSegment; @@ -46,6 +47,13 @@ public GroupByPlanNode(SegmentContext segmentContext, QueryContext queryContext) @Override public Operator run() { assert _queryContext.getAggregationFunctions() != null && _queryContext.getGroupByExpressions() != null; + // Prefer the index-based GROUP BY when the shape is GROUP BY jsonExtractIndex(col, path, type[, default]) + // + COUNT(*) on a column whose JSON index covers the path. canUse() already excludes filtered aggregations + // and HAVING. + if (JsonIndexGroupByOperator.canUse(_indexSegment, _queryContext)) { + BaseFilterOperator filterOperator = new FilterPlanNode(_segmentContext, _queryContext).run(); + return new JsonIndexGroupByOperator(_indexSegment, _segmentContext, _queryContext, filterOperator); + } return _queryContext.hasFilteredAggregations() ? buildFilteredGroupByPlan() : buildNonFilteredGroupByPlan(); } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/query/JsonIndexGroupByOperatorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/query/JsonIndexGroupByOperatorTest.java new file mode 100644 index 000000000000..67ce7e3d69b5 --- /dev/null +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/query/JsonIndexGroupByOperatorTest.java @@ -0,0 +1,482 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.query; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.pinot.core.common.Operator; +import org.apache.pinot.core.data.table.IntermediateRecord; +import org.apache.pinot.core.operator.blocks.results.GroupByResultsBlock; +import org.apache.pinot.core.operator.filter.BaseFilterOperator; +import org.apache.pinot.core.operator.filter.BitmapCollection; +import org.apache.pinot.core.query.request.context.QueryContext; +import org.apache.pinot.core.query.request.context.utils.QueryContextConverterUtils; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.segment.spi.SegmentContext; +import org.apache.pinot.segment.spi.SegmentMetadata; +import org.apache.pinot.segment.spi.datasource.DataSource; +import org.apache.pinot.segment.spi.index.reader.JsonIndexReader; +import org.roaringbitmap.RoaringBitmap; +import org.roaringbitmap.buffer.MutableRoaringBitmap; +import org.testng.annotations.Test; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.expectThrows; + + +/** + * Unit tests for {@link JsonIndexGroupByOperator}. + */ +public class JsonIndexGroupByOperatorTest { + private static final String EXTRACT_INDEX = "JSON_EXTRACT_INDEX(tags, '$.instance', 'STRING')"; + private static final String EXTRACT_INDEX_WITH_DEFAULT = + "JSON_EXTRACT_INDEX(tags, '$.instance', 'STRING', 'missing')"; + private static final String SAME_PATH_FILTER = "REGEXP_LIKE(\"$.instance\", '.*test.*')"; + private static final String CROSS_PATH_FILTER = "REGEXP_LIKE(\"$.env\", 'prod.*')"; + private static final String SAME_PATH_IS_NULL_FILTER = "\"$.instance\" IS NULL"; + + @Test + public void testGroupByWithoutFilterCountsAllDocs() { + QueryContext queryContext = groupByQuery(EXTRACT_INDEX, null); + + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + Map flattenedDocsByValue = new HashMap<>(); + flattenedDocsByValue.put("east", bitmap(100, 101)); + flattenedDocsByValue.put("west", bitmap(200)); + when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", null)).thenReturn(flattenedDocsByValue); + stubConvertedDocIds(jsonIndexReader, Map.of( + "east", bitmap(0, 1), + "west", bitmap(2))); + + GroupByResultsBlock block = buildOperatorMatchAll(queryContext, jsonIndexReader, 3).nextBlock(); + + assertEquals(extractCounts(block), Map.of("east", 2L, "west", 1L)); + } + + @Test + public void testSamePathJsonMatchPushedDownNoMissingPathGroup() { + QueryContext queryContext = groupByQuery(EXTRACT_INDEX, SAME_PATH_FILTER); + + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + // The pushed-down filter restricts which flattened docs come back. + Map flattenedDocsByValue = new HashMap<>(); + flattenedDocsByValue.put("test-east", bitmap(10)); + flattenedDocsByValue.put("test-west", bitmap(20, 21)); + when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", SAME_PATH_FILTER)) + .thenReturn(flattenedDocsByValue); + stubConvertedDocIds(jsonIndexReader, Map.of( + "test-east", bitmap(0), + "test-west", bitmap(1, 2))); + + GroupByResultsBlock block = buildOperator(queryContext, jsonIndexReader, bufferBitmap(0, 1, 2), 5).nextBlock(); + + // Even though only 3 of 5 docs are covered, no missing-path group is emitted because the same-path JSON_MATCH + // filter cannot match docs that lack the path. + assertEquals(extractCounts(block), Map.of("test-east", 1L, "test-west", 2L)); + } + + @Test + public void testCrossPathFilterAppliedAsResidual() { + QueryContext queryContext = groupByQuery(EXTRACT_INDEX, CROSS_PATH_FILTER); + + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + Map flattenedDocsByValue = new HashMap<>(); + flattenedDocsByValue.put("prod-a", bitmap(100)); + flattenedDocsByValue.put("prod-b", bitmap(200)); + flattenedDocsByValue.put("other", bitmap(300)); + // The cross-path filter is NOT pushed down so getMatchingFlattenedDocsMap is called with null. + when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", null)).thenReturn(flattenedDocsByValue); + stubConvertedDocIds(jsonIndexReader, Map.of( + "prod-a", bitmap(0), + "prod-b", bitmap(1), + "other", bitmap(2))); + + // Filter operator says docs 0 and 1 pass the cross-path filter. + GroupByResultsBlock block = buildOperator(queryContext, jsonIndexReader, bufferBitmap(0, 1), 3).nextBlock(); + + // "other" is filtered out; both surviving docs have a value at the path so no missing-path group. + assertEquals(extractCounts(block), Map.of("prod-a", 1L, "prod-b", 1L)); + verify(jsonIndexReader).getMatchingFlattenedDocsMap("$.instance", null); + } + + @Test + public void testMissingPathDocsUseDefaultLiteral() { + QueryContext queryContext = groupByQuery(EXTRACT_INDEX_WITH_DEFAULT, null); + + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + Map flattenedDocsByValue = new HashMap<>(); + flattenedDocsByValue.put("east", bitmap(0)); + when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", null)).thenReturn(flattenedDocsByValue); + stubConvertedDocIds(jsonIndexReader, Map.of("east", bitmap(0))); + + // 3 total docs, only doc 0 has a value at the path; docs 1 and 2 should land in the "missing" group. + GroupByResultsBlock block = buildOperatorMatchAll(queryContext, jsonIndexReader, 3).nextBlock(); + + assertEquals(extractCounts(block), Map.of("east", 1L, "missing", 2L)); + } + + @Test + public void testMissingPathDocsUseNullWhenNullHandlingEnabled() { + QueryContext queryContext = groupByQuery(EXTRACT_INDEX, null, true); + + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + Map flattenedDocsByValue = new HashMap<>(); + flattenedDocsByValue.put("east", bitmap(0)); + when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", null)).thenReturn(flattenedDocsByValue); + stubConvertedDocIds(jsonIndexReader, Map.of("east", bitmap(0))); + + GroupByResultsBlock block = buildOperatorMatchAll(queryContext, jsonIndexReader, 3).nextBlock(); + + Map counts = extractCountsAllowingNull(block); + assertEquals(counts.get("east"), Long.valueOf(1L)); + assertEquals(counts.get(null), Long.valueOf(2L)); + assertEquals(counts.size(), 2); + } + + @Test + public void testMissingPathDocsThrowWhenNoDefaultAndNullHandlingDisabled() { + QueryContext queryContext = groupByQuery(EXTRACT_INDEX, null); + + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + Map flattenedDocsByValue = new HashMap<>(); + flattenedDocsByValue.put("east", bitmap(0)); + when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", null)).thenReturn(flattenedDocsByValue); + stubConvertedDocIds(jsonIndexReader, Map.of("east", bitmap(0))); + + RuntimeException ex = expectThrows(RuntimeException.class, + () -> buildOperatorMatchAll(queryContext, jsonIndexReader, 3).nextBlock()); + assertTrue(ex.getMessage().contains("Illegal Json Path")); + } + + @Test + public void testSamePathIsNullEmitsOnlyMissingPathGroup() { + QueryContext queryContext = groupByQuery(EXTRACT_INDEX_WITH_DEFAULT, SAME_PATH_IS_NULL_FILTER); + + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + // The operator does NOT push the IS_NULL filter into the index (it would rely on undocumented "returns empty" + // SPI behavior). Instead, it asks for all flattened docs at the path and lets the row-side filter operator + // produce the IS_NULL bitmap. + Map flattenedDocsByValue = new HashMap<>(); + flattenedDocsByValue.put("east", bitmap(100)); + when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", null)).thenReturn(flattenedDocsByValue); + stubConvertedDocIds(jsonIndexReader, Map.of("east", bitmap(0))); + + // Filter operator returns docs 1 and 2 (docs without a value at the path), satisfying the IS_NULL filter. + GroupByResultsBlock block = buildOperator(queryContext, jsonIndexReader, bufferBitmap(1, 2), 3).nextBlock(); + + // Doc 0 has the "east" value but is filtered out by IS_NULL. Docs 1 and 2 have no value and fall into "missing". + assertEquals(extractCounts(block), Map.of("missing", 2L)); + verify(jsonIndexReader).getMatchingFlattenedDocsMap("$.instance", null); + verify(jsonIndexReader, never()).getMatchingFlattenedDocsMap("$.instance", SAME_PATH_IS_NULL_FILTER); + } + + @Test + public void testMultikeyArrayPathSplitsCountsAcrossElements() { + // A document with an implicit-array-traversal path lands in multiple value bitmaps after flattening. + // The operator reports per-element counts ("multikey" semantics), diverging from scalar jsonExtractScalar + // evaluation. This test locks in the documented behavior. + QueryContext queryContext = groupByQuery(EXTRACT_INDEX, null); + + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + Map flattenedDocsByValue = new HashMap<>(); + flattenedDocsByValue.put("east", bitmap(10, 11)); + flattenedDocsByValue.put("west", bitmap(20)); + when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", null)).thenReturn(flattenedDocsByValue); + // Doc 0 has BOTH values "east" and "west" (array traversal), doc 1 has only "east". + stubConvertedDocIds(jsonIndexReader, Map.of( + "east", bitmap(0, 1), + "west", bitmap(0))); + + GroupByResultsBlock block = buildOperatorMatchAll(queryContext, jsonIndexReader, 2).nextBlock(); + + // Multikey: doc 0 contributes to both groups → counts sum to 3, exceeding total docs (2). + // The missing-path bucket is correctly 0 because both docs appear in the covered set (union). + assertEquals(extractCounts(block), Map.of("east", 2L, "west", 1L)); + } + + @Test + public void testEmptyFilterShortCircuitsToEmptyBlock() { + QueryContext queryContext = groupByQuery(EXTRACT_INDEX, CROSS_PATH_FILTER); + + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + + // Filter operator returns no docs. + GroupByResultsBlock block = buildOperator(queryContext, jsonIndexReader, bufferBitmap(), 3).nextBlock(); + + assertEquals(extractCounts(block), Map.of()); + // We never need to consult the index when the filter rules out every doc. + verify(jsonIndexReader, never()).getMatchingFlattenedDocsMap(any(), any()); + } + + @Test + public void testCanUseAcceptsThreeAndFourArgForms() { + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + when(jsonIndexReader.isPathIndexed("$.instance")).thenReturn(true); + IndexSegment indexSegment = buildCanUseIndexSegment(jsonIndexReader); + + assertTrue(JsonIndexGroupByOperator.canUse(indexSegment, groupByQuery(EXTRACT_INDEX, null))); + assertTrue(JsonIndexGroupByOperator.canUse(indexSegment, groupByQuery(EXTRACT_INDEX_WITH_DEFAULT, null))); + } + + @Test + public void testCanUseRejectsMultipleAggregations() { + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + when(jsonIndexReader.isPathIndexed("$.instance")).thenReturn(true); + IndexSegment indexSegment = buildCanUseIndexSegment(jsonIndexReader); + + QueryContext queryContext = QueryContextConverterUtils.getQueryContext( + "SELECT " + EXTRACT_INDEX + " AS k, COUNT(*), MAX(rating) FROM myTable GROUP BY k"); + assertFalse(JsonIndexGroupByOperator.canUse(indexSegment, queryContext)); + } + + @Test + public void testCanUseRejectsNonCountAggregation() { + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + when(jsonIndexReader.isPathIndexed("$.instance")).thenReturn(true); + IndexSegment indexSegment = buildCanUseIndexSegment(jsonIndexReader); + + QueryContext queryContext = QueryContextConverterUtils.getQueryContext( + "SELECT " + EXTRACT_INDEX + " AS k, SUM(rating) FROM myTable GROUP BY k"); + assertFalse(JsonIndexGroupByOperator.canUse(indexSegment, queryContext)); + } + + @Test + public void testCanUseRejectsCountColumn() { + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + when(jsonIndexReader.isPathIndexed("$.instance")).thenReturn(true); + IndexSegment indexSegment = buildCanUseIndexSegment(jsonIndexReader); + + QueryContext queryContext = QueryContextConverterUtils.getQueryContext( + "SELECT " + EXTRACT_INDEX + " AS k, COUNT(rating) FROM myTable GROUP BY k " + + "OPTION(enableNullHandling=true)"); + assertFalse(JsonIndexGroupByOperator.canUse(indexSegment, queryContext)); + } + + @Test + public void testCanUseRejectsHaving() { + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + when(jsonIndexReader.isPathIndexed("$.instance")).thenReturn(true); + IndexSegment indexSegment = buildCanUseIndexSegment(jsonIndexReader); + + QueryContext queryContext = QueryContextConverterUtils.getQueryContext( + "SELECT " + EXTRACT_INDEX + " AS k, COUNT(*) AS c FROM myTable GROUP BY k HAVING COUNT(*) > 10"); + assertFalse(JsonIndexGroupByOperator.canUse(indexSegment, queryContext)); + } + + @Test + public void testCanUseRejectsMultipleGroupByKeys() { + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + when(jsonIndexReader.isPathIndexed("$.instance")).thenReturn(true); + IndexSegment indexSegment = buildCanUseIndexSegment(jsonIndexReader); + + QueryContext queryContext = QueryContextConverterUtils.getQueryContext( + "SELECT " + EXTRACT_INDEX + " AS k, env, COUNT(*) FROM myTable GROUP BY k, env"); + assertFalse(JsonIndexGroupByOperator.canUse(indexSegment, queryContext)); + } + + @Test + public void testCanUseRejectsWhenPathNotIndexed() { + JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class); + when(jsonIndexReader.isPathIndexed("$.instance")).thenReturn(false); + IndexSegment indexSegment = buildCanUseIndexSegment(jsonIndexReader); + + assertFalse(JsonIndexGroupByOperator.canUse(indexSegment, groupByQuery(EXTRACT_INDEX, null))); + } + + private static QueryContext groupByQuery(String groupKeyExpression, String jsonMatchFilter) { + return groupByQuery(groupKeyExpression, jsonMatchFilter, false); + } + + private static QueryContext groupByQuery(String groupKeyExpression, String jsonMatchFilter, + boolean enableNullHandling) { + StringBuilder sql = new StringBuilder("SELECT "); + sql.append(groupKeyExpression).append(" AS tag_value, COUNT(*) FROM myTable"); + if (jsonMatchFilter != null) { + sql.append(" WHERE JSON_MATCH(tags, '").append(jsonMatchFilter.replace("'", "''")).append("')"); + } + sql.append(" GROUP BY tag_value"); + if (enableNullHandling) { + sql.append(" OPTION(enableNullHandling=true)"); + } + return QueryContextConverterUtils.getQueryContext(sql.toString()); + } + + private static void stubConvertedDocIds(JsonIndexReader jsonIndexReader, + Map convertedDocIds) { + doAnswer(invocation -> { + @SuppressWarnings("unchecked") + Map docsByValue = (Map) invocation.getArgument(0); + docsByValue.clear(); + docsByValue.putAll(convertedDocIds); + return null; + }).when(jsonIndexReader).convertFlattenedDocIdsToDocIds(any()); + } + + private static IndexSegment buildCanUseIndexSegment(JsonIndexReader jsonIndexReader) { + DataSource dataSource = mock(DataSource.class); + when(dataSource.getJsonIndex()).thenReturn(jsonIndexReader); + + IndexSegment indexSegment = mock(IndexSegment.class); + when(indexSegment.getDataSourceNullable("tags")).thenReturn(dataSource); + return indexSegment; + } + + private static JsonIndexGroupByOperator buildOperator(QueryContext queryContext, JsonIndexReader jsonIndexReader, + MutableRoaringBitmap filterBitmap, int numDocs) { + return buildOperator(queryContext, jsonIndexReader, + new StaticBitmapFilterOperator(numDocs, filterBitmap), numDocs); + } + + /** + * Used by tests that simulate a query with no WHERE clause — the FilterPlanNode would produce a match-all operator. + */ + private static JsonIndexGroupByOperator buildOperatorMatchAll(QueryContext queryContext, + JsonIndexReader jsonIndexReader, int numDocs) { + return buildOperator(queryContext, jsonIndexReader, new MatchAllFilterOperator(numDocs), numDocs); + } + + private static JsonIndexGroupByOperator buildOperator(QueryContext queryContext, JsonIndexReader jsonIndexReader, + BaseFilterOperator filterOperator, int numDocs) { + SegmentMetadata segmentMetadata = mock(SegmentMetadata.class); + when(segmentMetadata.getTotalDocs()).thenReturn(numDocs); + + DataSource dataSource = mock(DataSource.class); + when(dataSource.getJsonIndex()).thenReturn(jsonIndexReader); + + IndexSegment indexSegment = mock(IndexSegment.class); + when(indexSegment.getSegmentMetadata()).thenReturn(segmentMetadata); + when(indexSegment.getSegmentName()).thenReturn("testSegment"); + when(indexSegment.getDataSource(eq("tags"), any())).thenReturn(dataSource); + when(indexSegment.getDataSourceNullable("tags")).thenReturn(dataSource); + + return new JsonIndexGroupByOperator(indexSegment, new SegmentContext(indexSegment), queryContext, filterOperator); + } + + private static RoaringBitmap bitmap(int... docIds) { + RoaringBitmap bitmap = new RoaringBitmap(); + for (int docId : docIds) { + bitmap.add(docId); + } + return bitmap; + } + + private static MutableRoaringBitmap bufferBitmap(int... docIds) { + MutableRoaringBitmap bitmap = new MutableRoaringBitmap(); + for (int docId : docIds) { + bitmap.add(docId); + } + return bitmap; + } + + private static Map extractCounts(GroupByResultsBlock block) { + Map result = new HashMap<>(); + List records = block.getIntermediateRecords(); + if (records == null) { + return result; + } + for (IntermediateRecord record : records) { + Object[] values = record._record.getValues(); + result.put((String) values[0], (Long) values[1]); + } + return result; + } + + private static Map extractCountsAllowingNull(GroupByResultsBlock block) { + Map result = new HashMap<>(); + List records = block.getIntermediateRecords(); + if (records == null) { + return result; + } + for (IntermediateRecord record : records) { + Object[] values = record._record.getValues(); + result.put(values[0], (Long) values[1]); + } + return result; + } + + private static final class MatchAllFilterOperator extends BaseFilterOperator { + MatchAllFilterOperator(int numDocs) { + super(numDocs, false); + } + + @Override + public boolean isResultMatchingAll() { + return true; + } + + @Override + public List getChildOperators() { + return List.of(); + } + + @Override + protected org.apache.pinot.core.common.BlockDocIdSet getTrues() { + throw new UnsupportedOperationException("Match-all should never reach getTrues"); + } + + @Override + public String toExplainString() { + return "MATCH_ALL"; + } + } + + private static final class StaticBitmapFilterOperator extends BaseFilterOperator { + private final MutableRoaringBitmap _bitmap; + + StaticBitmapFilterOperator(int numDocs, MutableRoaringBitmap bitmap) { + super(numDocs, false); + _bitmap = bitmap; + } + + @Override + public boolean canProduceBitmaps() { + return true; + } + + @Override + public BitmapCollection getBitmaps() { + return new BitmapCollection(_numDocs, false, _bitmap); + } + + @Override + public List getChildOperators() { + return List.of(); + } + + @Override + protected org.apache.pinot.core.common.BlockDocIdSet getTrues() { + throw new UnsupportedOperationException("Bitmap path only"); + } + + @Override + public String toExplainString() { + return "STATIC_BITMAP_FILTER"; + } + } +} diff --git a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkJsonIndexGroupByCount.java b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkJsonIndexGroupByCount.java new file mode 100644 index 000000000000..aeb1e1fb8cde --- /dev/null +++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkJsonIndexGroupByCount.java @@ -0,0 +1,260 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.perf; + +import com.google.common.base.Preconditions; +import java.io.File; +import java.util.Collections; +import java.util.concurrent.TimeUnit; +import org.apache.commons.io.FileUtils; +import org.apache.pinot.common.request.PinotQuery; +import org.apache.pinot.core.common.Operator; +import org.apache.pinot.core.operator.blocks.results.GroupByResultsBlock; +import org.apache.pinot.core.operator.filter.BaseFilterOperator; +import org.apache.pinot.core.operator.query.GroupByOperator; +import org.apache.pinot.core.operator.query.JsonIndexGroupByOperator; +import org.apache.pinot.core.plan.FilterPlanNode; +import org.apache.pinot.core.query.aggregation.function.AggregationFunctionUtils; +import org.apache.pinot.core.query.request.context.QueryContext; +import org.apache.pinot.core.query.request.context.utils.QueryContextConverterUtils; +import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader; +import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl; +import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.segment.spi.SegmentContext; +import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.data.readers.GenericRow; +import org.apache.pinot.spi.data.readers.RecordReader; +import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.apache.pinot.sql.parsers.CalciteSqlParser; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.OptionsBuilder; + + +/** + * JMH benchmark for {@code GROUP BY jsonExtractIndex(...) + COUNT(*)} comparing the dictionary-scan path + * ({@link JsonIndexGroupByOperator}) against the row-by-row baseline ({@link GroupByOperator}). + * + *

The benchmark exists to settle the {@code SELECTIVITY_THRESHOLD} constant in + * {@link JsonIndexGroupByOperator}. We sweep two dimensions: + *

+ * For each combination we time both operators, with no planner gating, and report the crossover point. + * + *

Both operators are constructed directly (not via {@code GroupByPlanNode}) so the selectivity gate does not + * influence the measurement — we want the raw cost of each path at every (D, M) cell. + */ +@State(Scope.Benchmark) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@Fork(1) +@Warmup(iterations = 3, time = 1) +@Measurement(iterations = 5, time = 1) +public class BenchmarkJsonIndexGroupByCount { + private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "BenchmarkJsonIndexGroupByCount"); + private static final String TABLE_NAME = "myTable"; + private static final String SEGMENT_NAME = "jsonIndexGroupByCountSegment"; + private static final String ID_COLUMN = "id"; + private static final String TAGS_COLUMN = "tags"; + // Same-column filter that selects a subset of docs based on the synthetic "match" field embedded in the JSON. + // We use a cross-path filter so the index lookup for the GROUP BY path runs unfiltered, isolating the cost + // model of "dictionary scan of D entries vs row-by-row scan of M docs". + private static final String FILTER_CLAUSE = + "WHERE JSON_MATCH(tags, '\"$.matchKey\" = ''yes''')"; + private static final String BASE_QUERY = + "SELECT JSON_EXTRACT_INDEX(tags, '$.instance', 'STRING') AS k, COUNT(*) FROM " + TABLE_NAME + + " " + FILTER_CLAUSE + " GROUP BY k LIMIT 1000000"; + + private static final TableConfig TABLE_CONFIG = new TableConfigBuilder(TableType.OFFLINE) + .setTableName(TABLE_NAME) + .setJsonIndexColumns(Collections.singletonList(TAGS_COLUMN)) + .build(); + + private static final Schema SCHEMA = new Schema.SchemaBuilder() + .setSchemaName(TABLE_NAME) + .addSingleValueDimension(ID_COLUMN, FieldSpec.DataType.INT) + .addSingleValueDimension(TAGS_COLUMN, FieldSpec.DataType.JSON) + .build(); + + @Param({"500000"}) + int _numRows; + + // Sweep path cardinality across two orders of magnitude. The crossover where index path stops winning sits + // somewhere in this range for typical JSON sizes. + @Param({"10", "100", "1000", "10000", "100000", "500000"}) + int _pathCardinality; + + // Matched-doc fraction: 1% (very selective), 10%, 50%, 100% (no filter). + @Param({"0.01", "0.1", "0.5", "1.0"}) + double _matchedFraction; + + private IndexSegment _indexSegment; + private SegmentContext _segmentContext; + private QueryContext _queryContext; + private int _expectedDistinctGroupCount; + + @Setup(Level.Trial) + public void setup() + throws Exception { + Preconditions.checkState(_pathCardinality <= _numRows, + "pathCardinality (%s) must not exceed numRows (%s)", _pathCardinality, _numRows); + + FileUtils.deleteQuietly(INDEX_DIR); + buildSegment(); + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(TABLE_CONFIG, SCHEMA); + _indexSegment = ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), indexLoadingConfig); + Preconditions.checkState(_indexSegment.getDataSource(TAGS_COLUMN, SCHEMA).getJsonIndex() != null, + "Loaded segment must expose JSON index on %s", TAGS_COLUMN); + _segmentContext = new SegmentContext(_indexSegment); + + PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(BASE_QUERY); + _queryContext = QueryContextConverterUtils.getQueryContext(pinotQuery); + _queryContext.setSchema(SCHEMA); + + // Cross-validate the two operators on a sanity run before measuring. Each call constructs fresh per-invocation + // operator state, identical to what the @Benchmark methods do. + int baselineGroups = numGroupsFromBaseline(); + int indexGroups = numGroupsFromIndex(); + Preconditions.checkState(baselineGroups == indexGroups, + "Baseline and index operators disagreed on group count. pathCardinality=%s matchedFraction=%s " + + "baseline=%s index=%s", _pathCardinality, _matchedFraction, baselineGroups, indexGroups); + _expectedDistinctGroupCount = baselineGroups; + } + + @Benchmark + public int baselineGroupByOperator() { + return numGroupsFromBaseline(); + } + + @Benchmark + public int jsonIndexGroupByOperator() { + return numGroupsFromIndex(); + } + + @TearDown(Level.Trial) + public void tearDown() { + if (_indexSegment != null) { + _indexSegment.destroy(); + } + FileUtils.deleteQuietly(INDEX_DIR); + } + + private int numGroupsFromBaseline() { + // Build fresh per-invocation state. The baseline GroupByOperator consumes its DocIdSetOperator to exhaustion on + // the first nextBlock call; reusing the AggregationInfo / project operator across iterations would have every + // subsequent invocation see an empty doc stream and return in ~1us, making the bench meaningless. + FilterPlanNode filterPlanNode = new FilterPlanNode(_segmentContext, _queryContext); + BaseFilterOperator filterOperator = filterPlanNode.run(); + AggregationFunctionUtils.AggregationInfo aggregationInfo = + AggregationFunctionUtils.buildAggregationInfo(_segmentContext, _queryContext, + _queryContext.getAggregationFunctions(), _queryContext.getFilter(), filterOperator, + filterPlanNode.getPredicateEvaluators()); + GroupByOperator operator = new GroupByOperator(_queryContext, aggregationInfo, + _indexSegment.getSegmentMetadata().getTotalDocs()); + return countGroups(operator); + } + + private int numGroupsFromIndex() { + // Build fresh per-invocation state, matching the baseline path's per-invocation cost. JsonIndexGroupByOperator + // does not consume the filter operator across iterations (it uses cached getFilteredDocIds), but building both + // paths identically keeps the comparison fair. + FilterPlanNode filterPlanNode = new FilterPlanNode(_segmentContext, _queryContext); + BaseFilterOperator filterOperator = filterPlanNode.run(); + JsonIndexGroupByOperator operator = new JsonIndexGroupByOperator(_indexSegment, _segmentContext, _queryContext, + filterOperator); + return countGroups(operator); + } + + private static int countGroups(Operator operator) { + GroupByResultsBlock block = operator.nextBlock(); + if (block.getIntermediateRecords() != null) { + return block.getIntermediateRecords().size(); + } + if (block.getAggregationGroupByResult() != null) { + return block.getAggregationGroupByResult().getNumGroups(); + } + return 0; + } + + private void buildSegment() + throws Exception { + SegmentGeneratorConfig config = new SegmentGeneratorConfig(TABLE_CONFIG, SCHEMA); + config.setOutDir(INDEX_DIR.getPath()); + config.setTableName(TABLE_NAME); + config.setSegmentName(SEGMENT_NAME); + + SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl(); + try (RecordReader recordReader = new GeneratedDataRecordReader(createData())) { + driver.init(config, recordReader); + driver.build(); + } + } + + private LazyDataGenerator createData() { + // Round the matched-doc count so that an integer number of docs match. The "matchKey" field flips between + // "yes" and "no" based on whether the row index falls below the matched-doc threshold. + int matchedDocs = (int) Math.round(_numRows * _matchedFraction); + return new LazyDataGenerator() { + @Override + public int size() { + return _numRows; + } + + @Override + public GenericRow next(GenericRow row, int index) { + int instance = index % _pathCardinality; + String matchValue = index < matchedDocs ? "yes" : "no"; + row.putValue(ID_COLUMN, index); + row.putValue(TAGS_COLUMN, + "{\"instance\":\"instance-" + instance + "\",\"matchKey\":\"" + matchValue + "\"}"); + return row; + } + + @Override + public void rewind() { + } + }; + } + + public static void main(String[] args) + throws Exception { + new Runner(new OptionsBuilder() + .include(BenchmarkJsonIndexGroupByCount.class.getSimpleName()) + .build()).run(); + } +} diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java index b576a4439426..670ed60baaf3 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/JsonIndexTest.java @@ -408,7 +408,8 @@ private void createIndex(boolean createOnHeap, JsonIndexConfig jsonIndexConfig, * @param continueOnError whether continueOnError should be enabled or disabled * @throws IOException on error */ - private void createIndex(boolean createOnHeap, JsonIndexConfig jsonIndexConfig, String[] records, boolean continueOnError) + private void createIndex(boolean createOnHeap, JsonIndexConfig jsonIndexConfig, String[] records, + boolean continueOnError) throws IOException { try (JsonIndexCreator indexCreator = createOnHeap ? new OnHeapJsonIndexCreator(INDEX_DIR, ON_HEAP_COLUMN_NAME, "myTable_OFFLINE", continueOnError,