From 0590d1261daf640424c035660a025197e97b4d81 Mon Sep 17 00:00:00 2001
From: "Xiaotian (Jackie) Jiang" <jackie.jxt@gmail.com>
Date: Tue, 26 May 2026 14:04:50 -0700
Subject: [PATCH] Refine index-based DISTINCT operators (JSON / inverted)

- JsonIndexDistinctOperator: validate 3/4/5-arg jsonExtractIndex in the
  constructor (mirroring JsonExtractIndexTransformFunction.init), accept
  MV `_ARRAY` types, intersect per-value doc ids with the WHERE-clause
  filter through a unified `remainingDocs` bitmap, and surface
  numDocsScanned in execution statistics.
- New `jsonIndexDistinctSkipMissingPath` query option: when true, the
  operator skips parsing the 4-arg default, skips `remainingDocs`
  tracking, and never throws "Illegal Json Path".
- `canUseJsonIndexDistinct` simplified to a function-name check; planner
  routes any `jsonExtractIndex` call through the operator and lets the
  constructor validate arguments.
- InvertedIndexDistinctOperator: cache `_totalDocs`, short-circuit the
  DESC sorted path with `intersects` instead of `rangeCardinality`,
  drop redundant `advanceIfNeeded` / inner `hasNext`, and report a
  correct numDocsScanned for sorted / inverted paths.
- Tests rewritten as queries-based suites
  (`JsonIndexDistinctOperatorQueriesTest`,
  `InvertedIndexDistinctOperatorQueriesTest`) that drive the full
  broker -> operator path and assert on execution statistics; the older
  mock-based unit tests are removed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../utils/config/QueryOptionsUtils.java       |   7 +
 .../query/InvertedIndexDistinctOperator.java  | 289 +++---
 .../query/JsonIndexDistinctOperator.java      | 703 ++++++---------
 .../JsonExtractIndexTransformFunction.java    |  41 +-
 .../pinot/core/plan/DistinctPlanNode.java     |   4 +-
 ...InvertedIndexDistinctOperatorUnitTest.java | 233 -----
 .../query/JsonIndexDistinctOperatorTest.java  | 306 -------
 ...rtedIndexDistinctOperatorQueriesTest.java} | 327 +++----
 .../JsonIndexDistinctOperatorQueriesTest.java | 447 ++++++++++
 .../tests/custom/JsonPathTest.java            | 825 +++++++++---------
 .../pinot/spi/utils/CommonConstants.java      |   5 +
 11 files changed, 1443 insertions(+), 1744 deletions(-)
 delete mode 100644 pinot-core/src/test/java/org/apache/pinot/core/operator/query/InvertedIndexDistinctOperatorUnitTest.java
 delete mode 100644 pinot-core/src/test/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperatorTest.java
 rename pinot-core/src/test/java/org/apache/pinot/queries/{InvertedIndexDistinctOperatorTest.java => InvertedIndexDistinctOperatorQueriesTest.java} (73%)
 create mode 100644 pinot-core/src/test/java/org/apache/pinot/queries/JsonIndexDistinctOperatorQueriesTest.java

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/config/QueryOptionsUtils.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/config/QueryOptionsUtils.java
index 11c507639cf8..52829dc07f10 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/utils/config/QueryOptionsUtils.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/config/QueryOptionsUtils.java
@@ -192,6 +192,13 @@ public static Double getInvertedIndexDistinctCostRatio(Map<String, String> query
         queryOptions.get(QueryOptionKey.INVERTED_INDEX_DISTINCT_COST_RATIO));
   }
 
+  /// When true, [org.apache.pinot.core.operator.query.JsonIndexDistinctOperator] skips its missing-path handling —
+  /// does not add a 4-arg default, does not add null, and does not throw `Illegal Json Path`. The distinct set is
+  /// purely the values returned by the JSON-index lookup.
+  public static boolean isJsonIndexDistinctSkipMissingPath(Map<String, String> queryOptions) {
+    return Boolean.parseBoolean(queryOptions.get(QueryOptionKey.JSON_INDEX_DISTINCT_SKIP_MISSING_PATH));
+  }
+
   public static boolean isSkipScanFilterReorder(Map<String, String> queryOptions) {
     return "false".equalsIgnoreCase(queryOptions.get(QueryOptionKey.USE_SCAN_REORDER_OPTIMIZATION));
   }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/InvertedIndexDistinctOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/InvertedIndexDistinctOperator.java
index c57a0e5b2425..06ab1c7cca21 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/InvertedIndexDistinctOperator.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/InvertedIndexDistinctOperator.java
@@ -67,33 +67,30 @@
 import org.apache.pinot.spi.utils.Pairs;
 import org.roaringbitmap.PeekableIntIterator;
 import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
-import org.roaringbitmap.buffer.MutableRoaringBitmap;
 
 
-/**
- * Inverted-index-based operator for single-column distinct queries on a single segment.
- *
- * <p>Supports three execution paths, chosen at runtime:
- * <ul>
- *   <li><b>Sorted index path</b>: For sorted columns, merge-iterates filter bitmap against contiguous doc ranges.
- *       Cost ~ O(cardinality + filteredDocs). Always chosen when the column has a sorted forward index.</li>
- *   <li><b>Bitmap inverted index path</b>: Iterates dictionary entries and uses inverted index bitmap intersections
- *       to check filter membership. Avoids the projection pipeline entirely. Chosen by cost heuristic when dictionary
- *       cardinality is much smaller than the filtered doc count.</li>
- *   <li><b>Scan path (fallback)</b>: Uses ProjectOperator + DistinctExecutor to scan filtered docs.
- *       Used when the cost heuristic determines scanning is cheaper.</li>
- * </ul>
- *
- * <p>Enabled via the {@code useIndexBasedDistinctOperator} query option. The cost ratio can be tuned
- * via the {@code invertedIndexDistinctCostRatio} query option; setting it to 0 forces the inverted index path
- * for non-empty filter results.
- */
+/// Inverted-index-based operator for single-column distinct queries on a single segment.
+///
+/// Supports three execution paths, chosen at runtime:
+/// - **Sorted index path**: For sorted columns, merge-iterates filter bitmap against contiguous doc ranges.
+///   Cost ~ `O(cardinality + filteredDocs)`.
+///   Always chosen when the column has a sorted forward index.
+/// - **Bitmap inverted index path**: Iterates dictionary entries and uses inverted index bitmap intersections to check
+///   filter membership. Avoids the projection pipeline entirely.
+///   Chosen by cost heuristic when dictionary cardinality is much smaller than the filtered doc count.
+/// - **Scan path (fallback)**: Uses ProjectOperator + DistinctExecutor to scan filtered docs.
+///   Used when the cost heuristic determines scanning is cheaper.
+///
+/// Enabled via the `useIndexBasedDistinctOperator` query option. The cost ratio can be tuned via the
+/// `invertedIndexDistinctCostRatio` query option; setting it to 0 forces the inverted index path for non-empty filter
+/// results.
 public class InvertedIndexDistinctOperator extends BaseOperator<DistinctResultsBlock> {
   private static final String EXPLAIN_NAME = "DISTINCT_INVERTED_INDEX";
   private static final String EXPLAIN_NAME_SORTED_INDEX = "DISTINCT_SORTED_INDEX";
   private static final String EXPLAIN_NAME_SCAN_FALLBACK = "DISTINCT";
 
   private final IndexSegment _indexSegment;
+  private final int _totalDocs;
   private final SegmentContext _segmentContext;
   private final QueryContext _queryContext;
   private final BaseFilterOperator _filterOperator;
@@ -107,16 +104,15 @@ public class InvertedIndexDistinctOperator extends BaseOperator<DistinctResultsB
   // Execution tracking
   private boolean _usedInvertedIndexPath = false;
   private int _numDocsScanned = 0;
-  private int _numEntriesExamined = 0;
   private long _numEntriesScannedInFilter = 0;
+  private int _numEntriesExaminedPostFilter = 0;
 
-  /**
-   * Creates an InvertedIndexDistinctOperator. The caller (DistinctPlanNode) must verify that the column
-   * has both a dictionary and an inverted index before constructing this operator.
-   */
+  /// Creates an InvertedIndexDistinctOperator. The caller (DistinctPlanNode) must verify that the column has both a
+  /// dictionary and an inverted index before constructing this operator.
   public InvertedIndexDistinctOperator(IndexSegment indexSegment, SegmentContext segmentContext,
       QueryContext queryContext, BaseFilterOperator filterOperator, DataSource dataSource) {
     _indexSegment = indexSegment;
+    _totalDocs = indexSegment.getSegmentMetadata().getTotalDocs();
     _segmentContext = segmentContext;
     _queryContext = queryContext;
     _filterOperator = filterOperator;
@@ -127,57 +123,58 @@ public InvertedIndexDistinctOperator(IndexSegment indexSegment, SegmentContext s
 
   @Override
   protected DistinctResultsBlock getNextBlock() {
+    if (_filterOperator.isResultEmpty()) {
+      return createEmptyResultsBlock();
+    }
+
     // Sorted index: always use the sorted path — O(cardinality + filteredDocs) merge iteration
     if (_invertedIndexReader instanceof SortedIndexReader) {
-      ImmutableRoaringBitmap filteredDocIds = buildFilteredDocIds();
       _usedInvertedIndexPath = true;
-      return executeSortedIndexPath((SortedIndexReader<?>) _invertedIndexReader, filteredDocIds);
+      BaseFilterOperator.FilteredDocIds filteredDocIds = _filterOperator.getFilteredDocIds();
+      ImmutableRoaringBitmap docIds = filteredDocIds.getDocIds();
+      _numDocsScanned = docIds != null ? docIds.getCardinality() : _totalDocs;
+      _numEntriesScannedInFilter = filteredDocIds.getNumEntriesScannedInFilter();
+      return executeSortedIndexPath((SortedIndexReader<?>) _invertedIndexReader, docIds);
     }
 
-    // Prefer cheap count-only inputs for the heuristic so scan fallback can keep the original filter pipeline.
-    FilterPreparation filterPreparation = prepareBitmapPathInput();
-    Integer filteredDocCount = filterPreparation.getFilteredDocCount();
-
-    if (filteredDocCount != null) {
-      if (filteredDocCount == 0) {
-        return createEmptyResultsBlock();
-      }
-      // Bitmap inverted index: use cost heuristic to decide
-      if (shouldUseBitmapInvertedIndex(filteredDocCount)) {
-        ImmutableRoaringBitmap filteredDocIds = filterPreparation.getFilteredDocIds();
-        if (filteredDocIds == null) {
-          filteredDocIds = buildFilteredDocIds();
-        }
-        _usedInvertedIndexPath = true;
-        return executeInvertedIndexPath(filteredDocIds);
-      }
+    ImmutableRoaringBitmap matchingDocIds = null;
+    int numMatchingDocs = -1;
+    if (_filterOperator.isResultMatchingAll()) {
+      numMatchingDocs = _totalDocs;
+    } else if (_filterOperator.canProduceBitmaps()) {
+      matchingDocIds = _filterOperator.getBitmaps().reduce();
+      numMatchingDocs = matchingDocIds.getCardinality();
+    }
+    if (numMatchingDocs == 0) {
+      return createEmptyResultsBlock();
+    }
+    if (numMatchingDocs > 0 && shouldUseBitmapInvertedIndex(numMatchingDocs)) {
+      _usedInvertedIndexPath = true;
+      _numDocsScanned = numMatchingDocs;
+      return executeInvertedIndexPath(matchingDocIds);
     }
-    return executeScanPath(filterPreparation.getFilteredDocIds());
+    return executeScanPath(matchingDocIds);
   }
 
   // ==================== Cost Heuristic ====================
 
-  /**
-   * Default cost ratios for the inverted-index-based distinct heuristic, keyed by dictionary cardinality threshold.
-   * The inverted index path is chosen when {@code dictionaryCardinality * costRatio <= filteredDocCount}.
-   *
-   * <p>The cost ratio accounts for the per-entry bitmap intersection cost relative to the per-doc scan cost.
-   * For low-cardinality dictionaries, each bitmap is dense and {@code intersects()} is fast, but there are few
-   * entries so any unnecessary intersection is relatively expensive vs. scanning a small filtered doc set.
-   * For high-cardinality dictionaries, bitmaps are sparser and {@code intersects()} is slower per entry,
-   * but the scan path also becomes cheaper (fewer docs per value), so a lower ratio suffices.
-   *
-   * <p>Benchmarking (BenchmarkInvertedIndexDistinct, 1M docs) shows the crossover points:
-   * <ul>
-   *   <li>dictCard &le; 1K:  costRatio=30 — inverted index wins when filteredDocs &ge; ~30x dictCard</li>
-   *   <li>dictCard &le; 10K: costRatio=10 — inverted index wins when filteredDocs &ge; ~10x dictCard</li>
-   *   <li>dictCard &gt; 10K: costRatio=6  — inverted index wins when filteredDocs &ge; ~6x dictCard</li>
-   * </ul>
-   *
-   * <p>Can be overridden at query time via the query option {@code invertedIndexDistinctCostRatio}. Setting it
-   * to 0 forces the inverted index path for non-empty filter results.
-   */
-  static final NavigableMap<Integer, Double> DEFAULT_COST_RATIO_BY_CARDINALITY;
+  /// Default cost ratios for the inverted-index-based distinct heuristic, keyed by dictionary cardinality threshold.
+  /// The inverted index path is chosen when `dictionaryCardinality * costRatio <= filteredDocCount`.
+  ///
+  /// The cost ratio accounts for the per-entry bitmap intersection cost relative to the per-doc scan cost.
+  /// For low-cardinality dictionaries, each bitmap is dense and `intersects()` is fast, but there are few entries so
+  /// any unnecessary intersection is relatively expensive vs. scanning a small filtered doc set.
+  /// For high-cardinality dictionaries, bitmaps are sparser and `intersects()` is slower per entry, but the scan path
+  /// also becomes cheaper (fewer docs per value), so a lower ratio suffices.
+  ///
+  /// Benchmarking (BenchmarkInvertedIndexDistinct, 1M docs) shows the crossover points:
+  /// - dictCard ≤ 1K:  costRatio=30 — inverted index wins when filteredDocs ≥ ~30x dictCard
+  /// - dictCard ≤ 10K: costRatio=10 — inverted index wins when filteredDocs ≥ ~10x dictCard
+  /// - dictCard > 10K: costRatio=6  — inverted index wins when filteredDocs ≥ ~6x dictCard
+  ///
+  /// Can be overridden at query time via the query option `invertedIndexDistinctCostRatio`. Setting it to 0 forces the
+  /// inverted index path for non-empty filter results.
+  private static final NavigableMap<Integer, Double> DEFAULT_COST_RATIO_BY_CARDINALITY;
 
   static {
     TreeMap<Integer, Double> map = new TreeMap<>();
@@ -192,9 +189,6 @@ static double getDefaultCostRatio(int dictionaryCardinality) {
   }
 
   private boolean shouldUseBitmapInvertedIndex(int filteredDocCount) {
-    if (filteredDocCount == 0) {
-      return false;
-    }
     Double costRatioOverride = QueryOptionsUtils.getInvertedIndexDistinctCostRatio(_queryContext.getQueryOptions());
     if (costRatioOverride != null && costRatioOverride == 0.0) {
       return true;
@@ -204,59 +198,15 @@ private boolean shouldUseBitmapInvertedIndex(int filteredDocCount) {
     return (double) dictionaryCardinality * costRatio <= filteredDocCount;
   }
 
-  static final class FilterPreparation {
-    @Nullable
-    private final ImmutableRoaringBitmap _filteredDocIds;
-    @Nullable
-    private final Integer _filteredDocCount;
-
-    private FilterPreparation(@Nullable ImmutableRoaringBitmap filteredDocIds, @Nullable Integer filteredDocCount) {
-      _filteredDocIds = filteredDocIds;
-      _filteredDocCount = filteredDocCount;
-    }
-
-    @Nullable
-    ImmutableRoaringBitmap getFilteredDocIds() {
-      return _filteredDocIds;
-    }
-
-    @Nullable
-    Integer getFilteredDocCount() {
-      return _filteredDocCount;
-    }
-  }
-
-  FilterPreparation prepareBitmapPathInput() {
-    int totalDocs = _indexSegment.getSegmentMetadata().getTotalDocs();
-    if (_filterOperator.isResultMatchingAll()) {
-      return new FilterPreparation(null, totalDocs);
-    }
-    if (_filterOperator.isResultEmpty()) {
-      return new FilterPreparation(new MutableRoaringBitmap(), 0);
-    }
-    // Prefer the cheaper exact count when available so scan fallback does not pay eager bitmap materialization.
-    if (_filterOperator.canOptimizeCount()) {
-      return new FilterPreparation(null, _filterOperator.getNumMatchingDocs());
-    }
-    if (_filterOperator.canProduceBitmaps()) {
-      ImmutableRoaringBitmap filteredDocIds = _filterOperator.getBitmaps().reduce();
-      return new FilterPreparation(filteredDocIds, filteredDocIds.getCardinality());
-    }
-    return new FilterPreparation(null, null);
-  }
-
   // ==================== Scan Path (Fallback) ====================
 
-  /**
-   * Scan fallback: uses ProjectOperator + DistinctExecutor. When an exact filter bitmap is already cheaply available,
-   * wraps it in a {@link BitmapBasedFilterOperator} to avoid re-evaluating the filter through the projection pipeline.
-   * Otherwise preserves the original filter operator so scan fallback does not pay eager bitmap materialization.
-   */
+  /// Scan fallback: uses ProjectOperator + DistinctExecutor. When an exact filter bitmap is already cheaply available,
+  /// wraps it in a [BitmapBasedFilterOperator] to avoid re-evaluating the filter through the projection pipeline.
+  /// Otherwise, preserves the original filter operator so scan fallback does not pay eager bitmap materialization.
   private DistinctResultsBlock executeScanPath(@Nullable ImmutableRoaringBitmap filteredDocIds) {
     BaseFilterOperator filterOp;
     if (filteredDocIds != null) {
-      filterOp = new BitmapBasedFilterOperator(filteredDocIds, false,
-          _indexSegment.getSegmentMetadata().getTotalDocs());
+      filterOp = new BitmapBasedFilterOperator(filteredDocIds, false, _totalDocs);
     } else {
       filterOp = _filterOperator;
     }
@@ -275,10 +225,8 @@ private DistinctResultsBlock executeScanPath(@Nullable ImmutableRoaringBitmap fi
 
   // ==================== Sorted Index Path ====================
 
-  /**
-   * Optimized path for sorted columns. Each dictId maps to a contiguous doc range [start, end].
-   * We merge-iterate the filter bitmap with the sorted ranges in O(cardinality + filteredDocs).
-   */
+  /// Optimized path for sorted columns. Each dictId maps to a contiguous doc range [start, end].
+  /// We merge-iterate the filter bitmap with the sorted ranges in O(cardinality + filteredDocs).
   private DistinctResultsBlock executeSortedIndexPath(SortedIndexReader<?> sortedReader,
       @Nullable ImmutableRoaringBitmap filteredDocIds) {
     OrderByExpressionContext orderByExpression =
@@ -300,13 +248,12 @@ private DistinctResultsBlock executeSortedIndexPath(SortedIndexReader<?> sortedR
 
     if (nonNullFilteredDocIds == null) {
       // No filter, no null exclusion — every dictionary value is present
-      int entriesExamined = 0;
       int start = iterateReverse ? dictLength - 1 : 0;
       int end = iterateReverse ? -1 : dictLength;
       int step = iterateReverse ? -1 : 1;
       for (int dictId = start; dictId != end; dictId += step) {
-        QueryThreadContext.checkTerminationAndSampleUsagePeriodically(entriesExamined, EXPLAIN_NAME_SORTED_INDEX);
-        entriesExamined++;
+        QueryThreadContext.checkTerminationAndSampleUsagePeriodically(_numEntriesExaminedPostFilter++,
+            EXPLAIN_NAME_SORTED_INDEX);
         if (dictId == nullResult._nullPlaceholderDictId) {
           continue;
         }
@@ -315,50 +262,40 @@ private DistinctResultsBlock executeSortedIndexPath(SortedIndexReader<?> sortedR
           break;
         }
       }
-      _numEntriesExamined = entriesExamined;
     } else if (!nonNullFilteredDocIds.isEmpty()) {
       if (iterateReverse) {
-        // DESC + LIMIT: iterate dictIds backward, use rangeCardinality for presence check.
-        // Each dictId maps to a contiguous doc range, so rangeCardinality is O(1) per check.
-        int entriesExamined = 0;
+        // DESC + LIMIT: iterate dictIds backward, use intersects for presence check. Each dictId maps to a contiguous
+        // doc range; intersects short-circuits on the first matching container.
         for (int dictId = dictLength - 1; dictId >= 0; dictId--) {
-          QueryThreadContext.checkTerminationAndSampleUsagePeriodically(entriesExamined, EXPLAIN_NAME_SORTED_INDEX);
-          entriesExamined++;
+          QueryThreadContext.checkTerminationAndSampleUsagePeriodically(_numEntriesExaminedPostFilter++,
+              EXPLAIN_NAME_SORTED_INDEX);
           Pairs.IntPair range = sortedReader.getDocIds(dictId);
           int startDocId = range.getLeft();
           int endDocId = range.getRight(); // inclusive
-          if (nonNullFilteredDocIds.rangeCardinality(startDocId, endDocId + 1L) > 0) {
+          if (nonNullFilteredDocIds.intersects(startDocId, endDocId + 1L)) {
             if (addDistinctValue(distinctTable, dictId, orderByExpression, true)) {
               break;
             }
           }
         }
-        _numEntriesExamined = entriesExamined;
       } else {
-        // ASC or no ORDER BY: merge-iterate forward (O(cardinality + filteredDocs))
+        // ASC or no ORDER BY: merge-iterate forward (O(cardinality + filteredDocs)). Sorted-index ranges are
+        // contiguous over [0, numDocs), and we advance the filter iterator past `endDocId` after each match, so
+        // `peekNext()` is always >= the current dictId's `startDocId` at the top of each iteration.
         PeekableIntIterator filterIter = nonNullFilteredDocIds.getIntIterator();
-        int dictId;
-        for (dictId = 0; dictId < dictLength && filterIter.hasNext(); dictId++) {
-          QueryThreadContext.checkTerminationAndSampleUsagePeriodically(dictId, EXPLAIN_NAME_SORTED_INDEX);
-          Pairs.IntPair range = sortedReader.getDocIds(dictId);
-          int startDocId = range.getLeft();
-          int endDocId = range.getRight(); // inclusive
-
-          // Skip filter docs before this range
-          filterIter.advanceIfNeeded(startDocId);
-
-          // Check if any non-null filter doc falls within this range
-          if (filterIter.hasNext() && filterIter.peekNext() <= endDocId) {
+        for (int dictId = 0; dictId < dictLength && filterIter.hasNext(); dictId++) {
+          QueryThreadContext.checkTerminationAndSampleUsagePeriodically(_numEntriesExaminedPostFilter++,
+              EXPLAIN_NAME_SORTED_INDEX);
+          int endDocId = sortedReader.getDocIds(dictId).getRight(); // inclusive
+          if (filterIter.peekNext() <= endDocId) {
             boolean done = addDistinctValue(distinctTable, dictId, orderByExpression, orderedEarlyTermination);
             if (done) {
-              _numEntriesExamined = dictId + 1;
               return new DistinctResultsBlock(convertDistinctTable(distinctTable, nullResult._hasNull), _queryContext);
             }
-            // Advance past the current range for next dictId
+            // Advance past the current range so the next iteration's peekNext() is >= the next dictId's startDocId.
             filterIter.advanceIfNeeded(endDocId + 1);
           }
         }
-        _numEntriesExamined = dictId;
       }
     }
 
@@ -385,14 +322,11 @@ private DistinctResultsBlock executeInvertedIndexPath(@Nullable ImmutableRoaring
     boolean orderedEarlyTermination = useDictIdTable && orderByExpression != null && distinctTable.hasLimit();
     boolean iterateReverse = orderedEarlyTermination && !orderByExpression.isAsc();
 
-    int entriesExamined = 0;
     int start = iterateReverse ? dictLength - 1 : 0;
     int end = iterateReverse ? -1 : dictLength;
     int step = iterateReverse ? -1 : 1;
-
     for (int dictId = start; dictId != end; dictId += step) {
-      QueryThreadContext.checkTerminationAndSampleUsagePeriodically(entriesExamined, EXPLAIN_NAME);
-      entriesExamined++;
+      QueryThreadContext.checkTerminationAndSampleUsagePeriodically(_numEntriesExaminedPostFilter++, EXPLAIN_NAME);
       if (dictId == nullResult._nullPlaceholderDictId) {
         continue;
       }
@@ -419,18 +353,10 @@ private DistinctResultsBlock executeInvertedIndexPath(@Nullable ImmutableRoaring
         }
       }
     }
-    _numEntriesExamined = entriesExamined;
 
     return new DistinctResultsBlock(convertDistinctTable(distinctTable, nullResult._hasNull), _queryContext);
   }
 
-  @Nullable
-  private ImmutableRoaringBitmap buildFilteredDocIds() {
-    BaseFilterOperator.FilteredDocIds filteredDocIds = _filterOperator.getFilteredDocIds();
-    _numEntriesScannedInFilter = filteredDocIds.getNumEntriesScannedInFilter();
-    return filteredDocIds.getDocIds();
-  }
-
   private boolean canUseDictIdDistinctTable(@Nullable OrderByExpressionContext orderByExpression) {
     return orderByExpression == null || _dictionary.isSorted();
   }
@@ -605,14 +531,12 @@ private boolean addDistinctValue(DistinctTable distinctTable, int dictId,
 
   // ==================== Null Handling ====================
 
-  /**
-   * Processes null handling for the filter bitmap. Returns the filter bitmap with null docs excluded
-   * and whether any filtered docs have null values.
-   *
-   * <p>Nulls are not in the dictionary, so they must be checked separately via the null value vector.
-   * The null placeholder value (e.g., Integer.MIN_VALUE) is excluded from dictionary iteration by
-   * removing null docs from the filter bitmap.
-   */
+  /// Processes null handling for the filter bitmap. Returns the filter bitmap with null docs excluded and whether any
+  /// filtered docs have null values.
+  ///
+  /// Nulls are not in the dictionary, so they must be checked separately via the null value vector.
+  /// The null placeholder value (e.g., `Integer.MIN_VALUE`) is excluded from dictionary iteration by removing null docs
+  /// from the filter bitmap.
   private NullFilterResult processNullDocs(@Nullable ImmutableRoaringBitmap filteredDocIds) {
     if (!_queryContext.isNullHandlingEnabled()) {
       return new NullFilterResult(filteredDocIds, false, Dictionary.NULL_VALUE_INDEX);
@@ -633,6 +557,7 @@ private NullFilterResult processNullDocs(@Nullable ImmutableRoaringBitmap filter
     if (filteredDocIds == null) {
       // Preserve match-all to avoid materializing a dense complement bitmap. Instead skip the null placeholder dictId
       // while iterating dictionary values.
+      // TODO: This will count all default null values as null, regardless of whether they are actually nulls.
       nonNullFilteredDocIds = null;
       nullPlaceholderDictId = getNullPlaceholderDictId();
     } else {
@@ -680,13 +605,7 @@ private static class NullFilterResult {
 
   @Override
   public List<? extends Operator> getChildOperators() {
-    // If scan fallback was used, the project operator is the logical child
-    if (_projectOperator != null && !_usedInvertedIndexPath) {
-      return Collections.singletonList(_projectOperator);
-    }
-    // For inverted/sorted index paths (or before execution in EXPLAIN plans),
-    // the filter operator is the logical child.
-    return Collections.singletonList(_filterOperator);
+    return _projectOperator != null ? List.of(_projectOperator) : List.of(_filterOperator);
   }
 
   @Override
@@ -696,21 +615,17 @@ public IndexSegment getIndexSegment() {
 
   @Override
   public ExecutionStatistics getExecutionStatistics() {
-    int numTotalDocs = _indexSegment.getSegmentMetadata().getTotalDocs();
-    if (_usedInvertedIndexPath || _projectOperator == null) {
-      // For inverted/sorted index paths: numDocsScanned=0 (no forward index lookups),
-      // numEntriesScannedInFilter tracks work done while materializing the exact filter bitmap,
-      // numEntriesScannedPostFilter=numEntriesExamined (dictionary entries examined via bitmap
-      // intersection or sorted range checks).
-      return new ExecutionStatistics(0, _numEntriesScannedInFilter, _numEntriesExamined, numTotalDocs);
+    if (_projectOperator == null) {
+      // For inverted/sorted index paths:
+      // - numDocsScanned tracks the matching docs
+      // - numEntriesScannedInFilter tracks work done while materializing the exact filter bitmap
+      // - numEntriesScannedPostFilter tracks dictionary entries examined
+      return new ExecutionStatistics(_numDocsScanned, _numEntriesScannedInFilter, _numEntriesExaminedPostFilter,
+          _totalDocs);
     }
-    // _numEntriesScannedInFilter captures filter work from exact bitmap materialization (non-zero only when
-    // the filter could not produce bitmaps directly). The project operator's stats capture any additional
-    // filter work (zero when using a pre-built BitmapBasedFilterOperator).
-    long numEntriesScannedInFilter = _numEntriesScannedInFilter
-        + _projectOperator.getExecutionStatistics().getNumEntriesScannedInFilter();
     // Single-column distinct, so numEntriesScannedPostFilter equals numDocsScanned
-    return new ExecutionStatistics(_numDocsScanned, numEntriesScannedInFilter, _numDocsScanned, numTotalDocs);
+    return new ExecutionStatistics(_numDocsScanned,
+        _projectOperator.getExecutionStatistics().getNumEntriesScannedInFilter(), _numDocsScanned, _totalDocs);
   }
 
   private String resolveExplainName() {
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperator.java
index 0970cac305e3..a94dcd6253f3 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperator.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperator.java
@@ -18,23 +18,20 @@
  */
 package org.apache.pinot.core.operator.query;
 
+import com.fasterxml.jackson.databind.JsonNode;
 import com.google.common.base.CaseFormat;
+import java.io.IOException;
 import java.math.BigDecimal;
-import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
-import java.util.Set;
 import javax.annotation.Nullable;
 import org.apache.pinot.common.function.JsonPathCache;
 import org.apache.pinot.common.request.context.ExpressionContext;
-import org.apache.pinot.common.request.context.FilterContext;
 import org.apache.pinot.common.request.context.OrderByExpressionContext;
-import org.apache.pinot.common.request.context.RequestContextUtils;
-import org.apache.pinot.common.request.context.predicate.JsonMatchPredicate;
-import org.apache.pinot.common.request.context.predicate.Predicate;
 import org.apache.pinot.common.utils.DataSchema;
 import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
+import org.apache.pinot.common.utils.config.QueryOptionsUtils;
 import org.apache.pinot.core.common.Operator;
 import org.apache.pinot.core.operator.BaseOperator;
 import org.apache.pinot.core.operator.ExecutionStatistics;
@@ -50,516 +47,396 @@
 import org.apache.pinot.core.query.distinct.table.StringDistinctTable;
 import org.apache.pinot.core.query.request.context.QueryContext;
 import org.apache.pinot.segment.spi.IndexSegment;
-import org.apache.pinot.segment.spi.SegmentContext;
 import org.apache.pinot.segment.spi.datasource.DataSource;
 import org.apache.pinot.segment.spi.index.IndexService;
 import org.apache.pinot.segment.spi.index.IndexType;
 import org.apache.pinot.segment.spi.index.reader.JsonIndexReader;
-import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
 import org.apache.pinot.spi.query.QueryThreadContext;
-import org.apache.pinot.sql.parsers.CalciteSqlParser;
+import org.apache.pinot.spi.utils.JsonUtils;
 import org.roaringbitmap.RoaringBitmap;
 import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
 
 
-/**
- * Distinct operator for the scalar {@code jsonExtractIndex(column, path, type[, defaultValue])} form.
- *
- * <p>Execution flow:
- * 1. Push a same-path {@code JSON_MATCH} predicate into the JSON-index lookup when it cannot match missing paths.
- * 2. Convert matching flattened doc ids back to segment doc ids.
- * 3. Apply any remaining row-level filter and materialize DISTINCT results, including missing-path handling.
- */
+/// Distinct operator for `jsonExtractIndex(column, path, type[, defaultValue[, filterJsonExpression]])`.
+///
+/// Supports both SV (e.g. `STRING`) and MV (e.g. `STRING_ARRAY`) result types — DISTINCT collapses MV array elements
+/// to scalar rows, matching the scan-based `SELECT DISTINCT mvCol` convention. The 4-arg default is a single value
+/// for SV and a JSON array for MV; its value(s) are added to the distinct set when a selected doc lacks the path.
+///
+/// Execution flow:
+/// 1. Pass the optional 5-arg `filterJsonExpression` directly to the JSON-index lookup (matches
+///    `JsonExtractIndexTransformFunction`'s convention).
+/// 2. Convert matching flattened doc ids back to segment doc ids.
+/// 3. Apply any remaining row-level WHERE filter and materialize DISTINCT results, including missing-path handling.
 public class JsonIndexDistinctOperator extends BaseOperator<DistinctResultsBlock> {
   private static final String EXPLAIN_NAME = "DISTINCT_JSON_INDEX";
   private static final String FUNCTION_NAME = "jsonExtractIndex";
 
+  /// Returns true if the expression is a `jsonExtractIndex` function call. All other validation (argument count/types,
+  /// column existence, JSON index presence, path support) happens inside the operator's constructor and matches what
+  /// the scan-based fallback (`JsonExtractIndexTransformFunction`) would surface during its own `init`.
+  public static boolean canUseJsonIndexDistinct(ExpressionContext expr) {
+    return expr.getType() == ExpressionContext.Type.FUNCTION && FUNCTION_NAME.equalsIgnoreCase(
+        expr.getFunction().getFunctionName());
+  }
+
   private final IndexSegment _indexSegment;
-  private final SegmentContext _segmentContext;
+  private final int _totalDocs;
   private final QueryContext _queryContext;
   private final BaseFilterOperator _filterOperator;
+  private final ExpressionContext _expression;
+  private final boolean _skipMissingPath;
+  private final JsonIndexReader _jsonIndexReader;
+  private final String _jsonPathString;
+  private final DataType _dataType;
+  @Nullable
+  private final String[] _defaultValueLiterals;
+  @Nullable
+  private final String _filterJsonExpression;
+  private final DataSchema _dataSchema;
+  @Nullable
+  private final OrderByExpressionContext _orderByExpression;
 
-  private int _numEntriesExamined = 0;
+  private int _numDocsScanned = 0;
   private long _numEntriesScannedInFilter = 0;
+  private int _numEntriesExaminedPostFilter = 0;
 
-  public JsonIndexDistinctOperator(IndexSegment indexSegment, SegmentContext segmentContext,
-      QueryContext queryContext, BaseFilterOperator filterOperator) {
+  public JsonIndexDistinctOperator(IndexSegment indexSegment, QueryContext queryContext,
+      BaseFilterOperator filterOperator) {
     _indexSegment = indexSegment;
-    _segmentContext = segmentContext;
+    _totalDocs = indexSegment.getSegmentMetadata().getTotalDocs();
     _queryContext = queryContext;
     _filterOperator = filterOperator;
-  }
-
-  @Override
-  protected DistinctResultsBlock getNextBlock() {
-    List<ExpressionContext> expressions = _queryContext.getSelectExpressions();
+    List<ExpressionContext> expressions = queryContext.getSelectExpressions();
     if (expressions.size() != 1) {
       throw new IllegalStateException("JsonIndexDistinctOperator supports single expression only");
     }
+    _expression = expressions.get(0);
+    _skipMissingPath = QueryOptionsUtils.isJsonIndexDistinctSkipMissingPath(queryContext.getQueryOptions());
 
-    ExpressionContext expr = expressions.get(0);
-    ParsedJsonExtractIndex parsed = parseJsonExtractIndex(expr);
-    if (parsed == null) {
-      throw new IllegalStateException("Expected 3/4-arg scalar jsonExtractIndex expression");
-    }
+    // Mirrors the arguments handling logic in `JsonExtractIndexTransformFunction`
 
-    DataSource dataSource = _indexSegment.getDataSource(parsed._columnName, _queryContext.getSchema());
-    JsonIndexReader jsonIndexReader = getJsonIndexReader(dataSource);
-    if (jsonIndexReader == null) {
-      throw new IllegalStateException("Column " + parsed._columnName + " has no JSON index");
+    List<ExpressionContext> arguments = _expression.getFunction().getArguments();
+    int numArguments = arguments.size();
+    // Check that there are exactly 3 or 4 or 5 arguments
+    if (numArguments < 3 || numArguments > 5) {
+      throw new IllegalArgumentException(
+          "Expected 3/4/5 arguments for jsonExtractIndex(jsonFieldName, 'jsonPath', 'resultsType',"
+              + " ['defaultValue'], ['jsonFilterExpression'])");
     }
 
-    String pushedDownFilterJson = extractSamePathJsonMatchFilter(parsed, _queryContext.getFilter());
-    boolean filterFullyPushedDown = pushedDownFilterJson != null
-        && isOnlySamePathJsonMatchFilter(parsed, _queryContext.getFilter())
-        && !jsonMatchFilterCanMatchMissingPath(pushedDownFilterJson);
-
-    // Fast path: when the filter is fully pushed down into the JSON index, we only need the distinct value strings.
-    // This avoids reading posting lists, building per-value bitmaps, and converting flattened doc IDs.
-    if (filterFullyPushedDown) {
-      Set<String> distinctValues = jsonIndexReader.getMatchingDistinctValues(
-          parsed._jsonPathString, pushedDownFilterJson);
-      return buildDistinctResultsFromValues(expr, parsed, distinctValues);
+    ExpressionContext firstArgument = arguments.get(0);
+    if (firstArgument.getType() == ExpressionContext.Type.IDENTIFIER) {
+      DataSource dataSource = indexSegment.getDataSource(firstArgument.getIdentifier());
+      _jsonIndexReader = getJsonIndexReader(dataSource);
+      if (_jsonIndexReader == null) {
+        throw new IllegalStateException("jsonExtractIndex can only be applied on a column with JSON index");
+      }
+    } else {
+      throw new IllegalArgumentException("jsonExtractIndex can only be applied to a raw column");
     }
 
-    // Evaluate the filter first so we can skip the (potentially expensive) index map when no docs match.
-    RoaringBitmap filteredDocIds = buildFilteredDocIds();
-    if (filteredDocIds != null && filteredDocIds.isEmpty()) {
-      ColumnDataType earlyColumnDataType = ColumnDataType.fromDataTypeSV(parsed._dataType);
-      DataSchema earlyDataSchema = new DataSchema(
-          new String[]{expr.toString()},
-          new ColumnDataType[]{earlyColumnDataType});
-      OrderByExpressionContext earlyOrderBy = _queryContext.getOrderByExpressions() != null
-          ? _queryContext.getOrderByExpressions().get(0) : null;
-      return new DistinctResultsBlock(
-          createDistinctTable(earlyDataSchema, parsed._dataType, earlyOrderBy), _queryContext);
+    ExpressionContext secondArgument = arguments.get(1);
+    if (secondArgument.getType() != ExpressionContext.Type.LITERAL) {
+      throw new IllegalArgumentException("JSON path argument must be a literal");
+    }
+    _jsonPathString = secondArgument.getLiteral().getStringValue();
+    try {
+      JsonPathCache.INSTANCE.getOrCompute(_jsonPathString);
+    } catch (Exception e) {
+      throw new IllegalArgumentException("JSON path argument is not a valid JSON path", e);
     }
 
-    // All other WHERE filters remain row-level and are applied after converting flattened doc IDs to real doc IDs.
-    Map<String, RoaringBitmap> valueToMatchingDocs =
-        jsonIndexReader.getMatchingFlattenedDocsMap(parsed._jsonPathString, pushedDownFilterJson);
+    ExpressionContext thirdArgument = arguments.get(2);
+    if (thirdArgument.getType() != ExpressionContext.Type.LITERAL) {
+      throw new IllegalArgumentException("Result type argument must be a literal");
+    }
+    String resultsType = thirdArgument.getLiteral().getStringValue().toUpperCase(java.util.Locale.ROOT);
+    boolean isSingleValue = !resultsType.endsWith("_ARRAY");
+    if (isSingleValue && _jsonPathString.contains("[*]")) {
+      throw new IllegalArgumentException(
+          "[*] syntax in json path is unsupported for singleValue field json_extract_index");
+    }
+    String dataTypeName = isSingleValue ? resultsType : resultsType.substring(0, resultsType.length() - 6);
+    try {
+      _dataType = DataType.valueOf(dataTypeName);
+    } catch (IllegalArgumentException e) {
+      throw new IllegalArgumentException("Unknown jsonExtractIndex result type: " + resultsType, e);
+    }
+    switch (_dataType) {
+      case INT:
+      case LONG:
+      case FLOAT:
+      case DOUBLE:
+      case BIG_DECIMAL:
+      case STRING:
+        break;
+      default:
+        throw new IllegalArgumentException("Unsupported jsonExtractIndex result type for distinct: " + _dataType);
+    }
 
-    // Always single-value (MV _ARRAY is rejected in parseJsonExtractIndex)
-    jsonIndexReader.convertFlattenedDocIdsToDocIds(valueToMatchingDocs);
-    return buildDistinctResultsBlock(expr, parsed, valueToMatchingDocs, filteredDocIds,
-        filteredDocIds == null);
-  }
+    // With _skipMissingPath, the 4-arg default is never used at runtime (handleMissingDocs is bypassed), so don't
+    // parse or validate it — accept any literal shape and ignore it.
+    if (numArguments >= 4 && !_skipMissingPath) {
+      ExpressionContext fourthArgument = arguments.get(3);
+      if (fourthArgument.getType() != ExpressionContext.Type.LITERAL) {
+        throw new IllegalArgumentException("Default value must be a literal");
+      }
+      String defaultLiteral = fourthArgument.getLiteral().getStringValue();
+      if (isSingleValue) {
+        try {
+          _dataType.convert(defaultLiteral);
+        } catch (Exception e) {
+          throw new IllegalArgumentException("Default value '" + defaultLiteral + "' is not a valid " + _dataType, e);
+        }
+        _defaultValueLiterals = new String[]{defaultLiteral};
+      } else {
+        try {
+          JsonNode mvArray = JsonUtils.stringToJsonNode(defaultLiteral);
+          if (!mvArray.isArray()) {
+            throw new IllegalArgumentException("Default value must be a valid JSON array");
+          }
+          String[] literals = new String[mvArray.size()];
+          for (int i = 0; i < mvArray.size(); i++) {
+            literals[i] = mvArray.get(i).asText();
+            try {
+              _dataType.convert(literals[i]);
+            } catch (Exception e) {
+              throw new IllegalArgumentException("Default value '" + literals[i] + "' is not a valid " + _dataType, e);
+            }
+          }
+          _defaultValueLiterals = literals;
+        } catch (IOException e) {
+          throw new IllegalArgumentException("Default value must be a valid JSON array", e);
+        }
+      }
+    } else {
+      _defaultValueLiterals = null;
+    }
 
-  private DistinctResultsBlock buildDistinctResultsFromValues(ExpressionContext expr, ParsedJsonExtractIndex parsed,
-      Set<String> distinctValues) {
-    ColumnDataType columnDataType = ColumnDataType.fromDataTypeSV(parsed._dataType);
-    DataSchema dataSchema = new DataSchema(
-        new String[]{expr.toString()},
-        new ColumnDataType[]{columnDataType});
-    OrderByExpressionContext orderByExpression = _queryContext.getOrderByExpressions() != null
-        ? _queryContext.getOrderByExpressions().get(0) : null;
-    DistinctTable distinctTable = createDistinctTable(dataSchema, parsed._dataType, orderByExpression);
-    int limit = _queryContext.getLimit();
+    if (numArguments == 5) {
+      ExpressionContext fifthArgument = arguments.get(4);
+      if (fifthArgument.getType() != ExpressionContext.Type.LITERAL) {
+        throw new IllegalArgumentException("JSON path filter argument must be a literal");
+      }
+      _filterJsonExpression = fifthArgument.getLiteral().getStringValue();
+    } else {
+      _filterJsonExpression = null;
+    }
 
-    for (String value : distinctValues) {
-      _numEntriesExamined++;
-      QueryThreadContext.checkTerminationAndSampleUsagePeriodically(_numEntriesExamined, EXPLAIN_NAME);
+    _dataSchema = new DataSchema(new String[]{_expression.toString()},
+        new ColumnDataType[]{ColumnDataType.fromDataTypeSV(_dataType)});
+    List<OrderByExpressionContext> orderByExpressions = queryContext.getOrderByExpressions();
+    _orderByExpression = orderByExpressions != null ? orderByExpressions.get(0) : null;
+  }
 
-      boolean done = addValueToDistinctTable(distinctTable, value, parsed._dataType, orderByExpression);
-      if (done) {
-        break;
-      }
-      if (orderByExpression == null && distinctTable.hasLimit() && distinctTable.size() >= limit) {
-        break;
+  @Nullable
+  private static JsonIndexReader getJsonIndexReader(DataSource dataSource) {
+    JsonIndexReader reader = dataSource.getJsonIndex();
+    // TODO: rework
+    if (reader == null) {
+      Optional<IndexType<?, ?, ?>> compositeIndex = IndexService.getInstance().getOptional("composite_json_index");
+      if (compositeIndex.isPresent()) {
+        reader = (JsonIndexReader) dataSource.getIndex(compositeIndex.get());
       }
     }
+    return reader;
+  }
 
-    return new DistinctResultsBlock(distinctTable, _queryContext);
+  @Override
+  protected DistinctResultsBlock getNextBlock() {
+    // Evaluate the filter first so we can skip the (potentially expensive) index map when no docs match.
+    BaseFilterOperator.FilteredDocIds filteredDocIds = _filterOperator.getFilteredDocIds();
+    ImmutableRoaringBitmap docIds = filteredDocIds.getDocIds();
+    _numDocsScanned = docIds != null ? docIds.getCardinality() : _totalDocs;
+    _numEntriesScannedInFilter = filteredDocIds.getNumEntriesScannedInFilter();
+    if (_numDocsScanned == 0) {
+      return new DistinctResultsBlock(createDistinctTable(), _queryContext);
+    }
+
+    // The 5-arg form's filter literal is pushed into the JSON index; WHERE-clause filters remain row-level and are
+    // applied after converting flattened doc IDs to real doc IDs.
+    Map<String, RoaringBitmap> valueToMatchingDocs =
+        _jsonIndexReader.getMatchingFlattenedDocsMap(_jsonPathString, _filterJsonExpression);
+    _jsonIndexReader.convertFlattenedDocIdsToDocIds(valueToMatchingDocs);
+    return buildDistinctResultsBlock(valueToMatchingDocs, docIds != null ? docIds.toRoaringBitmap() : null);
   }
 
-  private DistinctResultsBlock buildDistinctResultsBlock(ExpressionContext expr, ParsedJsonExtractIndex parsed,
-      Map<String, RoaringBitmap> valueToMatchingDocs, @Nullable RoaringBitmap filteredDocIds,
-      boolean allDocsSelected) {
-    ColumnDataType columnDataType = ColumnDataType.fromDataTypeSV(parsed._dataType);
-    DataSchema dataSchema = new DataSchema(
-        new String[]{expr.toString()},
-        new ColumnDataType[]{columnDataType});
-    OrderByExpressionContext orderByExpression = _queryContext.getOrderByExpressions() != null
-        ? _queryContext.getOrderByExpressions().get(0) : null;
-    DistinctTable distinctTable = createDistinctTable(dataSchema, parsed._dataType, orderByExpression);
+  private DistinctResultsBlock buildDistinctResultsBlock(Map<String, RoaringBitmap> valueToMatchingDocs,
+      @Nullable RoaringBitmap filteredDocIds) {
+    DistinctTable distinctTable = createDistinctTable();
 
-    int limit = _queryContext.getLimit();
-    int totalDocs = _indexSegment.getSegmentMetadata().getTotalDocs();
-    RoaringBitmap coveredDocs = allDocsSelected ? new RoaringBitmap() : null;
-    RoaringBitmap remainingDocs = filteredDocIds != null ? filteredDocIds.clone() : null;
-    boolean allDocsCovered = filteredDocIds == null ? !allDocsSelected || totalDocs == 0 : filteredDocIds.isEmpty();
+    // With _skipMissingPath, handleMissingDocs is bypassed — no need to track which docs are still uncovered, so
+    // skip the bitmap allocation and per-iteration `andNot` work entirely.
+    boolean allDocsCovered = _skipMissingPath;
+    RoaringBitmap remainingDocs = _skipMissingPath ? null
+        : (filteredDocIds != null ? filteredDocIds.clone() : RoaringBitmap.bitmapOfRange(0L, _totalDocs));
     boolean earlyBreak = false;
 
     for (Map.Entry<String, RoaringBitmap> entry : valueToMatchingDocs.entrySet()) {
-      _numEntriesExamined++;
-      QueryThreadContext.checkTerminationAndSampleUsagePeriodically(_numEntriesExamined, EXPLAIN_NAME);
+      QueryThreadContext.checkTerminationAndSampleUsagePeriodically(_numEntriesExaminedPostFilter++, EXPLAIN_NAME);
 
       String value = entry.getKey();
       RoaringBitmap docIds = entry.getValue();
 
-      boolean includeValue;
-      if (filteredDocIds == null) {
-        includeValue = true;
-        if (!allDocsCovered && allDocsSelected) {
-          coveredDocs.or(docIds);
-          if (coveredDocs.getLongCardinality() >= totalDocs) {
-            allDocsCovered = true;
-          }
-        }
-      } else {
-        includeValue = RoaringBitmap.intersects(docIds, filteredDocIds);
-        // Remove matched docs from remaining set in-place (no allocation per value).
-        if (!allDocsCovered && includeValue) {
-          remainingDocs.andNot(docIds);
-          if (remainingDocs.isEmpty()) {
-            allDocsCovered = true;
-          }
-        }
-      }
+      // Unfiltered always includes; filtered must intersect the original filter set (not the shrinking
+      // `remainingDocs`, since a value can still belong to the result after all filtered docs are covered).
+      boolean includeValue = filteredDocIds == null || RoaringBitmap.intersects(docIds, filteredDocIds);
 
-      if (includeValue) {
-        boolean done = addValueToDistinctTable(distinctTable, value, parsed._dataType, orderByExpression);
-        if (done) {
-          earlyBreak = true;
-          break;
+      if (!allDocsCovered && includeValue) {
+        remainingDocs.andNot(docIds);
+        if (remainingDocs.isEmpty()) {
+          allDocsCovered = true;
         }
       }
 
-      if (orderByExpression == null && distinctTable.hasLimit() && distinctTable.size() >= limit) {
+      // addValueToDistinctTable returns true exactly when the table has reached its LIMIT (no-ORDER-BY case);
+      // for ORDER-BY or unbounded LIMIT it always returns false. So no separate size check is needed.
+      if (includeValue && addValueToDistinctTable(distinctTable, value)) {
         earlyBreak = true;
         break;
       }
     }
 
     if (!earlyBreak && !allDocsCovered) {
-      handleMissingDocs(distinctTable, parsed, orderByExpression);
+      handleMissingDocs(distinctTable);
     }
 
     return new DistinctResultsBlock(distinctTable, _queryContext);
   }
 
-  private void handleMissingDocs(DistinctTable distinctTable, ParsedJsonExtractIndex parsed,
-      @Nullable OrderByExpressionContext orderByExpression) {
-    if (parsed._defaultValueLiteral != null) {
-      addValueToDistinctTable(distinctTable, parsed._defaultValueLiteral, parsed._dataType, orderByExpression);
-    } else if (_queryContext.isNullHandlingEnabled()) {
-      distinctTable.addNull();
-    } else {
-      throw new RuntimeException(
-          String.format("Illegal Json Path: [%s], for some docIds in segment [%s]",
-              parsed._jsonPathString, _indexSegment.getSegmentName()));
-    }
-  }
-
-  @Nullable
-  private static String extractSamePathJsonMatchFilter(ParsedJsonExtractIndex parsed, @Nullable FilterContext filter) {
-    if (filter == null) {
-      return null;
-    }
-    switch (filter.getType()) {
-      case PREDICATE:
-        return extractSamePathJsonMatchFilter(parsed, filter.getPredicate());
-      case AND:
-        String matchingFilter = null;
-        for (FilterContext child : filter.getChildren()) {
-          String childFilter = extractSamePathJsonMatchFilter(parsed, child);
-          if (childFilter == null) {
-            continue;
-          }
-          if (matchingFilter != null) {
-            return null;
-          }
-          matchingFilter = childFilter;
-        }
-        return matchingFilter;
-      default:
-        return null;
-    }
-  }
-
-  private static boolean isOnlySamePathJsonMatchFilter(ParsedJsonExtractIndex parsed, @Nullable FilterContext filter) {
-    if (filter == null || filter.getType() != FilterContext.Type.PREDICATE) {
-      return false;
-    }
-    return extractSamePathJsonMatchFilter(parsed, filter.getPredicate()) != null;
-  }
-
-  private static boolean jsonMatchFilterCanMatchMissingPath(String filterJsonString) {
-    try {
-      FilterContext filter = RequestContextUtils.getFilter(CalciteSqlParser.compileToExpression(filterJsonString));
-      return filter.getType() == FilterContext.Type.PREDICATE
-          && filter.getPredicate().getType() == Predicate.Type.IS_NULL;
-    } catch (Exception e) {
-      return false;
-    }
-  }
-
-  @Nullable
-  private static String extractSamePathJsonMatchFilter(ParsedJsonExtractIndex parsed, Predicate predicate) {
-    if (!(predicate instanceof JsonMatchPredicate)) {
-      return null;
-    }
-    ExpressionContext lhs = predicate.getLhs();
-    if (lhs.getType() != ExpressionContext.Type.IDENTIFIER
-        || !parsed._columnName.equals(lhs.getIdentifier())) {
-      return null;
-    }
-    String filterJsonString = ((JsonMatchPredicate) predicate).getValue();
-    int start = filterJsonString.indexOf('"');
-    if (start < 0) {
-      return null;
-    }
-    int end = filterJsonString.indexOf('"', start + 1);
-    if (end < 0) {
-      return null;
-    }
-    String filterPath = filterJsonString.substring(start + 1, end);
-    return parsed._jsonPathString.equals(filterPath) ? filterJsonString : null;
-  }
-
-  private DistinctTable createDistinctTable(DataSchema dataSchema, FieldSpec.DataType dataType,
-      @Nullable OrderByExpressionContext orderByExpression) {
+  private DistinctTable createDistinctTable() {
     int limit = _queryContext.getLimit();
     boolean nullHandlingEnabled = _queryContext.isNullHandlingEnabled();
-    switch (dataType) {
+    switch (_dataType) {
       case INT:
-        return new IntDistinctTable(dataSchema, limit, nullHandlingEnabled, orderByExpression);
+        return new IntDistinctTable(_dataSchema, limit, nullHandlingEnabled, _orderByExpression);
       case LONG:
-        return new LongDistinctTable(dataSchema, limit, nullHandlingEnabled, orderByExpression);
+        return new LongDistinctTable(_dataSchema, limit, nullHandlingEnabled, _orderByExpression);
       case FLOAT:
-        return new FloatDistinctTable(dataSchema, limit, nullHandlingEnabled, orderByExpression);
+        return new FloatDistinctTable(_dataSchema, limit, nullHandlingEnabled, _orderByExpression);
       case DOUBLE:
-        return new DoubleDistinctTable(dataSchema, limit, nullHandlingEnabled, orderByExpression);
+        return new DoubleDistinctTable(_dataSchema, limit, nullHandlingEnabled, _orderByExpression);
       case BIG_DECIMAL:
-        return new BigDecimalDistinctTable(dataSchema, limit, nullHandlingEnabled, orderByExpression);
+        return new BigDecimalDistinctTable(_dataSchema, limit, nullHandlingEnabled, _orderByExpression);
       case STRING:
-        return new StringDistinctTable(dataSchema, limit, nullHandlingEnabled, orderByExpression);
+        return new StringDistinctTable(_dataSchema, limit, nullHandlingEnabled, _orderByExpression);
       default:
-        throw new IllegalStateException("Unsupported data type for JSON index distinct: " + dataType);
+        throw new IllegalStateException("Unsupported data type for JSON index distinct: " + _dataType);
     }
   }
 
-  private static boolean addValueToDistinctTable(DistinctTable distinctTable, String stringValue,
-      FieldSpec.DataType dataType, @Nullable OrderByExpressionContext orderByExpression) {
-    switch (dataType) {
+  private boolean addValueToDistinctTable(DistinctTable distinctTable, String stringValue) {
+    switch (_dataType) {
       case INT:
-        return addToTable((IntDistinctTable) distinctTable, Integer.parseInt(stringValue), orderByExpression);
+        return addToTable((IntDistinctTable) distinctTable, Integer.parseInt(stringValue));
       case LONG:
-        return addToTable((LongDistinctTable) distinctTable, Long.parseLong(stringValue), orderByExpression);
+        return addToTable((LongDistinctTable) distinctTable, Long.parseLong(stringValue));
       case FLOAT:
-        return addToTable((FloatDistinctTable) distinctTable, Float.parseFloat(stringValue), orderByExpression);
+        return addToTable((FloatDistinctTable) distinctTable, Float.parseFloat(stringValue));
       case DOUBLE:
-        return addToTable((DoubleDistinctTable) distinctTable, Double.parseDouble(stringValue), orderByExpression);
+        return addToTable((DoubleDistinctTable) distinctTable, Double.parseDouble(stringValue));
       case BIG_DECIMAL:
-        return addToTable((BigDecimalDistinctTable) distinctTable, new BigDecimal(stringValue), orderByExpression);
+        return addToTable((BigDecimalDistinctTable) distinctTable, new BigDecimal(stringValue));
       case STRING:
-        return addToTable((StringDistinctTable) distinctTable, stringValue, orderByExpression);
+        return addToTable((StringDistinctTable) distinctTable, stringValue);
       default:
-        throw new IllegalStateException("Unsupported data type for JSON index distinct: " + dataType);
+        throw new IllegalStateException("Unsupported data type for JSON index distinct: " + _dataType);
     }
   }
 
-  private static boolean addToTable(IntDistinctTable table, int value,
-      @Nullable OrderByExpressionContext orderByExpression) {
-    if (table.hasLimit()) {
-      if (orderByExpression != null) {
-        table.addWithOrderBy(value);
-        return false;
-      } else {
-        return table.addWithoutOrderBy(value);
-      }
-    } else {
+  private boolean addToTable(IntDistinctTable table, int value) {
+    if (!table.hasLimit()) {
       table.addUnbounded(value);
       return false;
     }
-  }
-
-  private static boolean addToTable(LongDistinctTable table, long value,
-      @Nullable OrderByExpressionContext orderByExpression) {
-    if (table.hasLimit()) {
-      if (orderByExpression != null) {
-        table.addWithOrderBy(value);
-        return false;
-      } else {
-        return table.addWithoutOrderBy(value);
-      }
-    } else {
-      table.addUnbounded(value);
+    if (_orderByExpression != null) {
+      table.addWithOrderBy(value);
       return false;
     }
+    return table.addWithoutOrderBy(value);
   }
 
-  private static boolean addToTable(FloatDistinctTable table, float value,
-      @Nullable OrderByExpressionContext orderByExpression) {
-    if (table.hasLimit()) {
-      if (orderByExpression != null) {
-        table.addWithOrderBy(value);
-        return false;
-      } else {
-        return table.addWithoutOrderBy(value);
-      }
-    } else {
+  private boolean addToTable(LongDistinctTable table, long value) {
+    if (!table.hasLimit()) {
       table.addUnbounded(value);
       return false;
     }
-  }
-
-  private static boolean addToTable(DoubleDistinctTable table, double value,
-      @Nullable OrderByExpressionContext orderByExpression) {
-    if (table.hasLimit()) {
-      if (orderByExpression != null) {
-        table.addWithOrderBy(value);
-        return false;
-      } else {
-        return table.addWithoutOrderBy(value);
-      }
-    } else {
-      table.addUnbounded(value);
+    if (_orderByExpression != null) {
+      table.addWithOrderBy(value);
       return false;
     }
+    return table.addWithoutOrderBy(value);
   }
 
-  private static boolean addToTable(BigDecimalDistinctTable table, BigDecimal value,
-      @Nullable OrderByExpressionContext orderByExpression) {
-    if (table.hasLimit()) {
-      if (orderByExpression != null) {
-        table.addWithOrderBy(value);
-        return false;
-      } else {
-        return table.addWithoutOrderBy(value);
-      }
-    } else {
+  private boolean addToTable(FloatDistinctTable table, float value) {
+    if (!table.hasLimit()) {
       table.addUnbounded(value);
       return false;
     }
+    if (_orderByExpression != null) {
+      table.addWithOrderBy(value);
+      return false;
+    }
+    return table.addWithoutOrderBy(value);
   }
 
-  private static boolean addToTable(StringDistinctTable table, String value,
-      @Nullable OrderByExpressionContext orderByExpression) {
-    if (table.hasLimit()) {
-      if (orderByExpression != null) {
-        table.addWithOrderBy(value);
-        return false;
-      } else {
-        return table.addWithoutOrderBy(value);
-      }
-    } else {
+  private boolean addToTable(DoubleDistinctTable table, double value) {
+    if (!table.hasLimit()) {
       table.addUnbounded(value);
       return false;
     }
-  }
-
-  @Nullable
-  private static JsonIndexReader getJsonIndexReader(DataSource dataSource) {
-    JsonIndexReader reader = dataSource.getJsonIndex();
-    if (reader == null) {
-      Optional<IndexType<?, ?, ?>> compositeIndex =
-          IndexService.getInstance().getOptional("composite_json_index");
-      if (compositeIndex.isPresent()) {
-        reader = (JsonIndexReader) dataSource.getIndex(compositeIndex.get());
-      }
+    if (_orderByExpression != null) {
+      table.addWithOrderBy(value);
+      return false;
     }
-    return reader;
-  }
-
-  @Nullable
-  private RoaringBitmap buildFilteredDocIds() {
-    BaseFilterOperator.FilteredDocIds filteredDocIds = _filterOperator.getFilteredDocIds();
-    _numEntriesScannedInFilter = filteredDocIds.getNumEntriesScannedInFilter();
-    ImmutableRoaringBitmap docIds = filteredDocIds.getDocIds();
-    return docIds != null ? docIds.toRoaringBitmap() : null;
+    return table.addWithoutOrderBy(value);
   }
 
-  @Nullable
-  private static ParsedJsonExtractIndex parseJsonExtractIndex(ExpressionContext expr) {
-    if (expr.getType() != ExpressionContext.Type.FUNCTION) {
-      return null;
-    }
-    if (!FUNCTION_NAME.equalsIgnoreCase(expr.getFunction().getFunctionName())) {
-      return null;
-    }
-    List<ExpressionContext> args = expr.getFunction().getArguments();
-    if (args.size() != 3 && args.size() != 4) {
-      return null;
-    }
-    if (args.get(0).getType() != ExpressionContext.Type.IDENTIFIER) {
-      return null;
-    }
-    if (args.get(1).getType() != ExpressionContext.Type.LITERAL
-        || args.get(2).getType() != ExpressionContext.Type.LITERAL
-        || (args.size() == 4 && args.get(3).getType() != ExpressionContext.Type.LITERAL)) {
-      return null;
-    }
-
-    String columnName = args.get(0).getIdentifier();
-    String jsonPathString = args.get(1).getLiteral().getStringValue();
-    String resultsType = args.get(2).getLiteral().getStringValue().toUpperCase();
-    // Only single-value types are supported; MV (_ARRAY) would have incorrect flattened-to-real
-    // docId intersection since convertFlattenedDocIdsToDocIds is skipped for MV.
-    if (resultsType.endsWith("_ARRAY")) {
-      return null;
-    }
-    if (jsonPathString.contains("[*]")) {
-      return null;
-    }
-
-    FieldSpec.DataType dataType;
-    try {
-      dataType = FieldSpec.DataType.valueOf(resultsType);
-    } catch (IllegalArgumentException e) {
-      return null;
+  private boolean addToTable(BigDecimalDistinctTable table, BigDecimal value) {
+    if (!table.hasLimit()) {
+      table.addUnbounded(value);
+      return false;
     }
-    // Only types with a corresponding DistinctTable implementation are supported
-    switch (dataType) {
-      case INT:
-      case LONG:
-      case FLOAT:
-      case DOUBLE:
-      case BIG_DECIMAL:
-      case STRING:
-        break;
-      default:
-        return null;
+    if (_orderByExpression != null) {
+      table.addWithOrderBy(value);
+      return false;
     }
+    return table.addWithoutOrderBy(value);
+  }
 
-    try {
-      JsonPathCache.INSTANCE.getOrCompute(jsonPathString);
-    } catch (Exception e) {
-      return null;
+  private boolean addToTable(StringDistinctTable table, String value) {
+    if (!table.hasLimit()) {
+      table.addUnbounded(value);
+      return false;
     }
-
-    String defaultValueLiteral = null;
-    if (args.size() == 4) {
-      defaultValueLiteral = args.get(3).getLiteral().getStringValue();
-      try {
-        dataType.convert(defaultValueLiteral);
-      } catch (Exception e) {
-        return null;
-      }
+    if (_orderByExpression != null) {
+      table.addWithOrderBy(value);
+      return false;
     }
-
-    return new ParsedJsonExtractIndex(columnName, jsonPathString, dataType, defaultValueLiteral);
+    return table.addWithoutOrderBy(value);
   }
 
-  private static final class ParsedJsonExtractIndex {
-    final String _columnName;
-    final String _jsonPathString;
-    final FieldSpec.DataType _dataType;
-    @Nullable
-    final String _defaultValueLiteral;
-
-    ParsedJsonExtractIndex(String columnName, String jsonPathString, FieldSpec.DataType dataType,
-        @Nullable String defaultValueLiteral) {
-      _columnName = columnName;
-      _jsonPathString = jsonPathString;
-      _dataType = dataType;
-      _defaultValueLiteral = defaultValueLiteral;
+  private void handleMissingDocs(DistinctTable distinctTable) {
+    if (_defaultValueLiterals != null) {
+      for (String literal : _defaultValueLiterals) {
+        if (addValueToDistinctTable(distinctTable, literal)) {
+          return;
+        }
+      }
+    } else if (_queryContext.isNullHandlingEnabled()) {
+      distinctTable.addNull();
+    } else {
+      throw new RuntimeException(
+          String.format("Illegal Json Path: [%s], for some docIds in segment [%s]", _jsonPathString,
+              _indexSegment.getSegmentName()));
     }
   }
 
   @Override
   public List<Operator> getChildOperators() {
-    return Collections.singletonList(_filterOperator);
+    return List.of(_filterOperator);
   }
 
   @Override
@@ -569,16 +446,16 @@ public IndexSegment getIndexSegment() {
 
   @Override
   public ExecutionStatistics getExecutionStatistics() {
-    int numTotalDocs = _indexSegment.getSegmentMetadata().getTotalDocs();
-    // Index-only operator: no docs scanned, no entries scanned post-filter.
-    // Filter-phase stats are tracked when buildFilteredDocIds falls back to DocIdSetPlanNode.
-    return new ExecutionStatistics(0, _numEntriesScannedInFilter, 0, numTotalDocs);
+    // - numDocsScanned tracks the matching docs
+    // - numEntriesScannedInFilter tracks work done while materializing the exact filter bitmap
+    // - numEntriesScannedPostFilter tracks values examined
+    return new ExecutionStatistics(_numDocsScanned, _numEntriesScannedInFilter, _numEntriesExaminedPostFilter,
+        _totalDocs);
   }
 
   @Override
   public String toExplainString() {
-    List<ExpressionContext> expressions = _queryContext.getSelectExpressions();
-    return EXPLAIN_NAME + "(keyColumns:" + (expressions.isEmpty() ? "" : expressions.get(0).toString()) + ")";
+    return EXPLAIN_NAME + "(keyColumns:" + _expression + ")";
   }
 
   @Override
@@ -589,34 +466,6 @@ protected String getExplainName() {
   @Override
   protected void explainAttributes(ExplainAttributeBuilder attributeBuilder) {
     super.explainAttributes(attributeBuilder);
-    List<ExpressionContext> selectExpressions = _queryContext.getSelectExpressions();
-    if (!selectExpressions.isEmpty()) {
-      attributeBuilder.putStringList("keyColumns",
-          List.of(selectExpressions.get(0).toString()));
-    }
-  }
-
-  /**
-   * Returns true if the expression is the 3/4-arg scalar jsonExtractIndex form on a column with JSON index and the
-   * path is indexed. For OSS JSON index all paths are indexed. For composite JSON index, only paths in
-   * invertedIndexConfigs are indexed per key.
-   */
-  public static boolean canUseJsonIndexDistinct(IndexSegment indexSegment, ExpressionContext expr) {
-    ParsedJsonExtractIndex parsed = parseJsonExtractIndex(expr);
-    if (parsed == null) {
-      return false;
-    }
-    DataSource dataSource = indexSegment.getDataSourceNullable(parsed._columnName);
-    if (dataSource == null) {
-      return false;
-    }
-    JsonIndexReader reader = getJsonIndexReader(dataSource);
-    if (reader == null) {
-      return false;
-    }
-    if (!reader.isPathIndexed(parsed._jsonPathString)) {
-      return false;
-    }
-    return true;
+    attributeBuilder.putStringList("keyColumns", List.of(_expression.toString()));
   }
 }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
index 97b34e05e200..cb3a53d1062f 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
@@ -24,10 +24,12 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import javax.annotation.Nullable;
 import org.apache.pinot.common.function.JsonPathCache;
 import org.apache.pinot.core.operator.ColumnContext;
 import org.apache.pinot.core.operator.blocks.ValueBlock;
 import org.apache.pinot.core.operator.transform.TransformResultMetadata;
+import org.apache.pinot.segment.spi.datasource.DataSource;
 import org.apache.pinot.segment.spi.index.IndexService;
 import org.apache.pinot.segment.spi.index.IndexType;
 import org.apache.pinot.segment.spi.index.reader.JsonIndexReader;
@@ -45,14 +47,15 @@
 public class JsonExtractIndexTransformFunction extends BaseTransformFunction {
   public static final String FUNCTION_NAME = "jsonExtractIndex";
 
-  private TransformFunction _jsonFieldTransformFunction;
-  private String _jsonPathString;
-  private TransformResultMetadata _resultMetadata;
   private JsonIndexReader _jsonIndexReader;
+  private String _jsonPathString;
+  private boolean _isSingleValue;
+  @Nullable
   private Object _defaultValue;
+  @Nullable
+  private String _filterJsonExpression;
+  private TransformResultMetadata _resultMetadata;
   private Map<String, RoaringBitmap> _valueToMatchingDocsMap;
-  private boolean _isSingleValue;
-  private String _filterJsonPath;
 
   @Override
   public String getName() {
@@ -62,8 +65,10 @@ public String getName() {
   @Override
   public void init(List<TransformFunction> arguments, Map<String, ColumnContext> columnContextMap) {
     super.init(arguments, columnContextMap);
+
+    int numArguments = arguments.size();
     // Check that there are exactly 3 or 4 or 5 arguments
-    if (arguments.size() < 3 || arguments.size() > 5) {
+    if (numArguments < 3 || numArguments > 5) {
       throw new IllegalArgumentException(
           "Expected 3/4/5 arguments for transform function: jsonExtractIndex(jsonFieldName, 'jsonPath', 'resultsType',"
               + " ['defaultValue'], ['jsonFilterExpression'])");
@@ -71,14 +76,14 @@ public void init(List<TransformFunction> arguments, Map<String, ColumnContext> c
 
     TransformFunction firstArgument = arguments.get(0);
     if (firstArgument instanceof IdentifierTransformFunction) {
-      String columnName = ((IdentifierTransformFunction) firstArgument).getColumnName();
-      _jsonIndexReader = columnContextMap.get(columnName).getDataSource().getJsonIndex();
-      if (_jsonIndexReader == null) { //TODO: rework
-        Optional<IndexType<?, ?, ?>> compositeIndex =
-            IndexService.getInstance().getOptional("composite_json_index");
+      DataSource dataSource =
+          columnContextMap.get(((IdentifierTransformFunction) firstArgument).getColumnName()).getDataSource();
+      _jsonIndexReader = dataSource.getJsonIndex();
+      // TODO: rework
+      if (_jsonIndexReader == null) {
+        Optional<IndexType<?, ?, ?>> compositeIndex = IndexService.getInstance().getOptional("composite_json_index");
         if (compositeIndex.isPresent()) {
-          _jsonIndexReader = (JsonIndexReader) columnContextMap.get(columnName)
-              .getDataSource().getIndex(compositeIndex.get());
+          _jsonIndexReader = (JsonIndexReader) dataSource.getIndex(compositeIndex.get());
         }
       }
       if (_jsonIndexReader == null) {
@@ -87,7 +92,6 @@ public void init(List<TransformFunction> arguments, Map<String, ColumnContext> c
     } else {
       throw new IllegalArgumentException("jsonExtractIndex can only be applied to a raw column");
     }
-    _jsonFieldTransformFunction = firstArgument;
 
     TransformFunction secondArgument = arguments.get(1);
     if (!(secondArgument instanceof LiteralTransformFunction)) {
@@ -113,12 +117,11 @@ public void init(List<TransformFunction> arguments, Map<String, ColumnContext> c
     DataType dataType = _isSingleValue ? DataType.valueOf(resultsType)
         : DataType.valueOf(resultsType.substring(0, resultsType.length() - 6));
 
-    if (arguments.size() >= 4) {
+    if (numArguments >= 4) {
       TransformFunction fourthArgument = arguments.get(3);
       if (!(fourthArgument instanceof LiteralTransformFunction)) {
         throw new IllegalArgumentException("Default value must be a literal");
       }
-
       if (_isSingleValue) {
         _defaultValue = dataType.convert(((LiteralTransformFunction) fourthArgument).getStringLiteral());
       } else {
@@ -138,12 +141,12 @@ public void init(List<TransformFunction> arguments, Map<String, ColumnContext> c
       }
     }
 
-    if (arguments.size() == 5) {
+    if (numArguments == 5) {
       TransformFunction fifthArgument = arguments.get(4);
       if (!(fifthArgument instanceof LiteralTransformFunction)) {
         throw new IllegalArgumentException("JSON path filter argument must be a literal");
       }
-      _filterJsonPath = ((LiteralTransformFunction) fifthArgument).getStringLiteral();
+      _filterJsonExpression = ((LiteralTransformFunction) fifthArgument).getStringLiteral();
     }
 
     _resultMetadata = new TransformResultMetadata(dataType, _isSingleValue, false);
@@ -425,7 +428,7 @@ public String[][] transformToStringValuesMV(ValueBlock valueBlock) {
    */
   private Map<String, RoaringBitmap> getValueToMatchingDocsMap() {
     if (_valueToMatchingDocsMap == null) {
-      _valueToMatchingDocsMap = _jsonIndexReader.getMatchingFlattenedDocsMap(_jsonPathString, _filterJsonPath);
+      _valueToMatchingDocsMap = _jsonIndexReader.getMatchingFlattenedDocsMap(_jsonPathString, _filterJsonExpression);
       if (_isSingleValue) {
         // For single value result type, it's more efficient to use original docIDs map
         _jsonIndexReader.convertFlattenedDocIdsToDocIds(_valueToMatchingDocsMap);
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/plan/DistinctPlanNode.java b/pinot-core/src/main/java/org/apache/pinot/core/plan/DistinctPlanNode.java
index 7b35faa6ea78..54edba21a8fb 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/plan/DistinctPlanNode.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/plan/DistinctPlanNode.java
@@ -84,9 +84,9 @@ public Operator<DistinctResultsBlock> run() {
       ExpressionContext expr = expressions.get(0);
 
       // JSON index path
-      if (JsonIndexDistinctOperator.canUseJsonIndexDistinct(_indexSegment, expr)) {
+      if (JsonIndexDistinctOperator.canUseJsonIndexDistinct(expr)) {
         BaseFilterOperator filterOperator = new FilterPlanNode(_segmentContext, _queryContext).run();
-        return new JsonIndexDistinctOperator(_indexSegment, _segmentContext, _queryContext, filterOperator);
+        return new JsonIndexDistinctOperator(_indexSegment, _queryContext, filterOperator);
       }
 
       // Inverted/sorted index path. For unsorted dictionaries the operator still avoids the scan/projection path,
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/query/InvertedIndexDistinctOperatorUnitTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/query/InvertedIndexDistinctOperatorUnitTest.java
deleted file mode 100644
index 48408cdcc9de..000000000000
--- a/pinot-core/src/test/java/org/apache/pinot/core/operator/query/InvertedIndexDistinctOperatorUnitTest.java
+++ /dev/null
@@ -1,233 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.core.operator.query;
-
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import org.apache.pinot.common.request.context.ExpressionContext;
-import org.apache.pinot.core.common.BlockDocIdSet;
-import org.apache.pinot.core.common.Operator;
-import org.apache.pinot.core.operator.BaseProjectOperator;
-import org.apache.pinot.core.operator.ColumnContext;
-import org.apache.pinot.core.operator.DocIdOrderedOperator.DocIdOrder;
-import org.apache.pinot.core.operator.ProjectionOperator;
-import org.apache.pinot.core.operator.ProjectionOperatorUtils;
-import org.apache.pinot.core.operator.blocks.ProjectionBlock;
-import org.apache.pinot.core.operator.blocks.results.DistinctResultsBlock;
-import org.apache.pinot.core.operator.docidsets.MatchAllDocIdSet;
-import org.apache.pinot.core.operator.filter.BaseFilterOperator;
-import org.apache.pinot.core.operator.filter.BitmapCollection;
-import org.apache.pinot.core.query.request.context.QueryContext;
-import org.apache.pinot.core.query.request.context.utils.QueryContextConverterUtils;
-import org.apache.pinot.segment.spi.IndexSegment;
-import org.apache.pinot.segment.spi.SegmentContext;
-import org.apache.pinot.segment.spi.SegmentMetadata;
-import org.apache.pinot.segment.spi.datasource.DataSource;
-import org.apache.pinot.segment.spi.datasource.DataSourceMetadata;
-import org.apache.pinot.segment.spi.index.reader.Dictionary;
-import org.apache.pinot.segment.spi.index.reader.InvertedIndexReader;
-import org.apache.pinot.spi.data.FieldSpec.DataType;
-import org.testng.annotations.Test;
-
-import static org.mockito.ArgumentMatchers.any;
-import static org.mockito.ArgumentMatchers.eq;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verifyNoInteractions;
-import static org.mockito.Mockito.when;
-import static org.testng.Assert.assertEquals;
-import static org.testng.Assert.assertNotNull;
-
-
-/**
- * Unit tests for {@link InvertedIndexDistinctOperator}.
- */
-public class InvertedIndexDistinctOperatorUnitTest {
-
-  @Test
-  public void testScanFallbackDoesNotMaterializeBitmapWhenCountIsAvailable() {
-    QueryContext queryContext = QueryContextConverterUtils.getQueryContext(
-        "SELECT DISTINCT testColumn FROM testTable "
-            + "OPTION(useIndexBasedDistinctOperator=true, invertedIndexDistinctCostRatio=1000)");
-
-    Dictionary dictionary = mock(Dictionary.class);
-    when(dictionary.length()).thenReturn(100);
-    when(dictionary.getValueType()).thenReturn(DataType.INT);
-
-    DataSourceMetadata dataSourceMetadata = mock(DataSourceMetadata.class);
-    when(dataSourceMetadata.getDataType()).thenReturn(DataType.INT);
-    when(dataSourceMetadata.isSingleValue()).thenReturn(true);
-
-    DataSource dataSource = mock(DataSource.class);
-    when(dataSource.getDictionary()).thenReturn(dictionary);
-    when(dataSource.getDataSourceMetadata()).thenReturn(dataSourceMetadata);
-    @SuppressWarnings("rawtypes")
-    InvertedIndexReader invertedIndexReader = mock(InvertedIndexReader.class);
-    when(dataSource.getInvertedIndex()).thenReturn(invertedIndexReader);
-
-    SegmentMetadata segmentMetadata = mock(SegmentMetadata.class);
-    when(segmentMetadata.getTotalDocs()).thenReturn(10);
-
-    IndexSegment indexSegment = mock(IndexSegment.class);
-    when(indexSegment.getSegmentMetadata()).thenReturn(segmentMetadata);
-    when(indexSegment.getDataSource(eq("testColumn"), any())).thenReturn(dataSource);
-
-    ColumnContext columnContext = ColumnContext.fromDataSource(dataSource);
-    ProjectionOperatorUtils.setImplementation((dataSourceMap, docIdSetOperator, ignoredQueryContext) ->
-        new EmptyProjectionOperator(ignoredQueryContext, "testColumn", columnContext));
-    try {
-      DistinctResultsBlock resultsBlock =
-          new InvertedIndexDistinctOperator(indexSegment, new SegmentContext(indexSegment), queryContext,
-              new CountOptimizedBitmapCapableFilterOperator(10, 5), dataSource).nextBlock();
-
-      assertNotNull(resultsBlock);
-      verifyNoInteractions(invertedIndexReader);
-    } finally {
-      ProjectionOperatorUtils.setImplementation(new ProjectionOperatorUtils.DefaultImplementation());
-    }
-  }
-
-  @Test
-  public void testEmptyCountOptimizedFilterShortCircuitsWithoutProjection() {
-    QueryContext queryContext = QueryContextConverterUtils.getQueryContext(
-        "SELECT DISTINCT testColumn FROM testTable "
-            + "OPTION(useIndexBasedDistinctOperator=true)");
-
-    Dictionary dictionary = mock(Dictionary.class);
-    when(dictionary.length()).thenReturn(100);
-    when(dictionary.getValueType()).thenReturn(DataType.INT);
-
-    DataSourceMetadata dataSourceMetadata = mock(DataSourceMetadata.class);
-    when(dataSourceMetadata.getDataType()).thenReturn(DataType.INT);
-    when(dataSourceMetadata.isSingleValue()).thenReturn(true);
-
-    DataSource dataSource = mock(DataSource.class);
-    when(dataSource.getDictionary()).thenReturn(dictionary);
-    when(dataSource.getDataSourceMetadata()).thenReturn(dataSourceMetadata);
-    @SuppressWarnings("rawtypes")
-    InvertedIndexReader invertedIndexReader = mock(InvertedIndexReader.class);
-    when(dataSource.getInvertedIndex()).thenReturn(invertedIndexReader);
-
-    SegmentMetadata segmentMetadata = mock(SegmentMetadata.class);
-    when(segmentMetadata.getTotalDocs()).thenReturn(10);
-
-    IndexSegment indexSegment = mock(IndexSegment.class);
-    when(indexSegment.getSegmentMetadata()).thenReturn(segmentMetadata);
-    when(indexSegment.getDataSource(eq("testColumn"), any())).thenReturn(dataSource);
-
-    ProjectionOperatorUtils.setImplementation((dataSourceMap, docIdSetOperator, ignoredQueryContext) -> {
-      throw new AssertionError("Empty result should short-circuit before building projection");
-    });
-    try {
-      DistinctResultsBlock resultsBlock =
-          new InvertedIndexDistinctOperator(indexSegment, new SegmentContext(indexSegment), queryContext,
-              new CountOptimizedFilterOperator(10, 0), dataSource).nextBlock();
-
-      assertNotNull(resultsBlock);
-      assertEquals(resultsBlock.getNumRows(), 0);
-      verifyNoInteractions(invertedIndexReader);
-    } finally {
-      ProjectionOperatorUtils.setImplementation(new ProjectionOperatorUtils.DefaultImplementation());
-    }
-  }
-
-  private static class CountOptimizedFilterOperator extends BaseFilterOperator {
-    private final int _numMatchingDocs;
-
-    private CountOptimizedFilterOperator(int numDocs, int numMatchingDocs) {
-      super(numDocs, false);
-      _numMatchingDocs = numMatchingDocs;
-    }
-
-    @Override
-    public boolean canOptimizeCount() {
-      return true;
-    }
-
-    @Override
-    public int getNumMatchingDocs() {
-      return _numMatchingDocs;
-    }
-
-    @Override
-    public FilteredDocIds getFilteredDocIds() {
-      throw new AssertionError("Scan fallback should not materialize filtered doc ids for count-optimized filters");
-    }
-
-    @Override
-    protected BlockDocIdSet getTrues() {
-      return new MatchAllDocIdSet(_numDocs);
-    }
-
-    @Override
-    public String toExplainString() {
-      return "COUNT_OPTIMIZED_TEST_FILTER";
-    }
-
-    @Override
-    public List<? extends Operator> getChildOperators() {
-      return Collections.emptyList();
-    }
-  }
-
-  private static final class CountOptimizedBitmapCapableFilterOperator extends CountOptimizedFilterOperator {
-    private CountOptimizedBitmapCapableFilterOperator(int numDocs, int numMatchingDocs) {
-      super(numDocs, numMatchingDocs);
-    }
-
-    @Override
-    public boolean canProduceBitmaps() {
-      return true;
-    }
-
-    @Override
-    public BitmapCollection getBitmaps() {
-      throw new AssertionError("Count-optimized filters should not eagerly materialize bitmaps");
-    }
-  }
-
-  private static final class EmptyProjectionOperator extends ProjectionOperator {
-    private final Map<String, ColumnContext> _columnContextMap;
-
-    private EmptyProjectionOperator(QueryContext queryContext, String column, ColumnContext columnContext) {
-      super(Collections.emptyMap(), null, queryContext);
-      _columnContextMap = Map.of(column, columnContext);
-    }
-
-    @Override
-    public Map<String, ColumnContext> getSourceColumnContextMap() {
-      return _columnContextMap;
-    }
-
-    @Override
-    public ColumnContext getResultColumnContext(ExpressionContext expression) {
-      return _columnContextMap.get(expression.getIdentifier());
-    }
-
-    @Override
-    protected ProjectionBlock getNextBlock() {
-      return null;
-    }
-
-    @Override
-    public BaseProjectOperator<ProjectionBlock> withOrder(DocIdOrder newOrder) {
-      return this;
-    }
-  }
-}
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperatorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperatorTest.java
deleted file mode 100644
index bb8431d0707a..000000000000
--- a/pinot-core/src/test/java/org/apache/pinot/core/operator/query/JsonIndexDistinctOperatorTest.java
+++ /dev/null
@@ -1,306 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.core.operator.query;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
-import org.apache.pinot.core.common.Operator;
-import org.apache.pinot.core.operator.blocks.results.DistinctResultsBlock;
-import org.apache.pinot.core.operator.filter.BaseFilterOperator;
-import org.apache.pinot.core.operator.filter.BitmapCollection;
-import org.apache.pinot.core.query.request.context.QueryContext;
-import org.apache.pinot.core.query.request.context.utils.QueryContextConverterUtils;
-import org.apache.pinot.segment.spi.IndexSegment;
-import org.apache.pinot.segment.spi.SegmentContext;
-import org.apache.pinot.segment.spi.SegmentMetadata;
-import org.apache.pinot.segment.spi.datasource.DataSource;
-import org.apache.pinot.segment.spi.index.reader.JsonIndexReader;
-import org.roaringbitmap.RoaringBitmap;
-import org.roaringbitmap.buffer.MutableRoaringBitmap;
-import org.testng.annotations.Test;
-
-import static org.mockito.ArgumentMatchers.any;
-import static org.mockito.ArgumentMatchers.eq;
-import static org.mockito.Mockito.doAnswer;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.never;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.when;
-import static org.testng.Assert.assertEquals;
-import static org.testng.Assert.assertFalse;
-import static org.testng.Assert.assertTrue;
-import static org.testng.Assert.expectThrows;
-
-
-/**
- * Unit tests for {@link JsonIndexDistinctOperator}.
- */
-public class JsonIndexDistinctOperatorTest {
-  private static final String STRING_EXTRACT = "JSON_EXTRACT_INDEX(tags, '$.instance', 'STRING')";
-  private static final String STRING_EXTRACT_WITH_EMPTY_DEFAULT =
-      "JSON_EXTRACT_INDEX(tags, '$.instance', 'STRING', '')";
-  private static final String STRING_EXTRACT_WITH_DEFAULT =
-      "JSON_EXTRACT_INDEX(tags, '$.instance', 'STRING', 'missing')";
-  private static final String INVALID_INT_DEFAULT_EXTRACT =
-      "JSON_EXTRACT_INDEX(tags, '$.instance', 'INT', 'abc')";
-  private static final String SAME_PATH_FILTER = "REGEXP_LIKE(\"$.instance\", '.*test.*')";
-  private static final String CROSS_PATH_FILTER = "REGEXP_LIKE(\"$.env\", 'prod.*')";
-  private static final String SAME_PATH_IS_NULL_FILTER = "\"$.instance\" IS NULL";
-
-  @Test
-  public void testSamePathJsonMatchUsesDistinctValuesFastPathForFourArgScalarForm() {
-    QueryContext queryContext = distinctQuery(STRING_EXTRACT_WITH_EMPTY_DEFAULT, SAME_PATH_FILTER);
-
-    JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class);
-    when(jsonIndexReader.getMatchingDistinctValues("$.instance", SAME_PATH_FILTER))
-        .thenReturn(Set.of("test-east", "test-west"));
-
-    DistinctResultsBlock resultsBlock =
-        buildOperator(queryContext, jsonIndexReader, bufferBitmap(0, 1), 2).nextBlock();
-
-    assertEquals(extractValues(resultsBlock), Set.of("test-east", "test-west"));
-    verify(jsonIndexReader).getMatchingDistinctValues("$.instance", SAME_PATH_FILTER);
-    verify(jsonIndexReader, never()).getMatchingFlattenedDocsMap(any(), any());
-    verify(jsonIndexReader, never()).convertFlattenedDocIdsToDocIds(any());
-  }
-
-  @Test
-  public void testSamePathJsonMatchUsesDistinctValuesFastPathForThreeArgScalarForm() {
-    QueryContext queryContext = distinctQuery(STRING_EXTRACT, SAME_PATH_FILTER);
-
-    JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class);
-    when(jsonIndexReader.getMatchingDistinctValues("$.instance", SAME_PATH_FILTER))
-        .thenReturn(Set.of("test-east", "test-west"));
-
-    DistinctResultsBlock resultsBlock =
-        buildOperator(queryContext, jsonIndexReader, bufferBitmap(0, 1), 2).nextBlock();
-
-    assertEquals(extractValues(resultsBlock), Set.of("test-east", "test-west"));
-    verify(jsonIndexReader).getMatchingDistinctValues("$.instance", SAME_PATH_FILTER);
-    verify(jsonIndexReader, never()).getMatchingFlattenedDocsMap(any(), any());
-    verify(jsonIndexReader, never()).convertFlattenedDocIdsToDocIds(any());
-  }
-
-  @Test
-  public void testDifferentPathJsonMatchIsAppliedAtDocLevel() {
-    QueryContext queryContext = distinctQuery(STRING_EXTRACT, CROSS_PATH_FILTER);
-
-    JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class);
-    Map<String, RoaringBitmap> flattenedDocsByValue = new HashMap<>();
-    flattenedDocsByValue.put("prod-a", bitmap(100));
-    flattenedDocsByValue.put("prod-b", bitmap(200));
-    flattenedDocsByValue.put("other-doc", bitmap(300));
-    when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", null)).thenReturn(flattenedDocsByValue);
-    stubConvertedDocIds(jsonIndexReader, Map.of("prod-a", bitmap(0), "prod-b", bitmap(1), "other-doc", bitmap(2)));
-
-    DistinctResultsBlock resultsBlock =
-        buildOperator(queryContext, jsonIndexReader, bufferBitmap(0, 1), 3).nextBlock();
-
-    assertEquals(extractValues(resultsBlock), Set.of("prod-a", "prod-b"));
-    verify(jsonIndexReader).getMatchingFlattenedDocsMap("$.instance", null);
-    verify(jsonIndexReader, never()).getMatchingFlattenedDocsMap("$.instance",
-        "REGEXP_LIKE(\"$.env\", ''prod.*'')");
-    verify(jsonIndexReader).convertFlattenedDocIdsToDocIds(any());
-  }
-
-  @Test
-  public void testCanUseJsonIndexDistinctAllowsThreeArgScalarForm() {
-    QueryContext queryContext = distinctQuery(STRING_EXTRACT, CROSS_PATH_FILTER);
-
-    JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class);
-    when(jsonIndexReader.isPathIndexed("$.instance")).thenReturn(true);
-    IndexSegment indexSegment = buildCanUseIndexSegment(jsonIndexReader);
-
-    assertTrue(JsonIndexDistinctOperator.canUseJsonIndexDistinct(indexSegment,
-        queryContext.getSelectExpressions().get(0)));
-  }
-
-  @Test
-  public void testCanUseJsonIndexDistinctAllowsFourArgScalarForm() {
-    QueryContext queryContext = distinctQuery(STRING_EXTRACT_WITH_EMPTY_DEFAULT, CROSS_PATH_FILTER);
-
-    JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class);
-    when(jsonIndexReader.isPathIndexed("$.instance")).thenReturn(true);
-    IndexSegment indexSegment = buildCanUseIndexSegment(jsonIndexReader);
-
-    assertTrue(JsonIndexDistinctOperator.canUseJsonIndexDistinct(indexSegment,
-        queryContext.getSelectExpressions().get(0)));
-  }
-
-  @Test
-  public void testCanUseJsonIndexDistinctRejectsInvalidDefaultArgument() {
-    QueryContext queryContext = distinctQuery(INVALID_INT_DEFAULT_EXTRACT, CROSS_PATH_FILTER);
-
-    JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class);
-    when(jsonIndexReader.isPathIndexed("$.instance")).thenReturn(true);
-    IndexSegment indexSegment = buildCanUseIndexSegment(jsonIndexReader);
-
-    assertFalse(JsonIndexDistinctOperator.canUseJsonIndexDistinct(indexSegment,
-        queryContext.getSelectExpressions().get(0)));
-  }
-
-  @Test
-  public void testFourArgAddsDefaultForDocsWithoutJsonPath() {
-    QueryContext queryContext = distinctQuery(STRING_EXTRACT_WITH_DEFAULT, CROSS_PATH_FILTER);
-
-    JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class);
-    Map<String, RoaringBitmap> flattenedDocsByValue = new HashMap<>();
-    flattenedDocsByValue.put("prod-a", bitmap(100));
-    flattenedDocsByValue.put("prod-b", bitmap(200));
-    when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", null)).thenReturn(flattenedDocsByValue);
-    stubConvertedDocIds(jsonIndexReader, Map.of("prod-a", bitmap(0), "prod-b", bitmap(1)));
-
-    DistinctResultsBlock resultsBlock =
-        buildOperator(queryContext, jsonIndexReader, bufferBitmap(0, 1, 2), 3).nextBlock();
-
-    assertEquals(extractValues(resultsBlock), Set.of("prod-a", "prod-b", "missing"));
-  }
-
-  @Test
-  public void testSamePathIsNullStillAddsDefaultForMissingPath() {
-    QueryContext queryContext = distinctQuery(STRING_EXTRACT_WITH_DEFAULT, SAME_PATH_IS_NULL_FILTER);
-
-    JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class);
-    when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", SAME_PATH_IS_NULL_FILTER)).thenReturn(
-        new HashMap<>());
-
-    DistinctResultsBlock resultsBlock =
-        buildOperator(queryContext, jsonIndexReader, bufferBitmap(2), 3).nextBlock();
-
-    assertEquals(extractValues(resultsBlock), Set.of("missing"));
-    verify(jsonIndexReader).getMatchingFlattenedDocsMap("$.instance", SAME_PATH_IS_NULL_FILTER);
-    verify(jsonIndexReader).convertFlattenedDocIdsToDocIds(any());
-  }
-
-  @Test
-  public void testMissingPathWithoutDefaultThrows() {
-    QueryContext queryContext = distinctQuery(STRING_EXTRACT, SAME_PATH_IS_NULL_FILTER);
-
-    JsonIndexReader jsonIndexReader = mock(JsonIndexReader.class);
-    when(jsonIndexReader.getMatchingFlattenedDocsMap("$.instance", SAME_PATH_IS_NULL_FILTER)).thenReturn(
-        new HashMap<>());
-
-    RuntimeException exception = expectThrows(RuntimeException.class,
-        () -> buildOperator(queryContext, jsonIndexReader, bufferBitmap(2), 3).nextBlock());
-
-    assertTrue(exception.getMessage().contains("Illegal Json Path"));
-  }
-
-  private static QueryContext distinctQuery(String expression, String filterJsonString) {
-    return QueryContextConverterUtils.getQueryContext(
-        "SELECT DISTINCT " + expression + " AS tag_value FROM myTable WHERE JSON_MATCH(tags, '"
-            + filterJsonString.replace("'", "''") + "')");
-  }
-
-  private static void stubConvertedDocIds(JsonIndexReader jsonIndexReader,
-      Map<String, RoaringBitmap> convertedDocIds) {
-    doAnswer(invocation -> {
-      @SuppressWarnings("unchecked")
-      Map<String, RoaringBitmap> docsByValue = (Map<String, RoaringBitmap>) invocation.getArgument(0);
-      docsByValue.clear();
-      docsByValue.putAll(convertedDocIds);
-      return null;
-    }).when(jsonIndexReader).convertFlattenedDocIdsToDocIds(any());
-  }
-
-  private static IndexSegment buildCanUseIndexSegment(JsonIndexReader jsonIndexReader) {
-    DataSource dataSource = mock(DataSource.class);
-    when(dataSource.getJsonIndex()).thenReturn(jsonIndexReader);
-
-    IndexSegment indexSegment = mock(IndexSegment.class);
-    when(indexSegment.getDataSourceNullable("tags")).thenReturn(dataSource);
-    return indexSegment;
-  }
-
-  private static JsonIndexDistinctOperator buildOperator(QueryContext queryContext, JsonIndexReader jsonIndexReader,
-      MutableRoaringBitmap filterBitmap, int numDocs) {
-    SegmentMetadata segmentMetadata = mock(SegmentMetadata.class);
-    when(segmentMetadata.getTotalDocs()).thenReturn(numDocs);
-
-    DataSource dataSource = mock(DataSource.class);
-    when(dataSource.getJsonIndex()).thenReturn(jsonIndexReader);
-
-    IndexSegment indexSegment = mock(IndexSegment.class);
-    when(indexSegment.getSegmentMetadata()).thenReturn(segmentMetadata);
-    when(indexSegment.getSegmentName()).thenReturn("testSegment");
-    when(indexSegment.getDataSource(eq("tags"), any())).thenReturn(dataSource);
-    when(indexSegment.getDataSourceNullable("tags")).thenReturn(dataSource);
-
-    return new JsonIndexDistinctOperator(indexSegment, new SegmentContext(indexSegment), queryContext,
-        new StaticBitmapFilterOperator(numDocs, filterBitmap));
-  }
-
-  private static RoaringBitmap bitmap(int... docIds) {
-    RoaringBitmap bitmap = new RoaringBitmap();
-    for (int docId : docIds) {
-      bitmap.add(docId);
-    }
-    return bitmap;
-  }
-
-  private static MutableRoaringBitmap bufferBitmap(int... docIds) {
-    MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
-    for (int docId : docIds) {
-      bitmap.add(docId);
-    }
-    return bitmap;
-  }
-
-  private static Set<String> extractValues(DistinctResultsBlock resultsBlock) {
-    List<Object[]> rows = resultsBlock.getRows();
-    return rows.stream().map(row -> (String) row[0]).collect(Collectors.toSet());
-  }
-
-  private static final class StaticBitmapFilterOperator extends BaseFilterOperator {
-    private final MutableRoaringBitmap _bitmap;
-
-    StaticBitmapFilterOperator(int numDocs, MutableRoaringBitmap bitmap) {
-      super(numDocs, false);
-      _bitmap = bitmap;
-    }
-
-    @Override
-    public boolean canProduceBitmaps() {
-      return true;
-    }
-
-    @Override
-    public BitmapCollection getBitmaps() {
-      return new BitmapCollection(_numDocs, false, _bitmap);
-    }
-
-    @Override
-    public List<Operator> getChildOperators() {
-      return List.of();
-    }
-
-    @Override
-    protected org.apache.pinot.core.common.BlockDocIdSet getTrues() {
-      throw new UnsupportedOperationException("Bitmap path only");
-    }
-
-    @Override
-    public String toExplainString() {
-      return "STATIC_BITMAP_FILTER";
-    }
-  }
-}
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/InvertedIndexDistinctOperatorTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/InvertedIndexDistinctOperatorQueriesTest.java
similarity index 73%
rename from pinot-core/src/test/java/org/apache/pinot/queries/InvertedIndexDistinctOperatorTest.java
rename to pinot-core/src/test/java/org/apache/pinot/queries/InvertedIndexDistinctOperatorQueriesTest.java
index 78708dd79b05..f7744fda7dcf 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/InvertedIndexDistinctOperatorTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/InvertedIndexDistinctOperatorQueriesTest.java
@@ -27,6 +27,7 @@
 import org.apache.commons.io.FileUtils;
 import org.apache.pinot.common.response.broker.ResultTable;
 import org.apache.pinot.core.operator.BaseOperator;
+import org.apache.pinot.core.operator.ExecutionStatistics;
 import org.apache.pinot.core.operator.blocks.results.DistinctResultsBlock;
 import org.apache.pinot.core.query.distinct.table.DistinctTable;
 import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
@@ -54,30 +55,36 @@
 import static org.testng.Assert.assertTrue;
 
 
-/**
- * Tests for {@link org.apache.pinot.core.operator.query.InvertedIndexDistinctOperator}.
- *
- * <p>Six segments exercise distinct operator features:
- * <ul>
- *   <li><b>INT segment</b>: 10K records, 100 unique INT values (interleaved), inverted index.
- *       Tests cost heuristic path selection and inverted-vs-scan correctness.</li>
- *   <li><b>MV segment</b>: 1K records, MV INT column (50 unique values), inverted index.
- *       Tests multi-value column support.</li>
- *   <li><b>Sorted segment</b>: 10K records, sorted INT column (100 unique), sorted forward index.
- *       Tests sorted index path.</li>
- *   <li><b>Mutable segment</b>: consuming segment with unsorted dictionary + inverted index.
- *       Tests ORDER BY correctness without relying on sorted dictIds.</li>
- *   <li><b>STRING segment</b>: 5K records, STRING column (50 unique), inverted index.
- *       Tests STRING data type handling.</li>
- *   <li><b>Null segment</b>: 1K records, INT column with nulls, inverted index.
- *       Tests null handling.</li>
- * </ul>
- */
-public class InvertedIndexDistinctOperatorTest extends BaseQueriesTest {
+/// Tests for [org.apache.pinot.core.operator.query.InvertedIndexDistinctOperator].
+///
+/// Six segments exercise distinct operator features:
+/// - **INT segment**: 10K records, 100 unique INT values (interleaved), inverted index.
+///   Tests cost heuristic path selection and inverted-vs-scan correctness.
+/// - **MV segment**: 1K records, MV INT column (50 unique values), inverted index.
+///   Tests multi-value column support.
+/// - **Sorted segment**: 10K records, sorted INT column (100 unique), sorted forward index.
+///   Tests sorted index path.
+/// - **Mutable segment**: consuming segment with unsorted dictionary + inverted index.
+///   Tests ORDER BY correctness without relying on sorted dictIds.
+/// - **STRING segment**: 5K records, STRING column (50 unique), inverted index.
+///   Tests STRING data type handling.
+/// - **Null segment**: 1K records, INT column with nulls, inverted index.
+///   Tests null handling.
+public class InvertedIndexDistinctOperatorQueriesTest extends BaseQueriesTest {
   private static final File INDEX_DIR =
-      new File(FileUtils.getTempDirectory(), "InvertedIndexDistinctOperatorTest");
+      new File(FileUtils.getTempDirectory(), "InvertedIndexDistinctOperatorQueriesTest");
   private static final String RAW_TABLE_NAME = "testTable";
 
+  private static final String OPT = "SET useIndexBasedDistinctOperator=true; ";
+  private static final String OPT_INV = OPT + "SET invertedIndexDistinctCostRatio=0; ";
+  private static final String OPT_SCAN = OPT + "SET invertedIndexDistinctCostRatio=100000; ";
+  private static final String OPT_INV_NULLS = OPT_INV + "SET enableNullHandling=true; ";
+  private static final String OPT_SCAN_NULLS = OPT_SCAN + "SET enableNullHandling=true; ";
+
+  private static String optWithRatio(int ratio) {
+    return OPT + "SET invertedIndexDistinctCostRatio=" + ratio + "; ";
+  }
+
   // Active segment — swapped per test group
   private IndexSegment _activeSegment;
   private final List<IndexSegment> _allSegments = new ArrayList<>();
@@ -386,58 +393,40 @@ private boolean containsNull(ResultTable resultTable) {
     return false;
   }
 
-  private static final String OPT = "OPTION(useIndexBasedDistinctOperator=true";
-  private static final String OPT_INV = OPT + ", invertedIndexDistinctCostRatio=1)";
-  private static final String OPT_SCAN = OPT + ", invertedIndexDistinctCostRatio=100000)";
-
   // ==================== Cost Heuristic Tests ====================
 
   @Test
   public void testCostRatioPathSelection() {
     _activeSegment = _intSegment;
+    String wideQuery = "SELECT DISTINCT intColumn FROM testTable WHERE intColumn >= 0";
+    String selectiveQuery = "SELECT DISTINCT intColumn FROM testTable WHERE intColumn = 0";
 
     // Without the query option → old DistinctOperator
-    assertFalse(usedInvertedIndex(runDistinct(
-        "SELECT DISTINCT intColumn FROM testTable WHERE intColumn >= 0")));
+    assertFalse(usedInvertedIndex(runDistinct(wideQuery)));
 
     // costRatio=1, wide filter (10K docs): 100*1 <= 10000 → inverted
-    assertTrue(usedInvertedIndex(runDistinct(
-        "SELECT DISTINCT intColumn FROM testTable WHERE intColumn >= 0 "
-            + OPT + ", invertedIndexDistinctCostRatio=1)")));
+    assertTrue(usedInvertedIndex(runDistinct(optWithRatio(1) + wideQuery)));
 
     // costRatio=200, wide filter: 100*200=20000 > 10000 → scan
-    assertFalse(usedInvertedIndex(runDistinct(
-        "SELECT DISTINCT intColumn FROM testTable WHERE intColumn >= 0 "
-            + OPT + ", invertedIndexDistinctCostRatio=200)")));
+    assertFalse(usedInvertedIndex(runDistinct(optWithRatio(200) + wideQuery)));
 
     // costRatio=1, selective filter (100 docs): 100*1 <= 100 → inverted
-    assertTrue(usedInvertedIndex(runDistinct(
-        "SELECT DISTINCT intColumn FROM testTable WHERE intColumn = 0 "
-            + OPT + ", invertedIndexDistinctCostRatio=1)")));
+    assertTrue(usedInvertedIndex(runDistinct(optWithRatio(1) + selectiveQuery)));
 
     // costRatio=2, selective filter: 100*2=200 > 100 → scan
-    assertFalse(usedInvertedIndex(runDistinct(
-        "SELECT DISTINCT intColumn FROM testTable WHERE intColumn = 0 "
-            + OPT + ", invertedIndexDistinctCostRatio=2)")));
+    assertFalse(usedInvertedIndex(runDistinct(optWithRatio(2) + selectiveQuery)));
 
     // costRatio=0: force inverted index for non-empty filters
-    assertTrue(usedInvertedIndex(runDistinct(
-        "SELECT DISTINCT intColumn FROM testTable WHERE intColumn = 0 "
-            + OPT + ", invertedIndexDistinctCostRatio=0)")));
+    assertTrue(usedInvertedIndex(runDistinct(optWithRatio(0) + selectiveQuery)));
 
     // Default costRatio=30: 100*30=3000 <= 10K → inverted
-    assertTrue(usedInvertedIndex(runDistinct(
-        "SELECT DISTINCT intColumn FROM testTable WHERE intColumn >= 0 " + OPT + ")")));
+    assertTrue(usedInvertedIndex(runDistinct(OPT + wideQuery)));
 
     // Boundary: costRatio=100: 100*100=10000 <= 10000 → inverted
-    assertTrue(usedInvertedIndex(runDistinct(
-        "SELECT DISTINCT intColumn FROM testTable WHERE intColumn >= 0 "
-            + OPT + ", invertedIndexDistinctCostRatio=100)")));
+    assertTrue(usedInvertedIndex(runDistinct(optWithRatio(100) + wideQuery)));
 
     // Above boundary: costRatio=101: 100*101=10100 > 10000 → scan
-    assertFalse(usedInvertedIndex(runDistinct(
-        "SELECT DISTINCT intColumn FROM testTable WHERE intColumn >= 0 "
-            + OPT + ", invertedIndexDistinctCostRatio=101)")));
+    assertFalse(usedInvertedIndex(runDistinct(optWithRatio(101) + wideQuery)));
   }
 
   @Test
@@ -445,15 +434,13 @@ public void testInvertedIndexVsScanCorrectness() {
     _activeSegment = _intSegment;
 
     // With ORDER BY
-    BaseOperator<DistinctResultsBlock> invertedOp = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE intColumn IN "
-            + "(0,1,2,3,4,5,6,7,8,9) ORDER BY intColumn LIMIT 100 " + OPT_INV);
+    String orderByQuery = "SELECT DISTINCT intColumn FROM testTable WHERE intColumn IN (0,1,2,3,4,5,6,7,8,9) "
+        + "ORDER BY intColumn LIMIT 100";
+    BaseOperator<DistinctResultsBlock> invertedOp = getOperator(OPT_INV + orderByQuery);
     DistinctTable invertedTable = invertedOp.nextBlock().getDistinctTable();
     assertTrue(usedInvertedIndex(invertedOp));
 
-    BaseOperator<DistinctResultsBlock> scanOp = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE intColumn IN "
-            + "(0,1,2,3,4,5,6,7,8,9) ORDER BY intColumn LIMIT 100 " + OPT_SCAN);
+    BaseOperator<DistinctResultsBlock> scanOp = getOperator(OPT_SCAN + orderByQuery);
     DistinctTable scanTable = scanOp.nextBlock().getDistinctTable();
     assertFalse(usedInvertedIndex(scanOp));
 
@@ -465,12 +452,11 @@ public void testInvertedIndexVsScanCorrectness() {
     assertEquals(extractIntValues(scanTable), expected);
 
     // Without ORDER BY — same count
-    BaseOperator<DistinctResultsBlock> inv2 = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE intColumn >= 0 LIMIT 200 " + OPT_INV);
+    String noOrderByQuery = "SELECT DISTINCT intColumn FROM testTable WHERE intColumn >= 0 LIMIT 200";
+    BaseOperator<DistinctResultsBlock> inv2 = getOperator(OPT_INV + noOrderByQuery);
     assertEquals(inv2.nextBlock().getDistinctTable().size(), INT_NUM_UNIQUE);
 
-    BaseOperator<DistinctResultsBlock> scan2 = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE intColumn >= 0 LIMIT 200 " + OPT_SCAN);
+    BaseOperator<DistinctResultsBlock> scan2 = getOperator(OPT_SCAN + noOrderByQuery);
     assertEquals(scan2.nextBlock().getDistinctTable().size(), INT_NUM_UNIQUE);
   }
 
@@ -481,7 +467,7 @@ public void testMvColumnWithFilter() {
     _activeSegment = _mvSegment;
 
     BaseOperator<DistinctResultsBlock> op = getOperator(
-        "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn < 500 LIMIT 1000 " + OPT_INV);
+        OPT_INV + "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn < 500 LIMIT 1000");
     DistinctTable table = op.nextBlock().getDistinctTable();
     assertTrue(usedInvertedIndex(op));
     assertEquals(extractIntValues(table), _filteredMvValues);
@@ -490,16 +476,14 @@ public void testMvColumnWithFilter() {
   @Test
   public void testMvColumnInvertedVsScan() {
     _activeSegment = _mvSegment;
+    String query = "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn < 500 "
+        + "ORDER BY mvIntColumn LIMIT 1000";
 
-    BaseOperator<DistinctResultsBlock> invertedOp = getOperator(
-        "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn < 500 "
-            + "ORDER BY mvIntColumn LIMIT 1000 " + OPT_INV);
+    BaseOperator<DistinctResultsBlock> invertedOp = getOperator(OPT_INV + query);
     DistinctTable invertedTable = invertedOp.nextBlock().getDistinctTable();
     assertTrue(usedInvertedIndex(invertedOp));
 
-    BaseOperator<DistinctResultsBlock> scanOp = getOperator(
-        "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn < 500 "
-            + "ORDER BY mvIntColumn LIMIT 1000 " + OPT_SCAN);
+    BaseOperator<DistinctResultsBlock> scanOp = getOperator(OPT_SCAN + query);
     DistinctTable scanTable = scanOp.nextBlock().getDistinctTable();
     assertFalse(usedInvertedIndex(scanOp));
 
@@ -511,7 +495,7 @@ public void testMvColumnMatchAll() {
     _activeSegment = _mvSegment;
 
     BaseOperator<DistinctResultsBlock> matchAllOp = getOperator(
-        "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn >= 0 LIMIT 1000 " + OPT_INV);
+        OPT_INV + "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn >= 0 LIMIT 1000");
     DistinctTable matchAllTable = matchAllOp.nextBlock().getDistinctTable();
     assertEquals(extractIntValues(matchAllTable), _allMvValues);
   }
@@ -521,20 +505,18 @@ public void testMvColumnLimit() {
     _activeSegment = _mvSegment;
 
     BaseOperator<DistinctResultsBlock> limitOp = getOperator(
-        "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn >= 0 LIMIT 10 " + OPT_INV);
+        OPT_INV + "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn >= 0 LIMIT 10");
     assertEquals(limitOp.nextBlock().getDistinctTable().size(), 10);
   }
 
   @Test
   public void testMvColumnOrderByDesc() {
     _activeSegment = _mvSegment;
+    String query = "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn < 500 "
+        + "ORDER BY mvIntColumn DESC LIMIT 1000";
 
-    BaseOperator<DistinctResultsBlock> descOp = getOperator(
-        "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn < 500 "
-            + "ORDER BY mvIntColumn DESC LIMIT 1000 " + OPT_INV);
-    BaseOperator<DistinctResultsBlock> descScanOp = getOperator(
-        "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn < 500 "
-            + "ORDER BY mvIntColumn DESC LIMIT 1000 " + OPT_SCAN);
+    BaseOperator<DistinctResultsBlock> descOp = getOperator(OPT_INV + query);
+    BaseOperator<DistinctResultsBlock> descScanOp = getOperator(OPT_SCAN + query);
     assertEquals(extractOrderedIntValues(descOp.nextBlock().getDistinctTable().toResultTable()),
         extractOrderedIntValues(descScanOp.nextBlock().getDistinctTable().toResultTable()));
   }
@@ -544,8 +526,8 @@ public void testMvColumnSelectiveFilter() {
     _activeSegment = _mvSegment;
 
     BaseOperator<DistinctResultsBlock> selectiveOp = getOperator(
-        "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn < 3 "
-            + "ORDER BY mvIntColumn LIMIT 100 " + OPT_INV);
+        OPT_INV + "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn < 3 "
+            + "ORDER BY mvIntColumn LIMIT 100");
     assertEquals(extractIntValues(selectiveOp.nextBlock().getDistinctTable()),
         new HashSet<>(Arrays.asList(0, 1, 2, 3)));
   }
@@ -555,7 +537,7 @@ public void testMvColumnEmptyFilter() {
     _activeSegment = _mvSegment;
 
     BaseOperator<DistinctResultsBlock> emptyOp = getOperator(
-        "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn > 99999 LIMIT 1000 " + OPT_INV);
+        OPT_INV + "SELECT DISTINCT mvIntColumn FROM testTable WHERE svFilterColumn > 99999 LIMIT 1000");
     assertEquals(emptyOp.nextBlock().getDistinctTable().size(), 0);
   }
 
@@ -567,7 +549,7 @@ public void testSortedColumnPath() {
 
     // Should use sorted index path
     BaseOperator<DistinctResultsBlock> op = getOperator(
-        "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn >= 0 LIMIT 1000 " + OPT + ")");
+        OPT + "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn >= 0 LIMIT 1000");
     DistinctTable table = op.nextBlock().getDistinctTable();
     assertTrue(usedSortedIndex(op));
     assertEquals(table.size(), SORTED_NUM_UNIQUE);
@@ -579,7 +561,7 @@ public void testSortedColumnFilters() {
 
     // Selective filter: filterColumn < 500 → sorted values 0..4
     BaseOperator<DistinctResultsBlock> selOp = getOperator(
-        "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn < 500 LIMIT 1000 " + OPT + ")");
+        OPT + "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn < 500 LIMIT 1000");
     Set<Integer> expected = new HashSet<>();
     for (int i = 0; i < 5; i++) {
       expected.add(i);
@@ -588,13 +570,13 @@ public void testSortedColumnFilters() {
 
     // Sparse filter: filterColumn=50 (value 0) OR filterColumn=150 (value 1)
     BaseOperator<DistinctResultsBlock> sparseOp = getOperator(
-        "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn = 50 OR filterColumn = 150 "
-            + "LIMIT 1000 " + OPT + ")");
+        OPT + "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn = 50 OR filterColumn = 150 "
+            + "LIMIT 1000");
     assertEquals(extractIntValues(sparseOp.nextBlock().getDistinctTable()), Set.of(0, 1));
 
     // Empty filter
     BaseOperator<DistinctResultsBlock> emptyOp = getOperator(
-        "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn > 99999 LIMIT 1000 " + OPT + ")");
+        OPT + "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn > 99999 LIMIT 1000");
     DistinctTable emptyTable = emptyOp.nextBlock().getDistinctTable();
     assertTrue(usedSortedIndex(emptyOp));
     assertEquals(emptyTable.size(), 0);
@@ -605,18 +587,17 @@ public void testSortedColumnLimit() {
     _activeSegment = _sortedSegment;
 
     BaseOperator<DistinctResultsBlock> limitOp = getOperator(
-        "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn >= 0 LIMIT 10 " + OPT + ")");
+        OPT + "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn >= 0 LIMIT 10");
     assertEquals(limitOp.nextBlock().getDistinctTable().size(), 10);
   }
 
   @Test
   public void testSortedColumnMatchesScan() {
     _activeSegment = _sortedSegment;
+    String query = "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn < 500 LIMIT 1000";
 
-    BaseOperator<DistinctResultsBlock> sortedOp = getOperator(
-        "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn < 500 LIMIT 1000 " + OPT + ")");
-    BaseOperator<DistinctResultsBlock> scanOp = getOperator(
-        "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn < 500 LIMIT 1000");
+    BaseOperator<DistinctResultsBlock> sortedOp = getOperator(OPT + query);
+    BaseOperator<DistinctResultsBlock> scanOp = getOperator(query);
     assertEquals(extractIntValues(sortedOp.nextBlock().getDistinctTable()),
         extractIntValues(scanOp.nextBlock().getDistinctTable()));
   }
@@ -624,13 +605,11 @@ public void testSortedColumnMatchesScan() {
   @Test
   public void testSortedColumnOrderByDesc() {
     _activeSegment = _sortedSegment;
+    String query = "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn < 500 "
+        + "ORDER BY sortedColumn DESC LIMIT 1000";
 
-    BaseOperator<DistinctResultsBlock> descOp = getOperator(
-        "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn < 500 "
-            + "ORDER BY sortedColumn DESC LIMIT 1000 " + OPT + ")");
-    BaseOperator<DistinctResultsBlock> descScanOp = getOperator(
-        "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn < 500 "
-            + "ORDER BY sortedColumn DESC LIMIT 1000");
+    BaseOperator<DistinctResultsBlock> descOp = getOperator(OPT + query);
+    BaseOperator<DistinctResultsBlock> descScanOp = getOperator(query);
     assertEquals(extractOrderedIntValues(descOp.nextBlock().getDistinctTable().toResultTable()),
         extractOrderedIntValues(descScanOp.nextBlock().getDistinctTable().toResultTable()));
   }
@@ -638,14 +617,12 @@ public void testSortedColumnOrderByDesc() {
   @Test
   public void testSortedColumnOrderByDescWithLimit() {
     _activeSegment = _sortedSegment;
-
     int limit = 5;
-    BaseOperator<DistinctResultsBlock> descLimitOp = getOperator(
-        "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn >= 0 "
-            + "ORDER BY sortedColumn DESC LIMIT " + limit + " " + OPT + ")");
-    BaseOperator<DistinctResultsBlock> descLimitScanOp = getOperator(
-        "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn >= 0 "
-            + "ORDER BY sortedColumn DESC LIMIT " + limit);
+    String query = "SELECT DISTINCT sortedColumn FROM testTable WHERE filterColumn >= 0 "
+        + "ORDER BY sortedColumn DESC LIMIT " + limit;
+
+    BaseOperator<DistinctResultsBlock> descLimitOp = getOperator(OPT + query);
+    BaseOperator<DistinctResultsBlock> descLimitScanOp = getOperator(query);
     DistinctTable descLimitTable = descLimitOp.nextBlock().getDistinctTable();
     ResultTable descLimitResultTable = descLimitTable.toResultTable();
     assertEquals(extractOrderedIntValues(descLimitResultTable),
@@ -656,17 +633,14 @@ public void testSortedColumnOrderByDescWithLimit() {
   @Test
   public void testMutableSegmentOrderByUsesInvertedIndex() {
     _activeSegment = _mutableSegment;
-    String bitmapCapableAllDocsFilter = "intColumn IN (0,1,2,3,4,5,6,7,8,9)";
+    String query = "SELECT DISTINCT intColumn FROM testTable WHERE intColumn IN (0,1,2,3,4,5,6,7,8,9) "
+        + "ORDER BY intColumn DESC LIMIT 5";
 
-    BaseOperator<DistinctResultsBlock> invertedOp = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE " + bitmapCapableAllDocsFilter + ' '
-            + "ORDER BY intColumn DESC LIMIT 5 " + OPT_INV);
+    BaseOperator<DistinctResultsBlock> invertedOp = getOperator(OPT_INV + query);
     DistinctTable invertedTable = invertedOp.nextBlock().getDistinctTable();
     assertTrue(usedInvertedIndex(invertedOp));
 
-    BaseOperator<DistinctResultsBlock> scanOp = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE " + bitmapCapableAllDocsFilter + ' '
-            + "ORDER BY intColumn DESC LIMIT 5 " + OPT_SCAN);
+    BaseOperator<DistinctResultsBlock> scanOp = getOperator(OPT_SCAN + query);
     DistinctTable scanTable = scanOp.nextBlock().getDistinctTable();
     assertFalse(usedInvertedIndex(scanOp));
 
@@ -683,7 +657,7 @@ public void testStringColumnWithFilter() {
     _activeSegment = _stringSegment;
 
     BaseOperator<DistinctResultsBlock> op = getOperator(
-        "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn < 500 LIMIT 1000 " + OPT_INV);
+        OPT_INV + "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn < 500 LIMIT 1000");
     DistinctTable table = op.nextBlock().getDistinctTable();
     assertTrue(usedInvertedIndex(op));
     assertEquals(table.size(), STRING_NUM_UNIQUE);
@@ -692,16 +666,14 @@ public void testStringColumnWithFilter() {
   @Test
   public void testStringColumnInvertedVsScan() {
     _activeSegment = _stringSegment;
+    String query = "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn < 200 "
+        + "ORDER BY stringColumn LIMIT 1000";
 
-    BaseOperator<DistinctResultsBlock> invertedOp = getOperator(
-        "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn < 200 "
-            + "ORDER BY stringColumn LIMIT 1000 " + OPT_INV);
+    BaseOperator<DistinctResultsBlock> invertedOp = getOperator(OPT_INV + query);
     DistinctTable invertedTable = invertedOp.nextBlock().getDistinctTable();
     assertTrue(usedInvertedIndex(invertedOp));
 
-    BaseOperator<DistinctResultsBlock> scanOp = getOperator(
-        "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn < 200 "
-            + "ORDER BY stringColumn LIMIT 1000 " + OPT_SCAN);
+    BaseOperator<DistinctResultsBlock> scanOp = getOperator(OPT_SCAN + query);
     assertEquals(extractOrderedStringValues(invertedTable.toResultTable()),
         extractOrderedStringValues(scanOp.nextBlock().getDistinctTable().toResultTable()));
   }
@@ -709,13 +681,11 @@ public void testStringColumnInvertedVsScan() {
   @Test
   public void testStringColumnOrderByDesc() {
     _activeSegment = _stringSegment;
+    String query = "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn >= 0 "
+        + "ORDER BY stringColumn DESC LIMIT 1000";
 
-    BaseOperator<DistinctResultsBlock> descOp = getOperator(
-        "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn >= 0 "
-            + "ORDER BY stringColumn DESC LIMIT 1000 " + OPT_INV);
-    BaseOperator<DistinctResultsBlock> descScanOp = getOperator(
-        "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn >= 0 "
-            + "ORDER BY stringColumn DESC LIMIT 1000 " + OPT_SCAN);
+    BaseOperator<DistinctResultsBlock> descOp = getOperator(OPT_INV + query);
+    BaseOperator<DistinctResultsBlock> descScanOp = getOperator(OPT_SCAN + query);
     assertEquals(extractOrderedStringValues(descOp.nextBlock().getDistinctTable().toResultTable()),
         extractOrderedStringValues(descScanOp.nextBlock().getDistinctTable().toResultTable()));
   }
@@ -723,13 +693,11 @@ public void testStringColumnOrderByDesc() {
   @Test
   public void testStringColumnOrderByDescWithLimit() {
     _activeSegment = _stringSegment;
+    String query = "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn >= 0 "
+        + "ORDER BY stringColumn DESC LIMIT 5";
 
-    BaseOperator<DistinctResultsBlock> descLimitOp = getOperator(
-        "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn >= 0 "
-            + "ORDER BY stringColumn DESC LIMIT 5 " + OPT_INV);
-    BaseOperator<DistinctResultsBlock> descLimitScanOp = getOperator(
-        "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn >= 0 "
-            + "ORDER BY stringColumn DESC LIMIT 5");
+    BaseOperator<DistinctResultsBlock> descLimitOp = getOperator(OPT_INV + query);
+    BaseOperator<DistinctResultsBlock> descLimitScanOp = getOperator(query);
     DistinctTable descLimitStrTable = descLimitOp.nextBlock().getDistinctTable();
     ResultTable descLimitResultTable = descLimitStrTable.toResultTable();
     assertEquals(extractOrderedStringValues(descLimitResultTable),
@@ -742,20 +710,18 @@ public void testStringColumnEmptyFilter() {
     _activeSegment = _stringSegment;
 
     BaseOperator<DistinctResultsBlock> emptyOp = getOperator(
-        "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn > 99999 LIMIT 1000 " + OPT_INV);
+        OPT_INV + "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn > 99999 LIMIT 1000");
     assertEquals(emptyOp.nextBlock().getDistinctTable().size(), 0);
   }
 
   @Test
   public void testStringColumnSelectiveFilter() {
     _activeSegment = _stringSegment;
+    String query = "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn < 100 "
+        + "ORDER BY stringColumn LIMIT 100";
 
-    BaseOperator<DistinctResultsBlock> selectiveOp = getOperator(
-        "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn < 100 "
-            + "ORDER BY stringColumn LIMIT 100 " + OPT_INV);
-    BaseOperator<DistinctResultsBlock> selectiveScanOp = getOperator(
-        "SELECT DISTINCT stringColumn FROM testTable WHERE filterColumn < 100 "
-            + "ORDER BY stringColumn LIMIT 100 " + OPT_SCAN);
+    BaseOperator<DistinctResultsBlock> selectiveOp = getOperator(OPT_INV + query);
+    BaseOperator<DistinctResultsBlock> selectiveScanOp = getOperator(OPT_SCAN + query);
     assertEquals(extractOrderedStringValues(selectiveOp.nextBlock().getDistinctTable().toResultTable()),
         extractOrderedStringValues(selectiveScanOp.nextBlock().getDistinctTable().toResultTable()));
   }
@@ -767,8 +733,7 @@ public void testNullIncludedWithWideFilter() {
     _activeSegment = _nullSegment;
 
     BaseOperator<DistinctResultsBlock> op = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 0 LIMIT 1000 "
-            + OPT + ", invertedIndexDistinctCostRatio=1, enableNullHandling=true)");
+        OPT_INV_NULLS + "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 0 LIMIT 1000");
     DistinctTable table = op.nextBlock().getDistinctTable();
     assertTrue(usedInvertedIndex(op));
     assertEquals(table.size(), NULL_NUM_UNIQUE + 1);
@@ -781,8 +746,7 @@ public void testNullExcludedWithSelectiveFilter() {
 
     // filterColumn < 500 → docs 0-499, all non-null
     BaseOperator<DistinctResultsBlock> op = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn < 500 LIMIT 1000 "
-            + OPT + ", invertedIndexDistinctCostRatio=1, enableNullHandling=true)");
+        OPT_INV_NULLS + "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn < 500 LIMIT 1000");
     DistinctTable table = op.nextBlock().getDistinctTable();
     assertTrue(usedInvertedIndex(op));
     assertFalse(containsNull(table));
@@ -795,8 +759,7 @@ public void testNullWithPartialFilter() {
 
     // filterColumn >= 940 → docs 940-999 (10 non-null + 50 null)
     BaseOperator<DistinctResultsBlock> op = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 940 LIMIT 1000 "
-            + OPT + ", invertedIndexDistinctCostRatio=1, enableNullHandling=true)");
+        OPT_INV_NULLS + "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 940 LIMIT 1000");
     DistinctTable table = op.nextBlock().getDistinctTable();
     assertTrue(usedInvertedIndex(op));
     assertTrue(containsNull(table));
@@ -808,9 +771,8 @@ public void testNullHandlingOrderBy() {
     _activeSegment = _nullSegment;
 
     BaseOperator<DistinctResultsBlock> orderOp = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 0 "
-            + "ORDER BY intColumn LIMIT 1000 "
-            + OPT + ", invertedIndexDistinctCostRatio=1, enableNullHandling=true)");
+        OPT_INV_NULLS + "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 0 "
+            + "ORDER BY intColumn LIMIT 1000");
     ResultTable resultTable = orderOp.nextBlock().getDistinctTable().toResultTable();
     assertEquals(resultTable.getRows().size(), NULL_NUM_UNIQUE + 1);
     assertEquals(resultTable.getRows().get(0)[0], 0);
@@ -822,9 +784,8 @@ public void testNullHandlingOrderByNullsFirstLimit() {
     _activeSegment = _nullSegment;
 
     BaseOperator<DistinctResultsBlock> limitOp = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 0 "
-            + "ORDER BY intColumn NULLS FIRST LIMIT 10 "
-            + OPT + ", invertedIndexDistinctCostRatio=1, enableNullHandling=true)");
+        OPT_INV_NULLS + "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 0 "
+            + "ORDER BY intColumn NULLS FIRST LIMIT 10");
     ResultTable resultTable = limitOp.nextBlock().getDistinctTable().toResultTable();
     assertEquals(resultTable.getRows().size(), 10);
     assertNull(resultTable.getRows().get(0)[0]);
@@ -836,9 +797,8 @@ public void testNullHandlingOrderByNullsLastLimit() {
     _activeSegment = _nullSegment;
 
     BaseOperator<DistinctResultsBlock> limitOp = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 0 "
-            + "ORDER BY intColumn LIMIT 10 "
-            + OPT + ", invertedIndexDistinctCostRatio=1, enableNullHandling=true)");
+        OPT_INV_NULLS + "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 0 "
+            + "ORDER BY intColumn LIMIT 10");
     ResultTable resultTable = limitOp.nextBlock().getDistinctTable().toResultTable();
     assertEquals(resultTable.getRows().size(), 10);
     assertFalse(containsNull(resultTable));
@@ -851,9 +811,8 @@ public void testNullHandlingOrderByDescNullsLastLimit() {
     _activeSegment = _nullSegment;
 
     BaseOperator<DistinctResultsBlock> limitOp = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 0 "
-            + "ORDER BY intColumn DESC NULLS LAST LIMIT 10 "
-            + OPT + ", invertedIndexDistinctCostRatio=1, enableNullHandling=true)");
+        OPT_INV_NULLS + "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 0 "
+            + "ORDER BY intColumn DESC NULLS LAST LIMIT 10");
     ResultTable resultTable = limitOp.nextBlock().getDistinctTable().toResultTable();
     assertEquals(resultTable.getRows().size(), 10);
     assertFalse(containsNull(resultTable));
@@ -864,18 +823,14 @@ public void testNullHandlingOrderByDescNullsLastLimit() {
   @Test
   public void testNullHandlingInvertedVsScan() {
     _activeSegment = _nullSegment;
+    String query = "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 940 "
+        + "ORDER BY intColumn LIMIT 1000";
 
-    BaseOperator<DistinctResultsBlock> invertedOp = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 940 "
-            + "ORDER BY intColumn LIMIT 1000 "
-            + OPT + ", invertedIndexDistinctCostRatio=1, enableNullHandling=true)");
+    BaseOperator<DistinctResultsBlock> invertedOp = getOperator(OPT_INV_NULLS + query);
     ResultTable invertedResultTable = invertedOp.nextBlock().getDistinctTable().toResultTable();
     assertTrue(usedInvertedIndex(invertedOp));
 
-    BaseOperator<DistinctResultsBlock> scanOp = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 940 "
-            + "ORDER BY intColumn LIMIT 1000 "
-            + OPT + ", invertedIndexDistinctCostRatio=100000, enableNullHandling=true)");
+    BaseOperator<DistinctResultsBlock> scanOp = getOperator(OPT_SCAN_NULLS + query);
     ResultTable scanResultTable = scanOp.nextBlock().getDistinctTable().toResultTable();
     assertFalse(usedInvertedIndex(scanOp));
 
@@ -888,8 +843,7 @@ public void testNullPreservedInBrokerResultWithoutOrderByLimit() {
     _activeSegment = _nullSegment;
 
     BaseOperator<DistinctResultsBlock> op = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 940 LIMIT 10 "
-            + OPT + ", invertedIndexDistinctCostRatio=1, enableNullHandling=true)");
+        OPT_INV_NULLS + "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 940 LIMIT 10");
     DistinctTable table = op.nextBlock().getDistinctTable();
     assertTrue(usedInvertedIndex(op));
 
@@ -911,11 +865,58 @@ public void testPlaceholderWithoutNullHandling() {
     _activeSegment = _nullSegment;
 
     BaseOperator<DistinctResultsBlock> op = getOperator(
-        "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 0 LIMIT 1000 " + OPT_INV);
+        OPT_INV + "SELECT DISTINCT intColumn FROM testTable WHERE filterColumn >= 0 LIMIT 1000");
     DistinctTable table = op.nextBlock().getDistinctTable();
     assertTrue(usedInvertedIndex(op));
     assertFalse(containsNull(table));
     // Without null handling: 50 real values + Integer.MIN_VALUE placeholder = 51
     assertEquals(table.size(), NULL_NUM_UNIQUE + 1);
   }
+
+  // ==================== Execution Statistics ====================
+
+  /// Inverted-index path emits `numDocsScanned` for matching docs and `numEntriesScannedPostFilter` for dictionary
+  /// entries examined; scan fallback emits the same `numDocsScanned` but reports it again as
+  /// `numEntriesScannedPostFilter` since the scan visits every matching doc. Both paths agree on total docs and on
+  /// the matching doc count for the same filter.
+  @Test
+  public void testExecutionStatistics() {
+    _activeSegment = _intSegment;
+
+    // IN-list filter matches 10 values × 100 records = 1000 docs.
+    String inListQuery =
+        "SELECT DISTINCT intColumn FROM testTable WHERE intColumn IN (0,1,2,3,4,5,6,7,8,9) LIMIT 100";
+    BaseOperator<DistinctResultsBlock> invertedOp = getOperator(OPT_INV + inListQuery);
+    invertedOp.nextBlock();
+    assertTrue(usedInvertedIndex(invertedOp));
+    ExecutionStatistics invertedStats = invertedOp.getExecutionStatistics();
+    assertEquals(invertedStats.getNumDocsScanned(), 1000);
+    assertEquals(invertedStats.getNumTotalDocs(), INT_NUM_RECORDS);
+    assertTrue(invertedStats.getNumEntriesScannedPostFilter() > 0);
+
+    BaseOperator<DistinctResultsBlock> scanOp = getOperator(OPT_SCAN + inListQuery);
+    scanOp.nextBlock();
+    assertFalse(usedInvertedIndex(scanOp));
+    ExecutionStatistics scanStats = scanOp.getExecutionStatistics();
+    assertEquals(scanStats.getNumDocsScanned(), 1000);
+    assertEquals(scanStats.getNumTotalDocs(), INT_NUM_RECORDS);
+    assertEquals(scanStats.getNumEntriesScannedPostFilter(), 1000);
+
+    // Empty filter: no docs scanned, no entries examined.
+    String emptyQuery = "SELECT DISTINCT intColumn FROM testTable WHERE intColumn = -1 LIMIT 100";
+    BaseOperator<DistinctResultsBlock> emptyOp = getOperator(OPT_INV + emptyQuery);
+    emptyOp.nextBlock();
+    ExecutionStatistics emptyStats = emptyOp.getExecutionStatistics();
+    assertEquals(emptyStats.getNumDocsScanned(), 0);
+    assertEquals(emptyStats.getNumEntriesScannedPostFilter(), 0);
+    assertEquals(emptyStats.getNumTotalDocs(), INT_NUM_RECORDS);
+
+    // Wide filter matching all docs.
+    String wideQuery = "SELECT DISTINCT intColumn FROM testTable WHERE intColumn >= 0 LIMIT 1000";
+    BaseOperator<DistinctResultsBlock> wideOp = getOperator(OPT_INV + wideQuery);
+    wideOp.nextBlock();
+    ExecutionStatistics wideStats = wideOp.getExecutionStatistics();
+    assertEquals(wideStats.getNumDocsScanned(), INT_NUM_RECORDS);
+    assertEquals(wideStats.getNumTotalDocs(), INT_NUM_RECORDS);
+  }
 }
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/JsonIndexDistinctOperatorQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/JsonIndexDistinctOperatorQueriesTest.java
new file mode 100644
index 000000000000..2fd35218c4b8
--- /dev/null
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/JsonIndexDistinctOperatorQueriesTest.java
@@ -0,0 +1,447 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.queries;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.common.response.broker.BrokerResponseNative;
+import org.apache.pinot.common.response.broker.ResultTable;
+import org.apache.pinot.core.operator.BaseOperator;
+import org.apache.pinot.core.operator.ExecutionStatistics;
+import org.apache.pinot.core.operator.blocks.results.DistinctResultsBlock;
+import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
+import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl;
+import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
+import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.config.table.JsonIndexConfig;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.data.readers.GenericRow;
+import org.apache.pinot.spi.utils.JsonUtils;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.expectThrows;
+
+
+/// Queries tests for [org.apache.pinot.core.operator.query.JsonIndexDistinctOperator] against the JSON index.
+///
+/// Two segments cover the operator's behavior:
+/// - **Full segment**: 500 docs, every doc has both `$.k1` and `$.k2`; `filterCol` is null in the last 100 docs.
+///   Used for path parity, base-column-filter null handling, same/cross-path `JSON_MATCH` filters, the 5-arg
+///   `jsonFilterExpression`, the `jsonIndexDistinctSkipMissingPath` query option, execution-statistics shape, and
+///   the construction-time validation throw for invalid 4-arg defaults.
+/// - **Sparse segment**: 20 docs, only the first 10 have `$.k1`. Used for the 4-arg-default, the same-path `IS NULL`
+///   filter that triggers the default, the 3-arg "Illegal Json Path" throw, and the skip-option's suppression of
+///   that throw.
+///
+/// Composite-only behavior (selective `invertedIndexConfigs`, `jsonExtractScalar` fallback on non-indexed paths)
+/// is covered separately in `ai.startree.integration.tests.JsonIndexDistinctOperatorCompositeSharedClusterTest`.
+public class JsonIndexDistinctOperatorQueriesTest extends BaseQueriesTest {
+  private static final File INDEX_DIR =
+      new File(FileUtils.getTempDirectory(), "JsonIndexDistinctOperatorQueriesTest");
+  private static final String RAW_TABLE_NAME = "testTable";
+  private static final String JSON_COL = "jsonCol";
+  private static final String FILTER_COL = "filterCol";
+
+  private static final int FULL_NUM_DOCS = 500;
+  private static final int FULL_NUM_DISTINCT_K1 = 50;
+  private static final int FULL_NUM_NON_NULL_FILTER = 400;
+
+  private static final int SPARSE_NUM_DOCS = 20;
+  private static final int SPARSE_NUM_WITH_K1 = 10;
+
+  private static final String OPT_USE_INDEX = "SET useIndexBasedDistinctOperator=true; ";
+  private static final String OPT_NULLS = "SET enableNullHandling=true; ";
+  private static final String OPT_USE_INDEX_NULLS = OPT_USE_INDEX + OPT_NULLS;
+  private static final String OPT_USE_INDEX_SKIP_MISSING_PATH =
+      OPT_USE_INDEX + "SET jsonIndexDistinctSkipMissingPath=true; ";
+
+  private IndexSegment _fullSegment;
+  private IndexSegment _sparseSegment;
+  private IndexSegment _activeSegment;
+
+  @Override
+  protected String getFilter() {
+    return "";
+  }
+
+  @Override
+  protected IndexSegment getIndexSegment() {
+    return _activeSegment;
+  }
+
+  @Override
+  protected List<IndexSegment> getIndexSegments() {
+    return List.of(_activeSegment, _activeSegment);
+  }
+
+  @BeforeClass
+  public void setUp()
+      throws Exception {
+    FileUtils.deleteDirectory(INDEX_DIR);
+    _fullSegment = buildFullSegment();
+    _sparseSegment = buildSparseSegment();
+  }
+
+  @AfterClass
+  public void tearDown() {
+    // A segment field stays null if setUp failed before building it, so only destroy the ones that exist.
+    for (IndexSegment segment : new IndexSegment[]{_fullSegment, _sparseSegment}) {
+      if (segment != null) {
+        segment.destroy();
+      }
+    }
+    FileUtils.deleteQuietly(INDEX_DIR);
+  }
+
+  private IndexSegment buildFullSegment()
+      throws Exception {
+    // Every doc carries both JSON keys: `k1` cycles through FULL_NUM_DISTINCT_K1 values, `k2` is unique per doc.
+    List<GenericRow> rows = new ArrayList<>(FULL_NUM_DOCS);
+    for (int docId = 0; docId < FULL_NUM_DOCS; docId++) {
+      String k1Value = "value-k1-" + (docId % FULL_NUM_DISTINCT_K1);
+      String k2Value = "value-k2-" + docId;
+      // Docs at or beyond FULL_NUM_NON_NULL_FILTER get a null filter value.
+      Integer filterValue = docId < FULL_NUM_NON_NULL_FILTER ? docId : null;
+      GenericRow row = new GenericRow();
+      row.putValue(JSON_COL, JsonUtils.objectToString(Map.of("k1", k1Value, "k2", k2Value)));
+      row.putValue(FILTER_COL, filterValue);
+      rows.add(row);
+    }
+
+    Schema schema = new Schema.SchemaBuilder()
+        .setSchemaName(RAW_TABLE_NAME)
+        .addSingleValueDimension(JSON_COL, DataType.STRING)
+        .addSingleValueDimension(FILTER_COL, DataType.INT)
+        .build();
+    return buildSegment("fullSegment", schema, createTableConfig(true), rows, true);
+  }
+
+  private IndexSegment buildSparseSegment()
+      throws Exception {
+    // Only the first SPARSE_NUM_WITH_K1 docs carry `k1`; every doc carries `k2`.
+    List<GenericRow> rows = new ArrayList<>(SPARSE_NUM_DOCS);
+    for (int docId = 0; docId < SPARSE_NUM_DOCS; docId++) {
+      boolean hasK1 = docId < SPARSE_NUM_WITH_K1;
+      Map<String, String> jsonFields = new HashMap<>();
+      if (hasK1) {
+        jsonFields.put("k1", "k1-" + docId);
+      }
+      jsonFields.put("k2", "k2-" + docId);
+      GenericRow row = new GenericRow();
+      row.putValue(JSON_COL, JsonUtils.objectToString(jsonFields));
+      rows.add(row);
+    }
+
+    Schema schema = new Schema.SchemaBuilder().setSchemaName(RAW_TABLE_NAME)
+        .addSingleValueDimension(JSON_COL, DataType.STRING)
+        .build();
+    return buildSegment("sparseSegment", schema, createTableConfig(false), rows, false);
+  }
+
+  private TableConfig createTableConfig(boolean withFilterCol) {
+    ObjectNode jsonIndexNode = JsonUtils.newObjectNode();
+    jsonIndexNode.set("json", new JsonIndexConfig().toJsonNode());
+    FieldConfig jsonFieldConfig = new FieldConfig.Builder(JSON_COL).withEncodingType(FieldConfig.EncodingType.RAW)
+        .withIndexes(jsonIndexNode).build();
+    TableConfigBuilder configBuilder =
+        new TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME).addFieldConfig(jsonFieldConfig);
+    // Null handling is only enabled for the table that has the filter column.
+    if (withFilterCol) {
+      configBuilder.setNullHandlingEnabled(true);
+    }
+    return configBuilder.build();
+  }
+
+  private IndexSegment buildSegment(String segmentName, Schema schema, TableConfig tableConfig,
+      List<GenericRow> records, boolean defaultNullHandling)
+      throws Exception {
+    // The segment is generated under `<INDEX_DIR>/<segmentName>_dir` and then loaded back from that location.
+    File outputDir = new File(INDEX_DIR, segmentName + "_dir");
+    SegmentGeneratorConfig generatorConfig = new SegmentGeneratorConfig(tableConfig, schema);
+    generatorConfig.setTableName(RAW_TABLE_NAME);
+    generatorConfig.setSegmentName(segmentName);
+    generatorConfig.setOutDir(outputDir.getAbsolutePath());
+    generatorConfig.setDefaultNullHandlingEnabled(defaultNullHandling);
+    SegmentIndexCreationDriverImpl creationDriver = new SegmentIndexCreationDriverImpl();
+    creationDriver.init(generatorConfig, new GenericRowRecordReader(records));
+    creationDriver.build();
+    File builtSegmentDir = new File(outputDir, segmentName);
+    return ImmutableSegmentLoader.load(builtSegmentDir, new IndexLoadingConfig(tableConfig, schema));
+  }
+
+  /// With the OSS JSON index `$.k1` is indexed, so `useIndexBasedDistinctOperator=true` must route through
+  /// [JsonIndexDistinctOperator] and produce the same set of distinct values as the scan-based baseline.
+  @Test
+  public void testIndexedPathParity() {
+    _activeSegment = _fullSegment;
+    String selectExpr = "jsonExtractIndex(jsonCol, '$.k1', 'STRING')";
+    String baselineQuery = "SELECT DISTINCT " + selectExpr + " FROM testTable ORDER BY " + selectExpr + " LIMIT 10000";
+    String optimizedQuery = OPT_USE_INDEX + baselineQuery;
+
+    BaseOperator<DistinctResultsBlock> optimizedOp = getOperator(optimizedQuery);
+    assertTrue(optimizedOp.toExplainString().contains("DISTINCT_JSON_INDEX"));
+
+    BrokerResponseNative baseline = getBrokerResponse(baselineQuery);
+    BrokerResponseNative optimized = getBrokerResponse(optimizedQuery);
+    assertEquals(extractStringValues(optimized.getResultTable()), extractStringValues(baseline.getResultTable()));
+    assertEquals(optimized.getResultTable().getRows().size(), FULL_NUM_DISTINCT_K1);
+  }
+
+  /// A nullable filter column drives the doc-id set into [JsonIndexDistinctOperator]. With null handling disabled,
+  /// null `filterCol` values are stored as the `Integer.MIN_VALUE` placeholder, so `filterCol < N` also matches them
+  /// and only parity with the scan-based baseline is checked; with `enableNullHandling=true`, `filterCol IS NOT NULL`
+  /// must match it too. Regression coverage for the prior bug of ignoring null exclusion from a base-column filter.
+  @Test
+  public void testNullHandlingOnSeparateFilterColumn() {
+    _activeSegment = _fullSegment;
+    String selectExpr = "jsonExtractIndex(jsonCol, '$.k1', 'STRING')";
+
+    String rangeBaselineQuery =
+        "SELECT DISTINCT " + selectExpr + " FROM testTable WHERE filterCol < " + FULL_NUM_NON_NULL_FILTER + " "
+            + "ORDER BY " + selectExpr + " LIMIT 10000";
+    String rangeOptimizedQuery = OPT_USE_INDEX + rangeBaselineQuery;
+    BaseOperator<DistinctResultsBlock> rangeOptimizedOp = getOperator(rangeOptimizedQuery);
+    assertTrue(rangeOptimizedOp.toExplainString().contains("DISTINCT_JSON_INDEX"));
+    BrokerResponseNative rangeBaseline = getBrokerResponse(rangeBaselineQuery);
+    BrokerResponseNative rangeOptimized = getBrokerResponse(rangeOptimizedQuery);
+    assertEquals(extractStringValues(rangeOptimized.getResultTable()),
+        extractStringValues(rangeBaseline.getResultTable()));
+    assertFalse(containsNull(rangeOptimized.getResultTable()));
+
+    String isNotNullBody =
+        "SELECT DISTINCT " + selectExpr + " FROM testTable WHERE filterCol IS NOT NULL "
+            + "ORDER BY " + selectExpr + " LIMIT 10000";
+    String isNotNullBaselineQuery = OPT_NULLS + isNotNullBody;
+    String isNotNullOptimizedQuery = OPT_USE_INDEX_NULLS + isNotNullBody;
+    BaseOperator<DistinctResultsBlock> isNotNullOptimizedOp = getOperator(isNotNullOptimizedQuery);
+    assertTrue(isNotNullOptimizedOp.toExplainString().contains("DISTINCT_JSON_INDEX"));
+    BrokerResponseNative isNotNullBaseline = getBrokerResponse(isNotNullBaselineQuery);
+    BrokerResponseNative isNotNullOptimized = getBrokerResponse(isNotNullOptimizedQuery);
+    assertEquals(extractStringValues(isNotNullOptimized.getResultTable()),
+        extractStringValues(isNotNullBaseline.getResultTable()));
+    assertFalse(containsNull(isNotNullOptimized.getResultTable()));
+  }
+
+  /// Same-path `JSON_MATCH` on the indexed column means the filter resolves entirely inside the JSON index, so the
+  /// distinct set must contain only the values that satisfy the predicate. With `value-k1-0` selected, the result
+  /// is a single distinct value.
+  @Test
+  public void testSamePathJsonMatchFilter() {
+    _activeSegment = _fullSegment;
+    String query = OPT_USE_INDEX + "SELECT DISTINCT jsonExtractIndex(jsonCol, '$.k1', 'STRING') FROM testTable "
+        + "WHERE JSON_MATCH(jsonCol, '\"$.k1\" = ''value-k1-0''') LIMIT 100";
+    BaseOperator<DistinctResultsBlock> op = getOperator(query);
+    assertTrue(op.toExplainString().contains("DISTINCT_JSON_INDEX"));
+    assertEquals(extractStringValues(getBrokerResponse(query).getResultTable()), Set.of("value-k1-0"));
+  }
+
+  /// Cross-path `JSON_MATCH` (filter on `$.k2`, select distinct of `$.k1`) intersects the per-value doc ids from the
+  /// `$.k1` JSON-index lookup with the doc set produced by the `WHERE`-clause filter on `$.k2`, returning only the
+  /// `$.k1` values for docs whose `$.k2` matches.
+  @Test
+  public void testCrossPathJsonMatchFilter() {
+    _activeSegment = _fullSegment;
+    // `$.k2` = `value-k2-7` matches a single doc; that doc's `$.k1` is `value-k1-(7 % 50)` = `value-k1-7`.
+    String query = OPT_USE_INDEX + "SELECT DISTINCT jsonExtractIndex(jsonCol, '$.k1', 'STRING') FROM testTable "
+        + "WHERE JSON_MATCH(jsonCol, '\"$.k2\" = ''value-k2-7''') LIMIT 100";
+    BaseOperator<DistinctResultsBlock> op = getOperator(query);
+    assertTrue(op.toExplainString().contains("DISTINCT_JSON_INDEX"));
+    assertEquals(extractStringValues(getBrokerResponse(query).getResultTable()), Set.of("value-k1-7"));
+  }
+
+  /// A 4-arg `jsonExtractIndex` whose default literal cannot be parsed into the requested type still routes through
+  /// the JSON-index operator (planner-time `canUseJsonIndexDistinct` only checks the function name); the operator's
+  /// constructor surfaces the validation failure as `IllegalArgumentException`.
+  @Test
+  public void testInvalidDefaultArgThrowsAtConstruction() {
+    _activeSegment = _fullSegment;
+    String query = OPT_USE_INDEX
+        + "SELECT DISTINCT jsonExtractIndex(jsonCol, '$.k1', 'INT', 'abc') FROM testTable LIMIT 100";
+    IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> getOperator(query));
+    assertTrue(exception.getMessage().contains("Default value"));
+  }
+
+  /// The sparse segment has `$.k1` in only 10 of its 20 docs. Querying it with the 4-arg form
+  /// `jsonExtractIndex(..., defaultValue)` must therefore yield the 10 real `$.k1` values plus a single occurrence of
+  /// the default literal `missing` standing in for every doc that lacks the path.
+  @Test
+  public void testFourArgDefaultForDocsWithoutPath() {
+    _activeSegment = _sparseSegment;
+    String sql = OPT_USE_INDEX
+        + "SELECT DISTINCT jsonExtractIndex(jsonCol, '$.k1', 'STRING', 'missing') FROM testTable LIMIT 100";
+    assertTrue(getOperator(sql).toExplainString().contains("DISTINCT_JSON_INDEX"));
+
+    // Seed with the default literal, then add one value per doc that has `$.k1`.
+    Set<String> expectedValues = new HashSet<>(Set.of("missing"));
+    for (int docId = 0; docId < SPARSE_NUM_WITH_K1; docId++) {
+      expectedValues.add("k1-" + docId);
+    }
+    ResultTable resultTable = getBrokerResponse(sql).getResultTable();
+    assertEquals(extractStringValues(resultTable), expectedValues);
+  }
+
+  /// `"$.k1" IS NULL` selects only docs missing the path. None of the values returned by the JSON-index lookup
+  /// intersect the filtered doc set, so `handleMissingDocs` adds the 4-arg default for the unmatched docs. Result
+  /// is the default alone.
+  @Test
+  public void testSamePathIsNullFilterWithDefault() {
+    _activeSegment = _sparseSegment;
+    String query = OPT_USE_INDEX
+        + "SELECT DISTINCT jsonExtractIndex(jsonCol, '$.k1', 'STRING', 'missing') FROM testTable "
+        + "WHERE JSON_MATCH(jsonCol, '\"$.k1\" IS NULL') LIMIT 100";
+    BaseOperator<DistinctResultsBlock> op = getOperator(query);
+    assertTrue(op.toExplainString().contains("DISTINCT_JSON_INDEX"));
+    assertEquals(extractStringValues(getBrokerResponse(query).getResultTable()), Set.of("missing"));
+  }
+
+  /// 3-arg `jsonExtractIndex` (no default) over a segment where some docs miss the path must throw
+  /// `Illegal Json Path` once `handleMissingDocs` is reached.
+  @Test
+  public void testMissingPathThrowsWithoutDefault() {
+    _activeSegment = _sparseSegment;
+    String query = OPT_USE_INDEX
+        + "SELECT DISTINCT jsonExtractIndex(jsonCol, '$.k1', 'STRING') FROM testTable LIMIT 100";
+    BaseOperator<DistinctResultsBlock> op = getOperator(query);
+    assertTrue(op.toExplainString().contains("DISTINCT_JSON_INDEX"));
+    RuntimeException exception = expectThrows(RuntimeException.class, op::nextBlock);
+    assertTrue(exception.getMessage().contains("Illegal Json Path"));
+  }
+
+  /// `numDocsScanned` reports the count of matching docs (either the filter bitmap's cardinality, or `_totalDocs` when
+  /// the filter is MatchAll). `numEntriesScannedPostFilter` is the count of distinct JSON-index values examined.
+  /// `numEntriesScannedInFilter` is reported by the underlying filter operator (0 for MatchAll, positive when the
+  /// filter materializes a bitmap).
+  @Test
+  public void testExecutionStatistics() {
+    _activeSegment = _fullSegment;
+
+    // Unfiltered: filter is MatchAll → numDocsScanned == _totalDocs, numEntriesScannedInFilter == 0, every distinct
+    // k1 value is examined post-filter.
+    String unfilteredQuery =
+        OPT_USE_INDEX + "SELECT DISTINCT jsonExtractIndex(jsonCol, '$.k1', 'STRING') FROM testTable LIMIT 10000";
+    BaseOperator<DistinctResultsBlock> unfilteredOp = getOperator(unfilteredQuery);
+    unfilteredOp.nextBlock();
+    ExecutionStatistics unfilteredStats = unfilteredOp.getExecutionStatistics();
+    assertEquals(unfilteredStats.getNumDocsScanned(), FULL_NUM_DOCS);
+    assertEquals(unfilteredStats.getNumEntriesScannedInFilter(), 0);
+    assertEquals(unfilteredStats.getNumEntriesScannedPostFilter(), FULL_NUM_DISTINCT_K1);
+    assertEquals(unfilteredStats.getNumTotalDocs(), FULL_NUM_DOCS);
+
+    // Base-column filter: numDocsScanned == cardinality of the filter bitmap. numEntriesScannedInFilter > 0 since the
+    // scan-based filter materializes its bitmap by visiting docs. `filterCol >= 0` excludes the Integer.MIN_VALUE
+    // null-placeholder docs (since null handling is not enabled here), leaving exactly the FULL_NUM_NON_NULL_FILTER
+    // non-null docs.
+    String filteredQuery = OPT_USE_INDEX + "SELECT DISTINCT jsonExtractIndex(jsonCol, '$.k1', 'STRING') FROM testTable "
+        + "WHERE filterCol >= 0 LIMIT 10000";
+    BaseOperator<DistinctResultsBlock> filteredOp = getOperator(filteredQuery);
+    filteredOp.nextBlock();
+    ExecutionStatistics filteredStats = filteredOp.getExecutionStatistics();
+    assertEquals(filteredStats.getNumDocsScanned(), FULL_NUM_NON_NULL_FILTER);
+    assertEquals(filteredStats.getNumEntriesScannedInFilter(), FULL_NUM_DOCS);
+    assertEquals(filteredStats.getNumEntriesScannedPostFilter(), FULL_NUM_DISTINCT_K1);
+    assertEquals(filteredStats.getNumTotalDocs(), FULL_NUM_DOCS);
+  }
+
+  /// 5-arg form pushes the `jsonFilterExpression` literal directly into `getMatchingFlattenedDocsMap`, so the JSON
+  /// index returns only entries whose values satisfy the filter. Any doc whose value does not satisfy the filter is
+  /// seen by the distinct operator as missing-path; with a 4-arg default present, the default is added to the
+  /// distinct set for those docs. Matches `JsonExtractIndexTransformFunction`'s per-doc behavior, where
+  /// `getValuesSV` returns null for docs outside the filtered map and the loop substitutes the default.
+  @Test
+  public void testFiveArgFilterJsonExpression() {
+    _activeSegment = _fullSegment;
+    // 5-arg filter narrows the index to value-k1-3 (10 docs). The remaining 490 docs see their $.k1 as missing under
+    // this filter and pick up the 4-arg default 'missing'. Result is the union of both.
+    String query = OPT_USE_INDEX + "SELECT DISTINCT "
+        + "jsonExtractIndex(jsonCol, '$.k1', 'STRING', 'missing', '\"$.k1\" = ''value-k1-3''') "
+        + "FROM testTable LIMIT 100";
+    BaseOperator<DistinctResultsBlock> op = getOperator(query);
+    assertTrue(op.toExplainString().contains("DISTINCT_JSON_INDEX"));
+    assertEquals(extractStringValues(getBrokerResponse(query).getResultTable()),
+        Set.of("value-k1-3", "missing"));
+  }
+
+  /// `jsonIndexDistinctSkipMissingPath=true` disables `handleMissingDocs` entirely. Even when docs are
+  /// "missing" from the index (here, all docs outside the 5-arg filter), the 4-arg default is NOT added, no null is
+  /// added under nullHandling, and the 3-arg "Illegal Json Path" throw is suppressed. The distinct set is exactly
+  /// the values the JSON index returned.
+  @Test
+  public void testSkipMissingPath() {
+    _activeSegment = _fullSegment;
+    String query = OPT_USE_INDEX_SKIP_MISSING_PATH
+        + "SELECT DISTINCT jsonExtractIndex(jsonCol, '$.k1', 'STRING', 'missing', '\"$.k1\" = ''value-k1-3''') "
+        + "FROM testTable LIMIT 100";
+    BaseOperator<DistinctResultsBlock> op = getOperator(query);
+    assertTrue(op.toExplainString().contains("DISTINCT_JSON_INDEX"));
+    // No 'missing' even though the 4-arg default is set and 490 docs are "missing" under the 5-arg filter.
+    assertEquals(extractStringValues(getBrokerResponse(query).getResultTable()), Set.of("value-k1-3"));
+  }
+
+  /// Counterpart of the 3-arg throw on the sparse segment: once the skip option is set, docs without `$.k1` are
+  /// ignored and the query simply returns the values present in the index.
+  @Test
+  public void testSkipMissingPathSuppressesThrow() {
+    _activeSegment = _sparseSegment;
+    String sql = OPT_USE_INDEX_SKIP_MISSING_PATH
+        + "SELECT DISTINCT jsonExtractIndex(jsonCol, '$.k1', 'STRING') FROM testTable LIMIT 100";
+    assertTrue(getOperator(sql).toExplainString().contains("DISTINCT_JSON_INDEX"));
+    // Exactly the values of the docs that do have `$.k1`; nothing is added for the other docs.
+    Set<String> expectedValues = new HashSet<>();
+    for (int docId = 0; docId < SPARSE_NUM_WITH_K1; docId++) {
+      expectedValues.add("k1-" + docId);
+    }
+    assertEquals(extractStringValues(getBrokerResponse(sql).getResultTable()), expectedValues);
+  }
+
+  private static Set<String> extractStringValues(ResultTable resultTable) {
+    // Collects column 0 of every row; a null cell casts to a null String and is kept as a null element.
+    Set<String> distinctValues = new HashSet<>();
+    resultTable.getRows()
+        .forEach(cells -> distinctValues.add((String) cells[0]));
+    return distinctValues;
+  }
+
+  /// Returns `true` when any row of the result table holds `null` in its first column.
+  ///
+  /// Only column 0 is inspected; rows are visited in order and the check stops at the first hit.
+  private static boolean containsNull(ResultTable resultTable) {
+    return resultTable.getRows()
+        .stream()
+        .anyMatch(cells -> cells[0] == null);
+  }
+}
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/JsonPathTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/JsonPathTest.java
index 520f55126ab6..bb90cf7313da 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/JsonPathTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/JsonPathTest.java
@@ -24,8 +24,6 @@
 import com.jayway.jsonpath.spi.cache.CacheProvider;
 import java.io.File;
 import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -36,29 +34,40 @@
 import org.apache.pinot.spi.config.table.TableType;
 import org.apache.pinot.spi.config.table.ingestion.IngestionConfig;
 import org.apache.pinot.spi.config.table.ingestion.TransformConfig;
-import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.exception.QueryErrorCode;
+import org.apache.pinot.spi.utils.CommonConstants.Broker.Request.QueryOptionKey;
 import org.apache.pinot.spi.utils.JsonUtils;
 import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
-import org.testng.Assert;
 import org.testng.annotations.Test;
 
-import static org.apache.pinot.spi.utils.CommonConstants.Broker.Request.QueryOptionKey.USE_INDEX_BASED_DISTINCT_OPERATOR;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertTrue;
 
 
 @Test(suiteName = "CustomClusterIntegrationTest")
 public class JsonPathTest extends CustomDataQueryClusterIntegrationTest {
-
   protected static final String DEFAULT_TABLE_NAME = "JsonPathTest";
 
   protected static final int NUM_DOCS_PER_SEGMENT = 1000;
+  // Number of distinct values for myMapStr.$.k1 across the segment. Setting this lower than NUM_DOCS_PER_SEGMENT
+  // forces value repetition, so the JsonIndexDistinct path (which enumerates dictionary values once) and the scan
+  // path (which visits every doc) return the same result set but follow visibly different code paths.
+  private static final int NUM_DISTINCT_K1 = 100;
   private static final String MY_MAP_STR_FIELD_NAME = "myMapStr";
   private static final String MY_MAP_STR_K1_FIELD_NAME = "myMapStr_k1";
   private static final String MY_MAP_STR_K2_FIELD_NAME = "myMapStr_k2";
   private static final String COMPLEX_MAP_STR_FIELD_NAME = "complexMapStr";
   private static final String COMPLEX_MAP_STR_K3_FIELD_NAME = "complexMapStr_k3";
 
+  // Query-option strings passed to postQueryWithOptions.
+  private static final String OPT_USE_INDEX = QueryOptionKey.USE_INDEX_BASED_DISTINCT_OPERATOR + "=true";
+  private static final String OPT_USE_INDEX_SKIP_MISSING_PATH =
+      OPT_USE_INDEX + ";" + QueryOptionKey.JSON_INDEX_DISTINCT_SKIP_MISSING_PATH + "=true";
+
   protected final List<String> _sortedSequenceIds = new ArrayList<>(NUM_DOCS_PER_SEGMENT);
 
   @Override
@@ -67,32 +76,36 @@ protected long getCountStarResult() {
   }
 
   @Override
-  public Schema createSchema() {
-    return new Schema.SchemaBuilder().setSchemaName(getTableName())
-        .addSingleValueDimension("myMap", FieldSpec.DataType.STRING)
-        .addSingleValueDimension(MY_MAP_STR_FIELD_NAME, FieldSpec.DataType.STRING)
-        .addSingleValueDimension(MY_MAP_STR_K1_FIELD_NAME, FieldSpec.DataType.STRING)
-        .addSingleValueDimension(MY_MAP_STR_K2_FIELD_NAME, FieldSpec.DataType.STRING)
-        .addSingleValueDimension(COMPLEX_MAP_STR_FIELD_NAME, FieldSpec.DataType.STRING)
-        .addMultiValueDimension(COMPLEX_MAP_STR_K3_FIELD_NAME, FieldSpec.DataType.STRING).build();
+  public String getTableName() {
+    return DEFAULT_TABLE_NAME;
   }
 
   @Override
-  public String getTableName() {
-    return DEFAULT_TABLE_NAME;
+  public Schema createSchema() {
+    return new Schema.SchemaBuilder()
+        .setSchemaName(getTableName())
+        .addSingleValueDimension("myMap", DataType.STRING)
+        .addSingleValueDimension(MY_MAP_STR_FIELD_NAME, DataType.STRING)
+        .addSingleValueDimension(MY_MAP_STR_K1_FIELD_NAME, DataType.STRING)
+        .addSingleValueDimension(MY_MAP_STR_K2_FIELD_NAME, DataType.STRING)
+        .addSingleValueDimension(COMPLEX_MAP_STR_FIELD_NAME, DataType.STRING)
+        .addMultiValueDimension(COMPLEX_MAP_STR_K3_FIELD_NAME, DataType.STRING)
+        .build();
   }
 
   @Override
   public TableConfig createOfflineTableConfig() {
-    List<TransformConfig> transformConfigs = Arrays.asList(
+    List<TransformConfig> transformConfigs = List.of(
         new TransformConfig(MY_MAP_STR_K1_FIELD_NAME, "jsonPathString(" + MY_MAP_STR_FIELD_NAME + ", '$.k1')"),
         new TransformConfig(MY_MAP_STR_K2_FIELD_NAME, "jsonPathString(" + MY_MAP_STR_FIELD_NAME + ", '$.k2')"),
-        new TransformConfig(COMPLEX_MAP_STR_K3_FIELD_NAME,
-            "jsonPathArray(" + COMPLEX_MAP_STR_FIELD_NAME + ", '$.k3')"));
+        new TransformConfig(COMPLEX_MAP_STR_K3_FIELD_NAME, "jsonPathArray(" + COMPLEX_MAP_STR_FIELD_NAME + ", '$.k3')")
+    );
     IngestionConfig ingestionConfig = new IngestionConfig();
     ingestionConfig.setTransformConfigs(transformConfigs);
-    return new TableConfigBuilder(TableType.OFFLINE).setTableName(getTableName()).setIngestionConfig(ingestionConfig)
-        .setJsonIndexColumns(Collections.singletonList(MY_MAP_STR_FIELD_NAME))
+    return new TableConfigBuilder(TableType.OFFLINE)
+        .setTableName(getTableName())
+        .setIngestionConfig(ingestionConfig)
+        .setJsonIndexColumns(List.of(MY_MAP_STR_FIELD_NAME))
         .build();
   }
 
@@ -100,17 +113,18 @@ public TableConfig createOfflineTableConfig() {
   public List<File> createAvroFiles()
       throws Exception {
     org.apache.avro.Schema avroSchema = org.apache.avro.Schema.createRecord("myRecord", null, null, false);
-    List<org.apache.avro.Schema.Field> fields =
-        Arrays.asList(new org.apache.avro.Schema.Field(MY_MAP_STR_FIELD_NAME, org.apache.avro.Schema.create(
-                org.apache.avro.Schema.Type.STRING), null, null),
-            new org.apache.avro.Schema.Field(COMPLEX_MAP_STR_FIELD_NAME, org.apache.avro.Schema.create(
-                org.apache.avro.Schema.Type.STRING), null, null));
+    List<org.apache.avro.Schema.Field> fields = List.of(
+        new org.apache.avro.Schema.Field(MY_MAP_STR_FIELD_NAME,
+            org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING), null, null),
+        new org.apache.avro.Schema.Field(COMPLEX_MAP_STR_FIELD_NAME,
+            org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING), null, null)
+    );
     avroSchema.setFields(fields);
 
     try (AvroFilesAndWriters avroFilesAndWriters = createAvroFilesAndWriters(avroSchema)) {
       for (int i = 0; i < NUM_DOCS_PER_SEGMENT; i++) {
         Map<String, String> map = new HashMap<>();
-        map.put("k1", "value-k1-" + i);
+        map.put("k1", "value-k1-" + (i % NUM_DISTINCT_K1));
         map.put("k2", "value-k2-" + i);
         GenericData.Record record = new GenericData.Record(avroSchema);
         record.put(MY_MAP_STR_FIELD_NAME, JsonUtils.objectToString(map));
@@ -118,17 +132,20 @@ public List<File> createAvroFiles()
         Map<String, Object> complexMap = new HashMap<>();
         complexMap.put("k1", "value-k1-" + i);
         complexMap.put("k2", "value-k2-" + i);
-        complexMap.put("k3", Arrays.asList("value-k3-0-" + i, "value-k3-1-" + i, "value-k3-2-" + i));
-        complexMap.put("k4",
-            Map.of("k4-k1", "value-k4-k1-" + i, "k4-k2", "value-k4-k2-" + i, "k4-k3", "value-k4-k3-" + i,
-                "met", i));
+        complexMap.put("k3", List.of("value-k3-0-" + i, "value-k3-1-" + i, "value-k3-2-" + i));
+        complexMap.put("k4", Map.of(
+            "k4-k1", "value-k4-k1-" + i,
+            "k4-k2", "value-k4-k2-" + i,
+            "k4-k3", "value-k4-k3-" + i,
+            "met", i)
+        );
         record.put(COMPLEX_MAP_STR_FIELD_NAME, JsonUtils.objectToString(complexMap));
         for (DataFileWriter<GenericData.Record> writer : avroFilesAndWriters.getWriters()) {
           writer.append(record);
         }
         _sortedSequenceIds.add(String.valueOf(i));
       }
-      Collections.sort(_sortedSequenceIds);
+      _sortedSequenceIds.sort(null);
       return avroFilesAndWriters.getAvroFiles();
     }
   }
@@ -139,49 +156,47 @@ public void testQueries(boolean useMultiStageQueryEngine)
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
 
     //Selection Query
-    String query = "Select myMapStr from " + getTableName();
+    String query = "SELECT myMapStr FROM " + getTableName();
     JsonNode pinotResponse = postQuery(query);
     ArrayNode rows = (ArrayNode) pinotResponse.get("resultTable").get("rows");
-    Assert.assertNotNull(rows);
-    Assert.assertFalse(rows.isEmpty());
+    assertNotNull(rows);
+    assertFalse(rows.isEmpty());
     for (int i = 0; i < rows.size(); i++) {
       String value = rows.get(i).get(0).textValue();
-      Assert.assertTrue(value.indexOf("-k1-") > 0);
+      assertTrue(value.indexOf("-k1-") > 0);
     }
 
     //Filter Query
-    query = "Select jsonExtractScalar(myMapStr,'$.k1','STRING') from " + getTableName()
-        + "  where jsonExtractScalar(myMapStr,'$.k1','STRING') = 'value-k1-0'";
+    String expr = "jsonExtractScalar(myMapStr,'$.k1','STRING')";
+    query = "SELECT " + expr + " FROM " + getTableName() + " WHERE " + expr + " = 'value-k1-0'";
     pinotResponse = postQuery(query);
     rows = (ArrayNode) pinotResponse.get("resultTable").get("rows");
-    Assert.assertNotNull(rows);
-    Assert.assertFalse(rows.isEmpty());
+    assertNotNull(rows);
+    assertFalse(rows.isEmpty());
     for (int i = 0; i < rows.size(); i++) {
       String value = rows.get(i).get(0).textValue();
-      Assert.assertEquals(value, "value-k1-0");
+      assertEquals(value, "value-k1-0");
     }
 
     //selection order by
-    query = "Select jsonExtractScalar(myMapStr,'$.k1','STRING') from " + getTableName()
-        + " order by jsonExtractScalar(myMapStr,'$.k1','STRING')";
+    query = "SELECT " + expr + " FROM " + getTableName() + " ORDER BY " + expr;
     pinotResponse = postQuery(query);
     rows = (ArrayNode) pinotResponse.get("resultTable").get("rows");
-    Assert.assertNotNull(rows);
-    Assert.assertFalse(rows.isEmpty());
+    assertNotNull(rows);
+    assertFalse(rows.isEmpty());
     for (int i = 0; i < rows.size(); i++) {
       String value = rows.get(i).get(0).textValue();
-      Assert.assertTrue(value.indexOf("-k1-") > 0);
+      assertTrue(value.indexOf("-k1-") > 0);
     }
 
     //Group By Query
-    query = "Select jsonExtractScalar(myMapStr,'$.k1','STRING'), count(*) from " + getTableName()
-        + " group by jsonExtractScalar(myMapStr,'$.k1','STRING')";
+    query = "SELECT " + expr + ", count(*) FROM " + getTableName() + " GROUP BY " + expr;
     pinotResponse = postQuery(query);
-    Assert.assertNotNull(pinotResponse.get("resultTable"));
+    assertNotNull(pinotResponse.get("resultTable"));
     rows = (ArrayNode) pinotResponse.get("resultTable").get("rows");
     for (int i = 0; i < rows.size(); i++) {
       String value = rows.get(i).get(0).textValue();
-      Assert.assertTrue(value.indexOf("-k1-") > 0);
+      assertTrue(value.indexOf("-k1-") > 0);
     }
   }
 
@@ -190,71 +205,71 @@ public void testComplexQueries(boolean useMultiStageQueryEngine)
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
     //Selection Query
-    String query = "Select complexMapStr from " + getTableName();
+    String query = "SELECT complexMapStr FROM " + getTableName();
     JsonNode pinotResponse = postQuery(query);
     ArrayNode rows = (ArrayNode) pinotResponse.get("resultTable").get("rows");
 
-    Assert.assertNotNull(rows);
-    Assert.assertFalse(rows.isEmpty());
+    assertNotNull(rows);
+    assertFalse(rows.isEmpty());
     for (int i = 0; i < rows.size(); i++) {
       String value = rows.get(i).get(0).textValue();
       Map<?, ?> results = JsonUtils.stringToObject(value, Map.class);
-      Assert.assertTrue(value.indexOf("-k1-") > 0);
-      Assert.assertEquals(results.get("k1"), "value-k1-" + i % NUM_DOCS_PER_SEGMENT);
-      Assert.assertEquals(results.get("k2"), "value-k2-" + i % NUM_DOCS_PER_SEGMENT);
-      final List<?> k3 = (List<?>) results.get("k3");
-      Assert.assertEquals(k3.size(), 3);
-      Assert.assertEquals(k3.get(0), "value-k3-0-" + i % NUM_DOCS_PER_SEGMENT);
-      Assert.assertEquals(k3.get(1), "value-k3-1-" + i % NUM_DOCS_PER_SEGMENT);
-      Assert.assertEquals(k3.get(2), "value-k3-2-" + i % NUM_DOCS_PER_SEGMENT);
-      final Map<?, ?> k4 = (Map<?, ?>) results.get("k4");
-      Assert.assertEquals(k4.size(), 4);
-      Assert.assertEquals(k4.get("k4-k1"), "value-k4-k1-" + i % NUM_DOCS_PER_SEGMENT);
-      Assert.assertEquals(k4.get("k4-k2"), "value-k4-k2-" + i % NUM_DOCS_PER_SEGMENT);
-      Assert.assertEquals(k4.get("k4-k3"), "value-k4-k3-" + i % NUM_DOCS_PER_SEGMENT);
-      Assert.assertEquals(Double.parseDouble(k4.get("met").toString()), i % NUM_DOCS_PER_SEGMENT);
+      assertTrue(value.indexOf("-k1-") > 0);
+      assertEquals(results.get("k1"), "value-k1-" + i % NUM_DOCS_PER_SEGMENT);
+      assertEquals(results.get("k2"), "value-k2-" + i % NUM_DOCS_PER_SEGMENT);
+      List<?> k3 = (List<?>) results.get("k3");
+      assertEquals(k3.size(), 3);
+      assertEquals(k3.get(0), "value-k3-0-" + i % NUM_DOCS_PER_SEGMENT);
+      assertEquals(k3.get(1), "value-k3-1-" + i % NUM_DOCS_PER_SEGMENT);
+      assertEquals(k3.get(2), "value-k3-2-" + i % NUM_DOCS_PER_SEGMENT);
+      Map<?, ?> k4 = (Map<?, ?>) results.get("k4");
+      assertEquals(k4.size(), 4);
+      assertEquals(k4.get("k4-k1"), "value-k4-k1-" + i % NUM_DOCS_PER_SEGMENT);
+      assertEquals(k4.get("k4-k2"), "value-k4-k2-" + i % NUM_DOCS_PER_SEGMENT);
+      assertEquals(k4.get("k4-k3"), "value-k4-k3-" + i % NUM_DOCS_PER_SEGMENT);
+      assertEquals(Double.parseDouble(k4.get("met").toString()), i % NUM_DOCS_PER_SEGMENT);
     }
 
     //Filter Query
-    query = "Select jsonExtractScalar(complexMapStr,'$.k4','STRING') from " + getTableName()
-        + "  where jsonExtractScalar(complexMapStr,'$.k4.k4-k1','STRING') = 'value-k4-k1-0'";
+    query = "SELECT jsonExtractScalar(complexMapStr,'$.k4','STRING') FROM " + getTableName()
+        + " WHERE jsonExtractScalar(complexMapStr,'$.k4.k4-k1','STRING') = 'value-k4-k1-0'";
     pinotResponse = postQuery(query);
     rows = (ArrayNode) pinotResponse.get("resultTable").get("rows");
-    Assert.assertNotNull(rows);
-    Assert.assertEquals(rows.size(), getNumAvroFiles());
+    assertNotNull(rows);
+    assertEquals(rows.size(), getNumAvroFiles());
     for (int i = 0; i < rows.size(); i++) {
       String value = rows.get(i).get(0).textValue();
       Map<?, ?> k4 = JsonUtils.stringToObject(value, Map.class);
-      Assert.assertEquals(k4.size(), 4);
-      Assert.assertEquals(k4.get("k4-k1"), "value-k4-k1-0");
-      Assert.assertEquals(k4.get("k4-k2"), "value-k4-k2-0");
-      Assert.assertEquals(k4.get("k4-k3"), "value-k4-k3-0");
-      Assert.assertEquals(Double.parseDouble(k4.get("met").toString()), 0.0);
+      assertEquals(k4.size(), 4);
+      assertEquals(k4.get("k4-k1"), "value-k4-k1-0");
+      assertEquals(k4.get("k4-k2"), "value-k4-k2-0");
+      assertEquals(k4.get("k4-k3"), "value-k4-k3-0");
+      assertEquals(Double.parseDouble(k4.get("met").toString()), 0.0);
     }
 
     //selection order by
-    query = "Select complexMapStr from " + getTableName()
-        + " order by jsonExtractScalar(complexMapStr,'$.k4.k4-k1','STRING') DESC LIMIT " + NUM_DOCS_PER_SEGMENT;
+    query = "SELECT complexMapStr FROM " + getTableName()
+        + " ORDER BY jsonExtractScalar(complexMapStr,'$.k4.k4-k1','STRING') DESC LIMIT " + NUM_DOCS_PER_SEGMENT;
     pinotResponse = postQuery(query);
     rows = (ArrayNode) pinotResponse.get("resultTable").get("rows");
-    Assert.assertNotNull(rows);
-    Assert.assertFalse(rows.isEmpty());
+    assertNotNull(rows);
+    assertFalse(rows.isEmpty());
     for (int i = 0; i < rows.size(); i++) {
       String value = rows.get(i).get(0).textValue();
-      Assert.assertTrue(value.indexOf("-k1-") > 0);
+      assertTrue(value.indexOf("-k1-") > 0);
       Map<?, ?> results = JsonUtils.stringToObject(value, Map.class);
       String seqId = _sortedSequenceIds.get(NUM_DOCS_PER_SEGMENT - 1 - i / getNumAvroFiles());
-      Assert.assertEquals(results.get("k1"), "value-k1-" + seqId);
-      Assert.assertEquals(results.get("k2"), "value-k2-" + seqId);
-      final List<?> k3 = (List<?>) results.get("k3");
-      Assert.assertEquals(k3.get(0), "value-k3-0-" + seqId);
-      Assert.assertEquals(k3.get(1), "value-k3-1-" + seqId);
-      Assert.assertEquals(k3.get(2), "value-k3-2-" + seqId);
-      final Map<?, ?> k4 = (Map<?, ?>) results.get("k4");
-      Assert.assertEquals(k4.get("k4-k1"), "value-k4-k1-" + seqId);
-      Assert.assertEquals(k4.get("k4-k2"), "value-k4-k2-" + seqId);
-      Assert.assertEquals(k4.get("k4-k3"), "value-k4-k3-" + seqId);
-      Assert.assertEquals(Double.parseDouble(k4.get("met").toString()), Double.parseDouble(seqId));
+      assertEquals(results.get("k1"), "value-k1-" + seqId);
+      assertEquals(results.get("k2"), "value-k2-" + seqId);
+      List<?> k3 = (List<?>) results.get("k3");
+      assertEquals(k3.get(0), "value-k3-0-" + seqId);
+      assertEquals(k3.get(1), "value-k3-1-" + seqId);
+      assertEquals(k3.get(2), "value-k3-2-" + seqId);
+      Map<?, ?> k4 = (Map<?, ?>) results.get("k4");
+      assertEquals(k4.get("k4-k1"), "value-k4-k1-" + seqId);
+      assertEquals(k4.get("k4-k2"), "value-k4-k2-" + seqId);
+      assertEquals(k4.get("k4-k3"), "value-k4-k3-" + seqId);
+      assertEquals(Double.parseDouble(k4.get("met").toString()), Double.parseDouble(seqId));
     }
   }
 
@@ -264,18 +279,18 @@ public void testComplexGroupByQueryV1(boolean useMultiStageQueryEngine)
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
 
     //Group By Query
-    String query = "Select" + " jsonExtractScalar(complexMapStr,'$.k1','STRING'),"
-        + " sum(jsonExtractScalar(complexMapStr,'$.k4.met','INT'))" + " from " + getTableName()
-        + " group by jsonExtractScalar(complexMapStr,'$.k1','STRING')"
-        + " order by sum(jsonExtractScalar(complexMapStr,'$.k4.met','INT')) DESC";
+    String groupExpr = "jsonExtractScalar(complexMapStr,'$.k1','STRING')";
+    String sumExpr = "SUM(jsonExtractScalar(complexMapStr,'$.k4.met','INT'))";
+    String query = "SELECT " + groupExpr + ", " + sumExpr + " FROM " + getTableName()
+        + " GROUP BY " + groupExpr + " ORDER BY " + sumExpr + " DESC";
     JsonNode pinotResponse = postQuery(query);
-    Assert.assertNotNull(pinotResponse.get("resultTable").get("rows"));
+    assertNotNull(pinotResponse.get("resultTable").get("rows"));
     ArrayNode rows = (ArrayNode) pinotResponse.get("resultTable").get("rows");
     for (int i = 0; i < rows.size(); i++) {
       String seqId = _sortedSequenceIds.get(NUM_DOCS_PER_SEGMENT - 1 - i);
-      final JsonNode row = rows.get(i);
-      Assert.assertEquals(row.get(0).asText(), "value-k1-" + seqId);
-      Assert.assertEquals(row.get(1).asDouble(), Double.parseDouble(seqId) * getNumAvroFiles());
+      JsonNode row = rows.get(i);
+      assertEquals(row.get(0).asText(), "value-k1-" + seqId);
+      assertEquals(row.get(1).asDouble(), Double.parseDouble(seqId) * getNumAvroFiles());
     }
   }
 
@@ -284,18 +299,18 @@ public void testComplexGroupByQueryV2(boolean useMultiStageQueryEngine)
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
     //Group By Query
-    String query = "Select" + " jsonExtractScalar(complexMapStr,'$.k1','STRING'),"
-        + " sum(jsonExtractScalar(complexMapStr,'$.k4.met','INT'))" + " from " + getTableName()
-        + " group by jsonExtractScalar(complexMapStr,'$.k1','STRING')"
-        + " order by sum(jsonExtractScalar(complexMapStr,'$.k4.met','INT')) DESC";
+    String groupExpr = "jsonExtractScalar(complexMapStr,'$.k1','STRING')";
+    String sumExpr = "SUM(jsonExtractScalar(complexMapStr,'$.k4.met','INT'))";
+    String query = "SELECT " + groupExpr + ", " + sumExpr + " FROM " + getTableName()
+        + " GROUP BY " + groupExpr + " ORDER BY " + sumExpr + " DESC";
     JsonNode pinotResponse = postQuery(query);
-    Assert.assertNotNull(pinotResponse.get("resultTable").get("rows"));
+    assertNotNull(pinotResponse.get("resultTable").get("rows"));
     ArrayNode rows = (ArrayNode) pinotResponse.get("resultTable").get("rows");
     for (int i = 0; i < rows.size(); i++) {
       String seqId = String.valueOf(NUM_DOCS_PER_SEGMENT - 1 - i);
-      final JsonNode row = rows.get(i);
-      Assert.assertEquals(row.get(0).asText(), "value-k1-" + seqId);
-      Assert.assertEquals(row.get(1).asDouble(), Double.parseDouble(seqId) * getNumAvroFiles());
+      JsonNode row = rows.get(i);
+      assertEquals(row.get(0).asText(), "value-k1-" + seqId);
+      assertEquals(row.get(1).asDouble(), Double.parseDouble(seqId) * getNumAvroFiles());
     }
   }
 
@@ -304,17 +319,17 @@ public void testQueryWithIntegerDefault(boolean useMultiStageQueryEngine)
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
     //Group By Query
-    String query = "Select" + " jsonExtractScalar(complexMapStr,'$.inExistKey','STRING','defaultKey'),"
-        + " sum(jsonExtractScalar(complexMapStr,'$.inExistMet','INT','1'))" + " from " + getTableName()
-        + " group by jsonExtractScalar(complexMapStr,'$.inExistKey','STRING','defaultKey')"
-        + " order by sum(jsonExtractScalar(complexMapStr,'$.inExistMet','INT','1')) DESC";
+    String groupExpr = "jsonExtractScalar(complexMapStr,'$.inExistKey','STRING','defaultKey')";
+    String sumExpr = "SUM(jsonExtractScalar(complexMapStr,'$.inExistMet','INT','1'))";
+    String query = "SELECT " + groupExpr + ", " + sumExpr + " FROM " + getTableName()
+        + " GROUP BY " + groupExpr + " ORDER BY " + sumExpr + " DESC";
     JsonNode pinotResponse = postQuery(query);
-    Assert.assertNotNull(pinotResponse.get("resultTable").get("rows"));
+    assertNotNull(pinotResponse.get("resultTable").get("rows"));
     ArrayNode rows = (ArrayNode) pinotResponse.get("resultTable").get("rows");
-    Assert.assertEquals(rows.size(), 1);
-    final JsonNode row = rows.get(0);
-    Assert.assertEquals(row.get(0).asText(), "defaultKey");
-    Assert.assertEquals(row.get(1).asDouble(), 1000.0 * getNumAvroFiles());
+    assertEquals(rows.size(), 1);
+    JsonNode row = rows.get(0);
+    assertEquals(row.get(0).asText(), "defaultKey");
+    assertEquals(row.get(1).asDouble(), 1000.0 * getNumAvroFiles());
   }
 
   @Test(dataProvider = "useBothQueryEngines")
@@ -322,24 +337,24 @@ public void testQueryWithDoubleDefault(boolean useMultiStageQueryEngine)
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
     //Group By Query
-    String query = "Select" + " jsonExtractScalar(complexMapStr,'$.inExistKey','STRING', 'defaultKey'),"
-        + " sum(jsonExtractScalar(complexMapStr,'$.inExistMet','DOUBLE','0.1'))" + " from " + getTableName()
-        + " group by jsonExtractScalar(complexMapStr,'$.inExistKey','STRING','defaultKey')"
-        + " order by sum(jsonExtractScalar(complexMapStr,'$.inExistMet','DOUBLE','0.1')) DESC";
+    String groupExpr = "jsonExtractScalar(complexMapStr,'$.inExistKey','STRING','defaultKey')";
+    String sumExpr = "SUM(jsonExtractScalar(complexMapStr,'$.inExistMet','DOUBLE','0.1'))";
+    String query = "SELECT " + groupExpr + ", " + sumExpr + " FROM " + getTableName()
+        + " GROUP BY " + groupExpr + " ORDER BY " + sumExpr + " DESC";
     JsonNode pinotResponse = postQuery(query);
-    Assert.assertNotNull(pinotResponse.get("resultTable").get("rows"));
+    assertNotNull(pinotResponse.get("resultTable").get("rows"));
     ArrayNode rows = (ArrayNode) pinotResponse.get("resultTable").get("rows");
-    Assert.assertEquals(rows.size(), 1);
-    final JsonNode row = rows.get(0);
-    Assert.assertEquals(row.get(0).asText(), "defaultKey");
-    Assert.assertTrue(Math.abs(row.get(1).asDouble() - 100.0 * getNumAvroFiles()) < 1e-10);
+    assertEquals(rows.size(), 1);
+    JsonNode row = rows.get(0);
+    assertEquals(row.get(0).asText(), "defaultKey");
+    assertTrue(Math.abs(row.get(1).asDouble() - 100.0 * getNumAvroFiles()) < 1e-10);
   }
 
   @Test(dataProvider = "useBothQueryEngines")
   void testFailedQuery(boolean useMultiStageQueryEngine)
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
-    String query = "Select jsonExtractScalar(myMapStr,\"$.k1\",\"STRING\") from " + getTableName();
+    String query = "SELECT jsonExtractScalar(myMapStr,\"$.k1\",\"STRING\") FROM " + getTableName();
     JsonNode pinotResponse = postQuery(query);
     int expectedStatusCode;
     if (useMultiStageQueryEngine) {
@@ -347,30 +362,30 @@ void testFailedQuery(boolean useMultiStageQueryEngine)
     } else {
       expectedStatusCode = QueryErrorCode.SQL_PARSING.getId();
     }
-    Assert.assertEquals(pinotResponse.get("exceptions").get(0).get("errorCode").asInt(), expectedStatusCode);
-    Assert.assertEquals(pinotResponse.get("numDocsScanned").asInt(), 0);
-    Assert.assertEquals(pinotResponse.get("totalDocs").asInt(), 0);
+    assertEquals(pinotResponse.get("exceptions").get(0).get("errorCode").asInt(), expectedStatusCode);
+    assertEquals(pinotResponse.get("numDocsScanned").asInt(), 0);
+    assertEquals(pinotResponse.get("totalDocs").asInt(), 0);
 
-    query = "Select myMapStr from " + getTableName()
-        + "  where jsonExtractScalar(myMapStr, '$.k1',\"STRING\") = 'value-k1-0'";
+    query = "SELECT myMapStr FROM " + getTableName()
+        + " WHERE jsonExtractScalar(myMapStr, '$.k1',\"STRING\") = 'value-k1-0'";
     pinotResponse = postQuery(query);
-    Assert.assertEquals(pinotResponse.get("exceptions").get(0).get("errorCode").asInt(), expectedStatusCode);
-    Assert.assertEquals(pinotResponse.get("numDocsScanned").asInt(), 0);
-    Assert.assertEquals(pinotResponse.get("totalDocs").asInt(), 0);
+    assertEquals(pinotResponse.get("exceptions").get(0).get("errorCode").asInt(), expectedStatusCode);
+    assertEquals(pinotResponse.get("numDocsScanned").asInt(), 0);
+    assertEquals(pinotResponse.get("totalDocs").asInt(), 0);
 
-    query = "Select jsonExtractScalar(myMapStr,\"$.k1\", 'STRING') from " + getTableName()
-        + "  where jsonExtractScalar(myMapStr, '$.k1', 'STRING') = 'value-k1-0'";
+    query = "SELECT jsonExtractScalar(myMapStr,\"$.k1\", 'STRING') FROM " + getTableName()
+        + " WHERE jsonExtractScalar(myMapStr, '$.k1', 'STRING') = 'value-k1-0'";
     pinotResponse = postQuery(query);
-    Assert.assertEquals(pinotResponse.get("exceptions").get(0).get("errorCode").asInt(), expectedStatusCode);
-    Assert.assertEquals(pinotResponse.get("numDocsScanned").asInt(), 0);
-    Assert.assertEquals(pinotResponse.get("totalDocs").asInt(), 0);
+    assertEquals(pinotResponse.get("exceptions").get(0).get("errorCode").asInt(), expectedStatusCode);
+    assertEquals(pinotResponse.get("numDocsScanned").asInt(), 0);
+    assertEquals(pinotResponse.get("totalDocs").asInt(), 0);
   }
 
   @Test
   public void testJsonPathCache() {
     Cache cache = CacheProvider.getCache();
-    Assert.assertTrue(cache instanceof JsonPathCache);
-    Assert.assertTrue(((JsonPathCache) cache).size() > 0);
+    assertTrue(cache instanceof JsonPathCache);
+    assertTrue(((JsonPathCache) cache).size() > 0);
   }
 
   @Test(dataProvider = "useBothQueryEngines")
@@ -379,37 +394,37 @@ public void testJsonKeysQueries(boolean useMultiStageQueryEngine)
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
     String query = "SELECT jsonExtractKey(myMapStr, '$.*', 'maxDepth=1') FROM " + getTableName() + " LIMIT 1";
     JsonNode pinotResponse = postQuery(query);
-    Assert.assertEquals(pinotResponse.get("exceptions").size(), 0);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
     JsonNode rows = pinotResponse.get("resultTable").get("rows");
-    Assert.assertEquals(rows.size(), 1);
+    assertEquals(rows.size(), 1);
     JsonNode row = rows.get(0);
-    Assert.assertEquals(row.size(), 1);
+    assertEquals(row.size(), 1);
     // JsonPath returns keys in JsonPath format like "$['key']"
     JsonNode keys = row.get(0);
-    Assert.assertTrue(keys.isArray());
-    Assert.assertTrue(keys.size() > 0);
+    assertTrue(keys.isArray());
+    assertFalse(keys.isEmpty());
 
     query = "SELECT jsonExtractKey(complexMapStr, '$.*', 'maxDepth=2') FROM " + getTableName() + " LIMIT 1";
     pinotResponse = postQuery(query);
-    Assert.assertEquals(pinotResponse.get("exceptions").size(), 0);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
     rows = pinotResponse.get("resultTable").get("rows");
-    Assert.assertEquals(rows.size(), 1);
+    assertEquals(rows.size(), 1);
     row = rows.get(0);
-    Assert.assertEquals(row.size(), 1);
+    assertEquals(row.size(), 1);
     keys = row.get(0);
-    Assert.assertTrue(keys.isArray());
-    Assert.assertTrue(keys.size() > 0);
+    assertTrue(keys.isArray());
+    assertFalse(keys.isEmpty());
 
     query = "SELECT jsonExtractKey(complexMapStr, '$.*', 'maxDepth=3') FROM " + getTableName() + " LIMIT 1";
     pinotResponse = postQuery(query);
-    Assert.assertEquals(pinotResponse.get("exceptions").size(), 0);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
     rows = pinotResponse.get("resultTable").get("rows");
-    Assert.assertEquals(rows.size(), 1);
+    assertEquals(rows.size(), 1);
     row = rows.get(0);
-    Assert.assertEquals(row.size(), 1);
+    assertEquals(row.size(), 1);
     keys = row.get(0);
-    Assert.assertTrue(keys.isArray());
-    Assert.assertTrue(keys.size() > 0);
+    assertTrue(keys.isArray());
+    assertFalse(keys.isEmpty());
   }
 
   @Test(dataProvider = "useBothQueryEngines")
@@ -421,180 +436,180 @@ public void testJsonKeysQueriesWithDotNotation(boolean useMultiStageQueryEngine)
     String query =
         "SELECT jsonExtractKey(myMapStr, '$.*', 'maxDepth=1; dotNotation=true') FROM " + getTableName() + " LIMIT 1";
     JsonNode pinotResponse = postQuery(query);
-    Assert.assertEquals(pinotResponse.get("exceptions").size(), 0);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
     JsonNode rows = pinotResponse.get("resultTable").get("rows");
-    Assert.assertEquals(rows.size(), 1);
+    assertEquals(rows.size(), 1);
     JsonNode row = rows.get(0);
-    Assert.assertEquals(row.size(), 1);
+    assertEquals(row.size(), 1);
     JsonNode keys = row.get(0);
-    Assert.assertTrue(keys.isArray());
-    Assert.assertEquals(keys.size(), 2); // k1, k2
+    assertTrue(keys.isArray());
+    assertEquals(keys.size(), 2); // k1, k2
     // Should contain simple key names, not JsonPath format
     List<String> keyList = new ArrayList<>();
     for (JsonNode key : keys) {
       keyList.add(key.asText());
     }
-    Assert.assertTrue(keyList.contains("k1"));
-    Assert.assertTrue(keyList.contains("k2"));
+    assertTrue(keyList.contains("k1"));
+    assertTrue(keyList.contains("k2"));
     // Should NOT contain JsonPath format like "$['k1']"
-    Assert.assertFalse(keyList.contains("$['k1']"));
-    Assert.assertFalse(keyList.contains("$['k2']"));
+    assertFalse(keyList.contains("$['k1']"));
+    assertFalse(keyList.contains("$['k2']"));
 
     // Test optional parameter jsonExtractKey with dotNotation=false (JsonPath format)
     query =
         "SELECT jsonExtractKey(myMapStr, '$.*', 'maxDepth=1; dotNotation=false') FROM " + getTableName() + " LIMIT 1";
     pinotResponse = postQuery(query);
-    Assert.assertEquals(pinotResponse.get("exceptions").size(), 0);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
     rows = pinotResponse.get("resultTable").get("rows");
     row = rows.get(0);
     keys = row.get(0);
-    Assert.assertTrue(keys.isArray());
-    Assert.assertEquals(keys.size(), 2);
+    assertTrue(keys.isArray());
+    assertEquals(keys.size(), 2);
     keyList.clear();
     for (JsonNode key : keys) {
       keyList.add(key.asText());
     }
     // Should contain JsonPath format
-    Assert.assertTrue(keyList.contains("$['k1']"));
-    Assert.assertTrue(keyList.contains("$['k2']"));
+    assertTrue(keyList.contains("$['k1']"));
+    assertTrue(keyList.contains("$['k2']"));
 
     // Test recursive key extraction with dot notation on complex JSON
     query = "SELECT jsonExtractKey(complexMapStr, '$..**', 'maxDepth=2; dotNotation=true') FROM " + getTableName()
         + " LIMIT 1";
     pinotResponse = postQuery(query);
-    Assert.assertEquals(pinotResponse.get("exceptions").size(), 0);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
     rows = pinotResponse.get("resultTable").get("rows");
     row = rows.get(0);
     keys = row.get(0);
-    Assert.assertTrue(keys.isArray());
-    Assert.assertTrue(keys.size() >= 4); // At least k1, k2, k3, k4
+    assertTrue(keys.isArray());
+    assertTrue(keys.size() >= 4); // At least k1, k2, k3, k4
     keyList.clear();
     for (JsonNode key : keys) {
       keyList.add(key.asText());
     }
     // Should contain top-level keys in dot notation
-    Assert.assertTrue(keyList.contains("k1"));
-    Assert.assertTrue(keyList.contains("k2"));
-    Assert.assertTrue(keyList.contains("k3"));
-    Assert.assertTrue(keyList.contains("k4"));
+    assertTrue(keyList.contains("k1"));
+    assertTrue(keyList.contains("k2"));
+    assertTrue(keyList.contains("k3"));
+    assertTrue(keyList.contains("k4"));
 
     // Test recursive key extraction with JsonPath format
     query = "SELECT jsonExtractKey(complexMapStr, '$..**', 'maxDepth=2; dotNotation=false') FROM " + getTableName()
         + " LIMIT 1";
     pinotResponse = postQuery(query);
-    Assert.assertEquals(pinotResponse.get("exceptions").size(), 0);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
     rows = pinotResponse.get("resultTable").get("rows");
     row = rows.get(0);
     keys = row.get(0);
-    Assert.assertTrue(keys.isArray());
+    assertTrue(keys.isArray());
     keyList.clear();
     for (JsonNode key : keys) {
       keyList.add(key.asText());
     }
     // Should contain JsonPath format
-    Assert.assertTrue(keyList.contains("$['k1']"));
-    Assert.assertTrue(keyList.contains("$['k2']"));
-    Assert.assertTrue(keyList.contains("$['k3']"));
-    Assert.assertTrue(keyList.contains("$['k4']"));
+    assertTrue(keyList.contains("$['k1']"));
+    assertTrue(keyList.contains("$['k2']"));
+    assertTrue(keyList.contains("$['k3']"));
+    assertTrue(keyList.contains("$['k4']"));
 
     // Test deeper recursive extraction with dot notation
     query = "SELECT jsonExtractKey(complexMapStr, '$..**', 'maxDepth=3; dotNotation=true') FROM " + getTableName()
         + " LIMIT 1";
     pinotResponse = postQuery(query);
-    Assert.assertEquals(pinotResponse.get("exceptions").size(), 0);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
     rows = pinotResponse.get("resultTable").get("rows");
     row = rows.get(0);
     keys = row.get(0);
-    Assert.assertTrue(keys.isArray());
-    Assert.assertTrue(keys.size() > 4); // Should include nested keys
+    assertTrue(keys.isArray());
+    assertTrue(keys.size() > 4); // Should include nested keys
     keyList.clear();
     for (JsonNode key : keys) {
       keyList.add(key.asText());
     }
     // Should contain nested keys in dot notation
-    Assert.assertTrue(keyList.contains("k4.k4-k1"));
-    Assert.assertTrue(keyList.contains("k4.k4-k2"));
-    Assert.assertTrue(keyList.contains("k4.k4-k3"));
-    Assert.assertTrue(keyList.contains("k4.met"));
+    assertTrue(keyList.contains("k4.k4-k1"));
+    assertTrue(keyList.contains("k4.k4-k2"));
+    assertTrue(keyList.contains("k4.k4-k3"));
+    assertTrue(keyList.contains("k4.met"));
     // Should contain array indices in dot notation
-    Assert.assertTrue(keyList.contains("k3.0"));
-    Assert.assertTrue(keyList.contains("k3.1"));
-    Assert.assertTrue(keyList.contains("k3.2"));
+    assertTrue(keyList.contains("k3.0"));
+    assertTrue(keyList.contains("k3.1"));
+    assertTrue(keyList.contains("k3.2"));
 
     // Test deeper recursive extraction with JsonPath format
     query = "SELECT jsonExtractKey(complexMapStr, '$..**', 'maxDepth=3; dotNotation=false') FROM " + getTableName()
         + " LIMIT 1";
     pinotResponse = postQuery(query);
-    Assert.assertEquals(pinotResponse.get("exceptions").size(), 0);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
     rows = pinotResponse.get("resultTable").get("rows");
     row = rows.get(0);
     keys = row.get(0);
-    Assert.assertTrue(keys.isArray());
+    assertTrue(keys.isArray());
     keyList.clear();
     for (JsonNode key : keys) {
       keyList.add(key.asText());
     }
     // Should contain nested keys in JsonPath format
-    Assert.assertTrue(keyList.contains("$['k4']['k4-k1']"));
-    Assert.assertTrue(keyList.contains("$['k4']['k4-k2']"));
-    Assert.assertTrue(keyList.contains("$['k4']['k4-k3']"));
-    Assert.assertTrue(keyList.contains("$['k4']['met']"));
+    assertTrue(keyList.contains("$['k4']['k4-k1']"));
+    assertTrue(keyList.contains("$['k4']['k4-k2']"));
+    assertTrue(keyList.contains("$['k4']['k4-k3']"));
+    assertTrue(keyList.contains("$['k4']['met']"));
     // Should contain array indices in JsonPath format
-    Assert.assertTrue(keyList.contains("$['k3'][0]"));
-    Assert.assertTrue(keyList.contains("$['k3'][1]"));
-    Assert.assertTrue(keyList.contains("$['k3'][2]"));
+    assertTrue(keyList.contains("$['k3'][0]"));
+    assertTrue(keyList.contains("$['k3'][1]"));
+    assertTrue(keyList.contains("$['k3'][2]"));
 
     // Test specific path extraction with dot notation
     query = "SELECT jsonExtractKey(complexMapStr, '$.k4.*', 'maxDepth=2; dotNotation=true') FROM " + getTableName()
         + " LIMIT 1";
     pinotResponse = postQuery(query);
-    Assert.assertEquals(pinotResponse.get("exceptions").size(), 0);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
     rows = pinotResponse.get("resultTable").get("rows");
     row = rows.get(0);
     keys = row.get(0);
-    Assert.assertTrue(keys.isArray());
-    Assert.assertEquals(keys.size(), 4); // k4-k1, k4-k2, k4-k3, met
+    assertTrue(keys.isArray());
+    assertEquals(keys.size(), 4); // k4-k1, k4-k2, k4-k3, met
     keyList.clear();
     for (JsonNode key : keys) {
       keyList.add(key.asText());
     }
     // Should contain nested keys in dot notation format
-    Assert.assertTrue(keyList.contains("k4.k4-k1"));
-    Assert.assertTrue(keyList.contains("k4.k4-k2"));
-    Assert.assertTrue(keyList.contains("k4.k4-k3"));
-    Assert.assertTrue(keyList.contains("k4.met"));
+    assertTrue(keyList.contains("k4.k4-k1"));
+    assertTrue(keyList.contains("k4.k4-k2"));
+    assertTrue(keyList.contains("k4.k4-k3"));
+    assertTrue(keyList.contains("k4.met"));
 
     // Test backward compatibility - 2-parameter version should default to JsonPath format
     query = "SELECT jsonExtractKey(myMapStr, '$.*') FROM " + getTableName() + " LIMIT 1";
     pinotResponse = postQuery(query);
-    Assert.assertEquals(pinotResponse.get("exceptions").size(), 0);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
     rows = pinotResponse.get("resultTable").get("rows");
     row = rows.get(0);
     keys = row.get(0);
-    Assert.assertTrue(keys.isArray());
+    assertTrue(keys.isArray());
     keyList.clear();
     for (JsonNode key : keys) {
       keyList.add(key.asText());
     }
     // Should default to JsonPath format
-    Assert.assertTrue(keyList.contains("$['k1']"));
-    Assert.assertTrue(keyList.contains("$['k2']"));
+    assertTrue(keyList.contains("$['k1']"));
+    assertTrue(keyList.contains("$['k2']"));
 
     // Test backward compatibility - no dotNotation should default to JsonPath format
     query = "SELECT jsonExtractKey(myMapStr, '$.*', 'maxDepth=1') FROM " + getTableName() + " LIMIT 1";
     pinotResponse = postQuery(query);
-    Assert.assertEquals(pinotResponse.get("exceptions").size(), 0);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
     rows = pinotResponse.get("resultTable").get("rows");
     row = rows.get(0);
     keys = row.get(0);
-    Assert.assertTrue(keys.isArray());
+    assertTrue(keys.isArray());
     keyList.clear();
     for (JsonNode key : keys) {
       keyList.add(key.asText());
     }
     // Should default to JsonPath format
-    Assert.assertTrue(keyList.contains("$['k1']"));
-    Assert.assertTrue(keyList.contains("$['k2']"));
+    assertTrue(keyList.contains("$['k1']"));
+    assertTrue(keyList.contains("$['k2']"));
   }
 
   // --- JsonIndexDistinctOperator tests (useIndexBasedDistinctOperator) ---
@@ -608,79 +623,77 @@ public void testJsonIndexDistinctOperatorDisabledByDefault(boolean useMultiStage
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
 
-    String query = "SELECT DISTINCT jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') FROM "
-        + getTableName() + " ORDER BY jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') LIMIT 10000";
+    String expr = "jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING')";
+    String query = "SELECT DISTINCT " + expr + " FROM " + getTableName() + " ORDER BY " + expr + " LIMIT 10000";
     JsonNode response = postQuery(query);
-    Assert.assertEquals(response.get("exceptions").size(), 0);
+    assertEquals(response.get("exceptions").size(), 0);
     List<String> values = extractOrderedDistinctValues(response);
-    Assert.assertFalse(values.isEmpty(),
-        "Baseline (operator disabled) should return distinct values. Engine="
-            + (useMultiStageQueryEngine ? "MSE" : "SSE"));
+    assertFalse(values.isEmpty(),
+        "Baseline (operator disabled) should return distinct values. Engine=" + (useMultiStageQueryEngine ? "MSE"
+            : "SSE"));
   }
 
   /**
    * With useIndexBasedDistinctOperator, JsonIndexDistinctOperator produces same results as baseline.
-   * Compares ordered rows (not just sets) to verify ORDER BY semantics.
-   * For SSE, verifies numEntriesScannedPostFilter=0 (index path, no doc scan).
+   * Compares ordered rows (not just sets) to verify ORDER BY semantics. The numEntriesScannedPostFilter assertion
+   * pins the operator's per-value iteration: one increment per entry in the value-to-docs map, so the expected
+   * count is NUM_DISTINCT_K1 * getNumAvroFiles().
    */
   @Test(dataProvider = "useBothQueryEngines")
   public void testJsonIndexDistinctOperatorWithPinotJsonIndex(boolean useMultiStageQueryEngine)
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
 
-    String query = "SELECT DISTINCT jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') FROM "
-        + getTableName() + " ORDER BY jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') LIMIT 10000";
+    String expr = "jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING')";
+    String query = "SELECT DISTINCT " + expr + " FROM " + getTableName() + " ORDER BY " + expr + " LIMIT 10000";
 
     JsonNode baselineResponse = postQuery(query);
-    Assert.assertEquals(baselineResponse.get("exceptions").size(), 0);
+    assertTrue(baselineResponse.get("exceptions").isEmpty());
 
-    JsonNode optimizedResponse = postQueryWithOptions(query, USE_INDEX_BASED_DISTINCT_OPERATOR + "=true");
-    Assert.assertEquals(optimizedResponse.get("exceptions").size(), 0);
+    JsonNode optimizedResponse = postQueryWithOptions(query, OPT_USE_INDEX);
+    assertTrue(optimizedResponse.get("exceptions").isEmpty());
 
     List<String> baselineRows = extractOrderedDistinctValues(baselineResponse);
     List<String> optimizedRows = extractOrderedDistinctValues(optimizedResponse);
-    Assert.assertEquals(optimizedRows, baselineRows,
-        "JsonIndexDistinctOperator should produce same ordered results as baseline. "
-            + "Engine=" + (useMultiStageQueryEngine ? "MSE" : "SSE"));
+    assertEquals(optimizedRows, baselineRows,
+        "JsonIndexDistinctOperator should produce same ordered results as baseline. " + "Engine=" + (
+            useMultiStageQueryEngine ? "MSE" : "SSE"));
 
-    if (!useMultiStageQueryEngine) {
-      Assert.assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asLong(), 0L,
-          "JsonIndexDistinctOperator (SSE) uses index only (numEntriesScannedPostFilter=0).");
-    }
+    assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asInt(), NUM_DISTINCT_K1 * getNumAvroFiles());
   }
 
   /**
-   * JsonIndexDistinctOperator with filter produces same ordered results as baseline.
-   * For SSE, verifies numEntriesScannedPostFilter=0 (index path, no doc scan).
+   * JsonIndexDistinctOperator with a WHERE filter on a different path produces the same ordered results as the
+   * baseline. The operator still iterates every entry in the $.k1 value-to-docs map (the WHERE filter is applied
+   * via per-entry bitmap intersection, not by shrinking the map), so numEntriesScannedPostFilter is
+   * NUM_DISTINCT_K1 * getNumAvroFiles() — same as the no-filter case.
    */
   @Test(dataProvider = "useBothQueryEngines")
   public void testJsonIndexDistinctOperatorWithFilter(boolean useMultiStageQueryEngine)
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
 
-    String query = "SELECT DISTINCT jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') FROM "
-        + getTableName() + " WHERE jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k2', 'STRING') = 'value-k2-0'"
-        + " ORDER BY jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') LIMIT 10000";
+    String k1Expr = "jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING')";
+    String k2Expr = "jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k2', 'STRING')";
+    String query = "SELECT DISTINCT " + k1Expr + " FROM " + getTableName() + " WHERE " + k2Expr + " = 'value-k2-0'"
+        + " ORDER BY " + k1Expr + " LIMIT 10000";
     JsonNode baselineResponse = postQuery(query);
-    Assert.assertEquals(baselineResponse.get("exceptions").size(), 0);
+    assertTrue(baselineResponse.get("exceptions").isEmpty());
 
-    JsonNode optimizedResponse = postQueryWithOptions(query, USE_INDEX_BASED_DISTINCT_OPERATOR + "=true");
-    Assert.assertEquals(optimizedResponse.get("exceptions").size(), 0);
+    JsonNode optimizedResponse = postQueryWithOptions(query, OPT_USE_INDEX);
+    assertTrue(optimizedResponse.get("exceptions").isEmpty());
 
     List<String> baselineRows = extractOrderedDistinctValues(baselineResponse);
     List<String> optimizedRows = extractOrderedDistinctValues(optimizedResponse);
-    Assert.assertEquals(optimizedRows, baselineRows,
-        "JsonIndexDistinctOperator with filter should match baseline. Engine="
-            + (useMultiStageQueryEngine ? "MSE" : "SSE"));
+    assertEquals(optimizedRows, baselineRows,
+        "JsonIndexDistinctOperator with filter should match baseline. Engine=" + (useMultiStageQueryEngine ? "MSE"
+            : "SSE"));
 
-    if (!useMultiStageQueryEngine) {
-      Assert.assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asLong(), 0L,
-          "JsonIndexDistinctOperator with filter (SSE) uses index only (numEntriesScannedPostFilter=0).");
-    }
+    assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asInt(), NUM_DISTINCT_K1 * getNumAvroFiles());
   }
 
   /**
-   * Verifies that JsonIndexDistinctOperator correctly materializes the defaultValue for docs where the JSON path
+   * Verifies that JsonIndexDistinctOperator correctly materializes the defaultValue for docs where the JSON path
    * is absent, matching baseline JsonExtractIndexTransformFunction behavior.
    */
   @Test(dataProvider = "useBothQueryEngines")
@@ -689,24 +702,25 @@ public void testJsonIndexDistinctOperatorWithDefaultValue(boolean useMultiStageQ
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
 
     // Query a non-existent path with a defaultValue — all docs should produce the default
-    String query = "SELECT DISTINCT jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME
-        + ", '$.nonexistent', 'STRING', 'N/A') FROM " + getTableName()
-        + " ORDER BY jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.nonexistent', 'STRING', 'N/A') LIMIT 10";
+    String expr = "jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.nonexistent', 'STRING', 'N/A')";
+    String query = "SELECT DISTINCT " + expr + " FROM " + getTableName() + " ORDER BY " + expr + " LIMIT 10";
 
     JsonNode baselineResponse = postQuery(query);
-    Assert.assertEquals(baselineResponse.get("exceptions").size(), 0);
+    assertTrue(baselineResponse.get("exceptions").isEmpty());
 
-    JsonNode optimizedResponse = postQueryWithOptions(query, USE_INDEX_BASED_DISTINCT_OPERATOR + "=true");
-    Assert.assertEquals(optimizedResponse.get("exceptions").size(), 0);
+    JsonNode optimizedResponse = postQueryWithOptions(query, OPT_USE_INDEX);
+    assertTrue(optimizedResponse.get("exceptions").isEmpty());
 
     List<String> baselineRows = extractOrderedDistinctValues(baselineResponse);
     List<String> optimizedRows = extractOrderedDistinctValues(optimizedResponse);
-    Assert.assertEquals(optimizedRows, baselineRows,
-        "JsonIndexDistinctOperator with defaultValue should match baseline. Engine="
-            + (useMultiStageQueryEngine ? "MSE" : "SSE"));
-    Assert.assertTrue(optimizedRows.contains("N/A"),
-        "defaultValue 'N/A' should appear in results for non-existent path. Engine="
-            + (useMultiStageQueryEngine ? "MSE" : "SSE"));
+    assertEquals(optimizedRows, baselineRows,
+        "JsonIndexDistinctOperator with defaultValue should match baseline. Engine=" + (useMultiStageQueryEngine ? "MSE"
+            : "SSE"));
+    assertTrue(optimizedRows.contains("N/A"),
+        "defaultValue 'N/A' should appear in results for non-existent path. Engine=" + (useMultiStageQueryEngine ? "MSE"
+            : "SSE"));
+
+    assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asInt(), 0);
   }
 
   /**
@@ -720,262 +734,259 @@ public void testJsonIndexDistinctOperatorMissingPathNoDefault(boolean useMultiSt
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
 
     // Query a non-existent path WITHOUT defaultValue — should produce an error
-    String query = "SELECT DISTINCT jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME
-        + ", '$.nonexistent', 'STRING') FROM " + getTableName() + " LIMIT 10";
+    String query = "SELECT DISTINCT jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.nonexistent', 'STRING') FROM "
+        + getTableName() + " LIMIT 10";
 
     // Baseline also throws for missing path without defaultValue
     JsonNode baselineResponse = postQuery(query);
-    Assert.assertTrue(baselineResponse.get("exceptions").size() > 0,
+    assertFalse(baselineResponse.get("exceptions").isEmpty(),
         "Baseline should throw for missing JSON path without defaultValue");
 
-    JsonNode optimizedResponse = postQueryWithOptions(query, USE_INDEX_BASED_DISTINCT_OPERATOR + "=true");
-    Assert.assertTrue(optimizedResponse.get("exceptions").size() > 0,
+    JsonNode optimizedResponse = postQueryWithOptions(query, OPT_USE_INDEX);
+    assertFalse(optimizedResponse.get("exceptions").isEmpty(),
         "JsonIndexDistinctOperator should throw for missing JSON path without defaultValue");
   }
 
-  // --- Same-path JSON_MATCH predicate tests (trigger getMatchingDistinctValues fast path) ---
+  // --- 5-arg jsonExtractIndex(column, path, type, default, filterJsonExpression) tests ---
+  //
+  // The 5-arg form pushes the JSON_MATCH-style filter into the JSON-index lookup itself. Each filter that doesn't
+  // match every doc causes `handleMissingDocs` (or the transform's per-row default branch) to add the literal
+  // default to the distinct set, so the expected result is `{matching values} ∪ {default}`.
 
-  /**
-   * Same-path REGEXP_LIKE: fully pushed down, single dict scan, no posting list reads.
-   */
+  /// REGEXP_LIKE pushed down via the 5-arg filterJsonExpression.
   @Test(dataProvider = "useBothQueryEngines")
   public void testJsonIndexDistinctSamePathRegexpLike(boolean useMultiStageQueryEngine)
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
 
-    // REGEXP_LIKE on $.k1 matching a subset of values
-    String query = "SELECT DISTINCT jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') FROM "
-        + getTableName() + " WHERE JSON_MATCH(" + MY_MAP_STR_FIELD_NAME
-        + ", 'REGEXP_LIKE(\"$.k1\", ''value-k1-[0-9]'')')"
-        + " ORDER BY jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') LIMIT 10000";
+    String expr = "jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME
+        + ", '$.k1', 'STRING', 'missing', 'REGEXP_LIKE(\"$.k1\", ''value-k1-[0-9]'')')";
+    String query = "SELECT DISTINCT " + expr + " FROM " + getTableName() + " ORDER BY " + expr + " LIMIT 10000";
 
     JsonNode baselineResponse = postQuery(query);
-    Assert.assertEquals(baselineResponse.get("exceptions").size(), 0);
+    assertTrue(baselineResponse.get("exceptions").isEmpty());
 
-    JsonNode optimizedResponse = postQueryWithOptions(query, USE_INDEX_BASED_DISTINCT_OPERATOR + "=true");
-    Assert.assertEquals(optimizedResponse.get("exceptions").size(), 0);
+    JsonNode optimizedResponse = postQueryWithOptions(query, OPT_USE_INDEX);
+    assertTrue(optimizedResponse.get("exceptions").isEmpty());
 
     List<String> baselineRows = extractOrderedDistinctValues(baselineResponse);
     List<String> optimizedRows = extractOrderedDistinctValues(optimizedResponse);
-    Assert.assertFalse(baselineRows.isEmpty(), "REGEXP_LIKE should match single-digit k1 values");
-    Assert.assertEquals(optimizedRows, baselineRows,
-        "Same-path REGEXP_LIKE fast path should match baseline");
+    assertEquals(optimizedRows, baselineRows, "5-arg REGEXP_LIKE should match baseline");
+    // Single-digit suffix matches value-k1-0..value-k1-9 (10 values); non-matching docs add 'missing'.
+    assertEquals(optimizedRows.size(), 11);
+    assertTrue(optimizedRows.contains("missing"));
 
-    if (!useMultiStageQueryEngine) {
-      Assert.assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asLong(), 0L);
-    }
+    assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asInt(), 10 * getNumAvroFiles());
   }
 
-  /**
-   * Same-path EQ: fully pushed down.
-   */
+  /// EQ pushed down via the 5-arg filterJsonExpression.
   @Test(dataProvider = "useBothQueryEngines")
   public void testJsonIndexDistinctSamePathEq(boolean useMultiStageQueryEngine)
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
 
-    String query = "SELECT DISTINCT jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') FROM "
-        + getTableName() + " WHERE JSON_MATCH(" + MY_MAP_STR_FIELD_NAME
-        + ", '\"$.k1\" = ''value-k1-0''')"
-        + " ORDER BY jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') LIMIT 10000";
+    String expr = "jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME
+        + ", '$.k1', 'STRING', 'missing', '\"$.k1\" = ''value-k1-0''')";
+    String query = "SELECT DISTINCT " + expr + " FROM " + getTableName() + " ORDER BY " + expr + " LIMIT 10000";
 
     JsonNode baselineResponse = postQuery(query);
-    Assert.assertEquals(baselineResponse.get("exceptions").size(), 0);
+    assertTrue(baselineResponse.get("exceptions").isEmpty());
 
-    JsonNode optimizedResponse = postQueryWithOptions(query, USE_INDEX_BASED_DISTINCT_OPERATOR + "=true");
-    Assert.assertEquals(optimizedResponse.get("exceptions").size(), 0);
+    JsonNode optimizedResponse = postQueryWithOptions(query, OPT_USE_INDEX);
+    assertTrue(optimizedResponse.get("exceptions").isEmpty());
 
     List<String> baselineRows = extractOrderedDistinctValues(baselineResponse);
     List<String> optimizedRows = extractOrderedDistinctValues(optimizedResponse);
-    Assert.assertEquals(optimizedRows, baselineRows,
-        "Same-path EQ fast path should match baseline");
-    Assert.assertTrue(optimizedRows.contains("value-k1-0"));
+    assertEquals(optimizedRows, baselineRows, "5-arg EQ should match baseline");
+    assertEquals(optimizedRows, List.of("missing", "value-k1-0"));
 
-    if (!useMultiStageQueryEngine) {
-      Assert.assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asLong(), 0L);
-    }
+    assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asInt(), getNumAvroFiles());
   }
 
-  /**
-   * Same-path NOT_EQ: fully pushed down.
-   */
+  /// NOT_EQ pushed down via the 5-arg filterJsonExpression.
   @Test(dataProvider = "useBothQueryEngines")
   public void testJsonIndexDistinctSamePathNotEq(boolean useMultiStageQueryEngine)
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
 
-    String query = "SELECT DISTINCT jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') FROM "
-        + getTableName() + " WHERE JSON_MATCH(" + MY_MAP_STR_FIELD_NAME
-        + ", '\"$.k1\" != ''value-k1-0''')"
-        + " ORDER BY jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') LIMIT 10000";
+    String expr = "jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME
+        + ", '$.k1', 'STRING', 'missing', '\"$.k1\" != ''value-k1-0''')";
+    String query = "SELECT DISTINCT " + expr + " FROM " + getTableName() + " ORDER BY " + expr + " LIMIT 10000";
 
     JsonNode baselineResponse = postQuery(query);
-    Assert.assertEquals(baselineResponse.get("exceptions").size(), 0);
+    assertTrue(baselineResponse.get("exceptions").isEmpty());
 
-    JsonNode optimizedResponse = postQueryWithOptions(query, USE_INDEX_BASED_DISTINCT_OPERATOR + "=true");
-    Assert.assertEquals(optimizedResponse.get("exceptions").size(), 0);
+    JsonNode optimizedResponse = postQueryWithOptions(query, OPT_USE_INDEX);
+    assertTrue(optimizedResponse.get("exceptions").isEmpty());
 
     List<String> baselineRows = extractOrderedDistinctValues(baselineResponse);
     List<String> optimizedRows = extractOrderedDistinctValues(optimizedResponse);
-    Assert.assertEquals(optimizedRows, baselineRows,
-        "Same-path NOT_EQ fast path should match baseline");
-    Assert.assertFalse(optimizedRows.contains("value-k1-0"));
-
-    if (!useMultiStageQueryEngine) {
-      Assert.assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asLong(), 0L);
-    }
+    assertEquals(optimizedRows, baselineRows, "5-arg NOT_EQ should match baseline");
+    // NUM_DISTINCT_K1 - 1 matching k1 values (everything except value-k1-0) + 'missing' for the excluded docs.
+    assertEquals(optimizedRows.size(), NUM_DISTINCT_K1);
+    assertFalse(optimizedRows.contains("value-k1-0"));
+    assertTrue(optimizedRows.contains("missing"));
+
+    assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asInt(),
+        (NUM_DISTINCT_K1 - 1) * getNumAvroFiles());
   }
 
-  /**
-   * Same-path IN: fully pushed down.
-   */
+  /// IN pushed down via the 5-arg filterJsonExpression.
   @Test(dataProvider = "useBothQueryEngines")
   public void testJsonIndexDistinctSamePathIn(boolean useMultiStageQueryEngine)
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
 
-    String query = "SELECT DISTINCT jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') FROM "
-        + getTableName() + " WHERE JSON_MATCH(" + MY_MAP_STR_FIELD_NAME
-        + ", '\"$.k1\" IN (''value-k1-0'', ''value-k1-1'', ''value-k1-2'')')"
-        + " ORDER BY jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') LIMIT 10000";
+    String expr = "jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME
+        + ", '$.k1', 'STRING', 'missing', '\"$.k1\" IN (''value-k1-0'', ''value-k1-1'', ''value-k1-2'')')";
+    String query = "SELECT DISTINCT " + expr + " FROM " + getTableName() + " ORDER BY " + expr + " LIMIT 10000";
 
     JsonNode baselineResponse = postQuery(query);
-    Assert.assertEquals(baselineResponse.get("exceptions").size(), 0);
+    assertTrue(baselineResponse.get("exceptions").isEmpty());
 
-    JsonNode optimizedResponse = postQueryWithOptions(query, USE_INDEX_BASED_DISTINCT_OPERATOR + "=true");
-    Assert.assertEquals(optimizedResponse.get("exceptions").size(), 0);
+    JsonNode optimizedResponse = postQueryWithOptions(query, OPT_USE_INDEX);
+    assertTrue(optimizedResponse.get("exceptions").isEmpty());
 
     List<String> baselineRows = extractOrderedDistinctValues(baselineResponse);
     List<String> optimizedRows = extractOrderedDistinctValues(optimizedResponse);
-    Assert.assertEquals(optimizedRows, baselineRows,
-        "Same-path IN fast path should match baseline");
-    Assert.assertEquals(optimizedRows.size(), 3);
+    assertEquals(optimizedRows, baselineRows, "5-arg IN should match baseline");
+    assertEquals(optimizedRows, List.of("missing", "value-k1-0", "value-k1-1", "value-k1-2"));
 
-    if (!useMultiStageQueryEngine) {
-      Assert.assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asLong(), 0L);
-    }
+    assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asInt(), 3 * getNumAvroFiles());
   }
 
-  /**
-   * Same-path NOT_IN: fully pushed down.
-   */
+  /// NOT_IN pushed down via the 5-arg filterJsonExpression.
   @Test(dataProvider = "useBothQueryEngines")
   public void testJsonIndexDistinctSamePathNotIn(boolean useMultiStageQueryEngine)
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
 
-    String query = "SELECT DISTINCT jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') FROM "
-        + getTableName() + " WHERE JSON_MATCH(" + MY_MAP_STR_FIELD_NAME
-        + ", '\"$.k1\" NOT IN (''value-k1-0'', ''value-k1-1'')')"
-        + " ORDER BY jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') LIMIT 10000";
+    String expr = "jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME
+        + ", '$.k1', 'STRING', 'missing', '\"$.k1\" NOT IN (''value-k1-0'', ''value-k1-1'')')";
+    String query = "SELECT DISTINCT " + expr + " FROM " + getTableName() + " ORDER BY " + expr + " LIMIT 10000";
 
     JsonNode baselineResponse = postQuery(query);
-    Assert.assertEquals(baselineResponse.get("exceptions").size(), 0);
+    assertTrue(baselineResponse.get("exceptions").isEmpty());
 
-    JsonNode optimizedResponse = postQueryWithOptions(query, USE_INDEX_BASED_DISTINCT_OPERATOR + "=true");
-    Assert.assertEquals(optimizedResponse.get("exceptions").size(), 0);
+    JsonNode optimizedResponse = postQueryWithOptions(query, OPT_USE_INDEX);
+    assertTrue(optimizedResponse.get("exceptions").isEmpty());
 
     List<String> baselineRows = extractOrderedDistinctValues(baselineResponse);
     List<String> optimizedRows = extractOrderedDistinctValues(optimizedResponse);
-    Assert.assertEquals(optimizedRows, baselineRows,
-        "Same-path NOT_IN fast path should match baseline");
-    Assert.assertFalse(optimizedRows.contains("value-k1-0"));
-    Assert.assertFalse(optimizedRows.contains("value-k1-1"));
-
-    if (!useMultiStageQueryEngine) {
-      Assert.assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asLong(), 0L);
-    }
+    assertEquals(optimizedRows, baselineRows, "5-arg NOT_IN should match baseline");
+    // NUM_DISTINCT_K1 - 2 matching k1 values + 'missing' for the excluded docs.
+    assertEquals(optimizedRows.size(), NUM_DISTINCT_K1 - 1);
+    assertFalse(optimizedRows.contains("value-k1-0"));
+    assertFalse(optimizedRows.contains("value-k1-1"));
+    assertTrue(optimizedRows.contains("missing"));
+
+    assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asInt(),
+        (NUM_DISTINCT_K1 - 2) * getNumAvroFiles());
   }
 
-  /**
-   * Same-path IS NOT NULL: fully pushed down.
-   */
+  /// IS NOT NULL pushed down via the 5-arg filterJsonExpression. The filter matches every doc (every row has
+  /// `$.k1`), so the literal default is never added and the result is exactly the distinct `$.k1` set.
   @Test(dataProvider = "useBothQueryEngines")
   public void testJsonIndexDistinctSamePathIsNotNull(boolean useMultiStageQueryEngine)
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
 
-    String query = "SELECT DISTINCT jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') FROM "
-        + getTableName() + " WHERE JSON_MATCH(" + MY_MAP_STR_FIELD_NAME + ", '\"$.k1\" IS NOT NULL')"
-        + " ORDER BY jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') LIMIT 10000";
+    String expr = "jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME
+        + ", '$.k1', 'STRING', 'missing', '\"$.k1\" IS NOT NULL')";
+    String query = "SELECT DISTINCT " + expr + " FROM " + getTableName() + " ORDER BY " + expr + " LIMIT 10000";
 
     JsonNode baselineResponse = postQuery(query);
-    Assert.assertEquals(baselineResponse.get("exceptions").size(), 0);
+    assertTrue(baselineResponse.get("exceptions").isEmpty());
 
-    JsonNode optimizedResponse = postQueryWithOptions(query, USE_INDEX_BASED_DISTINCT_OPERATOR + "=true");
-    Assert.assertEquals(optimizedResponse.get("exceptions").size(), 0);
+    JsonNode optimizedResponse = postQueryWithOptions(query, OPT_USE_INDEX);
+    assertTrue(optimizedResponse.get("exceptions").isEmpty());
 
     List<String> baselineRows = extractOrderedDistinctValues(baselineResponse);
     List<String> optimizedRows = extractOrderedDistinctValues(optimizedResponse);
-    Assert.assertEquals(optimizedRows, baselineRows,
-        "Same-path IS NOT NULL fast path should match baseline");
-    Assert.assertEquals(optimizedRows.size(), NUM_DOCS_PER_SEGMENT,
-        "IS NOT NULL should return all values since every doc has $.k1");
+    assertEquals(optimizedRows, baselineRows, "5-arg IS NOT NULL should match baseline");
+    assertEquals(optimizedRows.size(), NUM_DISTINCT_K1);
+    assertFalse(optimizedRows.contains("missing"),
+        "Filter matches every doc, so the default literal should never be added");
 
-    if (!useMultiStageQueryEngine) {
-      Assert.assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asLong(), 0L);
-    }
+    assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asInt(), NUM_DISTINCT_K1 * getNumAvroFiles());
   }
 
-  /**
-   * Same-path REGEXP_LIKE with 4-arg form (defaultValue): fully pushed down fast path still works with defaults.
-   */
+  /// 5-arg filterJsonExpression with LIMIT (no ORDER BY): only the LIMIT row-count is enforced.
   @Test(dataProvider = "useBothQueryEngines")
-  public void testJsonIndexDistinctSamePathRegexpLikeWithDefault(boolean useMultiStageQueryEngine)
+  public void testJsonIndexDistinctSamePathWithLimit(boolean useMultiStageQueryEngine)
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
 
-    String query = "SELECT DISTINCT jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME
-        + ", '$.k1', 'STRING', 'fallback') FROM "
-        + getTableName() + " WHERE JSON_MATCH(" + MY_MAP_STR_FIELD_NAME
-        + ", 'REGEXP_LIKE(\"$.k1\", ''value-k1-[0-9]'')')"
-        + " ORDER BY jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING', 'fallback') LIMIT 10000";
+    String expr = "jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME
+        + ", '$.k1', 'STRING', 'missing', '\"$.k1\" IS NOT NULL')";
+    String query = "SELECT DISTINCT " + expr + " FROM " + getTableName() + " LIMIT 5";
 
     JsonNode baselineResponse = postQuery(query);
-    Assert.assertEquals(baselineResponse.get("exceptions").size(), 0);
+    assertTrue(baselineResponse.get("exceptions").isEmpty());
 
-    JsonNode optimizedResponse = postQueryWithOptions(query, USE_INDEX_BASED_DISTINCT_OPERATOR + "=true");
-    Assert.assertEquals(optimizedResponse.get("exceptions").size(), 0);
+    JsonNode optimizedResponse = postQueryWithOptions(query, OPT_USE_INDEX);
+    assertTrue(optimizedResponse.get("exceptions").isEmpty());
 
-    List<String> baselineRows = extractOrderedDistinctValues(baselineResponse);
-    List<String> optimizedRows = extractOrderedDistinctValues(optimizedResponse);
-    Assert.assertEquals(optimizedRows, baselineRows,
-        "Same-path REGEXP_LIKE 4-arg fast path should match baseline");
-    // The default should NOT appear since the filter only matches docs that HAVE $.k1
-    Assert.assertFalse(optimizedRows.contains("fallback"),
-        "Same-path filter ensures all matching docs have the path, so no default should appear");
+    assertEquals(extractOrderedDistinctValues(baselineResponse).size(), 5);
+    assertEquals(extractOrderedDistinctValues(optimizedResponse).size(), 5);
 
+    // TODO: Fix LIMIT push down for MSE
     if (!useMultiStageQueryEngine) {
-      Assert.assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asLong(), 0L);
+      assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asInt(), 5 * getNumAvroFiles());
     }
   }
 
-  /**
-   * Same-path REGEXP_LIKE without ORDER BY: verify LIMIT is respected with fast path.
-   */
+  /// Cross-path 5-arg form: filter on `$.k2`, extract `$.k1`. `getMatchingFlattenedDocsMap` applies the filter
+  /// independently of the extracted path, so the returned value-to-docs map holds the `$.k1` values for only the
+  /// docs satisfying `$.k2 = 'value-k2-0'`. `$.k2` is unique across the segment, so exactly one doc matches per
+  /// segment, and that doc's `$.k1` is `value-k1-0`. Every other doc falls through to the literal default.
   @Test(dataProvider = "useBothQueryEngines")
-  public void testJsonIndexDistinctSamePathWithLimit(boolean useMultiStageQueryEngine)
+  public void testJsonIndexDistinctCrossPathFilter(boolean useMultiStageQueryEngine)
       throws Exception {
     setUseMultiStageQueryEngine(useMultiStageQueryEngine);
 
-    String query = "SELECT DISTINCT jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME + ", '$.k1', 'STRING') FROM "
-        + getTableName() + " WHERE JSON_MATCH(" + MY_MAP_STR_FIELD_NAME
-        + ", '\"$.k1\" IS NOT NULL') LIMIT 5";
+    String expr = "jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME
+        + ", '$.k1', 'STRING', 'missing', '\"$.k2\" = ''value-k2-0''')";
+    String query = "SELECT DISTINCT " + expr + " FROM " + getTableName() + " ORDER BY " + expr + " LIMIT 10000";
 
     JsonNode baselineResponse = postQuery(query);
-    Assert.assertEquals(baselineResponse.get("exceptions").size(), 0);
+    assertTrue(baselineResponse.get("exceptions").isEmpty());
 
-    JsonNode optimizedResponse = postQueryWithOptions(query, USE_INDEX_BASED_DISTINCT_OPERATOR + "=true");
-    Assert.assertEquals(optimizedResponse.get("exceptions").size(), 0);
+    JsonNode optimizedResponse = postQueryWithOptions(query, OPT_USE_INDEX);
+    assertTrue(optimizedResponse.get("exceptions").isEmpty());
 
     List<String> baselineRows = extractOrderedDistinctValues(baselineResponse);
     List<String> optimizedRows = extractOrderedDistinctValues(optimizedResponse);
-    Assert.assertEquals(optimizedRows.size(), 5, "LIMIT 5 should be respected by fast path");
-    Assert.assertEquals(baselineRows.size(), 5);
+    assertEquals(optimizedRows, baselineRows, "Cross-path 5-arg filter should match baseline");
+    assertEquals(optimizedRows, List.of("missing", "value-k1-0"));
 
-    if (!useMultiStageQueryEngine) {
-      Assert.assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asLong(), 0L);
-    }
+    assertEquals(optimizedResponse.get("numEntriesScannedPostFilter").asInt(), getNumAvroFiles());
+  }
+
+  /// The new `jsonIndexDistinctSkipMissingPath` query option suppresses `handleMissingDocs`, so the literal default
+  /// never appears in the result — even when the 5-arg filter excludes most docs. Same EQ-filter query as
+  /// `testJsonIndexDistinctSamePathEq`: without the option the operator returns `[missing, value-k1-0]`; with the
+  /// option it collapses to `[value-k1-0]`.
+  @Test(dataProvider = "useBothQueryEngines")
+  public void testJsonIndexDistinctSkipMissingPath(boolean useMultiStageQueryEngine)
+      throws Exception {
+    setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+
+    String expr = "jsonExtractIndex(" + MY_MAP_STR_FIELD_NAME
+        + ", '$.k1', 'STRING', 'missing', '\"$.k1\" = ''value-k1-0''')";
+    String query = "SELECT DISTINCT " + expr + " FROM " + getTableName() + " ORDER BY " + expr + " LIMIT 10000";
+
+    // Operator without skip: same shape as testJsonIndexDistinctSamePathEq — default appears.
+    JsonNode withoutSkip = postQueryWithOptions(query, OPT_USE_INDEX);
+    assertTrue(withoutSkip.get("exceptions").isEmpty());
+    assertEquals(extractOrderedDistinctValues(withoutSkip), List.of("missing", "value-k1-0"));
+
+    // Operator with skip: default is never added.
+    JsonNode withSkip = postQueryWithOptions(query, OPT_USE_INDEX_SKIP_MISSING_PATH);
+    assertTrue(withSkip.get("exceptions").isEmpty());
+    assertEquals(extractOrderedDistinctValues(withSkip), List.of("value-k1-0"));
+
+    assertEquals(withSkip.get("numEntriesScannedPostFilter").asInt(), getNumAvroFiles());
   }
 
   private static List<String> extractOrderedDistinctValues(JsonNode response) {
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
index 840dd97db515..a61a4ba93b3c 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
@@ -708,6 +708,11 @@ public static class QueryOptionKey {
          * 30 for dictCard <= 1K, 10 for dictCard <= 10K, 6 for dictCard > 10K.
          */
         public static final String INVERTED_INDEX_DISTINCT_COST_RATIO = "invertedIndexDistinctCostRatio";
+        /// When true, `JsonIndexDistinctOperator` skips missing-path handling — it does not add a 4-arg default
+        /// value, does not add null (even when `nullHandling` is enabled), and does not throw `Illegal Json Path`.
+        /// The result is purely the distinct values produced by the JSON-index lookup (filtered by the optional
+        /// 5-arg `jsonFilterExpression` and intersected with the `WHERE`-clause filter).
+        public static final String JSON_INDEX_DISTINCT_SKIP_MISSING_PATH = "jsonIndexDistinctSkipMissingPath";
         public static final String SCAN_STAR_TREE_NODES = "scanStarTreeNodes";
         public static final String ROUTING_OPTIONS = "routingOptions";
         public static final String TABLE_SAMPLER = "sampler";