From 5e3793840a3bf4f2efc5a9dd58a95a5b13c9768e Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" Date: Wed, 15 Apr 2026 19:23:45 -0700 Subject: [PATCH] Accept any negative cardinality as unknown in BloomIndexType Update BloomIndexType to use the relaxed check (`cardinality < 0`) instead of comparing against `Constants.UNKNOWN_CARDINALITY` exactly, so bloom index creation is robust regardless of the sentinel value stored in segment metadata. Also add a TODO to consider changing `UNKNOWN_CARDINALITY` from `Integer.MIN_VALUE` to `-1` after the 1.6.0 release for better readability in stored metadata. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../segment/local/segment/index/bloom/BloomIndexType.java | 3 +-- .../src/main/java/org/apache/pinot/segment/spi/Constants.java | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/bloom/BloomIndexType.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/bloom/BloomIndexType.java index 945430d327d7..ee1b6bc6a6a0 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/bloom/BloomIndexType.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/bloom/BloomIndexType.java @@ -28,7 +28,6 @@ import org.apache.pinot.segment.local.segment.index.loader.bloomfilter.BloomFilterHandler; import org.apache.pinot.segment.local.segment.index.readers.bloom.BloomFilterReaderFactory; import org.apache.pinot.segment.spi.ColumnMetadata; -import org.apache.pinot.segment.spi.Constants; import org.apache.pinot.segment.spi.V1Constants; import org.apache.pinot.segment.spi.creator.IndexCreationContext; import org.apache.pinot.segment.spi.index.AbstractIndexType; @@ -99,7 +98,7 @@ protected ColumnConfigDeserializer createDeserializerForLegac @Override public BloomFilterCreator createIndexCreator(IndexCreationContext context, BloomFilterConfig indexConfig) { int cardinality = context.getCardinality(); - if (cardinality == Constants.UNKNOWN_CARDINALITY) { + if (cardinality < 0) { // This is when we're creating bloom filters for non dictionary encoded cols where exact cardinality is not // known beforehand. // Since this field is only used for the estimate cardinality, using total # of entries instead diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/Constants.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/Constants.java index 911bde9a421e..b128f1923784 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/Constants.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/Constants.java @@ -23,6 +23,9 @@ private Constants() { } public static final int EOF = Integer.MIN_VALUE; + + // TODO: Consider modifying it to -1 to be more readable when stored in segment metadata. Reader accepts all negative + // values as unknown in release 1.6.0. Change writer side after 1.6.0 release. public static final int UNKNOWN_CARDINALITY = Integer.MIN_VALUE; public static final String HLL_LOG2M_KEY = "log2m";