diff --git a/docs/changelog/138524.yaml b/docs/changelog/138524.yaml new file mode 100644 index 0000000000000..91522d3a0ba62 --- /dev/null +++ b/docs/changelog/138524.yaml @@ -0,0 +1,16 @@ +pr: 138524 +summary: Remove feature flag to enable binary doc value compression +area: Mapping +type: feature +issues: [] +highlight: + title: Remove feature flag to enable binary doc value compression + body: |- + Add compression for binary doc values using Zstd and blocks with a variable number of values. + + Block-wise LZ4 compression was previously added to Lucene in LUCENE-9211 and removed in LUCENE-9378 due to query performance issues. That earlier approach stored a constant number of values per block (specifically 32 values). This made it easy to map a given value index (e.g., docId) to the block containing it by doing blockId = docId / 32. + Unfortunately, if values were very large, each block still had to hold exactly 32 values, and (de)compressing a block could cause very high memory usage. As a result, we had to keep the number of values per block small, meaning that in the average case, a block was much smaller than ideal. + To overcome the issues of blocks with a constant number of values, this PR adds block-wise compression with a variable number of values per block. It stores a minimum of 1 document per block and stops adding values when the size of a block exceeds a threshold or the number of values exceeds a threshold. + Like the previous version, it stores an array of addresses for the start of each block. Additionally, it stores a parallel array with the docId at the start of each block. When looking up a given docId, if it is not in the current block, we binary search the array of docId starts to find the blockId containing the value. We then look up the address of the block. After this, decompression works very similarly to the code from LUCENE-9211; the main difference is that Zstd(1) is used instead of LZ4. 
+ + notable: true diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index dbfbed346bc07..b479ff2a475d8 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -13,7 +13,6 @@ import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; -import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; @@ -37,8 +36,6 @@ */ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValuesFormat { - public static final boolean BINARY_DV_COMPRESSION_FEATURE_FLAG = new FeatureFlag("binary_dv_compression").isEnabled(); - static final int NUMERIC_BLOCK_SHIFT = 7; public static final int NUMERIC_BLOCK_SIZE = 1 << NUMERIC_BLOCK_SHIFT; static final int NUMERIC_BLOCK_MASK = NUMERIC_BLOCK_SIZE - 1; @@ -145,7 +142,7 @@ public ES819TSDBDocValuesFormat() { DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT, - BINARY_DV_COMPRESSION_FEATURE_FLAG ? 
BinaryDVCompressionMode.COMPRESSED_ZSTD_LEVEL_1 : BinaryDVCompressionMode.NO_COMPRESS, + BinaryDVCompressionMode.COMPRESSED_ZSTD_LEVEL_1, true ); } diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java index 7f65cb9af3ad5..3bd8d69dcc91d 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java @@ -121,13 +121,9 @@ protected Codec getCodec() { return codec; } - public void testBinaryCompressionFeatureFlag() { + public void testBinaryCompressionEnabled() { ES819TSDBDocValuesFormat docValueFormat = new ES819TSDBDocValuesFormat(); - if (ES819TSDBDocValuesFormat.BINARY_DV_COMPRESSION_FEATURE_FLAG) { - assertThat(docValueFormat.binaryDVCompressionMode, equalTo(BinaryDVCompressionMode.COMPRESSED_ZSTD_LEVEL_1)); - } else { - assertThat(docValueFormat.binaryDVCompressionMode, equalTo(BinaryDVCompressionMode.NO_COMPRESS)); - } + assertThat(docValueFormat.binaryDVCompressionMode, equalTo(BinaryDVCompressionMode.COMPRESSED_ZSTD_LEVEL_1)); } public void testBlockWiseBinary() throws Exception { diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java index a78024d7fca9c..6f0e04a215a35 100644 --- a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java +++ b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java @@ -27,8 +27,7 @@ public enum FeatureFlag { ), RANDOM_SAMPLING("es.random_sampling_feature_flag_enabled=true", Version.fromString("9.2.0"), null), INFERENCE_API_CCM("es.inference_api_ccm_feature_flag_enabled=true", Version.fromString("9.3.0"), null), - 
GENERIC_VECTOR_FORMAT("es.generic_vector_format_feature_flag_enabled=true", Version.fromString("9.3.0"), null), - BINARY_DOC_VALUE_COMPRESSION("es.binary_dv_compression_feature_flag_enabled=true", Version.fromString("9.3.0"), null); + GENERIC_VECTOR_FORMAT("es.generic_vector_format_feature_flag_enabled=true", Version.fromString("9.3.0"), null); public final String systemProperty; public final Version from; diff --git a/x-pack/plugin/logsdb/src/yamlRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbTestSuiteIT.java b/x-pack/plugin/logsdb/src/yamlRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbTestSuiteIT.java index bfa059d428bc0..5a1ccb4e08140 100644 --- a/x-pack/plugin/logsdb/src/yamlRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbTestSuiteIT.java +++ b/x-pack/plugin/logsdb/src/yamlRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbTestSuiteIT.java @@ -37,7 +37,6 @@ public class LogsdbTestSuiteIT extends ESClientYamlSuiteTestCase { .setting("xpack.security.autoconfiguration.enabled", "false") .setting("xpack.license.self_generated.type", "trial") .feature(FeatureFlag.DOC_VALUES_SKIPPER) - .feature(FeatureFlag.BINARY_DOC_VALUE_COMPRESSION) .build(); public LogsdbTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) {