From 0681c532404e3e594c781408968ab6903de06ef3 Mon Sep 17 00:00:00 2001 From: Kostas Krikellas Date: Mon, 17 Nov 2025 17:17:19 +0200 Subject: [PATCH 01/11] Add accessors for block size constants in tsdb codec --- .../es819/ES819TSDBDocValuesConsumer.java | 13 +++-- .../tsdb/es819/ES819TSDBDocValuesFormat.java | 14 +++-- .../es819/ES819TSDBDocValuesProducer.java | 53 +++++++++++-------- .../ES819TSDBDocValuesConsumerVersion0.java | 12 +++-- 4 files changed, 56 insertions(+), 36 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java index 968e50eaf32be..d810925fb3146 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java @@ -199,16 +199,19 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, startDocs.add(maxDoc); startDocs.finish(); } else { + final int numericBlockShift = ES819TSDBDocValuesFormat.getNumericBlockShift(); + final int numericBlockSize = ES819TSDBDocValuesFormat.getNumericBlockSize(); + indexWriter = DirectMonotonicWriter.getInstance( meta, new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"), - 1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT), + 1L + ((numValues - 1) >>> numericBlockShift), ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT ); meta.writeInt(DIRECT_MONOTONIC_BLOCK_SHIFT); - final long[] buffer = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; + final long[] buffer = new long[numericBlockSize]; int bufferSize = 0; - final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE); + final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(numericBlockSize); values = valuesProducer.getSortedNumeric(field); final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1; if (valuesProducer.mergeStats.supported() && numDocsWithValue < maxDoc) { @@ -224,7 +227,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, } for (int i = 0; i < count; ++i) { buffer[bufferSize++] = values.nextValue(); - if (bufferSize == ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE) { + if (bufferSize == numericBlockSize) { indexWriter.add(data.getFilePointer() - valuesDataOffset); if (maxOrd >= 0) { encoder.encodeOrdinals(buffer, data, bitsPerOrd); @@ -238,7 +241,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, if (bufferSize > 0) { indexWriter.add(data.getFilePointer() - valuesDataOffset); // Fill unused slots in the block with zeroes rather than junk - Arrays.fill(buffer, bufferSize, ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE, 0L); + Arrays.fill(buffer, bufferSize, numericBlockSize, 0L); if (maxOrd >= 0) { encoder.encodeOrdinals(buffer, data, bitsPerOrd); } else { diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index fb43ae176dedc..ec6fe55c00d0b 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -31,9 +31,9 @@ */ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValuesFormat { - static final int NUMERIC_BLOCK_SHIFT = 7; - public static final int NUMERIC_BLOCK_SIZE = 1 << NUMERIC_BLOCK_SHIFT; - static final int NUMERIC_BLOCK_MASK = NUMERIC_BLOCK_SIZE - 1; + private static final int NUMERIC_BLOCK_SHIFT = 7; + private static final int NUMERIC_BLOCK_SIZE = 1 << NUMERIC_BLOCK_SHIFT; + static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16; static final String CODEC_NAME = "ES819TSDB"; static final String DATA_CODEC = "ES819TSDBDocValuesData"; @@ -154,4 +154,12 @@ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOExcept public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { return new ES819TSDBDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION); } + + public static int getNumericBlockSize() { + return NUMERIC_BLOCK_SIZE; + } + + public static int getNumericBlockShift() { + return NUMERIC_BLOCK_SHIFT; + } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index ab7af743589d5..4e5229a40f41a 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -1254,7 +1254,7 @@ private static void readNumeric(IndexInput meta, NumericEntry entry) throws IOEx } else { entry.indexMeta = DirectMonotonicReader.loadMeta( meta, - 1 + ((entry.numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT), + 1 + ((entry.numValues - 1) >>> ES819TSDBDocValuesFormat.getNumericBlockShift()), indexBlockShift ); } @@ -1473,12 +1473,15 @@ public int docIDRunEnd() throws IOException { final IndexInput valuesData = data.slice("values", entry.valuesOffset, entry.valuesLength); final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1; + final int numericBlockShift = ES819TSDBDocValuesFormat.getNumericBlockShift(); + final int numericBlockSize = ES819TSDBDocValuesFormat.getNumericBlockSize(); + final int numericBlockMask = numericBlockSize - 1; if (entry.docsWithFieldOffset == -1) { // dense return new BaseDenseNumericValues(maxDoc) { - private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE); + private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(numericBlockSize); private long currentBlockIndex = -1; - private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; + private final long[] currentBlock = new long[numericBlockSize]; // lookahead block private long lookaheadBlockIndex = -1; private long[] lookaheadBlock; @@ -1492,8 +1495,8 @@ public int docIDRunEnd() { @Override public long longValue() throws IOException { final int index = doc; - final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT; - final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK; + final int blockIndex = index >>> numericBlockShift; + final int blockInIndex = index & numericBlockMask; if (blockIndex == currentBlockIndex) { return currentBlock[blockInIndex]; } @@ -1534,8 +1537,8 @@ BlockLoader.Block tryRead(BlockLoader.SingletonLongBuilder builder, BlockLoader. doc = docs.get(docsCount - 1); for (int i = offset; i < docsCount;) { int index = docs.get(i); - final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT; - final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK; + final int blockIndex = index >>> numericBlockShift; + final int blockInIndex = index & numericBlockMask; if (blockIndex != currentBlockIndex) { assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex; // no need to seek if the loading block is the next block @@ -1554,7 +1557,7 @@ BlockLoader.Block tryRead(BlockLoader.SingletonLongBuilder builder, BlockLoader. // Instead of iterating over docs and find the max length, take an optimistic approach to avoid as // many comparisons as there are remaining docs and instead do at most 7 comparisons: int length = 1; - int remainingBlockLength = Math.min(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE - blockInIndex, docsCount - i); + int remainingBlockLength = Math.min(numericBlockSize - blockInIndex, docsCount - i); for (int newLength = remainingBlockLength; newLength > 1; newLength = newLength >> 1) { int lastIndex = i + newLength - 1; if (isDense(index, docs.get(lastIndex), newLength)) { @@ -1570,15 +1573,15 @@ BlockLoader.Block tryRead(BlockLoader.SingletonLongBuilder builder, BlockLoader. @Override long lookAheadValueAt(int targetDoc) throws IOException { - final int blockIndex = targetDoc >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT; - final int valueIndex = targetDoc & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK; + final int blockIndex = targetDoc >>> numericBlockShift; + final int valueIndex = targetDoc & numericBlockMask; if (blockIndex == currentBlockIndex) { return currentBlock[valueIndex]; } // load data to the lookahead block if (lookaheadBlockIndex != blockIndex) { if (lookaheadBlock == null) { - lookaheadBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; + lookaheadBlock = new long[numericBlockSize]; lookaheadData = data.slice("look_ahead_values", entry.valuesOffset, entry.valuesLength); } if (lookaheadBlockIndex + 1 != blockIndex) { @@ -1609,10 +1612,10 @@ SortedOrdinalReader sortedOrdinalReader() { entry.numValues ); return new BaseSparseNumericValues(disi) { - private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE); + private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(numericBlockSize); private IndexedDISI lookAheadDISI; private long currentBlockIndex = -1; - private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; + private final long[] currentBlock = new long[numericBlockSize]; @Override public int docIDRunEnd() throws IOException { @@ -1622,8 +1625,8 @@ public int docIDRunEnd() throws IOException { @Override public long longValue() throws IOException { final int index = disi.index(); - final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT; - final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK; + final int blockIndex = index >>> numericBlockShift; + final int blockInIndex = index & numericBlockMask; if (blockIndex != currentBlockIndex) { assert blockIndex > currentBlockIndex : blockIndex + "<=" + currentBlockIndex; // no need to seek if the loading block is the next block @@ -1691,8 +1694,8 @@ public BlockLoader.Block tryRead( try (var singletonLongBuilder = singletonLongBuilder(factory, toDouble, valueCount, toInt)) { for (int i = 0; i < valueCount;) { final int index = firstIndex + i; - final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT; - final int blockStartIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK; + final int blockIndex = index >>> numericBlockShift; + final int blockStartIndex = index & numericBlockMask; if (blockIndex != currentBlockIndex) { assert blockIndex > currentBlockIndex : blockIndex + "<=" + currentBlockIndex; if (currentBlockIndex + 1 != blockIndex) { @@ -1701,7 +1704,7 @@ public BlockLoader.Block tryRead( currentBlockIndex = blockIndex; decoder.decode(valuesData, currentBlock); } - final int count = Math.min(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE - blockStartIndex, valueCount - i); + final int count = Math.min(numericBlockSize - blockStartIndex, valueCount - i); singletonLongBuilder.appendLongs(currentBlock, blockStartIndex, count); i += count; } @@ -1777,12 +1780,16 @@ private NumericValues getValues(NumericEntry entry, final long maxOrd) throws IO final IndexInput valuesData = data.slice("values", entry.valuesOffset, entry.valuesLength); final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1; + final int numericBlockShift = ES819TSDBDocValuesFormat.getNumericBlockShift(); + final int numericBlockSize = ES819TSDBDocValuesFormat.getNumericBlockSize(); + final int numericBlockMask = numericBlockSize - 1; + final long[] currentBlockIndex = { -1 }; - final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; - final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE); + final long[] currentBlock = new long[numericBlockSize]; + final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(numericBlockSize); return index -> { - final long blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT; - final int blockInIndex = (int) (index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK); + final long blockIndex = index >>> numericBlockShift; + final int blockInIndex = (int) (index & numericBlockMask); if (blockIndex != currentBlockIndex[0]) { // no need to seek if the loading block is the next block if (currentBlockIndex[0] + 1 != blockIndex) { @@ -2019,7 +2026,7 @@ private static class TermsDictEntry { static final class SingletonLongToSingletonOrdinalDelegate implements BlockLoader.SingletonLongBuilder { private final BlockLoader.SingletonOrdinalsBuilder builder; - private final int[] buffer = new int[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; + private final int[] buffer = new int[ES819TSDBDocValuesFormat.getNumericBlockSize()]; SingletonLongToSingletonOrdinalDelegate(BlockLoader.SingletonOrdinalsBuilder builder) { this.builder = builder; diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumerVersion0.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumerVersion0.java index df669f007db2b..259dfa8d22141 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumerVersion0.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumerVersion0.java @@ -159,6 +159,8 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, DISIAccumulator disiAccumulator = null; try { + final int numericBlockShift = ES819TSDBDocValuesFormat.getNumericBlockShift(); + final int numericBlockSize = ES819TSDBDocValuesFormat.getNumericBlockSize(); if (numValues > 0) { assert numDocsWithValue > 0; final ByteBuffersDataOutput indexOut = new ByteBuffersDataOutput(); @@ -202,13 +204,13 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, indexWriter = DirectMonotonicWriter.getInstance( meta, new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"), - 1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT), + 1L + ((numValues - 1) >>> numericBlockShift), ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT ); meta.writeInt(DIRECT_MONOTONIC_BLOCK_SHIFT); - final long[] buffer = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; + final long[] buffer = new long[numericBlockSize]; int bufferSize = 0; - final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE); + final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(numericBlockSize); values = valuesProducer.getSortedNumeric(field); final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1; if (valuesProducer.mergeStats.supported() && numDocsWithValue < maxDoc) { @@ -224,7 +226,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, } for (int i = 0; i < count; ++i) { buffer[bufferSize++] = values.nextValue(); - if (bufferSize == ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE) { + if (bufferSize == numericBlockSize) { indexWriter.add(data.getFilePointer() - valuesDataOffset); if (maxOrd >= 0) { encoder.encodeOrdinals(buffer, data, bitsPerOrd); @@ -238,7 +240,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, if (bufferSize > 0) { indexWriter.add(data.getFilePointer() - valuesDataOffset); // Fill unused slots in the block with zeroes rather than junk - Arrays.fill(buffer, bufferSize, ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE, 0L); + Arrays.fill(buffer, bufferSize, numericBlockSize, 0L); if (maxOrd >= 0) { encoder.encodeOrdinals(buffer, data, bitsPerOrd); } else { From 9e1a9796b00ab7331c1fcee13d60379297fd05f8 Mon Sep 17 00:00:00 2001 From: Kostas Krikellas Date: Tue, 18 Nov 2025 20:43:17 +0200 Subject: [PATCH 02/11] add large block format --- .../es819/ES819TSDBDocValuesConsumer.java | 11 ++-- .../tsdb/es819/ES819TSDBDocValuesFormat.java | 16 ++--- .../es819/ES819TSDBDocValuesProducer.java | 55 +++++++++-------- .../ES819TSDBLargeBlockDocValuesFormat.java | 59 +++++++++++++++++++ .../ES819TSDBDocValuesConsumerVersion0.java | 10 +++- .../es819/ES819TSDBDocValuesFormatTests.java | 5 +- ...819TSDBLargeBlockDocValuesFormatTests.java | 34 +++++++++++ 7 files changed, 145 insertions(+), 45 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java create mode 100644 server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormatTests.java diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java index d810925fb3146..72f8334af04e1 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java @@ -65,6 +65,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { private final int minDocsPerOrdinalForOrdinalRangeEncoding; final boolean enableOptimizedMerge; private final int primarySortFieldNumber; + private final int numericBlockShift; + private final int numericBlockSize; ES819TSDBDocValuesConsumer( SegmentWriteState state, @@ -74,13 +76,17 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { String dataCodec, String dataExtension, String metaCodec, - String metaExtension + String metaExtension, + int numericBlockShift ) throws IOException { this.termsDictBuffer = new byte[1 << 14]; this.dir = state.directory; this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding; this.primarySortFieldNumber = ES819TSDBDocValuesProducer.primarySortFieldNumber(state.segmentInfo, state.fieldInfos); this.context = state.context; + this.numericBlockShift = numericBlockShift; + this.numericBlockSize = 1 << numericBlockShift; + boolean success = false; try { final String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); @@ -199,9 +205,6 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, startDocs.add(maxDoc); startDocs.finish(); } else { - final int numericBlockShift = ES819TSDBDocValuesFormat.getNumericBlockShift(); - final int numericBlockSize = ES819TSDBDocValuesFormat.getNumericBlockSize(); - indexWriter = DirectMonotonicWriter.getInstance( meta, new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"), diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index ec6fe55c00d0b..6578d21faeced 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -31,8 +31,7 @@ */ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValuesFormat { - private static final int NUMERIC_BLOCK_SHIFT = 7; - private static final int NUMERIC_BLOCK_SIZE = 1 << NUMERIC_BLOCK_SHIFT; + static final int NUMERIC_BLOCK_SHIFT = 7; // block size: 128 static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16; static final String CODEC_NAME = "ES819TSDB"; @@ -146,20 +145,13 @@ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOExcept DATA_CODEC, DATA_EXTENSION, META_CODEC, - META_EXTENSION + META_EXTENSION, + NUMERIC_BLOCK_SHIFT ); } @Override public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { - return new ES819TSDBDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION); - } - - public static int getNumericBlockSize() { - return NUMERIC_BLOCK_SIZE; - } - - public static int getNumericBlockShift() { - return NUMERIC_BLOCK_SHIFT; + return new ES819TSDBDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION, NUMERIC_BLOCK_SHIFT); } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 4e5229a40f41a..f058be05f6ff8 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -68,9 +68,18 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { private final int maxDoc; final int version; private final boolean merging; - - ES819TSDBDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) - throws IOException { + private final int numericBlockShift; + private final int numericBlockSize; + private final int numericBlockMask; + + ES819TSDBDocValuesProducer( + SegmentReadState state, + String dataCodec, + String dataExtension, + String metaCodec, + String metaExtension, + int numericBlockShift + ) throws IOException { this.numerics = new IntObjectHashMap<>(); this.binaries = new IntObjectHashMap<>(); this.sorted = new IntObjectHashMap<>(); @@ -80,6 +89,9 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { this.maxDoc = state.segmentInfo.maxDoc(); this.primarySortFieldNumber = primarySortFieldNumber(state.segmentInfo, state.fieldInfos); this.merging = false; + this.numericBlockShift = numericBlockShift; + this.numericBlockSize = 1 << numericBlockShift; + this.numericBlockMask = numericBlockSize - 1; // read in the entries from the metadata file. int version = -1; @@ -149,7 +161,8 @@ private ES819TSDBDocValuesProducer( int maxDoc, int version, int primarySortFieldNumber, - boolean merging + boolean merging, + int numericBlockShift ) { this.numerics = numerics; this.binaries = binaries; @@ -162,6 +175,9 @@ private ES819TSDBDocValuesProducer( this.version = version; this.primarySortFieldNumber = primarySortFieldNumber; this.merging = merging; + this.numericBlockShift = numericBlockShift; + this.numericBlockSize = 1 << numericBlockShift; + this.numericBlockMask = numericBlockSize - 1; } @Override @@ -177,7 +193,8 @@ public DocValuesProducer getMergeInstance() { maxDoc, version, primarySortFieldNumber, - true + true, + numericBlockShift ); } @@ -497,7 +514,7 @@ public BlockLoader.Block tryRead( } // Falling back to tryRead(...) is safe here, given that current block index wasn't altered by looking ahead. try (var builder = factory.singletonOrdinalsBuilder(this, docs.count() - offset, true)) { - BlockLoader.SingletonLongBuilder delegate = new SingletonLongToSingletonOrdinalDelegate(builder); + BlockLoader.SingletonLongBuilder delegate = new SingletonLongToSingletonOrdinalDelegate(builder, numericBlockSize); var result = denseOrds.tryRead(delegate, docs, offset); if (result != null) { return result; @@ -1221,7 +1238,7 @@ private void readFields(IndexInput meta, FieldInfos infos) throws IOException { } } - private static NumericEntry readNumeric(IndexInput meta) throws IOException { + private NumericEntry readNumeric(IndexInput meta) throws IOException { NumericEntry entry = new NumericEntry(); readNumeric(meta, entry); return entry; @@ -1238,7 +1255,7 @@ private static DocValuesSkipperEntry readDocValueSkipperMeta(IndexInput meta) th return new DocValuesSkipperEntry(offset, length, minValue, maxValue, docCount, maxDocID); } - private static void readNumeric(IndexInput meta, NumericEntry entry) throws IOException { + private void readNumeric(IndexInput meta, NumericEntry entry) throws IOException { entry.numValues = meta.readLong(); // Change compared to ES87TSDBDocValuesProducer: entry.numDocsWithField = meta.readInt(); @@ -1252,11 +1269,7 @@ private static void readNumeric(IndexInput meta, NumericEntry entry) throws IOEx final int blockShift = meta.readByte(); entry.sortedOrdinals = DirectMonotonicReader.loadMeta(meta, numOrds + 1, blockShift); } else { - entry.indexMeta = DirectMonotonicReader.loadMeta( - meta, - 1 + ((entry.numValues - 1) >>> ES819TSDBDocValuesFormat.getNumericBlockShift()), - indexBlockShift - ); + entry.indexMeta = DirectMonotonicReader.loadMeta(meta, 1 + ((entry.numValues - 1) >>> numericBlockShift), indexBlockShift); } entry.indexOffset = meta.readLong(); entry.indexLength = meta.readLong(); @@ -1294,13 +1307,13 @@ private BinaryEntry readBinary(IndexInput meta) throws IOException { return entry; } - private static SortedNumericEntry readSortedNumeric(IndexInput meta) throws IOException { + private SortedNumericEntry readSortedNumeric(IndexInput meta) throws IOException { SortedNumericEntry entry = new SortedNumericEntry(); readSortedNumeric(meta, entry); return entry; } - private static SortedNumericEntry readSortedNumeric(IndexInput meta, SortedNumericEntry entry) throws IOException { + private SortedNumericEntry readSortedNumeric(IndexInput meta, SortedNumericEntry entry) throws IOException { readNumeric(meta, entry); // We don't read numDocsWithField here any more. if (entry.numDocsWithField != entry.numValues) { @@ -1473,9 +1486,6 @@ public int docIDRunEnd() throws IOException { final IndexInput valuesData = data.slice("values", entry.valuesOffset, entry.valuesLength); final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1; - final int numericBlockShift = ES819TSDBDocValuesFormat.getNumericBlockShift(); - final int numericBlockSize = ES819TSDBDocValuesFormat.getNumericBlockSize(); - final int numericBlockMask = numericBlockSize - 1; if (entry.docsWithFieldOffset == -1) { // dense return new BaseDenseNumericValues(maxDoc) { @@ -1780,10 +1790,6 @@ private NumericValues getValues(NumericEntry entry, final long maxOrd) throws IO final IndexInput valuesData = data.slice("values", entry.valuesOffset, entry.valuesLength); final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1; - final int numericBlockShift = ES819TSDBDocValuesFormat.getNumericBlockShift(); - final int numericBlockSize = ES819TSDBDocValuesFormat.getNumericBlockSize(); - final int numericBlockMask = numericBlockSize - 1; - final long[] currentBlockIndex = { -1 }; final long[] currentBlock = new long[numericBlockSize]; final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(numericBlockSize); @@ -2026,10 +2032,11 @@ private static class TermsDictEntry { static final class SingletonLongToSingletonOrdinalDelegate implements BlockLoader.SingletonLongBuilder { private final BlockLoader.SingletonOrdinalsBuilder builder; - private final int[] buffer = new int[ES819TSDBDocValuesFormat.getNumericBlockSize()]; + private final int[] buffer; - SingletonLongToSingletonOrdinalDelegate(BlockLoader.SingletonOrdinalsBuilder builder) { + SingletonLongToSingletonOrdinalDelegate(BlockLoader.SingletonOrdinalsBuilder builder, int bufferSize) { this.builder = builder; + this.buffer = new int[bufferSize]; } @Override diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java new file mode 100644 index 0000000000000..940a96761f276 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java @@ -0,0 +1,59 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.tsdb.es819; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; + +import java.io.IOException; + +/** + * Evolved from {@link ES819TSDBDocValuesFormat} but uses larger block size for numeric values (512 vs 128). + */ +public class ES819TSDBLargeBlockDocValuesFormat extends ES819TSDBDocValuesFormat { + + static final int NUMERIC_BLOCK_SHIFT = 9; // block size: 512 + + /** Default constructor. */ + public ES819TSDBLargeBlockDocValuesFormat() { + super(); + } + + /** Doc values fields format with specified skipIndexIntervalSize. */ + public ES819TSDBLargeBlockDocValuesFormat( + int skipIndexIntervalSize, + int minDocsPerOrdinalForRangeEncoding, + boolean enableOptimizedMerge + ) { + super(skipIndexIntervalSize, minDocsPerOrdinalForRangeEncoding, enableOptimizedMerge); + } + + @Override + public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + return new ES819TSDBDocValuesConsumer( + state, + skipIndexIntervalSize, + minDocsPerOrdinalForRangeEncoding, + enableOptimizedMerge, + DATA_CODEC, + DATA_EXTENSION, + META_CODEC, + META_EXTENSION, + NUMERIC_BLOCK_SHIFT + ); + } + + @Override + public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { + return new ES819TSDBDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION, NUMERIC_BLOCK_SHIFT); + } +} diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumerVersion0.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumerVersion0.java index 259dfa8d22141..bd07f113b0cec 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumerVersion0.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumerVersion0.java @@ -65,6 +65,8 @@ final class ES819TSDBDocValuesConsumerVersion0 extends XDocValuesConsumer { private final int minDocsPerOrdinalForOrdinalRangeEncoding; final boolean enableOptimizedMerge; private final int primarySortFieldNumber; + private final int numericBlockShift; + private final int numericBlockSize; ES819TSDBDocValuesConsumerVersion0( SegmentWriteState state, @@ -74,13 +76,17 @@ final class ES819TSDBDocValuesConsumerVersion0 extends XDocValuesConsumer { String dataCodec, String dataExtension, String metaCodec, - String metaExtension + String metaExtension, + int numericBlockShift ) throws IOException { this.termsDictBuffer = new byte[1 << 14]; this.dir = state.directory; this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding; this.primarySortFieldNumber = ES819TSDBDocValuesProducer.primarySortFieldNumber(state.segmentInfo, state.fieldInfos); this.context = state.context; + this.numericBlockShift = numericBlockShift; + this.numericBlockSize = 1 << numericBlockShift; + boolean success = false; try { final String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); @@ -159,8 +165,6 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, DISIAccumulator disiAccumulator = null; try { - final int numericBlockShift = ES819TSDBDocValuesFormat.getNumericBlockShift(); - final int numericBlockSize = ES819TSDBDocValuesFormat.getNumericBlockSize(); if (numValues > 0) { assert numDocsWithValue > 0; final ByteBuffersDataOutput indexOut = new ByteBuffersDataOutput(); diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java index ae4594e0568d7..c3b0a0f90d5ba 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java @@ -75,7 +75,7 @@ public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests { - private final Codec codec = new Elasticsearch92Lucene103Codec() { + protected final Codec codec = new Elasticsearch92Lucene103Codec() { final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat( randomIntBetween(2, 4096), @@ -105,7 +105,8 @@ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOExcept DATA_CODEC, DATA_EXTENSION, META_CODEC, - META_EXTENSION + META_EXTENSION, + NUMERIC_BLOCK_SHIFT ); } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormatTests.java new file mode 100644 index 0000000000000..829cc324e1788 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormatTests.java @@ -0,0 +1,34 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.tsdb.es819; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.DocValuesFormat; +import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; +import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests; + +import static org.elasticsearch.test.ESTestCase.randomIntBetween; + +public class ES819TSDBLargeBlockDocValuesFormatTests extends ES87TSDBDocValuesFormatTests { + + protected final Codec codec = new Elasticsearch92Lucene103Codec() { + + final ES819TSDBLargeBlockDocValuesFormat docValuesFormat = new ES819TSDBLargeBlockDocValuesFormat( + randomIntBetween(2, 4096), + randomIntBetween(1, 512), + random().nextBoolean() + ); + + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return docValuesFormat; + } + }; +} From 2369406b939320c9dbce6c3f9b89bb7f790062f7 Mon Sep 17 00:00:00 2001 From: Kostas Krikellas Date: Wed, 19 Nov 2025 18:56:54 +0200 Subject: [PATCH 03/11] update codec name --- server/src/main/java/module-info.java | 3 ++- .../tsdb/es819/ES819TSDBDocValuesFormat.java | 17 +++++++++++++++-- .../ES819TSDBLargeBlockDocValuesFormat.java | 7 ++++--- .../org.apache.lucene.codecs.DocValuesFormat | 1 + 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 9c5d11e1cf9e1..dadc8ca3d6d6e 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -451,7 +451,8 @@ provides org.apache.lucene.codecs.DocValuesFormat with org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat, - org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat; + org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat, + org.elasticsearch.index.codec.tsdb.es819.ES819TSDBLargeBlockDocValuesFormat; provides org.apache.lucene.codecs.KnnVectorsFormat with org.elasticsearch.index.codec.vectors.ES813FlatVectorFormat, diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index 6578d21faeced..422b73938bd3f 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -121,12 +121,25 @@ private static boolean getOptimizedMergeEnabledDefault() { /** Default constructor. */ public ES819TSDBDocValuesFormat() { - this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT); + this(CODEC_NAME); + } + + protected ES819TSDBDocValuesFormat(String name) { + this(name, DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT); } /** Doc values fields format with specified skipIndexIntervalSize. */ public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge) { - super(CODEC_NAME); + this(CODEC_NAME, skipIndexIntervalSize, minDocsPerOrdinalForRangeEncoding, enableOptimizedMerge); + } + + protected ES819TSDBDocValuesFormat( + String name, + int skipIndexIntervalSize, + int minDocsPerOrdinalForRangeEncoding, + boolean enableOptimizedMerge + ) { + super(name); if (skipIndexIntervalSize < 2) { throw new IllegalArgumentException("skipIndexIntervalSize must be > 1, got [" + skipIndexIntervalSize + "]"); } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java index 940a96761f276..5ae87c05be333 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java @@ -21,11 +21,12 @@ */ public class ES819TSDBLargeBlockDocValuesFormat extends ES819TSDBDocValuesFormat { - static final int NUMERIC_BLOCK_SHIFT = 9; // block size: 512 + static final int NUMERIC_BLOCK_SHIFT = 9; + static final String CODEC_NAME = "ES819TSDBLB";// block size: 512 /** Default constructor. */ public ES819TSDBLargeBlockDocValuesFormat() { - super(); + super(CODEC_NAME); } /** Doc values fields format with specified skipIndexIntervalSize. */ @@ -34,7 +35,7 @@ public ES819TSDBLargeBlockDocValuesFormat( int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge ) { - super(skipIndexIntervalSize, minDocsPerOrdinalForRangeEncoding, enableOptimizedMerge); + super(CODEC_NAME, skipIndexIntervalSize, minDocsPerOrdinalForRangeEncoding, enableOptimizedMerge); } @Override diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat index c459cd485a22d..ad17275684e63 100644 --- a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat +++ b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat @@ -1,2 +1,3 @@ org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat +org.elasticsearch.index.codec.tsdb.es819.ES819TSDBLargeBlockDocValuesFormat From f1576b92fe74edd1f306a15aa418658ca4a4a932 Mon Sep 17 00:00:00 2001 From: Kostas Krikellas Date: Tue, 25 Nov 2025 12:17:15 +0200 Subject: [PATCH 04/11] sync --- .../tsdb/es819/ES819TSDBDocValuesFormat.java | 61 ++++++++++++++++++- .../ES819TSDBLargeBlockDocValuesFormat.java | 16 ++++- ...819TSDBLargeBlockDocValuesFormatTests.java | 4 +- 3 files changed, 75 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index 0fd7511497c91..0dbc84fb668ac 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -139,12 +139,67 @@ private static boolean getOptimizedMergeEnabledDefault() { /** Default constructor. */ public ES819TSDBDocValuesFormat() { - this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT); + this(CODEC_NAME); + } + + protected ES819TSDBDocValuesFormat(String name) { + this( + name, + DEFAULT_SKIP_INDEX_INTERVAL_SIZE, + ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, + OPTIMIZED_MERGE_ENABLE_DEFAULT, + BINARY_DV_COMPRESSION_FEATURE_FLAG ? BinaryDVCompressionMode.COMPRESSED_ZSTD_LEVEL_1 : BinaryDVCompressionMode.NO_COMPRESS, + true + ); + } + + public ES819TSDBDocValuesFormat(BinaryDVCompressionMode binaryDVCompressionMode) { + this( + DEFAULT_SKIP_INDEX_INTERVAL_SIZE, + ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, + OPTIMIZED_MERGE_ENABLE_DEFAULT, + binaryDVCompressionMode, + true + ); + } + + public ES819TSDBDocValuesFormat(BinaryDVCompressionMode binaryDVCompressionMode, boolean enablePerBlockCompression) { + this( + DEFAULT_SKIP_INDEX_INTERVAL_SIZE, + ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, + OPTIMIZED_MERGE_ENABLE_DEFAULT, + binaryDVCompressionMode, + enablePerBlockCompression + ); } /** Doc values fields format with specified skipIndexIntervalSize. */ - public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge) { - super(CODEC_NAME); + public ES819TSDBDocValuesFormat( + int skipIndexIntervalSize, + int minDocsPerOrdinalForRangeEncoding, + boolean enableOptimizedMerge, + BinaryDVCompressionMode binaryDVCompressionMode, + final boolean enablePerBlockCompression + ) { + this( + CODEC_NAME, + skipIndexIntervalSize, + minDocsPerOrdinalForRangeEncoding, + enableOptimizedMerge, + binaryDVCompressionMode, + enablePerBlockCompression + ); + } + + protected ES819TSDBDocValuesFormat( + String name, + int skipIndexIntervalSize, + int minDocsPerOrdinalForRangeEncoding, + boolean enableOptimizedMerge, + BinaryDVCompressionMode binaryDVCompressionMode, + final boolean enablePerBlockCompression + ) { + super(name); if (skipIndexIntervalSize < 2) { throw new IllegalArgumentException("skipIndexIntervalSize must be > 1, got [" + skipIndexIntervalSize + "]"); } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java index 5ae87c05be333..24fe86d7618b4 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java @@ -13,6 +13,7 @@ import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; +import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; import java.io.IOException; @@ -33,14 +34,25 @@ public ES819TSDBLargeBlockDocValuesFormat() { public ES819TSDBLargeBlockDocValuesFormat( int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, - boolean enableOptimizedMerge + boolean enableOptimizedMerge, + BinaryDVCompressionMode binaryDVCompressionMode, + final boolean enablePerBlockCompression ) { - super(CODEC_NAME, skipIndexIntervalSize, minDocsPerOrdinalForRangeEncoding, enableOptimizedMerge); + super( + CODEC_NAME, + skipIndexIntervalSize, + minDocsPerOrdinalForRangeEncoding, + enableOptimizedMerge, + binaryDVCompressionMode, + enablePerBlockCompression + ); } @Override public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { return new ES819TSDBDocValuesConsumer( + binaryDVCompressionMode, + enablePerBlockCompression, state, skipIndexIntervalSize, minDocsPerOrdinalForRangeEncoding, diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormatTests.java index 829cc324e1788..abc5e9df37edb 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormatTests.java @@ -23,7 +23,9 @@ public class ES819TSDBLargeBlockDocValuesFormatTests extends ES87TSDBDocValuesFo final ES819TSDBLargeBlockDocValuesFormat docValuesFormat = new ES819TSDBLargeBlockDocValuesFormat( randomIntBetween(2, 4096), randomIntBetween(1, 512), - random().nextBoolean() + random().nextBoolean(), + ES819TSDBDocValuesFormatTests.randomBinaryCompressionMode(), + true ); @Override From 18c0ba88ff3b5b8b4dbc3b5f2af3d0808b1633a1 Mon Sep 17 00:00:00 2001 From: Kostas Krikellas Date: Tue, 25 Nov 2025 19:09:47 +0200 Subject: [PATCH 05/11] use new version --- server/src/main/java/module-info.java | 3 +- .../es819/ES819TSDBDocValuesConsumer.java | 7 +- .../tsdb/es819/ES819TSDBDocValuesFormat.java | 45 +++++++----- .../es819/ES819TSDBDocValuesProducer.java | 16 +++-- .../ES819TSDBLargeBlockDocValuesFormat.java | 72 ------------------- .../org.apache.lucene.codecs.DocValuesFormat | 1 - ...819TSDBLargeBlockDocValuesFormatTests.java | 36 ---------- 7 files changed, 44 insertions(+), 136 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java delete mode 100644 server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormatTests.java diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index a9c6f5ba6cd9b..02f235aaadf8b 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -451,8 +451,7 @@ provides org.apache.lucene.codecs.DocValuesFormat with org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat, - org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat, - org.elasticsearch.index.codec.tsdb.es819.ES819TSDBLargeBlockDocValuesFormat; + org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat; provides org.apache.lucene.codecs.KnnVectorsFormat with org.elasticsearch.index.codec.vectors.ES813FlatVectorFormat, diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java index e3e60b7be9acc..f93078da8f175 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java @@ -86,11 +86,11 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { int skipIndexIntervalSize, int minDocsPerOrdinalForOrdinalRangeEncoding, boolean enableOptimizedMerge, + int numericBlockShift, String dataCodec, String dataExtension, String metaCodec, - String metaExtension, - int numericBlockShift + String metaExtension ) throws IOException { this.binaryDVCompressionMode = binaryDVCompressionMode; this.enablePerBlockCompression = enablePerBlockCompression; @@ -114,6 +114,7 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { state.segmentInfo.getId(), state.segmentSuffix ); + String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); meta = state.directory.createOutput(metaName, state.context); CodecUtil.writeIndexHeader( @@ -123,6 +124,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { state.segmentInfo.getId(), state.segmentSuffix ); + meta.writeByte((byte) numericBlockShift); + maxDoc = state.segmentInfo.maxDoc(); this.skipIndexIntervalSize = skipIndexIntervalSize; this.enableOptimizedMerge = enableOptimizedMerge; diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index 0dbc84fb668ac..f9affa29393d8 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -40,6 +40,7 @@ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValues public static final boolean BINARY_DV_COMPRESSION_FEATURE_FLAG = new FeatureFlag("binary_dv_compression").isEnabled(); static final int NUMERIC_BLOCK_SHIFT = 7; + static final int NUMERIC_LARGE_BLOCK_SHIFT = 9; static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16; static final String CODEC_NAME = "ES819TSDB"; static final String DATA_CODEC = "ES819TSDBDocValuesData"; @@ -54,7 +55,8 @@ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValues static final int VERSION_START = 0; static final int VERSION_BINARY_DV_COMPRESSION = 1; - static final int VERSION_CURRENT = VERSION_BINARY_DV_COMPRESSION; + static final int VERSION_NUMERIC_LARGE_BLOCKS = 2; + static final int VERSION_CURRENT = VERSION_NUMERIC_LARGE_BLOCKS; static final int TERMS_DICT_BLOCK_LZ4_SHIFT = 6; static final int TERMS_DICT_BLOCK_LZ4_SIZE = 1 << TERMS_DICT_BLOCK_LZ4_SHIFT; @@ -131,25 +133,31 @@ private static boolean getOptimizedMergeEnabledDefault() { */ public static final int ORDINAL_RANGE_ENCODING_BLOCK_SHIFT = 12; + final int numericBlockShift; final int skipIndexIntervalSize; final int minDocsPerOrdinalForRangeEncoding; final boolean enableOptimizedMerge; final BinaryDVCompressionMode binaryDVCompressionMode; final boolean enablePerBlockCompression; - /** Default constructor. */ + static ES819TSDBDocValuesFormat getInstance(boolean useLargeNumericBlock) { + return useLargeNumericBlock + ? new ES819TSDBDocValuesFormat(NUMERIC_LARGE_BLOCK_SHIFT) + : new ES819TSDBDocValuesFormat(); + } + public ES819TSDBDocValuesFormat() { - this(CODEC_NAME); + this(NUMERIC_BLOCK_SHIFT); } - protected ES819TSDBDocValuesFormat(String name) { + public ES819TSDBDocValuesFormat(int numericBlockShift) { this( - name, DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT, BINARY_DV_COMPRESSION_FEATURE_FLAG ? BinaryDVCompressionMode.COMPRESSED_ZSTD_LEVEL_1 : BinaryDVCompressionMode.NO_COMPRESS, - true + true, + numericBlockShift ); } @@ -159,7 +167,8 @@ public ES819TSDBDocValuesFormat(BinaryDVCompressionMode binaryDVCompressionMode) ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT, binaryDVCompressionMode, - true + true, + NUMERIC_BLOCK_SHIFT ); } @@ -169,7 +178,8 @@ public ES819TSDBDocValuesFormat(BinaryDVCompressionMode binaryDVCompressionMode, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT, binaryDVCompressionMode, - enablePerBlockCompression + enablePerBlockCompression, + NUMERIC_BLOCK_SHIFT ); } @@ -182,24 +192,24 @@ public ES819TSDBDocValuesFormat( final boolean enablePerBlockCompression ) { this( - CODEC_NAME, skipIndexIntervalSize, minDocsPerOrdinalForRangeEncoding, enableOptimizedMerge, binaryDVCompressionMode, - enablePerBlockCompression + enablePerBlockCompression, + NUMERIC_BLOCK_SHIFT ); } - protected ES819TSDBDocValuesFormat( - String name, + public ES819TSDBDocValuesFormat( int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge, BinaryDVCompressionMode binaryDVCompressionMode, - final boolean enablePerBlockCompression + final boolean enablePerBlockCompression, + final int numericBlockShift ) { - super(name); + super(CODEC_NAME); if (skipIndexIntervalSize < 2) { throw new IllegalArgumentException("skipIndexIntervalSize must be > 1, got [" + skipIndexIntervalSize + "]"); } @@ -208,6 +218,7 @@ protected ES819TSDBDocValuesFormat( this.enableOptimizedMerge = enableOptimizedMerge; this.binaryDVCompressionMode = binaryDVCompressionMode; this.enablePerBlockCompression = enablePerBlockCompression; + this.numericBlockShift = numericBlockShift; } @Override @@ -219,16 +230,16 @@ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOExcept skipIndexIntervalSize, minDocsPerOrdinalForRangeEncoding, enableOptimizedMerge, + numericBlockShift, DATA_CODEC, DATA_EXTENSION, META_CODEC, - META_EXTENSION, - NUMERIC_BLOCK_SHIFT + META_EXTENSION ); } @Override public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { - return new ES819TSDBDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION, NUMERIC_BLOCK_SHIFT); + return new ES819TSDBDocValuesProducer(state, numericBlockShift, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION); } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 7afb297b2cfd0..49e99f42103c8 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -77,11 +77,11 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { ES819TSDBDocValuesProducer( SegmentReadState state, + int numericBlockShift, String dataCodec, String dataExtension, String metaCodec, - String metaExtension, - int numericBlockShift + String metaExtension ) throws IOException { this.numerics = new IntObjectHashMap<>(); this.binaries = new IntObjectHashMap<>(); @@ -92,12 +92,10 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { this.maxDoc = state.segmentInfo.maxDoc(); this.primarySortFieldNumber = primarySortFieldNumber(state.segmentInfo, state.fieldInfos); this.merging = false; - this.numericBlockShift = numericBlockShift; - this.numericBlockSize = 1 << numericBlockShift; - this.numericBlockMask = numericBlockSize - 1; // read in the entries from the metadata file. int version = -1; + int blockShift = numericBlockShift; String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName)) { @@ -112,7 +110,9 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { state.segmentInfo.getId(), state.segmentSuffix ); - + if (version >= ES819TSDBDocValuesFormat.VERSION_NUMERIC_LARGE_BLOCKS) { + blockShift = in.readByte(); + } readFields(in, state.fieldInfos, version); } catch (Throwable exception) { @@ -151,6 +151,10 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { IOUtils.closeWhileHandlingException(this.data); } } + + this.numericBlockShift = blockShift; + this.numericBlockSize = 1 << numericBlockShift; + this.numericBlockMask = numericBlockSize - 1; } private ES819TSDBDocValuesProducer( diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java deleted file mode 100644 index 24fe86d7618b4..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormat.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.index.codec.tsdb.es819; - -import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SegmentWriteState; -import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; - -import java.io.IOException; - -/** - * Evolved from {@link ES819TSDBDocValuesFormat} but uses larger block size for numeric values (512 vs 128). - */ -public class ES819TSDBLargeBlockDocValuesFormat extends ES819TSDBDocValuesFormat { - - static final int NUMERIC_BLOCK_SHIFT = 9; - static final String CODEC_NAME = "ES819TSDBLB";// block size: 512 - - /** Default constructor. */ - public ES819TSDBLargeBlockDocValuesFormat() { - super(CODEC_NAME); - } - - /** Doc values fields format with specified skipIndexIntervalSize. */ - public ES819TSDBLargeBlockDocValuesFormat( - int skipIndexIntervalSize, - int minDocsPerOrdinalForRangeEncoding, - boolean enableOptimizedMerge, - BinaryDVCompressionMode binaryDVCompressionMode, - final boolean enablePerBlockCompression - ) { - super( - CODEC_NAME, - skipIndexIntervalSize, - minDocsPerOrdinalForRangeEncoding, - enableOptimizedMerge, - binaryDVCompressionMode, - enablePerBlockCompression - ); - } - - @Override - public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - return new ES819TSDBDocValuesConsumer( - binaryDVCompressionMode, - enablePerBlockCompression, - state, - skipIndexIntervalSize, - minDocsPerOrdinalForRangeEncoding, - enableOptimizedMerge, - DATA_CODEC, - DATA_EXTENSION, - META_CODEC, - META_EXTENSION, - NUMERIC_BLOCK_SHIFT - ); - } - - @Override - public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { - return new ES819TSDBDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION, NUMERIC_BLOCK_SHIFT); - } -} diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat index ad17275684e63..c459cd485a22d 100644 --- a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat +++ b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat @@ -1,3 +1,2 @@ org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat -org.elasticsearch.index.codec.tsdb.es819.ES819TSDBLargeBlockDocValuesFormat diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormatTests.java deleted file mode 100644 index abc5e9df37edb..0000000000000 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBLargeBlockDocValuesFormatTests.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.index.codec.tsdb.es819; - -import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.DocValuesFormat; -import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; -import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests; - -import static org.elasticsearch.test.ESTestCase.randomIntBetween; - -public class ES819TSDBLargeBlockDocValuesFormatTests extends ES87TSDBDocValuesFormatTests { - - protected final Codec codec = new Elasticsearch92Lucene103Codec() { - - final ES819TSDBLargeBlockDocValuesFormat docValuesFormat = new ES819TSDBLargeBlockDocValuesFormat( - randomIntBetween(2, 4096), - randomIntBetween(1, 512), - random().nextBoolean(), - ES819TSDBDocValuesFormatTests.randomBinaryCompressionMode(), - true - ); - - @Override - public DocValuesFormat getDocValuesFormatForField(String field) { - return docValuesFormat; - } - }; -} From 0471eca7d4ae4fdd56571bd55c78b6a3e26b40b7 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 25 Nov 2025 17:20:53 +0000 Subject: [PATCH 06/11] [CI] Auto commit changes from spotless --- .../index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index f9affa29393d8..88982b0f8d096 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -141,9 +141,7 @@ private static boolean getOptimizedMergeEnabledDefault() { final boolean enablePerBlockCompression; static ES819TSDBDocValuesFormat getInstance(boolean useLargeNumericBlock) { - return useLargeNumericBlock - ? new ES819TSDBDocValuesFormat(NUMERIC_LARGE_BLOCK_SHIFT) - : new ES819TSDBDocValuesFormat(); + return useLargeNumericBlock ? new ES819TSDBDocValuesFormat(NUMERIC_LARGE_BLOCK_SHIFT) : new ES819TSDBDocValuesFormat(); } public ES819TSDBDocValuesFormat() { From ec519788829e1c684f49f3fc69fc46a71ae90cc5 Mon Sep 17 00:00:00 2001 From: Kostas Krikellas Date: Tue, 25 Nov 2025 19:25:40 +0200 Subject: [PATCH 07/11] fix --- .../index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index f9affa29393d8..88982b0f8d096 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -141,9 +141,7 @@ private static boolean getOptimizedMergeEnabledDefault() { final boolean enablePerBlockCompression; static ES819TSDBDocValuesFormat getInstance(boolean useLargeNumericBlock) { - return useLargeNumericBlock - ? new ES819TSDBDocValuesFormat(NUMERIC_LARGE_BLOCK_SHIFT) - : new ES819TSDBDocValuesFormat(); + return useLargeNumericBlock ? new ES819TSDBDocValuesFormat(NUMERIC_LARGE_BLOCK_SHIFT) : new ES819TSDBDocValuesFormat(); } public ES819TSDBDocValuesFormat() { From 30853bd0b3f8d110c46505838327d187073481e5 Mon Sep 17 00:00:00 2001 From: Kostas Krikellas Date: Tue, 25 Nov 2025 21:02:40 +0200 Subject: [PATCH 08/11] fix and tests --- .../es819/ES819TSDBDocValuesProducer.java | 20 +++++++++---------- .../codec/tsdb/DocValuesCodecDuelTests.java | 3 ++- .../codec/tsdb/TsdbDocValueBwcTests.java | 3 ++- .../es819/ES819TSDBDocValuesFormatTests.java | 7 ++++++- ...ValuesFormatVariableSkipIntervalTests.java | 6 ++++-- 5 files changed, 23 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 49e99f42103c8..3ae25b8ea376f 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -71,9 +71,9 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { private final int maxDoc; final int version; private final boolean merging; - private final int numericBlockShift; - private final int numericBlockSize; - private final int numericBlockMask; + private int numericBlockShift; + private int numericBlockSize; + private int numericBlockMask; ES819TSDBDocValuesProducer( SegmentReadState state, @@ -92,10 +92,12 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { this.maxDoc = state.segmentInfo.maxDoc(); this.primarySortFieldNumber = primarySortFieldNumber(state.segmentInfo, state.fieldInfos); this.merging = false; + this.numericBlockShift = numericBlockShift; + this.numericBlockSize = 1 << numericBlockShift; + this.numericBlockMask = numericBlockSize - 1; // read in the entries from the metadata file. int version = -1; - int blockShift = numericBlockShift; String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName)) { @@ -111,17 +113,17 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { state.segmentSuffix ); if (version >= ES819TSDBDocValuesFormat.VERSION_NUMERIC_LARGE_BLOCKS) { - blockShift = in.readByte(); + this.numericBlockShift = in.readByte(); + this.numericBlockSize = 1 << this.numericBlockShift; + this.numericBlockMask = this.numericBlockSize - 1; } readFields(in, state.fieldInfos, version); - } catch (Throwable exception) { priorE = exception; } finally { CodecUtil.checkFooter(in, priorE); } } - String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); this.data = state.directory.openInput(dataName, state.context); boolean success = false; @@ -151,10 +153,6 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { IOUtils.closeWhileHandlingException(this.data); } } - - this.numericBlockShift = blockShift; - this.numericBlockSize = 1 << numericBlockShift; - this.numericBlockMask = numericBlockSize - 1; } private ES819TSDBDocValuesProducer( diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java index ab7e65959d686..283f441a3fafb 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java @@ -64,7 +64,8 @@ public void testDuel() throws IOException { ESTestCase.randomIntBetween(1, 512), random().nextBoolean(), ES819TSDBDocValuesFormatTests.randomBinaryCompressionMode(), - random().nextBoolean() + random().nextBoolean(), + ES819TSDBDocValuesFormatTests.randomNumericBlockSize() ) : new TestES87TSDBDocValuesFormat(); diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java index f9dc86e181638..d6549b08322c5 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java @@ -350,7 +350,8 @@ public void testEncodeOrdinalRange() throws IOException { nextOrdinalRangeThreshold.getAsInt(), random().nextBoolean(), ES819TSDBDocValuesFormatTests.randomBinaryCompressionMode(), - randomBoolean() + randomBoolean(), + ES819TSDBDocValuesFormatTests.randomNumericBlockSize() ) ) ); diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java index bb35b58c3194f..8f7eab7c1644c 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java @@ -1420,7 +1420,8 @@ public void testLoadKeywordFieldWithIndexSorts() throws IOException { 1, // always enable range-encode random().nextBoolean(), randomBinaryCompressionMode(), - randomBoolean() + randomBoolean(), + randomNumericBlockSize() ); @Override @@ -1799,6 +1800,10 @@ public static BinaryDVCompressionMode randomBinaryCompressionMode() { return modes[random().nextInt(modes.length)]; } + public static int randomNumericBlockSize() { + return random().nextBoolean() ? ES819TSDBDocValuesFormat.NUMERIC_LARGE_BLOCK_SHIFT : ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT; + } + private boolean isCompressed(IndexWriterConfig config, String field) { if (config.getCodec() instanceof Elasticsearch92Lucene103Codec codec) { if (codec.getDocValuesFormatForField(field) instanceof ES819TSDBDocValuesFormat format) { diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java index d43e253729a64..ef0dc7aaeb743 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java @@ -24,7 +24,8 @@ protected Codec getCodec() { random().nextInt(1, 32), random().nextBoolean(), ES819TSDBDocValuesFormatTests.randomBinaryCompressionMode(), - random().nextBoolean() + random().nextBoolean(), + ES819TSDBDocValuesFormatTests.randomNumericBlockSize() ) ); } @@ -37,7 +38,8 @@ public void testSkipIndexIntervalSize() { random().nextInt(1, 32), random().nextBoolean(), ES819TSDBDocValuesFormatTests.randomBinaryCompressionMode(), - random().nextBoolean() + random().nextBoolean(), + ES819TSDBDocValuesFormatTests.randomNumericBlockSize() ) ); assertTrue(ex.getMessage().contains("skipIndexIntervalSize must be > 1")); From ee857219cd0ca915fe4a136c6a203be0bf5bf0fe Mon Sep 17 00:00:00 2001 From: Kostas Krikellas Date: Thu, 27 Nov 2025 15:52:56 +0200 Subject: [PATCH 09/11] assert --- .../index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index 2d059f4ff8e84..90ffb784a8154 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -137,10 +137,6 @@ private static boolean getOptimizedMergeEnabledDefault() { final BinaryDVCompressionMode binaryDVCompressionMode; final boolean enablePerBlockCompression; - static ES819TSDBDocValuesFormat getInstance(boolean useLargeNumericBlock) { - return useLargeNumericBlock ? new ES819TSDBDocValuesFormat(NUMERIC_LARGE_BLOCK_SHIFT) : new ES819TSDBDocValuesFormat(); - } - public ES819TSDBDocValuesFormat() { this(NUMERIC_BLOCK_SHIFT); } @@ -204,6 +200,7 @@ public ES819TSDBDocValuesFormat( final int numericBlockShift ) { super(CODEC_NAME); + assert numericBlockShift == NUMERIC_BLOCK_SHIFT || numericBlockShift == NUMERIC_LARGE_BLOCK_SHIFT : numericBlockShift; if (skipIndexIntervalSize < 2) { throw new IllegalArgumentException("skipIndexIntervalSize must be > 1, got [" + skipIndexIntervalSize + "]"); } From 5e1f2bdfc9ec439524d760187aec116e9c2c7cde Mon Sep 17 00:00:00 2001 From: Kostas Krikellas Date: Thu, 27 Nov 2025 19:38:53 +0200 Subject: [PATCH 10/11] refactor --- .../es819/ES819TSDBDocValuesProducer.java | 56 ++++++++++--------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index fc17c45423321..bd707e0f6fb22 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -71,9 +71,9 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { private final int maxDoc; final int version; private final boolean merging; - private int numericBlockShift; - private int numericBlockSize; - private int numericBlockMask; + private final int numericBlockShift; + private final int numericBlockSize; + private final int numericBlockMask; ES819TSDBDocValuesProducer( SegmentReadState state, @@ -92,12 +92,10 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { this.maxDoc = state.segmentInfo.maxDoc(); this.primarySortFieldNumber = primarySortFieldNumber(state.segmentInfo, state.fieldInfos); this.merging = false; - this.numericBlockShift = numericBlockShift; - this.numericBlockSize = 1 << numericBlockShift; - this.numericBlockMask = numericBlockSize - 1; // read in the entries from the metadata file. int version = -1; + int blockShift = numericBlockShift; String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName)) { @@ -113,17 +111,20 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { state.segmentSuffix ); if (version >= ES819TSDBDocValuesFormat.VERSION_NUMERIC_LARGE_BLOCKS) { - this.numericBlockShift = in.readByte(); - this.numericBlockSize = 1 << this.numericBlockShift; - this.numericBlockMask = this.numericBlockSize - 1; + blockShift = in.readByte(); } - readFields(in, state.fieldInfos, version); + readFields(in, state.fieldInfos, version, blockShift); } catch (Throwable exception) { priorE = exception; } finally { CodecUtil.checkFooter(in, priorE); } } + + this.numericBlockShift = blockShift; + this.numericBlockSize = 1 << blockShift; + this.numericBlockMask = numericBlockSize - 1; + String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); this.data = state.directory.openInput(dataName, state.context); boolean success = false; @@ -1535,7 +1536,7 @@ static int primarySortFieldNumber(SegmentInfo segmentInfo, FieldInfos fieldInfos return -1; } - private void readFields(IndexInput meta, FieldInfos infos, int version) throws IOException { + private void readFields(IndexInput meta, FieldInfos infos, int version, int numericBlockShift) throws IOException { for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { FieldInfo info = infos.fieldInfo(fieldNumber); if (info == null) { @@ -1546,24 +1547,24 @@ private void readFields(IndexInput meta, FieldInfos infos, int version) throws I skippers.put(info.number, readDocValueSkipperMeta(meta)); } if (type == ES819TSDBDocValuesFormat.NUMERIC) { - numerics.put(info.number, readNumeric(meta)); + numerics.put(info.number, readNumeric(meta, numericBlockShift)); } else if (type == ES819TSDBDocValuesFormat.BINARY) { binaries.put(info.number, readBinary(meta, version)); } else if (type == ES819TSDBDocValuesFormat.SORTED) { - sorted.put(info.number, readSorted(meta)); + sorted.put(info.number, readSorted(meta, numericBlockShift)); } else if (type == ES819TSDBDocValuesFormat.SORTED_SET) { - sortedSets.put(info.number, readSortedSet(meta)); + sortedSets.put(info.number, readSortedSet(meta, numericBlockShift)); } else if (type == ES819TSDBDocValuesFormat.SORTED_NUMERIC) { - sortedNumerics.put(info.number, readSortedNumeric(meta)); + sortedNumerics.put(info.number, readSortedNumeric(meta, numericBlockShift)); } else { throw new CorruptIndexException("invalid type: " + type, meta); } } } - private NumericEntry readNumeric(IndexInput meta) throws IOException { + private static NumericEntry readNumeric(IndexInput meta, int numericBlockShift) throws IOException { NumericEntry entry = new NumericEntry(); - readNumeric(meta, entry); + readNumeric(meta, entry, numericBlockShift); return entry; } @@ -1578,7 +1579,7 @@ private static DocValuesSkipperEntry readDocValueSkipperMeta(IndexInput meta) th return new DocValuesSkipperEntry(offset, length, minValue, maxValue, docCount, maxDocID); } - private void readNumeric(IndexInput meta, NumericEntry entry) throws IOException { + private static void readNumeric(IndexInput meta, NumericEntry entry, int numericBlockShift) throws IOException { entry.numValues = meta.readLong(); // Change compared to ES87TSDBDocValuesProducer: entry.numDocsWithField = meta.readInt(); @@ -1656,14 +1657,15 @@ private BinaryEntry readBinary(IndexInput meta, int version) throws IOException return entry; } - private SortedNumericEntry readSortedNumeric(IndexInput meta) throws IOException { + private static SortedNumericEntry readSortedNumeric(IndexInput meta, int numericBlockShift) throws IOException { SortedNumericEntry entry = new SortedNumericEntry(); - readSortedNumeric(meta, entry); + readSortedNumeric(meta, entry, numericBlockShift); return entry; } - private SortedNumericEntry readSortedNumeric(IndexInput meta, SortedNumericEntry entry) throws IOException { - readNumeric(meta, entry); + private static SortedNumericEntry readSortedNumeric(IndexInput meta, SortedNumericEntry entry, int numericBlockShift) + throws IOException { + readNumeric(meta, entry, numericBlockShift); // We don't read numDocsWithField here any more. if (entry.numDocsWithField != entry.numValues) { entry.addressesOffset = meta.readLong(); @@ -1674,21 +1676,21 @@ private SortedNumericEntry readSortedNumeric(IndexInput meta, SortedNumericEntry return entry; } - private SortedEntry readSorted(IndexInput meta) throws IOException { + private static SortedEntry readSorted(IndexInput meta, int numericBlockShift) throws IOException { SortedEntry entry = new SortedEntry(); entry.ordsEntry = new NumericEntry(); - readNumeric(meta, entry.ordsEntry); + readNumeric(meta, entry.ordsEntry, numericBlockShift); entry.termsDictEntry = new TermsDictEntry(); readTermDict(meta, entry.termsDictEntry); return entry; } - private SortedSetEntry readSortedSet(IndexInput meta) throws IOException { + private static SortedSetEntry readSortedSet(IndexInput meta, int numericBlockShift) throws IOException { SortedSetEntry entry = new SortedSetEntry(); byte multiValued = meta.readByte(); switch (multiValued) { case 0: // singlevalued - entry.singleValueEntry = readSorted(meta); + entry.singleValueEntry = readSorted(meta, numericBlockShift); return entry; case 1: // multivalued break; @@ -1696,7 +1698,7 @@ private SortedSetEntry readSortedSet(IndexInput meta) throws IOException { throw new CorruptIndexException("Invalid multiValued flag: " + multiValued, meta); } entry.ordsEntry = new SortedNumericEntry(); - readSortedNumeric(meta, entry.ordsEntry); + readSortedNumeric(meta, entry.ordsEntry, numericBlockShift); entry.termsDictEntry = new TermsDictEntry(); readTermDict(meta, entry.termsDictEntry); return entry; From 0bacea773f8349c072f600e9d074b4347c5d7048 Mon Sep 17 00:00:00 2001 From: Kostas Krikellas Date: Thu, 27 Nov 2025 19:51:49 +0200 Subject: [PATCH 11/11] fix --- .../codec/tsdb/es819/ES819TSDBDocValuesFormat.java | 2 +- .../codec/tsdb/es819/ES819TSDBDocValuesProducer.java | 12 +++--------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index 90ffb784a8154..833e5c551efd3 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -231,6 +231,6 @@ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOExcept @Override public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { - return new ES819TSDBDocValuesProducer(state, numericBlockShift, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION); + return new ES819TSDBDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION); } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index bd707e0f6fb22..6c5b290880003 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -75,14 +75,8 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { private final int numericBlockSize; private final int numericBlockMask; - ES819TSDBDocValuesProducer( - SegmentReadState state, - int numericBlockShift, - String dataCodec, - String dataExtension, - String metaCodec, - String metaExtension - ) throws IOException { + ES819TSDBDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) + throws IOException { this.numerics = new IntObjectHashMap<>(); this.binaries = new IntObjectHashMap<>(); this.sorted = new IntObjectHashMap<>(); @@ -95,7 +89,7 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { // read in the entries from the metadata file. int version = -1; - int blockShift = numericBlockShift; + int blockShift = ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT; String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName)) {