From 331a65438ec6023ecf763a2e0b65378d3e8ec1ee Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Tue, 25 Nov 2025 17:35:03 -0600 Subject: [PATCH 1/7] Working bulk loading --- .../es819/ES819TSDBDocValuesProducer.java | 129 +++++++++++++++--- 1 file changed, 107 insertions(+), 22 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 8100685cb4df3..ca5c3bdfc6400 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -54,6 +54,9 @@ import org.elasticsearch.index.mapper.blockloader.docvalues.BlockDocValuesReader; import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_MAX_LEVEL; @@ -364,13 +367,13 @@ private BinaryDocValues getCompressedBinary(BinaryEntry entry) throws IOExceptio final RandomAccessInput addressesData = this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength); final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData); - final RandomAccessInput docRangeData = this.data.randomAccessSlice(entry.docRangeOffset, entry.docRangeLength); - final DirectMonotonicReader docRanges = DirectMonotonicReader.getInstance(entry.docRangeMeta, docRangeData); + final RandomAccessInput docOffsetsData = this.data.randomAccessSlice(entry.docOffsetsOffset, entry.docOffsetLength); + final DirectMonotonicReader docOffsets = DirectMonotonicReader.getInstance(entry.docOffsetMeta, docOffsetsData); return new DenseBinaryDocValues(maxDoc) { final BinaryDecoder decoder = new BinaryDecoder( entry.compression.compressionMode().newDecompressor(), addresses, - docRanges, + docOffsets, data.clone(), entry.maxUncompressedChunkSize, entry.maxNumDocsInAnyBlock @@ -391,13 +394,22 @@ public BlockLoader.Block tryRead( boolean toInt ) throws IOException { int count = docs.count() - offset; - try (var builder = factory.bytesRefs(count)) { - for (int i = offset; i < docs.count(); i++) { - doc = docs.get(i); - var bytes = decoder.decode(doc, entry.numCompressedBlocks); - builder.appendBytesRef(bytes); + int firstDocId = docs.get(offset); + int lastDocId = docs.get(count - 1); + doc = lastDocId; + + if (isDense(firstDocId, lastDocId, count)) { + try (var builder = factory.singletonBytesRefs(count)) { + decoder.decodeBulk(entry.numCompressedBlocks, firstDocId, count, builder); + return builder.build(); + } + } else { + try (var builder = factory.bytesRefs(count)) { + for (int i = offset; i < docs.count(); i++) { + builder.appendBytesRef(decoder.decode(docs.get(i), entry.numCompressedBlocks)); + } + return builder.build(); } - return builder.build(); } } }; @@ -414,13 +426,13 @@ public BlockLoader.Block tryRead( final RandomAccessInput addressesData = this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength); final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData); - final RandomAccessInput docRangeData = this.data.randomAccessSlice(entry.docRangeOffset, entry.docRangeLength); - final DirectMonotonicReader docRanges = DirectMonotonicReader.getInstance(entry.docRangeMeta, docRangeData); + final RandomAccessInput docOffsetsData = this.data.randomAccessSlice(entry.docOffsetsOffset, entry.docOffsetLength); + final DirectMonotonicReader docOffsets = DirectMonotonicReader.getInstance(entry.docOffsetMeta, docOffsetsData); return new SparseBinaryDocValues(disi) { final BinaryDecoder decoder = new BinaryDecoder( entry.compression.compressionMode().newDecompressor(), addresses, - docRanges, + docOffsets, data.clone(), entry.maxUncompressedChunkSize, entry.maxNumDocsInAnyBlock @@ -439,7 +451,7 @@ public BytesRef binaryValue() throws IOException { static final class BinaryDecoder { private final LongValues addresses; - private final DirectMonotonicReader docRanges; + private final DirectMonotonicReader docOffsets; private final IndexInput compressedData; // Cache of last uncompressed block private long lastBlockId = -1; @@ -453,14 +465,14 @@ static final class BinaryDecoder { BinaryDecoder( Decompressor decompressor, LongValues addresses, - DirectMonotonicReader docRanges, + DirectMonotonicReader docOffsets, IndexInput compressedData, int biggestUncompressedBlockSize, int maxNumDocsInAnyBlock ) { this.decompressor = decompressor; this.addresses = addresses; - this.docRanges = docRanges; + this.docOffsets = docOffsets; this.compressedData = compressedData; // pre-allocate a byte array large enough for the biggest uncompressed block needed. this.uncompressedBlock = new byte[biggestUncompressedBlockSize]; @@ -526,7 +538,7 @@ long findAndUpdateBlock(DirectMonotonicReader docRanges, long lastBlockId, int d BytesRef decode(int docNumber, int numBlocks) throws IOException { // docNumber, rather than docId, because these are dense and could be indices from a DISI - long blockId = docNumber < limitDocNumForBlock ? lastBlockId : findAndUpdateBlock(docRanges, lastBlockId, docNumber, numBlocks); + long blockId = docNumber < limitDocNumForBlock ? lastBlockId : findAndUpdateBlock(docOffsets, lastBlockId, docNumber, numBlocks); int numDocsInBlock = (int) (limitDocNumForBlock - startDocNumForBlock); int idxInBlock = (int) (docNumber - startDocNumForBlock); @@ -544,6 +556,79 @@ BytesRef decode(int docNumber, int numBlocks) throws IOException { uncompressedBytesRef.length = end - start; return uncompressedBytesRef; } + + void decodeBulk( + int numBlocks, + int firstDoc, + int count, + BlockLoader.SingletonBytesRefBuilder builder + ) throws IOException { + int remainingCount = count; + int nextDoc = firstDoc; + long[] offsets = new long[count + 1]; + int docsAdded = 0; + int currBlockByteOffset = 0; + List decompressedBlocks = new ArrayList<>(); + + while (remainingCount > 0) { + + long blockId = nextDoc < limitDocNumForBlock ? lastBlockId : findAndUpdateBlock(this.docOffsets, lastBlockId, nextDoc, numBlocks); + assert blockId >= 0; + + int numDocsInBlock = (int) (limitDocNumForBlock - startDocNumForBlock); + int idxFirstDocInBlock = (int) (nextDoc - startDocNumForBlock); + int countInBlock = Math.min(numDocsInBlock - idxFirstDocInBlock, remainingCount); + + assert idxFirstDocInBlock < numDocsInBlock; + assert countInBlock <= numDocsInBlock; + + if (blockId != lastBlockId) { + decompressBlock((int) blockId, numDocsInBlock); + // uncompressedBytesRef and uncompressedDocStarts now populated + lastBlockId = blockId; + } + + // Copy offsets for block into combined offset array + int startOffset = uncompressedDocStarts[idxFirstDocInBlock]; + int endOffset = uncompressedDocStarts[idxFirstDocInBlock + countInBlock]; + int lenValuesInBlock = endOffset - startOffset; + int offsetIdx = 0; + for (int i = idxFirstDocInBlock; i < idxFirstDocInBlock + countInBlock; i++) { + offsets[docsAdded + offsetIdx+1] = uncompressedDocStarts[i+1] - startOffset + currBlockByteOffset; + offsetIdx++; + } + + nextDoc += countInBlock; + remainingCount -= countInBlock; + docsAdded += countInBlock; + currBlockByteOffset += lenValuesInBlock; + + if (remainingCount == 0) { + // avoid making a copy if this was the last block to be decompressed + decompressedBlocks.add(new BytesRef(uncompressedBlock, startOffset, lenValuesInBlock)); + } else { + decompressedBlocks.add(new BytesRef(Arrays.copyOfRange(uncompressedBlock, startOffset, startOffset + lenValuesInBlock))); + } + } + + int totalLen = Math.toIntExact(offsets[count]); + if (totalLen == 0) { + builder.appendBytesRefs(new byte[0], 0); + } else { + var allBytes = combinedBytes(totalLen, decompressedBlocks); + builder.appendBytesRefs(allBytes, offsets); + } + } + + static byte[] combinedBytes(int totalLen, List allBytes) { + byte[] all = new byte[totalLen]; + int byteOffset = 0; + for (var bytes : allBytes) { + System.arraycopy(bytes.bytes, bytes.offset, all, byteOffset, bytes.length); + byteOffset += bytes.length; + } + return all; + } } abstract static class DenseBinaryDocValues extends BinaryDocValues implements BlockLoader.OptionalColumnAtATimeReader { @@ -1533,9 +1618,9 @@ private BinaryEntry readBinary(IndexInput meta, int version) throws IOException entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, numCompressedChunks + 1, blockShift); entry.addressesLength = meta.readLong(); - entry.docRangeOffset = meta.readLong(); - entry.docRangeMeta = DirectMonotonicReader.loadMeta(meta, numCompressedChunks + 1, blockShift); - entry.docRangeLength = meta.readLong(); + entry.docOffsetsOffset = meta.readLong(); + entry.docOffsetMeta = DirectMonotonicReader.loadMeta(meta, numCompressedChunks + 1, blockShift); + entry.docOffsetLength = meta.readLong(); entry.numCompressedBlocks = numCompressedChunks; } @@ -2230,14 +2315,14 @@ static class BinaryEntry { int maxLength; long addressesOffset; long addressesLength; - long docRangeOffset; - long docRangeLength; + long docOffsetsOffset; + long docOffsetLength; // compression mode int maxUncompressedChunkSize; int maxNumDocsInAnyBlock; int numCompressedBlocks; DirectMonotonicReader.Meta addressesMeta; - DirectMonotonicReader.Meta docRangeMeta; + DirectMonotonicReader.Meta docOffsetMeta; BinaryEntry(BinaryDVCompressionMode compression) { this.compression = compression; From 6d904bfaccbded1dab0d0aa6e6617ff78960aafc Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Tue, 25 Nov 2025 17:55:49 -0600 Subject: [PATCH 2/7] A bit of cleanup --- .../tsdb/es819/ES819TSDBDocValuesProducer.java | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index ca5c3bdfc6400..24bb8508fb34c 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -565,13 +565,12 @@ void decodeBulk( ) throws IOException { int remainingCount = count; int nextDoc = firstDoc; + int blockDocOffset = 0; + int blockByteOffset = 0; long[] offsets = new long[count + 1]; - int docsAdded = 0; - int currBlockByteOffset = 0; List decompressedBlocks = new ArrayList<>(); while (remainingCount > 0) { - long blockId = nextDoc < limitDocNumForBlock ? lastBlockId : findAndUpdateBlock(this.docOffsets, lastBlockId, nextDoc, numBlocks); assert blockId >= 0; @@ -592,16 +591,15 @@ void decodeBulk( int startOffset = uncompressedDocStarts[idxFirstDocInBlock]; int endOffset = uncompressedDocStarts[idxFirstDocInBlock + countInBlock]; int lenValuesInBlock = endOffset - startOffset; - int offsetIdx = 0; - for (int i = idxFirstDocInBlock; i < idxFirstDocInBlock + countInBlock; i++) { - offsets[docsAdded + offsetIdx+1] = uncompressedDocStarts[i+1] - startOffset + currBlockByteOffset; - offsetIdx++; + for (int i = 0; i < countInBlock; i++) { + int byteOffsetInBlock = uncompressedDocStarts[idxFirstDocInBlock + i + 1] - startOffset; + offsets[blockDocOffset + i + 1] = byteOffsetInBlock + blockByteOffset; } nextDoc += countInBlock; remainingCount -= countInBlock; - docsAdded += countInBlock; - currBlockByteOffset += lenValuesInBlock; + blockDocOffset += countInBlock; + blockByteOffset += lenValuesInBlock; if (remainingCount == 0) { // avoid making a copy if this was the last block to be decompressed From e2c681f75b4ca105a48dde32b089b0f62f9b8026 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 26 Nov 2025 00:05:54 +0000 Subject: [PATCH 3/7] [CI] Auto commit changes from spotless --- .../es819/ES819TSDBDocValuesProducer.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 24bb8508fb34c..3acc6ac62a66f 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -538,7 +538,9 @@ long findAndUpdateBlock(DirectMonotonicReader docRanges, long lastBlockId, int d BytesRef decode(int docNumber, int numBlocks) throws IOException { // docNumber, rather than docId, because these are dense and could be indices from a DISI - long blockId = docNumber < limitDocNumForBlock ? lastBlockId : findAndUpdateBlock(docOffsets, lastBlockId, docNumber, numBlocks); + long blockId = docNumber < limitDocNumForBlock + ? lastBlockId + : findAndUpdateBlock(docOffsets, lastBlockId, docNumber, numBlocks); int numDocsInBlock = (int) (limitDocNumForBlock - startDocNumForBlock); int idxInBlock = (int) (docNumber - startDocNumForBlock); @@ -557,12 +559,7 @@ BytesRef decode(int docNumber, int numBlocks) throws IOException { return uncompressedBytesRef; } - void decodeBulk( - int numBlocks, - int firstDoc, - int count, - BlockLoader.SingletonBytesRefBuilder builder - ) throws IOException { + void decodeBulk(int numBlocks, int firstDoc, int count, BlockLoader.SingletonBytesRefBuilder builder) throws IOException { int remainingCount = count; int nextDoc = firstDoc; int blockDocOffset = 0; @@ -571,7 +568,9 @@ void decodeBulk( List decompressedBlocks = new ArrayList<>(); while (remainingCount > 0) { - long blockId = nextDoc < limitDocNumForBlock ? lastBlockId : findAndUpdateBlock(this.docOffsets, lastBlockId, nextDoc, numBlocks); + long blockId = nextDoc < limitDocNumForBlock + ? lastBlockId + : findAndUpdateBlock(this.docOffsets, lastBlockId, nextDoc, numBlocks); assert blockId >= 0; int numDocsInBlock = (int) (limitDocNumForBlock - startDocNumForBlock); @@ -605,7 +604,9 @@ void decodeBulk( // avoid making a copy if this was the last block to be decompressed decompressedBlocks.add(new BytesRef(uncompressedBlock, startOffset, lenValuesInBlock)); } else { - decompressedBlocks.add(new BytesRef(Arrays.copyOfRange(uncompressedBlock, startOffset, startOffset + lenValuesInBlock))); + decompressedBlocks.add( + new BytesRef(Arrays.copyOfRange(uncompressedBlock, startOffset, startOffset + lenValuesInBlock)) + ); } } From 9bed9c537d0e40a469e828c6c95b20d29466a1e1 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Tue, 25 Nov 2025 22:03:59 -0600 Subject: [PATCH 4/7] Update docs/changelog/138631.yaml --- docs/changelog/138631.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/138631.yaml diff --git a/docs/changelog/138631.yaml b/docs/changelog/138631.yaml new file mode 100644 index 0000000000000..9e6d254879d45 --- /dev/null +++ b/docs/changelog/138631.yaml @@ -0,0 +1,5 @@ +pr: 138631 +summary: Improved bulk loading for binary doc values +area: Codec +type: enhancement +issues: [] From d52b0d8f203e8677817d505725d3efc97873e16b Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Wed, 26 Nov 2025 13:13:08 -0600 Subject: [PATCH 5/7] Scan forward through chunk to find uncompressed size and allocate buffer --- .../es819/ES819TSDBDocValuesProducer.java | 93 ++++++++++++------- 1 file changed, 59 insertions(+), 34 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 3acc6ac62a66f..fafcd07940b31 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -521,8 +521,12 @@ void deltaDecode(int[] arr, int length) { } } - long findAndUpdateBlock(DirectMonotonicReader docRanges, long lastBlockId, int docNumber, int numBlocks) { - long index = docRanges.binarySearch(lastBlockId + 1, numBlocks, docNumber); + long findAndUpdateBlock(int docNumber, int numBlocks) { + if (docNumber < limitDocNumForBlock && lastBlockId >= 0) { + return lastBlockId; + } + + long index = docOffsets.binarySearch(lastBlockId + 1, numBlocks, docNumber); // If index is found, index is inclusive lower bound of docNum range, so docNum is in blockId == index if (index < 0) { // If index was not found, insertion point (-index - 1) will be upper bound of docNum range. @@ -531,16 +535,25 @@ long findAndUpdateBlock(DirectMonotonicReader docRanges, long lastBlockId, int d } assert index < numBlocks : "invalid range " + index + " for doc " + docNumber + " in numBlocks " + numBlocks; - startDocNumForBlock = docRanges.get(index); - limitDocNumForBlock = docRanges.get(index + 1); + startDocNumForBlock = docOffsets.get(index); + limitDocNumForBlock = docOffsets.get(index + 1); return index; } + // If query is over adjacent values we can scan forward through blocks, rather than binary searching for the next block. + long findAndUpdateBlockByScanning(int docNumber) { + if (docNumber < limitDocNumForBlock && lastBlockId >= 0) { + return lastBlockId; + } + long blockId = lastBlockId + 1; + startDocNumForBlock = docOffsets.get(blockId); + limitDocNumForBlock = docOffsets.get(blockId + 1); + return blockId; + } + BytesRef decode(int docNumber, int numBlocks) throws IOException { // docNumber, rather than docId, because these are dense and could be indices from a DISI - long blockId = docNumber < limitDocNumForBlock - ? lastBlockId - : findAndUpdateBlock(docOffsets, lastBlockId, docNumber, numBlocks); + long blockId = findAndUpdateBlock(docNumber, numBlocks); int numDocsInBlock = (int) (limitDocNumForBlock - startDocNumForBlock); int idxInBlock = (int) (docNumber - startDocNumForBlock); @@ -559,20 +572,49 @@ BytesRef decode(int docNumber, int numBlocks) throws IOException { return uncompressedBytesRef; } + int computeMultipleBlockBufferSize(int count, int firstDoc, long firstBlockId) throws IOException { + long lastBlockId = firstBlockId; + + int remaining = count; + int nextDoc = firstDoc; + for (long blockId = firstBlockId; remaining > 0; blockId++, lastBlockId++) { + long blockStart = docOffsets.get(blockId); + long blockLimit = docOffsets.get(blockId + 1); + int numDocsInBlock = (int) (blockLimit - blockStart); + int idxFirstDocInBlock = (int) (nextDoc - blockStart); + int countInBlock = Math.min(numDocsInBlock - idxFirstDocInBlock, remaining); + remaining -= countInBlock; + nextDoc += countInBlock; + } + + // We could use compressedData directly, but making a clone seems less error-prone. + IndexInput readAhead = compressedData.clone(); + int requiredBufferSize = 0; + for (long blockId = firstBlockId; blockId <= lastBlockId; blockId++) { + long blockStartOffset = addresses.get(blockId); + readAhead.seek(blockStartOffset); + readAhead.readByte(); // skip BlockHeader + int uncompressedBlockLength = readAhead.readVInt(); + requiredBufferSize += uncompressedBlockLength; + } + return requiredBufferSize; + } + void decodeBulk(int numBlocks, int firstDoc, int count, BlockLoader.SingletonBytesRefBuilder builder) throws IOException { int remainingCount = count; int nextDoc = firstDoc; int blockDocOffset = 0; int blockByteOffset = 0; + + // Need to binary search forward for first blockId, but since query is dense, can scan from then on. + // This block contains at least one value for range. + long firstBlockId = findAndUpdateBlock(nextDoc, numBlocks); long[] offsets = new long[count + 1]; - List decompressedBlocks = new ArrayList<>(); + int bufferSize = computeMultipleBlockBufferSize(count, firstDoc, firstBlockId); + byte[] bytes = new byte[bufferSize]; while (remainingCount > 0) { - long blockId = nextDoc < limitDocNumForBlock - ? lastBlockId - : findAndUpdateBlock(this.docOffsets, lastBlockId, nextDoc, numBlocks); - assert blockId >= 0; - + long blockId = findAndUpdateBlockByScanning(nextDoc); int numDocsInBlock = (int) (limitDocNumForBlock - startDocNumForBlock); int idxFirstDocInBlock = (int) (nextDoc - startDocNumForBlock); int countInBlock = Math.min(numDocsInBlock - idxFirstDocInBlock, remainingCount); @@ -595,38 +637,21 @@ void decodeBulk(int numBlocks, int firstDoc, int count, BlockLoader.SingletonByt offsets[blockDocOffset + i + 1] = byteOffsetInBlock + blockByteOffset; } + // Copy uncompressedBlock bytes into buffer for multiple blocks + System.arraycopy(uncompressedBlock, startOffset, bytes, blockByteOffset, lenValuesInBlock); + nextDoc += countInBlock; remainingCount -= countInBlock; blockDocOffset += countInBlock; blockByteOffset += lenValuesInBlock; - - if (remainingCount == 0) { - // avoid making a copy if this was the last block to be decompressed - decompressedBlocks.add(new BytesRef(uncompressedBlock, startOffset, lenValuesInBlock)); - } else { - decompressedBlocks.add( - new BytesRef(Arrays.copyOfRange(uncompressedBlock, startOffset, startOffset + lenValuesInBlock)) - ); - } } int totalLen = Math.toIntExact(offsets[count]); if (totalLen == 0) { builder.appendBytesRefs(new byte[0], 0); } else { - var allBytes = combinedBytes(totalLen, decompressedBlocks); - builder.appendBytesRefs(allBytes, offsets); - } - } - - static byte[] combinedBytes(int totalLen, List allBytes) { - byte[] all = new byte[totalLen]; - int byteOffset = 0; - for (var bytes : allBytes) { - System.arraycopy(bytes.bytes, bytes.offset, all, byteOffset, bytes.length); - byteOffset += bytes.length; + builder.appendBytesRefs(bytes, offsets); } - return all; } } From 2ba8c7a63936e3517b924d4d27c48c9a63f47491 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Wed, 26 Nov 2025 13:46:16 -0600 Subject: [PATCH 6/7] Some cleanup --- .../es819/ES819TSDBDocValuesProducer.java | 28 +++++++------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index fafcd07940b31..130d364e491e3 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -572,30 +572,22 @@ BytesRef decode(int docNumber, int numBlocks) throws IOException { return uncompressedBytesRef; } - int computeMultipleBlockBufferSize(int count, int firstDoc, long firstBlockId) throws IOException { - long lastBlockId = firstBlockId; - - int remaining = count; - int nextDoc = firstDoc; - for (long blockId = firstBlockId; remaining > 0; blockId++, lastBlockId++) { - long blockStart = docOffsets.get(blockId); - long blockLimit = docOffsets.get(blockId + 1); - int numDocsInBlock = (int) (blockLimit - blockStart); - int idxFirstDocInBlock = (int) (nextDoc - blockStart); - int countInBlock = Math.min(numDocsInBlock - idxFirstDocInBlock, remaining); - remaining -= countInBlock; - nextDoc += countInBlock; - } - - // We could use compressedData directly, but making a clone seems less error-prone. + int computeMultipleBlockBufferSize(int count, int firstDoc, long firstBlockId, long numBlocks) throws IOException { IndexInput readAhead = compressedData.clone(); + int lastDoc = firstDoc + count - 1; int requiredBufferSize = 0; - for (long blockId = firstBlockId; blockId <= lastBlockId; blockId++) { + + for (long blockId = firstBlockId; blockId < numBlocks; blockId++) { long blockStartOffset = addresses.get(blockId); readAhead.seek(blockStartOffset); readAhead.readByte(); // skip BlockHeader int uncompressedBlockLength = readAhead.readVInt(); requiredBufferSize += uncompressedBlockLength; + + long blockLimit = docOffsets.get(blockId + 1); + if (lastDoc < blockLimit) { + break; + } } return requiredBufferSize; } @@ -610,7 +602,7 @@ void decodeBulk(int numBlocks, int firstDoc, int count, BlockLoader.SingletonByt // This block contains at least one value for range. long firstBlockId = findAndUpdateBlock(nextDoc, numBlocks); long[] offsets = new long[count + 1]; - int bufferSize = computeMultipleBlockBufferSize(count, firstDoc, firstBlockId); + int bufferSize = computeMultipleBlockBufferSize(count, firstDoc, firstBlockId, numBlocks); byte[] bytes = new byte[bufferSize]; while (remainingCount > 0) { From 62bf8435d16264620819362ccfc6cde5aa13572f Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 26 Nov 2025 19:54:29 +0000 Subject: [PATCH 7/7] [CI] Auto commit changes from spotless --- .../index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 130d364e491e3..74b98f6cdfca8 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -54,9 +54,6 @@ import org.elasticsearch.index.mapper.blockloader.docvalues.BlockDocValuesReader; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_MAX_LEVEL;