diff --git a/docs/changelog/137995.yaml b/docs/changelog/137995.yaml
new file mode 100644
index 0000000000000..6e5ff3a572243
--- /dev/null
+++ b/docs/changelog/137995.yaml
@@ -0,0 +1,5 @@
+pr: 137995
+summary: Improve bulk loading of binary doc values
+area: Mapping
+type: enhancement
+issues: []
diff --git a/server/src/main/java/org/elasticsearch/common/util/BigArrays.java b/server/src/main/java/org/elasticsearch/common/util/BigArrays.java
index 5d2420328033d..9e5e8bc9fd568 100644
--- a/server/src/main/java/org/elasticsearch/common/util/BigArrays.java
+++ b/server/src/main/java/org/elasticsearch/common/util/BigArrays.java
@@ -614,6 +614,10 @@ public ByteArray newByteArray(long size, boolean clearOnResize) {
}
}
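+
+    /**
+     * Wraps the provided byte array in a {@link ByteArray} without copying it.
+     */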
+ public ByteArray newByteArrayWrapper(byte[] bytes) {
+ return validate(new ByteArrayWrapper(this, bytes, bytes.length, null, false));
+ }
+
/**
* Allocate a new {@link ByteArray} initialized with zeros.
* @param size the initial length of the array
diff --git a/server/src/main/java/org/elasticsearch/common/util/BytesRefArray.java b/server/src/main/java/org/elasticsearch/common/util/BytesRefArray.java
index a6693fd26bf50..770fda2cf8480 100644
--- a/server/src/main/java/org/elasticsearch/common/util/BytesRefArray.java
+++ b/server/src/main/java/org/elasticsearch/common/util/BytesRefArray.java
@@ -77,7 +77,7 @@ public BytesRefArray(StreamInput in, BigArrays bigArrays) throws IOException {
}
}
- private BytesRefArray(LongArray startOffsets, ByteArray bytes, long size, BigArrays bigArrays) {
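+    /**
+     * Public so that callers (such as block loaders) can assemble a {@link BytesRefArray} from
+     * pre-computed start offsets and bytes without copying.
+     */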
+ public BytesRefArray(LongArray startOffsets, ByteArray bytes, long size, BigArrays bigArrays) {
this.bytes = bytes;
this.startOffsets = startOffsets;
this.size = size;
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
index f4012cf678b46..ab7af743589d5 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
@@ -220,13 +220,27 @@ public BlockLoader.Block tryRead(
boolean toInt
) throws IOException {
int count = docs.count() - offset;
- try (var builder = factory.bytesRefs(count)) {
- for (int i = offset; i < docs.count(); i++) {
- doc = docs.get(i);
- bytesSlice.readBytes((long) doc * length, bytes.bytes, 0, length);
- builder.appendBytesRef(bytes);
+ int firstDocId = docs.get(offset);
+        int lastDocId = docs.get(docs.count() - 1);
+ doc = lastDocId;
+
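+        // Dense case: the requested docs form one contiguous range, so their fixed-length values
+        // occupy a single contiguous region of the slice and can be fetched with one bulk read.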
+ if (isDense(firstDocId, lastDocId, count)) {
+ try (var builder = factory.singletonBytesRefs(count)) {
+                int bulkLength = Math.toIntExact((long) length * count);
+                byte[] buffer = new byte[bulkLength];
+                bytesSlice.readBytes((long) firstDocId * length, buffer, 0, bulkLength);
+                builder.appendBytesRefs(buffer, length);
+ return builder.build();
+ }
+ } else {
+ try (var builder = factory.bytesRefs(count)) {
+ for (int i = offset; i < docs.count(); i++) {
+ int docId = docs.get(i);
+ bytesSlice.readBytes((long) docId * length, bytes.bytes, 0, length);
+ builder.appendBytesRef(bytes);
+ }
+ return builder.build();
}
- return builder.build();
}
}
};
@@ -255,15 +269,39 @@ public BlockLoader.Block tryRead(
boolean toInt
) throws IOException {
int count = docs.count() - offset;
- try (var builder = factory.bytesRefs(count)) {
- for (int i = offset; i < docs.count(); i++) {
- doc = docs.get(i);
- long startOffset = addresses.get(doc);
- bytes.length = (int) (addresses.get(doc + 1L) - startOffset);
- bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length);
- builder.appendBytesRef(bytes);
+ int firstDocId = docs.get(offset);
+        int lastDocId = docs.get(docs.count() - 1);
+ doc = lastDocId;
+
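+        // Dense case: all requested values are stored contiguously, so read the whole byte range
+        // once and hand per-value offsets to the builder instead of issuing one read per document.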
+ if (isDense(firstDocId, lastDocId, count)) {
+ try (var builder = factory.singletonBytesRefs(count)) {
+ long[] offsets = new long[count + 1];
+
+ // Normalize offsets so that first offset is 0
+ long startOffset = addresses.get(firstDocId);
+ for (int i = offset, j = 1; i < docs.count(); i++, j++) {
+ int docId = docs.get(i);
+                    long nextOffset = addresses.get(docId + 1L) - startOffset;
+ offsets[j] = nextOffset;
+ }
+
+                int length = Math.toIntExact(addresses.get(lastDocId + 1L) - startOffset);
+                byte[] buffer = new byte[length];
+                bytesSlice.readBytes(startOffset, buffer, 0, length);
+                builder.appendBytesRefs(buffer, offsets);
+ return builder.build();
+ }
+ } else {
+ try (var builder = factory.bytesRefs(count)) {
+ for (int i = offset; i < docs.count(); i++) {
+ int docId = docs.get(i);
+ long startOffset = addresses.get(docId);
+ bytes.length = (int) (addresses.get(docId + 1L) - startOffset);
+ bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length);
+ builder.appendBytesRef(bytes);
+ }
+ return builder.build();
}
- return builder.build();
}
}
};
@@ -1556,13 +1594,6 @@ long lookAheadValueAt(int targetDoc) throws IOException {
return lookaheadBlock[valueIndex];
}
- static boolean isDense(int firstDocId, int lastDocId, int length) {
- // This does not detect duplicate docids (e.g [1, 1, 2, 4] would be detected as dense),
- // this can happen with enrich or lookup. However this codec isn't used for enrich / lookup.
- // This codec is only used in the context of logsdb and tsdb, so this is fine here.
- return lastDocId - firstDocId == length - 1;
- }
-
@Override
SortedOrdinalReader sortedOrdinalReader() {
return null;
@@ -1681,6 +1712,13 @@ public BlockLoader.Block tryRead(
}
}
+ private static boolean isDense(int firstDocId, int lastDocId, int length) {
+        // This does not detect duplicate doc IDs (e.g. [1, 1, 2, 4] would be considered dense),
+        // which can happen with enrich or lookup. However, this codec is only used in the context
+        // of logsdb and tsdb, where duplicates don't occur, so this is fine here.
+ return lastDocId - firstDocId == length - 1;
+ }
+
private NumericDocValues getRangeEncodedNumericDocValues(NumericEntry entry, long maxOrd) throws IOException {
final var ordinalsReader = new SortedOrdinalReader(
maxOrd,
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java
index 08325e25d3c65..dc018cc368363 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java
@@ -407,6 +407,17 @@ interface BlockFactory {
*/
BytesRefBuilder bytesRefs(int expectedCount);
+    /**
+     * Build a specialized builder for singleton dense {@link BytesRef} fields with the following constraints:
+     * <ul>
+     *     <li>Only one value per document can be collected</li>
+     *     <li>No more than {@code expectedCount} values can be collected</li>
+     * </ul>
+     *
+     * @param expectedCount The maximum number of values to be collected.
+     */
+ SingletonBytesRefBuilder singletonBytesRefs(int expectedCount);
+
/**
* Build a builder to load doubles as loaded from doc values.
* Doc values load doubles in sorted order.
@@ -574,6 +585,22 @@ interface BytesRefBuilder extends Builder {
BytesRefBuilder appendBytesRef(BytesRef value);
}
+ /**
+     * Specialized builder for collecting dense, single-valued {@link BytesRef} columns.
+ */
+ interface SingletonBytesRefBuilder extends Builder {
+ /**
+         * Appends multiple BytesRefs. {@code offsets} holds the start offset of each BytesRef within
+         * {@code bytes}; its length is one more than the number of BytesRefs.
+ */
+ SingletonBytesRefBuilder appendBytesRefs(byte[] bytes, long[] offsets) throws IOException;
+
+ /**
+         * Appends multiple BytesRefs, all with the same length.
+         */
+        SingletonBytesRefBuilder appendBytesRefs(byte[] bytes, long bytesRefLength) throws IOException;
+ }
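+
+    // A minimal usage sketch (illustrative only, not part of the API): a reader that has bulk-read
+    // `count` fixed-length values into a contiguous byte[] can load them all in one call:
+    //
+    //   try (SingletonBytesRefBuilder builder = factory.singletonBytesRefs(count)) {
+    //       builder.appendBytesRefs(bytes, length); // `length` is the shared per-value length
+    //       return builder.build();
+    //   }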
+
interface FloatBuilder extends Builder {
/**
* Appends a float to the current entry.
diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java
index 9f18dc59b4296..ae4594e0568d7 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java
@@ -67,7 +67,9 @@
import static org.elasticsearch.test.ESTestCase.between;
import static org.elasticsearch.test.ESTestCase.randomAlphaOfLength;
+import static org.elasticsearch.test.ESTestCase.randomBoolean;
import static org.elasticsearch.test.ESTestCase.randomFrom;
+import static org.elasticsearch.test.ESTestCase.randomIntBetween;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
@@ -76,8 +78,8 @@ public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests
private final Codec codec = new Elasticsearch92Lucene103Codec() {
final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat(
- ESTestCase.randomIntBetween(2, 4096),
- ESTestCase.randomIntBetween(1, 512),
+ randomIntBetween(2, 4096),
+ randomIntBetween(1, 512),
random().nextBoolean()
);
@@ -762,6 +764,7 @@ public void testOptionalColumnAtATimeReader() throws Exception {
final String gaugeField = "gauge";
final String binaryFixedField = "binary_variable";
final String binaryVariableField = "binary_fixed";
+ final int binaryFieldMaxLength = randomIntBetween(1, 20);
long currentTimestamp = 1704067200000L;
long currentCounter = 10_000_000;
@@ -773,8 +776,8 @@ public void testOptionalColumnAtATimeReader() throws Exception {
int numDocs = 256 + random().nextInt(8096);
for (int i = 0; i < numDocs; i++) {
- binaryFixedValues.add(new BytesRef(randomAlphaOfLength(10)));
- binaryVariableValues.add(new BytesRef(randomAlphaOfLength(between(0, 10))));
+ binaryFixedValues.add(new BytesRef(randomAlphaOfLength(binaryFieldMaxLength)));
+ binaryVariableValues.add(new BytesRef(randomAlphaOfLength(between(0, binaryFieldMaxLength))));
var d = new Document();
long timestamp = currentTimestamp;
// Index sorting doesn't work with NumericDocValuesField:
@@ -1081,6 +1084,10 @@ public void testOptionalColumnAtATimeReaderWithSparseDocs() throws Exception {
final String timestampField = "@timestamp";
String queryField = "query_field";
String temperatureField = "temperature_field";
+    final String binaryFixedField = "binary_fixed";
+    final String binaryVariableField = "binary_variable";
+ final int binaryFieldMaxLength = randomIntBetween(1, 20);
+
long currentTimestamp = 1704067200000L;
long currentCounter = 10_000_000;
@@ -1089,6 +1096,8 @@ public void testOptionalColumnAtATimeReaderWithSparseDocs() throws Exception {
int numDocsPerQValue = 120;
int numDocs = numDocsPerQValue * (1 + random().nextInt(40));
Long[] temperatureValues = new Long[numDocs];
+ BytesRef[] binaryFixed = new BytesRef[numDocs];
+ BytesRef[] binaryVariable = new BytesRef[numDocs];
long q = 1;
for (int i = 1; i <= numDocs; i++) {
var d = new Document();
@@ -1098,6 +1107,12 @@ public void testOptionalColumnAtATimeReaderWithSparseDocs() throws Exception {
d.add(new SortedNumericDocValuesField(counterField, currentCounter));
d.add(new SortedDocValuesField(counterAsStringField, new BytesRef(Long.toString(currentCounter))));
d.add(new SortedNumericDocValuesField(queryField, q));
+
+ binaryFixed[numDocs - i] = new BytesRef(randomAlphaOfLength(binaryFieldMaxLength));
+ d.add(new BinaryDocValuesField(binaryFixedField, binaryFixed[numDocs - i]));
+ binaryVariable[numDocs - i] = new BytesRef(randomAlphaOfLength(between(0, binaryFieldMaxLength)));
+ d.add(new BinaryDocValuesField(binaryVariableField, binaryVariable[numDocs - i]));
+
if (i % 120 == 0) {
q++;
}
@@ -1139,6 +1154,7 @@ public void testOptionalColumnAtATimeReaderWithSparseDocs() throws Exception {
long[] expectedCounters = new long[numDocsPerQValue];
var counterAsStringDV = getBaseSortedDocValues(leafReader, counterAsStringField);
String[] expectedCounterAsStrings = new String[numDocsPerQValue];
+
int[] docIds = new int[numDocsPerQValue];
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
var scoreDoc = topDocs.scoreDocs[i];
@@ -1149,6 +1165,7 @@ public void testOptionalColumnAtATimeReaderWithSparseDocs() throws Exception {
assertTrue(counterDV.advanceExact(scoreDoc.doc));
expectedCounters[i] = counterDV.longValue();
+
assertTrue(counterAsStringDV.advanceExact(scoreDoc.doc));
expectedCounterAsStrings[i] = counterAsStringDV.lookupOrd(counterAsStringDV.ordValue()).utf8ToString();
}
@@ -1187,35 +1204,64 @@ public void testOptionalColumnAtATimeReaderWithSparseDocs() throws Exception {
assertEquals(expectedCounter, actualCounter);
}
}
- {
- int startIndex = ESTestCase.between(0, temperatureValues.length - 1);
- int endIndex = ESTestCase.between(startIndex + 1, temperatureValues.length);
-            List<Integer> testDocs = new ArrayList<>();
- for (int i = startIndex; i < endIndex; i++) {
- if (temperatureValues[i] != null) {
- testDocs.add(i);
- }
+ }
+
+ BlockLoader.Docs docs;
+ {
+ int startIndex = ESTestCase.between(0, temperatureValues.length - 1);
+ int endIndex = ESTestCase.between(startIndex + 1, temperatureValues.length);
+        List<Integer> testDocs = new ArrayList<>();
+ for (int i = startIndex; i < endIndex; i++) {
+ if (temperatureValues[i] != null) {
+ testDocs.add(i);
}
- if (testDocs.isEmpty() == false) {
- NumericDocValues dv = leafReader.getNumericDocValues(temperatureField);
- assertThat(dv, instanceOf(OptionalColumnAtATimeReader.class));
- OptionalColumnAtATimeReader directReader = (OptionalColumnAtATimeReader) dv;
+ }
+ if (testDocs.isEmpty() == false) {
+ NumericDocValues dv = leafReader.getNumericDocValues(temperatureField);
+ assertThat(dv, instanceOf(OptionalColumnAtATimeReader.class));
+ OptionalColumnAtATimeReader directReader = (OptionalColumnAtATimeReader) dv;
+ docs = TestBlock.docs(testDocs.stream().mapToInt(n -> n).toArray());
+ assertNull(directReader.tryRead(factory, docs, 0, false, null, false));
+ TestBlock block = (TestBlock) directReader.tryRead(factory, docs, 0, true, null, false);
+ assertNotNull(block);
+ for (int i = 0; i < testDocs.size(); i++) {
+ assertThat(block.get(i), equalTo(temperatureValues[testDocs.get(i)]));
+ }
+ }
+ if (testDocs.size() > 2) {
+ // currently bulk loading is disabled with gaps
+ testDocs.remove(ESTestCase.between(1, testDocs.size() - 2));
+ docs = TestBlock.docs(testDocs.stream().mapToInt(n -> n).toArray());
+ NumericDocValues dv = leafReader.getNumericDocValues(temperatureField);
+ OptionalColumnAtATimeReader directReader = (OptionalColumnAtATimeReader) dv;
+ assertNull(directReader.tryRead(factory, docs, 0, false, null, false));
+ assertNull(directReader.tryRead(factory, docs, 0, true, null, false));
+ }
+ }
+
+ {
+        // The binary bulk loader requires densely stored values; sparse doc selections fall back to per-document reads
+        List<Integer> testDocs = IntStream.range(0, numDocs - 1).filter(i -> randomBoolean()).boxed().toList();
+ if (testDocs.isEmpty() == false) {
+ {
+ // fixed length
docs = TestBlock.docs(testDocs.stream().mapToInt(n -> n).toArray());
- assertNull(directReader.tryRead(factory, docs, 0, false, null, false));
- TestBlock block = (TestBlock) directReader.tryRead(factory, docs, 0, true, null, false);
+ var dv = getDenseBinaryValues(leafReader, binaryFixedField);
+ var block = (TestBlock) dv.tryRead(factory, docs, 0, random().nextBoolean(), null, false);
assertNotNull(block);
for (int i = 0; i < testDocs.size(); i++) {
- assertThat(block.get(i), equalTo(temperatureValues[testDocs.get(i)]));
+ assertThat(block.get(i), equalTo(binaryFixed[testDocs.get(i)]));
}
}
- if (testDocs.size() > 2) {
- // currently bulk loading is disabled with gaps
- testDocs.remove(ESTestCase.between(1, testDocs.size() - 2));
+ {
+ // variable length
docs = TestBlock.docs(testDocs.stream().mapToInt(n -> n).toArray());
- NumericDocValues dv = leafReader.getNumericDocValues(temperatureField);
- OptionalColumnAtATimeReader directReader = (OptionalColumnAtATimeReader) dv;
- assertNull(directReader.tryRead(factory, docs, 0, false, null, false));
- assertNull(directReader.tryRead(factory, docs, 0, true, null, false));
+ var dv = getDenseBinaryValues(leafReader, binaryVariableField);
+ var block = (TestBlock) dv.tryRead(factory, docs, 0, random().nextBoolean(), null, false);
+ assertNotNull(block);
+ for (int i = 0; i < testDocs.size(); i++) {
+ assertThat(block.get(i), equalTo(binaryVariable[testDocs.get(i)]));
+ }
}
}
}
@@ -1233,7 +1279,7 @@ public void testLoadKeywordFieldWithIndexSorts() throws IOException {
config.setMergePolicy(new LogByteSizeMergePolicy());
final Codec codec = new Elasticsearch92Lucene103Codec() {
final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat(
- ESTestCase.randomIntBetween(2, 4096),
+ randomIntBetween(2, 4096),
1, // always enable range-encode
random().nextBoolean()
);
@@ -1246,7 +1292,7 @@ public DocValuesFormat getDocValuesFormatForField(String field) {
config.setCodec(codec);
     Map<Integer, String> hostnames = new HashMap<>();
try (Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, config)) {
- int numDocs = ESTestCase.randomIntBetween(100, 5000);
+ int numDocs = randomIntBetween(100, 5000);
for (int i = 0; i < numDocs; i++) {
hostnames.put(i, "h" + random().nextInt(10));
}
diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java
index 95a4ec2496305..0a4e6812ac2b5 100644
--- a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java
+++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java
@@ -113,6 +113,36 @@ public BytesRefsBuilder appendBytesRef(BytesRef value) {
return new BytesRefsBuilder();
}
+ @Override
+ public BlockLoader.SingletonBytesRefBuilder singletonBytesRefs(int expectedCount) {
+ class BytesRefsBuilder extends TestBlock.Builder implements BlockLoader.SingletonBytesRefBuilder {
+ private final int count = expectedCount;
+
+ private BytesRefsBuilder() {
+ super(expectedCount);
+ }
+
+ @Override
+ public BlockLoader.SingletonBytesRefBuilder appendBytesRefs(byte[] bytes, long[] offsets) throws IOException {
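+                // Deep-copy each slice so the collected values don't alias the caller's byte array.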
+ for (int i = 0; i < offsets.length - 1; i++) {
+ BytesRef ref = new BytesRef(bytes, (int) offsets[i], (int) (offsets[i + 1] - offsets[i]));
+ add(BytesRef.deepCopyOf(ref));
+ }
+ return this;
+ }
+
+ @Override
+            public BlockLoader.SingletonBytesRefBuilder appendBytesRefs(byte[] bytes, long bytesRefLength) throws IOException {
+                for (int i = 0; i < count; i++) {
+                    BytesRef ref = new BytesRef(bytes, (int) (i * bytesRefLength), (int) bytesRefLength);
+ add(BytesRef.deepCopyOf(ref));
+ }
+ return this;
+ }
+ }
+ return new BytesRefsBuilder();
+ }
+
@Override
public BlockLoader.DoubleBuilder doublesFromDocValues(int expectedCount) {
return doubles(expectedCount);
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java
index 53a3b866acdde..5686cfe845dc6 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java
@@ -56,6 +56,11 @@ public BlockLoader.BytesRefBuilder bytesRefs(int expectedCount) {
return factory.newBytesRefBlockBuilder(expectedCount);
}
+ @Override
+ public BlockLoader.SingletonBytesRefBuilder singletonBytesRefs(int expectedCount) {
+ return new SingletonBytesRefBuilder(expectedCount, factory);
+ }
+
@Override
public BytesRefBlock constantBytes(BytesRef value, int count) {
if (count == 1) {
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/SingletonBytesRefBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/SingletonBytesRefBuilder.java
new file mode 100644
index 0000000000000..1d62d9f3bbd51
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/SingletonBytesRefBuilder.java
@@ -0,0 +1,206 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.lucene.read;
+
+import org.apache.lucene.util.RamUsageEstimator;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.util.BytesRefArray;
+import org.elasticsearch.common.util.LongArray;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.index.mapper.BlockLoader;
+
+import java.io.IOException;
+
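+/**
+ * Specialized builder that assembles a block of dense, single-valued
+ * {@link org.apache.lucene.util.BytesRef} values from a bulk-loaded byte array, wrapping the bytes
+ * in a {@link BytesRefArray} backed block without copying them.
+ */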
+public final class SingletonBytesRefBuilder implements BlockLoader.SingletonBytesRefBuilder {
+
+ private final int count;
+ private final BlockFactory blockFactory;
+
+ private BytesRefArray bytesRefArray;
+
+ public SingletonBytesRefBuilder(int count, BlockFactory blockFactory) {
+ this.count = count;
+ this.blockFactory = blockFactory;
+ }
+
+ @Override
+ public SingletonBytesRefBuilder appendBytesRefs(byte[] bytes, long[] offsets) {
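+        // Wrap (rather than copy) both the caller's bytes and offsets; the BytesRefArray takes
+        // ownership of them, so no per-value copying happens here.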
+ var values = blockFactory.bigArrays().newByteArrayWrapper(bytes);
+ bytesRefArray = new BytesRefArray(new LongArrayWrapper(offsets), values, count, blockFactory.bigArrays());
+ return this;
+ }
+
+ @Override
+    public SingletonBytesRefBuilder appendBytesRefs(byte[] bytes, long bytesRefLength) {
+        var values = blockFactory.bigArrays().newByteArrayWrapper(bytes);
+        bytesRefArray = new BytesRefArray(
+            new ConstantOffsetLongArrayWrapper(bytesRefLength, count + 1),
+ values,
+ count,
+ blockFactory.bigArrays()
+ );
+ return this;
+ }
+
+ @Override
+ public BlockLoader.Block build() {
+ return blockFactory.newBytesRefArrayVector(bytesRefArray, count).asBlock();
+ }
+
+ @Override
+ public BlockLoader.Builder appendNull() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public BlockLoader.Builder beginPositionEntry() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public BlockLoader.Builder endPositionEntry() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void close() {}
+
+    /**
+     * A read-only {@link LongArray} that starts at 0 and has a constant offset between each pair of values.
+     * For an offset of n and size s, the values are [0, n, 2n, 3n, ..., (s - 1) * n]; e.g. offset 5 and size 4 yield [0, 5, 10, 15].
+     * This provides the "offsets" array for BytesRefs of constant length without allocating a backing array.
+     */
+ static final class ConstantOffsetLongArrayWrapper implements LongArray {
+
+ private final long offset;
+ private final long size;
+
+ ConstantOffsetLongArrayWrapper(long offset, long size) {
+ this.offset = offset;
+ this.size = size;
+ }
+
+ @Override
+ public long get(long index) {
+ assert index >= 0 && index < size;
+ return index * offset;
+ }
+
+ @Override
+ public long getAndSet(long index, long value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void set(long index, long value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long increment(long index, long inc) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void fill(long fromIndex, long toIndex, long value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void fillWith(StreamInput in) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void set(long index, byte[] buf, int offset, int len) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long size() {
+ return size;
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return 2 * Long.BYTES; // offset + size
+ }
+
+ @Override
+ public void close() {}
+ }
+
+    /**
+     * A read-only {@link LongArray} view over a plain {@code long[]}.
+     */
+    static final class LongArrayWrapper implements LongArray {
+
+ final long[] values;
+
+ LongArrayWrapper(long[] values) {
+ this.values = values;
+ }
+
+ @Override
+ public long get(long index) {
+ return values[(int) index];
+ }
+
+ @Override
+ public long getAndSet(long index, long value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void set(long index, long value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long increment(long index, long inc) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void fill(long fromIndex, long toIndex, long value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void fillWith(StreamInput in) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void set(long index, byte[] buf, int offset, int len) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long size() {
+ return values.length;
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) values.length * Long.BYTES;
+ }
+
+ @Override
+ public void close() {}
+ }
+
+}