From 6edcd28398ac3f256cf2693d7f5477f9c8c827d3 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 28 Sep 2021 14:35:37 +0200 Subject: [PATCH 1/2] LUCENE-10127: Minor speedup to doc values writes. This slightly reduces the overhead of writing doc values. On the NYC Taxis benchmark, this resulted in ~10% faster merges for doc values. --- .../lucene90/Lucene90DocValuesConsumer.java | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java index 6480dcfaedfd..352a5104070b 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java @@ -164,6 +164,13 @@ void update(long v) { ++numValues; } + /** Accumulate state from another tracker. */ + void update(MinMaxTracker other) { + min = Math.min(min, other.min); + max = Math.max(max, other.max); + numValues += other.numValues; + } + /** Update the required space. */ void finish() { if (max > min) { @@ -181,6 +188,13 @@ void nextBlock() { private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, boolean ords) throws IOException { SortedNumericDocValues values = valuesProducer.getSortedNumeric(field); + final long firstValue; + if (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + firstValue = values.nextValue(); + } else { + firstValue = 0L; + } + values = valuesProducer.getSortedNumeric(field); int numDocsWithValue = 0; MinMaxTracker minMax = new MinMaxTracker(); MinMaxTracker blockMinMax = new MinMaxTracker(); @@ -196,14 +210,14 @@ private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, bo // wrong results. 
Since these extreme values are unlikely, we just discard // GCD computation for them gcd = 1; - } else if (minMax.numValues != 0) { // minValue needs to be set first - gcd = MathUtil.gcd(gcd, v - minMax.min); + } else { + gcd = MathUtil.gcd(gcd, v - firstValue); } } - minMax.update(v); blockMinMax.update(v); if (blockMinMax.numValues == NUMERIC_BLOCK_SIZE) { + minMax.update(blockMinMax); blockMinMax.nextBlock(); } @@ -215,6 +229,7 @@ private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, bo numDocsWithValue++; } + minMax.update(blockMinMax); minMax.finish(); blockMinMax.finish(); From c30f783c9e98f439e1d05ffde9ede527bf258a4a Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 28 Sep 2021 14:41:31 +0200 Subject: [PATCH 2/2] Avoid Arrays#fill on large arrays. --- .../java/org/apache/lucene/util/packed/DirectWriter.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/DirectWriter.java b/lucene/core/src/java/org/apache/lucene/util/packed/DirectWriter.java index aae949aebe1e..90c8290c4fbd 100644 --- a/lucene/core/src/java/org/apache/lucene/util/packed/DirectWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/packed/DirectWriter.java @@ -89,11 +89,15 @@ public void add(long l) throws IOException { } private void flush() throws IOException { + if (off == 0) { + return; + } + // Avoid writing bits from values that are outside of the range we need to encode + Arrays.fill(nextValues, off, nextValues.length, 0L); encode(nextValues, 0, nextBlocks, 0, iterations); final int blockCount = (int) PackedInts.Format.PACKED.byteCount(PackedInts.VERSION_CURRENT, off, bitsPerValue); output.writeBytes(nextBlocks, blockCount); - Arrays.fill(nextValues, 0L); off = 0; }