Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LUCENE-10333: Speed up BinaryDocValues with a batch reading on LongValues #557

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -763,11 +763,13 @@ public BytesRef binaryValue() throws IOException {
DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData, merging);
return new DenseBinaryDocValues(maxDoc) {
final BytesRef bytes = new BytesRef(new byte[entry.maxLength], 0, entry.maxLength);
final LongValues.Twin twin = new LongValues.Twin();

@Override
public BytesRef binaryValue() throws IOException {
long startOffset = addresses.get(doc);
bytes.length = (int) (addresses.get(doc + 1L) - startOffset);
addresses.get(doc, twin);
long startOffset = twin.first;
bytes.length = (int) (twin.second - startOffset);
bytesSlice.seek(startOffset);
bytesSlice.readBytes(bytes.bytes, 0, bytes.length);
return bytes;
Expand Down Expand Up @@ -805,12 +807,14 @@ public BytesRef binaryValue() throws IOException {
DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData);
return new SparseBinaryDocValues(disi) {
final BytesRef bytes = new BytesRef(new byte[entry.maxLength], 0, entry.maxLength);
final LongValues.Twin twin = new LongValues.Twin();

@Override
public BytesRef binaryValue() throws IOException {
final int index = disi.index();
long startOffset = addresses.get(index);
bytes.length = (int) (addresses.get(index + 1L) - startOffset);
addresses.get(index, twin);
long startOffset = twin.first;
bytes.length = (int) (twin.second - startOffset);
bytesSlice.seek(startOffset);
bytesSlice.readBytes(bytes.bytes, 0, bytes.length);
return bytes;
Expand Down Expand Up @@ -1295,6 +1299,7 @@ private SortedNumericDocValues getSortedNumeric(SortedNumericEntry entry) throws
int doc = -1;
long start, end;
int count;
final LongValues.Twin twin = new LongValues.Twin();

@Override
public int nextDoc() throws IOException {
Expand All @@ -1316,16 +1321,18 @@ public int advance(int target) throws IOException {
if (target >= maxDoc) {
return doc = NO_MORE_DOCS;
}
start = addresses.get(target);
end = addresses.get(target + 1L);
addresses.get(target, twin);
start = twin.first;
end = twin.second;
count = (int) (end - start);
return doc = target;
}

@Override
public boolean advanceExact(int target) throws IOException {
start = addresses.get(target);
end = addresses.get(target + 1L);
addresses.get(target, twin);
start = twin.first;
end = twin.second;
count = (int) (end - start);
doc = target;
return true;
Expand Down Expand Up @@ -1356,6 +1363,7 @@ public int docValueCount() {
boolean set;
long start, end;
int count;
private final LongValues.Twin twin = new LongValues.Twin();

@Override
public int nextDoc() throws IOException {
Expand Down Expand Up @@ -1400,8 +1408,9 @@ public int docValueCount() {
private void set() {
if (set == false) {
final int index = disi.index();
start = addresses.get(index);
end = addresses.get(index + 1L);
addresses.get(index, twin);
start = twin.first;
end = twin.second;
count = (int) (end - start);
set = true;
}
Expand Down
14 changes: 14 additions & 0 deletions lucene/core/src/java/org/apache/lucene/util/LongValues.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@
*/
public abstract class LongValues {

/**
 * Mutable holder for a pair of long values. {@link LongValues#get(long, Twin)} stores the value
 * at {@code index} in {@link #first} and the value at {@code index + 1} in {@link #second}.
 */
public static class Twin {
/** Receives the value read at the requested index. */
public long first;
/** Receives the value read at the requested index plus one. */
public long second;
}

/** An instance that returns the provided value. */
public static final LongValues IDENTITY =
new LongValues() {
Expand All @@ -44,4 +49,13 @@ public long get(long index) {

/**
 * Get value at <code>index</code>.
 *
 * @param index position of the value to read
 * @return the value at {@code index}
 */
public abstract long get(long index);

/**
 * Fills {@code twin} with the values at {@code index} and {@code index + 1}.
 *
 * <p>This base version issues two separate single-value reads, in index order.
 *
 * @param index position of the first value
 * @param twin holder whose {@code first} and {@code second} fields are overwritten
 */
public void get(long index, Twin twin) {
  final long next = index + 1;
  twin.first = this.get(index);
  twin.second = this.get(next);
}

/**
 * Verification helper: reports whether {@code twin} holds exactly what two single-value reads
 * at {@code index} and {@code index + 1} return. The second read is skipped when the first
 * value already disagrees.
 *
 * @param index position that {@code twin} was filled for
 * @param twin pair to verify
 * @return {@code true} if both fields match the single-value reads
 */
protected boolean twinImplementIsRight(long index, Twin twin) {
  if (twin.first != get(index)) {
    return false;
  }
  return twin.second == get(index + 1);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ public static DirectMonotonicReader getInstance(
}

private final int blockShift;
private final long blockMask;
private final LongValues[] readers;
private final long[] mins;
private final float[] avgs;
Expand All @@ -136,6 +137,7 @@ public static DirectMonotonicReader getInstance(
private DirectMonotonicReader(
int blockShift, LongValues[] readers, long[] mins, float[] avgs, byte[] bpvs) {
this.blockShift = blockShift;
this.blockMask = (1L << blockShift) - 1;
this.readers = readers;
this.mins = mins;
this.avgs = avgs;
Expand All @@ -157,11 +159,29 @@ private DirectMonotonicReader(
/**
 * Returns the value at {@code index}, computed as {@code mins[block]} plus {@code avgs[block]}
 * scaled by the position inside the block, plus the delta read from that block's reader.
 *
 * @param index position of the value to read
 * @return the reconstructed value
 */
@Override
public long get(long index) {
  final int block = (int) (index >>> blockShift);
  // Single declaration using the precomputed long-width mask. The older int-width form,
  // index & ((1 << blockShift) - 1), declared the same local a second time.
  final long blockIndex = index & blockMask;
  final long delta = readers[block].get(blockIndex);
  return mins[block] + (long) (avgs[block] * blockIndex) + delta;
}

@Override
public void get(long index, Twin twin) {
int block = (int) (index >>> blockShift);
long blockIndex = index & blockMask;
if (blockIndex == blockMask) {
twin.first = readers[block].get(blockIndex) + mins[block] + (long) (avgs[block] * blockIndex);
block++;
twin.second = readers[block].get(0) + mins[block];
} else {
readers[block].get(blockIndex, twin);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line could call DirectReader#get(index, twin)

long min = mins[block];
float avg = avgs[block];
twin.first = twin.first + min + (long) (avg * blockIndex);
twin.second = twin.second + min + (long) (avg * (blockIndex + 1));
}
assert twinImplementIsRight(index, twin);
}

/** Get lower/upper bounds for the value at a given index without hitting the direct reader. */
private long[] getBounds(long index) {
final int block = Math.toIntExact(index >>> blockShift);
Expand Down