Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.benchmark.bytes;

import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.logging.LogConfigurator;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;

import java.util.concurrent.TimeUnit;

@Warmup(iterations = 4, time = 1)
@Measurement(iterations = 5, time = 1)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Thread)
@Fork(value = 1)
public class BytesArrayIndexOfBenchmark {

static {
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
}

static final byte MARKER = (byte) '\n';

@Param(value = { "64", "127", "128", "4096", "16384", "65536", "1048576" })
public int size;

BytesArray bytesArray;

@Setup
public void setup() {
byte[] bytes = new byte[size];
bytes[bytes.length - 1] = MARKER;
bytesArray = new BytesArray(bytes, 0, bytes.length);
}

@Benchmark
public int indexOf() {
return bytesArray.indexOf(MARKER, 0);
}

@Benchmark
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
public int indexOfPanama() {
return bytesArray.indexOf(MARKER, 0);
}

@Benchmark
public int withOffsetIndexOf() {
return bytesArray.indexOf(MARKER, 1);
}

@Benchmark
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
public int withOffsetIndexPanama() {
return bytesArray.indexOf(MARKER, 1);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.benchmark.bytes;

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

import org.elasticsearch.test.ESTestCase;
import org.openjdk.jmh.annotations.Param;

import java.util.Arrays;

public class BytesArrayIndexOfBenchmarkTests extends ESTestCase {

final int size;

public BytesArrayIndexOfBenchmarkTests(int size) {
this.size = size;
}

public void testIndexOf() {
var bench = new BytesArrayIndexOfBenchmark();
bench.size = size;
bench.setup();
assertEquals(size - 1, bench.indexOf());
assertEquals(size - 1, bench.indexOfPanama());
assertEquals(size - 1, bench.withOffsetIndexOf());
assertEquals(size - 1, bench.withOffsetIndexPanama());
}

@ParametersFactory
public static Iterable<Object[]> parametersFactory() {
try {
var params = BytesArrayIndexOfBenchmark.class.getField("size").getAnnotationsByType(Param.class)[0].value();
return () -> Arrays.stream(params).map(Integer::parseInt).map(i -> new Object[] { i }).iterator();
} catch (NoSuchFieldException e) {
throw new AssertionError(e);
}
}
}
5 changes: 5 additions & 0 deletions docs/changelog/135087.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 135087
summary: "Optimize `BytesArray::indexOf,` which is used heavily in ndjson parsing"
area: "Performance"
type: feature
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.util.Objects;

import static org.elasticsearch.simdvec.internal.vectorization.ESVectorUtilSupport.B_QUERY;

Expand Down Expand Up @@ -399,4 +400,22 @@ public static void transposeHalfByte(int[] q, byte[] quantQueryByte) {
}
IMPL.transposeHalfByte(q, quantQueryByte);
}

/**
* Searches for the first occurrence of the given marker byte in the specified range of the array.
*
* <p>The search starts at {@code offset} and examines at most {@code length} bytes. The return
* value is the relative index of the first occurrence of {@code marker} within this slice,
* or {@code -1} if not found.
*
* @param bytes the byte array to search
* @param offset the starting index within the array
* @param length the number of bytes to examine
* @param marker the byte to search for
* @return the relative index (0..length-1) of the first match, or {@code -1} if not found
*/
public static int indexOf(byte[] bytes, int offset, int length, byte marker) {
Objects.checkFromIndexSize(offset, length, bytes.length);
return IMPL.indexOf(bytes, offset, length, marker);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.simdvec.internal.vectorization;

import static org.apache.lucene.util.BitUtil.VH_LE_LONG;

/** Byte array utilities. */
final class ByteArrayUtils {

/**
* Implementation of {@link ESVectorUtilSupport#indexOf(byte[], int, int, byte)} using fast
* SWAR (SIMD Within A Register) loop.
*/
static int indexOf(final byte[] bytes, final int offset, final int len, final byte marker) {
final int end = offset + len;
int i = offset;

// First, try to find the marker in the first few bytes, so we can enter the faster 8-byte aligned loop below.
// The idea for this logic is taken from Netty's io.netty.buffer.ByteBufUtil.firstIndexOf and optimized for little endian hardware.
// See e.g. https://richardstartin.github.io/posts/finding-bytes for the idea behind this optimization.
final int byteCount = len & 7;
if (byteCount > 0) {
final int index = unrolledFirstIndexOf(bytes, i, byteCount, marker);
if (index != -1) {
return index - offset;
}
i += byteCount;
if (i == end) {
return -1;
}
}
final int longCount = len >>> 3;
// faster SWAR (SIMD Within A Register) loop
final long pattern = compilePattern(marker);
for (int j = 0; j < longCount; j++) {
int index = findInLong(readLongLE(bytes, i), pattern);
if (index < Long.BYTES) {
return i + index - offset;
}
i += Long.BYTES;
}
return -1;
}

private static long readLongLE(byte[] arr, int offset) {
return (long) VH_LE_LONG.get(arr, offset);
}

private static long compilePattern(byte byteToFind) {
return (byteToFind & 0xFFL) * 0x101010101010101L;
}

private static int findInLong(long word, long pattern) {
long input = word ^ pattern;
long tmp = (input & 0x7F7F7F7F7F7F7F7FL) + 0x7F7F7F7F7F7F7F7FL;
tmp = ~(tmp | input | 0x7F7F7F7F7F7F7F7FL);
final int binaryPosition = Long.numberOfTrailingZeros(tmp);
return binaryPosition >>> 3;
}

private static int unrolledFirstIndexOf(byte[] buffer, int fromIndex, int byteCount, byte value) {
if (buffer[fromIndex] == value) {
return fromIndex;
}
if (byteCount == 1) {
return -1;
}
if (buffer[fromIndex + 1] == value) {
return fromIndex + 1;
}
if (byteCount == 2) {
return -1;
}
if (buffer[fromIndex + 2] == value) {
return fromIndex + 2;
}
if (byteCount == 3) {
return -1;
}
if (buffer[fromIndex + 3] == value) {
return fromIndex + 3;
}
if (byteCount == 4) {
return -1;
}
if (buffer[fromIndex + 4] == value) {
return fromIndex + 4;
}
if (byteCount == 5) {
return -1;
}
if (buffer[fromIndex + 5] == value) {
return fromIndex + 5;
}
if (byteCount == 6) {
return -1;
}
if (buffer[fromIndex + 6] == value) {
return fromIndex + 6;
}
return -1;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -403,4 +403,9 @@ public static void transposeHalfByteImpl(int[] q, byte[] quantQueryByte) {
quantQueryByte[index + quantQueryByte.length / 2] = (byte) upperMiddleByte;
quantQueryByte[index + 3 * quantQueryByte.length / 4] = (byte) upperByte;
}

@Override
public int indexOf(byte[] bytes, int offset, int length, byte marker) {
return ByteArrayUtils.indexOf(bytes, offset, length, marker);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,6 @@ void soarDistanceBulk(
void packAsBinary(int[] vector, byte[] packed);

void transposeHalfByte(int[] q, byte[] quantQueryByte);

int indexOf(byte[] bytes, int offset, int length, byte marker);
}
Original file line number Diff line number Diff line change
Expand Up @@ -1122,4 +1122,26 @@ private void transposeHalfByte128(int[] q, byte[] quantQueryByte) {
quantQueryByte[index + quantQueryByte.length / 2] = (byte) upperMiddleByte;
quantQueryByte[index + 3 * quantQueryByte.length / 4] = (byte) upperByte;
}

@Override
public int indexOf(final byte[] bytes, final int offset, final int length, final byte marker) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I don't think we need to keep this in main21, as 21 is the minimum version we support now. We need to apply the mrjar plugin for the preview bit handling, but no need for a different sourceset I think

final ByteVector markerVector = ByteVector.broadcast(ByteVector.SPECIES_PREFERRED, marker);
final int loopBound = ByteVector.SPECIES_PREFERRED.loopBound(length);
for (int i = 0; i < loopBound; i += ByteVector.SPECIES_PREFERRED.length()) {
ByteVector chunk = ByteVector.fromArray(ByteVector.SPECIES_PREFERRED, bytes, offset + i);
VectorMask<Byte> mask = chunk.eq(markerVector);
if (mask.anyTrue()) {
return i + mask.firstTrue();
}
}
// tail
if (loopBound < length) {
int remaining = length - loopBound;
int tail = ByteArrayUtils.indexOf(bytes, offset + loopBound, remaining, marker);
if (tail >= 0) {
return loopBound + tail;
}
}
return -1;
}
}
Loading