Skip to content

Commit

Permalink
Big arrays sliced from netty buffers (int) (#89668)
Browse files Browse the repository at this point in the history
This teaches `IntArray` about our serialization.
The interesting bit here is that reading slices the reference to the
underlying buffer rather than copying. That reference can be retained as
long as it's needed, holding the underlying buffer open until the
`IntArray` is `close`d.

This should allow aggregations to send dense representations between
nodes with one fewer copy operation.
  • Loading branch information
nik9000 committed Sep 19, 2022
1 parent d5c56b8 commit b11f738
Show file tree
Hide file tree
Showing 10 changed files with 252 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ public int getInt(int index) {
return (get(index) & 0xFF) << 24 | (get(index + 1) & 0xFF) << 16 | (get(index + 2) & 0xFF) << 8 | get(index + 3) & 0xFF;
}

@Override
public int getIntLE(int index) {
    // Little-endian: the byte at the lowest address is the least significant.
    int b0 = get(index) & 0xFF;
    int b1 = get(index + 1) & 0xFF;
    int b2 = get(index + 2) & 0xFF;
    int b3 = get(index + 3) & 0xFF;
    return (b3 << 24) | (b2 << 16) | (b1 << 8) | b0;
}

@Override
public int indexOf(byte marker, int from) {
final int to = length();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ static BytesReference fromByteArray(ByteArray byteArray, int length) {
*/
int getInt(int index);

/**
 * Returns the integer read from the 4 bytes (little-endian byte order)
 * starting at the given index.
 *
 * @param index index of the first of the 4 bytes to read
 * @return the little-endian {@code int} starting at {@code index}
 */
int getIntLE(int index);

/**
* Finds the index of the first occurrence of the given marker within the given bounds.
* @param marker marker byte to search
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,12 @@ public int getInt(int index) {
return delegate.getInt(index);
}

@Override
public int getIntLE(int index) {
    // Reading after the last reference is released would touch freed memory;
    // assert the reference is still live before delegating.
    assert hasReferences();
    return delegate.getIntLE(index);
}

@Override
public int indexOf(byte marker, int from) {
assert hasReferences();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@
import org.elasticsearch.common.breaker.CircuitBreaker;
import org.elasticsearch.common.breaker.CircuitBreakingException;
import org.elasticsearch.common.breaker.PreallocatedCircuitBreakerService;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.recycler.Recycler;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.indices.breaker.CircuitBreakerService;

import java.io.IOException;
import java.util.Arrays;

import static org.elasticsearch.common.util.BigDoubleArray.VH_PLATFORM_NATIVE_DOUBLE;
Expand Down Expand Up @@ -162,6 +164,13 @@ private static class ByteArrayAsIntArrayWrapper extends AbstractArrayWrapper imp
this.array = new byte[(int) size << 2];
}

@Override
public void writeTo(StreamOutput out) throws IOException {
    /*
     * Wire format: vint byte-length followed by the raw bytes. Use
     * Integer.BYTES for both the declared length and the number of bytes
     * written (the original mixed a literal 4 with Integer.BYTES for the
     * same quantity) so the two can never drift apart.
     */
    int intSize = (int) size();
    out.writeVInt(intSize * Integer.BYTES);
    out.write(array, 0, intSize * Integer.BYTES);
}

@Override
public long ramBytesUsed() {
return SHALLOW_SIZE + RamUsageEstimator.sizeOf(array);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.common.io.stream.StreamOutput;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.VarHandle;
import java.nio.ByteOrder;
Expand All @@ -23,6 +25,11 @@
* configurable length.
*/
final class BigIntArray extends AbstractBigArray implements IntArray {
static {
    // writeTo dumps the raw page bytes and readers decode them with
    // getIntLE, so running on a big-endian platform would silently corrupt
    // every serialized value. Fail fast at class-load time instead.
    if (ByteOrder.nativeOrder() != ByteOrder.LITTLE_ENDIAN) {
        throw new Error("The deserialization assumes this class is written with little-endian ints.");
    }
}

private static final BigIntArray ESTIMATOR = new BigIntArray(0, BigArrays.NON_RECYCLING_INSTANCE, false);

Expand All @@ -40,6 +47,20 @@ final class BigIntArray extends AbstractBigArray implements IntArray {
}
}

@Override
public void writeTo(StreamOutput out) throws IOException {
    /*
     * Wire format: vint byte-length followed by the raw little-endian page
     * bytes. The static initializer guarantees the platform is
     * little-endian, so the pages can be dumped without conversion.
     */
    if (size > Integer.MAX_VALUE / Integer.BYTES) {
        // We can't serialize messages longer than 2gb anyway
        throw new IllegalArgumentException("array of size [" + size + "] is too large to serialize");
    }
    int intSize = (int) size;
    out.writeVInt(intSize * Integer.BYTES);
    int lastPageEnd = intSize % INT_PAGE_SIZE;
    if (lastPageEnd == 0) {
        /*
         * The last page is exactly full. The modulo-based slice below would
         * write zero bytes for it (and index pages[-1] when there are no
         * pages at all), truncating the serialized array. Write every page
         * in full instead.
         */
        for (byte[] page : pages) {
            out.write(page);
        }
        return;
    }
    for (int i = 0; i < pages.length - 1; i++) {
        out.write(pages[i]);
    }
    out.write(pages[pages.length - 1], 0, lastPageEnd * Integer.BYTES);
}

@Override
public int get(long index) {
final int pageIndex = pageIndex(index);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,18 @@

package org.elasticsearch.common.util;

import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.Writeable;

import java.io.IOException;

/**
* Abstraction of an array of integer values.
*/
public interface IntArray extends BigArray {
public interface IntArray extends BigArray, Writeable {
/**
 * Reads an {@link IntArray} from a stream. The returned array slices and
 * retains a reference to the stream's underlying buffer rather than copying
 * the bytes, so the buffer is held open until the returned array is closed.
 *
 * @param in stream positioned at an array serialized by {@code writeTo}
 * @return a read-only array backed by the serialized bytes
 * @throws IOException if reading from the stream fails
 */
static IntArray readFrom(StreamInput in) throws IOException {
    return new ReleasableIntArray(in);
}

/**
* Get an element given its index.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.common.util;

import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.common.bytes.ReleasableBytesReference;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;

import java.io.IOException;

/**
 * An {@link IntArray} backed by a {@link ReleasableBytesReference} sliced
 * straight out of the buffer it was deserialized from. Reads decode
 * little-endian ints in place without copying; the underlying buffer is
 * retained until {@link #close} is called. The array is read-only: all
 * mutating operations throw {@link UnsupportedOperationException}.
 */
class ReleasableIntArray implements IntArray {
    private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ReleasableIntArray.class);

    /** Retained slice of the buffer holding the little-endian int values. */
    private final ReleasableBytesReference ref;

    ReleasableIntArray(StreamInput in) throws IOException {
        // Slices (and retains) the remaining bytes rather than copying them.
        ref = in.readReleasableBytesReference();
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeBytesReference(ref);
    }

    @Override
    public long size() {
        return ref.length() / Integer.BYTES;
    }

    @Override
    public int get(long index) {
        if (index < 0 || index > Integer.MAX_VALUE / Integer.BYTES) {
            // We can't serialize messages longer than 2gb anyway
            throw new ArrayIndexOutOfBoundsException("index [" + index + "] out of bounds");
        }
        return ref.getIntLE((int) index * Integer.BYTES);
    }

    @Override
    public int set(long index, int value) {
        throw new UnsupportedOperationException();
    }

    @Override
    public int increment(long index, int inc) {
        throw new UnsupportedOperationException();
    }

    @Override
    public void fill(long fromIndex, long toIndex, int value) {
        throw new UnsupportedOperationException();
    }

    @Override
    public void set(long index, byte[] buf, int offset, int len) {
        throw new UnsupportedOperationException();
    }

    @Override
    public long ramBytesUsed() {
        /*
         * If we return the size of the buffer that we've sliced
         * we're likely to double count things.
         */
        return SHALLOW_SIZE;
    }

    @Override
    public void close() {
        // Releases this array's retained reference; the underlying buffer is
        // freed once every reference has been released.
        ref.decRef();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.common.bytes;

import org.elasticsearch.common.io.stream.AbstractStreamTests;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.IntArray;
import org.junit.After;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import static org.hamcrest.Matchers.equalTo;

/**
 * Runs the standard stream tests over inputs wrapped in
 * {@link ReleasableBytesReference} and verifies that every reference opened
 * during a test is released by the time the test finishes.
 */
public class ReleasableBytesReferenceStreamTests extends AbstractStreamTests {
    // Every reference handed out by getStreamInput, so we can release and check them after the test.
    private final List<ReleasableBytesReference> opened = new ArrayList<>();
    // Identity set of exceptions captured where each reference was opened; removed when the reference is released.
    private final Set<Exception> openTraces = Collections.newSetFromMap(new IdentityHashMap<>());

    @After
    public void checkAllClosed() throws Exception {
        // Decrement one time to simulate closing the netty buffer after the stream is captured
        for (ReleasableBytesReference r : opened) {
            r.decRef();
        }
        // Now that we've decremented, we expect all streams will have been closed
        Iterator<Exception> iter = openTraces.iterator();
        if (iter.hasNext()) {
            throw new Exception("didn't close iterator - cause is opening location", iter.next());
        }
        for (ReleasableBytesReference r : opened) {
            assertThat(r.hasReferences(), equalTo(false));
        }
    }

    @Override
    protected StreamInput getStreamInput(BytesReference bytesReference) throws IOException {
        // Grab an exception at the opening location, so we can throw it if we don't close
        Exception trace = new Exception();
        openTraces.add(trace);

        ReleasableBytesReference counted = new ReleasableBytesReference(bytesReference, () -> openTraces.remove(trace));

        /*
         * Grab a reference to the bytes ref we're using, so we can close it after the
         * test to simulate the underlying netty buffer closing after the test.
         */
        opened.add(counted);
        return counted.streamInput();
    }

    public void testBigIntArrayLivesAfterReleasableIsDecremented() throws IOException {
        IntArray testData = BigArrays.NON_RECYCLING_INSTANCE.newIntArray(1, false);
        testData.set(0, 1);

        BytesStreamOutput out = new BytesStreamOutput();
        testData.writeTo(out);

        ReleasableBytesReference ref = ReleasableBytesReference.wrap(out.bytes());

        // readFrom retains ref, so it must survive our decRef below.
        try (IntArray in = IntArray.readFrom(ref.streamInput())) {
            ref.decRef();
            assertThat(ref.hasReferences(), equalTo(true));

            assertThat(in.size(), equalTo(testData.size()));
            assertThat(in.get(0), equalTo(1));
        }
        // Closing the IntArray released the last reference.
        assertThat(ref.hasReferences(), equalTo(false));
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.settings.SecureString;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.IntArray;
import org.elasticsearch.common.util.PageCacheRecycler;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.core.CheckedFunction;
Expand Down Expand Up @@ -214,6 +217,31 @@ public void testArrays() throws IOException {
assertThat(deserialized, equalTo(strings));
}

public void testSmallBigIntArray() throws IOException {
    // Round-trip an array of at most one page.
    assertBigIntArray(between(0, PageCacheRecycler.INT_PAGE_SIZE));
}

public void testLargeBigIntArray() throws IOException {
    // Round-trip an array of one or more pages.
    assertBigIntArray(between(PageCacheRecycler.INT_PAGE_SIZE, 10000));
}

/**
 * Round-trips an {@code IntArray} of the given size through serialization
 * and asserts that the size and every element survive unchanged.
 */
private void assertBigIntArray(int size) throws IOException {
    IntArray expected = BigArrays.NON_RECYCLING_INSTANCE.newIntArray(size, false);
    for (int i = 0; i < size; i++) {
        expected.set(i, randomInt());
    }

    BytesStreamOutput output = new BytesStreamOutput();
    expected.writeTo(output);

    try (IntArray actual = IntArray.readFrom(getStreamInput(output.bytes()))) {
        assertThat(actual.size(), equalTo(expected.size()));
        for (int i = 0; i < size; i++) {
            assertThat(actual.get(i), equalTo(expected.get(i)));
        }
    }
}

public void testCollection() throws IOException {
class FooBar implements Writeable {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@
import org.elasticsearch.common.breaker.CircuitBreaker;
import org.elasticsearch.common.breaker.CircuitBreakingException;
import org.elasticsearch.common.breaker.NoopCircuitBreaker;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.indices.breaker.CircuitBreakerService;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
Expand Down Expand Up @@ -411,6 +413,11 @@ private class IntArrayWrapper extends AbstractArrayWrapper implements IntArray {
this.in = in;
}

@Override
public void writeTo(StreamOutput out) throws IOException {
    // Delegate serialization straight to the wrapped array.
    in.writeTo(out);
}

@Override
protected BigArray getDelegate() {
return in;
Expand Down

0 comments on commit b11f738

Please sign in to comment.