From 04d3b9989fe03d3591d9eb637dc54f7813e174ea Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 10 May 2024 13:45:42 -0400 Subject: [PATCH] ESQL: Begin optimizing `Block#lookup` (#108482) This creates the infrastructure to allow optimizing the `lookup` method when applied to `Vector`s and then implements that optimization for constant vectors. Constant vectors now take one of six paths: 1. An empty positions `Block` yields an empty result set. 2. If `positions` is a `Block`, perform the un-optimized lookup. 3. If the `min` of the `positions` *Vector* is less that 0 then throw an exception. 4. If the `min` of the positions Vector is greater than the number of positions in the lookup block then return a single `ConstantNullBlock` because you are looking up outside the range. 5. If the `max` of the positions Vector is less than the number of positions in the lookup block then return a `Constant$Type$Block` with the same value as the lookup block. This is a lookup that's entirely within range. 6. Otherwise return the unoptimized lookup. This is *fairly* simple but demonstrates how we can plug in more complex optimizations later. --- .../core/ReleasableIterator.java | 26 ++++++ .../compute/data/BooleanArrayVector.java | 7 ++ .../compute/data/BooleanBigArrayVector.java | 7 ++ .../compute/data/BooleanVector.java | 5 ++ .../compute/data/BooleanVectorBlock.java | 5 +- .../compute/data/BytesRefArrayVector.java | 7 ++ .../compute/data/BytesRefVector.java | 5 ++ .../compute/data/BytesRefVectorBlock.java | 5 +- .../compute/data/ConstantBooleanVector.java | 24 +++++ .../compute/data/ConstantBytesRefVector.java | 24 +++++ .../compute/data/ConstantDoubleVector.java | 24 +++++ .../compute/data/ConstantIntVector.java | 24 +++++ .../compute/data/ConstantLongVector.java | 24 +++++ .../compute/data/DoubleArrayVector.java | 7 ++ .../compute/data/DoubleBigArrayVector.java | 7 ++ .../compute/data/DoubleVector.java | 5 ++ .../compute/data/DoubleVectorBlock.java | 5 +- .../compute/data/IntArrayVector.java | 7 ++ .../compute/data/IntBigArrayVector.java | 7 ++ .../elasticsearch/compute/data/IntVector.java | 5 ++ .../compute/data/IntVectorBlock.java | 5 +- .../compute/data/LongArrayVector.java | 7 ++ .../compute/data/LongBigArrayVector.java | 7 ++ .../compute/data/LongVector.java | 5 ++ .../compute/data/LongVectorBlock.java | 5 +- .../org/elasticsearch/compute/data/Block.java | 8 +- .../compute/data/ConstantNullVector.java | 8 ++ .../elasticsearch/compute/data/DocBlock.java | 2 +- .../elasticsearch/compute/data/DocVector.java | 7 ++ .../compute/data/OrdinalBytesRefVector.java | 7 ++ .../elasticsearch/compute/data/Vector.java | 29 ++++++ .../compute/data/X-ArrayVector.java.st | 9 ++ .../compute/data/X-BigArrayVector.java.st | 7 ++ .../compute/data/X-ConstantVector.java.st | 24 +++++ .../compute/data/X-Vector.java.st | 5 ++ .../compute/data/X-VectorBlock.java.st | 5 +- .../compute/data/BasicBlockTests.java | 89 +++++++++++++++++-- 37 files changed, 431 insertions(+), 28 deletions(-) diff --git a/libs/core/src/main/java/org/elasticsearch/core/ReleasableIterator.java b/libs/core/src/main/java/org/elasticsearch/core/ReleasableIterator.java index 68a4a136c5308..83a68c984a684 100644 --- a/libs/core/src/main/java/org/elasticsearch/core/ReleasableIterator.java +++ b/libs/core/src/main/java/org/elasticsearch/core/ReleasableIterator.java @@ -46,4 +46,30 @@ public String toString() { }; } + + /** + * Returns an empty iterator over the supplied value. + */ + static ReleasableIterator empty() { + return new ReleasableIterator<>() { + @Override + public boolean hasNext() { + return false; + } + + @Override + public T next() { + assert false : "hasNext is always false so next should never be called"; + return null; + } + + @Override + public void close() {} + + @Override + public String toString() { + return "ReleasableIterator[]"; + } + }; + } } diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayVector.java index e195bda3a6dbb..a91999a49c16b 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayVector.java @@ -10,6 +10,8 @@ import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; import java.util.stream.Collectors; @@ -91,6 +93,11 @@ public BooleanVector filter(int... positions) { } } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return new BooleanLookup(asBlock(), positions, targetBlockSize); + } + public static long ramBytesEstimated(boolean[] values) { return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(values); } diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayVector.java index 5f6db129e73d3..9215cd0d9bbda 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayVector.java @@ -10,8 +10,10 @@ import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.BitArray; import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; @@ -87,6 +89,11 @@ public BooleanVector filter(int... positions) { return new BooleanBigArrayVector(filtered, positions.length, blockFactory); } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return new BooleanLookup(asBlock(), positions, targetBlockSize); + } + @Override public void closeInternal() { // The circuit breaker that tracks the values {@link BitArray} is adjusted outside diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVector.java index 7218f3d2771c8..c8921a7c9f02e 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVector.java @@ -10,6 +10,8 @@ import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; @@ -27,6 +29,9 @@ public sealed interface BooleanVector extends Vector permits ConstantBooleanVect @Override BooleanVector filter(int... positions); + @Override + ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize); + /** * Compares the given object with this vector for equality. Returns {@code true} if and only if the * given object is a BooleanVector, and both vectors are {@link #equals(BooleanVector, BooleanVector) equal}. diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java index 013718bb42a7d..193e6ea5d8965 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java @@ -52,9 +52,8 @@ public BooleanBlock filter(int... positions) { } @Override - public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { - // TODO optimizations - return new BooleanLookup(this, positions, targetBlockSize); + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return vector.lookup(positions, targetBlockSize); } @Override diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefArrayVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefArrayVector.java index 75cf4a2e1fe5a..61bbfb5ebbd02 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefArrayVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefArrayVector.java @@ -11,7 +11,9 @@ import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.BytesRefArray; +import org.elasticsearch.core.ReleasableIterator; import org.elasticsearch.core.Releasables; import java.io.IOException; @@ -91,6 +93,11 @@ public BytesRefVector filter(int... positions) { } } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return new BytesRefLookup(asBlock(), positions, targetBlockSize); + } + public static long ramBytesEstimated(BytesRefArray values) { return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(values); } diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefVector.java index 4f07ca2d61049..3739dccb0f956 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefVector.java @@ -11,6 +11,8 @@ import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; @@ -34,6 +36,9 @@ public sealed interface BytesRefVector extends Vector permits ConstantBytesRefVe @Override BytesRefVector filter(int... positions); + @Override + ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize); + /** * Compares the given object with this vector for equality. Returns {@code true} if and only if the * given object is a BytesRefVector, and both vectors are {@link #equals(BytesRefVector, BytesRefVector) equal}. diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefVectorBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefVectorBlock.java index 39bd37ea9bc34..16a8fc0888096 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefVectorBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefVectorBlock.java @@ -63,9 +63,8 @@ public BytesRefBlock filter(int... positions) { } @Override - public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { - // TODO optimizations - return new BytesRefLookup(this, positions, targetBlockSize); + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return vector.lookup(positions, targetBlockSize); } @Override diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantBooleanVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantBooleanVector.java index 16d70d1a0e800..1f6786f64e0a9 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantBooleanVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantBooleanVector.java @@ -8,6 +8,8 @@ package org.elasticsearch.compute.data; import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; /** * Vector implementation that stores a constant boolean value. @@ -39,6 +41,28 @@ public BooleanVector filter(int... positions) { return blockFactory().newConstantBooleanVector(value, positions.length); } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + if (positions.getPositionCount() == 0) { + return ReleasableIterator.empty(); + } + IntVector positionsVector = positions.asVector(); + if (positionsVector == null) { + return new BooleanLookup(asBlock(), positions, targetBlockSize); + } + int min = positionsVector.min(); + if (min < 0) { + throw new IllegalArgumentException("invalid position [" + min + "]"); + } + if (min > getPositionCount()) { + return ReleasableIterator.single((BooleanBlock) positions.blockFactory().newConstantNullBlock(positions.getPositionCount())); + } + if (positionsVector.max() < getPositionCount()) { + return ReleasableIterator.single(positions.blockFactory().newConstantBooleanBlockWith(value, positions.getPositionCount())); + } + return new BooleanLookup(asBlock(), positions, targetBlockSize); + } + @Override public ElementType elementType() { return ElementType.BOOLEAN; diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantBytesRefVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantBytesRefVector.java index eed780a42f7ba..33967d66374c1 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantBytesRefVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantBytesRefVector.java @@ -9,6 +9,8 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; /** * Vector implementation that stores a constant BytesRef value. @@ -45,6 +47,28 @@ public BytesRefVector filter(int... positions) { return blockFactory().newConstantBytesRefVector(value, positions.length); } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + if (positions.getPositionCount() == 0) { + return ReleasableIterator.empty(); + } + IntVector positionsVector = positions.asVector(); + if (positionsVector == null) { + return new BytesRefLookup(asBlock(), positions, targetBlockSize); + } + int min = positionsVector.min(); + if (min < 0) { + throw new IllegalArgumentException("invalid position [" + min + "]"); + } + if (min > getPositionCount()) { + return ReleasableIterator.single((BytesRefBlock) positions.blockFactory().newConstantNullBlock(positions.getPositionCount())); + } + if (positionsVector.max() < getPositionCount()) { + return ReleasableIterator.single(positions.blockFactory().newConstantBytesRefBlockWith(value, positions.getPositionCount())); + } + return new BytesRefLookup(asBlock(), positions, targetBlockSize); + } + @Override public ElementType elementType() { return ElementType.BYTES_REF; diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantDoubleVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantDoubleVector.java index a783f0243313e..1ddf31d753d43 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantDoubleVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantDoubleVector.java @@ -8,6 +8,8 @@ package org.elasticsearch.compute.data; import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; /** * Vector implementation that stores a constant double value. @@ -39,6 +41,28 @@ public DoubleVector filter(int... positions) { return blockFactory().newConstantDoubleVector(value, positions.length); } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + if (positions.getPositionCount() == 0) { + return ReleasableIterator.empty(); + } + IntVector positionsVector = positions.asVector(); + if (positionsVector == null) { + return new DoubleLookup(asBlock(), positions, targetBlockSize); + } + int min = positionsVector.min(); + if (min < 0) { + throw new IllegalArgumentException("invalid position [" + min + "]"); + } + if (min > getPositionCount()) { + return ReleasableIterator.single((DoubleBlock) positions.blockFactory().newConstantNullBlock(positions.getPositionCount())); + } + if (positionsVector.max() < getPositionCount()) { + return ReleasableIterator.single(positions.blockFactory().newConstantDoubleBlockWith(value, positions.getPositionCount())); + } + return new DoubleLookup(asBlock(), positions, targetBlockSize); + } + @Override public ElementType elementType() { return ElementType.DOUBLE; diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantIntVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantIntVector.java index c6d463af7cfad..e8fb8cb39ceb4 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantIntVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantIntVector.java @@ -8,6 +8,8 @@ package org.elasticsearch.compute.data; import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; /** * Vector implementation that stores a constant int value. @@ -39,6 +41,28 @@ public IntVector filter(int... positions) { return blockFactory().newConstantIntVector(value, positions.length); } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + if (positions.getPositionCount() == 0) { + return ReleasableIterator.empty(); + } + IntVector positionsVector = positions.asVector(); + if (positionsVector == null) { + return new IntLookup(asBlock(), positions, targetBlockSize); + } + int min = positionsVector.min(); + if (min < 0) { + throw new IllegalArgumentException("invalid position [" + min + "]"); + } + if (min > getPositionCount()) { + return ReleasableIterator.single((IntBlock) positions.blockFactory().newConstantNullBlock(positions.getPositionCount())); + } + if (positionsVector.max() < getPositionCount()) { + return ReleasableIterator.single(positions.blockFactory().newConstantIntBlockWith(value, positions.getPositionCount())); + } + return new IntLookup(asBlock(), positions, targetBlockSize); + } + /** * The minimum value in the block. */ diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantLongVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantLongVector.java index 0173f1c1d4d7a..b997cbbe22849 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantLongVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/ConstantLongVector.java @@ -8,6 +8,8 @@ package org.elasticsearch.compute.data; import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; /** * Vector implementation that stores a constant long value. @@ -39,6 +41,28 @@ public LongVector filter(int... positions) { return blockFactory().newConstantLongVector(value, positions.length); } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + if (positions.getPositionCount() == 0) { + return ReleasableIterator.empty(); + } + IntVector positionsVector = positions.asVector(); + if (positionsVector == null) { + return new LongLookup(asBlock(), positions, targetBlockSize); + } + int min = positionsVector.min(); + if (min < 0) { + throw new IllegalArgumentException("invalid position [" + min + "]"); + } + if (min > getPositionCount()) { + return ReleasableIterator.single((LongBlock) positions.blockFactory().newConstantNullBlock(positions.getPositionCount())); + } + if (positionsVector.max() < getPositionCount()) { + return ReleasableIterator.single(positions.blockFactory().newConstantLongBlockWith(value, positions.getPositionCount())); + } + return new LongLookup(asBlock(), positions, targetBlockSize); + } + @Override public ElementType elementType() { return ElementType.LONG; diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleArrayVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleArrayVector.java index 476d5e55c55a0..e7c1d342133d5 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleArrayVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleArrayVector.java @@ -10,6 +10,8 @@ import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; import java.util.stream.Collectors; @@ -90,6 +92,11 @@ public DoubleVector filter(int... positions) { } } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return new DoubleLookup(asBlock(), positions, targetBlockSize); + } + public static long ramBytesEstimated(double[] values) { return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(values); } diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleBigArrayVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleBigArrayVector.java index 8f6aedf31b50e..d558eabd2dd4c 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleBigArrayVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleBigArrayVector.java @@ -10,8 +10,10 @@ import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.DoubleArray; import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; @@ -86,6 +88,11 @@ public DoubleVector filter(int... positions) { return new DoubleBigArrayVector(filtered, positions.length, blockFactory); } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return new DoubleLookup(asBlock(), positions, targetBlockSize); + } + @Override public void closeInternal() { // The circuit breaker that tracks the values {@link DoubleArray} is adjusted outside diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVector.java index 1d71575b33316..3d93043f93d8f 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVector.java @@ -10,6 +10,8 @@ import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; @@ -27,6 +29,9 @@ public sealed interface DoubleVector extends Vector permits ConstantDoubleVector @Override DoubleVector filter(int... positions); + @Override + ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize); + /** * Compares the given object with this vector for equality. Returns {@code true} if and only if the * given object is a DoubleVector, and both vectors are {@link #equals(DoubleVector, DoubleVector) equal}. diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVectorBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVectorBlock.java index e76a4e0c5fdee..24887bebcd838 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVectorBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleVectorBlock.java @@ -52,9 +52,8 @@ public DoubleBlock filter(int... positions) { } @Override - public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { - // TODO optimizations - return new DoubleLookup(this, positions, targetBlockSize); + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return vector.lookup(positions, targetBlockSize); } @Override diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntArrayVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntArrayVector.java index 97bf1675a9a37..e9d9a6b3fb958 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntArrayVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntArrayVector.java @@ -10,6 +10,8 @@ import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; import java.util.stream.Collectors; @@ -100,6 +102,11 @@ public IntVector filter(int... positions) { } } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return new IntLookup(asBlock(), positions, targetBlockSize); + } + public static long ramBytesEstimated(int[] values) { return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(values); } diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBigArrayVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBigArrayVector.java index fe89782bad0ec..df8298b87237e 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBigArrayVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntBigArrayVector.java @@ -10,8 +10,10 @@ import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.IntArray; import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; @@ -126,6 +128,11 @@ public IntVector filter(int... positions) { return new IntBigArrayVector(filtered, positions.length, blockFactory); } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return new IntLookup(asBlock(), positions, targetBlockSize); + } + @Override public void closeInternal() { // The circuit breaker that tracks the values {@link IntArray} is adjusted outside diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVector.java index 8f6f42b66fbe6..b1a2d1b80a410 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVector.java @@ -10,6 +10,8 @@ import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; @@ -27,6 +29,9 @@ public sealed interface IntVector extends Vector permits ConstantIntVector, IntA @Override IntVector filter(int... positions); + @Override + ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize); + /** * The minimum value in the Vector. An empty Vector will return {@link Integer#MAX_VALUE}. */ diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVectorBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVectorBlock.java index 70bcf6919bea6..ae28fb9f6ffa6 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVectorBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntVectorBlock.java @@ -52,9 +52,8 @@ public IntBlock filter(int... positions) { } @Override - public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { - // TODO optimizations - return new IntLookup(this, positions, targetBlockSize); + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return vector.lookup(positions, targetBlockSize); } @Override diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongArrayVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongArrayVector.java index 4b504943b760a..5fa904dcf1acc 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongArrayVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongArrayVector.java @@ -10,6 +10,8 @@ import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; import java.util.stream.Collectors; @@ -90,6 +92,11 @@ public LongVector filter(int... positions) { } } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return new LongLookup(asBlock(), positions, targetBlockSize); + } + public static long ramBytesEstimated(long[] values) { return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(values); } diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongBigArrayVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongBigArrayVector.java index d30dedd4cce16..a7828788169ca 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongBigArrayVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongBigArrayVector.java @@ -10,8 +10,10 @@ import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.LongArray; import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; @@ -86,6 +88,11 @@ public LongVector filter(int... positions) { return new LongBigArrayVector(filtered, positions.length, blockFactory); } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return new LongLookup(asBlock(), positions, targetBlockSize); + } + @Override public void closeInternal() { // The circuit breaker that tracks the values {@link LongArray} is adjusted outside diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVector.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVector.java index 2ebdb89a31262..e2f53d1ee07f4 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVector.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVector.java @@ -10,6 +10,8 @@ import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; @@ -27,6 +29,9 @@ public sealed interface LongVector extends Vector permits ConstantLongVector, Lo @Override LongVector filter(int... positions); + @Override + ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize); + /** * Compares the given object with this vector for equality. Returns {@code true} if and only if the * given object is a LongVector, and both vectors are {@link #equals(LongVector, LongVector) equal}. diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVectorBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVectorBlock.java index b6f1e8e77505d..01921e1195f4a 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVectorBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongVectorBlock.java @@ -52,9 +52,8 @@ public LongBlock filter(int... positions) { } @Override - public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { - // TODO optimizations - return new LongLookup(this, positions, targetBlockSize); + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return vector.lookup(positions, targetBlockSize); } @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Block.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Block.java index cfa1d3656ba3a..9a6b701a2e4ea 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Block.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Block.java @@ -139,19 +139,19 @@ public interface Block extends Accountable, BlockLoader.Block, NamedWriteable, R * same number of {@link #getPositionCount() positions} as the {@code positions} * parameter. *

- * For example, this this block contained {@code [a, b, [b, c]]} + * For example, if this block contained {@code [a, b, [b, c]]} * and were called with the block {@code [0, 1, 1, [1, 2]]} then the * result would be {@code [a, b, b, [b, b, c]]}. *

*

* This process produces {@code count(this) * count(positions)} values per - * positions which could be quite quite large. Instead of returning a single + * positions which could be quite large. Instead of returning a single * Block, this returns an Iterator of Blocks containing all of the promised * values. *

*

- * The returned {@link ReleasableIterator} may retain a reference to {@link Block}s - * inside the {@link Page}. Close it to release those references. + * The returned {@link ReleasableIterator} may retain a reference to the + * {@code positions} parameter. Close it to release those references. *

*

* This block is built using the same {@link BlockFactory} as was used to diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullVector.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullVector.java index 4deededdf41c5..a8a6dbaf382f9 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullVector.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullVector.java @@ -9,6 +9,8 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; @@ -45,6 +47,12 @@ public ConstantNullVector filter(int... positions) { throw new UnsupportedOperationException("null vector"); } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + assert false : "null vector"; + throw new UnsupportedOperationException("null vector"); + } + @Override public boolean getBoolean(int position) { assert false : "null vector"; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocBlock.java index e5a0d934aa01a..da9ca2bbae270 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocBlock.java @@ -52,7 +52,7 @@ public Block filter(int... positions) { @Override public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException("can't lookup values from DocBlock"); } @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocVector.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocVector.java index 067fddd311cc7..33f5797f60df8 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocVector.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/DocVector.java @@ -9,6 +9,8 @@ import org.apache.lucene.util.IntroSorter; import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; import org.elasticsearch.core.Releasables; import java.util.Objects; @@ -235,6 +237,11 @@ public DocVector filter(int... positions) { } } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + throw new UnsupportedOperationException("can't lookup values from DocVector"); + } + @Override public ElementType elementType() { return ElementType.DOC; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefVector.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefVector.java index a67db54b68ec9..ec0c7efa715ad 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefVector.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefVector.java @@ -10,6 +10,8 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; import org.elasticsearch.core.Releasables; import java.io.IOException; @@ -120,6 +122,11 @@ public BytesRefVector filter(int... positions) { } } + @Override + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return new BytesRefLookup(asBlock(), positions, targetBlockSize); + } + @Override public ElementType elementType() { return bytes.elementType(); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Vector.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Vector.java index 89b39569be454..9a5688685374d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Vector.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Vector.java @@ -8,8 +8,10 @@ package org.elasticsearch.compute.data; import org.apache.lucene.util.Accountable; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.core.RefCounted; import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.ReleasableIterator; /** * A dense Vector of single values. @@ -35,6 +37,33 @@ public interface Vector extends Accountable, RefCounted, Releasable { */ Vector filter(int... positions); + /** + * Builds an Iterator of new {@link Block}s with the same {@link #elementType} + * as this {@link Vector} whose values are copied from positions in this Vector. + * It has the same number of {@link #getPositionCount() positions} as the + * {@code positions} parameter. + *

+ * For example, if this vector contained {@code [a, b, c]} + * and were called with the block {@code [0, 1, 1, [1, 2]]} then the + * result would be {@code [a, b, b, [b, c]]}. + *

+ *

+ * This process produces {@code count(positions)} values per + * positions which could be quite large. Instead of returning a single + * Block, this returns an Iterator of Blocks containing all of the promised + * values. + *

+ *

+ * The returned {@link ReleasableIterator} may retain a reference to the + * {@code positions} parameter. Close it to release those references. + *

+ *

+ * This block is built using the same {@link BlockFactory} as was used to + * build the {@code positions} parameter. + *

+ */ + ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize); + /** * {@return the element type of this vector} */ diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayVector.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayVector.java.st index 4afd8db62f848..d594d32898d36 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayVector.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayVector.java.st @@ -12,7 +12,9 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.BytesRefArray; +import org.elasticsearch.core.ReleasableIterator; import org.elasticsearch.core.Releasables; import java.io.IOException; @@ -21,6 +23,8 @@ $else$ import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; import java.util.stream.Collectors; @@ -168,6 +172,11 @@ $endif$ } } + @Override + public ReleasableIterator<$Type$Block> lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return new $Type$Lookup(asBlock(), positions, targetBlockSize); + } + public static long ramBytesEstimated($if(BytesRef)$BytesRefArray$else$$type$[]$endif$ values) { return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(values); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayVector.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayVector.java.st index d6a8723748c1f..30ef9e799cf11 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayVector.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayVector.java.st @@ -10,8 +10,10 @@ package org.elasticsearch.compute.data; import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.$Array$; import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; @@ -148,6 +150,11 @@ $endif$ return new $Type$BigArrayVector(filtered, positions.length, blockFactory); } + @Override + public ReleasableIterator<$Type$Block> lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return new $Type$Lookup(asBlock(), positions, targetBlockSize); + } + @Override public void closeInternal() { // The circuit breaker that tracks the values {@link $if(boolean)$Bit$else$$Type$$endif$Array} is adjusted outside diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ConstantVector.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ConstantVector.java.st index 37cb2d2412522..42c34128121a8 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ConstantVector.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ConstantVector.java.st @@ -11,6 +11,8 @@ $if(BytesRef)$ import org.apache.lucene.util.BytesRef; $endif$ import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; /** * Vector implementation that stores a constant $type$ value. @@ -58,6 +60,28 @@ $endif$ return blockFactory().newConstant$Type$Vector(value, positions.length); } + @Override + public ReleasableIterator<$Type$Block> lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + if (positions.getPositionCount() == 0) { + return ReleasableIterator.empty(); + } + IntVector positionsVector = positions.asVector(); + if (positionsVector == null) { + return new $Type$Lookup(asBlock(), positions, targetBlockSize); + } + int min = positionsVector.min(); + if (min < 0) { + throw new IllegalArgumentException("invalid position [" + min + "]"); + } + if (min > getPositionCount()) { + return ReleasableIterator.single(($Type$Block) positions.blockFactory().newConstantNullBlock(positions.getPositionCount())); + } + if (positionsVector.max() < getPositionCount()) { + return ReleasableIterator.single(positions.blockFactory().newConstant$Type$BlockWith(value, positions.getPositionCount())); + } + return new $Type$Lookup(asBlock(), positions, targetBlockSize); + } + $if(int)$ /** * The minimum value in the block. diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st index 746ccc97a2819..628ee93ed757d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st @@ -13,6 +13,8 @@ $endif$ import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.ReleasableIterator; import java.io.IOException; @@ -54,6 +56,9 @@ $endif$ @Override $Type$Vector filter(int... positions); + @Override + ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize); + $if(int)$ /** * The minimum value in the Vector. An empty Vector will return {@link Integer#MAX_VALUE}. diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st index f011d6f2a4b48..8f4390e8782c5 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st @@ -72,9 +72,8 @@ $endif$ } @Override - public ReleasableIterator<$Type$Block> lookup(IntBlock positions, ByteSizeValue targetBlockSize) { - // TODO optimizations - return new $Type$Lookup(this, positions, targetBlockSize); + public ReleasableIterator lookup(IntBlock positions, ByteSizeValue targetBlockSize) { + return vector.lookup(positions, targetBlockSize); } @Override diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java index 3d80e560cc4d2..017d4c7065bed 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java @@ -35,6 +35,7 @@ import java.util.BitSet; import java.util.List; import java.util.function.BiConsumer; +import java.util.function.Consumer; import java.util.function.Supplier; import java.util.stream.IntStream; import java.util.stream.LongStream; @@ -283,8 +284,19 @@ public void testConstantIntBlock() { positions(blockFactory, 1, 2, new int[] { 1, 2 }), List.of(List.of(value), List.of(value), List.of(value, value)) ); + assertLookup( + block, + positions(blockFactory, 1, 2), + List.of(List.of(value), List.of(value)), + b -> assertThat(b.asVector(), instanceOf(ConstantIntVector.class)) + ); } - assertLookup(block, positions(blockFactory, positionCount + 1000), singletonList(null)); + assertLookup( + block, + positions(blockFactory, positionCount + 1000), + singletonList(null), + b -> assertThat(b, instanceOf(ConstantNullBlock.class)) + ); assertEmptyLookup(blockFactory, block); assertThat(block.asVector().min(), equalTo(value)); assertThat(block.asVector().max(), equalTo(value)); @@ -365,8 +377,19 @@ public void testConstantLongBlock() { positions(blockFactory, 1, 2, new int[] { 1, 2 }), List.of(List.of(value), List.of(value), List.of(value, value)) ); + assertLookup( + block, + positions(blockFactory, 1, 2), + List.of(List.of(value), List.of(value)), + b -> assertThat(b.asVector(), instanceOf(ConstantLongVector.class)) + ); } - assertLookup(block, positions(blockFactory, positionCount + 1000), singletonList(null)); + assertLookup( + block, + positions(blockFactory, positionCount + 1000), + singletonList(null), + b -> assertThat(b, instanceOf(ConstantNullBlock.class)) + ); assertEmptyLookup(blockFactory, block); releaseAndAssertBreaker(block); } @@ -447,8 +470,19 @@ public void testConstantDoubleBlock() { positions(blockFactory, 1, 2, new int[] { 1, 2 }), List.of(List.of(value), List.of(value), List.of(value, value)) ); + assertLookup( + block, + positions(blockFactory, 1, 2), + List.of(List.of(value), List.of(value)), + b -> assertThat(b.asVector(), instanceOf(ConstantDoubleVector.class)) + ); } - assertLookup(block, positions(blockFactory, positionCount + 1000), singletonList(null)); + assertLookup( + block, + positions(blockFactory, positionCount + 1000), + singletonList(null), + b -> assertThat(b, instanceOf(ConstantNullBlock.class)) + ); assertEmptyLookup(blockFactory, block); releaseAndAssertBreaker(block); } @@ -605,8 +639,19 @@ public void testConstantBytesRefBlock() { positions(blockFactory, 1, 2, new int[] { 1, 2 }), List.of(List.of(value), List.of(value), List.of(value, value)) ); + assertLookup( + block, + positions(blockFactory, 1, 2), + List.of(List.of(value), List.of(value)), + b -> assertThat(b.asVector(), instanceOf(ConstantBytesRefVector.class)) + ); } - assertLookup(block, positions(blockFactory, positionCount + 1000), singletonList(null)); + assertLookup( + block, + positions(blockFactory, positionCount + 1000), + singletonList(null), + b -> assertThat(b, instanceOf(ConstantNullBlock.class)) + ); assertEmptyLookup(blockFactory, block); releaseAndAssertBreaker(block); } @@ -689,8 +734,19 @@ public void testConstantBooleanBlock() { positions(blockFactory, 1, 2, new int[] { 1, 2 }), List.of(List.of(value), List.of(value), List.of(value, value)) ); + assertLookup( + block, + positions(blockFactory, 1, 2), + List.of(List.of(value), List.of(value)), + b -> assertThat(b.asVector(), instanceOf(ConstantBooleanVector.class)) + ); } - assertLookup(block, positions(blockFactory, positionCount + 1000), singletonList(null)); + assertLookup( + block, + positions(blockFactory, positionCount + 1000), + singletonList(null), + b -> assertThat(b, instanceOf(ConstantNullBlock.class)) + ); assertEmptyLookup(blockFactory, block); releaseAndAssertBreaker(block); } @@ -716,6 +772,24 @@ public void testConstantNullBlock() { assertThat(positionCount, is(block.getPositionCount())); assertThat(block.getPositionCount(), is(positionCount)); assertThat(block.isNull(randomPosition(positionCount)), is(true)); + if (positionCount > 2) { + List> expected = new ArrayList<>(); + expected.add(null); + expected.add(null); + expected.add(null); + assertLookup( + block, + positions(blockFactory, 1, 2, new int[] { 1, 2 }), + expected, + b -> assertThat(b, instanceOf(ConstantNullBlock.class)) + ); + } + assertLookup( + block, + positions(blockFactory, positionCount + 1000), + singletonList(null), + b -> assertThat(b, instanceOf(ConstantNullBlock.class)) + ); releaseAndAssertBreaker(block); } } @@ -1544,11 +1618,16 @@ static void assertEmptyLookup(BlockFactory blockFactory, Block block) { } static void assertLookup(Block block, IntBlock positions, List> expected) { + assertLookup(block, positions, expected, l -> {}); + } + + static void assertLookup(Block block, IntBlock positions, List> expected, Consumer extra) { try (positions; ReleasableIterator lookup = block.lookup(positions, ByteSizeValue.ofKb(100))) { assertThat(lookup.hasNext(), equalTo(true)); try (Block b = lookup.next()) { assertThat(valuesAtPositions(b, 0, b.getPositionCount()), equalTo(expected)); assertThat(b.blockFactory(), sameInstance(positions.blockFactory())); + extra.accept(b); } assertThat(lookup.hasNext(), equalTo(false)); }