Skip to content
Permalink
Browse files
push value range and set index get operations into BitmapIndex (#12315)
* push value range and set index get operations into BitmapIndex

* fix bug

* oops, fix better

* better like, fix test, javadocs

* fix checkstyle

* simplify and fixes

* cache

* fix tests

* move indexOf into GenericIndexed

* oops

* fix tests
  • Loading branch information
clintropolis committed Mar 9, 2022
1 parent 9f6a930 commit 9cfb23935ffc3eace79b526dd84d912d129a9b1c
Show file tree
Hide file tree
Showing 38 changed files with 736 additions and 322 deletions.
@@ -30,13 +30,11 @@
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.Parser;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.segment.ColumnInspector;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.QueryableIndexStorageAdapter;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
@@ -126,17 +124,7 @@ public void setup()
);

Expr parsed = Parser.parse(expression, ExprMacroTable.nil());
outputType = parsed.getOutputType(
new ColumnInspector()
{
@Nullable
@Override
public ColumnCapabilities getColumnCapabilities(String column)
{
return QueryableIndexStorageAdapter.getColumnCapabilities(index, column);
}
}
);
outputType = parsed.getOutputType(index);
checkSanity();
}

@@ -28,6 +28,8 @@
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.data.GenericIndexed;

import javax.annotation.Nullable;

public class MockBitmapIndexSelector implements BitmapIndexSelector
{
private final GenericIndexed<String> dictionary;
@@ -71,7 +73,7 @@ public BitmapFactory getBitmapFactory()
@Override
public ImmutableBitmap getBitmapIndex(String dimension, String value)
{
return bitmapIndex.getBitmap(bitmapIndex.getIndex(value));
return bitmapIndex.getBitmapForValue(value);
}

@Override
@@ -85,4 +87,11 @@ public ImmutableRTree getSpatialIndex(String dimension)
{
throw new UnsupportedOperationException();
}

/**
 * Reports the capabilities of the given column.
 *
 * This implementation always returns {@code null}, regardless of which column is requested.
 *
 * @param column name of the column being inspected (ignored here)
 * @return always {@code null}
 */
@Nullable
@Override
public ColumnCapabilities getColumnCapabilities(String column)
{
// no capabilities are reported for any column
return null;
}
}
@@ -64,6 +64,7 @@ public IndexedInts getRow()
@Override
public ValueMatcher makeValueMatcher(final String value)
{
final boolean matchNull = predicate.apply(null);
return new ValueMatcher()
{
@Override
@@ -81,8 +82,8 @@ public boolean matches()
nullRow = false;
}
}
// null should match empty rows in multi-value columns
return nullRow && value == null;
// null should match empty rows in multi-value columns if predicate matches null
return nullRow && value == null && matchNull;
}

@Override
@@ -97,7 +98,7 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
@Override
public ValueMatcher makeValueMatcher(final Predicate<String> matcherPredicate)
{
final boolean matchNull = predicate.apply(null);
final boolean matchNull = predicate.apply(null) && matcherPredicate.apply(null);
return new ValueMatcher()
{
@Override
@@ -23,6 +23,7 @@
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.collections.spatial.ImmutableRTree;
import org.apache.druid.segment.ColumnInspector;
import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.data.CloseableIndexed;
@@ -31,12 +32,15 @@

/**
*/
public interface BitmapIndexSelector
public interface BitmapIndexSelector extends ColumnInspector
{
@MustBeClosed
@Nullable
CloseableIndexed<String> getDimensionValues(String dimension);

@Deprecated
ColumnCapabilities.Capable hasMultipleValues(String dimension);

int getNumRows();
BitmapFactory getBitmapFactory();
@Nullable
@@ -38,8 +38,6 @@
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntIterable;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
@@ -60,7 +58,6 @@
import org.apache.druid.segment.ColumnSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.IntIteratorUtils;
import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.filter.Filters;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
@@ -71,7 +68,6 @@
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -305,8 +301,7 @@ public double estimateSelectivity(BitmapIndexSelector indexSelector)
if (extractionFn == null) {
final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension);
return Filters.estimateSelectivity(
bitmapIndex,
IntIteratorUtils.toIntList(getBitmapIndexIterable(values, bitmapIndex).iterator()),
bitmapIndex.getBitmapsForValues(values).iterator(),
indexSelector.getNumRows()
);
} else {
@@ -503,27 +498,7 @@ private static <T> boolean isNaturalOrder(@Nullable final Comparator<T> comparat

private static Iterable<ImmutableBitmap> getBitmapIterable(final Set<String> values, final BitmapIndex bitmapIndex)
{
return Filters.bitmapsFromIndexes(getBitmapIndexIterable(values, bitmapIndex), bitmapIndex);
}

private static IntIterable getBitmapIndexIterable(final Set<String> values, final BitmapIndex bitmapIndex)
{
return () -> new IntIterator()
{
final Iterator<String> iterator = values.iterator();

@Override
public boolean hasNext()
{
return iterator.hasNext();
}

@Override
public int nextInt()
{
return bitmapIndex.getIndex(iterator.next());
}
};
return bitmapIndex.getBitmapsForValues(values);
}

@SuppressWarnings("ReturnValueIgnored")
@@ -32,7 +32,6 @@
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.filter.LikeFilter;

import javax.annotation.Nullable;
@@ -299,17 +298,15 @@ private static boolean matches(@Nullable final String s, Pattern pattern)
* of s are ignored. This method is useful if you've already independently verified the prefix. This method
* evaluates strings.get(i) lazily to save time when it isn't necessary to actually look at the string.
*/
public boolean matchesSuffixOnly(final Indexed<String> strings, final int i)
public boolean matchesSuffixOnly(@Nullable String value)
{
if (suffixMatch == SuffixMatch.MATCH_ANY) {
return true;
} else if (suffixMatch == SuffixMatch.MATCH_EMPTY) {
final String s = strings.get(i);
return s == null ? matches(null) : s.length() == prefix.length();
return value == null ? matches(null) : value.length() == prefix.length();
} else {
// suffixMatch is MATCH_PATTERN
final String s = strings.get(i);
return matches(s);
return matches(value);
}
}

@@ -209,7 +209,7 @@ private ColumnAnalysis analyzeStringColumn(
String value = bitmapIndex.getValue(i);
if (value != null) {
size += StringUtils.estimatedBinaryLengthAsUTF8(value) *
((long) bitmapIndex.getBitmap(bitmapIndex.getIndex(value)).size());
((long) bitmapIndex.getBitmapForValue(value).size());
}
}
}
@@ -37,6 +37,15 @@ public interface ColumnInspector extends Expr.InputBindingInspector
@Nullable
ColumnCapabilities getColumnCapabilities(String column);

/**
 * Looks up the capabilities of a column via {@link #getColumnCapabilities(String)}, substituting a caller-supplied
 * fallback when that lookup yields {@code null}.
 *
 * @param column              name of the column to inspect
 * @param defaultCapabilites  value to return when {@link #getColumnCapabilities(String)} returns {@code null}
 * @return the looked-up capabilities if non-null, otherwise {@code defaultCapabilites}
 */
default ColumnCapabilities getColumnCapabilitiesWithDefault(String column, ColumnCapabilities defaultCapabilites)
{
  final ColumnCapabilities found = getColumnCapabilities(column);
  return found == null ? defaultCapabilites : found;
}

@Nullable
@Override
default ExpressionType getType(String name)
@@ -19,17 +19,29 @@

package org.apache.druid.segment;

import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnHolder;

import javax.annotation.Nullable;
import java.util.List;

/**
*/
public interface ColumnSelector
public interface ColumnSelector extends ColumnInspector
{
List<String> getColumnNames();

@Nullable
ColumnHolder getColumnHolder(String columnName);

/**
 * Resolves column capabilities through {@link #getColumnHolder(String)}.
 *
 * @param column name of the column to inspect
 * @return the capabilities reported by the column's holder, or {@code null} when no holder exists for the column
 */
@Nullable
@Override
default ColumnCapabilities getColumnCapabilities(String column)
{
  final ColumnHolder holder = getColumnHolder(column);
  return holder != null ? holder.getCapabilities() : null;
}
}
@@ -19,6 +19,7 @@

package org.apache.druid.segment;

import com.google.common.collect.ImmutableList;
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.collections.spatial.ImmutableRTree;
@@ -38,7 +39,10 @@

import javax.annotation.Nullable;
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.Set;
import java.util.function.Predicate;

/**
*/
@@ -171,10 +175,7 @@ public ColumnCapabilities.Capable hasMultipleValues(final String dimension)
VirtualColumn virtualColumn = virtualColumns.getVirtualColumn(dimension);
ColumnCapabilities virtualCapabilities = null;
if (virtualColumn != null) {
virtualCapabilities = virtualColumn.capabilities(
QueryableIndexStorageAdapter.getColumnInspectorForIndex(index),
dimension
);
virtualCapabilities = virtualColumn.capabilities(index, dimension);
}
return virtualCapabilities != null ? virtualCapabilities.hasMultipleValues() : ColumnCapabilities.Capable.FALSE;
}
@@ -264,6 +265,67 @@ public ImmutableBitmap getBitmap(int idx)
return bitmapFactory.makeEmptyImmutableBitmap();
}
}

/**
 * Returns the bitmap of rows matching a single value.
 *
 * A value that {@link NullHandling#isNullOrEquivalent} accepts yields the complement of an empty bitmap over
 * {@code getNumRows()} rows; any other value yields an empty bitmap.
 *
 * @param value value to look up, may be {@code null}
 * @return bitmap as described above
 */
@Override
public ImmutableBitmap getBitmapForValue(@Nullable String value)
{
  if (!NullHandling.isNullOrEquivalent(value)) {
    return bitmapFactory.makeEmptyImmutableBitmap();
  }
  // complementing an empty bitmap over the row count (presumably selects every row)
  final ImmutableBitmap noRows = bitmapFactory.makeEmptyImmutableBitmap();
  return bitmapFactory.complement(noRows, getNumRows());
}

/**
 * Returns the bitmaps for values falling inside the requested range, filtered by {@code matcher}.
 *
 * Only position 0 is ever considered here. The range is reduced to a half-open window
 * {@code [lowerBound, upperBound)} over the positions {0, 1}. If the window is empty, no bitmaps are returned.
 * Otherwise a single bitmap is returned: {@code getBitmap(0)} when {@code matcher} accepts {@code null}, or an
 * empty bitmap when it does not.
 *
 * @param startValue  lower end of the range; {@code null} places the lower bound at position 0
 * @param startStrict whether a null-equivalent {@code startValue} excludes position 0
 * @param endValue    upper end of the range; {@code null} places the upper bound at position 1
 * @param endStrict   whether a null-equivalent {@code endValue} excludes position 0
 * @param matcher     predicate tested against {@code null} to decide whether position 0 matches
 * @return an empty iterable, or a single-element list as described above
 */
@Override
public Iterable<ImmutableBitmap> getBitmapsInRange(
    @Nullable String startValue,
    boolean startStrict,
    @Nullable String endValue,
    boolean endStrict,
    Predicate<String> matcher
)
{
  // inclusive lower bound of the window
  final int lowerBound;
  if (startValue == null) {
    lowerBound = 0;
  } else if (!NullHandling.isNullOrEquivalent(startValue)) {
    lowerBound = 1;
  } else {
    lowerBound = startStrict ? 1 : 0;
  }

  // exclusive upper bound: drops to 0 only for a strict, non-null, null-equivalent end value
  final boolean endExcludesFirst =
      endValue != null && NullHandling.isNullOrEquivalent(endValue) && endStrict;
  final int upperBound = endExcludesFirst ? 0 : 1;

  if (upperBound <= lowerBound) {
    return Collections.emptyList();
  }

  final ImmutableBitmap selected =
      matcher.test(null) ? getBitmap(0) : bitmapFactory.makeEmptyImmutableBitmap();
  return ImmutableList.of(selected);
}

/**
 * Returns the bitmaps for a set of values as a single-element list.
 *
 * The element is {@code getBitmap(0)} when the set contains {@code null}, or contains the empty string while
 * {@link NullHandling#replaceWithDefault()} is true; otherwise it is an empty bitmap.
 *
 * @param values values to look up; {@code contains(null)} is invoked on it directly
 * @return a one-element list holding the bitmap described above
 */
@Override
public Iterable<ImmutableBitmap> getBitmapsForValues(Set<String> values)
{
  final boolean asksForNull =
      values.contains(null) || (NullHandling.replaceWithDefault() && values.contains(""));
  final ImmutableBitmap selected =
      asksForNull ? getBitmap(0) : bitmapFactory.makeEmptyImmutableBitmap();
  return ImmutableList.of(selected);
}
};
} else if (columnHolder.getCapabilities().hasBitmapIndexes() && columnHolder.getCapabilities().is(ValueType.STRING)) {
// currently BitmapIndex are reliant on STRING dictionaries to operate correctly, and will fail when used with
@@ -282,7 +344,7 @@ public ImmutableBitmap getBitmapIndex(String dimension, String value)
if (idx == null) {
return null;
}
return idx.getBitmap(idx.getIndex(value));
return idx.getBitmapForValue(value);
}

final ColumnHolder columnHolder = index.getColumnHolder(dimension);
@@ -301,7 +363,7 @@ public ImmutableBitmap getBitmapIndex(String dimension, String value)
}

final BitmapIndex bitmapIndex = columnHolder.getBitmapIndex();
return bitmapIndex.getBitmap(bitmapIndex.getIndex(value));
return bitmapIndex.getBitmapForValue(value);
}

@Override
@@ -323,4 +385,11 @@ private boolean isVirtualColumn(final String columnName)
{
return virtualColumns.getVirtualColumn(columnName) != null;
}

/**
 * Reports the capabilities of a column by delegating to {@code virtualColumns.getColumnCapabilities}, passing
 * this selector's {@code index} along with the column name.
 *
 * @param column name of the column to inspect
 * @return whatever the delegate returns, possibly {@code null}
 */
@Nullable
@Override
public ColumnCapabilities getColumnCapabilities(String column)
{
  final ColumnCapabilities resolved = virtualColumns.getColumnCapabilities(index, column);
  return resolved;
}
}

0 comments on commit 9cfb239

Please sign in to comment.