Skip to content

Commit

Permalink
overhaul DruidPredicateFactory to better handle 3VL (#15629)
Browse files Browse the repository at this point in the history
* overhaul DruidPredicateFactory to better handle 3VL

fixes some bugs caused by some limitations of the original design of how DruidPredicateFactory interacts with 3-value logic. The primary impacted area was with how filters on values transformed with expressions or extractionFn which turn non-null values into nulls, which were not possible to be modelled with the 'isNullInputUnknown' method

changes:
* adds DruidObjectPredicate to specialize string, array, and object based predicates instead of using guava Predicate
* DruidPredicateFactory now uses DruidObjectPredicate
* introduces DruidPredicateMatch enum, which all predicates returned from DruidPredicateFactory now use instead of booleans to indicate match. This means DruidLongPredicate, DruidFloatPredicate, DruidDoublePredicate, and the newly added DruidObjectPredicate apply methods all now return DruidPredicateMatch. This allows matchers and indexes
* isNullInputUnknown has been removed from DruidPredicateFactory

* rename, fix test

* adjust

* style

* npe

* more test

* fix default value mode to not match new test
  • Loading branch information
clintropolis authored Jan 6, 2024
1 parent 62964e9 commit c221a26
Show file tree
Hide file tree
Showing 88 changed files with 1,191 additions and 1,086 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.apache.druid.benchmark;

import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.FluentIterable;
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
Expand All @@ -32,7 +31,9 @@
import org.apache.druid.query.filter.DruidDoublePredicate;
import org.apache.druid.query.filter.DruidFloatPredicate;
import org.apache.druid.query.filter.DruidLongPredicate;
import org.apache.druid.query.filter.DruidObjectPredicate;
import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.query.filter.DruidPredicateMatch;
import org.apache.druid.segment.data.BitmapSerdeFactory;
import org.apache.druid.segment.data.GenericIndexed;
import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
Expand Down Expand Up @@ -73,37 +74,32 @@ public class DimensionPredicateFilterBenchmark
new DruidPredicateFactory()
{
@Override
public Predicate<String> makeStringPredicate()
public DruidObjectPredicate<String> makeStringPredicate()
{
return new Predicate<String>()
{
@Override
public boolean apply(String input)
{
if (input == null) {
return false;
}
return Integer.parseInt(input) % 2 == 0;
return (DruidObjectPredicate<String>) input -> {
if (input == null) {
return DruidPredicateMatch.UNKNOWN;
}
return DruidPredicateMatch.of(Integer.parseInt(input) % 2 == 0);
};
}

@Override
public DruidLongPredicate makeLongPredicate()
{
return DruidLongPredicate.ALWAYS_FALSE;
return DruidLongPredicate.ALWAYS_FALSE_WITH_NULL_UNKNOWN;
}

@Override
public DruidFloatPredicate makeFloatPredicate()
{
return DruidFloatPredicate.ALWAYS_FALSE;
return DruidFloatPredicate.ALWAYS_FALSE_WITH_NULL_UNKNOWN;
}

@Override
public DruidDoublePredicate makeDoublePredicate()
{
return DruidDoublePredicate.ALWAYS_FALSE;
return DruidDoublePredicate.ALWAYS_FALSE_WITH_NULL_UNKNOWN;
}
},
null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.apache.druid.benchmark;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.InputRow;
Expand All @@ -41,6 +40,7 @@
import org.apache.druid.query.filter.DruidDoublePredicate;
import org.apache.druid.query.filter.DruidFloatPredicate;
import org.apache.druid.query.filter.DruidLongPredicate;
import org.apache.druid.query.filter.DruidObjectPredicate;
import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.query.filter.Filter;
import org.apache.druid.query.filter.OrDimFilter;
Expand Down Expand Up @@ -93,7 +93,6 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;

@State(Scope.Benchmark)
Expand Down Expand Up @@ -543,34 +542,27 @@ public Filter toFilter()
final DruidPredicateFactory predicateFactory = new DruidPredicateFactory()
{
@Override
public Predicate<String> makeStringPredicate()
public DruidObjectPredicate<String> makeStringPredicate()
{
return new Predicate<String>()
{
@Override
public boolean apply(String input)
{
return Objects.equals(valueOrNull, input);
}
};
return valueOrNull == null ? DruidObjectPredicate.isNull() : DruidObjectPredicate.equalTo(valueOrNull);
}

@Override
public DruidLongPredicate makeLongPredicate()
{
return DruidLongPredicate.ALWAYS_FALSE;
return DruidLongPredicate.ALWAYS_FALSE_WITH_NULL_UNKNOWN;
}

@Override
public DruidFloatPredicate makeFloatPredicate()
{
return DruidFloatPredicate.ALWAYS_FALSE;
return DruidFloatPredicate.ALWAYS_FALSE_WITH_NULL_UNKNOWN;
}

@Override
public DruidDoublePredicate makeDoublePredicate()
{
return DruidDoublePredicate.ALWAYS_FALSE;
return DruidDoublePredicate.ALWAYS_FALSE_WITH_NULL_UNKNOWN;
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
package org.apache.druid.segment;

import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import org.apache.druid.query.filter.DruidObjectPredicate;
import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
Expand Down Expand Up @@ -88,15 +88,14 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
@Override
public ValueMatcher makeValueMatcher(DruidPredicateFactory predicateFactory)
{
final Predicate<String> predicate = predicateFactory.makeStringPredicate();
final DruidObjectPredicate<String> predicate = predicateFactory.makeStringPredicate();
return new ValueMatcher()
{
@Override
public boolean matches(boolean includeUnknown)
{
final String rowValue = (String) getObject();
final boolean matchNull = includeUnknown && predicateFactory.isNullInputUnknown();
return (matchNull && rowValue == null) || predicate.apply(rowValue);
return predicate.apply(rowValue).matches(includeUnknown);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.RangeSet;
import com.google.common.hash.HashCode;
Expand Down Expand Up @@ -91,16 +90,14 @@ public Filter toFilter()
dimension,
new DruidPredicateFactory()
{
private final boolean isNullUnknown = !bloomKFilter.testBytes(null, 0, 0);

@Override
public Predicate<String> makeStringPredicate()
public DruidObjectPredicate<String> makeStringPredicate()
{
return str -> {
if (str == null) {
return bloomKFilter.testBytes(null, 0, 0);
return DruidPredicateMatch.of(bloomKFilter.testBytes(null, 0, 0));
}
return bloomKFilter.testString(str);
return DruidPredicateMatch.of(bloomKFilter.testString(str));
};
}

Expand All @@ -110,15 +107,15 @@ public DruidLongPredicate makeLongPredicate()
return new DruidLongPredicate()
{
@Override
public boolean applyLong(long input)
public DruidPredicateMatch applyLong(long input)
{
return bloomKFilter.testLong(input);
return DruidPredicateMatch.of(bloomKFilter.testLong(input));
}

@Override
public boolean applyNull()
public DruidPredicateMatch applyNull()
{
return bloomKFilter.testBytes(null, 0, 0);
return DruidPredicateMatch.of(bloomKFilter.testBytes(null, 0, 0));
}
};
}
Expand All @@ -129,15 +126,15 @@ public DruidFloatPredicate makeFloatPredicate()
return new DruidFloatPredicate()
{
@Override
public boolean applyFloat(float input)
public DruidPredicateMatch applyFloat(float input)
{
return bloomKFilter.testFloat(input);
return DruidPredicateMatch.of(bloomKFilter.testFloat(input));
}

@Override
public boolean applyNull()
public DruidPredicateMatch applyNull()
{
return bloomKFilter.testBytes(null, 0, 0);
return DruidPredicateMatch.of(bloomKFilter.testBytes(null, 0, 0));
}
};
}
Expand All @@ -148,24 +145,18 @@ public DruidDoublePredicate makeDoublePredicate()
return new DruidDoublePredicate()
{
@Override
public boolean applyDouble(double input)
public DruidPredicateMatch applyDouble(double input)
{
return bloomKFilter.testDouble(input);
return DruidPredicateMatch.of(bloomKFilter.testDouble(input));
}

@Override
public boolean applyNull()
public DruidPredicateMatch applyNull()
{
return bloomKFilter.testBytes(null, 0, 0);
return DruidPredicateMatch.of(bloomKFilter.testBytes(null, 0, 0));
}
};
}

@Override
public boolean isNullInputUnknown()
{
return isNullUnknown;
}
},
extractionFn,
filterTuning
Expand Down
3 changes: 3 additions & 0 deletions processing/src/main/java/org/apache/druid/math/expr/Expr.java
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ default ColumnIndexSupplier asColumnIndexSupplier(ColumnSelector columnSelector,
}
final ColumnCapabilities capabilities = holder.getCapabilities();
final ColumnIndexSupplier delegateIndexSupplier = holder.getIndexSupplier();
if (delegateIndexSupplier == null) {
return null;
}
final DictionaryEncodedValueIndex<?> delegateRawIndex = delegateIndexSupplier.as(
DictionaryEncodedValueIndex.class
);
Expand Down
Loading

0 comments on commit c221a26

Please sign in to comment.