Skip to content

Commit

Permalink
adding Bound Filter
Browse files Browse the repository at this point in the history
  • Loading branch information
Slim Bouguerra committed Dec 10, 2015
1 parent a2d0bea commit 77afdf2
Show file tree
Hide file tree
Showing 8 changed files with 503 additions and 2 deletions.
95 changes: 95 additions & 0 deletions docs/content/querying/filters.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,101 @@ The grammar for a IN filter is as follows:
}
```

### Bound filter

The bound filter can be used to filter by comparing dimension values to an upper value and/or a lower value.
By default, comparison is string based and **case sensitive**.
To use numeric comparison you can set `alphaNumeric` to `true`.
By default the bound filter is not a strict inclusion: `inputString <= upper && inputString >= lower`.

The grammar for a bound filter is as follows:

```json
{
"type": "bound",
"dimension": "age",
"lower": "21",
"upper": "31" ,
"alphaNumeric": true
}
```
Equivalent to retain column if `21 <= age <= 31`

```json
{
"type": "bound",
"dimension": "name",
"lower": "foo",
"upper": "hoo"
}
```

Equivalent to retain column if `foo <= name <= hoo`

To get a strict inclusion, set `lowerStrict` and/or `upperStrict` to `true`.

To have strict bounds:

```json
{
"type": "bound",
"dimension": "age",
"lower": "21",
"lowerStrict": true,
"upper": "31" ,
"upperStrict": true,
"alphaNumeric": true
}
```
Equivalent to retain column if `21 < age < 31`

To have strict upper bound:

```json
{
"type": "bound",
"dimension": "age",
"lower": "21",
"upper": "31" ,
"upperStrict": true,
"alphaNumeric": true
}
```

Equivalent to retain column if `21 <= age < 31`

To compare against only an upper bound or only a lower bound:

```json
{
"type": "bound",
"dimension": "age",
"upper": "31" ,
"upperStrict": true,
"alphaNumeric": true
}
```

Equivalent to retain column if `age < 31`

```json
{
"type": "bound",
"dimension": "age",
"lower": "18" ,
"alphaNumeric": true
}
```

Equivalent to retain column if ` 18 <= age`

For the `alphaNumeric` comparator, if the dimension value includes non-digit characters you may get **fuzzy matching**.
If the dimension value starts with a non-digit, the filter will consider it out of range (`value < lowerBound` and `value > upperBound`).
If the dimension value starts with a digit and contains non-digit characters, the comparison is done character by character.
For instance, suppose the lower bound is `100` and the value is `10K`: the filter will match (`100 < 10K` returns `true`) since `K` is greater than any digit.
Now suppose that the lower bound is `110`: the filter will not match (`110 < 10K` returns `false`).


#### Search Query Spec

##### Insensitive Contains
Expand Down
172 changes: 172 additions & 0 deletions processing/src/main/java/io/druid/query/filter/BoundDimFilter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package io.druid.query.filter;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.metamx.common.StringUtils;

import java.nio.ByteBuffer;

/**
 * JSON-configurable description of a "bound" filter on a single dimension.
 *
 * <p>Holds the dimension name, an optional lower bound, an optional upper bound (at least one of
 * the two must be present), and three flags: whether each bound is strict and whether the
 * comparison is alpha-numeric. Each flag defaults to {@code false} when omitted.
 */
public class BoundDimFilter implements DimFilter
{
  private final String dimension;
  private final String upper;
  private final String lower;
  private final boolean lowerStrict;
  private final boolean upperStrict;
  private final boolean alphaNumeric;

  /**
   * @param dimension    name of the dimension to filter on; must not be null
   * @param lower        lower bound, or null for no lower bound
   * @param upper        upper bound, or null for no upper bound
   * @param lowerStrict  whether the lower bound is exclusive; null is treated as false
   * @param upperStrict  whether the upper bound is exclusive; null is treated as false
   * @param alphaNumeric whether alpha-numeric comparison is requested; null is treated as false
   *
   * @throws NullPointerException  if {@code dimension} is null
   * @throws IllegalStateException if both {@code lower} and {@code upper} are null
   */
  @JsonCreator
  public BoundDimFilter(
      @JsonProperty("dimension") String dimension,
      @JsonProperty("lower") String lower,
      @JsonProperty("upper") String upper,
      @JsonProperty("lowerStrict") Boolean lowerStrict,
      @JsonProperty("upperStrict") Boolean upperStrict,
      @JsonProperty("alphaNumeric") Boolean alphaNumeric
  )
  {
    this.dimension = Preconditions.checkNotNull(dimension, "dimension can not be null");
    Preconditions.checkState(!(lower == null && upper == null), "lower and upper can not be null at the same time");
    this.lower = lower;
    this.upper = upper;
    // Absent (null) flags fall back to false.
    this.lowerStrict = lowerStrict != null && lowerStrict;
    this.upperStrict = upperStrict != null && upperStrict;
    this.alphaNumeric = alphaNumeric != null && alphaNumeric;
  }

  @JsonProperty
  public String getDimension()
  {
    return dimension;
  }

  @JsonProperty
  public String getUpper()
  {
    return upper;
  }

  @JsonProperty
  public String getLower()
  {
    return lower;
  }

  @JsonProperty
  public boolean isLowerStrict()
  {
    return lowerStrict;
  }

  @JsonProperty
  public boolean isUpperStrict()
  {
    return upperStrict;
  }

  @JsonProperty
  public boolean isAlphaNumeric()
  {
    return alphaNumeric;
  }

  /**
   * Builds the cache key for this filter.
   *
   * <p>Layout: cache id, bound kind, upper-strict flag, lower-strict flag, alpha-numeric flag,
   * separator, dimension bytes, separator, upper bytes, separator, lower bytes. A missing bound
   * contributes zero bytes.
   */
  @Override
  public byte[] getCacheKey()
  {
    final String lowerBound = getLower();
    final String upperBound = getUpper();

    final byte[] dimensionUtf8 = StringUtils.toUtf8(getDimension());
    final byte[] lowerUtf8 = utf8OrEmpty(lowerBound);
    final byte[] upperUtf8 = utf8OrEmpty(upperBound);

    // 0x1: lower bound present (upper present too), 0x2: no lower bound, 0x3: lower only.
    final byte boundKind;
    if (lowerBound == null) {
      boundKind = 0x2;
    } else if (upperBound == null) {
      boundKind = 0x3;
    } else {
      boundKind = 0x1;
    }

    final byte lowerStrictFlag = (byte) (isLowerStrict() ? 1 : 0);
    final byte upperStrictFlag = (byte) (isUpperStrict() ? 1 : 0);
    final byte alphaNumericFlag = (byte) (isAlphaNumeric() ? 1 : 0);

    // 8 fixed bytes: cache id + bound kind + three flags + three separators.
    final ByteBuffer key = ByteBuffer.allocate(
        8 + dimensionUtf8.length + upperUtf8.length + lowerUtf8.length
    );
    key.put(DimFilterCacheHelper.BOUND_CACHE_ID);
    key.put(boundKind);
    key.put(upperStrictFlag);
    key.put(lowerStrictFlag);
    key.put(alphaNumericFlag);
    key.put(DimFilterCacheHelper.STRING_SEPARATOR);
    key.put(dimensionUtf8);
    key.put(DimFilterCacheHelper.STRING_SEPARATOR);
    key.put(upperUtf8);
    key.put(DimFilterCacheHelper.STRING_SEPARATOR);
    key.put(lowerUtf8);
    return key.array();
  }

  @Override
  public boolean equals(Object o)
  {
    if (o == this) {
      return true;
    }
    if (!(o instanceof BoundDimFilter)) {
      return false;
    }

    final BoundDimFilter other = (BoundDimFilter) o;
    return isLowerStrict() == other.isLowerStrict()
           && isUpperStrict() == other.isUpperStrict()
           && isAlphaNumeric() == other.isAlphaNumeric()
           && getDimension().equals(other.getDimension())
           && nullSafeEquals(getUpper(), other.getUpper())
           && nullSafeEquals(getLower(), other.getLower());
  }

  @Override
  public int hashCode()
  {
    final int[] remainingParts = {
        nullSafeHash(getUpper()),
        nullSafeHash(getLower()),
        isLowerStrict() ? 1 : 0,
        isUpperStrict() ? 1 : 0,
        isAlphaNumeric() ? 1 : 0
    };

    int hash = getDimension().hashCode();
    for (int part : remainingParts) {
      hash = 31 * hash + part;
    }
    return hash;
  }

  /** UTF-8 bytes of {@code value}, or an empty array when {@code value} is null. */
  private static byte[] utf8OrEmpty(String value)
  {
    if (value == null) {
      return new byte[0];
    }
    return StringUtils.toUtf8(value);
  }

  /** True when both strings are null or both are equal. */
  private static boolean nullSafeEquals(String left, String right)
  {
    if (left == null) {
      return right == null;
    }
    return left.equals(right);
  }

  /** Hash code of {@code value}, or 0 when {@code value} is null. */
  private static int nullSafeHash(String value)
  {
    return value == null ? 0 : value.hashCode();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
@JsonSubTypes.Type(name="search", value=SearchQueryDimFilter.class),
@JsonSubTypes.Type(name="javascript", value=JavaScriptDimFilter.class),
@JsonSubTypes.Type(name="spatial", value=SpatialDimFilter.class),
@JsonSubTypes.Type(name="in", value=InDimFilter.class)
@JsonSubTypes.Type(name="in", value=InDimFilter.class),
@JsonSubTypes.Type(name="bound", value=BoundDimFilter.class)

})
public interface DimFilter
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class DimFilterCacheHelper
static final byte SPATIAL_CACHE_ID = 0x8;
static final byte IN_CACHE_ID = 0x9;
static final byte STRING_SEPARATOR = (byte) 0xFF;
// Cache-key type id written as the first byte of a bound filter's cache key.
// Declared final so the id cannot be reassigned at runtime.
public static final byte BOUND_CACHE_ID = 0xA;

static byte[] computeCacheKey(byte cacheIdKey, List<DimFilter> filters)
{
Expand Down
70 changes: 70 additions & 0 deletions processing/src/main/java/io/druid/segment/filter/BoundFilter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package io.druid.segment.filter;

import com.google.common.base.Predicate;
import io.druid.query.filter.BoundDimFilter;
import io.druid.query.topn.AlphaNumericTopNMetricSpec;
import io.druid.query.topn.LexicographicTopNMetricSpec;

import java.util.Comparator;

/**
 * Segment-level filter for a {@link BoundDimFilter}: matches dimension values that fall within
 * the configured lower and/or upper bound.
 *
 * <p>The comparator and the bound settings are resolved once, when the filter is constructed,
 * instead of on every call to the predicate.
 */
public class BoundFilter extends DimensionPredicateFilter
{

  /**
   * @param boundDimFilter the bound specification (dimension, bounds, strictness, comparison mode)
   */
  public BoundFilter(final BoundDimFilter boundDimFilter)
  {
    super(boundDimFilter.getDimension(), new BoundPredicate(boundDimFilter));
  }

  /**
   * Predicate deciding whether a single dimension value lies within the bounds.
   */
  private static class BoundPredicate implements Predicate<String>
  {
    private final Comparator<String> comparator;
    private final String lower;
    private final String upper;
    private final boolean lowerStrict;
    private final boolean upperStrict;

    BoundPredicate(BoundDimFilter boundDimFilter)
    {
      // Select the comparator once; it does not depend on the value being tested.
      if (boundDimFilter.isAlphaNumeric()) {
        this.comparator = new AlphaNumericTopNMetricSpec(null).getComparator(null, null);
      } else {
        this.comparator = new LexicographicTopNMetricSpec(null).getComparator(null, null);
      }
      this.lower = boundDimFilter.getLower();
      this.upper = boundDimFilter.getUpper();
      this.lowerStrict = boundDimFilter.isLowerStrict();
      this.upperStrict = boundDimFilter.isUpperStrict();
    }

    /**
     * @param input dimension value to test; a null value never matches
     *
     * @return true when {@code input} satisfies every configured bound
     */
    @Override
    public boolean apply(String input)
    {
      if (input == null) {
        return false;
      }

      // A missing bound places no constraint on that side.
      if (lower != null) {
        final int againstLower = comparator.compare(input, lower);
        if (lowerStrict ? againstLower <= 0 : againstLower < 0) {
          return false;
        }
      }
      if (upper != null) {
        final int againstUpper = comparator.compare(upper, input);
        if (upperStrict ? againstUpper <= 0 : againstUpper < 0) {
          return false;
        }
      }
      return true;
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import io.druid.query.filter.AndDimFilter;
import io.druid.query.filter.BoundDimFilter;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.ExtractionDimFilter;
import io.druid.query.filter.Filter;
Expand All @@ -41,7 +42,8 @@
*/
public class Filters
{
public static List<Filter> convertDimensionFilters(List<DimFilter> filters){
public static List<Filter> convertDimensionFilters(List<DimFilter> filters)
{
return Lists.transform(
filters,
new Function<DimFilter, Filter>()
Expand Down Expand Up @@ -111,6 +113,8 @@ public Filter apply(@Nullable String input)
);

filter = new OrFilter(listFilters);
} else if (dimFilter instanceof BoundDimFilter) {
filter = new BoundFilter((BoundDimFilter) dimFilter);
}

return filter;
Expand Down
Loading

0 comments on commit 77afdf2

Please sign in to comment.