Skip to content

Commit

Permalink
HIVE-2005. Implement BETWEEN operator (Navis via cws)
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1220933 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
cwsteinbach committed Dec 19, 2011
1 parent 016c6ce commit 5f354e5
Show file tree
Hide file tree
Showing 6 changed files with 358 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayContains;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAssertTrue;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce;
Expand Down Expand Up @@ -362,6 +363,7 @@ public final class FunctionRegistry {
registerGenericUDF(">=", GenericUDFOPEqualOrGreaterThan.class);
registerGenericUDF("not", GenericUDFOPNot.class);
registerGenericUDF("!", GenericUDFOPNot.class);
registerGenericUDF("between", GenericUDFBetween.class);

registerGenericUDF("ewah_bitmap_and", GenericUDFEWAHBitmapAnd.class);
registerGenericUDF("ewah_bitmap_or", GenericUDFEWAHBitmapOr.class);
Expand Down
5 changes: 5 additions & 0 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
Original file line number Diff line number Diff line change
Expand Up @@ -2003,6 +2003,10 @@ precedenceEqualExpression
-> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions))
| (KW_IN expressions)
-> ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions)
| ( KW_NOT KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) )
-> ^(TOK_FUNCTION Identifier["between"] KW_TRUE $left $min $max)
| ( KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) )
-> ^(TOK_FUNCTION Identifier["between"] KW_FALSE $left $min $max)
)*
;

Expand Down Expand Up @@ -2106,6 +2110,7 @@ sysFuncNames
| KW_RLIKE
| KW_REGEXP
| KW_IN
| KW_BETWEEN
;

descFuncNames
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.ql.udf.generic;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.BooleanWritable;

@Description(name = "between", value = "_FUNC_ a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c")
public class GenericUDFBetween extends GenericUDF {

GenericUDFOPEqualOrGreaterThan egt = new GenericUDFOPEqualOrGreaterThan();
GenericUDFOPEqualOrLessThan elt = new GenericUDFOPEqualOrLessThan();

private ObjectInspector[] argumentOIs;
private final BooleanWritable result = new BooleanWritable();

@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if (!arguments[0].getTypeName().equals("boolean")) {
throw new UDFArgumentTypeException(0, "First argument for BETWEEN should be boolean type");
}
egt.initialize(new ObjectInspector[] {arguments[1], arguments[2]});
elt.initialize(new ObjectInspector[] {arguments[1], arguments[3]});

argumentOIs = arguments;
return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
}

@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
boolean invert = (Boolean) ((PrimitiveObjectInspector) argumentOIs[0])
.getPrimitiveJavaObject(arguments[0].get());

BooleanWritable left = ((BooleanWritable)egt.evaluate(new DeferredObject[] {arguments[1], arguments[2]}));
if (left == null) {
return null;
}
if (!invert && !left.get()) {
result.set(false);
return result;
}
BooleanWritable right = ((BooleanWritable)elt.evaluate(new DeferredObject[] {arguments[1], arguments[3]}));
if (right == null) {
return null;
}
boolean between = left.get() && right.get();
result.set(invert ? !between : between);
return result;
}

@Override
public String getDisplayString(String[] children) {
StringBuilder sb = new StringBuilder();
sb.append(children[1]);
if (Boolean.valueOf(children[0])) {
sb.append(" NOT");
}
sb.append(" BETWEEN ");
sb.append(children[2]).append(" AND ").append(children[3]);
return sb.toString();
}
}
14 changes: 14 additions & 0 deletions ql/src/test/queries/clientpositive/udf_between.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
describe function between;
describe function extended between;

explain SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20;
SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20;

explain SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20;
SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20;

explain SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1;
SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1;

explain SELECT * FROM src where 2 between 2 AND '3' LIMIT 1;
SELECT * FROM src where 2 between 2 AND '3' LIMIT 1;
1 change: 1 addition & 0 deletions ql/src/test/results/clientpositive/show_functions.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ asin
assert_true
atan
avg
between
bin
case
ceil
Expand Down
252 changes: 252 additions & 0 deletions ql/src/test/results/clientpositive/udf_between.q.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
PREHOOK: query: describe function between
PREHOOK: type: DESCFUNCTION
POSTHOOK: query: describe function between
POSTHOOK: type: DESCFUNCTION
between a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c
PREHOOK: query: describe function extended between
PREHOOK: type: DESCFUNCTION
POSTHOOK: query: describe function extended between
POSTHOOK: type: DESCFUNCTION
between a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c
PREHOOK: query: explain SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20
PREHOOK: type: QUERY
POSTHOOK: query: explain SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_FALSE (+ (TOK_TABLE_OR_COL key) 100) (+ 150 (- 50)) (+ 150 50))) (TOK_LIMIT 20)))

STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 is a root stage

STAGE PLANS:
Stage: Stage-1
Map Reduce
Alias -> Map Operator Tree:
src
TableScan
alias: src
Filter Operator
predicate:
expr: (key + 100) BETWEEN (150 + (- 50)) AND (150 + 50)
type: boolean
Select Operator
expressions:
expr: key
type: string
expr: value
type: string
outputColumnNames: _col0, _col1
Limit
File Output Operator
compressed: false
GlobalTableId: 0
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Stage: Stage-0
Fetch Operator
limit: 20


PREHOOK: query: SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
86 val_86
27 val_27
98 val_98
66 val_66
37 val_37
15 val_15
82 val_82
17 val_17
0 val_0
57 val_57
20 val_20
92 val_92
47 val_47
72 val_72
4 val_4
35 val_35
54 val_54
51 val_51
65 val_65
83 val_83
PREHOOK: query: explain SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20
PREHOOK: type: QUERY
POSTHOOK: query: explain SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_TRUE (+ (TOK_TABLE_OR_COL key) 100) (+ 150 (- 50)) (+ 150 50))) (TOK_LIMIT 20)))

STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 is a root stage

STAGE PLANS:
Stage: Stage-1
Map Reduce
Alias -> Map Operator Tree:
src
TableScan
alias: src
Filter Operator
predicate:
expr: (key + 100) NOT BETWEEN (150 + (- 50)) AND (150 + 50)
type: boolean
Select Operator
expressions:
expr: key
type: string
expr: value
type: string
outputColumnNames: _col0, _col1
Limit
File Output Operator
compressed: false
GlobalTableId: 0
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Stage: Stage-0
Fetch Operator
limit: 20


PREHOOK: query: SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
238 val_238
311 val_311
165 val_165
409 val_409
255 val_255
278 val_278
484 val_484
265 val_265
193 val_193
401 val_401
150 val_150
273 val_273
224 val_224
369 val_369
128 val_128
213 val_213
146 val_146
406 val_406
429 val_429
374 val_374
PREHOOK: query: explain SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1
PREHOOK: type: QUERY
POSTHOOK: query: explain SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_FALSE 'b' 'a' 'c')) (TOK_LIMIT 1)))

STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 is a root stage

STAGE PLANS:
Stage: Stage-1
Map Reduce
Alias -> Map Operator Tree:
src
TableScan
alias: src
Filter Operator
predicate:
expr: 'b' BETWEEN 'a' AND 'c'
type: boolean
Select Operator
expressions:
expr: key
type: string
expr: value
type: string
outputColumnNames: _col0, _col1
Limit
File Output Operator
compressed: false
GlobalTableId: 0
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Stage: Stage-0
Fetch Operator
limit: 1


PREHOOK: query: SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
238 val_238
PREHOOK: query: explain SELECT * FROM src where 2 between 2 AND '3' LIMIT 1
PREHOOK: type: QUERY
POSTHOOK: query: explain SELECT * FROM src where 2 between 2 AND '3' LIMIT 1
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_FALSE 2 2 '3')) (TOK_LIMIT 1)))

STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 is a root stage

STAGE PLANS:
Stage: Stage-1
Map Reduce
Alias -> Map Operator Tree:
src
TableScan
alias: src
Filter Operator
predicate:
expr: 2 BETWEEN 2 AND '3'
type: boolean
Select Operator
expressions:
expr: key
type: string
expr: value
type: string
outputColumnNames: _col0, _col1
Limit
File Output Operator
compressed: false
GlobalTableId: 0
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Stage: Stage-0
Fetch Operator
limit: 1


PREHOOK: query: SELECT * FROM src where 2 between 2 AND '3' LIMIT 1
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM src where 2 between 2 AND '3' LIMIT 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
238 val_238

0 comments on commit 5f354e5

Please sign in to comment.