
Commit 71b2a26

HIVE-16511: CBO looses inner casts on constants of complex type (Vineet Garg, reviewed by Ashutosh Chauhan)
Vineet Garg committed Oct 10, 2017
1 parent 7463d67 commit 71b2a26
Showing 14 changed files with 118 additions and 44 deletions.
1 change: 0 additions & 1 deletion itests/src/test/resources/testconfiguration.properties
@@ -30,7 +30,6 @@ disabled.query.files=ql_rewrite_gbtoidx.q,\
cbo_rp_subq_not_in.q,\
cbo_rp_subq_exists.q,\
orc_llap.q,\
min_structvalue.q,\
ql_rewrite_gbtoidx_cbo_2.q,\
rcfile_merge1.q,\
smb_mapjoin_8.q,\
@@ -2351,11 +2351,10 @@ private VectorExpression getBetweenFilterExpression(List<ExprNodeDesc> childExpr
}

private boolean isNullConst(ExprNodeDesc exprNodeDesc) {
if (exprNodeDesc instanceof ExprNodeConstantDesc) {
String typeString = exprNodeDesc.getTypeString();
if (typeString.equalsIgnoreCase("void")) {
//null constant could be typed so we need to check the value
if (exprNodeDesc instanceof ExprNodeConstantDesc &&
((ExprNodeConstantDesc) exprNodeDesc).getValue() == null) {
return true;
}
}
return false;
}
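
A minimal, self-contained sketch of the change above, using a hypothetical ConstDesc stand-in rather than Hive's ExprNodeConstantDesc: once the inner CAST is preserved, a null constant can carry a concrete type such as bigint, so a check against the "void" type string misses it, while a check on the constant's value still catches it.

    // Sketch only; ConstDesc is a hypothetical stand-in, not the Hive class.
    public class NullConstSketch {

      static final class ConstDesc {
        final String typeString;
        final Object value;
        ConstDesc(String typeString, Object value) {
          this.typeString = typeString;
          this.value = value;
        }
      }

      // Old check: only an untyped (void) constant counted as a null constant.
      static boolean isNullConstOld(ConstDesc c) {
        return c.typeString.equalsIgnoreCase("void");
      }

      // New check: a null constant may be typed, so inspect its value instead.
      static boolean isNullConstNew(ConstDesc c) {
        return c.value == null;
      }

      public static void main(String[] args) {
        ConstDesc typedNull = new ConstDesc("bigint", null); // a NULL that kept its CAST to bigint
        System.out.println(isNullConstOld(typedNull)); // false -> missed before this patch
        System.out.println(isNullConstNew(typedNull)); // true  -> detected now
      }
    }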
@@ -19,9 +19,12 @@

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;


import org.apache.calcite.adapter.druid.DruidQuery;
import org.apache.calcite.rel.RelFieldCollation;
@@ -40,6 +43,7 @@
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rel.core.Union;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexFieldAccess;
import org.apache.calcite.rex.RexFieldCollation;
@@ -205,12 +209,8 @@ else if (aggregateType == Group.CUBE) {
int i = 0;

for (RexNode r : select.getChildExps()) {
if (RexUtil.isNull(r) && r.getType().getSqlTypeName() != SqlTypeName.NULL) {
// It is NULL value with different type, we need to introduce a CAST
// to keep it
r = select.getCluster().getRexBuilder().makeAbstractCast(r.getType(), r);
}
ASTNode expr = r.accept(new RexVisitor(schema, r instanceof RexLiteral));
ASTNode expr = r.accept(new RexVisitor(schema, r instanceof RexLiteral,
select.getCluster().getRexBuilder()));
String alias = select.getRowType().getFieldNames().get(i++);
ASTNode selectExpr = ASTBuilder.selectExpr(expr, alias);
b.add(selectExpr);
@@ -223,12 +223,8 @@ else if (aggregateType == Group.CUBE) {
List<ASTNode> children = new ArrayList<>();
RexCall call = (RexCall) udtf.getCall();
for (RexNode r : call.getOperands()) {
if (RexUtil.isNull(r) && r.getType().getSqlTypeName() != SqlTypeName.NULL) {
// It is NULL value with different type, we need to introduce a CAST
// to keep it
r = select.getCluster().getRexBuilder().makeAbstractCast(r.getType(), r);
}
ASTNode expr = r.accept(new RexVisitor(schema, r instanceof RexLiteral));
ASTNode expr = r.accept(new RexVisitor(schema, r instanceof RexLiteral,
select.getCluster().getRexBuilder()));
children.add(expr);
}
ASTBuilder sel = ASTBuilder.construct(HiveParser.TOK_SELEXPR, "TOK_SELEXPR");
@@ -460,19 +456,41 @@ public void visit(RelNode node, int ordinal, RelNode parent) {

}


static class RexVisitor extends RexVisitorImpl<ASTNode> {

private final Schema schema;
private final boolean useTypeQualInLiteral;
private final RexBuilder rexBuilder;
// this is to keep track of null literal which already has been visited
private Map<RexLiteral, Boolean> nullLiteralMap ;


protected RexVisitor(Schema schema, boolean useTypeQualInLiteral) {
this(schema, useTypeQualInLiteral, null);

}
protected RexVisitor(Schema schema) {
this(schema, false);
}

protected RexVisitor(Schema schema, boolean useTypeQualInLiteral) {
protected RexVisitor(Schema schema, boolean useTypeQualInLiteral, RexBuilder rexBuilder) {
super(true);
this.schema = schema;
this.useTypeQualInLiteral = useTypeQualInLiteral;
this.rexBuilder = rexBuilder;

this.nullLiteralMap =
new TreeMap<>(new Comparator<RexLiteral>(){
// RexLiteral's equal only consider value and type which isn't sufficient
// so providing custom comparator which distinguishes b/w objects irrespective
// of value/type
@Override
public int compare(RexLiteral o1, RexLiteral o2) {
if(o1 == o2) return 0;
else return 1;
}
});
}
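
An aside on the comparator above: RexLiteral's equals compares only value and type, so two separately built NULL literals of the same type look identical even though only one of them has been wrapped yet; the TreeMap with an object-identity comparator approximates reference-identity keys. Below is a short sketch of the same idea using the JDK's IdentityHashMap, with plain Objects standing in for RexLiteral (assumption: only identity semantics matter for this bookkeeping).

    import java.util.IdentityHashMap;
    import java.util.Map;

    // Sketch only: plain Objects stand in for RexLiteral instances.
    public class IdentityTrackingSketch {
      public static void main(String[] args) {
        Object nullLiteralA = new Object(); // think: NULL literal typed as timestamp
        Object nullLiteralB = new Object(); // a second, distinct NULL literal of the same type

        // Keys are compared with ==, not equals(), so the two literals stay distinct.
        Map<Object, Boolean> seen = new IdentityHashMap<>();
        seen.put(nullLiteralA, Boolean.TRUE);

        System.out.println(seen.containsKey(nullLiteralA)); // true  - the same object
        System.out.println(seen.containsKey(nullLiteralB)); // false - a different object
      }
    }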

@Override
@@ -497,6 +515,19 @@ public ASTNode visitInputRef(RexInputRef inputRef) {

@Override
public ASTNode visitLiteral(RexLiteral literal) {

if (RexUtil.isNull(literal) && literal.getType().getSqlTypeName() != SqlTypeName.NULL
&& rexBuilder != null) {
// It is NULL value with different type, we need to introduce a CAST
// to keep it
if(nullLiteralMap.containsKey(literal)) {
return ASTBuilder.literal(literal, useTypeQualInLiteral);
}
nullLiteralMap.put(literal, true);
RexNode r = rexBuilder.makeAbstractCast(literal.getType(), literal);

return r.accept(this);
}
return ASTBuilder.literal(literal, useTypeQualInLiteral);
}
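
The method above follows a "wrap once, then re-visit" pattern: the first time a typed NULL literal is seen it is recorded in nullLiteralMap and wrapped in an abstract CAST, and visiting that CAST visits the same literal object again, where the earlier map entry makes it render as a plain literal instead of being wrapped a second time. A minimal sketch of that guard, with strings standing in for Rex nodes and no real Calcite or Hive types:

    import java.util.Collections;
    import java.util.IdentityHashMap;
    import java.util.Set;

    // Sketch of a "wrap once, then re-visit" guard; not the Hive/Calcite code.
    public class WrapOnceSketch {

      // Identity-based set: records which literal objects were already wrapped.
      private final Set<String> alreadyWrapped =
          Collections.newSetFromMap(new IdentityHashMap<>());

      String visitLiteral(String literal) {
        boolean isTypedNull = literal.startsWith("NULL:"); // e.g. "NULL:timestamp"
        if (isTypedNull && !alreadyWrapped.contains(literal)) {
          alreadyWrapped.add(literal);
          // Wrap in a cast and re-visit; the operand is the same literal object,
          // so the guard above prevents a second wrap.
          return "CAST(" + visitLiteral(literal) + " AS " + literal.substring(5) + ")";
        }
        return literal; // plain literal, or a typed NULL already wrapped once
      }

      public static void main(String[] args) {
        WrapOnceSketch visitor = new WrapOnceSketch();
        System.out.println(visitor.visitLiteral("NULL:timestamp"));
        // prints: CAST(NULL:timestamp AS timestamp)
      }
    }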

@@ -523,7 +523,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumns: [12, 0, 14]
selectExpressions: ConstantVectorExpression(val null) -> 12:bigint, VectorCoalesce(columns [13, 0])(children: ConstantVectorExpression(val null) -> 13:tinyint, col 0) -> 14:tinyint
selectExpressions: ConstantVectorExpression(val null) -> 12:bigint, VectorCoalesce(columns [13, 0])(children: ConstantVectorExpression(val null) -> 13:bigint, col 0) -> 14:bigint
Limit Vectorization:
className: VectorLimitOperator
native: true
@@ -772,7 +772,7 @@ STAGE PLANS:
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
Execution mode: llap
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
@@ -5613,9 +5613,9 @@ STAGE PLANS:
native: true
projectedOutputColumns: [0, 1, 2]
Reduce Output Operator
key expressions: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END (type: timestamp)
key expressions: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp)
sort order: ++
Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END (type: timestamp)
Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp)
Reduce Sink Vectorization:
className: VectorReduceSinkMultiKeyOperator
keyColumns: [0, 5]
@@ -5662,13 +5662,13 @@ STAGE PLANS:
Windowing table definition
input alias: ptf_1
name: windowingtablefunction
order by: _col0 ASC NULLS FIRST, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END ASC NULLS FIRST
partition by: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END
order by: _col0 ASC NULLS FIRST, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END ASC NULLS FIRST
partition by: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END
raw input shape:
window functions:
window function definition
alias: rank_window_0
arguments: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END
arguments: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END
name: rank
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
@@ -5781,9 +5781,9 @@ STAGE PLANS:
native: true
projectedOutputColumns: [0, 1, 2]
Reduce Output Operator
key expressions: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END (type: timestamp), p_name (type: string)
key expressions: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp), p_name (type: string)
sort order: +++
Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END (type: timestamp)
Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumns: [0, 5, 1]
@@ -5845,7 +5845,7 @@ STAGE PLANS:
input alias: ptf_1
name: windowingtablefunction
order by: _col1 ASC NULLS FIRST
partition by: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END
partition by: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (CAST( null AS TIMESTAMP)) END
raw input shape:
window functions:
window function definition
45 changes: 45 additions & 0 deletions ql/src/test/results/clientpositive/min_structvalue.q.out
@@ -0,0 +1,45 @@
PREHOOK: query: select max(a), min(a) FROM (select named_struct("field",1) as a union all select named_struct("field",2) as a union all select named_struct("field",cast(null as int)) as a) tmp
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
POSTHOOK: query: select max(a), min(a) FROM (select named_struct("field",1) as a union all select named_struct("field",2) as a union all select named_struct("field",cast(null as int)) as a) tmp
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
{"field":2} {"field":1}
PREHOOK: query: select min(a) FROM (select named_struct("field",1) as a union all select named_struct("field",-2) as a union all select named_struct("field",cast(null as int)) as a) tmp
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
POSTHOOK: query: select min(a) FROM (select named_struct("field",1) as a union all select named_struct("field",-2) as a union all select named_struct("field",cast(null as int)) as a) tmp
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
{"field":-2}
PREHOOK: query: select min(a) FROM (select named_struct("field",1) as a union all select named_struct("field",2) as a union all select named_struct("field",cast(5 as int)) as a) tmp
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
POSTHOOK: query: select min(a) FROM (select named_struct("field",1) as a union all select named_struct("field",2) as a union all select named_struct("field",cast(5 as int)) as a) tmp
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
{"field":1}
PREHOOK: query: select min(a) FROM (select named_struct("field",1, "secf", cast(null as int) ) as a union all select named_struct("field",2, "secf", 3) as a union all select named_struct("field",cast(5 as int), "secf", 4) as a) tmp
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
POSTHOOK: query: select min(a) FROM (select named_struct("field",1, "secf", cast(null as int) ) as a union all select named_struct("field",2, "secf", 3) as a union all select named_struct("field",cast(5 as int), "secf", 4) as a) tmp
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
{"field":1,"secf":null}
PREHOOK: query: select min(a) FROM (select named_struct("field",1, "secf", 2) as a union all select named_struct("field",-2, "secf", 3) as a union all select named_struct("field",cast(null as int), "secf", 1) as a) tmp
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
POSTHOOK: query: select min(a) FROM (select named_struct("field",1, "secf", 2) as a union all select named_struct("field",-2, "secf", 3) as a union all select named_struct("field",cast(null as int), "secf", 1) as a) tmp
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
#### A masked pattern was here ####
{"field":-2,"secf":3}
6 changes: 3 additions & 3 deletions ql/src/test/results/clientpositive/perf/spark/query36.q.out
@@ -215,9 +215,9 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col4, _col5, _col6
Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: (grouping(_col6, 1) + grouping(_col6, 0)) (type: int), CASE WHEN ((grouping(_col6, 0) = 0)) THEN (_col0) ELSE (null) END (type: string), (_col4 / _col5) (type: decimal(37,20))
key expressions: (grouping(_col6, 1) + grouping(_col6, 0)) (type: int), CASE WHEN ((grouping(_col6, 0) = 0)) THEN (_col0) ELSE (CAST( null AS varchar(65535))) END (type: string), (_col4 / _col5) (type: decimal(37,20))
sort order: +++
Map-reduce partition columns: (grouping(_col6, 1) + grouping(_col6, 0)) (type: int), CASE WHEN ((grouping(_col6, 0) = 0)) THEN (_col0) ELSE (null) END (type: string)
Map-reduce partition columns: (grouping(_col6, 1) + grouping(_col6, 0)) (type: int), CASE WHEN ((grouping(_col6, 0) = 0)) THEN (_col0) ELSE (CAST( null AS varchar(65535))) END (type: string)
Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: int)
Reducer 5
@@ -236,7 +236,7 @@ STAGE PLANS:
input alias: ptf_1
name: windowingtablefunction
order by: (_col4 / _col5) ASC NULLS FIRST
partition by: (grouping(_col6, 1) + grouping(_col6, 0)), CASE WHEN ((grouping(_col6, 0) = 0)) THEN (_col0) ELSE (null) END
partition by: (grouping(_col6, 1) + grouping(_col6, 0)), CASE WHEN ((grouping(_col6, 0) = 0)) THEN (_col0) ELSE (CAST( null AS varchar(65535))) END
raw input shape:
window functions:
window function definition
6 changes: 3 additions & 3 deletions ql/src/test/results/clientpositive/perf/spark/query70.q.out
@@ -362,9 +362,9 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col4, _col5
Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: (grouping(_col5, 1) + grouping(_col5, 0)) (type: int), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END (type: string), _col4 (type: decimal(17,2))
key expressions: (grouping(_col5, 1) + grouping(_col5, 0)) (type: int), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (CAST( null AS varchar(65535))) END (type: string), _col4 (type: decimal(17,2))
sort order: ++-
Map-reduce partition columns: (grouping(_col5, 1) + grouping(_col5, 0)) (type: int), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END (type: string)
Map-reduce partition columns: (grouping(_col5, 1) + grouping(_col5, 0)) (type: int), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (CAST( null AS varchar(65535))) END (type: string)
Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: int)
Reducer 5
@@ -383,7 +383,7 @@ STAGE PLANS:
input alias: ptf_1
name: windowingtablefunction
order by: _col4 DESC NULLS LAST
partition by: (grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END
partition by: (grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (CAST( null AS varchar(65535))) END
raw input shape:
window functions:
window function definition
6 changes: 3 additions & 3 deletions ql/src/test/results/clientpositive/perf/spark/query86.q.out
@@ -173,9 +173,9 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col4, _col5
Statistics: Num rows: 261364852 Data size: 35538061226 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: (grouping(_col5, 1) + grouping(_col5, 0)) (type: int), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END (type: string), _col4 (type: decimal(17,2))
key expressions: (grouping(_col5, 1) + grouping(_col5, 0)) (type: int), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (CAST( null AS varchar(65535))) END (type: string), _col4 (type: decimal(17,2))
sort order: ++-
Map-reduce partition columns: (grouping(_col5, 1) + grouping(_col5, 0)) (type: int), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END (type: string)
Map-reduce partition columns: (grouping(_col5, 1) + grouping(_col5, 0)) (type: int), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (CAST( null AS varchar(65535))) END (type: string)
Statistics: Num rows: 261364852 Data size: 35538061226 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: int)
Reducer 4
@@ -194,7 +194,7 @@ STAGE PLANS:
input alias: ptf_1
name: windowingtablefunction
order by: _col4 DESC NULLS LAST
partition by: (grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END
partition by: (grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (CAST( null AS varchar(65535))) END
raw input shape:
window functions:
window function definition
