From 725984d1804ee6cc30ea59307e25ee5b4dcf58b3 Mon Sep 17 00:00:00 2001 From: Stamatis Zampetakis Date: Thu, 6 Jun 2024 09:20:27 +0200 Subject: [PATCH 1/2] Disable hive.optimize.join.disjunctive.transitive.predicates.pushdown by default --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 173d8efcdf5f..576a06dab364 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2575,7 +2575,7 @@ public static enum ConfVars { "and the original filter is removed. If this config is false, the original filter \n" + "is also left in the operator tree at the original place."), HIVE_JOIN_DISJ_TRANSITIVE_PREDICATES_PUSHDOWN("hive.optimize.join.disjunctive.transitive.predicates.pushdown", - true, "Whether to transitively infer disjunctive predicates across joins. \n" + false, "Whether to transitively infer disjunctive predicates across joins. \n" + "Disjunctive predicates are hard to simplify and pushing them down might lead to infinite rule matching " + "causing stackoverflow and OOM errors"), HIVE_POINT_LOOKUP_OPTIMIZER("hive.optimize.point.lookup", true, From 38f92b03b5a6999faf929be41953292ccffb8c32 Mon Sep 17 00:00:00 2001 From: Stamatis Zampetakis Date: Mon, 10 Jun 2024 11:21:24 +0200 Subject: [PATCH 2/2] Update affected plans All these are expected regressions highlighting the impact that this change may have on actual queries. Observe that in terms of simplifications there are difficulties in the presence of negation/disjunction and the HiveJoinPushTransitivePredicatesRule. (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) is equivalent to UDFToDouble(key) > 100.0D Some predicates that were transitively propagated were redundant (see plans before). --- .../llap/correlationoptimizer8.q.out | 134 +++++++++--------- .../results/clientpositive/llap/join34.q.out | 60 ++++---- .../results/clientpositive/llap/join35.q.out | 66 ++++----- .../llap/materialized_view_rewrite_7.q.out | 10 +- .../materialized_view_rewrite_by_text_9.q.out | 2 +- 5 files changed, 136 insertions(+), 136 deletions(-) diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer8.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer8.q.out index 0565c11698ce..ccc701109235 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer8.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer8.q.out @@ -40,24 +40,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) + filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToDouble(key) < 20.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string) minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -65,24 +65,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x1 - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) + filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string) minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -90,21 +90,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -116,13 +116,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 296 Data size: 28120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 332 Data size: 31540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -134,14 +134,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 9150 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 9150 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 9150 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -154,13 +154,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 296 Data size: 28120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 332 Data size: 31540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Union 3 Vertex: Union 3 @@ -248,24 +248,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) + filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToDouble(key) < 20.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string) minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -273,24 +273,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x1 - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) + filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string) minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -298,21 +298,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -324,13 +324,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 296 Data size: 28120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 332 Data size: 31540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -342,14 +342,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 9150 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 9150 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 9150 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -362,13 +362,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 296 Data size: 28120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 332 Data size: 31540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Union 3 Vertex: Union 3 @@ -896,24 +896,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) + filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToDouble(key) < 20.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string) minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -921,24 +921,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x1 - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) + filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 26344 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string), value (type: string) minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 148 Data size: 27528 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 30876 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 148 Data size: 27528 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 30876 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -946,21 +946,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -972,13 +972,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 296 Data size: 28120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 332 Data size: 31540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -990,14 +990,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 9150 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 9150 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 9150 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1010,17 +1010,17 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 148 Data size: 27528 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 30876 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 296 Data size: 28120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 332 Data size: 31540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/join34.q.out b/ql/src/test/results/clientpositive/llap/join34.q.out index 2da4893eae51..98ffe78c779b 100644 --- a/ql/src/test/results/clientpositive/llap/join34.q.out +++ b/ql/src/test/results/clientpositive/llap/join34.q.out @@ -36,14 +36,14 @@ OPTIMIZED SQL: SELECT `t6`.`key`, `t6`.`value`, `t4`.`value` AS `value1` FROM (SELECT * FROM (SELECT `key`, `value` FROM `default`.`src` -WHERE CAST(`key` AS DOUBLE) BETWEEN 20 AND 100 AND `key` < 20 +WHERE `key` < 20 UNION ALL SELECT `key`, `value` FROM `default`.`src` -WHERE CAST(`key` AS DOUBLE) BETWEEN 20 AND 100 AND `key` > 100) AS `t3`) AS `t4` +WHERE `key` > 100) AS `t3`) AS `t4` INNER JOIN (SELECT `key`, `value` FROM `default`.`src1` -WHERE CAST(`key` AS DOUBLE) BETWEEN 20 AND 100 AND `key` IS NOT NULL) AS `t6` ON `t4`.`key` = `t6`.`key` +WHERE `key` IS NOT NULL) AS `t6` ON `t4`.`key` = `t6`.`key` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -64,22 +64,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_40_container, bigKeyColName:key, smallTablePos:1, keyRatio:0.092 + filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_40_container, bigKeyColName:key, smallTablePos:1, keyRatio:0.1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 26344 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToDouble(key) < 20.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 26344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 5 => 23 + Estimated key counts: Map 5 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -87,18 +87,18 @@ STAGE PLANS: input vertices: 1 Map 5 Position of Big Table: 0 - Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 13300 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 13300 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 13300 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -121,10 +121,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: key, value, val2 - Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 13300 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector_hll(key), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector_hll(value), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector_hll(val2) - minReductionHashAggr: 0.9782609 + minReductionHashAggr: 0.98 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE @@ -180,21 +180,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x1 - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) + filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 26344 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 26344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 5 => 23 + Estimated key counts: Map 5 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -202,18 +202,18 @@ STAGE PLANS: input vertices: 1 Map 5 Position of Big Table: 0 - Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 13300 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 13300 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 13300 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -236,10 +236,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: key, value, val2 - Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 13300 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector_hll(key), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector_hll(value), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector_hll(val2) - minReductionHashAggr: 0.9782609 + minReductionHashAggr: 0.98 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE @@ -295,17 +295,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string) @@ -313,7 +313,7 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string) auto parallelism: true @@ -324,7 +324,7 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string) auto parallelism: true diff --git a/ql/src/test/results/clientpositive/llap/join35.q.out b/ql/src/test/results/clientpositive/llap/join35.q.out index 41040633cc90..a4fadbbb70eb 100644 --- a/ql/src/test/results/clientpositive/llap/join35.q.out +++ b/ql/src/test/results/clientpositive/llap/join35.q.out @@ -36,16 +36,16 @@ OPTIMIZED SQL: SELECT `t8`.`key`, `t8`.`value`, `t6`.`$f1` AS `cnt` FROM (SELECT * FROM (SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`src` -WHERE CAST(`key` AS DOUBLE) BETWEEN 20 AND 100 AND `key` < 20 +WHERE `key` < 20 GROUP BY `key` UNION ALL SELECT `key`, COUNT(*) AS `$f1` FROM `default`.`src` -WHERE CAST(`key` AS DOUBLE) BETWEEN 20 AND 100 AND `key` > 100 +WHERE `key` > 100 GROUP BY `key`) AS `t5`) AS `t6` INNER JOIN (SELECT `key`, `value` FROM `default`.`src1` -WHERE CAST(`key` AS DOUBLE) BETWEEN 20 AND 100 AND `key` IS NOT NULL) AS `t8` ON `t6`.`key` = `t8`.`key` +WHERE `key` IS NOT NULL) AS `t8` ON `t6`.`key` = `t8`.`key` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -66,20 +66,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) + filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) < 20.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToDouble(key) < 20.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string) minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string) @@ -87,7 +87,7 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: true @@ -134,20 +134,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x1 - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) + filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and (UDFToDouble(key) > 100.0D)) (type: boolean) - Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string) minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string) @@ -155,7 +155,7 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: true @@ -202,17 +202,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string) @@ -220,7 +220,7 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string) auto parallelism: true @@ -231,7 +231,7 @@ STAGE PLANS: numBuckets: -1 sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string) auto parallelism: true @@ -283,11 +283,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 7 => 23 + Estimated key counts: Map 7 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -295,18 +295,18 @@ STAGE PLANS: input vertices: 1 Map 7 Position of Big Table: 0 - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 9150 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col3 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 8950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 8950 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -329,10 +329,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) outputColumnNames: key, value, val2 - Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 8950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector_hll(key), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector_hll(value), min(val2), max(val2), count(val2), compute_bit_vector_hll(val2) - minReductionHashAggr: 0.9782609 + minReductionHashAggr: 0.98 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE @@ -391,11 +391,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 7 => 23 + Estimated key counts: Map 7 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -403,18 +403,18 @@ STAGE PLANS: input vertices: 1 Map 7 Position of Big Table: 0 - Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 9150 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col3 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 8950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 8950 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -437,10 +437,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) outputColumnNames: key, value, val2 - Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 8950 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector_hll(key), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector_hll(value), min(val2), max(val2), count(val2), compute_bit_vector_hll(val2) - minReductionHashAggr: 0.9782609 + minReductionHashAggr: 0.98 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out index 8da9a81d7cca..afa816a031fd 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out @@ -605,21 +605,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: depts_n6 - filterExpr: ((deptno > 10) and (deptno < 20) and ((deptno <= 11) or (deptno >= 19)) and name is not null) (type: boolean) + filterExpr: ((deptno > 10) and (deptno < 20) and name is not null) (type: boolean) Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((deptno > 10) and (deptno < 20) and ((deptno <= 11) or (deptno >= 19)) and name is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((deptno > 10) and (deptno < 20) and name is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: deptno (type: int), name (type: varchar(256)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: varchar(256)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_9.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_9.q.out index 52ce097a039f..5ce787659f1c 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_9.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_9.q.out @@ -79,7 +79,7 @@ POSTHOOK: Input: default@t2 CBO PLAN: HiveSemiJoin(condition=[=($0, $1)], joinType=[semi]) HiveProject(col0=[$0]) - HiveFilter(condition=[IN($0, 1, 2)]) + HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, t2]], table:alias=[t2]) HiveProject($f0=[$0]) HiveUnion(all=[true])