ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -117,6 +117,7 @@
 import org.apache.hadoop.hive.ql.ddl.DDLDescWithTableProperties;
 import org.apache.hadoop.hive.ql.ddl.DDLWork;
 import org.apache.hadoop.hive.ql.ddl.misc.hooks.InsertCommitHookDesc;
+import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableAnalyzer;
 import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc;
 import org.apache.hadoop.hive.ql.ddl.table.misc.preinsert.PreInsertTableDesc;
 import org.apache.hadoop.hive.ql.ddl.table.misc.properties.AlterTableUnsetPropertiesDesc;
@@ -1568,7 +1569,7 @@ Table materializeCTE(String cteName, CTEClause cte) throws HiveException {
     createTable.addChild(temporary);
     createTable.addChild(cte.cteNode);
 
-    SemanticAnalyzer analyzer = new SemanticAnalyzer(queryState);
+    CreateTableAnalyzer analyzer = new CreateTableAnalyzer(queryState);
     analyzer.initCtx(ctx);
     analyzer.init(false);
 
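For context: materializeCTE wraps the CTE body in a synthetic CREATE TEMPORARY TABLE AST and runs semantic analysis on it. Since Hive's DDL refactor moved CREATE TABLE handling into CreateTableAnalyzer, a plain SemanticAnalyzer no longer produces the create-table descriptor that the caller dereferences afterwards, hence the NPE on the non-CBO path (CalcitePlanner.materializeCTE received the same fix earlier, per the q-file note below). A minimal sketch of the corrected flow, using only the identifiers visible in this hunk; the final analyze call and the comments describe intent and are illustrative, not Hive's exact internals:

    // The synthetic AST assembled above: CREATE TEMPORARY TABLE <cteName> AS <cte body>.
    // CreateTableAnalyzer is the SemanticAnalyzer subclass that understands TOK_CREATETABLE.
    CreateTableAnalyzer analyzer = new CreateTableAnalyzer(queryState);
    analyzer.initCtx(ctx);               // share the caller's compilation context
    analyzer.init(false);                // init(clearPartsCache = false)
    analyzer.analyze(createTable, ctx);  // with a plain SemanticAnalyzer the create-table
                                         // descriptor stays null and the caller NPEs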
@@ -493,4 +493,25 @@ private void checkTablesUsed(String query, Set<String> tables) throws Exception
 
     Assert.assertEquals(new TreeSet<>(tables), new TreeSet<>(result));
   }
+
+  @Test
+  public void testMaterializeCTEWithCBODisabled() throws Exception {
+    HiveConf testConf = new HiveConf(conf);
+    testConf.setBoolVar(HiveConf.ConfVars.HIVE_CBO_ENABLED, false);
+    testConf.setIntVar(HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD, 2);
+
+    SessionState.start(testConf);
+    Context ctx = new Context(testConf);
+
+    String query = "WITH cte AS (SELECT COUNT(*) as cnt FROM table1) " +
+        "SELECT * FROM cte UNION ALL SELECT * FROM cte";
+
+    ASTNode astNode = ParseUtils.parse(query, ctx);
+    QueryState queryState = new QueryState.Builder().withHiveConf(testConf).build();
+    BaseSemanticAnalyzer analyzer = SemanticAnalyzerFactory.get(queryState, astNode);
+    analyzer.initCtx(ctx);
+
+    // This should not throw an NPE after the fix
+    analyzer.analyze(astNode, ctx);
+  }
 }
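A note on what the test exercises: SemanticAnalyzerFactory.get dispatches on the root token of the parsed AST, and with HIVE_CBO_ENABLED set to false it hands back a plain SemanticAnalyzer rather than CalcitePlanner, so analyze() goes straight through SemanticAnalyzer.materializeCTE once the two references to cte meet the materialization threshold of 2 configured above. The assertion is simply that analyze() completes; before the fix it threw a NullPointerException.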
ql/src/test/queries/clientpositive/cte_materialize_no_cbo.q (new file, 16 additions)
@@ -0,0 +1,16 @@
-- HIVE-28724 regression: SemanticAnalyzer.materializeCTE uses the wrong analyzer class.
-- CalcitePlanner.materializeCTE was fixed to use CreateTableAnalyzer,
-- but SemanticAnalyzer.materializeCTE still uses SemanticAnalyzer directly.
-- The bug triggers when CBO is disabled.

set hive.cbo.enable=false;

explain
WITH cte AS (
SELECT COUNT(*) as cnt FROM (SELECT 1 as id) t
)
SELECT * FROM cte
UNION ALL
SELECT * FROM cte
UNION ALL
SELECT * FROM cte;
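This q-file mirrors the unit test at the golden-file level: with hive.cbo.enable=false compilation takes the SemanticAnalyzer path, and three references to the CTE push it past the materialization threshold, so a successful EXPLAIN (the expected output below) is itself the regression check.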
ql/src/test/results/clientpositive/llap/cte_materialize_no_cbo.q.out (new file, 158 additions)
@@ -0,0 +1,158 @@
PREHOOK: query: explain
WITH cte AS (
SELECT COUNT(*) as cnt FROM (SELECT 1 as id) t
)
SELECT * FROM cte
UNION ALL
SELECT * FROM cte
UNION ALL
SELECT * FROM cte
PREHOOK: type: QUERY
PREHOOK: Input: default@cte
#### A masked pattern was here ####
POSTHOOK: query: explain
WITH cte AS (
SELECT COUNT(*) as cnt FROM (SELECT 1 as id) t
)
SELECT * FROM cte
UNION ALL
SELECT * FROM cte
UNION ALL
SELECT * FROM cte
POSTHOOK: type: QUERY
POSTHOOK: Input: default@cte
#### A masked pattern was here ####
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1
  Stage-4 depends on stages: Stage-2, Stage-0
  Stage-0 depends on stages: Stage-1
  Stage-3 depends on stages: Stage-4

STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Edges:
        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: _dummy_table
                  Row Limit Per Split: 1
                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count()
                      minReductionHashAggr: 0.4
                      mode: hash
                      outputColumnNames: _col0
                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        null sort order:
                        sort order:
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: no inputs
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.cte

  Stage: Stage-2
    Dependency Collection

  Stage: Stage-4
    Tez
#### A masked pattern was here ####
      Edges:
        Map 3 <- Union 4 (CONTAINS)
        Map 5 <- Union 4 (CONTAINS)
        Map 6 <- Union 4 (CONTAINS)
#### A masked pattern was here ####
      Vertices:
        Map 3
            Map Operator Tree:
                TableScan
                  alias: cte
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: cnt (type: bigint)
                    outputColumnNames: _col0
                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
                      Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
        Map 5
            Map Operator Tree:
                TableScan
                  alias: cte
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: cnt (type: bigint)
                    outputColumnNames: _col0
                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
                      Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
        Map 6
            Map Operator Tree:
                TableScan
                  alias: cte
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: cnt (type: bigint)
                    outputColumnNames: _col0
                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
                      Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
        Union 4
            Vertex: Union 4

  Stage: Stage-0
    Move Operator
      files:
          hdfs directory: true
#### A masked pattern was here ####

  Stage: Stage-3
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

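Reading the golden plan: Stage-1 computes COUNT(*) once and writes it to the materialized CTE table (note "name: default.cte" in its File Output Operator), Stage-0 moves the result into place, and Stage-4 then unions three cheap scans of that table (Map 3, Map 5, and Map 6 feeding Union 4) instead of recomputing the aggregate per branch. That this plan compiles at all with CBO off is the point of the test: before the fix, the EXPLAIN failed during semantic analysis.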