HIVE-23877: Hive on Spark incorrect partition pruning ANALYZE TABLE
fornaix committed Jul 23, 2020
1 parent 3ddabad · commit dfa690d
Showing 3 changed files with 28 additions and 2 deletions.
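In brief: for an ANALYZE TABLE command, the partition list of a TableScanOperator should be exactly the partitions named in the command, but both optimizer rules touched below obtained it from the regular partition pruner, which is what produced the incorrect pruning reported in HIVE-23877. The fix threads an analyzeCommand flag, read from QueryProperties, into a new ParseContext overload that short-circuits pruning for ANALYZE commands. The shared call-site pattern, assembled from the two hunks below into a sketch (the wrapper class and method name are mine, not the commit's):

import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Hypothetical helper illustrating the pattern both rules now follow;
// pctx and ts are supplied by each rule's processor context.
final class PartitionResolution {
  static PrunedPartitionList resolve(ParseContext pctx, TableScanOperator ts)
      throws SemanticException {
    // Query-level flag: is this statement an ANALYZE TABLE command?
    boolean analyzeCommand = pctx.getQueryProperties().isAnalyzeCommand();
    // For ANALYZE commands the new overload returns the table-spec partitions;
    // otherwise it falls back to the ordinary pruned partition list.
    return pctx.getPrunedPartitions(ts, analyzeCommand);
  }
}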
Changed file 1 of 3 (name not shown):
@@ -213,8 +213,9 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       Table table = ts.getConf().getTableMetadata();
       PrunedPartitionList prunedPartList = null;
       try {
+        boolean analyzeCommand = opTraitsCtx.getParseContext().getQueryProperties().isAnalyzeCommand();
         prunedPartList =
-            opTraitsCtx.getParseContext().getPrunedPartitions(ts.getConf().getAlias(), ts);
+            opTraitsCtx.getParseContext().getPrunedPartitions(ts, analyzeCommand);
       } catch (HiveException e) {
         prunedPartList = null;
       }
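Previously this rule resolved partitions by alias via getPrunedPartitions(ts.getConf().getAlias(), ts), which always runs the partition pruner, even for ANALYZE TABLE. It now passes the query-level flag so ParseContext can short-circuit (see the new overload in ParseContext.java below).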
Changed file 2 of 3 (name not shown):

@@ -133,7 +133,8 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
       TableScanOperator tsop = (TableScanOperator) nd;
       AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
-      PrunedPartitionList partList = aspCtx.getParseContext().getPrunedPartitions(tsop);
+      boolean analyzeCommand = aspCtx.getParseContext().getQueryProperties().isAnalyzeCommand();
+      PrunedPartitionList partList = aspCtx.getParseContext().getPrunedPartitions(tsop, analyzeCommand);
       ColumnStatsList colStatsCached = aspCtx.getParseContext().getColStatsCached(partList);
       Table table = tsop.getConf().getTableMetadata();
 
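The statistics-annotation rule (the processor working on AnnotateStatsProcCtx) gets the same treatment, so statistics for an ANALYZE command are computed over exactly the partitions the command names.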
Changed file 3 of 3: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (24 additions, 0 deletions)
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
+import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Multimap;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.Context;
@@ -39,6 +40,7 @@
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.hooks.LineageInfo;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
@@ -538,6 +540,28 @@ public PrunedPartitionList getPrunedPartitions(String alias, TableScanOperator ts)
     return partsList;
   }
 
+  public PrunedPartitionList getPrunedPartitions(TableScanOperator ts, boolean analyzeCommand)
+      throws SemanticException {
+    PrunedPartitionList partsList = opToPartList.get(ts);
+    if (partsList != null) {
+      return partsList;
+    }
+    if (analyzeCommand) {
+      Table table = ts.getConf().getTableMetadata();
+      ImmutableSet<Partition> partitions;
+      if (table.getTableSpec().partitions == null) {
+        partitions = ImmutableSet.of();
+      } else {
+        partitions = ImmutableSet.copyOf(table.getTableSpec().partitions);
+      }
+      PrunedPartitionList partList = new PrunedPartitionList(table, partitions, Collections.emptyList(), false);
+      opToPartList.put(ts, partList);
+      return partList;
+    } else {
+      return getPrunedPartitions(ts);
+    }
+  }
+
   /**
    * @return the opToPartToSkewedPruner
    */
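Design notes on the new overload: it consults and populates the opToPartList cache, matching the existing getPrunedPartitions variants; for an ANALYZE command (for example ANALYZE TABLE t PARTITION (dt='2020-07-01') COMPUTE STATISTICS) it wraps exactly the partitions recorded in the command's table spec, defensively treating a null partitions field (e.g. an unpartitioned table) as the empty set; and it constructs the PrunedPartitionList with no referred partition columns and a final false flag, which presumably marks the list as containing no unknown partitions. Non-ANALYZE queries delegate unchanged to the existing single-argument overload.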