Skip to content

Commit

Permalink
HIVE-25856: Intermittent null ordering in plans of queries with GROUP…
Browse files Browse the repository at this point in the history
… BY and LIMIT (Stamatis Zampetakis, reviewed by Krisztian Kasa)

Remove singleton instantiation of HiveAggregateSortLimitRule, which
binds the default null ordering behavior for every subsequent query.

Before this change setting hive.default.nulls.last has no effect in the
plan. It also means that if another test/user sets this property it
will have an impact on every subsequent query.

Closes #2932
  • Loading branch information
zabetak committed Jan 12, 2022
1 parent 459f8f5 commit 587c698
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.RelFieldCollation;
import org.apache.calcite.tools.RelBuilder;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelCollation;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
Expand Down Expand Up @@ -55,29 +54,13 @@
*/
public class HiveAggregateSortLimitRule extends RelOptRule {

private static HiveAggregateSortLimitRule instance = null;

public static final HiveAggregateSortLimitRule getInstance(HiveConf hiveConf) {
if (instance == null) {
RelFieldCollation.NullDirection defaultAscNullDirection;
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_DEFAULT_NULLS_LAST)) {
defaultAscNullDirection = RelFieldCollation.NullDirection.LAST;
} else {
defaultAscNullDirection = RelFieldCollation.NullDirection.FIRST;
}
instance = new HiveAggregateSortLimitRule(defaultAscNullDirection);
}

return instance;
}

private final RelFieldCollation.NullDirection defaultAscNullDirection;


private HiveAggregateSortLimitRule(RelFieldCollation.NullDirection defaultAscNullDirection) {
public HiveAggregateSortLimitRule(boolean nullsLast) {
super(operand(HiveSortLimit.class, operand(HiveAggregate.class, any())),
HiveRelFactories.HIVE_BUILDER, "HiveAggregateSortRule");
this.defaultAscNullDirection = defaultAscNullDirection;
this.defaultAscNullDirection =
nullsLast ? RelFieldCollation.NullDirection.LAST : RelFieldCollation.NullDirection.FIRST;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2209,10 +2209,13 @@ private RelNode applyPostJoinOrderingTransform(RelNode basePlan, RelMetadataProv

// 1. Run other optimizations that do not need stats
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
ProjectRemoveRule.Config.DEFAULT.toRule(), HiveUnionMergeRule.INSTANCE,
ProjectRemoveRule.Config.DEFAULT.toRule(),
HiveUnionMergeRule.INSTANCE,
new HiveUnionSimpleSelectsToInlineTableRule(dummyTableScan),
HiveAggregateProjectMergeRule.INSTANCE, HiveProjectMergeRule.INSTANCE_NO_FORCE,
HiveJoinCommuteRule.INSTANCE, HiveAggregateSortLimitRule.getInstance(conf));
HiveAggregateProjectMergeRule.INSTANCE,
HiveProjectMergeRule.INSTANCE_NO_FORCE,
HiveJoinCommuteRule.INSTANCE,
new HiveAggregateSortLimitRule(conf.getBoolVar(ConfVars.HIVE_DEFAULT_NULLS_LAST)));

// 2. Run aggregate-join transpose (cost based)
// If it failed because of missing stats, we continue with
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
CREATE TABLE person (id INTEGER, country STRING);
SET hive.default.nulls.last=false;
EXPLAIN CBO SELECT country, count(1) FROM person GROUP BY country LIMIT 5;
SET hive.default.nulls.last=true;
EXPLAIN CBO SELECT country, count(1) FROM person GROUP BY country LIMIT 5;
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
PREHOOK: query: CREATE TABLE person (id INTEGER, country STRING)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@person
POSTHOOK: query: CREATE TABLE person (id INTEGER, country STRING)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@person
PREHOOK: query: EXPLAIN CBO SELECT country, count(1) FROM person GROUP BY country LIMIT 5
PREHOOK: type: QUERY
PREHOOK: Input: default@person
#### A masked pattern was here ####
POSTHOOK: query: EXPLAIN CBO SELECT country, count(1) FROM person GROUP BY country LIMIT 5
POSTHOOK: type: QUERY
POSTHOOK: Input: default@person
#### A masked pattern was here ####
CBO PLAN:
HiveSortLimit(sort0=[$1], dir0=[ASC-nulls-first], fetch=[5])
HiveProject(country=[$0], $f1=[$1])
HiveAggregate(group=[{1}], agg#0=[count()])
HiveTableScan(table=[[default, person]], table:alias=[person])

PREHOOK: query: EXPLAIN CBO SELECT country, count(1) FROM person GROUP BY country LIMIT 5
PREHOOK: type: QUERY
PREHOOK: Input: default@person
#### A masked pattern was here ####
POSTHOOK: query: EXPLAIN CBO SELECT country, count(1) FROM person GROUP BY country LIMIT 5
POSTHOOK: type: QUERY
POSTHOOK: Input: default@person
#### A masked pattern was here ####
CBO PLAN:
HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[5])
HiveProject(country=[$0], $f1=[$1])
HiveAggregate(group=[{1}], agg#0=[count()])
HiveTableScan(table=[[default, person]], table:alias=[person])

0 comments on commit 587c698

Please sign in to comment.