Skip to content

Commit

Permalink
Merge #28393 #28411
Browse files Browse the repository at this point in the history
28393: opt: support DISTINCT for aggregates + simplification rules r=RaduBerinde a=RaduBerinde

#### opt: support DISTINCT for aggregates

Adding optbuilder and execbuilder support for DISTINCT aggregate
arguments. We use a new `AggDistinct` operator which acts as a
modifier that wraps the Variable argument.

Note that DISTINCT does not affect any existing normalization rules;
e.g. it does not change whether an aggregation ignores nulls. The
existing rules should work just the same; added a few testcases.

Release note: None

#### opt: remove DISTINCT from aggregations for which it makes no difference

Some aggregations, like `min`, are unaffected by `DISTINCT`. Add
normalization rule to remove `AggDistinct` in these cases.

Release note: None

#### opt: remove DISTINCT when argument is unique within group

This change introduces a rule that removes `AggDistinct` if we can
prove that there are no duplicate input values within a group (i.e.
the input column plus the grouping columns form a key).

Release note: None

28411: sql: disallow specifying both FORCE_INDEX and NO_INDEX_JOIN r=RaduBerinde a=RaduBerinde

The current code allows both `FORCE_INDEX` and `NO_INDEX_JOIN`
"hints", and the query fails if the index we force requires
index-join.

This semantic would be tricky to support with the optimizer (it would
be a "no-plans" condition during exploration).

This commit changes the behavior to disallow setting both hints at the
same time. The code to combine and validate `IndexHints` is moved in a
method; the code is kept general even though with this change it's
never valid to have more than one parameter.

Release note (sql change): It is now an error to specify both
FORCE_INDEX and NO_INDEX_JOIN hints at the same time.

Co-authored-by: Radu Berinde <radu@cockroachlabs.com>
  • Loading branch information
craig[bot] and RaduBerinde committed Aug 10, 2018
3 parents 2f2467c + 988ab07 + 1669169 commit 131f4b0
Show file tree
Hide file tree
Showing 39 changed files with 963 additions and 235 deletions.
12 changes: 6 additions & 6 deletions docs/generated/sql/bnf/stmt_block.bnf
Original file line number Diff line number Diff line change
Expand Up @@ -1710,7 +1710,7 @@ distinct_on_clause ::=
'DISTINCT' 'ON' '(' expr_list ')'

table_ref ::=
relation_expr opt_index_hints opt_ordinality opt_alias_clause
relation_expr opt_index_flags opt_ordinality opt_alias_clause
| select_with_parens opt_ordinality opt_alias_clause
| joined_table
| '(' joined_table ')' opt_ordinality alias_clause
Expand Down Expand Up @@ -1915,10 +1915,10 @@ from_list ::=
window_definition_list ::=
( window_definition ) ( ( ',' window_definition ) )*

opt_index_hints ::=
opt_index_flags ::=
'@' index_name
| '@' '[' iconst64 ']'
| '@' '{' index_hints_param_list '}'
| '@' '{' index_flags_param_list '}'
|

opt_ordinality ::=
Expand Down Expand Up @@ -2058,8 +2058,8 @@ tuple1_unambiguous_values ::=
window_definition ::=
window_name 'AS' window_specification

index_hints_param_list ::=
( index_hints_param ) ( ( ',' index_hints_param ) )*
index_flags_param_list ::=
( index_flags_param ) ( ( ',' index_flags_param ) )*

join_type ::=
'FULL' join_outer
Expand Down Expand Up @@ -2135,7 +2135,7 @@ trim_list ::=
| 'FROM' expr_list
| expr_list

index_hints_param ::=
index_flags_param ::=
'FORCE_INDEX' '=' index_name
| 'NO_INDEX_JOIN'

Expand Down
2 changes: 1 addition & 1 deletion docs/generated/sql/bnf/table_ref.bnf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
table_ref ::=
table_name ( '@' scan_parameters | ) ( 'WITH' 'ORDINALITY' | ) ( ( 'AS' table_alias_name ( '(' ( ( name ) ( ( ',' name ) )* ) ')' | ) | table_alias_name ( '(' ( ( name ) ( ( ',' name ) )* ) ')' | ) ) | )
table_name opt_index_flags ( 'WITH' 'ORDINALITY' | ) ( ( 'AS' table_alias_name ( '(' ( ( name ) ( ( ',' name ) )* ) ')' | ) | table_alias_name ( '(' ( ( name ) ( ( ',' name ) )* ) ')' | ) ) | )
| '(' select_stmt ')' ( 'WITH' 'ORDINALITY' | ) ( ( 'AS' table_alias_name ( '(' ( ( name ) ( ( ',' name ) )* ) ')' | ) | table_alias_name ( '(' ( ( name ) ( ( ',' name ) )* ) ')' | ) ) | )
| joined_table
| '(' joined_table ')' ( 'WITH' 'ORDINALITY' | ) ( ( 'AS' table_alias_name ( '(' ( ( name ) ( ( ',' name ) )* ) ')' | ) | table_alias_name ( '(' ( ( name ) ( ( ',' name ) )* ) ')' | ) ) | )
Expand Down
28 changes: 17 additions & 11 deletions pkg/sql/data_source.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,10 @@ func (p *planner) getVirtualDataSource(
// getDataSource builds a planDataSource from a single data source clause
// (TableExpr) in a SelectClause.
func (p *planner) getDataSource(
ctx context.Context, src tree.TableExpr, hints *tree.IndexHints, scanVisibility scanVisibility,
ctx context.Context,
src tree.TableExpr,
indexFlags *tree.IndexFlags,
scanVisibility scanVisibility,
) (planDataSource, error) {
switch t := src.(type) {
case *tree.NormalizableTableName:
Expand All @@ -124,7 +127,7 @@ func (p *planner) getDataSource(
}

colCfg := scanColumnsConfig{visibility: scanVisibility}
return p.getPlanForDesc(ctx, desc, tn, hints, colCfg)
return p.getPlanForDesc(ctx, desc, tn, indexFlags, colCfg)

case *tree.RowsFromExpr:
return p.getPlanForRowsFrom(ctx, t.Items...)
Expand Down Expand Up @@ -160,19 +163,19 @@ func (p *planner) getDataSource(
}, nil

case *tree.ParenTableExpr:
return p.getDataSource(ctx, t.Expr, hints, scanVisibility)
return p.getDataSource(ctx, t.Expr, indexFlags, scanVisibility)

case *tree.TableRef:
return p.getTableScanByRef(ctx, t, hints, scanVisibility)
return p.getTableScanByRef(ctx, t, indexFlags, scanVisibility)

case *tree.AliasedTableExpr:
// Alias clause: source AS alias(cols...)

if t.Hints != nil {
hints = t.Hints
if t.IndexFlags != nil {
indexFlags = t.IndexFlags
}

src, err := p.getDataSource(ctx, t.Expr, hints, scanVisibility)
src, err := p.getDataSource(ctx, t.Expr, indexFlags, scanVisibility)
if err != nil {
return src, err
}
Expand Down Expand Up @@ -219,7 +222,10 @@ func (p *planner) getTableDescByID(
}

func (p *planner) getTableScanByRef(
ctx context.Context, tref *tree.TableRef, hints *tree.IndexHints, scanVisibility scanVisibility,
ctx context.Context,
tref *tree.TableRef,
indexFlags *tree.IndexFlags,
scanVisibility scanVisibility,
) (planDataSource, error) {
desc, err := p.getTableDescByID(ctx, sqlbase.ID(tref.TableID))
if err != nil {
Expand All @@ -246,7 +252,7 @@ func (p *planner) getTableScanByRef(
addUnwantedAsHidden: true,
visibility: scanVisibility,
}
src, err := p.getPlanForDesc(ctx, desc, &tn, hints, colCfg)
src, err := p.getPlanForDesc(ctx, desc, &tn, indexFlags, colCfg)
if err != nil {
return src, err
}
Expand Down Expand Up @@ -316,7 +322,7 @@ func (p *planner) getPlanForDesc(
ctx context.Context,
desc *sqlbase.TableDescriptor,
tn *tree.TableName,
hints *tree.IndexHints,
indexFlags *tree.IndexFlags,
colCfg scanColumnsConfig,
) (planDataSource, error) {
if desc.IsView() {
Expand All @@ -336,7 +342,7 @@ func (p *planner) getPlanForDesc(

// This name designates a real table.
scan := p.Scan()
if err := scan.initTable(ctx, p, desc, hints, colCfg); err != nil {
if err := scan.initTable(ctx, p, desc, indexFlags, colCfg); err != nil {
return planDataSource{}, err
}

Expand Down
3 changes: 0 additions & 3 deletions pkg/sql/logictest/testdata/logic_test/select_index_hints
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,3 @@ SELECT * FROM abcd@badidx

query error index \"badidx\" not found
SELECT * FROM abcd@{FORCE_INDEX=badidx}

query error index \"cd\" is not covering and NO_INDEX_JOIN was specified
SELECT b, c, d FROM abcd@{FORCE_INDEX=cd,NO_INDEX_JOIN} WHERE c = 10
6 changes: 3 additions & 3 deletions pkg/sql/logictest/testdata/planner_test/distsql_misc
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ query T
SELECT url FROM [EXPLAIN (DISTSQL)
SELECT leftside.v, leftside.k, leftside.data, rightside.v, rightside.k, rightside.data
FROM
(SELECT v,k,data FROM test@{FORCE_INDEX=[1],NO_INDEX_JOIN} ORDER BY v,k,data) AS leftside
(SELECT v,k,data FROM test@{FORCE_INDEX=[1]} ORDER BY v,k,data) AS leftside
FULL OUTER JOIN
(SELECT v,k,data FROM test@{FORCE_INDEX=[2],NO_INDEX_JOIN} ORDER BY v,k,data) AS rightside
(SELECT v,k,data FROM test@{FORCE_INDEX=[2]} ORDER BY v,k,data) AS rightside
ON leftside.v = rightside.v AND leftside.k = rightside.k AND leftside.data = rightside.data
WHERE (leftside.k IS NULL) OR
(rightside.k IS NULL)
Expand Down Expand Up @@ -44,7 +44,7 @@ SELECT url FROM [EXPLAIN (DISTSQL)
FROM
(SELECT child_id, id, id2 FROM child@{NO_INDEX_JOIN} ORDER BY id, id2) AS p
FULL OUTER JOIN
(SELECT id, id2 FROM parent@{FORCE_INDEX=[2],NO_INDEX_JOIN} ORDER BY id, id2) AS c
(SELECT id, id2 FROM parent@{FORCE_INDEX=[2]} ORDER BY id, id2) AS c
ON p.id = c.id AND p.id2 = c.id2
WHERE (p.id IS NOT NULL OR p.id2 IS NOT NULL) AND
c.id IS NULL AND c.id2 IS NULL
Expand Down
5 changes: 0 additions & 5 deletions pkg/sql/logictest/testdata/planner_test/planning_errors
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,5 @@
# Check that plans that fail during expansion/optimization do not cause
# memory leaks. #17274

statement error index "aa" is not covering
CREATE TABLE kv(k INT, v INT);
CREATE INDEX aa ON kv(v);
SELECT k FROM [SHOW JOBS], kv@{FORCE_INDEX=aa,NO_INDEX_JOIN};

statement error pgcode 42P01 relation "nonexistent" does not exist
SELECT * FROM [SHOW JOBS], [SHOW CREATE TABLE nonexistent];
6 changes: 2 additions & 4 deletions pkg/sql/logictest/testdata/planner_test/select_index_hints
Original file line number Diff line number Diff line change
Expand Up @@ -149,19 +149,17 @@ render · ·
· spans ALL

query TTT
EXPLAIN SELECT b, c, d FROM abcd@{FORCE_INDEX=bcd,NO_INDEX_JOIN} WHERE c = 10
EXPLAIN SELECT b, c, d FROM abcd@{FORCE_INDEX=bcd} WHERE c = 10
----
render · ·
└── scan · ·
· table abcd@bcd
· hint no index join
· spans ALL

query TTT
EXPLAIN SELECT b, c, d FROM abcd@{FORCE_INDEX=primary,NO_INDEX_JOIN} WHERE c = 10
EXPLAIN SELECT b, c, d FROM abcd@{FORCE_INDEX=primary} WHERE c = 10
----
render · ·
└── scan · ·
· table abcd@primary
· hint no index join
· spans ALL
7 changes: 7 additions & 0 deletions pkg/sql/opt/exec/execbuilder/relational_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -505,9 +505,15 @@ func (b *Builder) buildGroupBy(ev memo.ExprView) (execPlan, error) {
fn := aggregations.Child(i)
name, overload := memo.FindAggregateOverload(fn)

distinct := false
argIdx := make([]exec.ColumnOrdinal, fn.ChildCount())
for j := range argIdx {
child := fn.Child(j)

if child.Operator() == opt.AggDistinctOp {
distinct = true
child = child.Child(0)
}
if child.Operator() != opt.VariableOp {
return execPlan{}, errors.Errorf("only VariableOp args supported")
}
Expand All @@ -518,6 +524,7 @@ func (b *Builder) buildGroupBy(ev memo.ExprView) (execPlan, error) {
aggInfos[i] = exec.AggInfo{
FuncName: name,
Builtin: overload,
Distinct: distinct,
ResultType: fn.Logical().Scalar.Type,
ArgCols: argIdx,
}
Expand Down
41 changes: 28 additions & 13 deletions pkg/sql/opt/exec/execbuilder/testdata/aggregate
Original file line number Diff line number Diff line change
Expand Up @@ -173,22 +173,37 @@ group · · (count int, sum decimal, max int) ·
· table kv@primary · ·
· spans ALL · ·

query TTTTT
EXPLAIN (VERBOSE) SELECT count(v), count(DISTINCT v), sum(v), sum(DISTINCT v), min(v), min(DISTINCT v) FROM kv
----
group · · (count, count, sum, sum, min, min) ·
│ aggregate 0 count(v) · ·
│ aggregate 1 count(DISTINCT v) · ·
│ aggregate 2 sum(v) · ·
│ aggregate 3 sum(DISTINCT v) · ·
│ aggregate 4 min(v) · ·
│ aggregate 5 min(v) · ·
│ scalar · · ·
└── scan · · (v) ·
· table kv@primary · ·
· spans ALL · ·

query TTTTT
EXPLAIN (VERBOSE) SELECT count(DISTINCT a.*) FROM kv a, kv b
----
group · · (count) ·
│ aggregate 0 count(DISTINCT ((k, v, w, s) AS k, v, w, s)) · ·
│ scalar · · ·
└── render · · ("((k, v, w, s) AS k, v, w, s)") ·
│ render 0 ((a.k, a.v, a.w, a.s) AS k, v, w, s) · ·
└── join · · (k, v, w, s, k[omitted], v[omitted], w[omitted], s[omitted]) ·
│ type cross · ·
├── scan · · (k, v, w, s) k!=NULL; key(k)
│ table kv@primary · ·
│ spans ALL · ·
└── scan · · (k[omitted], v[omitted], w[omitted], s[omitted]) k!=NULL; key(k)
· table kv@primary · ·
· spans ALL · ·
group · · (count) ·
│ aggregate 0 count(DISTINCT column9) · ·
│ scalar · · ·
└── render · · (column9) ·
│ render 0 ((k, v, w, s) AS k, v, w, s) · ·
└── join · · (k, v, w, s) ·
│ type cross · ·
├── scan · · (k, v, w, s) ·
│ table kv@primary · ·
│ spans ALL · ·
└── scan · · () ·
· table kv@primary · ·
· spans ALL · ·

query TTT
SELECT tree, field, description FROM [
Expand Down
6 changes: 3 additions & 3 deletions pkg/sql/opt/exec/execbuilder/testdata/distsql_misc
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ subtest scrub
# SELECT url FROM [EXPLAIN (DISTSQL)
# SELECT leftside.v, leftside.k, leftside.data, rightside.v, rightside.k, rightside.data
# FROM
# (SELECT v,k,data FROM test@{FORCE_INDEX=[1],NO_INDEX_JOIN} ORDER BY v,k,data) AS leftside
# (SELECT v,k,data FROM test@{FORCE_INDEX=[1]} ORDER BY v,k,data) AS leftside
# FULL OUTER JOIN
# (SELECT v,k,data FROM test@{FORCE_INDEX=[2],NO_INDEX_JOIN} ORDER BY v,k,data) AS rightside
# (SELECT v,k,data FROM test@{FORCE_INDEX=[2]} ORDER BY v,k,data) AS rightside
# ON leftside.v = rightside.v AND leftside.k = rightside.k AND leftside.data = rightside.data
# WHERE (leftside.k IS NULL) OR
# (rightside.k IS NULL)
Expand Down Expand Up @@ -46,7 +46,7 @@ subtest scrub
# FROM
# (SELECT child_id, id, id2 FROM child@{NO_INDEX_JOIN} ORDER BY id, id2) AS p
# FULL OUTER JOIN
# (SELECT id, id2 FROM parent@{FORCE_INDEX=[2],NO_INDEX_JOIN} ORDER BY id, id2) AS c
# (SELECT id, id2 FROM parent@{FORCE_INDEX=[2]} ORDER BY id, id2) AS c
# ON p.id = c.id AND p.id2 = c.id2
# WHERE (p.id IS NOT NULL OR p.id2 IS NOT NULL) AND
# c.id IS NULL AND c.id2 IS NULL
Expand Down
5 changes: 3 additions & 2 deletions pkg/sql/opt/exec/execbuilder/testdata/explain
Original file line number Diff line number Diff line change
Expand Up @@ -624,5 +624,6 @@ project
└── projections
└── const: 1 [type=int]

statement error aggregates with DISTINCT are not supported yet
EXPLAIN (OPT) SELECT sum(DISTINCT x) FROM (VALUES (1), (1), (2)) AS t(x)
# Test with an unsupported statement.
statement error window functions are not supported
EXPLAIN (OPT) SELECT avg(x) OVER () FROM (VALUES (1)) AS t(x)
6 changes: 2 additions & 4 deletions pkg/sql/opt/exec/execbuilder/testdata/select_index_hints
Original file line number Diff line number Diff line change
Expand Up @@ -147,19 +147,17 @@ render · ·
· spans ALL

query TTT
EXPLAIN SELECT b, c, d FROM abcd@{FORCE_INDEX=bcd,NO_INDEX_JOIN} WHERE c = 10
EXPLAIN SELECT b, c, d FROM abcd@{FORCE_INDEX=bcd} WHERE c = 10
----
render · ·
└── scan · ·
· table abcd@bcd
· hint no index join
· spans ALL

query TTT
EXPLAIN SELECT b, c, d FROM abcd@{FORCE_INDEX=primary,NO_INDEX_JOIN} WHERE c = 10
EXPLAIN SELECT b, c, d FROM abcd@{FORCE_INDEX=primary} WHERE c = 10
----
render · ·
└── scan · ·
· table abcd@primary
· hint no index join
· spans ALL
1 change: 1 addition & 0 deletions pkg/sql/opt/exec/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ type ColumnOrdinalSet = util.FastIntSet
type AggInfo struct {
FuncName string
Builtin *tree.Overload
Distinct bool
ResultType types.T
ArgCols []ColumnOrdinal
}
3 changes: 3 additions & 0 deletions pkg/sql/opt/memo/typing.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ func init() {
opt.ConstAggOp: typeAsFirstArg,
opt.ConstNotNullAggOp: typeAsFirstArg,
opt.FirstAggOp: typeAsFirstArg,

// Modifiers for aggregations pass through their argument.
opt.AggDistinctOp: typeAsFirstArg,
}

for _, op := range opt.BooleanOperators {
Expand Down
Loading

0 comments on commit 131f4b0

Please sign in to comment.