Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opt: Add weak keys and rule to eliminate DISTINCT #24451

Merged
merged 2 commits into from
Apr 6, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 10 additions & 20 deletions pkg/sql/opt/exec/execbuilder/testdata/aggregate
Original file line number Diff line number Diff line change
Expand Up @@ -1256,17 +1256,11 @@ INSERT INTO ab VALUES
exec-explain
SELECT 1 FROM kv GROUP BY kv.*;
----
render 0 render · · (column5) ·
│ 0 · render 0 1 · ·
└── group 1 group · · (k, v, w, s) ·
│ 1 · aggregate 0 k · ·
│ 1 · aggregate 1 v · ·
│ 1 · aggregate 2 w · ·
│ 1 · aggregate 3 s · ·
│ 1 · group by @1-@4 · ·
└── scan 2 scan · · (k, v, w, s) ·
· 2 · table kv@primary · ·
· 2 · spans ALL · ·
render 0 render · · (column5) ·
│ 0 · render 0 1 · ·
└── scan 1 scan · · () ·
· 1 · table kv@primary · ·
· 1 · spans ALL · ·

exec
SELECT 1 FROM kv GROUP BY kv.*;
Expand Down Expand Up @@ -1456,15 +1450,11 @@ column3:tuple{int, int}
exec-explain
SELECT (b, a) FROM ab GROUP BY (b, a)
----
render 0 render · · (column3) ·
│ 0 · render 0 (b, a) · ·
└── group 1 group · · (a, b) ·
│ 1 · aggregate 0 a · ·
│ 1 · aggregate 1 b · ·
│ 1 · group by @1-@2 · ·
└── scan 2 scan · · (a, b) ·
· 2 · table ab@primary · ·
· 2 · spans ALL · ·
render 0 render · · (column3) ·
│ 0 · render 0 (b, a) · ·
└── scan 1 scan · · (a, b) ·
· 1 · table ab@primary · ·
· 1 · spans ALL · ·

exec rowsort
SELECT MIN(y), (b, a) FROM ab, xy GROUP BY (x, (a, b))
Expand Down
6 changes: 4 additions & 2 deletions pkg/sql/opt/exec/execbuilder/testdata/scan
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ SELECT * FROM t.a
scan a
├── columns: x:1(int!null) y:2(float) s:3(string)
├── stats: [rows=1000]
└── cost: 1000.00
├── cost: 1000.00
└── keys: (1)

exec-explain
SELECT * FROM t.a
Expand All @@ -37,7 +38,8 @@ SELECT s, x FROM t.a
scan a
├── columns: s:3(string) x:1(int!null)
├── stats: [rows=1000]
└── cost: 1000.00
├── cost: 1000.00
└── keys: (1)

exec-explain
SELECT s, x FROM t.a
Expand Down
26 changes: 25 additions & 1 deletion pkg/sql/opt/memo/expr_view.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,9 @@ const (
// ExprFmtHideConstraints does not show inferred constraints in the output.
ExprFmtHideConstraints

// ExprFmtHideKeys does not show keys in the output.
ExprFmtHideKeys

// ExprFmtHideAll shows only the most basic properties of the expression.
ExprFmtHideAll ExprFmtFlags = (1 << iota) - 1
)
Expand Down Expand Up @@ -256,7 +259,6 @@ func (ev ExprView) formatRelational(tp treeprinter.Node, flags ExprFmtFlags) {
logProps := ev.Logical()

tp = tp.Child(buf.String())
buf.Reset()

// If a particular column presentation is required of the expression, then
// print columns using that information.
Expand Down Expand Up @@ -320,6 +322,11 @@ func (ev ExprView) formatRelational(tp treeprinter.Node, flags ExprFmtFlags) {
tp.Childf("cost: %.2f", ev.lookupBestExpr().cost)
}

// Format weak keys.
if !flags.HasFlags(ExprFmtHideKeys) {
ev.formatWeakKeys(tp)
}

if physProps.Ordering.Defined() {
tp.Childf("ordering: %s", physProps.Ordering.String())
}
Expand Down Expand Up @@ -416,6 +423,23 @@ func (ev ExprView) formatPresentation(tp treeprinter.Node, presentation Presenta
tp.Child(buf.String())
}

// formatWeakKeys appends a "keys: ..." child to tp that lists every weak key
// in the expression's relational properties, separated by single spaces. A key
// that is not a subset of the not-null columns is prefixed with "weak". No
// child is added when the formatted list is empty.
func (ev ExprView) formatWeakKeys(tp treeprinter.Node) {
	relational := ev.Logical().Relational

	var out bytes.Buffer
	for idx, weakKey := range relational.WeakKeys {
		// Separate consecutive keys with a single space.
		if idx > 0 {
			out.WriteRune(' ')
		}
		// Mark keys that include at least one column outside NotNullCols.
		if !weakKey.SubsetOf(relational.NotNullCols) {
			out.WriteString("weak")
		}
		out.WriteString(weakKey.String())
	}

	if out.Len() == 0 {
		return
	}
	tp.Childf("keys: %s", out.String())
}

// MatchesTupleOfConstants returns true if the expression is a TupleOp with
// ConstValue children.
func MatchesTupleOfConstants(ev ExprView) bool {
Expand Down
22 changes: 22 additions & 0 deletions pkg/sql/opt/memo/logical_props.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,28 @@ type RelationalProps struct {
// derived from filters that are NULL-intolerant.
NotNullCols opt.ColSet

// WeakKeys are the column sets which form weak keys and are subsets of the
// expression's output columns. A weak key set cannot contain any other weak
// key set (it would be redundant).
//
// A column set is a key if no two rows are equal after projection onto that
// set. This definition treats NULL as if it were equal to NULL, so two rows
// having duplicate NULL values would *not* qualify as key rows. Therefore,
// in the usual case, the key columns are also not nullable. The simplest
// example of a key is the primary key for a table (recall that all of the
// columns of the primary key are defined to be NOT NULL).
//
// A weak key is similar to a key, with the difference that NULL values are
// treated as *not equal* to other NULL values. Therefore, two rows having
// duplicate NULL values could still qualify as weak key rows. A UNIQUE index
// on a table is a weak key and possibly a key if all of the columns are NOT
// NULL. A weak key is a key if "(WeakKeys[i] & NotNullCols) == WeakKeys[i]".
//
// An empty key is valid (an empty key implies there is at most one row). Note
// that an empty key is always the only key in the set, since it's a subset of
// every other key (i.e. every other key would be redundant).
WeakKeys opt.WeakKeys

// OuterCols is the set of columns that are referenced by variables within
// this relational sub-expression, but are not bound within the scope of
// the expression. For example:
Expand Down
71 changes: 61 additions & 10 deletions pkg/sql/opt/memo/logical_props_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ func (f logicalPropsFactory) constructScanProps(ev ExprView) LogicalProps {
}
}

// Initialize weak keys from the table schema.
props.Relational.WeakKeys = md.TableWeakKeys(def.Table)
filterWeakKeys(props.Relational)

// TODO: Need actual number of rows.
if def.Constraint != nil {
props.Relational.Stats.RowCount = 100
Expand All @@ -121,11 +125,8 @@ func (f logicalPropsFactory) constructSelectProps(ev ExprView) LogicalProps {

inputProps := ev.lookupChildGroup(0).logical.Relational

// Inherit output columns from input.
props.Relational.OutputCols = inputProps.OutputCols

// Inherit not null columns from input.
props.Relational.NotNullCols = inputProps.NotNullCols
// Inherit input properties as starting point.
*props.Relational = *inputProps

// TODO: Need better estimate based on actual filter conditions.
props.Relational.Stats.RowCount = inputProps.Stats.RowCount / 10
Expand All @@ -141,10 +142,18 @@ func (f logicalPropsFactory) constructProjectProps(ev ExprView) LogicalProps {
// Use output columns from projection list.
props.Relational.OutputCols = opt.ColListToSet(ev.Child(1).Private().(opt.ColList))

// Inherit not null columns from input.
// Inherit not null columns from input, but only use those that are also
// output columns.
props.Relational.NotNullCols = inputProps.NotNullCols
filterNullCols(props.Relational)

// Inherit outer columns from input.
props.Relational.OuterCols = inputProps.OuterCols

// Inherit weak keys that are composed entirely of output columns.
props.Relational.WeakKeys = inputProps.WeakKeys
filterWeakKeys(props.Relational)

props.Relational.Stats.RowCount = inputProps.Stats.RowCount

return props
Expand Down Expand Up @@ -185,6 +194,9 @@ func (f logicalPropsFactory) constructJoinProps(ev ExprView) LogicalProps {
props.Relational.NotNullCols.UnionWith(leftProps.NotNullCols)
}

// TODO(andyk): Need to derive weak keys for joins, for example when weak
// keys on both sides are equivalent cols.

// TODO: Need better estimate based on actual on conditions.
props.Relational.Stats.RowCount = leftProps.Stats.RowCount * rightProps.Stats.RowCount
if ev.Child(2).Operator() != opt.TrueOp {
Expand All @@ -210,10 +222,25 @@ func (f logicalPropsFactory) constructGroupByProps(ev ExprView) LogicalProps {
props.Relational.NotNullCols = inputProps.NotNullCols.Copy()
props.Relational.NotNullCols.IntersectionWith(groupingColSet)

// Scalar group by has no grouping columns and always a single row.
if groupingColSet.Empty() {
// Scalar group by.
// Any combination of columns is a weak key when there is one row.
props.Relational.WeakKeys = opt.WeakKeys{groupingColSet}
props.Relational.Stats.RowCount = 1
} else {
// The grouping columns always form a key because the GroupBy operation
// eliminates all duplicates. The result WeakKeys property either contains
// only the grouping column set, or else it contains one or more weak keys
// that are strict subsets of the grouping column set. This is because
// the grouping column set contains every output column (except aggregate
// columns, which aren't relevant since they're newly synthesized).
if inputProps.WeakKeys.ContainsSubsetOf(groupingColSet) {
props.Relational.WeakKeys = inputProps.WeakKeys
filterWeakKeys(props.Relational)
} else {
props.Relational.WeakKeys = opt.WeakKeys{groupingColSet}
}

// TODO: Need better estimate.
props.Relational.Stats.RowCount = inputProps.Stats.RowCount / 10
}
Expand Down Expand Up @@ -324,11 +351,35 @@ func (f logicalPropsFactory) constructScalarProps(ev ExprView) LogicalProps {
return props
}

// filterNullCols will ensure that the set of null columns is a subset of the
// output columns. It respects immutability by making a copy of the null
// columns if they need to be updated.
// filterNullCols restricts props.NotNullCols to columns that are also present
// in props.OutputCols. When the not-null set is already a subset of the output
// columns it is left untouched; otherwise the field is reassigned to the
// result of Intersection rather than being modified in place (presumably
// Intersection returns a fresh set, so a set shared with the input properties
// is not mutated).
func filterNullCols(props *RelationalProps) {
	if props.NotNullCols.SubsetOf(props.OutputCols) {
		return
	}
	props.NotNullCols = props.NotNullCols.Intersection(props.OutputCols)
}

// filterWeakKeys drops every weak key in props.WeakKeys that references a
// column outside props.OutputCols. The existing slice is never modified: a new
// slice is allocated only once the first key has to be discarded, and
// props.WeakKeys is reassigned to it. If every key is a subset of the output
// columns, props.WeakKeys is left as is.
func filterWeakKeys(props *RelationalProps) {
	var kept opt.WeakKeys
	for idx, key := range props.WeakKeys {
		if key.SubsetOf(props.OutputCols) {
			// Keys only need to be collected after a discard has forced a copy;
			// before that the original slice is still an exact match.
			if kept != nil {
				kept = append(kept, key)
			}
			continue
		}

		// This key has a column that is not part of the output column set. On
		// the first such key, start the copy with all keys seen so far. At
		// least one key is being dropped, hence the capacity of len-1.
		if kept == nil {
			kept = make(opt.WeakKeys, idx, len(props.WeakKeys)-1)
			copy(kept, props.WeakKeys[:idx])
		}
	}

	if kept != nil {
		props.WeakKeys = kept
	}
}
31 changes: 11 additions & 20 deletions pkg/sql/opt/memo/logical_props_factory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/opt/norm"
"github.com/cockroachdb/cockroach/pkg/sql/opt/testutils"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util/treeprinter"
)

func TestLogicalPropsFactory(t *testing.T) {
Expand Down Expand Up @@ -61,14 +60,14 @@ func TestLogicalJoinProps(t *testing.T) {
testLogicalProps(t, f.Metadata(), ev, expected)
}

joinFunc(opt.InnerJoinApplyOp, "a.x:1(int!null) a.y:2(int) b.x:3(int!null) b.z:4(int!null)\n")
joinFunc(opt.LeftJoinApplyOp, "a.x:1(int!null) a.y:2(int) b.x:3(int) b.z:4(int)\n")
joinFunc(opt.RightJoinApplyOp, "a.x:1(int) a.y:2(int) b.x:3(int!null) b.z:4(int!null)\n")
joinFunc(opt.FullJoinApplyOp, "a.x:1(int) a.y:2(int) b.x:3(int) b.z:4(int)\n")
joinFunc(opt.SemiJoinOp, "a.x:1(int!null) a.y:2(int)\n")
joinFunc(opt.SemiJoinApplyOp, "a.x:1(int!null) a.y:2(int)\n")
joinFunc(opt.AntiJoinOp, "a.x:1(int!null) a.y:2(int)\n")
joinFunc(opt.AntiJoinApplyOp, "a.x:1(int!null) a.y:2(int)\n")
joinFunc(opt.InnerJoinApplyOp, "a.x:1(int!null) a.y:2(int) b.x:3(int!null) b.z:4(int!null)")
joinFunc(opt.LeftJoinApplyOp, "a.x:1(int!null) a.y:2(int) b.x:3(int) b.z:4(int)")
joinFunc(opt.RightJoinApplyOp, "a.x:1(int) a.y:2(int) b.x:3(int!null) b.z:4(int!null)")
joinFunc(opt.FullJoinApplyOp, "a.x:1(int) a.y:2(int) b.x:3(int) b.z:4(int)")
joinFunc(opt.SemiJoinOp, "a.x:1(int!null) a.y:2(int)")
joinFunc(opt.SemiJoinApplyOp, "a.x:1(int!null) a.y:2(int)")
joinFunc(opt.AntiJoinOp, "a.x:1(int!null) a.y:2(int)")
joinFunc(opt.AntiJoinApplyOp, "a.x:1(int!null) a.y:2(int)")
}

func constructScanOpDef(md *opt.Metadata, tabID opt.TableID) *memo.ScanOpDef {
Expand All @@ -81,18 +80,10 @@ func constructScanOpDef(md *opt.Metadata, tabID opt.TableID) *memo.ScanOpDef {

func testLogicalProps(t *testing.T, md *opt.Metadata, ev memo.ExprView, expected string) {
t.Helper()
actual := ev.String()

logical := ev.Logical()
if logical.Relational == nil {
panic("only relational properties are supported")
}

tp := treeprinter.New()
logical.FormatColSet(tp, md, "", logical.Relational.OutputCols)
actual := strings.Trim(tp.String(), " ")

if actual != expected {
t.Fatalf("\nexpected: %s\nactual : %s", expected, actual)
if !strings.Contains(actual, expected) {
t.Fatalf("\nexpected to contain: %s\nactual:\n%s", expected, actual)
}
}

Expand Down
8 changes: 6 additions & 2 deletions pkg/sql/opt/memo/testdata/logprops/constraints
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,11 @@ SELECT * FROM kuv WHERE u > 1::INT
select
├── columns: k:1(int!null) u:2(float) v:3(string)
├── stats: [rows=100]
├── keys: (1)
├── scan kuv
│ ├── columns: kuv.k:1(int!null) kuv.u:2(float) kuv.v:3(string)
│ └── stats: [rows=1000]
│ ├── stats: [rows=1000]
│ └── keys: (1)
└── filters [type=bool, outer=(2)]
└── gt [type=bool, outer=(2)]
├── variable: kuv.u [type=float, outer=(2)]
Expand All @@ -227,9 +229,11 @@ SELECT * FROM kuv WHERE v <= 'foo' AND v >= 'bar'
select
├── columns: k:1(int!null) u:2(float) v:3(string)
├── stats: [rows=100]
├── keys: (1)
├── scan kuv
│ ├── columns: kuv.k:1(int!null) kuv.u:2(float) kuv.v:3(string)
│ └── stats: [rows=1000]
│ ├── stats: [rows=1000]
│ └── keys: (1)
└── filters [type=bool, outer=(3), constraints=(/3: [/'bar' - /'foo']; tight)]
├── le [type=bool, outer=(3), constraints=(/3: (/NULL - /'foo']; tight)]
│ ├── variable: kuv.v [type=string, outer=(3)]
Expand Down
Loading