Skip to content

Commit

Permalink
norm: early inlining of WITH bound expression with VALUES
Browse files Browse the repository at this point in the history
Fixes https://github.com/cockroachlabs/support/issues/1769

This commit implements early inlining of WITH clause bound expressions
(at the time when the WithScanExpr is normalized) if the
WITH bound expression is a ValuesExpr with all constant expressions or
placeholders and it appears in an IN or NOT IN expression, for example:
`column IN (VALUES(...))` or `column NOT IN(VALUES(...))`.
Not doing early inlining can result in a superfluous join.

Normally WITH clause inlining is done when the top-level WithExpr is
normalized, but since normalization is done bottom-up, the WithExpr is
processed last, making the inlining too late to trigger other
normalization rules.

Release note (performance improvement): This patch adds early inlining
of VALUES clauses and unnested arrays in WITH queries in order to
eliminate unnecessary joins.
  • Loading branch information
Mark Sirek committed Oct 21, 2022
1 parent f9db369 commit 248ba67
Show file tree
Hide file tree
Showing 7 changed files with 363 additions and 12 deletions.
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/apply_join
Expand Up @@ -529,7 +529,7 @@ FROM

query error pq: st_mpointfromwkb\(\): error parsing EWKB: wkb: unknown byte order: 11000000
WITH
with_111870 (col_664924) AS (SELECT * FROM (VALUES (NULL)) AS tab_397795 (col_664924))
with_111870 (col_664924) AS (SELECT * FROM (VALUES (true)) AS tab_397795 (col_664924))
SELECT
cte_ref_33032.col_664924 AS col_664951
FROM
Expand Down
51 changes: 51 additions & 0 deletions pkg/sql/opt/memo/expr.go
Expand Up @@ -1281,3 +1281,54 @@ func (g *GroupingPrivate) GroupingOrderType(required *props.OrderingChoice) Grou
}
return PartialStreaming
}

// IsConstantsAndPlaceholders returns true if all values in the list are
// constant, placeholders or tuples containing constants, placeholders or other
// such nested tuples.
func (v *ValuesExpr) IsConstantsAndPlaceholders() bool {
// Constant expressions should be leak-proof, so this is a quick test to
// determine if the ValuesExpr even needs to be checked.
if !v.Relational().VolatilitySet.IsLeakproof() {
return false
}
// Another quick check for something other than constants
if !v.Relational().OuterCols.Empty() {
return false
}
// A safety check
if len(v.Rows) == 0 {
return false
}
if !v.Rows.IsConstantsAndPlaceholders(v.Memo().EvalContext()) {
return false
}
return true
}

// IsConstantsAndPlaceholders returns true if all scalar expressions in the list
// are constants, placeholders or tuples containing constants or placeholders.
// If a tuple nested within a tuple is found, false is returned.
func (e ScalarListExpr) IsConstantsAndPlaceholders(evalCtx *eval.Context) bool {
return e.isConstantsAndPlaceholders(evalCtx, false /* insideTuple */)
}

func (e ScalarListExpr) isConstantsAndPlaceholders(evalCtx *eval.Context, insideTuple bool) bool {
for _, scalarExpr := range e {
if tupleExpr, ok := scalarExpr.(*TupleExpr); ok {
if insideTuple {
return false
}
if !tupleExpr.Elems.isConstantsAndPlaceholders(evalCtx, true) {
return false
}
} else {
if scalarExpr == nil {
panic(errors.AssertionFailedf("nil scalar expression in list"))
}
if !opt.IsConstValueOp(scalarExpr) && scalarExpr.Op() != opt.PlaceholderOp {
return false
}
}
}
return true
}
41 changes: 41 additions & 0 deletions pkg/sql/opt/norm/rules/scalar.opt
Expand Up @@ -70,6 +70,47 @@ $item
=>
$input

# InlineAnyWithScan matches on an ANY or NOT ANY expression which was generated
# from expressions like `column IN (WithScan)` or `column NOT IN (WithScan)`,
# where the WITH clause definition was normalized into a VALUES clause with
# constants or placeholders. The `WithScan` is replaced with the VALUES clause
# in order to avoid a join and can potentially enable a constrained index scan.
[InlineAnyWithScan, Normalize]
(Any
(WithScan $withScanPrivate:*)
$scalar:* & (CanInlineWithScan $withScanPrivate $scalar)
$anyPrivate:*
)
=>
(Any (InlineWithScan $withScanPrivate) $scalar $anyPrivate)

# InlineAnyProjectWithScan matches on an ANY or NOT ANY expression which was
# generated from expressions like `column IN (projected WithScan)` or `column
# NOT IN (projected WithScan)`, where the WITH clause definition was normalized
# into a VALUES clause with constants or placeholders. The `WithScan` is
# replaced with the VALUES clause in order to avoid a join and can potentially
# enable a constrained index scan.
[InlineAnyProjectWithScan, Normalize]
(Any
(Project
(WithScan $withScanPrivate:*)
$projections:*
$passthrough:*
)
$scalar:* & (CanInlineWithScan $withScanPrivate $scalar)
$anyPrivate:*
)
=>
(Any
(Project
(InlineWithScan $withScanPrivate)
$projections
$passthrough
)
$scalar
$anyPrivate
)

# NormalizeInConst ensures that the In operator's tuple operand is sorted with
# duplicates removed (since duplicates do not change the result).
[NormalizeInConst, Normalize]
Expand Down
188 changes: 187 additions & 1 deletion pkg/sql/opt/norm/testdata/rules/with
Expand Up @@ -173,7 +173,6 @@ with &2 (bar)
├── key: ()
└── fd: ()-->(5)

# We should inline foo, but not bar.
norm expect=InlineWith
WITH foo AS (SELECT 1), bar AS (SELECT 2) SELECT * FROM foo CROSS JOIN bar CROSS JOIN bar AS bar2
----
Expand Down Expand Up @@ -876,3 +875,190 @@ scalar-group-by
└── aggregations
└── sum [as=sum:6, outer=(5)]
└── n:5

# Regression test for #87790
exec-ddl
CREATE TABLE t87790(i INT, j INT, k float, CONSTRAINT "primary" PRIMARY KEY (i,j))
----

# An array unnested to VALUES should be inlined.
norm
WITH ivals AS (SELECT * FROM unnest(ARRAY[1,2,3]))
SELECT * FROM t87790 WHERE i IN (SELECT * FROM ivals)
----
select
├── columns: i:2!null j:3!null k:4
├── key: (2,3)
├── fd: (2,3)-->(4)
├── scan t87790
│ ├── columns: i:2!null j:3!null k:4
│ ├── key: (2,3)
│ └── fd: (2,3)-->(4)
└── filters
└── i:2 IN (1, 2, 3) [outer=(2), constraints=(/2: [/1 - /1] [/2 - /2] [/3 - /3]; tight)]

# An array unnested to VALUES in a MATERIALIZED WITH should not be inlined.
norm expect-not=InlineAnyWithScan
WITH ivals AS MATERIALIZED (SELECT * FROM unnest(ARRAY[1,2,3]))
SELECT * FROM t87790 WHERE i IN (SELECT * FROM ivals)
----
with &1 (ivals)
├── columns: i:2!null j:3!null k:4
├── materialized
├── key: (2,3)
├── fd: (2,3)-->(4)
├── values
│ ├── columns: unnest:1!null
│ ├── cardinality: [3 - 3]
│ ├── (1,)
│ ├── (2,)
│ └── (3,)
└── semi-join (hash)
├── columns: i:2!null j:3!null k:4
├── key: (2,3)
├── fd: (2,3)-->(4)
├── scan t87790
│ ├── columns: i:2!null j:3!null k:4
│ ├── key: (2,3)
│ └── fd: (2,3)-->(4)
├── with-scan &1 (ivals)
│ ├── columns: unnest:7!null
│ ├── mapping:
│ │ └── unnest:1 => unnest:7
│ └── cardinality: [3 - 3]
└── filters
└── i:2 = unnest:7 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)]

# Random (volatile) VALUES should not be inlined.
norm expect-not=InlineAnyWithScan
WITH randvals AS (SELECT random())
SELECT * FROM t87790 WHERE k NOT IN (SELECT * FROM randvals)
----
with &1 (randvals)
├── columns: i:2!null j:3!null k:4
├── volatile
├── key: (2,3)
├── fd: (2,3)-->(4)
├── values
│ ├── columns: random:1
│ ├── cardinality: [1 - 1]
│ ├── volatile
│ ├── key: ()
│ ├── fd: ()-->(1)
│ └── (random(),)
└── anti-join (cross)
├── columns: i:2!null j:3!null k:4
├── key: (2,3)
├── fd: (2,3)-->(4)
├── scan t87790
│ ├── columns: i:2!null j:3!null k:4
│ ├── key: (2,3)
│ └── fd: (2,3)-->(4)
├── with-scan &1 (randvals)
│ ├── columns: random:7
│ ├── mapping:
│ │ └── random:1 => random:7
│ ├── cardinality: [1 - 1]
│ ├── key: ()
│ └── fd: ()-->(7)
└── filters
└── (k:4 = random:7) IS NOT false [outer=(4,7)]

# A non-constant WITH expression cannot be inlined in the middle of
# normalization.
norm expect-not=InlineAnyWithScan
WITH ivals AS (SELECT * FROM unnest(ARRAY(SELECT i FROM t87790)))
SELECT * FROM t87790 WHERE i IN (SELECT * FROM ivals)
----
semi-join (hash)
├── columns: i:7!null j:8!null k:9
├── immutable
├── key: (7,8)
├── fd: (7,8)-->(9)
├── scan t87790
│ ├── columns: i:7!null j:8!null k:9
│ ├── key: (7,8)
│ └── fd: (7,8)-->(9)
├── project
│ ├── columns: unnest:12
│ ├── immutable
│ ├── project-set
│ │ ├── columns: unnest:6
│ │ ├── immutable
│ │ ├── values
│ │ │ ├── cardinality: [1 - 1]
│ │ │ ├── key: ()
│ │ │ └── ()
│ │ └── zip
│ │ └── function: unnest [immutable, subquery]
│ │ └── array-flatten
│ │ └── scan t87790
│ │ └── columns: i:1!null
│ └── projections
│ └── unnest:6 [as=unnest:12, outer=(6)]
└── filters
└── i:7 = unnest:12 [outer=(7,12), constraints=(/7: (/NULL - ]; /12: (/NULL - ]), fd=(7)==(12), (12)==(7)]

# Placeholders are considered constant for the InlineAnyWithScan normalization.
assign-placeholders-norm query-args=(1, 2, 3)
WITH ivals AS (SELECT * FROM unnest(ARRAY[$1::INT, $2::INT, $3::INT]))
SELECT * FROM t87790 WHERE i IN (SELECT * FROM ivals)
----
select
├── columns: i:2!null j:3!null k:4
├── key: (2,3)
├── fd: (2,3)-->(4)
├── scan t87790
│ ├── columns: i:2!null j:3!null k:4
│ ├── key: (2,3)
│ └── fd: (2,3)-->(4)
└── filters
└── i:2 IN (1, 2, 3) [outer=(2), constraints=(/2: [/1 - /1] [/2 - /2] [/3 - /3]; tight)]

norm expect=InlineAnyProjectWithScan
WITH v(a, b) AS (VALUES (1, 2))
SELECT 1 FROM t87790 WHERE (i,j) IN (SELECT a, 1 FROM v)
UNION ALL
SELECT 1 FROM t87790 WHERE (i,j) IN (SELECT a, b FROM v)
----
union-all
├── columns: "?column?":22!null
├── left columns: "?column?":12
├── right columns: "?column?":21
├── cardinality: [0 - 2]
├── project
│ ├── columns: "?column?":12!null
│ ├── cardinality: [0 - 1]
│ ├── key: ()
│ ├── fd: ()-->(12)
│ ├── select
│ │ ├── columns: i:3!null j:4!null
│ │ ├── cardinality: [0 - 1]
│ │ ├── key: ()
│ │ ├── fd: ()-->(3,4)
│ │ ├── scan t87790
│ │ │ ├── columns: i:3!null j:4!null
│ │ │ └── key: (3,4)
│ │ └── filters
│ │ ├── i:3 = 1 [outer=(3), constraints=(/3: [/1 - /1]; tight), fd=()-->(3)]
│ │ └── j:4 = 1 [outer=(4), constraints=(/4: [/1 - /1]; tight), fd=()-->(4)]
│ └── projections
│ └── 1 [as="?column?":12]
└── project
├── columns: "?column?":21!null
├── cardinality: [0 - 1]
├── key: ()
├── fd: ()-->(21)
├── select
│ ├── columns: i:13!null j:14!null
│ ├── cardinality: [0 - 1]
│ ├── key: ()
│ ├── fd: ()-->(13,14)
│ ├── scan t87790
│ │ ├── columns: i:13!null j:14!null
│ │ └── key: (13,14)
│ └── filters
│ ├── i:13 = 1 [outer=(13), constraints=(/13: [/1 - /1]; tight), fd=()-->(13)]
│ └── j:14 = 2 [outer=(14), constraints=(/14: [/2 - /2]; tight), fd=()-->(14)]
└── projections
└── 1 [as="?column?":21]
66 changes: 66 additions & 0 deletions pkg/sql/opt/norm/with_funcs.go
Expand Up @@ -13,6 +13,7 @@ package norm
import (
"github.com/cockroachdb/cockroach/pkg/sql/opt"
"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
"github.com/cockroachdb/errors"
)

// CanInlineWith returns whether or not it's valid to inline binding in expr.
Expand Down Expand Up @@ -61,3 +62,68 @@ func (c *CustomFuncs) InlineWith(binding, input memo.RelExpr, priv *memo.WithPri

return replace(input).(memo.RelExpr)
}

// CanInlineWithScan returns whether or not it's valid and heuristically cheaper
// to inline a WithScanExpr with its bound expression from the memo. Currently
// this only allows inlining leak-proof constant VALUES clauses of the form
// `column IN (VALUES(...))` or `column NOT IN(VALUES(...))`, but could likely
// be extended to handle other expressions in the future.
func (c *CustomFuncs) CanInlineWithScan(private *memo.WithScanPrivate, scalar opt.ScalarExpr) bool {
if !private.CanInlineInPlace {
return false
}
// If we don't have `column IN(...)` or `column NOT IN(...)` or
// (col1, col2 ... coln) IN/NOT IN (...), it is not cheaper to inline because
// we wouldn't be avoiding one or more joins.
if tupleExpr, ok := scalar.(*memo.TupleExpr); ok {
for _, scalarExpr := range tupleExpr.Elems {
if scalarExpr.Op() != opt.VariableOp {
return false
}
}
} else if scalar.Op() != opt.VariableOp {
return false
}
expr := c.mem.Metadata().WithBinding(private.With)
var valuesExpr *memo.ValuesExpr
var ok bool
if valuesExpr, ok = expr.(*memo.ValuesExpr); !ok {
return false
}
if !valuesExpr.IsConstantsAndPlaceholders() {
return false
}
return true
}

// InlineWithScan replaces a WithScanExpr with its bound expression, mapped to
// new output ColumnIDs.
func (c *CustomFuncs) InlineWithScan(private *memo.WithScanPrivate) memo.RelExpr {
expr := c.mem.Metadata().WithBinding(private.With)
var valuesExpr *memo.ValuesExpr
var ok bool
valuesExpr.Op()
if valuesExpr, ok = expr.(*memo.ValuesExpr); !ok {
// Didn't find the expected VALUES.
panic(errors.AssertionFailedf("attempt to inline a WithScan which is not a VALUES clause; operator: %s",
expr.Op().String()))
}
projections := make(memo.ProjectionsExpr, len(private.InCols))
for i := range private.InCols {
projections[i] = c.f.ConstructProjectionsItem(
c.f.ConstructVariable(private.InCols[i]),
private.OutCols[i],
)
}
// Shallow copy the values in case this WITH binding is inlined more than
// once.
newRows := make(memo.ScalarListExpr, len(valuesExpr.Rows))
copy(newRows, valuesExpr.Rows)
newCols := make(opt.ColList, len(valuesExpr.ValuesPrivate.Cols))
copy(newCols, valuesExpr.ValuesPrivate.Cols)
newValuesExpr := c.f.ConstructValues(newRows, &memo.ValuesPrivate{
Cols: newCols,
ID: c.f.Metadata().NextUniqueID(),
})
return c.f.ConstructProject(newValuesExpr, projections, opt.ColSet{})
}
5 changes: 5 additions & 0 deletions pkg/sql/opt/ops/relational.opt
Expand Up @@ -1313,6 +1313,11 @@ define WithScanPrivate {
# most cases the column set is sufficient to do this, but various rules make
# it possible to construct WithScan expressions with no columns.
ID UniqueID

# CanInlineInPlace is true when the WithScanExpr is allowed to be inlined
# when it is normalized as opposed to inlining which takes place when the
# WithExpr is normalized.
CanInlineInPlace bool
}

# RecursiveCTE implements the logic of a recursive CTE:
Expand Down

0 comments on commit 248ba67

Please sign in to comment.