Skip to content

Commit

Permalink
Merge #88555 #89050
Browse files Browse the repository at this point in the history
88555: opt: don't double count OR selectivity for joins r=DrewKimball a=DrewKimball

Previously, an `OR` expression with tight constraints would have its selecitivy applied in `applyFilters` as expected, without incrementing `numUnappliedConjuncts`. However, joins call into `selectivityFromOredEquivalencies`, which would then increment `numUnappliedConjuncts` for that `OR` if the disjuncts weren't all conjunctions of equalities. This caused an additional factor of `1/3` (`memo.unknownFilterSelectivity`) to be applied to the join row count estimate.

This commit modifies `selectivityFromOredEquivalencies` to avoid incrementing `numUnappliedConjuncts` for `OR` conditions with tight constraints. This prevents the double-counting behavior. This commit also removes a few `FiltersItem` copies from loops.

Fixes #88455

Release note: None

89050: rowexec: fix usage of the shared "single datum" acc in windower r=yuzefovich a=yuzefovich

This commit fixes the usage of the shared "single datum agg mem account" by the window builtins. Previously, we were updating the eval context with the memory account after the builtins were constructed. The impact of the bug on its own is pretty minor (namely that we could reserve more memory than necessary - the initial allocation is 10KiB), but I have some other changes brewing which make it more important to be precise about the attribution of the memory allocations.

Release note: None

Co-authored-by: DrewKimball <drewk@cockroachlabs.com>
Co-authored-by: Yahor Yuzefovich <yahor@cockroachlabs.com>
  • Loading branch information
3 people committed Sep 30, 2022
3 parents 2b58861 + 1c05628 + 982eb16 commit aecc58f
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 22 deletions.
8 changes: 6 additions & 2 deletions pkg/sql/opt/memo/statistics_builder.go
Expand Up @@ -4181,7 +4181,11 @@ func (sb *statisticsBuilder) selectivityFromOredEquivalencies(
var conjunctSelectivity props.Selectivity

for f := 0; f < len(h.filters); f++ {
disjunction := h.filters[f]
disjunction := &h.filters[f]
if disjunction.ScalarProps().TightConstraints {
// applyFilters will have already handled this filter.
continue
}
var disjuncts []opt.ScalarExpr
if orExpr, ok := disjunction.Condition.(*OrExpr); !ok {
continue
Expand Down Expand Up @@ -4638,7 +4642,7 @@ func (sb *statisticsBuilder) buildStatsFromCheckConstraints(
filters := *constraints.(*FiltersExpr)
// For each ANDed check constraint...
for i := 0; i < len(filters); i++ {
filter := filters[i]
filter := &filters[i]
// This must be some type of comparison operation, or an OR or AND
// expression. These operations have at least 2 children.
if filter.Condition.ChildCount() < 2 {
Expand Down
55 changes: 55 additions & 0 deletions pkg/sql/opt/memo/testdata/stats/join
Expand Up @@ -2040,3 +2040,58 @@ project
│ └── 82 [type=int]
└── projections
└── '1971-10-24' [as=date:19, type=date]

# Regression test for #88455 - don't double-count selectivity for OR expressions
# with tight constraints in join ON conditions.
exec-ddl
CREATE TABLE t0_88455 (c0 INT);
----

exec-ddl
CREATE TABLE t1_88455 (c0 INT);
----

exec-ddl
ALTER TABLE t0_88455 INJECT STATISTICS '[
{
"columns": [
"c0"
],
"created_at": "2022-08-09 09:00:00.00000",
"distinct_count": 13,
"name": "__auto__",
"null_count": 0,
"row_count": 13
}
]'
----

exec-ddl
ALTER TABLE t1_88455 INJECT STATISTICS '[
{
"columns": [
"c0"
],
"created_at": "2022-08-09 09:00:00.00000",
"distinct_count": 5,
"name": "__auto__",
"null_count": 0,
"row_count": 5
}
]'
----

opt format=show-stats
SELECT * FROM t0_88455 LEFT OUTER JOIN t1_88455 ON t0_88455.c0<1 OR t0_88455.c0>1;
----
left-join (cross)
├── columns: c0:1(int) c0:5(int)
├── stats: [rows=21.66667]
├── scan t0_88455
│ ├── columns: t0_88455.c0:1(int)
│ └── stats: [rows=13, distinct(1)=13, null(1)=0]
├── scan t1_88455
│ ├── columns: t1_88455.c0:5(int)
│ └── stats: [rows=5]
└── filters
└── (t0_88455.c0:1 < 1) OR (t0_88455.c0:1 > 1) [type=bool, outer=(1), constraints=(/1: (/NULL - /0] [/2 - ]; tight)]
2 changes: 1 addition & 1 deletion pkg/sql/opt/xform/general_funcs.go
Expand Up @@ -682,7 +682,7 @@ func (c *CustomFuncs) numAllowedValues(
filters := *constraints.(*memo.FiltersExpr)
// For each ANDed check constraint...
for i := 0; i < len(filters); i++ {
filter := filters[i]
filter := &filters[i]
// This must be some type of comparison operation, or an OR or AND
// expression. These operations have at least 2 children.
if filter.Condition.ChildCount() < 2 {
Expand Down
39 changes: 20 additions & 19 deletions pkg/sql/rowexec/windower.go
Expand Up @@ -112,6 +112,26 @@ func newWindower(
w.inputTypes = input.OutputTypes()
ctx := evalCtx.Ctx()

// Limit the memory use by creating a child monitor with a hard limit.
// windower will overflow to disk if this limit is not enough.
limit := execinfra.GetWorkMemLimit(flowCtx)
if limit < memRequiredByWindower {
// The limit is set very low (likely by the tests in order to improve
// the test coverage), but the windower requires some amount of RAM, so
// we override the limit. This behavior is acceptable given that we
// don't expect anyone to lower the setting to less than 100KiB in
// production.
limit = memRequiredByWindower
}
limitedMon := mon.NewMonitorInheritWithLimit("windower-limited", limit, evalCtx.Mon)
limitedMon.StartNoReserved(ctx, evalCtx.Mon)
w.acc = limitedMon.MakeBoundAccount()
// If we have aggregate builtins that aggregate a single datum, we want
// them to reuse the same shared memory account with the windower. Notably,
// we need to update the eval context before constructing the window
// builtins.
evalCtx.SingleDatumAggMemAccount = &w.acc

w.partitionBy = spec.PartitionBy
windowFns := spec.WindowFns
w.windowFns = make([]*windowFunc, 0, len(windowFns))
Expand Down Expand Up @@ -145,20 +165,6 @@ func newWindower(
}
w.outputRow = make(rowenc.EncDatumRow, len(w.outputTypes))

// Limit the memory use by creating a child monitor with a hard limit.
// windower will overflow to disk if this limit is not enough.
limit := execinfra.GetWorkMemLimit(flowCtx)
if limit < memRequiredByWindower {
// The limit is set very low (likely by the tests in order to improve
// the test coverage), but the windower requires some amount of RAM, so
// we override the limit. This behavior is acceptable given that we
// don't expect anyone to lower the setting to less than 100KiB in
// production.
limit = memRequiredByWindower
}
limitedMon := mon.NewMonitorInheritWithLimit("windower-limited", limit, evalCtx.Mon)
limitedMon.StartNoReserved(ctx, evalCtx.Mon)

if err := w.InitWithEvalCtx(
w,
post,
Expand Down Expand Up @@ -191,11 +197,6 @@ func newWindower(
return nil, err
}

w.acc = w.MemMonitor.MakeBoundAccount()
// If we have aggregate builtins that aggregate a single datum, we want
// them to reuse the same shared memory account with the windower.
evalCtx.SingleDatumAggMemAccount = &w.acc

if execstats.ShouldCollectStats(ctx, flowCtx.CollectStats) {
w.input = newInputStatCollector(w.input)
w.ExecStatsForTrace = w.execStatsForTrace
Expand Down

0 comments on commit aecc58f

Please sign in to comment.