From 1c0562875a622b00cad548453c3f1415d6f8d5c4 Mon Sep 17 00:00:00 2001 From: DrewKimball Date: Fri, 23 Sep 2022 02:27:32 -0700 Subject: [PATCH 1/2] opt: don't double count OR selectivity for joins Previously, an `OR` expression with tight constraints would have its selecitivy applied in `applyFilters` as expected, without incrementing `numUnappliedConjuncts`. However, joins call into `selectivityFromOredEquivalencies`, which would then increment `numUnappliedConjuncts` for that `OR` if the disjuncts weren't all conjunctions of equalities. This caused an additional factor of `1/3` (`memo.unknownFilterSelectivity`) to be applied to the join row count estimate. This commit modifies `selectivityFromOredEquivalencies` to avoid incrementing `numUnappliedConjuncts` for `OR` conditions with tight constraints. This prevents the double-counting behavior. This commit also removes a few `FiltersItem` copies from loops. Fixes #88455 Release note: None --- pkg/sql/opt/memo/statistics_builder.go | 8 +++- pkg/sql/opt/memo/testdata/stats/join | 55 ++++++++++++++++++++++++++ pkg/sql/opt/xform/general_funcs.go | 2 +- 3 files changed, 62 insertions(+), 3 deletions(-) diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go index adf0652f87bb..3c24a9897a5c 100644 --- a/pkg/sql/opt/memo/statistics_builder.go +++ b/pkg/sql/opt/memo/statistics_builder.go @@ -4181,7 +4181,11 @@ func (sb *statisticsBuilder) selectivityFromOredEquivalencies( var conjunctSelectivity props.Selectivity for f := 0; f < len(h.filters); f++ { - disjunction := h.filters[f] + disjunction := &h.filters[f] + if disjunction.ScalarProps().TightConstraints { + // applyFilters will have already handled this filter. + continue + } var disjuncts []opt.ScalarExpr if orExpr, ok := disjunction.Condition.(*OrExpr); !ok { continue @@ -4638,7 +4642,7 @@ func (sb *statisticsBuilder) buildStatsFromCheckConstraints( filters := *constraints.(*FiltersExpr) // For each ANDed check constraint... for i := 0; i < len(filters); i++ { - filter := filters[i] + filter := &filters[i] // This must be some type of comparison operation, or an OR or AND // expression. These operations have at least 2 children. if filter.Condition.ChildCount() < 2 { diff --git a/pkg/sql/opt/memo/testdata/stats/join b/pkg/sql/opt/memo/testdata/stats/join index 0263068330fe..50567b9bc324 100644 --- a/pkg/sql/opt/memo/testdata/stats/join +++ b/pkg/sql/opt/memo/testdata/stats/join @@ -2040,3 +2040,58 @@ project │ └── 82 [type=int] └── projections └── '1971-10-24' [as=date:19, type=date] + +# Regression test for #88455 - don't double-count selectivity for OR expressions +# with tight constraints in join ON conditions. +exec-ddl +CREATE TABLE t0_88455 (c0 INT); +---- + +exec-ddl +CREATE TABLE t1_88455 (c0 INT); +---- + +exec-ddl +ALTER TABLE t0_88455 INJECT STATISTICS '[ + { + "columns": [ + "c0" + ], + "created_at": "2022-08-09 09:00:00.00000", + "distinct_count": 13, + "name": "__auto__", + "null_count": 0, + "row_count": 13 + } +]' +---- + +exec-ddl +ALTER TABLE t1_88455 INJECT STATISTICS '[ + { + "columns": [ + "c0" + ], + "created_at": "2022-08-09 09:00:00.00000", + "distinct_count": 5, + "name": "__auto__", + "null_count": 0, + "row_count": 5 + } +]' +---- + +opt format=show-stats +SELECT * FROM t0_88455 LEFT OUTER JOIN t1_88455 ON t0_88455.c0<1 OR t0_88455.c0>1; +---- +left-join (cross) + ├── columns: c0:1(int) c0:5(int) + ├── stats: [rows=21.66667] + ├── scan t0_88455 + │ ├── columns: t0_88455.c0:1(int) + │ └── stats: [rows=13, distinct(1)=13, null(1)=0] + ├── scan t1_88455 + │ ├── columns: t1_88455.c0:5(int) + │ └── stats: [rows=5] + └── filters + └── (t0_88455.c0:1 < 1) OR (t0_88455.c0:1 > 1) [type=bool, outer=(1), constraints=(/1: (/NULL - /0] [/2 - ]; tight)] diff --git a/pkg/sql/opt/xform/general_funcs.go b/pkg/sql/opt/xform/general_funcs.go index ac39e745ea27..6ce63a8fd8ad 100644 --- a/pkg/sql/opt/xform/general_funcs.go +++ b/pkg/sql/opt/xform/general_funcs.go @@ -682,7 +682,7 @@ func (c *CustomFuncs) numAllowedValues( filters := *constraints.(*memo.FiltersExpr) // For each ANDed check constraint... for i := 0; i < len(filters); i++ { - filter := filters[i] + filter := &filters[i] // This must be some type of comparison operation, or an OR or AND // expression. These operations have at least 2 children. if filter.Condition.ChildCount() < 2 { From 982eb16cf0a911d2d6f83a0efe8eec60196aab29 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Thu, 29 Sep 2022 19:48:52 -0700 Subject: [PATCH 2/2] rowexec: fix usage of the shared "single datum" acc in windower This commit fixes the usage of the shared "single datum agg mem account" by the window builtins. Previously, we were updating the eval context with the memory account after the builtins were constructed. The impact of the bug on its own is pretty minor (namely that we could reserve more memory than necessary - the initial allocation is 10KiB), but I have some other changes brewing which make it more important to be precise about the attribution of the memory allocations. Release note: None --- pkg/sql/rowexec/windower.go | 39 +++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/pkg/sql/rowexec/windower.go b/pkg/sql/rowexec/windower.go index 773e4029166b..f4a439c1b644 100644 --- a/pkg/sql/rowexec/windower.go +++ b/pkg/sql/rowexec/windower.go @@ -112,6 +112,26 @@ func newWindower( w.inputTypes = input.OutputTypes() ctx := evalCtx.Ctx() + // Limit the memory use by creating a child monitor with a hard limit. + // windower will overflow to disk if this limit is not enough. + limit := execinfra.GetWorkMemLimit(flowCtx) + if limit < memRequiredByWindower { + // The limit is set very low (likely by the tests in order to improve + // the test coverage), but the windower requires some amount of RAM, so + // we override the limit. This behavior is acceptable given that we + // don't expect anyone to lower the setting to less than 100KiB in + // production. + limit = memRequiredByWindower + } + limitedMon := mon.NewMonitorInheritWithLimit("windower-limited", limit, evalCtx.Mon) + limitedMon.StartNoReserved(ctx, evalCtx.Mon) + w.acc = limitedMon.MakeBoundAccount() + // If we have aggregate builtins that aggregate a single datum, we want + // them to reuse the same shared memory account with the windower. Notably, + // we need to update the eval context before constructing the window + // builtins. + evalCtx.SingleDatumAggMemAccount = &w.acc + w.partitionBy = spec.PartitionBy windowFns := spec.WindowFns w.windowFns = make([]*windowFunc, 0, len(windowFns)) @@ -145,20 +165,6 @@ func newWindower( } w.outputRow = make(rowenc.EncDatumRow, len(w.outputTypes)) - // Limit the memory use by creating a child monitor with a hard limit. - // windower will overflow to disk if this limit is not enough. - limit := execinfra.GetWorkMemLimit(flowCtx) - if limit < memRequiredByWindower { - // The limit is set very low (likely by the tests in order to improve - // the test coverage), but the windower requires some amount of RAM, so - // we override the limit. This behavior is acceptable given that we - // don't expect anyone to lower the setting to less than 100KiB in - // production. - limit = memRequiredByWindower - } - limitedMon := mon.NewMonitorInheritWithLimit("windower-limited", limit, evalCtx.Mon) - limitedMon.StartNoReserved(ctx, evalCtx.Mon) - if err := w.InitWithEvalCtx( w, post, @@ -191,11 +197,6 @@ func newWindower( return nil, err } - w.acc = w.MemMonitor.MakeBoundAccount() - // If we have aggregate builtins that aggregate a single datum, we want - // them to reuse the same shared memory account with the windower. - evalCtx.SingleDatumAggMemAccount = &w.acc - if execstats.ShouldCollectStats(ctx, flowCtx.CollectStats) { w.input = newInputStatCollector(w.input) w.ExecStatsForTrace = w.execStatsForTrace