diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go index adf0652f87bb..3c24a9897a5c 100644 --- a/pkg/sql/opt/memo/statistics_builder.go +++ b/pkg/sql/opt/memo/statistics_builder.go @@ -4181,7 +4181,11 @@ func (sb *statisticsBuilder) selectivityFromOredEquivalencies( var conjunctSelectivity props.Selectivity for f := 0; f < len(h.filters); f++ { - disjunction := h.filters[f] + disjunction := &h.filters[f] + if disjunction.ScalarProps().TightConstraints { + // applyFilters will have already handled this filter. + continue + } var disjuncts []opt.ScalarExpr if orExpr, ok := disjunction.Condition.(*OrExpr); !ok { continue @@ -4638,7 +4642,7 @@ func (sb *statisticsBuilder) buildStatsFromCheckConstraints( filters := *constraints.(*FiltersExpr) // For each ANDed check constraint... for i := 0; i < len(filters); i++ { - filter := filters[i] + filter := &filters[i] // This must be some type of comparison operation, or an OR or AND // expression. These operations have at least 2 children. if filter.Condition.ChildCount() < 2 { diff --git a/pkg/sql/opt/memo/testdata/stats/join b/pkg/sql/opt/memo/testdata/stats/join index 0263068330fe..50567b9bc324 100644 --- a/pkg/sql/opt/memo/testdata/stats/join +++ b/pkg/sql/opt/memo/testdata/stats/join @@ -2040,3 +2040,58 @@ project │ └── 82 [type=int] └── projections └── '1971-10-24' [as=date:19, type=date] + +# Regression test for #88455 - don't double-count selectivity for OR expressions +# with tight constraints in join ON conditions. +exec-ddl +CREATE TABLE t0_88455 (c0 INT); +---- + +exec-ddl +CREATE TABLE t1_88455 (c0 INT); +---- + +exec-ddl +ALTER TABLE t0_88455 INJECT STATISTICS '[ + { + "columns": [ + "c0" + ], + "created_at": "2022-08-09 09:00:00.00000", + "distinct_count": 13, + "name": "__auto__", + "null_count": 0, + "row_count": 13 + } +]' +---- + +exec-ddl +ALTER TABLE t1_88455 INJECT STATISTICS '[ + { + "columns": [ + "c0" + ], + "created_at": "2022-08-09 09:00:00.00000", + "distinct_count": 5, + "name": "__auto__", + "null_count": 0, + "row_count": 5 + } +]' +---- + +opt format=show-stats +SELECT * FROM t0_88455 LEFT OUTER JOIN t1_88455 ON t0_88455.c0<1 OR t0_88455.c0>1; +---- +left-join (cross) + ├── columns: c0:1(int) c0:5(int) + ├── stats: [rows=21.66667] + ├── scan t0_88455 + │ ├── columns: t0_88455.c0:1(int) + │ └── stats: [rows=13, distinct(1)=13, null(1)=0] + ├── scan t1_88455 + │ ├── columns: t1_88455.c0:5(int) + │ └── stats: [rows=5] + └── filters + └── (t0_88455.c0:1 < 1) OR (t0_88455.c0:1 > 1) [type=bool, outer=(1), constraints=(/1: (/NULL - /0] [/2 - ]; tight)] diff --git a/pkg/sql/opt/xform/general_funcs.go b/pkg/sql/opt/xform/general_funcs.go index ac39e745ea27..6ce63a8fd8ad 100644 --- a/pkg/sql/opt/xform/general_funcs.go +++ b/pkg/sql/opt/xform/general_funcs.go @@ -682,7 +682,7 @@ func (c *CustomFuncs) numAllowedValues( filters := *constraints.(*memo.FiltersExpr) // For each ANDed check constraint... for i := 0; i < len(filters); i++ { - filter := filters[i] + filter := &filters[i] // This must be some type of comparison operation, or an OR or AND // expression. These operations have at least 2 children. if filter.Condition.ChildCount() < 2 { diff --git a/pkg/sql/rowexec/windower.go b/pkg/sql/rowexec/windower.go index 773e4029166b..f4a439c1b644 100644 --- a/pkg/sql/rowexec/windower.go +++ b/pkg/sql/rowexec/windower.go @@ -112,6 +112,26 @@ func newWindower( w.inputTypes = input.OutputTypes() ctx := evalCtx.Ctx() + // Limit the memory use by creating a child monitor with a hard limit. + // windower will overflow to disk if this limit is not enough. + limit := execinfra.GetWorkMemLimit(flowCtx) + if limit < memRequiredByWindower { + // The limit is set very low (likely by the tests in order to improve + // the test coverage), but the windower requires some amount of RAM, so + // we override the limit. This behavior is acceptable given that we + // don't expect anyone to lower the setting to less than 100KiB in + // production. + limit = memRequiredByWindower + } + limitedMon := mon.NewMonitorInheritWithLimit("windower-limited", limit, evalCtx.Mon) + limitedMon.StartNoReserved(ctx, evalCtx.Mon) + w.acc = limitedMon.MakeBoundAccount() + // If we have aggregate builtins that aggregate a single datum, we want + // them to reuse the same shared memory account with the windower. Notably, + // we need to update the eval context before constructing the window + // builtins. + evalCtx.SingleDatumAggMemAccount = &w.acc + w.partitionBy = spec.PartitionBy windowFns := spec.WindowFns w.windowFns = make([]*windowFunc, 0, len(windowFns)) @@ -145,20 +165,6 @@ func newWindower( } w.outputRow = make(rowenc.EncDatumRow, len(w.outputTypes)) - // Limit the memory use by creating a child monitor with a hard limit. - // windower will overflow to disk if this limit is not enough. - limit := execinfra.GetWorkMemLimit(flowCtx) - if limit < memRequiredByWindower { - // The limit is set very low (likely by the tests in order to improve - // the test coverage), but the windower requires some amount of RAM, so - // we override the limit. This behavior is acceptable given that we - // don't expect anyone to lower the setting to less than 100KiB in - // production. - limit = memRequiredByWindower - } - limitedMon := mon.NewMonitorInheritWithLimit("windower-limited", limit, evalCtx.Mon) - limitedMon.StartNoReserved(ctx, evalCtx.Mon) - if err := w.InitWithEvalCtx( w, post, @@ -191,11 +197,6 @@ func newWindower( return nil, err } - w.acc = w.MemMonitor.MakeBoundAccount() - // If we have aggregate builtins that aggregate a single datum, we want - // them to reuse the same shared memory account with the windower. - evalCtx.SingleDatumAggMemAccount = &w.acc - if execstats.ShouldCollectStats(ctx, flowCtx.CollectStats) { w.input = newInputStatCollector(w.input) w.ExecStatsForTrace = w.execStatsForTrace