From a783b598e19121eb6d5e8e233755db48c34c998b Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Tue, 5 Aug 2025 12:43:42 -0400 Subject: [PATCH 1/2] opt: estimate worst-case selectivity of placeholder equalities Previously, we calculated the selectivity of placeholder equality filters, e.g., `x = $1`, using the distinct count of a column and total row count. This represented an average-case selectivity. Now, we instead estimate the worst-case selectivity using the maximum frequency of the histogram of the constrained column. This helps avoid choosing a generic query plan under `plan_cache_mode=auto` that performs poorly for heavy-hitter placeholder values. Fixes #151373 Release note (performance improvement): The cost of generic query plans is now calculated based on worst-case selectivities for placeholder equalities (e.g., x = $1). This reduces the chance of suboptimal generic query plans being chosen when `plan_cache_mode=auto`. --- .../exec/execbuilder/testdata/explain_redact | 4 +- pkg/sql/opt/memo/expr_format.go | 8 + pkg/sql/opt/memo/statistics_builder.go | 181 +++++++--- pkg/sql/opt/memo/testdata/stats/generic | 315 +++++++++++++++++- pkg/sql/opt/ops/relational.opt | 5 + pkg/sql/opt/props/histogram.go | 13 +- pkg/sql/opt/props/histogram_test.go | 4 +- pkg/sql/opt/props/selectivity.go | 12 +- pkg/sql/opt/xform/generic_funcs.go | 15 +- pkg/sql/opt/xform/rules/generic.opt | 3 +- pkg/sql/opt/xform/testdata/external/hibernate | 2 + pkg/sql/opt/xform/testdata/external/nova | 10 + pkg/sql/opt/xform/testdata/rules/generic | 16 + pkg/sql/opt/xform/testdata/rules/groupby | 4 +- pkg/sql/opt/xform/testdata/rules/join | 18 +- pkg/sql/opt/xform/testdata/rules/join_order | 12 +- pkg/sql/opt/xform/testdata/rules/select | 2 +- 17 files changed, 547 insertions(+), 77 deletions(-) diff --git a/pkg/sql/opt/exec/execbuilder/testdata/explain_redact b/pkg/sql/opt/exec/execbuilder/testdata/explain_redact index a47a9f711a96..18c543497b34 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/explain_redact +++ b/pkg/sql/opt/exec/execbuilder/testdata/explain_redact @@ -678,7 +678,7 @@ upsert bc query T EXPLAIN (OPT, MEMO, REDACT) INSERT INTO bc SELECT a::float + 1 FROM a ON CONFLICT (b) DO UPDATE SET b = bc.b + 100 ---- -memo (optimized, ~36KB, required=[presentation: info:25] [distribution: test]) +memo (optimized, ~37KB, required=[presentation: info:25] [distribution: test]) ├── G1: (explain G2 [distribution: test]) │ └── [presentation: info:25] [distribution: test] │ ├── best: (explain G2="[distribution: test]" [distribution: test]) @@ -2435,7 +2435,7 @@ project query T EXPLAIN (OPT, MEMO, REDACT) SELECT * FROM bc JOIN f ON b = f + 1 ---- -memo (optimized, ~28KB, required=[presentation: info:14] [distribution: test]) +memo (optimized, ~29KB, required=[presentation: info:14] [distribution: test]) ├── G1: (explain G2 [presentation: b:1,c:2,f:7] [distribution: test]) │ └── [presentation: info:14] [distribution: test] │ ├── best: (explain G2="[presentation: b:1,c:2,f:7] [distribution: test]" [presentation: b:1,c:2,f:7] [distribution: test]) diff --git a/pkg/sql/opt/memo/expr_format.go b/pkg/sql/opt/memo/expr_format.go index 7fab626444a2..540204138a63 100644 --- a/pkg/sql/opt/memo/expr_format.go +++ b/pkg/sql/opt/memo/expr_format.go @@ -885,12 +885,20 @@ func (f *ExprFmtCtx) formatRelational(e RelExpr, tp treeprinter.Node) { if relational.HasPlaceholder { writeFlag("has-placeholder") } + if p, ok := e.Private().(*JoinPrivate); ok { + if !p.ParameterizedCols.Empty() { + tp.Childf("parameterized columns: %s", 
p.ParameterizedCols) + } + } if lookupJoin, ok := e.(*LookupJoinExpr); ok { // For lookup joins, indicate whether reverse scans are required to // satisfy the ordering. if lookupJoinMustUseReverseScans(md, lookupJoin, &required.Ordering) { writeFlag("reverse-scans") } + if !lookupJoin.ParameterizedCols.Empty() { + tp.Childf("parameterized columns: %s", lookupJoin.ParameterizedCols) + } } if f.Buffer.Len() != 0 { diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go index 4b70d1f69b37..402ff5a50e94 100644 --- a/pkg/sql/opt/memo/statistics_builder.go +++ b/pkg/sql/opt/memo/statistics_builder.go @@ -1031,8 +1031,11 @@ func (sb *statisticsBuilder) constrainScan( // Calculate distinct counts and histograms for the partial index predicate // ------------------------------------------------------------------------ if pred != nil { - predConstrainedCols, predHistCols := + predConstrainedCols, predHistCols, maxFreqCols := sb.applyFilters(pred, scan, relProps, false /* skipOrTermAccounting */, &unapplied) + if !maxFreqCols.Empty() { + panic(errors.AssertionFailedf("unexpected placeholder equality columns in partial index predicate")) + } constrainedCols.UnionWith(predConstrainedCols) constrainedCols = sb.tryReduceCols(constrainedCols, s, MakeTableFuncDep(sb.md, scan.Table)) histCols.UnionWith(predHistCols) @@ -1056,7 +1059,7 @@ func (sb *statisticsBuilder) constrainScan( // Calculate row count and selectivity // ----------------------------------- corr := sb.correlationFromMultiColDistinctCounts(constrainedCols, scan, s) - s.ApplySelectivity(sb.selectivityFromConstrainedCols(constrainedCols, histCols, scan, s, corr)) + s.ApplySelectivity(sb.selectivityFromConstrainedCols(constrainedCols, histCols, opt.ColSet{}, scan, s, corr)) s.ApplySelectivity(sb.selectivityFromUnappliedConjuncts(unapplied)) s.ApplySelectivity(sb.selectivityFromNullsRemoved(scan, notNullCols, constrainedCols)) } @@ -1260,7 +1263,7 @@ func (sb *statisticsBuilder) buildInvertedFilter( s.VirtualCols.UnionWith(inputStats.VirtualCols) corr := sb.correlationFromMultiColDistinctCounts(constrainedCols, invFilter, s) - s.ApplySelectivity(sb.selectivityFromConstrainedCols(constrainedCols, histCols, invFilter, s, corr)) + s.ApplySelectivity(sb.selectivityFromConstrainedCols(constrainedCols, histCols, opt.ColSet{}, invFilter, s, corr)) s.ApplySelectivity(sb.selectivityFromNullsRemoved(invFilter, relProps.NotNullCols, constrainedCols)) sb.finalizeFromCardinality(relProps) @@ -1385,7 +1388,7 @@ func (sb *statisticsBuilder) buildJoin( // Calculate distinct counts for constrained columns in the ON conditions // ---------------------------------------------------------------------- var unapplied filterCount - constrainedCols, histCols := + constrainedCols, histCols, maxFreqCols := sb.applyFilters(h.filters, join, relProps, true /* skipOrTermAccounting */, &unapplied) // Try to reduce the number of columns used for selectivity @@ -1442,7 +1445,7 @@ func (sb *statisticsBuilder) buildJoin( s.ApplySelectivity(sb.selectivityFromInvertedJoinCondition(join, s)) } corr := sb.correlationFromMultiColDistinctCountsForJoin(constrainedCols, leftCols, rightCols, join, s) - s.ApplySelectivity(sb.selectivityFromConstrainedCols(constrainedCols, histCols, join, s, corr)) + s.ApplySelectivity(sb.selectivityFromConstrainedCols(constrainedCols, histCols, maxFreqCols, join, s, corr)) s.ApplySelectivity(sb.selectivityFromUnappliedConjuncts(unapplied)) // Ignore columns that are already null in the input when calculating @@ -1938,7 
+1941,7 @@ func (sb *statisticsBuilder) buildZigzagJoin( // to iterate through FixedCols here if we are already processing the ON // clause. var unapplied filterCount - constrainedCols, histCols := + constrainedCols, histCols, maxFreqCols := sb.applyFilters(zigzag.On, zigzag, relProps, false /* skipOrTermAccounting */, &unapplied) // Application of constraints on inverted indexes needs to be handled a @@ -1990,7 +1993,7 @@ func (sb *statisticsBuilder) buildZigzagJoin( // TODO(msirek): Validate stats for inverted index zigzag join match // non-zigzag join stats. corr := sb.correlationFromMultiColDistinctCounts(constrainedCols, zigzag, s) - s.ApplySelectivity(sb.selectivityFromConstrainedCols(constrainedCols, histCols, zigzag, s, corr)) + s.ApplySelectivity(sb.selectivityFromConstrainedCols(constrainedCols, histCols, maxFreqCols, zigzag, s, corr)) } else { multiColSelectivity, _, _ := sb.selectivityFromMultiColDistinctCounts(constrainedCols, zigzag, s) s.ApplySelectivity(multiColSelectivity) @@ -3398,7 +3401,7 @@ func (sb *statisticsBuilder) filterRelExpr( // Calculate distinct counts and histograms for constrained columns // ---------------------------------------------------------------- var unapplied filterCount - constrainedCols, histCols := + constrainedCols, histCols, maxFreqCols := sb.applyFilters(filters, e, relProps, false /* skipOrTermAccounting */, &unapplied) // Try to reduce the number of columns used for selectivity @@ -3412,7 +3415,7 @@ func (sb *statisticsBuilder) filterRelExpr( // Calculate row count and selectivity // ----------------------------------- corr := sb.correlationFromMultiColDistinctCounts(constrainedCols, e, s) - s.ApplySelectivity(sb.selectivityFromConstrainedCols(constrainedCols, histCols, e, s, corr)) + s.ApplySelectivity(sb.selectivityFromConstrainedCols(constrainedCols, histCols, maxFreqCols, e, s, corr)) s.ApplySelectivity(sb.selectivityFromEquivalencies(equivReps, &relProps.FuncDeps, e, s)) s.ApplySelectivity(sb.selectivityFromUnappliedConjuncts(unapplied)) s.ApplySelectivity(sb.selectivityFromNullsRemoved(e, notNullCols, constrainedCols)) @@ -3436,7 +3439,7 @@ func (sb *statisticsBuilder) applyFilters( relProps *props.Relational, skipOrTermAccounting bool, unapplied *filterCount, -) (constrainedCols, histCols opt.ColSet) { +) (constrainedCols, histCols, maxFreqCols opt.ColSet) { // Special hack for inverted joins. Add constant filters from the equality // conditions. // TODO(rytaft): the correct way to do this is probably to fully implement @@ -3449,7 +3452,7 @@ func (sb *statisticsBuilder) applyFilters( for i := range filters { var unappliedLocal filterCount - constrainedColsLocal, histColsLocal := + constrainedColsLocal, histColsLocal, maxFreqColsLocal := sb.applyFiltersItem(&filters[i], e, relProps, &unappliedLocal) // Selectivity from OrExprs is computed elsewhere when skipOrTermAccounting // is true. @@ -3458,9 +3461,10 @@ func (sb *statisticsBuilder) applyFilters( } constrainedCols.UnionWith(constrainedColsLocal) histCols.UnionWith(histColsLocal) + maxFreqCols.UnionWith(maxFreqColsLocal) } - return constrainedCols, histCols + return constrainedCols, histCols, maxFreqCols } // applyFiltersItem uses constraints to update the distinct counts and @@ -3478,6 +3482,10 @@ func (sb *statisticsBuilder) applyFilters( // updateDistinctCountsFromConstraint for more details about how distinct // counts are calculated from constraints. 
// +// Equalities between a variable and a placeholder, or between a variable and +// another variable that is equivalent to a placeholder are handled separately. +// These columns are included in maxFreqCols. See selectivityFromMaxFrequencies. +// // Equalities between two variables (e.g., var1=var2) are handled separately. // See applyEquivalencies and selectivityFromEquivalencies for details. // @@ -3485,7 +3493,7 @@ func (sb *statisticsBuilder) applyFilters( // selectivityFromInvertedJoinCondition. func (sb *statisticsBuilder) applyFiltersItem( filter *FiltersItem, e RelExpr, relProps *props.Relational, unapplied *filterCount, -) (constrainedCols, histCols opt.ColSet) { +) (constrainedCols, histCols, maxFreqCols opt.ColSet) { s := relProps.Statistics() // Before checking anything, try to replace any virtual computed column @@ -3501,29 +3509,29 @@ func (sb *statisticsBuilder) applyFiltersItem( } } + // Special case: a placeholder equality filter. + if col, ok := isPlaceholderEqualityFilter(filter.Condition, e); ok { + cols := opt.MakeColSet(col) + sb.ensureColStat(cols, 1 /* maxDistinctCount */, e, s) + return cols, opt.ColSet{}, cols + } + if isEqualityWithTwoVars(filter.Condition) { // Equalities are handled by applyEquivalencies. - return opt.ColSet{}, opt.ColSet{} + return opt.ColSet{}, opt.ColSet{}, opt.ColSet{} } // Special case: a trigram similarity filter. if isSimilarityFilter(filter.Condition) && sb.evalCtx.SessionData().OptimizerUseImprovedTrigramSimilaritySelectivity { unapplied.similarity++ - return opt.ColSet{}, opt.ColSet{} - } - - // Special case: a placeholder equality filter. - if col, ok := isPlaceholderEqualityFilter(filter.Condition); ok { - cols := opt.MakeColSet(col) - sb.ensureColStat(cols, 1 /* maxDistinctCount */, e, s) - return cols, opt.ColSet{} + return opt.ColSet{}, opt.ColSet{}, opt.ColSet{} } // Special case: The current conjunct is an inverted join condition which is // handled by selectivityFromInvertedJoinCondition. if isInvertedJoinCond(filter.Condition) { - return opt.ColSet{}, opt.ColSet{} + return opt.ColSet{}, opt.ColSet{}, opt.ColSet{} } // Special case: The current conjunct is a JSON or Array Contains @@ -3550,7 +3558,7 @@ func (sb *statisticsBuilder) applyFiltersItem( // constrained scans, we apply the same logic here. unapplied.unknown += 2 * numPaths } - return opt.ColSet{}, opt.ColSet{} + return opt.ColSet{}, opt.ColSet{}, opt.ColSet{} } // Update constrainedCols after the above check for isEqualityWithTwoVars. 
@@ -3575,7 +3583,7 @@ func (sb *statisticsBuilder) applyFiltersItem( unapplied.unknown++ } } - return constrainedCols, histCols + return constrainedCols, histCols, opt.ColSet{} } if constraintUnion, numUnappliedDisjuncts := sb.buildDisjunctionConstraints(filter); len(constraintUnion) > 0 { @@ -3632,7 +3640,7 @@ func (sb *statisticsBuilder) applyFiltersItem( unapplied.unknown++ } - return constrainedCols, histCols + return constrainedCols, histCols, opt.ColSet{} } // buildDisjunctionConstraints returns a slice of tight constraint sets that are @@ -3713,7 +3721,9 @@ func (sb *statisticsBuilder) constrainExpr( // Calculate row count and selectivity // ----------------------------------- corr := sb.correlationFromMultiColDistinctCounts(constrainedCols, e, s) - s.ApplySelectivity(sb.selectivityFromConstrainedCols(constrainedCols, histCols, e, s, corr)) + s.ApplySelectivity(sb.selectivityFromConstrainedCols( + constrainedCols, histCols, opt.ColSet{}, e, s, corr, + )) s.ApplySelectivity(sb.selectivityFromNullsRemoved(e, notNullCols, constrainedCols)) } @@ -4559,24 +4569,79 @@ func (sb *statisticsBuilder) selectivityFromHistograms( return selectivity, selectivityUpperBound } +// selectivityFromMaxFrequencies calculates the selectivity of equality +// filters by using the maximum frequency of the histograms of the constrained +// columns. This represents a worst-case selectivity estimate and is used to +// estimate the minimum selectivity for an equality with a placeholder while +// building a generic query plan. It returns the selectivity, the upper-bound +// selectivity, and the subset of cols for which the maximum frequency was used +// to calculate the selectivity. +func (sb *statisticsBuilder) selectivityFromMaxFrequencies( + cols opt.ColSet, e RelExpr, +) (selectivity, selectivityUpperBound props.Selectivity, maxFreqCols opt.ColSet) { + selectivity = props.OneSelectivity + selectivityUpperBound = props.OneSelectivity + for col, ok := cols.Next(0); ok; col, ok = cols.Next(col + 1) { + c := opt.MakeColSet(col) + inputColStat, inputStats := sb.colStatFromInput(c, e) + if inputColStat.Histogram == nil { + continue + } + + // Equality filters preclude NULL values. + const ignoreNulls = true + sel := props.OneSelectivity + if inputStats.RowCount > 0 { + sel = props.MakeSelectivityFromFraction( + inputColStat.Histogram.MaxFrequency(ignoreNulls), inputStats.RowCount, + ) + } + + // The maximum possible selectivity of the entire expression is the minimum + // selectivity of all individual predicates. + selectivityUpperBound = props.MinSelectivity(selectivityUpperBound, sel) + selectivity.Multiply(sel) + maxFreqCols.Add(col) + } + + return selectivity, selectivityUpperBound, maxFreqCols +} + // selectivityFromConstrainedCols calculates the selectivity from the // constrained columns. histCols is a subset of constrainedCols, and represents -// the columns that have histograms available. correlation represents the -// correlation between the columns, and is a number between 0 and 1, where 0 -// means the columns are completely independent, and 1 means the columns are -// completely correlated. +// the columns that have histograms available. maxFreqCols is a subset of +// constrainedCols, and it contains the columns for which the worst-case +// selectivity should be estimated using the histogram's max frequency. 
+// correlation represents the correlation between the columns, and is a number +// between 0 and 1, where 0 means the columns are completely independent, and 1 +// means the columns are completely correlated. func (sb *statisticsBuilder) selectivityFromConstrainedCols( - constrainedCols, histCols opt.ColSet, e RelExpr, s *props.Statistics, correlation float64, + constrainedCols, histCols, maxFreqCols opt.ColSet, + e RelExpr, + s *props.Statistics, + correlation float64, ) (selectivity props.Selectivity) { if buildutil.CrdbTestBuild && (correlation < 0 || correlation > 1) { panic(errors.AssertionFailedf("correlation must be between 0 and 1. Found %f", correlation)) } + // Calculate selectivity from histograms. selectivity, selectivityUpperBound := sb.selectivityFromHistograms(histCols, e, s) - selectivity2, selectivityUpperBound2 := sb.selectivityFromSingleColDistinctCounts( - constrainedCols.Difference(histCols), e, s, - ) + + // Calculate selectivity from max frequencies for columns held equal to + // placeholders. + selectivity2, selectivityUpperBound2, appliedMaxFreqCols := sb.selectivityFromMaxFrequencies(maxFreqCols, e) selectivity.Multiply(selectivity2) - selectivityUpperBound = props.MinSelectivity(selectivityUpperBound, selectivityUpperBound2) + + // Calculate selectivity from distinct counts for the remaining columns. + selectivity3, selectivityUpperBound3 := sb.selectivityFromSingleColDistinctCounts( + constrainedCols.Difference(histCols).Difference(appliedMaxFreqCols), e, s, + ) + selectivity.Multiply(selectivity3) + + // Find the minimum upper bound selectivity. + selectivityUpperBound = + props.MinSelectivity3(selectivityUpperBound, selectivityUpperBound2, selectivityUpperBound3) + selectivity.Add(props.MakeSelectivity( correlation * (selectivityUpperBound.AsFloat() - selectivity.AsFloat()), )) @@ -4982,12 +5047,44 @@ func isSimilarityFilter(e opt.ScalarExpr) bool { } // isPlaceholderEqualityFilter returns a column ID and true if the given -// condition is an equality between a column and a placeholder. -func isPlaceholderEqualityFilter(e opt.ScalarExpr) (opt.ColumnID, bool) { - if e.Op() == opt.EqOp && e.Child(1).Op() == opt.PlaceholderOp { - if v, ok := e.Child(0).(*VariableExpr); ok { - return v.Col, true - } +// condition is an equality between a column and a placeholder. There are two +// cases. +// +// 1. A column is directly compared to a placeholder, e.g. v=$1. +// +// 2. A column is compared to another column that is a "parameterized column", +// i.e., it is equivalent to a placeholder. Parameterized columns exist in +// generic query plans and are produced from a Values expression on the RHS of +// an inner-join. See the GenerateParameterizedJoin exploration rule. +func isPlaceholderEqualityFilter(filter opt.ScalarExpr, e RelExpr) (opt.ColumnID, bool) { + if filter.Op() != opt.EqOp { + return 0, false + } + + v, ok := filter.Child(0).(*VariableExpr) + if !ok { + return 0, false + } + + // Case 1. + if filter.Child(1).Op() == opt.PlaceholderOp { + return v.Col, true + } + + // Case 2. 
+ p, ok := filter.Child(1).(*VariableExpr) + if !ok { + return 0, false + } + var parameterizedCols opt.ColSet + switch priv := e.Private().(type) { + case *JoinPrivate: + parameterizedCols = priv.ParameterizedCols + case *LookupJoinPrivate: + parameterizedCols = priv.ParameterizedCols + } + if parameterizedCols.Contains(p.Col) { + return v.Col, true } return 0, false } diff --git a/pkg/sql/opt/memo/testdata/stats/generic b/pkg/sql/opt/memo/testdata/stats/generic index c9c267b257cd..9a8a2ea60742 100644 --- a/pkg/sql/opt/memo/testdata/stats/generic +++ b/pkg/sql/opt/memo/testdata/stats/generic @@ -2,10 +2,15 @@ exec-ddl CREATE TABLE t ( k INT PRIMARY KEY, i INT, - s STRING + s STRING, + INDEX (i) ) ---- +# ------------------------ +# Tests without Histograms +# ------------------------ + exec-ddl ALTER TABLE t INJECT STATISTICS '[ { @@ -166,3 +171,311 @@ select │ └── fd: (1)-->(2,3) └── filters └── (i:2 = $1) OR (s:3 = $2) [type=bool, outer=(2,3)] + +# --------------------- +# Tests with Histograms +# --------------------- + +exec-ddl +ALTER TABLE t INJECT STATISTICS '[ + { + "columns": ["k"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 1000, + "distinct_count": 1000 + }, + { + "columns": ["i"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 1000, + "distinct_count": 41, + "null_count": 30, + "avg_size": 2, + "histo_col_type": "int", + "histo_buckets": [ + {"num_eq": 0, "num_range": 0, "distinct_range": 0, "upper_bound": "0"}, + {"num_eq": 10, "num_range": 90, "distinct_range": 9, "upper_bound": "100"}, + {"num_eq": 10, "num_range": 180, "distinct_range": 9, "upper_bound": "200"}, + {"num_eq": 20, "num_range": 270, "distinct_range": 9, "upper_bound": "300"}, + {"num_eq": 30, "num_range": 360, "distinct_range": 9, "upper_bound": "400"} + ] + }, + { + "columns": ["s"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 1000, + "distinct_count": 20, + "avg_size": 3, + "histo_col_type": "string", + "histo_buckets": [ + {"num_eq": 0, "num_range": 0, "distinct_range": 0, "upper_bound": "apple"}, + {"num_eq": 300, "num_range": 100, "distinct_range": 9, "upper_bound": "banana"}, + {"num_eq": 500, "num_range": 100, "distinct_range": 9, "upper_bound": "cherry"} + ] + } +]' +---- + +norm +SELECT * FROM t WHERE k = $1 +---- +select + ├── columns: k:1(int!null) i:2(int) s:3(string) + ├── cardinality: [0 - 1] + ├── has-placeholder + ├── stats: [rows=1, distinct(1)=1, null(1)=0] + ├── key: () + ├── fd: ()-->(1-3) + ├── scan t + │ ├── columns: k:1(int!null) i:2(int) s:3(string) + │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0] + │ ├── key: (1) + │ └── fd: (1)-->(2,3) + └── filters + └── k:1 = $1 [type=bool, outer=(1), constraints=(/1: (/NULL - ]), fd=()-->(1)] + +# The row count of the filter is the max frequency of i's histogram. +norm +SELECT * FROM t WHERE i = $1 +---- +select + ├── columns: k:1(int!null) i:2(int!null) s:3(string) + ├── has-placeholder + ├── stats: [rows=30, distinct(2)=1, null(2)=0] + ├── key: (1) + ├── fd: ()-->(2), (1)-->(3) + ├── scan t + │ ├── columns: k:1(int!null) i:2(int) s:3(string) + │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=41, null(2)=30] + │ │ histogram(2)= 0 30 0 0 90 10 180 10 270 20 360 30 + │ │ <--- NULL --- 0 ---- 100 ----- 200 ----- 300 ----- 400 + │ ├── key: (1) + │ └── fd: (1)-->(2,3) + └── filters + └── i:2 = $1 [type=bool, outer=(2), constraints=(/2: (/NULL - ]), fd=()-->(2)] + +# Similar case as above, but with opt to ensure the correct row counts are used +# for new memo groups. 
+opt +SELECT k FROM t WHERE i = $1 +---- +project + ├── columns: k:1(int!null) + ├── has-placeholder + ├── stats: [rows=30] + ├── key: (1) + └── project + ├── columns: k:1(int!null) i:2(int!null) + ├── has-placeholder + ├── stats: [rows=30, distinct(2)=1, null(2)=0] + ├── key: (1) + ├── fd: ()-->(2) + └── inner-join (lookup t@t_i_idx) + ├── columns: k:1(int!null) i:2(int!null) "$1":6(int!null) + ├── flags: disallow merge join + ├── key columns: [6] = [2] + ├── parameterized columns: (6) + ├── has-placeholder + ├── stats: [rows=30, distinct(2)=1, null(2)=0, distinct(6)=1, null(6)=0] + ├── key: (1) + ├── fd: ()-->(2,6), (2)==(6), (6)==(2) + ├── values + │ ├── columns: "$1":6(int) + │ ├── cardinality: [1 - 1] + │ ├── has-placeholder + │ ├── stats: [rows=1, distinct(6)=1, null(6)=0] + │ ├── key: () + │ ├── fd: ()-->(6) + │ └── ($1,) [type=tuple{int}] + └── filters (true) + +# Similar case as above, but with opt to ensure the correct row counts are used +# for new memo groups. +opt +SELECT * FROM t WHERE i = $1 +---- +project + ├── columns: k:1(int!null) i:2(int!null) s:3(string) + ├── has-placeholder + ├── stats: [rows=30, distinct(2)=1, null(2)=0] + ├── key: (1) + ├── fd: ()-->(2), (1)-->(3) + └── inner-join (lookup t) + ├── columns: k:1(int!null) i:2(int!null) s:3(string) "$1":6(int!null) + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── has-placeholder + ├── stats: [rows=30, distinct(2)=1, null(2)=0, distinct(6)=1, null(6)=0] + ├── key: (1) + ├── fd: ()-->(2,6), (1)-->(3), (2)==(6), (6)==(2) + ├── inner-join (lookup t@t_i_idx) + │ ├── columns: k:1(int!null) i:2(int!null) "$1":6(int!null) + │ ├── flags: disallow merge join + │ ├── key columns: [6] = [2] + │ ├── parameterized columns: (6) + │ ├── has-placeholder + │ ├── stats: [rows=30, distinct(2)=1, null(2)=0, distinct(6)=1, null(6)=0] + │ ├── key: (1) + │ ├── fd: ()-->(2,6), (2)==(6), (6)==(2) + │ ├── values + │ │ ├── columns: "$1":6(int) + │ │ ├── cardinality: [1 - 1] + │ │ ├── has-placeholder + │ │ ├── stats: [rows=1, distinct(6)=1, null(6)=0] + │ │ ├── key: () + │ │ ├── fd: ()-->(6) + │ │ └── ($1,) [type=tuple{int}] + │ └── filters (true) + └── filters (true) + +# The row count of the filter is the max frequency of s's histogram. +norm +SELECT * FROM t WHERE $1 = s +---- +select + ├── columns: k:1(int!null) i:2(int) s:3(string!null) + ├── has-placeholder + ├── stats: [rows=500, distinct(3)=1, null(3)=0] + ├── key: (1) + ├── fd: ()-->(3), (1)-->(2) + ├── scan t + │ ├── columns: k:1(int!null) i:2(int) s:3(string) + │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(3)=20, null(3)=0] + │ │ histogram(3)= 0 0 100 300 100 500 + │ │ <--- 'apple' ----- 'banana' ----- 'cherry' + │ ├── key: (1) + │ └── fd: (1)-->(2,3) + └── filters + └── s:3 = $1 [type=bool, outer=(3), constraints=(/3: (/NULL - ]), fd=()-->(3)] + +# Similar case to the previous one, but with a join on a values expression to +# mimic a parameterized join of a generic query plan. +# TODO(mgartner): The row count of the inner-join should be 500, because that is +# the maximum frequency of s. It is currently 50 because the v.s is not marked +# as a "parameterized column", which only happens during the +# GenerateParameterizedJoin exploration rule. I think we can address this by +# including paramterized columns in logical properties and propagating them +# upward. 
+norm +SELECT * FROM (VALUES ($1::STRING)) v(s) JOIN t ON t.s = v.s +---- +inner-join (hash) + ├── columns: s:1(string!null) k:2(int!null) i:3(int) s:4(string!null) + ├── multiplicity: left-rows(zero-or-more), right-rows(zero-or-one) + ├── has-placeholder + ├── stats: [rows=50, distinct(1)=1, null(1)=0, distinct(4)=1, null(4)=0] + ├── key: (2) + ├── fd: ()-->(1,4), (2)-->(3), (1)==(4), (4)==(1) + ├── values + │ ├── columns: column1:1(string) + │ ├── cardinality: [1 - 1] + │ ├── has-placeholder + │ ├── stats: [rows=1, distinct(1)=1, null(1)=0] + │ ├── key: () + │ ├── fd: ()-->(1) + │ └── ($1,) [type=tuple{string}] + ├── scan t + │ ├── columns: k:2(int!null) i:3(int) s:4(string) + │ ├── stats: [rows=1000, distinct(4)=20, null(4)=0] + │ │ histogram(4)= 0 0 100 300 100 500 + │ │ <--- 'apple' ----- 'banana' ----- 'cherry' + │ ├── key: (2) + │ └── fd: (2)-->(3,4) + └── filters + └── s:4 = column1:1 [type=bool, outer=(1,4), constraints=(/1: (/NULL - ]; /4: (/NULL - ]), fd=(1)==(4), (4)==(1)] + +# The row count of the filter is based on the product of selectivities from the +# max frequencies of i's and s's histograms. +norm +SELECT * FROM t WHERE i = $1 AND s = $2 +---- +select + ├── columns: k:1(int!null) i:2(int!null) s:3(string!null) + ├── has-placeholder + ├── stats: [rows=15, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(2,3)=1, null(2,3)=0] + ├── key: (1) + ├── fd: ()-->(2,3) + ├── scan t + │ ├── columns: k:1(int!null) i:2(int) s:3(string) + │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=41, null(2)=30, distinct(3)=20, null(3)=0, distinct(2,3)=820, null(2,3)=0] + │ │ histogram(2)= 0 30 0 0 90 10 180 10 270 20 360 30 + │ │ <--- NULL --- 0 ---- 100 ----- 200 ----- 300 ----- 400 + │ │ histogram(3)= 0 0 100 300 100 500 + │ │ <--- 'apple' ----- 'banana' ----- 'cherry' + │ ├── key: (1) + │ └── fd: (1)-->(2,3) + └── filters + ├── i:2 = $1 [type=bool, outer=(2), constraints=(/2: (/NULL - ]), fd=()-->(2)] + └── s:3 = $2 [type=bool, outer=(3), constraints=(/3: (/NULL - ]), fd=()-->(3)] + +norm +SELECT * FROM t WHERE i > $1 +---- +select + ├── columns: k:1(int!null) i:2(int!null) s:3(string) + ├── has-placeholder + ├── stats: [rows=323.333, distinct(2)=41, null(2)=0] + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── scan t + │ ├── columns: k:1(int!null) i:2(int) s:3(string) + │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=41, null(2)=30] + │ │ histogram(2)= 0 30 0 0 90 10 180 10 270 20 360 30 + │ │ <--- NULL --- 0 ---- 100 ----- 200 ----- 300 ----- 400 + │ ├── key: (1) + │ └── fd: (1)-->(2,3) + └── filters + └── i:2 > $1 [type=bool, outer=(2), constraints=(/2: (/NULL - ])] + +norm +SELECT * FROM t WHERE i = $1 OR i = $2 +---- +select + ├── columns: k:1(int!null) i:2(int!null) s:3(string) + ├── has-placeholder + ├── stats: [rows=323.333, distinct(2)=41, null(2)=0] + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── scan t + │ ├── columns: k:1(int!null) i:2(int) s:3(string) + │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=41, null(2)=30] + │ │ histogram(2)= 0 30 0 0 90 10 180 10 270 20 360 30 + │ │ <--- NULL --- 0 ---- 100 ----- 200 ----- 300 ----- 400 + │ ├── key: (1) + │ └── fd: (1)-->(2,3) + └── filters + └── (i:2 = $1) OR (i:2 = $2) [type=bool, outer=(2), constraints=(/2: (/NULL - ])] + +norm +SELECT * FROM t WHERE i IN ($1, $2, $3) +---- +select + ├── columns: k:1(int!null) i:2(int) s:3(string) + ├── has-placeholder + ├── stats: [rows=333.333] + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── scan t + │ ├── columns: k:1(int!null) i:2(int) s:3(string) 
+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0] + │ ├── key: (1) + │ └── fd: (1)-->(2,3) + └── filters + └── i:2 IN ($1, $2, $3) [type=bool, outer=(2)] + +norm +SELECT * FROM t WHERE i = $1 OR s = $2 +---- +select + ├── columns: k:1(int!null) i:2(int) s:3(string) + ├── has-placeholder + ├── stats: [rows=333.333] + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── scan t + │ ├── columns: k:1(int!null) i:2(int) s:3(string) + │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0] + │ ├── key: (1) + │ └── fd: (1)-->(2,3) + └── filters + └── (i:2 = $1) OR (s:3 = $2) [type=bool, outer=(2,3)] diff --git a/pkg/sql/opt/ops/relational.opt b/pkg/sql/opt/ops/relational.opt index f5322e00bc0f..871f2eb603ee 100644 --- a/pkg/sql/opt/ops/relational.opt +++ b/pkg/sql/opt/ops/relational.opt @@ -307,6 +307,11 @@ define JoinPrivate { # SkipReorderJoins indicates whether the ReorderJoins rule should match this # join. SkipReorderJoins bool + + # ParameterizedCols is the set of columns that are equivalent to placeholder + # values. These columns are typically created when exploring parameterized + # joins for generic query plans. + ParameterizedCols ColSet } # IndexJoin represents an inner join between an input expression and a primary diff --git a/pkg/sql/opt/props/histogram.go b/pkg/sql/opt/props/histogram.go index 80075b80673d..3cda530166c4 100644 --- a/pkg/sql/opt/props/histogram.go +++ b/pkg/sql/opt/props/histogram.go @@ -250,9 +250,18 @@ func (h *Histogram) maxDistinctValuesCount() float64 { } // MaxFrequency returns the maximum value of NumEq across all histogram buckets. -func (h *Histogram) MaxFrequency() float64 { +// ignoreNulls controls whether the "fake" bucket that we create for NULLs (if +// any are present) should be ignored. +func (h *Histogram) MaxFrequency(ignoreNulls bool) float64 { + if len(h.buckets) == 0 { + return 0 + } + var startIdx int + if ignoreNulls && h.buckets[0].UpperBound == tree.DNull { + startIdx = 1 + } var mf float64 - for i := range h.buckets { + for i := startIdx; i < len(h.buckets); i++ { if numEq := h.numEq(i); numEq > mf { mf = numEq } diff --git a/pkg/sql/opt/props/histogram_test.go b/pkg/sql/opt/props/histogram_test.go index f5153ec6d36f..1d6d8a86efab 100644 --- a/pkg/sql/opt/props/histogram_test.go +++ b/pkg/sql/opt/props/histogram_test.go @@ -183,7 +183,7 @@ func TestHistogram(t *testing.T) { if distinct != expected { t.Fatalf("expected %f but found %f", expected, distinct) } - maxFrequency, expected := h.MaxFrequency(), float64(35) + maxFrequency, expected := h.MaxFrequency(false /* ignoreNulls */), float64(35) if maxFrequency != expected { t.Fatalf("expected %f but found %f", expected, maxFrequency) } @@ -391,7 +391,7 @@ func TestHistogram(t *testing.T) { if testData[i].distinct != distinct { t.Fatalf("expected %f but found %f", testData[i].distinct, distinct) } - maxFrequency := roundVal(filtered.MaxFrequency()) + maxFrequency := roundVal(filtered.MaxFrequency(false /* ignoreNulls */)) if testData[i].maxFrequency != maxFrequency { t.Fatalf("expected %f but found %f", testData[i].maxFrequency, maxFrequency) } diff --git a/pkg/sql/opt/props/selectivity.go b/pkg/sql/opt/props/selectivity.go index 2e1a8e0a7a72..9a593946d883 100644 --- a/pkg/sql/opt/props/selectivity.go +++ b/pkg/sql/opt/props/selectivity.go @@ -89,10 +89,16 @@ func (s *Selectivity) Divide(other Selectivity) { // MinSelectivity returns the smaller value of two selectivities. 
func MinSelectivity(a, b Selectivity) Selectivity { - if a.selectivity < b.selectivity { - return a + return Selectivity{ + selectivity: min(a.selectivity, b.selectivity), + } +} + +// MinSelectivity3 returns the smallest value of three selectivities. +func MinSelectivity3(a, b, c Selectivity) Selectivity { + return Selectivity{ + selectivity: min(a.selectivity, b.selectivity, c.selectivity), } - return b } // MaxSelectivity returns the larger value of two selectivities. diff --git a/pkg/sql/opt/xform/generic_funcs.go b/pkg/sql/opt/xform/generic_funcs.go index 0bc3e0ea6c6f..2b5fe981ecd7 100644 --- a/pkg/sql/opt/xform/generic_funcs.go +++ b/pkg/sql/opt/xform/generic_funcs.go @@ -36,7 +36,7 @@ func (c *CustomFuncs) HasPlaceholdersOrStableExprs(e memo.RelExpr) bool { // placeholders or stable expressions, ok=false is returned. func (c *CustomFuncs) GenerateParameterizedJoinValuesAndFilters( filters memo.FiltersExpr, -) (values memo.RelExpr, newFilters memo.FiltersExpr, ok bool) { +) (values memo.RelExpr, newFilters memo.FiltersExpr, parameterizedCols opt.ColSet, ok bool) { var exprs memo.ScalarListExpr var cols opt.ColList placeholderCols := make(map[tree.PlaceholderIdx]opt.ColumnID) @@ -58,6 +58,7 @@ func (c *CustomFuncs) GenerateParameterizedJoinValuesAndFilters( placeholderCols[idx] = col exprs = append(exprs, t) cols = append(cols, col) + parameterizedCols.Add(col) return c.e.f.ConstructVariable(col) case *memo.FunctionExpr: @@ -70,6 +71,7 @@ func (c *CustomFuncs) GenerateParameterizedJoinValuesAndFilters( col := c.e.f.Metadata().AddColumn("", t.DataType()) exprs = append(exprs, t) cols = append(cols, col) + parameterizedCols.Add(col) return c.e.f.ConstructVariable(col) } } @@ -97,7 +99,7 @@ func (c *CustomFuncs) GenerateParameterizedJoinValuesAndFilters( // If no placeholders or stable expressions were replaced, there is nothing // to do. if len(exprs) == 0 { - return nil, nil, false + return nil, nil, opt.ColSet{}, false } // Create the Values expression with one row and one column for each @@ -113,14 +115,15 @@ func (c *CustomFuncs) GenerateParameterizedJoinValuesAndFilters( ID: c.e.f.Metadata().NextUniqueID(), }) - return values, newFilters, true + return values, newFilters, parameterizedCols, true } // ParameterizedJoinPrivate returns JoinPrivate that disabled join reordering and // merge join exploration. 
-func (c *CustomFuncs) ParameterizedJoinPrivate() *memo.JoinPrivate { +func (c *CustomFuncs) ParameterizedJoinPrivate(parameterizedCols opt.ColSet) *memo.JoinPrivate { return &memo.JoinPrivate{ - Flags: memo.DisallowMergeJoin, - SkipReorderJoins: true, + Flags: memo.DisallowMergeJoin, + ParameterizedCols: parameterizedCols, + SkipReorderJoins: true, } } diff --git a/pkg/sql/opt/xform/rules/generic.opt b/pkg/sql/opt/xform/rules/generic.opt index 4b89e4ab00c9..1401b3e7490b 100644 --- a/pkg/sql/opt/xform/rules/generic.opt +++ b/pkg/sql/opt/xform/rules/generic.opt @@ -38,6 +38,7 @@ ( $values $newFilters + $parameterizedCols $ok ):(GenerateParameterizedJoinValuesAndFilters $filters @@ -51,7 +52,7 @@ $values $scan $newFilters - (ParameterizedJoinPrivate) + (ParameterizedJoinPrivate $parameterizedCols) ) [] (OutputCols (Root)) diff --git a/pkg/sql/opt/xform/testdata/external/hibernate b/pkg/sql/opt/xform/testdata/external/hibernate index 5e816bb80564..011402401cdb 100644 --- a/pkg/sql/opt/xform/testdata/external/hibernate +++ b/pkg/sql/opt/xform/testdata/external/hibernate @@ -1003,6 +1003,7 @@ project │ ├── key columns: [17] = [9] │ ├── lookup columns are key │ ├── cardinality: [0 - 1] + │ ├── parameterized columns: (17) │ ├── has-placeholder │ ├── key: () │ ├── fd: ()-->(9,12,17), (9)==(17), (17)==(9) @@ -1068,6 +1069,7 @@ project │ ├── key columns: [17] = [9] │ ├── lookup columns are key │ ├── cardinality: [0 - 1] + │ ├── parameterized columns: (17) │ ├── has-placeholder │ ├── key: () │ ├── fd: ()-->(9,12,17), (9)==(17), (17)==(9) diff --git a/pkg/sql/opt/xform/testdata/external/nova b/pkg/sql/opt/xform/testdata/external/nova index aa2b0a725165..b4e71da91d5a 100644 --- a/pkg/sql/opt/xform/testdata/external/nova +++ b/pkg/sql/opt/xform/testdata/external/nova @@ -220,6 +220,7 @@ project │ │ │ │ │ │ │ ├── key columns: [37] = [7] │ │ │ │ │ │ │ ├── lookup columns are key │ │ │ │ │ │ │ ├── cardinality: [0 - 1] + │ │ │ │ │ │ │ ├── parameterized columns: (37) │ │ │ │ │ │ │ ├── has-placeholder │ │ │ │ │ │ │ ├── key: () │ │ │ │ │ │ │ ├── fd: ()-->(1,7,37), (7)==(37), (37)==(7) @@ -790,6 +791,7 @@ project │ │ │ │ │ │ │ ├── key columns: [43 42] = [2 13] │ │ │ │ │ │ │ ├── lookup columns are key │ │ │ │ │ │ │ ├── cardinality: [0 - 1] + │ │ │ │ │ │ │ ├── parameterized columns: (42,43) │ │ │ │ │ │ │ ├── has-placeholder │ │ │ │ │ │ │ ├── key: () │ │ │ │ │ │ │ ├── fd: ()-->(1,2,13,42,43), (2)==(43), (43)==(2), (13)==(42), (42)==(13) @@ -926,6 +928,7 @@ project │ │ │ │ │ │ ├── key columns: [42] = [1] │ │ │ │ │ │ ├── lookup columns are key │ │ │ │ │ │ ├── cardinality: [0 - 1] + │ │ │ │ │ │ ├── parameterized columns: (42,43) │ │ │ │ │ │ ├── has-placeholder │ │ │ │ │ │ ├── key: () │ │ │ │ │ │ ├── fd: ()-->(1-16,42,43), (1)==(42), (42)==(1), (13)==(43), (43)==(13) @@ -950,6 +953,7 @@ project │ │ │ │ │ │ ├── key columns: [46 45 44] = [20 21 22] │ │ │ │ │ │ ├── lookup columns are key │ │ │ │ │ │ ├── cardinality: [0 - 1] + │ │ │ │ │ │ ├── parameterized columns: (44-46) │ │ │ │ │ │ ├── has-placeholder │ │ │ │ │ │ ├── key: () │ │ │ │ │ │ ├── fd: ()-->(20-22,44-46), (22)==(44), (44)==(22), (21)==(45), (45)==(21), (20)==(46), (46)==(20) @@ -1081,6 +1085,7 @@ project │ │ │ │ │ │ │ ├── key columns: [37] = [2] │ │ │ │ │ │ │ ├── lookup columns are key │ │ │ │ │ │ │ ├── cardinality: [0 - 1] + │ │ │ │ │ │ │ ├── parameterized columns: (37) │ │ │ │ │ │ │ ├── has-placeholder │ │ │ │ │ │ │ ├── key: () │ │ │ │ │ │ │ ├── fd: ()-->(1,2,37), (2)==(37), (37)==(2) @@ -1213,6 +1218,7 @@ project │ │ │ │ │ │ │ ├── key columns: [37] = [7] │ │ 
│ │ │ │ │ ├── lookup columns are key │ │ │ │ │ │ │ ├── cardinality: [0 - 1] + │ │ │ │ │ │ │ ├── parameterized columns: (37) │ │ │ │ │ │ │ ├── has-placeholder │ │ │ │ │ │ │ ├── key: () │ │ │ │ │ │ │ ├── fd: ()-->(1,7,37), (7)==(37), (37)==(7) @@ -1641,6 +1647,7 @@ project │ │ │ │ │ │ │ ├── key columns: [43 42] = [7 13] │ │ │ │ │ │ │ ├── lookup columns are key │ │ │ │ │ │ │ ├── cardinality: [0 - 1] + │ │ │ │ │ │ │ ├── parameterized columns: (42,43) │ │ │ │ │ │ │ ├── has-placeholder │ │ │ │ │ │ │ ├── key: () │ │ │ │ │ │ │ ├── fd: ()-->(1,7,13,42,43), (7)==(43), (43)==(7), (13)==(42), (42)==(13) @@ -2344,6 +2351,7 @@ project │ │ │ │ │ │ │ ├── key columns: [43 42] = [7 13] │ │ │ │ │ │ │ ├── lookup columns are key │ │ │ │ │ │ │ ├── cardinality: [0 - 1] + │ │ │ │ │ │ │ ├── parameterized columns: (42,43) │ │ │ │ │ │ │ ├── has-placeholder │ │ │ │ │ │ │ ├── key: () │ │ │ │ │ │ │ ├── fd: ()-->(1,7,13,42,43), (7)==(43), (43)==(7), (13)==(42), (42)==(13) @@ -2471,6 +2479,7 @@ project │ │ │ │ │ │ ├── key columns: [37] = [1] │ │ │ │ │ │ ├── lookup columns are key │ │ │ │ │ │ ├── cardinality: [0 - 1] + │ │ │ │ │ │ ├── parameterized columns: (37) │ │ │ │ │ │ ├── has-placeholder │ │ │ │ │ │ ├── key: () │ │ │ │ │ │ ├── fd: ()-->(1-12,14,15,37), (1)==(37), (37)==(1) @@ -2494,6 +2503,7 @@ project │ │ │ │ │ │ ├── key columns: [39 38] = [19 20] │ │ │ │ │ │ ├── lookup columns are key │ │ │ │ │ │ ├── cardinality: [0 - 1] + │ │ │ │ │ │ ├── parameterized columns: (38,39) │ │ │ │ │ │ ├── has-placeholder │ │ │ │ │ │ ├── key: () │ │ │ │ │ │ ├── fd: ()-->(19,20,38,39), (20)==(38), (38)==(20), (19)==(39), (39)==(19) diff --git a/pkg/sql/opt/xform/testdata/rules/generic b/pkg/sql/opt/xform/testdata/rules/generic index c080ee523130..cd35c9019bca 100644 --- a/pkg/sql/opt/xform/testdata/rules/generic +++ b/pkg/sql/opt/xform/testdata/rules/generic @@ -30,6 +30,7 @@ project ├── key columns: [8] = [1] ├── lookup columns are key ├── cardinality: [0 - 1] + ├── parameterized columns: (8) ├── has-placeholder ├── key: () ├── fd: ()-->(1-5,8), (1)==(8), (8)==(1) @@ -57,6 +58,7 @@ project ├── key columns: [8] = [1] ├── lookup columns are key ├── cardinality: [0 - 1] + ├── parameterized columns: (8) ├── has-placeholder ├── key: () ├── fd: ()-->(1-5,8), (1)==(8), (8)==(1) @@ -88,6 +90,7 @@ project │ ├── columns: k:1!null i:2!null s:3!null b:4!null "$1":8!null "$2":9!null "$3":10!null │ ├── flags: disallow merge join │ ├── key columns: [8 9 10] = [2 3 4] + │ ├── parameterized columns: (8-10) │ ├── has-placeholder │ ├── key: (1) │ ├── fd: ()-->(2-4,8-10), (2)==(8), (8)==(2), (3)==(9), (9)==(3), (4)==(10), (10)==(4) @@ -118,6 +121,7 @@ project ├── key columns: [8] = [1] ├── lookup columns are key ├── cardinality: [0 - 1] + ├── parameterized columns: (8) ├── has-placeholder ├── key: () ├── fd: ()-->(1-5,8), (1)==(2,8), (2)==(1,8), (8)==(1,2) @@ -152,6 +156,7 @@ project │ ├── columns: k:1!null i:2!null t:5 "$1":8!null │ ├── flags: disallow merge join │ ├── key columns: [8] = [2] + │ ├── parameterized columns: (8) │ ├── has-placeholder │ ├── key: (1) │ ├── fd: ()-->(2,8), (1)-->(5), (2)==(8), (8)==(2) @@ -194,6 +199,7 @@ project │ ├── key columns: [15] = [8] │ ├── lookup columns are key │ ├── cardinality: [0 - 1] + │ ├── parameterized columns: (15) │ ├── has-placeholder │ ├── key: () │ ├── fd: ()-->(8,9,15), (8)==(15), (15)==(8) @@ -261,6 +267,7 @@ project │ ├── columns: k:1!null t:5!null "$1":8!null │ ├── flags: disallow merge join │ ├── key columns: [8] = [5] + │ ├── parameterized columns: (8) │ ├── has-placeholder │ ├── key: (1) │ ├── fd: 
()-->(5,8), (5)==(8), (8)==(5) @@ -301,6 +308,7 @@ project │ ├── columns: k:1!null i:2!null t:5!null "$1":8!null "$2":9!null │ ├── flags: disallow merge join │ ├── key columns: [8 9] = [2 5] + │ ├── parameterized columns: (8,9) │ ├── has-placeholder │ ├── key: (1) │ ├── fd: ()-->(2,5,8,9), (2)==(8), (8)==(2), (5)==(9), (9)==(5) @@ -345,6 +353,7 @@ project │ ├── key columns: [8 9] = [3 1] │ ├── lookup columns are key │ ├── cardinality: [0 - 1] + │ ├── parameterized columns: (8,9) │ ├── has-placeholder │ ├── key: () │ ├── fd: ()-->(1,3,8,9), (3)==(8), (8)==(3), (1)==(9), (9)==(1) @@ -381,6 +390,7 @@ project │ ├── columns: k:1!null t:5!null column8:8!null │ ├── flags: disallow merge join │ ├── key columns: [8] = [5] + │ ├── parameterized columns: (8) │ ├── stable │ ├── key: (1) │ ├── fd: ()-->(5,8), (5)==(8), (8)==(5) @@ -413,6 +423,7 @@ project │ ├── columns: k:1!null i:2!null t:5!null "$1":8!null column9:9!null │ ├── flags: disallow merge join │ ├── key columns: [8 9] = [2 5] + │ ├── parameterized columns: (8,9) │ ├── stable, has-placeholder │ ├── key: (1) │ ├── fd: ()-->(2,5,8,9), (2)==(8), (8)==(2), (5)==(9), (9)==(5) @@ -448,6 +459,7 @@ project │ │ └── filters │ │ ├── t:5 > column9:9 [outer=(5,9), constraints=(/5: (/NULL - ]; /9: (/NULL - ])] │ │ └── "$1":8 = i:2 [outer=(2,8), constraints=(/2: (/NULL - ]; /8: (/NULL - ]), fd=(2)==(8), (8)==(2)] + │ ├── parameterized columns: (8,9) │ ├── stable, has-placeholder │ ├── key: (1) │ ├── fd: ()-->(2,8,9), (1)-->(5), (2)==(8), (8)==(2) @@ -485,6 +497,7 @@ project │ ├── columns: k:1!null i:2!null t:5!null "$1":8!null column9:9 "$2":10 column11:11!null │ ├── flags: disallow merge join │ ├── key columns: [8 11] = [2 5] + │ ├── parameterized columns: (8-10) │ ├── stable, has-placeholder │ ├── key: (1) │ ├── fd: ()-->(2,5,8-11), (2)==(8), (8)==(2), (5)==(11), (11)==(5) @@ -530,6 +543,7 @@ project │ ├── columns: k:1!null i:2!null t:5!null "$1":8!null column9:9 column10:10!null │ ├── flags: disallow merge join │ ├── key columns: [8 10] = [2 5] + │ ├── parameterized columns: (8,9) │ ├── stable, has-placeholder │ ├── key: (1) │ ├── fd: ()-->(2,5,8-10), (2)==(8), (8)==(2), (5)==(10), (10)==(5) @@ -590,6 +604,7 @@ project │ ├── columns: k:1!null i:2!null s:3!null b:4 "$1":8!null │ ├── flags: disallow merge join │ ├── key columns: [8] = [2] + │ ├── parameterized columns: (8) │ ├── stable, has-placeholder │ ├── key: (1) │ ├── fd: ()-->(2,3,8), (1)-->(4), (2)==(8), (8)==(2) @@ -626,6 +641,7 @@ project │ ├── columns: k:1!null i:2!null s:3!null b:4 "$1":8!null │ ├── flags: disallow merge join │ ├── key columns: [8] = [2] + │ ├── parameterized columns: (8) │ ├── stable, has-placeholder │ ├── key: (1) │ ├── fd: ()-->(2,3,8), (1)-->(4), (2)==(8), (8)==(2) diff --git a/pkg/sql/opt/xform/testdata/rules/groupby b/pkg/sql/opt/xform/testdata/rules/groupby index 7b2af3e4120b..618b7612edb4 100644 --- a/pkg/sql/opt/xform/testdata/rules/groupby +++ b/pkg/sql/opt/xform/testdata/rules/groupby @@ -2298,7 +2298,7 @@ memo (optimized, ~6KB, required=[presentation: u:2,v:3,w:4] [ordering: +4]) memo SELECT (SELECT w FROM kuvw WHERE v=1 AND x=u) FROM xyz ORDER BY x+1, x ---- -memo (optimized, ~27KB, required=[presentation: w:12] [ordering: +13,+1]) +memo (optimized, ~28KB, required=[presentation: w:12] [ordering: +13,+1]) ├── G1: (project G2 G3 x) │ ├── [presentation: w:12] [ordering: +13,+1] │ │ ├── best: (sort G1) @@ -2422,7 +2422,7 @@ memo (optimized, ~29KB, required=[]) memo INSERT INTO xyz SELECT v, w, 1.0 FROM kuvw ON CONFLICT (x) DO UPDATE SET z=2.0 ---- -memo (optimized, 
~29KB, required=[]) +memo (optimized, ~30KB, required=[]) ├── G1: (upsert G2 G3 G4 xyz) │ └── [] │ ├── best: (upsert G2 G3 G4 xyz) diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index 868f13247321..7025e49ce479 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -235,7 +235,7 @@ inner-join (merge) memo expect=ReorderJoins SELECT * FROM abc, stu, xyz WHERE abc.a=stu.s AND stu.s=xyz.x ---- -memo (optimized, ~47KB, required=[presentation: a:1,b:2,c:3,s:7,t:8,u:9,x:12,y:13,z:14]) +memo (optimized, ~49KB, required=[presentation: a:1,b:2,c:3,s:7,t:8,u:9,x:12,y:13,z:14]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) (inner-join G5 G6 G7) (inner-join G6 G5 G7) (inner-join G8 G9 G7) (inner-join G9 G8 G7) (merge-join G2 G3 G10 inner-join,+1,+7) (merge-join G3 G2 G10 inner-join,+7,+1) (lookup-join G3 G10 abc@ab,keyCols=[7],outCols=(1-3,7-9,12-14)) (merge-join G5 G6 G10 inner-join,+7,+12) (merge-join G6 G5 G10 inner-join,+12,+7) (lookup-join G6 G10 stu,keyCols=[12],outCols=(1-3,7-9,12-14)) (merge-join G8 G9 G10 inner-join,+7,+12) (lookup-join G8 G10 xyz@xy,keyCols=[7],outCols=(1-3,7-9,12-14)) (merge-join G9 G8 G10 inner-join,+12,+7) │ └── [presentation: a:1,b:2,c:3,s:7,t:8,u:9,x:12,y:13,z:14] │ ├── best: (merge-join G5="[ordering: +7]" G6="[ordering: +(1|12)]" G10 inner-join,+7,+12) @@ -343,7 +343,7 @@ SELECT * FROM stu, abc, xyz, pqr WHERE u = a AND a = x AND x = p ---- -memo (optimized, ~41KB, required=[presentation: s:1,t:2,u:3,a:6,b:7,c:8,x:12,y:13,z:14,p:18,q:19,r:20,s:21,t:22]) +memo (optimized, ~43KB, required=[presentation: s:1,t:2,u:3,a:6,b:7,c:8,x:12,y:13,z:14,p:18,q:19,r:20,s:21,t:22]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) (merge-join G2 G3 G5 inner-join,+3,+6) (merge-join G3 G2 G5 inner-join,+6,+3) (lookup-join G3 G5 stu@uts,keyCols=[6],outCols=(1-3,6-8,12-14,18-22)) │ └── [presentation: s:1,t:2,u:3,a:6,b:7,c:8,x:12,y:13,z:14,p:18,q:19,r:20,s:21,t:22] │ ├── best: (merge-join G2="[ordering: +3]" G3="[ordering: +(6|12|18)]" G5 inner-join,+3,+6) @@ -1277,7 +1277,7 @@ memo (optimized, ~14KB, required=[presentation: a:1,b:2,c:3,x:7,y:8,z:9]) memo expect=ReorderJoins SELECT * FROM abc FULL OUTER JOIN xyz ON a=z ---- -memo (optimized, ~12KB, required=[presentation: a:1,b:2,c:3,x:7,y:8,z:9]) +memo (optimized, ~13KB, required=[presentation: a:1,b:2,c:3,x:7,y:8,z:9]) ├── G1: (full-join G2 G3 G4) (full-join G3 G2 G4) (merge-join G2 G3 G5 full-join,+1,+9) │ └── [presentation: a:1,b:2,c:3,x:7,y:8,z:9] │ ├── best: (full-join G2 G3 G4) @@ -1363,7 +1363,7 @@ full-join (hash) memo expect-not=ReorderJoins SELECT * FROM abc INNER LOOKUP JOIN xyz ON a=x ---- -memo (optimized, ~13KB, required=[presentation: a:1,b:2,c:3,x:7,y:8,z:9]) +memo (optimized, ~14KB, required=[presentation: a:1,b:2,c:3,x:7,y:8,z:9]) ├── G1: (inner-join G2 G3 G4) (lookup-join G2 G5 xyz@xy,keyCols=[1],outCols=(1-3,7-9)) │ └── [presentation: a:1,b:2,c:3,x:7,y:8,z:9] │ ├── best: (lookup-join G2 G5 xyz@xy,keyCols=[1],outCols=(1-3,7-9)) @@ -1446,7 +1446,7 @@ New expression 1 of 1: memo SELECT * FROM abc LEFT OUTER JOIN xyz ON a=z ---- -memo (optimized, ~12KB, required=[presentation: a:1,b:2,c:3,x:7,y:8,z:9]) +memo (optimized, ~13KB, required=[presentation: a:1,b:2,c:3,x:7,y:8,z:9]) ├── G1: (left-join G2 G3 G4) (right-join G3 G2 G4) (merge-join G2 G3 G5 left-join,+1,+9) │ └── [presentation: a:1,b:2,c:3,x:7,y:8,z:9] │ ├── best: (left-join G2 G3 G4) @@ -1823,7 +1823,7 @@ inner-join (merge) memo SELECT * FROM abc JOIN xyz ON a=x ---- 
-memo (optimized, ~16KB, required=[presentation: a:1,b:2,c:3,x:7,y:8,z:9]) +memo (optimized, ~17KB, required=[presentation: a:1,b:2,c:3,x:7,y:8,z:9]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) (merge-join G2 G3 G5 inner-join,+1,+7) (lookup-join G2 G5 xyz@xy,keyCols=[1],outCols=(1-3,7-9)) (merge-join G3 G2 G5 inner-join,+7,+1) (lookup-join G3 G5 abc@ab,keyCols=[7],outCols=(1-3,7-9)) │ └── [presentation: a:1,b:2,c:3,x:7,y:8,z:9] │ ├── best: (merge-join G3="[ordering: +7]" G2="[ordering: +1]" G5 inner-join,+7,+1) @@ -1874,7 +1874,7 @@ memo (optimized, ~12KB, required=[presentation: a:1,b:2,c:3,x:7,y:8,z:9]) memo set=(optimizer_merge_joins_enabled=false) expect-not=GenerateMergeJoins SELECT * FROM abc JOIN xyz ON a=x ---- -memo (optimized, ~15KB, required=[presentation: a:1,b:2,c:3,x:7,y:8,z:9]) +memo (optimized, ~16KB, required=[presentation: a:1,b:2,c:3,x:7,y:8,z:9]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) (lookup-join G2 G5 xyz@xy,keyCols=[1],outCols=(1-3,7-9)) (lookup-join G3 G5 abc@ab,keyCols=[7],outCols=(1-3,7-9)) │ └── [presentation: a:1,b:2,c:3,x:7,y:8,z:9] │ ├── best: (inner-join G3 G2 G4) @@ -1945,7 +1945,7 @@ inner-join (lookup xyz@xy) memo disable=(EliminateJoinUnderProjectLeft,EliminateJoinUnderProjectRight) SELECT * FROM stu AS l JOIN stu AS r ON (l.s, l.t, l.u) = (r.s, r.t, r.u) ---- -memo (optimized, ~20KB, required=[presentation: s:1,t:2,u:3,s:6,t:7,u:8]) +memo (optimized, ~21KB, required=[presentation: s:1,t:2,u:3,s:6,t:7,u:8]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) (merge-join G2 G3 G5 inner-join,+1,+2,+3,+6,+7,+8) (merge-join G2 G3 G5 inner-join,+3,+2,+1,+8,+7,+6) (lookup-join G2 G5 stu [as=r],keyCols=[1 2 3],outCols=(1-3,6-8)) (lookup-join G2 G5 stu@uts [as=r],keyCols=[3 2 1],outCols=(1-3,6-8)) (merge-join G3 G2 G5 inner-join,+6,+7,+8,+1,+2,+3) (merge-join G3 G2 G5 inner-join,+8,+7,+6,+3,+2,+1) (lookup-join G3 G5 stu [as=l],keyCols=[6 7 8],outCols=(1-3,6-8)) (lookup-join G3 G5 stu@uts [as=l],keyCols=[8 7 6],outCols=(1-3,6-8)) │ └── [presentation: s:1,t:2,u:3,s:6,t:7,u:8] │ ├── best: (merge-join G2="[ordering: +1,+2,+3]" G3="[ordering: +6,+7,+8]" G5 inner-join,+1,+2,+3,+6,+7,+8) @@ -7276,7 +7276,7 @@ WHERE n.name = 'Upper West Side' OR n.name = 'Upper East Side' GROUP BY n.name, n.geom ---- -memo (optimized, ~38KB, required=[presentation: name:16,popn_per_sqkm:22]) +memo (optimized, ~39KB, required=[presentation: name:16,popn_per_sqkm:22]) ├── G1: (project G2 G3 name) │ └── [presentation: name:16,popn_per_sqkm:22] │ ├── best: (project G2 G3 name) diff --git a/pkg/sql/opt/xform/testdata/rules/join_order b/pkg/sql/opt/xform/testdata/rules/join_order index 20e0e4564b66..3466b8284889 100644 --- a/pkg/sql/opt/xform/testdata/rules/join_order +++ b/pkg/sql/opt/xform/testdata/rules/join_order @@ -360,7 +360,7 @@ memo (optimized, ~25KB, required=[presentation: b:1,x:2,c:5,y:6,a:9,b:10,c:11,d: memo set=reorder_joins_limit=2 SELECT * FROM bx, cy, abc WHERE a = 1 AND abc.b = bx.b AND abc.c = cy.c ---- -memo (optimized, ~37KB, required=[presentation: b:1,x:2,c:5,y:6,a:9,b:10,c:11,d:12]) +memo (optimized, ~38KB, required=[presentation: b:1,x:2,c:5,y:6,a:9,b:10,c:11,d:12]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) (inner-join G5 G6 G7) (inner-join G6 G5 G7) (merge-join G2 G3 G8 inner-join,+1,+10) (merge-join G3 G2 G8 inner-join,+10,+1) (lookup-join G3 G8 bx,keyCols=[10],outCols=(1,2,5,6,9-12)) (merge-join G5 G6 G8 inner-join,+5,+11) (merge-join G6 G5 G8 inner-join,+11,+5) (lookup-join G6 G8 cy,keyCols=[11],outCols=(1,2,5,6,9-12)) │ └── 
[presentation: b:1,x:2,c:5,y:6,a:9,b:10,c:11,d:12] │ ├── best: (lookup-join G3 G8 bx,keyCols=[10],outCols=(1,2,5,6,9-12)) @@ -521,7 +521,7 @@ inner-join (cross) memo set=reorder_joins_limit=0 SELECT * FROM bx, cy, dz, abc WHERE x = y AND y = z AND z = a ---- -memo (optimized, ~32KB, required=[presentation: b:1,x:2,c:5,y:6,d:9,z:10,a:13,b:14,c:15,d:16]) +memo (optimized, ~33KB, required=[presentation: b:1,x:2,c:5,y:6,d:9,z:10,a:13,b:14,c:15,d:16]) ├── G1: (inner-join G2 G3 G4) (merge-join G2 G3 G5 inner-join,+2,+6) │ └── [presentation: b:1,x:2,c:5,y:6,d:9,z:10,a:13,b:14,c:15,d:16] │ ├── best: (inner-join G2 G3 G4) @@ -587,7 +587,7 @@ memo (optimized, ~32KB, required=[presentation: b:1,x:2,c:5,y:6,d:9,z:10,a:13,b: memo set=reorder_joins_limit=3 SELECT * FROM bx, cy, dz, abc WHERE x = y AND y = z AND z = a ---- -memo (optimized, ~69KB, required=[presentation: b:1,x:2,c:5,y:6,d:9,z:10,a:13,b:14,c:15,d:16]) +memo (optimized, ~73KB, required=[presentation: b:1,x:2,c:5,y:6,d:9,z:10,a:13,b:14,c:15,d:16]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) (inner-join G5 G6 G7) (inner-join G6 G5 G7) (inner-join G8 G9 G7) (inner-join G9 G8 G7) (inner-join G10 G11 G12) (inner-join G11 G10 G12) (inner-join G13 G14 G12) (inner-join G14 G13 G12) (inner-join G15 G16 G12) (inner-join G16 G15 G12) (inner-join G17 G18 G12) (inner-join G18 G17 G12) (merge-join G3 G2 G19 inner-join,+6,+2) (merge-join G6 G5 G19 inner-join,+10,+6) (merge-join G9 G8 G19 inner-join,+10,+6) (merge-join G11 G10 G19 inner-join,+13,+10) (merge-join G14 G13 G19 inner-join,+13,+10) (merge-join G16 G15 G19 inner-join,+13,+10) (lookup-join G17 G19 abc,keyCols=[10],outCols=(1,2,5,6,9,10,13-16)) (merge-join G18 G17 G19 inner-join,+13,+10) │ └── [presentation: b:1,x:2,c:5,y:6,d:9,z:10,a:13,b:14,c:15,d:16] │ ├── best: (inner-join G3 G2 G4) @@ -2774,7 +2774,7 @@ SELECT ( ) FROM table80901_1 AS tab_42921; ---- -memo (optimized, ~73KB, required=[presentation: ?column?:50]) +memo (optimized, ~76KB, required=[presentation: ?column?:50]) ├── G1: (project G2 G3) │ └── [presentation: ?column?:50] │ ├── best: (project G2 G3) @@ -3547,7 +3547,7 @@ right-join (hash) # Only 2 joins are considered (instead of 8) when the STRAIGHT hint is present in one join. reorderjoins format=hide-all -SELECT * +SELECT * FROM straight_join1 INNER STRAIGHT JOIN straight_join2 ON straight_join1.x = straight_join2.y INNER JOIN straight_join3 ON straight_join1.x = straight_join3.z @@ -3597,7 +3597,7 @@ inner-join (hash) # No joins are considered when the STRAIGHT hint is present in both joins. 
reorderjoins format=hide-all -SELECT * +SELECT * FROM straight_join1 INNER STRAIGHT JOIN straight_join2 ON straight_join1.x = straight_join2.y INNER STRAIGHT JOIN straight_join3 ON straight_join1.x = straight_join3.z diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index 6790bba18ab5..0803d89b647e 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -12003,7 +12003,7 @@ JOIN t61795 AS t2 ON t1.c = t1.b AND t1.b = t2.b WHERE t1.a = 10 OR t2.b != abs(t2.b) ORDER BY t1.b ASC ---- -memo (optimized, ~37KB, required=[presentation: a:1] [ordering: +2]) +memo (optimized, ~38KB, required=[presentation: a:1] [ordering: +2]) ├── G1: (project G2 G3 a b) │ ├── [presentation: a:1] [ordering: +2] │ │ ├── best: (sort G1) From 013bb7572f46e1e6122ee48221392253bb9129bb Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Fri, 31 Oct 2025 12:24:30 -0400 Subject: [PATCH 2/2] sql: add optimizer_use_max_frequency_selectivity session setting The `optimizer_use_max_frequency_selectivity` session setting has been added. It is enabled by default. Disabling it reverts the selectivity improvements added in #151409. Release note: None --- pkg/sql/exec_util.go | 4 ++++ .../testdata/logic_test/information_schema | 1 + .../logictest/testdata/logic_test/pg_catalog | 3 +++ .../logictest/testdata/logic_test/show_source | 1 + pkg/sql/opt/memo/memo.go | 3 +++ pkg/sql/opt/memo/memo_test.go | 6 ++++++ pkg/sql/opt/memo/statistics_builder.go | 3 +++ pkg/sql/opt/memo/testdata/stats/generic | 21 +++++++++++++++++++ .../local_only_session_data.proto | 3 +++ pkg/sql/vars.go | 17 +++++++++++++++ 10 files changed, 62 insertions(+) diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index 721337771a46..6d40546b1716 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -4211,6 +4211,10 @@ func (m *sessionDataMutator) SetUseProcTxnControlExtendedProtocolFix(val bool) { m.data.UseProcTxnControlExtendedProtocolFix = val } +func (m *sessionDataMutator) SetOptimizerUseMaxFrequencySelectivity(val bool) { + m.data.OptimizerUseMaxFrequencySelectivity = val +} + // Utility functions related to scrubbing sensitive information on SQL Stats. 
// quantizeCounts ensures that the Count field in the diff --git a/pkg/sql/logictest/testdata/logic_test/information_schema b/pkg/sql/logictest/testdata/logic_test/information_schema index c12fb3496064..ca1c26051a5c 100644 --- a/pkg/sql/logictest/testdata/logic_test/information_schema +++ b/pkg/sql/logictest/testdata/logic_test/information_schema @@ -4042,6 +4042,7 @@ optimizer_use_improved_zigzag_join_costing on optimizer_use_limit_ordering_for_streaming_group_by on optimizer_use_lock_elision_multiple_families off optimizer_use_lock_op_for_serializable off +optimizer_use_max_frequency_selectivity on optimizer_use_merged_partial_statistics on optimizer_use_multicol_stats on optimizer_use_not_visible_indexes off diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index 6cf0a1039e42..e7394036fafb 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -3051,6 +3051,7 @@ optimizer_use_improved_zigzag_join_costing on N optimizer_use_limit_ordering_for_streaming_group_by on NULL NULL NULL string optimizer_use_lock_elision_multiple_families off NULL NULL NULL string optimizer_use_lock_op_for_serializable off NULL NULL NULL string +optimizer_use_max_frequency_selectivity on NULL NULL NULL string optimizer_use_merged_partial_statistics on NULL NULL NULL string optimizer_use_multicol_stats on NULL NULL NULL string optimizer_use_not_visible_indexes off NULL NULL NULL string @@ -3276,6 +3277,7 @@ optimizer_use_improved_zigzag_join_costing on N optimizer_use_limit_ordering_for_streaming_group_by on NULL user NULL on on optimizer_use_lock_elision_multiple_families off NULL user NULL off off optimizer_use_lock_op_for_serializable off NULL user NULL off off +optimizer_use_max_frequency_selectivity on NULL user NULL on on optimizer_use_merged_partial_statistics on NULL user NULL on on optimizer_use_multicol_stats on NULL user NULL on on optimizer_use_not_visible_indexes off NULL user NULL off off @@ -3500,6 +3502,7 @@ optimizer_use_improved_zigzag_join_costing NULL NULL NULL optimizer_use_limit_ordering_for_streaming_group_by NULL NULL NULL NULL NULL optimizer_use_lock_elision_multiple_families NULL NULL NULL NULL NULL optimizer_use_lock_op_for_serializable NULL NULL NULL NULL NULL +optimizer_use_max_frequency_selectivity NULL NULL NULL NULL NULL optimizer_use_merged_partial_statistics NULL NULL NULL NULL NULL optimizer_use_multicol_stats NULL NULL NULL NULL NULL optimizer_use_not_visible_indexes NULL NULL NULL NULL NULL diff --git a/pkg/sql/logictest/testdata/logic_test/show_source b/pkg/sql/logictest/testdata/logic_test/show_source index e134234a188c..dc000668d94b 100644 --- a/pkg/sql/logictest/testdata/logic_test/show_source +++ b/pkg/sql/logictest/testdata/logic_test/show_source @@ -167,6 +167,7 @@ optimizer_use_improved_zigzag_join_costing on optimizer_use_limit_ordering_for_streaming_group_by on optimizer_use_lock_elision_multiple_families off optimizer_use_lock_op_for_serializable off +optimizer_use_max_frequency_selectivity on optimizer_use_merged_partial_statistics on optimizer_use_multicol_stats on optimizer_use_not_visible_indexes off diff --git a/pkg/sql/opt/memo/memo.go b/pkg/sql/opt/memo/memo.go index e545e400b305..ae9b70468fc4 100644 --- a/pkg/sql/opt/memo/memo.go +++ b/pkg/sql/opt/memo/memo.go @@ -209,6 +209,7 @@ type Memo struct { internal bool usePre_25_2VariadicBuiltins bool useExistsFilterHoistRule bool + useMaxFrequencySelectivity bool // txnIsoLevel is the 
isolation level under which the plan was created. This // affects the planning of some locking operations, so it must be included in @@ -314,6 +315,7 @@ func (m *Memo) Init(ctx context.Context, evalCtx *eval.Context) { internal: evalCtx.SessionData().Internal, usePre_25_2VariadicBuiltins: evalCtx.SessionData().UsePre_25_2VariadicBuiltins, useExistsFilterHoistRule: evalCtx.SessionData().OptimizerUseExistsFilterHoistRule, + useMaxFrequencySelectivity: evalCtx.SessionData().OptimizerUseMaxFrequencySelectivity, txnIsoLevel: evalCtx.TxnIsoLevel, } m.metadata.Init() @@ -492,6 +494,7 @@ func (m *Memo) IsStale( m.internal != evalCtx.SessionData().Internal || m.usePre_25_2VariadicBuiltins != evalCtx.SessionData().UsePre_25_2VariadicBuiltins || m.useExistsFilterHoistRule != evalCtx.SessionData().OptimizerUseExistsFilterHoistRule || + m.useMaxFrequencySelectivity != evalCtx.SessionData().OptimizerUseMaxFrequencySelectivity || m.txnIsoLevel != evalCtx.TxnIsoLevel { return true, nil } diff --git a/pkg/sql/opt/memo/memo_test.go b/pkg/sql/opt/memo/memo_test.go index 6ef6bc04a370..9b9d7a6a7c86 100644 --- a/pkg/sql/opt/memo/memo_test.go +++ b/pkg/sql/opt/memo/memo_test.go @@ -510,6 +510,12 @@ func TestMemoIsStale(t *testing.T) { evalCtx.SessionData().OptimizerUseImprovedMultiColumnSelectivityEstimate = false notStale() + // Stale optimizer_use_max_frequency_selectivity. + evalCtx.SessionData().OptimizerUseMaxFrequencySelectivity = true + stale() + evalCtx.SessionData().OptimizerUseMaxFrequencySelectivity = false + notStale() + // Stale optimizer_prove_implication_with_virtual_computed_columns. evalCtx.SessionData().OptimizerProveImplicationWithVirtualComputedColumns = true stale() diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go index 402ff5a50e94..0a6aac533be1 100644 --- a/pkg/sql/opt/memo/statistics_builder.go +++ b/pkg/sql/opt/memo/statistics_builder.go @@ -4581,6 +4581,9 @@ func (sb *statisticsBuilder) selectivityFromMaxFrequencies( ) (selectivity, selectivityUpperBound props.Selectivity, maxFreqCols opt.ColSet) { selectivity = props.OneSelectivity selectivityUpperBound = props.OneSelectivity + if !sb.evalCtx.SessionData().OptimizerUseMaxFrequencySelectivity { + return selectivity, selectivityUpperBound, opt.ColSet{} + } for col, ok := cols.Next(0); ok; col, ok = cols.Next(col + 1) { c := opt.MakeColSet(col) inputColStat, inputStats := sb.colStatFromInput(c, e) diff --git a/pkg/sql/opt/memo/testdata/stats/generic b/pkg/sql/opt/memo/testdata/stats/generic index 9a8a2ea60742..fc721d41b810 100644 --- a/pkg/sql/opt/memo/testdata/stats/generic +++ b/pkg/sql/opt/memo/testdata/stats/generic @@ -254,6 +254,27 @@ select └── filters └── i:2 = $1 [type=bool, outer=(2), constraints=(/2: (/NULL - ]), fd=()-->(2)] +# Fallback to ~row_count/distinct_count if +# optimizer_use_max_frequency_selectivity is false. 
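+# As a rough illustration with the stats in the test below, that fallback
+# works out to approximately (1000 rows - 30 NULLs) / 40 non-NULL distinct
+# values = 24.25 rows.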
+norm set=(optimizer_use_max_frequency_selectivity=false) +SELECT * FROM t WHERE i = $1 +---- +select + ├── columns: k:1(int!null) i:2(int!null) s:3(string) + ├── has-placeholder + ├── stats: [rows=24.25, distinct(2)=1, null(2)=0] + ├── key: (1) + ├── fd: ()-->(2), (1)-->(3) + ├── scan t + │ ├── columns: k:1(int!null) i:2(int) s:3(string) + │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=41, null(2)=30] + │ │ histogram(2)= 0 30 0 0 90 10 180 10 270 20 360 30 + │ │ <--- NULL --- 0 ---- 100 ----- 200 ----- 300 ----- 400 + │ ├── key: (1) + │ └── fd: (1)-->(2,3) + └── filters + └── i:2 = $1 [type=bool, outer=(2), constraints=(/2: (/NULL - ]), fd=()-->(2)] + # Similar case as above, but with opt to ensure the correct row counts are used # for new memo groups. opt diff --git a/pkg/sql/sessiondatapb/local_only_session_data.proto b/pkg/sql/sessiondatapb/local_only_session_data.proto index 3ef129c3ebdb..930e20689524 100644 --- a/pkg/sql/sessiondatapb/local_only_session_data.proto +++ b/pkg/sql/sessiondatapb/local_only_session_data.proto @@ -687,6 +687,9 @@ message LocalOnlySessionData { // hint - if available - when comparing against // DistributeScanRowCountThreshold. bool use_soft_limit_for_distribute_scan = 185; + // OptimizerUseMaxFrequencySelectivity, when true, indicates that the + // optimizer should use max frequency for selectivity estimation. + bool optimizer_use_max_frequency_selectivity = 191; /////////////////////////////////////////////////////////////////////////// // WARNING: consider whether a session parameter you're adding needs to // diff --git a/pkg/sql/vars.go b/pkg/sql/vars.go index 8568c66a4540..c33e2006778f 100644 --- a/pkg/sql/vars.go +++ b/pkg/sql/vars.go @@ -3613,6 +3613,23 @@ var varGen = map[string]sessionVar{ GlobalDefault: globalTrue, }, + // CockroachDB extension. + `optimizer_use_max_frequency_selectivity`: { + GetStringVal: makePostgresBoolGetStringValFn(`optimizer_use_max_frequency_selectivity`), + Set: func(_ context.Context, m sessionDataMutator, s string) error { + b, err := paramparse.ParseBoolVar("optimizer_use_max_frequency_selectivity", s) + if err != nil { + return err + } + m.SetOptimizerUseMaxFrequencySelectivity(b) + return nil + }, + Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) { + return formatBoolAsPostgresSetting(evalCtx.SessionData().OptimizerUseMaxFrequencySelectivity), nil + }, + GlobalDefault: globalTrue, + }, + // CockroachDB extension. `optimizer_prove_implication_with_virtual_computed_columns`: { GetStringVal: makePostgresBoolGetStringValFn(`optimizer_prove_implication_with_virtual_computed_columns`),
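// Usage sketch (assumption: the standard CockroachDB session-variable syntax,
// as exercised by the logic tests above, applies to the new setting):
//   SET optimizer_use_max_frequency_selectivity = off;  -- fall back to average-case estimates
//   SHOW optimizer_use_max_frequency_selectivity;
//   RESET optimizer_use_max_frequency_selectivity;       -- restore the default (on)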