From c9ed1b637ae5f9838cca1f44fc2d60c694ac5a22 Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Fri, 31 Oct 2025 12:24:30 -0400 Subject: [PATCH] sql: add optimizer_use_max_frequency_selectivity session setting The `optimizer_use_max_frequency_selectivity` session setting has been added. It is enabled by default. Disabling it reverts the selectivity improvements added in #151409. Release note: None --- pkg/sql/exec_util.go | 4 ++++ .../testdata/logic_test/information_schema | 1 + .../logictest/testdata/logic_test/pg_catalog | 3 +++ .../logictest/testdata/logic_test/show_source | 1 + pkg/sql/opt/memo/memo.go | 3 +++ pkg/sql/opt/memo/memo_test.go | 6 ++++++ pkg/sql/opt/memo/statistics_builder.go | 3 +++ pkg/sql/opt/memo/testdata/stats/generic | 21 +++++++++++++++++++ .../local_only_session_data.proto | 3 +++ pkg/sql/vars.go | 17 +++++++++++++++ 10 files changed, 62 insertions(+) diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index 4993fc3c9cce..06170013259c 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -4479,6 +4479,10 @@ func (m *sessionDataMutator) SetOptimizerClampInequalitySelectivity(val bool) { m.data.OptimizerClampInequalitySelectivity = val } +func (m *sessionDataMutator) SetOptimizerUseMaxFrequencySelectivity(val bool) { + m.data.OptimizerUseMaxFrequencySelectivity = val +} + // Utility functions related to scrubbing sensitive information on SQL Stats. 
// quantizeCounts ensures that the Count field in the diff --git a/pkg/sql/logictest/testdata/logic_test/information_schema b/pkg/sql/logictest/testdata/logic_test/information_schema index 003f3702df5d..bdf7559fc2a2 100644 --- a/pkg/sql/logictest/testdata/logic_test/information_schema +++ b/pkg/sql/logictest/testdata/logic_test/information_schema @@ -4218,6 +4218,7 @@ optimizer_use_improved_zigzag_join_costing on optimizer_use_limit_ordering_for_streaming_group_by on optimizer_use_lock_elision_multiple_families off optimizer_use_lock_op_for_serializable off +optimizer_use_max_frequency_selectivity on optimizer_use_merged_partial_statistics on optimizer_use_multicol_stats on optimizer_use_not_visible_indexes off diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index 941a6a31aa0c..11bf6ed739ef 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -3120,6 +3120,7 @@ optimizer_use_improved_zigzag_join_costing on optimizer_use_limit_ordering_for_streaming_group_by on NULL NULL NULL string optimizer_use_lock_elision_multiple_families off NULL NULL NULL string optimizer_use_lock_op_for_serializable off NULL NULL NULL string +optimizer_use_max_frequency_selectivity on NULL NULL NULL string optimizer_use_merged_partial_statistics on NULL NULL NULL string optimizer_use_multicol_stats on NULL NULL NULL string optimizer_use_not_visible_indexes off NULL NULL NULL string @@ -3367,6 +3368,7 @@ optimizer_use_improved_zigzag_join_costing on optimizer_use_limit_ordering_for_streaming_group_by on NULL user NULL on on optimizer_use_lock_elision_multiple_families off NULL user NULL off off optimizer_use_lock_op_for_serializable off NULL user NULL off off +optimizer_use_max_frequency_selectivity on NULL user NULL on on optimizer_use_merged_partial_statistics on NULL user NULL on on optimizer_use_multicol_stats on NULL user NULL on on 
optimizer_use_not_visible_indexes off NULL user NULL off off @@ -3605,6 +3607,7 @@ optimizer_use_improved_zigzag_join_costing NULL NULL optimizer_use_limit_ordering_for_streaming_group_by NULL NULL NULL NULL NULL optimizer_use_lock_elision_multiple_families NULL NULL NULL NULL NULL optimizer_use_lock_op_for_serializable NULL NULL NULL NULL NULL +optimizer_use_max_frequency_selectivity NULL NULL NULL NULL NULL optimizer_use_merged_partial_statistics NULL NULL NULL NULL NULL optimizer_use_multicol_stats NULL NULL NULL NULL NULL optimizer_use_not_visible_indexes NULL NULL NULL NULL NULL diff --git a/pkg/sql/logictest/testdata/logic_test/show_source b/pkg/sql/logictest/testdata/logic_test/show_source index 3a7c0d427151..86876c125bb2 100644 --- a/pkg/sql/logictest/testdata/logic_test/show_source +++ b/pkg/sql/logictest/testdata/logic_test/show_source @@ -184,6 +184,7 @@ optimizer_use_improved_zigzag_join_costing on optimizer_use_limit_ordering_for_streaming_group_by on optimizer_use_lock_elision_multiple_families off optimizer_use_lock_op_for_serializable off +optimizer_use_max_frequency_selectivity on optimizer_use_merged_partial_statistics on optimizer_use_multicol_stats on optimizer_use_not_visible_indexes off diff --git a/pkg/sql/opt/memo/memo.go b/pkg/sql/opt/memo/memo.go index a0587c515be8..3ed8c86a1ff6 100644 --- a/pkg/sql/opt/memo/memo.go +++ b/pkg/sql/opt/memo/memo.go @@ -213,6 +213,7 @@ type Memo struct { useImprovedHoistJoinProject bool clampLowHistogramSelectivity bool clampInequalitySelectivity bool + useMaxFrequencySelectivity bool // txnIsoLevel is the isolation level under which the plan was created. 
This // affects the planning of some locking operations, so it must be included in @@ -326,6 +327,7 @@ func (m *Memo) Init(ctx context.Context, evalCtx *eval.Context) { useImprovedHoistJoinProject: evalCtx.SessionData().OptimizerUseImprovedHoistJoinProject, clampLowHistogramSelectivity: evalCtx.SessionData().OptimizerClampLowHistogramSelectivity, clampInequalitySelectivity: evalCtx.SessionData().OptimizerClampInequalitySelectivity, + useMaxFrequencySelectivity: evalCtx.SessionData().OptimizerUseMaxFrequencySelectivity, txnIsoLevel: evalCtx.TxnIsoLevel, } m.metadata.Init() @@ -503,6 +505,7 @@ func (m *Memo) IsStale( m.useImprovedHoistJoinProject != evalCtx.SessionData().OptimizerUseImprovedHoistJoinProject || m.clampLowHistogramSelectivity != evalCtx.SessionData().OptimizerClampLowHistogramSelectivity || m.clampInequalitySelectivity != evalCtx.SessionData().OptimizerClampInequalitySelectivity || + m.useMaxFrequencySelectivity != evalCtx.SessionData().OptimizerUseMaxFrequencySelectivity || m.txnIsoLevel != evalCtx.TxnIsoLevel { return true, nil } diff --git a/pkg/sql/opt/memo/memo_test.go b/pkg/sql/opt/memo/memo_test.go index 021f37e292f5..f44b9a2389f6 100644 --- a/pkg/sql/opt/memo/memo_test.go +++ b/pkg/sql/opt/memo/memo_test.go @@ -510,6 +510,12 @@ func TestMemoIsStale(t *testing.T) { evalCtx.SessionData().OptimizerUseImprovedMultiColumnSelectivityEstimate = false notStale() + // Stale optimizer_use_max_frequency_selectivity. + evalCtx.SessionData().OptimizerUseMaxFrequencySelectivity = true + stale() + evalCtx.SessionData().OptimizerUseMaxFrequencySelectivity = false + notStale() + // Stale optimizer_prove_implication_with_virtual_computed_columns. 
evalCtx.SessionData().OptimizerProveImplicationWithVirtualComputedColumns = true stale() diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go index 99a0ddc28505..ec331867aec7 100644 --- a/pkg/sql/opt/memo/statistics_builder.go +++ b/pkg/sql/opt/memo/statistics_builder.go @@ -4670,6 +4670,9 @@ func (sb *statisticsBuilder) selectivityFromMaxFrequencies( ) (selectivity, selectivityUpperBound props.Selectivity, maxFreqCols opt.ColSet) { selectivity = props.OneSelectivity selectivityUpperBound = props.OneSelectivity + if !sb.evalCtx.SessionData().OptimizerUseMaxFrequencySelectivity { + return selectivity, selectivityUpperBound, opt.ColSet{} + } for col, ok := cols.Next(0); ok; col, ok = cols.Next(col + 1) { c := opt.MakeColSet(col) inputColStat, inputStats := sb.colStatFromInput(c, e) diff --git a/pkg/sql/opt/memo/testdata/stats/generic b/pkg/sql/opt/memo/testdata/stats/generic index 1d983195a67c..1a2d95b9f0e4 100644 --- a/pkg/sql/opt/memo/testdata/stats/generic +++ b/pkg/sql/opt/memo/testdata/stats/generic @@ -254,6 +254,27 @@ select └── filters └── i:2 = $1 [type=bool, outer=(2), constraints=(/2: (/NULL - ]), fd=()-->(2)] +# Fall back to ~row_count/distinct_count if +# optimizer_use_max_frequency_selectivity is false. 
+norm set=(optimizer_use_max_frequency_selectivity=false) +SELECT * FROM t WHERE i = $1 +---- +select + ├── columns: k:1(int!null) i:2(int!null) s:3(string) + ├── has-placeholder + ├── stats: [rows=24.25, distinct(2)=1, null(2)=0] + ├── key: (1) + ├── fd: ()-->(2), (1)-->(3) + ├── scan t + │ ├── columns: k:1(int!null) i:2(int) s:3(string) + │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=41, null(2)=30] + │ │ histogram(2)= 0 30 0 0 90 10 180 10 270 20 360 30 + │ │ <--- NULL --- 0 ---- 100 ----- 200 ----- 300 ----- 400 + │ ├── key: (1) + │ └── fd: (1)-->(2,3) + └── filters + └── i:2 = $1 [type=bool, outer=(2), constraints=(/2: (/NULL - ]), fd=()-->(2)] + # Similar case as above, but with opt to ensure the correct row counts are used # for new memo groups. opt diff --git a/pkg/sql/sessiondatapb/local_only_session_data.proto b/pkg/sql/sessiondatapb/local_only_session_data.proto index 9d08d498f15d..63705e5a9322 100644 --- a/pkg/sql/sessiondatapb/local_only_session_data.proto +++ b/pkg/sql/sessiondatapb/local_only_session_data.proto @@ -749,6 +749,9 @@ message LocalOnlySessionData { // clamp the selectivity of open-ended inequality filters (e.g. <, >, !=) // but not (=, BETWEEN etc.) to a minimum threshold. bool optimizer_clamp_inequality_selectivity = 190; + // OptimizerUseMaxFrequencySelectivity, when true, indicates that the + // optimizer should use max frequency for selectivity estimation. + bool optimizer_use_max_frequency_selectivity = 191; /////////////////////////////////////////////////////////////////////////// // WARNING: consider whether a session parameter you're adding needs to // diff --git a/pkg/sql/vars.go b/pkg/sql/vars.go index fb2ac46f3ee6..4209b2df8eed 100644 --- a/pkg/sql/vars.go +++ b/pkg/sql/vars.go @@ -3723,6 +3723,23 @@ var varGen = map[string]sessionVar{ GlobalDefault: globalTrue, }, + // CockroachDB extension. 
+ `optimizer_use_max_frequency_selectivity`: { + GetStringVal: makePostgresBoolGetStringValFn(`optimizer_use_max_frequency_selectivity`), + Set: func(_ context.Context, m sessionDataMutator, s string) error { + b, err := paramparse.ParseBoolVar("optimizer_use_max_frequency_selectivity", s) + if err != nil { + return err + } + m.SetOptimizerUseMaxFrequencySelectivity(b) + return nil + }, + Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) { + return formatBoolAsPostgresSetting(evalCtx.SessionData().OptimizerUseMaxFrequencySelectivity), nil + }, + GlobalDefault: globalTrue, + }, + // CockroachDB extension. `optimizer_prove_implication_with_virtual_computed_columns`: { GetStringVal: makePostgresBoolGetStringValFn(`optimizer_prove_implication_with_virtual_computed_columns`),