diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt index 513e7fea3585..f95f97292b3e 100644 --- a/docs/generated/settings/settings-for-tenants.txt +++ b/docs/generated/settings/settings-for-tenants.txt @@ -299,6 +299,7 @@ sql.stats.response.max integer 20000 the maximum number of statements and transa sql.stats.response.show_internal.enabled boolean false controls if statistics for internal executions should be returned by the CombinedStatements and if internal sessions should be returned by the ListSessions endpoints. These endpoints are used to display statistics on the SQL Activity pages application sql.stats.system_tables.enabled boolean true when true, enables use of statistics on system tables by the query optimizer application sql.stats.system_tables_autostats.enabled boolean true when true, enables automatic collection of statistics on system tables application +sql.stats.virtual_computed_columns.enabled boolean false set to true to collect table statistics on virtual computed columns application sql.telemetry.query_sampling.enabled boolean false when set to true, executed queries will emit an event on the telemetry logging channel application sql.telemetry.query_sampling.internal.enabled boolean false when set to true, internal queries will be sampled in telemetry logging application sql.telemetry.query_sampling.max_event_frequency integer 8 the max event frequency (events per second) at which we sample executions for telemetry, note that it is recommended that this value shares a log-line limit of 10 logs per second on the telemetry pipeline with all other telemetry events. If sampling mode is set to 'transaction', this value is ignored. application diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index 3e65a87efe0d..43cde291e3b6 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -248,6 +248,7 @@
sql.stats.response.show_internal.enabled
boolean false controls if statistics for internal executions should be returned by the CombinedStatements and if internal sessions should be returned by the ListSessions endpoints. These endpoints are used to display statistics on the SQL Activity pages Serverless/Dedicated/Self-Hosted
sql.stats.system_tables.enabled
boolean true when true, enables use of statistics on system tables by the query optimizer Serverless/Dedicated/Self-Hosted
sql.stats.system_tables_autostats.enabled
boolean true when true, enables automatic collection of statistics on system tables Serverless/Dedicated/Self-Hosted +
sql.stats.virtual_computed_columns.enabled
boolean false set to true to collect table statistics on virtual computed columns Serverless/Dedicated/Self-Hosted
sql.telemetry.query_sampling.enabled
boolean false when set to true, executed queries will emit an event on the telemetry logging channel Serverless/Dedicated/Self-Hosted
sql.telemetry.query_sampling.internal.enabled
boolean false when set to true, internal queries will be sampled in telemetry logging Serverless/Dedicated/Self-Hosted
sql.telemetry.query_sampling.max_event_frequency
integer8the max event frequency (events per second) at which we sample executions for telemetry, note that it is recommended that this value shares a log-line limit of 10 logs per second on the telemetry pipeline with all other telemetry events. If sampling mode is set to 'transaction', this value is ignored.Serverless/Dedicated/Self-Hosted diff --git a/pkg/sql/create_stats.go b/pkg/sql/create_stats.go index 0dcf56fc913d..7f204313a154 100644 --- a/pkg/sql/create_stats.go +++ b/pkg/sql/create_stats.go @@ -61,14 +61,25 @@ var featureStatsEnabled = settings.RegisterBoolSetting( featureflag.FeatureFlagEnabledDefault, settings.WithPublic) +var statsOnVirtualCols = settings.RegisterBoolSetting( + settings.ApplicationLevel, + "sql.stats.virtual_computed_columns.enabled", + "set to true to collect table statistics on virtual computed columns", + false, + settings.WithPublic) + const nonIndexColHistogramBuckets = 2 // StubTableStats generates "stub" statistics for a table which are missing -// histograms and have 0 for all values. +// statistics on virtual computed columns, multi-column stats, and histograms, +// and have 0 for all values. func StubTableStats( - desc catalog.TableDescriptor, name string, multiColEnabled bool, defaultHistogramBuckets uint32, + desc catalog.TableDescriptor, name string, ) ([]*stats.TableStatisticProto, error) { - colStats, err := createStatsDefaultColumns(desc, multiColEnabled, defaultHistogramBuckets) + colStats, err := createStatsDefaultColumns( + context.Background(), desc, false /* virtColEnabled */, false, /* multiColEnabled */ + nonIndexColHistogramBuckets, nil, /* evalCtx */ + ) if err != nil { return nil, err } @@ -232,17 +243,18 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro var colStats []jobspb.CreateStatsDetails_ColStat var deleteOtherStats bool if len(n.ColumnNames) == 0 { - // Disable multi-column stats and deleting stats - // if partial statistics at the extremes are requested. 
- // TODO (faizaanmadhani): Add support for multi-column stats. + virtColEnabled := statsOnVirtualCols.Get(n.p.ExecCfg().SV()) + // Disable multi-column stats and deleting stats if partial statistics at + // the extremes are requested. + // TODO(faizaanmadhani): Add support for multi-column stats. var multiColEnabled bool if !n.Options.UsingExtremes { - multiColEnabled = stats.MultiColumnStatisticsClusterMode.Get(&n.p.ExecCfg().Settings.SV) + multiColEnabled = stats.MultiColumnStatisticsClusterMode.Get(n.p.ExecCfg().SV()) deleteOtherStats = true } defaultHistogramBuckets := stats.GetDefaultHistogramBuckets(n.p.ExecCfg().SV(), tableDesc) if colStats, err = createStatsDefaultColumns( - tableDesc, multiColEnabled, defaultHistogramBuckets, + ctx, tableDesc, virtColEnabled, multiColEnabled, defaultHistogramBuckets, n.p.EvalContext(), ); err != nil { return nil, err } @@ -254,12 +266,16 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro columnIDs := make([]descpb.ColumnID, len(columns)) for i := range columns { - if columns[i].IsVirtual() { - return nil, pgerror.Newf( + if columns[i].IsVirtual() && !statsOnVirtualCols.Get(n.p.ExecCfg().SV()) { + err := pgerror.Newf( pgcode.InvalidColumnReference, "cannot create statistics on virtual column %q", columns[i].ColName(), ) + return nil, errors.WithHint(err, + "set cluster setting sql.stats.virtual_computed_columns.enabled to collect statistics "+ + "on virtual columns", + ) } columnIDs[i] = columns[i].GetID() } @@ -354,12 +370,47 @@ const maxNonIndexCols = 100 // other columns from the table. We only collect histograms for index columns, // plus any other boolean or enum columns (where the "histogram" is tiny). 
func createStatsDefaultColumns( - desc catalog.TableDescriptor, multiColEnabled bool, defaultHistogramBuckets uint32, + ctx context.Context, + desc catalog.TableDescriptor, + virtColEnabled, multiColEnabled bool, + defaultHistogramBuckets uint32, + evalCtx *eval.Context, ) ([]jobspb.CreateStatsDetails_ColStat, error) { colStats := make([]jobspb.CreateStatsDetails_ColStat, 0, len(desc.ActiveIndexes())) requestedStats := make(map[string]struct{}) + // CREATE STATISTICS only runs as a fully-distributed plan. If statistics on + // virtual computed columns are enabled, we must check whether each virtual + // computed column expression is safe to distribute. Virtual computed columns + // with expressions *not* safe to distribute will be skipped, even if + // sql.stats.virtual_computed_columns.enabled is true. + // TODO(michae2): Add the ability to run CREATE STATISTICS locally if a + // local-only virtual computed column expression is needed. + cannotDistribute := make([]bool, len(desc.PublicColumns())) + if virtColEnabled { + semaCtx := tree.MakeSemaContext() + exprs, _, err := schemaexpr.MakeComputedExprs( + ctx, + desc.PublicColumns(), + desc.PublicColumns(), + desc, + tree.NewUnqualifiedTableName(tree.Name(desc.GetName())), + evalCtx, + &semaCtx, + ) + if err != nil { + return nil, err + } + for i, col := range desc.PublicColumns() { + cannotDistribute[i] = col.IsVirtual() && checkExprForDistSQL(exprs[i]) != nil + } + } + + isUnsupportedVirtual := func(col catalog.Column) bool { + return col.IsVirtual() && (!virtColEnabled || cannotDistribute[col.Ordinal()]) + } + // sortAndTrackStatsExists adds the given column IDs as a set to the // requestedStats set. If the columnIDs were already in the set, it returns // true. As a side-effect sortAndTrackStatsExists also sorts colIDs. NOTE: @@ -382,11 +433,14 @@ func createStatsDefaultColumns( return err } - // Do not collect stats for virtual computed columns. 
DistSQLPlanner - // cannot currently collect stats for these columns because it plans - // table readers on the table's primary index which does not include - // virtual computed columns. - if col.IsVirtual() { + // There shouldn't be any non-public columns, but defensively skip over them + // if there are. + if !col.Public() { + return nil + } + + // Skip unsupported virtual computed columns. + if isUnsupportedVirtual(col) { return nil } @@ -431,9 +485,30 @@ func createStatsDefaultColumns( continue } - colIDs := make([]descpb.ColumnID, i+1) + colIDs := make([]descpb.ColumnID, 0, i+1) for j := 0; j <= i; j++ { - colIDs[j] = desc.GetPrimaryIndex().GetKeyColumnID(j) + col, err := catalog.MustFindColumnByID(desc, desc.GetPrimaryIndex().GetKeyColumnID(j)) + if err != nil { + return nil, err + } + + // There shouldn't be any non-public columns, but defensively skip over + // them if there are. + if !col.Public() { + continue + } + + // Skip unsupported virtual computed columns. + if isUnsupportedVirtual(col) { + continue + } + colIDs = append(colIDs, col.GetID()) + } + + // Do not attempt to create multi-column stats with < 2 columns. This can + // happen when an index contains only virtual computed columns. + if len(colIDs) < 2 { + continue } // Remember the requested stats so we don't request duplicates. @@ -468,15 +543,23 @@ func createStatsDefaultColumns( if err != nil { return nil, err } - if col.IsVirtual() { + + // There shouldn't be any non-public columns, but defensively skip them + // if there are. + if !col.Public() { + continue + } + + // Skip unsupported virtual computed columns. + if isUnsupportedVirtual(col) { continue } colIDs = append(colIDs, col.GetID()) } - // Do not attempt to create multi-column stats with no columns. This - // can happen when an index contains only virtual computed columns. - if len(colIDs) == 0 { + // Do not attempt to create multi-column stats with < 2 columns. 
This can + // happen when an index contains only virtual computed columns. + if len(colIDs) < 2 { continue } @@ -524,8 +607,8 @@ func createStatsDefaultColumns( for i := 0; i < len(desc.PublicColumns()) && nonIdxCols < maxNonIndexCols; i++ { col := desc.PublicColumns()[i] - // Do not collect stats for virtual computed columns. - if col.IsVirtual() { + // Skip unsupported virtual computed columns. + if isUnsupportedVirtual(col) { continue } diff --git a/pkg/sql/distsql_physical_planner.go b/pkg/sql/distsql_physical_planner.go index 38fb8134230e..f33d47d76323 100644 --- a/pkg/sql/distsql_physical_planner.go +++ b/pkg/sql/distsql_physical_planner.go @@ -387,9 +387,9 @@ func hasOidType(t *types.T) bool { return false } -// checkExpr verifies that an expression doesn't contain things that are not yet -// supported by distSQL, like distSQL-blocklisted functions. -func checkExpr(expr tree.Expr) error { +// checkExprForDistSQL verifies that an expression doesn't contain things that +// are not yet supported by distSQL, like distSQL-blocklisted functions. +func checkExprForDistSQL(expr tree.Expr) error { if expr == nil { return nil } @@ -536,7 +536,7 @@ func checkSupportForPlanNode(node planNode) (distRecommendation, error) { return checkSupportForPlanNode(n.source) case *filterNode: - if err := checkExpr(n.filter); err != nil { + if err := checkExprForDistSQL(n.filter); err != nil { return cannotDistribute, err } return checkSupportForPlanNode(n.source.plan) @@ -575,7 +575,7 @@ func checkSupportForPlanNode(node planNode) (distRecommendation, error) { // TODO(nvanbenschoten): lift this restriction. 
return cannotDistribute, cannotDistributeRowLevelLockingErr } - if err := checkExpr(n.onExpr); err != nil { + if err := checkExprForDistSQL(n.onExpr); err != nil { return cannotDistribute, err } rec, err := checkSupportForPlanNode(n.input) @@ -585,7 +585,7 @@ func checkSupportForPlanNode(node planNode) (distRecommendation, error) { return rec.compose(shouldDistribute), nil case *joinNode: - if err := checkExpr(n.pred.onCond); err != nil { + if err := checkExprForDistSQL(n.pred.onCond); err != nil { return cannotDistribute, err } recLeft, err := checkSupportForPlanNode(n.left.plan) @@ -624,13 +624,13 @@ func checkSupportForPlanNode(node planNode) (distRecommendation, error) { return cannotDistribute, cannotDistributeRowLevelLockingErr } - if err := checkExpr(n.lookupExpr); err != nil { + if err := checkExprForDistSQL(n.lookupExpr); err != nil { return cannotDistribute, err } - if err := checkExpr(n.remoteLookupExpr); err != nil { + if err := checkExprForDistSQL(n.remoteLookupExpr); err != nil { return cannotDistribute, err } - if err := checkExpr(n.onCond); err != nil { + if err := checkExprForDistSQL(n.onCond); err != nil { return cannotDistribute, err } rec, err := checkSupportForPlanNode(n.input) @@ -646,7 +646,7 @@ func checkSupportForPlanNode(node planNode) (distRecommendation, error) { case *projectSetNode: for i := range n.exprs { - if err := checkExpr(n.exprs[i]); err != nil { + if err := checkExprForDistSQL(n.exprs[i]); err != nil { return cannotDistribute, err } } @@ -654,7 +654,7 @@ func checkSupportForPlanNode(node planNode) (distRecommendation, error) { case *renderNode: for _, e := range n.render { - if err := checkExpr(e); err != nil { + if err := checkExprForDistSQL(e); err != nil { return cannotDistribute, err } } @@ -725,7 +725,7 @@ func checkSupportForPlanNode(node planNode) (distRecommendation, error) { for _, tuple := range n.tuples { for _, expr := range tuple { - if err := checkExpr(expr); err != nil { + if err := checkExprForDistSQL(expr); 
err != nil { return cannotDistribute, err } } @@ -759,7 +759,7 @@ func checkSupportForPlanNode(node planNode) (distRecommendation, error) { return cannotDistribute, cannotDistributeRowLevelLockingErr } } - if err := checkExpr(n.onCond); err != nil { + if err := checkExprForDistSQL(n.onCond); err != nil { return cannotDistribute, err } return shouldDistribute, nil diff --git a/pkg/sql/distsql_plan_stats.go b/pkg/sql/distsql_plan_stats.go index 17941d1d95d5..2cfafe707b97 100644 --- a/pkg/sql/distsql_plan_stats.go +++ b/pkg/sql/distsql_plan_stats.go @@ -22,9 +22,12 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/catalog" "github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" + "github.com/cockroachdb/cockroach/pkg/sql/catalog/schemaexpr" "github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc" "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" + "github.com/cockroachdb/cockroach/pkg/sql/opt/exec" + "github.com/cockroachdb/cockroach/pkg/sql/parser" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" @@ -368,28 +371,63 @@ func (dsp *DistSQLPlanner) createStatsPlan( return nil, errors.New("no stats requested") } - // Calculate the set of columns we need to scan. + // Calculate the set of columns we need to scan and any virtual computed cols. 
var colCfg scanColumnsConfig var tableColSet catalog.TableColSet + var requestedCols []catalog.Column + var virtComputedCols []catalog.Column for _, s := range reqStats { for _, c := range s.columns { if !tableColSet.Contains(c) { tableColSet.Add(c) - colCfg.wantedColumns = append(colCfg.wantedColumns, c) + col, err := catalog.MustFindColumnByID(desc, c) + if err != nil { + return nil, err + } + requestedCols = append(requestedCols, col) + if col.IsVirtual() { + virtComputedCols = append(virtComputedCols, col) + } else { + colCfg.wantedColumns = append(colCfg.wantedColumns, c) + } } } } + // Add columns to the scan that are referenced by virtual computed column + // expressions but were not in the requested statistics. + if len(virtComputedCols) != 0 { + exprStrings := make([]string, 0, len(virtComputedCols)) + for _, col := range virtComputedCols { + exprStrings = append(exprStrings, col.GetComputeExpr()) + } + + virtComputedExprs, err := parser.ParseExprs(exprStrings) + if err != nil { + return nil, err + } + + for _, expr := range virtComputedExprs { + refColIDs, err := schemaexpr.ExtractColumnIDs(desc, expr) + if err != nil { + return nil, err + } + refColIDs.ForEach(func(c descpb.ColumnID) { + if !tableColSet.Contains(c) { + tableColSet.Add(c) + // Add the referenced column to the scan. + colCfg.wantedColumns = append(colCfg.wantedColumns, c) + } + }) + } + } + // Create the table readers; for this we initialize a dummy scanNode. scan := scanNode{desc: desc} err := scan.initDescDefaults(colCfg) if err != nil { return nil, err } - var colIdxMap catalog.TableColMap - for i, c := range scan.cols { - colIdxMap.Set(c.GetID(), i) - } var sb span.Builder sb.Init(planCtx.EvalContext(), planCtx.ExtendedEvalCtx.Codec, desc, scan.index) scan.spans, err = sb.UnconstrainedSpans() @@ -412,8 +450,89 @@ func (dsp *DistSQLPlanner) createStatsPlan( } } + // Add rendering of virtual computed columns. + if len(virtComputedCols) != 0 { + // Resolve names and types. 
+ semaCtx := tree.MakeSemaContext() + virtComputedExprs, _, err := schemaexpr.MakeComputedExprs( + ctx, + virtComputedCols, + scan.cols, + desc, + tree.NewUnqualifiedTableName(tree.Name(desc.GetName())), + planCtx.EvalContext(), + &semaCtx, + ) + if err != nil { + return nil, err + } + + // Build render expressions for all requested columns. + exprs := make(tree.TypedExprs, len(requestedCols)) + resultCols := colinfo.ResultColumnsFromColumns(desc.GetID(), requestedCols) + + ivh := tree.MakeIndexedVarHelper(nil /* container */, len(scan.cols)) + var scanIdx, virtIdx int + for i, col := range requestedCols { + if col.IsVirtual() { + if virtIdx >= len(virtComputedExprs) { + return nil, errors.AssertionFailedf( + "virtual computed column expressions do not match requested columns: %v vs %v", + virtComputedExprs, requestedCols, + ) + } + // Check that the virtual computed column expression can be distributed. + // TODO(michae2): Add the ability to run CREATE STATISTICS locally if a + // local-only virtual computed column expression is needed. + if err := checkExprForDistSQL(virtComputedExprs[virtIdx]); err != nil { + return nil, err + } + exprs[i] = virtComputedExprs[virtIdx] + virtIdx++ + } else { + // Confirm that the scan columns contain the requested column in the + // expected order. + if scanIdx >= len(scan.cols) || scan.cols[scanIdx].GetID() != col.GetID() { + return nil, errors.AssertionFailedf( + "scan columns do not match requested columns: %v vs %v", scan.cols, requestedCols, + ) + } + exprs[i] = ivh.IndexedVarWithType(scanIdx, scan.cols[scanIdx].GetType()) + scanIdx++ + } + } + + var rb renderBuilder + rb.init(exec.Node(planNode(&scan)), exec.OutputOrdering{}) + for i, expr := range exprs { + exprs[i] = rb.r.ivarHelper.Rebind(expr) + } + rb.setOutput(exprs, resultCols) + + err = dsp.createPlanForRender(ctx, p, rb.r, planCtx) + if err != nil { + return nil, err + } + } else { + // No virtual computed columns. 
Confirm that the scan columns match the + // requested columns. + for i, col := range requestedCols { + if i >= len(scan.cols) || scan.cols[i].GetID() != col.GetID() { + return nil, errors.AssertionFailedf( + "scan columns do not match requested columns: %v vs %v", scan.cols, requestedCols, + ) + } + } + } + + // Output of the scan or render will be in requestedCols order. + var colIdxMap catalog.TableColMap + for i, col := range requestedCols { + colIdxMap.Set(col.GetID(), i) + } + var sketchSpecs, invSketchSpecs []execinfrapb.SketchSpec - sampledColumnIDs := make([]descpb.ColumnID, len(scan.cols)) + sampledColumnIDs := make([]descpb.ColumnID, len(requestedCols)) for _, s := range reqStats { spec := execinfrapb.SketchSpec{ SketchType: execinfrapb.SketchType_HLL_PLUS_PLUS_V1, diff --git a/pkg/sql/distsql_spec_exec_factory.go b/pkg/sql/distsql_spec_exec_factory.go index b0e55eff337f..04ed9b684ee4 100644 --- a/pkg/sql/distsql_spec_exec_factory.go +++ b/pkg/sql/distsql_spec_exec_factory.go @@ -314,7 +314,7 @@ func (e *distSQLSpecExecFactory) checkExprsAndMaybeMergeLastStage( recommendation = cannotDistribute } for _, expr := range exprs { - if err := checkExpr(expr); err != nil { + if err := checkExprForDistSQL(expr); err != nil { recommendation = cannotDistribute if physPlan != nil { // The filter expression cannot be distributed, so we need to diff --git a/pkg/sql/importer/import_job.go b/pkg/sql/importer/import_job.go index ca88d348dfc0..9d58fe70dcb8 100644 --- a/pkg/sql/importer/import_job.go +++ b/pkg/sql/importer/import_job.go @@ -1039,14 +1039,7 @@ func (r *importResumer) writeStubStatisticsForImportedTables( distinctCount := uint64(float64(rowCount) * memo.UnknownDistinctCountRatio) nullCount := uint64(float64(rowCount) * memo.UnknownNullCountRatio) avgRowSize := uint64(memo.UnknownAvgRowSize) - // Because we don't yet have real distinct and null counts, only produce - // single-column stats to avoid the appearance of perfectly correlated - // columns. 
- multiColEnabled := false - defaultHistogramBuckets := stats.GetDefaultHistogramBuckets(execCfg.SV(), desc) - statistics, err := sql.StubTableStats( - desc, jobspb.ImportStatsName, multiColEnabled, defaultHistogramBuckets, - ) + statistics, err := sql.StubTableStats(desc, jobspb.ImportStatsName) if err == nil { for _, statistic := range statistics { statistic.RowCount = rowCount diff --git a/pkg/sql/logictest/testdata/logic_test/distsql_stats b/pkg/sql/logictest/testdata/logic_test/distsql_stats index 42bc1234d89b..1cb4c3b8ed56 100644 --- a/pkg/sql/logictest/testdata/logic_test/distsql_stats +++ b/pkg/sql/logictest/testdata/logic_test/distsql_stats @@ -1184,10 +1184,13 @@ upper_bound range_rows distinct_range_rows equal_rows 2 0 0 1 3 0 0 1 -# Test that stats are not collected for virtual columns. +# Test that stats are not collected for virtual columns if disabled. statement ok SET CLUSTER SETTING sql.stats.multi_column_collection.enabled = true +statement ok +SET CLUSTER SETTING sql.stats.virtual_computed_columns.enabled = false + statement ok CREATE TABLE virt ( a INT, @@ -1260,6 +1263,9 @@ s {b} 3 0 true s {j} 3 0 true s {rowid} 3 0 true +statement ok +RESET CLUSTER SETTING sql.stats.virtual_computed_columns.enabled + # Test that non-index columns have histograms collected for them, with # up to 2 buckets. statement ok @@ -1599,6 +1605,9 @@ CREATE STATISTICS s FROM t63387; # Regression test for #71080. Stats collection should succeed on tables with NOT # NULL virtual columns. 
+statement ok +SET CLUSTER SETTING sql.stats.virtual_computed_columns.enabled = false + statement ok SET CLUSTER SETTING sql.stats.multi_column_collection.enabled = true @@ -1608,19 +1617,19 @@ CREATE TABLE t71080 ( a INT, b INT NOT NULL AS (a + 10) VIRTUAL, INDEX (a, b) -); +) statement ok -INSERT INTO t71080 VALUES (1, 2); +INSERT INTO t71080 VALUES (1, 2) statement ok -CREATE STATISTICS s FROM t71080; +CREATE STATISTICS s FROM t71080 statement error cannot create statistics on virtual column \"b\" -CREATE STATISTICS s ON b FROM t71080; +CREATE STATISTICS s ON b FROM t71080 statement error cannot create statistics on virtual column \"b\" -CREATE STATISTICS s ON a, b FROM t71080; +CREATE STATISTICS s ON a, b FROM t71080 # Regression test for #76867. Do not attempt to collect empty multi-column stats # when there are indexes on columns that are all virtual. @@ -1635,6 +1644,9 @@ CREATE TABLE t76867 ( statement ok ANALYZE t76867 +statement ok +RESET CLUSTER SETTING sql.stats.virtual_computed_columns.enabled + # Regression tests for #80123. Collecting stats on system tables is allowed. statement ok ANALYZE system.locations @@ -2404,6 +2416,9 @@ CREATE INDEX ON xy (y) WHERE y > 5; statement error pq: table xy does not contain a non-partial forward index with y as a prefix column CREATE STATISTICS xy_partial_idx ON y FROM xy USING EXTREMES; +statement ok +RESET enable_create_stats_using_extremes + # Regression test for #100909. Ensure enum is hydrated in SHOW HISTOGRAM. statement ok CREATE TYPE enum1 as ENUM ('hello', 'hi'); @@ -2491,3 +2506,208 @@ SELECT statistics_name FROM [SHOW STATISTICS FOR TABLE tab_test_privileges] tab_test_privileges_stat user root + +# Test stats collection on virtual computed columns. 
+ +statement ok +CREATE TABLE t68254 ( + a INT PRIMARY KEY, + b STRING, + c JSONB, + d STRING AS (b || repeat('a', a)) VIRTUAL, + e STRING AS ((c->'foo')->>'bar') VIRTUAL, + INDEX (d), + INDEX (e), + INDEX (b, e) +) + +statement ok +INSERT INTO t68254 (a, b, c) +SELECT i, i::STRING, json_build_object('foo', json_build_object('bar', json_build_object('baz', i))) +FROM generate_series(0, 3) s(i) + +statement ok +INSERT INTO t68254 (a, b, c) VALUES (4, NULL, NULL) + +statement ok +SET CLUSTER SETTING sql.stats.virtual_computed_columns.enabled = true + +statement ok +CREATE STATISTICS j1 FROM t68254 + +query TTIIIB colnames +SELECT + statistics_name, + column_names, + row_count, + distinct_count, + null_count, + histogram_id IS NOT NULL AS has_histogram +FROM + [SHOW STATISTICS FOR TABLE t68254] +ORDER BY statistics_name, column_names::STRING +---- +statistics_name column_names row_count distinct_count null_count has_histogram +j1 {a} 5 5 0 true +j1 {b,e} 5 5 1 false +j1 {b} 5 5 1 true +j1 {c} 5 5 1 true +j1 {d} 5 5 1 true +j1 {e} 5 5 1 true + +statement ok +CREATE STATISTICS j2 ON d FROM t68254 + +statement ok +CREATE STATISTICS j3 ON e FROM t68254 + +let $hist_d +SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE t68254] WHERE statistics_name = 'j2' + +let $hist_e +SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE t68254] WHERE statistics_name = 'j3' + +query TIRI colnames,nosort +SHOW HISTOGRAM $hist_d +---- +upper_bound range_rows distinct_range_rows equal_rows +'0' 0 0 1 +'1a' 0 0 1 +'2aa' 0 0 1 +'3aaa' 0 0 1 + +query TIRI colnames,nosort +SHOW HISTOGRAM $hist_e +---- +upper_bound range_rows distinct_range_rows equal_rows +'{"baz": 0}' 0 0 1 +'{"baz": 1}' 0 0 1 +'{"baz": 2}' 0 0 1 +'{"baz": 3}' 0 0 1 + +# Check that we also collect stats on the hidden expression index virt column. 
+statement ok +CREATE INDEX ON t68254 ((c->'foo')) + +statement ok +CREATE STATISTICS j4 FROM t68254 + +query TTIIIB colnames +SELECT + statistics_name, + column_names, + row_count, + distinct_count, + null_count, + histogram_id IS NOT NULL AS has_histogram +FROM + [SHOW STATISTICS FOR TABLE t68254] +ORDER BY statistics_name, column_names::STRING +---- +statistics_name column_names row_count distinct_count null_count has_histogram +j4 {a} 5 5 0 true +j4 {b,e} 5 5 1 false +j4 {b} 5 5 1 true +j4 {crdb_internal_idx_expr} 5 5 1 true +j4 {c} 5 5 1 true +j4 {d} 5 5 1 true +j4 {e} 5 5 1 true + +statement ok +CREATE STATISTICS j5 ON crdb_internal_idx_expr FROM t68254 + +let $hist_crdb_internal_idx_expr +SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE t68254] WHERE statistics_name = 'j5' + +query TIRI colnames,nosort +SHOW HISTOGRAM $hist_crdb_internal_idx_expr +---- +upper_bound range_rows distinct_range_rows equal_rows +'{"bar": {"baz": 0}}' 0 0 1 +'{"bar": {"baz": 1}}' 0 0 1 +'{"bar": {"baz": 2}}' 0 0 1 +'{"bar": {"baz": 3}}' 0 0 1 + +# Test partial stats using extremes on indexed virtual computed columns. 
+statement ok +SET enable_create_stats_using_extremes = on + +statement ok +INSERT INTO t68254 (a, b, c) VALUES (5, '5', '{"foo": {"bar": {"baz": 5}}}') + +statement ok +CREATE STATISTICS j6 ON d FROM t68254 USING EXTREMES + +statement ok +CREATE STATISTICS j7 ON e FROM t68254 USING EXTREMES + +statement ok +CREATE STATISTICS j8 ON crdb_internal_idx_expr FROM t68254 USING EXTREMES + +let $hist_d +SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE t68254] WHERE statistics_name = 'j6' + +let $hist_e +SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE t68254] WHERE statistics_name = 'j7' + +let $hist_crdb_internal_idx_expr +SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE t68254] WHERE statistics_name = 'j8' + +query TIRI colnames,nosort +SHOW HISTOGRAM $hist_d +---- +upper_bound range_rows distinct_range_rows equal_rows +'5aaaaa' 0 0 1 + +query TIRI colnames,nosort +SHOW HISTOGRAM $hist_e +---- +upper_bound range_rows distinct_range_rows equal_rows +'{"baz": 5}' 0 0 1 + +query TIRI colnames,nosort +SHOW HISTOGRAM $hist_crdb_internal_idx_expr +---- +upper_bound range_rows distinct_range_rows equal_rows +'{"bar": {"baz": 5}}' 0 0 1 + +statement ok +RESET enable_create_stats_using_extremes + +# Regression test for #118537. Do not create stats on non-public mutation +# columns. 
+statement ok +CREATE TABLE t118537 ( + a INT, + PRIMARY KEY (a) USING HASH WITH (bucket_count = 3) +) + +statement ok +INSERT INTO t118537 SELECT generate_series(0, 9) + +statement ok +SET CLUSTER SETTING jobs.debug.pausepoints = 'newschemachanger.before.exec' + +skipif config local-legacy-schema-changer +statement error job \d+ was paused before it completed with reason: pause point "newschemachanger.before.exec" hit +ALTER TABLE t118537 ALTER PRIMARY KEY USING COLUMNS (a) USING HASH + +statement ok +CREATE STATISTICS mutation FROM t118537 + +query TTIB colnames +SELECT statistics_name, column_names, row_count, histogram_id IS NOT NULL AS has_histogram +FROM [SHOW STATISTICS FOR TABLE t118537] +ORDER BY statistics_name, column_names::STRING +---- +statistics_name column_names row_count has_histogram +mutation {a} 10 true + +statement ok +SET CLUSTER SETTING jobs.debug.pausepoints = '' + +statement ok +RESUME JOB (SELECT job_id FROM crdb_internal.jobs WHERE description LIKE 'ALTER TABLE %t118537 ALTER PRIMARY KEY USING COLUMNS (a) USING HASH' AND status = 'paused' FETCH FIRST 1 ROWS ONLY) + +statement ok +RESET CLUSTER SETTING sql.stats.virtual_computed_columns.enabled diff --git a/pkg/sql/opt/exec/execbuilder/testdata/distsql_misc b/pkg/sql/opt/exec/execbuilder/testdata/distsql_misc index 9385c162665b..8af3271082a8 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/distsql_misc +++ b/pkg/sql/opt/exec/execbuilder/testdata/distsql_misc @@ -131,6 +131,204 @@ vectorized: true · Diagram: 
https://cockroachdb.github.io/distsqlplan/decode.html#eJyklEFvmz4UwO__T2G903-TERjIDpw2NZmElKZZ4LBpiioXv6aogJlt1FUR330ydGtBSYWID5bs5_zeLw8_H0H_KiCC1fft-ku8If8v4yRNvq0pSX9sV8kHkqzWq6uUfCRfdzfXRHDDgUIlBW54iRqin8CAgg8UAqAQAoUF7CnUSmaotVT2yLH7QSx-Q-RRyKu6MXZ7TyGTCiE6gslNgRBByu8K3CEXqFwPKAg0PC-6NDb1Zzvd1o_4DBSuZNGUlY4Ip-SOkowSARSSmts9x2WEV4IwIs0DKti3FGRjXjNrww8IEXujGi8h8lp6xvZVsqmkEqhQDAXzygB9me8LyU0XzvKSF7BvT_zZjXRk7S4GlHOe_shzMfBk06vKLqmqyxzXn1VXNvD1p_v6F_n6jhvM8vUHvsF03-Ai38Bxw1m-wcA3nO4bXuQbOu5ilm94ts9O-O5Q17LSOKlTvFEmh9neQ3HAvou1bFSGWyWz7my_vOlA3YZAbfoo6xdx1YWYzaCQl_-eibck9i7p04DkvUvy5zqxMSmYS_LHpHAuKRiTFnNJof2K94V8us0FROC9DOfE9Hd07zA_aHuVkgf51GHT59peBKMapHDNH3GJBlWZV7k2edYH2va_PwEAAP__DBRFDg== +# Check that we properly render virtual computed columns when collecting stats. +statement ok +ALTER TABLE data ADD COLUMN e INT AS (b + 1) VIRTUAL + +statement ok +CREATE INDEX ON data (e) + +statement ok +SET CLUSTER SETTING sql.stats.virtual_computed_columns.enabled = true + +query T +EXPLAIN ANALYZE (DISTSQL) CREATE STATISTICS s1 FROM data +---- +planning time: 10µs +execution time: 100µs +distribution: +vectorized: +rows decoded from KV: 1,000 (7.8 KiB, 2,000 KVs, 1,000 gRPC calls) +maximum memory usage: +network usage: +regions: +isolation level: serializable +priority: normal +quality of service: regular +· +• create statistics +· +Diagram: 
https://cockroachdb.github.io/distsqlplan/decode.html#eJzsV1GPmzgQfr9fYc1Tq3MWbEg266fspT0pqratkqgvp6jywjRFIZizjbZ7q_ys-wP3y05AaYANaeiqVSM1DwjPDOP5Zr7PgQcwf8cgYDp_eb18SRbL6-VssZxNF8Qw8uf8zQ0JpZVAIVEhvpZbNCD-AgYUOFDwgIIPFIawopBqFaAxSuchD8UDs_ATCJdClKSZzc0rCoHSCOIBbGRjBAFLeRvjHGWI2nGBQohWRnGxTb71JL-8Tzd4DxSmKs62iRFEUnJLSUBJCBQWqcxtA4cRmYSEEWU_ogYKc0xC1IJMGCUTTsnEo2Ti5_e_MyHE7PVyDBRevSM22qIg7n__mnIdqMRiYiOVPHJpdWdIiIEKMRSEuW5pvr23aIhGGQoydl3yR2lez99OSSDj2NRiUxnpKpYXxpt30ykxFlMSqCyx5Bl-sk6U2OeCuM4-AHHTFVCUpTKbZrbcabWjUK4_991YuUYQrDao2QsQ7o6ePquF3KYxamfYnFNpXkT_YLF3AWlhpc07v7_lNSutr7ymg9YN_iMfrduGnTh5C-ewE-ceXpYoHaLGsAFvtevsxPV6rXEtrdIOc0_vCXnGXZfcZsEGrXle61CXo9WvY2HN7n0lstnLVnBXZ71WZ1mTQux0ubOnyN1hA4f_cMHzQ4JnFyPyKjogeX5I8v73kDzvIXnWZ16V5EfnJ_lRAyc_nZf8SbzkA8f7xctv4CXvM6-Kl5fnx8vLBk7vdF56T-KlN3D8X7z8Bl56feZV8XJ8frwcN3D6p_PSfxIv_YEz_OG89A7xkl94h3npHeLl6Hvw0uvBS7_PvCpeXp0fL6_6fKLM0aQqMdh6hT-8k9vaacDyd30M11h-GBiV6QDfahUUseXyTZGoMIRobOll5WKWVC5jNcrtly-seiZ2NBNvZGL1TMN2Jn68pj5FeUdT-d2ZWDuT3xeeLKYCCdo7pTelUA0mxSmd67xylFItPfu_kMq7RWPkeh_g-rCq1zlq1zk8WueoGzFvZxr9nIgv23VeHq1z3I3Ya2ca_5yIx-06r44LxO2G7D9S7fEDoBdmrxMzv_C_gnnYwnyVH1kfYnX3PgpBgPv5NzhwqX6QPyDXJj83Fx_VXVH08j7NT70PMjZI4UZu8AVa1NsoiYyNAhBWZ7jb_fZ_AAAA__8_vDBC + +query T +EXPLAIN ANALYZE (DISTSQL) CREATE STATISTICS s1 ON c, e, a FROM data +---- +planning time: 10µs +execution time: 100µs +distribution: +vectorized: +rows decoded from KV: 1,000 (7.8 KiB, 2,000 KVs, 1,000 gRPC calls) +maximum memory usage: +network usage: +regions: +isolation level: serializable +priority: normal +quality of service: regular +· +• create statistics +· +Diagram: 
https://cockroachdb.github.io/distsqlplan/decode.html#eJzsl9Fu2zYUhu_3FMS52jC6EinZcXjlzO0Ao0hS2EZvBqNgxFNXiExqJI00C_xYe4E92SApbm1VMiyjDnpRXQjgIfXzPzwfBfIJ3N8ZCBhP31zN35DZ_Go-mc0n4xlxjNzekIQSpESSP6e310RJL4GCNgpv5AodiL-AAQUOFCKgEAOFPiwo5NYk6JyxxZCn8oOJ-gwipJDqfO2L8IJCYiyCeAKf-gxBwFzeZThFqdAGIVBQ6GWaldMUU4-K14f8Hh-Bwthk65V2gkhamLwDCrNcFoFewIjUijBi_Ce0QGGKWqEVZMQoGXFKRtHvTAgxuZkPgcLb98SnKxQk_O9fV7UToz1qnxr9TZc1D44oTIxCJQgLwyp89-jREYtSCTIMQ_JHFV5O341JIrPM7YzNZWq3Y3kZvH4_HhPnMSeJWWtPfsXPPki1_02QMPg6APG-bUBpy6x9vvbVTIsNhar9vNjOyyWCYDvVmbwGEW7o8QWayVWeoQ36-8WpwrP0n2KlikJ46cvVHnE6ilqt8JqVfquVrw7W2liFFtWeg8Wm1ezVcmlxKb2xAQu_i-2oZpvtLyE7nnF2MuMB6wX8xSjnTZSzVwPyNm3gnDdxHp-Dc96Bc9alSFvOB2fhfLBnhR_PCz-dF94Lop-8dOCFdynSlpeLs_BysWclOp6X6HReol4Q_-SlAy9RlyJteRmehZfhnpX4eF7i03mJe0H_xXiJmnjhr6JmXqImXgbn4CXqwEvcpUhbXi7PwstllyPgFF1utMPa-at5prA2U48VBzVUS6xOdc6sbYLvrEnKsVXzthQqAwqdr3pZ1ZjobZfzFuXqywl2V4kdVOJ7SmxXqV9X4oc9dTEVHZSK25VYXSnump4sqwIa_YOx99VecqiLi0Jxm3sOV3upig_L-8u2Z4XOyeWXzhAWu_4GdX_9g_4G7ZnyutLgx8r0ou7v4qC_YXumUV1p-GNlOqz7uzy8EcL2VONvdufhjf7iuV4Wv6SPmXn4kCoQED4_vYbX9oHiA7l0xX9x9sk8lGbnj3nxV_soM4cUruU9vkaPdpXq1Pk0AeHtGjebX_4PAAD__93LgRw= + +query T +EXPLAIN ANALYZE (DISTSQL) CREATE STATISTICS s1 ON e FROM data +---- +planning time: 10µs +execution time: 100µs +distribution: +vectorized: +rows decoded from KV: 1,000 (7.8 KiB, 2,000 KVs, 1,000 gRPC calls) +maximum memory usage: +network usage: +regions: +isolation level: serializable +priority: normal +quality of service: regular +· +• create statistics +· +Diagram: 
https://cockroachdb.github.io/distsqlplan/decode.html#eJzsltFu2zYUhu_3FMS5ajE6EinZcXjVzO0Ao0hS2EZvhqBgxFNXsExqJIU0C_xYe4E92SCpWmzV8iwDBnpRXwgmeXTO_-t8JPgM7s8MBExm764X78h8cb2YzhfTyZw4Ru5uCZLfZ3c3REkvgYI2Cm_lGh2IP4ABBQ4UIqAQA4Uh3FPIrUnQOWPLkOfqhan6CiKkkOq88OX0PYXEWATxDD71GYKAhXzIcIZSoQ1CoKDQyzSrypSl35SPT_kKn4DCxGTFWjtBHoDCPJfl30HAiNSKMGL8F7RAYYZaoRXkDfuVCSGmt4sxUHj_kfh0jYKE__zt6nFitEftU6O_W7Lm0RGFiVGoBGFhWE8_PHl0xKJUgozDkPxWTy9nHyYkkVnmtmJzmdomlleTNx8nE-I85iQxhfbkFX71Qar9a0HC4CUAcdUVUMkyhc8LX1e631Cox98-r_NyiSDYVj-mb0GEG3p8S-ZynWdog-FuO-rpefoXVrUrS3MvffmlO4XwlpBhp5CX-oU2VqFFtVP_ftMp9Xq5tLiU3tiAhceLJq94GJKHIlmhd687LUQtC2z3Y7Lj-WYn8B2wQcDPTjjfRzi7GJH36R7G-T7G43Mwznswzvq0pWF8dAbGRztC-PF88FP44IMg-snHEXzwPm1p-Lg8Ax-XO0Ki4_mITuEjGgTxTz6O4CPq05aGj_EZ-BjvCImP5yM-hY94EAzPzke0jw9-Ee3nI9rHx-gcfEQ9-Ij7tKXh4-oMfFz1uczN0OVGO2zdpfZXCluVBqy8dKFaYn1Dc6awCX6wJqli6-FdlaiaUOh8vcrqwVQ3S85blOv_7qLbmdjBTHwnE9vONGxn4oc19REVHUwVd2di7UxxX3uy6gpo9I_Gruqd5FBXx1l5hWwW6r1Ur7yctc3qGp2Ty5eAkvRtnaO2zuFBnaNux7ydafRjOr5s67w8qHPc7ThqZxr_mI7HbZ1XhzdI2G05_m7XHj4AenmOOj3zi_h_PA9bnq_KI-tzZh4_pQoEhN9-gz2P5gflC3LpynNz_sU8VqIXT3l56n2WmUMKN3KFb9GjXac6dT5NQHhb4Gbzy78BAAD__zeIegQ= + +statement ok +ALTER TABLE data ADD COLUMN f FLOAT AS (atan2d(c, d::float)) VIRTUAL + +query T +EXPLAIN ANALYZE (DISTSQL) CREATE STATISTICS s1 ON f, e, d FROM data +---- +planning time: 10µs +execution time: 100µs +distribution: +vectorized: +rows decoded from KV: 1,000 (7.8 KiB, 2,000 KVs, 1,000 gRPC calls) +maximum memory usage: +network usage: +regions: +isolation level: serializable +priority: normal +quality of service: regular +· +• create statistics +· +Diagram: 
https://cockroachdb.github.io/distsqlplan/decode.html#eJzsl9Fu2zYUhu_3FMS5ajG6EinZUXjlzG0Bo0tS2EZvBqNgxBNXiExqJI00C_xYe4E92SApbm3VNix3XnsRXQjg4dHP_-h8FKhHcH_mIGAwenMxeUPGk4vJcDwZDsbEMXJ9RW4pQUoUeTu6viRKegkUtFF4JefoQPwBDChwoBABhRgodGFKobAmReeMLVMeqweG6jOIkEKmi4Uvw1MKqbEI4hF85nMEARN5k-MIpUIbhEBBoZdZXi1TLt0vbx-LO3wACgOTL-baCaIouaEkBQrjQpaBTsCI1IowYvwntEBhhFqhFaTPKOnzX5kQYng1SSiRXmquXvQj2mdCvP39-mKSvAQK7z4Qn81RkPCfv109To32qH1m9DdT1tw7ojA1CpUgLAzr8M2DR0csSiVIEobktzo8G70fkFTmuVvLLWRmV7m8Cl5-GAyI81iQ1Cy0Jy_wsw8y7V8KEgZfExDvdiVUtszCFwtfrzRdUqjHT-_feTlDEGytYcPXIMIlPbxnYzkvcrRBd7NfdXic_VW-qbI3XvqqAX1O-9FOK7xhpbvTylcHC22sQotqw8F0udPsxWxmcSa9sQEL_xPbUcM223yF7HDs2dHYB6wT8B8JPt8GPnvVI--yLejzbejHp0Cft0CftenbCv3eSdDvbVjhhyPEj0eId4LoGaHvQ4i36dsKobOTIHS2YSU6HKHoeISiThA_I_R9CEVt-rZCKDkJQsmGlfhwhOLjEYo7QfdHIhRtQ4i_irYjFG1DqHcKhKIWCMVt-rZC6PwkCJ23OU6O0BVGO2yc5bavFDZW6rDy0IdqhvUJ0ZmFTfG9NWmVWw-vK6EqoND5epbVg6FeTTlvUc6_nIbXldheJb6hxNaVuk0lvt9TG1PRXql4txJrKsVty5NVV0Cjvzf2rt5LDnX501H-LD6F671Ux5PqX2g1M0fn5OzLZAjTdX-9pr_uXn-93ZXyplLv56r0rOnvbK-_ZHelUVMp-bkqTZr-zvdvhHB3qfE3u3P_Rv_faz0vP0m3ubn_mCkQED5dnS231QXlA3Lmyu_i-JO5r8xOHoryq3Yrc4cULuUdvkaPdp7pzPksBeHtApfLX_4NAAD___EFm6M= + +statement ok +CREATE TYPE gh AS (g INT, h INT) + +# Try a virtual computed column whose expression cannot be distributed. +statement ok +ALTER TABLE data ADD COLUMN g gh[] AS (array_cat(ARRAY[(1, 2)], ARRAY[(a, b)])) VIRTUAL + +# Error if we specify the problematic virtual computed column directly. +statement error cannot be executed with distsql +CREATE STATISTICS s1 ON g FROM data + +# We should skip the problematic virtual computed column when using defaults. 
+query T +EXPLAIN ANALYZE (DISTSQL) CREATE STATISTICS s1 FROM data +---- +planning time: 10µs +execution time: 100µs +distribution: +vectorized: +rows decoded from KV: 1,000 (7.8 KiB, 2,000 KVs, 1,000 gRPC calls) +maximum memory usage: +network usage: +regions: +isolation level: serializable +priority: normal +quality of service: regular +· +• create statistics +· +Diagram: https://cockroachdb.github.io/distsqlplan/decode.html#eJzsV8Fu2zgQve9XEHNKsHQkUrLj8OSs2wJGN01hG70sgoIRp65gWdSSNNJs4M_aH9gvW0iqa0mxXAtpgQSoD4I4MxrOm3l8Bh_A_p2AgPH09eX8NZnNL-eT2XwynhHLyJvp9RVR0kmgkGqF7-QKLYi_gAEFDhQCoBAChT7cUMiMjtBabfKQh-KDifoCwqcQp9na5eYbCpE2COIBXOwSBAFzeZvgFKVC4_lAQaGTcVJsk289yh8fsyXeA4WxTtar1AoiKbmlJKJEAYVZJnNbz2NEpoowot1nNEBhiqlCI8iIUTLilIwCSkZh_v47E0JM3s2HlEgnU65ORgEdhUK8-fP6cj48BQpvPxAXr1AQ_79_bbmOdOowdbFOH7mMvrNEYaQVKkGY75fm23uHlhiUSpCh75M_SvNi-n5MIpkkthKbydhsY3lhvPowHhPrMCORXqeOnOAX58WpOxXE93YBiMu2gKIsvXbZ2pU73WwolOuv47BOLhAEq8xv8gqEv6HHj3AmV1mCxuvXx1eaZ_E_WOxdQJo56fKB7F55xUqrq6DuoFVD-MhHq7b-7nXQCpk3IPdbIe-QrlNtFBpUNaQ3m9amXC4WBhfSaeMx__j2kBPu--R2HS3R2dNKs9ocjdYdCqs38juR9ba2BQ_ICd-Z2xoeNBrO6iRjx-sEe4pOeKzn8eeiFHyfUrCzAXkb79EKvk8rwp-hFbyDVrAuY9xqxeBFa8WgBpkfz1z-JObynhf8Yu6PYy7vMsYtc89fNHPPa5CD45kbPIm5Qc8LfzH3xzE36DLGLXOHL5q5wxrk8Hjmhk9ibtjz-s-FucE-5vKzYD9zg33MHfwM5gYdmBt2GeOWuRcvmrkXXS5TU7SZTi02bhj7d_IbO_VYfhVBtcDy3mL12kT43uioiC2X10WiwqDQutLLysUk3bqsMyhX3-6C1UzsYCZey8SqmfrNTPxwTV2KCg6mCtszsWamsCs8WUwFUnR32izLM2sxLSQ9Z-7WUZ7a0rP7v9l6V2itXOwC_BBuqnUOmnX2D9Y5aEfMm5kGzxPxebPO84N1DtsRB81Mw-eJeNis8-LwAfHbIYePTu1hAeiEOWjFzM_C72DuNzBf5JL1KdF3H2MFAvyvv96ex_YH-QdyYXPdnH3Wd0XR8_ssV71PMrFI4Uou8RU6NKs4ja2LIxDOrHGz-e3_AAAA___c-GNe + +# Check that we also collect stats on the hidden expression index virt column. 
+statement ok +CREATE INDEX ON data ((a * b)) + +query T +EXPLAIN ANALYZE (DISTSQL) CREATE STATISTICS s1 FROM data +---- +planning time: 10µs +execution time: 100µs +distribution: +vectorized: +rows decoded from KV: 1,000 (7.8 KiB, 2,000 KVs, 1,000 gRPC calls) +maximum memory usage: +network usage: +regions: +isolation level: serializable +priority: normal +quality of service: regular +· +• create statistics +· +Diagram: https://cockroachdb.github.io/distsqlplan/decode.html#eJzsWM1u2zgQvu9TEHNKdulIpOSf8OSs2wJGN01hG70sgoIRp64QWdSSNNJs4MfaF9gnW0iq15JiuRbSAkFQHQRyZjScj_PxM-gHsH8lIGAye32xeE3mi4vFdL6YTubEMvJmdnVJlHQSKKRa4Tu5QgviT2BAgQOFACiEQKEP1xQyoyO0Vps85KH4YKq-gPApxGm2drn5mkKkDYJ4ABe7BEHAQt4kOEOp0Hg-UFDoZJwUy-RLj_PXx-wW74HCRCfrVWoFkZTcUBJRooDCPJO5recxIlNFGNHuMxqgMMNUoRFkzCgZc0rGASXjMB__xoQQ03eLESVj9mvuk06mXJ2MAzoOhXjzx9XFYnQKFN5-IC5eoSD-v__Ych7p1GHqYp0-chl9Z4nCSCtUgjDfL8039w4tMSiVICPfJ7-X5uXs_YREMklsJTaTsdnG8sJ4-WEyIdZhRiK9Th05wS_Oi1N3Kojv7QIQb9sCirL02mVrV650vaFQzr92xTq5RBCs0sbpKxD-hh7fyblcZQkar1_vYmmex39jsXYBae6ky_uyG_KKlVZnQd1Bq4bwkY9Wbf3dcLAbDlvR8wb6fiv6Heh1qo1Cg6oG-nrTuj8Xy6XBpXTaeMw_fqfICfd9crOObtHZ08q-tTkau3gorL6n34is73Bb8KDNMSQnfGdu60TQ6ASrE5EdLynsKZLisZ7Hn5mo8H2iws4G5G28R1b4PlkJf4Ss8A6ywrp0cysrg5ciK4Maen48l_mTuMx7XvCTy9-dy7xLN7dcHr4ULg9r6IPjuRw8ictBzwt_cvm7czno0s0tl0cvhcujGvrweC6HT-Jy2PP6z4zLwT4u87NgP5eDfVwe_AguBx24HHbp5pbL5y-Fy-ddLm4ztJlOLTauMPtX8hsr9Vh-10G1xPJiZPXaRPje6KiILadXRaLCoNC60svKyTTduqwzKFf_3zurmdjBTLyWiVUz9ZuZ-OGauhQVHEwVtmdizUxhV3iy6Aqk6O60uS2Pr8U01_viVrl1lAe49Ox-jLbeFVorl7sAv_gjZ1fnoFln_2Cdg3bEvJlp8DwRD5t1Dg_WOWpHHDQzjZ4n4lGzzvPDB8Rvhxw-OrWHBaAT5qAVMz8Lv4G538B8nkvWp0TffYwVCPC_Pr09r-0D-QdyaXPdnH_Wd0XRi_ssV71PMrFI4VLe4it0aFZxGlsXRyCcWeNm88t_AQAA__89zn9B + +# Check that we also collect stats on other hidden columns. 
+statement ok +ALTER TABLE data ALTER COLUMN c SET NOT VISIBLE + +statement ok +ALTER TABLE data ALTER COLUMN c SET NOT VISIBLE + +query T +EXPLAIN ANALYZE (DISTSQL) CREATE STATISTICS s1 FROM data +---- +planning time: 10µs +execution time: 100µs +distribution: +vectorized: +rows decoded from KV: 1,000 (7.8 KiB, 2,000 KVs, 1,000 gRPC calls) +maximum memory usage: +network usage: +regions: +isolation level: serializable +priority: normal +quality of service: regular +· +• create statistics +· +Diagram: https://cockroachdb.github.io/distsqlplan/decode.html#eJzsWM1u2zgQvu9TEHNKdulIpOSf8OSs2wJGN01hG70sgoIRp64QWdSSNNJs4MfaF9gnW0iq15JiuRbSAkFQHQRyZjScj_PxM-gHsH8lIGAye32xeE3mi4vFdL6YTubEMvJmdnVJlHQSKKRa4Tu5QgviT2BAgQOFACiEQKEP1xQyoyO0Vps85KH4YKq-gPApxGm2drn5mkKkDYJ4ABe7BEHAQt4kOEOp0Hg-UFDoZJwUy-RLj_PXx-wW74HCRCfrVWoFkZTcUBJRooDCPJO5recxIlNFGNHuMxqgMMNUoRFkzCgZc0rGASXjMB__xoQQ03eLESVj9mvuk06mXJ2MAzoOhXjzx9XFYnQKFN5-IC5eoSD-v__Ych7p1GHqYp0-chl9Z4nCSCtUgjDfL8039w4tMSiVICPfJ7-X5uXs_YREMklsJTaTsdnG8sJ4-WEyIdZhRiK9Th05wS_Oi1N3Kojv7QIQb9sCirL02mVrV650vaFQzr92xTq5RBCs0sbpKxD-hh7fyblcZQkar1_vYmmex39jsXYBae6ky_uyG_KKlVZnQd1Bq4bwkY9Wbf3dcLAbDlvR8wb6fiv6Heh1qo1Cg6oG-nrTuj8Xy6XBpXTaeMw_fqfICfd9crOObtHZ08q-tTkau3gorL6n34is73Bb8KDNMSQnfGdu60TQ6ASrE5EdLynsKZLisZ7Hn5mo8H2iws4G5G28R1b4PlkJf4Ss8A6ywrp0cysrg5ciK4Maen48l_mTuMx7XvCTy9-dy7xLN7dcHr4ULg9r6IPjuRw8ictBzwt_cvm7czno0s0tl0cvhcujGvrweC6HT-Jy2PP6z4zLwT4u87NgP5eDfVwe_AguBx24HHbp5pbL5y-Fy-ddLm4ztJlOLTauMPtX8hsr9Vh-10G1xPJiZPXaRPje6KiILadXRaLCoNC60svKyTTduqwzKFf_3zurmdjBTLyWiVUz9ZuZ-OGauhQVHEwVtmdizUxhV3iy6Aqk6O60uS2Pr8U01_viVrl1lAe49Ox-jLbeFVorl7sAv_gjZ1fnoFln_2Cdg3bEvJlp8DwRD5t1Dg_WOWpHHDQzjZ4n4lGzzvPDB8Rvhxw-OrWHBaAT5qAVMz8Lv4G538B8nkvWp0TffYwVCPC_Pr09r-0D-QdyaXPdnH_Wd0XRi_ssV71PMrFI4VLe4it0aFZxGlsXRyCcWeNm88t_AQAA__89zn9B + +# Check that we can disable stats collection on virtual computed columns. 
+statement ok +SET CLUSTER SETTING sql.stats.virtual_computed_columns.enabled = false + +statement error cannot create statistics on virtual column +CREATE STATISTICS s1 ON e FROM data + +query T +EXPLAIN ANALYZE (DISTSQL) CREATE STATISTICS s1 FROM data +---- +planning time: 10µs +execution time: 100µs +distribution: +vectorized: +rows decoded from KV: 1,000 (7.8 KiB, 2,000 KVs, 1,000 gRPC calls) +maximum memory usage: +network usage: +regions: +isolation level: serializable +priority: normal +quality of service: regular +· +• create statistics +· +Diagram: https://cockroachdb.github.io/distsqlplan/decode.html#eJzsV8Fu2zgQve9XEHNqAToSKdlxdErW7QJGkW1hG70sjIIRp65gWdSSFNJs4M_aH9gvW0iqakmxVAtpgRqoD0I4MxrOm3mPYh7B_B1DALPF65vVa7Jc3azmy9V8tiSGkT8Wb2-JFFYAhURJ_FPs0EDwFzCgwIGCBxR8oDCGNYVUqxCNUToPeSxemMvPELgUoiTNbG5eUwiVRggewUY2RghgJe5iXKCQqB0XKEi0IoqLbfKtr_PHh3SLD0BhpuJsl5iACEruKAkpkUBhmYrcNnIYEYkkjCj7CTVQePOe2GiHAXH_-9eU61AlFhMbqeSJS6t7QySGSqIMCHPd0nz3YNEQjUIGZOq65PfSvFm8m5FQxLGpxaYi0lUsL4y372czYiymJFRZYskL_GydKLEvA-I6hwDEbVdAUZbKbJrZcqf1nkK5_tJRY8UGIWC1EcxfQeDu6elTWIpdGqN2xs0JlOZl9A8WexeQllbYgFyzw5-8ZqX1ldd00LrBf-Kj134nON4CN-4Ed8CUJUpL1CgbmNb7Tvg3m43GjbBKO8w9vRHkBXddcpeFW7TmZa0tXY5Wk_rCmi37RmRfA71WA1mTHux0kbLniNRhI4d_R5nyYzJlFxPyJjoiVH5MqP6PECofIFQ2ZBKVUCdnItRJAxw_nWb8WTTjI8f7RbPGJPiQSVQ0uzwTml02wHmn08x7Fs28keP_olljEt6QSVQ0m54JzaYNcP7pNPOfRTN_5Iy_I828YzTjF95xmnnHaDb5ETTzBtDMHzKJimZXZ0KzqyFX9wWaVCUGW7fc4zu5rZ1GLL8Oo9xgeXc2KtMhvtMqLGLL5dsiUWGQaGzpZeVinlQuYzWK3df_POqZWG8m3sjE6pnG7Uy8v6YhRXm9qfzuTKydyR8KTxRTgQTtvdLbUp0Gk-Kkzb-4laPUZ-k5fAYq7w6NEZtDgOvBul7npF3nuLfOSTdi3s40-TkRX7brvOytc9qN2Gtnmv6ciKftOq_6BeJ2Q_afqLb_ABiE2evEzC_8b2AetzBf5UfWx1jdf4gkBOB--Y2OPKof5C-IjcnPzeUndV8UvXpI81Pvo4gNUrgVW3yFFvUuSiJjoxACqzPc73_7PwAA__-mB-8m + +statement ok +RESET CLUSTER SETTING sql.stats.virtual_computed_columns.enabled + subtest regression_98373 statement ok