Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql/stats: evict stats cache entry if user-defined types have changed #124603

Merged
merged 3 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions pkg/ccl/logictestccl/testdata/logic_test/multi_region_stats
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# LogicTest: multiregion-9node-3region-3azs multiregion-9node-3region-3azs-tenant multiregion-9node-3region-3azs-no-los

query TTTTT colnames,rowsort
SHOW REGIONS
----
region zones database_names primary_region_of secondary_region_of
ap-southeast-2 {ap-az1,ap-az2,ap-az3} {} {} {}
ca-central-1 {ca-az1,ca-az2,ca-az3} {} {} {}
us-east-1 {us-az1,us-az2,us-az3} {} {} {}

query TT colnames,rowsort
SHOW REGIONS FROM CLUSTER
----
region zones
ap-southeast-2 {ap-az1,ap-az2,ap-az3}
ca-central-1 {ca-az1,ca-az2,ca-az3}
us-east-1 {us-az1,us-az2,us-az3}

# Regression test for #124181: check that we re-load table statistics after
# running ALTER DATABASE ADD REGION.

statement ok
CREATE DATABASE db124181 PRIMARY REGION "ap-southeast-2" REGIONS "us-east-1" SURVIVE ZONE FAILURE

statement ok
USE db124181

query TTTT
SHOW ENUMS
----
public crdb_internal_region {ap-southeast-2,us-east-1} root

statement ok
CREATE TABLE t124181 (
region crdb_internal_region NOT NULL,
id UUID NOT NULL DEFAULT gen_random_uuid(),
a INT NOT NULL,
PRIMARY KEY (id),
UNIQUE INDEX (a)
) LOCALITY REGIONAL BY ROW AS region

statement ok
INSERT INTO t124181 (region, a) VALUES ('ap-southeast-2', 0), ('us-east-1', 1)

statement ok
ANALYZE t124181

let $hist_id_1
SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE t124181] WHERE column_names = ARRAY['region']

query TIRI colnames,nosort
SHOW HISTOGRAM $hist_id_1
----
upper_bound range_rows distinct_range_rows equal_rows
'ap-southeast-2' 0 0 1
'us-east-1' 0 0 1

query T
SELECT jsonb_pretty(stat->'histo_buckets')
FROM (
SELECT jsonb_array_elements(statistics) AS stat
FROM [SHOW STATISTICS USING JSON FOR TABLE t124181]
)
WHERE stat->>'columns' = '["region"]'
----
[
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "ap-southeast-2"
},
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "us-east-1"
}
]

# Implicitly add a value to the crdb_internal_region enum.
statement ok
ALTER DATABASE db124181 ADD REGION "ca-central-1"

query TTTT
SHOW ENUMS
----
public crdb_internal_region {ap-southeast-2,ca-central-1,us-east-1} root

# Make sure we can still SHOW STATISTICS and SHOW HISTOGRAM.
let $hist_id_2
SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE t124181] WHERE column_names = ARRAY['region']

query TIRI colnames,nosort
SHOW HISTOGRAM $hist_id_2
----
upper_bound range_rows distinct_range_rows equal_rows
'ap-southeast-2' 0 0 1
'us-east-1' 0 0 1

# Make sure we can still SHOW STATISTICS USING JSON.
query T
SELECT jsonb_pretty(stat->'histo_buckets')
FROM (
SELECT jsonb_array_elements(statistics) AS stat
FROM [SHOW STATISTICS USING JSON FOR TABLE t124181]
)
WHERE stat->>'columns' = '["region"]'
----
[
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "ap-southeast-2"
},
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "us-east-1"
}
]

# Make sure we can still use the histogram in statistics_builder.
statement ok
INSERT INTO t124181 (region, a) VALUES ('ca-central-1', 2)
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ go_test(
"//pkg/ccl/logictestccl:testdata", # keep
],
exec_properties = {"test.Pool": "large"},
shard_count = 20,
shard_count = 21,
tags = ["cpu:4"],
deps = [
"//pkg/base",
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ go_test(
"//pkg/ccl/logictestccl:testdata", # keep
],
exec_properties = {"test.Pool": "large"},
shard_count = 16,
shard_count = 17,
tags = ["cpu:4"],
deps = [
"//pkg/base",
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ go_test(
"//pkg/ccl/logictestccl:testdata", # keep
],
exec_properties = {"test.Pool": "large"},
shard_count = 28,
shard_count = 29,
tags = ["cpu:4"],
deps = [
"//pkg/base",
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 18 additions & 1 deletion pkg/sql/alter_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -1469,7 +1469,8 @@ func injectTableStats(
}
}

// First, delete all statistics for the table.
// First, delete all statistics for the table. (We use the current transaction
// so that this will rollback on any error.)
if _ /* rows */, err := params.p.InternalSQLTxn().Exec(
params.ctx,
"delete-stats",
Expand All @@ -1487,6 +1488,22 @@ StatsLoop:
if err != nil {
return err
}

// Check that the type matches.
// TODO(49698): When we support multi-column histograms this check will need
// adjustment.
if len(s.Columns) == 1 {
col := catalog.FindColumnByName(desc, s.Columns[0])
// Ignore dropped columns (they are handled below).
if col != nil {
if err := h.TypeCheck(
col.GetType(), desc.GetName(), s.Columns[0], s.CreatedAt,
); err != nil {
return pgerror.WithCandidateCode(err, pgcode.DatatypeMismatch)
}
}
}

// histogram will be passed to the INSERT statement; we want it to be a
// nil interface{} if we don't generate a histogram.
var histogram interface{}
Expand Down
8 changes: 4 additions & 4 deletions pkg/sql/distsql_plan_changefeed.go
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ func (c *cdcOptCatalog) ResolveDataSource(
return nil, cat.DataSourceName{}, err
}

ds, err := c.newCDCDataSource(desc, c.targetFamilyID)
ds, err := c.newCDCDataSource(ctx, desc, c.targetFamilyID)
if err != nil {
return nil, cat.DataSourceName{}, err
}
Expand All @@ -399,7 +399,7 @@ func (c *cdcOptCatalog) ResolveDataSourceByID(
return nil, false, err
}

ds, err := c.newCDCDataSource(desc, c.targetFamilyID)
ds, err := c.newCDCDataSource(ctx, desc, c.targetFamilyID)
if err != nil {
return nil, false, err
}
Expand All @@ -423,13 +423,13 @@ func (c *cdcOptCatalog) ResolveFunction(

// newCDCDataSource builds an optTable for the target cdc table and family.
func (c *cdcOptCatalog) newCDCDataSource(
original catalog.TableDescriptor, familyID catid.FamilyID,
ctx context.Context, original catalog.TableDescriptor, familyID catid.FamilyID,
) (cat.DataSource, error) {
d, err := newFamilyTableDescriptor(original, familyID, c.extraColumns)
if err != nil {
return nil, err
}
return newOptTable(d, c.codec(), nil /* stats */, emptyZoneConfig)
return newOptTable(ctx, d, c.codec(), nil /* stats */, emptyZoneConfig)
}

// familyTableDescriptor wraps underlying catalog.TableDescriptor,
Expand Down
18 changes: 8 additions & 10 deletions pkg/sql/logictest/testdata/logic_test/prepare
Original file line number Diff line number Diff line change
Expand Up @@ -1238,11 +1238,15 @@ EXECUTE rcc('t')

user root

# Regression test for #46217. Histogram type doesn't match column type.
statement ok
CREATE TABLE ts (d DATE PRIMARY KEY, x INT);

user root

# Regression test for #46217. Test that we cannot inject a histogram type that
# doesn't match the column type.
statement ok
CREATE TABLE ts (d DATE PRIMARY KEY, x INT)

statement error pq: histogram for table ts column d created_at 2020-03-24 15:34:22\.863634\+00:00 does not match column type DATE: TIMESTAMP
ALTER TABLE ts INJECT STATISTICS '[
{
"columns": ["d"],
Expand All @@ -1267,13 +1271,7 @@ ALTER TABLE ts INJECT STATISTICS '[
"null_count": 0,
"row_count": 100000
}
]';

statement ok
PREPARE q AS DELETE FROM ts WHERE ts.d <= $1

statement ok
EXECUTE q ('2020-03-25')
]'

# Test that if we replace a view the cached plan is invalidated.
statement ok
Expand Down
35 changes: 31 additions & 4 deletions pkg/sql/opt_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/stats"
"github.com/cockroachdb/cockroach/pkg/sql/syntheticprivilege"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/buildutil"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
"github.com/lib/pq/oid"
Expand Down Expand Up @@ -572,7 +573,7 @@ func (oc *optCatalog) dataSourceForTable(
return ds, nil
}

ds, err := newOptTable(desc, oc.codec(), tableStats, zoneConfig)
ds, err := newOptTable(ctx, desc, oc.codec(), tableStats, zoneConfig)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -775,6 +776,7 @@ type optTable struct {
var _ cat.Table = &optTable{}

func newOptTable(
ctx context.Context,
desc catalog.TableDescriptor,
codec keys.SQLCodec,
stats []*stats.TableStatistic,
Expand Down Expand Up @@ -1083,7 +1085,7 @@ func newOptTable(
n := 0
for i := range stats {
// We skip any stats that have columns that don't exist in the table anymore.
if ok, err := ot.stats[n].init(ot, stats[i]); err != nil {
if ok, err := ot.stats[n].init(ctx, ot, stats[i]); err != nil {
return nil, err
} else if ok {
n++
Expand Down Expand Up @@ -1783,19 +1785,44 @@ type optTableStat struct {

var _ cat.TableStatistic = &optTableStat{}

func (os *optTableStat) init(tab *optTable, stat *stats.TableStatistic) (ok bool, _ error) {
func (os *optTableStat) init(
ctx context.Context, tab *optTable, stat *stats.TableStatistic,
) (ok bool, _ error) {
os.stat = stat
os.columnOrdinals = make([]int, len(stat.ColumnIDs))
for i, c := range stat.ColumnIDs {
var ok bool
os.columnOrdinals[i], ok = tab.colMap.Get(c)
if !ok {
// Column not in table (this is possible if the column was removed since
// Column not in table (this is expected if the column was removed since
// the statistic was calculated).
return false, nil
}
}

// Verify that histogram column type matches table column type.
// TODO(49698): When we support multi-column histograms this check will need
// adjustment.
if len(os.columnOrdinals) == 1 {
col := tab.getCol(os.columnOrdinals[0])
createdAt := string(tree.PGWireFormatTimestamp(stat.CreatedAt, nil, nil))
if err := stat.HistogramData.TypeCheck(
col.GetType(), string(tab.Name()), col.GetName(), createdAt,
); err != nil {
// Column type in the histogram differs from column type in the
// table. This is only possible if we somehow re-used the same column ID
// during an ALTER TABLE statement, which we shouldn't.
if buildutil.CrdbTestBuild {
return false, errors.NewAssertionErrorWithWrappedErrf(
err, "type check failed while initializing stat %d", stat.StatisticID,
)
}
// For release builds, skip over the stat and log a warning.
log.Warningf(ctx, "skipping stat %d due to failed type check: %v", stat.StatisticID, err)
return false, nil
}
}

return true, nil
}

Expand Down
Loading
Loading