Skip to content

Commit

Permalink
sql/stats: evict stats cache entry if user-defined types have changed
Browse files Browse the repository at this point in the history
When adding table statistics to the stats cache, we decode histogram
upper bounds into datums. If the histogram column uses a user-defined
type, we hydrate the type and use this to decode.

In statistics builder, these histogram upper bound datums are compared
against datums in spans and constraints. The comparisons assume that the
datums are of equivalent type, but if the user-defined type has changed
sometime after loading the stats cache entry, this might not be true.

If the user-defined type has changed, we need to evict and re-load the
stats cache entry so that we decode histogram datums with a freshly-
hydrated type.

(We were already checking UDT versions when building the optTable in
sql.(*optCatalog).dataSourceForTable, but the newly-built optTable used
the existing table statistics instead of refreshing those, too.)

Fixes: #124181

Release note (bug fix): Fix a bug where a change to a user-defined type
could cause queries against tables using that type to fail with an error
message like:

  "histogram.go:694: span must be fully contained in the bucket"

The change to the user-defined type could come directly from an ALTER
TYPE statement, or indirectly from an ALTER DATABASE ADD REGION or DROP
REGION statement (which implicitly change the crdb_internal_region
type).

This bug has existed since UDTs were introduced in v20.2.
  • Loading branch information
michae2 committed May 29, 2024
1 parent 72f97c8 commit ef24bf7
Show file tree
Hide file tree
Showing 11 changed files with 236 additions and 31 deletions.
127 changes: 127 additions & 0 deletions pkg/ccl/logictestccl/testdata/logic_test/multi_region_stats
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# LogicTest: multiregion-9node-3region-3azs multiregion-9node-3region-3azs-tenant multiregion-9node-3region-3azs-no-los

query TTTTT colnames,rowsort
SHOW REGIONS
----
region zones database_names primary_region_of secondary_region_of
ap-southeast-2 {ap-az1,ap-az2,ap-az3} {} {} {}
ca-central-1 {ca-az1,ca-az2,ca-az3} {} {} {}
us-east-1 {us-az1,us-az2,us-az3} {} {} {}

query TT colnames,rowsort
SHOW REGIONS FROM CLUSTER
----
region zones
ap-southeast-2 {ap-az1,ap-az2,ap-az3}
ca-central-1 {ca-az1,ca-az2,ca-az3}
us-east-1 {us-az1,us-az2,us-az3}

# Regression test for #124181: check that we re-load table statistics after
# running ALTER DATABASE ADD REGION.

statement ok
CREATE DATABASE db124181 PRIMARY REGION "ap-southeast-2" REGIONS "us-east-1" SURVIVE ZONE FAILURE

statement ok
USE db124181

query TTTT
SHOW ENUMS
----
public crdb_internal_region {ap-southeast-2,us-east-1} root

statement ok
CREATE TABLE t124181 (
region crdb_internal_region NOT NULL,
id UUID NOT NULL DEFAULT gen_random_uuid(),
a INT NOT NULL,
PRIMARY KEY (id),
UNIQUE INDEX (a)
) LOCALITY REGIONAL BY ROW AS region

statement ok
INSERT INTO t124181 (region, a) VALUES ('ap-southeast-2', 0), ('us-east-1', 1)

statement ok
ANALYZE t124181

let $hist_id_1
SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE t124181] WHERE column_names = ARRAY['region']

query TIRI colnames,nosort
SHOW HISTOGRAM $hist_id_1
----
upper_bound range_rows distinct_range_rows equal_rows
'ap-southeast-2' 0 0 1
'us-east-1' 0 0 1

query T
SELECT jsonb_pretty(stat->'histo_buckets')
FROM (
SELECT jsonb_array_elements(statistics) AS stat
FROM [SHOW STATISTICS USING JSON FOR TABLE t124181]
)
WHERE stat->>'columns' = '["region"]'
----
[
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "ap-southeast-2"
},
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "us-east-1"
}
]

# Implicitly add a value to the crdb_internal_region enum.
statement ok
ALTER DATABASE db124181 ADD REGION "ca-central-1"

query TTTT
SHOW ENUMS
----
public crdb_internal_region {ap-southeast-2,ca-central-1,us-east-1} root

# Make sure we can still SHOW STATISTICS and SHOW HISTOGRAM.
let $hist_id_2
SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE t124181] WHERE column_names = ARRAY['region']

query TIRI colnames,nosort
SHOW HISTOGRAM $hist_id_2
----
upper_bound range_rows distinct_range_rows equal_rows
'ap-southeast-2' 0 0 1
'us-east-1' 0 0 1

# Make sure we can still SHOW STATISTICS USING JSON.
query T
SELECT jsonb_pretty(stat->'histo_buckets')
FROM (
SELECT jsonb_array_elements(statistics) AS stat
FROM [SHOW STATISTICS USING JSON FOR TABLE t124181]
)
WHERE stat->>'columns' = '["region"]'
----
[
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "ap-southeast-2"
},
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "us-east-1"
}
]

# Make sure we can still use the histogram in statistics_builder.
statement ok
INSERT INTO t124181 (region, a) VALUES ('ca-central-1', 2)
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ go_test(
"//pkg/ccl/logictestccl:testdata", # keep
],
exec_properties = {"test.Pool": "large"},
shard_count = 20,
shard_count = 21,
tags = ["cpu:4"],
deps = [
"//pkg/base",
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ go_test(
"//pkg/ccl/logictestccl:testdata", # keep
],
exec_properties = {"test.Pool": "large"},
shard_count = 16,
shard_count = 17,
tags = ["cpu:4"],
deps = [
"//pkg/base",
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ go_test(
"//pkg/ccl/logictestccl:testdata", # keep
],
exec_properties = {"test.Pool": "large"},
shard_count = 28,
shard_count = 29,
tags = ["cpu:4"],
deps = [
"//pkg/base",
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pkg/sql/stats/automatic_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -760,7 +760,7 @@ func (r *Refresher) maybeRefreshStats(
rowsAffected int64,
asOf time.Duration,
) {
tableStats, err := r.cache.getTableStatsFromCache(ctx, tableID, nil /* forecast */)
tableStats, err := r.cache.getTableStatsFromCache(ctx, tableID, nil /* forecast */, nil /* udtCols */)
if err != nil {
log.Errorf(ctx, "failed to get table statistics: %v", err)
return
Expand Down
16 changes: 12 additions & 4 deletions pkg/sql/stats/delete_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,15 +263,19 @@ func TestDeleteOldStatsForColumns(t *testing.T) {
}

return testutils.SucceedsSoonError(func() error {
tableStats, err := cache.getTableStatsFromCache(ctx, tableID, nil /* forecast */)
tableStats, err := cache.getTableStatsFromCache(
ctx, tableID, nil /* forecast */, nil, /* udtCols */
)
if err != nil {
return err
}

for i := range testData {
stat := &testData[i]
if stat.TableID != tableID {
stats, err := cache.getTableStatsFromCache(ctx, stat.TableID, nil /* forecast */)
stats, err := cache.getTableStatsFromCache(
ctx, stat.TableID, nil /* forecast */, nil, /* udtCols */
)
if err != nil {
return err
}
Expand Down Expand Up @@ -556,15 +560,19 @@ func TestDeleteOldStatsForOtherColumns(t *testing.T) {
}

return testutils.SucceedsSoonError(func() error {
tableStats, err := cache.getTableStatsFromCache(ctx, tableID, nil /* forecast */)
tableStats, err := cache.getTableStatsFromCache(
ctx, tableID, nil /* forecast */, nil, /* udtCols */
)
if err != nil {
return err
}

for i := range testData {
stat := &testData[i]
if stat.TableID != tableID {
stats, err := cache.getTableStatsFromCache(ctx, stat.TableID, nil /* forecast */)
stats, err := cache.getTableStatsFromCache(
ctx, stat.TableID, nil /* forecast */, nil, /* udtCols */
)
if err != nil {
return err
}
Expand Down
Loading

0 comments on commit ef24bf7

Please sign in to comment.