Skip to content

Commit

Permalink
sql: refresh stats for multi-tenant system database conversions
Browse files Browse the repository at this point in the history
Previously, when converting the system database to multiregion its
possible for table statistics to contain the existing type of
crdb_region as bytes. This could happen if automatic statistics
collection happened concurrently with the conversion to a multi-region
system database. The conversion had logic to clear table statistics, but
it was still possible for statistics collection to happen in between.
This could cause queries against RBR system tables to fail because,
since the table_statistics type information no longer matches with the table
descriptor after. We started seeing this for the system database inside
TestMrSystemDatabase, once conversion was added for the system tenant.
To address this, this patch first adds extra logic in the schema changer
to force a refresh of stats on system tables, which will force a refresh
of statistics after the schema change, in case a stats refresh occurs
before the job completes. We also modify the TestMrSystemDatabase to
intentionally generate stats before changing the system database under
the system tenant to avoid the risk of hitting this issue. With these
changes we expect the test to no longer flake and any real world
occurrence to be less transient.

Fixes: #122790

Release note: None
  • Loading branch information
fqazi committed May 9, 2024
1 parent fc8482f commit 7c040ec
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 2 deletions.
1 change: 1 addition & 0 deletions pkg/ccl/multiregionccl/multiregion_system_table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ func TestMrSystemDatabase(t *testing.T) {

sDB := sqlutils.MakeSQLRunner(systemSQL)

sDB.Exec(t, `ANALYZE system.sqlliveness;`)
sDB.Exec(t, `SET CLUSTER SETTING sql.multiregion.system_database_multiregion.enabled = true`)
sDB.Exec(t, `ALTER DATABASE system SET PRIMARY REGION "us-east1"`)
sDB.Exec(t, `ALTER DATABASE system ADD REGION "us-east2"`)
Expand Down
6 changes: 5 additions & 1 deletion pkg/sql/region_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -2521,6 +2521,10 @@ func (p *planner) setSystemDatabaseSurvival(ctx context.Context) error {
return nil
}

// systemTableLocalityChangeJobName is the job name used when the locality is converted
// to regional by row.
const systemTableLocalityChangeJobName = "convert system.%s to %s locality"

// optimizeSystemDatabase configures some tables in the system data as
// global and regional by row. The locality changes reduce how long it
// takes a server to start up in a multi-region deployment.
Expand Down Expand Up @@ -2604,7 +2608,7 @@ func (p *planner) optimizeSystemDatabase(ctx context.Context) error {
return err
}

jobName := fmt.Sprintf("convert system.%s to %s locality", desc.GetName(), locality)
jobName := fmt.Sprintf(systemTableLocalityChangeJobName, desc.GetName(), locality)
return p.writeSchemaChange(ctx, desc, descpb.InvalidMutationID, jobName)
}

Expand Down
13 changes: 12 additions & 1 deletion pkg/sql/schema_changer.go
Original file line number Diff line number Diff line change
Expand Up @@ -920,7 +920,18 @@ func (sc *SchemaChanger) exec(ctx context.Context) error {
if sc.mutationID == descpb.InvalidMutationID {
// Nothing more to do.
isCreateTableAs := tableDesc.Adding() && tableDesc.IsAs()
return waitToUpdateLeases(isCreateTableAs /* refreshStats */)
// If we are converting a system database table to MR, we should force
// a stats refresh so that the stats also have the correct type. There is
// a potential race condition between waiting for leases and deleting the
// statistics using optimizeDatabase, where between those two point statistics
// with the wrong type could show up. To minimize any transient condition,
// lets force a refresh of stats here.
isSystemDatabaseTransformation := tableDesc.GetParentID() == keys.SystemDatabaseID &&
(strings.HasPrefix(sc.job.Payload().Description,
fmt.Sprintf(systemTableLocalityChangeJobName, tableDesc.GetName(), "regional by row")) ||
strings.HasPrefix(sc.job.Payload().Description,
fmt.Sprintf(systemTableLocalityChangeJobName, tableDesc.GetName(), "global")))
return waitToUpdateLeases(isCreateTableAs || isSystemDatabaseTransformation /* refreshStats */)
}

if err := sc.initJobRunningStatus(ctx); err != nil {
Expand Down

0 comments on commit 7c040ec

Please sign in to comment.