Skip to content

Commit

Permalink
sql: resolve deadlock in waitForCacheState
Browse files Browse the repository at this point in the history
change #25313 introduced getDatabaseID as the callback
used in waitForCacheState. However, waitForCacheState holds
on to a lock while calling its callback and can get stuck
on an intent written by another transaction. The other
transaction having written an intent can be trying to acquire
the databaseCacheHolder lock, resulting in the system getting
stuck in a deadlock.

Originally, the deadlock was thought to be cause by another
bug which got fixed through #28381, but now we know of
an actual legitimate situation in which the deadlock
can happen.

The fix is to use another callback that will fix #25313
while not going to the store.

fixes #29090

Release note: None
  • Loading branch information
vivekmenezes committed Aug 30, 2018
1 parent a309829 commit 8e7593a
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 2 deletions.
17 changes: 17 additions & 0 deletions pkg/sql/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,23 @@ func (dc *databaseCache) getDatabaseID(
return desc.ID, nil
}

// getCachedDatabaseID returns the ID of a database given its name
// from the cache. This method never goes to the store to resolve
// the name to id mapping.
func (dc *databaseCache) getCachedDatabaseID(ctx context.Context, name string) (sqlbase.ID, error) {
if id := dc.getID(name); id != 0 {
return id, nil
}

desc, err := dc.getCachedDatabaseDesc(name, false /*required*/)
if err != nil || desc == nil {
// desc can be nil if required == false and the database was not found.
return 0, err
}

return desc.ID, nil
}

// renameDatabase implements the DatabaseDescEditor interface.
func (p *planner) renameDatabase(
ctx context.Context, oldDesc *sqlbase.DatabaseDescriptor, newName string,
Expand Down
3 changes: 1 addition & 2 deletions pkg/sql/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,7 @@ func (tc *TableCollection) waitForCacheToDropDatabases(ctx context.Context) {
tc.dbCacheSubscriber.waitForCacheState(
func(dc *databaseCache) bool {
// Resolve the database name from the database cache.
dbID, err := dc.getDatabaseID(ctx,
tc.leaseMgr.execCfg.DB.Txn, uc.name, false /*required*/)
dbID, err := dc.getCachedDatabaseID(ctx, uc.name)
if err != nil || dbID == 0 {
// dbID can still be 0 if required is false and
// the database is not found. Swallowing error here
Expand Down

0 comments on commit 8e7593a

Please sign in to comment.