Skip to content

Commit

Permalink
sql: crdb_internal.leases could cause a deadlock with the lease manager
Browse files Browse the repository at this point in the history
Previously, a deadlock could occur between crdb_internal.leases and the
lease manager when attempting to renew or release leases on the
role_member inside crdb_internal.leases table. This deadlock occurs
because lease manager lock is held while visting leases. The issue was
easily reproducible when the
allow_role_memberships_to_change_during_transaction setting was enabled.
To resolve this, the patch introduces in-memory caching of all leases
within crdb_internal.leases before privilege checks are performed, and
any locks are subsequently released.

Fixes: #119253

Release note (bug fix): crdb_internal.leases could cause a node to
become unavailable due to a deadlock in the leasing subsystem.
  • Loading branch information
fqazi committed Feb 16, 2024
1 parent b2924a1 commit 21913d2
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 17 deletions.
55 changes: 38 additions & 17 deletions pkg/sql/crdb_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -856,9 +856,14 @@ CREATE TABLE crdb_internal.schema_changes (
},
}

type crdbInternalLeasesTableEntry struct {
desc catalog.Descriptor
takenOffline bool
expiration tree.DTimestamp
}

// TODO(tbg): prefix with node_.
var crdbInternalLeasesTable = virtualSchemaTable{
comment: `acquired table leases (RAM; local node only)`,
schema: `
CREATE TABLE crdb_internal.leases (
node_id INT NOT NULL,
Expand All @@ -868,34 +873,50 @@ CREATE TABLE crdb_internal.leases (
expiration TIMESTAMP NOT NULL,
deleted BOOL NOT NULL
)`,
comment: `acquired table leases (RAM; local node only)`,
populate: func(
ctx context.Context, p *planner, _ catalog.DatabaseDescriptor, addRow func(...tree.Datum) error,
) error {
nodeID, _ := p.execCfg.NodeInfo.NodeID.OptionalNodeID() // zero if not available
var iterErr error
var leaseEntries []crdbInternalLeasesTableEntry
p.LeaseMgr().VisitLeases(func(desc catalog.Descriptor, takenOffline bool, _ int, expiration tree.DTimestamp) (wantMore bool) {
if ok, err := p.HasAnyPrivilege(ctx, desc); err != nil {
iterErr = err
return false
// Because we have locked the lease manager while visiting every lease,
// it not safe to *acquire* or *release* any leases in the process. As
// a result, build an in memory cache of entries and process them after
// all the locks have been released. Previously we would check privileges,
// which would need to get the lease on the role_member table. Simply skipping
// this check on that table will not be sufficient since any active lease
// on it could expire or need an renewal and cause the sample problem.
leaseEntries = append(leaseEntries, crdbInternalLeasesTableEntry{
desc: desc,
takenOffline: takenOffline,
expiration: expiration,
})
return true
})
for _, entry := range leaseEntries {
if ok, err := p.HasAnyPrivilege(ctx, entry.desc); err != nil {
return err
} else if !ok {
return true
continue
}

if err := addRow(
tree.NewDInt(tree.DInt(nodeID)),
tree.NewDInt(tree.DInt(int64(desc.GetID()))),
tree.NewDString(desc.GetName()),
tree.NewDInt(tree.DInt(int64(desc.GetParentID()))),
&expiration,
tree.MakeDBool(tree.DBool(takenOffline)),
tree.NewDInt(tree.DInt(int64(entry.desc.GetID()))),
tree.NewDString(entry.desc.GetName()),
tree.NewDInt(tree.DInt(int64(entry.desc.GetParentID()))),
&entry.expiration,
tree.MakeDBool(tree.DBool(entry.takenOffline)),
); err != nil {
iterErr = err
return false
return err
}
return true
})
return iterErr
}
return nil
},
indexes: nil,
generator: nil,
unimplemented: false,
resultColumns: nil,
}

func tsOrNull(micros int64) (tree.Datum, error) {
Expand Down
16 changes: 16 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/crdb_internal
Original file line number Diff line number Diff line change
Expand Up @@ -1645,3 +1645,19 @@ USE test;
DROP DATABASE test_cross_db;

subtest end


# In #119253 we observed a deadlock when visiting leases inside crdb_internal.lease
# between the lease manager and the authorization check for each descriptor. This
# test validates that this is no longer the case.
subtest allow_role_memberships_to_change_during_transaction

user testuser

statement ok
set allow_role_memberships_to_change_during_transaction=true

statement ok
SELECT * FROM crdb_internal.leases;

subtest end

0 comments on commit 21913d2

Please sign in to comment.