diff --git a/src/backend/distributed/utils/resource_lock.c b/src/backend/distributed/utils/resource_lock.c index 13e88a16e75..8ac269e4314 100644 --- a/src/backend/distributed/utils/resource_lock.c +++ b/src/backend/distributed/utils/resource_lock.c @@ -707,13 +707,27 @@ SerializeNonCommutativeWrites(List *shardIntervalList, LOCKMODE lockMode) } List *replicatedShardList = NIL; - if (AnyTableReplicated(shardIntervalList, &replicatedShardList)) + bool anyTableReplicated = AnyTableReplicated(shardIntervalList, &replicatedShardList); + + /* + * Acquire locks on the modified table. + * If the table is replicated, the locks are first acquired on the first worker node then locally. + * But if we're already on the first worker, acquiring on the first worker node and locally are the same operation. + * So we only acquire locally in that case. + */ + if (anyTableReplicated && ClusterHasKnownMetadataWorkers() && !IsFirstWorkerNode()) { - if (ClusterHasKnownMetadataWorkers() && !IsFirstWorkerNode()) - { - LockShardListResourcesOnFirstWorker(lockMode, replicatedShardList); - } + LockShardListResourcesOnFirstWorker(lockMode, replicatedShardList); + } + LockShardListResources(shardIntervalList, lockMode); + /* + * Next, acquire locks on the reference tables that are referenced by a foreign key if there are any. + * Note that LockReferencedReferenceShardResources() first acquires locks on the first worker, + * then locally. + */ + if (anyTableReplicated) + { ShardInterval *firstShardInterval = (ShardInterval *) linitial(replicatedShardList); if (ReferenceTableShardId(firstShardInterval->shardId)) @@ -728,8 +742,6 @@ SerializeNonCommutativeWrites(List *shardIntervalList, LOCKMODE lockMode) LockReferencedReferenceShardResources(firstShardInterval->shardId, lockMode); } } - - LockShardListResources(shardIntervalList, lockMode); } diff --git a/src/test/regress/expected/issue_7477.out b/src/test/regress/expected/issue_7477.out new file mode 100644 index 00000000000..224d85c6ea4 --- /dev/null +++ b/src/test/regress/expected/issue_7477.out @@ -0,0 +1,62 @@ +--- Test for updating a table that has a foreign key reference to another reference table. +--- Issue #7477: Distributed deadlock after issuing a simple UPDATE statement +--- https://github.com/citusdata/citus/issues/7477 +CREATE TABLE table1 (id INT PRIMARY KEY); +SELECT create_reference_table('table1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO table1 VALUES (1); +CREATE TABLE table2 ( + id INT, + info TEXT, + CONSTRAINT table1_id_fk FOREIGN KEY (id) REFERENCES table1 (id) + ); +SELECT create_reference_table('table2'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO table2 VALUES (1, 'test'); +--- Runs the update command in parallel on workers. +--- Due to bug #7477, before the fix, the result is non-deterministic +--- and have several rows of the form: +--- localhost | 57638 | f | ERROR: deadlock detected +--- localhost | 57637 | f | ERROR: deadlock detected +--- localhost | 57637 | f | ERROR: canceling the transaction since it was involved in a distributed deadlock +SELECT * FROM master_run_on_worker( + ARRAY['localhost', 'localhost','localhost', 'localhost','localhost', + 'localhost','localhost', 'localhost','localhost', 'localhost']::text[], + ARRAY[57638, 57637, 57637, 57638, 57637, 57638, 57637, 57638, 57638, 57637]::int[], + ARRAY['UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1' + ]::text[], + true); + node_name | node_port | success | result +--------------------------------------------------------------------- + localhost | 57638 | t | UPDATE 1 + localhost | 57637 | t | UPDATE 1 + localhost | 57637 | t | UPDATE 1 + localhost | 57638 | t | UPDATE 1 + localhost | 57637 | t | UPDATE 1 + localhost | 57638 | t | UPDATE 1 + localhost | 57637 | t | UPDATE 1 + localhost | 57638 | t | UPDATE 1 + localhost | 57638 | t | UPDATE 1 + localhost | 57637 | t | UPDATE 1 +(10 rows) + +--- cleanup +DROP TABLE table2; +DROP TABLE table1; diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 2d515e7a2c2..bc3477ce2cd 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -102,7 +102,7 @@ test: multi_dropped_column_aliases foreign_key_restriction_enforcement test: binary_protocol test: alter_table_set_access_method test: alter_distributed_table -test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 +test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 issue_7477 test: object_propagation_debug test: undistribute_table test: run_command_on_all_nodes diff --git a/src/test/regress/sql/issue_7477.sql b/src/test/regress/sql/issue_7477.sql new file mode 100644 index 00000000000..b9c1578e9b9 --- /dev/null +++ b/src/test/regress/sql/issue_7477.sql @@ -0,0 +1,44 @@ + +--- Test for updating a table that has a foreign key reference to another reference table. +--- Issue #7477: Distributed deadlock after issuing a simple UPDATE statement +--- https://github.com/citusdata/citus/issues/7477 + +CREATE TABLE table1 (id INT PRIMARY KEY); +SELECT create_reference_table('table1'); +INSERT INTO table1 VALUES (1); + +CREATE TABLE table2 ( + id INT, + info TEXT, + CONSTRAINT table1_id_fk FOREIGN KEY (id) REFERENCES table1 (id) + ); +SELECT create_reference_table('table2'); +INSERT INTO table2 VALUES (1, 'test'); + +--- Runs the update command in parallel on workers. +--- Due to bug #7477, before the fix, the result is non-deterministic +--- and have several rows of the form: +--- localhost | 57638 | f | ERROR: deadlock detected +--- localhost | 57637 | f | ERROR: deadlock detected +--- localhost | 57637 | f | ERROR: canceling the transaction since it was involved in a distributed deadlock + +SELECT * FROM master_run_on_worker( + ARRAY['localhost', 'localhost','localhost', 'localhost','localhost', + 'localhost','localhost', 'localhost','localhost', 'localhost']::text[], + ARRAY[57638, 57637, 57637, 57638, 57637, 57638, 57637, 57638, 57638, 57637]::int[], + ARRAY['UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1', + 'UPDATE table2 SET info = ''test_update'' WHERE id = 1' + ]::text[], + true); + +--- cleanup +DROP TABLE table2; +DROP TABLE table1;