From 429431eb4997ac76b7e03c77ef28ee31b9c71ca9 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Thu, 28 Mar 2024 20:17:14 +0200 Subject: [PATCH 01/26] wip --- .../ignite/internal/tx/TxStateMeta.java | 47 +++++++++++++++++-- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/TxStateMeta.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/TxStateMeta.java index 58bf55228ba5..ced71fc80c98 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/TxStateMeta.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/TxStateMeta.java @@ -43,6 +43,9 @@ public class TxStateMeta implements TransactionMeta { private final Long initialVacuumObservationTimestamp; + @Nullable + private final Long cleanupCompletionTimestamp; + /** * Constructor. * @@ -75,12 +78,34 @@ public TxStateMeta( @Nullable TablePartitionId commitPartitionId, @Nullable HybridTimestamp commitTimestamp, @Nullable Long initialVacuumObservationTimestamp + ) { + this(txState, txCoordinatorId, commitPartitionId, commitTimestamp, initialVacuumObservationTimestamp, null); + } + + /** + * Constructor. + * + * @param txState Transaction state. + * @param txCoordinatorId Transaction coordinator id. + * @param commitPartitionId Commit partition replication group id. + * @param commitTimestamp Commit timestamp. + * @param initialVacuumObservationTimestamp Initial vacuum observation timestamp. + * @param cleanupCompletionTimestamp Cleanup completion timestamp. 
+ */ + public TxStateMeta( + TxState txState, + @Nullable String txCoordinatorId, + @Nullable TablePartitionId commitPartitionId, + @Nullable HybridTimestamp commitTimestamp, + @Nullable Long initialVacuumObservationTimestamp, + @Nullable Long cleanupCompletionTimestamp ) { this.txState = txState; this.txCoordinatorId = txCoordinatorId; this.commitPartitionId = commitPartitionId; this.commitTimestamp = commitTimestamp; this.initialVacuumObservationTimestamp = initialVacuumObservationTimestamp; + this.cleanupCompletionTimestamp = cleanupCompletionTimestamp; } /** @@ -125,6 +150,11 @@ public TxState txState() { return initialVacuumObservationTimestamp; } + @Nullable + public Long cleanupCompletionTimestamp() { + return cleanupCompletionTimestamp; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -133,26 +163,33 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) { return false; } + TxStateMeta that = (TxStateMeta) o; if (txState != that.txState) { return false; } - if (txCoordinatorId != null ? !txCoordinatorId.equals(that.txCoordinatorId) : that.txCoordinatorId != null) { return false; } - if (commitPartitionId != null ? !commitPartitionId.equals(that.commitPartitionId) : that.commitPartitionId != null) { return false; } - - return commitTimestamp != null ? commitTimestamp.equals(that.commitTimestamp) : that.commitTimestamp == null; + if (commitTimestamp != null ? !commitTimestamp.equals(that.commitTimestamp) : that.commitTimestamp != null) { + return false; + } + if (initialVacuumObservationTimestamp != null ? !initialVacuumObservationTimestamp.equals(that.initialVacuumObservationTimestamp) + : that.initialVacuumObservationTimestamp != null) { + return false; + } + return cleanupCompletionTimestamp != null ? 
cleanupCompletionTimestamp.equals(that.cleanupCompletionTimestamp) + : that.cleanupCompletionTimestamp == null; } @Override public int hashCode() { - return Objects.hash(txState, txCoordinatorId, commitPartitionId, commitTimestamp); + return Objects.hash(txState, txCoordinatorId, commitPartitionId, commitTimestamp, initialVacuumObservationTimestamp, + cleanupCompletionTimestamp); } @Override From 0aae08cb4d34bf775a43f9f809fd3c9fef03d7a9 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Thu, 4 Apr 2024 16:25:30 +0300 Subject: [PATCH 02/26] wip --- .../distributed/raft/PartitionListener.java | 12 ++++-- .../replicator/PartitionReplicaListener.java | 2 +- .../apache/ignite/internal/tx/TxManager.java | 2 +- .../tx/impl/PersistentTxStateVacuumizer.java | 39 +++++++++++++++++++ .../internal/tx/impl/TxManagerImpl.java | 2 +- .../tx/impl/VolatileTxStateMetaStorage.java | 13 ++++++- 6 files changed, 61 insertions(+), 9 deletions(-) create mode 100644 modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java index 26f396766cf7..8072deb2631e 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java @@ -54,6 +54,7 @@ import org.apache.ignite.internal.raft.service.CommandClosure; import org.apache.ignite.internal.raft.service.CommittedConfiguration; import org.apache.ignite.internal.raft.service.RaftGroupListener; +import org.apache.ignite.internal.replicator.TablePartitionId; import org.apache.ignite.internal.replicator.command.SafeTimePropagatingCommand; import org.apache.ignite.internal.replicator.command.SafeTimeSyncCommand; import 
org.apache.ignite.internal.replicator.message.PrimaryReplicaChangeCommand; @@ -390,7 +391,10 @@ private UpdateCommandResult handleUpdateAllCommand(UpdateAllCommand cmd, long co commandTerm ); - markFinished(txId, cmd.commit(), cmd.commitTimestamp()); + // Assume that we handle the finish command only on the commit partition. + TablePartitionId commitPartitionId = new TablePartitionId(storage.tableId(), storage.partitionId()); + + markFinished(txId, cmd.commit(), cmd.commitTimestamp(), commitPartitionId); LOG.debug("Finish the transaction txId = {}, state = {}, txStateChangeRes = {}", txId, txMetaToSet, txStateChangeRes); @@ -417,7 +421,7 @@ private void handleWriteIntentSwitchCommand(WriteIntentSwitchCommand cmd, long c UUID txId = cmd.txId(); - markFinished(txId, cmd.commit(), cmd.commitTimestamp()); + markFinished(txId, cmd.commit(), cmd.commitTimestamp(), null); storageUpdateHandler.switchWriteIntents( txId, @@ -675,11 +679,11 @@ private void replicaTouch(UUID txId, String txCoordinatorId, HybridTimestamp com )); } - private void markFinished(UUID txId, boolean commit, @Nullable HybridTimestamp commitTimestamp) { + private void markFinished(UUID txId, boolean commit, @Nullable HybridTimestamp commitTimestamp, @Nullable TablePartitionId partId) { txManager.updateTxMeta(txId, old -> new TxStateMeta( commit ? COMMITTED : ABORTED, old == null ? null : old.txCoordinatorId(), - old == null ? null : old.commitPartitionId(), + old == null ? partId : old.commitPartitionId(), commit ? 
commitTimestamp : null )); } diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java index d7ea23c0d457..cc101efeea2a 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java @@ -488,7 +488,7 @@ private CompletableFuture triggerTxRecovery(UUID txId, String senderId) { new HybridTimestampTracker(), replicationGroupId, false, - // term is not required for the rollback. + // Enlistment consistency token is not required for the rollback, so it is 0L. Map.of(replicationGroupId, new IgniteBiTuple<>(clusterNodeResolver.getById(senderId), 0L)), txId ) diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/TxManager.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/TxManager.java index 2d3ea540974c..72cbf3cfe202 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/TxManager.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/TxManager.java @@ -128,7 +128,7 @@ public interface TxManager extends IgniteComponent { * should pass its own tracker to provide linearizability between read-write and read-only transactions started by this client. * @param commitPartition Partition to store a transaction state. * @param commit {@code true} if a commit requested. - * @param enlistedGroups Enlisted partition groups with consistency token. + * @param enlistedGroups Enlisted partition groups with consistency tokens. * @param txId Transaction id. 
*/ CompletableFuture finish( diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java new file mode 100644 index 000000000000..5fe3146b3933 --- /dev/null +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.internal.tx.impl; + +import java.util.UUID; +import java.util.function.Function; +import org.apache.ignite.internal.replicator.TablePartitionId; +import org.apache.ignite.internal.tx.storage.state.TxStateStorage; + +public class PersistentTxStateVacuumizer { + private final Function txStateStorageResolver; + + public PersistentTxStateVacuumizer(Function txStateStorageResolver) { + this.txStateStorageResolver = txStateStorageResolver; + } + + public void vacuumPersistentTxState(UUID txId, TablePartitionId commitPartitionId) { + TxStateStorage txStateStorage = txStateStorageResolver.apply(commitPartitionId); + + if (txStateStorage != null) { + txStateStorage.remove(txId); + } + } +} diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java index 5455b70f9e58..fcca1fa787ba 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java @@ -666,7 +666,7 @@ private CompletableFuture makeFinishRequest( new TxStateMeta( txResult.transactionState(), localNodeId, - old.commitPartitionId(), + commitPartition, txResult.commitTimestamp() )); diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java index a575318ebe77..507063d11b21 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java @@ -39,6 +39,8 @@ public class VolatileTxStateMetaStorage { /** The local map for tx states. 
*/ private ConcurrentHashMap txStateMap; + private final PersistentTxStateVacuumizer persistentTxStateVacuumizer = new PersistentTxStateVacuumizer(); + /** * Starts the storage. */ @@ -129,10 +131,14 @@ public void vacuum(long vacuumObservationTimestamp, long txnResourceTtl) { txStateMap.forEach((txId, meta) -> { txStateMap.computeIfPresent(txId, (txId0, meta0) -> { if (TxState.isFinalState(meta0.txState())) { + Long initialVacuumObservationTimestamp = meta0.initialVacuumObservationTimestamp(); + Long cleanupCompletionTimestamp = meta0.cleanupCompletionTimestamp(); + long + if (txnResourceTtl == 0) { vacuumizedTxnsCount.incrementAndGet(); return null; - } else if (meta0.initialVacuumObservationTimestamp() == null) { + } else if (initialVacuumObservationTimestamp == null) { markedAsInitiallyDetectedTxnsCount.incrementAndGet(); return new TxStateMeta( meta0.txState(), @@ -141,8 +147,11 @@ public void vacuum(long vacuumObservationTimestamp, long txnResourceTtl) { meta0.commitTimestamp(), vacuumObservationTimestamp ); - } else if (meta0.initialVacuumObservationTimestamp() + txnResourceTtl < vacuumObservationTimestamp) { + } else if (initialVacuumObservationTimestamp + txnResourceTtl < vacuumObservationTimestamp) { vacuumizedTxnsCount.incrementAndGet(); + + persistentTxStateVacuumizer.vacuumPersistentTxState(txId, meta0.commitPartitionId()); + return null; } else { alreadyMarkedTxnsCount.incrementAndGet(); From ac3ce06fc9037b8ccecbad781ec0d3dbe8484f77 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Fri, 5 Apr 2024 10:58:59 +0300 Subject: [PATCH 03/26] wip --- .../table/distributed/TableMessageGroup.java | 3 ++ .../tx/impl/PersistentTxStateVacuumizer.java | 54 ++++++++++++++++--- .../tx/impl/VolatileTxStateMetaStorage.java | 5 +- .../internal/tx/message/TxMessageGroup.java | 5 ++ .../tx/message/VacuumTxStatesCommand.java | 30 +++++++++++ 5 files changed, 86 insertions(+), 11 deletions(-) create mode 100644 
modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStatesCommand.java diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/TableMessageGroup.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/TableMessageGroup.java index 14c76a3e3b35..c20ca13c1cc3 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/TableMessageGroup.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/TableMessageGroup.java @@ -218,6 +218,9 @@ interface Commands { /** Message type for {@link TablePartitionIdMessage}. */ short TABLE_PARTITION_ID = 61; + + /** Message type for {@link TablePartitionIdMessage}. */ + short TABLE_PARTITION_ID = 61; } /** diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java index 5fe3146b3933..ecbe0137a52e 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java @@ -17,23 +17,61 @@ package org.apache.ignite.internal.tx.impl; +import static org.apache.ignite.internal.util.CompletableFutures.allOf; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; +import org.apache.ignite.internal.lang.IgniteBiTuple; +import org.apache.ignite.internal.logger.IgniteLogger; +import org.apache.ignite.internal.logger.Loggers; +import org.apache.ignite.internal.raft.service.RaftGroupService; import org.apache.ignite.internal.replicator.TablePartitionId; -import 
org.apache.ignite.internal.tx.storage.state.TxStateStorage; +import org.apache.ignite.internal.tx.message.TxMessagesFactory; +import org.apache.ignite.internal.tx.message.VacuumTxStatesCommand; public class PersistentTxStateVacuumizer { - private final Function txStateStorageResolver; + private static final IgniteLogger LOG = Loggers.forClass(PersistentTxStateVacuumizer.class); + + private static final TxMessagesFactory TX_MESSAGES_FACTORY = new TxMessagesFactory(); - public PersistentTxStateVacuumizer(Function txStateStorageResolver) { + private final Function txStateStorageResolver; + + public PersistentTxStateVacuumizer(Function txStateStorageResolver) { this.txStateStorageResolver = txStateStorageResolver; } - public void vacuumPersistentTxState(UUID txId, TablePartitionId commitPartitionId) { - TxStateStorage txStateStorage = txStateStorageResolver.apply(commitPartitionId); + public CompletableFuture, Set>> vacuumPersistentTxStates(Map> txIds) { + Set successful = ConcurrentHashMap.newKeySet(); + Set unsuccessful = ConcurrentHashMap.newKeySet(); + List> futures = new ArrayList<>(); + + txIds.forEach((commitPartitionId, ids) -> { + RaftGroupService raftClient = txStateStorageResolver.apply(commitPartitionId); + + if (raftClient != null) { + VacuumTxStatesCommand cmd = TX_MESSAGES_FACTORY.vacuumTxStatesCommand().txIds(ids).build(); + + CompletableFuture future = raftClient.run(cmd).whenComplete((v, e) -> { + if (e == null) { + successful.addAll(ids); + } else { + LOG.warn("Failed to vacuum tx states from the persistent storage.", e); + + unsuccessful.addAll(ids); + } + }); + + futures.add(future); + } + }); - if (txStateStorage != null) { - txStateStorage.remove(txId); - } + return allOf(futures.toArray(new CompletableFuture[0])) + .thenApply(unused -> new IgniteBiTuple<>(successful, unsuccessful)); } } diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java 
b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java index 507063d11b21..a6e71ff2de81 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java @@ -39,7 +39,7 @@ public class VolatileTxStateMetaStorage { /** The local map for tx states. */ private ConcurrentHashMap txStateMap; - private final PersistentTxStateVacuumizer persistentTxStateVacuumizer = new PersistentTxStateVacuumizer(); + private final PersistentTxStateVacuumizer persistentTxStateVacuumizer = new PersistentTxStateVacuumizer(null); /** * Starts the storage. @@ -133,7 +133,6 @@ public void vacuum(long vacuumObservationTimestamp, long txnResourceTtl) { if (TxState.isFinalState(meta0.txState())) { Long initialVacuumObservationTimestamp = meta0.initialVacuumObservationTimestamp(); Long cleanupCompletionTimestamp = meta0.cleanupCompletionTimestamp(); - long if (txnResourceTtl == 0) { vacuumizedTxnsCount.incrementAndGet(); @@ -150,7 +149,7 @@ public void vacuum(long vacuumObservationTimestamp, long txnResourceTtl) { } else if (initialVacuumObservationTimestamp + txnResourceTtl < vacuumObservationTimestamp) { vacuumizedTxnsCount.incrementAndGet(); - persistentTxStateVacuumizer.vacuumPersistentTxState(txId, meta0.commitPartitionId()); + persistentTxStateVacuumizer.vacuumPersistentTxStates(null); return null; } else { diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/TxMessageGroup.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/TxMessageGroup.java index 657810893c66..2c5fde4b6b2d 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/TxMessageGroup.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/TxMessageGroup.java @@ -79,4 +79,9 @@ public class TxMessageGroup { */ 
public static final short TX_FINISHED_BATCH = 10; + /** + * Message type for {@link VacuumTxStatesCommand}. + */ + public static final short VACUUM_TX_STATE_COMMAND = 11; + } diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStatesCommand.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStatesCommand.java new file mode 100644 index 000000000000..190aeab82c5b --- /dev/null +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStatesCommand.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.internal.tx.message; + +import static org.apache.ignite.internal.tx.message.TxMessageGroup.VACUUM_TX_STATE_COMMAND; + +import java.util.Set; +import java.util.UUID; +import org.apache.ignite.internal.network.annotations.Transferable; +import org.apache.ignite.internal.raft.WriteCommand; + +@Transferable(VACUUM_TX_STATE_COMMAND) +public interface VacuumTxStatesCommand extends WriteCommand { + Set txIds(); +} From 34a37c66ba62c966170280447c8ac15b200e76c1 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Thu, 11 Apr 2024 10:48:43 +0300 Subject: [PATCH 04/26] wip poc --- .../table/ItTransactionPrimaryChangeTest.java | 48 ++++++- .../table/distributed/TableMessageGroup.java | 3 - .../distributed/raft/PartitionListener.java | 79 +++++++---- .../replicator/PartitionReplicaListener.java | 16 +++ .../tx/impl/PersistentTxStateVacuumizer.java | 53 +++---- .../tx/impl/ResourceVacuumManager.java | 2 +- .../internal/tx/impl/TxManagerImpl.java | 20 ++- .../tx/impl/VolatileTxStateMetaStorage.java | 134 ++++++++++++++---- .../internal/tx/message/TxMessageGroup.java | 7 +- .../message/VacuumTxStateReplicaRequest.java | 30 ++++ .../state/ThreadAssertingTxStateStorage.java | 4 +- .../tx/storage/state/TxStateStorage.java | 4 +- .../state/rocksdb/TxStateRocksDbStorage.java | 15 +- .../state/AbstractTxStateStorageTest.java | 4 +- .../state/test/TestTxStateStorage.java | 7 +- 15 files changed, 324 insertions(+), 102 deletions(-) create mode 100644 modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStateReplicaRequest.java diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java index 29aeab0ec82e..b933d050ae04 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java +++ 
b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java @@ -21,10 +21,13 @@ import static org.apache.ignite.internal.SessionUtils.executeUpdate; import static org.apache.ignite.internal.TestWrappers.unwrapIgniteTransaction; import static org.apache.ignite.internal.TestWrappers.unwrapTableImpl; +import static org.apache.ignite.internal.testframework.IgniteTestUtils.waitForCondition; import static org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully; import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import java.util.UUID; import java.util.concurrent.CompletableFuture; import java.util.function.Predicate; import java.util.stream.IntStream; @@ -35,7 +38,10 @@ import org.apache.ignite.internal.replicator.ReplicationGroupId; import org.apache.ignite.internal.replicator.TablePartitionId; import org.apache.ignite.internal.table.distributed.command.UpdateCommand; +import org.apache.ignite.internal.testframework.SystemPropertiesExtension; +import org.apache.ignite.internal.testframework.WithSystemProperty; import org.apache.ignite.internal.tx.impl.ReadWriteTransactionImpl; +import org.apache.ignite.internal.tx.storage.state.TxStateStorage; import org.apache.ignite.raft.jraft.rpc.WriteActionRequest; import org.apache.ignite.table.RecordView; import org.apache.ignite.table.Tuple; @@ -43,10 +49,13 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInfo; +import org.junit.jupiter.api.extension.ExtendWith; /** * Integration tests for the transactions running while the primary changes, not related to the tx recovery. 
*/ +@ExtendWith(SystemPropertiesExtension.class) +@WithSystemProperty(key = "RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY", value = "1000") public class ItTransactionPrimaryChangeTest extends ClusterPerTestIntegrationTest { /** Table name. */ private static final String TABLE_NAME = "test_table"; @@ -85,7 +94,8 @@ protected void customizeInitParameters(InitParametersBuilder builder) { builder.clusterConfiguration("{" + " transaction: {" - + " implicitTransactionTimeout: 30000" + + " implicitTransactionTimeout: 30000," + + " txnResourceTtl: 2" + " }," + " replication: {" + " rpcTimeout: 30000" @@ -192,6 +202,42 @@ public void testFullTxConsistency() throws InterruptedException { assertEquals("2", fullTxFut.join().value("val")); } + @Test + public void testVacuum() throws InterruptedException { + TableImpl tbl = unwrapTableImpl(node(0).tables().table(TABLE_NAME)); + + int partId = 0; + + var tblReplicationGrp = new TablePartitionId(tbl.tableId(), partId); + + String leaseholder = waitAndGetPrimaryReplica(node(0), tblReplicationGrp).getLeaseholder(); + + IgniteImpl firstLeaseholderNode = findNodeByName(leaseholder); + + log.info("Test: Full transaction will be executed on [node={}].", firstLeaseholderNode.name()); + + IgniteImpl txCrdNode = findNode(0, initialNodes(), n -> !leaseholder.equals(n.name())); + + log.info("Test: Transaction coordinator is [node={}].", txCrdNode.name()); + + RecordView view = txCrdNode.tables().table(TABLE_NAME).recordView(); + + // Put some value into the table. 
+ Transaction txPreload = txCrdNode.transactions().begin(); + UUID txId = ((ReadWriteTransactionImpl) unwrapIgniteTransaction(txPreload)).id(); + log.info("Test: Preloading the data [tx={}].", txId); + view.upsert(txPreload, Tuple.create().set("key", 1).set("val", "1")); + txPreload.commit(); + + TxStateStorage txStateStorage = tbl.internalTable().txStateStorage().getTxStateStorage(partId); + + assertTrue(txStateStorage.get(txId) != null); + + assertTrue(waitForCondition(() -> { + return txStateStorage.get(txId) == null; + }, 10_000)); + } + private IgniteImpl findNode(int startRange, int endRange, Predicate filter) { return IntStream.range(startRange, endRange) .mapToObj(this::node) diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/TableMessageGroup.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/TableMessageGroup.java index c20ca13c1cc3..14c76a3e3b35 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/TableMessageGroup.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/TableMessageGroup.java @@ -218,9 +218,6 @@ interface Commands { /** Message type for {@link TablePartitionIdMessage}. */ short TABLE_PARTITION_ID = 61; - - /** Message type for {@link TablePartitionIdMessage}. 
*/ - short TABLE_PARTITION_ID = 61; } /** diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java index 8072deb2631e..b9d8be12a915 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java @@ -78,6 +78,7 @@ import org.apache.ignite.internal.tx.TxState; import org.apache.ignite.internal.tx.TxStateMeta; import org.apache.ignite.internal.tx.UpdateCommandResult; +import org.apache.ignite.internal.tx.message.VacuumTxStatesCommand; import org.apache.ignite.internal.tx.storage.state.TxStateStorage; import org.apache.ignite.internal.util.PendingComparableValuesTracker; import org.apache.ignite.internal.util.TrackerClosedException; @@ -85,7 +86,7 @@ import org.jetbrains.annotations.TestOnly; /** - * Partition command handler. + * Partition cmd handler. */ public class PartitionListener implements RaftGroupListener, BeforeApplyHandler { /** Logger. */ @@ -171,7 +172,7 @@ public void onWrite(Iterator> iterator) { long proposedSafeTime = cmd.safeTime().longValue(); // Because of clock.tick it's guaranteed that two different commands will have different safe timestamps. - // maxObservableSafeTime may match proposedSafeTime only if it is the command that was previously validated and then retried + // maxObservableSafeTime may match proposedSafeTime only if it is the cmd that was previously validated and then retried // by raft client because of either TimeoutException or inner raft server recoverable exception. 
assert proposedSafeTime >= maxObservableSafeTimeVerifier : "Safe time reordering detected [current=" + maxObservableSafeTimeVerifier + ", proposed=" + proposedSafeTime + "]"; @@ -187,13 +188,13 @@ public void onWrite(Iterator> iterator) { long storagesAppliedIndex = Math.min(storage.lastAppliedIndex(), txStateStorage.lastAppliedIndex()); assert commandIndex > storagesAppliedIndex : - "Write command must have an index greater than that of storages [commandIndex=" + commandIndex + "Write cmd must have an index greater than that of storages [commandIndex=" + commandIndex + ", mvAppliedIndex=" + storage.lastAppliedIndex() + ", txStateAppliedIndex=" + txStateStorage.lastAppliedIndex() + "]"; Serializable result = null; - // NB: Make sure that ANY command we accept here updates lastAppliedIndex+term info in one of the underlying + // NB: Make sure that ANY cmd we accept here updates lastAppliedIndex+term info in one of the underlying // storages! // Otherwise, a gap between lastAppliedIndex from the point of view of JRaft and our storage might appear. 
// If a leader has such a gap, and does doSnapshot(), it will subsequently truncate its log too aggressively @@ -219,6 +220,8 @@ public void onWrite(Iterator> iterator) { handleBuildIndexCommand((BuildIndexCommand) command, commandIndex, commandTerm); } else if (command instanceof PrimaryReplicaChangeCommand) { handlePrimaryReplicaChangeCommand((PrimaryReplicaChangeCommand) command, commandIndex, commandTerm); + } else if (command instanceof VacuumTxStatesCommand) { + handleVacuumTxStatesCommand((VacuumTxStatesCommand) command, commandIndex, commandTerm); } else { assert false : "Command was not found [cmd=" + command + ']'; } @@ -228,7 +231,7 @@ public void onWrite(Iterator> iterator) { result = e.getCause(); } catch (Throwable t) { LOG.error( - "Unknown error while processing command [commandIndex={}, commandTerm={}, command={}]", + "Unknown error while processing cmd [commandIndex={}, commandTerm={}, cmd={}]", t, clo.index(), clo.index(), command ); @@ -260,17 +263,17 @@ public void onWrite(Iterator> iterator) { * Handler for the {@link UpdateCommand}. * * @param cmd Command. - * @param commandIndex Index of the RAFT command. - * @param commandTerm Term of the RAFT command. + * @param commandIndex Index of the RAFT cmd. + * @param commandTerm Term of the RAFT cmd. */ private UpdateCommandResult handleUpdateCommand(UpdateCommand cmd, long commandIndex, long commandTerm) { - // Skips the write command because the storage has already executed it. + // Skips the write cmd because the storage has already executed it. 
if (commandIndex <= storage.lastAppliedIndex()) { return new UpdateCommandResult(true); } if (cmd.leaseStartTime() != null) { - long leaseStartTime = requireNonNull(cmd.leaseStartTime(), "Inconsistent lease information in command [cmd=" + cmd + "]."); + long leaseStartTime = requireNonNull(cmd.leaseStartTime(), "Inconsistent lease information in cmd [cmd=" + cmd + "]."); if (leaseStartTime != txStateStorage.leaseStartTime()) { return new UpdateCommandResult(false, txStateStorage.leaseStartTime()); @@ -311,17 +314,17 @@ private UpdateCommandResult handleUpdateCommand(UpdateCommand cmd, long commandI * Handler for the {@link UpdateAllCommand}. * * @param cmd Command. - * @param commandIndex Index of the RAFT command. - * @param commandTerm Term of the RAFT command. + * @param commandIndex Index of the RAFT cmd. + * @param commandTerm Term of the RAFT cmd. */ private UpdateCommandResult handleUpdateAllCommand(UpdateAllCommand cmd, long commandIndex, long commandTerm) { - // Skips the write command because the storage has already executed it. + // Skips the write cmd because the storage has already executed it. if (commandIndex <= storage.lastAppliedIndex()) { return new UpdateCommandResult(true); } if (cmd.leaseStartTime() != null) { - long leaseStartTime = requireNonNull(cmd.leaseStartTime(), "Inconsistent lease information in command [cmd=" + cmd + "]."); + long leaseStartTime = requireNonNull(cmd.leaseStartTime(), "Inconsistent lease information in cmd [cmd=" + cmd + "]."); if (leaseStartTime != txStateStorage.leaseStartTime()) { return new UpdateCommandResult(false, txStateStorage.leaseStartTime()); @@ -360,14 +363,14 @@ private UpdateCommandResult handleUpdateAllCommand(UpdateAllCommand cmd, long co * Handler for the {@link FinishTxCommand}. * * @param cmd Command. - * @param commandIndex Index of the RAFT command. - * @param commandTerm Term of the RAFT command. + * @param commandIndex Index of the RAFT cmd. + * @param commandTerm Term of the RAFT cmd. 
* @return The actually stored transaction state {@link TransactionResult}. * @throws IgniteInternalException if an exception occurred during a transaction state change. */ private @Nullable TransactionResult handleFinishTxCommand(FinishTxCommand cmd, long commandIndex, long commandTerm) throws IgniteInternalException { - // Skips the write command because the storage has already executed it. + // Skips the write cmd because the storage has already executed it. if (commandIndex <= txStateStorage.lastAppliedIndex()) { return null; } @@ -391,7 +394,7 @@ private UpdateCommandResult handleUpdateAllCommand(UpdateAllCommand cmd, long co commandTerm ); - // Assume that we handle the finish command only on the commit partition. + // Assume that we handle the finish cmd only on the commit partition. TablePartitionId commitPartitionId = new TablePartitionId(storage.tableId(), storage.partitionId()); markFinished(txId, cmd.commit(), cmd.commitTimestamp(), commitPartitionId); @@ -410,11 +413,11 @@ private UpdateCommandResult handleUpdateAllCommand(UpdateAllCommand cmd, long co * Handler for the {@link WriteIntentSwitchCommand}. * * @param cmd Command. - * @param commandIndex Index of the RAFT command. - * @param commandTerm Term of the RAFT command. + * @param commandIndex Index of the RAFT cmd. + * @param commandTerm Term of the RAFT cmd. */ private void handleWriteIntentSwitchCommand(WriteIntentSwitchCommand cmd, long commandIndex, long commandTerm) { - // Skips the write command because the storage has already executed it. + // Skips the write cmd because the storage has already executed it. if (commandIndex <= storage.lastAppliedIndex()) { return; } @@ -436,11 +439,11 @@ private void handleWriteIntentSwitchCommand(WriteIntentSwitchCommand cmd, long c * Handler for the {@link SafeTimeSyncCommand}. * * @param cmd Command. - * @param commandIndex RAFT index of the command. - * @param commandTerm RAFT term of the command. + * @param commandIndex RAFT index of the cmd. 
+ * @param commandTerm RAFT term of the cmd. */ private void handleSafeTimeSyncCommand(SafeTimeSyncCommand cmd, long commandIndex, long commandTerm) { - // Skips the write command because the storage has already executed it. + // Skips the write cmd because the storage has already executed it. if (commandIndex <= storage.lastAppliedIndex()) { return; } @@ -466,7 +469,7 @@ public void onConfigurationCommitted(CommittedConfiguration config) { } // Do the update under lock to make sure no snapshot is started concurrently with this update. - // Note that we do not need to protect from a concurrent command execution by this listener because + // Note that we do not need to protect from a concurrent cmd execution by this listener because // configuration is committed in the same thread in which commands are applied. storage.acquirePartitionSnapshotsReadLock(); @@ -538,7 +541,7 @@ public boolean onBeforeApply(Command command) { long proposedSafeTime = cmd.safeTime().longValue(); // Because of clock.tick it's guaranteed that two different commands will have different safe timestamps. - // maxObservableSafeTime may match proposedSafeTime only if it is the command that was previously validated and then retried + // maxObservableSafeTime may match proposedSafeTime only if it is the cmd that was previously validated and then retried // by raft client because of either TimeoutException or inner raft server recoverable exception. if (proposedSafeTime >= maxObservableSafeTime) { maxObservableSafeTime = proposedSafeTime; @@ -562,11 +565,11 @@ public MvPartitionStorage getMvStorage() { * Handler for the {@link BuildIndexCommand}. * * @param cmd Command. - * @param commandIndex RAFT index of the command. - * @param commandTerm RAFT term of the command. + * @param commandIndex RAFT index of the cmd. + * @param commandTerm RAFT term of the cmd. 
*/ void handleBuildIndexCommand(BuildIndexCommand cmd, long commandIndex, long commandTerm) { - // Skips the write command because the storage has already executed it. + // Skips the write cmd because the storage has already executed it. if (commandIndex <= storage.lastAppliedIndex()) { return; } @@ -613,7 +616,7 @@ void handleBuildIndexCommand(BuildIndexCommand cmd, long commandIndex, long comm * @param commandTerm Command term. */ private void handlePrimaryReplicaChangeCommand(PrimaryReplicaChangeCommand cmd, long commandIndex, long commandTerm) { - // Skips the write command because the storage has already executed it. + // Skips the write cmd because the storage has already executed it. if (commandIndex <= storage.lastAppliedIndex()) { return; } @@ -621,6 +624,24 @@ private void handlePrimaryReplicaChangeCommand(PrimaryReplicaChangeCommand cmd, txStateStorage.updateLease(cmd.leaseStartTime(), commandIndex, commandTerm); } + /** + * Handler for {@link VacuumTxStatesCommand}. + * + * @param cmd Command. + * @param commandIndex Command index. + * @param commandTerm Command term. + */ + private void handleVacuumTxStatesCommand(VacuumTxStatesCommand cmd, long commandIndex, long commandTerm) { + // Skips the write cmd because the storage has already executed it. 
+ if (commandIndex <= storage.lastAppliedIndex()) { + return; + } + + for (UUID txId : cmd.txIds()) { + txStateStorage.remove(txId, commandIndex, commandTerm); + } + } + private static void onTxStateStorageCasFail(UUID txId, TxMeta txMetaBeforeCas, TxMeta txMetaToSet) { String errorMsg = format("Failed to update tx state in the storage, transaction txId = {} because of inconsistent state," + " expected state = {}, state to set = {}", diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java index cc101efeea2a..06edc7aa21fa 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java @@ -175,8 +175,11 @@ import org.apache.ignite.internal.tx.impl.FullyQualifiedResourceId; import org.apache.ignite.internal.tx.impl.RemotelyTriggeredResourceRegistry; import org.apache.ignite.internal.tx.message.TxFinishReplicaRequest; +import org.apache.ignite.internal.tx.message.TxMessagesFactory; import org.apache.ignite.internal.tx.message.TxRecoveryMessage; import org.apache.ignite.internal.tx.message.TxStateCommitPartitionRequest; +import org.apache.ignite.internal.tx.message.VacuumTxStateReplicaRequest; +import org.apache.ignite.internal.tx.message.VacuumTxStatesCommand; import org.apache.ignite.internal.tx.message.WriteIntentSwitchReplicaRequest; import org.apache.ignite.internal.tx.message.WriteIntentSwitchReplicatedInfo; import org.apache.ignite.internal.tx.storage.state.TxStateStorage; @@ -231,6 +234,9 @@ public class PartitionReplicaListener implements ReplicaListener { /** Factory for creating replica command messages. 
*/ private static final ReplicaMessagesFactory REPLICA_MESSAGES_FACTORY = new ReplicaMessagesFactory(); + /** Factory for creating transaction command messages. */ + private static final TxMessagesFactory TX_MESSAGES_FACTORY = new TxMessagesFactory(); + /** Replication retries limit. */ private static final int MAX_RETIES_ON_SAFE_TIME_REORDERING = 1000; @@ -716,6 +722,8 @@ private CompletableFuture processOperationRequest( return processReadOnlyDirectMultiEntryAction((ReadOnlyDirectMultiRowReplicaRequest) request, opStartTsIfDirectRo); } else if (request instanceof TxStateCommitPartitionRequest) { return processTxStateCommitPartitionRequest((TxStateCommitPartitionRequest) request); + } else if (request instanceof VacuumTxStateReplicaRequest) { + return processVacuumTxStateReplicaRequest((VacuumTxStateReplicaRequest) request); } else { throw new UnsupportedReplicaRequestException(request.getClass()); } @@ -4007,6 +4015,14 @@ private int tableVersionByTs(HybridTimestamp ts) { return source == null ? null : new BinaryRowUpgrader(schemaRegistry, targetSchemaVersion).upgrade(source); } + private CompletableFuture processVacuumTxStateReplicaRequest(VacuumTxStateReplicaRequest request) { + VacuumTxStatesCommand cmd = TX_MESSAGES_FACTORY.vacuumTxStatesCommand() + .txIds(request.transactionIds()) + .build(); + + return raftClient.run(cmd); + } + /** * Operation unique identifier. 
*/ diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java index ecbe0137a52e..50571902fe1b 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java @@ -26,52 +26,57 @@ import java.util.UUID; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; -import java.util.function.Function; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.ignite.internal.lang.IgniteBiTuple; import org.apache.ignite.internal.logger.IgniteLogger; import org.apache.ignite.internal.logger.Loggers; -import org.apache.ignite.internal.raft.service.RaftGroupService; +import org.apache.ignite.internal.replicator.ReplicaService; import org.apache.ignite.internal.replicator.TablePartitionId; import org.apache.ignite.internal.tx.message.TxMessagesFactory; -import org.apache.ignite.internal.tx.message.VacuumTxStatesCommand; +import org.apache.ignite.internal.tx.message.VacuumTxStateReplicaRequest; +import org.apache.ignite.network.ClusterNode; public class PersistentTxStateVacuumizer { private static final IgniteLogger LOG = Loggers.forClass(PersistentTxStateVacuumizer.class); private static final TxMessagesFactory TX_MESSAGES_FACTORY = new TxMessagesFactory(); - private final Function txStateStorageResolver; + private final ReplicaService replicaService; - public PersistentTxStateVacuumizer(Function txStateStorageResolver) { - this.txStateStorageResolver = txStateStorageResolver; + private final ClusterNode localNode; + + public PersistentTxStateVacuumizer( + ReplicaService replicaService, + ClusterNode localNode) { + this.replicaService = replicaService; + this.localNode = localNode; } - public CompletableFuture, 
Set>> vacuumPersistentTxStates(Map> txIds) { + public CompletableFuture, Integer>> vacuumPersistentTxStates(Map> txIds) { Set successful = ConcurrentHashMap.newKeySet(); - Set unsuccessful = ConcurrentHashMap.newKeySet(); + AtomicInteger unsuccessfulCount = new AtomicInteger(0); List> futures = new ArrayList<>(); - txIds.forEach((commitPartitionId, ids) -> { - RaftGroupService raftClient = txStateStorageResolver.apply(commitPartitionId); - - if (raftClient != null) { - VacuumTxStatesCommand cmd = TX_MESSAGES_FACTORY.vacuumTxStatesCommand().txIds(ids).build(); + txIds.forEach((commitPartitionId, txs) -> { + VacuumTxStateReplicaRequest request = TX_MESSAGES_FACTORY.vacuumTxStateReplicaRequest() + .groupId(commitPartitionId) + .transactionIds(txs) + .build(); - CompletableFuture future = raftClient.run(cmd).whenComplete((v, e) -> { - if (e == null) { - successful.addAll(ids); - } else { - LOG.warn("Failed to vacuum tx states from the persistent storage.", e); + CompletableFuture future = replicaService.invoke(localNode, request).whenComplete((v, e) -> { + if (e == null) { + successful.addAll(txs); + } else { + LOG.warn("Failed to vacuum tx states from the persistent storage.", e); - unsuccessful.addAll(ids); - } - }); + unsuccessfulCount.incrementAndGet(); + } + }); - futures.add(future); - } + futures.add(future); }); return allOf(futures.toArray(new CompletableFuture[0])) - .thenApply(unused -> new IgniteBiTuple<>(successful, unsuccessful)); + .handle((unused, unusedEx) -> new IgniteBiTuple<>(successful, unsuccessfulCount.get())); } } diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/ResourceVacuumManager.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/ResourceVacuumManager.java index 564a87141405..65e77fc3bc04 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/ResourceVacuumManager.java +++ 
b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/ResourceVacuumManager.java @@ -50,7 +50,7 @@ public class ResourceVacuumManager implements IgniteComponent { public static final String RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY = "RESOURCE_VACUUM_INTERVAL_MILLISECONDS"; private final int resourceVacuumIntervalMilliseconds = IgniteSystemProperties - .getInteger(RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY, 30_000); + .getInteger(RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY, 1_000); private final FinishedReadOnlyTransactionTracker finishedReadOnlyTransactionTracker; diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java index fcca1fa787ba..bdbbbf94ed5b 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java @@ -131,7 +131,7 @@ public class TxManagerImpl implements TxManager, NetworkMessageHandler { private final TransactionIdGenerator transactionIdGenerator; /** The local state storage. */ - private final VolatileTxStateMetaStorage txStateVolatileStorage = new VolatileTxStateMetaStorage(); + private final VolatileTxStateMetaStorage txStateVolatileStorage; /** Future of a read-only transaction by it {@link TxIdAndTimestamp}. */ private final ConcurrentNavigableMap> readOnlyTxFutureById = new ConcurrentSkipListMap<>( @@ -179,7 +179,7 @@ public class TxManagerImpl implements TxManager, NetworkMessageHandler { private final MessagingService messagingService; /** Local node network identity. This id is available only after the network has started. */ - private String localNodeId; + private volatile String localNodeId; /** Server cleanup processor. 
*/ private final TxCleanupRequestHandler txCleanupRequestHandler; @@ -199,6 +199,10 @@ public class TxManagerImpl implements TxManager, NetworkMessageHandler { private final TransactionInflights transactionInflights; + private final ReplicaService replicaService; + + private volatile PersistentTxStateVacuumizer persistentTxStateVacuumizer; + /** * Test-only constructor. * @@ -291,6 +295,7 @@ public TxManagerImpl( this.localRwTxCounter = localRwTxCounter; this.partitionOperationsExecutor = partitionOperationsExecutor; this.transactionInflights = transactionInflights; + this.replicaService = replicaService; placementDriverHelper = new PlacementDriverHelper(placementDriver, clockService); @@ -311,6 +316,8 @@ public TxManagerImpl( var writeIntentSwitchProcessor = new WriteIntentSwitchProcessor(placementDriverHelper, txMessageSender, topologyService); + txStateVolatileStorage = new VolatileTxStateMetaStorage(); + txCleanupRequestHandler = new TxCleanupRequestHandler( messagingService, lockManager, @@ -711,6 +718,8 @@ public int pending() { public CompletableFuture start() { localNodeId = topologyService.localMember().id(); + persistentTxStateVacuumizer = new PersistentTxStateVacuumizer(replicaService, topologyService.localMember()); + messagingService.addMessageHandler(ReplicaMessageGroup.class, this); txStateVolatileStorage.start(); @@ -770,9 +779,14 @@ public CompletableFuture cleanup(String node, UUID txId) { @Override public void vacuum() { + if (persistentTxStateVacuumizer == null) { + return; // Not started yet. 
+ } + long vacuumObservationTimestamp = System.currentTimeMillis(); - txStateVolatileStorage.vacuum(vacuumObservationTimestamp, txConfig.txnResourceTtl().value()); + txStateVolatileStorage.vacuum(vacuumObservationTimestamp, txConfig.txnResourceTtl().value(), + persistentTxStateVacuumizer::vacuumPersistentTxStates); } @Override diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java index a6e71ff2de81..69bf71e93d28 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java @@ -17,16 +17,26 @@ package org.apache.ignite.internal.tx.impl; +import static java.lang.Math.max; +import static java.util.Objects.requireNonNull; import static org.apache.ignite.internal.tx.TxState.PENDING; import static org.apache.ignite.internal.tx.TxState.checkTransitionCorrectness; import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Objects; +import java.util.Set; import java.util.UUID; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; +import org.apache.ignite.internal.lang.IgniteBiTuple; import org.apache.ignite.internal.logger.IgniteLogger; import org.apache.ignite.internal.logger.Loggers; +import org.apache.ignite.internal.replicator.TablePartitionId; import org.apache.ignite.internal.tx.TxState; import org.apache.ignite.internal.tx.TxStateMeta; import org.jetbrains.annotations.Nullable; @@ -36,11 +46,10 @@ */ public class VolatileTxStateMetaStorage { private static final IgniteLogger LOG = Loggers.forClass(VolatileTxStateMetaStorage.class); + /** The local map for tx 
states. */ private ConcurrentHashMap txStateMap; - private final PersistentTxStateVacuumizer persistentTxStateVacuumizer = new PersistentTxStateVacuumizer(null); - /** * Starts the storage. */ @@ -120,7 +129,11 @@ public Collection states() { * @param vacuumObservationTimestamp Timestamp of the vacuum attempt. * @param txnResourceTtl Transactional resource time to live in milliseconds. */ - public void vacuum(long vacuumObservationTimestamp, long txnResourceTtl) { + public void vacuum( + long vacuumObservationTimestamp, + long txnResourceTtl, + Function>, CompletableFuture, Integer>>> beforeVacuum + ) { LOG.info("Vacuum started [vacuumObservationTimestamp={}, txnResourceTtl={}].", vacuumObservationTimestamp, txnResourceTtl); AtomicInteger vacuumizedTxnsCount = new AtomicInteger(0); @@ -128,33 +141,44 @@ public void vacuum(long vacuumObservationTimestamp, long txnResourceTtl) { AtomicInteger alreadyMarkedTxnsCount = new AtomicInteger(0); AtomicInteger skippedFotFurtherProcessingUnfinishedTxnsCount = new AtomicInteger(0); + Map> txIds = new HashMap<>(); + Map timestamps = new HashMap<>(); + txStateMap.forEach((txId, meta) -> { txStateMap.computeIfPresent(txId, (txId0, meta0) -> { if (TxState.isFinalState(meta0.txState())) { Long initialVacuumObservationTimestamp = meta0.initialVacuumObservationTimestamp(); - Long cleanupCompletionTimestamp = meta0.cleanupCompletionTimestamp(); - if (txnResourceTtl == 0) { - vacuumizedTxnsCount.incrementAndGet(); - return null; - } else if (initialVacuumObservationTimestamp == null) { + if (initialVacuumObservationTimestamp == null) { markedAsInitiallyDetectedTxnsCount.incrementAndGet(); - return new TxStateMeta( - meta0.txState(), - meta0.txCoordinatorId(), - meta0.commitPartitionId(), - meta0.commitTimestamp(), - vacuumObservationTimestamp - ); - } else if (initialVacuumObservationTimestamp + txnResourceTtl < vacuumObservationTimestamp) { - vacuumizedTxnsCount.incrementAndGet(); - - 
persistentTxStateVacuumizer.vacuumPersistentTxStates(null); - - return null; + + return markInitialVacuumObservationTimestamp(meta0, vacuumObservationTimestamp); } else { - alreadyMarkedTxnsCount.incrementAndGet(); - return meta0; + Long cleanupCompletionTimestamp = meta0.cleanupCompletionTimestamp(); + + boolean shouldBeVacuumized = shouldBeVacuumized(requireNonNull(initialVacuumObservationTimestamp), + cleanupCompletionTimestamp, txnResourceTtl, vacuumObservationTimestamp); + + if (shouldBeVacuumized) { + if (meta0.commitPartitionId() == null) { + vacuumizedTxnsCount.incrementAndGet(); + + return null; + } else { + Set ids = txIds.computeIfAbsent(meta0.commitPartitionId(), k -> new HashSet<>()); + ids.add(txId); + + if (cleanupCompletionTimestamp != null) { + timestamps.put(txId, cleanupCompletionTimestamp); + } + + return meta0; + } + } else { + alreadyMarkedTxnsCount.incrementAndGet(); + + return meta0; + } } } else { skippedFotFurtherProcessingUnfinishedTxnsCount.incrementAndGet(); @@ -163,14 +187,62 @@ public void vacuum(long vacuumObservationTimestamp, long txnResourceTtl) { }); }); - LOG.info("Vacuum finished [vacuumObservationTimestamp={}, txnResourceTtl={}, vacuumizedTxnsCount={}," - + " markedAsInitiallyDetectedTxnsCount={}, alreadyMarkedTxnsCount={}, skippedFotFurtherProcessingUnfinishedTxnsCount={}].", - vacuumObservationTimestamp, - txnResourceTtl, - vacuumizedTxnsCount, - markedAsInitiallyDetectedTxnsCount, - alreadyMarkedTxnsCount, - skippedFotFurtherProcessingUnfinishedTxnsCount + beforeVacuum.apply(txIds) + .thenAccept(tuple -> { + Set successful = tuple.get1(); + + for (UUID txId : successful) { + txStateMap.compute(txId, (k, v) -> { + if (v == null) { + return null; + } else { + Long cleanupCompletionTs = timestamps.get(txId); + + return (cleanupCompletionTs != null && Objects.equals(cleanupCompletionTs, v.cleanupCompletionTimestamp())) + ? 
null + : v; + } + }); + } + + LOG.info("Vacuum finished [vacuumObservationTimestamp={}, txnResourceTtl={}, vacuumizedTxnsCount={}," + + "vacuumizedPersistentTxnStatesCount={}, " + + " markedAsInitiallyDetectedTxnsCount={}, alreadyMarkedTxnsCount={}, " + + "skippedFotFurtherProcessingUnfinishedTxnsCount={}].", + vacuumObservationTimestamp, + txnResourceTtl, + vacuumizedTxnsCount, + successful.size(), + markedAsInitiallyDetectedTxnsCount, + alreadyMarkedTxnsCount, + skippedFotFurtherProcessingUnfinishedTxnsCount + ); + }); + } + + private static TxStateMeta markInitialVacuumObservationTimestamp(TxStateMeta meta, long vacuumObservationTimestamp) { + return new TxStateMeta( + meta.txState(), + meta.txCoordinatorId(), + meta.commitPartitionId(), + meta.commitTimestamp(), + vacuumObservationTimestamp ); } + + private static boolean shouldBeVacuumized( + long initialVacuumObservationTimestamp, + @Nullable Long cleanupCompletionTimestamp, + long txnResourceTtl, + long vacuumObservationTimestamp) { + if (txnResourceTtl == 0) { + return true; + } + + if (cleanupCompletionTimestamp == null) { + return initialVacuumObservationTimestamp + txnResourceTtl < vacuumObservationTimestamp; + } else { + return max(cleanupCompletionTimestamp, initialVacuumObservationTimestamp) + txnResourceTtl < vacuumObservationTimestamp; + } + } } diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/TxMessageGroup.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/TxMessageGroup.java index 2c5fde4b6b2d..5da098565536 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/TxMessageGroup.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/TxMessageGroup.java @@ -79,9 +79,14 @@ public class TxMessageGroup { */ public static final short TX_FINISHED_BATCH = 10; + /** + * Message type for {@link VacuumTxStateReplicaRequest}. 
+ */ + public static final short VACUUM_TX_STATE_REPLICA_REQUEST = 11; + /** * Message type for {@link VacuumTxStatesCommand}. */ - public static final short VACUUM_TX_STATE_COMMAND = 11; + public static final short VACUUM_TX_STATE_COMMAND = 12; } diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStateReplicaRequest.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStateReplicaRequest.java new file mode 100644 index 000000000000..a65f8a013d27 --- /dev/null +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStateReplicaRequest.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.internal.tx.message; + +import static org.apache.ignite.internal.tx.message.TxMessageGroup.VACUUM_TX_STATE_REPLICA_REQUEST; + +import java.util.Set; +import java.util.UUID; +import org.apache.ignite.internal.network.annotations.Transferable; +import org.apache.ignite.internal.replicator.message.ReplicaRequest; + +@Transferable(VACUUM_TX_STATE_REPLICA_REQUEST) +public interface VacuumTxStateReplicaRequest extends ReplicaRequest { + Set transactionIds(); +} diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/ThreadAssertingTxStateStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/ThreadAssertingTxStateStorage.java index 3e744c4ba2eb..b30b6d45e716 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/ThreadAssertingTxStateStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/ThreadAssertingTxStateStorage.java @@ -67,10 +67,10 @@ public boolean compareAndSet(UUID txId, @Nullable TxState txStateExpected, TxMet } @Override - public void remove(UUID txId) { + public void remove(UUID txId, long commandIndex, long commandTerm) { assertThreadAllowsToWrite(); - storage.remove(txId); + storage.remove(txId, commandIndex, commandTerm); } @Override diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/TxStateStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/TxStateStorage.java index 546fcc59d041..bafb9de625ef 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/TxStateStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/TxStateStorage.java @@ -74,9 +74,11 @@ public interface TxStateStorage extends ManuallyCloseable { * Removes the tx meta from the storage. * * @param txId Tx id. 
+ * @param commandIndex New value for {@link #lastAppliedIndex()}. + * @param commandTerm New value for {@link #lastAppliedTerm()}. * @throws IgniteInternalException with {@link Transactions#TX_STATE_STORAGE_ERR} error code in case when the operation has failed. */ - void remove(UUID txId); + void remove(UUID txId, long commandIndex, long commandTerm); /** * Creates a cursor to scan all data in the storage. diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/rocksdb/TxStateRocksDbStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/rocksdb/TxStateRocksDbStorage.java index 95f6a1a1b73e..0fc76477a5c8 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/rocksdb/TxStateRocksDbStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/rocksdb/TxStateRocksDbStorage.java @@ -231,12 +231,21 @@ public boolean compareAndSet(UUID txId, @Nullable TxState txStateExpected, TxMet } @Override - public void remove(UUID txId) { + public void remove(UUID txId, long commandIndex, long commandTerm) { busy(() -> { - try { + try (WriteBatch writeBatch = new WriteBatch()) { throwExceptionIfStorageInProgressOfRebalance(); - sharedStorage.db().delete(txIdToKey(txId)); + writeBatch.delete(txIdToKey(txId)); + + // If the store is in the process of rebalancing, then there is no need to update lastAppliedIndex and lastAppliedTerm. + // This is necessary to prevent a situation where, in the middle of the rebalance, the node will be restarted and we will + // have non-consistent storage. They will be updated by either #abortRebalance() or #finishRebalance(long, long). 
+ if (state.get() != StorageState.REBALANCE) { + updateLastApplied(writeBatch, commandIndex, commandTerm); + } + + sharedStorage.db().write(sharedStorage.writeOptions, writeBatch); return null; } catch (RocksDBException e) { diff --git a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java index e694c72e3dbf..4ab059e5666e 100644 --- a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java +++ b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java @@ -102,7 +102,7 @@ public void testPutGetRemove() { for (int i = 0; i < 100; i++) { if (i % 2 == 0) { - storage.remove(txIds.get(i)); + storage.remove(txIds.get(i), i, 1); } } @@ -469,7 +469,7 @@ private static void checkTxStateStorageMethodsWhenRebalanceInProgress(TxStateSto assertThrowsIgniteInternalException(TX_STATE_STORAGE_REBALANCE_ERR, () -> storage.lastApplied(100, 500)); assertThrowsIgniteInternalException(TX_STATE_STORAGE_REBALANCE_ERR, () -> storage.get(UUID.randomUUID())); - assertThrowsIgniteInternalException(TX_STATE_STORAGE_REBALANCE_ERR, () -> storage.remove(UUID.randomUUID())); + assertThrowsIgniteInternalException(TX_STATE_STORAGE_REBALANCE_ERR, () -> storage.remove(UUID.randomUUID(), 1, 1)); assertThrowsIgniteInternalException(TX_STATE_STORAGE_REBALANCE_ERR, storage::scan); } diff --git a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/test/TestTxStateStorage.java b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/test/TestTxStateStorage.java index 3d17c4138cfc..99484ed3dba6 100644 --- a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/test/TestTxStateStorage.java +++ 
b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/test/TestTxStateStorage.java @@ -108,10 +108,15 @@ public boolean compareAndSet(UUID txId, @Nullable TxState txStateExpected, TxMet } @Override - public void remove(UUID txId) { + public void remove(UUID txId, long commandIndex, long commandTerm) { checkStorageClosedOrInProgressOfRebalance(); storage.remove(txId); + + if (rebalanceFutureReference.get() == null) { + lastAppliedIndex = commandIndex; + lastAppliedTerm = commandTerm; + } } @Override From da4c1fe0e204334c90aaff43a976f9aac6852283 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Mon, 15 Apr 2024 20:46:04 +0300 Subject: [PATCH 05/26] added tests --- .../apache/ignite/internal/TestWrappers.java | 11 + .../table/ItTransactionPrimaryChangeTest.java | 62 +-- .../table/ItTransactionRecoveryTest.java | 17 +- .../table/ItTransactionTestUtils.java | 147 +++++ .../table/ItTxResourcesVacuumTest.java | 504 ++++++++++++++++++ .../internal/table/RecordBinaryViewImpl.java | 13 + 6 files changed, 678 insertions(+), 76 deletions(-) create mode 100644 modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionTestUtils.java create mode 100644 modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java diff --git a/modules/runner/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java b/modules/runner/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java index 259a1bed36c9..d232afcb6645 100644 --- a/modules/runner/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java +++ b/modules/runner/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java @@ -18,11 +18,13 @@ package org.apache.ignite.internal; import org.apache.ignite.internal.table.IgniteTablesInternal; +import org.apache.ignite.internal.table.RecordBinaryViewImpl; import org.apache.ignite.internal.table.TableImpl; import 
org.apache.ignite.internal.table.TableViewInternal; import org.apache.ignite.internal.table.distributed.TableManager; import org.apache.ignite.internal.tx.impl.IgniteTransactionsImpl; import org.apache.ignite.internal.wrapper.Wrappers; +import org.apache.ignite.table.RecordView; import org.apache.ignite.table.Table; import org.apache.ignite.table.manager.IgniteTables; import org.apache.ignite.tx.IgniteTransactions; @@ -99,4 +101,13 @@ public static IgniteTransactionsImpl unwrapIgniteTransactionsImpl(IgniteTransact public static Transaction unwrapIgniteTransaction(Transaction tx) { return Wrappers.unwrap(tx, Transaction.class); } + + /** + * Unwraps {@link RecordBinaryViewImpl} from a {@link RecordView}. + * + * @param view View to unwrap. + */ + public static RecordBinaryViewImpl unwrapRecordBinaryViewImpl(RecordView view) { + return Wrappers.unwrap(view, RecordBinaryViewImpl.class); + } } diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java index b933d050ae04..3700c840edd1 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java @@ -17,31 +17,23 @@ package org.apache.ignite.internal.table; -import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.ignite.internal.SessionUtils.executeUpdate; import static org.apache.ignite.internal.TestWrappers.unwrapIgniteTransaction; import static org.apache.ignite.internal.TestWrappers.unwrapTableImpl; -import static org.apache.ignite.internal.testframework.IgniteTestUtils.waitForCondition; +import static org.apache.ignite.internal.table.ItTransactionTestUtils.waitAndGetPrimaryReplica; import static 
org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully; import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; -import java.util.UUID; import java.util.concurrent.CompletableFuture; import java.util.function.Predicate; import java.util.stream.IntStream; import org.apache.ignite.InitParametersBuilder; import org.apache.ignite.internal.ClusterPerTestIntegrationTest; import org.apache.ignite.internal.app.IgniteImpl; -import org.apache.ignite.internal.placementdriver.ReplicaMeta; -import org.apache.ignite.internal.replicator.ReplicationGroupId; import org.apache.ignite.internal.replicator.TablePartitionId; import org.apache.ignite.internal.table.distributed.command.UpdateCommand; -import org.apache.ignite.internal.testframework.SystemPropertiesExtension; -import org.apache.ignite.internal.testframework.WithSystemProperty; import org.apache.ignite.internal.tx.impl.ReadWriteTransactionImpl; -import org.apache.ignite.internal.tx.storage.state.TxStateStorage; import org.apache.ignite.raft.jraft.rpc.WriteActionRequest; import org.apache.ignite.table.RecordView; import org.apache.ignite.table.Tuple; @@ -49,13 +41,10 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInfo; -import org.junit.jupiter.api.extension.ExtendWith; /** * Integration tests for the transactions running while the primary changes, not related to the tx recovery. */ -@ExtendWith(SystemPropertiesExtension.class) -@WithSystemProperty(key = "RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY", value = "1000") public class ItTransactionPrimaryChangeTest extends ClusterPerTestIntegrationTest { /** Table name. 
*/ private static final String TABLE_NAME = "test_table"; @@ -202,42 +191,6 @@ public void testFullTxConsistency() throws InterruptedException { assertEquals("2", fullTxFut.join().value("val")); } - @Test - public void testVacuum() throws InterruptedException { - TableImpl tbl = unwrapTableImpl(node(0).tables().table(TABLE_NAME)); - - int partId = 0; - - var tblReplicationGrp = new TablePartitionId(tbl.tableId(), partId); - - String leaseholder = waitAndGetPrimaryReplica(node(0), tblReplicationGrp).getLeaseholder(); - - IgniteImpl firstLeaseholderNode = findNodeByName(leaseholder); - - log.info("Test: Full transaction will be executed on [node={}].", firstLeaseholderNode.name()); - - IgniteImpl txCrdNode = findNode(0, initialNodes(), n -> !leaseholder.equals(n.name())); - - log.info("Test: Transaction coordinator is [node={}].", txCrdNode.name()); - - RecordView view = txCrdNode.tables().table(TABLE_NAME).recordView(); - - // Put some value into the table. - Transaction txPreload = txCrdNode.transactions().begin(); - UUID txId = ((ReadWriteTransactionImpl) unwrapIgniteTransaction(txPreload)).id(); - log.info("Test: Preloading the data [tx={}].", txId); - view.upsert(txPreload, Tuple.create().set("key", 1).set("val", "1")); - txPreload.commit(); - - TxStateStorage txStateStorage = tbl.internalTable().txStateStorage().getTxStateStorage(partId); - - assertTrue(txStateStorage.get(txId) != null); - - assertTrue(waitForCondition(() -> { - return txStateStorage.get(txId) == null; - }, 10_000)); - } - private IgniteImpl findNode(int startRange, int endRange, Predicate filter) { return IntStream.range(startRange, endRange) .mapToObj(this::node) @@ -249,17 +202,4 @@ private IgniteImpl findNode(int startRange, int endRange, Predicate private IgniteImpl findNodeByName(String leaseholder) { return findNode(0, initialNodes(), n -> leaseholder.equals(n.name())); } - - private static ReplicaMeta waitAndGetPrimaryReplica(IgniteImpl node, ReplicationGroupId tblReplicationGrp) { - 
CompletableFuture primaryReplicaFut = node.placementDriver().awaitPrimaryReplica( - tblReplicationGrp, - node.clock().now(), - 10, - SECONDS - ); - - assertThat(primaryReplicaFut, willCompleteSuccessfully()); - - return primaryReplicaFut.join(); - } } diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionRecoveryTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionRecoveryTest.java index 3010ee9a5562..8eef2913a522 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionRecoveryTest.java +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionRecoveryTest.java @@ -20,6 +20,7 @@ import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.ignite.internal.SessionUtils.executeUpdate; import static org.apache.ignite.internal.TestWrappers.unwrapTableImpl; +import static org.apache.ignite.internal.table.ItTransactionTestUtils.waitAndGetPrimaryReplica; import static org.apache.ignite.internal.testframework.IgniteTestUtils.waitForCondition; import static org.apache.ignite.internal.testframework.matchers.CompletableFutureExceptionMatcher.willThrow; import static org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully; @@ -54,7 +55,6 @@ import org.apache.ignite.internal.network.ClusterService; import org.apache.ignite.internal.network.DefaultMessagingService; import org.apache.ignite.internal.network.NetworkMessage; -import org.apache.ignite.internal.placementdriver.ReplicaMeta; import org.apache.ignite.internal.placementdriver.message.PlacementDriverMessagesFactory; import org.apache.ignite.internal.placementdriver.message.StopLeaseProlongationMessage; import org.apache.ignite.internal.replicator.ReplicationGroupId; @@ -113,7 +113,7 @@ public class ItTransactionRecoveryTest extends ClusterPerTestIntegrationTest { public void setup(TestInfo testInfo) 
throws Exception { super.setup(testInfo); - String zoneSql = "create zone test_zone with partitions=1, replicas=3"; + String zoneSql = "create zone test_zone with partitions=1, replicas=2"; String sql = "create table " + TABLE_NAME + " (key int primary key, val varchar(20)) with primary_zone='TEST_ZONE'"; cluster.doInSession(0, session -> { @@ -1146,19 +1146,6 @@ private IgniteImpl nonPrimaryNode(String leaseholder) { return findNode(1, initialNodes(), n -> !leaseholder.equals(n.name())); } - private static ReplicaMeta waitAndGetPrimaryReplica(IgniteImpl node, ReplicationGroupId tblReplicationGrp) { - CompletableFuture primaryReplicaFut = node.placementDriver().awaitPrimaryReplica( - tblReplicationGrp, - node.clock().now(), - 10, - SECONDS - ); - - assertThat(primaryReplicaFut, willCompleteSuccessfully()); - - return primaryReplicaFut.join(); - } - private static String waitAndGetLeaseholder(IgniteImpl node, ReplicationGroupId tblReplicationGrp) { return waitAndGetPrimaryReplica(node, tblReplicationGrp).getLeaseholder(); } diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionTestUtils.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionTestUtils.java new file mode 100644 index 000000000000..6e8c260cfb19 --- /dev/null +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionTestUtils.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.table; + +import static java.util.Objects.requireNonNull; +import static java.util.concurrent.TimeUnit.SECONDS; +import static java.util.stream.Collectors.toSet; +import static org.apache.ignite.internal.TestWrappers.unwrapIgniteTransaction; +import static org.apache.ignite.internal.TestWrappers.unwrapRecordBinaryViewImpl; +import static org.apache.ignite.internal.TestWrappers.unwrapTableImpl; +import static org.apache.ignite.internal.distributionzones.rebalance.RebalanceUtil.stablePartAssignmentsKey; +import static org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.function.Function; +import org.apache.ignite.internal.affinity.Assignment; +import org.apache.ignite.internal.affinity.Assignments; +import org.apache.ignite.internal.app.IgniteImpl; +import org.apache.ignite.internal.lang.ByteArray; +import org.apache.ignite.internal.metastorage.Entry; +import org.apache.ignite.internal.metastorage.MetaStorageManager; +import org.apache.ignite.internal.placementdriver.ReplicaMeta; +import org.apache.ignite.internal.replicator.ReplicationGroupId; +import org.apache.ignite.internal.replicator.TablePartitionId; +import org.apache.ignite.internal.schema.BinaryRowEx; +import 
org.apache.ignite.internal.tx.impl.ReadWriteTransactionImpl; +import org.apache.ignite.table.Tuple; +import org.apache.ignite.tx.Transaction; +import org.jetbrains.annotations.Nullable; + +public class ItTransactionTestUtils { + public static Set partitionAssignment(IgniteImpl node, TablePartitionId grpId) { + MetaStorageManager metaStorageManager = node.metaStorageManager(); + + ByteArray stableAssignmentKey = stablePartAssignmentsKey(grpId); + + CompletableFuture assignmentEntryFut = metaStorageManager.get(stableAssignmentKey); + + assertThat(assignmentEntryFut, willCompleteSuccessfully()); + + Entry e = assignmentEntryFut.join(); + + assertNotNull(e); + assertFalse(e.empty()); + assertFalse(e.tombstone()); + + Set a = requireNonNull(Assignments.fromBytes(e.value())).nodes(); + + return a.stream().filter(Assignment::isPeer).map(Assignment::consistentId).collect(toSet()); + } + + public static int partitionIdForTuple(IgniteImpl node, String tableName, Tuple tuple, @Nullable Transaction tx) { + TableImpl table = table(node, tableName); + RecordBinaryViewImpl view = unwrapRecordBinaryViewImpl(table.recordView()); + + CompletableFuture rowFut = view.marshal(tx, tuple); + assertThat(rowFut, willCompleteSuccessfully()); + BinaryRowEx row = rowFut.join(); + + return table.internalTable().partitionId(row); + } + + public static Tuple findTupleToBeHostedOnNode( + IgniteImpl node, + String tableName, + @Nullable Transaction tx, + Tuple initialTuple, + Function nextTuple, + boolean primary + ) { + Tuple t = initialTuple; + int tableId = tableId(node, tableName); + + int maxAttempts = 100; + + while (maxAttempts >= 0) { + int partId = partitionIdForTuple(node, tableName, t, tx); + + TablePartitionId grpId = new TablePartitionId(tableId, partId); + + if (primary) { + ReplicaMeta replicaMeta = waitAndGetPrimaryReplica(node, grpId); + + if (node.id().equals(replicaMeta.getLeaseholderId())) { + return t; + } + } else { + Set assignments = partitionAssignment(node, grpId); + + if 
(assignments.contains(node.name())) { + return t; + } + } + + t = nextTuple.apply(t); + + maxAttempts--; + } + + throw new AssertionError("Failed to find a suitable tuple."); + } + + public static TableImpl table(IgniteImpl node, String tableName) { + return unwrapTableImpl(node.tables().table(tableName)); + } + + public static int tableId(IgniteImpl node, String tableName) { + return table(node, tableName).tableId(); + } + + public static UUID txId(Transaction tx) { + return ((ReadWriteTransactionImpl) unwrapIgniteTransaction(tx)).id(); + } + + public static ReplicaMeta waitAndGetPrimaryReplica(IgniteImpl node, ReplicationGroupId tblReplicationGrp) { + CompletableFuture primaryReplicaFut = node.placementDriver().awaitPrimaryReplica( + tblReplicationGrp, + node.clock().now(), + 10, + SECONDS + ); + + assertThat(primaryReplicaFut, willCompleteSuccessfully()); + + return primaryReplicaFut.join(); + } +} diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java new file mode 100644 index 000000000000..8e3de7a44a5a --- /dev/null +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java @@ -0,0 +1,504 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.table; + +import static java.util.stream.Collectors.toSet; +import static org.apache.ignite.internal.SessionUtils.executeUpdate; +import static org.apache.ignite.internal.table.ItTransactionTestUtils.findTupleToBeHostedOnNode; +import static org.apache.ignite.internal.table.ItTransactionTestUtils.partitionAssignment; +import static org.apache.ignite.internal.table.ItTransactionTestUtils.partitionIdForTuple; +import static org.apache.ignite.internal.table.ItTransactionTestUtils.table; +import static org.apache.ignite.internal.table.ItTransactionTestUtils.tableId; +import static org.apache.ignite.internal.table.ItTransactionTestUtils.txId; +import static org.apache.ignite.internal.table.ItTransactionTestUtils.waitAndGetPrimaryReplica; +import static org.apache.ignite.internal.testframework.IgniteTestUtils.waitForCondition; +import static org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully; +import static org.apache.ignite.internal.tx.TxState.COMMITTED; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Iterator; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.IntStream; +import 
org.apache.ignite.InitParametersBuilder; +import org.apache.ignite.internal.ClusterPerTestIntegrationTest; +import org.apache.ignite.internal.app.IgniteImpl; +import org.apache.ignite.internal.placementdriver.ReplicaMeta; +import org.apache.ignite.internal.replicator.TablePartitionId; +import org.apache.ignite.internal.testframework.SystemPropertiesExtension; +import org.apache.ignite.internal.testframework.WithSystemProperty; +import org.apache.ignite.internal.tx.TransactionMeta; +import org.apache.ignite.internal.tx.impl.TxManagerImpl; +import org.apache.ignite.internal.tx.message.TxCleanupMessage; +import org.apache.ignite.internal.tx.storage.state.TxStateStorage; +import org.apache.ignite.table.RecordView; +import org.apache.ignite.table.Tuple; +import org.apache.ignite.tx.Transaction; +import org.jetbrains.annotations.Nullable; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInfo; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(SystemPropertiesExtension.class) +@WithSystemProperty(key = "RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY", value = "1000") +public class ItTxResourcesVacuumTest extends ClusterPerTestIntegrationTest { + /** Table name. */ + private static final String TABLE_NAME = "test_table"; + + private static final Tuple INITIAL_TUPLE = Tuple.create().set("key", 1L).set("val", "1"); + + private static final Function NEXT_TUPLE = t -> Tuple.create() + .set("key", t.longValue("key") + 1) + .set("val", "" + (t.longValue("key") + 1)); + + private static final int REPLICAS = 2; + + /** Nodes bootstrap configuration pattern. 
*/ + private static final String NODE_BOOTSTRAP_CFG_TEMPLATE = "{\n" + + " network: {\n" + + " port: {},\n" + + " nodeFinder: {\n" + + " netClusterNodes: [ {} ]\n" + + " }\n" + + " },\n" + + " clientConnector: { port:{} },\n" + + " rest.port: {},\n" + + " raft: { responseTimeout: 30000 }," + + " compute.threadPoolSize: 1\n" + + "}"; + + @BeforeEach + @Override + public void setup(TestInfo testInfo) throws Exception { + super.setup(testInfo); + + String zoneSql = "create zone test_zone with partitions=5, replicas=" + REPLICAS; + String sql = "create table " + TABLE_NAME + " (key bigint primary key, val varchar(20)) with primary_zone='TEST_ZONE'"; + + cluster.doInSession(0, session -> { + executeUpdate(zoneSql, session); + executeUpdate(sql, session); + }); + } + + @Override + protected void customizeInitParameters(InitParametersBuilder builder) { + super.customizeInitParameters(builder); + + builder.clusterConfiguration("{" + + " transaction: {" + + " implicitTransactionTimeout: 30000," + + " txnResourceTtl: 2" + + " }," + + " replication: {" + + " rpcTimeout: 30000" + + " }," + + "}"); + } + + @Override + protected int initialNodes() { + return 3; + } + + /** + * Returns node bootstrap config template. + * + * @return Node bootstrap config template. + */ + @Override + protected String getNodeBootstrapConfigTemplate() { + return NODE_BOOTSTRAP_CFG_TEMPLATE; + } + + @Test + public void testVacuum() throws InterruptedException { + IgniteImpl node = anyNode(); + + RecordView view = node.tables().table(TABLE_NAME).recordView(); + + // Put some value into the table. 
+ Transaction tx = node.transactions().begin(); + UUID txId = txId(tx); + + log.info("Test: Loading the data [tx={}].", txId); + + Tuple tuple = findTupleToBeHostedOnNode(node, TABLE_NAME, tx, INITIAL_TUPLE, NEXT_TUPLE, true); + + int partId = partitionIdForTuple(node, TABLE_NAME, tuple, tx); + + Set nodes = partitionAssignment(node, new TablePartitionId(tableId(node, TABLE_NAME), partId)); + + view.upsert(tx, tuple); + + tx.commit(); + + checkVolatileTxStateOnNodes(nodes, txId); + waitForTxStateReplication(nodes, txId, partId, 10_000); + + waitForTxStateVacuum(txId, 0, true, 10_000); + } + + @Test + public void testVacuumWithCleanupDelay() throws InterruptedException { + IgniteImpl node = anyNode(); + + RecordView view = node.tables().table(TABLE_NAME).recordView(); + + // Put some value into the table. + Transaction tx = node.transactions().begin(); + UUID txId = txId(tx); + + log.info("Test: Loading the data [tx={}].", txId); + + Tuple tuple0 = findTupleToBeHostedOnNode(node, TABLE_NAME, tx, INITIAL_TUPLE, NEXT_TUPLE, true); + + int commitPartId = partitionIdForTuple(node, TABLE_NAME, tuple0, tx); + + TablePartitionId commitPartGrpId = new TablePartitionId(tableId(node, TABLE_NAME), commitPartId); + + ReplicaMeta replicaMeta = waitAndGetPrimaryReplica(node, commitPartGrpId); + IgniteImpl commitPartitionLeaseholder = findNode(0, initialNodes(), n -> n.id().equals(replicaMeta.getLeaseholderId())); + + Set commitPartNodes = partitionAssignment(node, new TablePartitionId(tableId(node, TABLE_NAME), commitPartId)); + + log.info("Test: Commit partition [leaseholder={}, hostingNodes={}].", commitPartitionLeaseholder.name(), commitPartNodes); + + // Some node that does not host the commit partition, will be the primary node for upserting another tuple. 
+ IgniteImpl leaseholderForAnotherTuple = findNode(0, initialNodes(), n -> !commitPartNodes.contains(n.name())); + + log.info("Test: leaseholderForAnotherTuple={}", leaseholderForAnotherTuple.name()); + + Tuple tuple1 = findTupleToBeHostedOnNode(leaseholderForAnotherTuple, TABLE_NAME, tx, INITIAL_TUPLE, NEXT_TUPLE, true); + + view.upsert(tx, tuple0); + view.upsert(tx, tuple1); + + CompletableFuture cleanupStarted = new CompletableFuture<>(); + CompletableFuture cleanupAllowed = new CompletableFuture<>(); + + commitPartitionLeaseholder.dropMessages((n, msg) -> { + if (msg instanceof TxCleanupMessage) { + cleanupStarted.complete(null); + + if (commitPartNodes.contains(n)) { + cleanupAllowed.join(); + } + } + + return false; + }); + + CompletableFuture commitFut = tx.commitAsync(); + + checkVolatileTxStateOnNodes(commitPartNodes, txId); + waitForTxStateReplication(commitPartNodes, txId, commitPartId, 10_000); + + assertThat(cleanupStarted, willCompleteSuccessfully()); + + waitForTxStateVacuum(Set.of(leaseholderForAnotherTuple.name()), txId, 0, false, 10_000); + + checkPersistentTxStateOnNodes(commitPartNodes, txId, commitPartId); + + cleanupAllowed.complete(null); + + assertThat(commitFut, willCompleteSuccessfully()); + + waitForTxStateVacuum(txId, 0, true, 10_000); + } + + @Test + public void testCommitPartitionPrimaryChangesBeforeVacuum() throws InterruptedException { + IgniteImpl node = anyNode(); + + RecordView view = node.tables().table(TABLE_NAME).recordView(); + + // Put some value into the table. 
+ Transaction tx = node.transactions().begin(); + UUID txId = txId(tx); + + log.info("Test: Loading the data [tx={}].", txId); + + Tuple tuple = findTupleToBeHostedOnNode(node, TABLE_NAME, tx, INITIAL_TUPLE, NEXT_TUPLE, true); + + int commitPartId = partitionIdForTuple(node, TABLE_NAME, tuple, tx); + + TablePartitionId commitPartGrpId = new TablePartitionId(tableId(node, TABLE_NAME), commitPartId); + + ReplicaMeta replicaMeta = waitAndGetPrimaryReplica(node, commitPartGrpId); + IgniteImpl commitPartitionLeaseholder = findNode(0, initialNodes(), n -> n.id().equals(replicaMeta.getLeaseholderId())); + + Set commitPartNodes = partitionAssignment(node, new TablePartitionId(tableId(node, TABLE_NAME), commitPartId)); + + log.info("Test: Commit partition [leaseholder={}, hostingNodes={}].", commitPartitionLeaseholder.name(), commitPartNodes); + + view.upsert(tx, tuple); + + CompletableFuture cleanupStarted = new CompletableFuture<>(); + CompletableFuture cleanupAllowedFut = new CompletableFuture<>(); + boolean[] cleanupAllowed = new boolean[1]; + + commitPartitionLeaseholder.dropMessages((n, msg) -> { + if (msg instanceof TxCleanupMessage && !cleanupAllowed[0]) { + cleanupStarted.complete(null); + + cleanupAllowedFut.join(); + + return true; + } + + return false; + }); + + CompletableFuture commitFut = tx.commitAsync(); + + assertThat(cleanupStarted, willCompleteSuccessfully()); + + //transferPrimary + + waitAndGetPrimaryReplica(node, commitPartGrpId); + + cleanupAllowedFut.complete(null); + + cleanupAllowed[0] = true; + + assertThat(commitFut, willCompleteSuccessfully()); + + waitForTxStateVacuum(txId, 0, true, 10_000); + } + + @Test + public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() throws InterruptedException { + IgniteImpl node = anyNode(); + + RecordView view = node.tables().table(TABLE_NAME).recordView(); + + // Put some value into the table. 
+ Transaction tx = node.transactions().begin(); + UUID txId = txId(tx); + + log.info("Test: Loading the data [tx={}].", txId); + + Tuple tuple = findTupleToBeHostedOnNode(node, TABLE_NAME, tx, INITIAL_TUPLE, NEXT_TUPLE, true); + + int commitPartId = partitionIdForTuple(node, TABLE_NAME, tuple, tx); + + TablePartitionId commitPartGrpId = new TablePartitionId(tableId(node, TABLE_NAME), commitPartId); + + ReplicaMeta replicaMeta = waitAndGetPrimaryReplica(node, commitPartGrpId); + IgniteImpl commitPartitionLeaseholder = findNode(0, initialNodes(), n -> n.id().equals(replicaMeta.getLeaseholderId())); + + Set commitPartNodes = partitionAssignment(node, new TablePartitionId(tableId(node, TABLE_NAME), commitPartId)); + + log.info("Test: Commit partition [leaseholder={}, hostingNodes={}].", commitPartitionLeaseholder.name(), commitPartNodes); + + view.upsert(tx, tuple); + + CompletableFuture cleanupStarted = new CompletableFuture<>(); + CompletableFuture cleanupAllowedFut = new CompletableFuture<>(); + boolean[] cleanupAllowed = new boolean[1]; + + commitPartitionLeaseholder.dropMessages((n, msg) -> { + if (msg instanceof TxCleanupMessage && !cleanupAllowed[0]) { + cleanupStarted.complete(null); + + cleanupAllowedFut.join(); + + return true; + } + + return false; + }); + + CompletableFuture commitFut = tx.commitAsync(); + + assertThat(cleanupStarted, willCompleteSuccessfully()); + + //transferPrimary + + waitAndGetPrimaryReplica(node, commitPartGrpId); + + cleanupAllowedFut.complete(null); + + cleanupAllowed[0] = true; + + assertThat(commitFut, willCompleteSuccessfully()); + + waitForTxStateVacuum(txId, 0, true, 10_000); + } + + @Test + public void testRecoveryAfterPersistentStateVacuumized() throws InterruptedException { + IgniteImpl coord0 = anyNode(); + + RecordView view0 = coord0.tables().table(TABLE_NAME).recordView(); + + // Put some value into the table. 
+ Transaction tx0 = coord0.transactions().begin(); + UUID txId0 = txId(tx0); + + log.info("Test: Transaction 0 [tx={}].", txId0); + + // Find some node other than coordinator. + IgniteImpl commitPartitionLeaseholder = findNode(0, initialNodes(), n -> !n.name().equals(coord0.name())); + + Tuple tuple0 = findTupleToBeHostedOnNode(commitPartitionLeaseholder, TABLE_NAME, tx0, INITIAL_TUPLE, NEXT_TUPLE, true); + + int commitPartId = partitionIdForTuple(commitPartitionLeaseholder, TABLE_NAME, tuple0, tx0); + + Set nodes = partitionAssignment(coord0, new TablePartitionId(tableId(coord0, TABLE_NAME), commitPartId)); + + view0.upsert(tx0, tuple0); + + CompletableFuture cleanupStarted = new CompletableFuture<>(); + boolean[] cleanupAllowed = new boolean[1]; + + commitPartitionLeaseholder.dropMessages((n, msg) -> { + if (msg instanceof TxCleanupMessage) { + cleanupStarted.complete(null); + + return cleanupAllowed[0]; + } + + return false; + }); + + tx0.commitAsync(); + + assertThat(cleanupStarted, willCompleteSuccessfully()); + + // Check that the final tx state COMMITTED is saved to the persistent tx storage. + assertTrue(waitForCondition(() -> cluster.runningNodes().filter(n -> nodes.contains(n.name())).allMatch(n -> { + TransactionMeta meta = persistentTxState(n, txId0, commitPartId); + + return meta != null && meta.txState() == COMMITTED; + }), 10_000)); + + // Stop the first transaction coordinator. + stopNode(coord0.name()); + + // No cleanup happened, waiting for vacuum on the remaining nodes that participated on tx0. + waitForTxStateVacuum(txId0, commitPartId, true, 10_000); + + // Preparing to run another tx. 
+ IgniteImpl coord1 = anyNode(); + + RecordView view1 = coord1.tables().table(TABLE_NAME).recordView(); + + Transaction tx1 = coord1.transactions().begin(); + UUID txId1 = txId(tx1); + + log.info("Test: Transaction 1 [tx={}].", txId1); + + IgniteImpl anyNodeContainingWriteIntent = findNode(0, initialNodes(), n -> nodes.contains(n.name())); + + Tuple tuple1 = findTupleToBeHostedOnNode(anyNodeContainingWriteIntent, TABLE_NAME, tx1, INITIAL_TUPLE, NEXT_TUPLE, true); + + // Tx 1 should get the data committed by tx 0. + Tuple tx0Data = view1.get(tx1, tuple1); + assertEquals(tuple0.longValue("key"), tx0Data.longValue("key")); + + tx1.commit(); + + waitForTxStateVacuum(txId0, 0, true, 10_000); + waitForTxStateVacuum(txId0, 0, true, 10_000); + } + + private boolean checkVolatileTxStateOnNodes(Set nodeConsistentIds, UUID txId) { + return cluster.runningNodes() + .filter(n -> nodeConsistentIds.contains(n.name())) + .allMatch(n -> volatileTxState(n, txId) != null); + } + + private boolean checkPersistentTxStateOnNodes(Set nodeConsistentIds, UUID txId, int partId) { + return cluster.runningNodes() + .filter(n -> nodeConsistentIds.contains(n.name())) + .allMatch(n -> persistentTxState(n, txId, partId) != null); + } + + private void waitForTxStateReplication(Set nodeConsistentIds, UUID txId, int partId, long timeMs) + throws InterruptedException { + assertTrue(waitForCondition(() -> checkPersistentTxStateOnNodes(nodeConsistentIds, txId, partId), timeMs)); + } + + private void waitForTxStateVacuum(UUID txId, int partId, boolean checkPersistent, long timeMs) throws InterruptedException { + waitForTxStateVacuum(cluster.runningNodes().map(IgniteImpl::name).collect(toSet()), txId, partId, checkPersistent, timeMs); + } + + private void waitForTxStateVacuum(Set nodeConsistentIds, UUID txId, int partId, boolean checkPersistent, long timeMs) throws InterruptedException { + boolean r = waitForCondition(() -> { + boolean result = true; + + for (Iterator iterator = 
cluster.runningNodes().iterator(); iterator.hasNext();) { + IgniteImpl node = iterator.next(); + + if (!nodeConsistentIds.contains(node.name())) { + continue; + } + + result = result + && volatileTxState(node, txId) == null && (!checkPersistent || persistentTxState(node, txId, partId) == null); + } + + return result; + }, timeMs); + + if (!r) { + cluster.runningNodes().forEach(node -> { + log.info("Test: volatile state [tx={}, node={}, state={}].", txId, node.name(), volatileTxState(node, txId)); + log.info("Test: persistent state [tx={}, node={}, state={}].", txId, node.name(), persistentTxState(node, txId, partId)); + }); + } + + assertTrue(r); + } + + private IgniteImpl anyNode() { + return runningNodes().findFirst().orElseThrow(); + } + + @Nullable + private static TransactionMeta volatileTxState(IgniteImpl node, UUID txId) { + TxManagerImpl txManager = (TxManagerImpl) node.txManager(); + return txManager.stateMeta(txId); + } + + @Nullable + private static TransactionMeta persistentTxState(IgniteImpl node, UUID txId, int partId) { + TxStateStorage txStateStorage = table(node, TABLE_NAME).internalTable().txStateStorage().getTxStateStorage(partId); + + assertNotNull(txStateStorage); + + return txStateStorage.get(txId); + } + + private IgniteImpl findNode(int startRange, int endRange, Predicate filter) { + return IntStream.range(startRange, endRange) + .mapToObj(this::node) + .filter(n -> n != null && filter.test(n)) + .findFirst() + .get(); + } +} diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/RecordBinaryViewImpl.java b/modules/table/src/main/java/org/apache/ignite/internal/table/RecordBinaryViewImpl.java index 24292c18173e..6eb4f0132f97 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/RecordBinaryViewImpl.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/RecordBinaryViewImpl.java @@ -17,6 +17,7 @@ package org.apache.ignite.internal.table; +import static 
java.util.concurrent.CompletableFuture.completedFuture; import static org.apache.ignite.internal.lang.IgniteExceptionMapperUtil.convertToPublicFuture; import java.util.ArrayList; @@ -46,6 +47,7 @@ import org.apache.ignite.table.Tuple; import org.apache.ignite.tx.Transaction; import org.jetbrains.annotations.Nullable; +import org.jetbrains.annotations.TestOnly; /** * Table view implementation for binary objects. @@ -507,4 +509,15 @@ public CompletableFuture updateAll(int partitionId, Collection rows return doOperation(null, schemaVersion -> this.tbl.updateAll(mapToBinary(rows, schemaVersion, deleted), deleted, partitionId)); } + + @TestOnly + public CompletableFuture marshal(@Nullable Transaction tx, Tuple rec) { + Objects.requireNonNull(rec); + + return doOperation(tx, schemaVersion -> { + Row row = marshal(rec, schemaVersion, false); + + return completedFuture(row); + }); + } } From 2fbaefcffecb2c8cf4273c60cc29c64bdcde1c78 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Mon, 15 Apr 2024 20:59:57 +0300 Subject: [PATCH 06/26] wip --- .../internal/table/distributed/raft/PartitionListener.java | 2 +- .../internal/tx/message/VacuumTxStateReplicaRequest.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java index 0a65b3d461ce..d68d131a1876 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java @@ -87,7 +87,7 @@ import org.jetbrains.annotations.TestOnly; /** - * Partition cmd handler. + * Partition command handler. */ public class PartitionListener implements RaftGroupListener, BeforeApplyHandler { /** Logger. 
*/ diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStateReplicaRequest.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStateReplicaRequest.java index a65f8a013d27..38b1ae7ec146 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStateReplicaRequest.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStateReplicaRequest.java @@ -22,9 +22,9 @@ import java.util.Set; import java.util.UUID; import org.apache.ignite.internal.network.annotations.Transferable; -import org.apache.ignite.internal.replicator.message.ReplicaRequest; +import org.apache.ignite.internal.replicator.message.PrimaryReplicaRequest; @Transferable(VACUUM_TX_STATE_REPLICA_REQUEST) -public interface VacuumTxStateReplicaRequest extends ReplicaRequest { +public interface VacuumTxStateReplicaRequest extends PrimaryReplicaRequest { Set transactionIds(); } From d75bbf0469722815868e79fb95b518a713e0094c Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Wed, 17 Apr 2024 20:24:16 +0300 Subject: [PATCH 07/26] fixed comments, fixed flaky tests --- .../ItPrimaryReplicaChoiceTest.java | 2 +- .../MultiActorPlacementDriverTest.java | 2 +- .../placementdriver/LeaseUpdater.java | 21 ++- .../ignite/internal/table/NodeUtils.java | 45 +++--- .../table/ItTransactionPrimaryChangeTest.java | 2 +- .../table/ItTransactionTestUtils.java | 62 +++++++- .../table/ItTxResourcesVacuumTest.java | 143 ++++++++++++------ .../distributed/raft/PartitionListener.java | 6 +- .../replicator/PartitionReplicaListener.java | 2 + .../tx/impl/PersistentTxStateVacuumizer.java | 71 +++++++-- .../tx/impl/ResourceVacuumManager.java | 2 +- .../internal/tx/impl/TxManagerImpl.java | 3 +- .../tx/impl/VolatileTxStateMetaStorage.java | 6 +- .../message/VacuumTxStateReplicaRequest.java | 3 + .../tx/message/VacuumTxStatesCommand.java | 3 + .../state/ThreadAssertingTxStateStorage.java | 8 + 
.../tx/storage/state/TxStateStorage.java | 11 ++ .../state/rocksdb/TxStateRocksDbStorage.java | 95 +++++++----- .../state/AbstractTxStateStorageTest.java | 24 ++- .../state/test/TestTxStateStorage.java | 15 ++ 20 files changed, 395 insertions(+), 131 deletions(-) diff --git a/modules/placement-driver/src/integrationTest/java/org/apache/ignite/internal/placementdriver/ItPrimaryReplicaChoiceTest.java b/modules/placement-driver/src/integrationTest/java/org/apache/ignite/internal/placementdriver/ItPrimaryReplicaChoiceTest.java index 05179d3a36a7..1bcd5210b3a0 100644 --- a/modules/placement-driver/src/integrationTest/java/org/apache/ignite/internal/placementdriver/ItPrimaryReplicaChoiceTest.java +++ b/modules/placement-driver/src/integrationTest/java/org/apache/ignite/internal/placementdriver/ItPrimaryReplicaChoiceTest.java @@ -182,7 +182,7 @@ public void testPrimaryChangeLongHandling() throws Exception { NodeUtils.transferPrimary(nodes, tblReplicationGrp, null); CompletableFuture primaryChangeTask = - IgniteTestUtils.runAsync(() -> NodeUtils.transferPrimary(nodes, tblReplicationGrp, primary)); + IgniteTestUtils.runAsync(() -> NodeUtils.transferPrimary(nodes, tblReplicationGrp, name -> name.equals(primary))); waitingForLeaderCache(tbl, primary); diff --git a/modules/placement-driver/src/integrationTest/java/org/apache/ignite/internal/placementdriver/MultiActorPlacementDriverTest.java b/modules/placement-driver/src/integrationTest/java/org/apache/ignite/internal/placementdriver/MultiActorPlacementDriverTest.java index 03884d148d2b..a01ad18bec23 100644 --- a/modules/placement-driver/src/integrationTest/java/org/apache/ignite/internal/placementdriver/MultiActorPlacementDriverTest.java +++ b/modules/placement-driver/src/integrationTest/java/org/apache/ignite/internal/placementdriver/MultiActorPlacementDriverTest.java @@ -314,7 +314,7 @@ public void testLeaseProlong() throws Exception { } @Test - public void prolongAfterActiveActorChanger() throws Exception { + public void 
prolongAfterActiveActorChanged() throws Exception { var acceptedNodeRef = new AtomicReference(); leaseGrantHandler = (msg, from, to) -> { diff --git a/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java b/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java index bc85ce3b8ff1..3ed175c099c7 100644 --- a/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java +++ b/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java @@ -17,6 +17,7 @@ package org.apache.ignite.internal.placementdriver; +import static java.util.Objects.hash; import static org.apache.ignite.internal.metastorage.dsl.Conditions.notExists; import static org.apache.ignite.internal.metastorage.dsl.Conditions.or; import static org.apache.ignite.internal.metastorage.dsl.Conditions.value; @@ -247,10 +248,15 @@ private CompletableFuture denyLease(ReplicationGroupId grpId, Lease lea * Finds a node that can be the leaseholder. * * @param assignments Replication group assignment. + * @param grpId Group id. * @param proposedConsistentId Proposed consistent id, found out of a lease negotiation. The parameter might be {@code null}. * @return Cluster node, or {@code null} if no node in assignments can be the leaseholder. */ - private @Nullable ClusterNode nextLeaseHolder(Set assignments, @Nullable String proposedConsistentId) { + private @Nullable ClusterNode nextLeaseHolder( + Set assignments, + ReplicationGroupId grpId, + @Nullable String proposedConsistentId + ) { // TODO: IGNITE-18879 Implement more intellectual algorithm to choose a node. 
ClusterNode primaryCandidate = null; @@ -267,8 +273,15 @@ private CompletableFuture denyLease(ReplicationGroupId grpId, Lease lea primaryCandidate = candidateNode; break; - } else if (primaryCandidate == null || primaryCandidate.name().hashCode() > assignment.consistentId().hashCode()) { + } else if (primaryCandidate == null) { primaryCandidate = candidateNode; + } else { + int candidateHash = hash(primaryCandidate.name(), grpId); + int assignmentHash = hash(assignment.consistentId(), grpId); + + if (candidateHash > assignmentHash) { + primaryCandidate = candidateNode; + } } } @@ -360,7 +373,7 @@ private void updateLeaseBatchInternal() { continue; } else if (agreement.isDeclined()) { // Here we initiate negotiations for UNDEFINED_AGREEMENT and retry them on newly started active actor as well. - ClusterNode candidate = nextLeaseHolder(assignments, agreement.getRedirectTo()); + ClusterNode candidate = nextLeaseHolder(assignments, grpId, agreement.getRedirectTo()); if (candidate == null) { leaseUpdateStatistics.onLeaseWithoutCandidate(); @@ -385,7 +398,7 @@ private void updateLeaseBatchInternal() { ? 
lease.getLeaseholder() : lease.proposedCandidate(); - ClusterNode candidate = nextLeaseHolder(assignments, proposedLeaseholder); + ClusterNode candidate = nextLeaseHolder(assignments, grpId, proposedLeaseholder); if (candidate == null) { leaseUpdateStatistics.onLeaseWithoutCandidate(); diff --git a/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java b/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java index cf2a29c5b9b7..2f9596972890 100644 --- a/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java +++ b/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java @@ -25,6 +25,7 @@ import java.util.Collection; import java.util.concurrent.CompletableFuture; +import java.util.function.Predicate; import org.apache.ignite.internal.app.IgniteImpl; import org.apache.ignite.internal.logger.IgniteLogger; import org.apache.ignite.internal.logger.Loggers; @@ -49,16 +50,16 @@ public class NodeUtils { * * @param nodes Nodes collection. * @param groupId Group id. - * @param preferablePrimary Primary replica name which is preferred for being primary or {@code null}. + * @param preferablePrimaryFilter Primary replica preferable nodes filter, accepts the node consistent id. * @return New primary replica name. * @throws InterruptedException If failed. 
*/ public static String transferPrimary( Collection nodes, ReplicationGroupId groupId, - @Nullable String preferablePrimary + @Nullable Predicate preferablePrimaryFilter ) throws InterruptedException { - LOG.info("Moving the primary replica [preferablePrimary=" + preferablePrimary + "]."); + LOG.info("Moving the primary replica [groupId={}].", groupId); IgniteImpl node = nodes.stream().findAny().orElseThrow(); @@ -68,33 +69,41 @@ public static String transferPrimary( .filter(n -> n.id().equals(currentLeaseholder.getLeaseholderId())) .findFirst().orElseThrow(); - if (preferablePrimary == null) { - preferablePrimary = nodes.stream() - .map(IgniteImpl::name) - .filter(n -> !n.equals(currentLeaseholder.getLeaseholder())) - .findFirst() - .orElseThrow(); - } + Predicate filter = preferablePrimaryFilter == null ? name -> true : preferablePrimaryFilter::test; + + String finalPreferablePrimary = nodes.stream() + .map(IgniteImpl::name) + .filter(n -> !n.equals(currentLeaseholder.getLeaseholder()) && filter.test(n)) + .findFirst() + .orElseThrow(); - String finalPreferablePrimary = preferablePrimary; + LOG.info("Moving the primary replica [groupId={}, currentLeaseholder={}, preferablePrimary={}].", groupId, leaseholderNode.name(), + finalPreferablePrimary); StopLeaseProlongationMessage msg = PLACEMENT_DRIVER_MESSAGES_FACTORY.stopLeaseProlongationMessage() .groupId(groupId) - .redirectProposal(preferablePrimary) + .redirectProposal(finalPreferablePrimary) .build(); nodes.forEach( n -> leaseholderNode.clusterService().messagingService().send(n.clusterService().topologyService().localMember(), msg) ); - assertTrue(waitForCondition(() -> { - ReplicaMeta newPrimaryReplica = leaseholder(node, groupId); + ReplicaMeta[] newPrimaryReplica = new ReplicaMeta[1];; + + boolean success = waitForCondition(() -> { + newPrimaryReplica[0] = leaseholder(node, groupId); - return newPrimaryReplica.getLeaseholder().equals(finalPreferablePrimary); - }, 10_000)); + return 
newPrimaryReplica[0].getLeaseholder().equals(finalPreferablePrimary); + }, 10_000); + + if (success) { + LOG.info("Primary replica moved successfully from [{}] to [{}].", currentLeaseholder.getLeaseholder(), finalPreferablePrimary); + } else { + LOG.info("Moving the primary replica failed [groupId={}, actualPrimary={}].", groupId, newPrimaryReplica[0]); + } - LOG.info("Primary replica moved successfully from [{}] to [{}].", - currentLeaseholder.getLeaseholder(), finalPreferablePrimary); + assertTrue(success); return finalPreferablePrimary; } diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java index bd38505025b2..07d5c7f17cbb 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java @@ -163,7 +163,7 @@ public void testFullTxConsistency() throws InterruptedException { assertThat(fullTxReplicationAttemptFuture, willCompleteSuccessfully()); // Changing the primary. - NodeUtils.transferPrimary(cluster.runningNodes().collect(toList()), tblReplicationGrp, txCrdNode.name()); + NodeUtils.transferPrimary(cluster.runningNodes().collect(toList()), tblReplicationGrp, name -> name.equals(txCrdNode.name())); // Start a regular transaction that increments the value. It should see the initially inserted value and its commit should // succeed. 
diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionTestUtils.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionTestUtils.java index 6e8c260cfb19..ed593c4c2b73 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionTestUtils.java +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionTestUtils.java @@ -48,7 +48,17 @@ import org.apache.ignite.tx.Transaction; import org.jetbrains.annotations.Nullable; +/** + * Test utils for transaction integration tests. + */ public class ItTransactionTestUtils { + /** + * Get the names of the nodes that are assignments of the given partition. + * + * @param node Any node in the cluster. + * @param grpId Group id. + * @return Node names. + */ public static Set partitionAssignment(IgniteImpl node, TablePartitionId grpId) { MetaStorageManager metaStorageManager = node.metaStorageManager(); @@ -69,6 +79,15 @@ public static Set partitionAssignment(IgniteImpl node, TablePartitionId return a.stream().filter(Assignment::isPeer).map(Assignment::consistentId).collect(toSet()); } + /** + * Calculate the partition id on which the given tuple would be placed. + * + * @param node Any node in the cluster. + * @param tableName Table name. + * @param tuple Data tuple. + * @param tx Transaction, if present. + * @return Partition id. + */ public static int partitionIdForTuple(IgniteImpl node, String tableName, Tuple tuple, @Nullable Transaction tx) { TableImpl table = table(node, tableName); RecordBinaryViewImpl view = unwrapRecordBinaryViewImpl(table.recordView()); @@ -80,6 +99,17 @@ public static int partitionIdForTuple(IgniteImpl node, String tableName, Tuple t return table.internalTable().partitionId(row); } + /** + * Generates some tuple that would be placed in the partition that is hosted on the given node in the cluster. + * + * @param node Node that should host the result tuple. 
+ * @param tableName Table name. + * @param tx Transaction, if present. + * @param initialTuple Initial tuple, for calculation. + * @param nextTuple This function will be used to generate new tuples in order to find suitable one. + * @param primary Whether the given node should be the primary node. + * @return Tuple that would be placed on the given node. + */ public static Tuple findTupleToBeHostedOnNode( IgniteImpl node, String tableName, @@ -101,6 +131,7 @@ public static Tuple findTupleToBeHostedOnNode( if (primary) { ReplicaMeta replicaMeta = waitAndGetPrimaryReplica(node, grpId); + System.out.println("qqq partId=" + partId + ", primary=" + replicaMeta.getLeaseholder()); if (node.id().equals(replicaMeta.getLeaseholderId())) { return t; } @@ -120,21 +151,48 @@ public static Tuple findTupleToBeHostedOnNode( throw new AssertionError("Failed to find a suitable tuple."); } + /** + * Returns table instance. + * + * @param node Ignite node. + * @param tableName Table name. + * @return Table instance. + */ public static TableImpl table(IgniteImpl node, String tableName) { return unwrapTableImpl(node.tables().table(tableName)); } + /** + * Returns the table id. + * + * @param node Any node in the cluster. + * @param tableName Table name. + * @return Table id. + */ public static int tableId(IgniteImpl node, String tableName) { return table(node, tableName).tableId(); } + /** + * Transaction id. + * + * @param tx Transaction. + * @return Transaction id. + */ public static UUID txId(Transaction tx) { return ((ReadWriteTransactionImpl) unwrapIgniteTransaction(tx)).id(); } - public static ReplicaMeta waitAndGetPrimaryReplica(IgniteImpl node, ReplicationGroupId tblReplicationGrp) { + /** + * Waits for the primary replica appearance for the given replication group and returns it. + * + * @param node Any node in the cluster. + * @param replicationGrpId Replication group. + * @return Primary replica meta. 
+ */ + public static ReplicaMeta waitAndGetPrimaryReplica(IgniteImpl node, ReplicationGroupId replicationGrpId) { CompletableFuture primaryReplicaFut = node.placementDriver().awaitPrimaryReplica( - tblReplicationGrp, + replicationGrpId, node.clock().now(), 10, SECONDS diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java index 8e3de7a44a5a..40dc1af6ef55 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java @@ -19,6 +19,7 @@ import static java.util.stream.Collectors.toSet; import static org.apache.ignite.internal.SessionUtils.executeUpdate; +import static org.apache.ignite.internal.catalog.CatalogService.DEFAULT_STORAGE_PROFILE; import static org.apache.ignite.internal.table.ItTransactionTestUtils.findTupleToBeHostedOnNode; import static org.apache.ignite.internal.table.ItTransactionTestUtils.partitionAssignment; import static org.apache.ignite.internal.table.ItTransactionTestUtils.partitionIdForTuple; @@ -26,9 +27,12 @@ import static org.apache.ignite.internal.table.ItTransactionTestUtils.tableId; import static org.apache.ignite.internal.table.ItTransactionTestUtils.txId; import static org.apache.ignite.internal.table.ItTransactionTestUtils.waitAndGetPrimaryReplica; +import static org.apache.ignite.internal.table.NodeUtils.transferPrimary; import static org.apache.ignite.internal.testframework.IgniteTestUtils.waitForCondition; import static org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully; import static org.apache.ignite.internal.tx.TxState.COMMITTED; +import static org.apache.ignite.internal.tx.impl.ResourceVacuumManager.RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY; +import static 
org.apache.ignite.internal.util.IgniteUtils.shutdownAndAwaitTermination; import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -38,9 +42,14 @@ import java.util.Set; import java.util.UUID; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; import java.util.function.Function; import java.util.function.Predicate; -import java.util.stream.IntStream; +import java.util.stream.Collectors; import org.apache.ignite.InitParametersBuilder; import org.apache.ignite.internal.ClusterPerTestIntegrationTest; import org.apache.ignite.internal.app.IgniteImpl; @@ -48,6 +57,8 @@ import org.apache.ignite.internal.replicator.TablePartitionId; import org.apache.ignite.internal.testframework.SystemPropertiesExtension; import org.apache.ignite.internal.testframework.WithSystemProperty; +import org.apache.ignite.internal.thread.IgniteThreadFactory; +import org.apache.ignite.internal.thread.ThreadOperation; import org.apache.ignite.internal.tx.TransactionMeta; import org.apache.ignite.internal.tx.impl.TxManagerImpl; import org.apache.ignite.internal.tx.message.TxCleanupMessage; @@ -56,13 +67,17 @@ import org.apache.ignite.table.Tuple; import org.apache.ignite.tx.Transaction; import org.jetbrains.annotations.Nullable; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInfo; import org.junit.jupiter.api.extension.ExtendWith; +/** + * Integration tests for tx recources vacuum. 
+ */ @ExtendWith(SystemPropertiesExtension.class) -@WithSystemProperty(key = "RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY", value = "1000") +@WithSystemProperty(key = RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY, value = "1000") public class ItTxResourcesVacuumTest extends ClusterPerTestIntegrationTest { /** Table name. */ private static final String TABLE_NAME = "test_table"; @@ -89,18 +104,32 @@ public class ItTxResourcesVacuumTest extends ClusterPerTestIntegrationTest { + " compute.threadPoolSize: 1\n" + "}"; + private ExecutorService txStateStorageExecutor = Executors.newSingleThreadExecutor(); + @BeforeEach @Override public void setup(TestInfo testInfo) throws Exception { super.setup(testInfo); - String zoneSql = "create zone test_zone with partitions=5, replicas=" + REPLICAS; + String zoneSql = "create zone test_zone with partitions=10, replicas=" + REPLICAS + + ", storage_profiles='" + DEFAULT_STORAGE_PROFILE + "'"; String sql = "create table " + TABLE_NAME + " (key bigint primary key, val varchar(20)) with primary_zone='TEST_ZONE'"; cluster.doInSession(0, session -> { executeUpdate(zoneSql, session); executeUpdate(sql, session); }); + + txStateStorageExecutor = Executors.newSingleThreadExecutor(IgniteThreadFactory.create("test", "tx-state-storage-test-pool", log, + ThreadOperation.STORAGE_READ)); + } + + @Override + @AfterEach + public void tearDown() { + shutdownAndAwaitTermination(txStateStorageExecutor, 10, TimeUnit.SECONDS); + + super.tearDown(); } @Override @@ -155,10 +184,12 @@ public void testVacuum() throws InterruptedException { tx.commit(); + log.info("Test: Tx committed [tx={}].", txId); + checkVolatileTxStateOnNodes(nodes, txId); waitForTxStateReplication(nodes, txId, partId, 10_000); - waitForTxStateVacuum(txId, 0, true, 10_000); + waitForTxStateVacuum(txId, partId, true, 10_000); } @Test @@ -180,14 +211,14 @@ public void testVacuumWithCleanupDelay() throws InterruptedException { TablePartitionId commitPartGrpId = new 
TablePartitionId(tableId(node, TABLE_NAME), commitPartId); ReplicaMeta replicaMeta = waitAndGetPrimaryReplica(node, commitPartGrpId); - IgniteImpl commitPartitionLeaseholder = findNode(0, initialNodes(), n -> n.id().equals(replicaMeta.getLeaseholderId())); + IgniteImpl commitPartitionLeaseholder = findNode(n -> n.id().equals(replicaMeta.getLeaseholderId())); Set commitPartNodes = partitionAssignment(node, new TablePartitionId(tableId(node, TABLE_NAME), commitPartId)); log.info("Test: Commit partition [leaseholder={}, hostingNodes={}].", commitPartitionLeaseholder.name(), commitPartNodes); // Some node that does not host the commit partition, will be the primary node for upserting another tuple. - IgniteImpl leaseholderForAnotherTuple = findNode(0, initialNodes(), n -> !commitPartNodes.contains(n.name())); + IgniteImpl leaseholderForAnotherTuple = findNode(n -> !commitPartNodes.contains(n.name())); log.info("Test: leaseholderForAnotherTuple={}", leaseholderForAnotherTuple.name()); @@ -203,6 +234,8 @@ public void testVacuumWithCleanupDelay() throws InterruptedException { if (msg instanceof TxCleanupMessage) { cleanupStarted.complete(null); + log.info("Test: cleanup started."); + if (commitPartNodes.contains(n)) { cleanupAllowed.join(); } @@ -226,7 +259,7 @@ public void testVacuumWithCleanupDelay() throws InterruptedException { assertThat(commitFut, willCompleteSuccessfully()); - waitForTxStateVacuum(txId, 0, true, 10_000); + waitForTxStateVacuum(txId, commitPartId, true, 10_000); } @Test @@ -248,7 +281,7 @@ public void testCommitPartitionPrimaryChangesBeforeVacuum() throws InterruptedEx TablePartitionId commitPartGrpId = new TablePartitionId(tableId(node, TABLE_NAME), commitPartId); ReplicaMeta replicaMeta = waitAndGetPrimaryReplica(node, commitPartGrpId); - IgniteImpl commitPartitionLeaseholder = findNode(0, initialNodes(), n -> n.id().equals(replicaMeta.getLeaseholderId())); + IgniteImpl commitPartitionLeaseholder = findNode(n -> 
n.id().equals(replicaMeta.getLeaseholderId())); Set commitPartNodes = partitionAssignment(node, new TablePartitionId(tableId(node, TABLE_NAME), commitPartId)); @@ -276,9 +309,7 @@ public void testCommitPartitionPrimaryChangesBeforeVacuum() throws InterruptedEx assertThat(cleanupStarted, willCompleteSuccessfully()); - //transferPrimary - - waitAndGetPrimaryReplica(node, commitPartGrpId); + transferPrimary(cluster.runningNodes().collect(toSet()), commitPartGrpId, commitPartNodes::contains); cleanupAllowedFut.complete(null); @@ -286,7 +317,7 @@ public void testCommitPartitionPrimaryChangesBeforeVacuum() throws InterruptedEx assertThat(commitFut, willCompleteSuccessfully()); - waitForTxStateVacuum(txId, 0, true, 10_000); + waitForTxStateVacuum(txId, commitPartId, true, 10_000); } @Test @@ -308,7 +339,7 @@ public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() t TablePartitionId commitPartGrpId = new TablePartitionId(tableId(node, TABLE_NAME), commitPartId); ReplicaMeta replicaMeta = waitAndGetPrimaryReplica(node, commitPartGrpId); - IgniteImpl commitPartitionLeaseholder = findNode(0, initialNodes(), n -> n.id().equals(replicaMeta.getLeaseholderId())); + IgniteImpl commitPartitionLeaseholder = findNode(n -> n.id().equals(replicaMeta.getLeaseholderId())); Set commitPartNodes = partitionAssignment(node, new TablePartitionId(tableId(node, TABLE_NAME), commitPartId)); @@ -336,9 +367,7 @@ public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() t assertThat(cleanupStarted, willCompleteSuccessfully()); - //transferPrimary - - waitAndGetPrimaryReplica(node, commitPartGrpId); + transferPrimary(cluster.runningNodes().collect(toSet()), commitPartGrpId, commitPartNodes::contains); cleanupAllowedFut.complete(null); @@ -346,12 +375,23 @@ public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() t assertThat(commitFut, willCompleteSuccessfully()); - waitForTxStateVacuum(txId, 0, true, 10_000); + 
waitForTxStateVacuum(txId, commitPartId, true, 10_000); } @Test public void testRecoveryAfterPersistentStateVacuumized() throws InterruptedException { - IgniteImpl coord0 = anyNode(); + // This node isn't going to be stopped, so let it be node 0. + IgniteImpl commitPartitionLeaseholder = cluster.runningNodes().collect(Collectors.toList()).get(0); + + Tuple tuple0 = findTupleToBeHostedOnNode(commitPartitionLeaseholder, TABLE_NAME, null, INITIAL_TUPLE, NEXT_TUPLE, true); + + int commitPartId = partitionIdForTuple(commitPartitionLeaseholder, TABLE_NAME, tuple0, null); + + Set commitPartitionNodes = partitionAssignment(commitPartitionLeaseholder, + new TablePartitionId(tableId(commitPartitionLeaseholder, TABLE_NAME), commitPartId)); + + // Choose some node that doesn't host the partition as a tx coordinator. + IgniteImpl coord0 = findNode(n -> !commitPartitionNodes.contains(n.name())); RecordView view0 = coord0.tables().table(TABLE_NAME).recordView(); @@ -361,14 +401,8 @@ public void testRecoveryAfterPersistentStateVacuumized() throws InterruptedExcep log.info("Test: Transaction 0 [tx={}].", txId0); - // Find some node other than coordinator. 
- IgniteImpl commitPartitionLeaseholder = findNode(0, initialNodes(), n -> !n.name().equals(coord0.name())); - - Tuple tuple0 = findTupleToBeHostedOnNode(commitPartitionLeaseholder, TABLE_NAME, tx0, INITIAL_TUPLE, NEXT_TUPLE, true); - - int commitPartId = partitionIdForTuple(commitPartitionLeaseholder, TABLE_NAME, tuple0, tx0); - - Set nodes = partitionAssignment(coord0, new TablePartitionId(tableId(coord0, TABLE_NAME), commitPartId)); + log.info("Test: Commit partition of transaction 0 [leaseholder={}, hostingNodes={}].", commitPartitionLeaseholder.name(), + commitPartitionNodes); view0.upsert(tx0, tuple0); @@ -385,15 +419,17 @@ public void testRecoveryAfterPersistentStateVacuumized() throws InterruptedExcep return false; }); + log.info("Test: Committing the transaction 0 [tx={}].", txId0); + tx0.commitAsync(); assertThat(cleanupStarted, willCompleteSuccessfully()); // Check that the final tx state COMMITTED is saved to the persistent tx storage. - assertTrue(waitForCondition(() -> cluster.runningNodes().filter(n -> nodes.contains(n.name())).allMatch(n -> { - TransactionMeta meta = persistentTxState(n, txId0, commitPartId); + assertTrue(waitForCondition(() -> cluster.runningNodes().filter(n -> commitPartitionNodes.contains(n.name())).allMatch(n -> { + TransactionMeta meta = persistentTxState(n, txId0, commitPartId); - return meta != null && meta.txState() == COMMITTED; + return meta != null && meta.txState() == COMMITTED; }), 10_000)); // Stop the first transaction coordinator. @@ -412,18 +448,16 @@ public void testRecoveryAfterPersistentStateVacuumized() throws InterruptedExcep log.info("Test: Transaction 1 [tx={}].", txId1); - IgniteImpl anyNodeContainingWriteIntent = findNode(0, initialNodes(), n -> nodes.contains(n.name())); - - Tuple tuple1 = findTupleToBeHostedOnNode(anyNodeContainingWriteIntent, TABLE_NAME, tx1, INITIAL_TUPLE, NEXT_TUPLE, true); - // Tx 1 should get the data committed by tx 0. 
- Tuple tx0Data = view1.get(tx1, tuple1); + Tuple keyTuple = Tuple.create().set("key", tuple0.longValue("key")); + Tuple tx0Data = view1.get(tx1, keyTuple); assertEquals(tuple0.longValue("key"), tx0Data.longValue("key")); + assertEquals(tuple0.stringValue("val"), tx0Data.stringValue("val")); tx1.commit(); - waitForTxStateVacuum(txId0, 0, true, 10_000); - waitForTxStateVacuum(txId0, 0, true, 10_000); + waitForTxStateVacuum(txId0, commitPartId, true, 10_000); + waitForTxStateVacuum(txId0, commitPartId, true, 10_000); } private boolean checkVolatileTxStateOnNodes(Set nodeConsistentIds, UUID txId) { @@ -447,7 +481,8 @@ private void waitForTxStateVacuum(UUID txId, int partId, boolean checkPersistent waitForTxStateVacuum(cluster.runningNodes().map(IgniteImpl::name).collect(toSet()), txId, partId, checkPersistent, timeMs); } - private void waitForTxStateVacuum(Set nodeConsistentIds, UUID txId, int partId, boolean checkPersistent, long timeMs) throws InterruptedException { + private void waitForTxStateVacuum(Set nodeConsistentIds, UUID txId, int partId, boolean checkPersistent, long timeMs) + throws InterruptedException { boolean r = waitForCondition(() -> { boolean result = true; @@ -486,19 +521,39 @@ private static TransactionMeta volatileTxState(IgniteImpl node, UUID txId) { } @Nullable - private static TransactionMeta persistentTxState(IgniteImpl node, UUID txId, int partId) { - TxStateStorage txStateStorage = table(node, TABLE_NAME).internalTable().txStateStorage().getTxStateStorage(partId); + private TransactionMeta persistentTxState(IgniteImpl node, UUID txId, int partId) { + TransactionMeta[] meta = new TransactionMeta[1]; + + Future f = txStateStorageExecutor.submit(() -> { + TxStateStorage txStateStorage = table(node, TABLE_NAME).internalTable().txStateStorage().getTxStateStorage(partId); - assertNotNull(txStateStorage); + assertNotNull(txStateStorage); + + meta[0] = txStateStorage.get(txId); + }); - return txStateStorage.get(txId); + try { + f.get(); + } catch 
(InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + + return meta[0]; } - private IgniteImpl findNode(int startRange, int endRange, Predicate filter) { - return IntStream.range(startRange, endRange) - .mapToObj(this::node) + private IgniteImpl findNode(Predicate filter) { + return cluster.runningNodes() .filter(n -> n != null && filter.test(n)) .findFirst() .get(); } + + private Transaction startTx(IgniteImpl coordinator) { + Transaction tx = coordinator.transactions().begin(); + UUID txId = txId(tx); + + log.info("Test: Transaction 0 [tx={}].", txId); + + return tx; + } } diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java index d68d131a1876..c11fb66b788f 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java @@ -658,9 +658,9 @@ private void handleVacuumTxStatesCommand(VacuumTxStatesCommand cmd, long command return; } - for (UUID txId : cmd.txIds()) { - txStateStorage.remove(txId, commandIndex, commandTerm); - } + LOG.info("qqq vacuum cmd=" + cmd); + + txStateStorage.removeAll(cmd.txIds(), commandIndex, commandTerm); } private static void onTxStateStorageCasFail(UUID txId, TxMeta txMetaBeforeCas, TxMeta txMetaToSet) { diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java index 5b1541112f32..40a8b56736e0 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java +++ 
b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java @@ -4083,6 +4083,8 @@ private CompletableFuture processVacuumTxStateReplicaRequest(VacuumTxStateRep .txIds(request.transactionIds()) .build(); + LOG.info("qqq vacuum req=" + request); + return raftClient.run(cmd); } diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java index 50571902fe1b..e09ad6a20626 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java @@ -18,6 +18,8 @@ package org.apache.ignite.internal.tx.impl; import static org.apache.ignite.internal.util.CompletableFutures.allOf; +import static org.apache.ignite.internal.util.CompletableFutures.nullCompletedFuture; +import static org.apache.ignite.internal.util.ExceptionUtils.unwrapCause; import java.util.ArrayList; import java.util.List; @@ -27,15 +29,23 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.ignite.internal.hlc.ClockService; +import org.apache.ignite.internal.hlc.HybridTimestamp; import org.apache.ignite.internal.lang.IgniteBiTuple; import org.apache.ignite.internal.logger.IgniteLogger; import org.apache.ignite.internal.logger.Loggers; +import org.apache.ignite.internal.placementdriver.PlacementDriver; +import org.apache.ignite.internal.placementdriver.ReplicaMeta; import org.apache.ignite.internal.replicator.ReplicaService; import org.apache.ignite.internal.replicator.TablePartitionId; +import org.apache.ignite.internal.replicator.exception.PrimaryReplicaMissException; import org.apache.ignite.internal.tx.message.TxMessagesFactory; 
import org.apache.ignite.internal.tx.message.VacuumTxStateReplicaRequest; import org.apache.ignite.network.ClusterNode; +/** + * Implements the logic of persistent tx states vacuum. + */ public class PersistentTxStateVacuumizer { private static final IgniteLogger LOG = Loggers.forClass(PersistentTxStateVacuumizer.class); @@ -45,35 +55,72 @@ public class PersistentTxStateVacuumizer { private final ClusterNode localNode; + private final ClockService clockService; + + private final PlacementDriver placementDriver; + + /** + * Constructor. + * + * @param replicaService Replica service. + * @param localNode Local node. + * @param clockService Clock service. + * @param placementDriver Placement driver. + */ public PersistentTxStateVacuumizer( ReplicaService replicaService, - ClusterNode localNode) { + ClusterNode localNode, + ClockService clockService, + PlacementDriver placementDriver + ) { this.replicaService = replicaService; this.localNode = localNode; + this.clockService = clockService; + this.placementDriver = placementDriver; } + /** + * Vacuum persistent tx states. + * + * @param txIds Transaction ids to vacuum; map of commit partition ids to sets of tx ids. + * @return A future. 
+ */ public CompletableFuture, Integer>> vacuumPersistentTxStates(Map> txIds) { Set successful = ConcurrentHashMap.newKeySet(); AtomicInteger unsuccessfulCount = new AtomicInteger(0); List> futures = new ArrayList<>(); + HybridTimestamp now = clockService.now(); txIds.forEach((commitPartitionId, txs) -> { - VacuumTxStateReplicaRequest request = TX_MESSAGES_FACTORY.vacuumTxStateReplicaRequest() - .groupId(commitPartitionId) - .transactionIds(txs) - .build(); + ReplicaMeta replicaMeta = placementDriver.getPrimaryReplica(commitPartitionId, now).join(); - CompletableFuture future = replicaService.invoke(localNode, request).whenComplete((v, e) -> { - if (e == null) { - successful.addAll(txs); + if (replicaMeta != null) { + VacuumTxStateReplicaRequest request = TX_MESSAGES_FACTORY.vacuumTxStateReplicaRequest() + .enlistmentConsistencyToken(replicaMeta.getStartTime().longValue()) + .groupId(commitPartitionId) + .transactionIds(txs) + .build(); + + CompletableFuture future; + + if (localNode.id().equals(replicaMeta.getLeaseholderId())) { + future = replicaService.invoke(localNode, request).whenComplete((v, e) -> { + if (e == null) { + successful.addAll(txs); + } else if (!(unwrapCause(e) instanceof PrimaryReplicaMissException)) { + LOG.warn("Failed to vacuum tx states from the persistent storage.", e); + + unsuccessfulCount.incrementAndGet(); + } + }); } else { - LOG.warn("Failed to vacuum tx states from the persistent storage.", e); + successful.addAll(txs); - unsuccessfulCount.incrementAndGet(); + future = nullCompletedFuture(); } - }); - futures.add(future); + futures.add(future); + } }); return allOf(futures.toArray(new CompletableFuture[0])) diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/ResourceVacuumManager.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/ResourceVacuumManager.java index 65e77fc3bc04..564a87141405 100644 --- 
a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/ResourceVacuumManager.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/ResourceVacuumManager.java @@ -50,7 +50,7 @@ public class ResourceVacuumManager implements IgniteComponent { public static final String RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY = "RESOURCE_VACUUM_INTERVAL_MILLISECONDS"; private final int resourceVacuumIntervalMilliseconds = IgniteSystemProperties - .getInteger(RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY, 1_000); + .getInteger(RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY, 30_000); private final FinishedReadOnlyTransactionTracker finishedReadOnlyTransactionTracker; diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java index 6851ac8edb41..c401eea8c395 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java @@ -753,7 +753,8 @@ public CompletableFuture start() { messagingService.addMessageHandler(ReplicaMessageGroup.class, this); - persistentTxStateVacuumizer = new PersistentTxStateVacuumizer(replicaService, topologyService.localMember()); + persistentTxStateVacuumizer = new PersistentTxStateVacuumizer(replicaService, topologyService.localMember(), clockService, + placementDriver); txStateVolatileStorage.start(); diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java index d85741e5b984..a59180c650ed 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java +++ 
b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java @@ -144,6 +144,8 @@ public void vacuum( Map> txIds = new HashMap<>(); Map timestamps = new HashMap<>(); + UUID uuid = UUID.randomUUID(); + txStateMap.forEach((txId, meta) -> { txStateMap.computeIfPresent(txId, (txId0, meta0) -> { if (TxState.isFinalState(meta0.txState())) { @@ -191,6 +193,8 @@ public void vacuum( .thenAccept(tuple -> { Set successful = tuple.get1(); + LOG.info("qqq vacuum volatile failed=" + tuple.get2() + ", txIds=" + txIds + ", successful=" + successful); + for (UUID txId : successful) { txStateMap.compute(txId, (k, v) -> { if (v == null) { @@ -198,7 +202,7 @@ public void vacuum( } else { Long cleanupCompletionTs = timestamps.get(txId); - return (cleanupCompletionTs != null && Objects.equals(cleanupCompletionTs, v.cleanupCompletionTimestamp())) + return (cleanupCompletionTs == null || Objects.equals(cleanupCompletionTs, v.cleanupCompletionTimestamp())) ? null : v; } diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStateReplicaRequest.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStateReplicaRequest.java index 38b1ae7ec146..8c287db93124 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStateReplicaRequest.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStateReplicaRequest.java @@ -24,6 +24,9 @@ import org.apache.ignite.internal.network.annotations.Transferable; import org.apache.ignite.internal.replicator.message.PrimaryReplicaRequest; +/** + * Request that is sent to vacuumize the transaction states. 
+ */ @Transferable(VACUUM_TX_STATE_REPLICA_REQUEST) public interface VacuumTxStateReplicaRequest extends PrimaryReplicaRequest { Set transactionIds(); diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStatesCommand.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStatesCommand.java index 190aeab82c5b..3e6d10fb53e3 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStatesCommand.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/message/VacuumTxStatesCommand.java @@ -24,6 +24,9 @@ import org.apache.ignite.internal.network.annotations.Transferable; import org.apache.ignite.internal.raft.WriteCommand; +/** + * Command that vacuumizes the transaction states. + */ @Transferable(VACUUM_TX_STATE_COMMAND) public interface VacuumTxStatesCommand extends WriteCommand { Set txIds(); diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/ThreadAssertingTxStateStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/ThreadAssertingTxStateStorage.java index 029482626501..a51b3d0f7469 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/ThreadAssertingTxStateStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/ThreadAssertingTxStateStorage.java @@ -22,6 +22,7 @@ import static org.apache.ignite.internal.worker.ThreadAssertions.assertThreadAllowsToRead; import static org.apache.ignite.internal.worker.ThreadAssertions.assertThreadAllowsToWrite; +import java.util.Collection; import java.util.UUID; import java.util.concurrent.CompletableFuture; import org.apache.ignite.internal.lang.IgniteBiTuple; @@ -73,6 +74,13 @@ public void remove(UUID txId, long commandIndex, long commandTerm) { storage.remove(txId, commandIndex, commandTerm); } + @Override + public void removeAll(Collection 
txIds, long commandIndex, long commandTerm) { + assertThreadAllowsToWrite(); + + storage.removeAll(txIds, commandIndex, commandTerm); + } + @Override public Cursor> scan() { assertThreadAllowsTo(TX_STATE_STORAGE_ACCESS); diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/TxStateStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/TxStateStorage.java index 81c42d754d3c..b9f0579ed020 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/TxStateStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/TxStateStorage.java @@ -17,6 +17,7 @@ package org.apache.ignite.internal.tx.storage.state; +import java.util.Collection; import java.util.UUID; import java.util.concurrent.CompletableFuture; import org.apache.ignite.internal.close.ManuallyCloseable; @@ -80,6 +81,16 @@ public interface TxStateStorage extends ManuallyCloseable { */ void remove(UUID txId, long commandIndex, long commandTerm); + /** + * Removes all the given transaction metas from the storage. + * + * @param txIds Tx ids. + * @param commandIndex New value for {@link #lastAppliedIndex()}. + * @param commandTerm New value for {@link #lastAppliedTerm()}. + * @throws IgniteInternalException with {@link Transactions#TX_STATE_STORAGE_ERR} error code in case when the operation has failed. + */ + void removeAll(Collection txIds, long commandIndex, long commandTerm); + /** * Creates a cursor to scan all data in the storage. 
* diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/rocksdb/TxStateRocksDbStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/rocksdb/TxStateRocksDbStorage.java index 53439cc0bdcd..f97761d81d17 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/rocksdb/TxStateRocksDbStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/rocksdb/TxStateRocksDbStorage.java @@ -30,6 +30,7 @@ import static org.apache.ignite.lang.ErrorGroups.Transactions.TX_STATE_STORAGE_STOPPED_ERR; import java.nio.ByteBuffer; +import java.util.Collection; import java.util.Objects; import java.util.Set; import java.util.UUID; @@ -169,62 +170,66 @@ public void put(UUID txId, TxMeta txMeta) { @Override public boolean compareAndSet(UUID txId, @Nullable TxState txStateExpected, TxMeta txMeta, long commandIndex, long commandTerm) { - return busy(() -> { - try (WriteBatch writeBatch = new WriteBatch()) { - byte[] txIdBytes = txIdToKey(txId); + return updateData(writeBatch -> { + byte[] txIdBytes = txIdToKey(txId); - byte[] txMetaExistingBytes = sharedStorage.db().get(sharedStorage.readOptions, txIdToKey(txId)); + byte[] txMetaExistingBytes = sharedStorage.db().get(sharedStorage.readOptions, txIdToKey(txId)); - boolean result; + boolean result; - if (txMetaExistingBytes == null && txStateExpected == null) { - writeBatch.put(txIdBytes, toBytes(txMeta)); + if (txMetaExistingBytes == null && txStateExpected == null) { + writeBatch.put(txIdBytes, toBytes(txMeta)); - result = true; - } else { - if (txMetaExistingBytes != null) { - TxMeta txMetaExisting = fromBytes(txMetaExistingBytes); + result = true; + } else { + if (txMetaExistingBytes != null) { + TxMeta txMetaExisting = fromBytes(txMetaExistingBytes); - if (txMetaExisting.txState() == txStateExpected) { - writeBatch.put(txIdBytes, toBytes(txMeta)); + if (txMetaExisting.txState() == 
txStateExpected) { + writeBatch.put(txIdBytes, toBytes(txMeta)); - result = true; - } else { - result = txMetaExisting.txState() == txMeta.txState() - && Objects.equals(txMetaExisting.commitTimestamp(), txMeta.commitTimestamp()); - } + result = true; } else { - result = false; + result = txMetaExisting.txState() == txMeta.txState() + && Objects.equals(txMetaExisting.commitTimestamp(), txMeta.commitTimestamp()); } + } else { + result = false; } + } - // If the store is in the process of rebalancing, then there is no need to update lastAppliedIndex and lastAppliedTerm. - // This is necessary to prevent a situation where, in the middle of the rebalance, the node will be restarted and we will - // have non-consistent storage. They will be updated by either #abortRebalance() or #finishRebalance(long, long). - if (state.get() != StorageState.REBALANCE) { - updateLastApplied(writeBatch, commandIndex, commandTerm); - } + return result; + }, commandIndex, commandTerm); + } - sharedStorage.db().write(sharedStorage.writeOptions, writeBatch); + @Override + public void remove(UUID txId, long commandIndex, long commandTerm) { + updateData(writeBatch -> { + throwExceptionIfStorageInProgressOfRebalance(); - return result; - } catch (RocksDBException e) { - throw new IgniteInternalException( - TX_STATE_STORAGE_ERR, - format("Failed perform CAS operation over a value in storage: [{}]", createStorageInfo()), - e - ); - } - }); + writeBatch.delete(txIdToKey(txId)); + + return null; + }, commandIndex, commandTerm); } @Override - public void remove(UUID txId, long commandIndex, long commandTerm) { - busy(() -> { - try (WriteBatch writeBatch = new WriteBatch()) { - throwExceptionIfStorageInProgressOfRebalance(); + public void removeAll(Collection txIds, long commandIndex, long commandTerm) { + updateData(writeBatch -> { + throwExceptionIfStorageInProgressOfRebalance(); + for (UUID txId : txIds) { writeBatch.delete(txIdToKey(txId)); + } + + return null; + }, commandIndex, commandTerm); + 
} + + private T updateData(WriteClosure writeClosure, long commandIndex, long commandTerm) { + return (T) busy(() -> { + try (WriteBatch writeBatch = new WriteBatch()) { + Object result = writeClosure.apply(writeBatch); // If the store is in the process of rebalancing, then there is no need to update lastAppliedIndex and lastAppliedTerm. // This is necessary to prevent a situation where, in the middle of the rebalance, the node will be restarted and we will @@ -235,11 +240,11 @@ public void remove(UUID txId, long commandIndex, long commandTerm) { sharedStorage.db().write(sharedStorage.writeOptions, writeBatch); - return null; + return result; } catch (RocksDBException e) { throw new IgniteInternalException( TX_STATE_STORAGE_ERR, - format("Failed to remove a value from storage: [{}]", createStorageInfo()), + format("Failed to update data in the storage: [{}]", createStorageInfo()), e ); } @@ -655,4 +660,12 @@ private enum StorageState { /** Storage is in the process of cleanup. */ CLEANUP } + + /** + * Write closure. 
+ */ + @FunctionalInterface + private interface WriteClosure { + T apply(WriteBatch writeBatch) throws RocksDBException; + } } diff --git a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java index 0d6357b4fbbd..54b2c9e0eb1c 100644 --- a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java +++ b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java @@ -39,9 +39,12 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.UUID; +import java.util.function.BiConsumer; import java.util.stream.IntStream; import org.apache.ignite.internal.hlc.HybridTimestamp; import org.apache.ignite.internal.lang.IgniteBiTuple; @@ -83,6 +86,21 @@ protected void afterTest() throws Exception { @Test public void testPutGetRemove() { + testPutGetRemove0((storage, txIds) -> { + int index = 0; + + for (UUID txId : txIds) { + storage.remove(txId, index++, 1); + } + }); + } + + @Test + public void testPutGetRemoveAll() { + testPutGetRemove0((storage, txIds) -> storage.removeAll(txIds, 1, 1)); + } + + public void testPutGetRemove0(BiConsumer> removeOp) { TxStateStorage storage = tableStorage.getOrCreateTxStateStorage(0); List txIds = new ArrayList<>(); @@ -101,12 +119,16 @@ public void testPutGetRemove() { assertEquals(txMetaExpected, txMeta); } + Set toRemove = new HashSet<>(); + for (int i = 0; i < 100; i++) { if (i % 2 == 0) { - storage.remove(txIds.get(i), i, 1); + toRemove.add(txIds.get(i)); } } + removeOp.accept(storage, toRemove); + for (int i = 0; i < 100; i++) { if (i % 2 == 0) { TxMeta txMeta = storage.get(txIds.get(i)); diff --git 
a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/test/TestTxStateStorage.java b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/test/TestTxStateStorage.java index 047dd1c8a49b..e2c8050b637a 100644 --- a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/test/TestTxStateStorage.java +++ b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/test/TestTxStateStorage.java @@ -22,6 +22,7 @@ import static org.apache.ignite.lang.ErrorGroups.Transactions.TX_STATE_STORAGE_REBALANCE_ERR; import static org.apache.ignite.lang.ErrorGroups.Transactions.TX_STATE_STORAGE_STOPPED_ERR; +import java.util.Collection; import java.util.Iterator; import java.util.Objects; import java.util.UUID; @@ -110,6 +111,20 @@ public void remove(UUID txId, long commandIndex, long commandTerm) { } } + @Override + public void removeAll(Collection txIds, long commandIndex, long commandTerm) { + checkStorageClosedOrInProgressOfRebalance(); + + for (UUID txId : txIds) { + storage.remove(txId); + } + + if (rebalanceFutureReference.get() == null) { + lastAppliedIndex = commandIndex; + lastAppliedTerm = commandTerm; + } + } + @Override public Cursor> scan() { checkStorageClosedOrInProgressOfRebalance(); From 0ed34a0decc8f8f7616525c301935c1c7497c6ce Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Wed, 17 Apr 2024 22:32:49 +0300 Subject: [PATCH 08/26] code style, pmd --- .../internal/table/RecordBinaryViewImpl.java | 29 ++++++++++++------- .../tx/impl/VolatileTxStateMetaStorage.java | 2 -- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/RecordBinaryViewImpl.java b/modules/table/src/main/java/org/apache/ignite/internal/table/RecordBinaryViewImpl.java index 8428a14ac010..7eb1d6d80f70 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/RecordBinaryViewImpl.java +++ 
b/modules/table/src/main/java/org/apache/ignite/internal/table/RecordBinaryViewImpl.java @@ -404,6 +404,24 @@ private static Row marshal(Tuple tuple, TupleMarshaller marshaller, boolean keyO } } + /** + * Marshal a tuple to a row. Test-only public method. + * + * @param tx Transaction, if present. + * @param rec Tuple record. + * @return A future, with row as a result. + */ + @TestOnly + public CompletableFuture marshal(@Nullable Transaction tx, Tuple rec) { + Objects.requireNonNull(rec); + + return doOperation(tx, schemaVersion -> { + Row row = marshal(rec, schemaVersion, false); + + return completedFuture(row); + }); + } + /** * Returns table row tuple. * @@ -510,15 +528,4 @@ public CompletableFuture updateAll(int partitionId, Collection rows return doOperation(null, schemaVersion -> this.tbl.updateAll(mapToBinary(rows, schemaVersion, deleted), deleted, partitionId)); } - - @TestOnly - public CompletableFuture marshal(@Nullable Transaction tx, Tuple rec) { - Objects.requireNonNull(rec); - - return doOperation(tx, schemaVersion -> { - Row row = marshal(rec, schemaVersion, false); - - return completedFuture(row); - }); - } } diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java index a59180c650ed..83930288678b 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java @@ -144,8 +144,6 @@ public void vacuum( Map> txIds = new HashMap<>(); Map timestamps = new HashMap<>(); - UUID uuid = UUID.randomUUID(); - txStateMap.forEach((txId, meta) -> { txStateMap.computeIfPresent(txId, (txId0, meta0) -> { if (TxState.isFinalState(meta0.txState())) { From d96eb706c239b7e9847ab9f693d28761359b2520 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Wed, 17 
Apr 2024 22:52:49 +0300 Subject: [PATCH 09/26] checkstyle --- .../internal/tx/storage/state/AbstractTxStateStorageTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java index 54b2c9e0eb1c..d67301928bbe 100644 --- a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java +++ b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java @@ -100,7 +100,7 @@ public void testPutGetRemoveAll() { testPutGetRemove0((storage, txIds) -> storage.removeAll(txIds, 1, 1)); } - public void testPutGetRemove0(BiConsumer> removeOp) { + private void testPutGetRemove0(BiConsumer> removeOp) { TxStateStorage storage = tableStorage.getOrCreateTxStateStorage(0); List txIds = new ArrayList<>(); From 29869a9645b660315019121d3f3e3fb11e102459 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Thu, 18 Apr 2024 10:41:23 +0300 Subject: [PATCH 10/26] some refactoring --- .../table/ItTransactionTestUtils.java | 1 - .../distributed/raft/PartitionListener.java | 60 +++++++++---------- .../replicator/PartitionReplicaListener.java | 2 - .../tx/impl/PersistentTxStateVacuumizer.java | 11 +--- .../internal/tx/impl/TxManagerImpl.java | 4 +- .../tx/impl/VolatileTxStateMetaStorage.java | 50 ++++++++-------- 6 files changed, 59 insertions(+), 69 deletions(-) diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionTestUtils.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionTestUtils.java index ed593c4c2b73..e285971b5958 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionTestUtils.java +++ 
b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionTestUtils.java @@ -131,7 +131,6 @@ public static Tuple findTupleToBeHostedOnNode( if (primary) { ReplicaMeta replicaMeta = waitAndGetPrimaryReplica(node, grpId); - System.out.println("qqq partId=" + partId + ", primary=" + replicaMeta.getLeaseholder()); if (node.id().equals(replicaMeta.getLeaseholderId())) { return t; } diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java index c11fb66b788f..a65ae1d546f3 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java @@ -173,7 +173,7 @@ public void onWrite(Iterator> iterator) { long proposedSafeTime = cmd.safeTime().longValue(); // Because of clock.tick it's guaranteed that two different commands will have different safe timestamps. - // maxObservableSafeTime may match proposedSafeTime only if it is the cmd that was previously validated and then retried + // maxObservableSafeTime may match proposedSafeTime only if it is the command that was previously validated and then retried // by raft client because of either TimeoutException or inner raft server recoverable exception. 
assert proposedSafeTime >= maxObservableSafeTimeVerifier : "Safe time reordering detected [current=" + maxObservableSafeTimeVerifier + ", proposed=" + proposedSafeTime + "]"; @@ -189,13 +189,13 @@ public void onWrite(Iterator> iterator) { long storagesAppliedIndex = Math.min(storage.lastAppliedIndex(), txStateStorage.lastAppliedIndex()); assert commandIndex > storagesAppliedIndex : - "Write cmd must have an index greater than that of storages [commandIndex=" + commandIndex + "Write command must have an index greater than that of storages [commandIndex=" + commandIndex + ", mvAppliedIndex=" + storage.lastAppliedIndex() + ", txStateAppliedIndex=" + txStateStorage.lastAppliedIndex() + "]"; Serializable result = null; - // NB: Make sure that ANY cmd we accept here updates lastAppliedIndex+term info in one of the underlying + // NB: Make sure that ANY command we accept here updates lastAppliedIndex+term info in one of the underlying // storages! // Otherwise, a gap between lastAppliedIndex from the point of view of JRaft and our storage might appear. // If a leader has such a gap, and does doSnapshot(), it will subsequently truncate its log too aggressively @@ -232,7 +232,7 @@ public void onWrite(Iterator> iterator) { result = e.getCause(); } catch (Throwable t) { LOG.error( - "Unknown error while processing cmd [commandIndex={}, commandTerm={}, cmd={}]", + "Unknown error while processing command [commandIndex={}, commandTerm={}, command={}]", t, clo.index(), clo.index(), command ); @@ -264,17 +264,17 @@ public void onWrite(Iterator> iterator) { * Handler for the {@link UpdateCommand}. * * @param cmd Command. - * @param commandIndex Index of the RAFT cmd. - * @param commandTerm Term of the RAFT cmd. + * @param commandIndex Index of the RAFT command. + * @param commandTerm Term of the RAFT command. 
*/ private UpdateCommandResult handleUpdateCommand(UpdateCommand cmd, long commandIndex, long commandTerm) { - // Skips the write cmd because the storage has already executed it. + // Skips the write command because the storage has already executed it. if (commandIndex <= storage.lastAppliedIndex()) { return new UpdateCommandResult(true); } if (cmd.leaseStartTime() != null) { - long leaseStartTime = requireNonNull(cmd.leaseStartTime(), "Inconsistent lease information in cmd [cmd=" + cmd + "]."); + long leaseStartTime = requireNonNull(cmd.leaseStartTime(), "Inconsistent lease information in command [cmd=" + cmd + "]."); long storageLeaseStartTime = storage.leaseStartTime(); @@ -317,17 +317,17 @@ private UpdateCommandResult handleUpdateCommand(UpdateCommand cmd, long commandI * Handler for the {@link UpdateAllCommand}. * * @param cmd Command. - * @param commandIndex Index of the RAFT cmd. - * @param commandTerm Term of the RAFT cmd. + * @param commandIndex Index of the RAFT command. + * @param commandTerm Term of the RAFT command. */ private UpdateCommandResult handleUpdateAllCommand(UpdateAllCommand cmd, long commandIndex, long commandTerm) { - // Skips the write cmd because the storage has already executed it. + // Skips the write command because the storage has already executed it. if (commandIndex <= storage.lastAppliedIndex()) { return new UpdateCommandResult(true); } if (cmd.leaseStartTime() != null) { - long leaseStartTime = requireNonNull(cmd.leaseStartTime(), "Inconsistent lease information in cmd [cmd=" + cmd + "]."); + long leaseStartTime = requireNonNull(cmd.leaseStartTime(), "Inconsistent lease information in command [cmd=" + cmd + "]."); long storageLeaseStartTime = storage.leaseStartTime(); @@ -368,14 +368,14 @@ private UpdateCommandResult handleUpdateAllCommand(UpdateAllCommand cmd, long co * Handler for the {@link FinishTxCommand}. * * @param cmd Command. - * @param commandIndex Index of the RAFT cmd. - * @param commandTerm Term of the RAFT cmd. 
+ * @param commandIndex Index of the RAFT command. + * @param commandTerm Term of the RAFT command. * @return The actually stored transaction state {@link TransactionResult}. * @throws IgniteInternalException if an exception occurred during a transaction state change. */ private @Nullable TransactionResult handleFinishTxCommand(FinishTxCommand cmd, long commandIndex, long commandTerm) throws IgniteInternalException { - // Skips the write cmd because the storage has already executed it. + // Skips the write command because the storage has already executed it. if (commandIndex <= txStateStorage.lastAppliedIndex()) { return null; } @@ -400,7 +400,7 @@ private UpdateCommandResult handleUpdateAllCommand(UpdateAllCommand cmd, long co commandTerm ); - // Assume that we handle the finish cmd only on the commit partition. + // Assume that we handle the finish command only on the commit partition. TablePartitionId commitPartitionId = new TablePartitionId(storage.tableId(), storage.partitionId()); markFinished(txId, cmd.commit(), cmd.commitTimestamp(), commitPartitionId); @@ -428,11 +428,11 @@ private static List fromPartitionIdMessage(List= maxObservableSafeTime) { maxObservableSafeTime = proposedSafeTime; @@ -580,11 +580,11 @@ public MvPartitionStorage getMvStorage() { * Handler for the {@link BuildIndexCommand}. * * @param cmd Command. - * @param commandIndex RAFT index of the cmd. - * @param commandTerm RAFT term of the cmd. + * @param commandIndex RAFT index of the command. + * @param commandTerm RAFT term of the command. */ void handleBuildIndexCommand(BuildIndexCommand cmd, long commandIndex, long commandTerm) { - // Skips the write cmd because the storage has already executed it. + // Skips the write command because the storage has already executed it. if (commandIndex <= storage.lastAppliedIndex()) { return; } @@ -631,7 +631,7 @@ void handleBuildIndexCommand(BuildIndexCommand cmd, long commandIndex, long comm * @param commandTerm Command term. 
*/ private void handlePrimaryReplicaChangeCommand(PrimaryReplicaChangeCommand cmd, long commandIndex, long commandTerm) { - // Skips the write cmd because the storage has already executed it. + // Skips the write command because the storage has already executed it. if (commandIndex <= storage.lastAppliedIndex()) { return; } @@ -653,13 +653,11 @@ private void handlePrimaryReplicaChangeCommand(PrimaryReplicaChangeCommand cmd, * @param commandTerm Command term. */ private void handleVacuumTxStatesCommand(VacuumTxStatesCommand cmd, long commandIndex, long commandTerm) { - // Skips the write cmd because the storage has already executed it. + // Skips the write command because the storage has already executed it. if (commandIndex <= storage.lastAppliedIndex()) { return; } - LOG.info("qqq vacuum cmd=" + cmd); - txStateStorage.removeAll(cmd.txIds(), commandIndex, commandTerm); } diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java index 448415e1fa14..67e63c085dcf 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java @@ -4083,8 +4083,6 @@ private CompletableFuture processVacuumTxStateReplicaRequest(VacuumTxStateRep .txIds(request.transactionIds()) .build(); - LOG.info("qqq vacuum req=" + request); - return raftClient.run(cmd); } diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java index e09ad6a20626..968eb7f5c26d 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java +++ 
b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java @@ -28,10 +28,8 @@ import java.util.UUID; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicInteger; import org.apache.ignite.internal.hlc.ClockService; import org.apache.ignite.internal.hlc.HybridTimestamp; -import org.apache.ignite.internal.lang.IgniteBiTuple; import org.apache.ignite.internal.logger.IgniteLogger; import org.apache.ignite.internal.logger.Loggers; import org.apache.ignite.internal.placementdriver.PlacementDriver; @@ -83,11 +81,10 @@ public PersistentTxStateVacuumizer( * Vacuum persistent tx states. * * @param txIds Transaction ids to vacuum; map of commit partition ids to sets of tx ids. - * @return A future. + * @return A future, result is the set of successfully vacuumized txn states. */ - public CompletableFuture, Integer>> vacuumPersistentTxStates(Map> txIds) { + public CompletableFuture> vacuumPersistentTxStates(Map> txIds) { Set successful = ConcurrentHashMap.newKeySet(); - AtomicInteger unsuccessfulCount = new AtomicInteger(0); List> futures = new ArrayList<>(); HybridTimestamp now = clockService.now(); @@ -109,8 +106,6 @@ public CompletableFuture, Integer>> vacuumPersistentTxSt successful.addAll(txs); } else if (!(unwrapCause(e) instanceof PrimaryReplicaMissException)) { LOG.warn("Failed to vacuum tx states from the persistent storage.", e); - - unsuccessfulCount.incrementAndGet(); } }); } else { @@ -124,6 +119,6 @@ public CompletableFuture, Integer>> vacuumPersistentTxSt }); return allOf(futures.toArray(new CompletableFuture[0])) - .handle((unused, unusedEx) -> new IgniteBiTuple<>(successful, unsuccessfulCount.get())); + .handle((unused, unusedEx) -> successful); } } diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java 
b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java index c401eea8c395..2c84a337a460 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java @@ -134,7 +134,7 @@ public class TxManagerImpl implements TxManager, NetworkMessageHandler { private final TransactionIdGenerator transactionIdGenerator; /** The local state storage. */ - private final VolatileTxStateMetaStorage txStateVolatileStorage; + private final VolatileTxStateMetaStorage txStateVolatileStorage = new VolatileTxStateMetaStorage(); /** Future of a read-only transaction by it {@link TxIdAndTimestamp}. */ private final ConcurrentNavigableMap> readOnlyTxFutureById = new ConcurrentSkipListMap<>( @@ -327,8 +327,6 @@ public TxManagerImpl( var writeIntentSwitchProcessor = new WriteIntentSwitchProcessor(placementDriverHelper, txMessageSender, topologyService); - txStateVolatileStorage = new VolatileTxStateMetaStorage(); - txCleanupRequestHandler = new TxCleanupRequestHandler( messagingService, lockManager, diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java index 83930288678b..2e18a32715ba 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java @@ -33,7 +33,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; -import org.apache.ignite.internal.lang.IgniteBiTuple; import org.apache.ignite.internal.logger.IgniteLogger; import org.apache.ignite.internal.logger.Loggers; import 
org.apache.ignite.internal.replicator.TablePartitionId; @@ -50,6 +49,8 @@ public class VolatileTxStateMetaStorage { /** The local map for tx states. */ private ConcurrentHashMap txStateMap; + private final AtomicInteger persistentStatesVaccumizedLastIteration = new AtomicInteger(); + /** * Starts the storage. */ @@ -132,7 +133,7 @@ public Collection states() { public void vacuum( long vacuumObservationTimestamp, long txnResourceTtl, - Function>, CompletableFuture, Integer>>> beforeVacuum + Function>, CompletableFuture>> persistentVacuumOp ) { LOG.info("Vacuum started [vacuumObservationTimestamp={}, txnResourceTtl={}].", vacuumObservationTimestamp, txnResourceTtl); @@ -142,7 +143,7 @@ public void vacuum( AtomicInteger skippedForFurtherProcessingUnfinishedTxnsCount = new AtomicInteger(0); Map> txIds = new HashMap<>(); - Map timestamps = new HashMap<>(); + Map cleanupCompletionTimestamps = new HashMap<>(); txStateMap.forEach((txId, meta) -> { txStateMap.computeIfPresent(txId, (txId0, meta0) -> { @@ -169,7 +170,7 @@ public void vacuum( ids.add(txId); if (cleanupCompletionTimestamp != null) { - timestamps.put(txId, cleanupCompletionTimestamp); + cleanupCompletionTimestamps.put(txId, cleanupCompletionTimestamp); } return meta0; @@ -187,38 +188,39 @@ public void vacuum( }); }); - beforeVacuum.apply(txIds) - .thenAccept(tuple -> { - Set successful = tuple.get1(); + int vacuumizedPersistentTxnStatesCount = persistentStatesVaccumizedLastIteration.getAndSet(0); + + LOG.info("Vacuum finished [vacuumObservationTimestamp={}, txnResourceTtl={}, " + + "vacuumizedTxnsCount={}, " + + "vacuumizedPersistentTxnStatesCount={}, " + + "markedAsInitiallyDetectedTxnsCount={}, " + + "alreadyMarkedTxnsCount={}, " + + "skippedForFurtherProcessingUnfinishedTxnsCount={}].", + vacuumObservationTimestamp, + txnResourceTtl, + vacuumizedTxnsCount, + vacuumizedPersistentTxnStatesCount, + markedAsInitiallyDetectedTxnsCount, + alreadyMarkedTxnsCount, + skippedForFurtherProcessingUnfinishedTxnsCount + 
); - LOG.info("qqq vacuum volatile failed=" + tuple.get2() + ", txIds=" + txIds + ", successful=" + successful); + persistentVacuumOp.apply(txIds) + .thenAccept(successful -> { for (UUID txId : successful) { txStateMap.compute(txId, (k, v) -> { if (v == null) { return null; } else { - Long cleanupCompletionTs = timestamps.get(txId); + Long cleanupCompletionTs = cleanupCompletionTimestamps.get(txId); - return (cleanupCompletionTs == null || Objects.equals(cleanupCompletionTs, v.cleanupCompletionTimestamp())) - ? null - : v; + return (Objects.equals(cleanupCompletionTs, v.cleanupCompletionTimestamp())) ? null : v; } }); } - LOG.info("Vacuum finished [vacuumObservationTimestamp={}, txnResourceTtl={}, vacuumizedTxnsCount={}," - + "vacuumizedPersistentTxnStatesCount={}, " - + " markedAsInitiallyDetectedTxnsCount={}, alreadyMarkedTxnsCount={}, " - + "skippedForFurtherProcessingUnfinishedTxnsCount={}].", - vacuumObservationTimestamp, - txnResourceTtl, - vacuumizedTxnsCount, - successful.size(), - markedAsInitiallyDetectedTxnsCount, - alreadyMarkedTxnsCount, - skippedForFurtherProcessingUnfinishedTxnsCount - ); + persistentStatesVaccumizedLastIteration.addAndGet(successful.size()); }); } From 83f764b601125a15e2cb22f6c31462d5d5652dc7 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Thu, 18 Apr 2024 12:21:43 +0300 Subject: [PATCH 11/26] added retries to NodeUtils#transferPrimary --- .../ignite/internal/table/NodeUtils.java | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java b/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java index 2f9596972890..3e24676b8df5 100644 --- a/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java +++ b/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java @@ -80,22 +80,25 @@ public static String transferPrimary( LOG.info("Moving the 
primary replica [groupId={}, currentLeaseholder={}, preferablePrimary={}].", groupId, leaseholderNode.name(), finalPreferablePrimary); - StopLeaseProlongationMessage msg = PLACEMENT_DRIVER_MESSAGES_FACTORY.stopLeaseProlongationMessage() - .groupId(groupId) - .redirectProposal(finalPreferablePrimary) - .build(); + ReplicaMeta[] newPrimaryReplica = new ReplicaMeta[1]; + boolean stopLeaseNeeded[] = { true }; - nodes.forEach( - n -> leaseholderNode.clusterService().messagingService().send(n.clusterService().topologyService().localMember(), msg) - ); + boolean success = waitForCondition(() -> { + if (stopLeaseNeeded[0]) { + stopLeaseProlongation(nodes, leaseholderNode, groupId, finalPreferablePrimary); + } - ReplicaMeta[] newPrimaryReplica = new ReplicaMeta[1];; + ReplicaMeta previousPrimary = newPrimaryReplica[0] == null ? currentLeaseholder : newPrimaryReplica[0]; - boolean success = waitForCondition(() -> { newPrimaryReplica[0] = leaseholder(node, groupId); + // If the lease is changed to not suitable one, then stopLeaseProlongation will be retried, otherwise the cycle will be stopped. 
+ stopLeaseNeeded[0] = + !previousPrimary.getStartTime().equals(newPrimaryReplica[0].getStartTime()) // if lease changed + || !previousPrimary.getExpirationTime().equals(newPrimaryReplica[0].getExpirationTime()); // if lease prolonged + return newPrimaryReplica[0].getLeaseholder().equals(finalPreferablePrimary); - }, 10_000); + }, 30_000); if (success) { LOG.info("Primary replica moved successfully from [{}] to [{}].", currentLeaseholder.getLeaseholder(), finalPreferablePrimary); @@ -108,6 +111,18 @@ public static String transferPrimary( return finalPreferablePrimary; } + private static void stopLeaseProlongation(Collection nodes, IgniteImpl leaseholderNode, ReplicationGroupId groupId, + String preferablePrimary) { + StopLeaseProlongationMessage msg = PLACEMENT_DRIVER_MESSAGES_FACTORY.stopLeaseProlongationMessage() + .groupId(groupId) + .redirectProposal(preferablePrimary) + .build(); + + nodes.forEach( + n -> leaseholderNode.clusterService().messagingService().send(n.clusterService().topologyService().localMember(), msg) + ); + } + private static ReplicaMeta leaseholder(IgniteImpl node, ReplicationGroupId groupId) { CompletableFuture leaseholderFuture = node.placementDriver().awaitPrimaryReplica( groupId, From a1d7e205f04ce0dc32c027b503d41a932528fcbf Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Thu, 18 Apr 2024 12:24:50 +0300 Subject: [PATCH 12/26] increased partitions count for test --- .../apache/ignite/internal/table/ItTxResourcesVacuumTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java index 40dc1af6ef55..3b31c9f36d96 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java +++ 
b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java @@ -111,7 +111,7 @@ public class ItTxResourcesVacuumTest extends ClusterPerTestIntegrationTest { public void setup(TestInfo testInfo) throws Exception { super.setup(testInfo); - String zoneSql = "create zone test_zone with partitions=10, replicas=" + REPLICAS + String zoneSql = "create zone test_zone with partitions=20, replicas=" + REPLICAS + ", storage_profiles='" + DEFAULT_STORAGE_PROFILE + "'"; String sql = "create table " + TABLE_NAME + " (key bigint primary key, val varchar(20)) with primary_zone='TEST_ZONE'"; From 91a69232014edd0837418b2c452c2d6ae2336351 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Thu, 18 Apr 2024 12:40:37 +0300 Subject: [PATCH 13/26] checkstyle fix --- .../java/org/apache/ignite/internal/table/NodeUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java b/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java index 3e24676b8df5..989a0c9cc388 100644 --- a/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java +++ b/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java @@ -81,7 +81,7 @@ public static String transferPrimary( finalPreferablePrimary); ReplicaMeta[] newPrimaryReplica = new ReplicaMeta[1]; - boolean stopLeaseNeeded[] = { true }; + boolean[] stopLeaseNeeded = { true }; boolean success = waitForCondition(() -> { if (stopLeaseNeeded[0]) { From a5c7e88b9b270b586525ca400a6bacc7ddcfd9c0 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Wed, 24 Apr 2024 20:25:20 +0300 Subject: [PATCH 14/26] fixes after review --- modules/compute/build.gradle | 1 + modules/placement-driver/build.gradle | 1 + .../ItPrimaryReplicaChoiceTest.java | 8 +- .../placementdriver/LeaseUpdater.java | 21 ++ .../internal/table/ItDurableFinishTest.java | 4 +- 
.../ignite/internal/table/NodeUtils.java | 32 +++ modules/sql-engine/build.gradle | 1 + .../table/ItTransactionPrimaryChangeTest.java | 4 +- .../table/ItTransactionRecoveryTest.java | 2 +- .../table/ItTxResourcesVacuumTest.java | 257 ++++++++++++++++-- .../internal/table/RecordBinaryViewImpl.java | 4 +- .../distributed/raft/PartitionListener.java | 4 +- .../replicator/TransactionStateResolver.java | 2 +- modules/transactions/build.gradle | 7 + .../tx/impl/PersistentTxStateVacuumizer.java | 51 ++-- .../tx/impl/VolatileTxStateMetaStorage.java | 7 +- .../state/ThreadAssertingTxStateStorage.java | 8 - .../tx/storage/state/TxStateStorage.java | 11 - .../state/rocksdb/TxStateRocksDbStorage.java | 95 +++---- .../apache/ignite/internal/TestWrappers.java | 0 .../state/AbstractTxStateStorageTest.java | 24 +- .../state/test/TestTxStateStorage.java | 15 - .../tx/test}/ItTransactionTestUtils.java | 6 +- 23 files changed, 382 insertions(+), 183 deletions(-) rename modules/{runner => transactions}/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java (100%) rename modules/{table/src/integrationTest/java/org/apache/ignite/internal/table => transactions/src/testFixtures/java/org/apache/ignite/internal/tx/test}/ItTransactionTestUtils.java (96%) diff --git a/modules/compute/build.gradle b/modules/compute/build.gradle index 4e69e03b8a23..aac160922d86 100644 --- a/modules/compute/build.gradle +++ b/modules/compute/build.gradle @@ -58,6 +58,7 @@ dependencies { integrationTestImplementation project(':ignite-client') integrationTestImplementation testFixtures(project(':ignite-core')) integrationTestImplementation testFixtures(project(':ignite-runner')) + integrationTestImplementation testFixtures(project(':ignite-transactions')) } description = 'ignite-compute' diff --git a/modules/placement-driver/build.gradle b/modules/placement-driver/build.gradle index 9efc3a557f20..7626f984c5f8 100644 --- a/modules/placement-driver/build.gradle +++ b/modules/placement-driver/build.gradle 
@@ -66,6 +66,7 @@ dependencies { integrationTestImplementation(testFixtures(project(':ignite-distribution-zones'))) integrationTestImplementation(testFixtures(project(':ignite-runner'))) integrationTestImplementation(testFixtures(project(':ignite-replicator'))) + integrationTestImplementation(testFixtures(project(':ignite-transactions'))) testImplementation(testFixtures(project(':ignite-core'))) testImplementation(testFixtures(project(':ignite-metastorage'))) diff --git a/modules/placement-driver/src/integrationTest/java/org/apache/ignite/internal/placementdriver/ItPrimaryReplicaChoiceTest.java b/modules/placement-driver/src/integrationTest/java/org/apache/ignite/internal/placementdriver/ItPrimaryReplicaChoiceTest.java index 1bcd5210b3a0..c6523efc8254 100644 --- a/modules/placement-driver/src/integrationTest/java/org/apache/ignite/internal/placementdriver/ItPrimaryReplicaChoiceTest.java +++ b/modules/placement-driver/src/integrationTest/java/org/apache/ignite/internal/placementdriver/ItPrimaryReplicaChoiceTest.java @@ -147,7 +147,7 @@ public void testPrimaryChangeSubscription() throws Exception { return falseCompletedFuture(); }); - NodeUtils.transferPrimary(cluster.runningNodes().collect(toSet()), tblReplicationGrp, null); + NodeUtils.transferPrimary(cluster.runningNodes().collect(toSet()), tblReplicationGrp); assertTrue(waitForCondition(primaryChanged::get, 10_000)); } @@ -179,10 +179,10 @@ public void testPrimaryChangeLongHandling() throws Exception { Collection nodes = cluster.runningNodes().collect(toSet()); - NodeUtils.transferPrimary(nodes, tblReplicationGrp, null); + NodeUtils.transferPrimary(nodes, tblReplicationGrp); CompletableFuture primaryChangeTask = - IgniteTestUtils.runAsync(() -> NodeUtils.transferPrimary(nodes, tblReplicationGrp, name -> name.equals(primary))); + IgniteTestUtils.runAsync(() -> NodeUtils.transferPrimary(nodes, tblReplicationGrp, primary)); waitingForLeaderCache(tbl, primary); @@ -264,7 +264,7 @@ public void 
testClearingTransactionResourcesWhenPrimaryChange() throws Exception assertTrue(primaryIgnite.txManager().lockManager().locks(rwTx.id()).hasNext()); assertEquals(6, partitionStorage.pendingCursors() + hashIdxStorage.pendingCursors() + sortedIdxStorage.pendingCursors()); - NodeUtils.transferPrimary(cluster.runningNodes().collect(toSet()), tblReplicationGrp, null); + NodeUtils.transferPrimary(cluster.runningNodes().collect(toSet()), tblReplicationGrp); assertTrue(primaryIgnite.txManager().lockManager().locks(rwTx.id()).hasNext()); assertEquals(6, partitionStorage.pendingCursors() + hashIdxStorage.pendingCursors() + sortedIdxStorage.pendingCursors()); diff --git a/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java b/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java index 3ed175c099c7..ae690400063a 100644 --- a/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java +++ b/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java @@ -17,7 +17,11 @@ package org.apache.ignite.internal.placementdriver; +import static java.lang.Math.abs; +import static java.util.Comparator.comparing; import static java.util.Objects.hash; +import static java.util.stream.Collectors.toList; +import static org.apache.ignite.internal.affinity.Assignment.forPeer; import static org.apache.ignite.internal.metastorage.dsl.Conditions.notExists; import static org.apache.ignite.internal.metastorage.dsl.Conditions.or; import static org.apache.ignite.internal.metastorage.dsl.Conditions.value; @@ -286,6 +290,23 @@ private CompletableFuture denyLease(ReplicationGroupId grpId, Lease lea } return primaryCandidate; + /*if (proposedConsistentId != null && assignments.contains(forPeer(proposedConsistentId))) { + ClusterNode proposedCandidate = topologyTracker.nodeByConsistentId(proposedConsistentId); + + if (proposedCandidate != 
null) { + return proposedCandidate; + } + } + + List onlineNodes = assignments.stream() + .map(a -> topologyTracker.nodeByConsistentId(a.consistentId())) + .filter(Objects::nonNull) + .sorted(comparing(ClusterNode::name)) + .collect(toList()); + + int hash = abs(hash(assignments, grpId)); + + return onlineNodes.get(hash % onlineNodes.size());*/ } /** Returns {@code true} if active. */ diff --git a/modules/runner/src/integrationTest/java/org/apache/ignite/internal/table/ItDurableFinishTest.java b/modules/runner/src/integrationTest/java/org/apache/ignite/internal/table/ItDurableFinishTest.java index 02f1b08a4eb6..a36a38868542 100644 --- a/modules/runner/src/integrationTest/java/org/apache/ignite/internal/table/ItDurableFinishTest.java +++ b/modules/runner/src/integrationTest/java/org/apache/ignite/internal/table/ItDurableFinishTest.java @@ -188,7 +188,7 @@ private CompletableFuture changePrimaryOnFinish(IgniteImpl coordinatorNode logger().info("Start transferring primary."); - NodeUtils.transferPrimary(cluster.runningNodes().collect(toSet()), defaultTablePartitionId(node(0)), null); + NodeUtils.transferPrimary(cluster.runningNodes().collect(toSet()), defaultTablePartitionId(node(0))); } catch (InterruptedException e) { throw new RuntimeException(e); } finally { @@ -287,7 +287,7 @@ private CompletableFuture changePrimaryOnCleanup(IgniteImpl primaryNode) { logger().info("Start transferring primary."); - NodeUtils.transferPrimary(cluster.runningNodes().collect(toSet()), defaultTablePartitionId(node(0)), null); + NodeUtils.transferPrimary(cluster.runningNodes().collect(toSet()), defaultTablePartitionId(node(0))); } catch (InterruptedException e) { throw new RuntimeException(e); } finally { diff --git a/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java b/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java index 989a0c9cc388..6a574d9a89f9 100644 --- 
a/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java +++ b/modules/runner/src/testFixtures/java/org/apache/ignite/internal/table/NodeUtils.java @@ -45,6 +45,38 @@ public class NodeUtils { private static final PlacementDriverMessagesFactory PLACEMENT_DRIVER_MESSAGES_FACTORY = new PlacementDriverMessagesFactory(); + /** + * Transfers the primary rights to another node, choosing any node from the cluster except the current leaseholder. + * + * @param nodes Nodes collection. + * @param groupId Group id. + * @return New primary replica name. + * @throws InterruptedException If failed. + */ + public static String transferPrimary( + Collection nodes, + ReplicationGroupId groupId + ) throws InterruptedException { + return transferPrimary(nodes, groupId, (Predicate) null); + } + + /** + * Transfers the primary rights to another node. + * + * @param nodes Nodes collection. + * @param groupId Group id. + * @param preferablePrimary Primary replica preferable node name. + * @return New primary replica name. + * @throws InterruptedException If failed. + */ + public static String transferPrimary( + Collection nodes, + ReplicationGroupId groupId, + String preferablePrimary + ) throws InterruptedException { + return transferPrimary(nodes, groupId, s -> s.equals(preferablePrimary)); + } + /** * Transfers the primary rights to another node. 
* diff --git a/modules/sql-engine/build.gradle b/modules/sql-engine/build.gradle index b24dbd327dfb..c409dbd8c617 100644 --- a/modules/sql-engine/build.gradle +++ b/modules/sql-engine/build.gradle @@ -126,6 +126,7 @@ dependencies { integrationTestImplementation testFixtures(project(':ignite-core')) integrationTestImplementation testFixtures(project(':ignite-schema')) integrationTestImplementation testFixtures(project(':ignite-sql-engine')) + integrationTestImplementation testFixtures(project(':ignite-transactions')) integrationTestImplementation testFixtures(project(':ignite-table')) integrationTestImplementation testFixtures(project(':ignite-runner')) integrationTestImplementation libs.jetbrains.annotations diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java index 07d5c7f17cbb..b1493dccc578 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java @@ -22,7 +22,7 @@ import static org.apache.ignite.internal.TestDefaultProfilesNames.DEFAULT_AIPERSIST_PROFILE_NAME; import static org.apache.ignite.internal.TestWrappers.unwrapIgniteTransaction; import static org.apache.ignite.internal.TestWrappers.unwrapTableImpl; -import static org.apache.ignite.internal.table.ItTransactionTestUtils.waitAndGetPrimaryReplica; +import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.waitAndGetPrimaryReplica; import static org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully; import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -163,7 +163,7 @@ public void testFullTxConsistency() throws InterruptedException { 
assertThat(fullTxReplicationAttemptFuture, willCompleteSuccessfully()); // Changing the primary. - NodeUtils.transferPrimary(cluster.runningNodes().collect(toList()), tblReplicationGrp, name -> name.equals(txCrdNode.name())); + NodeUtils.transferPrimary(cluster.runningNodes().collect(toList()), tblReplicationGrp, txCrdNode.name()); // Start a regular transaction that increments the value. It should see the initially inserted value and its commit should // succeed. diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionRecoveryTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionRecoveryTest.java index 2cfa49dccb91..4d89b294ce10 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionRecoveryTest.java +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionRecoveryTest.java @@ -21,7 +21,7 @@ import static org.apache.ignite.internal.SessionUtils.executeUpdate; import static org.apache.ignite.internal.TestWrappers.unwrapTableImpl; import static org.apache.ignite.internal.catalog.CatalogService.DEFAULT_STORAGE_PROFILE; -import static org.apache.ignite.internal.table.ItTransactionTestUtils.waitAndGetPrimaryReplica; +import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.waitAndGetPrimaryReplica; import static org.apache.ignite.internal.testframework.IgniteTestUtils.bypassingThreadAssertions; import static org.apache.ignite.internal.testframework.IgniteTestUtils.waitForCondition; import static org.apache.ignite.internal.testframework.matchers.CompletableFutureExceptionMatcher.willThrow; diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java index 3b31c9f36d96..6be31a13d80c 100644 --- 
a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java @@ -20,22 +20,24 @@ import static java.util.stream.Collectors.toSet; import static org.apache.ignite.internal.SessionUtils.executeUpdate; import static org.apache.ignite.internal.catalog.CatalogService.DEFAULT_STORAGE_PROFILE; -import static org.apache.ignite.internal.table.ItTransactionTestUtils.findTupleToBeHostedOnNode; -import static org.apache.ignite.internal.table.ItTransactionTestUtils.partitionAssignment; -import static org.apache.ignite.internal.table.ItTransactionTestUtils.partitionIdForTuple; -import static org.apache.ignite.internal.table.ItTransactionTestUtils.table; -import static org.apache.ignite.internal.table.ItTransactionTestUtils.tableId; -import static org.apache.ignite.internal.table.ItTransactionTestUtils.txId; -import static org.apache.ignite.internal.table.ItTransactionTestUtils.waitAndGetPrimaryReplica; +import static org.apache.ignite.internal.tx.TxState.FINISHING; +import static org.apache.ignite.internal.tx.impl.ResourceVacuumManager.RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY; +import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.findTupleToBeHostedOnNode; +import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.partitionAssignment; +import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.partitionIdForTuple; +import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.table; +import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.tableId; +import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.txId; +import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.waitAndGetPrimaryReplica; import static org.apache.ignite.internal.table.NodeUtils.transferPrimary; import static 
org.apache.ignite.internal.testframework.IgniteTestUtils.waitForCondition; import static org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully; import static org.apache.ignite.internal.tx.TxState.COMMITTED; -import static org.apache.ignite.internal.tx.impl.ResourceVacuumManager.RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY; import static org.apache.ignite.internal.util.IgniteUtils.shutdownAndAwaitTermination; import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.Iterator; @@ -49,7 +51,6 @@ import java.util.concurrent.TimeUnit; import java.util.function.Function; import java.util.function.Predicate; -import java.util.stream.Collectors; import org.apache.ignite.InitParametersBuilder; import org.apache.ignite.internal.ClusterPerTestIntegrationTest; import org.apache.ignite.internal.app.IgniteImpl; @@ -62,10 +63,12 @@ import org.apache.ignite.internal.tx.TransactionMeta; import org.apache.ignite.internal.tx.impl.TxManagerImpl; import org.apache.ignite.internal.tx.message.TxCleanupMessage; +import org.apache.ignite.internal.tx.message.TxFinishReplicaRequest; import org.apache.ignite.internal.tx.storage.state.TxStateStorage; import org.apache.ignite.table.RecordView; import org.apache.ignite.table.Tuple; import org.apache.ignite.tx.Transaction; +import org.apache.ignite.tx.TransactionOptions; import org.jetbrains.annotations.Nullable; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -138,8 +141,7 @@ protected void customizeInitParameters(InitParametersBuilder builder) { builder.clusterConfiguration("{" + " transaction: {" - + " implicitTransactionTimeout: 30000," - + " txnResourceTtl: 2" + + " txnResourceTtl: 1" + " }," + " replication: 
{" + " rpcTimeout: 30000" @@ -147,11 +149,6 @@ protected void customizeInitParameters(InitParametersBuilder builder) { + "}"); } - @Override - protected int initialNodes() { - return 3; - } - /** * Returns node bootstrap config template. * @@ -162,6 +159,22 @@ protected String getNodeBootstrapConfigTemplate() { return NODE_BOOTSTRAP_CFG_TEMPLATE; } + /** + * Simple vacuum test, checking also that PENDING and FINISHING states are not removed. + * + *
    + *
  • Run a transaction;
  • + *
  • Insert a value;
  • + *
  • Wait 3 seconds;
  • + *
  • Check that the volatile PENDING state of the transaction is preserved;
  • + *
  • Block {@link TxFinishReplicaRequest};
  • + *
  • Start the tx commit;
  • + *
  • While the state is FINISHING, wait 3 seconds;
  • + *
  • Check that the volatile state of the transaction is preserved;
  • + *
  • Unblock {@link TxFinishReplicaRequest};
  • + *
  • Check that both volatile and persistent states are vacuumized.
  • + *
+ */ @Test public void testVacuum() throws InterruptedException { IgniteImpl node = anyNode(); @@ -182,16 +195,101 @@ public void testVacuum() throws InterruptedException { view.upsert(tx, tuple); - tx.commit(); + Thread.sleep(3000); + + assertTrue(checkVolatileTxStateOnNodes(nodes, txId)); + + CompletableFuture finishAllowedFuture = new CompletableFuture<>(); + + node.dropMessages((n, msg) -> { + if (msg instanceof TxFinishReplicaRequest) { + finishAllowedFuture.join(); + } + + return false; + }); + + CompletableFuture commitFut = tx.commitAsync(); + + assertEquals(FINISHING, volatileTxState(node, txId).txState()); + + Thread.sleep(3000); + + assertTrue(checkVolatileTxStateOnNodes(nodes, txId)); + + finishAllowedFuture.complete(null); + + assertThat(commitFut, willCompleteSuccessfully()); log.info("Test: Tx committed [tx={}].", txId); - checkVolatileTxStateOnNodes(nodes, txId); waitForTxStateReplication(nodes, txId, partId, 10_000); waitForTxStateVacuum(txId, partId, true, 10_000); } + /** + * Check that the ABANDONED transaction state is preserved until recovery. + * + *
    + *
  • Start a transaction from a coordinator that is not included in the commit partition group;
  • + *
  • Insert a value;
  • + *
  • Stop the tx coordinator;
  • + *
  • Wait 3 seconds;
  • + *
  • Check that the volatile state of the transaction is preserved;
  • + *
  • Try to read the value using another transaction, which starts the tx recovery;
  • + *
  • Check that the abandoned transaction is recovered; its volatile and persistent states are vacuumized;
  • + *
  • Check that abandoned tx is rolled back and thus the value is null.
  • + *
+ */ + @Test + public void testAbandonedTxnsAreNotVacuumizedUntilRecovered() throws InterruptedException { + IgniteImpl leaseholder = cluster.node(0); + + Tuple tuple = findTupleToBeHostedOnNode(leaseholder, TABLE_NAME, null, INITIAL_TUPLE, NEXT_TUPLE, true); + + int partId = partitionIdForTuple(anyNode(), TABLE_NAME, tuple, null); + + TablePartitionId groupId = new TablePartitionId(tableId(anyNode(), TABLE_NAME), partId); + + Set txNodes = partitionAssignment(anyNode(), groupId); + + IgniteImpl abandonedTxCoord = findNode(n -> !txNodes.contains(n.name())); + + RecordView view = abandonedTxCoord.tables().table(TABLE_NAME).recordView(); + + Transaction abandonedTx = abandonedTxCoord.transactions().begin(); + UUID abandonedTxId = txId(abandonedTx); + view.upsert(abandonedTx, tuple); + + stopNode(abandonedTxCoord.name()); + + Thread.sleep(3000); + + assertTrue(checkVolatileTxStateOnNodes(txNodes, abandonedTxId)); + + RecordView viewLh = leaseholder.tables().table(TABLE_NAME).recordView(); + Tuple value = viewLh.get(null, Tuple.create().set("key", tuple.longValue("key"))); + assertNull(value); + + waitForTxStateVacuum(txNodes, abandonedTxId, partId, true, 10_000); + } + + /** + * Check that the tx state on commit partition is vacuumized only when cleanup is completed. + * + *
    + *
  • Start a transaction;
  • + *
  • Generate some tuple and determine on which nodes it would be hosted;
  • + *
  • Choose one more node that doesn't host the first tuple and choose a tuple that will be sent to this node as primary;
  • + *
  • Upsert both tuples within a transaction;
  • + *
  • Block {@link TxCleanupMessage}-s from commit partition primary;
  • + *
  • Start a tx commit;
  • + *
  • Wait for vacuum completion on a node that doesn't host the commit partition;
  • + *
  • Unblock {@link TxCleanupMessage}-s;
  • + *
  • Wait for the cleanup on the commit partition group.
  • + *
+ */ @Test public void testVacuumWithCleanupDelay() throws InterruptedException { IgniteImpl node = anyNode(); @@ -244,16 +342,16 @@ public void testVacuumWithCleanupDelay() throws InterruptedException { return false; }); + assertTrue(checkVolatileTxStateOnNodes(commitPartNodes, txId)); CompletableFuture commitFut = tx.commitAsync(); - checkVolatileTxStateOnNodes(commitPartNodes, txId); waitForTxStateReplication(commitPartNodes, txId, commitPartId, 10_000); assertThat(cleanupStarted, willCompleteSuccessfully()); waitForTxStateVacuum(Set.of(leaseholderForAnotherTuple.name()), txId, 0, false, 10_000); - checkPersistentTxStateOnNodes(commitPartNodes, txId, commitPartId); + assertTrue(checkPersistentTxStateOnNodes(commitPartNodes, txId, commitPartId)); cleanupAllowed.complete(null); @@ -262,6 +360,19 @@ public void testVacuumWithCleanupDelay() throws InterruptedException { waitForTxStateVacuum(txId, commitPartId, true, 10_000); } + /** + * Check that the tx state on commit partition is vacuumized only when cleanup is completed. + * + *
    + *
  • Start a transaction;
  • + *
  • Upsert a value;
  • + *
  • Block {@link TxCleanupMessage}-s;
  • + *
  • Start a tx commit;
  • + *
  • Transfer the primary replica;
  • + *
  • Unblock the {@link TxCleanupMessage}-s;
  • + *
  • Ensure that tx states are finally vacuumized.
  • + *
+ */ @Test public void testCommitPartitionPrimaryChangesBeforeVacuum() throws InterruptedException { IgniteImpl node = anyNode(); @@ -378,10 +489,24 @@ public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() t waitForTxStateVacuum(txId, commitPartId, true, 10_000); } + /** + * Checks that the tx recovery doesn't change tx finish result from COMMITTED to ABORTED if it once saved in the persistent storage. + * + *
    + *
  • Start a transaction tx0;
  • + *
  • Upsert some value;
  • + *
  • Block {@link TxCleanupMessage}-s;
  • + *
  • Start the commit of tx0 and wait for tx state COMMITTED to be replicated in persistent storage;
  • + *
  • Stop the tx0's coordinator;
  • + *
  • Wait for tx0's state vacuum;
  • + *
  • Start a transaction tx1;
  • + *
  • Try to get the data that has been committed by tx0, ensure the data is correct.
  • + *
+ */ @Test public void testRecoveryAfterPersistentStateVacuumized() throws InterruptedException { // This node isn't going to be stopped, so let it be node 0. - IgniteImpl commitPartitionLeaseholder = cluster.runningNodes().collect(Collectors.toList()).get(0); + IgniteImpl commitPartitionLeaseholder = cluster.node(0); Tuple tuple0 = findTupleToBeHostedOnNode(commitPartitionLeaseholder, TABLE_NAME, null, INITIAL_TUPLE, NEXT_TUPLE, true); @@ -454,12 +579,68 @@ public void testRecoveryAfterPersistentStateVacuumized() throws InterruptedExcep assertEquals(tuple0.longValue("key"), tx0Data.longValue("key")); assertEquals(tuple0.stringValue("val"), tx0Data.stringValue("val")); + cleanupAllowed[0] = true; + tx1.commit(); waitForTxStateVacuum(txId0, commitPartId, true, 10_000); waitForTxStateVacuum(txId0, commitPartId, true, 10_000); } + /** + * Check that RO txns read the correct data consistent with commit timestamps. + * + *
    + *
  • Start RO tx 1;
  • + *
  • Upsert (k1, v1) within RW tx 1 and commit it;
  • + *
  • Start RO tx 2;
  • + *
  • Upsert (k1, v2) within RW tx 2 and commit it;
  • + *
  • Start RO tx 3;
  • + *
  • Read the data by k1 within RO tx 1, should be null;
  • + *
  • Read the data by k1 within RO tx 2, should be v1;
  • + *
  • Read the data by k1 within RO tx 3, should be v2.
  • + *
+ */ + @Test + public void testRoReadTheCorrectDataInBetween() throws InterruptedException { + IgniteImpl node = anyNode(); + + Transaction roTx1 = node.transactions().begin(new TransactionOptions().readOnly(true)); + + Tuple t1 = Tuple.create().set("key", 1L).set("val", "val1"); + Tuple t2 = Tuple.create().set("key", 1L).set("val", "val2"); + + RecordView view = table(node, TABLE_NAME).recordView(); + + Transaction rwTx1 = node.transactions().begin(); + view.upsert(rwTx1, t1); + rwTx1.commit(); + UUID rwTxId1 = txId(rwTx1); + + Transaction roTx2 = node.transactions().begin(new TransactionOptions().readOnly(true)); + + Transaction rwTx2 = node.transactions().begin(); + view.upsert(rwTx2, t2); + rwTx2.commit(); + UUID rwTxId2 = txId(rwTx2); + + Transaction roTx3 = node.transactions().begin(new TransactionOptions().readOnly(true)); + + waitForTxStateVacuum(rwTxId1, partitionIdForTuple(node, TABLE_NAME, t1, rwTx1), true, 10_000); + waitForTxStateVacuum(rwTxId2, partitionIdForTuple(node, TABLE_NAME, t2, rwTx2), true, 10_000); + + Tuple keyRec = Tuple.create().set("key", 1L); + + Tuple r1 = view.get(roTx1, keyRec); + assertNull(r1); + + Tuple r2 = view.get(roTx2, keyRec); + assertEquals(t1.stringValue("val"), r2.stringValue("val")); + + Tuple r3 = view.get(roTx3, keyRec); + assertEquals(t2.stringValue("val"), r3.stringValue("val")); + } + private boolean checkVolatileTxStateOnNodes(Set nodeConsistentIds, UUID txId) { return cluster.runningNodes() .filter(n -> nodeConsistentIds.contains(n.name())) @@ -472,15 +653,40 @@ private boolean checkPersistentTxStateOnNodes(Set nodeConsistentIds, UUI .allMatch(n -> persistentTxState(n, txId, partId) != null); } + /** + * Waits for persistent tx state to be replicated on the given nodes. + * + * @param nodeConsistentIds Node names. + * @param txId Transaction id. + * @param partId Commit partition id. + * @param timeMs Time to wait. 
+ */ private void waitForTxStateReplication(Set nodeConsistentIds, UUID txId, int partId, long timeMs) throws InterruptedException { assertTrue(waitForCondition(() -> checkPersistentTxStateOnNodes(nodeConsistentIds, txId, partId), timeMs)); } + /** + * Waits for vacuum of volatile (and if needed, persistent) state of the given tx on all nodes of the cluster. + * + * @param txId Transaction id. + * @param partId Commit partition id to check the persistent tx state storage of this partition. + * @param checkPersistent Whether to wait for vacuum of persistent tx state as well. + * @param timeMs Time to wait. + */ private void waitForTxStateVacuum(UUID txId, int partId, boolean checkPersistent, long timeMs) throws InterruptedException { waitForTxStateVacuum(cluster.runningNodes().map(IgniteImpl::name).collect(toSet()), txId, partId, checkPersistent, timeMs); } + /** + * Waits for vacuum of volatile (and if needed, persistent) state of the given tx on the given nodes. + * + * @param nodeConsistentIds Node names. + * @param txId Transaction id. + * @param partId Commit partition id to check the persistent tx state storage of this partition. + * @param checkPersistent Whether to wait for vacuum of persistent tx state as well. + * @param timeMs Time to wait. 
+ */ private void waitForTxStateVacuum(Set nodeConsistentIds, UUID txId, int partId, boolean checkPersistent, long timeMs) throws InterruptedException { boolean r = waitForCondition(() -> { @@ -547,13 +753,4 @@ private IgniteImpl findNode(Predicate filter) { .findFirst() .get(); } - - private Transaction startTx(IgniteImpl coordinator) { - Transaction tx = coordinator.transactions().begin(); - UUID txId = txId(tx); - - log.info("Test: Transaction 0 [tx={}].", txId); - - return tx; - } } diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/RecordBinaryViewImpl.java b/modules/table/src/main/java/org/apache/ignite/internal/table/RecordBinaryViewImpl.java index 7eb1d6d80f70..7ca6b96dc89f 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/RecordBinaryViewImpl.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/RecordBinaryViewImpl.java @@ -49,6 +49,7 @@ import org.apache.ignite.tx.Transaction; import org.jetbrains.annotations.Nullable; import org.jetbrains.annotations.TestOnly; +import org.jetbrains.annotations.VisibleForTesting; /** * Table view implementation for binary objects. @@ -412,7 +413,8 @@ private static Row marshal(Tuple tuple, TupleMarshaller marshaller, boolean keyO * @return A future, with row as a result. 
*/ @TestOnly - public CompletableFuture marshal(@Nullable Transaction tx, Tuple rec) { + @VisibleForTesting + public CompletableFuture tupleToBinaryRow(@Nullable Transaction tx, Tuple rec) { Objects.requireNonNull(rec); return doOperation(tx, schemaVersion -> { diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java index a65ae1d546f3..56f1813a78b2 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java @@ -658,7 +658,9 @@ private void handleVacuumTxStatesCommand(VacuumTxStatesCommand cmd, long command return; } - txStateStorage.removeAll(cmd.txIds(), commandIndex, commandTerm); + for (UUID txId : cmd.txIds()) { + txStateStorage.remove(txId, commandIndex, commandTerm); + } } private static void onTxStateStorageCasFail(UUID txId, TxMeta txMetaBeforeCas, TxMeta txMetaToSet) { diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/TransactionStateResolver.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/TransactionStateResolver.java index 4e2806251340..b1ebe8e87266 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/TransactionStateResolver.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/TransactionStateResolver.java @@ -194,7 +194,7 @@ private void resolveDistributiveTxState( assert localMeta.txState() == ABANDONED : "Unexpected transaction state [txId=" + txId + ", txStateMeta=" + localMeta + ']'; // Still try to resolve the state from commit partition. 
- resolveTxStateFromCommitPartition(txId, commitGrpId, txMetaFuture); + resolveTxStateFromCommitPartition(txId, commitGrpId, txMetaFuture); } } diff --git a/modules/transactions/build.gradle b/modules/transactions/build.gradle index 371e5e0ea131..ac8c8276dd21 100644 --- a/modules/transactions/build.gradle +++ b/modules/transactions/build.gradle @@ -55,12 +55,19 @@ dependencies { integrationTestImplementation project(':ignite-api') integrationTestImplementation(testFixtures(project(':ignite-core'))) + integrationTestImplementation(testFixtures(project(':ignite-transactions'))) integrationTestImplementation(testFixtures(project(':ignite-runner'))) testFixturesImplementation project(':ignite-configuration') testFixturesImplementation project(':ignite-core') testFixturesImplementation project(':ignite-api') testFixturesImplementation project(':ignite-schema') + testFixturesImplementation project(':ignite-runner') + testFixturesImplementation project(':ignite-affinity') + testFixturesImplementation project(':ignite-metastorage-api') + testFixturesImplementation project(':ignite-placement-driver-api') + testFixturesImplementation project(':ignite-distribution-zones') + testFixturesImplementation project(':ignite-table') testFixturesImplementation(testFixtures(project(':ignite-core'))) testFixturesImplementation libs.jetbrains.annotations testFixturesImplementation libs.mockito.junit diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java index 968eb7f5c26d..521d391248e8 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java @@ -33,7 +33,6 @@ import org.apache.ignite.internal.logger.IgniteLogger; import 
org.apache.ignite.internal.logger.Loggers; import org.apache.ignite.internal.placementdriver.PlacementDriver; -import org.apache.ignite.internal.placementdriver.ReplicaMeta; import org.apache.ignite.internal.replicator.ReplicaService; import org.apache.ignite.internal.replicator.TablePartitionId; import org.apache.ignite.internal.replicator.exception.PrimaryReplicaMissException; @@ -89,33 +88,39 @@ public CompletableFuture> vacuumPersistentTxStates(Map { - ReplicaMeta replicaMeta = placementDriver.getPrimaryReplica(commitPartitionId, now).join(); - - if (replicaMeta != null) { - VacuumTxStateReplicaRequest request = TX_MESSAGES_FACTORY.vacuumTxStateReplicaRequest() - .enlistmentConsistencyToken(replicaMeta.getStartTime().longValue()) - .groupId(commitPartitionId) - .transactionIds(txs) - .build(); - - CompletableFuture future; - - if (localNode.id().equals(replicaMeta.getLeaseholderId())) { - future = replicaService.invoke(localNode, request).whenComplete((v, e) -> { - if (e == null) { + CompletableFuture future = placementDriver.getPrimaryReplica(commitPartitionId, now) + .thenCompose(replicaMeta -> { + // If the primary replica is absent this means that another replica would become primary and + // the volatile state (as well as cleanup completion timestamp) would be updated there, and then + // this operation would be called from there. + // Also, we are going to send the vacuum request only to the local node. 
+ if (replicaMeta != null && localNode.id().equals(replicaMeta.getLeaseholderId())) { + VacuumTxStateReplicaRequest request = TX_MESSAGES_FACTORY.vacuumTxStateReplicaRequest() + .enlistmentConsistencyToken(replicaMeta.getStartTime().longValue()) + .groupId(commitPartitionId) + .transactionIds(txs) + .build(); + + return replicaService.invoke(localNode, request).whenComplete((v, e) -> { + if (e == null) { + successful.addAll(txs); + // We can log the exceptions without further handling because failed requests' txns are not added + // to the set of successful and will be retried. PrimaryReplicaMissException can be considered as + // a part of regular flow and doesn't need to be logged. + } else if (unwrapCause(e) instanceof PrimaryReplicaMissException) { + LOG.debug("Failed to vacuum tx states from the persistent storage.", e); + } else { + LOG.warn("Failed to vacuum tx states from the persistent storage.", e); + } + }); + } else { successful.addAll(txs); - } else if (!(unwrapCause(e) instanceof PrimaryReplicaMissException)) { - LOG.warn("Failed to vacuum tx states from the persistent storage.", e); + + return nullCompletedFuture(); } }); - } else { - successful.addAll(txs); - - future = nullCompletedFuture(); - } futures.add(future); - } }); return allOf(futures.toArray(new CompletableFuture[0])) diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java index 2e18a32715ba..d7c4182e413c 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java @@ -161,7 +161,7 @@ public void vacuum( cleanupCompletionTimestamp, txnResourceTtl, vacuumObservationTimestamp); if (shouldBeVacuumized) { - if (meta0.commitPartitionId() == null) { + if 
(cleanupCompletionTimestamp == null) { vacuumizedTxnsCount.incrementAndGet(); return null; @@ -169,9 +169,7 @@ public void vacuum( Set ids = txIds.computeIfAbsent(meta0.commitPartitionId(), k -> new HashSet<>()); ids.add(txId); - if (cleanupCompletionTimestamp != null) { - cleanupCompletionTimestamps.put(txId, cleanupCompletionTimestamp); - } + cleanupCompletionTimestamps.put(txId, cleanupCompletionTimestamp); return meta0; } @@ -207,7 +205,6 @@ public void vacuum( persistentVacuumOp.apply(txIds) .thenAccept(successful -> { - for (UUID txId : successful) { txStateMap.compute(txId, (k, v) -> { if (v == null) { diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/ThreadAssertingTxStateStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/ThreadAssertingTxStateStorage.java index a51b3d0f7469..029482626501 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/ThreadAssertingTxStateStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/ThreadAssertingTxStateStorage.java @@ -22,7 +22,6 @@ import static org.apache.ignite.internal.worker.ThreadAssertions.assertThreadAllowsToRead; import static org.apache.ignite.internal.worker.ThreadAssertions.assertThreadAllowsToWrite; -import java.util.Collection; import java.util.UUID; import java.util.concurrent.CompletableFuture; import org.apache.ignite.internal.lang.IgniteBiTuple; @@ -74,13 +73,6 @@ public void remove(UUID txId, long commandIndex, long commandTerm) { storage.remove(txId, commandIndex, commandTerm); } - @Override - public void removeAll(Collection txIds, long commandIndex, long commandTerm) { - assertThreadAllowsToWrite(); - - storage.removeAll(txIds, commandIndex, commandTerm); - } - @Override public Cursor> scan() { assertThreadAllowsTo(TX_STATE_STORAGE_ACCESS); diff --git 
a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/TxStateStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/TxStateStorage.java index b9f0579ed020..81c42d754d3c 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/TxStateStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/TxStateStorage.java @@ -17,7 +17,6 @@ package org.apache.ignite.internal.tx.storage.state; -import java.util.Collection; import java.util.UUID; import java.util.concurrent.CompletableFuture; import org.apache.ignite.internal.close.ManuallyCloseable; @@ -81,16 +80,6 @@ public interface TxStateStorage extends ManuallyCloseable { */ void remove(UUID txId, long commandIndex, long commandTerm); - /** - * Removes all the given transaction metas from the storage. - * - * @param txIds Tx ids. - * @param commandIndex New value for {@link #lastAppliedIndex()}. - * @param commandTerm New value for {@link #lastAppliedTerm()}. - * @throws IgniteInternalException with {@link Transactions#TX_STATE_STORAGE_ERR} error code in case when the operation has failed. - */ - void removeAll(Collection txIds, long commandIndex, long commandTerm); - /** * Creates a cursor to scan all data in the storage. 
* diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/rocksdb/TxStateRocksDbStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/rocksdb/TxStateRocksDbStorage.java index f97761d81d17..53439cc0bdcd 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/rocksdb/TxStateRocksDbStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/storage/state/rocksdb/TxStateRocksDbStorage.java @@ -30,7 +30,6 @@ import static org.apache.ignite.lang.ErrorGroups.Transactions.TX_STATE_STORAGE_STOPPED_ERR; import java.nio.ByteBuffer; -import java.util.Collection; import java.util.Objects; import java.util.Set; import java.util.UUID; @@ -170,66 +169,62 @@ public void put(UUID txId, TxMeta txMeta) { @Override public boolean compareAndSet(UUID txId, @Nullable TxState txStateExpected, TxMeta txMeta, long commandIndex, long commandTerm) { - return updateData(writeBatch -> { - byte[] txIdBytes = txIdToKey(txId); + return busy(() -> { + try (WriteBatch writeBatch = new WriteBatch()) { + byte[] txIdBytes = txIdToKey(txId); - byte[] txMetaExistingBytes = sharedStorage.db().get(sharedStorage.readOptions, txIdToKey(txId)); + byte[] txMetaExistingBytes = sharedStorage.db().get(sharedStorage.readOptions, txIdToKey(txId)); - boolean result; + boolean result; - if (txMetaExistingBytes == null && txStateExpected == null) { - writeBatch.put(txIdBytes, toBytes(txMeta)); + if (txMetaExistingBytes == null && txStateExpected == null) { + writeBatch.put(txIdBytes, toBytes(txMeta)); - result = true; - } else { - if (txMetaExistingBytes != null) { - TxMeta txMetaExisting = fromBytes(txMetaExistingBytes); + result = true; + } else { + if (txMetaExistingBytes != null) { + TxMeta txMetaExisting = fromBytes(txMetaExistingBytes); - if (txMetaExisting.txState() == txStateExpected) { - writeBatch.put(txIdBytes, toBytes(txMeta)); + if (txMetaExisting.txState() == 
txStateExpected) { + writeBatch.put(txIdBytes, toBytes(txMeta)); - result = true; + result = true; + } else { + result = txMetaExisting.txState() == txMeta.txState() + && Objects.equals(txMetaExisting.commitTimestamp(), txMeta.commitTimestamp()); + } } else { - result = txMetaExisting.txState() == txMeta.txState() - && Objects.equals(txMetaExisting.commitTimestamp(), txMeta.commitTimestamp()); + result = false; } - } else { - result = false; } - } - return result; - }, commandIndex, commandTerm); - } - - @Override - public void remove(UUID txId, long commandIndex, long commandTerm) { - updateData(writeBatch -> { - throwExceptionIfStorageInProgressOfRebalance(); + // If the store is in the process of rebalancing, then there is no need to update lastAppliedIndex and lastAppliedTerm. + // This is necessary to prevent a situation where, in the middle of the rebalance, the node will be restarted and we will + // have non-consistent storage. They will be updated by either #abortRebalance() or #finishRebalance(long, long). 
+ if (state.get() != StorageState.REBALANCE) { + updateLastApplied(writeBatch, commandIndex, commandTerm); + } - writeBatch.delete(txIdToKey(txId)); + sharedStorage.db().write(sharedStorage.writeOptions, writeBatch); - return null; - }, commandIndex, commandTerm); + return result; + } catch (RocksDBException e) { + throw new IgniteInternalException( + TX_STATE_STORAGE_ERR, + format("Failed perform CAS operation over a value in storage: [{}]", createStorageInfo()), + e + ); + } + }); } @Override - public void removeAll(Collection txIds, long commandIndex, long commandTerm) { - updateData(writeBatch -> { - throwExceptionIfStorageInProgressOfRebalance(); + public void remove(UUID txId, long commandIndex, long commandTerm) { + busy(() -> { + try (WriteBatch writeBatch = new WriteBatch()) { + throwExceptionIfStorageInProgressOfRebalance(); - for (UUID txId : txIds) { writeBatch.delete(txIdToKey(txId)); - } - - return null; - }, commandIndex, commandTerm); - } - - private T updateData(WriteClosure writeClosure, long commandIndex, long commandTerm) { - return (T) busy(() -> { - try (WriteBatch writeBatch = new WriteBatch()) { - Object result = writeClosure.apply(writeBatch); // If the store is in the process of rebalancing, then there is no need to update lastAppliedIndex and lastAppliedTerm. // This is necessary to prevent a situation where, in the middle of the rebalance, the node will be restarted and we will @@ -240,11 +235,11 @@ private T updateData(WriteClosure writeClosure, long commandIndex, long comm sharedStorage.db().write(sharedStorage.writeOptions, writeBatch); - return result; + return null; } catch (RocksDBException e) { throw new IgniteInternalException( TX_STATE_STORAGE_ERR, - format("Failed to update data in the storage: [{}]", createStorageInfo()), + format("Failed to remove a value from storage: [{}]", createStorageInfo()), e ); } @@ -660,12 +655,4 @@ private enum StorageState { /** Storage is in the process of cleanup. 
*/ CLEANUP } - - /** - * Write closure. - */ - @FunctionalInterface - private interface WriteClosure { - T apply(WriteBatch writeBatch) throws RocksDBException; - } } diff --git a/modules/runner/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java similarity index 100% rename from modules/runner/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java rename to modules/transactions/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java diff --git a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java index d67301928bbe..0d6357b4fbbd 100644 --- a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java +++ b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/AbstractTxStateStorageTest.java @@ -39,12 +39,9 @@ import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.UUID; -import java.util.function.BiConsumer; import java.util.stream.IntStream; import org.apache.ignite.internal.hlc.HybridTimestamp; import org.apache.ignite.internal.lang.IgniteBiTuple; @@ -86,21 +83,6 @@ protected void afterTest() throws Exception { @Test public void testPutGetRemove() { - testPutGetRemove0((storage, txIds) -> { - int index = 0; - - for (UUID txId : txIds) { - storage.remove(txId, index++, 1); - } - }); - } - - @Test - public void testPutGetRemoveAll() { - testPutGetRemove0((storage, txIds) -> storage.removeAll(txIds, 1, 1)); - } - - private void testPutGetRemove0(BiConsumer> removeOp) { TxStateStorage storage = tableStorage.getOrCreateTxStateStorage(0); List txIds = new 
ArrayList<>(); @@ -119,16 +101,12 @@ private void testPutGetRemove0(BiConsumer> removeOp) { assertEquals(txMetaExpected, txMeta); } - Set toRemove = new HashSet<>(); - for (int i = 0; i < 100; i++) { if (i % 2 == 0) { - toRemove.add(txIds.get(i)); + storage.remove(txIds.get(i), i, 1); } } - removeOp.accept(storage, toRemove); - for (int i = 0; i < 100; i++) { if (i % 2 == 0) { TxMeta txMeta = storage.get(txIds.get(i)); diff --git a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/test/TestTxStateStorage.java b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/test/TestTxStateStorage.java index e2c8050b637a..047dd1c8a49b 100644 --- a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/test/TestTxStateStorage.java +++ b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/storage/state/test/TestTxStateStorage.java @@ -22,7 +22,6 @@ import static org.apache.ignite.lang.ErrorGroups.Transactions.TX_STATE_STORAGE_REBALANCE_ERR; import static org.apache.ignite.lang.ErrorGroups.Transactions.TX_STATE_STORAGE_STOPPED_ERR; -import java.util.Collection; import java.util.Iterator; import java.util.Objects; import java.util.UUID; @@ -111,20 +110,6 @@ public void remove(UUID txId, long commandIndex, long commandTerm) { } } - @Override - public void removeAll(Collection txIds, long commandIndex, long commandTerm) { - checkStorageClosedOrInProgressOfRebalance(); - - for (UUID txId : txIds) { - storage.remove(txId); - } - - if (rebalanceFutureReference.get() == null) { - lastAppliedIndex = commandIndex; - lastAppliedTerm = commandTerm; - } - } - @Override public Cursor> scan() { checkStorageClosedOrInProgressOfRebalance(); diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionTestUtils.java b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/test/ItTransactionTestUtils.java similarity 
index 96% rename from modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionTestUtils.java rename to modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/test/ItTransactionTestUtils.java index e285971b5958..df34596c96ab 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionTestUtils.java +++ b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/test/ItTransactionTestUtils.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.ignite.internal.table; +package org.apache.ignite.internal.tx.test; import static java.util.Objects.requireNonNull; import static java.util.concurrent.TimeUnit.SECONDS; @@ -43,6 +43,8 @@ import org.apache.ignite.internal.replicator.ReplicationGroupId; import org.apache.ignite.internal.replicator.TablePartitionId; import org.apache.ignite.internal.schema.BinaryRowEx; +import org.apache.ignite.internal.table.RecordBinaryViewImpl; +import org.apache.ignite.internal.table.TableImpl; import org.apache.ignite.internal.tx.impl.ReadWriteTransactionImpl; import org.apache.ignite.table.Tuple; import org.apache.ignite.tx.Transaction; @@ -92,7 +94,7 @@ public static int partitionIdForTuple(IgniteImpl node, String tableName, Tuple t TableImpl table = table(node, tableName); RecordBinaryViewImpl view = unwrapRecordBinaryViewImpl(table.recordView()); - CompletableFuture rowFut = view.marshal(tx, tuple); + CompletableFuture rowFut = view.tupleToBinaryRow(tx, tuple); assertThat(rowFut, willCompleteSuccessfully()); BinaryRowEx row = rowFut.join(); From 2f932de5094a283daf96e46ecdfc50c2597b8900 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Thu, 25 Apr 2024 10:45:55 +0300 Subject: [PATCH 15/26] more fixes, comments and javadocs --- .../table/ItTxResourcesVacuumTest.java | 92 ++++++++++++++----- .../internal/tx/impl/TxManagerImpl.java | 8 +- .../tx/impl/VolatileTxStateMetaStorage.java | 46 +++++----- 3 files changed, 96 
insertions(+), 50 deletions(-) diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java index 6be31a13d80c..232132a37756 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java @@ -20,6 +20,7 @@ import static java.util.stream.Collectors.toSet; import static org.apache.ignite.internal.SessionUtils.executeUpdate; import static org.apache.ignite.internal.catalog.CatalogService.DEFAULT_STORAGE_PROFILE; +import static org.apache.ignite.internal.testframework.IgniteTestUtils.runAsync; import static org.apache.ignite.internal.tx.TxState.FINISHING; import static org.apache.ignite.internal.tx.impl.ResourceVacuumManager.RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY; import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.findTupleToBeHostedOnNode; @@ -172,7 +173,8 @@ protected String getNodeBootstrapConfigTemplate() { *
  • While the state is FINISHING, wait 3 seconds;
  • *
  • Check that the volatile state of the transaction is preserved;
  • *
  • Unblock {@link TxFinishReplicaRequest};
  • - *
  • Check that both volatile and persistent state is vacuumized.
  • + *
  • Check that both volatile and persistent state is vacuumized;
  • + *
  • Check that the committed value is correct.
  • * */ @Test @@ -195,26 +197,32 @@ public void testVacuum() throws InterruptedException { view.upsert(tx, tuple); + // Check that the volatile PENDING state of the transaction is preserved. Thread.sleep(3000); - assertTrue(checkVolatileTxStateOnNodes(nodes, txId)); + CompletableFuture finishStartedFuture = new CompletableFuture<>(); CompletableFuture finishAllowedFuture = new CompletableFuture<>(); node.dropMessages((n, msg) -> { if (msg instanceof TxFinishReplicaRequest) { + finishStartedFuture.complete(null); + finishAllowedFuture.join(); } return false; }); - CompletableFuture commitFut = tx.commitAsync(); + CompletableFuture commitFut = runAsync(tx::commit); - assertEquals(FINISHING, volatileTxState(node, txId).txState()); + assertThat(finishStartedFuture, willCompleteSuccessfully()); + // While the state is FINISHING, wait 3 seconds. + assertEquals(FINISHING, volatileTxState(node, txId).txState()); Thread.sleep(3000); + // Check that the volatile state of the transaction is preserved. assertTrue(checkVolatileTxStateOnNodes(nodes, txId)); finishAllowedFuture.complete(null); @@ -225,7 +233,12 @@ public void testVacuum() throws InterruptedException { waitForTxStateReplication(nodes, txId, partId, 10_000); + // Check that both volatile and persistent state is vacuumized.. waitForTxStateVacuum(txId, partId, true, 10_000); + + // Trying to read the value. + Tuple data = view.get(null, Tuple.create().set("key", tuple.longValue("key"))); + assertEquals(tuple, data); } /** @@ -238,8 +251,8 @@ public void testVacuum() throws InterruptedException { *
  • Wait 3 seconds;
  • *
  • Check that the volatile state of the transaction is preserved;
  • *
  • Try to read the value using another transaction, which starts the tx recovery;
  • - *
  • Check that the abandoned transaction is recovered; its volatile and persistent states are vacuumized;
  • - *
  • Check that abandoned tx is rolled back and thus the value is null.
  • + *
  • Check that abandoned tx is rolled back and thus the value is null;
  • + *
  • Check that the abandoned transaction is recovered; its volatile and persistent states are vacuumized.
  • * */ @Test @@ -266,12 +279,16 @@ public void testAbandonedTxnsAreNotVacuumizedUntilRecovered() throws Interrupted Thread.sleep(3000); + // Check that the volatile state of the transaction is preserved. assertTrue(checkVolatileTxStateOnNodes(txNodes, abandonedTxId)); + // Try to read the value using another transaction, which starts the tx recovery. RecordView viewLh = leaseholder.tables().table(TABLE_NAME).recordView(); Tuple value = viewLh.get(null, Tuple.create().set("key", tuple.longValue("key"))); + // Check that abandoned tx is rolled back and thus the value is null. assertNull(value); + // Check that the abandoned transaction is recovered; its volatile and persistent states are vacuumized. waitForTxStateVacuum(txNodes, abandonedTxId, partId, true, 10_000); } @@ -287,7 +304,7 @@ public void testAbandonedTxnsAreNotVacuumizedUntilRecovered() throws Interrupted *
  • Start a tx commit;
  • *
  • Wait for vacuum completion on a node that doesn't host the commit partition;
  • *
  • Unblock {@link TxCleanupMessage}-s;
  • - *
  • Wait for the cleanup on the commit partition group.
  • + *
  • Wait for the tx state vacuum on the commit partition group.
  • * */ @Test @@ -302,6 +319,7 @@ public void testVacuumWithCleanupDelay() throws InterruptedException { log.info("Test: Loading the data [tx={}].", txId); + // Generate some tuple and define on which nodes it would be hosted. Tuple tuple0 = findTupleToBeHostedOnNode(node, TABLE_NAME, tx, INITIAL_TUPLE, NEXT_TUPLE, true); int commitPartId = partitionIdForTuple(node, TABLE_NAME, tuple0, tx); @@ -322,6 +340,7 @@ public void testVacuumWithCleanupDelay() throws InterruptedException { Tuple tuple1 = findTupleToBeHostedOnNode(leaseholderForAnotherTuple, TABLE_NAME, tx, INITIAL_TUPLE, NEXT_TUPLE, true); + // Upsert both tuples within a transaction. view.upsert(tx, tuple0); view.upsert(tx, tuple1); @@ -342,22 +361,29 @@ public void testVacuumWithCleanupDelay() throws InterruptedException { return false; }); - assertTrue(checkVolatileTxStateOnNodes(commitPartNodes, txId)); CompletableFuture commitFut = tx.commitAsync(); waitForTxStateReplication(commitPartNodes, txId, commitPartId, 10_000); assertThat(cleanupStarted, willCompleteSuccessfully()); + // Wait for vacuum completion on a node that doesn't host the commit partition. waitForTxStateVacuum(Set.of(leaseholderForAnotherTuple.name()), txId, 0, false, 10_000); - assertTrue(checkPersistentTxStateOnNodes(commitPartNodes, txId, commitPartId)); - + // Unblocking cleanup. cleanupAllowed.complete(null); assertThat(commitFut, willCompleteSuccessfully()); + // Wait for the cleanup on the commit partition group. waitForTxStateVacuum(txId, commitPartId, true, 10_000); + + // Trying to read the values. 
+ Tuple data0 = view.get(null, Tuple.create().set("key", tuple0.longValue("key"))); + assertEquals(tuple0, data0); + + Tuple data1 = view.get(null, Tuple.create().set("key", tuple1.longValue("key"))); + assertEquals(tuple1, data1); } /** @@ -409,8 +435,6 @@ public void testCommitPartitionPrimaryChangesBeforeVacuum() throws InterruptedEx cleanupStarted.complete(null); cleanupAllowedFut.join(); - - return true; } return false; @@ -428,9 +452,29 @@ public void testCommitPartitionPrimaryChangesBeforeVacuum() throws InterruptedEx assertThat(commitFut, willCompleteSuccessfully()); + log.info("Test: tx committed."); + waitForTxStateVacuum(txId, commitPartId, true, 10_000); + + // Trying to read the value. + Tuple data = view.get(null, Tuple.create().set("key", tuple.longValue("key"))); + assertEquals(tuple, data); } + /** + * Check that the tx state on commit partition is vacuumized only when cleanup is completed. + * + *
      + *
    • Start a transaction;
    • + *
    • Upsert a tuple;
    • + *
    • Block {@link TxCleanupMessage}-s from commit partition primary;
    • + *
    • Start a tx commit;
    • + *
    • Wait for tx cleanup to start;
    • + *
    • Wait for volatile tx state vacuum;
    • + *
    • Unblock {@link TxCleanupMessage}-s;
    • + *
    • Wait for the tx state vacuum on the commit partition group.
    • + *
    + */ @Test public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() throws InterruptedException { IgniteImpl node = anyNode(); @@ -478,7 +522,10 @@ public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() t assertThat(cleanupStarted, willCompleteSuccessfully()); - transferPrimary(cluster.runningNodes().collect(toSet()), commitPartGrpId, commitPartNodes::contains); + // Wait for volatile tx state vacuum. This is possible because tx finish is complete. + waitForTxStateVacuum(txId, commitPartId, false, 10_000); + + log.info("Test: volatile state vacuumized"); cleanupAllowedFut.complete(null); @@ -487,6 +534,10 @@ public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() t assertThat(commitFut, willCompleteSuccessfully()); waitForTxStateVacuum(txId, commitPartId, true, 10_000); + + // Trying to read the data. + Tuple data = view.get(null, Tuple.create().set("key", tuple.longValue("key"))); + assertEquals(tuple, data); } /** @@ -499,7 +550,6 @@ public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() t *
  • Start the commit of tx0 and wait for tx state COMMITTED to be replicated in persistent storage;
  • *
  • Stop the tx0's coordinator;
  • *
  • Wait for tx0's state vacuum;
  • - *
  • Start a transaction tx1;
  • *
  • Try to get the data that has been committed by tx0, ensure the data is correct.
  • * */ @@ -568,21 +618,12 @@ public void testRecoveryAfterPersistentStateVacuumized() throws InterruptedExcep RecordView view1 = coord1.tables().table(TABLE_NAME).recordView(); - Transaction tx1 = coord1.transactions().begin(); - UUID txId1 = txId(tx1); - - log.info("Test: Transaction 1 [tx={}].", txId1); - - // Tx 1 should get the data committed by tx 0. + // Another tx should get the data committed by tx 0. Tuple keyTuple = Tuple.create().set("key", tuple0.longValue("key")); - Tuple tx0Data = view1.get(tx1, keyTuple); + Tuple tx0Data = view1.get(null, keyTuple); assertEquals(tuple0.longValue("key"), tx0Data.longValue("key")); assertEquals(tuple0.stringValue("val"), tx0Data.stringValue("val")); - cleanupAllowed[0] = true; - - tx1.commit(); - waitForTxStateVacuum(txId0, commitPartId, true, 10_000); waitForTxStateVacuum(txId0, commitPartId, true, 10_000); } @@ -596,6 +637,7 @@ public void testRecoveryAfterPersistentStateVacuumized() throws InterruptedExcep *
  • Start RO tx 2;
  • *
  • Upsert (k1, v2) within RW tx 2 and commit it;
  • *
  • Start RO tx 3;
  • + *
  • Wait for vacuum of the states of RW tx 1 and RW tx 2;
  • *
  • Read the data by k1 within RO tx 1, should be null;
  • *
  • Read the data by k1 within RO tx 2, should be v1;
  • *
  • Read the data by k1 within RO tx 3, should be v2.
  • diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java index 2c84a337a460..0e6fed4e68ad 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java @@ -640,7 +640,9 @@ private CompletableFuture durableFinish( result.transactionState(), old == null ? null : old.txCoordinatorId(), commitPartition, - result.commitTimestamp() + result.commitTimestamp(), + old == null ? null : old.initialVacuumObservationTimestamp(), + old == null ? null : old.cleanupCompletionTimestamp() ) ); @@ -704,7 +706,9 @@ private CompletableFuture makeFinishRequest( txResult.transactionState(), localNodeId, old == null ? null : old.commitPartitionId(), - txResult.commitTimestamp() + txResult.commitTimestamp(), + old == null ? null : old.initialVacuumObservationTimestamp(), + old == null ? null : old.cleanupCompletionTimestamp() )); assert isFinalState(updatedMeta.txState()) : diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java index d7c4182e413c..c03c04a88bc2 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java @@ -49,8 +49,6 @@ public class VolatileTxStateMetaStorage { /** The local map for tx states. */ private ConcurrentHashMap txStateMap; - private final AtomicInteger persistentStatesVaccumizedLastIteration = new AtomicInteger(); - /** * Starts the storage. */ @@ -129,6 +127,8 @@ public Collection states() { * * @param vacuumObservationTimestamp Timestamp of the vacuum attempt. 
* @param txnResourceTtl Transactional resource time to live in milliseconds. + * @param persistentVacuumOp Persistent vacuum operation. Accepts the map of commit partition ids to set of + * tx ids, returns a future with set of successfully vacuumized tx ids. */ public void vacuum( long vacuumObservationTimestamp, @@ -161,7 +161,7 @@ public void vacuum( cleanupCompletionTimestamp, txnResourceTtl, vacuumObservationTimestamp); if (shouldBeVacuumized) { - if (cleanupCompletionTimestamp == null) { + if (meta0.commitPartitionId() == null) { vacuumizedTxnsCount.incrementAndGet(); return null; @@ -169,7 +169,9 @@ public void vacuum( Set ids = txIds.computeIfAbsent(meta0.commitPartitionId(), k -> new HashSet<>()); ids.add(txId); - cleanupCompletionTimestamps.put(txId, cleanupCompletionTimestamp); + if (cleanupCompletionTimestamp != null) { + cleanupCompletionTimestamps.put(txId, cleanupCompletionTimestamp); + } return meta0; } @@ -186,23 +188,6 @@ public void vacuum( }); }); - int vacuumizedPersistentTxnStatesCount = persistentStatesVaccumizedLastIteration.getAndSet(0); - - LOG.info("Vacuum finished [vacuumObservationTimestamp={}, txnResourceTtl={}, " - + "vacuumizedTxnsCount={}, " - + "vacuumizedPersistentTxnStatesCount={}, " - + "markedAsInitiallyDetectedTxnsCount={}, " - + "alreadyMarkedTxnsCount={}, " - + "skippedForFurtherProcessingUnfinishedTxnsCount={}].", - vacuumObservationTimestamp, - txnResourceTtl, - vacuumizedTxnsCount, - vacuumizedPersistentTxnStatesCount, - markedAsInitiallyDetectedTxnsCount, - alreadyMarkedTxnsCount, - skippedForFurtherProcessingUnfinishedTxnsCount - ); - persistentVacuumOp.apply(txIds) .thenAccept(successful -> { for (UUID txId : successful) { @@ -217,7 +202,21 @@ public void vacuum( }); } - persistentStatesVaccumizedLastIteration.addAndGet(successful.size()); + LOG.info("Vacuum finished [vacuumObservationTimestamp={}, " + + "txnResourceTtl={}, " + + "vacuumizedTxnsCount={}, " + + "vacuumizedPersistentTxnStatesCount={}, " + + 
"markedAsInitiallyDetectedTxnsCount={}, " + + "alreadyMarkedTxnsCount={}, " + + "skippedForFurtherProcessingUnfinishedTxnsCount={}].", + vacuumObservationTimestamp, + txnResourceTtl, + vacuumizedTxnsCount, + successful.size(), + markedAsInitiallyDetectedTxnsCount, + alreadyMarkedTxnsCount, + skippedForFurtherProcessingUnfinishedTxnsCount + ); }); } @@ -227,7 +226,8 @@ private static TxStateMeta markInitialVacuumObservationTimestamp(TxStateMeta met meta.txCoordinatorId(), meta.commitPartitionId(), meta.commitTimestamp(), - vacuumObservationTimestamp + vacuumObservationTimestamp, + meta.cleanupCompletionTimestamp() ); } From f57a011b1324939eb28506cd846d5a9272d4c212 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Thu, 25 Apr 2024 11:13:23 +0300 Subject: [PATCH 16/26] fixed compilation --- modules/index/build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/index/build.gradle b/modules/index/build.gradle index 000e9bbea147..3cd9b5943176 100644 --- a/modules/index/build.gradle +++ b/modules/index/build.gradle @@ -76,6 +76,7 @@ dependencies { integrationTestImplementation testFixtures(project(':ignite-sql-engine')) integrationTestImplementation testFixtures(project(':ignite-table')) integrationTestImplementation testFixtures(project(':ignite-storage-api')) + integrationTestImplementation testFixtures(project(':ignite-transactions')) integrationTestImplementation libs.jetbrains.annotations } From 5c0169281718c59a233e60de4e2bd90092ad49c2 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Thu, 25 Apr 2024 11:15:49 +0300 Subject: [PATCH 17/26] code style --- .../apache/ignite/internal/placementdriver/LeaseUpdater.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java b/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java index ae690400063a..0c353e8b062f 100644 --- 
a/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java +++ b/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java @@ -17,11 +17,7 @@ package org.apache.ignite.internal.placementdriver; -import static java.lang.Math.abs; -import static java.util.Comparator.comparing; import static java.util.Objects.hash; -import static java.util.stream.Collectors.toList; -import static org.apache.ignite.internal.affinity.Assignment.forPeer; import static org.apache.ignite.internal.metastorage.dsl.Conditions.notExists; import static org.apache.ignite.internal.metastorage.dsl.Conditions.or; import static org.apache.ignite.internal.metastorage.dsl.Conditions.value; From 5107b911c382494f1f8cae4390daf1429e98e3fd Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Thu, 25 Apr 2024 11:18:42 +0300 Subject: [PATCH 18/26] code style --- .../internal/placementdriver/LeaseUpdater.java | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java b/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java index 0c353e8b062f..3ed175c099c7 100644 --- a/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java +++ b/modules/placement-driver/src/main/java/org/apache/ignite/internal/placementdriver/LeaseUpdater.java @@ -286,23 +286,6 @@ private CompletableFuture denyLease(ReplicationGroupId grpId, Lease lea } return primaryCandidate; - /*if (proposedConsistentId != null && assignments.contains(forPeer(proposedConsistentId))) { - ClusterNode proposedCandidate = topologyTracker.nodeByConsistentId(proposedConsistentId); - - if (proposedCandidate != null) { - return proposedCandidate; - } - } - - List onlineNodes = assignments.stream() - .map(a -> topologyTracker.nodeByConsistentId(a.consistentId())) - .filter(Objects::nonNull) 
- .sorted(comparing(ClusterNode::name)) - .collect(toList()); - - int hash = abs(hash(assignments, grpId)); - - return onlineNodes.get(hash % onlineNodes.size());*/ } /** Returns {@code true} if active. */ From 517092eebce6ed15f5149ff85ddf540dbf0036e4 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Thu, 25 Apr 2024 11:34:56 +0300 Subject: [PATCH 19/26] code style* --- .../internal/table/ItTransactionPrimaryChangeTest.java | 2 +- .../internal/table/ItTransactionRecoveryTest.java | 2 +- .../ignite/internal/table/ItTxResourcesVacuumTest.java | 10 +++++----- .../internal/tx/impl/PersistentTxStateVacuumizer.java | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java index b1493dccc578..08b79a07acb5 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionPrimaryChangeTest.java @@ -22,8 +22,8 @@ import static org.apache.ignite.internal.TestDefaultProfilesNames.DEFAULT_AIPERSIST_PROFILE_NAME; import static org.apache.ignite.internal.TestWrappers.unwrapIgniteTransaction; import static org.apache.ignite.internal.TestWrappers.unwrapTableImpl; -import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.waitAndGetPrimaryReplica; import static org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully; +import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.waitAndGetPrimaryReplica; import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionRecoveryTest.java 
b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionRecoveryTest.java index 4d89b294ce10..330a3bcaf191 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionRecoveryTest.java +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTransactionRecoveryTest.java @@ -21,12 +21,12 @@ import static org.apache.ignite.internal.SessionUtils.executeUpdate; import static org.apache.ignite.internal.TestWrappers.unwrapTableImpl; import static org.apache.ignite.internal.catalog.CatalogService.DEFAULT_STORAGE_PROFILE; -import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.waitAndGetPrimaryReplica; import static org.apache.ignite.internal.testframework.IgniteTestUtils.bypassingThreadAssertions; import static org.apache.ignite.internal.testframework.IgniteTestUtils.waitForCondition; import static org.apache.ignite.internal.testframework.matchers.CompletableFutureExceptionMatcher.willThrow; import static org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully; import static org.apache.ignite.internal.tx.impl.ResourceVacuumManager.RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY; +import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.waitAndGetPrimaryReplica; import static org.apache.ignite.internal.util.ExceptionUtils.extractCodeFrom; import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java index 232132a37756..64a9a367716e 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java +++ 
b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java @@ -20,7 +20,11 @@ import static java.util.stream.Collectors.toSet; import static org.apache.ignite.internal.SessionUtils.executeUpdate; import static org.apache.ignite.internal.catalog.CatalogService.DEFAULT_STORAGE_PROFILE; +import static org.apache.ignite.internal.table.NodeUtils.transferPrimary; import static org.apache.ignite.internal.testframework.IgniteTestUtils.runAsync; +import static org.apache.ignite.internal.testframework.IgniteTestUtils.waitForCondition; +import static org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully; +import static org.apache.ignite.internal.tx.TxState.COMMITTED; import static org.apache.ignite.internal.tx.TxState.FINISHING; import static org.apache.ignite.internal.tx.impl.ResourceVacuumManager.RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY; import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.findTupleToBeHostedOnNode; @@ -30,10 +34,6 @@ import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.tableId; import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.txId; import static org.apache.ignite.internal.tx.test.ItTransactionTestUtils.waitAndGetPrimaryReplica; -import static org.apache.ignite.internal.table.NodeUtils.transferPrimary; -import static org.apache.ignite.internal.testframework.IgniteTestUtils.waitForCondition; -import static org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully; -import static org.apache.ignite.internal.tx.TxState.COMMITTED; import static org.apache.ignite.internal.util.IgniteUtils.shutdownAndAwaitTermination; import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -392,7 +392,7 @@ public void testVacuumWithCleanupDelay() throws InterruptedException { *
      *
    • Start a transaction;
    • *
    • Upsert a value;
    • - *
    • Block {@link TxCleanupMessage}-s;
    • + *
    • Block {@link TxCleanupMessage}-s;
    • *
    • Start a tx commit;
    • *
    • Transfer the primary replica;
    • *
    • Unblock the {@link TxCleanupMessage}-s;
    • diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java index 521d391248e8..9ccc35b5b528 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java @@ -120,7 +120,7 @@ public CompletableFuture> vacuumPersistentTxStates(Map Date: Thu, 25 Apr 2024 11:44:39 +0300 Subject: [PATCH 20/26] code style** --- .../table/distributed/replicator/TransactionStateResolver.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/TransactionStateResolver.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/TransactionStateResolver.java index b1ebe8e87266..4e2806251340 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/TransactionStateResolver.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/TransactionStateResolver.java @@ -194,7 +194,7 @@ private void resolveDistributiveTxState( assert localMeta.txState() == ABANDONED : "Unexpected transaction state [txId=" + txId + ", txStateMeta=" + localMeta + ']'; // Still try to resolve the state from commit partition. 
- resolveTxStateFromCommitPartition(txId, commitGrpId, txMetaFuture); + resolveTxStateFromCommitPartition(txId, commitGrpId, txMetaFuture); } } From 012eacb3d9d709638f0a6a275ec03d21cde3890e Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Fri, 26 Apr 2024 19:26:06 +0300 Subject: [PATCH 21/26] some more fixes --- .../ignite/client/fakes/FakeTxManager.java | 4 +- .../table/ItTxResourcesVacuumTest.java | 361 ++++++++++++++---- .../distributed/raft/PartitionListener.java | 2 + .../replicator/PartitionReplicaListener.java | 18 +- .../apache/ignite/internal/tx/TxManager.java | 8 +- .../tx/impl/PersistentTxStateVacuumizer.java | 6 +- .../internal/tx/impl/TxManagerImpl.java | 6 +- .../tx/impl/VolatileTxStateMetaStorage.java | 23 +- 8 files changed, 340 insertions(+), 88 deletions(-) diff --git a/modules/client/src/test/java/org/apache/ignite/client/fakes/FakeTxManager.java b/modules/client/src/test/java/org/apache/ignite/client/fakes/FakeTxManager.java index 24c05f1dc4d5..99c89617a8c8 100644 --- a/modules/client/src/test/java/org/apache/ignite/client/fakes/FakeTxManager.java +++ b/modules/client/src/test/java/org/apache/ignite/client/fakes/FakeTxManager.java @@ -212,8 +212,8 @@ public CompletableFuture cleanup(String node, UUID txId) { } @Override - public void vacuum() { - // No-op. 
+ public CompletableFuture vacuum() { + return nullCompletedFuture(); } @Override diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java index 64a9a367716e..2dcaca529b51 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java @@ -62,6 +62,7 @@ import org.apache.ignite.internal.thread.IgniteThreadFactory; import org.apache.ignite.internal.thread.ThreadOperation; import org.apache.ignite.internal.tx.TransactionMeta; +import org.apache.ignite.internal.tx.configuration.TransactionConfiguration; import org.apache.ignite.internal.tx.impl.TxManagerImpl; import org.apache.ignite.internal.tx.message.TxCleanupMessage; import org.apache.ignite.internal.tx.message.TxFinishReplicaRequest; @@ -142,7 +143,7 @@ protected void customizeInitParameters(InitParametersBuilder builder) { builder.clusterConfiguration("{" + " transaction: {" - + " txnResourceTtl: 1" + + " txnResourceTtl: 0" + " }," + " replication: {" + " rpcTimeout: 30000" @@ -161,16 +162,18 @@ protected String getNodeBootstrapConfigTemplate() { } /** - * Simple vacuum test, checking also that PENDING and FINISHING states are not removed. + * Simple TTL-triggered vacuum test, checking also that PENDING and FINISHING states are not removed. * *
        *
      • Run a transaction;
      • - *
      • Insert a value;
      • - *
      • Wait 3 seconds;
      • + *
      • Run a parallel transaction;
      • + *
      • Insert values within both transactions;
      • + *
      • Commit the parallel transaction and wait for vacuum of its state;
      • + *
      • Run another parallel transaction;
      • *
      • Check that the volatile PENDING state of the transaction is preserved;
      • - *
      • Block {@link TxFinishReplicaRequest};
      • + *
      • Block {@link TxFinishReplicaRequest} for the pending transaction;
      • *
      • Start the tx commit;
      • - *
      • While the state is FINISHING, wait 3 seconds;
      • + *
      • While the state is FINISHING, commit the parallel transaction and wait for vacuum of its state;
      • *
      • Check that the volatile state of the transaction is preserved;
      • *
      • Unblock {@link TxFinishReplicaRequest};
      • *
      • Check that both volatile and persistent state is vacuumized;
      • @@ -179,48 +182,68 @@ protected String getNodeBootstrapConfigTemplate() { */ @Test public void testVacuum() throws InterruptedException { + // We should test the TTL-triggered vacuum. + setTxResourceTtl(1); + IgniteImpl node = anyNode(); RecordView view = node.tables().table(TABLE_NAME).recordView(); // Put some value into the table. Transaction tx = node.transactions().begin(); + Transaction parallelTx1 = node.transactions().begin(); UUID txId = txId(tx); + UUID parallelTx1Id = txId(parallelTx1); log.info("Test: Loading the data [tx={}].", txId); Tuple tuple = findTupleToBeHostedOnNode(node, TABLE_NAME, tx, INITIAL_TUPLE, NEXT_TUPLE, true); + Tuple tupleForParallelTx = findTupleToBeHostedOnNode(node, TABLE_NAME, tx, NEXT_TUPLE.apply(tuple), NEXT_TUPLE, true); + int partIdForParallelTx = partitionIdForTuple(anyNode(), TABLE_NAME, tupleForParallelTx, parallelTx1); int partId = partitionIdForTuple(node, TABLE_NAME, tuple, tx); Set nodes = partitionAssignment(node, new TablePartitionId(tableId(node, TABLE_NAME), partId)); view.upsert(tx, tuple); + view.upsert(parallelTx1, tupleForParallelTx); // Check that the volatile PENDING state of the transaction is preserved. 
- Thread.sleep(3000); + parallelTx1.commit(); + waitForTxStateVacuum(nodes, parallelTx1Id, partIdForParallelTx, true, 10_000); assertTrue(checkVolatileTxStateOnNodes(nodes, txId)); + Transaction parallelTx2 = node.transactions().begin(); + UUID parallelTx2Id = txId(parallelTx2); + view.upsert(parallelTx2, tupleForParallelTx); + CompletableFuture finishStartedFuture = new CompletableFuture<>(); CompletableFuture finishAllowedFuture = new CompletableFuture<>(); node.dropMessages((n, msg) -> { if (msg instanceof TxFinishReplicaRequest) { - finishStartedFuture.complete(null); + TxFinishReplicaRequest finishRequest = (TxFinishReplicaRequest) msg; - finishAllowedFuture.join(); + if (finishRequest.txId().equals(txId)) { + finishStartedFuture.complete(null); + + finishAllowedFuture.join(); + } } return false; }); + Transaction roTxBefore = beginReadOnlyTx(anyNode()); + CompletableFuture commitFut = runAsync(tx::commit); assertThat(finishStartedFuture, willCompleteSuccessfully()); // While the state is FINISHING, wait 3 seconds. assertEquals(FINISHING, volatileTxState(node, txId).txState()); - Thread.sleep(3000); + parallelTx2.commit(); + waitForTxStateVacuum(nodes, parallelTx2Id, partId, true, 10_000); // Check that the volatile state of the transaction is preserved. assertTrue(checkVolatileTxStateOnNodes(nodes, txId)); @@ -231,14 +254,17 @@ public void testVacuum() throws InterruptedException { log.info("Test: Tx committed [tx={}].", txId); + Transaction roTxAfter = beginReadOnlyTx(anyNode()); + waitForTxStateReplication(nodes, txId, partId, 10_000); // Check that both volatile and persistent state is vacuumized.. waitForTxStateVacuum(txId, partId, true, 10_000); // Trying to read the value. 
- Tuple data = view.get(null, Tuple.create().set("key", tuple.longValue("key"))); - assertEquals(tuple, data); + Tuple keyRec = Tuple.create().set("key", tuple.longValue("key")); + checkValueReadOnly(view, roTxBefore, keyRec, null); + checkValueReadOnly(view, roTxAfter, keyRec, tuple); } /** @@ -246,9 +272,12 @@ public void testVacuum() throws InterruptedException { * *
          *
        • Start a transaction from a coordinator that would be not included into commit partition group;
        • - *
        • Insert a value;
        • + *
        • Start a parallel transaction;
        • + *
        • Find a tuple for parallel tx that would be hosted on the same partition as a tuple for the abandoned tx;
        • + *
        • Insert values within both transactions;
        • + *
        • Commit the parallel transaction;
        • *
        • Stop the tx coordinator;
        • - *
        • Wait 3 seconds;
        • + *
        • Wait for tx state of parallel tx to be vacuumized;
        • *
        • Check that the volatile state of the transaction is preserved;
        • *
        • Try to read the value using another transaction, which starts the tx recovery;
        • *
        • Check that abandoned tx is rolled back and thus the value is null;
        • @@ -257,6 +286,8 @@ public void testVacuum() throws InterruptedException { */ @Test public void testAbandonedTxnsAreNotVacuumizedUntilRecovered() throws InterruptedException { + setTxResourceTtl(1); + IgniteImpl leaseholder = cluster.node(0); Tuple tuple = findTupleToBeHostedOnNode(leaseholder, TABLE_NAME, null, INITIAL_TUPLE, NEXT_TUPLE, true); @@ -273,11 +304,27 @@ public void testAbandonedTxnsAreNotVacuumizedUntilRecovered() throws Interrupted Transaction abandonedTx = abandonedTxCoord.transactions().begin(); UUID abandonedTxId = txId(abandonedTx); + Transaction parallelTx = abandonedTxCoord.transactions().begin(); + UUID parallelTxId = txId(parallelTx); + + // Find a tuple hosted on the same partition. + Tuple tupleForParallelTx = tuple; + int partIdForParallelTx = -1; + while (partIdForParallelTx != partId) { + tupleForParallelTx = findTupleToBeHostedOnNode(leaseholder, TABLE_NAME, null, NEXT_TUPLE.apply(tupleForParallelTx), NEXT_TUPLE, + true); + + partIdForParallelTx = partitionIdForTuple(anyNode(), TABLE_NAME, tupleForParallelTx, parallelTx); + } + view.upsert(abandonedTx, tuple); + view.upsert(parallelTx, tupleForParallelTx); + + parallelTx.commit(); stopNode(abandonedTxCoord.name()); - Thread.sleep(3000); + waitForTxStateVacuum(txNodes, parallelTxId, partIdForParallelTx, true, 10_000); // Check that the volatile state of the transaction is preserved. assertTrue(checkVolatileTxStateOnNodes(txNodes, abandonedTxId)); @@ -289,6 +336,7 @@ public void testAbandonedTxnsAreNotVacuumizedUntilRecovered() throws Interrupted assertNull(value); // Check that the abandoned transaction is recovered; its volatile and persistent states are vacuumized. + // Wait for it, because we don't have the recovery completion future. 
waitForTxStateVacuum(txNodes, abandonedTxId, partId, true, 10_000); } @@ -361,29 +409,35 @@ public void testVacuumWithCleanupDelay() throws InterruptedException { return false; }); + Transaction roTxBefore = beginReadOnlyTx(anyNode()); + CompletableFuture commitFut = tx.commitAsync(); waitForTxStateReplication(commitPartNodes, txId, commitPartId, 10_000); assertThat(cleanupStarted, willCompleteSuccessfully()); - // Wait for vacuum completion on a node that doesn't host the commit partition. - waitForTxStateVacuum(Set.of(leaseholderForAnotherTuple.name()), txId, 0, false, 10_000); + // Check the vacuum result on a node that doesn't host the commit partition. + triggerVacuum(); + assertTxStateVacuumized(Set.of(leaseholderForAnotherTuple.name()), txId, commitPartId, false); // Unblocking cleanup. cleanupAllowed.complete(null); assertThat(commitFut, willCompleteSuccessfully()); - // Wait for the cleanup on the commit partition group. - waitForTxStateVacuum(txId, commitPartId, true, 10_000); + Transaction roTxAfter = beginReadOnlyTx(anyNode()); - // Trying to read the values. - Tuple data0 = view.get(null, Tuple.create().set("key", tuple0.longValue("key"))); - assertEquals(tuple0, data0); + triggerVacuum(); + assertTxStateVacuumized(txId, commitPartId, true); - Tuple data1 = view.get(null, Tuple.create().set("key", tuple1.longValue("key"))); - assertEquals(tuple1, data1); + // Trying to read the values. 
+ Tuple key0 = Tuple.create().set("key", tuple0.longValue("key")); + Tuple key1 = Tuple.create().set("key", tuple1.longValue("key")); + checkValueReadOnly(view, roTxBefore, key0, null); + checkValueReadOnly(view, roTxAfter, key0, tuple0); + checkValueReadOnly(view, roTxBefore, key1, null); + checkValueReadOnly(view, roTxAfter, key1, tuple1); } /** @@ -401,6 +455,11 @@ public void testVacuumWithCleanupDelay() throws InterruptedException { */ @Test public void testCommitPartitionPrimaryChangesBeforeVacuum() throws InterruptedException { + // We can't leave TTL as 0 here, because the primary replica is changed during cleanup, and this means + // WriteIntentSwitchReplicaRequest will be processed not on the primary. Removing tx state instantly will cause incorrect + // tx recovery and write intent switch with tx state as ABORTED. + setTxResourceTtl(1); + IgniteImpl node = anyNode(); RecordView view = node.tables().table(TABLE_NAME).recordView(); @@ -440,6 +499,8 @@ public void testCommitPartitionPrimaryChangesBeforeVacuum() throws InterruptedEx return false; }); + Transaction roTxBefore = beginReadOnlyTx(anyNode()); + CompletableFuture commitFut = tx.commitAsync(); assertThat(cleanupStarted, willCompleteSuccessfully()); @@ -456,9 +517,14 @@ public void testCommitPartitionPrimaryChangesBeforeVacuum() throws InterruptedEx waitForTxStateVacuum(txId, commitPartId, true, 10_000); + Transaction roTxAfter = beginReadOnlyTx(anyNode()); + + log.info("Test: checking values."); + // Trying to read the value. 
- Tuple data = view.get(null, Tuple.create().set("key", tuple.longValue("key"))); - assertEquals(tuple, data); + Tuple key = Tuple.create().set("key", tuple.longValue("key")); + checkValueReadOnly(view, roTxBefore, key, null); + checkValueReadOnly(view, roTxAfter, key, tuple); } /** @@ -518,12 +584,15 @@ public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() t return false; }); + Transaction roTxBefore = beginReadOnlyTx(anyNode()); + CompletableFuture commitFut = tx.commitAsync(); assertThat(cleanupStarted, willCompleteSuccessfully()); // Wait for volatile tx state vacuum. This is possible because tx finish is complete. - waitForTxStateVacuum(txId, commitPartId, false, 10_000); + triggerVacuum(); + assertTxStateVacuumized(txId, commitPartId, false); log.info("Test: volatile state vacuumized"); @@ -533,11 +602,15 @@ public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() t assertThat(commitFut, willCompleteSuccessfully()); - waitForTxStateVacuum(txId, commitPartId, true, 10_000); + Transaction roTxAfter = beginReadOnlyTx(anyNode()); + + triggerVacuum(); + assertTxStateVacuumized(txId, commitPartId, true); // Trying to read the data. 
- Tuple data = view.get(null, Tuple.create().set("key", tuple.longValue("key"))); - assertEquals(tuple, data); + Tuple key = Tuple.create().set("key", tuple.longValue("key")); + checkValueReadOnly(view, roTxBefore, key, null); + checkValueReadOnly(view, roTxAfter, key, tuple); } /** @@ -582,13 +655,12 @@ public void testRecoveryAfterPersistentStateVacuumized() throws InterruptedExcep view0.upsert(tx0, tuple0); CompletableFuture cleanupStarted = new CompletableFuture<>(); - boolean[] cleanupAllowed = new boolean[1]; commitPartitionLeaseholder.dropMessages((n, msg) -> { if (msg instanceof TxCleanupMessage) { cleanupStarted.complete(null); - return cleanupAllowed[0]; + return false; } return false; @@ -624,7 +696,7 @@ public void testRecoveryAfterPersistentStateVacuumized() throws InterruptedExcep assertEquals(tuple0.longValue("key"), tx0Data.longValue("key")); assertEquals(tuple0.stringValue("val"), tx0Data.stringValue("val")); - waitForTxStateVacuum(txId0, commitPartId, true, 10_000); + // Waiting for vacuum, because there is no recovery future here. waitForTxStateVacuum(txId0, commitPartId, true, 10_000); } @@ -632,6 +704,7 @@ public void testRecoveryAfterPersistentStateVacuumized() throws InterruptedExcep * Check that RO txns read the correct data consistent with commit timestamps. * *
            + *
          • For this test, create another zone and table with number of replicas that is equal to number of nodes;
          • *
          • Start RO tx 1;
          • *
          • Upsert (k1, v1) within RW tx 1 and commit it;
          • *
          • Start RO tx 2;
          • @@ -644,43 +717,95 @@ public void testRecoveryAfterPersistentStateVacuumized() throws InterruptedExcep *
          */ @Test - public void testRoReadTheCorrectDataInBetween() throws InterruptedException { + public void testRoReadTheCorrectDataInBetween() { + setTxResourceTtl(0); + IgniteImpl node = anyNode(); - Transaction roTx1 = node.transactions().begin(new TransactionOptions().readOnly(true)); + String tableName = TABLE_NAME + "_1"; + + // For this test, create another zone and table with number of replicas that is equal to number of nodes. + String zoneSql = "create zone test_zone_1 with partitions=20, replicas=" + initialNodes() + + ", storage_profiles='" + DEFAULT_STORAGE_PROFILE + "'"; + String sql = "create table " + tableName + " (key bigint primary key, val varchar(20)) with primary_zone='TEST_ZONE_1'"; + + cluster.doInSession(0, session -> { + executeUpdate(zoneSql, session); + executeUpdate(sql, session); + }); + + Transaction roTx1 = beginReadOnlyTx(node); Tuple t1 = Tuple.create().set("key", 1L).set("val", "val1"); Tuple t2 = Tuple.create().set("key", 1L).set("val", "val2"); - RecordView view = table(node, TABLE_NAME).recordView(); + RecordView view = table(node, tableName).recordView(); Transaction rwTx1 = node.transactions().begin(); view.upsert(rwTx1, t1); rwTx1.commit(); UUID rwTxId1 = txId(rwTx1); - Transaction roTx2 = node.transactions().begin(new TransactionOptions().readOnly(true)); + Transaction roTx2 = beginReadOnlyTx(node); Transaction rwTx2 = node.transactions().begin(); view.upsert(rwTx2, t2); rwTx2.commit(); UUID rwTxId2 = txId(rwTx1); - Transaction roTx3 = node.transactions().begin(new TransactionOptions().readOnly(true)); + Transaction roTx3 = beginReadOnlyTx(node); - waitForTxStateVacuum(rwTxId1, partitionIdForTuple(node, TABLE_NAME, t1, rwTx1), true, 10_000); - waitForTxStateVacuum(rwTxId2, partitionIdForTuple(node, TABLE_NAME, t2, rwTx2), true, 10_000); + triggerVacuum(); + + assertTxStateVacuumized(rwTxId1, tableName, partitionIdForTuple(node, tableName, t1, rwTx1), true); + assertTxStateVacuumized(rwTxId2, tableName, 
partitionIdForTuple(node, tableName, t1, rwTx2), true); Tuple keyRec = Tuple.create().set("key", 1L); - Tuple r1 = view.get(roTx1, keyRec); - assertNull(r1); + checkValueReadOnly(view, roTx1, keyRec, null); + checkValueReadOnly(view, roTx2, keyRec, t1); + checkValueReadOnly(view, roTx3, keyRec, t2); + } + + private static Transaction beginReadOnlyTx(IgniteImpl node) { + return node.transactions().begin(new TransactionOptions().readOnly(true)); + } + + /** + * Check value using given read only tx. + * + * @param view Record view. + * @param readOnlyTx RO tx. + * @param keyTuple Key tuple. + * @param expected Expected tuple. + */ + private static void checkValueReadOnly(RecordView view, Transaction readOnlyTx, Tuple keyTuple, @Nullable Tuple expected) { + Tuple actual = view.get(readOnlyTx, keyTuple); + + if (expected == null) { + assertNull(actual); + } else { + assertEquals(expected.stringValue("val"), actual.stringValue("val")); + } + } - Tuple r2 = view.get(roTx2, keyRec); - assertEquals(t1.stringValue("val"), r2.stringValue("val")); + private void setTxResourceTtl(long ttl) { + CompletableFuture changeFuture = anyNode().clusterConfiguration().change(c -> + c.changeRoot(TransactionConfiguration.KEY).changeTxnResourceTtl(ttl)); - Tuple r3 = view.get(roTx3, keyRec); - assertEquals(t2.stringValue("val"), r3.stringValue("val")); + assertThat(changeFuture, willCompleteSuccessfully()); + } + + /** + * To use it, the tx resource TTL should be set to {@code 0}, see {@link #setTxResourceTtl(long)}. 
+ */ + private void triggerVacuum() { + runningNodes().forEach(node -> { + log.info("Test: triggering vacuum manually on node: " + node.name()); + + CompletableFuture vacuum1 = node.txManager().vacuum(); + assertThat(vacuum1, willCompleteSuccessfully()); + }); } private boolean checkVolatileTxStateOnNodes(Set nodeConsistentIds, UUID txId) { @@ -731,31 +856,132 @@ private void waitForTxStateVacuum(UUID txId, int partId, boolean checkPersistent */ private void waitForTxStateVacuum(Set nodeConsistentIds, UUID txId, int partId, boolean checkPersistent, long timeMs) throws InterruptedException { - boolean r = waitForCondition(() -> { - boolean result = true; + boolean r = waitForCondition(() -> txStateIsAbsent(nodeConsistentIds, txId, TABLE_NAME, partId, checkPersistent, false), timeMs); - for (Iterator iterator = cluster.runningNodes().iterator(); iterator.hasNext();) { - IgniteImpl node = iterator.next(); + if (!r) { + logCurrentTxState(nodeConsistentIds, txId, TABLE_NAME, partId); + } - if (!nodeConsistentIds.contains(node.name())) { - continue; - } + assertTrue(r); + } - result = result - && volatileTxState(node, txId) == null && (!checkPersistent || persistentTxState(node, txId, partId) == null); - } + /** + * Assert that volatile (and if needed, persistent) state of the given tx is vacuumized on all nodes of the cluster. + * + * @param txId Transaction id. + * @param partId Commit partition id to check the persistent tx state storage of this partition. + * @param checkPersistent Whether to wait for vacuum of persistent tx state as well. + */ + private void assertTxStateVacuumized(UUID txId, int partId, boolean checkPersistent) { + assertTxStateVacuumized(txId, TABLE_NAME, partId, checkPersistent); + } - return result; - }, timeMs); + /** + * Assert that volatile (and if needed, persistent) state of the given tx is vacuumized on all nodes of the cluster. + * + * @param txId Transaction id. 
+ * @param tableName Table name of the table that commit partition belongs to. + * @param partId Commit partition id to check the persistent tx state storage of this partition. + * @param checkPersistent Whether to wait for vacuum of persistent tx state as well. + */ + private void assertTxStateVacuumized(UUID txId, String tableName, int partId, boolean checkPersistent) { + Set allNodes = cluster.runningNodes().map(IgniteImpl::name).collect(toSet()); - if (!r) { - cluster.runningNodes().forEach(node -> { - log.info("Test: volatile state [tx={}, node={}, state={}].", txId, node.name(), volatileTxState(node, txId)); - log.info("Test: persistent state [tx={}, node={}, state={}].", txId, node.name(), persistentTxState(node, txId, partId)); - }); + assertTxStateVacuumized(allNodes, txId, tableName, partId, checkPersistent); + } + + /** + * Assert that volatile (and if needed, persistent) state of the given tx is vacuumized on the given nodes. Uses default + * {@link #TABLE_NAME}. + * + * @param nodeConsistentIds Node names. + * @param txId Transaction id. + * @param partId Commit partition id to check the persistent tx state storage of this partition. + * @param checkPersistent Whether to wait for vacuum of persistent tx state as well. + */ + private void assertTxStateVacuumized(Set nodeConsistentIds, UUID txId, int partId, boolean checkPersistent) { + assertTxStateVacuumized(nodeConsistentIds, txId, TABLE_NAME, partId, checkPersistent); + } + + /** + * Assert that volatile (and if needed, persistent) state of the given tx is vacuumized on the given nodes. + * + * @param nodeConsistentIds Node names. + * @param txId Transaction id. + * @param tableName Table name of the table that commit partition belongs to. + * @param partId Commit partition id to check the persistent tx state storage of this partition. + * @param checkPersistent Whether to wait for vacuum of persistent tx state as well. 
+ */ + private void assertTxStateVacuumized(Set nodeConsistentIds, UUID txId, String tableName, int partId, boolean checkPersistent) { + boolean result = txStateIsAbsent(nodeConsistentIds, txId, tableName, partId, checkPersistent, true); + + if (!result) { + logCurrentTxState(nodeConsistentIds, txId, tableName, partId); } - assertTrue(r); + assertTrue(result); + } + + /** + * Checks whether the tx state is absent on all of the given nodes. + * + * @param nodeConsistentIds Set of node names to check. + * @param txId Transaction id. + * @param tableName Table name of the table that commit partition belongs to. + * @param partId Commit partition id. + * @param checkPersistent Whether the persistent state should be checked. + * @param checkCpPrimaryOnly If {@code} true, the persistent state should be checked only on the commit partition primary, + * otherwise it would be checked on every given node. + * @return {@code true} if tx state is absent, {@code false} otherwise. Call {@link #logCurrentTxState(Set, UUID, String, int)} + * for details. + */ + private boolean txStateIsAbsent( + Set nodeConsistentIds, + UUID txId, + String tableName, + int partId, + boolean checkPersistent, + boolean checkCpPrimaryOnly + ) { + boolean result = true; + + String cpPrimaryId = null; + + if (checkCpPrimaryOnly) { + IgniteImpl node = anyNode(); + + TablePartitionId tablePartitionId = new TablePartitionId(tableId(node, tableName), partId); + + CompletableFuture replicaFut = node.placementDriver().getPrimaryReplica(tablePartitionId, node.clock().now()); + assertThat(replicaFut, willCompleteSuccessfully()); + + ReplicaMeta replicaMeta = replicaFut.join(); + // The test doesn't make sense if there is no primary right now. 
+ assertNotNull(replicaMeta); + + cpPrimaryId = replicaMeta.getLeaseholderId(); + } + + for (Iterator iterator = cluster.runningNodes().iterator(); iterator.hasNext();) { + IgniteImpl node = iterator.next(); + + if (!nodeConsistentIds.contains(node.name())) { + continue; + } + + result = result + && volatileTxState(node, txId) == null + && (!checkPersistent || !node.id().equals(cpPrimaryId) || persistentTxState(node, txId, partId) == null); + } + + return result; + } + + private void logCurrentTxState(Set nodeConsistentIds, UUID txId, String table, int partId) { + cluster.runningNodes().filter(n -> nodeConsistentIds.contains(n.name())).forEach(node -> { + log.info("Test: volatile state [tx={}, node={}, state={}].", txId, node.name(), volatileTxState(node, txId)); + log.info("Test: persistent state [tx={}, node={}, state={}].", txId, node.name(), persistentTxState(node, txId, table, partId)); + }); } private IgniteImpl anyNode() { @@ -770,10 +996,15 @@ private static TransactionMeta volatileTxState(IgniteImpl node, UUID txId) { @Nullable private TransactionMeta persistentTxState(IgniteImpl node, UUID txId, int partId) { + return persistentTxState(node, txId, TABLE_NAME, partId); + } + + @Nullable + private TransactionMeta persistentTxState(IgniteImpl node, UUID txId, String tableName, int partId) { TransactionMeta[] meta = new TransactionMeta[1]; Future f = txStateStorageExecutor.submit(() -> { - TxStateStorage txStateStorage = table(node, TABLE_NAME).internalTable().txStateStorage().getTxStateStorage(partId); + TxStateStorage txStateStorage = table(node, tableName).internalTable().txStateStorage().getTxStateStorage(partId); assertNotNull(txStateStorage); diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java index 56f1813a78b2..c35cffc19521 100644 --- 
a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java @@ -441,6 +441,8 @@ private void handleWriteIntentSwitchCommand(WriteIntentSwitchCommand cmd, long c markFinished(txId, cmd.commit(), cmd.commitTimestamp(), null); + LOG.info("qqq switching wi-s commit={} txId={}", cmd.commit(), cmd.txId()); + storageUpdateHandler.switchWriteIntents( txId, cmd.commit(), diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java index f919d66fec0f..37b5c231691d 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java @@ -1871,6 +1871,8 @@ private CompletableFuture applyWriteIntentSwitc .requiredCatalogVersion(catalogVersion) .build(); + LOG.info("qqq switching wi-s for commit={}, tx={}", commit, transactionId); + storageUpdateHandler.switchWriteIntents( transactionId, commit, @@ -3694,12 +3696,16 @@ private void scheduleAsyncWriteIntentSwitch(UUID txId, RowId rowId, TransactionM // We don't need to take the partition snapshots read lock, see #INTERNAL_DOC_PLACEHOLDER why. 
return txManager.executeWriteIntentSwitchAsync(() -> inBusyLock(busyLock, - () -> storageUpdateHandler.switchWriteIntents( - txId, - txState == COMMITTED, - commitTimestamp, - indexIdsAtRwTxBeginTs(txId) - ) + () -> { + LOG.info("qqq switching wi-s async for commit={}, tx={}", txState == COMMITTED, txId); + + storageUpdateHandler.switchWriteIntents( + txId, + txState == COMMITTED, + commitTimestamp, + indexIdsAtRwTxBeginTs(txId) + ); + } )).whenComplete((unused, e) -> { if (e != null) { LOG.warn("Failed to complete transaction cleanup command [txId=" + txId + ']', e); diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/TxManager.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/TxManager.java index c050f6126274..badc0776518d 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/TxManager.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/TxManager.java @@ -185,8 +185,12 @@ CompletableFuture cleanup( */ CompletableFuture cleanup(String node, UUID txId); - /** Locally vacuums no longer needed transactional resources, like txnState both persistent and volatile. */ - void vacuum(); + /** + * Locally vacuums no longer needed transactional resources, like txnState both persistent and volatile. + * + * @return Vacuum complete future. + */ + CompletableFuture vacuum(); /** * Returns a number of finished transactions. 
diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java index 9ccc35b5b528..52beab9d7d45 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/PersistentTxStateVacuumizer.java @@ -90,9 +90,9 @@ public CompletableFuture> vacuumPersistentTxStates(Map { CompletableFuture future = placementDriver.getPrimaryReplica(commitPartitionId, now) .thenCompose(replicaMeta -> { - // If the primary replica is absent this means that another replica would become primary and - // the volatile state (as well as cleanup completion timestamp) would be updated there, and then - // this operation would be called from there. + // If the primary replica is absent or is not located on the local node, this means that the primary either is + // on another node or would be re-elected on local one; then the volatile state (as well as cleanup completion + // timestamp) would be updated there, and then this operation would be called from there. // Also, we are going to send the vacuum request only to the local node. 
if (replicaMeta != null && localNode.id().equals(replicaMeta.getLeaseholderId())) { VacuumTxStateReplicaRequest request = TX_MESSAGES_FACTORY.vacuumTxStateReplicaRequest() diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java index 4111ed7ab6af..d3d7437c4dfb 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/TxManagerImpl.java @@ -843,14 +843,14 @@ public CompletableFuture cleanup(String node, UUID txId) { } @Override - public void vacuum() { + public CompletableFuture vacuum() { if (persistentTxStateVacuumizer == null) { - return; // Not started yet. + return nullCompletedFuture(); // Not started yet. } long vacuumObservationTimestamp = System.currentTimeMillis(); - txStateVolatileStorage.vacuum(vacuumObservationTimestamp, txConfig.txnResourceTtl().value(), + return txStateVolatileStorage.vacuum(vacuumObservationTimestamp, txConfig.txnResourceTtl().value(), persistentTxStateVacuumizer::vacuumPersistentTxStates); } diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java index c03c04a88bc2..d0e65a9e4383 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/VolatileTxStateMetaStorage.java @@ -18,7 +18,6 @@ package org.apache.ignite.internal.tx.impl; import static java.lang.Math.max; -import static java.util.Objects.requireNonNull; import static org.apache.ignite.internal.tx.TxState.PENDING; import static org.apache.ignite.internal.tx.TxState.checkTransitionCorrectness; @@ -129,8 +128,9 @@ public Collection 
states() { * @param txnResourceTtl Transactional resource time to live in milliseconds. * @param persistentVacuumOp Persistent vacuum operation. Accepts the map of commit partition ids to set of * tx ids, returns a future with set of successfully vacuumized tx ids. + * @return Vacuum complete future. */ - public void vacuum( + public CompletableFuture vacuum( long vacuumObservationTimestamp, long txnResourceTtl, Function>, CompletableFuture>> persistentVacuumOp @@ -150,14 +150,14 @@ public void vacuum( if (TxState.isFinalState(meta0.txState())) { Long initialVacuumObservationTimestamp = meta0.initialVacuumObservationTimestamp(); - if (initialVacuumObservationTimestamp == null) { + if (initialVacuumObservationTimestamp == null && txnResourceTtl > 0) { markedAsInitiallyDetectedTxnsCount.incrementAndGet(); return markInitialVacuumObservationTimestamp(meta0, vacuumObservationTimestamp); } else { Long cleanupCompletionTimestamp = meta0.cleanupCompletionTimestamp(); - boolean shouldBeVacuumized = shouldBeVacuumized(requireNonNull(initialVacuumObservationTimestamp), + boolean shouldBeVacuumized = shouldBeVacuumized(initialVacuumObservationTimestamp, cleanupCompletionTimestamp, txnResourceTtl, vacuumObservationTimestamp); if (shouldBeVacuumized) { @@ -188,7 +188,7 @@ public void vacuum( }); }); - persistentVacuumOp.apply(txIds) + return persistentVacuumOp.apply(txIds) .thenAccept(successful -> { for (UUID txId : successful) { txStateMap.compute(txId, (k, v) -> { @@ -197,7 +197,13 @@ public void vacuum( } else { Long cleanupCompletionTs = cleanupCompletionTimestamps.get(txId); - return (Objects.equals(cleanupCompletionTs, v.cleanupCompletionTimestamp())) ? null : v; + TxStateMeta newMeta = (Objects.equals(cleanupCompletionTs, v.cleanupCompletionTimestamp())) ? 
null : v; + + if (newMeta == null) { + vacuumizedTxnsCount.incrementAndGet(); + } + + return newMeta; } }); } @@ -232,7 +238,7 @@ private static TxStateMeta markInitialVacuumObservationTimestamp(TxStateMeta met } private static boolean shouldBeVacuumized( - long initialVacuumObservationTimestamp, + @Nullable Long initialVacuumObservationTimestamp, @Nullable Long cleanupCompletionTimestamp, long txnResourceTtl, long vacuumObservationTimestamp) { @@ -240,6 +246,9 @@ private static boolean shouldBeVacuumized( return true; } + assert initialVacuumObservationTimestamp != null : "initialVacuumObservationTimestamp should have been set if txnResourceTtl > 0 " + + "[txnResourceTtl=" + txnResourceTtl + "]."; + if (cleanupCompletionTimestamp == null) { return initialVacuumObservationTimestamp + txnResourceTtl < vacuumObservationTimestamp; } else { From a4edea2b63cab5266510c5579b346c888e82562a Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Sat, 27 Apr 2024 08:15:52 +0300 Subject: [PATCH 22/26] removed logs --- .../distributed/raft/PartitionListener.java | 2 -- .../replicator/PartitionReplicaListener.java | 18 ++++++------------ 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java index c35cffc19521..56f1813a78b2 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/raft/PartitionListener.java @@ -441,8 +441,6 @@ private void handleWriteIntentSwitchCommand(WriteIntentSwitchCommand cmd, long c markFinished(txId, cmd.commit(), cmd.commitTimestamp(), null); - LOG.info("qqq switching wi-s commit={} txId={}", cmd.commit(), cmd.txId()); - storageUpdateHandler.switchWriteIntents( txId, cmd.commit(), diff --git 
a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java index 37b5c231691d..4c5bfaa90964 100644 --- a/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java +++ b/modules/table/src/main/java/org/apache/ignite/internal/table/distributed/replicator/PartitionReplicaListener.java @@ -1871,8 +1871,6 @@ private CompletableFuture applyWriteIntentSwitc .requiredCatalogVersion(catalogVersion) .build(); - LOG.info("qqq switching wi-s for commit={}, tx={}", commit, transactionId); - storageUpdateHandler.switchWriteIntents( transactionId, commit, @@ -3696,16 +3694,12 @@ private void scheduleAsyncWriteIntentSwitch(UUID txId, RowId rowId, TransactionM // We don't need to take the partition snapshots read lock, see #INTERNAL_DOC_PLACEHOLDER why. return txManager.executeWriteIntentSwitchAsync(() -> inBusyLock(busyLock, - () -> { - LOG.info("qqq switching wi-s async for commit={}, tx={}", txState == COMMITTED, txId); - - storageUpdateHandler.switchWriteIntents( - txId, - txState == COMMITTED, - commitTimestamp, - indexIdsAtRwTxBeginTs(txId) - ); - } + () -> storageUpdateHandler.switchWriteIntents( + txId, + txState == COMMITTED, + commitTimestamp, + indexIdsAtRwTxBeginTs(txId) + ) )).whenComplete((unused, e) -> { if (e != null) { LOG.warn("Failed to complete transaction cleanup command [txId=" + txId + ']', e); From 66c892adeeaac6e195fea8add0f81dcbbb8b622c Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Sat, 27 Apr 2024 14:20:52 +0300 Subject: [PATCH 23/26] tests fix --- .../internal/table/ItTxResourcesVacuumTest.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java 
b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java index 2dcaca529b51..73384160d13a 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java @@ -429,7 +429,7 @@ public void testVacuumWithCleanupDelay() throws InterruptedException { Transaction roTxAfter = beginReadOnlyTx(anyNode()); triggerVacuum(); - assertTxStateVacuumized(txId, commitPartId, true); + waitForTxStateVacuum(txId, commitPartId, true, 10_000); // Trying to read the values. Tuple key0 = Tuple.create().set("key", tuple0.longValue("key")); @@ -588,6 +588,8 @@ public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() t CompletableFuture commitFut = tx.commitAsync(); + waitForTxStateReplication(commitPartNodes, txId, commitPartId, 10_000); + assertThat(cleanupStarted, willCompleteSuccessfully()); // Wait for volatile tx state vacuum. This is possible because tx finish is complete. @@ -605,7 +607,7 @@ public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() t Transaction roTxAfter = beginReadOnlyTx(anyNode()); triggerVacuum(); - assertTxStateVacuumized(txId, commitPartId, true); + waitForTxStateVacuum(txId, commitPartId, true, 10_000); // Trying to read the data. 
Tuple key = Tuple.create().set("key", tuple.longValue("key")); @@ -758,7 +760,7 @@ public void testRoReadTheCorrectDataInBetween() { triggerVacuum(); assertTxStateVacuumized(rwTxId1, tableName, partitionIdForTuple(node, tableName, t1, rwTx1), true); - assertTxStateVacuumized(rwTxId2, tableName, partitionIdForTuple(node, tableName, t1, rwTx2), true); + assertTxStateVacuumized(rwTxId2, tableName, partitionIdForTuple(node, tableName, t2, rwTx2), true); Tuple keyRec = Tuple.create().set("key", 1L); @@ -803,8 +805,8 @@ private void triggerVacuum() { runningNodes().forEach(node -> { log.info("Test: triggering vacuum manually on node: " + node.name()); - CompletableFuture vacuum1 = node.txManager().vacuum(); - assertThat(vacuum1, willCompleteSuccessfully()); + CompletableFuture vacuumFut = node.txManager().vacuum(); + assertThat(vacuumFut, willCompleteSuccessfully()); }); } From 618276d70fa8d3ef54481b0c48eb8aee9a2a5f03 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Sat, 27 Apr 2024 16:54:22 +0300 Subject: [PATCH 24/26] turned off time-based vacuum for some tests to avoid races --- .../table/ItTxResourcesVacuumTest.java | 10 +++++-- .../tx/impl/ResourceVacuumManager.java | 28 ++++++++++--------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java index 73384160d13a..ef5d9dacb918 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java @@ -356,6 +356,7 @@ public void testAbandonedTxnsAreNotVacuumizedUntilRecovered() throws Interrupted *
        */ @Test + @WithSystemProperty(key = RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY, value = "0") public void testVacuumWithCleanupDelay() throws InterruptedException { IgniteImpl node = anyNode(); @@ -428,8 +429,10 @@ public void testVacuumWithCleanupDelay() throws InterruptedException { Transaction roTxAfter = beginReadOnlyTx(anyNode()); + waitForCondition(() -> volatileTxState(commitPartitionLeaseholder, txId) != null, 10_000); + triggerVacuum(); - waitForTxStateVacuum(txId, commitPartId, true, 10_000); + assertTxStateVacuumized(txId, commitPartId, true); // Trying to read the values. Tuple key0 = Tuple.create().set("key", tuple0.longValue("key")); @@ -542,6 +545,7 @@ public void testCommitPartitionPrimaryChangesBeforeVacuum() throws InterruptedEx *
      */ @Test + @WithSystemProperty(key = RESOURCE_VACUUM_INTERVAL_MILLISECONDS_PROPERTY, value = "0") public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() throws InterruptedException { IgniteImpl node = anyNode(); @@ -606,8 +610,10 @@ public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() t Transaction roTxAfter = beginReadOnlyTx(anyNode()); + waitForCondition(() -> volatileTxState(commitPartitionLeaseholder, txId) != null, 10_000); + triggerVacuum(); - waitForTxStateVacuum(txId, commitPartId, true, 10_000); + assertTxStateVacuumized(txId, commitPartId, false); // Trying to read the data. Tuple key = Tuple.create().set("key", tuple.longValue("key")); diff --git a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/ResourceVacuumManager.java b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/ResourceVacuumManager.java index c4d2c1ad8c3c..b355ef80b6ff 100644 --- a/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/ResourceVacuumManager.java +++ b/modules/transactions/src/main/java/org/apache/ignite/internal/tx/impl/ResourceVacuumManager.java @@ -106,19 +106,21 @@ public ResourceVacuumManager( @Override public CompletableFuture startAsync() { - resourceVacuumExecutor.scheduleAtFixedRate( - this::runVacuumOperations, - 0, - resourceVacuumIntervalMilliseconds, - TimeUnit.MILLISECONDS - ); - - resourceVacuumExecutor.scheduleAtFixedRate( - finishedReadOnlyTransactionTracker::broadcastClosedTransactions, - 0, - resourceVacuumIntervalMilliseconds, - TimeUnit.MILLISECONDS - ); + if (resourceVacuumIntervalMilliseconds > 0) { + resourceVacuumExecutor.scheduleAtFixedRate( + this::runVacuumOperations, + 0, + resourceVacuumIntervalMilliseconds, + TimeUnit.MILLISECONDS + ); + + resourceVacuumExecutor.scheduleAtFixedRate( + finishedReadOnlyTransactionTracker::broadcastClosedTransactions, + 0, + resourceVacuumIntervalMilliseconds, + TimeUnit.MILLISECONDS + ); + } 
finishedTransactionBatchRequestHandler.start(); From e0c985fac4a9bb0d9993f58b47cb3899720a5180 Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Sat, 27 Apr 2024 22:13:59 +0300 Subject: [PATCH 25/26] * --- .../table/ItTxResourcesVacuumTest.java | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java index ef5d9dacb918..eb02e0ead26a 100644 --- a/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java +++ b/modules/table/src/integrationTest/java/org/apache/ignite/internal/table/ItTxResourcesVacuumTest.java @@ -62,6 +62,7 @@ import org.apache.ignite.internal.thread.IgniteThreadFactory; import org.apache.ignite.internal.thread.ThreadOperation; import org.apache.ignite.internal.tx.TransactionMeta; +import org.apache.ignite.internal.tx.TxStateMeta; import org.apache.ignite.internal.tx.configuration.TransactionConfiguration; import org.apache.ignite.internal.tx.impl.TxManagerImpl; import org.apache.ignite.internal.tx.message.TxCleanupMessage; @@ -608,12 +609,20 @@ public void testVacuumPersistentStateAfterCleanupDelayAndVolatileStateVacuum() t assertThat(commitFut, willCompleteSuccessfully()); + log.info("Test: commit completed."); + Transaction roTxAfter = beginReadOnlyTx(anyNode()); - waitForCondition(() -> volatileTxState(commitPartitionLeaseholder, txId) != null, 10_000); + waitForCondition(() -> { + TxStateMeta txStateMeta = (TxStateMeta) volatileTxState(commitPartitionLeaseholder, txId); + + return txStateMeta != null && txStateMeta.cleanupCompletionTimestamp() != null; + }, 10_000); + + log.info("Test: cleanup completed."); triggerVacuum(); - assertTxStateVacuumized(txId, commitPartId, false); + assertTxStateVacuumized(txId, commitPartId, true); // Trying to read the data. 
Tuple key = Tuple.create().set("key", tuple.longValue("key")); @@ -678,8 +687,6 @@ public void testRecoveryAfterPersistentStateVacuumized() throws InterruptedExcep tx0.commitAsync(); - assertThat(cleanupStarted, willCompleteSuccessfully()); - // Check that the final tx state COMMITTED is saved to the persistent tx storage. assertTrue(waitForCondition(() -> cluster.runningNodes().filter(n -> commitPartitionNodes.contains(n.name())).allMatch(n -> { TransactionMeta meta = persistentTxState(n, txId0, commitPartId); @@ -687,6 +694,8 @@ public void testRecoveryAfterPersistentStateVacuumized() throws InterruptedExcep return meta != null && meta.txState() == COMMITTED; }), 10_000)); + assertThat(cleanupStarted, willCompleteSuccessfully()); + // Stop the first transaction coordinator. stopNode(coord0.name()); @@ -924,7 +933,13 @@ private void assertTxStateVacuumized(Set nodeConsistentIds, UUID txId, S boolean result = txStateIsAbsent(nodeConsistentIds, txId, tableName, partId, checkPersistent, true); if (!result) { - logCurrentTxState(nodeConsistentIds, txId, tableName, partId); + triggerVacuum(); + + result = txStateIsAbsent(nodeConsistentIds, txId, tableName, partId, checkPersistent, true); + + if (!result) { + logCurrentTxState(nodeConsistentIds, txId, tableName, partId); + } } assertTrue(result); From 99d0913d9497221feecfd9e4b392898ac91281cb Mon Sep 17 00:00:00 2001 From: denis-chudov Date: Mon, 29 Apr 2024 11:35:39 +0300 Subject: [PATCH 26/26] reworked dependencies --- modules/compute/build.gradle | 1 - modules/index/build.gradle | 1 - modules/placement-driver/build.gradle | 1 - .../apache/ignite/internal/TestWrappers.java | 11 ------- modules/sql-engine/build.gradle | 1 - .../tx/test/ItTransactionTestUtils.java | 33 +++++++++++++++++-- 6 files changed, 30 insertions(+), 18 deletions(-) rename modules/{transactions => runner}/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java (90%) diff --git a/modules/compute/build.gradle b/modules/compute/build.gradle 
index e42d28acdb17..05b72572212c 100644 --- a/modules/compute/build.gradle +++ b/modules/compute/build.gradle @@ -59,7 +59,6 @@ dependencies { integrationTestImplementation project(':ignite-client') integrationTestImplementation testFixtures(project(':ignite-core')) integrationTestImplementation testFixtures(project(':ignite-runner')) - integrationTestImplementation testFixtures(project(':ignite-transactions')) } description = 'ignite-compute' diff --git a/modules/index/build.gradle b/modules/index/build.gradle index 3cd9b5943176..000e9bbea147 100644 --- a/modules/index/build.gradle +++ b/modules/index/build.gradle @@ -76,7 +76,6 @@ dependencies { integrationTestImplementation testFixtures(project(':ignite-sql-engine')) integrationTestImplementation testFixtures(project(':ignite-table')) integrationTestImplementation testFixtures(project(':ignite-storage-api')) - integrationTestImplementation testFixtures(project(':ignite-transactions')) integrationTestImplementation libs.jetbrains.annotations } diff --git a/modules/placement-driver/build.gradle b/modules/placement-driver/build.gradle index 7626f984c5f8..9efc3a557f20 100644 --- a/modules/placement-driver/build.gradle +++ b/modules/placement-driver/build.gradle @@ -66,7 +66,6 @@ dependencies { integrationTestImplementation(testFixtures(project(':ignite-distribution-zones'))) integrationTestImplementation(testFixtures(project(':ignite-runner'))) integrationTestImplementation(testFixtures(project(':ignite-replicator'))) - integrationTestImplementation(testFixtures(project(':ignite-transactions'))) testImplementation(testFixtures(project(':ignite-core'))) testImplementation(testFixtures(project(':ignite-metastorage'))) diff --git a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java b/modules/runner/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java similarity index 90% rename from modules/transactions/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java 
rename to modules/runner/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java index d232afcb6645..259a1bed36c9 100644 --- a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java +++ b/modules/runner/src/testFixtures/java/org/apache/ignite/internal/TestWrappers.java @@ -18,13 +18,11 @@ package org.apache.ignite.internal; import org.apache.ignite.internal.table.IgniteTablesInternal; -import org.apache.ignite.internal.table.RecordBinaryViewImpl; import org.apache.ignite.internal.table.TableImpl; import org.apache.ignite.internal.table.TableViewInternal; import org.apache.ignite.internal.table.distributed.TableManager; import org.apache.ignite.internal.tx.impl.IgniteTransactionsImpl; import org.apache.ignite.internal.wrapper.Wrappers; -import org.apache.ignite.table.RecordView; import org.apache.ignite.table.Table; import org.apache.ignite.table.manager.IgniteTables; import org.apache.ignite.tx.IgniteTransactions; @@ -101,13 +99,4 @@ public static IgniteTransactionsImpl unwrapIgniteTransactionsImpl(IgniteTransact public static Transaction unwrapIgniteTransaction(Transaction tx) { return Wrappers.unwrap(tx, Transaction.class); } - - /** - * Unwraps {@link RecordBinaryViewImpl} from a {@link RecordView}. - * - * @param view View to unwrap. 
- */ - public static RecordBinaryViewImpl unwrapRecordBinaryViewImpl(RecordView view) { - return Wrappers.unwrap(view, RecordBinaryViewImpl.class); - } } diff --git a/modules/sql-engine/build.gradle b/modules/sql-engine/build.gradle index c409dbd8c617..b24dbd327dfb 100644 --- a/modules/sql-engine/build.gradle +++ b/modules/sql-engine/build.gradle @@ -126,7 +126,6 @@ dependencies { integrationTestImplementation testFixtures(project(':ignite-core')) integrationTestImplementation testFixtures(project(':ignite-schema')) integrationTestImplementation testFixtures(project(':ignite-sql-engine')) - integrationTestImplementation testFixtures(project(':ignite-transactions')) integrationTestImplementation testFixtures(project(':ignite-table')) integrationTestImplementation testFixtures(project(':ignite-runner')) integrationTestImplementation libs.jetbrains.annotations diff --git a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/test/ItTransactionTestUtils.java b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/test/ItTransactionTestUtils.java index df34596c96ab..12aa397ae64d 100644 --- a/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/test/ItTransactionTestUtils.java +++ b/modules/transactions/src/testFixtures/java/org/apache/ignite/internal/tx/test/ItTransactionTestUtils.java @@ -20,9 +20,6 @@ import static java.util.Objects.requireNonNull; import static java.util.concurrent.TimeUnit.SECONDS; import static java.util.stream.Collectors.toSet; -import static org.apache.ignite.internal.TestWrappers.unwrapIgniteTransaction; -import static org.apache.ignite.internal.TestWrappers.unwrapRecordBinaryViewImpl; -import static org.apache.ignite.internal.TestWrappers.unwrapTableImpl; import static org.apache.ignite.internal.distributionzones.rebalance.RebalanceUtil.stablePartAssignmentsKey; import static org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully; import 
static org.hamcrest.MatcherAssert.assertThat; @@ -46,6 +43,9 @@ import org.apache.ignite.internal.table.RecordBinaryViewImpl; import org.apache.ignite.internal.table.TableImpl; import org.apache.ignite.internal.tx.impl.ReadWriteTransactionImpl; +import org.apache.ignite.internal.wrapper.Wrappers; +import org.apache.ignite.table.RecordView; +import org.apache.ignite.table.Table; import org.apache.ignite.table.Tuple; import org.apache.ignite.tx.Transaction; import org.jetbrains.annotations.Nullable; @@ -203,4 +203,31 @@ public static ReplicaMeta waitAndGetPrimaryReplica(IgniteImpl node, ReplicationG return primaryReplicaFut.join(); } + + /** + * Unwraps {@link RecordBinaryViewImpl} from a {@link RecordView}. + * + * @param view View to unwrap. + */ + private static RecordBinaryViewImpl unwrapRecordBinaryViewImpl(RecordView view) { + return Wrappers.unwrap(view, RecordBinaryViewImpl.class); + } + + /** + * Unwraps {@link TableImpl} from a {@link Table}. + * + * @param table Table to unwrap. + */ + private static TableImpl unwrapTableImpl(Table table) { + return Wrappers.unwrap(table, TableImpl.class); + } + + /** + * Unwraps {@link Transaction} from an {@link Transaction}. + * + * @param tx Object to unwrap. + */ + private static Transaction unwrapIgniteTransaction(Transaction tx) { + return Wrappers.unwrap(tx, Transaction.class); + } }