Skip to content

Commit

Permalink
GG-21338 Fixed checkpointReadLock() may hang during node stop
Browse files Browse the repository at this point in the history
  • Loading branch information
ibessonov authored and dgovorukhin committed Jul 17, 2019
1 parent 00e531c commit fb2553a
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2245,6 +2245,8 @@ else if (plc != PartitionLossPolicy.IGNORE) {
ctx.database().checkpointReadLock();

try {
Map<UUID, Set<Integer>> addToWaitGroups = new HashMap<>();

lock.writeLock().lock();

try {
Expand Down Expand Up @@ -2282,7 +2284,7 @@ else if (plc != PartitionLossPolicy.IGNORE) {

GridDhtPartitionState state = partMap.get(part);

if (state == null || state != OWNING)
if (state != OWNING)
continue;

if (!newOwners.contains(remoteNodeId)) {
Expand All @@ -2302,9 +2304,7 @@ else if (plc != PartitionLossPolicy.IGNORE) {
UUID nodeId = entry.getKey();
Set<Integer> rebalancedParts = entry.getValue();

// Add to wait groups to ensure late assignment switch after all partitions are rebalanced.
for (Integer part : rebalancedParts)
ctx.cache().context().affinity().addToWaitGroup(groupId(), part, nodeId, topologyVersionFuture().initialVersion());
addToWaitGroups.put(nodeId, new HashSet<>(rebalancedParts));

if (!rebalancedParts.isEmpty()) {
Set<Integer> historical = rebalancedParts.stream()
Expand All @@ -2323,9 +2323,22 @@ else if (plc != PartitionLossPolicy.IGNORE) {
}

node2part = new GridDhtPartitionFullMap(node2part, updateSeq.incrementAndGet());
} finally {
}
finally {
lock.writeLock().unlock();
}

for (Map.Entry<UUID, Set<Integer>> entry : addToWaitGroups.entrySet()) {
// Add to wait groups to ensure late assignment switch after all partitions are rebalanced.
for (Integer part : entry.getValue()) {
ctx.cache().context().affinity().addToWaitGroup(
groupId(),
part,
entry.getKey(),
topologyVersionFuture().initialVersion()
);
}
}
}
finally {
ctx.database().checkpointReadUnlock();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
import org.apache.ignite.internal.managers.discovery.GridDiscoveryManager;
import org.apache.ignite.internal.mem.DirectMemoryProvider;
import org.apache.ignite.internal.mem.DirectMemoryRegion;
import org.apache.ignite.internal.metric.IoStatisticsHolderNoOp;
import org.apache.ignite.internal.pagemem.FullPageId;
import org.apache.ignite.internal.pagemem.PageIdAllocator;
import org.apache.ignite.internal.pagemem.PageIdUtils;
Expand Down Expand Up @@ -154,7 +155,6 @@
import org.apache.ignite.internal.processors.compress.CompressionProcessor;
import org.apache.ignite.internal.processors.port.GridPortRecord;
import org.apache.ignite.internal.processors.query.GridQueryProcessor;
import org.apache.ignite.internal.metric.IoStatisticsHolderNoOp;
import org.apache.ignite.internal.util.GridConcurrentHashSet;
import org.apache.ignite.internal.util.GridMultiCollectionWrapper;
import org.apache.ignite.internal.util.GridReadOnlyArrayView;
Expand Down Expand Up @@ -950,7 +950,7 @@ private void finishRecovery() throws IgniteCheckedException {

long time = System.currentTimeMillis();

checkpointReadLock();
CHECKPOINT_LOCK_HOLD_COUNT.set(CHECKPOINT_LOCK_HOLD_COUNT.get() + 1);

try {
for (DatabaseLifecycleListener lsnr : getDatabaseListeners(cctx.kernalContext()))
Expand Down Expand Up @@ -982,7 +982,7 @@ private void finishRecovery() throws IgniteCheckedException {
throw e;
}
finally {
checkpointReadUnlock();
CHECKPOINT_LOCK_HOLD_COUNT.set(CHECKPOINT_LOCK_HOLD_COUNT.get() - 1);
}
}

Expand Down

0 comments on commit fb2553a

Please sign in to comment.