From 6e5e469e96f0486cfc96c55eb0fe6ebe25f223e5 Mon Sep 17 00:00:00 2001 From: Marina Sahakyan Date: Tue, 25 Oct 2022 10:51:41 +0200 Subject: [PATCH] migration: fixing sleeping state for broken files Motivation When broken files were discovered during migration, the migration module went into a sleeping state. This happened because, when a broken file was detected, a DiskErrorCacheException was thrown and the pool was set to disabled mode. Result This is fixed now and migration should work correctly for both cases. Target: master, 9.0 8.2 Patch: https://rb.dcache.org/r/13814/ Acked-by: Tigran Mkrtchyan Requires-notes: no --- .../org/dcache/chimera/nfsv41/mover/NfsMover.java | 2 +- .../chimera/nfsv41/mover/NfsTransferService.java | 2 +- .../java/org/dcache/pool/movers/AbstractMover.java | 6 +++++- .../org/dcache/pool/movers/MoverChannelMover.java | 3 ++- .../src/main/java/org/dcache/pool/p2p/Companion.java | 2 +- .../dcache/pool/repository/ReplicaDescriptor.java | 2 +- .../dcache/pool/repository/v5/ReadHandleImpl.java | 6 ++++-- .../src/main/smc/org/dcache/pool/migration/Task.sm | 12 ++++++++++-- .../tests/repository/RepositorySubsystemTest.java | 4 ++-- 9 files changed, 27 insertions(+), 12 deletions(-) diff --git a/modules/dcache-nfs/src/main/java/org/dcache/chimera/nfsv41/mover/NfsMover.java b/modules/dcache-nfs/src/main/java/org/dcache/chimera/nfsv41/mover/NfsMover.java index e55c5d94c80..095e91f00d6 100644 --- a/modules/dcache-nfs/src/main/java/org/dcache/chimera/nfsv41/mover/NfsMover.java +++ b/modules/dcache-nfs/src/main/java/org/dcache/chimera/nfsv41/mover/NfsMover.java @@ -84,7 +84,7 @@ protected String getStatus() { * @throws DiskErrorCacheException */ public Cancellable enable(final CompletionHandler completionHandler) - throws DiskErrorCacheException, InterruptedIOException { + throws DiskErrorCacheException, InterruptedIOException, CacheException { open(); _completionHandler = completionHandler; diff --git 
a/modules/dcache-nfs/src/main/java/org/dcache/chimera/nfsv41/mover/NfsTransferService.java b/modules/dcache-nfs/src/main/java/org/dcache/chimera/nfsv41/mover/NfsTransferService.java index f2932402835..16a1a555d76 100644 --- a/modules/dcache-nfs/src/main/java/org/dcache/chimera/nfsv41/mover/NfsTransferService.java +++ b/modules/dcache-nfs/src/main/java/org/dcache/chimera/nfsv41/mover/NfsTransferService.java @@ -329,7 +329,7 @@ public Set getChannelCreateOptions() { @Override public Cancellable executeMover(final NfsMover mover, final CompletionHandler completionHandler) - throws DiskErrorCacheException, InterruptedIOException { + throws DiskErrorCacheException, InterruptedIOException, CacheException { final Cancellable cancellableMover = mover.enable(completionHandler); notifyDoorWithRedirect(mover); diff --git a/modules/dcache/src/main/java/org/dcache/pool/movers/AbstractMover.java b/modules/dcache/src/main/java/org/dcache/pool/movers/AbstractMover.java index 9ec80e3198c..a34a324a20a 100644 --- a/modules/dcache/src/main/java/org/dcache/pool/movers/AbstractMover.java +++ b/modules/dcache/src/main/java/org/dcache/pool/movers/AbstractMover.java @@ -24,6 +24,7 @@ import com.google.common.reflect.TypeToken; import diskCacheV111.util.CacheException; import diskCacheV111.util.DiskErrorCacheException; +import diskCacheV111.util.FileCorruptedCacheException; import diskCacheV111.vehicles.PoolAcceptFileMessage; import diskCacheV111.vehicles.PoolIoFileMessage; import diskCacheV111.vehicles.ProtocolInfo; @@ -271,7 +272,8 @@ public synchronized void onFailure(Throwable t, Void attachment) { * @throws InterruptedIOException if the mover was cancelled * @throws DiskErrorCacheException If the file could not be opened */ - public RepositoryChannel openChannel() throws DiskErrorCacheException, InterruptedIOException { + public RepositoryChannel openChannel() + throws DiskErrorCacheException, InterruptedIOException, CacheException { RepositoryChannel channel; try { channel = 
_handle.createChannel(); @@ -282,8 +284,10 @@ public RepositoryChannel openChannel() throws DiskErrorCacheException, Interrupt throw new DiskErrorCacheException( "File could not be opened; please check the file system: " + messageOrClassName(e), e); + } + synchronized (_checksumTypes) { _checksumChannel = channel.optionallyAs(ChecksumChannel.class).orElse(null); if (_checksumChannel != null) { diff --git a/modules/dcache/src/main/java/org/dcache/pool/movers/MoverChannelMover.java b/modules/dcache/src/main/java/org/dcache/pool/movers/MoverChannelMover.java index e10523f342b..21b1ac564f3 100644 --- a/modules/dcache/src/main/java/org/dcache/pool/movers/MoverChannelMover.java +++ b/modules/dcache/src/main/java/org/dcache/pool/movers/MoverChannelMover.java @@ -19,6 +19,7 @@ import static com.google.common.base.Preconditions.checkState; +import diskCacheV111.util.CacheException; import diskCacheV111.util.DiskErrorCacheException; import diskCacheV111.vehicles.PoolIoFileMessage; import diskCacheV111.vehicles.ProtocolInfo; @@ -75,7 +76,7 @@ public long getLastTransferred() { * @throws IllegalStateException if called more than once */ public synchronized MoverChannel

open() - throws DiskErrorCacheException, InterruptedIOException { + throws DiskErrorCacheException, InterruptedIOException, CacheException { checkState(_wrappedChannel == null); _wrappedChannel = new MoverChannel<>(this, openChannel()); return _wrappedChannel; diff --git a/modules/dcache/src/main/java/org/dcache/pool/p2p/Companion.java b/modules/dcache/src/main/java/org/dcache/pool/p2p/Companion.java index 7ef0e3589a5..7bd35e85174 100644 --- a/modules/dcache/src/main/java/org/dcache/pool/p2p/Companion.java +++ b/modules/dcache/src/main/java/org/dcache/pool/p2p/Companion.java @@ -327,7 +327,7 @@ private void transfer(String uri) { } private Set copy(String uri, ReplicaDescriptor handle) - throws IOException, InterruptedException { + throws IOException, InterruptedException, CacheException { RepositoryChannel channel = handle.createChannel(); try { HttpGet get = new HttpGet(uri); diff --git a/modules/dcache/src/main/java/org/dcache/pool/repository/ReplicaDescriptor.java b/modules/dcache/src/main/java/org/dcache/pool/repository/ReplicaDescriptor.java index 67dbc06b436..fb3db660644 100644 --- a/modules/dcache/src/main/java/org/dcache/pool/repository/ReplicaDescriptor.java +++ b/modules/dcache/src/main/java/org/dcache/pool/repository/ReplicaDescriptor.java @@ -51,7 +51,7 @@ public interface ReplicaDescriptor extends AutoCloseable { * @return repository channel. * @throws IOException if repository channel can't be created. */ - RepositoryChannel createChannel() throws IOException; + RepositoryChannel createChannel() throws IOException, CacheException; /** * Returns the file attributes of the file represented by this replica. 
diff --git a/modules/dcache/src/main/java/org/dcache/pool/repository/v5/ReadHandleImpl.java b/modules/dcache/src/main/java/org/dcache/pool/repository/v5/ReadHandleImpl.java index 2826ca66c95..adca547f4e7 100644 --- a/modules/dcache/src/main/java/org/dcache/pool/repository/v5/ReadHandleImpl.java +++ b/modules/dcache/src/main/java/org/dcache/pool/repository/v5/ReadHandleImpl.java @@ -4,6 +4,7 @@ import com.google.common.collect.ImmutableSet; import diskCacheV111.util.CacheException; +import diskCacheV111.util.FileCorruptedCacheException; import diskCacheV111.util.PnfsHandler; import java.io.IOException; import java.net.URI; @@ -73,11 +74,12 @@ public synchronized void close() throws IllegalStateException { } @Override - public RepositoryChannel createChannel() throws IOException { + public RepositoryChannel createChannel() throws IOException, CacheException { RepositoryChannel channel = _entry.openChannel(_openOptions); long fileSizeAlloc = channel.size(); if (_fileAttributes.getSize() != fileSizeAlloc) { - IOException ex = new IOException("Failed to read the file, because file is Broken."); + FileCorruptedCacheException ex = new FileCorruptedCacheException( + "Failed to read the file, because file is Broken."); try { _entry.update("Filesystem and pool database file sizes are inconsistent", r -> r.setState(ReplicaState.BROKEN)); diff --git a/modules/dcache/src/main/smc/org/dcache/pool/migration/Task.sm b/modules/dcache/src/main/smc/org/dcache/pool/migration/Task.sm index 123f4b29339..f1031bd485e 100644 --- a/modules/dcache/src/main/smc/org/dcache/pool/migration/Task.sm +++ b/modules/dcache/src/main/smc/org/dcache/pool/migration/Task.sm @@ -208,6 +208,14 @@ Entry String.format("Pool %s failed (no route to cell)", ctxt.getTarget())); } + copy_failure(rc: Integer, cause: Object) + [ rc == FILE_CORRUPTED ] + Failed + { + failPermanently(rc, + String.format("Pool %s failed (%s)", + ctxt.getTarget(), cause)); + } copy_failure(rc: Integer, cause: Object) Failed { @@ -228,7 
+236,7 @@ Entry { } messageArrived(message: PoolMigrationCopyFinishedMessage) - [ message.getReturnCode() == FILE_NOT_FOUND ] + [ message.getReturnCode() == FILE_NOT_FOUND || message.getReturnCode() == FILE_CORRUPTED ] Failed { failPermanently(message.getReturnCode(), @@ -329,7 +337,7 @@ Exit { } messageArrived(message: PoolMigrationCopyFinishedMessage) - [ message.getReturnCode() == FILE_NOT_FOUND ] + [ message.getReturnCode() == FILE_NOT_FOUND || message.getReturnCode() == FILE_CORRUPTED ] Failed { failPermanently(message.getReturnCode(), diff --git a/modules/dcache/src/test/java/org/dcache/tests/repository/RepositorySubsystemTest.java b/modules/dcache/src/test/java/org/dcache/tests/repository/RepositorySubsystemTest.java index 979d52fcfc3..7c7c7e84c57 100644 --- a/modules/dcache/src/test/java/org/dcache/tests/repository/RepositorySubsystemTest.java +++ b/modules/dcache/src/test/java/org/dcache/tests/repository/RepositorySubsystemTest.java @@ -128,7 +128,7 @@ public class RepositorySubsystemTest private final CellAddressCore address = new CellAddressCore("pool", "test"); private void createFile(ReplicaDescriptor descriptor, long size) - throws IOException { + throws IOException, CacheException { try (RepositoryChannel channel = descriptor.createChannel()) { channel.write(ByteBuffer.allocate((int) size)); } @@ -491,7 +491,7 @@ protected void run() }; } - @Test(expected = IOException.class) + @Test(expected = CacheException.class) public void testFileIsBroken() throws IOException, IllegalTransitionException, CacheException, InterruptedException {