Skip to content

Commit

Permalink
migration: fixing sleeping state for broken files
Browse files Browse the repository at this point in the history
Motivation

When broken files were discovered during migration the migration module was going into sleeping state.

This was due to the fact that when broken file was detected DiscError exception was thrown and the pool was set to disabled mode.

Result

This is fixed now and migration should works correctly for both cases.

Target: master, 9.0 8.2
Patch: https://rb.dcache.org/r/13814/
Acked-by: Tigran Mkrtchyan
Requires-notes: no
  • Loading branch information
mksahakyan committed Mar 29, 2023
1 parent 09d7ec5 commit 6e5e469
Show file tree
Hide file tree
Showing 9 changed files with 27 additions and 12 deletions.
Expand Up @@ -84,7 +84,7 @@ protected String getStatus() {
* @throws DiskErrorCacheException
*/
public Cancellable enable(final CompletionHandler<Void, Void> completionHandler)
throws DiskErrorCacheException, InterruptedIOException {
throws DiskErrorCacheException, InterruptedIOException, CacheException {

open();
_completionHandler = completionHandler;
Expand Down
Expand Up @@ -329,7 +329,7 @@ public Set<? extends OpenOption> getChannelCreateOptions() {
@Override
public Cancellable executeMover(final NfsMover mover,
final CompletionHandler<Void, Void> completionHandler)
throws DiskErrorCacheException, InterruptedIOException {
throws DiskErrorCacheException, InterruptedIOException, CacheException {
final Cancellable cancellableMover = mover.enable(completionHandler);
notifyDoorWithRedirect(mover);

Expand Down
Expand Up @@ -24,6 +24,7 @@
import com.google.common.reflect.TypeToken;
import diskCacheV111.util.CacheException;
import diskCacheV111.util.DiskErrorCacheException;
import diskCacheV111.util.FileCorruptedCacheException;
import diskCacheV111.vehicles.PoolAcceptFileMessage;
import diskCacheV111.vehicles.PoolIoFileMessage;
import diskCacheV111.vehicles.ProtocolInfo;
Expand Down Expand Up @@ -271,7 +272,8 @@ public synchronized void onFailure(Throwable t, Void attachment) {
* @throws InterruptedIOException if the mover was cancelled
* @throws DiskErrorCacheException If the file could not be opened
*/
public RepositoryChannel openChannel() throws DiskErrorCacheException, InterruptedIOException {
public RepositoryChannel openChannel()
throws DiskErrorCacheException, InterruptedIOException, CacheException {
RepositoryChannel channel;
try {
channel = _handle.createChannel();
Expand All @@ -282,8 +284,10 @@ public RepositoryChannel openChannel() throws DiskErrorCacheException, Interrupt
throw new DiskErrorCacheException(
"File could not be opened; please check the file system: "
+ messageOrClassName(e), e);

}


synchronized (_checksumTypes) {
_checksumChannel = channel.optionallyAs(ChecksumChannel.class).orElse(null);
if (_checksumChannel != null) {
Expand Down
Expand Up @@ -19,6 +19,7 @@

import static com.google.common.base.Preconditions.checkState;

import diskCacheV111.util.CacheException;
import diskCacheV111.util.DiskErrorCacheException;
import diskCacheV111.vehicles.PoolIoFileMessage;
import diskCacheV111.vehicles.ProtocolInfo;
Expand Down Expand Up @@ -75,7 +76,7 @@ public long getLastTransferred() {
* @throws IllegalStateException if called more than once
*/
public synchronized MoverChannel<P> open()
throws DiskErrorCacheException, InterruptedIOException {
throws DiskErrorCacheException, InterruptedIOException, CacheException {
checkState(_wrappedChannel == null);
_wrappedChannel = new MoverChannel<>(this, openChannel());
return _wrappedChannel;
Expand Down
Expand Up @@ -327,7 +327,7 @@ private void transfer(String uri) {
}

private Set<Checksum> copy(String uri, ReplicaDescriptor handle)
throws IOException, InterruptedException {
throws IOException, InterruptedException, CacheException {
RepositoryChannel channel = handle.createChannel();
try {
HttpGet get = new HttpGet(uri);
Expand Down
Expand Up @@ -51,7 +51,7 @@ public interface ReplicaDescriptor extends AutoCloseable {
* @return repository channel.
* @throws IOException if repository channel can't be created.
*/
RepositoryChannel createChannel() throws IOException;
RepositoryChannel createChannel() throws IOException, CacheException;

/**
* Returns the file attributes of the file represented by this replica.
Expand Down
Expand Up @@ -4,6 +4,7 @@

import com.google.common.collect.ImmutableSet;
import diskCacheV111.util.CacheException;
import diskCacheV111.util.FileCorruptedCacheException;
import diskCacheV111.util.PnfsHandler;
import java.io.IOException;
import java.net.URI;
Expand Down Expand Up @@ -73,11 +74,12 @@ public synchronized void close() throws IllegalStateException {
}

@Override
public RepositoryChannel createChannel() throws IOException {
public RepositoryChannel createChannel() throws IOException, CacheException {
RepositoryChannel channel = _entry.openChannel(_openOptions);
long fileSizeAlloc = channel.size();
if (_fileAttributes.getSize() != fileSizeAlloc) {
IOException ex = new IOException("Failed to read the file, because file is Broken.");
FileCorruptedCacheException ex = new FileCorruptedCacheException(
"Failed to read the file, because file is Broken.");
try {
_entry.update("Filesystem and pool database file sizes are inconsistent",
r -> r.setState(ReplicaState.BROKEN));
Expand Down
12 changes: 10 additions & 2 deletions modules/dcache/src/main/smc/org/dcache/pool/migration/Task.sm
Expand Up @@ -208,6 +208,14 @@ Entry
String.format("Pool %s failed (no route to cell)",
ctxt.getTarget()));
}
copy_failure(rc: Integer, cause: Object)
[ rc == FILE_CORRUPTED ]
Failed
{
failPermanently(rc,
String.format("Pool %s failed (%s)",
ctxt.getTarget(), cause));
}
copy_failure(rc: Integer, cause: Object)
Failed
{
Expand All @@ -228,7 +236,7 @@ Entry
{
}
messageArrived(message: PoolMigrationCopyFinishedMessage)
[ message.getReturnCode() == FILE_NOT_FOUND ]
[ message.getReturnCode() == FILE_NOT_FOUND || message.getReturnCode() == FILE_CORRUPTED ]
Failed
{
failPermanently(message.getReturnCode(),
Expand Down Expand Up @@ -329,7 +337,7 @@ Exit
{
}
messageArrived(message: PoolMigrationCopyFinishedMessage)
[ message.getReturnCode() == FILE_NOT_FOUND ]
[ message.getReturnCode() == FILE_NOT_FOUND || message.getReturnCode() == FILE_CORRUPTED ]
Failed
{
failPermanently(message.getReturnCode(),
Expand Down
Expand Up @@ -128,7 +128,7 @@ public class RepositorySubsystemTest
private final CellAddressCore address = new CellAddressCore("pool", "test");

private void createFile(ReplicaDescriptor descriptor, long size)
throws IOException {
throws IOException, CacheException {
try (RepositoryChannel channel = descriptor.createChannel()) {
channel.write(ByteBuffer.allocate((int) size));
}
Expand Down Expand Up @@ -491,7 +491,7 @@ protected void run()
};
}

@Test(expected = IOException.class)
@Test(expected = CacheException.class)
public void testFileIsBroken()
throws IOException, IllegalTransitionException,
CacheException, InterruptedException {
Expand Down

0 comments on commit 6e5e469

Please sign in to comment.