Skip to content

Commit

Permalink
TransferManager: fix error-handling
Browse files Browse the repository at this point in the history
An earlier patch (5ed882c) switched from
using CellAdapter#sendMessage to the recently introduced call-back mechanism.
This separates the normal and error response handling into separate call-back
methods.  Unfortunately the patch didn't also refactor the error handling
in TransferManagerHandler so the existing error handling remained in the
successful code-path, where it was never called.

This patch moves the error handling to the error call-back method.  It
tidies up the error messages and reduces the logging somewhat.  The retry
if the delete of a file fails is refactored slightly.

Target: master
Patch: http://rb.dcache.org/r/6889/
Acked-by: Gerd Behrmann
  • Loading branch information
paulmillar committed Apr 15, 2014
1 parent 702feeb commit 9f0a1ab
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 63 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -211,18 +211,9 @@ public void success(Message message)
} else if (message instanceof PnfsMapPathMessage) {
PnfsMapPathMessage mapMessage = (PnfsMapPathMessage) message;
if (state == WAITING_FOR_PNFS_CHECK_BEFORE_DELETE_STATE) {
if (mapMessage.getReturnCode() != 0) {
log.error("We were about to delete entry that does not exist : " + mapMessage.toString()
+ " PnfsMapPathMessage return code=" + mapMessage.getReturnCode()
+ " reason : " + mapMessage.getErrorObject());
sendErrorReply();
return;
} else {
state = RECEIVED_PNFS_CHECK_BEFORE_DELETE_STATE;
deletePnfsEntry();
return;
}

state = RECEIVED_PNFS_CHECK_BEFORE_DELETE_STATE;
deletePnfsEntry();
return;
} else {
log.error(this.toString() + " got unexpected PnfsMapPathMessage "
+ " : " + mapMessage + " ; Ignoring");
Expand Down Expand Up @@ -251,18 +242,9 @@ public void success(Message message)
PnfsDeleteEntryMessage deleteReply = (PnfsDeleteEntryMessage) message;
if (state == WAITING_FOR_PNFS_ENTRY_DELETE) {
setState(RECEIVED_PNFS_ENTRY_DELETE);
if (deleteReply.getReturnCode() != 0) {
log.error("Delete failed : " + deleteReply.getPath()
+ " PnfsDeleteEntryMessage return code=" + deleteReply.getReturnCode()
+ " reason : " + deleteReply.getErrorObject());
numberOfRetries++;
int numberOfRemainingRetries = manager.getMaxNumberOfDeleteRetries() - numberOfRetries;
log.error("Will retry : " + numberOfRemainingRetries + " times");
deletePnfsEntry();
} else {
log.debug("Received PnfsDeleteEntryMessage, Deleted : " + deleteReply.getPath());
sendErrorReply();
}
log.debug("Received PnfsDeleteEntryMessage, Deleted : {}",
deleteReply.getPath());
sendErrorReply();
}
}
manager.persist(this);
Expand All @@ -271,17 +253,59 @@ public void success(Message message)
@Override
public void failure(int rc, Object error)
{
switch (state) {
case WAITING_FOR_PNFS_INFO_STATE:
sendErrorReply(rc, "Failed to lookup file: " + error);
break;

case WAITING_FOR_PNFS_ENTRY_CREATION_INFO_STATE:
sendErrorReply(rc, "Failed to create namespace entry: " + error);
break;

case WAITING_FIRST_POOL_REPLY_STATE:
// FIXME: in the case of an attempted read (pool pushing the file
// to some remote site), we can ask PoolManager for another
// pool. For an attempted write (pool pulling the file)
// we must fail the transfer as we don't know if a mover
// was started.
sendErrorReply(CacheException.SELECTED_POOL_FAILED,
"Failed while waiting for mover to start: " + error);
break;

case WAITING_FOR_PNFS_CHECK_BEFORE_DELETE_STATE:
sendErrorReply(rc, "Pre-delete check failed: " + error);
break;

case WAITING_FOR_POOL_INFO_STATE:
if (rc == CacheException.OUT_OF_DATE) {
handle();
} else {
sendErrorReply(rc, "Failed to select pool: " + error);
}
break;

case WAITING_FOR_PNFS_ENTRY_DELETE:
log.warn("Delete attempt ({} of {}) failed: {}", numberOfRetries + 1,
manager.getMaxNumberOfDeleteRetries(), error);
numberOfRetries++;
if (numberOfRetries < manager.getMaxNumberOfDeleteRetries()) {
deletePnfsEntry();
} else {
sendErrorReply(_replyCode, "Failed to delete file " +
"(" + error + "), triggered by: " + _errorObject);
}
break;

default:
/* The code should never get here, but we try to recover from bugs. */
sendErrorReply(rc, "Failed in state " + state + ": " + error +
" [" + rc + "]");
break;
}
}

public void createEntryResponseArrived(PnfsCreateEntryMessage create)
{
if (create.getReturnCode() != 0) {
sendErrorReply(create.getReturnCode(),
create.getErrorObject());
return;
}

created = true;
manager.persist(this);

Expand All @@ -295,11 +319,6 @@ public void createEntryResponseArrived(PnfsCreateEntryMessage create)

public void storageInfoArrived(PnfsGetFileAttributes msg)
{
if (msg.getReturnCode() != 0) {
sendErrorReply(msg.getReturnCode(),
msg.getErrorObject());
return;
}
if (!store && tlog != null) {
tlog.middle(msg.getFileAttributes().getSize());
}
Expand Down Expand Up @@ -357,16 +376,6 @@ public void poolInfoArrived(PoolMgrSelectPoolMsg pool_info)
((PoolMgrSelectReadPoolMsg) pool_info).getContext();
}

if (pool_info.getReturnCode() == CacheException.OUT_OF_DATE) {
handle();
return;
}

if (pool_info.getReturnCode() != 0) {
sendErrorReply(5, new CacheException("Pool manager error: "
+ pool_info.getErrorObject()));
return;
}
setPool(pool_info.getPoolName());
setPoolAddress(pool_info.getPoolAddress());
fileAttributes = pool_info.getFileAttributes();
Expand Down Expand Up @@ -411,11 +420,6 @@ public void poolFirstReplyArrived(PoolIoFileMessage poolMessage)
{
log.debug("poolReply = " + poolMessage);
info.setTimeQueued(info.getTimeQueued() + System.currentTimeMillis());
if (poolMessage.getReturnCode() != 0) {
sendErrorReply(5, new CacheException("Pool error: "
+ poolMessage.getErrorObject()));
return;
}
log.debug("Pool " + pool + " will deliver file " + pnfsId + " mover id is " + poolMessage.getMoverId());
log.debug("Starting moverTimeout timer");
manager.startTimer(id);
Expand All @@ -427,16 +431,11 @@ public void poolFirstReplyArrived(PoolIoFileMessage poolMessage)
public void deletePnfsEntry()
{
if (state == RECEIVED_PNFS_CHECK_BEFORE_DELETE_STATE) {
if (numberOfRetries < manager.getMaxNumberOfDeleteRetries()) {
PnfsDeleteEntryMessage pnfsMsg = new PnfsDeleteEntryMessage(pnfsPath);
setState(WAITING_FOR_PNFS_ENTRY_DELETE);
manager.persist(this);
pnfsMsg.setReplyRequired(true);
CellStub.addCallback(manager.getPnfsManagerStub().send(pnfsMsg), this, executor);
} else {
log.error("Failed to remove PNFS entry after " + numberOfRetries);
sendErrorReply();
}
PnfsDeleteEntryMessage pnfsMsg = new PnfsDeleteEntryMessage(pnfsPath);
setState(WAITING_FOR_PNFS_ENTRY_DELETE);
manager.persist(this);
pnfsMsg.setReplyRequired(true);
CellStub.addCallback(manager.getPnfsManagerStub().send(pnfsMsg), this, executor);
} else {
PnfsMapPathMessage message = new PnfsMapPathMessage(pnfsPath);
setState(WAITING_FOR_PNFS_CHECK_BEFORE_DELETE_STATE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@ public class CacheException extends Exception
{
private static final long serialVersionUID = 3219663683702355240L;

/** Transfer between pool and remote site failed. */
public final static int THIRD_PARTY_TRANSFER_FAILED = 8;

/** Requested operation is disabled in pool. */
public final static int POOL_DISABLED = 104;

Expand Down Expand Up @@ -117,6 +114,13 @@ public class CacheException extends Exception
public static final int NO_POOL_CONFIGURED = 10024;
public static final int NO_POOL_ONLINE = 10025;

/** Selected pool failed for third-party copy. */
public final static int SELECTED_POOL_FAILED = 10026;

/** Transfer between pool and remote site failed. */
public final static int THIRD_PARTY_TRANSFER_FAILED = 10027;


/**
* default error code. <b>It's recommended to use more specific error
* codes</b>
Expand Down

0 comments on commit 9f0a1ab

Please sign in to comment.