Skip to content

Commit

Permalink
resilience: fix bug in formatting and handling of cache exception types
Browse files Browse the repository at this point in the history
Motivation:

The utility which returns a CacheException object with
an appropriate message and type based on the failure type
and operation uses string formatting to process arguments.
The way the code is written, however, is subject to bad
formatting exceptions, particularly if there are unexpected
null values passed in as parameters (for instance, if
an exception cause is null).  In some of these cases,
the actual exception is masked because of the formatting
exception, causing the removal of the operation from
the map to fail.

Modification:

Change the method such that it accounts for nulls; this
also requires respecting the contract that the format
strings used will always have three % placeholders.

In addition to this, the "Will retry" message appended
to the failure message should not be included if the
error is fatal.

Result:

Correct formatting will not block proper removal
from the operation map and potentially freeze
progress in the resilience service as a whole.

Target: master
Request: 3.2
Request: 3.1
Request: 3.0
Request: 2.16
Require-notes: yes
Require-book: no
Acked-by: Tigran
  • Loading branch information
alrossi committed Sep 1, 2017
1 parent c40afc0 commit 80175bf
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,7 @@ private void removeTarget(PnfsId pnfsId, String target)
}

Serializable exception = msg.getErrorObject();
if (exception != null && !CacheExceptionUtils.fileNotFound(exception)) {
if (exception != null && !CacheExceptionUtils.replicaNotFound(exception)) {
throw CacheExceptionUtils.getCacheException(
CacheException.SELECTED_POOL_FAILED,
FileTaskCompletionHandler.FAILED_REMOVE_MESSAGE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,25 +79,18 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* Also implements the migration task termination logic.</p>
*/
public final class FileTaskCompletionHandler implements TaskCompletionHandler {
static final String WILL_RETRY_LATER
= "A best effort at retry will be made "
+ "during the next periodic scan.";

static final String ABORT_REPLICATION_LOG_MESSAGE
= "Aborting replication for {}; pools tried: {}; {} "
+ WILL_RETRY_LATER;
= "Aborting replication for {}; pools tried: {}; {}";

static final String VERIFY_FAILURE_MESSAGE
= "Processing for pnfsId %s failed during verify; %s "
+ WILL_RETRY_LATER;
= "Processing for %s failed during verify. %s%s";

static final String FAILED_COPY_MESSAGE
= "Migration task for %s failed: %s.";
= "Migration task for %s failed. %s%s.";

static final String FAILED_REMOVE_MESSAGE
= "Failed to remove %s from %s; %s. "
+ "This means that an unnecessary copy may still exist; "
+ WILL_RETRY_LATER;
+ "This means that an unnecessary copy may still exist.";

private static final Logger LOGGER
= LoggerFactory.getLogger(FileTaskCompletionHandler.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
package org.dcache.resilience.util;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;

import diskCacheV111.util.CacheException;
Expand All @@ -72,8 +70,11 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* <p>Wrapper methods for processing and handling {@link CacheException}s.</p>
*/
public final class CacheExceptionUtils {
static final String WILL_RETRY_LATER
= " A best effort at retry will be made "
+ "during the next periodic scan.";

public static boolean fileNotFound(Serializable object) {
public static boolean replicaNotFound(Serializable object) {
if (object instanceof ExecutionException) {
ExecutionException exception = (ExecutionException) object;
object = exception.getCause();
Expand All @@ -91,26 +92,36 @@ public static boolean fileNotFound(Serializable object) {
return true;
}
}

return false;
}

/**
* @param rc error code for CacheException
* @param template string formatting, must have three '%' markers.
* @param pnfsid of the file operation
* @param info
* @param e
* @return appropriate CacheException to be propagated.
*/
public static CacheException getCacheException(int rc,
String template,
PnfsId pnfs,
String optional,
PnfsId pnfsid,
String info,
Throwable e) {
List<Object> args = new ArrayList<>();
args.add(pnfs);
Object[] args = new Object[3];
args[0] = pnfsid;
args[1] = info == null? "" : info;
args[2] = e == null? "" : new ExceptionMessage(e);

if (optional != null) {
args.add(optional);
}
String message = String.format(template, args);

if (e != null) {
args.add(new ExceptionMessage(e));
}
FailureType failureType = getFailureType(rc, true);

String message = String.format(template, args.toArray());
if (failureType != FailureType.FATAL
&& failureType != FailureType.BROKEN) {
message += WILL_RETRY_LATER;
}

if (e != null) {
return CacheExceptionFactory.exceptionOf(rc, message, e);
Expand All @@ -125,7 +136,11 @@ public static FailureType getFailureType(CacheException exception,
return FailureType.RETRIABLE;
}

switch (exception.getRc()) {
return getFailureType(exception.getRc(), isCopyOperation);
}

private static FailureType getFailureType(int rc, boolean isCopyOperation) {
switch (rc) {
case CacheException.FILE_CORRUPTED:
case CacheException.BROKEN_ON_TAPE:
return FailureType.BROKEN;
Expand Down

0 comments on commit 80175bf

Please sign in to comment.