Skip to content

Commit

Permalink
doors: Fix pool selection timeout handling
Browse files Browse the repository at this point in the history
Motivation:

Doors ignore the pool manager communication timeout and only apply
a door-specific total timeout. For some doors this total timeout is
infinite, and thus pool selection would never time out or be
resubmitted.

Modification:

Applies the pool manager communication timeout as an upper bound
on pool selection. When expired, the pool selection is retried
according to the retry policy of the door.

The total timeout is also applied to the pnfs manager communication
that happens upon retry. This greatly reduces the risk that the
remaining timeout becomes negative after this point.

Result:

Pool selection gets resubmitted and error messages with negative
message TTL should be gone.

Target: 2.12
Request: 2.11
Request: 2.10
Require-notes: yes
Require-book: no
Acked-by: Paul Millar <paul.millar@desy.de>
Patch: https://rb.dcache.org/r/8491/
(cherry picked from commit a952fd3)
(cherry picked from commit 2667ae5)
  • Loading branch information
gbehrmann committed Aug 24, 2015
1 parent bdb6662 commit b022089
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 29 deletions.
42 changes: 27 additions & 15 deletions modules/dcache/src/main/java/diskCacheV111/util/PnfsHandler.java
Expand Up @@ -44,6 +44,8 @@
import org.dcache.vehicles.PnfsRemoveChecksumMessage;
import org.dcache.vehicles.PnfsSetFileAttributes;

import static com.google.common.base.Preconditions.checkState;

public class PnfsHandler
implements CellMessageSender
{
Expand Down Expand Up @@ -190,7 +192,7 @@ public void addCacheLocation(PnfsId id) throws CacheException

public void addCacheLocation(PnfsId id, String pool) throws CacheException
{
pnfsRequest( new PnfsAddCacheLocationMessage(id, pool));
pnfsRequest(new PnfsAddCacheLocationMessage(id, pool));
}

public List<String> getCacheLocations( PnfsId pnfsId )throws CacheException {
Expand Down Expand Up @@ -225,22 +227,32 @@ public List<String> getCacheLocationsByPath( String fileName )throws CacheExcept
* to the PnfsManager are reported as a timeout CacheException.
*/
public <T extends PnfsMessage> T pnfsRequest( T msg )
throws CacheException {
throws CacheException
{
checkState(_cellStub != null);
return pnfsRequest(msg, _cellStub.getTimeoutInMillis());
}

if (_cellStub == null) {
throw new IllegalStateException("Missing endpoint");
/**
* Sends a message to the request manager and blocks until a reply
* is received. In case of errors in the reply, those are thrown
* as a CacheException. Timeouts and failure to send the message
* to the PnfsManager are reported as a timeout CacheException.
*/
public <T extends PnfsMessage> T pnfsRequest(T msg, long timeout)
throws CacheException
{
checkState(_cellStub != null);
try {
msg.setReplyRequired(true);
if (_subject != null) {
msg.setSubject(_subject);
}
return _cellStub.sendAndWait(msg, timeout);
} catch (InterruptedException e) {
throw new CacheException(CacheException.UNEXPECTED_SYSTEM_EXCEPTION,
"Sending message to " + _cellStub.getDestinationPath() + " interrupted");
}

try {
msg.setReplyRequired(true);
if (_subject != null) {
msg.setSubject(_subject);
}
return _cellStub.sendAndWait(msg);
} catch (InterruptedException e) {
throw new CacheException(CacheException.UNEXPECTED_SYSTEM_EXCEPTION,
"Sending message to PnafsManager intterupted");
}
}

public PnfsCreateEntryMessage createPnfsDirectory(String path)
Expand Down
33 changes: 19 additions & 14 deletions modules/dcache/src/main/java/org/dcache/util/Transfer.java
Expand Up @@ -52,6 +52,7 @@
import org.dcache.namespace.FileAttribute;
import org.dcache.namespace.FileType;
import org.dcache.vehicles.FileAttributes;
import org.dcache.vehicles.PnfsGetFileAttributes;

import static com.google.common.base.Preconditions.checkNotNull;
import static org.dcache.namespace.FileAttribute.*;
Expand Down Expand Up @@ -647,8 +648,12 @@ public void createNameSpaceEntry()
*
* @throws CacheException if reading the entry failed
*/
public void readNameSpaceEntry()
throws CacheException
public void readNameSpaceEntry() throws CacheException
{
readNameSpaceEntry(_pnfs.getPnfsTimeout());
}

private void readNameSpaceEntry(long timeout) throws CacheException
{
setStatus("PnfsManager: Fetching storage info");
try {
Expand All @@ -658,12 +663,11 @@ public void readNameSpaceEntry()
request.addAll(PoolMgrSelectReadPoolMsg.getRequiredAttributes());
Set<AccessMask> mask = EnumSet.of(AccessMask.READ_DATA);
PnfsId pnfsId = getPnfsId();
FileAttributes attributes;
if (pnfsId != null) {
attributes = _pnfs.getFileAttributes(pnfsId, request, mask);
} else {
attributes = _pnfs.getFileAttributes(_path.toString(), request, mask);
}
PnfsGetFileAttributes msg = (pnfsId != null)
? new PnfsGetFileAttributes(pnfsId, request)
: new PnfsGetFileAttributes(_path.toString(), request);
msg.setAccessMask(mask);
FileAttributes attributes = _pnfs.pnfsRequest(msg, timeout).getFileAttributes();

/* We can only read regular files.
*/
Expand Down Expand Up @@ -757,7 +761,7 @@ void setReadPoolSelectionContext(PoolMgrSelectReadPoolMsg.Context context)
* Selects a pool suitable for the transfer.
*/
public void selectPool()
throws CacheException, InterruptedException
throws CacheException, InterruptedException
{
selectPool(_poolManager.getTimeoutInMillis());
}
Expand Down Expand Up @@ -1039,11 +1043,11 @@ public synchronized void notifyBilling(int code, String error)
long start = System.currentTimeMillis();
CacheException lastFailure;
try {
selectPool(subWithInfinity(deadLine, System.currentTimeMillis()));
selectPool(Math.min(_poolManager.getTimeoutInMillis(),
subWithInfinity(deadLine, System.currentTimeMillis())));
gotPool = true;
startMover(queue,
Math.min(subWithInfinity(deadLine, System.currentTimeMillis()),
policy.getMoverStartTimeout()));
startMover(queue, Math.min(policy.getMoverStartTimeout(),
subWithInfinity(deadLine, System.currentTimeMillis())));
return;
} catch (TimeoutCacheException e) {
_log.warn(e.getMessage());
Expand Down Expand Up @@ -1102,7 +1106,8 @@ public synchronized void notifyBilling(int code, String error)
}

if (!isWrite()) {
readNameSpaceEntry();
readNameSpaceEntry( Math.min(_pnfs.getPnfsTimeout(),
subWithInfinity(deadLine, System.currentTimeMillis())));
}
}
}
Expand Down

0 comments on commit b022089

Please sign in to comment.