From 280795800594ba7f4762a35188d875eacf3b6e9d Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Tue, 1 Oct 2019 10:48:20 +0200 Subject: [PATCH] dcap: restart pool selection on OUT-OF-DATE error Motivation: As pool selection is probabilistic, two write transfers might hit the same pool. If there is not sufficient space, the pool will refuse to start a mover with an OUT-OF-DATE error. Though this is a transient error and a new pool can be selected, the dcap door propagates the error to the client application. Modification: Treat OUT-OF-DATE as a transient error and restart pool selection. Result: dcap door can handle out-of-date errors Acked-by: Marina Sahakyan Target: master, 6.0, 5.2, 5.1, 4.2 Require-book: no Require-notes: yes (cherry picked from commit 87ae552206265549113970db9245eb19df0d1d6e) Signed-off-by: Tigran Mkrtchyan --- .../doors/DCapDoorInterpreterV3.java | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/modules/dcache-dcap/src/main/java/diskCacheV111/doors/DCapDoorInterpreterV3.java b/modules/dcache-dcap/src/main/java/diskCacheV111/doors/DCapDoorInterpreterV3.java index 33950f60e18..d52592c1395 100644 --- a/modules/dcache-dcap/src/main/java/diskCacheV111/doors/DCapDoorInterpreterV3.java +++ b/modules/dcache-dcap/src/main/java/diskCacheV111/doors/DCapDoorInterpreterV3.java @@ -2194,24 +2194,26 @@ private void storeChecksumInPnfs( PnfsId pnfsId , String checksumString){ poolIoFileArrived( PoolIoFileMessage reply ){ _log.debug("poolIoFileArrived : {}", reply); - if( reply.getReturnCode() != 0 ){ - // bad entry in cacheInfo and pool Manager did not check it ( for performance reason ) - // try again - if (reply.getReturnCode() == CacheException.FILE_NOT_IN_REPOSITORY) { + switch (reply.getReturnCode()) { + case 0: + _moverId = reply.getMoverId(); + // + // nothing to do here ( we are still waiting for + // doorTransferFinished ) + // + setStatus("WaitingForDoorTransferOk"); + break; + case 
CacheException.FILE_NOT_IN_REPOSITORY: + // fallthrough + case CacheException.OUT_OF_DATE: + // transient errors + _log.warn("Retry on transient error: {}", reply.getReturnCode()); again(true); - return; - } - - sendReply("poolIoFileArrived", reply); - removeUs(); - return; + break; + default: + sendReply("poolIoFileArrived", reply); + removeUs(); } - _moverId = reply.getMoverId(); - // - // nothing to do here ( we are still waiting for - // doorTransferFinished ) - // - setStatus( "WaitingForDoorTransferOk" ) ; } public void poolPassiveIoFileMessage( PoolPassiveIoFileMessage reply) {