Skip to content

Commit 221dd50

Browse files
committed
nfs: return NFSERR_DELAY if we can't kill mover
If the mover takes too much time to finish, then we will simply return OK. In most cases this can go well. Nevertheless, if checksumming is enabled, then the file will be unavailable and the file size will still be 0. To solve this, we return NFSERR_DELAY and force the client to retry. The problem here is to return NFSERR_IO if the pool is unavailable. Observed at DESY on transfers of big files (measured checksumming speed is 1.6GB/s). Acked-by: Gerd Behrmann Target: trunk, 2.6 Require-book: no Require-notes: yes Signed-off-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
1 parent 0914ddd commit 221dd50

File tree

1 file changed

+19
-7
lines changed

1 file changed

+19
-7
lines changed

modules/dcache/src/main/java/org/dcache/chimera/nfsv41/door/NFSv41Door.java

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@
5252
import org.dcache.chimera.JdbcFs;
5353
import org.dcache.chimera.nfs.ChimeraNFSException;
5454
import org.dcache.chimera.nfs.ExportFile;
55-
import org.dcache.chimera.nfs.FsExport;
5655
import org.dcache.chimera.nfs.nfsstat;
5756
import org.dcache.chimera.nfs.v3.MountServer;
5857
import org.dcache.chimera.nfs.v3.NfsServerV3;
@@ -440,13 +439,26 @@ public List<deviceid4> getDeviceList(CompoundContext context) {
440439
* @see org.dcache.chimera.nfsv4.NFSv41DeviceManager#releaseDevice(stateid4 stateid)
441440
*/
442441
@Override
443-
public void layoutReturn(CompoundContext context, stateid4 stateid) {
442+
public void layoutReturn(CompoundContext context, stateid4 stateid) throws IOException {
444443

445444
_log.debug("Releasing device by stateid: {}", stateid);
446-
Transfer transfer = _ioMessages.get(stateid);
447-
if (transfer != null) {
448-
_log.debug("Sending KILL to {}@{}", transfer.getMoverId(), transfer.getPool());
449-
transfer.killMover(5000);
445+
446+
NfsTransfer transfer = _ioMessages.get(stateid);
447+
if (transfer == null) {
448+
return;
449+
}
450+
451+
_log.debug("Sending KILL to {}@{}", transfer.getMoverId(), transfer.getPool());
452+
transfer.killMover(0);
453+
454+
try {
455+
if(!transfer.waitForMover(500)) {
456+
throw new ChimeraNFSException(nfsstat.NFSERR_DELAY, "Mover not stopped");
457+
}
458+
} catch (CacheException | InterruptedException e) {
459+
_log.info("Failed to kill mover: {}@{} : {}",
460+
transfer.getMoverId(), transfer.getPool(), e.getMessage());
461+
throw new ChimeraNFSException(nfsstat.NFSERR_IO, e.getMessage());
450462
}
451463
}
452464

@@ -571,7 +583,7 @@ protected NFS4ProtocolInfo getProtocolInfoForPoolManager() {
571583
protected NFS4ProtocolInfo getProtocolInfoForPool() {
572584
return _protocolInfo;
573585
}
574-
}
586+
}
575587

576588
/**
577589
* To allow the transfer monitoring in the httpd cell to recognize us

0 commit comments

Comments
 (0)