From 07eb03acee79067bd8decd092d3279b2a5c42398 Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 3 Jun 2015 10:16:55 +0100 Subject: [PATCH] tasks/cephfs: time out on ceph-fuses that don't die This handles cases where we have, e.g., poked the fuse abort file for a process, but it is still not dying. Because this is a special class of error (unlike, e.g., when we force umount something because the network is gone), raise the error instead of trying again to kill the client. Fixes: #11835 Signed-off-by: John Spray --- tasks/cephfs/fuse_mount.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tasks/cephfs/fuse_mount.py b/tasks/cephfs/fuse_mount.py index 5517f5f84..88531fdbb 100644 --- a/tasks/cephfs/fuse_mount.py +++ b/tasks/cephfs/fuse_mount.py @@ -7,6 +7,7 @@ from textwrap import dedent from teuthology import misc +from teuthology.contextutil import MaxWhileTries from teuthology.orchestra import run from teuthology.orchestra.run import CommandFailedError from .mount import CephFSMount @@ -234,7 +235,12 @@ def umount_wait(self, force=False): try: if self.fuse_daemon: - self.fuse_daemon.wait() + # Permit a timeout, so that we do not block forever + run.wait([self.fuse_daemon], 30) + except MaxWhileTries: + log.error("process failed to terminate after unmount. This probably" + "indicates a bug within ceph-fuse.") + raise except CommandFailedError: pass