Skip to content

Commit

Permalink
tests: Thrasher: handle "OSD has the store locked" gracefully
Browse files Browse the repository at this point in the history
On slower machines (VPS, OVH) it takes time for the OSD to go down.

Fixes: http://tracker.ceph.com/issues/19556
Signed-off-by: Nathan Cutler <ncutler@suse.com>
(cherry picked from commit 8a142da)
  • Loading branch information
smithfarm committed Apr 9, 2017
1 parent a64d3e4 commit fac9991
Showing 1 changed file with 16 additions and 6 deletions.
22 changes: 16 additions & 6 deletions qa/tasks/ceph_manager.py
Expand Up @@ -222,12 +222,22 @@ def kill_osd(self, osd=None, mark_down=False, mark_out=False):
break
log.debug("ceph-objectstore-tool binary not present, trying again")

proc = exp_remote.run(args=cmd, wait=True,
check_status=False, stdout=StringIO())
if proc.exitstatus:
raise Exception("ceph-objectstore-tool: "
"exp list-pgs failure with status {ret}".
format(ret=proc.exitstatus))
# ceph-objectstore-tool might bogusly fail with "OSD has the store locked"
# see http://tracker.ceph.com/issues/19556
with safe_while(sleep=15, tries=40,
                action="ceph-objectstore-tool --op list-pgs") as proceed:
    # Retry list-pgs while the OSD is still shutting down and holds the
    # store lock; any other failure is fatal.
    while proceed():
        proc = exp_remote.run(args=cmd, wait=True,
                              check_status=False,
                              stdout=StringIO(), stderr=StringIO())
        if proc.exitstatus == 0:
            break
        # stderr was captured into a StringIO on the run() call above, so
        # it must be read back through proc; a bare `stderr` name is not
        # bound here and would raise NameError instead of retrying.
        # Substring match: the tool's output may carry a trailing newline
        # or extra text around the message.
        if (proc.exitstatus == 1 and
                "OSD has the store locked" in proc.stderr.getvalue()):
            continue
        if proc.exitstatus:
            raise Exception("ceph-objectstore-tool: "
                            "exp list-pgs failure with status {ret}".
                            format(ret=proc.exitstatus))

pgs = proc.stdout.getvalue().split('\n')[:-1]
if len(pgs) == 0:
self.log("No PGs found for osd.{osd}".format(osd=exp_osd))
Expand Down

0 comments on commit fac9991

Please sign in to comment.