Skip to content

Commit

Permalink
qa/tasks/thrashosds,ceph_manager: thrash pg_remap[_items]
Browse files Browse the repository at this point in the history
Signed-off-by: Sage Weil <sage@redhat.com>
  • Loading branch information
liewegas committed Mar 23, 2017
1 parent f6888f3 commit 4e83665
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 0 deletions.
Expand Up @@ -18,4 +18,6 @@ split_tasks:
chance_pgnum_grow: 1
chance_pgpnum_fix: 1
chance_thrash_cluster_full: 0
chance_thrash_pg_remap: 0
chance_thrash_pg_remap_items: 0
- print: "**** done thrashosds 3-thrash"
Expand Up @@ -18,4 +18,6 @@ stress-tasks:
chance_pgpnum_fix: 1
min_in: 4
chance_thrash_cluster_full: 0
chance_thrash_pg_remap: 0
chance_thrash_pg_remap_items: 0
- print: "**** done thrashosds 3-thrash"
2 changes: 2 additions & 0 deletions qa/suites/upgrade/jewel-x/stress-split/3-thrash/default.yaml
Expand Up @@ -17,4 +17,6 @@ stress-tasks:
chance_pgnum_grow: 1
chance_pgpnum_fix: 1
chance_thrash_cluster_full: 0
chance_thrash_pg_remap: 0
chance_thrash_pg_remap_items: 0
- print: "**** done thrashosds 3-thrash"
Expand Up @@ -18,4 +18,6 @@ stress-tasks:
chance_pgpnum_fix: 1
min_in: 4
chance_thrash_cluster_full: 0
chance_thrash_pg_remap: 0
chance_thrash_pg_remap_items: 0
- print: "**** done thrashosds 3-thrash"
2 changes: 2 additions & 0 deletions qa/suites/upgrade/kraken-x/stress-split/3-thrash/default.yaml
Expand Up @@ -17,4 +17,6 @@ stress-tasks:
chance_pgnum_grow: 1
chance_pgpnum_fix: 1
chance_thrash_cluster_full: 0
chance_thrash_pg_remap: 0
chance_thrash_pg_remap_items: 0
- print: "**** done thrashosds 3-thrash"
87 changes: 87 additions & 0 deletions qa/tasks/ceph_manager.py
Expand Up @@ -123,6 +123,8 @@ def __init__(self, manager, config, logger=None):
self.dump_ops_enable = self.config.get('dump_ops_enable')
self.noscrub_toggle_delay = self.config.get('noscrub_toggle_delay')
self.chance_thrash_cluster_full = self.config.get('chance_thrash_cluster_full', .05)
# Each pg_remap thrash action is configured by its own key (default 1.0),
# so one can be disabled without affecting the other.
self.chance_thrash_pg_remap = self.config.get('chance_thrash_pg_remap', 1.0)
self.chance_thrash_pg_remap_items = self.config.get('chance_thrash_pg_remap_items', 1.0)

num_osds = self.in_osds + self.out_osds
self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * num_osds
Expand Down Expand Up @@ -505,6 +507,86 @@ def thrash_cluster_full(self):
self.log('Setting full ratio back to .95')
self.ceph_manager.raw_cluster_cmd('osd', 'set-full-ratio', '.95')

def thrash_pg_remap(self):
    """
    Install or remove random pg_remap entries in OSDMap

    About 70% of the time a randomly chosen PG is remapped onto a random
    selection of OSDs (as many as the PG's pool size, drawn from both the
    in and out OSDs).  Otherwise one existing pg_remap entry, chosen at
    random, is removed.  A CommandFailedError raised while doing either is
    logged and ignored.
    """
    out = self.ceph_manager.raw_cluster_cmd('osd', 'dump', '-f', 'json-pretty')
    j = json.loads(out)
    self.log('j is %s' % j)
    # Track which command is being attempted so a failure is reported
    # against the right one.
    action = 'pg-remap'
    try:
        if random.random() >= .3:
            pgs = self.ceph_manager.get_pg_stats()
            if not pgs:
                # random.choice() would raise IndexError on an empty list
                self.log('No pgs; doing nothing')
                return
            pgid = str(random.choice(pgs)['pgid'])
            # pgid looks like "<pool>.<seed>"; the pool decides how many
            # OSDs the mapping needs
            poolid = int(pgid.split('.')[0])
            sizes = [x['size'] for x in j['pools'] if x['pool'] == poolid]
            if not sizes:
                # the pool is not present in this osd dump
                return
            n = sizes[0]
            # concatenation builds a new list, so shuffling it leaves
            # self.in_osds / self.out_osds untouched
            osds = self.in_osds + self.out_osds
            random.shuffle(osds)
            osds = osds[0:n]
            self.log('Setting %s to %s' % (pgid, osds))
            cmd = ['osd', 'pg-remap', pgid] + [str(x) for x in osds]
            self.log('cmd %s' % cmd)
            self.ceph_manager.raw_cluster_cmd(*cmd)
        else:
            action = 'rm-pg-remap'
            m = j['pg_remap']
            if m:
                pg = random.choice(m)['pgid']
                self.log('Clearing pg_remap on %s' % pg)
                self.ceph_manager.raw_cluster_cmd(
                    'osd',
                    'rm-pg-remap',
                    pg)
            else:
                self.log('No pg_remap entries; doing nothing')
    except CommandFailedError:
        self.log('Failed to %s, ignoring' % action)

def thrash_pg_remap_items(self):
    """
    Install or remove random pg_remap_items entries in OSDMap

    About 70% of the time a randomly chosen PG is given a pg_remap_items
    entry built from up to twice its pool size in randomly selected OSDs
    (drawn from both the in and out OSDs).  Otherwise one existing
    pg_remap_items entry, chosen at random, is removed.  A
    CommandFailedError raised while doing either is logged and ignored.
    """
    out = self.ceph_manager.raw_cluster_cmd('osd', 'dump', '-f', 'json-pretty')
    j = json.loads(out)
    self.log('j is %s' % j)
    # Track which command is being attempted so a failure is reported
    # against the right one.
    action = 'pg-remap-items'
    try:
        if random.random() >= .3:
            pgs = self.ceph_manager.get_pg_stats()
            if not pgs:
                # random.choice() would raise IndexError on an empty list
                self.log('No pgs; doing nothing')
                return
            pgid = str(random.choice(pgs)['pgid'])
            # pgid looks like "<pool>.<seed>"; the pool size decides how
            # many OSD ids are requested
            poolid = int(pgid.split('.')[0])
            sizes = [x['size'] for x in j['pools'] if x['pool'] == poolid]
            if not sizes:
                # the pool is not present in this osd dump
                return
            n = sizes[0]
            # concatenation builds a new list, so shuffling it leaves
            # self.in_osds / self.out_osds untouched
            osds = self.in_osds + self.out_osds
            random.shuffle(osds)
            osds = osds[0:n*2]
            # NOTE(review): the ids are presumably consumed as (from, to)
            # pairs -- confirm against the monitor command.  With fewer
            # than n*2 OSDs the slice can come up odd, so drop the
            # unpaired trailing id.
            if len(osds) % 2:
                osds = osds[:-1]
            if not osds:
                self.log('Not enough osds for pg_remap_items; doing nothing')
                return
            self.log('Setting %s to %s' % (pgid, osds))
            cmd = ['osd', 'pg-remap-items', pgid] + [str(x) for x in osds]
            self.log('cmd %s' % cmd)
            self.ceph_manager.raw_cluster_cmd(*cmd)
        else:
            action = 'rm-pg-remap-items'
            m = j['pg_remap_items']
            if m:
                pg = random.choice(m)['pgid']
                self.log('Clearing pg_remap_items on %s' % pg)
                self.ceph_manager.raw_cluster_cmd(
                    'osd',
                    'rm-pg-remap-items',
                    pg)
            else:
                self.log('No pg_remap_items entries; doing nothing')
    except CommandFailedError:
        self.log('Failed to %s, ignoring' % action)

def all_up(self):
"""
Make sure all osds are up and not out.
Expand Down Expand Up @@ -723,6 +805,11 @@ def choose_action(self):
chance_test_backfill_full,))
if self.chance_thrash_cluster_full > 0:
actions.append((self.thrash_cluster_full, self.chance_thrash_cluster_full,))
if self.chance_thrash_pg_remap > 0:
actions.append((self.thrash_pg_remap, self.chance_thrash_pg_remap,))
if self.chance_thrash_pg_remap_items > 0:
actions.append((self.thrash_pg_remap_items, self.chance_thrash_pg_remap_items,))

for key in ['heartbeat_inject_failure', 'filestore_inject_stall']:
for scenario in [
(lambda:
Expand Down
3 changes: 3 additions & 0 deletions qa/tasks/thrashosds.py
Expand Up @@ -122,6 +122,9 @@ def task(ctx, config):
chance_thrash_cluster_full: .05
chance_thrash_pg_remap: 1.0
chance_thrash_pg_remap_items: 1.0
example:
tasks:
Expand Down

0 comments on commit 4e83665

Please sign in to comment.