# ceph-gentle-drain
# Author: Dan van der Ster <>
# Gradually reweight a list of OSDs toward a target CRUSH weight (for example
# to drain or to fill them) while causing minimal impact on a Ceph cluster.
import os, sys, getopt, commands, json, time, datetime
def update_osd_tree():
    """Refresh the module-level ``osd_tree`` from the cluster.

    Runs ``ceph osd tree --format=json`` (stderr discarded), parses the
    captured output as JSON and stores the result in the global
    ``osd_tree``. A JSON parsing error propagates to the caller and leaves
    the previous value of ``osd_tree`` untouched.
    """
    global osd_tree
    print("update_osd_tree: loading ceph osd tree")
    osd_tree = json.loads(
        commands.getoutput('ceph osd tree --format=json 2>/dev/null')
    )
    print("update_osd_tree: done")
def get_crush_weight(osd):
    """Return the CRUSH weight of *osd* as a float.

    Looks through ``osd_tree['nodes']`` (the module-level tree, which must
    already have been loaded) for the first node whose ``name`` equals
    *osd*, logs the weight found and returns it.

    Raises:
        Exception: if no node with that name is present in the tree.
    """
    # Lazy filter: nodes are inspected in order and the scan stops at the
    # first match.
    candidates = (entry for entry in osd_tree['nodes'] if entry['name'] == osd)
    for entry in candidates:
        crush_weight = float(entry['crush_weight'])
        print("get_crush_weight: %s has weight %s" % (osd, crush_weight))
        return crush_weight
    raise Exception('Undefined crush_weight for %s' % osd)
def in_timeframe(start_time, end_time, current_time, allowed_days, current_day):
    """Tell whether reweighting is permitted right now.

    Args:
        start_time: beginning of the daily window (comparable with
            *current_time*, e.g. ``datetime.time``).
        end_time: end of the daily window.
        current_time: the time to test; must provide ``strftime``.
        allowed_days: collection of day values that are accepted
            regardless of the time of day.
        current_day: the day value to look up in *allowed_days*.

    Returns:
        True if *current_day* is in *allowed_days*, or if *current_time*
        lies inside the inclusive window ``[start_time, end_time]``.
        When ``start_time`` is not earlier than ``end_time`` the window is
        taken to wrap around midnight.
    """
    print(current_time.strftime('check current time: %H:%M:%S'))
    print("check current day: %s" % (current_day))

    # An allowed day overrides the time-of-day window entirely.
    if current_day in allowed_days:
        return True

    # Ordinary window contained within a single day.
    if start_time < end_time:
        return start_time <= current_time <= end_time

    # Window spans midnight: valid from start_time until the end of the day
    # and from the start of the day until end_time.
    return current_time >= start_time or current_time <= end_time
def measure_latency(test_pool):
    """Measure the current 4kB write latency against *test_pool*.

    Runs a 10 second single-threaded ``rados bench`` write test with 4096
    byte objects, extracts the third field of the "average latency" line
    and returns that number multiplied by 1000 (presumably seconds
    converted to milliseconds -- confirm against the rados bench output of
    the deployed version).

    Raises:
        ValueError: if the extracted text is not a valid float, e.g. when
            the benchmark produced no "average latency" line.
    """
    print("measure_latency: measuring 4kB write latency")
    bench_cmd = "rados -p %s bench 10 write -t 1 -b 4096 2>/dev/null | egrep -i 'average latency' | awk '{print $3}'" % test_pool
    raw_latency = commands.getoutput(bench_cmd)
    latency_ms = 1000*float(raw_latency)
    print("measure_latency: current latency is %s" % latency_ms)
    return latency_ms
def get_num_backfilling():
    """Return how many PGs are backfilling or waiting to backfill.

    For each of the states ``backfilling`` and ``backfill_wait`` the output
    of ``ceph pg stat`` is split on commas and colons, and the leading
    numbers of the fragments mentioning that state are summed by awk.
    Empty command output (no fragment matched) counts as zero.
    """
    pipeline = "ceph pg stat | tr ',:' '\n' | awk '/%s/ { total += $1}; END { print total}'"
    total = 0
    for state in ('backfilling', 'backfill_wait'):
        count = commands.getoutput(pipeline % state)
        if not count:
            # awk printed nothing: no PG is in this state.
            continue
        total += int(count)
    print("get_num_backfilling: PGs currently backfilling: %s" % total)
    return total
def crush_reweight(osd, weight, really):
    """Set the CRUSH weight of *osd* to *weight*, or just log the intent.

    Args:
        osd: name of the OSD as understood by ``ceph osd crush reweight``.
        weight: the new CRUSH weight.
        really: when true the command is executed through the shell (in
            the background, because of the trailing ``&``); when false
            nothing is executed and a dry-run notice is printed instead.
    """
    cmd = "ceph osd crush reweight %s %s &" % (osd, weight)
    print("crush_reweight: calling %s" % cmd)
    if really:
        # os.system returns the shell's exit status; since the command is
        # backgrounded this only reflects whether it could be launched.
        out = os.system(cmd)
        print("crush_reweight: %s" % out)
    else:
        # Dry run: only report this when the command was NOT executed.
        print("crush_reweight: not really doing it!")
def reweight_osds(osds, max_pgs_backfilling, max_latency, delta_weight, target_weight, test_pool, start_time, end_time, allowed_days, interval, really):
    """Run one throttled reweighting pass over *osds*.

    The pass is deferred (nothing is changed) when the current time is
    outside the allowed window, when too many PGs are backfilling, or when
    the measured write latency is too high. Otherwise every OSD that has
    not yet reached *target_weight* is moved one step of *delta_weight*
    toward it, never overshooting the target and never going below 0.

    Args:
        osds: names of the OSDs to reweight.
        max_pgs_backfilling: defer when more PGs than this are backfilling.
        max_latency: defer when measure_latency() reports more than this.
        delta_weight: signed weight change applied per pass.
        target_weight: weight at which an OSD is considered finished.
        test_pool: pool used for the latency measurement.
        start_time, end_time: daily window, compared against
            ``datetime.datetime.now().time()``.
        allowed_days: day numbers (``datetime.weekday()`` convention,
            Monday is 0) accepted regardless of the time window.
        interval: unused here; kept for interface compatibility.
        really: passed through to crush_reweight(); false means dry run.

    Returns:
        False when no OSD needed changing ("All done"); True when the pass
        was deferred or at least one OSD was reweighted, i.e. when the
        caller should try again later.
    """
    print("reweight_osds: changing all osds by weight %s (target %s)" % (delta_weight, target_weight))

    # check timeframe
    now = datetime.datetime.now()
    current_time = now.time()
    current_day = now.weekday()
    time_ok = in_timeframe(start_time, end_time, current_time, allowed_days, current_day)
    if not time_ok:
        print("current time: not within range %s - %s, trying again later" % (start_time, end_time))
        return True

    # check num pgs backfilling
    npgs = get_num_backfilling()
    if npgs > max_pgs_backfilling:
        print("reweight_osds: npgs backfilling is too high, trying again later")
        return True

    # check the latency
    latency = measure_latency(test_pool)
    if latency > max_latency:
        print("reweight_osds: latency is too high, trying again later")
        return True

    # get_crush_weight() reads the global osd_tree, so load a fresh copy
    # before looking at any weights.
    update_osd_tree()

    # An OSD within 1/20 of a step of the target is treated as finished.
    tolerance = delta_weight / 20.0

    changed = False
    for osd in osds:
        weight = get_crush_weight(osd)
        if delta_weight > 0 and weight >= (target_weight - tolerance):
            print("reweight_osds: skipping %s with weight %s target %s" % (osd, weight, target_weight))
            continue
        if delta_weight < 0 and weight <= (target_weight - tolerance):
            print("reweight_osds: skipping %s with weight %s target %s" % (osd, weight, target_weight))
            continue

        if delta_weight >= 0:
            # Growing: do not step past the target.
            new_weight = max(min(weight + delta_weight, target_weight), 0)
        else:
            # Shrinking: do not step below the target (nor below zero).
            new_weight = max(max(weight + delta_weight, target_weight), 0)

        print("reweight_osds: %s new weight will be %s" % (osd, new_weight))
        crush_reweight(osd, new_weight, really)
        changed = True

    if not changed:
        print("All done")
    return changed
def usage(code=0):
    """Print the command-line synopsis and exit with status *code*.

    Args:
        code: process exit status (0 for a requested help message, non-zero
            when the message is shown because of a usage error).

    Raises:
        SystemExit: always, carrying *code*.
    """
    # The default shown for -t matches the value main() actually starts
    # with, and -r is listed because nothing is changed without it.
    print('ceph-gentle-reweight -o <osd>[,<osd>,...] [-l <max_latency (default=20)>] [-b <max pgs backfilling (default=50)>] [-d <delta weight (default=0.01)>] [-t <target weight (default=5.46)>] [-p <latency test pool (default=test)>] [-i <interval (default=60)>] [-s <start time (default=02:00)>] [-e <end time (default=09:00)>] [-a <day of week,[day of week,...]>] [-r (really apply the changes; default is a dry run)]')
    sys.exit(code)
def check_for_pool(pool):
    """Verify that *pool* exists in the cluster.

    Parses the JSON output of ``ceph osd pool ls --format=json`` (stderr
    discarded) and checks *pool* for membership in the result.

    Raises:
        ValueError: if *pool* is not listed (json.loads also raises a
            ValueError when the command output is not valid JSON).
    """
    listing = commands.getoutput('ceph osd pool ls --format=json 2>/dev/null')
    known_pools = json.loads(listing)
    if pool in known_pools:
        return
    raise ValueError("Pool '%s' does not exist!" % pool)
def main(argv):
    """Parse the command line and reweight the requested OSDs step by step.

    Args:
        argv: command-line arguments without the program name.

    After validating the options, reweight_osds() is called repeatedly
    with a pause of ``interval`` seconds between passes. The loop ends when
    reweight_osds() returns False; any other return value (including None)
    keeps it polling. Usage errors exit with status 2, ``-h`` with 0.
    """
    drain_osds = []
    max_latency = 20
    max_pgs_backfilling = 50
    delta_weight = 0.01
    target_weight = 5.46
    test_pool = "test"
    interval = 60
    start_time = "02:00"
    end_time = "09:00"
    allowed_days = []
    really = False

    # Both the underscore and the hyphen spelling of the multi-word long
    # options are registered: the underscore form was the one declared
    # originally, the hyphen form the one the handlers below tested for.
    long_options = [
        "osds=", "latency=", "backfills=", "delta=", "target=", "pool=",
        "interval=", "start_time=", "end_time=", "allowed_days=",
        "start-time=", "end-time=", "allowed-days=", "really",
    ]
    try:
        opts, args = getopt.getopt(argv, "ho:l:b:d:t:p:i:s:e:a:r", long_options)
    except getopt.GetoptError:
        usage(2)
        sys.exit(2)  # in case usage() only prints

    for opt, arg in opts:
        if opt == '-h':
            usage()
            sys.exit(0)  # in case usage() only prints
        elif opt in ("-o", "--osds"):
            drain_osds = arg.split(',')
        elif opt in ("-l", "--latency"):
            max_latency = int(arg)
        elif opt in ("-b", "--backfills"):
            max_pgs_backfilling = int(arg)
        elif opt in ("-d", "--delta"):
            delta_weight = float(arg)
        elif opt in ("-t", "--target"):
            target_weight = float(arg)
        elif opt in ("-p", "--pool"):
            test_pool = str(arg)
        elif opt in ("-i", "--interval"):
            interval = int(arg)
        elif opt in ("-s", "--start-time", "--start_time"):
            start_time = str(arg)
        elif opt in ("-e", "--end-time", "--end_time"):
            end_time = str(arg)
        elif opt in ("-a", "--allowed-days", "--allowed_days"):
            allowed_days = arg.split(',')
        elif opt in ("-r", "--really"):
            really = True

    # Without any OSD there is nothing to do.
    if not drain_osds:
        usage(2)
        sys.exit(2)  # in case usage() only prints

    # Convert "HH:MM" strings into datetime.time objects for comparison.
    end_time = datetime.datetime.strptime(end_time, "%H:%M").time()
    start_time = datetime.datetime.strptime(start_time, "%H:%M").time()

    if allowed_days:
        # A real list, so membership can be tested on every pass.
        allowed_days = [int(day) for day in allowed_days]

    # The spacing reproduces what the original print statements emitted.
    print('Draining OSDs:  %s' % (drain_osds,))
    print('Max latency (ms):  %s' % (max_latency,))
    print('Max PGs backfilling:  %s' % (max_pgs_backfilling,))
    print('Delta weight: %s' % (delta_weight,))
    print('Target weight: %s' % (target_weight,))
    print('Latency test pool: %s' % (test_pool,))
    print('Run interval: %s' % (interval,))
    print('Start time: %s' % (start_time,))
    print('End time: %s' % (end_time,))
    print('Allowed days: %s' % (allowed_days,))

    # Fail early with a clear message if the latency test pool is missing.
    check_for_pool(test_pool)

    while True:
        more_work = reweight_osds(drain_osds, max_pgs_backfilling, max_latency, delta_weight, target_weight, test_pool, start_time, end_time, allowed_days, interval, really)
        if more_work is False:
            # Every OSD has reached its target weight.
            break
        print("main: sleeping %ss" % interval)
        time.sleep(interval)
if __name__ == "__main__":