Merge pull request #51464 from ajarr/wip-59712-pacific
pacific: mgr/rbd_support: fixes related to recovery from RADOS client blocklisting

Reviewed-by: Ilya Dryomov <idryomov@redhat.com>
yuriw committed Aug 21, 2023
2 parents 68558bf + 38e5341 commit 9cc6e28
Showing 9 changed files with 322 additions and 16 deletions.
178 changes: 178 additions & 0 deletions qa/workunits/rbd/cli_generic.sh
@@ -1243,6 +1243,44 @@ test_trash_purge_schedule() {
    ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it
}

test_trash_purge_schedule_recovery() {
    echo "testing recovery of trash_purge_schedule handler after module's RADOS client is blocklisted..."
    remove_images
    ceph osd pool create rbd3 8
    rbd pool init rbd3
    rbd namespace create rbd3/ns1

    rbd trash purge schedule add -p rbd3/ns1 2d
    rbd trash purge schedule ls -p rbd3 -R | grep 'rbd3 *ns1 *every 2d'

    # Fetch and blocklist the rbd_support module's RADOS client
    CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] |
        jq 'select(.name == "rbd_support")' |
        jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
    ceph osd blocklist add $CLIENT_ADDR
    ceph osd blocklist ls | grep $CLIENT_ADDR

    # Check that you can add a trash purge schedule after a few retries
    expect_fail rbd trash purge schedule add -p rbd3 10m
    sleep 10
    for i in `seq 24`; do
        rbd trash purge schedule add -p rbd3 10m && break
        sleep 10
    done

    rbd trash purge schedule ls -p rbd3 -R | grep 'every 10m'
    # Verify that the schedule present before client blocklisting is preserved
    rbd trash purge schedule ls -p rbd3 -R | grep 'rbd3 *ns1 *every 2d'

    rbd trash purge schedule remove -p rbd3 10m
    rbd trash purge schedule remove -p rbd3/ns1 2d
    rbd trash purge schedule ls -p rbd3 -R | expect_fail grep 'every 10m'
    rbd trash purge schedule ls -p rbd3 -R | expect_fail grep 'rbd3 *ns1 *every 2d'

    ceph osd pool rm rbd3 rbd3 --yes-i-really-really-mean-it
}

test_mirror_snapshot_schedule() {
echo "testing mirror snapshot schedule..."
remove_images
@@ -1352,6 +1390,54 @@ test_mirror_snapshot_schedule() {
    ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it
}

test_mirror_snapshot_schedule_recovery() {
    echo "testing recovery of mirror snapshot scheduler after module's RADOS client is blocklisted..."
    remove_images
    ceph osd pool create rbd3 8
    rbd pool init rbd3
    rbd namespace create rbd3/ns1

    rbd mirror pool enable rbd3 image
    rbd mirror pool enable rbd3/ns1 image
    rbd mirror pool peer add rbd3 cluster1

    rbd create $RBD_CREATE_ARGS -s 1 rbd3/ns1/test1
    rbd mirror image enable rbd3/ns1/test1 snapshot
    test "$(rbd mirror image status rbd3/ns1/test1 |
        grep -c mirror.primary)" = '1'

    rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 1m
    test "$(rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1)" = 'every 1m'

    # Fetch and blocklist rbd_support module's RADOS client
    CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] |
        jq 'select(.name == "rbd_support")' |
        jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
    ceph osd blocklist add $CLIENT_ADDR
    ceph osd blocklist ls | grep $CLIENT_ADDR

    # Check that you can add a mirror snapshot schedule after a few retries
    expect_fail rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 2m
    sleep 10
    for i in `seq 24`; do
        rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 2m && break
        sleep 10
    done

    rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | grep 'every 2m'
    # Verify that the schedule present before client blocklisting is preserved
    rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | grep 'every 1m'

    rbd mirror snapshot schedule rm -p rbd3/ns1 --image test1 2m
    rbd mirror snapshot schedule rm -p rbd3/ns1 --image test1 1m
    rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | expect_fail grep 'every 2m'
    rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | expect_fail grep 'every 1m'

    rbd snap purge rbd3/ns1/test1
    rbd rm rbd3/ns1/test1
    ceph osd pool rm rbd3 rbd3 --yes-i-really-really-mean-it
}

test_perf_image_iostat() {
echo "testing perf image iostat..."
remove_images
@@ -1407,6 +1493,55 @@ test_perf_image_iostat() {
    ceph osd pool rm rbd1 rbd1 --yes-i-really-really-mean-it
}

test_perf_image_iostat_recovery() {
    echo "testing recovery of perf handler after module's RADOS client is blocklisted..."
    remove_images

    ceph osd pool create rbd3 8
    rbd pool init rbd3
    rbd namespace create rbd3/ns

    IMAGE_SPECS=("rbd3/test1" "rbd3/ns/test2")
    for spec in "${IMAGE_SPECS[@]}"; do
        # ensure all images are created without a separate data pool
        # as we filter iostat by specific pool specs below
        rbd create $RBD_CREATE_ARGS --size 10G --rbd-default-data-pool '' $spec
    done

    BENCH_PIDS=()
    for spec in "${IMAGE_SPECS[@]}"; do
        rbd bench --io-type write --io-pattern rand --io-total 10G --io-threads 1 \
            --rbd-cache false $spec >/dev/null 2>&1 &
        BENCH_PIDS+=($!)
    done

    test "$(rbd perf image iostat --format json rbd3 |
        jq -r 'map(.image) | sort | join(" ")')" = 'test1'

    # Fetch and blocklist the rbd_support module's RADOS client
    CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] |
        jq 'select(.name == "rbd_support")' |
        jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
    ceph osd blocklist add $CLIENT_ADDR
    ceph osd blocklist ls | grep $CLIENT_ADDR

    expect_fail rbd perf image iostat --format json rbd3/ns
    sleep 10
    for i in `seq 24`; do
        test "$(rbd perf image iostat --format json rbd3/ns |
            jq -r 'map(.image) | sort | join(" ")')" = 'test2' && break
        sleep 10
    done

    for pid in "${BENCH_PIDS[@]}"; do
        kill $pid
    done
    wait

    remove_images
    ceph osd pool rm rbd3 rbd3 --yes-i-really-really-mean-it
}

test_mirror_pool_peer_bootstrap_create() {
echo "testing mirror pool peer bootstrap create..."
remove_images
@@ -1502,6 +1637,45 @@ test_tasks_removed_pool() {
    remove_images
}

test_tasks_recovery() {
    echo "testing task handler recovery after module's RADOS client is blocklisted..."
    remove_images

    ceph osd pool create rbd2 8
    rbd pool init rbd2

    rbd create $RBD_CREATE_ARGS --size 1G rbd2/img1
    rbd bench --io-type write --io-pattern seq --io-size 1M --io-total 1G rbd2/img1
    rbd snap create rbd2/img1@snap
    rbd snap protect rbd2/img1@snap
    rbd clone rbd2/img1@snap rbd2/clone1

    # Fetch and blocklist rbd_support module's RADOS client
    CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] |
        jq 'select(.name == "rbd_support")' |
        jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
    ceph osd blocklist add $CLIENT_ADDR
    ceph osd blocklist ls | grep $CLIENT_ADDR

    # Check that a task can be added after a few retries
    expect_fail ceph rbd task add flatten rbd2/clone1
    sleep 10
    for i in `seq 24`; do
        ceph rbd task add flatten rbd2/clone1 && break
        sleep 10
    done
    test "$(ceph rbd task list)" != "[]"

    # Wait for the flatten task to complete, then clean up
    for i in {1..12}; do
        rbd info rbd2/clone1 | grep 'parent: ' || break
        sleep 10
    done
    rbd info rbd2/clone1 | expect_fail grep 'parent: '
    rbd snap unprotect rbd2/img1@snap

    test "$(ceph rbd task list)" = "[]"
    ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it
}

test_pool_image_args
test_rename
test_ls
@@ -1523,9 +1697,13 @@ test_clone_v2
test_thick_provision
test_namespace
test_trash_purge_schedule
test_trash_purge_schedule_recovery
test_mirror_snapshot_schedule
test_mirror_snapshot_schedule_recovery
test_perf_image_iostat
test_perf_image_iostat_recovery
test_mirror_pool_peer_bootstrap_create
test_tasks_removed_pool
test_tasks_recovery

echo OK
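
All four recovery tests share one recipe: look up the rbd_support module's RADOS client in `ceph mgr dump`, blocklist it, and retry a module command until the handler comes back. Below is a condensed sketch of that recipe, not part of the commit, assuming a test cluster with the ceph/rbd CLIs on PATH; the `ceph rbd task list` probe and the 24 x 10 s retry budget are taken from the tests above.

#!/usr/bin/env python3
# Illustrative sketch of the blocklist-and-retry recipe the new tests share.
import json
import subprocess
import time

def rbd_support_client_addr() -> str:
    # Same lookup the tests perform with jq: find the rbd_support module's
    # RADOS client among the active mgr's clients.
    dump = json.loads(subprocess.check_output(["ceph", "mgr", "dump"]))
    client = next(c for c in dump["active_clients"]
                  if c["name"] == "rbd_support")
    av = client["addrvec"][0]
    return "{}/{}".format(av["addr"], av["nonce"])

def blocklist_and_wait_for_recovery() -> None:
    addr = rbd_support_client_addr()
    subprocess.check_call(["ceph", "osd", "blocklist", "add", addr])
    # Module commands fail with EAGAIN until the recovery thread finishes
    # re-running setup with a fresh RADOS client, hence the retry loop.
    for _ in range(24):
        if subprocess.run(["ceph", "rbd", "task", "list"],
                          stdout=subprocess.DEVNULL).returncode == 0:
            return
        time.sleep(10)
    raise RuntimeError("rbd_support module did not recover")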
1 change: 1 addition & 0 deletions src/pybind/mgr/mgr_module.py
@@ -1075,6 +1075,7 @@ def shutdown(self) -> None:
            addrs = self._rados.get_addrs()
            self._rados.shutdown()
            self._ceph_unregister_client(addrs)
            self._rados = None

    @API.expose
    def get(self, data_name: str) -> Any:
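
This one-line change is what lets the module come back after shutdown(): `rados` is a property, so the next access creates and registers a fresh client, but only if the stale reference was cleared. A minimal sketch of that lazy-client pattern follows; `LazyClientModule` and `_open_cluster_connection` are illustrative names, not the real mgr_module API.

# Illustrative sketch of the lazy-client pattern the added line relies on.
class LazyClientModule:
    def __init__(self):
        self._rados = None

    def _open_cluster_connection(self):
        # stand-in for creating and registering a real RADOS client
        return object()

    @property
    def rados(self):
        # First access after shutdown() finds _rados cleared and opens a
        # brand-new client; without `self._rados = None` this property
        # would keep handing back the blocklisted, shut-down handle.
        if self._rados is None:
            self._rados = self._open_cluster_connection()
        return self._rados

    def shutdown(self):
        if self._rados:
            # real code also shuts the client down and deregisters it
            self._rados = None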
27 changes: 22 additions & 5 deletions src/pybind/mgr/rbd_support/mirror_snapshot_schedule.py
@@ -42,7 +42,11 @@ def __del__(self):
    def wait_for_pending(self):
        with self.lock:
            while self.pending:
                self.log.debug(
                    "CreateSnapshotRequests.wait_for_pending: "
                    "{} images".format(len(self.pending)))
                self.condition.wait()
        self.log.debug("CreateSnapshotRequests.wait_for_pending: done")

    def add(self, pool_id, namespace, image_id):
        image_spec = (pool_id, namespace, image_id)
@@ -264,6 +268,7 @@ def finish(self, image_spec):

        with self.lock:
            self.pending.remove(image_spec)
            self.condition.notify()
            if not self.queue:
                return
            image_spec = self.queue.pop(0)
@@ -304,26 +309,33 @@ class MirrorSnapshotScheduleHandler:

    lock = Lock()
    condition = Condition(lock)
    thread = None

    def __init__(self, module):
        self.module = module
        self.log = module.log
        self.last_refresh_images = datetime(1970, 1, 1)
        self.create_snapshot_requests = CreateSnapshotRequests(self)

        self.init_schedule_queue()

        self.stop_thread = False
        self.thread = Thread(target=self.run)

    def setup(self):
        self.init_schedule_queue()
        self.thread.start()

    def _cleanup(self):
    def shutdown(self):
        self.log.info("MirrorSnapshotScheduleHandler: shutting down")
        self.stop_thread = True
        if self.thread.is_alive():
            self.log.debug("MirrorSnapshotScheduleHandler: joining thread")
            self.thread.join()
        self.create_snapshot_requests.wait_for_pending()
        self.log.info("MirrorSnapshotScheduleHandler: shut down")

    def run(self):
        try:
            self.log.info("MirrorSnapshotScheduleHandler: starting")
            while True:
            while not self.stop_thread:
                refresh_delay = self.refresh_images()
                with self.lock:
                    (image_spec, wait_time) = self.dequeue()
@@ -335,6 +347,9 @@ def run(self):
                with self.lock:
                    self.enqueue(datetime.now(), pool_id, namespace, image_id)

        except (rados.ConnectionShutdown, rbd.ConnectionShutdown):
            self.log.exception("MirrorSnapshotScheduleHandler: client blocklisted")
            self.module.client_blocklisted.set()
        except Exception as ex:
            self.log.fatal("Fatal runtime error: {}\n{}".format(
                ex, traceback.format_exc()))
@@ -421,6 +436,8 @@ def load_pool_images(self, ioctx, images):
                    self.log.debug(
                        "load_pool_images: adding image {}".format(name))
                    images[pool_id][namespace][image_id] = name
        except rbd.ConnectionShutdown:
            raise
        except Exception as e:
            self.log.error(
                "load_pool_images: exception when scanning pool {}: {}".format(
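
The `notify()` added in `finish()` pairs with the new `wait_for_pending()` loop: shutdown blocks on the condition variable until every in-flight snapshot request has called `finish()`. A stand-alone sketch of that pairing follows; `PendingTracker` is an illustrative name, not the module's class.

# Illustrative sketch of the wait_for_pending()/finish() pairing above.
from threading import Condition, Lock

class PendingTracker:
    def __init__(self):
        self.lock = Lock()
        self.condition = Condition(self.lock)
        self.pending = set()

    def start(self, image_spec):
        with self.lock:
            self.pending.add(image_spec)

    def finish(self, image_spec):
        with self.lock:
            self.pending.remove(image_spec)
            # Wake the shutdown path so it can re-check `pending`.
            self.condition.notify()

    def wait_for_pending(self):
        # Called at shutdown: block until every in-flight request has
        # called finish(). Waiting in a loop guards against spurious
        # wakeups.
        with self.lock:
            while self.pending:
                self.condition.wait()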
57 changes: 56 additions & 1 deletion src/pybind/mgr/rbd_support/module.py
@@ -8,6 +8,7 @@
import traceback

from mgr_module import MgrModule
from threading import Thread, Event

from .common import NotAuthorizedError
from .mirror_snapshot_schedule import MirrorSnapshotScheduleHandler
@@ -156,13 +157,63 @@ class Module(MgrModule):

    def __init__(self, *args, **kwargs):
        super(Module, self).__init__(*args, **kwargs)
        self.rados.wait_for_latest_osdmap()
        self.client_blocklisted = Event()
        self.module_ready = False
        self.init_handlers()
        self.recovery_thread = Thread(target=self.run)
        self.recovery_thread.start()

    def init_handlers(self):
        self.mirror_snapshot_schedule = MirrorSnapshotScheduleHandler(self)
        self.perf = PerfHandler(self)
        self.task = TaskHandler(self)
        self.trash_purge_schedule = TrashPurgeScheduleHandler(self)

    def setup_handlers(self):
        self.log.info("starting setup")
        # new RADOS client is created and registered in the MgrMap
        # implicitly here as 'rados' is a property attribute.
        self.rados.wait_for_latest_osdmap()
        self.mirror_snapshot_schedule.setup()
        self.perf.setup()
        self.task.setup()
        self.trash_purge_schedule.setup()
        self.log.info("setup complete")
        self.module_ready = True

    def run(self):
        self.log.info("recovery thread starting")
        try:
            while True:
                try:
                    self.setup_handlers()
                except (rados.ConnectionShutdown, rbd.ConnectionShutdown):
                    self.log.exception("setup_handlers: client blocklisted")
                    self.log.info("recovering from double blocklisting")
                else:
                    # block until RADOS client is blocklisted
                    self.client_blocklisted.wait()
                    self.log.info("recovering from blocklisting")
                self.shutdown()
                self.client_blocklisted.clear()
                self.init_handlers()
        except Exception as ex:
            self.log.fatal("Fatal runtime error: {}\n{}".format(
                ex, traceback.format_exc()))

    def shutdown(self):
        self.module_ready = False
        self.mirror_snapshot_schedule.shutdown()
        self.trash_purge_schedule.shutdown()
        self.task.shutdown()
        self.perf.shutdown()
        # shut down client and deregister it from MgrMap
        super().shutdown()

    def handle_command(self, inbuf, cmd):
        if not self.module_ready:
            return (-errno.EAGAIN, "",
                    "rbd_support module is not ready, try again")
        # ensure we have latest pools available
        self.rados.wait_for_latest_osdmap()

@@ -188,6 +239,10 @@ def handle_command(self, inbuf, cmd):
                    ex, traceback.format_exc()))
                raise

        except (rados.ConnectionShutdown, rbd.ConnectionShutdown) as ex:
            self.log.debug("handle_command: client blocklisted")
            self.client_blocklisted.set()
            return -errno.EAGAIN, "", str(ex)
        except rados.Error as ex:
            return -ex.errno, "", str(ex)
        except rbd.OSError as ex:
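
Putting the module.py pieces together: handlers and `handle_command()` set a shared `Event` when they observe `ConnectionShutdown`, and the recovery thread tears everything down and runs setup again with a fresh client. A distilled sketch follows, not the module's real code; `RecoveringModule` is illustrative and `ConnectionError` stands in for the rados/rbd `ConnectionShutdown` exceptions.

# Illustrative sketch of the recovery loop Module.run() implements above.
from threading import Event, Thread

class RecoveringModule:
    def __init__(self):
        self.client_blocklisted = Event()
        self.init_handlers()
        self.recovery_thread = Thread(target=self.run, daemon=True)
        self.recovery_thread.start()

    def init_handlers(self):   # stub: build fresh handler objects
        pass

    def setup_handlers(self):  # stub: may raise if blocklisted mid-setup
        pass

    def shutdown(self):        # stub: stop handlers, drop the dead client
        pass

    def run(self):
        while True:
            try:
                # Setup can itself hit a (double) blocklisting ...
                self.setup_handlers()
            except ConnectionError:
                pass  # ... in which case fall through and retry setup
            else:
                # Park until a handler or handle_command() observes
                # ConnectionShutdown and sets the event.
                self.client_blocklisted.wait()
            self.shutdown()
            self.client_blocklisted.clear()
            self.init_handlers()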
