Merge pull request #51464 from ajarr/wip-59712-pacific
pacific: mgr/rbd_support: fixes related to recovery from RADOS client blocklisting

Reviewed-by: Ilya Dryomov <idryomov@redhat.com>
yuriw committed Aug 21, 2023
2 parents 68558bf + 38e5341 commit 9cc6e28
Showing 9 changed files with 322 additions and 16 deletions.
178 changes: 178 additions & 0 deletions qa/workunits/rbd/cli_generic.sh
@@ -1243,6 +1243,44 @@ test_trash_purge_schedule() {
    ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it
}

test_trash_purge_schedule_recovery() {
    echo "testing recovery of trash_purge_schedule handler after module's RADOS client is blocklisted..."
    remove_images
    ceph osd pool create rbd3 8
    rbd pool init rbd3
    rbd namespace create rbd3/ns1

    rbd trash purge schedule add -p rbd3/ns1 2d
    rbd trash purge schedule ls -p rbd3 -R | grep 'rbd3 *ns1 *every 2d'

    # Fetch and blocklist the rbd_support module's RADOS client
    CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] |
        jq 'select(.name == "rbd_support")' |
        jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
    ceph osd blocklist add $CLIENT_ADDR
    ceph osd blocklist ls | grep $CLIENT_ADDR

    # Check that you can add a trash purge schedule after a few retries
    expect_fail rbd trash purge schedule add -p rbd3 10m
    sleep 10
    for i in `seq 24`; do
        rbd trash purge schedule add -p rbd3 10m && break
        sleep 10
    done

    rbd trash purge schedule ls -p rbd3 -R | grep 'every 10m'
    # Verify that the schedule present before client blocklisting is preserved
    rbd trash purge schedule ls -p rbd3 -R | grep 'rbd3 *ns1 *every 2d'

    rbd trash purge schedule remove -p rbd3 10m
    rbd trash purge schedule remove -p rbd3/ns1 2d
    rbd trash purge schedule ls -p rbd3 -R | expect_fail grep 'every 10m'
    rbd trash purge schedule ls -p rbd3 -R | expect_fail grep 'rbd3 *ns1 *every 2d'

    ceph osd pool rm rbd3 rbd3 --yes-i-really-really-mean-it
}

test_mirror_snapshot_schedule() {
echo "testing mirror snapshot schedule..."
remove_images
@@ -1352,6 +1390,54 @@ test_mirror_snapshot_schedule() {
    ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it
}

test_mirror_snapshot_schedule_recovery() {
    echo "testing recovery of mirror snapshot scheduler after module's RADOS client is blocklisted..."
    remove_images
    ceph osd pool create rbd3 8
    rbd pool init rbd3
    rbd namespace create rbd3/ns1

    rbd mirror pool enable rbd3 image
    rbd mirror pool enable rbd3/ns1 image
    rbd mirror pool peer add rbd3 cluster1

    rbd create $RBD_CREATE_ARGS -s 1 rbd3/ns1/test1
    rbd mirror image enable rbd3/ns1/test1 snapshot
    test "$(rbd mirror image status rbd3/ns1/test1 |
        grep -c mirror.primary)" = '1'

    rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 1m
    test "$(rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1)" = 'every 1m'

    # Fetch and blocklist rbd_support module's RADOS client
    CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] |
        jq 'select(.name == "rbd_support")' |
        jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
    ceph osd blocklist add $CLIENT_ADDR
    ceph osd blocklist ls | grep $CLIENT_ADDR

    # Check that you can add a mirror snapshot schedule after a few retries
    expect_fail rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 2m
    sleep 10
    for i in `seq 24`; do
        rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 2m && break
        sleep 10
    done

    rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | grep 'every 2m'
    # Verify that the schedule present before client blocklisting is preserved
    rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | grep 'every 1m'

    rbd mirror snapshot schedule rm -p rbd3/ns1 --image test1 2m
    rbd mirror snapshot schedule rm -p rbd3/ns1 --image test1 1m
    rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | expect_fail grep 'every 2m'
    rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | expect_fail grep 'every 1m'

    rbd snap purge rbd3/ns1/test1
    rbd rm rbd3/ns1/test1
    ceph osd pool rm rbd3 rbd3 --yes-i-really-really-mean-it
}

test_perf_image_iostat() {
echo "testing perf image iostat..."
remove_images
@@ -1407,6 +1493,55 @@ test_perf_image_iostat() {
    ceph osd pool rm rbd1 rbd1 --yes-i-really-really-mean-it
}

test_perf_image_iostat_recovery() {
    echo "testing recovery of perf handler after module's RADOS client is blocklisted..."
    remove_images

    ceph osd pool create rbd3 8
    rbd pool init rbd3
    rbd namespace create rbd3/ns

    IMAGE_SPECS=("rbd3/test1" "rbd3/ns/test2")
    for spec in "${IMAGE_SPECS[@]}"; do
        # ensure all images are created without a separate data pool
        # as we filter iostat by specific pool specs below
        rbd create $RBD_CREATE_ARGS --size 10G --rbd-default-data-pool '' $spec
    done

    BENCH_PIDS=()
    for spec in "${IMAGE_SPECS[@]}"; do
        rbd bench --io-type write --io-pattern rand --io-total 10G --io-threads 1 \
            --rbd-cache false $spec >/dev/null 2>&1 &
        BENCH_PIDS+=($!)
    done

    test "$(rbd perf image iostat --format json rbd3 |
        jq -r 'map(.image) | sort | join(" ")')" = 'test1'

    # Fetch and blocklist the rbd_support module's RADOS client
    CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] |
        jq 'select(.name == "rbd_support")' |
        jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
    ceph osd blocklist add $CLIENT_ADDR
    ceph osd blocklist ls | grep $CLIENT_ADDR

    expect_fail rbd perf image iostat --format json rbd3/ns
    sleep 10
    for i in `seq 24`; do
        test "$(rbd perf image iostat --format json rbd3/ns |
            jq -r 'map(.image) | sort | join(" ")')" = 'test2' && break
        sleep 10
    done

    for pid in "${BENCH_PIDS[@]}"; do
        kill $pid
    done
    wait

    remove_images
    ceph osd pool rm rbd3 rbd3 --yes-i-really-really-mean-it
}

test_mirror_pool_peer_bootstrap_create() {
echo "testing mirror pool peer bootstrap create..."
remove_images
@@ -1502,6 +1637,45 @@ test_tasks_removed_pool() {
    remove_images
}

test_tasks_recovery() {
    echo "testing task handler recovery after module's RADOS client is blocklisted..."
    remove_images

    ceph osd pool create rbd2 8
    rbd pool init rbd2

    rbd create $RBD_CREATE_ARGS --size 1G rbd2/img1
    rbd bench --io-type write --io-pattern seq --io-size 1M --io-total 1G rbd2/img1
    rbd snap create rbd2/img1@snap
    rbd snap protect rbd2/img1@snap
    rbd clone rbd2/img1@snap rbd2/clone1

    # Fetch and blocklist rbd_support module's RADOS client
    CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] |
        jq 'select(.name == "rbd_support")' |
        jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
    ceph osd blocklist add $CLIENT_ADDR
    ceph osd blocklist ls | grep $CLIENT_ADDR

    # Check that a task can be added after a few retries
    expect_fail ceph rbd task add flatten rbd2/clone1
    sleep 10
    for i in `seq 24`; do
        ceph rbd task add flatten rbd2/clone1 && break
        sleep 10
    done
    test "$(ceph rbd task list)" != "[]"

    # Wait for the flatten task to complete, then clean up
    for i in {1..12}; do
        rbd info rbd2/clone1 | grep 'parent: ' || break
        sleep 10
    done
    rbd info rbd2/clone1 | expect_fail grep 'parent: '
    rbd snap unprotect rbd2/img1@snap

    test "$(ceph rbd task list)" = "[]"
    ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it
}

test_pool_image_args
test_rename
test_ls
@@ -1523,9 +1697,13 @@ test_clone_v2
test_thick_provision
test_namespace
test_trash_purge_schedule
test_trash_purge_schedule_recovery
test_mirror_snapshot_schedule
test_mirror_snapshot_schedule_recovery
test_perf_image_iostat
test_perf_image_iostat_recovery
test_mirror_pool_peer_bootstrap_create
test_tasks_removed_pool
test_tasks_recovery

echo OK
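
All four recovery tests share one recipe: look up the rbd_support module's RADOS client in `ceph mgr dump`, blocklist it, and retry a module command until the handler comes back. Below is a condensed sketch of that recipe, not part of the commit, assuming a test cluster with the ceph/rbd CLIs on PATH; the `ceph rbd task list` probe and the 24 x 10 s retry budget are taken from the tests above.

#!/usr/bin/env python3
# Illustrative sketch of the blocklist-and-retry recipe the new tests share.
import json
import subprocess
import time

def rbd_support_client_addr() -> str:
    # Same lookup the tests perform with jq: find the rbd_support module's
    # RADOS client among the active mgr's clients.
    dump = json.loads(subprocess.check_output(["ceph", "mgr", "dump"]))
    client = next(c for c in dump["active_clients"]
                  if c["name"] == "rbd_support")
    av = client["addrvec"][0]
    return "{}/{}".format(av["addr"], av["nonce"])

def blocklist_and_wait_for_recovery() -> None:
    addr = rbd_support_client_addr()
    subprocess.check_call(["ceph", "osd", "blocklist", "add", addr])
    # Module commands fail with EAGAIN until the recovery thread finishes
    # re-running setup with a fresh RADOS client, hence the retry loop.
    for _ in range(24):
        if subprocess.run(["ceph", "rbd", "task", "list"],
                          stdout=subprocess.DEVNULL).returncode == 0:
            return
        time.sleep(10)
    raise RuntimeError("rbd_support module did not recover")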
1 change: 1 addition & 0 deletions src/pybind/mgr/mgr_module.py
@@ -1075,6 +1075,7 @@ def shutdown(self) -> None:
            addrs = self._rados.get_addrs()
            self._rados.shutdown()
            self._ceph_unregister_client(addrs)
            self._rados = None

    @API.expose
    def get(self, data_name: str) -> Any:
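
This one-line change is what lets the module come back after shutdown(): `rados` is a property, so the next access creates and registers a fresh client, but only if the stale reference was cleared. A minimal sketch of that lazy-client pattern follows; `LazyClientModule` and `_open_cluster_connection` are illustrative names, not the real mgr_module API.

# Illustrative sketch of the lazy-client pattern the added line relies on.
class LazyClientModule:
    def __init__(self):
        self._rados = None

    def _open_cluster_connection(self):
        # stand-in for creating and registering a real RADOS client
        return object()

    @property
    def rados(self):
        # First access after shutdown() finds _rados cleared and opens a
        # brand-new client; without `self._rados = None` this property
        # would keep handing back the blocklisted, shut-down handle.
        if self._rados is None:
            self._rados = self._open_cluster_connection()
        return self._rados

    def shutdown(self):
        if self._rados:
            # real code also shuts the client down and deregisters it
            self._rados = None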
27 changes: 22 additions & 5 deletions src/pybind/mgr/rbd_support/mirror_snapshot_schedule.py
@@ -42,7 +42,11 @@ def __del__(self):
    def wait_for_pending(self):
        with self.lock:
            while self.pending:
                self.log.debug(
                    "CreateSnapshotRequests.wait_for_pending: "
                    "{} images".format(len(self.pending)))
                self.condition.wait()
        self.log.debug("CreateSnapshotRequests.wait_for_pending: done")

    def add(self, pool_id, namespace, image_id):
        image_spec = (pool_id, namespace, image_id)
@@ -264,6 +268,7 @@ def finish(self, image_spec):

        with self.lock:
            self.pending.remove(image_spec)
            self.condition.notify()
            if not self.queue:
                return
            image_spec = self.queue.pop(0)
@@ -304,26 +309,33 @@ class MirrorSnapshotScheduleHandler:

    lock = Lock()
    condition = Condition(lock)
    thread = None

    def __init__(self, module):
        self.module = module
        self.log = module.log
        self.last_refresh_images = datetime(1970, 1, 1)
        self.create_snapshot_requests = CreateSnapshotRequests(self)

        self.init_schedule_queue()

        self.stop_thread = False
        self.thread = Thread(target=self.run)

    def setup(self):
        self.init_schedule_queue()
        self.thread.start()

    def _cleanup(self):
    def shutdown(self):
        self.log.info("MirrorSnapshotScheduleHandler: shutting down")
        self.stop_thread = True
        if self.thread.is_alive():
            self.log.debug("MirrorSnapshotScheduleHandler: joining thread")
            self.thread.join()
        self.create_snapshot_requests.wait_for_pending()
        self.log.info("MirrorSnapshotScheduleHandler: shut down")

    def run(self):
        try:
            self.log.info("MirrorSnapshotScheduleHandler: starting")
            while True:
            while not self.stop_thread:
                refresh_delay = self.refresh_images()
                with self.lock:
                    (image_spec, wait_time) = self.dequeue()
@@ -335,6 +347,9 @@ def run(self):
                with self.lock:
                    self.enqueue(datetime.now(), pool_id, namespace, image_id)

        except (rados.ConnectionShutdown, rbd.ConnectionShutdown):
            self.log.exception("MirrorSnapshotScheduleHandler: client blocklisted")
            self.module.client_blocklisted.set()
        except Exception as ex:
            self.log.fatal("Fatal runtime error: {}\n{}".format(
                ex, traceback.format_exc()))
@@ -421,6 +436,8 @@ def load_pool_images(self, ioctx, images):
                    self.log.debug(
                        "load_pool_images: adding image {}".format(name))
                    images[pool_id][namespace][image_id] = name
        except rbd.ConnectionShutdown:
            raise
        except Exception as e:
            self.log.error(
                "load_pool_images: exception when scanning pool {}: {}".format(
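
The `notify()` added in `finish()` pairs with the new `wait_for_pending()` loop: shutdown blocks on the condition variable until every in-flight snapshot request has called `finish()`. A stand-alone sketch of that pairing follows; `PendingTracker` is an illustrative name, not the module's class.

# Illustrative sketch of the wait_for_pending()/finish() pairing above.
from threading import Condition, Lock

class PendingTracker:
    def __init__(self):
        self.lock = Lock()
        self.condition = Condition(self.lock)
        self.pending = set()

    def start(self, image_spec):
        with self.lock:
            self.pending.add(image_spec)

    def finish(self, image_spec):
        with self.lock:
            self.pending.remove(image_spec)
            # Wake the shutdown path so it can re-check `pending`.
            self.condition.notify()

    def wait_for_pending(self):
        # Called at shutdown: block until every in-flight request has
        # called finish(). Waiting in a loop guards against spurious
        # wakeups.
        with self.lock:
            while self.pending:
                self.condition.wait()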
57 changes: 56 additions & 1 deletion src/pybind/mgr/rbd_support/module.py
@@ -8,6 +8,7 @@
import traceback

from mgr_module import MgrModule
from threading import Thread, Event

from .common import NotAuthorizedError
from .mirror_snapshot_schedule import MirrorSnapshotScheduleHandler
@@ -156,13 +157,63 @@ class Module(MgrModule):

    def __init__(self, *args, **kwargs):
        super(Module, self).__init__(*args, **kwargs)
        self.rados.wait_for_latest_osdmap()
        self.client_blocklisted = Event()
        self.module_ready = False
        self.init_handlers()
        self.recovery_thread = Thread(target=self.run)
        self.recovery_thread.start()

    def init_handlers(self):
        self.mirror_snapshot_schedule = MirrorSnapshotScheduleHandler(self)
        self.perf = PerfHandler(self)
        self.task = TaskHandler(self)
        self.trash_purge_schedule = TrashPurgeScheduleHandler(self)

    def setup_handlers(self):
        self.log.info("starting setup")
        # new RADOS client is created and registered in the MgrMap
        # implicitly here as 'rados' is a property attribute.
        self.rados.wait_for_latest_osdmap()
        self.mirror_snapshot_schedule.setup()
        self.perf.setup()
        self.task.setup()
        self.trash_purge_schedule.setup()
        self.log.info("setup complete")
        self.module_ready = True

    def run(self):
        self.log.info("recovery thread starting")
        try:
            while True:
                try:
                    self.setup_handlers()
                except (rados.ConnectionShutdown, rbd.ConnectionShutdown):
                    self.log.exception("setup_handlers: client blocklisted")
                    self.log.info("recovering from double blocklisting")
                else:
                    # block until RADOS client is blocklisted
                    self.client_blocklisted.wait()
                    self.log.info("recovering from blocklisting")
                self.shutdown()
                self.client_blocklisted.clear()
                self.init_handlers()
        except Exception as ex:
            self.log.fatal("Fatal runtime error: {}\n{}".format(
                ex, traceback.format_exc()))

    def shutdown(self):
        self.module_ready = False
        self.mirror_snapshot_schedule.shutdown()
        self.trash_purge_schedule.shutdown()
        self.task.shutdown()
        self.perf.shutdown()
        # shut down client and deregister it from MgrMap
        super().shutdown()

    def handle_command(self, inbuf, cmd):
        if not self.module_ready:
            return (-errno.EAGAIN, "",
                    "rbd_support module is not ready, try again")
        # ensure we have latest pools available
        self.rados.wait_for_latest_osdmap()

@@ -188,6 +239,10 @@ def handle_command(self, inbuf, cmd):
                    ex, traceback.format_exc()))
                raise

        except (rados.ConnectionShutdown, rbd.ConnectionShutdown) as ex:
            self.log.debug("handle_command: client blocklisted")
            self.client_blocklisted.set()
            return -errno.EAGAIN, "", str(ex)
        except rados.Error as ex:
            return -ex.errno, "", str(ex)
        except rbd.OSError as ex:
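
Putting the module.py pieces together: handlers and `handle_command()` set a shared `Event` when they observe `ConnectionShutdown`, and the recovery thread tears everything down and runs setup again with a fresh client. A distilled sketch follows, not the module's real code; `RecoveringModule` is illustrative and `ConnectionError` stands in for the rados/rbd `ConnectionShutdown` exceptions.

# Illustrative sketch of the recovery loop Module.run() implements above.
from threading import Event, Thread

class RecoveringModule:
    def __init__(self):
        self.client_blocklisted = Event()
        self.init_handlers()
        self.recovery_thread = Thread(target=self.run, daemon=True)
        self.recovery_thread.start()

    def init_handlers(self):   # stub: build fresh handler objects
        pass

    def setup_handlers(self):  # stub: may raise if blocklisted mid-setup
        pass

    def shutdown(self):        # stub: stop handlers, drop the dead client
        pass

    def run(self):
        while True:
            try:
                # Setup can itself hit a (double) blocklisting ...
                self.setup_handlers()
            except ConnectionError:
                pass  # ... in which case fall through and retry setup
            else:
                # Park until a handler or handle_command() observes
                # ConnectionShutdown and sets the event.
                self.client_blocklisted.wait()
            self.shutdown()
            self.client_blocklisted.clear()
            self.init_handlers()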
