From dca59ca4a148032ec5954a911c002881b8084d3b Mon Sep 17 00:00:00 2001 From: Song Shun Date: Thu, 15 Mar 2018 15:59:20 +0800 Subject: [PATCH] librbd: fix when rbd close hang due to contineous rewatch fix when rbd close hang due to contineous rewatch Signed-off-by: Song Shun --- src/librbd/ImageState.h | 5 +++-- src/librbd/ImageWatcher.cc | 7 +++++++ src/librbd/Watcher.cc | 19 ++++++++++++++----- src/librbd/Watcher.h | 1 + src/librbd/watcher/RewatchRequest.cc | 13 +------------ 5 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/librbd/ImageState.h b/src/librbd/ImageState.h index d577f290d92028..3386f6f32617d1 100644 --- a/src/librbd/ImageState.h +++ b/src/librbd/ImageState.h @@ -31,6 +31,7 @@ class ImageState { int close(); void close(Context *on_finish); + bool is_closed() const; void handle_update_notification(); @@ -49,6 +50,8 @@ class ImageState { int unregister_update_watcher(uint64_t handle); void flush_update_watchers(Context *on_finish); void shut_down_update_watchers(Context *on_finish); + + mutable Mutex m_lock; private: enum State { @@ -102,7 +105,6 @@ class ImageState { ImageCtxT *m_image_ctx; State m_state; - mutable Mutex m_lock; ActionsContexts m_actions_contexts; uint64_t m_last_refresh; @@ -113,7 +115,6 @@ class ImageState { bool m_skip_open_parent_image; bool is_transition_state() const; - bool is_closed() const; const Action *find_pending_refresh() const; diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index 939eb8f73464ad..20af33eb3e6fc3 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -89,6 +89,13 @@ void ImageWatcher::unregister_watch(Context *on_finish) { FunctionContext *ctx = new FunctionContext([this, on_finish](int r) { m_task_finisher->cancel_all(on_finish); }); + { + m_image_ctx.state->m_lock.Lock(); + if (m_image_ctx.state->is_closed()) { + m_image_closing = true; + } + m_image_ctx.state->m_lock.Unlock(); + } Watcher::unregister_watch(ctx); } diff --git a/src/librbd/Watcher.cc b/src/librbd/Watcher.cc index 37fdaaebed6d6c..f57d95a9c2eaff 100644 --- a/src/librbd/Watcher.cc +++ b/src/librbd/Watcher.cc @@ -250,13 +250,22 @@ void Watcher::rewatch() { } void Watcher::handle_rewatch(int r) { - ldout(m_cct, 10) "r=" << r << dendl; - + ldout(m_cct, 10) << "r=" << r << dendl; WatchState next_watch_state = WATCH_STATE_REGISTERED; - if (r < 0) { - // only EBLACKLISTED or ENOENT can be returned - assert(r == -EBLACKLISTED || r == -ENOENT); + + if (r == -EBLACKLISTED) { + lderr(m_cct) << "client blacklisted" << dendl; next_watch_state = WATCH_STATE_UNREGISTERED; + } else if (r == -ENOENT) { + lderr(m_cct) << "failed to unwatch: " << cpp_strerror(r) << dendl; + next_watch_state = WATCH_STATE_UNREGISTERED; + } else if (r < 0) { + next_watch_state = WATCH_STATE_ERROR; + if (m_image_closing) { + ldout(m_cct, 10) << "image is closing, skip rewatch" << dendl; + } else { + rewatch(); + } } Context *unregister_watch_ctx = nullptr; diff --git a/src/librbd/Watcher.h b/src/librbd/Watcher.h index 39009027d9ebc4..3daf2fefe19886 100644 --- a/src/librbd/Watcher.h +++ b/src/librbd/Watcher.h @@ -79,6 +79,7 @@ class Watcher { watcher::Notifier m_notifier; WatchState m_watch_state; AsyncOpTracker m_async_op_tracker; + bool m_image_closing = false; void send_notify(bufferlist &payload, watcher::NotifyResponse *response = nullptr, diff --git a/src/librbd/watcher/RewatchRequest.cc b/src/librbd/watcher/RewatchRequest.cc index fe236269793a95..fd6ae3e69581f8 100644 --- a/src/librbd/watcher/RewatchRequest.cc +++ b/src/librbd/watcher/RewatchRequest.cc @@ -78,20 +78,9 @@ void RewatchRequest::rewatch() { void RewatchRequest::handle_rewatch(int r) { CephContext *cct = reinterpret_cast(m_ioctx.cct()); ldout(cct, 10) << "r=" << r << dendl; - - if (r == -EBLACKLISTED) { - lderr(cct) << "client blacklisted" << dendl; + if (r < 0) { finish(r); return; - } else if (r == -ENOENT) { - ldout(cct, 5) << "object deleted" << dendl; - finish(r); - return; - } else if (r < 0) { - lderr(cct) << "failed to watch object: " << cpp_strerror(r) - << dendl; - rewatch(); - return; } {