Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

librbd: reduce potential of erroneous blacklisting on image close #15162

Merged
merged 3 commits into from May 22, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions qa/workunits/rbd/rbd_mirror.sh
Expand Up @@ -387,4 +387,8 @@ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'split-brain
request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'

testlog "TEST: no blacklists"
ceph --cluster ${CLUSTER1} osd blacklist ls 2>&1 | grep -q "listed 0 entries"
ceph --cluster ${CLUSTER2} osd blacklist ls 2>&1 | grep -q "listed 0 entries"

echo OK
52 changes: 42 additions & 10 deletions src/librbd/ImageWatcher.cc
Expand Up @@ -33,6 +33,32 @@ using librbd::watcher::util::HandlePayloadVisitor;

static const double RETRY_DELAY_SECONDS = 1.0;

template <typename I>
struct ImageWatcher<I>::C_ProcessPayload : public Context {
ImageWatcher *image_watcher;
uint64_t notify_id;
uint64_t handle;
watch_notify::Payload payload;

C_ProcessPayload(ImageWatcher *image_watcher_, uint64_t notify_id_,
uint64_t handle_, const watch_notify::Payload &payload)
: image_watcher(image_watcher_), notify_id(notify_id_), handle(handle_),
payload(payload) {
}

void finish(int r) override {
image_watcher->m_async_op_tracker.start_op();
if (image_watcher->notifications_blocked()) {
// requests are blocked -- just ack the notification
bufferlist bl;
image_watcher->acknowledge_notify(notify_id, handle, bl);
} else {
image_watcher->process_payload(notify_id, handle, payload);
}
image_watcher->m_async_op_tracker.finish_op();
}
};

template <typename I>
ImageWatcher<I>::ImageWatcher(I &image_ctx)
: Watcher(image_ctx.md_ctx, image_ctx.op_work_queue, image_ctx.header_oid),
Expand Down Expand Up @@ -62,6 +88,18 @@ void ImageWatcher<I>::unregister_watch(Context *on_finish) {
Watcher::unregister_watch(ctx);
}

template <typename I>
void ImageWatcher<I>::block_notifies(Context *on_finish) {
CephContext *cct = m_image_ctx.cct;
ldout(cct, 10) << this << " " << __func__ << dendl;

on_finish = new FunctionContext([this, on_finish](int r) {
cancel_async_requests();
on_finish->complete(r);
});
Watcher::block_notifies(on_finish);
}

template <typename I>
void ImageWatcher<I>::schedule_async_progress(const AsyncRequestId &request,
uint64_t offset, uint64_t total) {
Expand Down Expand Up @@ -884,15 +922,9 @@ bool ImageWatcher<I>::handle_payload(const UnknownPayload &payload,

template <typename I>
void ImageWatcher<I>::process_payload(uint64_t notify_id, uint64_t handle,
const Payload &payload, int r) {
if (r < 0) {
bufferlist out_bl;
this->acknowledge_notify(notify_id, handle, out_bl);
} else {
apply_visitor(HandlePayloadVisitor<ImageWatcher<I>>(this, notify_id,
handle),
payload);
}
const Payload &payload) {
apply_visitor(HandlePayloadVisitor<ImageWatcher<I>>(this, notify_id, handle),
payload);
}

template <typename I>
Expand All @@ -919,7 +951,7 @@ void ImageWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle,
m_image_ctx.state->refresh(new C_ProcessPayload(this, notify_id, handle,
notify_message.payload));
} else {
process_payload(notify_id, handle, notify_message.payload, 0);
process_payload(notify_id, handle, notify_message.payload);
}
}

Expand Down
23 changes: 4 additions & 19 deletions src/librbd/ImageWatcher.h
Expand Up @@ -36,7 +36,8 @@ class ImageWatcher : public Watcher {
ImageWatcher(ImageCtxT& image_ctx);
~ImageWatcher() override;

void unregister_watch(Context *on_finish);
void unregister_watch(Context *on_finish) override;
void block_notifies(Context *on_finish) override;

void notify_flatten(uint64_t request_id, ProgressContext &prog_ctx,
Context *on_finish);
Expand Down Expand Up @@ -145,23 +146,7 @@ class ImageWatcher : public Watcher {
ProgressContext *m_prog_ctx;
};

struct C_ProcessPayload : public Context {
ImageWatcher *image_watcher;
uint64_t notify_id;
uint64_t handle;
watch_notify::Payload payload;

C_ProcessPayload(ImageWatcher *image_watcher_, uint64_t notify_id_,
uint64_t handle_, const watch_notify::Payload &payload)
: image_watcher(image_watcher_), notify_id(notify_id_), handle(handle_),
payload(payload) {
}

void finish(int r) override {
image_watcher->process_payload(notify_id, handle, payload, r);
}
};

struct C_ProcessPayload;
struct C_ResponseMessage : public Context {
C_NotifyAck *notify_ack;

Expand Down Expand Up @@ -251,7 +236,7 @@ class ImageWatcher : public Watcher {
bool handle_payload(const watch_notify::UnknownPayload& payload,
C_NotifyAck *ctx);
void process_payload(uint64_t notify_id, uint64_t handle,
const watch_notify::Payload &payload, int r);
const watch_notify::Payload &payload);

void handle_notify(uint64_t notify_id, uint64_t handle,
uint64_t notifier_id, bufferlist &bl) override;
Expand Down
5 changes: 3 additions & 2 deletions src/librbd/ManagedLock.cc
Expand Up @@ -256,8 +256,9 @@ void ManagedLock<I>::break_lock(const managed_lock::Locker &locker,
} else {
on_finish = new C_Tracked(m_async_op_tracker, on_finish);
auto req = managed_lock::BreakRequest<I>::create(
m_ioctx, m_work_queue, m_oid, locker, m_blacklist_on_break_lock,
m_blacklist_expire_seconds, force_break_lock, on_finish);
m_ioctx, m_work_queue, m_oid, locker, m_mode == EXCLUSIVE,
m_blacklist_on_break_lock, m_blacklist_expire_seconds, force_break_lock,
on_finish);
req->send();
return;
}
Expand Down
41 changes: 36 additions & 5 deletions src/librbd/Watcher.cc
Expand Up @@ -174,6 +174,30 @@ void Watcher::unregister_watch(Context *on_finish) {
on_finish->complete(0);
}

bool Watcher::notifications_blocked() const {
RWLock::RLocker locker(m_watch_lock);

bool blocked = (m_blocked_count > 0);
ldout(m_cct, 5) << "blocked=" << blocked << dendl;
return blocked;
}

void Watcher::block_notifies(Context *on_finish) {
{
RWLock::WLocker locker(m_watch_lock);
++m_blocked_count;
ldout(m_cct, 5) << "blocked_count=" << m_blocked_count << dendl;
}
m_async_op_tracker.wait_for_ops(on_finish);
}

void Watcher::unblock_notifies() {
RWLock::WLocker locker(m_watch_lock);
assert(m_blocked_count > 0);
--m_blocked_count;
ldout(m_cct, 5) << "blocked_count=" << m_blocked_count << dendl;
}

void Watcher::flush(Context *on_finish) {
m_notifier.flush(on_finish);
}
Expand Down Expand Up @@ -260,11 +284,18 @@ void Watcher::send_notify(bufferlist& payload,
m_notifier.notify(payload, response, on_finish);
}

void Watcher::WatchCtx::handle_notify(uint64_t notify_id,
uint64_t handle,
uint64_t notifier_id,
bufferlist& bl) {
watcher.handle_notify(notify_id, handle, notifier_id, bl);
void Watcher::WatchCtx::handle_notify(uint64_t notify_id, uint64_t handle,
uint64_t notifier_id, bufferlist& bl) {
// if notifications are blocked, finish the notification w/o
// bubbling the notification up to the derived class
watcher.m_async_op_tracker.start_op();
if (watcher.notifications_blocked()) {
bufferlist bl;
watcher.acknowledge_notify(notify_id, handle, bl);
} else {
watcher.handle_notify(notify_id, handle, notifier_id, bl);
}
watcher.m_async_op_tracker.finish_op();
}

void Watcher::WatchCtx::handle_error(uint64_t handle, int err) {
Expand Down
10 changes: 9 additions & 1 deletion src/librbd/Watcher.h
Expand Up @@ -4,6 +4,7 @@
#ifndef CEPH_LIBRBD_WATCHER_H
#define CEPH_LIBRBD_WATCHER_H

#include "common/AsyncOpTracker.h"
#include "common/Mutex.h"
#include "common/RWLock.h"
#include "include/rados/librados.hpp"
Expand Down Expand Up @@ -36,9 +37,13 @@ class Watcher {
virtual ~Watcher();

void register_watch(Context *on_finish);
void unregister_watch(Context *on_finish);
virtual void unregister_watch(Context *on_finish);
void flush(Context *on_finish);

bool notifications_blocked() const;
virtual void block_notifies(Context *on_finish);
void unblock_notifies();

std::string get_oid() const;
void set_oid(const string& oid);

Expand Down Expand Up @@ -73,6 +78,7 @@ class Watcher {
uint64_t m_watch_handle;
watcher::Notifier m_notifier;
WatchState m_watch_state;
AsyncOpTracker m_async_op_tracker;

void send_notify(bufferlist &payload,
watcher::NotifyResponse *response = nullptr,
Expand Down Expand Up @@ -149,6 +155,8 @@ class Watcher {
WatchCtx m_watch_ctx;
Context *m_unregister_watch_ctx = nullptr;

uint32_t m_blocked_count = 0;

void handle_register_watch(int r, Context *on_finish);

void rewatch();
Expand Down
73 changes: 48 additions & 25 deletions src/librbd/image/CloseRequest.cc
Expand Up @@ -31,56 +31,50 @@ CloseRequest<I>::CloseRequest(I *image_ctx, Context *on_finish)

template <typename I>
void CloseRequest<I>::send() {
send_shut_down_update_watchers();
send_block_image_watcher();
}

template <typename I>
void CloseRequest<I>::send_shut_down_update_watchers() {
void CloseRequest<I>::send_block_image_watcher() {
if (m_image_ctx->image_watcher == nullptr) {
send_shut_down_update_watchers();
return;
}

CephContext *cct = m_image_ctx->cct;
ldout(cct, 10) << this << " " << __func__ << dendl;

m_image_ctx->state->shut_down_update_watchers(create_async_context_callback(
*m_image_ctx, create_context_callback<
CloseRequest<I>, &CloseRequest<I>::handle_shut_down_update_watchers>(this)));
// prevent incoming requests from our peers
m_image_ctx->image_watcher->block_notifies(create_context_callback<
CloseRequest<I>, &CloseRequest<I>::handle_block_image_watcher>(this));
}

template <typename I>
void CloseRequest<I>::handle_shut_down_update_watchers(int r) {
void CloseRequest<I>::handle_block_image_watcher(int r) {
CephContext *cct = m_image_ctx->cct;
ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;

save_result(r);
if (r < 0) {
lderr(cct) << "failed to shut down update watchers: " << cpp_strerror(r)
<< dendl;
}

send_unregister_image_watcher();
send_shut_down_update_watchers();
}

template <typename I>
void CloseRequest<I>::send_unregister_image_watcher() {
if (m_image_ctx->image_watcher == nullptr) {
send_shut_down_io_queue();
return;
}

void CloseRequest<I>::send_shut_down_update_watchers() {
CephContext *cct = m_image_ctx->cct;
ldout(cct, 10) << this << " " << __func__ << dendl;

// prevent incoming requests from our peers
m_image_ctx->image_watcher->unregister_watch(create_context_callback<
CloseRequest<I>, &CloseRequest<I>::handle_unregister_image_watcher>(this));
m_image_ctx->state->shut_down_update_watchers(create_async_context_callback(
*m_image_ctx, create_context_callback<
CloseRequest<I>, &CloseRequest<I>::handle_shut_down_update_watchers>(this)));
}

template <typename I>
void CloseRequest<I>::handle_unregister_image_watcher(int r) {
void CloseRequest<I>::handle_shut_down_update_watchers(int r) {
CephContext *cct = m_image_ctx->cct;
ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;

save_result(r);
if (r < 0) {
lderr(cct) << "failed to unregister image watcher: " << cpp_strerror(r)
lderr(cct) << "failed to shut down update watchers: " << cpp_strerror(r)
<< dendl;
}

Expand Down Expand Up @@ -156,7 +150,8 @@ void CloseRequest<I>::handle_shut_down_exclusive_lock(int r) {
lderr(cct) << "failed to shut down exclusive lock: " << cpp_strerror(r)
<< dendl;
}
send_flush_readahead();

send_unregister_image_watcher();
}

template <typename I>
Expand All @@ -178,6 +173,34 @@ void CloseRequest<I>::handle_flush(int r) {
if (r < 0) {
lderr(cct) << "failed to flush IO: " << cpp_strerror(r) << dendl;
}
send_unregister_image_watcher();
}

template <typename I>
void CloseRequest<I>::send_unregister_image_watcher() {
if (m_image_ctx->image_watcher == nullptr) {
send_flush_readahead();
return;
}

CephContext *cct = m_image_ctx->cct;
ldout(cct, 10) << this << " " << __func__ << dendl;

m_image_ctx->image_watcher->unregister_watch(create_context_callback<
CloseRequest<I>, &CloseRequest<I>::handle_unregister_image_watcher>(this));
}

template <typename I>
void CloseRequest<I>::handle_unregister_image_watcher(int r) {
CephContext *cct = m_image_ctx->cct;
ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;

save_result(r);
if (r < 0) {
lderr(cct) << "failed to unregister image watcher: " << cpp_strerror(r)
<< dendl;
}

send_flush_readahead();
}

Expand Down