Merge pull request #8511 from dillaman/wip-15436
librbd: IO deadlock when dynamically enabling/disabling features

Reviewed-by: Josh Durgin <jdurgin@redhat.com>
jdurgin committed Apr 14, 2016
2 parents f655b7e + 769f994 commit 6956da8
Showing 12 changed files with 193 additions and 34 deletions.
2 changes: 1 addition & 1 deletion qa/workunits/rbd/qemu_dynamic_features.sh
@@ -1,4 +1,4 @@
#!/bin/sh -ex
#!/bin/bash -ex

if [[ -z "${IMAGE_NAME}" ]]; then
echo image name must be provided
2 changes: 1 addition & 1 deletion qa/workunits/rbd/qemu_rebuild_object_map.sh
@@ -1,4 +1,4 @@
#!/bin/sh -ex
#!/bin/bash -ex

if [[ -z "${IMAGE_NAME}" ]]; then
echo image name must be provided
5 changes: 5 additions & 0 deletions src/common/WorkQueue.h
@@ -414,6 +414,11 @@ class ThreadPool : public md_config_obs_t {
}
return m_items.front();
}
void requeue(T *item) {
Mutex::Locker pool_locker(m_pool->_lock);
_void_process_finish(nullptr);
m_items.push_front(item);
}
void signal() {
Mutex::Locker pool_locker(m_pool->_lock);
m_pool->_cond.SignalOne();
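
The requeue() helper added above hands a dequeued item back to the pool: it drops the pool's in-progress accounting via _void_process_finish() and pushes the item onto the front of the queue. A minimal stand-alone sketch of the same idea, assuming a hypothetical PoolQueue class rather than the real ThreadPool::PointerWQ interface:

#include <cassert>
#include <deque>
#include <mutex>

// Sketch only: a pool-style queue where dequeue() marks one item as being
// processed and requeue() must undo that accounting before handing the item
// back, otherwise a later drain/idle check would wait forever.
template <typename T>
class PoolQueue {
public:
  void queue(T *item) {
    std::lock_guard<std::mutex> lock(m_lock);
    m_items.push_back(item);
  }

  T *dequeue() {
    std::lock_guard<std::mutex> lock(m_lock);
    if (m_items.empty()) {
      return nullptr;
    }
    T *item = m_items.front();
    m_items.pop_front();
    ++m_processing;
    return item;
  }

  // Hand a dequeued item back: drop the processing count and put the item at
  // the front so the original submission order is preserved.
  void requeue(T *item) {
    std::lock_guard<std::mutex> lock(m_lock);
    assert(m_processing > 0);
    --m_processing;
    m_items.push_front(item);
  }

  void process_finish() {
    std::lock_guard<std::mutex> lock(m_lock);
    assert(m_processing > 0);
    --m_processing;
  }

  bool idle() {
    std::lock_guard<std::mutex> lock(m_lock);
    return m_items.empty() && m_processing == 0;
  }

private:
  std::mutex m_lock;
  std::deque<T *> m_items;
  int m_processing = 0;
};

int main() {
  PoolQueue<int> q;
  int io = 42;
  q.queue(&io);

  int *item = q.dequeue();  // m_processing == 1
  q.requeue(item);          // back at the front, m_processing == 0 again

  item = q.dequeue();       // the same item comes out first
  assert(item == &io);
  q.process_finish();
  assert(q.idle());         // only true because requeue() fixed the count
  return 0;
}

Pushing to the front rather than the back is what lets a stalled request keep its place ahead of later submissions.
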
81 changes: 56 additions & 25 deletions src/librbd/AioImageRequestWQ.cc
@@ -302,17 +302,24 @@ void AioImageRequestWQ::clear_require_lock_on_read() {

void *AioImageRequestWQ::_void_dequeue() {
AioImageRequest<> *peek_item = front();
if (peek_item == NULL || m_refresh_in_progress) {
return NULL;

// no IO ops available or refresh in-progress (IO stalled)
if (peek_item == nullptr || m_refresh_in_progress) {
return nullptr;
}

bool refresh_required = m_image_ctx.state->is_refresh_required();
{
RWLock::RLocker locker(m_lock);
if (peek_item->is_write_op()) {
if (m_write_blockers > 0) {
return NULL;
return nullptr;
}

// refresh will requeue the op -- don't count it as in-progress
if (!refresh_required) {
m_in_progress_writes.inc();
}
m_in_progress_writes.inc();
} else if (m_require_lock_on_read) {
return nullptr;
}
@@ -322,15 +329,17 @@ void *AioImageRequestWQ::_void_dequeue() {
ThreadPool::PointerWQ<AioImageRequest<> >::_void_dequeue());
assert(peek_item == item);

if (m_image_ctx.state->is_refresh_required()) {
if (refresh_required) {
ldout(m_image_ctx.cct, 15) << "image refresh required: delaying IO " << item
<< dendl;

// stall IO until the refresh completes
m_refresh_in_progress = true;

get_pool_lock().Unlock();
m_image_ctx.state->refresh(new C_RefreshFinish(this, item));
get_pool_lock().Lock();
return NULL;
return nullptr;
}
return item;
}
@@ -345,31 +354,41 @@ void AioImageRequestWQ::process(AioImageRequest<> *req) {
req->send();
}

finish_queued_op(req);
if (req->is_write_op()) {
finish_in_progress_write();
}
delete req;

finish_in_flight_op();
}

void AioImageRequestWQ::finish_queued_op(AioImageRequest<> *req) {
RWLock::RLocker locker(m_lock);
if (req->is_write_op()) {
assert(m_queued_writes.read() > 0);
m_queued_writes.dec();
} else {
assert(m_queued_reads.read() > 0);
m_queued_reads.dec();
}
}

void AioImageRequestWQ::finish_in_progress_write() {
bool writes_blocked = false;
{
RWLock::RLocker locker(m_lock);
if (req->is_write_op()) {
assert(m_queued_writes.read() > 0);
m_queued_writes.dec();

assert(m_in_progress_writes.read() > 0);
if (m_in_progress_writes.dec() == 0 &&
!m_write_blocker_contexts.empty()) {
writes_blocked = true;
}
} else {
assert(m_queued_reads.read() > 0);
m_queued_reads.dec();
assert(m_in_progress_writes.read() > 0);
if (m_in_progress_writes.dec() == 0 &&
!m_write_blocker_contexts.empty()) {
writes_blocked = true;
}
}

if (writes_blocked) {
RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
m_image_ctx.flush(new C_BlockedWrites(this));
}
delete req;

finish_in_flight_op();
}

int AioImageRequestWQ::start_in_flight_op(AioCompletion *c) {
@@ -440,12 +459,24 @@ void AioImageRequestWQ::handle_refreshed(int r, AioImageRequest<> *req) {
<< "req=" << req << dendl;
if (r < 0) {
req->fail(r);
delete req;

finish_queued_op(req);
finish_in_flight_op();
} else {
process(req);
process_finish();
// since IO was stalled for refresh -- original IO order is preserved
// if we requeue this op for work queue processing
requeue(req);
}

m_refresh_in_progress = false;
signal();
m_refresh_in_progress = false;
signal();

// refresh might have enabled exclusive lock -- IO stalled until
// we acquire the lock
RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
if (is_lock_required() && is_lock_request_needed()) {
m_image_ctx.exclusive_lock->request_lock(nullptr);
}
}

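Taken together, the changes above make _void_dequeue() stall the work queue (via m_refresh_in_progress) whenever an image refresh is required, and let handle_refreshed() requeue the stalled request instead of processing it inline, so the original IO order survives the refresh. A rough, single-threaded sketch of that control flow, using hypothetical names and an immediately-completing refresh:

#include <deque>
#include <iostream>
#include <string>

struct Request {
  std::string name;
};

// Sketch only: the real queue is driven by a thread pool and an asynchronous
// image-refresh state machine; here everything completes inline.
class StallingQueue {
public:
  void queue(Request *req) { m_items.push_back(req); }

  // Returns the next request, or nullptr if the queue must stall.
  Request *dequeue(bool refresh_required) {
    if (m_items.empty() || m_refresh_in_progress) {
      return nullptr;                   // no work, or IO stalled for refresh
    }
    Request *req = m_items.front();
    m_items.pop_front();
    if (refresh_required) {
      m_refresh_in_progress = true;     // stall further IO
      start_refresh(req);               // asynchronous in the real code
      return nullptr;                   // nothing handed to the worker yet
    }
    return req;
  }

  // Refresh completion callback.
  void handle_refreshed(int r, Request *req) {
    if (r < 0) {
      std::cout << "failing " << req->name << ": r=" << r << "\n";
    } else {
      m_items.push_front(req);          // requeue at the front: order preserved
    }
    m_refresh_in_progress = false;      // un-stall the queue
  }

private:
  void start_refresh(Request *req) {
    handle_refreshed(0, req);           // pretend the refresh succeeded
  }

  std::deque<Request *> m_items;
  bool m_refresh_in_progress = false;
};

int main() {
  StallingQueue q;
  Request a{"write A"}, b{"write B"};
  q.queue(&a);
  q.queue(&b);

  // The first dequeue triggers a refresh: the request is stalled and requeued.
  Request *r = q.dequeue(true);
  std::cout << (r ? r->name : std::string("stalled")) << "\n";  // "stalled"

  // After the refresh completes, the same request is handed out first.
  std::cout << q.dequeue(false)->name << "\n";  // "write A"
  std::cout << q.dequeue(false)->name << "\n";  // "write B"
  return 0;
}
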
3 changes: 3 additions & 0 deletions src/librbd/AioImageRequestWQ.h
@@ -104,6 +104,9 @@ class AioImageRequestWQ : protected ThreadPool::PointerWQ<AioImageRequest<ImageC
return (m_queued_writes.read() == 0);
}

void finish_queued_op(AioImageRequest<ImageCtx> *req);
void finish_in_progress_write();

int start_in_flight_op(AioCompletion *c);
void finish_in_flight_op();

11 changes: 8 additions & 3 deletions src/librbd/ExclusiveLock.cc
@@ -104,9 +104,14 @@ template <typename I>
void ExclusiveLock<I>::shut_down(Context *on_shut_down) {
ldout(m_image_ctx.cct, 10) << this << " " << __func__ << dendl;

Mutex::Locker locker(m_lock);
assert(!is_shutdown());
execute_action(ACTION_SHUT_DOWN, on_shut_down);
{
Mutex::Locker locker(m_lock);
assert(!is_shutdown());
execute_action(ACTION_SHUT_DOWN, on_shut_down);
}

// if stalled in request state machine -- abort
handle_lock_released();
}

template <typename I>
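
The shut_down() change above narrows the Mutex::Locker to its own scope so that handle_lock_released() runs only after m_lock has been dropped; invoking it while still holding the mutex could deadlock, since the handler may need the same lock to abort a stalled request. A small sketch of the pattern, assuming a hypothetical class with std::mutex standing in for Ceph's Mutex:

#include <iostream>
#include <mutex>
#include <string>
#include <vector>

class LockStateMachine {
public:
  void shut_down() {
    {
      // Scoped: protects the action queue only.
      std::lock_guard<std::mutex> locker(m_lock);
      m_actions.push_back("SHUT_DOWN");
    }
    // Runs after the scoped lock is released.  With a non-recursive mutex,
    // calling this while still holding m_lock would deadlock immediately.
    handle_lock_released();
  }

  void handle_lock_released() {
    std::lock_guard<std::mutex> locker(m_lock);
    std::cout << "aborting stalled lock request; pending actions: "
              << m_actions.size() << "\n";
  }

private:
  std::mutex m_lock;
  std::vector<std::string> m_actions;
};

int main() {
  LockStateMachine lsm;
  lsm.shut_down();
  return 0;
}
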
12 changes: 10 additions & 2 deletions src/librbd/ImageWatcher.cc
@@ -403,6 +403,11 @@ void ImageWatcher::schedule_request_lock(bool use_timer, int timer_delay) {
void ImageWatcher::notify_request_lock() {
RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
RWLock::RLocker snap_locker(m_image_ctx.snap_lock);

// ExclusiveLock state machine can be dynamically disabled
if (m_image_ctx.exclusive_lock == nullptr) {
return;
}
assert(!m_image_ctx.exclusive_lock->is_lock_owner());

ldout(m_image_ctx.cct, 10) << this << " notify request lock" << dendl;
@@ -417,8 +422,11 @@ void ImageWatcher::handle_request_lock(int r) {
RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
RWLock::RLocker snap_locker(m_image_ctx.snap_lock);

// ExclusiveLock state machine cannot transition
assert(!m_image_ctx.exclusive_lock->is_lock_owner());
// ExclusiveLock state machine cannot transition -- but can be
// dynamically disabled
if (m_image_ctx.exclusive_lock == nullptr) {
return;
}

if (r == -ETIMEDOUT) {
ldout(m_image_ctx.cct, 5) << this << " timed out requesting lock: retrying"
48 changes: 47 additions & 1 deletion src/librbd/image/RefreshRequest.cc
@@ -42,6 +42,7 @@ RefreshRequest<I>::~RefreshRequest() {
assert(m_object_map == nullptr);
assert(m_journal == nullptr);
assert(m_refresh_parent == nullptr);
assert(!m_blocked_writes);
}

template <typename I>
@@ -459,7 +460,7 @@ Context *RefreshRequest<I>::send_v2_open_journal() {
m_image_ctx.journal == nullptr) {
m_image_ctx.aio_work_queue->set_require_lock_on_read();
}
return send_v2_finalize_refresh_parent();
return send_v2_block_writes();
}

// implies journal dynamically enabled since ExclusiveLock will init
@@ -488,6 +489,47 @@ Context *RefreshRequest<I>::handle_v2_open_journal(int *result) {
save_result(result);
}

return send_v2_block_writes();
}

template <typename I>
Context *RefreshRequest<I>::send_v2_block_writes() {
bool disabled_journaling = false;
{
RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
disabled_journaling = ((m_features & RBD_FEATURE_EXCLUSIVE_LOCK) != 0 &&
(m_features & RBD_FEATURE_JOURNALING) == 0 &&
m_image_ctx.journal != nullptr);
}

if (!disabled_journaling) {
return send_v2_finalize_refresh_parent();
}

CephContext *cct = m_image_ctx.cct;
ldout(cct, 10) << this << " " << __func__ << dendl;

// we need to block writes temporarily to avoid in-flight journal
// writes
m_blocked_writes = true;
Context *ctx = create_context_callback<
RefreshRequest<I>, &RefreshRequest<I>::handle_v2_block_writes>(this);

RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
m_image_ctx.aio_work_queue->block_writes(ctx);
return nullptr;
}

template <typename I>
Context *RefreshRequest<I>::handle_v2_block_writes(int *result) {
CephContext *cct = m_image_ctx.cct;
ldout(cct, 10) << this << " " << __func__ << ": r=" << *result << dendl;

if (*result < 0) {
lderr(cct) << "failed to block writes: " << cpp_strerror(*result)
<< dendl;
save_result(result);
}
return send_v2_finalize_refresh_parent();
}

@@ -639,6 +681,10 @@ Context *RefreshRequest<I>::handle_v2_close_journal(int *result) {
delete m_journal;
m_journal = nullptr;

assert(m_blocked_writes);
m_blocked_writes = false;

m_image_ctx.aio_work_queue->unblock_writes();
return send_v2_close_object_map();
}

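The new V2_BLOCK_WRITES step quiesces the image's write queue before the journal is closed and only unblocks writes once handle_v2_close_journal() has torn the journal down, so no in-flight journaled write can race the shutdown. A simplified, synchronous sketch of that quiesce/teardown/resume sequence (hypothetical WriteBlocker, not the librbd AioImageRequestWQ API):

#include <cassert>
#include <functional>
#include <iostream>

// Sketch only: the real work queue defers the callback until in-flight writes
// have drained; this stand-in has none in flight and fires it immediately.
class WriteBlocker {
public:
  void block_writes(std::function<void()> on_blocked) {
    ++m_write_blockers;
    on_blocked();
  }

  void unblock_writes() {
    assert(m_write_blockers > 0);
    --m_write_blockers;
  }

  bool writes_blocked() const { return m_write_blockers > 0; }

private:
  int m_write_blockers = 0;
};

int main() {
  WriteBlocker wq;

  // Journaling was switched off while the image is open: quiesce writes,
  // close the journal, then let IO flow again.
  wq.block_writes([&wq] {
    std::cout << "writes quiesced -- closing journal\n";
    // ... the refresh state machine would close the journal here ...
    wq.unblock_writes();
  });

  std::cout << "writes blocked after refresh: " << std::boolalpha
            << wq.writes_blocked() << "\n";  // false
  return 0;
}
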
8 changes: 8 additions & 0 deletions src/librbd/image/RefreshRequest.h
@@ -67,6 +67,9 @@ class RefreshRequest {
* V2_OPEN_JOURNAL (skip if journal |
* | active or disabled) |
* v |
* V2_BLOCK_WRITES (skip if journal not |
* | disabled) |
* v |
* <apply> |
* | |
* v |
@@ -125,6 +128,8 @@ class RefreshRequest {
std::string m_lock_tag;
bool m_exclusive_locked;

bool m_blocked_writes = false;

void send_v1_read_header();
Context *handle_v1_read_header(int *result);

@@ -152,6 +157,9 @@
Context *send_v2_open_journal();
Context *handle_v2_open_journal(int *result);

Context *send_v2_block_writes();
Context *handle_v2_block_writes(int *result);

Context *send_v2_open_object_map();
Context *handle_v2_open_object_map(int *result);

15 changes: 15 additions & 0 deletions src/librbd/internal.cc
@@ -1608,6 +1608,21 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force) {
return -EINVAL;
}

// if disabling features w/ exclusive lock supported, we need to
// acquire the lock to temporarily block IO against the image
if (ictx->exclusive_lock != nullptr && !enabled) {
C_SaferCond lock_ctx;
ictx->exclusive_lock->request_lock(&lock_ctx);
r = lock_ctx.wait();
if (r < 0) {
lderr(cct) << "failed to lock image: " << cpp_strerror(r) << dendl;
return r;
} else if (!ictx->exclusive_lock->is_lock_owner()) {
lderr(cct) << "failed to acquire exclusive lock" << dendl;
return -EROFS;
}
}

RWLock::WLocker snap_locker(ictx->snap_lock);
uint64_t new_features;
if (enabled) {
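
The hunk above waits synchronously for the asynchronous exclusive-lock request by handing request_lock() a C_SaferCond and then checking is_lock_owner() before the feature bits are actually changed. A self-contained sketch of that wait-and-verify pattern, with toy stand-ins for the completion context and the lock (not the librbd classes):

#include <condition_variable>
#include <iostream>
#include <mutex>

// Stand-in for C_SaferCond: wait() blocks until complete(r) is called.
class SaferCond {
public:
  void complete(int r) {
    std::lock_guard<std::mutex> lock(m_lock);
    m_result = r;
    m_done = true;
    m_cond.notify_all();
  }

  int wait() {
    std::unique_lock<std::mutex> lock(m_lock);
    m_cond.wait(lock, [this] { return m_done; });
    return m_result;
  }

private:
  std::mutex m_lock;
  std::condition_variable m_cond;
  bool m_done = false;
  int m_result = 0;
};

// Toy lock that "acquires" immediately; the real request is asynchronous and
// may be refused by the current lock owner.
class ToyExclusiveLock {
public:
  void request_lock(SaferCond *ctx) {
    m_owner = true;
    ctx->complete(0);
  }
  bool is_lock_owner() const { return m_owner; }

private:
  bool m_owner = false;
};

int main() {
  ToyExclusiveLock lock;
  SaferCond lock_ctx;

  lock.request_lock(&lock_ctx);
  int r = lock_ctx.wait();
  if (r < 0 || !lock.is_lock_owner()) {
    std::cerr << "failed to acquire exclusive lock\n";
    return 1;  // the real code returns the error or -EROFS
  }
  std::cout << "lock acquired -- safe to update features\n";
  return 0;
}
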
13 changes: 13 additions & 0 deletions src/test/librbd/image/test_mock_RefreshRequest.cc
@@ -277,6 +277,17 @@ class TestMockImageRefreshRequest : public TestMockFixture {
const std::string &snap_name, uint64_t snap_id) {
EXPECT_CALL(mock_image_ctx, get_snap_id(snap_name)).WillOnce(Return(snap_id));
}

void expect_block_writes(MockImageCtx &mock_image_ctx, int r) {
EXPECT_CALL(*mock_image_ctx.aio_work_queue, block_writes(_))
.WillOnce(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue));
}

void expect_unblock_writes(MockImageCtx &mock_image_ctx) {
EXPECT_CALL(*mock_image_ctx.aio_work_queue, unblock_writes())
.Times(1);
}

};

TEST_F(TestMockImageRefreshRequest, SuccessV1) {
@@ -615,8 +626,10 @@ TEST_F(TestMockImageRefreshRequest, DisableJournal) {
expect_get_mutable_metadata(mock_image_ctx, 0);
expect_get_flags(mock_image_ctx, 0);
expect_refresh_parent_is_required(mock_refresh_parent_request, false);
expect_block_writes(mock_image_ctx, 0);
expect_clear_require_lock_on_read(mock_image_ctx);
expect_close_journal(mock_image_ctx, *mock_journal, 0);
expect_unblock_writes(mock_image_ctx);

C_SaferCond ctx;
MockRefreshRequest *req = new MockRefreshRequest(mock_image_ctx, &ctx);