Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rbd-mirror: failover and failback of unmodified image results in split-brain #14963

Merged
merged 3 commits into from May 5, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 19 additions & 0 deletions qa/workunits/rbd/rbd_mirror.sh
Expand Up @@ -127,6 +127,25 @@ wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped'
wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
compare_images ${POOL} ${image}

# failover (unmodified): demote the image on CLUSTER2, wait for replay on
# CLUSTER1 to stop and for both clusters to report 'up+stopped', then promote
# the image on CLUSTER1. This section issues no image writes between the
# demotion and the promotion.
demote_image ${CLUSTER2} ${POOL} ${image}
wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped'
wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped'
promote_image ${CLUSTER1} ${POOL} ${image}
# CLUSTER2 is now expected to start replaying the image
wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image}

# failback (unmodified): reverse the roles again, still without writing to the
# image -- demote on CLUSTER1, promote on CLUSTER2 -- then wait for CLUSTER1 to
# resume replay and finish it before comparing the images.
# NOTE(review): the 'up+replaying' / 'master_position' arguments are
# interpreted by wait_for_status_in_pool_dir, which is defined outside this
# section -- confirm their exact matching semantics there.
demote_image ${CLUSTER1} ${POOL} ${image}
wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image}
wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped'
promote_image ${CLUSTER2} ${POOL} ${image}
wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped'
compare_images ${POOL} ${image}

# failover
demote_image ${CLUSTER2} ${POOL} ${image}
wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
Expand Down
2 changes: 1 addition & 1 deletion src/librbd/journal/DemoteRequest.cc
Expand Up @@ -131,7 +131,7 @@ void DemoteRequest<I>::append_event() {
CephContext *cct = m_image_ctx.cct;
ldout(cct, 20) << dendl;

EventEntry event_entry{DemoteEvent{}, ceph_clock_now()};
EventEntry event_entry{DemotePromoteEvent{}, {}};
bufferlist event_entry_bl;
::encode(event_entry, event_entry_bl);

Expand Down
87 changes: 87 additions & 0 deletions src/librbd/journal/PromoteRequest.cc
Expand Up @@ -102,6 +102,93 @@ void PromoteRequest<I>::handle_allocate_tag(int r) {
if (r < 0) {
m_ret_val = r;
lderr(cct) << "failed to allocate tag: " << cpp_strerror(r) << dendl;
shut_down();
return;
}

m_tag_tid = m_tag.tid;
append_event();
}

// APPEND_EVENT state: write a promotion marker into the journal.
//
// An EventEntry wrapping a default-constructed DemotePromoteEvent is encoded
// into a bufferlist and appended under the tag id held in m_tag_tid. The
// future returned by the append is kept in m_future and flushed; a context
// bound to handle_append_event() is handed to that flush.
template <typename I>
void PromoteRequest<I>::append_event() {
  CephContext *cct = m_image_ctx->cct;
  ldout(cct, 20) << dendl;

  // encode the event before touching the journaler
  bufferlist encoded_entry_bl;
  EventEntry promote_entry{DemotePromoteEvent{}, {}};
  ::encode(promote_entry, encoded_entry_bl);

  // NOTE(review): all three start_append() arguments are zero; their meaning
  // is defined by the Journaler interface, which is not visible here.
  m_journaler->start_append(0, 0, 0);
  m_future = m_journaler->append(m_tag_tid, encoded_entry_bl);

  m_future.flush(
    create_context_callback<
      PromoteRequest<I>, &PromoteRequest<I>::handle_append_event>(this));
}

// Callback bound to the flush issued by append_event().
//
// @param r  result code delivered to the callback; a negative value is
//           treated as a failure.
//
// On success the request moves on to commit_event(). On failure the code is
// stored in m_ret_val, an error is logged, and the request jumps straight to
// stop_append() without committing.
template <typename I>
void PromoteRequest<I>::handle_append_event(int r) {
  CephContext *cct = m_image_ctx->cct;
  ldout(cct, 20) << "r=" << r << dendl;

  if (r >= 0) {
    // event was appended -- continue with the commit step
    commit_event();
    return;
  }

  // append failed: remember the error and unwind via stop_append()
  m_ret_val = r;
  lderr(cct) << "failed to append promotion journal event: "
             << cpp_strerror(r) << dendl;
  stop_append();
}

// COMMIT_EVENT state: report m_future to the journaler as committed and then
// request a flush of the commit position. A context bound to
// handle_commit_event() is passed to that flush request.
template <typename I>
void PromoteRequest<I>::commit_event() {
  CephContext *cct = m_image_ctx->cct;
  ldout(cct, 20) << dendl;

  m_journaler->committed(m_future);

  m_journaler->flush_commit_position(
    create_context_callback<
      PromoteRequest<I>, &PromoteRequest<I>::handle_commit_event>(this));
}

// Callback bound to the commit-position flush issued by commit_event().
//
// @param r  result code delivered to the callback; a negative value is
//           treated as a failure.
//
// A failure is logged and stored in m_ret_val. Regardless of the outcome the
// request continues with stop_append().
template <typename I>
void PromoteRequest<I>::handle_commit_event(int r) {
  CephContext *cct = m_image_ctx->cct;
  ldout(cct, 20) << "r=" << r << dendl;

  const bool flush_failed = (r < 0);
  if (flush_failed) {
    lderr(cct) << "failed to flush promote commit position: "
               << cpp_strerror(r) << dendl;
    m_ret_val = r;
  }

  // success and failure both proceed to the same next step
  stop_append();
}

// STOP_APPEND state: ask the journaler to stop appending. A context bound to
// handle_stop_append() is passed to the journaler's stop_append() call.
template <typename I>
void PromoteRequest<I>::stop_append() {
  CephContext *cct = m_image_ctx->cct;
  ldout(cct, 20) << dendl;

  auto on_append_stopped = create_context_callback<
    PromoteRequest<I>, &PromoteRequest<I>::handle_stop_append>(this);
  m_journaler->stop_append(on_append_stopped);
}

template <typename I>
void PromoteRequest<I>::handle_stop_append(int r) {
CephContext *cct = m_image_ctx->cct;
ldout(cct, 20) << "r=" << r << dendl;

if (r < 0) {
if (m_ret_val == 0) {
m_ret_val = r;
}
lderr(cct) << "failed to stop journal append: " << cpp_strerror(r) << dendl;
}

shut_down();
Expand Down
35 changes: 28 additions & 7 deletions src/librbd/journal/PromoteRequest.h
Expand Up @@ -7,6 +7,7 @@
#include "include/int_types.h"
#include "common/Mutex.h"
#include "cls/journal/cls_journal_types.h"
#include "journal/Future.h"
#include "librbd/journal/Types.h"
#include "librbd/journal/TypeTraits.h"

Expand Down Expand Up @@ -37,13 +38,22 @@ class PromoteRequest {
* <start>
* |
* v
* OPEN
* |
* v
* ALLOCATE_TAG
* |
* v
* SHUT_DOWN
* OPEN * * * * * * * * * *
* | *
* v *
* ALLOCATE_TAG * * * * * *
* | *
* v *
* APPEND_EVENT * * * *
* | * *
* v * *
* COMMIT_EVENT * *
* | * *
* v * *
* STOP_APPEND <* * * *
* | *
* v *
* SHUT_DOWN <* * * * * * *
* |
* v
* <finish>
Expand All @@ -52,6 +62,7 @@ class PromoteRequest {
*/

typedef typename TypeTraits<ImageCtxT>::Journaler Journaler;
typedef typename TypeTraits<ImageCtxT>::Future Future;

ImageCtxT *m_image_ctx;
bool m_force;
Expand All @@ -66,13 +77,23 @@ class PromoteRequest {
TagData m_tag_data;

cls::journal::Tag m_tag;
Future m_future;

void send_open();
void handle_open(int r);

void allocate_tag();
void handle_allocate_tag(int r);

void append_event();
void handle_append_event(int r);

void commit_event();
void handle_commit_event(int r);

void stop_append();
void handle_stop_append(int r);

void shut_down();
void handle_shut_down(int r);

Expand Down
4 changes: 2 additions & 2 deletions src/librbd/journal/Replay.cc
Expand Up @@ -670,10 +670,10 @@ void Replay<I>::handle_event(const journal::FlattenEvent &event,
}

template <typename I>
void Replay<I>::handle_event(const journal::DemoteEvent &event,
void Replay<I>::handle_event(const journal::DemotePromoteEvent &event,
Context *on_ready, Context *on_safe) {
CephContext *cct = m_image_ctx.cct;
ldout(cct, 20) << ": Demote event" << dendl;
ldout(cct, 20) << ": Demote/Promote event" << dendl;
on_ready->complete(0);
on_safe->complete(0);
}
Expand Down
2 changes: 1 addition & 1 deletion src/librbd/journal/Replay.h
Expand Up @@ -157,7 +157,7 @@ class Replay {
Context *on_safe);
void handle_event(const FlattenEvent &event, Context *on_ready,
Context *on_safe);
void handle_event(const DemoteEvent &event, Context *on_ready,
void handle_event(const DemotePromoteEvent &event, Context *on_ready,
Context *on_safe);
void handle_event(const SnapLimitEvent &event, Context *on_ready,
Context *on_safe);
Expand Down
16 changes: 8 additions & 8 deletions src/librbd/journal/Types.cc
Expand Up @@ -272,13 +272,13 @@ void ResizeEvent::dump(Formatter *f) const {
f->dump_unsigned("size", size);
}

void DemoteEvent::encode(bufferlist& bl) const {
void DemotePromoteEvent::encode(bufferlist& bl) const {
}

void DemoteEvent::decode(__u8 version, bufferlist::iterator& it) {
void DemotePromoteEvent::decode(__u8 version, bufferlist::iterator& it) {
}

void DemoteEvent::dump(Formatter *f) const {
void DemotePromoteEvent::dump(Formatter *f) const {
}

void UpdateFeaturesEvent::encode(bufferlist& bl) const {
Expand Down Expand Up @@ -400,8 +400,8 @@ void EventEntry::decode(bufferlist::iterator& it) {
case EVENT_TYPE_FLATTEN:
event = FlattenEvent();
break;
case EVENT_TYPE_DEMOTE:
event = DemoteEvent();
case EVENT_TYPE_DEMOTE_PROMOTE:
event = DemotePromoteEvent();
break;
case EVENT_TYPE_UPDATE_FEATURES:
event = UpdateFeaturesEvent();
Expand Down Expand Up @@ -484,7 +484,7 @@ void EventEntry::generate_test_instances(std::list<EventEntry *> &o) {

o.push_back(new EventEntry(FlattenEvent(123), utime_t(1, 1)));

o.push_back(new EventEntry(DemoteEvent()));
o.push_back(new EventEntry(DemotePromoteEvent()));

o.push_back(new EventEntry(UpdateFeaturesEvent()));
o.push_back(new EventEntry(UpdateFeaturesEvent(123, 127, true), utime_t(1, 1)));
Expand Down Expand Up @@ -749,8 +749,8 @@ std::ostream &operator<<(std::ostream &out, const EventType &type) {
case EVENT_TYPE_FLATTEN:
out << "Flatten";
break;
case EVENT_TYPE_DEMOTE:
out << "Demote";
case EVENT_TYPE_DEMOTE_PROMOTE:
out << "Demote/Promote";
break;
case EVENT_TYPE_UPDATE_FEATURES:
out << "UpdateFeatures";
Expand Down
9 changes: 5 additions & 4 deletions src/librbd/journal/Types.h
Expand Up @@ -37,7 +37,7 @@ enum EventType {
EVENT_TYPE_RENAME = 10,
EVENT_TYPE_RESIZE = 11,
EVENT_TYPE_FLATTEN = 12,
EVENT_TYPE_DEMOTE = 13,
EVENT_TYPE_DEMOTE_PROMOTE = 13,
EVENT_TYPE_SNAP_LIMIT = 14,
EVENT_TYPE_UPDATE_FEATURES = 15,
EVENT_TYPE_METADATA_SET = 16,
Expand Down Expand Up @@ -316,8 +316,9 @@ struct FlattenEvent : public OpEventBase {
using OpEventBase::dump;
};

struct DemoteEvent {
static const EventType TYPE = static_cast<EventType>(EVENT_TYPE_DEMOTE);
struct DemotePromoteEvent {
static const EventType TYPE = static_cast<EventType>(
EVENT_TYPE_DEMOTE_PROMOTE);

void encode(bufferlist& bl) const;
void decode(__u8 version, bufferlist::iterator& it);
Expand Down Expand Up @@ -395,7 +396,7 @@ typedef boost::variant<AioDiscardEvent,
RenameEvent,
ResizeEvent,
FlattenEvent,
DemoteEvent,
DemotePromoteEvent,
SnapLimitEvent,
UpdateFeaturesEvent,
MetadataSetEvent,
Expand Down