Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rbd-mirror: delayed replication support #11879

Merged
merged 5 commits into from Jan 13, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 8 additions & 3 deletions qa/workunits/rbd/rbd_mirror_helpers.sh
Expand Up @@ -14,6 +14,10 @@
# after failure.
# RBD_MIRROR_TEMDIR - use this path when creating the temporary directory
# (should not exist) instead of running mktemp(1).
# RBD_MIRROR_ARGS - use this to pass additional arguments to started
# rbd-mirror daemons.
# RBD_MIRROR_VARGS - use this to pass additional arguments to vstart.sh
# when starting clusters.
#
# The cleanup can be done as a separate step, running the script with
# `cleanup ${RBD_MIRROR_TEMDIR}' arguments.
Expand Down Expand Up @@ -159,8 +163,8 @@ setup()

if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then
cd ${CEPH_ROOT}
${CEPH_SRC}/mstart.sh ${CLUSTER1} -n
${CEPH_SRC}/mstart.sh ${CLUSTER2} -n
${CEPH_SRC}/mstart.sh ${CLUSTER1} -n ${RBD_MIRROR_VARGS}
${CEPH_SRC}/mstart.sh ${CLUSTER2} -n ${RBD_MIRROR_VARGS}

ln -s $(readlink -f run/${CLUSTER1}/ceph.conf) \
${TEMPDIR}/${CLUSTER1}.conf
Expand Down Expand Up @@ -222,7 +226,8 @@ start_mirror()
--rbd-mirror-journal-poll-age=1 \
--debug-rbd=30 --debug-journaler=30 \
--debug-rbd_mirror=30 \
--daemonize=true
--daemonize=true \
${RBD_MIRROR_ARGS}
}

stop_mirror()
Expand Down
1 change: 1 addition & 0 deletions src/common/config_opts.h
Expand Up @@ -1292,6 +1292,7 @@ OPTION(rbd_validate_pool, OPT_BOOL, true) // true if empty pools should be valid
OPTION(rbd_validate_names, OPT_BOOL, true) // true if image specs should be validated
OPTION(rbd_auto_exclusive_lock_until_manual_request, OPT_BOOL, true) // whether to automatically acquire/release exclusive lock until it is explicitly requested, i.e. before we know the user of librbd is properly using the lock API
OPTION(rbd_mirroring_resync_after_disconnect, OPT_BOOL, false) // automatically start image resync after mirroring is disconnected due to being laggy
OPTION(rbd_mirroring_replay_delay, OPT_INT, 0) // time-delay in seconds for rbd-mirror asynchronous replication

/*
* The following options change the behavior for librbd's image creation methods that
Expand Down
4 changes: 3 additions & 1 deletion src/librbd/ImageCtx.cc
Expand Up @@ -967,7 +967,8 @@ struct C_InvalidateCache : public Context {
"rbd_journal_pool", false)(
"rbd_journal_max_payload_bytes", false)(
"rbd_journal_max_concurrent_object_sets", false)(
"rbd_mirroring_resync_after_disconnect", false);
"rbd_mirroring_resync_after_disconnect", false)(
"rbd_mirroring_replay_delay", false);

md_config_t local_config_t;
std::map<std::string, bufferlist> res;
Expand Down Expand Up @@ -1025,6 +1026,7 @@ struct C_InvalidateCache : public Context {
ASSIGN_OPTION(journal_max_payload_bytes);
ASSIGN_OPTION(journal_max_concurrent_object_sets);
ASSIGN_OPTION(mirroring_resync_after_disconnect);
ASSIGN_OPTION(mirroring_replay_delay);
}

ExclusiveLock<ImageCtx> *ImageCtx::create_exclusive_lock() {
Expand Down
1 change: 1 addition & 0 deletions src/librbd/ImageCtx.h
Expand Up @@ -190,6 +190,7 @@ namespace librbd {
uint32_t journal_max_payload_bytes;
int journal_max_concurrent_object_sets;
bool mirroring_resync_after_disconnect;
int mirroring_replay_delay;

LibrbdAdminSocketHook *asok_hook;

Expand Down
10 changes: 7 additions & 3 deletions src/librbd/Journal.cc
Expand Up @@ -723,7 +723,7 @@ int Journal<I>::demote() {
return r;
}

journal::EventEntry event_entry{journal::DemoteEvent{}};
journal::EventEntry event_entry{journal::DemoteEvent{}, ceph_clock_now()};
bufferlist event_entry_bl;
::encode(event_entry, event_entry_bl);

Expand Down Expand Up @@ -845,7 +845,8 @@ uint64_t Journal<I>::append_write_event(uint64_t offset, size_t length,
event_bl.substr_of(bl, event_offset, event_length);
journal::EventEntry event_entry(journal::AioWriteEvent(offset + event_offset,
event_length,
event_bl));
event_bl),
ceph_clock_now());

bufferlists.emplace_back();
::encode(event_entry, bufferlists.back());
Expand All @@ -864,6 +865,7 @@ uint64_t Journal<I>::append_io_event(journal::EventEntry &&event_entry,
uint64_t offset, size_t length,
bool flush_entry) {
bufferlist bl;
event_entry.timestamp = ceph_clock_now();
::encode(event_entry, bl);
return append_io_events(event_entry.get_event_type(), {bl}, requests, offset,
length, flush_entry);
Expand Down Expand Up @@ -974,6 +976,7 @@ void Journal<I>::append_op_event(uint64_t op_tid,
assert(m_image_ctx.owner_lock.is_locked());

bufferlist bl;
event_entry.timestamp = ceph_clock_now();
::encode(event_entry, bl);

Future future;
Expand Down Expand Up @@ -1007,7 +1010,8 @@ void Journal<I>::commit_op_event(uint64_t op_tid, int r, Context *on_safe) {
ldout(cct, 10) << this << " " << __func__ << ": op_tid=" << op_tid << ", "
<< "r=" << r << dendl;

journal::EventEntry event_entry((journal::OpFinishEvent(op_tid, r)));
journal::EventEntry event_entry((journal::OpFinishEvent(op_tid, r)),
ceph_clock_now());

bufferlist bl;
::encode(event_entry, bl);
Expand Down
57 changes: 40 additions & 17 deletions src/librbd/journal/Types.cc
Expand Up @@ -83,6 +83,10 @@ void AioDiscardEvent::dump(Formatter *f) const {
f->dump_unsigned("length", length);
}

uint32_t AioWriteEvent::get_fixed_size() {
return EventEntry::get_fixed_size() + 16 /* offset, length */;
}

void AioWriteEvent::encode(bufferlist& bl) const {
::encode(offset, bl);
::encode(length, bl);
Expand Down Expand Up @@ -314,9 +318,10 @@ EventType EventEntry::get_event_type() const {
}

void EventEntry::encode(bufferlist& bl) const {
ENCODE_START(3, 1, bl);
ENCODE_START(4, 1, bl);
boost::apply_visitor(EncodeVisitor(bl), event);
ENCODE_FINISH(bl);
encode_metadata(bl);
}

void EventEntry::decode(bufferlist::iterator& it) {
Expand Down Expand Up @@ -385,62 +390,80 @@ void EventEntry::decode(bufferlist::iterator& it) {

boost::apply_visitor(DecodeVisitor(struct_v, it), event);
DECODE_FINISH(it);
if (struct_v >= 4) {
decode_metadata(it);
}
}

void EventEntry::dump(Formatter *f) const {
boost::apply_visitor(DumpVisitor(f, "event_type"), event);
f->dump_stream("timestamp") << timestamp;
}

void EventEntry::encode_metadata(bufferlist& bl) const {
ENCODE_START(1, 1, bl);
::encode(timestamp, bl);
ENCODE_FINISH(bl);
}

void EventEntry::decode_metadata(bufferlist::iterator& it) {
DECODE_START(1, it);
::decode(timestamp, it);
DECODE_FINISH(it);
}

void EventEntry::generate_test_instances(std::list<EventEntry *> &o) {
o.push_back(new EventEntry(AioDiscardEvent()));
o.push_back(new EventEntry(AioDiscardEvent(123, 345)));
o.push_back(new EventEntry(AioDiscardEvent(123, 345), utime_t(1, 1)));

bufferlist bl;
bl.append(std::string(32, '1'));
o.push_back(new EventEntry(AioWriteEvent()));
o.push_back(new EventEntry(AioWriteEvent(123, 456, bl)));
o.push_back(new EventEntry(AioWriteEvent(123, 456, bl), utime_t(1, 1)));

o.push_back(new EventEntry(AioFlushEvent()));

o.push_back(new EventEntry(OpFinishEvent(123, -1)));
o.push_back(new EventEntry(OpFinishEvent(123, -1), utime_t(1, 1)));

o.push_back(new EventEntry(SnapCreateEvent()));
o.push_back(new EventEntry(SnapCreateEvent(), utime_t(1, 1)));
o.push_back(new EventEntry(SnapCreateEvent(234, "snap",
cls::rbd::UserSnapshotNamespace())));
cls::rbd::UserSnapshotNamespace()),
utime_t(1, 1)));

o.push_back(new EventEntry(SnapRemoveEvent()));
o.push_back(new EventEntry(SnapRemoveEvent(345, "snap")));
o.push_back(new EventEntry(SnapRemoveEvent(345, "snap"), utime_t(1, 1)));

o.push_back(new EventEntry(SnapRenameEvent()));
o.push_back(new EventEntry(SnapRenameEvent(456, 1, "src snap", "dest snap")));
o.push_back(new EventEntry(SnapRenameEvent(456, 1, "src snap", "dest snap"),
utime_t(1, 1)));

o.push_back(new EventEntry(SnapProtectEvent()));
o.push_back(new EventEntry(SnapProtectEvent(567, "snap")));
o.push_back(new EventEntry(SnapProtectEvent(567, "snap"), utime_t(1, 1)));

o.push_back(new EventEntry(SnapUnprotectEvent()));
o.push_back(new EventEntry(SnapUnprotectEvent(678, "snap")));
o.push_back(new EventEntry(SnapUnprotectEvent(678, "snap"), utime_t(1, 1)));

o.push_back(new EventEntry(SnapRollbackEvent()));
o.push_back(new EventEntry(SnapRollbackEvent(789, "snap")));
o.push_back(new EventEntry(SnapRollbackEvent(789, "snap"), utime_t(1, 1)));

o.push_back(new EventEntry(RenameEvent()));
o.push_back(new EventEntry(RenameEvent(890, "image name")));
o.push_back(new EventEntry(RenameEvent(890, "image name"), utime_t(1, 1)));

o.push_back(new EventEntry(ResizeEvent()));
o.push_back(new EventEntry(ResizeEvent(901, 1234)));
o.push_back(new EventEntry(ResizeEvent(901, 1234), utime_t(1, 1)));

o.push_back(new EventEntry(FlattenEvent(123)));
o.push_back(new EventEntry(FlattenEvent(123), utime_t(1, 1)));

o.push_back(new EventEntry(DemoteEvent()));

o.push_back(new EventEntry(UpdateFeaturesEvent()));
o.push_back(new EventEntry(UpdateFeaturesEvent(123, 127, true)));
o.push_back(new EventEntry(UpdateFeaturesEvent(123, 127, true), utime_t(1, 1)));

o.push_back(new EventEntry(MetadataSetEvent()));
o.push_back(new EventEntry(MetadataSetEvent(123, "key", "value")));
o.push_back(new EventEntry(MetadataSetEvent(123, "key", "value"), utime_t(1, 1)));

o.push_back(new EventEntry(MetadataRemoveEvent()));
o.push_back(new EventEntry(MetadataRemoveEvent(123, "key")));
o.push_back(new EventEntry(MetadataRemoveEvent(123, "key"), utime_t(1, 1)));
}

// Journal Client
Expand Down
20 changes: 16 additions & 4 deletions src/librbd/journal/Types.h
Expand Up @@ -9,6 +9,7 @@
#include "include/buffer.h"
#include "include/encoding.h"
#include "include/types.h"
#include "include/utime.h"
#include <iosfwd>
#include <list>
#include <boost/none.hpp>
Expand Down Expand Up @@ -67,9 +68,7 @@ struct AioWriteEvent {
uint64_t length;
bufferlist data;

static uint32_t get_fixed_size() {
return 30; /// version encoding, type, offset, length
}
static uint32_t get_fixed_size();

AioWriteEvent() : offset(0), length(0) {
}
Expand Down Expand Up @@ -372,12 +371,18 @@ typedef boost::variant<AioDiscardEvent,
UnknownEvent> Event;

struct EventEntry {
static uint32_t get_fixed_size() {
return EVENT_FIXED_SIZE + METADATA_FIXED_SIZE;
}

EventEntry() : event(UnknownEvent()) {
}
EventEntry(const Event &_event) : event(_event) {
EventEntry(const Event &_event, const utime_t &_timestamp = utime_t())
: event(_event), timestamp(_timestamp) {
}

Event event;
utime_t timestamp;

EventType get_event_type() const;

Expand All @@ -386,6 +391,13 @@ struct EventEntry {
void dump(Formatter *f) const;

static void generate_test_instances(std::list<EventEntry *> &o);

private:
static const uint32_t EVENT_FIXED_SIZE = 14; /// version encoding, type
static const uint32_t METADATA_FIXED_SIZE = 14; /// version encoding, timestamp

void encode_metadata(bufferlist& bl) const;
void decode_metadata(bufferlist::iterator& it);
};

// Journal Client data structures
Expand Down
4 changes: 3 additions & 1 deletion src/test/librbd/mock/MockImageCtx.h
Expand Up @@ -96,7 +96,8 @@ struct MockImageCtx {
journal_max_concurrent_object_sets(
image_ctx.journal_max_concurrent_object_sets),
mirroring_resync_after_disconnect(
image_ctx.mirroring_resync_after_disconnect)
image_ctx.mirroring_resync_after_disconnect),
mirroring_replay_delay(image_ctx.mirroring_replay_delay)
{
md_ctx.dup(image_ctx.md_ctx);
data_ctx.dup(image_ctx.data_ctx);
Expand Down Expand Up @@ -273,6 +274,7 @@ struct MockImageCtx {
uint32_t journal_max_payload_bytes;
int journal_max_concurrent_object_sets;
bool mirroring_resync_after_disconnect;
int mirroring_replay_delay;
};

} // namespace librbd
Expand Down
65 changes: 65 additions & 0 deletions src/test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc
Expand Up @@ -830,6 +830,71 @@ TEST_F(TestMockImageReplayerBootstrapRequest, SplitBrainForcePromote) {
ASSERT_EQ(NULL, m_local_test_image_ctx);
}

TEST_F(TestMockImageReplayerBootstrapRequest, ResyncRequested) {
create_local_image();

InSequence seq;

// look up local image by global image id
librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
expect_mirror_image_get_image_id(m_local_io_ctx, "global image id",
mock_local_image_ctx.id, 0);

// lookup remote image tag class
cls::journal::Client client;
librbd::journal::ClientData client_data{
librbd::journal::ImageClientMeta{123}};
::encode(client_data, client.data);
::journal::MockJournaler mock_journaler;
expect_journaler_get_client(mock_journaler,
librbd::Journal<>::IMAGE_CLIENT_ID,
client, 0);

// lookup local peer in remote journal
librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta{
mock_local_image_ctx.id};
mirror_peer_client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
client_data.client_meta = mirror_peer_client_meta;
client.data.clear();
::encode(client_data, client.data);
expect_journaler_get_client(mock_journaler, "local mirror uuid",
client, 0);

// open the remote image
librbd::MockJournal mock_journal;
librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
MockOpenImageRequest mock_open_image_request;
expect_open_image(mock_open_image_request, m_remote_io_ctx,
mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
MockIsPrimaryRequest mock_is_primary_request;
expect_is_primary(mock_is_primary_request, true, 0);

// open the local image
mock_local_image_ctx.journal = &mock_journal;
MockOpenLocalImageRequest mock_open_local_image_request;
expect_open_local_image(mock_open_local_image_request, m_local_io_ctx,
mock_local_image_ctx.id, mock_local_image_ctx, 0);

// resync is requested
expect_is_resync_requested(mock_journal, true, 0);


MockCloseImageRequest mock_close_image_request;
expect_close_image(mock_close_image_request, mock_remote_image_ctx, 0);

C_SaferCond ctx;
MockImageSyncThrottler mock_image_sync_throttler(
new ImageSyncThrottler<librbd::MockTestImageCtx>());
MockBootstrapRequest *request = create_request(
mock_image_sync_throttler, mock_journaler, mock_remote_image_ctx.id,
"global image id", "local mirror uuid", "remote mirror uuid",
&ctx);
m_do_resync = false;
request->send();
ASSERT_EQ(0, ctx.wait());
ASSERT_TRUE(m_do_resync);
}

} // namespace image_replayer
} // namespace mirror
} // namespace rbd