Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

librbd: batch object map updates during trim #11510

Merged
Merged 1 commit on Nov 5, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 10 additions & 4 deletions src/librbd/AioObjectRequest.h
Expand Up @@ -330,11 +330,14 @@ class AioObjectRemove : public AbstractAioObjectWrite {

class AioObjectTrim : public AbstractAioObjectWrite {
public:
// we'd need to only conditionally specify if a post object map
// update is needed. pre update is decided as usual (by checking
// the state of the object in the map).
AioObjectTrim(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
const ::SnapContext &snapc, Context *completion)
const ::SnapContext &snapc, Context *completion,
bool post_object_map_update)
: AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion,
true) {
}
true), m_post_object_map_update(post_object_map_update) { }

protected:
virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
Expand All @@ -350,8 +353,11 @@ class AioObjectTrim : public AbstractAioObjectWrite {
}

virtual bool post_object_map_update() {
return true;
return m_post_object_map_update;
}

private:
bool m_post_object_map_update;
};

class AioObjectTruncate : public AbstractAioObjectWrite {
Expand Down
150 changes: 115 additions & 35 deletions src/librbd/operation/TrimRequest.cc
Expand Up @@ -46,7 +46,7 @@ class C_CopyupObject : public C_AsyncObjectThrottle<I> {
ldout(image_ctx.cct, 10) << "removing (with copyup) " << oid << dendl;

AioObjectRequest<> *req = new AioObjectTrim(&image_ctx, oid, m_object_no,
m_snapc, this);
m_snapc, this, false);
req->send();
return 0;
}
Expand Down Expand Up @@ -131,8 +131,18 @@ bool TrimRequest<I>::should_complete(int r)

RWLock::RLocker owner_lock(image_ctx.owner_lock);
switch (m_state) {
case STATE_PRE_COPYUP:
ldout(cct, 5) << " PRE_COPYUP" << dendl;
send_copyup_objects();
break;

case STATE_COPYUP_OBJECTS:
ldout(cct, 5) << " COPYUP_OBJECTS" << dendl;
send_post_copyup();
break;

case STATE_POST_COPYUP:
ldout(cct, 5) << " POST_COPYUP" << dendl;
send_pre_remove();
break;

Expand Down Expand Up @@ -170,58 +180,33 @@ bool TrimRequest<I>::should_complete(int r)

template <typename I>
void TrimRequest<I>::send() {
  // Entry point for the trim state machine: start with the pre-copyup
  // state, which decides whether a parent-overlap copyup (and its batched
  // object map updates) is needed before objects can be removed.
  send_pre_copyup();
}

template <typename I>
template<typename I>
void TrimRequest<I>::send_copyup_objects() {
I &image_ctx = this->m_image_ctx;
assert(image_ctx.owner_lock.is_locked());
assert(image_ctx.exclusive_lock == nullptr ||
image_ctx.exclusive_lock->is_lock_owner());

if (m_delete_start >= m_num_objects) {
send_clean_boundary();
return;
}
ldout(image_ctx.cct, 5) << this << " send_copyup_objects: "
<< " start object=" << m_copyup_start << ", "
<< " end object=" << m_copyup_end << dendl;
m_state = STATE_COPYUP_OBJECTS;

::SnapContext snapc;
bool has_snapshots;
uint64_t parent_overlap;
{
RWLock::RLocker snap_locker(image_ctx.snap_lock);
RWLock::RLocker parent_locker(image_ctx.parent_lock);

snapc = image_ctx.snapc;
has_snapshots = !image_ctx.snaps.empty();
int r = image_ctx.get_parent_overlap(CEPH_NOSNAP, &parent_overlap);
assert(r == 0);
}

// copyup is only required for portion of image that overlaps parent
uint64_t copyup_end = Striper::get_num_objects(image_ctx.layout,
parent_overlap);
// TODO: protect against concurrent shrink and snap create?
if (copyup_end <= m_delete_start || !has_snapshots) {
send_pre_remove();
return;
}

uint64_t copyup_start = m_delete_start;
m_delete_start = copyup_end;

ldout(image_ctx.cct, 5) << this << " send_copyup_objects: "
<< " start object=" << copyup_start << ", "
<< " end object=" << copyup_end << dendl;
m_state = STATE_COPYUP_OBJECTS;

Context *ctx = this->create_callback_context();
typename AsyncObjectThrottle<I>::ContextFactory context_factory(
boost::lambda::bind(boost::lambda::new_ptr<C_CopyupObject<I> >(),
boost::lambda::_1, &image_ctx, snapc, boost::lambda::_2));
AsyncObjectThrottle<I> *throttle = new AsyncObjectThrottle<I>(
this, image_ctx, context_factory, ctx, &m_prog_ctx, copyup_start,
copyup_end);
this, image_ctx, context_factory, ctx, &m_prog_ctx, m_copyup_start,
m_copyup_end);
throttle->start_ops(image_ctx.concurrent_management_ops);
}

Expand All @@ -245,6 +230,68 @@ void TrimRequest<I>::send_remove_objects() {
throttle->start_ops(image_ctx.concurrent_management_ops);
}

template<typename I>
void TrimRequest<I>::send_pre_copyup() {
  I &image_ctx = this->m_image_ctx;
  assert(image_ctx.owner_lock.is_locked());

  // Decide whether any objects in the trim range overlap the parent image
  // and therefore need a copyup before removal.  When an object map is
  // available, batch-mark the whole copyup range OBJECT_PENDING in a single
  // update instead of issuing one pre-update per AioObjectTrim request.
  //
  // NOTE(review): this batched pre-update only applies when shrinking an
  // image that has snapshots; image removal cannot reach this path since
  // snapshots must be deleted before the image can be removed.  The
  // per-object reads from the parent during copyup are still required to
  // know whether the parent has backing data for each block.
  if (m_delete_start >= m_num_objects) {
    // nothing to delete at object granularity -- just clean the boundary
    send_clean_boundary();
    return;
  }

  bool has_snapshots;
  uint64_t parent_overlap;
  {
    RWLock::RLocker snap_locker(image_ctx.snap_lock);
    RWLock::RLocker parent_locker(image_ctx.parent_lock);

    has_snapshots = !image_ctx.snaps.empty();
    int r = image_ctx.get_parent_overlap(CEPH_NOSNAP, &parent_overlap);
    assert(r == 0);
  }

  // copyup is only required for portion of image that overlaps parent
  m_copyup_end = Striper::get_num_objects(image_ctx.layout, parent_overlap);

  // TODO: protect against concurrent shrink and snap create?
  // skip to remove if no copyup is required.
  if (m_copyup_end <= m_delete_start || !has_snapshots) {
    send_pre_remove();
    return;
  }

  // the remove state machine will later handle [m_copyup_end, ...)
  m_copyup_start = m_delete_start;
  m_delete_start = m_copyup_end;

  bool copyup_objects = false;
  {
    RWLock::RLocker snap_locker(image_ctx.snap_lock);
    if (image_ctx.object_map == nullptr) {
      // no object map -- go straight to the per-object copyup requests
      copyup_objects = true;
    } else {
      ldout(image_ctx.cct, 5) << this << " send_pre_copyup: "
                              << " copyup_start=" << m_copyup_start
                              << " copyup_end=" << m_copyup_end << dendl;
      m_state = STATE_PRE_COPYUP;

      assert(image_ctx.exclusive_lock->is_lock_owner());

      // batched object map update for the whole copyup range; if it is a
      // no-op (returns false) the callback will never fire, so release the
      // context and fall through synchronously
      Context *ctx = this->create_callback_context();
      RWLock::WLocker object_map_locker(image_ctx.object_map_lock);
      if (!image_ctx.object_map->aio_update(m_copyup_start, m_copyup_end,
                                            OBJECT_PENDING, OBJECT_EXISTS,
                                            ctx)) {
        delete ctx;
        copyup_objects = true;
      }
    }
  }

  if (copyup_objects) {
    send_copyup_objects();
  }
}

template <typename I>
void TrimRequest<I>::send_pre_remove() {
I &image_ctx = this->m_image_ctx;
Expand Down Expand Up @@ -286,6 +333,39 @@ void TrimRequest<I>::send_pre_remove() {
}
}

template<typename I>
void TrimRequest<I>::send_post_copyup() {
  I &image_ctx = this->m_image_ctx;
  assert(image_ctx.owner_lock.is_locked());

  // Post-copyup state: with an object map present, issue one batched
  // update over the copied-up range (OBJECT_PENDING -> OBJECT_NONEXISTENT)
  // before moving on to the remove phase.  Without an object map, or when
  // the update is a no-op, advance to the pre-remove state directly.
  bool skip_to_pre_remove = false;
  {
    RWLock::RLocker snap_locker(image_ctx.snap_lock);
    if (image_ctx.object_map != nullptr) {
      ldout(image_ctx.cct, 5) << this << " send_post_copyup:"
                              << " copyup_start=" << m_copyup_start
                              << " copyup_end=" << m_copyup_end << dendl;
      m_state = STATE_POST_COPYUP;

      assert(image_ctx.exclusive_lock->is_lock_owner());

      Context *ctx = this->create_callback_context();
      RWLock::WLocker object_map_locker(image_ctx.object_map_lock);
      if (image_ctx.object_map->aio_update(m_copyup_start, m_copyup_end,
                                           OBJECT_NONEXISTENT, OBJECT_PENDING,
                                           ctx)) {
        // update in flight -- the callback drives the next state
        return;
      }
      delete ctx;          // no-op update: callback will never fire
      skip_to_pre_remove = true;
    } else {
      skip_to_pre_remove = true;
    }
  }

  if (skip_to_pre_remove) {
    send_pre_remove();
  }
}

template <typename I>
void TrimRequest<I>::send_post_remove() {
I &image_ctx = this->m_image_ctx;
Expand Down Expand Up @@ -364,7 +444,7 @@ void TrimRequest<I>::send_clean_boundary() {
AioObjectRequest<> *req;
if (p->offset == 0) {
req = new AioObjectTrim(&image_ctx, p->oid.name, p->objectno, snapc,
req_comp);
req_comp, true);
} else {
req = new AioObjectTruncate(&image_ctx, p->oid.name, p->objectno,
p->offset, snapc, req_comp);
Expand Down
30 changes: 21 additions & 9 deletions src/librbd/operation/TrimRequest.h
Expand Up @@ -34,14 +34,17 @@ class TrimRequest : public AsyncRequest<ImageCtxT>
* @verbatim
*
* <start> . . . . > STATE_FINISHED . . . . . . . . .
* | . .
* | . . . . . . . . . . . . .
* | . .
* v . .
* STATE_COPYUP_OBJECTS . . . . .
* | . . .
* | . . .
* v v v .
* | . . . . . . . . . . > . . . . . . . . . .
* | / . .
* STATE_PRE_COPYUP ---> STATE_COPYUP_OBJECTS . .
* | . .
* /-----------------------/ v .
* | . .
* v . .
* STATE_POST_COPYUP. . . > . . .
* | . . . . . . . . . . < . . . . . . . . . .
* | | . .
* v v v .
* STATE_PRE_REMOVE ---> STATE_REMOVE_OBJECTS .
* | . . .
* /-----------------------/ . . . . . . . .
Expand All @@ -64,7 +67,9 @@ class TrimRequest : public AsyncRequest<ImageCtxT>
*/

enum State {
STATE_PRE_COPYUP,
STATE_COPYUP_OBJECTS,
STATE_POST_COPYUP,
STATE_PRE_REMOVE,
STATE_REMOVE_OBJECTS,
STATE_POST_REMOVE,
Expand All @@ -83,14 +88,21 @@ class TrimRequest : public AsyncRequest<ImageCtxT>
uint64_t m_new_size;
ProgressContext &m_prog_ctx;

uint64_t m_copyup_start;
uint64_t m_copyup_end;

TrimRequest(ImageCtxT &image_ctx, Context *on_finish,
uint64_t original_size, uint64_t new_size,
ProgressContext &prog_ctx);

void send_pre_copyup();
void send_copyup_objects();
void send_remove_objects();
void send_post_copyup();

void send_pre_remove();
void send_remove_objects();
void send_post_remove();

void send_clean_boundary();
void send_finish(int r);
};
Expand Down