Skip to content

Commit

Permalink
librbd: default to sparse-reads for any IO operation over 64K
Browse files Browse the repository at this point in the history
Testing BlueStore against both HDDs and OSDs with fully allocated
and sparse-allocated objects shows a performance improvement with
sparse-read between 32K and 64K.

Fixes: http://tracker.ceph.com/issues/21849
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
  • Loading branch information
Jason Dillaman committed Oct 19, 2017
1 parent da335c7 commit 6ed3d07
Show file tree
Hide file tree
Showing 10 changed files with 225 additions and 25 deletions.
12 changes: 12 additions & 0 deletions src/common/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5249,6 +5249,18 @@ static std::vector<Option> get_rbd_options() {
.set_default(false)
.set_description("localize parent requests to closest OSD"),

Option("rbd_sparse_read_threshold_bytes", Option::TYPE_UINT,
Option::LEVEL_ADVANCED)
.set_default(64_K)
.set_description("threshold for issuing a sparse-read")
.set_long_description("minimum number of sequential bytes to read against "
"an object before issuing a sparse-read request to "
"the cluster. 0 implies it must be a full object read"
"to issue a sparse-read, 1 implies always use "
"sparse-read, and any value larger than the maximum "
"object size will disable sparse-read for all "
"requests"),

Option("rbd_readahead_trigger_requests", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(10)
.set_description("number of sequential requests necessary to trigger readahead"),
Expand Down
6 changes: 6 additions & 0 deletions src/librbd/ImageCtx.cc
Original file line number Diff line number Diff line change
Expand Up @@ -981,6 +981,7 @@ struct C_InvalidateCache : public Context {
"rbd_localize_snap_reads", false)(
"rbd_balance_parent_reads", false)(
"rbd_localize_parent_reads", false)(
"rbd_sparse_read_threshold_bytes", false)(
"rbd_readahead_trigger_requests", false)(
"rbd_readahead_max_bytes", false)(
"rbd_readahead_disable_after_bytes", false)(
Expand Down Expand Up @@ -1039,6 +1040,7 @@ struct C_InvalidateCache : public Context {
ASSIGN_OPTION(localize_snap_reads, bool);
ASSIGN_OPTION(balance_parent_reads, bool);
ASSIGN_OPTION(localize_parent_reads, bool);
ASSIGN_OPTION(sparse_read_threshold_bytes, uint64_t);
ASSIGN_OPTION(readahead_trigger_requests, int64_t);
ASSIGN_OPTION(readahead_max_bytes, int64_t);
ASSIGN_OPTION(readahead_disable_after_bytes, int64_t);
Expand All @@ -1063,6 +1065,10 @@ struct C_InvalidateCache : public Context {
if (thread_safe) {
ASSIGN_OPTION(journal_pool, std::string);
}

if (sparse_read_threshold_bytes == 0) {
sparse_read_threshold_bytes = get_object_size();
}
}

ExclusiveLock<ImageCtx> *ImageCtx::create_exclusive_lock() {
Expand Down
1 change: 1 addition & 0 deletions src/librbd/ImageCtx.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ namespace librbd {
bool localize_snap_reads;
bool balance_parent_reads;
bool localize_parent_reads;
uint64_t sparse_read_threshold_bytes;
uint32_t readahead_trigger_requests;
uint64_t readahead_max_bytes;
uint64_t readahead_disable_after_bytes;
Expand Down
17 changes: 6 additions & 11 deletions src/librbd/internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2226,21 +2226,16 @@ int validate_pool(IoCtx &io_ctx, CephContext *cct) {
CephContext *cct = ictx->cct;
ldout(cct, 20) << "flush " << ictx << dendl;

int r = ictx->state->refresh_if_required();
C_SaferCond cond;
io::AioCompletion *c = io::AioCompletion::create(&cond);
ictx->io_work_queue->aio_flush(c, false);

int r = cond.wait();
if (r < 0) {
return r;
}

ictx->user_flushed();
C_SaferCond ctx;
{
RWLock::RLocker owner_locker(ictx->owner_lock);
ictx->flush(&ctx);
}
r = ctx.wait();

ictx->perfcounter->inc(l_librbd_flush);
return r;
return 0;
}

int invalidate_cache(ImageCtx *ictx)
Expand Down
2 changes: 1 addition & 1 deletion src/librbd/io/ImageRequest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ void ImageReadRequest<I>::send_request() {
aio_comp);
ObjectReadRequest<I> *req = ObjectReadRequest<I>::create(
&image_ctx, extent.oid.name, extent.objectno, extent.offset,
extent.length, extent.buffer_extents, snap_id, true, m_op_flags,
extent.length, extent.buffer_extents, snap_id, m_op_flags,
this->m_trace, req_comp);
req_comp->request = req;

Expand Down
9 changes: 4 additions & 5 deletions src/librbd/io/ObjectRequest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -205,14 +205,13 @@ template <typename I>
ObjectReadRequest<I>::ObjectReadRequest(I *ictx, const std::string &oid,
uint64_t objectno, uint64_t offset,
uint64_t len, Extents& be,
librados::snap_t snap_id, bool sparse,
int op_flags,
librados::snap_t snap_id, int op_flags,
const ZTracer::Trace &parent_trace,
Context *completion)
: ObjectRequest<I>(ictx, oid, objectno, offset, len, snap_id, false, "read",
parent_trace, completion),
m_buffer_extents(be), m_tried_parent(false), m_sparse(sparse),
m_op_flags(op_flags), m_state(LIBRBD_AIO_READ_FLAT) {
m_buffer_extents(be), m_tried_parent(false), m_op_flags(op_flags),
m_state(LIBRBD_AIO_READ_FLAT) {
guard_read();
}

Expand Down Expand Up @@ -326,7 +325,7 @@ void ObjectReadRequest<I>::send() {

librados::ObjectReadOperation op;
int flags = image_ctx->get_read_flags(this->m_snap_id);
if (m_sparse) {
if (this->m_object_len >= image_ctx->sparse_read_threshold_bytes) {
op.sparse_read(this->m_object_off, this->m_object_len, &m_ext_map,
&m_read_data, nullptr);
} else {
Expand Down
10 changes: 4 additions & 6 deletions src/librbd/io/ObjectRequest.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,20 +151,19 @@ class ObjectReadRequest : public ObjectRequest<ImageCtxT> {
static ObjectReadRequest* create(ImageCtxT *ictx, const std::string &oid,
uint64_t objectno, uint64_t offset,
uint64_t len, Extents &buffer_extents,
librados::snap_t snap_id, bool sparse,
int op_flags,
librados::snap_t snap_id, int op_flags,
const ZTracer::Trace &parent_trace,
Context *completion) {
return new ObjectReadRequest(ictx, oid, objectno, offset, len,
buffer_extents, snap_id, sparse, op_flags,
buffer_extents, snap_id, op_flags,
parent_trace, completion);
}

ObjectReadRequest(ImageCtxT *ictx, const std::string &oid,
uint64_t objectno, uint64_t offset, uint64_t len,
Extents& buffer_extents, librados::snap_t snap_id,
bool sparse, int op_flags,
const ZTracer::Trace &parent_trace, Context *completion);
int op_flags, const ZTracer::Trace &parent_trace,
Context *completion);

bool should_complete(int r) override;
void send() override;
Expand Down Expand Up @@ -197,7 +196,6 @@ class ObjectReadRequest : public ObjectRequest<ImageCtxT> {
private:
Extents m_buffer_extents;
bool m_tried_parent;
bool m_sparse;
int m_op_flags;
ceph::bufferlist m_read_data;
ExtentMap m_ext_map;
Expand Down
1 change: 1 addition & 0 deletions src/test/librbd/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ set(unittest_librbd_srcs
image/test_mock_RemoveRequest.cc
io/test_mock_ImageRequest.cc
io/test_mock_ImageRequestWQ.cc
io/test_mock_ObjectRequest.cc
journal/test_mock_OpenRequest.cc
journal/test_mock_PromoteRequest.cc
journal/test_mock_Replay.cc
Expand Down
3 changes: 1 addition & 2 deletions src/test/librbd/io/test_mock_ImageRequest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,7 @@ struct ObjectReadRequest<librbd::MockTestImageCtx> : public ObjectRequest<librbd
const std::string &oid,
uint64_t objectno, uint64_t offset,
uint64_t len, Extents &buffer_extents,
librados::snap_t snap_id, bool sparse,
int op_flags,
librados::snap_t snap_id, int op_flags,
const ZTracer::Trace &parent_trace,
Context *completion) {
assert(s_instance != nullptr);
Expand Down
189 changes: 189 additions & 0 deletions src/test/librbd/io/test_mock_ObjectRequest.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "test/librbd/test_mock_fixture.h"
#include "test/librbd/test_support.h"
#include "test/librbd/mock/MockImageCtx.h"
#include "test/librbd/mock/MockObjectMap.h"
#include "test/librbd/mock/cache/MockImageCache.h"
#include "test/librados_test_stub/MockTestMemIoCtxImpl.h"
#include "test/librados_test_stub/MockTestMemRadosClient.h"
#include "librbd/io/CopyupRequest.h"
#include "librbd/io/ImageRequest.h"
#include "librbd/io/ObjectRequest.h"

namespace librbd {
namespace {

struct MockTestImageCtx : public MockImageCtx {
MockTestImageCtx(ImageCtx &image_ctx) : MockImageCtx(image_ctx) {
}
};

} // anonymous namespace

namespace util {

inline ImageCtx *get_image_ctx(MockImageCtx *image_ctx) {
return image_ctx->image_ctx;
}

} // namespace util

namespace io {

template <>
struct CopyupRequest<librbd::MockImageCtx> {
static CopyupRequest* create(librbd::MockImageCtx *ictx,
const std::string &oid, uint64_t objectno,
Extents &&image_extents,
const ZTracer::Trace &parent_trace) {
return nullptr;
}

MOCK_METHOD0(send, void());
};

template <>
struct ImageRequest<librbd::MockImageCtx> {
static void aio_read(librbd::MockImageCtx *ictx, AioCompletion *c,
Extents &&image_extents, ReadResult &&read_result,
int op_flags, const ZTracer::Trace &parent_trace) {
}
};

} // namespace io
} // namespace librbd

#include "librbd/io/ObjectRequest.cc"

namespace librbd {
namespace io {

using ::testing::_;
using ::testing::DoDefault;
using ::testing::InSequence;
using ::testing::Invoke;
using ::testing::Return;
using ::testing::WithArg;

struct TestMockIoObjectRequest : public TestMockFixture {
typedef ObjectRequest<librbd::MockImageCtx> MockObjectRequest;
typedef ObjectReadRequest<librbd::MockImageCtx> MockObjectReadRequest;

void expect_object_may_exist(MockTestImageCtx &mock_image_ctx,
uint64_t object_no, bool exists) {
if (mock_image_ctx.object_map != nullptr) {
EXPECT_CALL(*mock_image_ctx.object_map, object_may_exist(object_no))
.WillOnce(Return(exists));
}
}

void expect_get_parent_overlap(MockTestImageCtx &mock_image_ctx,
librados::snap_t snap_id, uint64_t overlap,
int r) {
EXPECT_CALL(mock_image_ctx, get_parent_overlap(snap_id, _))
.WillOnce(WithArg<1>(Invoke([overlap, r](uint64_t *o) {
*o = overlap;
return r;
})));
}

void expect_prune_parent_extents(MockTestImageCtx &mock_image_ctx,
const MockObjectRequest::Extents& extents,
uint64_t overlap, uint64_t object_overlap) {
EXPECT_CALL(mock_image_ctx, prune_parent_extents(_, overlap))
.WillOnce(WithArg<0>(Invoke([extents, object_overlap](MockObjectRequest::Extents& e) {
e = extents;
return object_overlap;
})));
}

void expect_get_read_flags(MockTestImageCtx &mock_image_ctx,
librados::snap_t snap_id, int flags) {
EXPECT_CALL(mock_image_ctx, get_read_flags(snap_id))
.WillOnce(Return(flags));
}

void expect_read(MockTestImageCtx &mock_image_ctx,
const std::string& oid, uint64_t off, uint64_t len,
int r) {
auto& expect = EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.data_ctx),
read(oid, len, off, _));
if (r < 0) {
expect.WillOnce(Return(r));
} else {
expect.WillOnce(DoDefault());
}
}

void expect_sparse_read(MockTestImageCtx &mock_image_ctx,
const std::string& oid, uint64_t off, uint64_t len,
int r) {
auto& expect = EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.data_ctx),
sparse_read(oid, off, len, _, _));
if (r < 0) {
expect.WillOnce(Return(r));
} else {
expect.WillOnce(DoDefault());
}
}
};

TEST_F(TestMockIoObjectRequest, Read) {
librbd::ImageCtx *ictx;
ASSERT_EQ(0, open_image(m_image_name, &ictx));
ictx->sparse_read_threshold_bytes = 8096;

MockTestImageCtx mock_image_ctx(*ictx);
MockObjectMap mock_object_map;
if (ictx->test_features(RBD_FEATURE_OBJECT_MAP)) {
mock_image_ctx.object_map = &mock_object_map;
}

InSequence seq;
expect_get_parent_overlap(mock_image_ctx, CEPH_NOSNAP, 0, 0);
expect_prune_parent_extents(mock_image_ctx, {}, 0, 0);
expect_object_may_exist(mock_image_ctx, 0, true);
expect_get_read_flags(mock_image_ctx, CEPH_NOSNAP, 0);
expect_read(mock_image_ctx, "object0", 0, 4096, 0);

C_SaferCond ctx;
MockObjectReadRequest::Extents extents{{0, 4096}};
auto req = MockObjectReadRequest::create(
&mock_image_ctx, "object0", 0, 0, 4096, extents, CEPH_NOSNAP, 0, {}, &ctx);
req->send();
ASSERT_EQ(-ENOENT, ctx.wait());
}

TEST_F(TestMockIoObjectRequest, SparseReadThreshold) {
librbd::ImageCtx *ictx;
ASSERT_EQ(0, open_image(m_image_name, &ictx));
ictx->sparse_read_threshold_bytes = ictx->get_object_size();

MockTestImageCtx mock_image_ctx(*ictx);
MockObjectMap mock_object_map;
if (ictx->test_features(RBD_FEATURE_OBJECT_MAP)) {
mock_image_ctx.object_map = &mock_object_map;
}

InSequence seq;
expect_get_parent_overlap(mock_image_ctx, CEPH_NOSNAP, 0, 0);
expect_prune_parent_extents(mock_image_ctx, {}, 0, 0);
expect_object_may_exist(mock_image_ctx, 0, true);
expect_get_read_flags(mock_image_ctx, CEPH_NOSNAP, 0);
expect_sparse_read(mock_image_ctx, "object0", 0,
ictx->sparse_read_threshold_bytes, 0);

C_SaferCond ctx;
MockObjectReadRequest::Extents extents{
{0, ictx->sparse_read_threshold_bytes}};
auto req = MockObjectReadRequest::create(
&mock_image_ctx, "object0", 0, 0, ictx->sparse_read_threshold_bytes,
extents, CEPH_NOSNAP, 0, {}, &ctx);
req->send();
ASSERT_EQ(-ENOENT, ctx.wait());
}

} // namespace io
} // namespace librbd

0 comments on commit 6ed3d07

Please sign in to comment.