Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

os/bluestore: add discard method for ssd's performance #14727

Merged
merged 4 commits into from
Feb 24, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions qa/objectstore/bluestore.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ overrides:
osd failsafe full ratio: .95
# this doesn't work with failures because the log writes are not atomic across the two backends
# bluestore bluefs env mirror: true
bdev enable discard: true
bdev async discard: true
ceph-deploy:
fs: xfs
bluestore: yes
Expand All @@ -35,4 +37,6 @@ overrides:
mon osd backfillfull_ratio: .85
mon osd nearfull ratio: .8
osd failsafe full ratio: .95
bdev enable discard: true
bdev async discard: true

2 changes: 2 additions & 0 deletions src/common/legacy_config_opts.h
Original file line number Diff line number Diff line change
Expand Up @@ -938,6 +938,8 @@ OPTION(bdev_debug_aio_suicide_timeout, OPT_FLOAT)
// NVMe driver is loaded while osd is running.
OPTION(bdev_nvme_unbind_from_kernel, OPT_BOOL)
OPTION(bdev_nvme_retry_count, OPT_INT) // -1 means by default which is 4
OPTION(bdev_enable_discard, OPT_BOOL)
OPTION(bdev_async_discard, OPT_BOOL)

OPTION(objectstore_blackhole, OPT_BOOL)

Expand Down
8 changes: 8 additions & 0 deletions src/common/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3394,6 +3394,14 @@ std::vector<Option> get_global_options() {
.set_default(-1)
.set_description(""),

Option("bdev_enable_discard", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(false)
.set_description(""),

Option("bdev_async_discard", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(false)
.set_description(""),

Option("bluefs_alloc_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(1_M)
.set_description(""),
Expand Down
4 changes: 2 additions & 2 deletions src/os/bluestore/BlockDevice.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ void IOContext::release_running_aios()
}

BlockDevice *BlockDevice::create(CephContext* cct, const string& path,
aio_callback_t cb, void *cbpriv)
aio_callback_t cb, void *cbpriv, aio_callback_t d_cb, void *d_cbpriv)
{
string type = "kernel";
char buf[PATH_MAX + 1];
Expand Down Expand Up @@ -117,7 +117,7 @@ BlockDevice *BlockDevice::create(CephContext* cct, const string& path,
#endif
#if defined(HAVE_LIBAIO)
if (type == "kernel") {
return new KernelDevice(cct, cb, cbpriv);
return new KernelDevice(cct, cb, cbpriv, d_cb, d_cbpriv);
}
#endif
#if defined(HAVE_SPDK)
Expand Down
7 changes: 5 additions & 2 deletions src/os/bluestore/BlockDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#endif
#include "include/assert.h"
#include "include/buffer.h"

#include "include/interval_set.h"
#define SPDK_PREFIX "spdk:"

class CephContext;
Expand Down Expand Up @@ -129,7 +129,7 @@ class BlockDevice {
virtual ~BlockDevice() = default;

static BlockDevice *create(
CephContext* cct, const std::string& path, aio_callback_t cb, void *cbpriv);
CephContext* cct, const std::string& path, aio_callback_t cb, void *cbpriv, aio_callback_t d_cb, void *d_cbpriv);
virtual bool supported_bdev_label() { return true; }
virtual bool is_rotational() { return rotational; }

Expand Down Expand Up @@ -178,6 +178,9 @@ class BlockDevice {
IOContext *ioc,
bool buffered) = 0;
virtual int flush() = 0;
// Discard hook for the range described by offset/len.
// NOTE(review): units are not visible here -- presumably bytes; confirm
// against the concrete device implementation.
// The base-class default ignores both arguments and returns 0.
virtual int discard(uint64_t offset, uint64_t len) { return 0; }
// Hook for handing a set of extents to the device for queued discard.
// The base-class default queues nothing and returns -1. The callers visible
// in BlueFS.cc and BlueStore.cc treat only a 0 return as "queued"; on any
// other value they release the extents to the allocator themselves.
virtual int queue_discard(interval_set<uint64_t> &to_release) { return -1; }
// Drain hook; the base-class default is a no-op.
// NOTE(review): presumably an overriding device waits here for queued
// discards to finish -- confirm in the concrete implementation.
virtual void discard_drain() { return; }

void queue_reap_ioc(IOContext *ioc);
void reap_ioc();
Expand Down
44 changes: 43 additions & 1 deletion src/os/bluestore/BlueFS.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,33 @@ MEMPOOL_DEFINE_OBJECT_FACTORY(BlueFS::FileReaderBuffer,
MEMPOOL_DEFINE_OBJECT_FACTORY(BlueFS::FileReader, bluefs_file_reader, bluefs);
MEMPOOL_DEFINE_OBJECT_FACTORY(BlueFS::FileLock, bluefs_file_lock, bluefs);

static void wal_discard_cb(void *priv, void* priv2) {
BlueFS *bluefs = static_cast<BlueFS*>(priv);
interval_set<uint64_t> *tmp = static_cast<interval_set<uint64_t>*>(priv2);
bluefs->handle_discard(BlueFS::BDEV_WAL, *tmp);
}

static void db_discard_cb(void *priv, void* priv2) {
BlueFS *bluefs = static_cast<BlueFS*>(priv);
interval_set<uint64_t> *tmp = static_cast<interval_set<uint64_t>*>(priv2);
bluefs->handle_discard(BlueFS::BDEV_DB, *tmp);
}

static void slow_discard_cb(void *priv, void* priv2) {
BlueFS *bluefs = static_cast<BlueFS*>(priv);
interval_set<uint64_t> *tmp = static_cast<interval_set<uint64_t>*>(priv2);
bluefs->handle_discard(BlueFS::BDEV_SLOW, *tmp);
}

// Construct a BlueFS bound to the given CephContext.
// The per-device vectors (bdev, ioc, block_all) are sized to MAX_BDEV, and
// the discard callback table gets one dedicated callback per device slot so
// that each callback can report its own device id to handle_discard().
BlueFS::BlueFS(CephContext* cct)
  : cct(cct),
    bdev(MAX_BDEV),
    ioc(MAX_BDEV),
    block_all(MAX_BDEV)
{
  // The three assignments are independent of one another.
  discard_cb[BDEV_SLOW] = slow_discard_cb;
  discard_cb[BDEV_DB] = db_discard_cb;
  discard_cb[BDEV_WAL] = wal_discard_cb;
}

BlueFS::~BlueFS()
Expand Down Expand Up @@ -133,7 +153,7 @@ int BlueFS::add_block_device(unsigned id, const string& path)
dout(10) << __func__ << " bdev " << id << " path " << path << dendl;
assert(id < bdev.size());
assert(bdev[id] == NULL);
BlockDevice *b = BlockDevice::create(cct, path, NULL, NULL);
BlockDevice *b = BlockDevice::create(cct, path, NULL, NULL, discard_cb[id], static_cast<void*>(this));
int r = b->open(path);
if (r < 0) {
delete b;
Expand Down Expand Up @@ -222,6 +242,13 @@ int BlueFS::reclaim_blocks(unsigned id, uint64_t want,
return 0;
}

// Handler for a discard event on device slot `id`: logs the device id, then
// returns the extents in `to_release` to that device's allocator.
// Asserts that an allocator exists for the slot.
void BlueFS::handle_discard(unsigned id, interval_set<uint64_t>& to_release)
{
  dout(10) << __func__ << " bdev " << id << dendl;
  assert(alloc[id]);
  auto *dev_alloc = alloc[id];
  dev_alloc->release(to_release);
}

uint64_t BlueFS::get_fs_usage()
{
std::lock_guard<std::mutex> l(lock);
Expand Down Expand Up @@ -379,6 +406,11 @@ void BlueFS::_init_alloc()
void BlueFS::_stop_alloc()
{
dout(20) << __func__ << dendl;
for (auto p : bdev) {
if (p)
p->discard_drain();
}

for (auto p : alloc) {
if (p != nullptr) {
p->shutdown();
Expand Down Expand Up @@ -1572,6 +1604,16 @@ int BlueFS::_flush_and_sync_log(std::unique_lock<std::mutex>& l,
for (unsigned i = 0; i < to_release.size(); ++i) {
if (!to_release[i].empty()) {
/* OK, now we have the guarantee alloc[i] won't be null. */
int r = 0;
if (cct->_conf->bdev_enable_discard && cct->_conf->bdev_async_discard) {
r = bdev[i]->queue_discard(to_release[i]);
if (r == 0)
continue;
} else if (cct->_conf->bdev_enable_discard) {
for (auto p = to_release[i].begin(); p != to_release[i].end(); ++p) {
bdev[i]->discard(p.get_start(), p.get_len());
}
}
alloc[i]->release(to_release[i]);
}
}
Expand Down
5 changes: 5 additions & 0 deletions src/os/bluestore/BlueFS.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ class BlueFS {
vector<Allocator*> alloc; ///< allocators for bdevs
vector<interval_set<uint64_t>> pending_release; ///< extents to release

BlockDevice::aio_callback_t discard_cb[3]; //discard callbacks for each dev

void _init_logger();
void _shutdown_logger();
void _update_logger_stats();
Expand Down Expand Up @@ -405,6 +407,9 @@ class BlueFS {
int reclaim_blocks(unsigned bdev, uint64_t want,
PExtentVector *extents);

// handler for discard event
void handle_discard(unsigned dev, interval_set<uint64_t>& to_release);

void flush(FileWriter *h) {
std::lock_guard<std::mutex> l(lock);
_flush(h, false);
Expand Down
36 changes: 34 additions & 2 deletions src/os/bluestore/BlueStore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3609,6 +3609,20 @@ static void aio_cb(void *priv, void *priv2)
c->aio_finish(store);
}

static void discard_cb(void *priv, void *priv2)
{
BlueStore *store = static_cast<BlueStore*>(priv);
interval_set<uint64_t> *tmp = static_cast<interval_set<uint64_t>*>(priv2);
store->handle_discard(*tmp);
}

// Handler for a discard event: logs the call, then returns the extents in
// `to_release` to the store's allocator. Asserts that the allocator exists.
void BlueStore::handle_discard(interval_set<uint64_t>& to_release)
{
  dout(10) << __func__ << dendl;
  assert(alloc);
  auto *store_alloc = alloc;
  store_alloc->release(to_release);
}

BlueStore::BlueStore(CephContext *cct, const string& path)
: ObjectStore(cct, path),
throttle_bytes(cct, "bluestore_throttle_bytes",
Expand Down Expand Up @@ -4296,7 +4310,7 @@ int BlueStore::_open_bdev(bool create)
{
assert(bdev == NULL);
string p = path + "/block";
bdev = BlockDevice::create(cct, p, aio_cb, static_cast<void*>(this));
bdev = BlockDevice::create(cct, p, aio_cb, static_cast<void*>(this), discard_cb, static_cast<void*>(this));
int r = bdev->open(p);
if (r < 0)
goto fail;
Expand Down Expand Up @@ -4487,6 +4501,9 @@ int BlueStore::_open_alloc()

void BlueStore::_close_alloc()
{
assert(bdev);
bdev->discard_drain();

assert(alloc);
alloc->shutdown();
delete alloc;
Expand Down Expand Up @@ -8286,10 +8303,25 @@ void BlueStore::_txc_release_alloc(TransContext *txc)
{
// it's expected we're called with lazy_release_lock already taken!
if (likely(!cct->_conf->bluestore_debug_no_reuse_blocks)) {
dout(10) << __func__ << " " << txc << " " << std::hex
int r = 0;
if (cct->_conf->bdev_enable_discard && cct->_conf->bdev_async_discard) {
r = bdev->queue_discard(txc->released);
if (r == 0) {
dout(10) << __func__ << "(queued) " << txc << " " << std::hex
<< txc->released << std::dec << dendl;
goto out;
}
} else if (cct->_conf->bdev_enable_discard) {
for (auto p = txc->released.begin(); p != txc->released.end(); ++p) {
bdev->discard(p.get_start(), p.get_len());
}
}
dout(10) << __func__ << "(sync) " << txc << " " << std::hex
<< txc->released << std::dec << dendl;
alloc->release(txc->released);
}

out:
txc->allocated.clear();
txc->released.clear();
}
Expand Down
3 changes: 3 additions & 0 deletions src/os/bluestore/BlueStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ class BlueStore : public ObjectStore,
void handle_conf_change(const struct md_config_t *conf,
const std::set<std::string> &changed) override;

//handler for discard event
void handle_discard(interval_set<uint64_t>& to_release);

void _set_csum();
void _set_compression();
void _set_throttle_params();
Expand Down