Skip to content

Commit

Permalink
os/bluestore: Guard Zoned* headers with HAVE_LIBZBD.
Browse files Browse the repository at this point in the history
Signed-off-by: Abutalib Aghayev <agayev@psu.edu>
  • Loading branch information
agayev committed Apr 15, 2021
1 parent a958d72 commit 939d379
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 10 deletions.
50 changes: 42 additions & 8 deletions src/os/bluestore/BlueStore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@
#include "common/PriorityCache.h"
#include "Allocator.h"
#include "FreelistManager.h"
#include "ZonedAllocator.h"
#include "ZonedFreelistManager.h"
#include "BlueFS.h"
#include "BlueRocksEnv.h"
#include "auth/Crypto.h"
Expand All @@ -48,6 +46,11 @@
#include "common/pretty_binary.h"
#include "kv/KeyValueHistogram.h"

#ifdef HAVE_LIBZBD
#include "ZonedAllocator.h"
#include "ZonedFreelistManager.h"
#endif

#if defined(WITH_LTTNG)
#define TRACEPOINT_DEFINE
#define TRACEPOINT_PROBE_DYNAMIC_LINKAGE
Expand Down Expand Up @@ -120,9 +123,12 @@ const string PREFIX_DEFERRED = "L"; // id -> deferred_transaction_t
const string PREFIX_ALLOC = "B"; // u64 offset -> u64 length (freelist)
const string PREFIX_ALLOC_BITMAP = "b";// (see BitmapFreelistManager)
const string PREFIX_SHARED_BLOB = "X"; // u64 offset -> shared_blob_t

#ifdef HAVE_LIBZBD
const string PREFIX_ZONED_FM_META = "Z"; // (see ZonedFreelistManager)
const string PREFIX_ZONED_FM_INFO = "z"; // (see ZonedFreelistManager)
const string PREFIX_ZONED_CL_INFO = "G"; // (per-zone cleaner metadata)
#endif

const string BLUESTORE_GLOBAL_STATFS_KEY = "bluestore_statfs";

Expand Down Expand Up @@ -4493,7 +4499,9 @@ BlueStore::BlueStore(CephContext *cct,
finisher(cct, "commit_finisher", "cfin"),
kv_sync_thread(this),
kv_finalize_thread(this),
#ifdef HAVE_LIBZBD
zoned_cleaner_thread(this),
#endif
min_alloc_size(_min_alloc_size),
min_alloc_size_order(ctz(_min_alloc_size)),
mempool_thread(this)
Expand Down Expand Up @@ -5249,9 +5257,11 @@ int BlueStore::_open_bdev(bool create)
goto fail_close;
}

#ifdef HAVE_LIBZBD
if (bdev->is_smr()) {
freelist_type = "zoned";
}
#endif
return 0;

fail_close:
Expand Down Expand Up @@ -5297,10 +5307,11 @@ int BlueStore::_open_fm(KeyValueDB::Transaction t, bool read_only)
ceph_assert( cct->_conf->bdev_block_size <= (int64_t)min_alloc_size);

uint64_t alloc_size = min_alloc_size;
#ifdef HAVE_LIBZBD
if (bdev->is_smr()) {
alloc_size = _zoned_piggyback_device_parameters_onto(alloc_size);
}

#endif
fm->create(bdev->get_size(), alloc_size, t);

// allocate superblock reserved space. note that we do not mark
Expand Down Expand Up @@ -5410,13 +5421,16 @@ int BlueStore::_create_alloc()
ceph_assert(bdev->get_size());

uint64_t alloc_size = min_alloc_size;

#ifdef HAVE_LIBZBD
if (bdev->is_smr()) {
int r = _zoned_check_config_settings();
if (r < 0)
return r;
alloc_size = _zoned_piggyback_device_parameters_onto(alloc_size);
}

#endif

shared_alloc.set(Allocator::create(cct, cct->_conf->bluestore_allocator,
bdev->get_size(),
alloc_size, "block"));
Expand All @@ -5438,6 +5452,7 @@ int BlueStore::_init_alloc()
}
ceph_assert(shared_alloc.a != NULL);

#ifdef HAVE_LIBZBD
if (bdev->is_smr()) {
auto a = dynamic_cast<ZonedAllocator*>(shared_alloc.a);
ceph_assert(a);
Expand All @@ -5447,7 +5462,8 @@ int BlueStore::_init_alloc()
&zoned_cleaner_lock,
&zoned_cleaner_cond);
}

#endif

uint64_t num = 0, bytes = 0;

dout(1) << __func__ << " opening allocation metadata" << dendl;
Expand Down Expand Up @@ -7019,10 +7035,12 @@ int BlueStore::_mount()

_kv_start();

#ifdef HAVE_LIBZBD
if (bdev->is_smr()) {
_zoned_cleaner_start();
}

#endif

r = _deferred_replay();
if (r < 0)
goto out_stop;
Expand Down Expand Up @@ -7052,9 +7070,11 @@ int BlueStore::_mount()
return 0;

out_stop:
#ifdef HAVE_LIBZBD
if (bdev->is_smr()) {
_zoned_cleaner_stop();
}
#endif
_kv_stop();
out_coll:
_shutdown_cache();
Expand All @@ -7073,10 +7093,12 @@ int BlueStore::umount()
mounted = false;
if (!_kv_only) {
mempool_thread.shutdown();
#ifdef HAVE_LIBZBD
if (bdev->is_smr()) {
dout(20) << __func__ << " stopping zone cleaner thread" << dendl;
_zoned_cleaner_stop();
}
#endif
dout(20) << __func__ << " stopping kv thread" << dendl;
_kv_stop();
_shutdown_cache();
Expand Down Expand Up @@ -11469,6 +11491,7 @@ void BlueStore::BSPerfTracker::update_from_perfcounters(
l_bluestore_commit_lat));
}

#ifdef HAVE_LIBZBD
// For every object we maintain a <zone_num+oid, offset> tuple in the key-value
// store. When a new object is written to a zone, we insert the corresponding
// tuple into the database. When an object is truncated, we remove the
Expand Down Expand Up @@ -11539,6 +11562,7 @@ int BlueStore::_zoned_check_config_settings() {
}
return 0;
}
#endif

void BlueStore::_txc_finalize_kv(TransContext *txc, KeyValueDB::Transaction t)
{
Expand Down Expand Up @@ -11585,9 +11609,11 @@ void BlueStore::_txc_finalize_kv(TransContext *txc, KeyValueDB::Transaction t)
fm->release(p.get_start(), p.get_len(), t);
}

#ifdef HAVE_LIBZBD
if (bdev->is_smr()) {
_zoned_update_cleaning_metadata(txc);
}
#endif

_txc_update_store_statfs(txc);
}
Expand Down Expand Up @@ -12311,6 +12337,7 @@ void BlueStore::_kv_finalize_thread()
kv_finalize_started = false;
}

#ifdef HAVE_LIBZBD
void BlueStore::_zoned_cleaner_start() {
dout(10) << __func__ << dendl;

Expand Down Expand Up @@ -12373,6 +12400,7 @@ void BlueStore::_zoned_clean_zone(uint64_t zone_num) {
// TODO: (1) copy live objects from zone_num to a new zone, (2) issue a RESET
// ZONE operation to the device for the corresponding zone.
}
#endif

bluestore_deferred_op_t *BlueStore::_get_deferred_op(
TransContext *txc)
Expand Down Expand Up @@ -13248,6 +13276,7 @@ void BlueStore::_do_write_small(
// than 'offset' only).
o->extent_map.fault_range(db, min_off, offset + max_bsize - min_off);

#ifdef HAVE_LIBZBD
// On zoned devices, the first goal is to support non-overwrite workloads,
// such as RGW, with large, aligned objects. Therefore, for user writes
// _do_write_small should not trigger. OSDs, however, write and update a tiny
Expand All @@ -13263,6 +13292,7 @@ void BlueStore::_do_write_small(
wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length, false, true);
return;
}
#endif

// Look for an existing mutable blob we can use.
auto begin = o->extent_map.extent_map.begin();
Expand Down Expand Up @@ -14534,6 +14564,7 @@ int BlueStore::_do_write(
min_alloc_size);
}

#ifdef HAVE_LIBZBD
if (bdev->is_smr()) {
if (wctx.old_extents.empty()) {
txc->zoned_note_new_object(o);
Expand All @@ -14542,7 +14573,8 @@ int BlueStore::_do_write(
txc->zoned_note_updated_object(o, old_ondisk_offset);
}
}

#endif

// NB: _wctx_finish() will empty old_extents
// so we must do gc estimation before that
_wctx_finish(txc, c, o, &wctx);
Expand Down Expand Up @@ -14676,14 +14708,16 @@ void BlueStore::_do_truncate(
o->extent_map.punch_hole(c, offset, length, &wctx.old_extents);
o->extent_map.dirty_range(offset, length);

#ifdef HAVE_LIBZBD
if (bdev->is_smr()) {
// On zoned devices, we currently support only removing an object or
// truncating it to zero size, both of which fall through this code path.
ceph_assert(offset == 0 && !wctx.old_extents.empty());
int64_t ondisk_offset = wctx.old_extents.begin()->r.begin()->offset;
txc->zoned_note_truncated_object(o, ondisk_offset);
}

#endif

_wctx_finish(txc, c, o, &wctx, maybe_unshared_blobs);

// if we have shards past EOF, ask for a reshard
Expand Down
20 changes: 18 additions & 2 deletions src/os/bluestore/BlueStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -1153,6 +1153,7 @@ class BlueStore : public ObjectStore,
void get_omap_tail(std::string *out);
void decode_omap_key(const std::string& key, std::string *user_key);

#ifdef HAVE_LIBZBD
// Return the offset of an object on disk. This function is intended *only*
// for use with zoned storage devices because in these devices, the objects
// are laid out contiguously on disk, which is not the case in general.
Expand All @@ -1162,6 +1163,8 @@ class BlueStore : public ObjectStore,
return extent_map.extent_map.begin()->blob->
get_blob().calc_offset(0, nullptr);
}
#endif

};
typedef boost::intrusive_ptr<Onode> OnodeRef;

Expand Down Expand Up @@ -1579,6 +1582,7 @@ class BlueStore : public ObjectStore,
std::set<OnodeRef> onodes; ///< these need to be updated/written
std::set<OnodeRef> modified_objects; ///< objects we modified (and need a ref)

#ifdef HAVE_LIBZBD
// A map from onode to a vector of object offsets. For new objects created
// in the transaction we append the new offset to the vector, for
// overwritten objects we append the negative of the previous ondisk offset
Expand All @@ -1589,7 +1593,8 @@ class BlueStore : public ObjectStore,
// different zones. See _zoned_update_cleaning_metadata() for how this map
// is used.
std::map<OnodeRef, std::vector<int64_t>> zoned_onode_to_offset_map;

#endif

std::set<SharedBlobRef> shared_blobs; ///< these need to be updated/written
std::set<SharedBlobRef> shared_blobs_written; ///< update these on io completion

Expand Down Expand Up @@ -1662,6 +1667,7 @@ class BlueStore : public ObjectStore,
onodes.erase(o);
}

#ifdef HAVE_LIBZBD
void zoned_note_new_object(OnodeRef &o) {
auto [_, ok] = zoned_onode_to_offset_map.emplace(
std::pair<OnodeRef, std::vector<int64_t>>(o, {o->zoned_get_ondisk_starting_offset()}));
Expand All @@ -1685,6 +1691,7 @@ class BlueStore : public ObjectStore,
it->second.push_back(-offset);
}
}
#endif

void aio_finish(BlueStore *store) override {
store->txc_aio_finish(this);
Expand Down Expand Up @@ -1988,6 +1995,8 @@ class BlueStore : public ObjectStore,
return NULL;
}
};

#ifdef HAVE_LIBZBD
struct ZonedCleanerThread : public Thread {
BlueStore *store;
explicit ZonedCleanerThread(BlueStore *s) : store(s) {}
Expand All @@ -1996,7 +2005,8 @@ class BlueStore : public ObjectStore,
return nullptr;
}
};

#endif

struct BigDeferredWriteContext {
uint64_t off = 0; // original logical offset
uint32_t b_off = 0; // blob relative offset
Expand Down Expand Up @@ -2087,12 +2097,14 @@ class BlueStore : public ObjectStore,
std::deque<DeferredBatch*> deferred_stable_to_finalize; ///< pending finalization
bool kv_finalize_in_progress = false;

#ifdef HAVE_LIBZBD
ZonedCleanerThread zoned_cleaner_thread;
ceph::mutex zoned_cleaner_lock = ceph::make_mutex("BlueStore::zoned_cleaner_lock");
ceph::condition_variable zoned_cleaner_cond;
bool zoned_cleaner_started = false;
bool zoned_cleaner_stop = false;
std::deque<uint64_t> zoned_cleaner_queue;
#endif

PerfCounters *logger = nullptr;

Expand Down Expand Up @@ -2383,11 +2395,13 @@ class BlueStore : public ObjectStore,
int _setup_block_symlink_or_file(std::string name, std::string path, uint64_t size,
bool create);

#ifdef HAVE_LIBZBD
// Functions related to zoned storage.
uint64_t _zoned_piggyback_device_parameters_onto(uint64_t min_alloc_size);
int _zoned_check_config_settings();
void _zoned_update_cleaning_metadata(TransContext *txc);
std::string _zoned_get_prefix(uint64_t offset);
#endif

public:
utime_t get_deferred_last_submitted() {
Expand Down Expand Up @@ -2458,10 +2472,12 @@ class BlueStore : public ObjectStore,
void _kv_sync_thread();
void _kv_finalize_thread();

#ifdef HAVE_LIBZBD
void _zoned_cleaner_start();
void _zoned_cleaner_stop();
void _zoned_cleaner_thread();
void _zoned_clean_zone(uint64_t zone_num);
#endif

bluestore_deferred_op_t *_get_deferred_op(TransContext *txc);
void _deferred_queue(TransContext *txc);
Expand Down

0 comments on commit 939d379

Please sign in to comment.