Skip to content

Commit

Permalink
os/bluestore: use different default cache sizes for hdd/ssd backends
Browse files Browse the repository at this point in the history
This is a follow-up change to ceph#15976
that makes the bluestore cache capacity self-adaptive for
different backends.

Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
  • Loading branch information
xiexingguo authored and dingdangzhang committed Jul 12, 2017
1 parent 73d7b63 commit 3e1adaf
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 15 deletions.
6 changes: 4 additions & 2 deletions src/common/config_opts.h
Expand Up @@ -1164,10 +1164,12 @@ OPTION(bluestore_cache_trim_max_skip_pinned, OPT_U32, 64) // skip this many onod
OPTION(bluestore_cache_type, OPT_STR, "2q") // lru, 2q
OPTION(bluestore_2q_cache_kin_ratio, OPT_DOUBLE, .5) // kin page slot size / max page slot size
OPTION(bluestore_2q_cache_kout_ratio, OPT_DOUBLE, .5) // number of kout page slot / total number of page slot
OPTION(bluestore_cache_size, OPT_U64, 3*1024*1024*1024)
OPTION(bluestore_cache_size, OPT_U64, 0)
OPTION(bluestore_cache_size_hdd, OPT_U64, 1*1024*1024*1024)
OPTION(bluestore_cache_size_ssd, OPT_U64, 3*1024*1024*1024)
OPTION(bluestore_cache_meta_ratio, OPT_DOUBLE, .01)
OPTION(bluestore_cache_kv_ratio, OPT_DOUBLE, .99)
OPTION(bluestore_cache_kv_max, OPT_U64, 512*1024*1024) // limit the maximum amont of cache for the kv store
OPTION(bluestore_cache_kv_max, OPT_U64, 512*1024*1024) // limit the maximum amount of cache for the kv store
OPTION(bluestore_kvbackend, OPT_STR, "rocksdb")
OPTION(bluestore_allocator, OPT_STR, "bitmap") // stupid | bitmap
OPTION(bluestore_freelist_blocks_per_key, OPT_INT, 128)
Expand Down
30 changes: 20 additions & 10 deletions src/os/bluestore/BlueStore.cc
Expand Up @@ -3329,7 +3329,7 @@ void *BlueStore::MempoolThread::entry()
size_t num_shards = store->cache_shards.size();
float target_ratio = store->cache_meta_ratio + store->cache_data_ratio;
// A little sloppy but should be close enough
uint64_t shard_target = target_ratio * (store->cct->_conf->bluestore_cache_size / num_shards);
uint64_t shard_target = target_ratio * (store->cache_size / num_shards);

for (auto i : store->cache_shards) {
i->trim(shard_target,
Expand Down Expand Up @@ -3702,10 +3702,20 @@ void BlueStore::_set_blob_size()

int BlueStore::_set_cache_sizes()
{
assert(bdev);
if (cct->_conf->bluestore_cache_size) {
cache_size = cct->_conf->bluestore_cache_size;
} else {
// choose global cache size based on backend type
if (bdev->is_rotational()) {
cache_size = cct->_conf->bluestore_cache_size_hdd;
} else {
cache_size = cct->_conf->bluestore_cache_size_ssd;
}
}
cache_meta_ratio = cct->_conf->bluestore_cache_meta_ratio;
cache_kv_ratio = cct->_conf->bluestore_cache_kv_ratio;

double cache_size = cct->_conf->bluestore_cache_size;
double cache_kv_max = cct->_conf->bluestore_cache_kv_max;
double cache_kv_max_ratio = 0;

Expand Down Expand Up @@ -3745,7 +3755,8 @@ int BlueStore::_set_cache_sizes()
// deal with floating point imprecision
cache_data_ratio = 0;
}
dout(1) << __func__ << " meta " << cache_meta_ratio
dout(1) << __func__ << " cache_size " << cache_size
<< " meta " << cache_meta_ratio
<< " kv " << cache_kv_ratio
<< " data " << cache_data_ratio
<< dendl;
Expand Down Expand Up @@ -3927,12 +3938,6 @@ int BlueStore::get_block_device_fsid(CephContext* cct, const string& path,

int BlueStore::_open_path()
{
// initial sanity check
int r = _set_cache_sizes();
if (r < 0) {
return r;
}

assert(path_fd < 0);
path_fd = ::open(path.c_str(), O_DIRECTORY);
if (path_fd < 0) {
Expand Down Expand Up @@ -4106,6 +4111,11 @@ int BlueStore::_open_bdev(bool create)
block_mask = ~(block_size - 1);
block_size_order = ctz(block_size);
assert(block_size == 1u << block_size_order);
// and set cache_size based on device type
r = _set_cache_sizes();
if (r < 0) {
goto fail_close;
}
return 0;

fail_close:
Expand Down Expand Up @@ -4653,7 +4663,7 @@ int BlueStore::_open_db(bool create)
FreelistManager::setup_merge_operators(db);
db->set_merge_operator(PREFIX_STAT, merge_op);

db->set_cache_size(cct->_conf->bluestore_cache_size * cache_kv_ratio);
db->set_cache_size(cache_size * cache_kv_ratio);

if (kv_backend == "rocksdb")
options = cct->_conf->bluestore_rocksdb_options;
Expand Down
1 change: 1 addition & 0 deletions src/os/bluestore/BlueStore.h
Expand Up @@ -1917,6 +1917,7 @@ class BlueStore : public ObjectStore,
uint64_t kv_throttle_costs = 0;

// cache trim control
uint64_t cache_size = 0; ///< total cache size
float cache_meta_ratio = 0; ///< cache ratio dedicated to metadata
float cache_kv_ratio = 0; ///< cache ratio dedicated to kv (e.g., rocksdb)
float cache_data_ratio = 0; ///< cache ratio dedicated to object data
Expand Down
9 changes: 6 additions & 3 deletions src/test/objectstore/store_test.cc
Expand Up @@ -5765,7 +5765,8 @@ TEST_P(StoreTestSpecificAUSize, OnodeSizeTracking) {
StartDeferred(block_size);
g_conf->set_val("bluestore_compression_mode", "none");
g_conf->set_val("bluestore_csum_type", "none");
g_conf->set_val("bluestore_cache_size", "400000000");
g_conf->set_val("bluestore_cache_size_hdd", "400000000");
g_conf->set_val("bluestore_cache_size_ssd", "400000000");
g_conf->apply_changes(NULL);

ObjectStore::Sequencer osr("test");
Expand Down Expand Up @@ -5853,7 +5854,8 @@ TEST_P(StoreTestSpecificAUSize, OnodeSizeTracking) {
r = apply_transaction(store, &osr, std::move(t));
ASSERT_EQ(r, 0);
}
g_ceph_context->_conf->set_val("bluestore_cache_size", "4000000");
g_ceph_context->_conf->set_val("bluestore_cache_size_hdd", "4000000");
g_ceph_context->_conf->set_val("bluestore_cache_size_ssd", "4000000");
g_conf->set_val("bluestore_compression_mode", "none");
g_conf->set_val("bluestore_csum_type", "crc32c");

Expand Down Expand Up @@ -6656,7 +6658,8 @@ int main(int argc, char **argv) {
g_ceph_context->_conf->set_val("bluestore_max_alloc_size", "196608");

// set small cache sizes so we see trimming during Synthetic tests
g_ceph_context->_conf->set_val("bluestore_cache_size", "4000000");
g_ceph_context->_conf->set_val("bluestore_cache_size_hdd", "4000000");
g_ceph_context->_conf->set_val("bluestore_cache_size_ssd", "4000000");

// very short *_max prealloc so that we fall back to async submits
g_ceph_context->_conf->set_val("bluestore_blobid_prealloc", "10");
Expand Down

0 comments on commit 3e1adaf

Please sign in to comment.