Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

os/bluestore: switch default allocator to stupid; test both bitmap and stupid in qa #16906

Merged
merged 4 commits into from
Aug 12, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
41 changes: 41 additions & 0 deletions qa/objectstore/bluestore-bitmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
overrides:
thrashosds:
bdev_inject_crash: 2
bdev_inject_crash_probability: .5
ceph:
fs: xfs
conf:
osd:
osd objectstore: bluestore
bluestore block size: 96636764160
debug bluestore: 30
debug bdev: 20
debug bluefs: 20
debug rocksdb: 10
bluestore fsck on mount: true
bluestore allocator: bitmap
# lower the full ratios since we can fill up a ~100 GB OSD so quickly
mon osd full ratio: .9
mon osd backfillfull_ratio: .85
mon osd nearfull ratio: .8
osd failsafe full ratio: .95
# this doesn't work with failures because the log writes are not atomic across the two backends
# bluestore bluefs env mirror: true
ceph-deploy:
fs: xfs
bluestore: yes
conf:
osd:
osd objectstore: bluestore
bluestore block size: 96636764160
debug bluestore: 30
debug bdev: 20
debug bluefs: 20
debug rocksdb: 10
bluestore fsck on mount: true
# lower the full ratios since we can fill up a ~100 GB OSD so quickly
mon osd full ratio: .9
mon osd backfillfull_ratio: .85
mon osd nearfull ratio: .8
osd failsafe full ratio: .95

10 changes: 5 additions & 5 deletions src/common/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3085,7 +3085,7 @@ std::vector<Option> get_global_options() {
.set_description(""),

Option("bluefs_allocator", Option::TYPE_STR, Option::LEVEL_DEV)
.set_default("bitmap")
.set_default("stupid")
.set_description(""),

Option("bluefs_preextend_wal_files", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
Expand Down Expand Up @@ -3396,10 +3396,10 @@ std::vector<Option> get_global_options() {
.add_tag("mkfs")
.set_description("Key value database to use for bluestore"),

Option("bluestore_allocator", Option::TYPE_STR, Option::LEVEL_DEV)
.set_default("bitmap")
.add_tag("mkfs")
.set_description(""),
Option("bluestore_allocator", Option::TYPE_STR, Option::LEVEL_ADVANCED)
.set_default("stupid")
.set_enum_allowed({"bitmap", "stupid"})
.set_description("Allocator policy"),

Option("bluestore_freelist_blocks_per_key", Option::TYPE_INT, Option::LEVEL_DEV)
.set_default(128)
Expand Down
43 changes: 22 additions & 21 deletions src/include/btree_interval_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@
#include "assert.h"
#include "encoding_btree.h"

template<typename T>
template<typename T,
typename Alloc = std::allocator<std::pair<const T, T>>>
class btree_interval_set {
public:

typedef btree::btree_map<T,T> map_t;
typedef btree::btree_map<T,T, std::less<T>, Alloc> map_t;

class const_iterator;

Expand Down Expand Up @@ -164,28 +165,28 @@ class btree_interval_set {
return m.size();
}

typename btree_interval_set<T>::iterator begin() {
return typename btree_interval_set<T>::iterator(m.begin());
typename btree_interval_set<T,Alloc>::iterator begin() {
return typename btree_interval_set<T,Alloc>::iterator(m.begin());
}

typename btree_interval_set<T>::iterator lower_bound(T start) {
return typename btree_interval_set<T>::iterator(find_inc_m(start));
typename btree_interval_set<T,Alloc>::iterator lower_bound(T start) {
return typename btree_interval_set<T,Alloc>::iterator(find_inc_m(start));
}

typename btree_interval_set<T>::iterator end() {
return typename btree_interval_set<T>::iterator(m.end());
typename btree_interval_set<T,Alloc>::iterator end() {
return typename btree_interval_set<T,Alloc>::iterator(m.end());
}

typename btree_interval_set<T>::const_iterator begin() const {
return typename btree_interval_set<T>::const_iterator(m.begin());
typename btree_interval_set<T,Alloc>::const_iterator begin() const {
return typename btree_interval_set<T,Alloc>::const_iterator(m.begin());
}

typename btree_interval_set<T>::const_iterator lower_bound(T start) const {
return typename btree_interval_set<T>::const_iterator(find_inc(start));
typename btree_interval_set<T,Alloc>::const_iterator lower_bound(T start) const {
return typename btree_interval_set<T,Alloc>::const_iterator(find_inc(start));
}

typename btree_interval_set<T>::const_iterator end() const {
return typename btree_interval_set<T>::const_iterator(m.end());
typename btree_interval_set<T,Alloc>::const_iterator end() const {
return typename btree_interval_set<T,Alloc>::const_iterator(m.end());
}

// helpers
Expand Down Expand Up @@ -555,11 +556,11 @@ class btree_interval_set {
};


template<class T>
inline std::ostream& operator<<(std::ostream& out, const btree_interval_set<T> &s) {
template<class T, class A>
inline std::ostream& operator<<(std::ostream& out, const btree_interval_set<T,A> &s) {
out << "[";
const char *prequel = "";
for (typename btree_interval_set<T>::const_iterator i = s.begin();
for (auto i = s.begin();
i != s.end();
++i)
{
Expand All @@ -570,13 +571,13 @@ inline std::ostream& operator<<(std::ostream& out, const btree_interval_set<T> &
return out;
}

template<class T>
inline void encode(const btree_interval_set<T>& s, bufferlist& bl)
template<class T,typename A>
inline void encode(const btree_interval_set<T,A>& s, bufferlist& bl)
{
s.encode(bl);
}
template<class T>
inline void decode(btree_interval_set<T>& s, bufferlist::iterator& p)
template<class T,typename A>
inline void decode(btree_interval_set<T,A>& s, bufferlist::iterator& p)
{
s.decode(p);
}
Expand Down
17 changes: 9 additions & 8 deletions src/os/bluestore/StupidAllocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,9 @@ void StupidAllocator::unreserve(uint64_t unused)
}

/// return the effective length of the extent if we align to alloc_unit
static uint64_t aligned_len(btree_interval_set<uint64_t>::iterator p,
uint64_t alloc_unit)
uint64_t StupidAllocator::_aligned_len(
btree_interval_set<uint64_t,allocator>::iterator p,
uint64_t alloc_unit)
{
uint64_t skew = p.get_start() % alloc_unit;
if (skew)
Expand Down Expand Up @@ -106,7 +107,7 @@ int64_t StupidAllocator::allocate_int(
for (bin = orig_bin; bin < (int)free.size(); ++bin) {
p = free[bin].lower_bound(hint);
while (p != free[bin].end()) {
if (aligned_len(p, alloc_unit) >= want_size) {
if (_aligned_len(p, alloc_unit) >= want_size) {
goto found;
}
++p;
Expand All @@ -119,7 +120,7 @@ int64_t StupidAllocator::allocate_int(
p = free[bin].begin();
auto end = hint ? free[bin].lower_bound(hint) : free[bin].end();
while (p != end) {
if (aligned_len(p, alloc_unit) >= want_size) {
if (_aligned_len(p, alloc_unit) >= want_size) {
goto found;
}
++p;
Expand All @@ -131,7 +132,7 @@ int64_t StupidAllocator::allocate_int(
for (bin = orig_bin; bin >= 0; --bin) {
p = free[bin].lower_bound(hint);
while (p != free[bin].end()) {
if (aligned_len(p, alloc_unit) >= alloc_unit) {
if (_aligned_len(p, alloc_unit) >= alloc_unit) {
goto found;
}
++p;
Expand All @@ -144,7 +145,7 @@ int64_t StupidAllocator::allocate_int(
p = free[bin].begin();
auto end = hint ? free[bin].lower_bound(hint) : free[bin].end();
while (p != end) {
if (aligned_len(p, alloc_unit) >= alloc_unit) {
if (_aligned_len(p, alloc_unit) >= alloc_unit) {
goto found;
}
++p;
Expand Down Expand Up @@ -284,10 +285,10 @@ void StupidAllocator::init_rm_free(uint64_t offset, uint64_t length)
std::lock_guard<std::mutex> l(lock);
dout(10) << __func__ << " 0x" << std::hex << offset << "~" << length
<< std::dec << dendl;
btree_interval_set<uint64_t> rm;
btree_interval_set<uint64_t,allocator> rm;
rm.insert(offset, length);
for (unsigned i = 0; i < free.size() && !rm.empty(); ++i) {
btree_interval_set<uint64_t> overlap;
btree_interval_set<uint64_t,allocator> overlap;
overlap.intersection_of(rm, free[i]);
if (!overlap.empty()) {
dout(20) << __func__ << " bin " << i << " rm 0x" << std::hex << overlap
Expand Down
9 changes: 8 additions & 1 deletion src/os/bluestore/StupidAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "Allocator.h"
#include "include/btree_interval_set.h"
#include "os/bluestore/bluestore_types.h"
#include "include/mempool.h"

class StupidAllocator : public Allocator {
CephContext* cct;
Expand All @@ -17,13 +18,19 @@ class StupidAllocator : public Allocator {
int64_t num_free; ///< total bytes in freelist
int64_t num_reserved; ///< reserved bytes

std::vector<btree_interval_set<uint64_t> > free; ///< leading-edge copy
typedef mempool::bluestore_alloc::pool_allocator<
pair<const uint64_t,uint64_t>> allocator;
std::vector<btree_interval_set<uint64_t,allocator>> free; ///< leading-edge copy

uint64_t last_alloc;

unsigned _choose_bin(uint64_t len);
void _insert_free(uint64_t offset, uint64_t len);

uint64_t _aligned_len(
btree_interval_set<uint64_t,allocator>::iterator p,
uint64_t alloc_unit);

public:
StupidAllocator(CephContext* cct);
~StupidAllocator() override;
Expand Down