Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

luminous: os/bluestore: replace 21089 repair with something online (instead of fsck) #17734

Merged
merged 3 commits into from Oct 3, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/common/options.cc
Expand Up @@ -3002,6 +3002,10 @@ std::vector<Option> get_global_options() {
// --------------------------
// bluestore

// Dev-level fault-injection switch (default off). It is read in
// KernelDevice::open(), which, when the option is true, masks the block_size
// bit out of the detected device size so that the size inconsistency from
// http://tracker.ceph.com/issues/21089 can be reproduced on purpose.
Option("bdev_inject_bad_size", Option::TYPE_BOOL, Option::LEVEL_DEV)
.set_default(false)
.set_description(""),

Option("bdev_debug_inflight_ios", Option::TYPE_BOOL, Option::LEVEL_DEV)
.set_default(false)
.set_description(""),
Expand Down
45 changes: 44 additions & 1 deletion src/os/bluestore/BitmapFreelistManager.cc
Expand Up @@ -105,7 +105,7 @@ int BitmapFreelistManager::create(uint64_t new_size, KeyValueDB::Transaction txn
return 0;
}

int BitmapFreelistManager::init()
int BitmapFreelistManager::init(uint64_t dev_size)
{
dout(1) << __func__ << dendl;

Expand Down Expand Up @@ -153,6 +153,49 @@ int BitmapFreelistManager::init()
<< " blocks_per_key 0x" << blocks_per_key
<< std::dec << dendl;
_init_misc();

// check for http://tracker.ceph.com/issues/21089 inconsistency
//
// Online repair: compare the freelist's stored `size` with the size derived
// from the device (dev_size). If the stored value matches the specific wrong
// value that bug would have produced, rewrite the persisted metadata ("size",
// and "blocks" if needed) and fix up the affected block range in a single
// synchronous transaction. Any other mismatch is left untouched here.
{
// expected size: dev_size aligned to bytes_per_block
// (presumably P2ALIGN rounds down to a power-of-two boundary -- confirm)
uint64_t new_size = P2ALIGN(dev_size, bytes_per_block);
if (new_size != size) {
// NOTE: this intentionally clears only the bits set in bytes_per_block
// (it is NOT an alignment mask such as ~(bytes_per_block - 1)); it
// recreates the bad size so it can be recognised below. The same style of
// mask is used by the bdev_inject_bad_size path in KernelDevice::open().
uint64_t bad_size = new_size & ~bytes_per_block;
if (size == bad_size) {
derr << __func__ << " size is 0x" << std::hex << size << " should be 0x"
<< new_size << " and appears to be due to #21089" << std::dec
<< dendl;

// block count for the corrected size, rounded up to a whole number of
// keys (a multiple of blocks_per_key)
uint64_t new_blocks = new_size / bytes_per_block;
if (new_blocks / blocks_per_key * blocks_per_key != new_blocks) {
new_blocks = (new_blocks / blocks_per_key + 1) *
blocks_per_key;
}

KeyValueDB::Transaction t = kvdb->get_transaction();
{
// persist the corrected size under the freelist metadata prefix
bufferlist sizebl;
::encode(new_size, sizebl);
t->set(meta_prefix, "size", sizebl);
}
if (new_blocks != blocks) {
// the rounded-up block count changed as well: persist it and apply _xor
// to the range from the corrected size up to the end of the last key
// NOTE(review): presumably _xor flips the allocation bits so that this
// past-end-of-device range is marked used -- confirm against _xor
derr << "blocks is 0x" << std::hex << blocks << " should be 0x"
<< new_blocks << std::dec << dendl;
bufferlist bl;
::encode(new_blocks, bl);
t->set(meta_prefix, "blocks", bl);
_xor(new_size, new_blocks * bytes_per_block - new_size, t);
} else {
// block count unchanged: only the single block starting at bad_size is
// passed to _xor
derr << "blocks are ok" << dendl;
_xor(bad_size, bytes_per_block, t);
}
// commit synchronously; a failed commit trips the assert rather than
// being returned to the caller
int r = kvdb->submit_transaction_sync(t);
assert(r == 0);
// update the in-memory values only after the transaction was submitted
size = new_size;
blocks = new_blocks;
derr << __func__ << " fixed inconsistency, size now 0x" << std::hex
<< size << " blocks 0x" << blocks << std::dec << dendl;
}
}
}
return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion src/os/bluestore/BitmapFreelistManager.h
Expand Up @@ -53,7 +53,7 @@ class BitmapFreelistManager : public FreelistManager {

int create(uint64_t size, KeyValueDB::Transaction txn) override;

int init() override;
int init(uint64_t dev_size) override;
void shutdown() override;

void dump() override;
Expand Down
67 changes: 1 addition & 66 deletions src/os/bluestore/BlueStore.cc
Expand Up @@ -4226,7 +4226,7 @@ int BlueStore::_open_fm(bool create)
db->submit_transaction_sync(t);
}

int r = fm->init();
int r = fm->init(bdev->get_size());
if (r < 0) {
derr << __func__ << " freelist init failed: " << cpp_strerror(r) << dendl;
delete fm;
Expand Down Expand Up @@ -6001,71 +6001,6 @@ int BlueStore::fsck(bool deep)
}
fm->enumerate_reset();
size_t count = used_blocks.count();
if (used_blocks.size() == count + 1) {
// this due to http://tracker.ceph.com/issues/21089
bufferlist fm_bpb_bl, fm_blocks_bl, fm_bpk_bl;
db->get(PREFIX_ALLOC, "bytes_per_block", &fm_bpb_bl);
db->get(PREFIX_ALLOC, "blocks", &fm_blocks_bl);
db->get(PREFIX_ALLOC, "blocks_per_key", &fm_bpk_bl);
uint64_t fm_blocks = 0;
uint64_t fm_bsize = 1;
uint64_t fm_blocks_per_key = 1;
try {
auto p = fm_blocks_bl.begin();
::decode(fm_blocks, p);
auto q = fm_bpb_bl.begin();
::decode(fm_bsize, q);
auto r = fm_bpk_bl.begin();
::decode(fm_blocks_per_key, r);
} catch (buffer::error& e) {
}
uint64_t dev_bsize = bdev->get_block_size();
uint64_t bad_size = bdev->get_size() & ~fm_bsize;
if (used_blocks.test(bad_size / dev_bsize) == 0) {
// this is the last block of the device that we previously
// (incorrectly) truncated off of the effective device size. this
// prevented BitmapFreelistManager from marking it as used along with
// the other "past-eof" blocks in the last key slot. mark it used
// now.
derr << __func__ << " warning: fixing leaked block 0x" << std::hex
<< bad_size << "~" << fm_bsize << std::dec << " due to old bug"
<< dendl;
KeyValueDB::Transaction t = db->get_transaction();
// fix freelistmanager metadata (the internal 'blocks' count is
// rounded up to include the trailing key, past eof)
uint64_t new_blocks = bdev->get_size() / fm_bsize;
if (new_blocks / fm_blocks_per_key * fm_blocks_per_key != new_blocks) {
new_blocks = (new_blocks / fm_blocks_per_key + 1) *
fm_blocks_per_key;
}
if (new_blocks != fm_blocks) {
// the fm block count increased
derr << __func__ << " freelist block and key count changed, fixing 0x"
<< std::hex << bdev->get_size() << "~"
<< ((new_blocks * fm_bsize) - bdev->get_size()) << std::dec
<< dendl;
bufferlist bl;
::encode(new_blocks, bl);
t->set(PREFIX_ALLOC, "blocks", bl);
fm->allocate(bdev->get_size(),
(new_blocks * fm_bsize) - bdev->get_size(),
t);
} else {
// block count is the same, but size changed; fix just the size
derr << __func__ << " fixing just the stray block at 0x"
<< std::hex << bad_size << "~" << fm_bsize << std::dec << dendl;
fm->allocate(bad_size, fm_bsize, t);
}
bufferlist sizebl;
::encode(bdev->get_size(), sizebl);
t->set(PREFIX_ALLOC, "size", sizebl);
int r = db->submit_transaction_sync(t);
assert(r == 0);

used_blocks.set(bad_size / dev_bsize);
++count;
}
}
if (used_blocks.size() != count) {
assert(used_blocks.size() > count);
++errors;
Expand Down
2 changes: 1 addition & 1 deletion src/os/bluestore/FreelistManager.h
Expand Up @@ -26,7 +26,7 @@ class FreelistManager {

virtual int create(uint64_t size, KeyValueDB::Transaction txn) = 0;

virtual int init() = 0;
virtual int init(uint64_t dev_size) = 0;
virtual void shutdown() = 0;

virtual void dump() = 0;
Expand Down
5 changes: 5 additions & 0 deletions src/os/bluestore/KernelDevice.cc
Expand Up @@ -130,6 +130,11 @@ int KernelDevice::open(const string& p)
} else {
size = st.st_size;
}
// Fault injection for testing: when bdev_inject_bad_size is enabled, log the
// real size and then clear the bits set in block_size from `size`.
// NOTE: `~block_size` is deliberately not an alignment mask
// (~(block_size - 1)); per the log message below the goal is to inject a
// *bad* size, matching the value the #21089 check in
// BitmapFreelistManager::init() looks for.
if (cct->_conf->get_val<bool>("bdev_inject_bad_size")) {
derr << "injecting bad size; actual 0x" << std::hex << size
<< " but using 0x" << (size & ~block_size) << std::dec << dendl;
size &= ~(block_size);
}

{
char partition[PATH_MAX], devname[PATH_MAX];
Expand Down