diff --git a/src/os/ObjectMap.h b/src/os/ObjectMap.h index c4efc7fbc202a..95ca9a5b0553c 100644 --- a/src/os/ObjectMap.h +++ b/src/os/ObjectMap.h @@ -124,20 +124,34 @@ class ObjectMap { ) = 0; - /// Clone keys efficiently from oid map to target map + /// Clone keys from oid map to target map virtual int clone( const ghobject_t &oid, ///< [in] object containing map const ghobject_t &target, ///< [in] target of clone const SequencerPosition *spos=0 ///< [in] sequencer position ) { return 0; } + /// Rename map because of name change + virtual int rename( + const ghobject_t &from, ///< [in] object containing map + const ghobject_t &to, ///< [in] new name + const SequencerPosition *spos=0 ///< [in] sequencer position + ) { return 0; } + + /// For testing clone keys from oid map to target map using faster but more complex method + virtual int legacy_clone( + const ghobject_t &oid, ///< [in] object containing map + const ghobject_t &target, ///< [in] target of clone + const SequencerPosition *spos=0 ///< [in] sequencer position + ) { return 0; } + /// Ensure all previous writes are durable virtual int sync( const ghobject_t *oid=0, ///< [in] object const SequencerPosition *spos=0 ///< [in] Sequencer ) { return 0; } - virtual bool check(std::ostream &out) { return true; } + virtual int check(std::ostream &out, bool repair = false) { return 0; } typedef KeyValueDB::GenericIteratorImpl ObjectMapIteratorImpl; typedef ceph::shared_ptr ObjectMapIterator; diff --git a/src/os/filestore/DBObjectMap.cc b/src/os/filestore/DBObjectMap.cc index a4685b8c33671..0cc7773fe8176 100644 --- a/src/os/filestore/DBObjectMap.cc +++ b/src/os/filestore/DBObjectMap.cc @@ -54,22 +54,52 @@ static void append_escaped(const string &in, string *out) } } -bool DBObjectMap::check(std::ostream &out) +int DBObjectMap::check(std::ostream &out, bool repair) { - bool retval = true; + int errors = 0; + bool repaired = false; map parent_to_num_children; map parent_to_actual_num_children; KeyValueDB::Iterator iter 
= db->get_iterator(HOBJECT_TO_SEQ); for (iter->seek_to_first(); iter->valid(); iter->next()) { _Header header; - assert(header.num_children == 1); - header.num_children = 0; // Hack for leaf node bufferlist bl = iter->value(); while (true) { bufferlist::iterator bliter = bl.begin(); header.decode(bliter); if (header.seq != 0) parent_to_actual_num_children[header.seq] = header.num_children; + + // Check complete table + bool complete_error = false; + boost::optional prev; + KeyValueDB::Iterator complete_iter = db->get_iterator(USER_PREFIX + header_key(header.seq) + COMPLETE_PREFIX); + for (complete_iter->seek_to_first(); complete_iter->valid(); + complete_iter->next()) { + if (prev && prev >= complete_iter->key()) { + out << "Bad complete for " << header.oid << std::endl; + complete_error = true; + break; + } + prev = string(complete_iter->value().c_str(), complete_iter->value().length() - 1); + } + if (complete_error) { + out << "Complete mapping for " << header.seq << " :" << std::endl; + for (complete_iter->seek_to_first(); complete_iter->valid(); + complete_iter->next()) { + out << complete_iter->key() << " -> " << string(complete_iter->value().c_str(), complete_iter->value().length() - 1) << std::endl; + } + if (repair) { + repaired = true; + KeyValueDB::Transaction t = db->get_transaction(); + t->rmkeys_by_prefix(USER_PREFIX + header_key(header.seq) + COMPLETE_PREFIX); + db->submit_transaction(t); + out << "Cleared complete mapping to repair" << std::endl; + } else { + errors++; // Only count when not repaired + } + } + if (header.parent == 0) break; @@ -85,7 +115,7 @@ bool DBObjectMap::check(std::ostream &out) db->get(sys_parent_prefix(header), to_get, &got); if (got.empty()) { out << "Missing: seq " << header.parent << std::endl; - retval = false; + errors++; break; } else { bl = got.begin()->second; @@ -102,11 +132,13 @@ bool DBObjectMap::check(std::ostream &out) out << "Invalid: seq " << i->first << " recorded children: " << 
parent_to_actual_num_children[i->first] << " found: " << i->second << std::endl; - retval = false; + errors++; } parent_to_actual_num_children.erase(i->first); } - return retval; + if (errors == 0 && repaired) + return -1; + return errors; } string DBObjectMap::ghobject_key(const ghobject_t &oid) @@ -314,6 +346,17 @@ int DBObjectMap::DBObjectMapIteratorImpl::lower_bound(const string &to) return adjust(); } +int DBObjectMap::DBObjectMapIteratorImpl::lower_bound_parent(const string &to) +{ + int r = lower_bound(to); + if (r < 0) + return r; + if (valid() && !on_parent()) + return next_parent(); + else + return r; +} + int DBObjectMap::DBObjectMapIteratorImpl::upper_bound(const string &after) { init(); @@ -354,39 +397,57 @@ int DBObjectMap::DBObjectMapIteratorImpl::next(bool validate) int DBObjectMap::DBObjectMapIteratorImpl::next_parent() { - if (!parent_iter || !parent_iter->valid()) { - invalid = true; - return 0; - } r = next(); if (r < 0) return r; - if (!valid() || on_parent() || !parent_iter->valid()) - return 0; + while (parent_iter && parent_iter->valid() && !on_parent()) { + assert(valid()); + r = lower_bound(parent_iter->key()); + if (r < 0) + return r; + } - return lower_bound(parent_iter->key()); + if (!parent_iter || !parent_iter->valid()) { + invalid = true; + } + return 0; } int DBObjectMap::DBObjectMapIteratorImpl::in_complete_region(const string &to_test, string *begin, string *end) { + /* This is clumsy because one cannot call prev() on end(), nor can one + * test for == begin(). 
+ */ complete_iter->upper_bound(to_test); - if (complete_iter->valid()) + if (complete_iter->valid()) { complete_iter->prev(); - else + if (!complete_iter->valid()) { + complete_iter->upper_bound(to_test); + return false; + } + } else { complete_iter->seek_to_last(); + if (!complete_iter->valid()) + return false; + } - if (!complete_iter->valid()) + assert(complete_iter->key() <= to_test); + assert(complete_iter->value().length() >= 1); + string _end(complete_iter->value().c_str(), + complete_iter->value().length() - 1); + if (_end.empty() || _end > to_test) { + if (begin) + *begin = complete_iter->key(); + if (end) + *end = _end; + return true; + } else { + complete_iter->next(); + assert(!complete_iter->valid() || complete_iter->key() > to_test); return false; - - string _end; - if (begin) - *begin = complete_iter->key(); - _end = string(complete_iter->value().c_str()); - if (end) - *end = _end; - return (to_test >= complete_iter->key()) && (!_end.size() || _end > to_test); + } } /** @@ -554,58 +615,6 @@ int DBObjectMap::_clear(Header header, return 0; } -int DBObjectMap::merge_new_complete(Header header, - const map &new_complete, - DBObjectMapIterator iter, - KeyValueDB::Transaction t) -{ - KeyValueDB::Iterator complete_iter = db->get_iterator( - complete_prefix(header) - ); - map::const_iterator i = new_complete.begin(); - set to_remove; - map to_add; - - string begin, end; - while (i != new_complete.end()) { - string new_begin = i->first; - string new_end = i->second; - int r = iter->in_complete_region(new_begin, &begin, &end); - if (r < 0) - return r; - if (r) { - to_remove.insert(begin); - new_begin = begin; - } - ++i; - while (i != new_complete.end()) { - if (!new_end.size() || i->first <= new_end) { - if (!new_end.size() && i->second > new_end) { - new_end = i->second; - } - ++i; - continue; - } - - r = iter->in_complete_region(new_end, &begin, &end); - if (r < 0) - return r; - if (r) { - to_remove.insert(begin); - new_end = end; - continue; - } - break; 
- } - bufferlist bl; - bl.append(bufferptr(new_end.c_str(), new_end.size() + 1)); - to_add.insert(make_pair(new_begin, bl)); - } - t->rmkeys(complete_prefix(header), to_remove); - t->set(complete_prefix(header), to_add); - return 0; -} - int DBObjectMap::copy_up_header(Header header, KeyValueDB::Transaction t) { @@ -618,22 +627,6 @@ int DBObjectMap::copy_up_header(Header header, return 0; } -int DBObjectMap::need_parent(DBObjectMapIterator iter) -{ - int r = iter->seek_to_first(); - if (r < 0) - return r; - - if (!iter->valid()) - return 0; - - string begin, end; - if (iter->in_complete_region(iter->key(), &begin, &end) && end == "") { - return 0; - } - return 1; -} - int DBObjectMap::rm_keys(const ghobject_t &oid, const set &to_clear, const SequencerPosition *spos) @@ -650,62 +643,33 @@ int DBObjectMap::rm_keys(const ghobject_t &oid, return db->submit_transaction(t); } - // Copy up keys from parent around to_clear - int keep_parent; + assert(state.v < 3); + { - DBObjectMapIterator iter = _get_iterator(header); - iter->seek_to_first(); - map new_complete; + // We only get here for legacy (v2) stores + // Copy up all keys from parent excluding to_clear + // and remove parent + // This eliminates a v2 format use of complete for this oid only map to_write; - for(set::const_iterator i = to_clear.begin(); - i != to_clear.end(); - ) { - unsigned copied = 0; - iter->lower_bound(*i); - ++i; - if (!iter->valid()) - break; - string begin = iter->key(); - if (!iter->on_parent()) - iter->next_parent(); - if (new_complete.size() && new_complete.rbegin()->second == begin) { - begin = new_complete.rbegin()->first; - } - while (iter->valid() && copied < 20) { - if (!to_clear.count(iter->key())) - to_write[iter->key()].append(iter->value()); - if (i != to_clear.end() && *i <= iter->key()) { - ++i; - copied = 0; - } - - iter->next_parent(); - copied++; - } - if (iter->valid()) { - new_complete[begin] = iter->key(); - } else { - new_complete[begin] = ""; - break; - } + 
ObjectMapIterator iter = _get_iterator(header); + for (iter->seek_to_first() ; iter->valid() ; iter->next()) { + if (iter->status()) + return iter->status(); + if (!to_clear.count(iter->key())) + to_write[iter->key()] = iter->value(); } t->set(user_prefix(header), to_write); - merge_new_complete(header, new_complete, iter, t); - keep_parent = need_parent(iter); - if (keep_parent < 0) - return keep_parent; - } - if (!keep_parent) { - copy_up_header(header, t); - Header parent = lookup_parent(header); - if (!parent) - return -EINVAL; - parent->num_children--; - _clear(parent, t); - header->parent = 0; - set_map_header(hl, oid, *header, t); - t->rmkeys_by_prefix(complete_prefix(header)); - } + } // destruct iter which has parent in_use + + copy_up_header(header, t); + Header parent = lookup_parent(header); + if (!parent) + return -EINVAL; + parent->num_children--; + _clear(parent, t); + header->parent = 0; + set_map_header(hl, oid, *header, t); + t->rmkeys_by_prefix(complete_prefix(header)); return db->submit_transaction(t); } @@ -880,10 +844,14 @@ int DBObjectMap::remove_xattrs(const ghobject_t &oid, return db->submit_transaction(t); } -int DBObjectMap::clone(const ghobject_t &oid, +// ONLY USED FOR TESTING +// Set version to 2 to avoid asserts +int DBObjectMap::legacy_clone(const ghobject_t &oid, const ghobject_t &target, const SequencerPosition *spos) { + state.v = 2; + if (oid == target) return 0; @@ -936,6 +904,72 @@ int DBObjectMap::clone(const ghobject_t &oid, return db->submit_transaction(t); } +int DBObjectMap::clone(const ghobject_t &oid, + const ghobject_t &target, + const SequencerPosition *spos) +{ + if (oid == target) + return 0; + + MapHeaderLock _l1(this, MIN_GHOBJ(oid, target, true)); + MapHeaderLock _l2(this, MAX_GHOBJ(oid, target, true)); + MapHeaderLock *lsource, *ltarget; + if (cmp_bitwise(oid, target) > 0) { + lsource = &_l2; + ltarget= &_l1; + } else { + lsource = &_l1; + ltarget= &_l2; + } + + KeyValueDB::Transaction t = db->get_transaction(); 
+ { + Header destination = lookup_map_header(*ltarget, target); + if (destination) { + if (check_spos(target, destination, spos)) + return 0; + destination->num_children--; + remove_map_header(*ltarget, target, destination, t); + _clear(destination, t); + } + } + + Header source = lookup_map_header(*lsource, oid); + if (!source) + return db->submit_transaction(t); + + Header destination = generate_new_header(target, Header()); + if (spos) + destination->spos = *spos; + + set_map_header(*ltarget, target, *destination, t); + + bufferlist bl; + int r = _get_header(source, &bl); + if (r < 0) + return r; + _set_header(destination, bl, t); + + map to_set; + KeyValueDB::Iterator xattr_iter = db->get_iterator(xattr_prefix(source)); + for (xattr_iter->seek_to_first(); + xattr_iter->valid(); + xattr_iter->next()) + to_set.insert(make_pair(xattr_iter->key(), xattr_iter->value())); + t->set(xattr_prefix(destination), to_set); + + map to_write; + ObjectMapIterator iter = _get_iterator(source); + for (iter->seek_to_first() ; iter->valid() ; iter->next()) { + if (iter->status()) + return iter->status(); + to_write[iter->key()] = iter->value(); + } + t->set(user_prefix(destination), to_write); + + return db->submit_transaction(t); +} + int DBObjectMap::upgrade_to_v2() { dout(1) << __func__ << " start" << dendl; @@ -1023,9 +1057,17 @@ int DBObjectMap::init(bool do_upgrade) } } else { // New store - state.v = 2; + // Version 3 means that complete regions never used + state.v = 3; state.seq = 1; } + ostringstream ss; + int errors = check(ss, true); + if (errors) { + derr << ss.str() << dendl; + if (errors > 0) + return -EINVAL; + } dout(20) << "(init)dbobjectmap: seq is " << state.seq << dendl; return 0; } @@ -1150,9 +1192,9 @@ DBObjectMap::Header DBObjectMap::lookup_parent(Header input) } Header header = Header(new _Header(), RemoveOnDelete(this)); - header->seq = input->parent; bufferlist::iterator iter = out.begin()->second.begin(); header->decode(iter); + assert(header->seq == 
input->parent); dout(20) << "lookup_parent: parent seq is " << header->seq << " with parent " << header->parent << dendl; in_use.insert(header->seq); @@ -1178,7 +1220,8 @@ void DBObjectMap::clear_header(Header header, KeyValueDB::Transaction t) dout(20) << "clear_header: clearing seq " << header->seq << dendl; t->rmkeys_by_prefix(user_prefix(header)); t->rmkeys_by_prefix(sys_prefix(header)); - t->rmkeys_by_prefix(complete_prefix(header)); + if (state.v < 3) + t->rmkeys_by_prefix(complete_prefix(header)); // Needed when header.parent != 0 t->rmkeys_by_prefix(xattr_prefix(header)); set keys; keys.insert(header_key(header->seq)); @@ -1262,3 +1305,82 @@ int DBObjectMap::list_objects(vector *out) } return 0; } + +int DBObjectMap::list_object_headers(vector<_Header> *out) +{ + int error = 0; + KeyValueDB::Iterator iter = db->get_iterator(HOBJECT_TO_SEQ); + for (iter->seek_to_first(); iter->valid(); iter->next()) { + bufferlist bl = iter->value(); + bufferlist::iterator bliter = bl.begin(); + _Header header; + header.decode(bliter); + out->push_back(header); + while (header.parent) { + set to_get; + map got; + to_get.insert(HEADER_KEY); + db->get(sys_parent_prefix(header), to_get, &got); + if (got.empty()) { + dout(0) << "Missing: seq " << header.parent << dendl; + error = -ENOENT; + break; + } else { + bl = got.begin()->second; + bufferlist::iterator bliter = bl.begin(); + header.decode(bliter); + out->push_back(header); + } + } + } + return error; +} + +ostream& operator<<(ostream& out, const DBObjectMap::_Header& h) +{ + out << "seq=" << h.seq << " parent=" << h.parent + << " num_children=" << h.num_children + << " ghobject=" << h.oid; + return out; +} + +int DBObjectMap::rename(const ghobject_t &from, + const ghobject_t &to, + const SequencerPosition *spos) +{ + if (from == to) + return 0; + + MapHeaderLock _l1(this, MIN_GHOBJ(from, to, true)); + MapHeaderLock _l2(this, MAX_GHOBJ(from, to, true)); + MapHeaderLock *lsource, *ltarget; + if (cmp_bitwise(from, to) > 0) { 
+ lsource = &_l2; + ltarget= &_l1; + } else { + lsource = &_l1; + ltarget= &_l2; + } + + KeyValueDB::Transaction t = db->get_transaction(); + { + Header destination = lookup_map_header(*ltarget, to); + if (destination) { + if (check_spos(to, destination, spos)) + return 0; + destination->num_children--; + remove_map_header(*ltarget, to, destination, t); + _clear(destination, t); + } + } + + Header hdr = lookup_map_header(*lsource, from); + if (!hdr) + return db->submit_transaction(t); + + remove_map_header(*lsource, from, hdr, t); + hdr->oid = to; + set_map_header(*ltarget, to, *hdr, t); + + return db->submit_transaction(t); +} diff --git a/src/os/filestore/DBObjectMap.h b/src/os/filestore/DBObjectMap.h index d23a246165b87..5cf7f78270798 100644 --- a/src/os/filestore/DBObjectMap.h +++ b/src/os/filestore/DBObjectMap.h @@ -30,7 +30,7 @@ * @see user_prefix * @see sys_prefix * - * - GHOBJECT_TO_SEQ: Contains leaf mapping from ghobject_t->hobj.seq and + * - HOBJECT_TO_SEQ: Contains leaf mapping from ghobject_t->header.seq and * corresponding omap header * - SYS_PREFIX: GLOBAL_STATE_KEY - contains next seq number * @see State @@ -205,6 +205,18 @@ class DBObjectMap : public ObjectMap { const SequencerPosition *spos=0 ); + int rename( + const ghobject_t &from, + const ghobject_t &to, + const SequencerPosition *spos=0 + ); + + int legacy_clone( + const ghobject_t &oid, + const ghobject_t &target, + const SequencerPosition *spos=0 + ); + /// Read initial state from backing store int init(bool upgrade = false); @@ -212,15 +224,20 @@ class DBObjectMap : public ObjectMap { int upgrade_to_v2(); /// Consistency check, debug, there must be no parallel writes - bool check(std::ostream &out); + int check(std::ostream &out, bool repair = false); /// Ensure that all previous operations are durable int sync(const ghobject_t *oid=0, const SequencerPosition *spos=0); - /// Util, list all objects, there must be no other concurrent access + /// Util, get all objects, there must be no other 
concurrent access int list_objects(vector *objs ///< [out] objects ); + struct _Header; + // Util, get all object headers, there must be no other concurrent access + int list_object_headers(vector<_Header> *out ///< [out] headers + ); + ObjectMapIterator get_iterator(const ghobject_t &oid); static const string USER_PREFIX; @@ -275,28 +292,29 @@ class DBObjectMap : public ObjectMap { uint64_t parent; uint64_t num_children; - coll_t c; ghobject_t oid; SequencerPosition spos; void encode(bufferlist &bl) const { + coll_t unused; ENCODE_START(2, 1, bl); ::encode(seq, bl); ::encode(parent, bl); ::encode(num_children, bl); - ::encode(c, bl); + ::encode(unused, bl); ::encode(oid, bl); ::encode(spos, bl); ENCODE_FINISH(bl); } void decode(bufferlist::iterator &bl) { + coll_t unused; DECODE_START(2, bl); ::decode(seq, bl); ::decode(parent, bl); ::decode(num_children, bl); - ::decode(c, bl); + ::decode(unused, bl); ::decode(oid, bl); if (struct_v >= 2) ::decode(spos, bl); @@ -307,7 +325,6 @@ class DBObjectMap : public ObjectMap { f->dump_unsigned("seq", seq); f->dump_unsigned("parent", parent); f->dump_unsigned("num_children", num_children); - f->dump_stream("coll") << c; f->dump_stream("oid") << oid; } @@ -398,8 +415,15 @@ class DBObjectMap : public ObjectMap { /// skips to next valid parent entry int next_parent(); - - /// Tests whether to_test is in complete region + + /// first parent() >= to + int lower_bound_parent(const string &to); + + /** + * Tests whether to_test is in complete region + * + * postcondition: complete_iter will be max s.t. 
complete_iter->value > to_test + */ int in_complete_region(const string &to_test, ///< [in] key to test string *begin, ///< [out] beginning of region string *end ///< [out] end of region @@ -486,19 +510,19 @@ class DBObjectMap : public ObjectMap { /// Remove header and all related prefixes int _clear(Header header, KeyValueDB::Transaction t); - /// Adds to t operations necessary to add new_complete to the complete set - int merge_new_complete(Header header, - const map &new_complete, - DBObjectMapIterator iter, - KeyValueDB::Transaction t); + + /* Scan complete region bumping *begin to the beginning of any + * containing region and adding all complete region keys between + * the updated begin and end to the complete_keys_to_remove set */ + int merge_new_complete(DBObjectMapIterator &iter, + string *begin, + const string &end, + set *complete_keys_to_remove); /// Writes out State (mainly next_seq) int write_state(KeyValueDB::Transaction _t = KeyValueDB::Transaction()); - /// 0 if the complete set now contains all of key space, < 0 on error, 1 else - int need_parent(DBObjectMapIterator iter); - /// Copies header entry from parent @see rm_keys int copy_up_header(Header header, KeyValueDB::Transaction t); @@ -531,4 +555,6 @@ class DBObjectMap : public ObjectMap { WRITE_CLASS_ENCODER(DBObjectMap::_Header) WRITE_CLASS_ENCODER(DBObjectMap::State) +ostream& operator<<(ostream& out, const DBObjectMap::_Header& h); + #endif diff --git a/src/os/filestore/FileStore.cc b/src/os/filestore/FileStore.cc index 69d8a6bf33104..a32b86529284a 100644 --- a/src/os/filestore/FileStore.cc +++ b/src/os/filestore/FileStore.cc @@ -5226,7 +5226,7 @@ int FileStore::_collection_move_rename(const coll_t& oldcid, const ghobject_t& o if (r == 0) { // the name changed; link the omap content - r = object_map->clone(oldoid, o, &spos); + r = object_map->rename(oldoid, o, &spos); if (r == -ENOENT) r = 0; } diff --git a/src/test/ObjectMap/CMakeLists.txt b/src/test/ObjectMap/CMakeLists.txt index 
2a37b5acb2600..0eb00903dfb46 100644 --- a/src/test/ObjectMap/CMakeLists.txt +++ b/src/test/ObjectMap/CMakeLists.txt @@ -5,6 +5,7 @@ add_executable(ceph_test_object_map ) set_target_properties(ceph_test_object_map PROPERTIES COMPILE_FLAGS ${UNITTEST_CXX_FLAGS}) +add_ceph_unittest(ceph_test_object_map ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph_test_object_map) target_link_libraries(ceph_test_object_map os common diff --git a/src/test/ObjectMap/test_object_map.cc b/src/test/ObjectMap/test_object_map.cc index a38e579e16f30..ebd55d7128f3d 100644 --- a/src/test/ObjectMap/test_object_map.cc +++ b/src/test/ObjectMap/test_object_map.cc @@ -73,6 +73,17 @@ class ObjectMapTester { db->set_keys(hoid, to_write); } + void set_keys(ghobject_t hoid, const map &to_set) { + map to_write; + for (auto &&i: to_set) { + bufferptr bp(i.second.data(), i.second.size()); + bufferlist bl; + bl.append(bp); + to_write.insert(make_pair(i.first, bl)); + } + db->set_keys(hoid, to_write); + } + void set_xattr(ghobject_t hoid, string key, string value) { map to_write; @@ -145,8 +156,10 @@ class ObjectMapTester { map got; db->get_values(hoid, to_get, &got); if (!got.empty()) { - *value = string(got.begin()->second.c_str(), - got.begin()->second.length()); + if (value) { + *value = string(got.begin()->second.c_str(), + got.begin()->second.length()); + } return 1; } else { return 0; @@ -158,6 +171,11 @@ class ObjectMapTester { key); } + void remove_keys(const string &objname, const set &to_remove) { + remove_keys(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), + to_remove); + } + void remove_key(ghobject_t hoid, string key) { set to_remove; @@ -165,6 +183,11 @@ class ObjectMapTester { db->rm_keys(hoid, to_remove); } + void remove_keys(ghobject_t hoid, + const set &to_remove) { + db->rm_keys(hoid, to_remove); + } + void remove_xattr(const string &objname, const string &key) { remove_xattr(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), key); @@ -187,10 +210,30 @@ class ObjectMapTester { 
db->clone(hoid, hoid2); } + void rename(const string &objname, const string &target) { + rename(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), + ghobject_t(hobject_t(sobject_t(target, CEPH_NOSNAP)))); + } + + void rename(ghobject_t hoid, + ghobject_t hoid2) { + db->rename(hoid, hoid2); + } + void clear(const string &objname) { clear(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP)))); } + void legacy_clone(const string &objname, const string &target) { + legacy_clone(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), + ghobject_t(hobject_t(sobject_t(target, CEPH_NOSNAP)))); + } + + void legacy_clone(ghobject_t hoid, + ghobject_t hoid2) { + db->legacy_clone(hoid, hoid2); + } + void clear(ghobject_t hoid) { db->clear(hoid); } @@ -204,10 +247,10 @@ class ObjectMapTester { } void def_init() { - for (unsigned i = 0; i < 1000; ++i) { + for (unsigned i = 0; i < 10000; ++i) { key_space.insert("key_" + num_str(i)); } - for (unsigned i = 0; i < 1000; ++i) { + for (unsigned i = 0; i < 100; ++i) { object_name_space.insert("name_" + num_str(i)); } } @@ -233,17 +276,35 @@ class ObjectMapTester { << value << std::endl; } - void auto_set_key(ostream &out) { - set::iterator key = rand_choose(key_space); + void test_set_key(const string &obj, const string &key, const string &val) { + omap[obj][key] = val; + set_key(obj, key, val); + } + + void test_set_keys(const string &obj, const map &to_set) { + for (auto &&i: to_set) { + omap[obj][i.first] = i.second; + } + set_keys( + ghobject_t(hobject_t(sobject_t(obj, CEPH_NOSNAP))), + to_set); + } + + void auto_set_keys(ostream &out) { set::iterator object = rand_choose(object_name_space); - string value = val_from_key(*object, *key); + map to_set; + unsigned amount = (rand() % 10) + 1; + for (unsigned i = 0; i < amount; ++i) { + set::iterator key = rand_choose(key_space); + string value = val_from_key(*object, *key); + out << "auto_set_key " << *object << ": " << *key << " -> " + << value << std::endl; + 
to_set.insert(make_pair(*key, value));
+    }
-    omap[*object][*key] = value;
-    set_key(*object, *key, value);
-    out << "auto_set_key " << *object << ": " << *key << " -> "
-        << value << std::endl;
+    test_set_keys(*object, to_set);
   }
   void xattrs_on_object(const string &object, set *out) {
@@ -403,48 +464,65 @@ class ObjectMapTester {
     return 0;
   }
-  void auto_clone_key(ostream &out) {
-    set::iterator object = rand_choose(object_name_space);
-    set::iterator target = rand_choose(object_name_space);
-    while (target == object) {
-      target = rand_choose(object_name_space);
-    }
-    out << "clone " << *object << " to " << *target;
-    clone(*object, *target);
-    if (!omap.count(*object)) {
+  void test_clone(const string &object, const string &target, ostream &out) {  // clone in the db, then mirror the result in the omap/hmap/xattrs models
+    clone(object, target);
+    if (!omap.count(object)) {
       out << " source missing.";
-      omap.erase(*target);
+      omap.erase(target);
     } else {
       out << " source present.";
-      omap[*target] = omap[*object];
+      omap[target] = omap[object];
     }
-    if (!hmap.count(*object)) {
+    if (!hmap.count(object)) {
       out << " hmap source missing." << std::endl;
-      hmap.erase(*target);
+      hmap.erase(target);
     } else {
       out << " hmap source present." << std::endl;
-      hmap[*target] = hmap[*object];
+      hmap[target] = hmap[object];
     }
-    if (!xattrs.count(*object)) {
+    if (!xattrs.count(object)) {
-      out << " hmap source missing." << std::endl;
+      out << " xattrs source missing." << std::endl;
-      xattrs.erase(*target);
+      xattrs.erase(target);
     } else {
-      out << " hmap source present." << std::endl;
+      out << " xattrs source present." << std::endl;
-      xattrs[*target] = xattrs[*object];
+      xattrs[target] = xattrs[object];
     }
   }
-  void auto_remove_key(ostream &out) {
+  void auto_clone_key(ostream &out) {
+    set::iterator object = rand_choose(object_name_space);
+    set::iterator target = rand_choose(object_name_space);
+    while (target == object) {
+      target = rand_choose(object_name_space);
+    }
+    out << "clone " << *object << " to " << *target;
+    test_clone(*object, *target, out);
+  }
+
+  void test_remove_keys(const string &obj, const set &to_remove) {
+    for (auto &&k: to_remove)
+      omap[obj].erase(k);
+    remove_keys(obj, to_remove);
+  }
+
+  void test_remove_key(const string &obj, const string &key) {
+    omap[obj].erase(key);
+    remove_key(obj, key);
+  }
+
+  void auto_remove_keys(ostream &out) {
     set::iterator object = rand_choose(object_name_space);
     set kspace;
     keys_on_object(*object, &kspace);
-    set::iterator key = rand_choose(kspace);
-    if (key == kspace.end()) {
-      return;
+    set to_remove;
+    for (unsigned i = 0; i < 3; ++i) {
+      set::iterator key = rand_choose(kspace);
+      if (key == kspace.end())
+        continue;
+      out << "removing " << *key << " from " << *object << std::endl;
+      to_remove.insert(*key);
     }
-    out << "removing " << *key << " from " << *object << std::endl;
-    omap[*object].erase(*key);
-    remove_key(*object, *key);
+    test_remove_keys(*object, to_remove);
   }
   void auto_remove_xattr(ostream &out) {
@@ -469,12 +547,16 @@ class ObjectMapTester {
     xattrs.erase(*object);
   }
+  void test_clear(const string &obj) {
+    clear_omap(obj);
+    omap.erase(obj);
+    hmap.erase(obj);
+  }
+
   void auto_clear_omap(ostream &out) {
     set::iterator object = rand_choose(object_name_space);
     out << "auto_clear_object " << *object << std::endl;
-    clear_omap(*object);
-    omap.erase(*object);
-    hmap.erase(*object);
+    test_clear(*object);
   }
   void auto_write_header(ostream &out) {
@@ -516,6 +598,37 @@ class ObjectMapTester {
       return 0;
     }
   }
+
+  void verify_keys(const std::string &obj, ostream &out) {
+    set in_db;
+    ObjectMap::ObjectMapIterator iter 
= db->get_iterator(
+      ghobject_t(hobject_t(sobject_t(obj, CEPH_NOSNAP))));
+    for (iter->seek_to_first(); iter->valid(); iter->next()) {
+      in_db.insert(iter->key());
+    }
+    bool err = false;
+    for (auto &&i: omap[obj]) {
+      if (!in_db.count(i.first)) {
+        out << __func__ << ": obj " << obj << " missing key "
+            << i.first << std::endl;
+        err = true;
+      } else {
+        in_db.erase(i.first);
+      }
+    }
+    if (!in_db.empty()) {
+      out << __func__ << ": obj " << obj << " found extra keys "
+          << in_db << std::endl;
+      err = true;
+    }
+    ASSERT_FALSE(err);
+  }
+
+  void auto_verify_objects(ostream &out) {  // runs verify_keys() for every object recorded in omap
+    for (auto &&i: omap) {
+      verify_keys(i.first, out);
+    }
+  }
 };
 
 class ObjectMapTest : public ::testing::Test {
@@ -542,7 +655,7 @@ class ObjectMapTest : public ::testing::Test {
 
   virtual void TearDown() {
     std::cerr << "Checking..." << std::endl;
-    assert(db->check(std::cerr));
+    ASSERT_EQ(0, db->check(std::cerr));  // check() returns an int, not a bool; 0 is what this teardown requires
   }
 };
 
@@ -745,6 +858,128 @@ TEST_F(ObjectMapTest, OddEvenClone) {
   db->clear(hoid2);
 }
 
+TEST_F(ObjectMapTest, Rename) {  // keys written under "foo" must be readable under "foo2" after rename()
+  ghobject_t hoid(hobject_t(sobject_t("foo", CEPH_NOSNAP)));
+  ghobject_t hoid2(hobject_t(sobject_t("foo2", CEPH_NOSNAP)));
+
+  for (unsigned i = 0; i < 1000; ++i) {
+    tester.set_key(hoid, "foo" + num_str(i), "bar" + num_str(i));
+  }
+
+  db->rename(hoid, hoid2);
+  // Verify rename where target exists: clone back so both names exist, then rename again
+  db->clone(hoid2, hoid);
+  db->rename(hoid, hoid2);
+
+  int r = 0;
+  for (unsigned i = 0; i < 1000; ++i) {
+    string result;
+    r = tester.get_key(hoid2, "foo" + num_str(i), &result);
+    ASSERT_EQ(1, r);
+    ASSERT_EQ("bar" + num_str(i), result);
+
+    if (i % 2) {  // drop the odd keys; the even ones are re-checked below
+      tester.remove_key(hoid2, "foo" + num_str(i));
+    }
+  }
+
+  for (unsigned i = 0; i < 1000; ++i) {
+    string result;
+    r = tester.get_key(hoid2, "foo" + num_str(i), &result);
+    if (i % 2) {
+      ASSERT_EQ(0, r);
+    } else {
+      ASSERT_EQ(1, r);
+      ASSERT_EQ("bar" + num_str(i), result);
+    }
+  }
+
+  {
+    ObjectMap::ObjectMapIterator iter = db->get_iterator(hoid2);
+    iter->seek_to_first();
+    for (unsigned i = 0; i < 1000; ++i) {
+      if (!(i % 2)) {  // the iterator is expected to yield exactly the surviving even keys, in order
+        ASSERT_TRUE(iter->valid());
+        ASSERT_EQ("foo" + num_str(i), iter->key());
+        iter->next();
+      }
+    }
+  }
+
+  db->clear(hoid2);
+}
+
+TEST_F(ObjectMapTest, OddEvenOldClone) {  // legacy_clone(): source and target must then diverge independently
+  ghobject_t hoid(hobject_t(sobject_t("foo", CEPH_NOSNAP)));
+  ghobject_t hoid2(hobject_t(sobject_t("foo2", CEPH_NOSNAP)));
+
+  for (unsigned i = 0; i < 1000; ++i) {
+    tester.set_key(hoid, "foo" + num_str(i), "bar" + num_str(i));
+  }
+
+  db->legacy_clone(hoid, hoid2);
+
+  int r = 0;
+  for (unsigned i = 0; i < 1000; ++i) {
+    string result;
+    r = tester.get_key(hoid, "foo" + num_str(i), &result);
+    ASSERT_EQ(1, r);
+    ASSERT_EQ("bar" + num_str(i), result);
+    r = tester.get_key(hoid2, "foo" + num_str(i), &result);
+    ASSERT_EQ(1, r);
+    ASSERT_EQ("bar" + num_str(i), result);
+
+    if (i % 2) {  // odd keys are removed from the source, even keys from the clone
+      tester.remove_key(hoid, "foo" + num_str(i));
+    } else {
+      tester.remove_key(hoid2, "foo" + num_str(i));
+    }
+  }
+
+  for (unsigned i = 0; i < 1000; ++i) {
+    string result;
+    string result2;
+    r = tester.get_key(hoid, "foo" + num_str(i), &result);
+    int r2 = tester.get_key(hoid2, "foo" + num_str(i), &result2);
+    if (i % 2) {
+      ASSERT_EQ(0, r);
+      ASSERT_EQ(1, r2);
+      ASSERT_EQ("bar" + num_str(i), result2);
+    } else {
+      ASSERT_EQ(0, r2);
+      ASSERT_EQ(1, r);
+      ASSERT_EQ("bar" + num_str(i), result);
+    }
+  }
+
+  {
+    ObjectMap::ObjectMapIterator iter = db->get_iterator(hoid);
+    iter->seek_to_first();
+    for (unsigned i = 0; i < 1000; ++i) {
+      if (!(i % 2)) {  // source keeps the even keys
+        ASSERT_TRUE(iter->valid());
+        ASSERT_EQ("foo" + num_str(i), iter->key());
+        iter->next();
+      }
+    }
+  }
+
+  {
+    ObjectMap::ObjectMapIterator iter2 = db->get_iterator(hoid2);
+    iter2->seek_to_first();
+    for (unsigned i = 0; i < 1000; ++i) {
+      if (i % 2) {  // clone keeps the odd keys
+        ASSERT_TRUE(iter2->valid());
+        ASSERT_EQ("foo" + num_str(i), iter2->key());
+        iter2->next();
+      }
+    }
+  }
+
+  db->clear(hoid);
+  db->clear(hoid2);
+}
+
 TEST_F(ObjectMapTest, RandomTest) {
   tester.def_init();
   for (unsigned i = 0; i < 5000; ++i) {
@@ -760,7 +995,7 @@ TEST_F(ObjectMapTest, RandomTest) {
     } else if (val < 14) {
       ASSERT_TRUE(tester.auto_verify_header(std::cerr));
     } else if (val < 30) {
-      tester.auto_set_key(std::cerr);
+      tester.auto_set_keys(std::cerr);
     } else if (val < 42) {
       tester.auto_set_xattr(std::cerr);
     } else if (val < 55) {
@@ -780,7 +1015,117 @@ TEST_F(ObjectMapTest, RandomTest) {
     } else if (val < 92) {
       tester.auto_remove_xattr(std::cerr);
     } else {
-      tester.auto_remove_key(std::cerr);
+      tester.auto_remove_keys(std::cerr);
+    }
+
+    if (!(i % 500)) {  // full verification once every 500 ops; a bare (i % 500) fired on all other ops instead
+      tester.auto_verify_objects(std::cerr);
+    }
+  }
+}
+
+TEST_F(ObjectMapTest, RandomTestNoDeletesXattrs) {  // random mix limited to set-keys, remove-keys and clone
+  tester.def_init();
+  for (unsigned i = 0; i < 5000; ++i) {
+    unsigned val = rand();
+    val <<= 8;  // NOTE(review): zeroes the low 8 bits, so val % 100 below is always a multiple of 4; '>>=' may have been meant - confirm
+    val %= 100;
+    if (!(i%100))
+      std::cout << "on op " << i
+                << " val is " << val << std::endl;
+
+    if (val < 45) {
+      tester.auto_set_keys(std::cerr);
+    } else if (val < 90) {
+      tester.auto_remove_keys(std::cerr);
+    } else {
+      tester.auto_clone_key(std::cerr);
+    }
+
+    if (!(i % 500)) {  // full verification once every 500 ops; a bare (i % 500) fired on all other ops instead
+      tester.auto_verify_objects(std::cerr);
     }
   }
 }
+
+string num_to_key(unsigned i) {  // decimal text of i, zero-padded to 10 digits (fixed width: string order == numeric order)
+  char buf[100];
+  int ret = snprintf(buf, sizeof(buf), "%010u", i);
+  assert(ret > 0);
+  return string(buf, ret);
+}
+
+TEST_F(ObjectMapTest, TestMergeNewCompleteContainBug) {
+  /* This test exploits a bug in kraken and earlier where merge_new_complete
+   * could miss complete entries fully contained by a new entry.  To get this
+   * to actually result in an incorrect return value, you need to remove at
+   * least two values, one before a complete region, and one which occurs in
+   * the parent after the complete region (but within 20 not yet completed
+   * parent points of the first value).
+   */
+  for (unsigned i = 10; i < 160; i+=2) {
+    tester.test_set_key("foo", num_to_key(i), "asdf");
+  }
+  tester.test_clone("foo", "foo2", std::cout);
+  tester.test_clear("foo");
+
+  tester.test_set_key("foo2", num_to_key(15), "asdf");
+  tester.test_set_key("foo2", num_to_key(13), "asdf");
+  tester.test_set_key("foo2", num_to_key(57), "asdf");
+
+  tester.test_remove_key("foo2", num_to_key(15));
+
+  set<string> to_remove;
+  to_remove.insert(num_to_key(13));
+  to_remove.insert(num_to_key(58));
+  to_remove.insert(num_to_key(60));
+  to_remove.insert(num_to_key(62));
+  tester.test_remove_keys("foo2", to_remove);
+
+  tester.verify_keys("foo2", std::cout);
+  ASSERT_EQ(tester.get_key("foo2", num_to_key(10), nullptr), 1);
+  ASSERT_EQ(tester.get_key("foo2", num_to_key(1), nullptr), 0);
+  ASSERT_EQ(tester.get_key("foo2", num_to_key(56), nullptr), 1);
+  // this one triggers the bug
+  ASSERT_EQ(tester.get_key("foo2", num_to_key(58), nullptr), 0);
+}
+
+TEST_F(ObjectMapTest, TestIterateBug18533) {
+  /* This test starts with the one immediately above to create a pair of
+   * complete regions where one contains the other.  Then, it deletes the
+   * key at the start of the contained region.  The logic in next_parent()
+   * skips ahead to the end of the contained region, and we start copying
+   * values down again from the parent into the child -- including some
+   * that had actually been deleted.  I think this works for any removal
+   * within the outer complete region after the start of the contained
+   * region.
+   */
+  for (unsigned i = 10; i < 160; i+=2) {
+    tester.test_set_key("foo", num_to_key(i), "asdf");
+  }
+  tester.test_clone("foo", "foo2", std::cout);
+  tester.test_clear("foo");
+
+  tester.test_set_key("foo2", num_to_key(15), "asdf");
+  tester.test_set_key("foo2", num_to_key(13), "asdf");
+  tester.test_set_key("foo2", num_to_key(57), "asdf");
+  tester.test_set_key("foo2", num_to_key(91), "asdf");
+
+  tester.test_remove_key("foo2", num_to_key(15));
+
+  set<string> to_remove;
+  to_remove.insert(num_to_key(13));
+  to_remove.insert(num_to_key(58));
+  to_remove.insert(num_to_key(60));
+  to_remove.insert(num_to_key(62));
+  to_remove.insert(num_to_key(82));
+  to_remove.insert(num_to_key(84));
+  tester.test_remove_keys("foo2", to_remove);
+
+  //tester.test_remove_key("foo2", num_to_key(15)); also does the trick
+  tester.test_remove_key("foo2", num_to_key(80));
+
+  // the iterator in verify_keys will return an extra value
+  tester.verify_keys("foo2", std::cout);
+}
+
diff --git a/src/tools/ceph_osdomap_tool.cc b/src/tools/ceph_osdomap_tool.cc
index 0fb6d98a1f1cc..99974713df750 100644
--- a/src/tools/ceph_osdomap_tool.cc
+++ b/src/tools/ceph_osdomap_tool.cc
@@ -27,14 +27,17 @@ using namespace std;
 
 int main(int argc, char **argv) {
   po::options_description desc("Allowed options");
-  string store_path, cmd, out_path;
+  string store_path, cmd, out_path, oid;
+  bool debug = false;
   desc.add_options()
     ("help", "produce help message")
     ("omap-path", po::value<string>(&store_path),
      "path to mon directory, mandatory (current/omap usually)")
     ("paranoid", "use paranoid checking")
+    ("debug", "Additional debug output from DBObjectMap")
+    ("oid", po::value<string>(&oid), "Restrict to this object id when dumping objects")
     ("command", po::value<string>(&cmd),
-     "command arg is one of [dump-raw-keys, dump-raw-key-vals, dump-objects, dump-objects-with-keys, check], mandatory")
+     "command arg is one of [dump-raw-keys, dump-raw-key-vals, dump-objects, dump-objects-with-keys, check, dump-headers, repair], mandatory")
     ;
po::positional_options_description p;
   p.add("command", 1);
@@ -64,12 +67,19 @@ int main(int argc, char **argv) {
     ceph_options.push_back(i->c_str());
   }
 
+  if (vm.count("debug")) debug = true;
+
   auto cct = global_init(
     &def_args, ceph_options, CEPH_ENTITY_TYPE_OSD,
-    CODE_ENVIRONMENT_UTILITY, 0);
+    CODE_ENVIRONMENT_UTILITY_NODOUT, 0);
   common_init_finish(g_ceph_context);
   g_ceph_context->_conf->apply_changes(NULL);
   g_conf = g_ceph_context->_conf;
+  if (debug) {  // --debug: turn on log_to_stderr and err_to_stderr
+    g_conf->set_val_or_die("log_to_stderr", "true");
+    g_conf->set_val_or_die("err_to_stderr", "true");
+  }
+  g_conf->apply_changes(NULL);  // applied again so the overrides set just above are picked up
 
   if (vm.count("help")) {
     std::cerr << desc << std::endl;
@@ -99,6 +109,9 @@ int main(int argc, char **argv) {
     std::cerr << "Output: " << out.str() << std::endl;
     goto done;
   }
+  // We don't call omap.init() here because it will repair
+  // the DBObjectMap which we might want to examine for diagnostic
+  // reasons.  Instead use --command repair.
   r = 0;
 
 
@@ -123,6 +136,8 @@ int main(int argc, char **argv) {
     for (vector::iterator i = objects.begin();
          i != objects.end();
          ++i) {
+      if (vm.count("oid") != 0 && i->hobj.oid.name != oid)  // --oid given: list only objects with that name
+        continue;
       std::cout << *i << std::endl;
     }
     r = 0;
@@ -136,6 +151,8 @@ int main(int argc, char **argv) {
     for (vector::iterator i = objects.begin();
          i != objects.end();
          ++i) {
+      if (vm.count("oid") != 0 && i->hobj.oid.name != oid)  // --oid given: dump only objects with that name
+        continue;
       std::cout << "Object: " << *i << std::endl;
       ObjectMap::ObjectMapIterator j = omap.get_iterator(ghobject_t(i->hobj));
       for (j->seek_to_first(); j->valid(); j->next()) {
@@ -143,17 +160,35 @@ int main(int argc, char **argv) {
         j->value().hexdump(std::cout);
       }
     }
-  } else if (cmd == "check") {
-    r = omap.check(std::cout);
-    if (!r) {
-      std::cerr << "check got: " << cpp_strerror(r) << std::endl;
+  } else if (cmd == "check" || cmd == "repair") {  // both commands share check(); "repair" passes repair=true
+    ostringstream ss;  // check() writes its report here; it is printed only when r != 0
+    bool repair = (cmd == "repair");
+    r = omap.check(ss, repair);
+    if (r) {
+      std::cerr << ss.str() << std::endl;
+      if (r > 0) {  // a positive result is reported as an error count and exits with status 1
+        std::cerr << "check got " << r << " error(s)" << std::endl;
+        r = 1;
+        goto done;
+      }
+    }  // NOTE(review): a negative r falls through to "succeeded" below - presumably "repaired"; confirm against DBObjectMap::check()
+    std::cout << (repair ? "repair" : "check") << " succeeded" << std::endl;
+  } else if (cmd == "dump-headers") {
+    vector headers;
+    r = omap.list_object_headers(&headers);
+    if (r < 0) {
+      std::cerr << "list_object_headers got: " << cpp_strerror(r) << std::endl;
+      r = 1;
       goto done;
     }
-    std::cout << "check succeeded" << std::endl;
+    for (auto &&i : headers)  // bind by reference: printing does not need a copy of each header
+      std::cout << i << std::endl;
   } else {
     std::cerr << "Did not recognize command " << cmd << std::endl;
+    r = 1;  // an unrecognised command is a failure exit
     goto done;
   }
+  r = 0;  // the failure paths visible in this hunk set r = 1 and jump to done; falling through here yields exit status 0
 
   done:
   return r;