From 3223378194ad59850177cb87754cb1ada2e5e5f1 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 10 Oct 2016 18:39:30 +0800 Subject: [PATCH 1/2] os/ObjectStore: properly clear object map when replaying OP_REMOVE To remove an object, filestore needs to unlink corresponding object file from filesystem and removes corresponding object keys from DBObjectMap. When replaying OP_REMOVE operation, it's possible the operation has completed partially, object file has been deleted, but object keys in DBObjectMap hasn't. The fix is force clear object keys if object file does not exists Fixes: http://tracker.ceph.com/issues/17177 Signed-off-by: Yan, Zheng --- src/os/filestore/FileStore.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/os/filestore/FileStore.cc b/src/os/filestore/FileStore.cc index ff92831486199..7029571212d4d 100644 --- a/src/os/filestore/FileStore.cc +++ b/src/os/filestore/FileStore.cc @@ -472,13 +472,7 @@ int FileStore::lfn_unlink(const coll_t& cid, const ghobject_t& o, } if (!force_clear_omap) { - if (hardlink == 0) { - if (!m_disable_wbthrottle) { - wbthrottle.clear_object(o); // should be only non-cache ref - } - fdcache.clear(o); - return 0; - } else if (hardlink == 1) { + if (hardlink == 0 || hardlink == 1) { force_clear_omap = true; } } @@ -505,6 +499,12 @@ int FileStore::lfn_unlink(const coll_t& cid, const ghobject_t& o, if (!backend->can_checkpoint()) object_map->sync(&o, &spos); } + if (hardlink == 0) { + if (!m_disable_wbthrottle) { + wbthrottle.clear_object(o); // should be only non-cache ref + } + return 0; + } } r = index->unlink(o); if (r < 0) { From c66e466d4ed76cd7a063b9b982ba455150ef1f14 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 10 Oct 2016 22:17:28 +0800 Subject: [PATCH 2/2] os/ObjectStore: properly clone object map when replaying OP_COLL_MOVE_RENAME FileStore::_close_replay_guard does not sync the object map. If OSD crashes while executing FileStore::_collection_move_rename, it's possible that the replay guard is set, but the object map map update gets lost. When recovering, OSD checks the replay guard and does nothing. The fix is sync the object map in FileStore::_close_replay_guard() Signed-off-by: Yan, Zheng --- src/os/filestore/DBObjectMap.cc | 2 +- src/os/filestore/FileStore.cc | 55 +++++++++++++++++++++------------ src/os/filestore/FileStore.h | 3 +- 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/src/os/filestore/DBObjectMap.cc b/src/os/filestore/DBObjectMap.cc index 67e17bdb8c89d..7e9dfe6538136 100644 --- a/src/os/filestore/DBObjectMap.cc +++ b/src/os/filestore/DBObjectMap.cc @@ -902,10 +902,10 @@ int DBObjectMap::clone(const ghobject_t &oid, { Header destination = lookup_map_header(*ltarget, target); if (destination) { - remove_map_header(*ltarget, target, destination, t); if (check_spos(target, destination, spos)) return 0; destination->num_children--; + remove_map_header(*ltarget, target, destination, t); _clear(destination, t); } } diff --git a/src/os/filestore/FileStore.cc b/src/os/filestore/FileStore.cc index 7029571212d4d..68231fc65b3a8 100644 --- a/src/os/filestore/FileStore.cc +++ b/src/os/filestore/FileStore.cc @@ -2343,10 +2343,12 @@ void FileStore::_set_replay_guard(int fd, // first make sure the previous operation commits ::fsync(fd); - // sync object_map too. even if this object has a header or keys, - // it have had them in the past and then removed them, so always - // sync. - object_map->sync(hoid, &spos); + if (!in_progress) { + // sync object_map too. even if this object has a header or keys, + // it have had them in the past and then removed them, so always + // sync. + object_map->sync(hoid, &spos); + } _inject_failure(); @@ -2384,7 +2386,8 @@ void FileStore::_close_replay_guard(const coll_t& cid, VOID_TEMP_FAILURE_RETRY(::close(fd)); } -void FileStore::_close_replay_guard(int fd, const SequencerPosition& spos) +void FileStore::_close_replay_guard(int fd, const SequencerPosition& spos, + const ghobject_t *hoid) { if (backend->can_checkpoint()) return; @@ -2393,6 +2396,11 @@ void FileStore::_close_replay_guard(int fd, const SequencerPosition& spos) _inject_failure(); + // sync object_map too. even if this object has a header or keys, + // it have had them in the past and then removed them, so always + // sync. + object_map->sync(hoid, &spos); + // then record that we are done with this operation bufferlist v(40); ::encode(spos, v); @@ -5158,18 +5166,30 @@ int FileStore::_collection_move_rename(const coll_t& oldcid, const ghobject_t& o } else { assert(0 == "ERROR: source must exist"); } - return 0; - } - if (dstcmp > 0) { // if dstcmp == 0 the guard already says "in-progress" - _set_replay_guard(**fd, spos, &o, true); - } - r = lfn_link(oldcid, c, oldoid, o); - if (replaying && !backend->can_checkpoint() && - r == -EEXIST) // crashed between link() and set_replay_guard() - r = 0; + if (!replaying) { + return 0; + } + if (allow_enoent && dstcmp > 0) { // if dstcmp == 0, try_rename was started. + return 0; + } - _inject_failure(); + r = 0; // don't know if object_map was cloned + } else { + if (dstcmp > 0) { // if dstcmp == 0 the guard already says "in-progress" + _set_replay_guard(**fd, spos, &o, true); + } + + r = lfn_link(oldcid, c, oldoid, o); + if (replaying && !backend->can_checkpoint() && + r == -EEXIST) // crashed between link() and set_replay_guard() + r = 0; + + lfn_close(fd); + fd = FDRef(); + + _inject_failure(); + } if (r == 0) { // the name changed; link the omap content @@ -5180,9 +5200,6 @@ int FileStore::_collection_move_rename(const coll_t& oldcid, const ghobject_t& o _inject_failure(); - lfn_close(fd); - fd = FDRef(); - if (r == 0) r = lfn_unlink(oldcid, oldoid, spos, true); @@ -5191,7 +5208,7 @@ int FileStore::_collection_move_rename(const coll_t& oldcid, const ghobject_t& o // close guard on object so we don't do this again if (r == 0) { - _close_replay_guard(**fd, spos); + _close_replay_guard(**fd, spos, &o); lfn_close(fd); } } diff --git a/src/os/filestore/FileStore.h b/src/os/filestore/FileStore.h index dca849f280131..7a9b78df923b3 100644 --- a/src/os/filestore/FileStore.h +++ b/src/os/filestore/FileStore.h @@ -497,7 +497,8 @@ class FileStore : public JournalingObjectStore, const SequencerPosition &spos); /// close a replay guard opened with in_progress=true - void _close_replay_guard(int fd, const SequencerPosition& spos); + void _close_replay_guard(int fd, const SequencerPosition& spos, + const ghobject_t *oid=0); void _close_replay_guard(const coll_t& cid, const SequencerPosition& spos); /**