Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pacific: mds: fix deadlock between unlinking and linkmerge #53495

Merged
merged 6 commits into from Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 0 additions & 2 deletions src/mds/CDentry.cc
Expand Up @@ -104,8 +104,6 @@ ostream& operator<<(ostream& out, const CDentry& dn)
out << " state=" << dn.get_state();
if (dn.is_new()) out << "|new";
if (dn.state_test(CDentry::STATE_BOTTOMLRU)) out << "|bottomlru";
if (dn.state_test(CDentry::STATE_UNLINKING)) out << "|unlinking";
if (dn.state_test(CDentry::STATE_REINTEGRATING)) out << "|reintegrating";

if (dn.get_num_ref()) {
out << " |";
Expand Down
17 changes: 4 additions & 13 deletions src/mds/CDentry.h
Expand Up @@ -29,7 +29,6 @@
#include "BatchOp.h"
#include "MDSCacheObject.h"
#include "MDSContext.h"
#include "Mutation.h"
#include "SimpleLock.h"
#include "LocalLockC.h"
#include "ScrubHeader.h"
Expand Down Expand Up @@ -87,25 +86,18 @@ class CDentry : public MDSCacheObject, public LRUObject, public Counter<CDentry>
static const int STATE_EVALUATINGSTRAY = (1<<4);
static const int STATE_PURGINGPINNED = (1<<5);
static const int STATE_BOTTOMLRU = (1<<6);
static const int STATE_UNLINKING = (1<<7);
static const int STATE_REINTEGRATING = (1<<8);
// stray dentry needs notification of releasing reference
static const int STATE_STRAY = STATE_NOTIFYREF;
static const int MASK_STATE_IMPORT_KEPT = STATE_BOTTOMLRU;

// -- pins --
static const int PIN_INODEPIN = 1; // linked inode is pinned
static const int PIN_FRAGMENTING = -2; // containing dir is refragmenting
static const int PIN_PURGING = 3;
static const int PIN_SCRUBPARENT = 4;
static const int PIN_WAITUNLINKSTATE = 5;
static const int PIN_INODEPIN = 1; // linked inode is pinned
static const int PIN_FRAGMENTING = -2; // containing dir is refragmenting
static const int PIN_PURGING = 3;
static const int PIN_SCRUBPARENT = 4;

static const unsigned EXPORT_NONCE = 1;

const static uint64_t WAIT_UNLINK_STATE = (1<<0);
const static uint64_t WAIT_UNLINK_FINISH = (1<<1);
const static uint64_t WAIT_REINTEGRATE_FINISH = (1<<2);
uint32_t replica_unlinking_ref = 0;

CDentry(std::string_view n, __u32 h,
mempool::mds_co::string alternate_name,
Expand Down Expand Up @@ -144,7 +136,6 @@ class CDentry : public MDSCacheObject, public LRUObject, public Counter<CDentry>
case PIN_FRAGMENTING: return "fragmenting";
case PIN_PURGING: return "purging";
case PIN_SCRUBPARENT: return "scrubparent";
case PIN_WAITUNLINKSTATE: return "waitunlinkstate";
default: return generic_pin_name(p);
}
}
Expand Down
3 changes: 1 addition & 2 deletions src/mds/CInode.h
Expand Up @@ -397,8 +397,7 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
static const uint64_t WAIT_FROZEN = (1<<1);
static const uint64_t WAIT_TRUNC = (1<<2);
static const uint64_t WAIT_FLOCK = (1<<3);
static const uint64_t WAIT_UNLINK = (1<<4);


static const uint64_t WAIT_ANY_MASK = (uint64_t)(-1);

// misc
Expand Down
87 changes: 8 additions & 79 deletions src/mds/MDCache.cc
Expand Up @@ -8107,10 +8107,6 @@ void MDCache::dispatch(const cref_t<Message> &m)
case MSG_MDS_DENTRYUNLINK:
handle_dentry_unlink(ref_cast<MDentryUnlink>(m));
break;
case MSG_MDS_DENTRYUNLINK_ACK:
handle_dentry_unlink_ack(ref_cast<MDentryUnlinkAck>(m));
break;


case MSG_MDS_FRAGMENTNOTIFY:
handle_fragment_notify(ref_cast<MMDSFragmentNotify>(m));
Expand Down Expand Up @@ -11051,8 +11047,7 @@ void MDCache::handle_dentry_link(const cref_t<MDentryLink> &m)

// UNLINK

void MDCache::send_dentry_unlink(CDentry *dn, CDentry *straydn,
MDRequestRef& mdr, bool unlinking)
void MDCache::send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& mdr)
{
dout(10) << __func__ << " " << *dn << dendl;
// share unlink news with replicas
Expand All @@ -11064,11 +11059,6 @@ void MDCache::send_dentry_unlink(CDentry *dn, CDentry *straydn,
CInode *strayin = straydn->get_linkage()->get_inode();
strayin->encode_snap_blob(snapbl);
}

if (unlinking) {
ceph_assert(!straydn);
dn->replica_unlinking_ref = 0;
}
for (set<mds_rank_t>::iterator it = replicas.begin();
it != replicas.end();
++it) {
Expand All @@ -11081,21 +11071,12 @@ void MDCache::send_dentry_unlink(CDentry *dn, CDentry *straydn,
rejoin_gather.count(*it)))
continue;

auto unlink = make_message<MDentryUnlink>(dn->get_dir()->dirfrag(),
dn->get_name(), unlinking);
auto unlink = make_message<MDentryUnlink>(dn->get_dir()->dirfrag(), dn->get_name());
if (straydn) {
encode_replica_stray(straydn, *it, unlink->straybl);
unlink->snapbl = snapbl;
}
mds->send_message_mds(unlink, *it);
if (unlinking) {
dn->replica_unlinking_ref++;
dn->get(CDentry::PIN_WAITUNLINKSTATE);
}
}

if (unlinking && dn->replica_unlinking_ref) {
dn->add_waiter(CDentry::WAIT_UNLINK_STATE, new C_MDS_RetryRequest(this, mdr));
}
}

Expand All @@ -11104,40 +11085,23 @@ void MDCache::handle_dentry_unlink(const cref_t<MDentryUnlink> &m)
// straydn
CDentry *straydn = nullptr;
CInode *strayin = nullptr;

if (m->straybl.length())
decode_replica_stray(straydn, &strayin, m->straybl, mds_rank_t(m->get_source().num()));

boost::intrusive_ptr<MDentryUnlinkAck> ack;
CDentry::linkage_t *dnl;
CDentry *dn;
CInode *in;
bool hadrealm;

CDir *dir = get_dirfrag(m->get_dirfrag());
if (!dir) {
dout(7) << __func__ << " don't have dirfrag " << m->get_dirfrag() << dendl;
if (m->is_unlinking())
goto ack;
} else {
dn = dir->lookup(m->get_dn());
CDentry *dn = dir->lookup(m->get_dn());
if (!dn) {
dout(7) << __func__ << " don't have dentry " << *dir << " dn " << m->get_dn() << dendl;
if (m->is_unlinking())
goto ack;
} else {
dout(7) << __func__ << " on " << *dn << dendl;

if (m->is_unlinking()) {
dn->state_set(CDentry::STATE_UNLINKING);
goto ack;
}

dnl = dn->get_linkage();
CDentry::linkage_t *dnl = dn->get_linkage();

// open inode?
if (dnl->is_primary()) {
in = dnl->get_inode();
CInode *in = dnl->get_inode();
dn->dir->unlink_inode(dn);
ceph_assert(straydn);
straydn->dir->link_primary_inode(straydn, in);
Expand All @@ -11148,12 +11112,11 @@ void MDCache::handle_dentry_unlink(const cref_t<MDentryUnlink> &m)
in->first = straydn->first;

// update subtree map?
if (in->is_dir()) {
if (in->is_dir())
adjust_subtree_after_rename(in, dir, false);
}

if (m->snapbl.length()) {
hadrealm = (in->snaprealm ? true : false);
bool hadrealm = (in->snaprealm ? true : false);
in->decode_snap_blob(m->snapbl);
ceph_assert(in->snaprealm);
if (!hadrealm)
Expand All @@ -11164,20 +11127,14 @@ void MDCache::handle_dentry_unlink(const cref_t<MDentryUnlink> &m)
if (in->is_any_caps() &&
!in->state_test(CInode::STATE_EXPORTINGCAPS))
migrator->export_caps(in);

straydn = NULL;
} else {
ceph_assert(!straydn);
ceph_assert(dnl->is_remote());
dn->dir->unlink_inode(dn);
}
ceph_assert(dnl->is_null());
dn->state_clear(CDentry::STATE_UNLINKING);

MDSContext::vec finished;
dn->take_waiting(CDentry::WAIT_UNLINK_FINISH, finished);
mds->queue_waiters(finished);

}
}

Expand All @@ -11189,36 +11146,8 @@ void MDCache::handle_dentry_unlink(const cref_t<MDentryUnlink> &m)
trim_dentry(straydn, ex);
send_expire_messages(ex);
}
return;

ack:
ack = make_message<MDentryUnlinkAck>(m->get_dirfrag(), m->get_dn());
mds->send_message(ack, m->get_connection());
}

void MDCache::handle_dentry_unlink_ack(const cref_t<MDentryUnlinkAck> &m)
{
CDir *dir = get_dirfrag(m->get_dirfrag());
if (!dir) {
dout(7) << __func__ << " don't have dirfrag " << m->get_dirfrag() << dendl;
} else {
CDentry *dn = dir->lookup(m->get_dn());
if (!dn) {
dout(7) << __func__ << " don't have dentry " << *dir << " dn " << m->get_dn() << dendl;
} else {
dout(7) << __func__ << " on " << *dn << " ref "
<< dn->replica_unlinking_ref << " -> "
<< dn->replica_unlinking_ref - 1 << dendl;
dn->replica_unlinking_ref--;
if (!dn->replica_unlinking_ref) {
MDSContext::vec finished;
dn->take_waiting(CDentry::WAIT_UNLINK_STATE, finished);
mds->queue_waiters(finished);
}
dn->put(CDentry::PIN_WAITUNLINKSTATE);
}
}
}



Expand Down
3 changes: 1 addition & 2 deletions src/mds/MDCache.h
Expand Up @@ -897,7 +897,7 @@ class MDCache {
void encode_remote_dentry_link(CDentry::linkage_t *dnl, bufferlist& bl);
void decode_remote_dentry_link(CDir *dir, CDentry *dn, bufferlist::const_iterator& p);
void send_dentry_link(CDentry *dn, MDRequestRef& mdr);
void send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& mdr, bool unlinking=false);
void send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& mdr);

void wait_for_uncommitted_fragment(dirfrag_t dirfrag, MDSContext *c) {
uncommitted_fragments.at(dirfrag).waiters.push_back(c);
Expand Down Expand Up @@ -1133,7 +1133,6 @@ class MDCache {
void handle_discover_reply(const cref_t<MDiscoverReply> &m);
void handle_dentry_link(const cref_t<MDentryLink> &m);
void handle_dentry_unlink(const cref_t<MDentryUnlink> &m);
void handle_dentry_unlink_ack(const cref_t<MDentryUnlinkAck> &m);

int dump_cache(std::string_view fn, Formatter *f);

Expand Down
2 changes: 0 additions & 2 deletions src/mds/MDSRank.cc
Expand Up @@ -1173,7 +1173,6 @@ bool MDSRank::is_valid_message(const cref_t<Message> &m) {
type == CEPH_MSG_CLIENT_RECONNECT ||
type == CEPH_MSG_CLIENT_RECLAIM ||
type == CEPH_MSG_CLIENT_REQUEST ||
type == CEPH_MSG_CLIENT_REPLY ||
type == MSG_MDS_PEER_REQUEST ||
type == MSG_MDS_HEARTBEAT ||
type == MSG_MDS_TABLE_REQUEST ||
Expand Down Expand Up @@ -1227,7 +1226,6 @@ void MDSRank::handle_message(const cref_t<Message> &m)
ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_CLIENT);
// fall-thru
case CEPH_MSG_CLIENT_REQUEST:
case CEPH_MSG_CLIENT_REPLY:
server->dispatch(m);
break;
case MSG_MDS_PEER_REQUEST:
Expand Down
25 changes: 0 additions & 25 deletions src/mds/MDSRank.h
Expand Up @@ -150,29 +150,6 @@ class Finisher;
class ScrubStack;
class C_ExecAndReply;

struct MDSMetaRequest {
private:
int _op;
CDentry *_dentry;
ceph_tid_t _tid;
public:
explicit MDSMetaRequest(int op, CDentry *dn, ceph_tid_t tid) :
_op(op), _dentry(dn), _tid(tid) {
if (_dentry) {
_dentry->get(CDentry::PIN_PURGING);
}
}
~MDSMetaRequest() {
if (_dentry) {
_dentry->put(CDentry::PIN_PURGING);
}
}

CDentry *get_dentry() { return _dentry; }
int get_op() { return _op; }
ceph_tid_t get_tid() { return _tid; }
};

/**
* The public part of this class's interface is what's exposed to all
* the various subsystems (server, mdcache, etc), such as pointers
Expand Down Expand Up @@ -439,8 +416,6 @@ class MDSRank {
PerfCounters *logger = nullptr, *mlogger = nullptr;
OpTracker op_tracker;

std::map<ceph_tid_t, MDSMetaRequest> internal_client_requests;

// The last different state I held before current
MDSMap::DaemonState last_state = MDSMap::STATE_BOOT;
// The state assigned to me by the MDSMap
Expand Down