Skip to content

Commit

Permalink
mds: wait unlink to finish to avoid conflict when creating same dentries
Browse files Browse the repository at this point in the history
If the previous unlink request has been delayed for some reason,
a new create for the same dentry may fail, or a new open may
succeed but the new contents written to it will be lost.

The kernel client makes sure that it won't send subsequent create
requests for the same dentry before the unlink gets its first reply.
Here we need to make sure that the dentry is marked as unlinking
before that first reply is sent out.

Fixes: https://tracker.ceph.com/issues/55332
Signed-off-by: Xiubo Li <xiubli@redhat.com>
  • Loading branch information
lxbsz committed May 26, 2022
1 parent b76b6ea commit b640f94
Show file tree
Hide file tree
Showing 8 changed files with 238 additions and 29 deletions.
17 changes: 13 additions & 4 deletions src/mds/CDentry.h
Expand Up @@ -29,6 +29,7 @@
#include "BatchOp.h"
#include "MDSCacheObject.h"
#include "MDSContext.h"
#include "Mutation.h"
#include "SimpleLock.h"
#include "LocalLockC.h"
#include "ScrubHeader.h"
Expand Down Expand Up @@ -86,18 +87,24 @@ class CDentry : public MDSCacheObject, public LRUObject, public Counter<CDentry>
static const int STATE_EVALUATINGSTRAY = (1<<4);
static const int STATE_PURGINGPINNED = (1<<5);
static const int STATE_BOTTOMLRU = (1<<6);
static const int STATE_UNLINKING = (1<<7);
// stray dentry needs notification of releasing reference
static const int STATE_STRAY = STATE_NOTIFYREF;
static const int MASK_STATE_IMPORT_KEPT = STATE_BOTTOMLRU;

// -- pins --
static const int PIN_INODEPIN = 1; // linked inode is pinned
static const int PIN_FRAGMENTING = -2; // containing dir is refragmenting
static const int PIN_PURGING = 3;
static const int PIN_SCRUBPARENT = 4;
static const int PIN_INODEPIN = 1; // linked inode is pinned
static const int PIN_FRAGMENTING = -2; // containing dir is refragmenting
static const int PIN_PURGING = 3;
static const int PIN_SCRUBPARENT = 4;
static const int PIN_WAITUNLINKSTATE = 5;
static const int PIN_WAITUNLINKFINISH = 6;

static const unsigned EXPORT_NONCE = 1;

const static uint64_t WAIT_UNLINK_STATE = (1<<0);
const static uint64_t WAIT_UNLINK_FINISH = (1<<1);
uint32_t replica_unlinking_ref = 0;

CDentry(std::string_view n, __u32 h,
mempool::mds_co::string alternate_name,
Expand Down Expand Up @@ -136,6 +143,8 @@ class CDentry : public MDSCacheObject, public LRUObject, public Counter<CDentry>
case PIN_FRAGMENTING: return "fragmenting";
case PIN_PURGING: return "purging";
case PIN_SCRUBPARENT: return "scrubparent";
case PIN_WAITUNLINKSTATE: return "waitunlinkstate";
case PIN_WAITUNLINKFINISH: return "waitunlinkfinish";
default: return generic_pin_name(p);
}
}
Expand Down
85 changes: 77 additions & 8 deletions src/mds/MDCache.cc
Expand Up @@ -8228,6 +8228,10 @@ void MDCache::dispatch(const cref_t<Message> &m)
case MSG_MDS_DENTRYUNLINK:
handle_dentry_unlink(ref_cast<MDentryUnlink>(m));
break;
case MSG_MDS_DENTRYUNLINKACK:
handle_dentry_unlink_ack(ref_cast<MDentryUnlinkAck>(m));
break;


case MSG_MDS_FRAGMENTNOTIFY:
handle_fragment_notify(ref_cast<MMDSFragmentNotify>(m));
Expand Down Expand Up @@ -11183,7 +11187,8 @@ void MDCache::handle_dentry_link(const cref_t<MDentryLink> &m)

// UNLINK

void MDCache::send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& mdr)
int MDCache::send_dentry_unlink(CDentry *dn, CDentry *straydn,
MDRequestRef& mdr, bool unlinking)
{
dout(10) << __func__ << " " << *dn << dendl;
// share unlink news with replicas
Expand All @@ -11195,6 +11200,12 @@ void MDCache::send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& md
CInode *strayin = straydn->get_linkage()->get_inode();
strayin->encode_snap_blob(snapbl);
}

int ret = 0;
if (unlinking) {
ceph_assert(!straydn);
dn->replica_unlinking_ref = 0;
}
for (set<mds_rank_t>::iterator it = replicas.begin();
it != replicas.end();
++it) {
Expand All @@ -11207,37 +11218,66 @@ void MDCache::send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& md
rejoin_gather.count(*it)))
continue;

auto unlink = make_message<MDentryUnlink>(dn->get_dir()->dirfrag(), dn->get_name());
auto unlink = make_message<MDentryUnlink>(dn->get_dir()->dirfrag(),
dn->get_name(), unlinking);
if (straydn) {
encode_replica_stray(straydn, *it, unlink->straybl);
unlink->snapbl = snapbl;
}
mds->send_message_mds(unlink, *it);
if (unlinking) {
dn->replica_unlinking_ref++;
dn->get(CDentry::PIN_WAITUNLINKSTATE);
}
}

if (unlinking) {
ret = dn->replica_unlinking_ref;
if (ret)
dn->add_waiter(CDentry::WAIT_UNLINK_STATE, new C_MDS_RetryRequest(this, mdr));
}
return ret;
}

void MDCache::handle_dentry_unlink(const cref_t<MDentryUnlink> &m)
{
// straydn
CDentry *straydn = nullptr;
CInode *strayin = nullptr;

if (m->straybl.length())
decode_replica_stray(straydn, &strayin, m->straybl, mds_rank_t(m->get_source().num()));

boost::intrusive_ptr<MDentryUnlinkAck> ack;
CDentry::linkage_t *dnl;
CDentry *dn;
CInode *in;
bool hadrealm;

CDir *dir = get_dirfrag(m->get_dirfrag());
if (!dir) {
dout(7) << __func__ << " don't have dirfrag " << m->get_dirfrag() << dendl;
if (m->is_unlinking())
goto ack;
} else {
CDentry *dn = dir->lookup(m->get_dn());
dn = dir->lookup(m->get_dn());
if (!dn) {
dout(7) << __func__ << " don't have dentry " << *dir << " dn " << m->get_dn() << dendl;
if (m->is_unlinking())
goto ack;
} else {
dout(7) << __func__ << " on " << *dn << dendl;
CDentry::linkage_t *dnl = dn->get_linkage();

if (m->is_unlinking()) {
dn->state_set(CDentry::STATE_UNLINKING);
goto ack;
}

dnl = dn->get_linkage();

// open inode?
if (dnl->is_primary()) {
CInode *in = dnl->get_inode();
in = dnl->get_inode();
dn->dir->unlink_inode(dn);
ceph_assert(straydn);
straydn->dir->link_primary_inode(straydn, in);
Expand All @@ -11248,11 +11288,11 @@ void MDCache::handle_dentry_unlink(const cref_t<MDentryUnlink> &m)
in->first = straydn->first;

// update subtree map?
if (in->is_dir())
if (in->is_dir())
adjust_subtree_after_rename(in, dir, false);

if (m->snapbl.length()) {
bool hadrealm = (in->snaprealm ? true : false);
hadrealm = (in->snaprealm ? true : false);
in->decode_snap_blob(m->snapbl);
ceph_assert(in->snaprealm);
if (!hadrealm)
Expand All @@ -11263,14 +11303,15 @@ void MDCache::handle_dentry_unlink(const cref_t<MDentryUnlink> &m)
if (in->is_any_caps() &&
!in->state_test(CInode::STATE_EXPORTINGCAPS))
migrator->export_caps(in);

straydn = NULL;
} else {
ceph_assert(!straydn);
ceph_assert(dnl->is_remote());
dn->dir->unlink_inode(dn);
}
ceph_assert(dnl->is_null());
dn->state_clear(CDentry::STATE_UNLINKING);
}
}

Expand All @@ -11282,8 +11323,36 @@ void MDCache::handle_dentry_unlink(const cref_t<MDentryUnlink> &m)
trim_dentry(straydn, ex);
send_expire_messages(ex);
}
return;

ack:
ack = make_message<MDentryUnlinkAck>(m->get_dirfrag(), m->get_dn());
mds->send_message(ack, m->get_connection());
}

void MDCache::handle_dentry_unlink_ack(const cref_t<MDentryUnlinkAck> &m)
{
CDir *dir = get_dirfrag(m->get_dirfrag());
if (!dir) {
dout(7) << __func__ << " don't have dirfrag " << m->get_dirfrag() << dendl;
} else {
CDentry *dn = dir->lookup(m->get_dn());
if (!dn) {
dout(7) << __func__ << " don't have dentry " << *dir << " dn " << m->get_dn() << dendl;
} else {
dout(7) << __func__ << " on " << *dn << " ref "
<< dn->replica_unlinking_ref << " -> "
<< dn->replica_unlinking_ref - 1 << dendl;
dn->replica_unlinking_ref--;
if (!dn->replica_unlinking_ref) {
MDSContext::vec finished;
dn->take_waiting(CDentry::WAIT_UNLINK_STATE, finished);
mds->queue_waiters(finished);
}
dn->put(CDentry::PIN_WAITUNLINKSTATE);
}
}
}



Expand Down
3 changes: 2 additions & 1 deletion src/mds/MDCache.h
Expand Up @@ -883,7 +883,7 @@ class MDCache {
void encode_remote_dentry_link(CDentry::linkage_t *dnl, bufferlist& bl);
void decode_remote_dentry_link(CDir *dir, CDentry *dn, bufferlist::const_iterator& p);
void send_dentry_link(CDentry *dn, MDRequestRef& mdr);
void send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& mdr);
int send_dentry_unlink(CDentry *dn, CDentry *straydn, MDRequestRef& mdr, bool unlinking=false);

void wait_for_uncommitted_fragment(dirfrag_t dirfrag, MDSContext *c) {
uncommitted_fragments.at(dirfrag).waiters.push_back(c);
Expand Down Expand Up @@ -1126,6 +1126,7 @@ class MDCache {
void handle_discover_reply(const cref_t<MDiscoverReply> &m);
void handle_dentry_link(const cref_t<MDentryLink> &m);
void handle_dentry_unlink(const cref_t<MDentryUnlink> &m);
void handle_dentry_unlink_ack(const cref_t<MDentryUnlinkAck> &m);

int dump_cache(std::string_view fn, Formatter *f, double timeout);

Expand Down

0 comments on commit b640f94

Please sign in to comment.