Merge PR #30972 into master
* refs/pull/30972/head:
	mds: move 'traverse to auth' logic into MDCache::path_traverse
	mds: cleanup Capability initialization
	mds: cleanup code that kills session requests
	mds: cleanup Server::try_open_auth_dirfrag()
	mds: restrict path that walks into snapdir
	mds: use single map to track pinned & auth_pinned objects
	mds: define operator<(...) for MutationImpl::LockOp
	mds: cleanup Locker::acquire_locks()
	mds: introduce Locker::wrlock_try()
	mds: explicitly specify if MDCache::path_traverse() needs to check null dentry
	mds: cleanup dentry non-readable check in MDCache::path_traverse()
	mds: change MDCache::path_traverse()'s 'onfail' argument to flags

Reviewed-by: Patrick Donnelly <pdonnell@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
batrick committed Nov 25, 2019
2 parents 2c06beb + fdaacd7 commit d7821ce
Showing 16 changed files with 408 additions and 451 deletions.
5 changes: 5 additions & 0 deletions src/include/filepath.h
@@ -230,6 +230,11 @@ class filepath {

return false;
}

bool is_last_snap() const {
// walk into snapdir?
return depth() > 0 && bits[0].length() == 0;
}
};

WRITE_CLASS_ENCODER(filepath)
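
The new filepath::is_last_snap() helper reports whether the path has at least one component and its first component is empty, which the inline comment ties to a walk into the snapdir. A minimal sketch of the same predicate on a plain container; the helper name and sample data here are illustrative, not part of this commit:

#include <string>
#include <vector>

// Toy stand-in for filepath's component list ("bits"); not Ceph code.
static bool is_last_snap_like(const std::vector<std::string>& bits) {
  // mirrors filepath::is_last_snap(): depth() > 0 && bits[0].length() == 0
  return !bits.empty() && bits[0].empty();
}

int main() {
  std::vector<std::string> snapdir_walk = {"", "snapname"};  // hypothetical component lists
  std::vector<std::string> normal_walk  = {"dir", "file"};
  return (is_last_snap_like(snapdir_walk) && !is_last_snap_like(normal_walk)) ? 0 : 1;
}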
10 changes: 3 additions & 7 deletions src/mds/Capability.cc
@@ -146,15 +146,9 @@ void Capability::revoke_info::generate_test_instances(std::list<Capability::revo
* Capability
*/
Capability::Capability(CInode *i, Session *s, uint64_t id) :
client_follows(0),
client_xattr_version(0), client_inline_version(0),
last_rbytes(0), last_rsize(0),
item_session_caps(this), item_snaprealm_caps(this),
item_revoking_caps(this), item_client_revoking_caps(this),
inode(i), session(s),
cap_id(id), _wanted(0), num_revoke_warnings(0),
_pending(0), _issued(0), last_sent(0), last_issue(0), mseq(0),
suppress(0), state(0)
inode(i), session(s), cap_id(id)
{
if (session) {
session->touch_cap_bottom(this);
@@ -171,6 +165,8 @@ Capability::Capability(CInode *i, Session *s, uint64_t id) :
if (!conn->has_feature(CEPH_FEATURE_MDS_QUOTA))
state |= STATE_NOQUOTA;
}
} else {
cap_gen = 0;
}
}

26 changes: 13 additions & 13 deletions src/mds/Capability.h
@@ -350,11 +350,11 @@ class Capability : public Counter<Capability> {
void dump(Formatter *f) const;
static void generate_test_instances(std::list<Capability*>& ls);

snapid_t client_follows;
version_t client_xattr_version;
version_t client_inline_version;
int64_t last_rbytes;
int64_t last_rsize;
snapid_t client_follows = 0;
version_t client_xattr_version = 0;
version_t client_inline_version = 0;
int64_t last_rbytes = 0;
int64_t last_rsize = 0;

xlist<Capability*>::item item_session_caps;
xlist<Capability*>::item item_snaprealm_caps;
@@ -380,24 +380,24 @@ class Capability : public Counter<Capability> {
uint64_t cap_id;
uint32_t cap_gen;

__u32 _wanted; // what the client wants (ideally)
__u32 _wanted = 0; // what the client wants (ideally)

utime_t last_issue_stamp;
utime_t last_revoke_stamp;
unsigned num_revoke_warnings;
unsigned num_revoke_warnings = 0;

// track in-flight caps --------------
// - add new caps to _pending
// - track revocations in _revokes list
__u32 _pending, _issued;
__u32 _pending = 0, _issued = 0;
mempool::mds_co::list<revoke_info> _revokes;

ceph_seq_t last_sent;
ceph_seq_t last_issue;
ceph_seq_t mseq;
ceph_seq_t last_sent = 0;
ceph_seq_t last_issue = 0;
ceph_seq_t mseq = 0;

int suppress;
unsigned state;
int suppress = 0;
unsigned state = 0;
};

WRITE_CLASS_ENCODER(Capability::Export)
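
The Capability cleanup replaces most of the constructor's member-initializer list with C++11 in-class default member initializers, so the constructor only sets what actually depends on its arguments. A minimal sketch of the pattern, using an illustrative class rather than the real Capability:

#include <cstdint>

// Illustrative class, not the real Capability.
struct Cap {
  explicit Cap(uint64_t id) : cap_id(id) {}  // only argument-dependent members in the ctor

  uint64_t cap_id;
  uint32_t num_revoke_warnings = 0;  // defaults live next to the declarations
  int      suppress = 0;
  unsigned state = 0;
};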
128 changes: 77 additions & 51 deletions src/mds/Locker.cc
@@ -237,7 +237,7 @@ bool Locker::acquire_locks(MDRequestRef& mdr,
} else {
// if the lock is the latest locked one, it's possible that slave mds got the lock
// while there are recovering mds.
if (!mdr->locks.count(lock) || lock == *mdr->locks.rbegin())
if (!mdr->locks.count(lock) || lock == mdr->locks.rbegin()->lock)
wait = true;
}
if (wait) {
@@ -371,7 +371,7 @@ bool Locker::acquire_locks(MDRequestRef& mdr,
dout(10) << " can't auth_pin (freezing?), waiting to authpin " << *object << dendl;
object->add_waiter(MDSCacheObject::WAIT_UNFREEZE, new C_MDS_RetryRequest(mdcache, mdr));

if (!mdr->remote_auth_pins.empty())
if (mdr->is_any_remote_auth_pin())
notify_freeze_waiter(object);

return false;
@@ -392,10 +392,12 @@ bool Locker::acquire_locks(MDRequestRef& mdr,
// request remote auth_pins
if (!mustpin_remote.empty()) {
marker.message = "requesting remote authpins";
for (const auto& p : mdr->remote_auth_pins) {
for (const auto& p : mdr->object_states) {
if (p.second.remote_auth_pinned == MDS_RANK_NONE)
continue;
if (mustpin.count(p.first)) {
ceph_assert(p.second == p.first->authority().first);
map<mds_rank_t, set<MDSCacheObject*> >::iterator q = mustpin_remote.find(p.second);
ceph_assert(p.second.remote_auth_pinned == p.first->authority().first);
auto q = mustpin_remote.find(p.second.remote_auth_pinned);
if (q != mustpin_remote.end())
q->second.insert(p.first);
}
@@ -446,24 +448,26 @@ bool Locker::acquire_locks(MDRequestRef& mdr,
// make sure they match currently acquired locks.
auto existing = mdr->locks.begin();
for (const auto& p : lov) {
auto lock = p.lock;

bool need_wrlock = p.is_wrlock();
bool need_remote_wrlock = p.is_remote_wrlock();

// already locked?
if (existing != mdr->locks.end() && existing->lock == p.lock) {
if (existing != mdr->locks.end() && existing->lock == lock) {
// right kind?
auto it = existing++;
auto have = *it; // don't reference

if (have.is_xlock() && p.is_xlock()) {
dout(10) << " already xlocked " << *have.lock << " " << *have.lock->get_parent() << dendl;
dout(10) << " already xlocked " << *lock << " " << *lock->get_parent() << dendl;
continue;
}

if (have.is_remote_wrlock() &&
(!need_remote_wrlock || have.wrlock_target != p.wrlock_target)) {
dout(10) << " unlocking remote_wrlock on wrong mds." << have.wrlock_target
<< " " << *have.lock << " " << *have.lock->get_parent() << dendl;
<< " " << *lock << " " << *lock->get_parent() << dendl;
remote_wrlock_finish(it, mdr.get());
have.clear_remote_wrlock();
}
@@ -472,24 +476,24 @@ bool Locker::acquire_locks(MDRequestRef& mdr,
if (need_wrlock == have.is_wrlock() &&
need_remote_wrlock == have.is_remote_wrlock()) {
if (need_wrlock)
dout(10) << " already wrlocked " << *have.lock << " " << *have.lock->get_parent() << dendl;
dout(10) << " already wrlocked " << *lock << " " << *lock->get_parent() << dendl;
if (need_remote_wrlock)
dout(10) << " already remote_wrlocked " << *have.lock << " " << *have.lock->get_parent() << dendl;
dout(10) << " already remote_wrlocked " << *lock << " " << *lock->get_parent() << dendl;
continue;
}

if (have.is_wrlock()) {
if (!need_wrlock)
dout(10) << " unlocking extra " << *have.lock << " " << *have.lock->get_parent() << dendl;
dout(10) << " unlocking extra " << *lock << " " << *lock->get_parent() << dendl;
else if (need_remote_wrlock) // acquire remote_wrlock first
dout(10) << " unlocking out-of-order " << *have.lock << " " << *have.lock->get_parent() << dendl;
dout(10) << " unlocking out-of-order " << *lock << " " << *lock->get_parent() << dendl;
bool need_issue = false;
wrlock_finish(it, mdr.get(), &need_issue);
if (need_issue)
issue_set.insert(static_cast<CInode*>(have.lock->get_parent()));
issue_set.insert(static_cast<CInode*>(lock->get_parent()));
}
} else if (have.is_rdlock() && p.is_rdlock()) {
dout(10) << " already rdlocked " << *have.lock << " " << *have.lock->get_parent() << dendl;
dout(10) << " already rdlocked " << *lock << " " << *lock->get_parent() << dendl;
continue;
}
}
@@ -516,61 +520,65 @@ bool Locker::acquire_locks(MDRequestRef& mdr,
}

// lock
if (mdr->locking && p.lock != mdr->locking) {
if (mdr->locking && lock != mdr->locking) {
cancel_locking(mdr.get(), &issue_set);
}
if (p.is_xlock()) {
marker.message = "failed to xlock, waiting";
if (!xlock_start(p.lock, mdr))
if (!xlock_start(lock, mdr)) {
marker.message = "failed to xlock, waiting";
goto out;
dout(10) << " got xlock on " << *p.lock << " " << *p.lock->get_parent() << dendl;
}
dout(10) << " got xlock on " << *lock << " " << *lock->get_parent() << dendl;
} else if (need_wrlock || need_remote_wrlock) {
if (need_remote_wrlock && !mdr->is_remote_wrlocked(p)) {
if (need_remote_wrlock && !mdr->is_remote_wrlocked(lock)) {
marker.message = "waiting for remote wrlocks";
remote_wrlock_start(p, p.wrlock_target, mdr);
remote_wrlock_start(lock, p.wrlock_target, mdr);
goto out;
}
if (need_wrlock) {
marker.message = "failed to wrlock, waiting";
client_t _client = p.is_state_pin() ? p.lock->get_excl_client() : client;
if (need_remote_wrlock && !p.lock->can_wrlock(_client)) {
client_t _client = p.is_state_pin() ? lock->get_excl_client() : client;
if (need_remote_wrlock && !lock->can_wrlock(_client)) {
marker.message = "failed to wrlock, dropping remote wrlock and waiting";
// can't take the wrlock because the scatter lock is gathering. need to
// release the remote wrlock, so that the gathering process can finish.
auto it = mdr->locks.end();
++it;
remote_wrlock_finish(it, mdr.get());
remote_wrlock_start(p, p.wrlock_target, mdr);
remote_wrlock_start(lock, p.wrlock_target, mdr);
goto out;
}
// nowait if we have already gotten remote wrlock
if (!wrlock_start(p, mdr, need_remote_wrlock))
if (!wrlock_start(lock, mdr)) {
ceph_assert(!need_remote_wrlock);
marker.message = "failed to wrlock, waiting";
goto out;
dout(10) << " got wrlock on " << *p.lock << " " << *p.lock->get_parent() << dendl;
}
dout(10) << " got wrlock on " << *lock << " " << *lock->get_parent() << dendl;
}
} else {
ceph_assert(mdr->is_master());
if (p.lock->needs_recover()) {
if (lock->needs_recover()) {
if (mds->is_cluster_degraded()) {
if (!mdr->is_queued_for_replay()) {
// see comments in SimpleLock::set_state_rejoin() and
// ScatterLock::encode_state_for_rejoin()
drop_locks(mdr.get());
mds->wait_for_cluster_recovered(new C_MDS_RetryRequest(mdcache, mdr));
dout(10) << " rejoin recovering " << *p.lock << " " << *p.lock->get_parent()
dout(10) << " rejoin recovering " << *lock << " " << *lock->get_parent()
<< ", waiting for cluster recovered" << dendl;
marker.message = "rejoin recovering lock, waiting for cluster recovered";
return false;
}
} else {
p.lock->clear_need_recover();
lock->clear_need_recover();
}
}

marker.message = "failed to rdlock, waiting";
if (!rdlock_start(p, mdr))
if (!rdlock_start(lock, mdr)) {
marker.message = "failed to rdlock, waiting";
goto out;
dout(10) << " got rdlock on " << *p.lock << " " << *p.lock->get_parent() << dendl;
}
dout(10) << " got rdlock on " << *lock << " " << *lock->get_parent() << dendl;
}
}

@@ -1398,9 +1406,10 @@ bool Locker::can_rdlock_set(MutationImpl::LockOpVec& lov)
{
dout(10) << "can_rdlock_set " << dendl;
for (const auto& p : lov) {
auto lock = p.lock;
ceph_assert(p.is_rdlock());
if (!p.lock->can_rdlock(-1)) {
dout(10) << "can_rdlock_set can't rdlock " << *p << " on " << *p.lock->get_parent() << dendl;
if (!lock->can_rdlock(-1)) {
dout(10) << "can_rdlock_set can't rdlock " << *lock << " on " << *lock->get_parent() << dendl;
return false;
}
}
@@ -1433,7 +1442,36 @@ void Locker::wrlock_force(SimpleLock *lock, MutationRef& mut)
mut->locks.emplace(lock, MutationImpl::LockOp::WRLOCK);
}

bool Locker::wrlock_start(const MutationImpl::LockOp &op, MDRequestRef& mut, bool nowait)
bool Locker::wrlock_try(SimpleLock *lock, MutationRef& mut)
{
dout(10) << "wrlock_try " << *lock << " on " << *lock->get_parent() << dendl;

while (1) {
if (lock->can_wrlock(mut->get_client())) {
lock->get_wrlock();
mut->locks.emplace(lock, MutationImpl::LockOp::WRLOCK);
return true;
}
if (!lock->is_stable())
break;
CInode *in = static_cast<CInode *>(lock->get_parent());
if (!in->is_auth())
break;
// don't do nested lock state change if we have dirty scatterdata and
// may scatter_writebehind or start_scatter, because nowait==true implies
// that the caller already has a log entry open!
if (lock->is_dirty())
return false;
ScatterLock *slock = static_cast<ScatterLock*>(lock);
if (in->has_subtree_or_exporting_dirfrag() || slock->get_scatter_wanted())
scatter_mix(slock);
else
simple_lock(lock);
}
return false;
}

bool Locker::wrlock_start(const MutationImpl::LockOp &op, MDRequestRef& mut)
{
SimpleLock *lock = op.lock;
if (lock->get_type() == CEPH_LOCK_IVERSION ||
@@ -1444,7 +1482,7 @@ bool Locker::wrlock_start(const MutationImpl::LockOp &op, MDRequestRef& mut, boo

CInode *in = static_cast<CInode *>(lock->get_parent());
client_t client = op.is_state_pin() ? lock->get_excl_client() : mut->get_client();
bool want_scatter = !nowait && lock->get_parent()->is_auth() &&
bool want_scatter = lock->get_parent()->is_auth() &&
(in->has_subtree_or_exporting_dirfrag() ||
static_cast<ScatterLock*>(lock)->get_scatter_wanted());

@@ -1467,20 +1505,10 @@ bool Locker::wrlock_start(const MutationImpl::LockOp &op, MDRequestRef& mut, boo
break;

if (in->is_auth()) {
// don't do nested lock state change if we have dirty scatterdata and
// may scatter_writebehind or start_scatter, because nowait==true implies
// that the caller already has a log entry open!
if (nowait && lock->is_dirty())
return false;

if (want_scatter)
scatter_mix(static_cast<ScatterLock*>(lock));
else
simple_lock(lock);

if (nowait && !lock->can_wrlock(client))
return false;

} else {
// replica.
// auth should be auth_pinned (see acquire_locks wrlock weird mustpin case).
@@ -1495,11 +1523,9 @@ bool Locker::wrlock_start(const MutationImpl::LockOp &op, MDRequestRef& mut, boo
}
}

if (!nowait) {
dout(7) << "wrlock_start waiting on " << *lock << " on " << *lock->get_parent() << dendl;
lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mut));
nudge_log(lock);
}
dout(7) << "wrlock_start waiting on " << *lock << " on " << *lock->get_parent() << dendl;
lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mut));
nudge_log(lock);

return false;
}
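
The new Locker::wrlock_try() takes the write lock only if it can do so right away: if the lock is currently wrlockable it is taken and recorded on the mutation; if it is stable, locally authoritative, and not carrying dirty scatter data, the lock state is nudged (scatter_mix() or simple_lock()) and the check repeats; otherwise it returns false without queueing a waiter. That is what lets wrlock_start() drop its nowait parameter and always register a waiter and nudge the log. A self-contained toy model of that control flow; the types and the assumption that the nudge succeeds are illustrative, not MDS code:

// Toy model of the wrlock_try() control flow; not Ceph code.
struct ToyLock {
  bool wrlockable = false;  // can_wrlock()
  bool stable = true;       // is_stable()
  bool dirty = false;       // is_dirty() (dirty scatter data)
};

static bool wrlock_try_like(ToyLock& lk, bool am_auth) {
  while (true) {
    if (lk.wrlockable)
      return true;           // got the lock
    if (!lk.stable || !am_auth)
      break;                 // cannot drive a state change from here
    if (lk.dirty)
      return false;          // would need an open log entry; bail out
    // nudge toward a wrlockable state (scatter_mix()/simple_lock() in the MDS);
    // the toy simply assumes the nudge succeeds.
    lk.wrlockable = true;
  }
  return false;
}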
3 changes: 2 additions & 1 deletion src/mds/Locker.h
@@ -93,7 +93,8 @@ class Locker {
void rdlock_take_set(MutationImpl::LockOpVec& lov, MutationRef& mut);

void wrlock_force(SimpleLock *lock, MutationRef& mut);
bool wrlock_start(const MutationImpl::LockOp &op, MDRequestRef& mut, bool nowait=false);
bool wrlock_try(SimpleLock *lock, MutationRef& mut);
bool wrlock_start(const MutationImpl::LockOp &op, MDRequestRef& mut);
void wrlock_finish(const MutationImpl::lock_iterator& it, MutationImpl *mut, bool *pneed_issue);

void remote_wrlock_start(SimpleLock *lock, mds_rank_t target, MDRequestRef& mut);
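
Several hunks above reflect the switch from separate containers for pinned and remote-auth-pinned objects to a single per-object state map on the mutation (object_states, queried via is_any_remote_auth_pin()). A rough sketch of that shape; the type and member layout below are illustrative, not the actual MutationImpl definitions:

#include <map>

// Illustrative shape only; not the actual MutationImpl definitions.
using mds_rank_t = int;
constexpr mds_rank_t MDS_RANK_NONE = -1;

struct Obj {};  // stand-in for MDSCacheObject

struct MutationLike {
  struct ObjectState {
    bool auth_pinned = false;                       // assumed flag for a local auth_pin
    mds_rank_t remote_auth_pinned = MDS_RANK_NONE;  // rank holding a remote auth_pin
  };

  std::map<Obj*, ObjectState> object_states;        // one map instead of separate sets

  bool is_any_remote_auth_pin() const {
    for (const auto& p : object_states)
      if (p.second.remote_auth_pinned != MDS_RANK_NONE)
        return true;
    return false;
  }
};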
