Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mds: misc multimds fixes #13227

Merged
Merged 22 commits on Feb 26, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
a1499bc
mds: stop purging strays when mds is being shutdown
ukernel Feb 2, 2017
20d4337
mds: drop superfluous MMDSOpenInoReply
ukernel Feb 2, 2017
086da0c
mds: avoid journaling unnecessary dirfrags in ESubtreeMap
ukernel Feb 3, 2017
b8ceaef
mds: set STATE_AUTH in MDSCacheObject::decode_import
ukernel Feb 6, 2017
9c16f83
mds: avoid zero replica_nonce
ukernel Feb 6, 2017
7eed426
mds: tracking committing and rolling back slave requests
ukernel Feb 6, 2017
851003b
mds: log master commit after all slave commits get journaled
ukernel Feb 6, 2017
ed69a32
mds: cleanup ambiguous slave update when master mds fails
ukernel Feb 6, 2017
84a661b
mds: kill export finish waiters
ukernel Feb 7, 2017
2480b8d
mds: wait acknowledgment for export abort notification
ukernel Feb 7, 2017
7fc4bd1
mds: disambiguate other mds' imports when cluster enters rejoin state
ukernel Feb 7, 2017
b7b1d99
mds: properly set ambiguous auth on auth mds of rename source inode
ukernel Feb 8, 2017
246cead
mds: handle race of freezing auth pin
ukernel Feb 8, 2017
f25f054
mds: note subtree bounds when rolling back rename
ukernel Feb 9, 2017
631d638
mds: cleanup CInode::encode_inodestat()
ukernel Feb 10, 2017
3553282
mds: issue new caps to client even when session is stale
ukernel Feb 10, 2017
7fb8e01
mds: fix deadlock when wrlock and remote_wrlock the same lock
ukernel Feb 13, 2017
4f15ad0
mds: properly update replica inode's ctime
ukernel Feb 14, 2017
5d91ca3
mds: avoid race between cache expire and MDentryLink
ukernel Feb 15, 2017
fee353b
mds: don't call kick_discovers() for recovering mds twice
ukernel Feb 17, 2017
5f21e1f
mds: drop MDiscover/MMDSOpenIno messages if mds state < REJOIN
ukernel Feb 17, 2017
9eea839
mds: properly set default dir_hash for directory inodes
ukernel Feb 20, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions src/mds/CDentry.h
Expand Up @@ -315,6 +315,7 @@ class CDentry : public MDSCacheObject, public LRUObject {
_mark_dirty(ls);
if (!replica_map.empty())
get(PIN_REPLICATED);
replica_nonce = 0;
}

// -- locking --
Expand Down
3 changes: 2 additions & 1 deletion src/mds/CDir.cc
Expand Up @@ -2363,7 +2363,8 @@ void CDir::decode_import(bufferlist::iterator& blp, utime_t now, LogSegment *ls)
unsigned s;
::decode(s, blp);
state &= MASK_STATE_IMPORT_KEPT;
state |= (s & MASK_STATE_EXPORTED);
state_set(STATE_AUTH | (s & MASK_STATE_EXPORTED));

if (is_dirty()) {
get(PIN_DIRTY);
_mark_dirty(ls);
Expand Down
69 changes: 49 additions & 20 deletions src/mds/CInode.cc
Expand Up @@ -1421,6 +1421,7 @@ void CInode::encode_lock_state(int type, bufferlist& bl)
case CEPH_LOCK_IFILE:
if (is_auth()) {
::encode(inode.version, bl);
::encode(inode.ctime, bl);
::encode(inode.mtime, bl);
::encode(inode.atime, bl);
::encode(inode.time_warp_seq, bl);
Expand Down Expand Up @@ -1504,11 +1505,13 @@ void CInode::encode_lock_state(int type, bufferlist& bl)

case CEPH_LOCK_IXATTR:
::encode(inode.version, bl);
::encode(inode.ctime, bl);
::encode(xattrs, bl);
break;

case CEPH_LOCK_ISNAP:
::encode(inode.version, bl);
::encode(inode.ctime, bl);
encode_snap(bl);
break;

Expand All @@ -1520,6 +1523,7 @@ void CInode::encode_lock_state(int type, bufferlist& bl)
case CEPH_LOCK_IPOLICY:
if (inode.is_dir()) {
::encode(inode.version, bl);
::encode(inode.ctime, bl);
::encode(inode.layout, bl, mdcache->mds->mdsmap->get_up_features());
::encode(inode.quota, bl);
}
Expand Down Expand Up @@ -1617,6 +1621,8 @@ void CInode::decode_lock_state(int type, bufferlist& bl)
case CEPH_LOCK_IFILE:
if (!is_auth()) {
::decode(inode.version, p);
::decode(tm, p);
if (inode.ctime < tm) inode.ctime = tm;
::decode(inode.mtime, p);
::decode(inode.atime, p);
::decode(inode.time_warp_seq, p);
Expand Down Expand Up @@ -1751,12 +1757,16 @@ void CInode::decode_lock_state(int type, bufferlist& bl)

case CEPH_LOCK_IXATTR:
::decode(inode.version, p);
::decode(tm, p);
if (inode.ctime < tm) inode.ctime = tm;
::decode(xattrs, p);
break;

case CEPH_LOCK_ISNAP:
{
::decode(inode.version, p);
::decode(tm, p);
if (inode.ctime < tm) inode.ctime = tm;
snapid_t seq = 0;
if (snaprealm)
seq = snaprealm->srnode.seq;
Expand All @@ -1774,6 +1784,8 @@ void CInode::decode_lock_state(int type, bufferlist& bl)
case CEPH_LOCK_IPOLICY:
if (inode.is_dir()) {
::decode(inode.version, p);
::decode(tm, p);
if (inode.ctime < tm) inode.ctime = tm;
::decode(inode.layout, p);
::decode(inode.quota, p);
}
Expand Down Expand Up @@ -3052,18 +3064,6 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session,

bool valid = true;

// do not issue caps if inode differs from readdir snaprealm
SnapRealm *realm = find_snaprealm();
bool no_caps = session->is_stale() ||
(realm && dir_realm && realm != dir_realm) ||
is_frozen() || state_test(CInode::STATE_EXPORTINGCAPS);
if (no_caps)
dout(20) << "encode_inodestat no caps"
<< (session->is_stale()?", session stale ":"")
<< ((realm && dir_realm && realm != dir_realm)?", snaprealm differs ":"")
<< (state_test(CInode::STATE_EXPORTINGCAPS)?", exporting caps":"")
<< (is_frozen()?", frozen inode":"") << dendl;

// pick a version!
inode_t *oi = &inode;
inode_t *pi = get_projected_inode();
Expand Down Expand Up @@ -3102,6 +3102,23 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session,
<< " not match snapid " << snapid << dendl;
}
}

SnapRealm *realm = find_snaprealm();

bool no_caps = !valid ||
session->is_stale() ||
(dir_realm && realm != dir_realm) ||
is_frozen() ||
state_test(CInode::STATE_EXPORTINGCAPS);
if (no_caps)
dout(20) << "encode_inodestat no caps"
<< (!valid?", !valid":"")
<< (session->is_stale()?", session stale ":"")
<< ((dir_realm && realm != dir_realm)?", snaprealm differs ":"")
<< (is_frozen()?", frozen inode":"")
<< (state_test(CInode::STATE_EXPORTINGCAPS)?", exporting caps":"")
<< dendl;


// "fake" a version that is old (stable) version, +1 if projected.
version_t version = (oi->version * 2) + is_projected();
Expand Down Expand Up @@ -3224,7 +3241,7 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session,
ecap.mseq = 0;
ecap.realm = 0;
} else {
if (!no_caps && valid && !cap) {
if (!no_caps && !cap) {
// add a new cap
cap = add_client_cap(client, session, realm);
if (is_auth()) {
Expand All @@ -3235,26 +3252,36 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session,
}
}

if (!no_caps && valid && cap) {
int issue = 0;
if (!no_caps && cap) {
int likes = get_caps_liked();
int allowed = get_caps_allowed_for_client(session, file_i);
int issue = (cap->wanted() | likes) & allowed;
issue = (cap->wanted() | likes) & allowed;
cap->issue_norevoke(issue);
issue = cap->pending();
dout(10) << "encode_inodestat issuing " << ccap_string(issue)
<< " seq " << cap->get_last_seq() << dendl;
} else if (cap && cap->is_new() && !dir_realm) {
// always issue new caps to the client, otherwise the caps get lost
assert(cap->is_stale());
issue = cap->pending() | CEPH_CAP_PIN;
cap->issue_norevoke(issue);
dout(10) << "encode_inodestat issuing " << ccap_string(issue)
<< " seq " << cap->get_last_seq()
<< "(stale|new caps)" << dendl;
}

if (issue) {
cap->set_last_issue();
cap->set_last_issue_stamp(ceph_clock_now());
cap->clear_new();
ecap.caps = issue;
ecap.wanted = cap->wanted();
ecap.cap_id = cap->get_cap_id();
ecap.seq = cap->get_last_seq();
dout(10) << "encode_inodestat issuing " << ccap_string(issue)
<< " seq " << cap->get_last_seq() << dendl;
ecap.mseq = cap->get_mseq();
ecap.realm = realm->inode->ino();
} else {
if (cap)
cap->clear_new();
ecap.cap_id = 0;
ecap.caps = 0;
ecap.seq = 0;
Expand Down Expand Up @@ -3595,7 +3622,8 @@ void CInode::decode_import(bufferlist::iterator& p,

unsigned s;
::decode(s, p);
state |= (s & MASK_STATE_EXPORTED);
state_set(STATE_AUTH | (s & MASK_STATE_EXPORTED));

if (is_dirty()) {
get(PIN_DIRTY);
_mark_dirty(ls);
Expand All @@ -3610,6 +3638,7 @@ void CInode::decode_import(bufferlist::iterator& p,
::decode(replica_map, p);
if (!replica_map.empty())
get(PIN_REPLICATED);
replica_nonce = 0;

// decode fragstat info on bounding cdirs
bufferlist bounding;
Expand Down
4 changes: 4 additions & 0 deletions src/mds/Capability.h
Expand Up @@ -107,6 +107,7 @@ class Capability {

const static unsigned STATE_STALE = (1<<0);
const static unsigned STATE_NEW = (1<<1);
const static unsigned STATE_IMPORTING = (1<<2);


Capability(CInode *i = NULL, uint64_t id = 0, client_t c = 0) :
Expand Down Expand Up @@ -253,6 +254,9 @@ class Capability {
bool is_new() { return state & STATE_NEW; }
void mark_new() { state |= STATE_NEW; }
void clear_new() { state &= ~STATE_NEW; }
bool is_importing() { return state & STATE_IMPORTING; }
void mark_importing() { state |= STATE_IMPORTING; }
void clear_importing() { state &= ~STATE_IMPORTING; }

CInode *get_inode() { return inode; }
client_t get_client() const { return client; }
Expand Down
15 changes: 12 additions & 3 deletions src/mds/Locker.cc
Expand Up @@ -330,8 +330,16 @@ bool Locker::acquire_locks(MDRequestRef& mdr,

dout(10) << " must authpin " << *object << dendl;

if (mdr->is_auth_pinned(object))
continue;
if (mdr->is_auth_pinned(object)) {
if (object != (MDSCacheObject*)auth_pin_freeze)
continue;
if (mdr->more()->is_remote_frozen_authpin) {
if (mdr->more()->rename_inode == auth_pin_freeze)
continue;
// unfreeze auth pin for the wrong inode
mustpin_remote[mdr->more()->rename_inode->authority().first].size();
}
}

if (!object->is_auth()) {
if (!mdr->locks.empty())
Expand Down Expand Up @@ -531,7 +539,8 @@ bool Locker::acquire_locks(MDRequestRef& mdr,
remote_wrlock_start(*p, (*remote_wrlocks)[*p], mdr);
goto out;
}
if (!wrlock_start(*p, mdr))
// nowait if we have already gotten remote wrlock
if (!wrlock_start(*p, mdr, need_remote_wrlock))
goto out;
dout(10) << " got wrlock on " << **p << " " << *(*p)->get_parent() << dendl;
}
Expand Down