Skip to content

Commit

Permalink
Merge branch 'wip-mds'
Browse files Browse the repository at this point in the history
Reviewed-by: Sage Weil <sage@inktank.com>
  • Loading branch information
Sage Weil committed Jan 18, 2013
2 parents d81ac84 + 2dc2b48 commit 5a384f4
Show file tree
Hide file tree
Showing 23 changed files with 454 additions and 200 deletions.
7 changes: 5 additions & 2 deletions src/include/ceph_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -536,14 +536,17 @@ int ceph_flags_to_mode(int flags);
#define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */
#define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */

#define CEPH_CAP_SIMPLE_BITS 2
#define CEPH_CAP_FILE_BITS 8

/* per-lock shift */
#define CEPH_CAP_SAUTH 2
#define CEPH_CAP_SLINK 4
#define CEPH_CAP_SXATTR 6
#define CEPH_CAP_SFILE 8
#define CEPH_CAP_SFLOCK 20
#define CEPH_CAP_SFLOCK 20

#define CEPH_CAP_BITS 22
#define CEPH_CAP_BITS 22

/* composed values */
#define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH)
Expand Down
61 changes: 59 additions & 2 deletions src/mds/AnchorServer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -142,19 +142,21 @@ void AnchorServer::_prepare(bufferlist &bl, uint64_t reqid, int bymds)
}
inc(ino);
pending_create[version] = ino; // so we can undo
pending_ops[ino].push_back(pair<version_t, Context*>(version, NULL));
break;


case TABLE_OP_DESTROY:
version++;
pending_destroy[version] = ino;
pending_ops[ino].push_back(pair<version_t, Context*>(version, NULL));
break;

case TABLE_OP_UPDATE:
::decode(trace, p);
version++;
pending_update[version].first = ino;
pending_update[version].second = trace;
pending_ops[ino].push_back(pair<version_t, Context*>(version, NULL));
break;

default:
Expand All @@ -163,8 +165,56 @@ void AnchorServer::_prepare(bufferlist &bl, uint64_t reqid, int bymds)
//dump();
}

void AnchorServer::_commit(version_t tid)
bool AnchorServer::check_pending(version_t tid, MMDSTableRequest *req, list<Context *>& finished)
{
inodeno_t ino;
if (pending_create.count(tid))
ino = pending_create[tid];
else if (pending_destroy.count(tid))
ino = pending_destroy[tid];
else if (pending_update.count(tid))
ino = pending_update[tid].first;
else
assert(0);

assert(pending_ops.count(ino));
list< pair<version_t, Context*> >& pending = pending_ops[ino];
list< pair<version_t, Context*> >::iterator p = pending.begin();
if (p->first == tid) {
assert(p->second == NULL);
} else {
while (p != pending.end()) {
if (p->first == tid)
break;
p++;
}
assert(p != pending.end());
assert(p->second == NULL);
// not the earliest pending operation, wait if it's a commit
if (req) {
p->second = new C_MDS_RetryMessage(mds, req);
return false;
}
}

pending.erase(p);
if (pending.empty()) {
pending_ops.erase(ino);
} else {
for (p = pending.begin(); p != pending.end() && p->second; p++) {
finished.push_back(p->second);
p->second = NULL;
}
}
return true;
}

bool AnchorServer::_commit(version_t tid, MMDSTableRequest *req)
{
list<Context *> finished;
if (!check_pending(tid, req, finished))
return false;

if (pending_create.count(tid)) {
dout(7) << "commit " << tid << " create " << pending_create[tid] << dendl;
pending_create.erase(tid);
Expand Down Expand Up @@ -206,10 +256,16 @@ void AnchorServer::_commit(version_t tid)
// bump version.
version++;
//dump();

mds->queue_waiters(finished);
return true;
}

void AnchorServer::_rollback(version_t tid)
{
list<Context *> finished;
check_pending(tid, NULL, finished);

if (pending_create.count(tid)) {
inodeno_t ino = pending_create[tid];
dout(7) << "rollback " << tid << " create " << ino << dendl;
Expand All @@ -234,6 +290,7 @@ void AnchorServer::_rollback(version_t tid)
// bump version.
version++;
//dump();
mds->queue_waiters(finished);
}

/* This function DOES put the passed message before returning */
Expand Down
13 changes: 12 additions & 1 deletion src/mds/AnchorServer.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class AnchorServer : public MDSTableServer {
map<version_t, inodeno_t> pending_create;
map<version_t, inodeno_t> pending_destroy;
map<version_t, pair<inodeno_t, vector<Anchor> > > pending_update;
map<inodeno_t, list<pair<version_t, Context*> > > pending_ops;

void reset_state();
void encode_server_state(bufferlist& bl) {
Expand All @@ -47,17 +48,27 @@ class AnchorServer : public MDSTableServer {
::decode(pending_create, p);
::decode(pending_destroy, p);
::decode(pending_update, p);

map<version_t, inodeno_t> sort;
sort.insert(pending_create.begin(), pending_create.end());
sort.insert(pending_destroy.begin(), pending_destroy.end());
for (map<version_t, pair<inodeno_t, vector<Anchor> > >::iterator p = pending_update.begin();
p != pending_update.end(); p++)
sort[p->first] = p->second.first;
for (map<version_t, inodeno_t>::iterator p = sort.begin(); p != sort.end(); p++)
pending_ops[p->second].push_back(pair<version_t, Context*>(p->first, NULL));
}

bool add(inodeno_t ino, inodeno_t dirino, __u32 dn_hash, bool replace);
void inc(inodeno_t ino, int ref=1);
void dec(inodeno_t ino, int ref=1);
bool check_pending(version_t tid, MMDSTableRequest *req, list<Context *>& finished);

void dump();

// server bits
void _prepare(bufferlist &bl, uint64_t reqid, int bymds);
void _commit(version_t tid);
bool _commit(version_t tid, MMDSTableRequest *req=NULL);
void _rollback(version_t tid);
void handle_query(MMDSTableRequest *m);
};
Expand Down
17 changes: 6 additions & 11 deletions src/mds/CDentry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -439,23 +439,18 @@ void CDentry::decode_replica(bufferlist::iterator& p, bool is_new)

inodeno_t rino;
unsigned char rdtype;
__s32 ls;
::decode(rino, p);
::decode(rdtype, p);
if (rino) {
if (linkage.is_null())
dir->link_remote_inode(this, rino, rdtype);
else
assert(linkage.is_remote() && linkage.remote_ino == rino);
}

__s32 ls;
::decode(ls, p);
if (is_new)

if (is_new) {
if (rino)
dir->link_remote_inode(this, rino, rdtype);
lock.set_state(ls);
}
}



// ----------------------------
// locking

Expand Down
3 changes: 3 additions & 0 deletions src/mds/CDentry.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,9 @@ class CDentry : public MDSCacheObject, public LRUObject {

// -- replication
void encode_replica(int mds, bufferlist& bl) {
if (!is_replicated())
lock.replicate_relax();

__u32 nonce = add_replica(mds);
::encode(nonce, bl);
::encode(first, bl);
Expand Down
20 changes: 16 additions & 4 deletions src/mds/CDir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,9 @@ void CDir::assimilate_dirty_rstat_inodes()
for (elist<CInode*>::iterator p = dirty_rstat_inodes.begin_use_current();
!p.end(); ++p) {
CInode *in = *p;
if (in->is_frozen())
continue;

inode_t *pi = in->project_inode();
pi->version = in->pre_dirty();

Expand All @@ -1040,16 +1043,22 @@ void CDir::assimilate_dirty_rstat_inodes_finish(Mutation *mut, EMetaBlob *blob)
elist<CInode*>::iterator p = dirty_rstat_inodes.begin_use_current();
while (!p.end()) {
CInode *in = *p;
CDentry *dn = in->get_projected_parent_dn();
++p;

if (in->is_frozen())
continue;

CDentry *dn = in->get_projected_parent_dn();

mut->auth_pin(in);
mut->add_projected_inode(in);

in->clear_dirty_rstat();
blob->add_primary_dentry(dn, true, in);
}
assert(dirty_rstat_inodes.empty());

if (!dirty_rstat_inodes.empty())
inode->mdcache->mds->locker->mark_updated_scatterlock(&inode->nestlock);
}


Expand Down Expand Up @@ -1342,8 +1351,11 @@ void CDir::fetch(Context *c, const string& want_dn, bool ignore_authpinnability)
assert(!is_complete());

if (!can_auth_pin() && !ignore_authpinnability) {
dout(7) << "fetch waiting for authpinnable" << dendl;
add_waiter(WAIT_UNFREEZE, c);
if (c) {
dout(7) << "fetch waiting for authpinnable" << dendl;
add_waiter(WAIT_UNFREEZE, c);
} else
dout(7) << "fetch not authpinnable and no context" << dendl;
return;
}

Expand Down
29 changes: 24 additions & 5 deletions src/mds/CInode.cc
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,10 @@ void CInode::print(ostream& out)
out << *this;
}


bool CInode::is_in_stray()
{
return !is_base() && get_projected_parent_dir()->inode->is_stray();
}

void CInode::add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client)
{
Expand Down Expand Up @@ -1932,6 +1935,20 @@ void CInode::unfreeze_auth_pin()
}
}

void CInode::clear_ambiguous_auth(list<Context*>& finished)
{
assert(state_test(CInode::STATE_AMBIGUOUSAUTH));
state_clear(CInode::STATE_AMBIGUOUSAUTH);
take_waiting(CInode::WAIT_SINGLEAUTH, finished);
}

void CInode::clear_ambiguous_auth()
{
list<Context*> finished;
clear_ambiguous_auth(finished);
mdcache->mds->queue_waiters(finished);
}

// auth_pins
bool CInode::can_auth_pin() {
if (is_freezing_inode() || is_frozen_inode() || is_frozen_auth_pin())
Expand Down Expand Up @@ -2587,17 +2604,19 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session,
{
int client = session->inst.name.num();
assert(snapid);

assert(session->connection);

bool valid = true;

// do not issue caps if inode differs from readdir snaprealm
SnapRealm *realm = find_snaprealm();
bool no_caps = (realm && dir_realm && realm != dir_realm);
bool no_caps = (realm && dir_realm && realm != dir_realm) ||
is_frozen() || state_test(CInode::STATE_EXPORTINGCAPS);
if (no_caps)
dout(20) << "encode_inodestat realm=" << realm << " snaprealm " << snaprealm
<< " no_caps=" << no_caps << dendl;
dout(20) << "encode_inodestat no caps"
<< ((realm && dir_realm && realm != dir_realm)?", snaprealm differs ":"")
<< (state_test(CInode::STATE_EXPORTINGCAPS)?", exporting caps":"")
<< (is_frozen()?", frozen inode":"") << dendl;

// pick a version!
inode_t *oi = &inode;
Expand Down
8 changes: 7 additions & 1 deletion src/mds/CInode.h
Original file line number Diff line number Diff line change
Expand Up @@ -520,12 +520,18 @@ class CInode : public MDSCacheObject {

bool is_head() { return last == CEPH_NOSNAP; }

bool is_in_stray();

// note: this overloads MDSCacheObject
bool is_ambiguous_auth() {
return state_test(STATE_AMBIGUOUSAUTH) ||
MDSCacheObject::is_ambiguous_auth();
}

void set_ambiguous_auth() {
state_set(STATE_AMBIGUOUSAUTH);
}
void clear_ambiguous_auth(list<Context*>& finished);
void clear_ambiguous_auth();

inodeno_t ino() const { return inode.ino; }
vinodeno_t vino() const { return vinodeno_t(inode.ino, last); }
Expand Down

0 comments on commit 5a384f4

Please sign in to comment.