Skip to content

Commit

Permalink
mds: restructure mds map, clean up mdsmon
Browse files Browse the repository at this point in the history
More flexible mdsmap structure.  Cleaner interface.
  • Loading branch information
liewegas committed Dec 17, 2008
1 parent 6fb760b commit 82b0bc3
Show file tree
Hide file tree
Showing 10 changed files with 472 additions and 619 deletions.
6 changes: 3 additions & 3 deletions src/client/Client.cc
Expand Up @@ -734,7 +734,7 @@ int Client::choose_target_mds(MClientRequest *req)
// pick mds
if (!diri || g_conf.client_use_random_mds) {
// no root info, pick a random MDS
mds = mdsmap->get_random_in_mds();
mds = mdsmap->get_random_up_mds();
dout(10) << "random mds" << mds << dendl;
if (mds < 0) mds = 0;

Expand Down Expand Up @@ -829,7 +829,7 @@ MClientReply *Client::make_request(MClientRequest *req,
if (mds >= 0) {
dout(10) << "chose target mds" << mds << " based on hierarchy" << dendl;
} else {
mds = mdsmap->get_random_in_mds();
mds = mdsmap->get_random_up_mds();
if (mds < 0) mds = 0; // hrm.
dout(10) << "chose random target mds" << mds << " for lack of anything better" << dendl;
}
Expand All @@ -849,7 +849,7 @@ MClientReply *Client::make_request(MClientRequest *req,

if (!mdsmap->is_active(mds)) {
dout(10) << "hmm, still have no address for mds" << mds << ", trying a random mds" << dendl;
request.resend_mds = mdsmap->get_random_in_mds();
request.resend_mds = mdsmap->get_random_up_mds();
continue;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/client/Client.h
Expand Up @@ -409,7 +409,7 @@ class Inode {
if (dir_replicated) {// || ino() == 1) {
// pick a random mds that isn't the auth
set<int> s;
mdsmap->get_in_mds_set(s);
mdsmap->get_mds_set(s);
set<int>::iterator it = s.begin();
if (s.empty())
return 0;
Expand Down
49 changes: 23 additions & 26 deletions src/kernel/mdsmap.c
Expand Up @@ -51,55 +51,52 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
{
struct ceph_mdsmap *m;
int i, n;
u32 mds;
int err = -EINVAL;

m = kzalloc(sizeof(*m), GFP_NOFS);
if (m == NULL)
return ERR_PTR(-ENOMEM);

ceph_decode_need(p, end, 10*sizeof(u32), bad);
ceph_decode_need(p, end, 8*sizeof(u32), bad);
ceph_decode_32(p, m->m_epoch);
ceph_decode_32(p, m->m_client_epoch);
ceph_decode_32(p, m->m_last_failure);
*p += sizeof(struct ceph_timespec); /* ignore map timestamp */
*p += sizeof(u32); /* skip anchortable */
ceph_decode_32(p, m->m_root);
ceph_decode_32(p, m->m_session_timeout);
ceph_decode_32(p, m->m_session_autoclose);
ceph_decode_32(p, m->m_max_mds);

m->m_addr = kmalloc(m->m_max_mds*sizeof(*m->m_addr), GFP_NOFS);
m->m_addr = kzalloc(m->m_max_mds*sizeof(*m->m_addr), GFP_NOFS);
m->m_state = kzalloc(m->m_max_mds*sizeof(*m->m_state), GFP_NOFS);
if (m->m_addr == NULL || m->m_state == NULL)
goto badmem;

/* state */
/* pick out active nodes from mds_info (state > 0) */
ceph_decode_32(p, n);
ceph_decode_need(p, end, n*2*sizeof(u32), bad);
for (i = 0; i < n; i++) {
ceph_decode_32(p, mds);
if (mds >= m->m_max_mds)
goto bad;
ceph_decode_32(p, m->m_state[mds]);
}

/* state_seq */
ceph_decode_32_safe(p, end, n, bad);
*p += n*(sizeof(u32)+sizeof(u64));

/* mds_inst */
ceph_decode_32_safe(p, end, n, bad);
ceph_decode_need(p, end,
n*(sizeof(u32)+sizeof(struct ceph_entity_name)+
sizeof(struct ceph_entity_addr)),
n * (3*sizeof(u32) + sizeof(u64) +
2*sizeof(*m->m_addr) +
sizeof(struct ceph_timespec)),
bad);
for (i = 0; i < n; i++) {
s32 mds, inc, state;
u64 state_seq;

*p += sizeof(m->m_addr[0]); /* skip addr key */
ceph_decode_32(p, mds);
if (mds >= m->m_max_mds)
goto bad;
*p += sizeof(struct ceph_entity_name);
ceph_decode_copy(p, &m->m_addr[mds], sizeof(*m->m_addr));
ceph_decode_32(p, inc);
ceph_decode_32(p, state);
ceph_decode_64(p, state_seq);
dout(10, "mdsmap_decode %d/%d mds%d.%d state %d\n",
i+1, n, mds, inc, state);
if (mds >= 0 && mds < m->m_max_mds && state > 0) {
m->m_state[mds] = state;
ceph_decode_copy(p, &m->m_addr[mds],
sizeof(*m->m_addr));
} else {
*p += sizeof(m->m_addr[0]); /* skip it */
}
*p += sizeof(struct ceph_timespec);
}

/* ok, we don't care about the rest. */
Expand Down
4 changes: 2 additions & 2 deletions src/mds/MDBalancer.cc
Expand Up @@ -208,7 +208,7 @@ void MDBalancer::send_heartbeat()


set<int> up;
mds->get_mds_map()->get_in_mds_set(up);
mds->get_mds_map()->get_mds_set(up);
for (set<int>::iterator p = up.begin(); p != up.end(); ++p) {
if (*p == mds->get_nodeid()) continue;
MHeartbeat *hb = new MHeartbeat(load, beat_epoch);
Expand Down Expand Up @@ -246,7 +246,7 @@ void MDBalancer::handle_heartbeat(MHeartbeat *m)

//dout(0) << " load is " << load << " have " << mds_load.size() << dendl;

unsigned cluster_size = mds->get_mds_map()->get_num_in_mds();
unsigned cluster_size = mds->get_mds_map()->get_num_mds();
if (mds_load.size() == cluster_size) {
// let's go!
//export_empties(); // no!
Expand Down
63 changes: 29 additions & 34 deletions src/mds/MDS.cc
Expand Up @@ -66,15 +66,15 @@

#define DOUT_SUBSYS mds
#undef dout_prefix
#define dout_prefix *_dout << dbeginl << "mds" << whoami << " "
#define dout_prefix *_dout << dbeginl << "mds" << whoami << '.' << incarnation << ' '



// cons/des
MDS::MDS(int whoami_, Messenger *m, MonMap *mm) :
mds_lock("MDS::mds_lock"),
timer(mds_lock),
whoami(whoami_),
whoami(whoami_), incarnation(0),
messenger(m),
monmap(mm),
logclient(messenger, monmap),
Expand Down Expand Up @@ -115,7 +115,7 @@ MDS::MDS(int whoami_, Messenger *m, MonMap *mm) :

req_rate = 0;

want_state = state = MDSMap::STATE_DNE;
want_state = state = MDSMap::STATE_BOOT;

logger = logger2 = 0;
}
Expand Down Expand Up @@ -549,37 +549,36 @@ void MDS::handle_mds_map(MMDSMap *m)
// decode and process
mdsmap = new MDSMap;
mdsmap->decode(m->get_encoded());

// see who i am
whoami = mdsmap->get_addr_rank(messenger->get_myaddr());
if (whoami < 0) {
if (mdsmap->is_standby(messenger->get_myaddr())) {
if (state != MDSMap::STATE_STANDBY) {
want_state = state = MDSMap::STATE_STANDBY;
dout(1) << "handle_mds_map standby" << dendl;
}
goto out;
}

// do i exist?
if (mdsmap->is_dne(messenger->get_myaddr())) {
dout(1) << "handle_mds_map i (" << messenger->get_myaddr()
<< ") am not in the mdsmap, killing myself" << dendl;
<< ") dne in the mdsmap, killing myself" << dendl;
suicide();
goto out;
}

// see who i am
whoami = mdsmap->get_rank(messenger->get_myaddr());
state = mdsmap->get_state(messenger->get_myaddr());
if (state == MDSMap::STATE_STANDBY) {
want_state = state = MDSMap::STATE_STANDBY;
dout(1) << "handle_mds_map standby" << dendl;
goto out;
}
// ??
assert(whoami >= 0);
incarnation = mdsmap->get_inc(whoami);

// open logger?
// note that fakesyn/newsyn starts knowing who they are
if (whoami >= 0 &&
mdsmap->is_up(whoami) &&
(oldwhoami != whoami || !logger)) {
if (oldwhoami != whoami || !logger) {
_dout_create_courtesy_output_symlink("mds", whoami);
reopen_logger(mdsmap->get_created()); // adopt mds cluster timeline
}

if (oldwhoami != whoami) {
// update messenger.
dout(1) << "handle_mds_map i am now mds" << whoami
<< " incarnation " << mdsmap->get_inc(whoami)
<< dendl;
dout(1) << "handle_mds_map i am now mds" << whoami << "." << incarnation << dendl;
messenger->reset_myname(entity_name_t::MDS(whoami));

// do i need an osdmap?
Expand Down Expand Up @@ -634,10 +633,6 @@ void MDS::handle_mds_map(MMDSMap *m)
} else if (is_stopping()) {
assert(oldstate == MDSMap::STATE_ACTIVE);
stopping_start();
} else if (is_stopped()) {
assert(oldstate == MDSMap::STATE_STOPPING);
suicide();
return;
}
}

Expand Down Expand Up @@ -687,8 +682,8 @@ void MDS::handle_mds_map(MMDSMap *m)
if (true) {
// new failed?
set<int> oldfailed, failed;
oldmap->get_mds_set(oldfailed, MDSMap::STATE_FAILED);
mdsmap->get_mds_set(failed, MDSMap::STATE_FAILED);
oldmap->get_failed_mds_set(oldfailed);
mdsmap->get_failed_mds_set(failed);
for (set<int>::iterator p = failed.begin(); p != failed.end(); ++p)
if (oldfailed.count(*p) == 0)
mdcache->handle_mds_failure(*p);
Expand All @@ -705,8 +700,8 @@ void MDS::handle_mds_map(MMDSMap *m)
if (is_active() || is_stopping()) {
// did anyone stop?
set<int> oldstopped, stopped;
oldmap->get_mds_set(oldstopped, MDSMap::STATE_STOPPED);
mdsmap->get_mds_set(stopped, MDSMap::STATE_STOPPED);
oldmap->get_stopped_mds_set(oldstopped);
mdsmap->get_stopped_mds_set(stopped);
for (set<int>::iterator p = stopped.begin(); p != stopped.end(); ++p)
if (oldstopped.count(*p) == 0) // newly so?
mdcache->migrator->handle_mds_failure_or_stop(*p);
Expand Down Expand Up @@ -926,12 +921,12 @@ void MDS::replay_start()

void MDS::replay_done()
{
dout(1) << "replay_done in=" << mdsmap->get_num_in_mds()
<< " failed=" << mdsmap->get_num_mds(MDSMap::STATE_FAILED)
dout(1) << "replay_done in=" << mdsmap->get_num_mds()
<< " failed=" << mdsmap->get_num_failed()
<< dendl;

if (mdsmap->get_num_in_mds() == 1 &&
mdsmap->get_num_mds(MDSMap::STATE_FAILED) == 0) { // just me!
if (mdsmap->get_num_mds() == 1 &&
mdsmap->get_num_failed() == 0) { // just me!
dout(2) << "i am alone, moving to state reconnect" << dendl;
request_state(MDSMap::STATE_RECONNECT);
} else {
Expand Down
10 changes: 4 additions & 6 deletions src/mds/MDS.h
Expand Up @@ -73,10 +73,9 @@ class MDS : public Dispatcher {
Mutex mds_lock;
SafeTimer timer;

protected:
int whoami;
int whoami;
int incarnation;

public:
Messenger *messenger;
MonMap *monmap;
MDSMap *mdsmap;
Expand Down Expand Up @@ -129,8 +128,6 @@ class MDS : public Dispatcher {
}

int get_state() { return state; }
bool is_dne() { return state == MDSMap::STATE_DNE; }
bool is_failed() { return state == MDSMap::STATE_FAILED; }
bool is_creating() { return state == MDSMap::STATE_CREATING; }
bool is_starting() { return state == MDSMap::STATE_STARTING; }
bool is_standby() { return state == MDSMap::STATE_STANDBY; }
Expand All @@ -140,7 +137,8 @@ class MDS : public Dispatcher {
bool is_rejoin() { return state == MDSMap::STATE_REJOIN; }
bool is_active() { return state == MDSMap::STATE_ACTIVE; }
bool is_stopping() { return state == MDSMap::STATE_STOPPING; }
bool is_stopped() { return state == MDSMap::STATE_STOPPED; }

bool is_stopped() { return mdsmap->is_stopped(whoami); }

void request_state(int s);

Expand Down

0 comments on commit 82b0bc3

Please sign in to comment.