Skip to content

Commit

Permalink
mds: raise health warning if client lacks feature for root_squash
Browse files Browse the repository at this point in the history
Rather than evict all clients lacking this feature bit, raise a health error
that pushes the administrator to address it. This avoids the surprise of having
all affected clients suddenly evicted in the cluster.

Fixes: https://tracker.ceph.com/issues/65733
Fixes: 954ed30
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
  • Loading branch information
batrick committed May 7, 2024
1 parent 6517b70 commit 66ff5c9
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 12 deletions.
24 changes: 24 additions & 0 deletions src/mds/Beacon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,30 @@ void Beacon::notify_health(MDSRank const *mds)
health.metrics.push_back(m);
}

// Report a health error (HEALTH_ERR) when the session map has recorded any
// client session as having a broken root_squash implementation. `c` is the
// number of such sessions as reported by the session map.
if (auto c = mds->sessionmap.num_broken_root_squash_clients(); c > 0) {
// Per-client metrics are staged here first; they are only published when
// their count does not exceed the summarize threshold checked below.
std::vector<MDSHealthMetric> metrics;

for (auto&& session : mds->sessionmap.get_broken_root_squash_clients()) {
CachedStackStringStream css;
*css << "Client " << session->get_human_name() << " has broken root_squash implementation";
MDSHealthMetric m(MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH, HEALTH_ERR, css->strv());
// Store session->get_client() under the "client_id" metadata key.
m.metadata["client_id"] = stringify(session->get_client());
metrics.emplace_back(std::move(m));
}

// At or below mds_health_summarize_threshold: publish one metric per client
// by moving the staged entries onto the end of health.metrics.
if (metrics.size() <= (size_t)g_conf()->mds_health_summarize_threshold) {
health.metrics.insert(std::end(health.metrics), std::make_move_iterator(std::begin(metrics)), std::make_move_iterator(std::end(metrics)));
} else {
// Above the threshold: publish a single summary metric that carries only
// the client count; the staged per-client metrics are not published.
CachedStackStringStream css;
*css << "There are " << c << " clients with broken root_squash implementations";
dout(20) << css->strv() << dendl;
MDSHealthMetric m(MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH, HEALTH_ERR, css->strv());
m.metadata["client_count"] = stringify(c);
health.metrics.push_back(std::move(m));
}
}

// Report if we have significantly exceeded our cache size limit
if (mds->mdcache->cache_overfull()) {
CachedStackStringStream css;
Expand Down
14 changes: 2 additions & 12 deletions src/mds/Server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -722,15 +722,7 @@ void Server::handle_client_session(const cref_t<MClientSession> &m)
std::string_view fs_name = mds->mdsmap->get_fs_name();
bool client_caps_check = client_metadata.features.test(CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK);
if (session->auth_caps.root_squash_in_caps(fs_name) && !client_caps_check) {
CachedStackStringStream css;
*css << "client lacks CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK needed to enforce 'root_squash' MDS auth caps";
send_reject_message(css->strv());
mds->clog->warn() << "client session (" << session->info.inst
<< ") lacks CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK "
<< " needed to enforce 'root_squash' MDS auth caps";
session->clear();
break;

mds->sessionmap.add_to_broken_root_squash_clients(session);
}
// Special case for the 'root' metadata path; validate that the claimed
// root is actually within the caps of the session
Expand Down Expand Up @@ -1590,9 +1582,7 @@ void Server::handle_client_reconnect(const cref_t<MClientReconnect> &m)
std::string_view fs_name = mds->mdsmap->get_fs_name();
bool client_caps_check = session->info.client_metadata.features.test(CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK);
if (session->auth_caps.root_squash_in_caps(fs_name) && !client_caps_check) {
CachedStackStringStream css;
*css << "client lacks CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK needed to enforce 'root_squash' MDS auth caps";
error_str = css->strv();
mds->sessionmap.add_to_broken_root_squash_clients(session);
}
}

Expand Down
1 change: 1 addition & 0 deletions src/mds/SessionMap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,7 @@ void SessionMap::remove_session(Session *s)

s->trim_completed_requests(0);
s->item_session_list.remove_myself();
broken_root_squash_clients.erase(s);
session_map.erase(s->info.inst.name);
dirty_sessions.erase(s->info.inst.name);
null_sessions.insert(s->info.inst.name);
Expand Down
12 changes: 12 additions & 0 deletions src/mds/SessionMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,16 @@ class SessionMap : public SessionMapStore {
void remove_session(Session *s);
void touch_session(Session *session);

/**
 * Record session `s` in the broken_root_squash_clients set.
 *
 * The set is a std::set, so inserting a session that is already present
 * leaves it unchanged.
 */
void add_to_broken_root_squash_clients(Session* s) {
broken_root_squash_clients.insert(s);
}
/// Return how many sessions are currently in broken_root_squash_clients.
uint64_t num_broken_root_squash_clients() const {
return broken_root_squash_clients.size();
}
/// Return a const reference (not a copy) to the broken_root_squash_clients set.
auto const& get_broken_root_squash_clients() const {
return broken_root_squash_clients;
}

Session *get_oldest_session(int state) {
auto by_state_entry = by_state.find(state);
if (by_state_entry == by_state.end() || by_state_entry->second->empty())
Expand Down Expand Up @@ -849,6 +859,8 @@ class SessionMap : public SessionMapStore {

bool validate_and_encode_session(MDSRank *mds, Session *session, bufferlist& bl);
void apply_blocklist(const std::set<entity_name_t>& victims);

// Sessions recorded via add_to_broken_root_squash_clients();
// SessionMap::remove_session() erases a session from this set.
std::set<Session*> broken_root_squash_clients;
};

std::ostream& operator<<(std::ostream &out, const Session &s);
Expand Down
4 changes: 4 additions & 0 deletions src/messages/MMDSBeacon.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ enum mds_metric_t {
MDS_HEALTH_SLOW_METADATA_IO,
MDS_HEALTH_CLIENTS_LAGGY,
MDS_HEALTH_CLIENTS_LAGGY_MANY,
MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH,
MDS_HEALTH_DUMMY, // not a real health warning, for testing
};

Expand All @@ -67,6 +68,7 @@ inline const char *mds_metric_name(mds_metric_t m)
case MDS_HEALTH_SLOW_METADATA_IO: return "MDS_SLOW_METADATA_IO";
case MDS_HEALTH_CLIENTS_LAGGY: return "MDS_CLIENTS_LAGGY";
case MDS_HEALTH_CLIENTS_LAGGY_MANY: return "MDS_CLIENTS_LAGGY_MANY";
case MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH: return "MDS_CLIENTS_BROKEN_ROOTSQUASH";
case MDS_HEALTH_DUMMY: return "MDS_DUMMY";
default:
return "???";
Expand Down Expand Up @@ -103,6 +105,8 @@ inline const char *mds_metric_summary(mds_metric_t m)
return "%num% MDSs report slow metadata IOs";
case MDS_HEALTH_CLIENTS_LAGGY:
return "%num% client(s) laggy due to laggy OSDs";
case MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH:
return "%num% MDS report clients with broken root_squash implementation";
default:
return "???";
}
Expand Down

0 comments on commit 66ff5c9

Please sign in to comment.