From 7a4c509f7289ff4b400eb8cae462f882de3cab3b Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Thu, 20 Oct 2022 06:04:08 -0400 Subject: [PATCH] mds: include last modification time and change attr of a snap realm node in snap trace reply To handle older clients, introduce a cephfs feature bit. The MDS encodes a "new" SnapRealmInfo structure for clients supporting this feature (which includes the last modification timestamp and a version/change_attr for the snap realm). For clients not advertising this feature, the MDS uses the existing SnapRealmInfo structure for snapshot traces. Signed-off-by: Venky Shankar --- src/common/snap_types.cc | 40 ++++++++++++++++++++++++++++++++++++++ src/common/snap_types.h | 30 ++++++++++++++++++++++++++++ src/mds/MDCache.cc | 12 ++++++------ src/mds/Server.cc | 19 ++++++++++++++++-- src/mds/Server.h | 3 +++ src/mds/SnapRealm.cc | 17 +++++++++++++++- src/mds/SnapRealm.h | 2 ++ src/mds/cephfs_features.cc | 1 + src/mds/cephfs_features.h | 4 +++- 9 files changed, 118 insertions(+), 10 deletions(-) diff --git a/src/common/snap_types.cc b/src/common/snap_types.cc index 521404ca2b5b0..dc634907b8ae6 100644 --- a/src/common/snap_types.cc +++ b/src/common/snap_types.cc @@ -53,6 +53,46 @@ void SnapRealmInfo::generate_test_instances(std::list& o) o.back()->prior_parent_snaps.push_back(5); } +// -- "new" SnapRealmInfo -- + +void SnapRealmInfoNew::encode(ceph::buffer::list& bl) const +{ + using ceph::encode; + ENCODE_START(1, 1, bl); + encode(info, bl); + encode(last_modified, bl); + encode(change_attr, bl); + ENCODE_FINISH(bl); +} + +void SnapRealmInfoNew::decode(ceph::buffer::list::const_iterator& bl) +{ + using ceph::decode; + DECODE_START(1, bl); + decode(info, bl); + decode(last_modified, bl); + decode(change_attr, bl); + DECODE_FINISH(bl); +} + +void SnapRealmInfoNew::dump(ceph::Formatter *f) const +{ + info.dump(f); + f->dump_stream("last_modified") << last_modified; + f->dump_unsigned("change_attr", change_attr); +} + +void 
SnapRealmInfoNew::generate_test_instances(std::list& o) +{ + o.push_back(new SnapRealmInfoNew); + o.push_back(new SnapRealmInfoNew(SnapRealmInfo(1, 10, 10, 0), utime_t(), 0)); + o.push_back(new SnapRealmInfoNew(SnapRealmInfo(1, 10, 10, 0), utime_t(), 1)); + o.back()->info.my_snaps.push_back(10); + o.push_back(new SnapRealmInfoNew(SnapRealmInfo(1, 10, 10, 5), utime_t(), 2)); + o.back()->info.my_snaps.push_back(10); + o.back()->info.prior_parent_snaps.push_back(3); + o.back()->info.prior_parent_snaps.push_back(5); +} // ----- diff --git a/src/common/snap_types.h b/src/common/snap_types.h index 70b72f10563d3..d87e763c4b5db 100644 --- a/src/common/snap_types.h +++ b/src/common/snap_types.h @@ -2,6 +2,7 @@ #define __CEPH_SNAP_TYPES_H #include "include/types.h" +#include "include/utime.h" #include "include/fs_types.h" namespace ceph { @@ -39,6 +40,35 @@ struct SnapRealmInfo { }; WRITE_CLASS_ENCODER(SnapRealmInfo) +// "new" snap realm info - carries additional metadata (last modified, +// change_attr) and is version encoded. +struct SnapRealmInfoNew { + SnapRealmInfo info; + utime_t last_modified; + uint64_t change_attr; + + SnapRealmInfoNew() { + } + + SnapRealmInfoNew(const SnapRealmInfo &info_, utime_t last_modified_, uint64_t change_attr_) { + // FIPS zeroization audit 20191115: this memset is not security related. 
+ info = info_; + last_modified = last_modified_; + change_attr = change_attr_; + } + + inodeno_t ino() const { return inodeno_t(info.h.ino); } + inodeno_t parent() const { return inodeno_t(info.h.parent); } + snapid_t seq() const { return snapid_t(info.h.seq); } + snapid_t parent_since() const { return snapid_t(info.h.parent_since); } + snapid_t created() const { return snapid_t(info.h.created); } + + void encode(ceph::buffer::list& bl) const; + void decode(ceph::buffer::list::const_iterator& bl); + void dump(ceph::Formatter *f) const; + static void generate_test_instances(std::list& o); +}; +WRITE_CLASS_ENCODER(SnapRealmInfoNew) struct SnapContext { snapid_t seq; // 'time' stamp diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 38ca143bbe3c9..82e5c158ee30c 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -5620,7 +5620,7 @@ void MDCache::prepare_realm_split(SnapRealm *realm, client_t client, inodeno_t i snap = make_message(CEPH_SNAP_OP_SPLIT); splits.emplace(std::piecewise_construct, std::forward_as_tuple(client), std::forward_as_tuple(snap)); snap->head.split = realm->inode->ino(); - snap->bl = realm->get_snap_trace(); + snap->bl = mds->server->get_snap_trace(client, realm); for (const auto& child : realm->open_children) snap->split_realms.push_back(child->inode->ino()); @@ -5651,7 +5651,7 @@ void MDCache::prepare_realm_merge(SnapRealm *realm, SnapRealm *parent_realm, update->head.split = parent_realm->inode->ino(); update->split_inos = split_inos; update->split_realms = split_realms; - update->bl = parent_realm->get_snap_trace(); + update->bl = mds->server->get_snap_trace(p.first, parent_realm); em.first->second = std::move(update); } } @@ -5848,7 +5848,7 @@ void MDCache::do_cap_import(Session *session, CInode *in, Capability *cap, cap->get_last_seq(), cap->pending(), cap->wanted(), 0, cap->get_mseq(), mds->get_osd_epoch_barrier()); in->encode_cap_message(reap, cap); - reap->snapbl = realm->get_snap_trace(); + reap->snapbl = 
mds->server->get_snap_trace(session, realm); reap->set_cap_peer(p_cap_id, p_seq, p_mseq, peer, p_flags); mds->send_message_client_counted(reap, session); } @@ -6034,7 +6034,7 @@ void MDCache::finish_snaprealm_reconnect(client_t client, SnapRealm *realm, snap dout(10) << "finish_snaprealm_reconnect client." << client << " has old seq " << seq << " < " << realm->get_newest_seq() << " on " << *realm << dendl; auto snap = make_message(CEPH_SNAP_OP_UPDATE); - snap->bl = realm->get_snap_trace(); + snap->bl = mds->server->get_snap_trace(client, realm); for (const auto& child : realm->open_children) snap->split_realms.push_back(child->inode->ino()); updates.emplace(std::piecewise_construct, std::forward_as_tuple(client), std::forward_as_tuple(snap)); @@ -9970,7 +9970,7 @@ void MDCache::do_realm_invalidate_and_update_notify(CInode *in, int snapop, bool update->head.split = in->ino(); update->split_inos = split_inos; update->split_realms = split_realms; - update->bl = in->snaprealm->get_snap_trace(); + update->bl = mds->server->get_snap_trace(em.first->first, in->snaprealm); em.first->second = std::move(update); } } @@ -10064,7 +10064,7 @@ void MDCache::notify_global_snaprealm_update(int snap_op) continue; auto update = make_message(snap_op); update->head.split = global_snaprealm->inode->ino(); - update->bl = global_snaprealm->get_snap_trace(); + update->bl = mds->server->get_snap_trace(session, global_snaprealm); mds->send_message_client_counted(update, session); } } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index fbac61e087051..1a823b40830b2 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2342,7 +2342,7 @@ void Server::set_trace_dist(const ref_t &reply, realm = in->find_snaprealm(); else realm = dn->get_dir()->get_inode()->find_snaprealm(); - reply->snapbl = realm->get_snap_trace(); + reply->snapbl = get_snap_trace(session, realm); dout(10) << "set_trace_dist snaprealm " << *realm << " len=" << reply->snapbl.length() << dendl; } @@ -4783,7 +4783,7 @@ 
void Server::handle_client_readdir(MDRequestRef& mdr) // this isn't perfect, but we should capture the main variable/unbounded size items! int front_bytes = dirbl.length() + sizeof(__u32) + sizeof(__u8)*2; int bytes_left = max_bytes - front_bytes; - bytes_left -= realm->get_snap_trace().length(); + bytes_left -= get_snap_trace(session, realm).length(); // build dir contents bufferlist dnbl; @@ -11296,3 +11296,18 @@ void Server::dump_reconnect_status(Formatter *f) const f->dump_stream("client_reconnect_gather") << client_reconnect_gather; f->close_section(); } + +const bufferlist& Server::get_snap_trace(Session *session, SnapRealm *realm) const { + ceph_assert(session); + ceph_assert(realm); + if (session->info.has_feature(CEPHFS_FEATURE_NEW_SNAPREALM_INFO)) { + return realm->get_snap_trace_new(); + } else { + return realm->get_snap_trace(); + } +} + +const bufferlist& Server::get_snap_trace(client_t client, SnapRealm *realm) const { + Session *session = mds->sessionmap.get_session(entity_name_t::CLIENT(client.v)); + return get_snap_trace(session, realm); +} diff --git a/src/mds/Server.h b/src/mds/Server.h index a2d8f69670871..f955e5f942b93 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -328,6 +328,9 @@ class Server { std::set client_reclaim_gather; + const bufferlist& get_snap_trace(Session *session, SnapRealm *realm) const; + const bufferlist& get_snap_trace(client_t client, SnapRealm *realm) const; + private: friend class MDSContinuation; friend class ServerContext; diff --git a/src/mds/SnapRealm.cc b/src/mds/SnapRealm.cc index 9a81511adc7b4..c7a7d75bc242d 100644 --- a/src/mds/SnapRealm.cc +++ b/src/mds/SnapRealm.cc @@ -390,9 +390,16 @@ const bufferlist& SnapRealm::get_snap_trace() const return cached_snap_trace; } +const bufferlist& SnapRealm::get_snap_trace_new() const +{ + check_cache(); + return cached_snap_trace_new; +} + void SnapRealm::build_snap_trace() const { cached_snap_trace.clear(); + cached_snap_trace_new.clear(); if (global) { 
SnapRealmInfo info(inode->ino(), 0, cached_seq, 0); @@ -401,7 +408,10 @@ void SnapRealm::build_snap_trace() const info.my_snaps.push_back(*p); dout(10) << "build_snap_trace my_snaps " << info.my_snaps << dendl; + + SnapRealmInfoNew ninfo(info, srnode.last_modified, srnode.change_attr); encode(info, cached_snap_trace); + encode(ninfo, cached_snap_trace_new); return; } @@ -434,10 +444,15 @@ void SnapRealm::build_snap_trace() const info.my_snaps.push_back(p->first); dout(10) << "build_snap_trace my_snaps " << info.my_snaps << dendl; + SnapRealmInfoNew ninfo(info, srnode.last_modified, srnode.change_attr); + encode(info, cached_snap_trace); + encode(ninfo, cached_snap_trace_new); - if (parent) + if (parent) { cached_snap_trace.append(parent->get_snap_trace()); + cached_snap_trace_new.append(parent->get_snap_trace_new()); + } } void SnapRealm::prune_past_parent_snaps() diff --git a/src/mds/SnapRealm.h b/src/mds/SnapRealm.h index 7ddffe0ba9a85..700c1d81e3b57 100644 --- a/src/mds/SnapRealm.h +++ b/src/mds/SnapRealm.h @@ -45,6 +45,7 @@ struct SnapRealm { void get_snap_info(std::map& infomap, snapid_t first=0, snapid_t last=CEPH_NOSNAP); const ceph::buffer::list& get_snap_trace() const; + const ceph::buffer::list& get_snap_trace_new() const; void build_snap_trace() const; std::string_view get_snapname(snapid_t snapid, inodeno_t atino); @@ -143,6 +144,7 @@ struct SnapRealm { mutable std::set cached_snaps; mutable SnapContext cached_snap_context; mutable ceph::buffer::list cached_snap_trace; + mutable ceph::buffer::list cached_snap_trace_new; mutable inodeno_t cached_subvolume_ino = 0; }; diff --git a/src/mds/cephfs_features.cc b/src/mds/cephfs_features.cc index 6102a9b1652c2..275fd173070a1 100644 --- a/src/mds/cephfs_features.cc +++ b/src/mds/cephfs_features.cc @@ -30,6 +30,7 @@ static const std::array feature_names "notify_session_state", "op_getvxattr", "32bits_retry_fwd", + "new_snaprealm_info", }; static_assert(feature_names.size() == CEPHFS_FEATURE_MAX + 1); diff --git 
a/src/mds/cephfs_features.h b/src/mds/cephfs_features.h index cd6fde1a57435..9c16388ecd288 100644 --- a/src/mds/cephfs_features.h +++ b/src/mds/cephfs_features.h @@ -46,7 +46,8 @@ namespace ceph { #define CEPHFS_FEATURE_NOTIFY_SESSION_STATE 16 #define CEPHFS_FEATURE_OP_GETVXATTR 17 #define CEPHFS_FEATURE_32BITS_RETRY_FWD 18 -#define CEPHFS_FEATURE_MAX 18 +#define CEPHFS_FEATURE_NEW_SNAPREALM_INFO 19 +#define CEPHFS_FEATURE_MAX 19 #define CEPHFS_FEATURES_ALL { \ 0, 1, 2, 3, 4, \ @@ -66,6 +67,7 @@ namespace ceph { CEPHFS_FEATURE_NOTIFY_SESSION_STATE, \ CEPHFS_FEATURE_OP_GETVXATTR, \ CEPHFS_FEATURE_32BITS_RETRY_FWD, \ + CEPHFS_FEATURE_NEW_SNAPREALM_INFO \ } #define CEPHFS_METRIC_FEATURES_ALL { \